// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_irq.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * The firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */
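
/*
 * Illustrative summary (derived from the DOC comment above, not from the GuC
 * interface spec): over its lifetime a single-lrc context typically sees the
 * following H2G sequence, with the disable and deregister steps each gated on
 * their corresponding G2H before the context state may be touched again:
 *
 *   register context		(at request creation)
 *   schedule enable		(first submission)
 *   context submit		(subsequent submissions, LRC tail updates only)
 *   schedule disable		(pin count reaches zero, after the delay)
 *   deregister context		(context destruction or guc_id steal)
 */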
/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)
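
/*
 * Worked example (illustrative, assuming the full 64k guc_id space described
 * in the DOC comment above): the multi-lrc partition is 64k / 16 = 4k
 * contiguous guc_ids, comfortably above the stated minimum of 32, leaving the
 * remaining ~60k ids for single-lrc contexts.
 */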
/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

/*
 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
 * A context close can race with the submission path, so SCHED_STATE_CLOSED
 * can be set immediately before we try to register.
 */
#define SCHED_STATE_VALID_INIT \
	(SCHED_STATE_BLOCKED_MASK | \
	 SCHED_STATE_CLOSED | \
	 SCHED_STATE_REGISTERED)

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}
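
/*
 * Layout note (derived from the defines above): bits 0-8 of sched_state are
 * individual flags, while bits 9-20 (SCHED_STATE_BLOCKED_MASK) form a 12-bit
 * counter of nested blocks. For example, two incr_context_blocked() calls
 * leave context_blocked() == 2, and the GEM_BUG_ON() checks in
 * incr_context_blocked()/decr_context_blocked() catch the counter wrapping
 * back to zero on overflow or underflow.
 */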
static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}

static inline void clr_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}

static inline bool context_enabled(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
}

static inline void set_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
}

static inline void clr_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
}

static inline bool context_pending_enable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
}

static inline void set_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
}

static inline void clr_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
}

static inline bool context_registered(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
}

static inline void set_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
}

static inline void clr_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}

static inline bool context_policy_required(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
}

static inline void set_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
}

static inline void clr_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}

static inline bool context_close_done(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
}

static inline void set_context_close_done(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
}

static inline u32 context_blocked(struct intel_context *ce)
{
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		SCHED_STATE_BLOCKED_SHIFT;
}

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;

	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */

	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}

static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static inline bool context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0					guc_process_desc
 * + sizeof(struct guc_process_desc)	child go
 * + CACHELINE_BYTES			child join[0]
 * ...
 * + CACHELINE_BYTES			child join[n - 1]
 * ...					unused
 * PARENT_SCRATCH_SIZE / 2		work queue start
 * ...					work queue
 * PARENT_SCRATCH_SIZE - 1		work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}

static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in work queue. Caching a value of head pointer in
	 * intel_context structure in order to reduce the number of accesses
	 * to shared GPU memory which may be across a PCIe bus.
	 */
#define AVAILABLE_SPACE	\
	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);

		if (wqi_size > AVAILABLE_SPACE)
			return NULL;
	}
#undef AVAILABLE_SPACE

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}
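
/*
 * Note on the circular-buffer math above: CIRC_SPACE() always keeps one byte
 * of the work queue unused so that an empty queue (head == tail) can be
 * distinguished from a full one, i.e. with head == tail the space reported is
 * WQ_SIZE - 1 rather than WQ_SIZE.
 */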
static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	struct intel_context *ce = xa_load(&guc->context_lookup, id);

	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return ce;
}

static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
{
	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;

	if (!base)
		return NULL;

	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);

	return &base[index];
}

static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
{
	u32 size;
	int ret;

	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
			  GUC_MAX_CONTEXT_ID);
	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
					     (void **)&guc->lrc_desc_pool_vaddr_v69);
	if (ret)
		return ret;

	return 0;
}

static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
{
	if (!guc->lrc_desc_pool_vaddr_v69)
		return;

	guc->lrc_desc_pool_vaddr_v69 = NULL;
	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
}

static inline bool guc_submission_initialized(struct intel_guc *guc)
{
	return guc->submission_initialized;
}

static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
{
	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);

	if (desc)
		memset(desc, 0, sizeof(*desc));
}

static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
	return __get_context(guc, id);
}

static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long flags;

	/*
	 * xarray API doesn't have xa_save_irqsave wrapper, so calling the
	 * lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	_reset_lrc_desc_v69(guc, id);

	/*
	 * xarray API doesn't have xa_erase_irqsave wrapper, so calling
	 * the lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
		wake_up_all(&guc->ct.wq);
}

static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	/*
	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
	 * so we don't handle the case where we don't get a reply because we
	 * aborted the send due to the channel being busy.
	 */
	GEM_BUG_ON(g2h_len_dw && !loop);

	if (g2h_len_dw)
		atomic_inc(&guc->outstanding_submission_g2h);

	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}

int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (!atomic_read(wait_var))
		return 0;

	if (!timeout)
		return -ETIME;

	for (;;) {
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -EINTR;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	return (timeout < 0) ? timeout : 0;
}

int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
{
	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
		return 0;

	return intel_guc_wait_for_pending_msg(guc,
					      &guc->outstanding_submission_g2h,
					      true, timeout);
}

static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);

static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int err = 0;
	struct intel_context *ce = request_to_scheduling_context(rq);
	u32 action[3];
	int len = 0;
	u32 g2h_len_dw = 0;
	bool enabled;

	lockdep_assert_held(&rq->engine->sched_engine->lock);

	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request was resubmitted after the context was banned.
	 */
	if (unlikely(!intel_context_is_schedulable(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));

	if (context_policy_required(ce)) {
		err = guc_context_policy_init_v70(ce, false);
		if (err)
			return err;
	}

	spin_lock(&ce->guc_state.lock);

	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock. For multi-lrc we still submit the
	 * context to move the LRC tails.
	 */
	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
		goto out;

	enabled = context_enabled(ce) || context_blocked(ce);

	if (!enabled) {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id.id;
		action[len++] = GUC_CONTEXT_ENABLE;
		set_context_pending_enable(ce);
		intel_context_get(ce);
		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
	} else {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = ce->guc_id.id;
	}

	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
	if (!enabled && !err) {
		trace_intel_context_sched_enable(ce);
		atomic_inc(&guc->outstanding_submission_g2h);
		set_context_enabled(ce);

		/*
		 * Without multi-lrc KMD does the submission step (moving the
		 * lrc tail) so enabling scheduling is sufficient to submit the
		 * context. This isn't the case in multi-lrc submission as the
		 * GuC needs to move the tails, hence the need for another H2G
		 * to submit a multi-lrc context after enabling scheduling.
		 */
		if (intel_context_is_parent(ce)) {
			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
			err = intel_guc_send_nb(guc, action, len - 1, 0);
		}
	} else if (!enabled) {
		clr_context_pending_enable(ce);
		intel_context_put(ce);
	}
	if (likely(!err))
		trace_i915_request_guc_submit(rq);

out:
	spin_unlock(&ce->guc_state.lock);
	return err;
}

static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int ret = __guc_add_request(guc, rq);

	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_ADD_REQUEST;
	}

	return ret;
}

static inline void guc_set_lrc_tail(struct i915_request *rq)
{
	rq->context->lrc_reg_state[CTX_RING_TAIL] =
		intel_ring_set_tail(rq->ring, rq->tail);
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static bool is_multi_lrc_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static bool can_merge_rq(struct i915_request *rq,
			 struct i915_request *last)
{
	return request_to_scheduling_context(rq) ==
		request_to_scheduling_context(last);
}

static u32 wq_space_until_wrap(struct intel_context *ce)
{
	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}

static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));

	/*
	 * Ensure WQI are visible before updating tail
	 */
	intel_guc_write_barrier(ce_to_guc(ce));

	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
		(WQ_SIZE - 1);
	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}

static int guc_wq_noop_append(struct intel_context *ce)
{
	u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;

	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	ce->parallel.guc.wqi_tail = 0;

	return 0;
}

static int __guc_wq_item_append(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	struct intel_context *child;
	unsigned int wqi_size = (ce->parallel.number_children + 4) *
		sizeof(u32);
	u32 *wqi;
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int ret;

	/* Ensure context is in correct state before updating the work queue */
	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));
	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
	GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));

	/* Insert NOOP if this work queue item will wrap the tail pointer. */
	if (wqi_size > wq_space_until_wrap(ce)) {
		ret = guc_wq_noop_append(ce);
		if (ret)
			return ret;
	}

	wqi = get_wq_pointer(ce, wqi_size);
	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	*wqi++ = ce->lrc.lrca;
	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
	*wqi++ = 0; /* fence_id */
	for_each_child(ce, child)
		*wqi++ = child->ring->tail / sizeof(u64);

	write_wqi(ce, wqi_size);

	return 0;
}

static int guc_wq_item_append(struct intel_guc *guc,
			      struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	int ret;

	if (unlikely(!intel_context_is_schedulable(ce)))
		return 0;

	ret = __guc_wq_item_append(rq);
	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
	}

	return ret;
}

static bool multi_lrc_submit(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);

	intel_ring_set_tail(rq->ring, rq->tail);

	/*
	 * We expect the front end (execbuf IOCTL) to set this flag on the last
	 * request generated from a multi-BB submission. This indicates to the
	 * backend (GuC interface) that we should submit this context thus
	 * submitting all the requests generated in parallel.
	 */
	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
		!intel_context_is_schedulable(ce);
}

static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
			     intel_context_is_schedulable(ce))) {
			ret = try_context_registration(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}

static void guc_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	unsigned long flags;
	bool loop;

	spin_lock_irqsave(&sched_engine->lock, flags);

	do {
		loop = guc_dequeue_one_context(sched_engine->private_data);
	} while (loop);

	i915_sched_engine_reset_on_empty(sched_engine);

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (iir & GT_RENDER_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(engine);
}

static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);

static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but the
		 * deregister G2H was lost. In this case we don't touch the ref
		 * count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
		    (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
			/* successful cancel so jump straight to close it */
			intel_context_sched_disable_unpin(ce);
		}

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async_untracked(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically at
 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}
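
/*
 * Worked example for the rollover handling above (illustrative numbers): with
 * gt_stamp = 0x1_0000_0040 (gt_stamp_hi = 1, gt_stamp_last = 0x40) and
 * new_start = 0xffffff00, the second condition applies: new_start >
 * gt_stamp_last, the u32 difference (gt_stamp_last - new_start) = 0x140 is
 * within POLL_TIME_CLKS and gt_stamp_hi is non-zero, so gt_stamp_hi is
 * decremented and the extended start becomes 0x0_ffffff00, i.e. the context
 * switched in just before the 32-bit timestamp wrapped.
 */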
#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts result in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
 * any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from previous sample stats->total_gt_clks
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg, shift;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	return 3 - shift;
}

static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 gt_stamp_lo, gt_stamp_hi;
	u64 gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1) >> guc->timestamp.shift;
	gt_stamp_lo = lower_32_bits(gpm_ts);
	*now = ktime_get();

	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
		gt_stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}

/*
 * Unlike the execlist mode of submission total and active times are in terms of
 * gt clocks. The *now parameter is retained to return the cpu time at which the
 * busyness was sampled.
 */
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
	struct intel_gt *gt = engine->gt;
	struct intel_guc *guc = &gt->uc.guc;
	u64 total, gt_stamp_saved;
	unsigned long flags;
	u32 reset_count;
	bool in_reset;
	intel_wakeref_t wakeref;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	/*
	 * If a reset happened, we risk reading partially updated engine
	 * busyness from GuC, so we just use the driver stored copy of busyness.
	 * Synchronize with gt reset using reset_count and the
	 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
	 * after the I915_RESET_BACKOFF flag, so ensure that the reset_count is
	 * usable by checking the flag afterwards.
	 */
	reset_count = i915_reset_count(gpu_error);
	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

	*now = ktime_get();

	/*
	 * The active busyness depends on start_gt_clk and gt_stamp.
	 * gt_stamp is updated by i915 only when gt is awake and the
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
	wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
	if (wakeref) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
		/*
		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
		 * start_gt_clk' calculation below for active engines.
		 */
		guc_update_engine_gt_clks(engine);
		guc_update_pm_timestamp(guc, now);
		intel_gt_pm_put_async(gt, wakeref);
		if (i915_reset_count(gpu_error) != reset_count) {
			*stats = stats_saved;
			guc->timestamp.gt_stamp = gt_stamp_saved;
		}
	}

	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
	if (stats->running) {
		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

		total += intel_gt_clock_interval_to_ns(gt, clk);
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);

	return ns_to_ktime(total);
}

static void guc_enable_busyness_worker(struct intel_guc *guc)
{
	mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay);
}

static void guc_cancel_busyness_worker(struct intel_guc *guc)
{
	cancel_delayed_work_sync(&guc->timestamp.work);
}

static void __reset_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	guc_cancel_busyness_worker(guc);

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id) {
		guc_update_engine_gt_clks(engine);
		engine->stats.guc.prev_total = 0;
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __update_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	guc->timestamp.last_stat_jiffies = jiffies;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id)
		guc_update_engine_gt_clks(engine);

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __guc_context_update_stats(struct intel_context *ce)
{
	struct intel_guc *guc = ce_to_guc(ce);
	unsigned long flags;

	spin_lock_irqsave(&guc->timestamp.lock, flags);
	lrc_update_runtime(ce);
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void guc_context_update_stats(struct intel_context *ce)
{
	if (!intel_context_pin_if_active(ce))
		return;

	__guc_context_update_stats(ce);
	intel_context_unpin(ce);
}
*guc = container_of(wrk, typeof(*guc), 1430 timestamp.work.work); 1431 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1432 struct intel_gt *gt = guc_to_gt(guc); 1433 struct intel_context *ce; 1434 intel_wakeref_t wakeref; 1435 unsigned long index; 1436 int srcu, ret; 1437 1438 /* 1439 * Ideally the busyness worker should take a gt pm wakeref because the 1440 * worker only needs to be active while gt is awake. However, the 1441 * gt_park path cancels the worker synchronously and this complicates 1442 * the flow if the worker is also running at the same time. The cancel 1443 * waits for the worker and when the worker releases the wakeref, that 1444 * would call gt_park and would lead to a deadlock. 1445 * 1446 * The resolution is to take the global pm wakeref if runtime pm is 1447 * already active. If not, we don't need to update the busyness stats as 1448 * the stats would already be updated when the gt was parked. 1449 * 1450 * Note: 1451 * - We do not requeue the worker if we cannot take a reference to runtime 1452 * pm since intel_guc_busyness_unpark would requeue the worker in the 1453 * resume path. 1454 * 1455 * - If the gt was parked longer than time taken for GT timestamp to roll 1456 * over, we ignore those rollovers since we don't care about tracking 1457 * the exact GT time. We only care about roll overs when the gt is 1458 * active and running workloads. 1459 * 1460 * - There is a window of time between gt_park and runtime suspend, 1461 * where the worker may run. This is acceptable since the worker will 1462 * not find any new data to update busyness. 1463 */ 1464 wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); 1465 if (!wakeref) 1466 return; 1467 1468 /* 1469 * Synchronize with gt reset to make sure the worker does not 1470 * corrupt the engine/guc stats. NB: can't actually block waiting 1471 * for a reset to complete as the reset requires flushing out 1472 * this worker thread if started. So waiting would deadlock. 
1473 */ 1474 ret = intel_gt_reset_trylock(gt, &srcu); 1475 if (ret) 1476 goto err_trylock; 1477 1478 __update_guc_busyness_stats(guc); 1479 1480 /* adjust context stats for overflow */ 1481 xa_for_each(&guc->context_lookup, index, ce) 1482 guc_context_update_stats(ce); 1483 1484 intel_gt_reset_unlock(gt, srcu); 1485 1486 guc_enable_busyness_worker(guc); 1487 1488 err_trylock: 1489 intel_runtime_pm_put(>->i915->runtime_pm, wakeref); 1490 } 1491 1492 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1493 { 1494 u32 offset = intel_guc_engine_usage_offset(guc); 1495 u32 action[] = { 1496 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1497 offset, 1498 0, 1499 }; 1500 1501 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1502 } 1503 1504 static int guc_init_engine_stats(struct intel_guc *guc) 1505 { 1506 struct intel_gt *gt = guc_to_gt(guc); 1507 intel_wakeref_t wakeref; 1508 int ret; 1509 1510 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1511 ret = guc_action_enable_usage_stats(guc); 1512 1513 if (ret) 1514 guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); 1515 else 1516 guc_enable_busyness_worker(guc); 1517 1518 return ret; 1519 } 1520 1521 static void guc_fini_engine_stats(struct intel_guc *guc) 1522 { 1523 guc_cancel_busyness_worker(guc); 1524 } 1525 1526 void intel_guc_busyness_park(struct intel_gt *gt) 1527 { 1528 struct intel_guc *guc = >->uc.guc; 1529 1530 if (!guc_submission_initialized(guc)) 1531 return; 1532 1533 /* 1534 * There is a race with suspend flow where the worker runs after suspend 1535 * and causes an unclaimed register access warning. Cancel the worker 1536 * synchronously here. 1537 */ 1538 guc_cancel_busyness_worker(guc); 1539 1540 /* 1541 * Before parking, we should sample engine busyness stats if we need to. 1542 * We can skip it if we are less than half a ping from the last time we 1543 * sampled the busyness stats. 
1544 */ 1545 if (guc->timestamp.last_stat_jiffies && 1546 !time_after(jiffies, guc->timestamp.last_stat_jiffies + 1547 (guc->timestamp.ping_delay / 2))) 1548 return; 1549 1550 __update_guc_busyness_stats(guc); 1551 } 1552 1553 void intel_guc_busyness_unpark(struct intel_gt *gt) 1554 { 1555 struct intel_guc *guc = >->uc.guc; 1556 unsigned long flags; 1557 ktime_t unused; 1558 1559 if (!guc_submission_initialized(guc)) 1560 return; 1561 1562 spin_lock_irqsave(&guc->timestamp.lock, flags); 1563 guc_update_pm_timestamp(guc, &unused); 1564 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1565 guc_enable_busyness_worker(guc); 1566 } 1567 1568 static inline bool 1569 submission_disabled(struct intel_guc *guc) 1570 { 1571 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1572 1573 return unlikely(!sched_engine || 1574 !__tasklet_is_enabled(&sched_engine->tasklet) || 1575 intel_gt_is_wedged(guc_to_gt(guc))); 1576 } 1577 1578 static void disable_submission(struct intel_guc *guc) 1579 { 1580 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1581 1582 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1583 GEM_BUG_ON(!guc->ct.enabled); 1584 __tasklet_disable_sync_once(&sched_engine->tasklet); 1585 sched_engine->tasklet.callback = NULL; 1586 } 1587 } 1588 1589 static void enable_submission(struct intel_guc *guc) 1590 { 1591 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1592 unsigned long flags; 1593 1594 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1595 sched_engine->tasklet.callback = guc_submission_tasklet; 1596 wmb(); /* Make sure callback visible */ 1597 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1598 __tasklet_enable(&sched_engine->tasklet)) { 1599 GEM_BUG_ON(!guc->ct.enabled); 1600 1601 /* And kick in case we missed a new request submission. */ 1602 tasklet_hi_schedule(&sched_engine->tasklet); 1603 } 1604 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1605 } 1606 1607 static void guc_flush_submissions(struct intel_guc *guc) 1608 { 1609 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1610 unsigned long flags; 1611 1612 spin_lock_irqsave(&sched_engine->lock, flags); 1613 spin_unlock_irqrestore(&sched_engine->lock, flags); 1614 } 1615 1616 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1617 1618 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1619 { 1620 if (unlikely(!guc_submission_initialized(guc))) { 1621 /* Reset called during driver load? GuC not yet initialised! 
*/ 1622 return; 1623 } 1624 1625 intel_gt_park_heartbeats(guc_to_gt(guc)); 1626 disable_submission(guc); 1627 guc->interrupts.disable(guc); 1628 __reset_guc_busyness_stats(guc); 1629 1630 /* Flush IRQ handler */ 1631 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1632 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1633 1634 guc_flush_submissions(guc); 1635 guc_flush_destroyed_contexts(guc); 1636 flush_work(&guc->ct.requests.worker); 1637 1638 scrub_guc_desc_for_outstanding_g2h(guc); 1639 } 1640 1641 static struct intel_engine_cs * 1642 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1643 { 1644 struct intel_engine_cs *engine; 1645 intel_engine_mask_t tmp, mask = ve->mask; 1646 unsigned int num_siblings = 0; 1647 1648 for_each_engine_masked(engine, ve->gt, mask, tmp) 1649 if (num_siblings++ == sibling) 1650 return engine; 1651 1652 return NULL; 1653 } 1654 1655 static inline struct intel_engine_cs * 1656 __context_to_physical_engine(struct intel_context *ce) 1657 { 1658 struct intel_engine_cs *engine = ce->engine; 1659 1660 if (intel_engine_is_virtual(engine)) 1661 engine = guc_virtual_get_sibling(engine, 0); 1662 1663 return engine; 1664 } 1665 1666 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1667 { 1668 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1669 1670 if (!intel_context_is_schedulable(ce)) 1671 return; 1672 1673 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1674 1675 /* 1676 * We want a simple context + ring to execute the breadcrumb update. 1677 * We cannot rely on the context being intact across the GPU hang, 1678 * so clear it and rebuild just what we need for the breadcrumb. 1679 * All pending requests for this context will be zapped, and any 1680 * future request will be after userspace has had the opportunity 1681 * to recreate its own state. 1682 */ 1683 if (scrub) 1684 lrc_init_regs(ce, engine, true); 1685 1686 /* Rerun the request; its payload has been neutered (if guilty). */ 1687 lrc_update_regs(ce, engine, head); 1688 } 1689 1690 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1691 { 1692 /* 1693 * Wa_22011802037: In addition to stopping the cs, we need 1694 * to wait for any pending mi force wakeups 1695 */ 1696 if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { 1697 intel_engine_stop_cs(engine); 1698 intel_engine_wait_for_pending_mi_fw(engine); 1699 } 1700 } 1701 1702 static void guc_reset_nop(struct intel_engine_cs *engine) 1703 { 1704 } 1705 1706 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1707 { 1708 } 1709 1710 static void 1711 __unwind_incomplete_requests(struct intel_context *ce) 1712 { 1713 struct i915_request *rq, *rn; 1714 struct list_head *pl; 1715 int prio = I915_PRIORITY_INVALID; 1716 struct i915_sched_engine * const sched_engine = 1717 ce->engine->sched_engine; 1718 unsigned long flags; 1719 1720 spin_lock_irqsave(&sched_engine->lock, flags); 1721 spin_lock(&ce->guc_state.lock); 1722 list_for_each_entry_safe_reverse(rq, rn, 1723 &ce->guc_state.requests, 1724 sched.link) { 1725 if (i915_request_completed(rq)) 1726 continue; 1727 1728 list_del_init(&rq->sched.link); 1729 __i915_request_unsubmit(rq); 1730 1731 /* Push the request back into the queue for later resubmission. 
*/ 1732 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1733 if (rq_prio(rq) != prio) { 1734 prio = rq_prio(rq); 1735 pl = i915_sched_lookup_priolist(sched_engine, prio); 1736 } 1737 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1738 1739 list_add(&rq->sched.link, pl); 1740 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1741 } 1742 spin_unlock(&ce->guc_state.lock); 1743 spin_unlock_irqrestore(&sched_engine->lock, flags); 1744 } 1745 1746 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1747 { 1748 bool guilty; 1749 struct i915_request *rq; 1750 unsigned long flags; 1751 u32 head; 1752 int i, number_children = ce->parallel.number_children; 1753 struct intel_context *parent = ce; 1754 1755 GEM_BUG_ON(intel_context_is_child(ce)); 1756 1757 intel_context_get(ce); 1758 1759 /* 1760 * GuC will implicitly mark the context as non-schedulable when it sends 1761 * the reset notification. Make sure our state reflects this change. The 1762 * context will be marked enabled on resubmission. 1763 */ 1764 spin_lock_irqsave(&ce->guc_state.lock, flags); 1765 clr_context_enabled(ce); 1766 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1767 1768 /* 1769 * For each context in the relationship find the hanging request 1770 * resetting each context / request as needed 1771 */ 1772 for (i = 0; i < number_children + 1; ++i) { 1773 if (!intel_context_is_pinned(ce)) 1774 goto next_context; 1775 1776 guilty = false; 1777 rq = intel_context_get_active_request(ce); 1778 if (!rq) { 1779 head = ce->ring->tail; 1780 goto out_replay; 1781 } 1782 1783 if (i915_request_started(rq)) 1784 guilty = stalled & ce->engine->mask; 1785 1786 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1787 head = intel_ring_wrap(ce->ring, rq->head); 1788 1789 __i915_request_reset(rq, guilty); 1790 i915_request_put(rq); 1791 out_replay: 1792 guc_reset_state(ce, head, guilty); 1793 next_context: 1794 if (i != number_children) 1795 ce = list_next_entry(ce, parallel.child_link); 1796 } 1797 1798 __unwind_incomplete_requests(parent); 1799 intel_context_put(parent); 1800 } 1801 1802 void wake_up_all_tlb_invalidate(struct intel_guc *guc) 1803 { 1804 struct intel_guc_tlb_wait *wait; 1805 unsigned long i; 1806 1807 if (!intel_guc_tlb_invalidation_is_available(guc)) 1808 return; 1809 1810 xa_lock_irq(&guc->tlb_lookup); 1811 xa_for_each(&guc->tlb_lookup, i, wait) 1812 wake_up(&wait->wq); 1813 xa_unlock_irq(&guc->tlb_lookup); 1814 } 1815 1816 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1817 { 1818 struct intel_context *ce; 1819 unsigned long index; 1820 unsigned long flags; 1821 1822 if (unlikely(!guc_submission_initialized(guc))) { 1823 /* Reset called during driver load? GuC not yet initialised! 
*/ 1824 return; 1825 } 1826 1827 xa_lock_irqsave(&guc->context_lookup, flags); 1828 xa_for_each(&guc->context_lookup, index, ce) { 1829 if (!kref_get_unless_zero(&ce->ref)) 1830 continue; 1831 1832 xa_unlock(&guc->context_lookup); 1833 1834 if (intel_context_is_pinned(ce) && 1835 !intel_context_is_child(ce)) 1836 __guc_reset_context(ce, stalled); 1837 1838 intel_context_put(ce); 1839 1840 xa_lock(&guc->context_lookup); 1841 } 1842 xa_unlock_irqrestore(&guc->context_lookup, flags); 1843 1844 /* GuC is blown away, drop all references to contexts */ 1845 xa_destroy(&guc->context_lookup); 1846 } 1847 1848 static void guc_cancel_context_requests(struct intel_context *ce) 1849 { 1850 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1851 struct i915_request *rq; 1852 unsigned long flags; 1853 1854 /* Mark all executing requests as skipped. */ 1855 spin_lock_irqsave(&sched_engine->lock, flags); 1856 spin_lock(&ce->guc_state.lock); 1857 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1858 i915_request_put(i915_request_mark_eio(rq)); 1859 spin_unlock(&ce->guc_state.lock); 1860 spin_unlock_irqrestore(&sched_engine->lock, flags); 1861 } 1862 1863 static void 1864 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1865 { 1866 struct i915_request *rq, *rn; 1867 struct rb_node *rb; 1868 unsigned long flags; 1869 1870 /* Can be called during boot if GuC fails to load */ 1871 if (!sched_engine) 1872 return; 1873 1874 /* 1875 * Before we call engine->cancel_requests(), we should have exclusive 1876 * access to the submission state. This is arranged for us by the 1877 * caller disabling the interrupt generation, the tasklet and other 1878 * threads that may then access the same state, giving us a free hand 1879 * to reset state. However, we still need to let lockdep be aware that 1880 * we know this state may be accessed in hardirq context, so we 1881 * disable the irq around this manipulation and we want to keep 1882 * the spinlock focused on its duties and not accidentally conflate 1883 * coverage to the submission's irq state. (Similarly, although we 1884 * shouldn't need to disable irq around the manipulation of the 1885 * submission's irq state, we also wish to remind ourselves that 1886 * it is irq state.) 1887 */ 1888 spin_lock_irqsave(&sched_engine->lock, flags); 1889 1890 /* Flush the queued requests to the timeline list (for retiring). 
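* Each request pulled off the priority tree below is submitted and then
* immediately marked with -EIO via i915_request_mark_eio(), so it
* completes as cancelled and can be retired normally.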
*/ 1891 while ((rb = rb_first_cached(&sched_engine->queue))) { 1892 struct i915_priolist *p = to_priolist(rb); 1893 1894 priolist_for_each_request_consume(rq, rn, p) { 1895 list_del_init(&rq->sched.link); 1896 1897 __i915_request_submit(rq); 1898 1899 i915_request_put(i915_request_mark_eio(rq)); 1900 } 1901 1902 rb_erase_cached(&p->node, &sched_engine->queue); 1903 i915_priolist_free(p); 1904 } 1905 1906 /* Remaining _unready_ requests will be nop'ed when submitted */ 1907 1908 sched_engine->queue_priority_hint = INT_MIN; 1909 sched_engine->queue = RB_ROOT_CACHED; 1910 1911 spin_unlock_irqrestore(&sched_engine->lock, flags); 1912 } 1913 1914 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1915 { 1916 struct intel_context *ce; 1917 unsigned long index; 1918 unsigned long flags; 1919 1920 xa_lock_irqsave(&guc->context_lookup, flags); 1921 xa_for_each(&guc->context_lookup, index, ce) { 1922 if (!kref_get_unless_zero(&ce->ref)) 1923 continue; 1924 1925 xa_unlock(&guc->context_lookup); 1926 1927 if (intel_context_is_pinned(ce) && 1928 !intel_context_is_child(ce)) 1929 guc_cancel_context_requests(ce); 1930 1931 intel_context_put(ce); 1932 1933 xa_lock(&guc->context_lookup); 1934 } 1935 xa_unlock_irqrestore(&guc->context_lookup, flags); 1936 1937 guc_cancel_sched_engine_requests(guc->sched_engine); 1938 1939 /* GuC is blown away, drop all references to contexts */ 1940 xa_destroy(&guc->context_lookup); 1941 1942 /* 1943 * Wedged GT won't respond to any TLB invalidation request. Simply 1944 * release all the blocked waiters. 1945 */ 1946 wake_up_all_tlb_invalidate(guc); 1947 } 1948 1949 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1950 { 1951 /* Reset called during driver load or during wedge? */ 1952 if (unlikely(!guc_submission_initialized(guc) || 1953 intel_gt_is_wedged(guc_to_gt(guc)))) { 1954 return; 1955 } 1956 1957 /* 1958 * Technically possible for either of these values to be non-zero here, 1959 * but very unlikely + harmless. Regardless let's add a warn so we can 1960 * see in CI if this happens frequently / a precursor to taking down the 1961 * machine. 1962 */ 1963 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1964 atomic_set(&guc->outstanding_submission_g2h, 0); 1965 1966 intel_guc_global_policies_update(guc); 1967 enable_submission(guc); 1968 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1969 1970 /* 1971 * The full GT reset will have cleared the TLB caches and flushed the 1972 * G2H message queue; we can release all the blocked waiters. 1973 */ 1974 wake_up_all_tlb_invalidate(guc); 1975 } 1976 1977 static void destroyed_worker_func(struct work_struct *w); 1978 static void reset_fail_worker_func(struct work_struct *w); 1979 1980 bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc) 1981 { 1982 return HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915) && 1983 intel_guc_is_ready(guc); 1984 } 1985 1986 static int init_tlb_lookup(struct intel_guc *guc) 1987 { 1988 struct intel_guc_tlb_wait *wait; 1989 int err; 1990 1991 if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) 1992 return 0; 1993 1994 xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC); 1995 1996 wait = kzalloc(sizeof(*wait), GFP_KERNEL); 1997 if (!wait) 1998 return -ENOMEM; 1999 2000 init_waitqueue_head(&wait->wq); 2001 2002 /* Preallocate a shared id for use under memory pressure. 
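* The id allocated here is remembered in guc->serial_slot. If a fresh
* wait entry cannot be allocated later (e.g. under memory pressure),
* the TLB invalidation path is expected to fall back to this shared
* slot, with any concurrent waiters simply serialising on its single
* wait queue.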
*/ 2003 err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait, 2004 xa_limit_32b, &guc->next_seqno, GFP_KERNEL); 2005 if (err < 0) { 2006 kfree(wait); 2007 return err; 2008 } 2009 2010 return 0; 2011 } 2012 2013 static void fini_tlb_lookup(struct intel_guc *guc) 2014 { 2015 struct intel_guc_tlb_wait *wait; 2016 2017 if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) 2018 return; 2019 2020 wait = xa_load(&guc->tlb_lookup, guc->serial_slot); 2021 if (wait && wait->busy) 2022 guc_err(guc, "Unexpected busy item in tlb_lookup on fini\n"); 2023 kfree(wait); 2024 2025 xa_destroy(&guc->tlb_lookup); 2026 } 2027 2028 /* 2029 * Set up the memory resources to be shared with the GuC (via the GGTT) 2030 * at firmware loading time. 2031 */ 2032 int intel_guc_submission_init(struct intel_guc *guc) 2033 { 2034 struct intel_gt *gt = guc_to_gt(guc); 2035 int ret; 2036 2037 if (guc->submission_initialized) 2038 return 0; 2039 2040 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { 2041 ret = guc_lrc_desc_pool_create_v69(guc); 2042 if (ret) 2043 return ret; 2044 } 2045 2046 ret = init_tlb_lookup(guc); 2047 if (ret) 2048 goto destroy_pool; 2049 2050 guc->submission_state.guc_ids_bitmap = 2051 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 2052 if (!guc->submission_state.guc_ids_bitmap) { 2053 ret = -ENOMEM; 2054 goto destroy_tlb; 2055 } 2056 2057 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 2058 guc->timestamp.shift = gpm_timestamp_shift(gt); 2059 guc->submission_initialized = true; 2060 2061 return 0; 2062 2063 destroy_tlb: 2064 fini_tlb_lookup(guc); 2065 destroy_pool: 2066 guc_lrc_desc_pool_destroy_v69(guc); 2067 return ret; 2068 } 2069 2070 void intel_guc_submission_fini(struct intel_guc *guc) 2071 { 2072 if (!guc->submission_initialized) 2073 return; 2074 2075 guc_flush_destroyed_contexts(guc); 2076 guc_lrc_desc_pool_destroy_v69(guc); 2077 i915_sched_engine_put(guc->sched_engine); 2078 bitmap_free(guc->submission_state.guc_ids_bitmap); 2079 fini_tlb_lookup(guc); 2080 guc->submission_initialized = false; 2081 } 2082 2083 static inline void queue_request(struct i915_sched_engine *sched_engine, 2084 struct i915_request *rq, 2085 int prio) 2086 { 2087 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2088 list_add_tail(&rq->sched.link, 2089 i915_sched_lookup_priolist(sched_engine, prio)); 2090 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2091 tasklet_hi_schedule(&sched_engine->tasklet); 2092 } 2093 2094 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 2095 struct i915_request *rq) 2096 { 2097 int ret = 0; 2098 2099 __i915_request_submit(rq); 2100 2101 trace_i915_request_in(rq, 0); 2102 2103 if (is_multi_lrc_rq(rq)) { 2104 if (multi_lrc_submit(rq)) { 2105 ret = guc_wq_item_append(guc, rq); 2106 if (!ret) 2107 ret = guc_add_request(guc, rq); 2108 } 2109 } else { 2110 guc_set_lrc_tail(rq); 2111 ret = guc_add_request(guc, rq); 2112 } 2113 2114 if (unlikely(ret == -EPIPE)) 2115 disable_submission(guc); 2116 2117 return ret; 2118 } 2119 2120 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 2121 { 2122 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2123 struct intel_context *ce = request_to_scheduling_context(rq); 2124 2125 return submission_disabled(guc) || guc->stalled_request || 2126 !i915_sched_engine_is_empty(sched_engine) || 2127 !ctx_id_mapped(guc, ce->guc_id.id); 2128 } 2129 2130 static void guc_submit_request(struct i915_request *rq) 2131 { 2132 struct i915_sched_engine *sched_engine = 
rq->engine->sched_engine; 2133 struct intel_guc *guc = &rq->engine->gt->uc.guc; 2134 unsigned long flags; 2135 2136 /* Will be called from irq-context when using foreign fences. */ 2137 spin_lock_irqsave(&sched_engine->lock, flags); 2138 2139 if (need_tasklet(guc, rq)) 2140 queue_request(sched_engine, rq, rq_prio(rq)); 2141 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 2142 tasklet_hi_schedule(&sched_engine->tasklet); 2143 2144 spin_unlock_irqrestore(&sched_engine->lock, flags); 2145 } 2146 2147 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 2148 { 2149 int ret; 2150 2151 GEM_BUG_ON(intel_context_is_child(ce)); 2152 2153 if (intel_context_is_parent(ce)) 2154 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2155 NUMBER_MULTI_LRC_GUC_ID(guc), 2156 order_base_2(ce->parallel.number_children 2157 + 1)); 2158 else 2159 ret = ida_simple_get(&guc->submission_state.guc_ids, 2160 NUMBER_MULTI_LRC_GUC_ID(guc), 2161 guc->submission_state.num_guc_ids, 2162 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2163 __GFP_NOWARN); 2164 if (unlikely(ret < 0)) 2165 return ret; 2166 2167 if (!intel_context_is_parent(ce)) 2168 ++guc->submission_state.guc_ids_in_use; 2169 2170 ce->guc_id.id = ret; 2171 return 0; 2172 } 2173 2174 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2175 { 2176 GEM_BUG_ON(intel_context_is_child(ce)); 2177 2178 if (!context_guc_id_invalid(ce)) { 2179 if (intel_context_is_parent(ce)) { 2180 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2181 ce->guc_id.id, 2182 order_base_2(ce->parallel.number_children 2183 + 1)); 2184 } else { 2185 --guc->submission_state.guc_ids_in_use; 2186 ida_simple_remove(&guc->submission_state.guc_ids, 2187 ce->guc_id.id); 2188 } 2189 clr_ctx_id_mapping(guc, ce->guc_id.id); 2190 set_context_guc_id_invalid(ce); 2191 } 2192 if (!list_empty(&ce->guc_id.link)) 2193 list_del_init(&ce->guc_id.link); 2194 } 2195 2196 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2197 { 2198 unsigned long flags; 2199 2200 spin_lock_irqsave(&guc->submission_state.lock, flags); 2201 __release_guc_id(guc, ce); 2202 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2203 } 2204 2205 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2206 { 2207 struct intel_context *cn; 2208 2209 lockdep_assert_held(&guc->submission_state.lock); 2210 GEM_BUG_ON(intel_context_is_child(ce)); 2211 GEM_BUG_ON(intel_context_is_parent(ce)); 2212 2213 if (!list_empty(&guc->submission_state.guc_id_list)) { 2214 cn = list_first_entry(&guc->submission_state.guc_id_list, 2215 struct intel_context, 2216 guc_id.link); 2217 2218 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2219 GEM_BUG_ON(context_guc_id_invalid(cn)); 2220 GEM_BUG_ON(intel_context_is_child(cn)); 2221 GEM_BUG_ON(intel_context_is_parent(cn)); 2222 2223 list_del_init(&cn->guc_id.link); 2224 ce->guc_id.id = cn->guc_id.id; 2225 2226 spin_lock(&cn->guc_state.lock); 2227 clr_context_registered(cn); 2228 spin_unlock(&cn->guc_state.lock); 2229 2230 set_context_guc_id_invalid(cn); 2231 2232 #ifdef CONFIG_DRM_I915_SELFTEST 2233 guc->number_guc_id_stolen++; 2234 #endif 2235 2236 return 0; 2237 } else { 2238 return -EAGAIN; 2239 } 2240 } 2241 2242 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2243 { 2244 int ret; 2245 2246 lockdep_assert_held(&guc->submission_state.lock); 2247 GEM_BUG_ON(intel_context_is_child(ce)); 2248 2249 ret = new_guc_id(guc, ce); 2250 if (unlikely(ret < 0)) { 2251 if 
(intel_context_is_parent(ce)) 2252 return -ENOSPC; 2253 2254 ret = steal_guc_id(guc, ce); 2255 if (ret < 0) 2256 return ret; 2257 } 2258 2259 if (intel_context_is_parent(ce)) { 2260 struct intel_context *child; 2261 int i = 1; 2262 2263 for_each_child(ce, child) 2264 child->guc_id.id = ce->guc_id.id + i++; 2265 } 2266 2267 return 0; 2268 } 2269 2270 #define PIN_GUC_ID_TRIES 4 2271 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2272 { 2273 int ret = 0; 2274 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2275 2276 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2277 2278 try_again: 2279 spin_lock_irqsave(&guc->submission_state.lock, flags); 2280 2281 might_lock(&ce->guc_state.lock); 2282 2283 if (context_guc_id_invalid(ce)) { 2284 ret = assign_guc_id(guc, ce); 2285 if (ret) 2286 goto out_unlock; 2287 ret = 1; /* Indidcates newly assigned guc_id */ 2288 } 2289 if (!list_empty(&ce->guc_id.link)) 2290 list_del_init(&ce->guc_id.link); 2291 atomic_inc(&ce->guc_id.ref); 2292 2293 out_unlock: 2294 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2295 2296 /* 2297 * -EAGAIN indicates no guc_id are available, let's retire any 2298 * outstanding requests to see if that frees up a guc_id. If the first 2299 * retire didn't help, insert a sleep with the timeslice duration before 2300 * attempting to retire more requests. Double the sleep period each 2301 * subsequent pass before finally giving up. The sleep period has max of 2302 * 100ms and minimum of 1ms. 2303 */ 2304 if (ret == -EAGAIN && --tries) { 2305 if (PIN_GUC_ID_TRIES - tries > 1) { 2306 unsigned int timeslice_shifted = 2307 ce->engine->props.timeslice_duration_ms << 2308 (PIN_GUC_ID_TRIES - tries - 2); 2309 unsigned int max = min_t(unsigned int, 100, 2310 timeslice_shifted); 2311 2312 msleep(max_t(unsigned int, max, 1)); 2313 } 2314 intel_gt_retire_requests(guc_to_gt(guc)); 2315 goto try_again; 2316 } 2317 2318 return ret; 2319 } 2320 2321 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2322 { 2323 unsigned long flags; 2324 2325 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2326 GEM_BUG_ON(intel_context_is_child(ce)); 2327 2328 if (unlikely(context_guc_id_invalid(ce) || 2329 intel_context_is_parent(ce))) 2330 return; 2331 2332 spin_lock_irqsave(&guc->submission_state.lock, flags); 2333 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2334 !atomic_read(&ce->guc_id.ref)) 2335 list_add_tail(&ce->guc_id.link, 2336 &guc->submission_state.guc_id_list); 2337 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2338 } 2339 2340 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2341 struct intel_context *ce, 2342 u32 guc_id, 2343 u32 offset, 2344 bool loop) 2345 { 2346 struct intel_context *child; 2347 u32 action[4 + MAX_ENGINE_INSTANCE]; 2348 int len = 0; 2349 2350 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2351 2352 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2353 action[len++] = guc_id; 2354 action[len++] = ce->parallel.number_children + 1; 2355 action[len++] = offset; 2356 for_each_child(ce, child) { 2357 offset += sizeof(struct guc_lrc_desc_v69); 2358 action[len++] = offset; 2359 } 2360 2361 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2362 } 2363 2364 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2365 struct intel_context *ce, 2366 struct guc_ctxt_registration_info *info, 2367 bool loop) 2368 { 2369 struct intel_context *child; 2370 u32 action[13 + 
(MAX_ENGINE_INSTANCE * 2)]; 2371 int len = 0; 2372 u32 next_id; 2373 2374 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2375 2376 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2377 action[len++] = info->flags; 2378 action[len++] = info->context_idx; 2379 action[len++] = info->engine_class; 2380 action[len++] = info->engine_submit_mask; 2381 action[len++] = info->wq_desc_lo; 2382 action[len++] = info->wq_desc_hi; 2383 action[len++] = info->wq_base_lo; 2384 action[len++] = info->wq_base_hi; 2385 action[len++] = info->wq_size; 2386 action[len++] = ce->parallel.number_children + 1; 2387 action[len++] = info->hwlrca_lo; 2388 action[len++] = info->hwlrca_hi; 2389 2390 next_id = info->context_idx + 1; 2391 for_each_child(ce, child) { 2392 GEM_BUG_ON(next_id++ != child->guc_id.id); 2393 2394 /* 2395 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2396 * only supports 32 bit currently. 2397 */ 2398 action[len++] = lower_32_bits(child->lrc.lrca); 2399 action[len++] = upper_32_bits(child->lrc.lrca); 2400 } 2401 2402 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2403 2404 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2405 } 2406 2407 static int __guc_action_register_context_v69(struct intel_guc *guc, 2408 u32 guc_id, 2409 u32 offset, 2410 bool loop) 2411 { 2412 u32 action[] = { 2413 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2414 guc_id, 2415 offset, 2416 }; 2417 2418 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2419 0, loop); 2420 } 2421 2422 static int __guc_action_register_context_v70(struct intel_guc *guc, 2423 struct guc_ctxt_registration_info *info, 2424 bool loop) 2425 { 2426 u32 action[] = { 2427 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2428 info->flags, 2429 info->context_idx, 2430 info->engine_class, 2431 info->engine_submit_mask, 2432 info->wq_desc_lo, 2433 info->wq_desc_hi, 2434 info->wq_base_lo, 2435 info->wq_base_hi, 2436 info->wq_size, 2437 info->hwlrca_lo, 2438 info->hwlrca_hi, 2439 }; 2440 2441 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2442 0, loop); 2443 } 2444 2445 static void prepare_context_registration_info_v69(struct intel_context *ce); 2446 static void prepare_context_registration_info_v70(struct intel_context *ce, 2447 struct guc_ctxt_registration_info *info); 2448 2449 static int 2450 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2451 { 2452 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2453 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2454 2455 prepare_context_registration_info_v69(ce); 2456 2457 if (intel_context_is_parent(ce)) 2458 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2459 offset, loop); 2460 else 2461 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2462 offset, loop); 2463 } 2464 2465 static int 2466 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2467 { 2468 struct guc_ctxt_registration_info info; 2469 2470 prepare_context_registration_info_v70(ce, &info); 2471 2472 if (intel_context_is_parent(ce)) 2473 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2474 else 2475 return __guc_action_register_context_v70(guc, &info, loop); 2476 } 2477 2478 static int register_context(struct intel_context *ce, bool loop) 2479 { 2480 struct intel_guc *guc = ce_to_guc(ce); 2481 int ret; 2482 2483 GEM_BUG_ON(intel_context_is_child(ce)); 2484 trace_intel_context_register(ce); 2485 2486 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2487 ret = 
register_context_v70(guc, ce, loop); 2488 else 2489 ret = register_context_v69(guc, ce, loop); 2490 2491 if (likely(!ret)) { 2492 unsigned long flags; 2493 2494 spin_lock_irqsave(&ce->guc_state.lock, flags); 2495 set_context_registered(ce); 2496 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2497 2498 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2499 guc_context_policy_init_v70(ce, loop); 2500 } 2501 2502 return ret; 2503 } 2504 2505 static int __guc_action_deregister_context(struct intel_guc *guc, 2506 u32 guc_id) 2507 { 2508 u32 action[] = { 2509 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2510 guc_id, 2511 }; 2512 2513 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2514 G2H_LEN_DW_DEREGISTER_CONTEXT, 2515 true); 2516 } 2517 2518 static int deregister_context(struct intel_context *ce, u32 guc_id) 2519 { 2520 struct intel_guc *guc = ce_to_guc(ce); 2521 2522 GEM_BUG_ON(intel_context_is_child(ce)); 2523 trace_intel_context_deregister(ce); 2524 2525 return __guc_action_deregister_context(guc, guc_id); 2526 } 2527 2528 static inline void clear_children_join_go_memory(struct intel_context *ce) 2529 { 2530 struct parent_scratch *ps = __get_parent_scratch(ce); 2531 int i; 2532 2533 ps->go.semaphore = 0; 2534 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2535 ps->join[i].semaphore = 0; 2536 } 2537 2538 static inline u32 get_children_go_value(struct intel_context *ce) 2539 { 2540 return __get_parent_scratch(ce)->go.semaphore; 2541 } 2542 2543 static inline u32 get_children_join_value(struct intel_context *ce, 2544 u8 child_index) 2545 { 2546 return __get_parent_scratch(ce)->join[child_index].semaphore; 2547 } 2548 2549 struct context_policy { 2550 u32 count; 2551 struct guc_update_context_policy h2g; 2552 }; 2553 2554 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2555 { 2556 size_t bytes = sizeof(policy->h2g.header) + 2557 (sizeof(policy->h2g.klv[0]) * policy->count); 2558 2559 return bytes / sizeof(u32); 2560 } 2561 2562 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2563 { 2564 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2565 policy->h2g.header.ctx_id = guc_id; 2566 policy->count = 0; 2567 } 2568 2569 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2570 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2571 { \ 2572 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2573 policy->h2g.klv[policy->count].kl = \ 2574 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2575 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2576 policy->h2g.klv[policy->count].value = data; \ 2577 policy->count++; \ 2578 } 2579 2580 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2581 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2582 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2583 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2584 2585 #undef MAKE_CONTEXT_POLICY_ADD 2586 2587 static int __guc_context_set_context_policies(struct intel_guc *guc, 2588 struct context_policy *policy, 2589 bool loop) 2590 { 2591 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2592 __guc_context_policy_action_size(policy), 2593 0, loop); 2594 } 2595 2596 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2597 { 2598 struct intel_engine_cs *engine = ce->engine; 2599 struct intel_guc *guc = &engine->gt->uc.guc; 2600 struct context_policy policy; 2601 u32 
execution_quantum; 2602 u32 preemption_timeout; 2603 unsigned long flags; 2604 int ret; 2605 2606 /* NB: For both of these, zero means disabled. */ 2607 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2608 execution_quantum)); 2609 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2610 preemption_timeout)); 2611 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2612 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2613 2614 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2615 2616 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2617 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2618 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2619 2620 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2621 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2622 2623 ret = __guc_context_set_context_policies(guc, &policy, loop); 2624 2625 spin_lock_irqsave(&ce->guc_state.lock, flags); 2626 if (ret != 0) 2627 set_context_policy_required(ce); 2628 else 2629 clr_context_policy_required(ce); 2630 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2631 2632 return ret; 2633 } 2634 2635 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2636 struct guc_lrc_desc_v69 *desc) 2637 { 2638 desc->policy_flags = 0; 2639 2640 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2641 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2642 2643 /* NB: For both of these, zero means disabled. */ 2644 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2645 desc->execution_quantum)); 2646 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2647 desc->preemption_timeout)); 2648 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2649 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2650 } 2651 2652 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2653 { 2654 /* 2655 * this matches the mapping we do in map_i915_prio_to_guc_prio() 2656 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2657 */ 2658 switch (prio) { 2659 default: 2660 MISSING_CASE(prio); 2661 fallthrough; 2662 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2663 return GEN12_CTX_PRIORITY_NORMAL; 2664 case GUC_CLIENT_PRIORITY_NORMAL: 2665 return GEN12_CTX_PRIORITY_LOW; 2666 case GUC_CLIENT_PRIORITY_HIGH: 2667 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2668 return GEN12_CTX_PRIORITY_HIGH; 2669 } 2670 } 2671 2672 static void prepare_context_registration_info_v69(struct intel_context *ce) 2673 { 2674 struct intel_engine_cs *engine = ce->engine; 2675 struct intel_guc *guc = &engine->gt->uc.guc; 2676 u32 ctx_id = ce->guc_id.id; 2677 struct guc_lrc_desc_v69 *desc; 2678 struct intel_context *child; 2679 2680 GEM_BUG_ON(!engine->mask); 2681 2682 /* 2683 * Ensure LRC + CT vmas are is same region as write barrier is done 2684 * based on CT vma region. 
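* In other words, the context image and the CT buffer should both be in
* local memory or both in system memory: the kind of write barrier used
* is chosen from the CT vma's placement, so it only orders LRC writes
* correctly when the LRC lives in the same region.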
2685 */ 2686 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2687 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2688 2689 desc = __get_lrc_desc_v69(guc, ctx_id); 2690 GEM_BUG_ON(!desc); 2691 desc->engine_class = engine_class_to_guc_class(engine->class); 2692 desc->engine_submit_mask = engine->logical_mask; 2693 desc->hw_context_desc = ce->lrc.lrca; 2694 desc->priority = ce->guc_state.prio; 2695 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2696 guc_context_policy_init_v69(engine, desc); 2697 2698 /* 2699 * If context is a parent, we need to register a process descriptor 2700 * describing a work queue and register all child contexts. 2701 */ 2702 if (intel_context_is_parent(ce)) { 2703 struct guc_process_desc_v69 *pdesc; 2704 2705 ce->parallel.guc.wqi_tail = 0; 2706 ce->parallel.guc.wqi_head = 0; 2707 2708 desc->process_desc = i915_ggtt_offset(ce->state) + 2709 __get_parent_scratch_offset(ce); 2710 desc->wq_addr = i915_ggtt_offset(ce->state) + 2711 __get_wq_offset(ce); 2712 desc->wq_size = WQ_SIZE; 2713 2714 pdesc = __get_process_desc_v69(ce); 2715 memset(pdesc, 0, sizeof(*(pdesc))); 2716 pdesc->stage_id = ce->guc_id.id; 2717 pdesc->wq_base_addr = desc->wq_addr; 2718 pdesc->wq_size_bytes = desc->wq_size; 2719 pdesc->wq_status = WQ_STATUS_ACTIVE; 2720 2721 ce->parallel.guc.wq_head = &pdesc->head; 2722 ce->parallel.guc.wq_tail = &pdesc->tail; 2723 ce->parallel.guc.wq_status = &pdesc->wq_status; 2724 2725 for_each_child(ce, child) { 2726 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2727 2728 desc->engine_class = 2729 engine_class_to_guc_class(engine->class); 2730 desc->hw_context_desc = child->lrc.lrca; 2731 desc->priority = ce->guc_state.prio; 2732 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2733 guc_context_policy_init_v69(engine, desc); 2734 } 2735 2736 clear_children_join_go_memory(ce); 2737 } 2738 } 2739 2740 static void prepare_context_registration_info_v70(struct intel_context *ce, 2741 struct guc_ctxt_registration_info *info) 2742 { 2743 struct intel_engine_cs *engine = ce->engine; 2744 struct intel_guc *guc = &engine->gt->uc.guc; 2745 u32 ctx_id = ce->guc_id.id; 2746 2747 GEM_BUG_ON(!engine->mask); 2748 2749 /* 2750 * Ensure LRC + CT vmas are is same region as write barrier is done 2751 * based on CT vma region. 2752 */ 2753 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2754 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2755 2756 memset(info, 0, sizeof(*info)); 2757 info->context_idx = ctx_id; 2758 info->engine_class = engine_class_to_guc_class(engine->class); 2759 info->engine_submit_mask = engine->logical_mask; 2760 /* 2761 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2762 * only supports 32 bit currently. 2763 */ 2764 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2765 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2766 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2767 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2768 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2769 2770 /* 2771 * If context is a parent, we need to register a process descriptor 2772 * describing a work queue and register all child contexts. 
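* For the v70 interface the work queue descriptor and the work queue
* ring both live in the parent's context state, at
* __get_parent_scratch_offset() and __get_wq_offset() respectively.
* Their GGTT offsets are passed to the GuC below, while the head, tail
* and status pointers are cached in ce->parallel.guc for the submit
* path to append work queue items.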
2773 */ 2774 if (intel_context_is_parent(ce)) { 2775 struct guc_sched_wq_desc *wq_desc; 2776 u64 wq_desc_offset, wq_base_offset; 2777 2778 ce->parallel.guc.wqi_tail = 0; 2779 ce->parallel.guc.wqi_head = 0; 2780 2781 wq_desc_offset = i915_ggtt_offset(ce->state) + 2782 __get_parent_scratch_offset(ce); 2783 wq_base_offset = i915_ggtt_offset(ce->state) + 2784 __get_wq_offset(ce); 2785 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2786 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2787 info->wq_base_lo = lower_32_bits(wq_base_offset); 2788 info->wq_base_hi = upper_32_bits(wq_base_offset); 2789 info->wq_size = WQ_SIZE; 2790 2791 wq_desc = __get_wq_desc_v70(ce); 2792 memset(wq_desc, 0, sizeof(*wq_desc)); 2793 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2794 2795 ce->parallel.guc.wq_head = &wq_desc->head; 2796 ce->parallel.guc.wq_tail = &wq_desc->tail; 2797 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2798 2799 clear_children_join_go_memory(ce); 2800 } 2801 } 2802 2803 static int try_context_registration(struct intel_context *ce, bool loop) 2804 { 2805 struct intel_engine_cs *engine = ce->engine; 2806 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2807 struct intel_guc *guc = &engine->gt->uc.guc; 2808 intel_wakeref_t wakeref; 2809 u32 ctx_id = ce->guc_id.id; 2810 bool context_registered; 2811 int ret = 0; 2812 2813 GEM_BUG_ON(!sched_state_is_init(ce)); 2814 2815 context_registered = ctx_id_mapped(guc, ctx_id); 2816 2817 clr_ctx_id_mapping(guc, ctx_id); 2818 set_ctx_id_mapping(guc, ctx_id, ce); 2819 2820 /* 2821 * The context_lookup xarray is used to determine if the hardware 2822 * context is currently registered. There are two cases in which it 2823 * could be registered either the guc_id has been stolen from another 2824 * context or the lrc descriptor address of this context has changed. In 2825 * either case the context needs to be deregistered with the GuC before 2826 * registering this context. 2827 */ 2828 if (context_registered) { 2829 bool disabled; 2830 unsigned long flags; 2831 2832 trace_intel_context_steal_guc_id(ce); 2833 GEM_BUG_ON(!loop); 2834 2835 /* Seal race with Reset */ 2836 spin_lock_irqsave(&ce->guc_state.lock, flags); 2837 disabled = submission_disabled(guc); 2838 if (likely(!disabled)) { 2839 set_context_wait_for_deregister_to_register(ce); 2840 intel_context_get(ce); 2841 } 2842 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2843 if (unlikely(disabled)) { 2844 clr_ctx_id_mapping(guc, ctx_id); 2845 return 0; /* Will get registered later */ 2846 } 2847 2848 /* 2849 * If stealing the guc_id, this ce has the same guc_id as the 2850 * context whose guc_id was stolen. 
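* The stale registration therefore has to be torn down first: a
* deregister H2G is issued below, and because
* context_wait_for_deregister_to_register was set above, registering
* this context is deferred until the G2H confirming the deregistration
* arrives.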
2851 */ 2852 with_intel_runtime_pm(runtime_pm, wakeref) 2853 ret = deregister_context(ce, ce->guc_id.id); 2854 if (unlikely(ret == -ENODEV)) 2855 ret = 0; /* Will get registered later */ 2856 } else { 2857 with_intel_runtime_pm(runtime_pm, wakeref) 2858 ret = register_context(ce, loop); 2859 if (unlikely(ret == -EBUSY)) { 2860 clr_ctx_id_mapping(guc, ctx_id); 2861 } else if (unlikely(ret == -ENODEV)) { 2862 clr_ctx_id_mapping(guc, ctx_id); 2863 ret = 0; /* Will get registered later */ 2864 } 2865 } 2866 2867 return ret; 2868 } 2869 2870 static int __guc_context_pre_pin(struct intel_context *ce, 2871 struct intel_engine_cs *engine, 2872 struct i915_gem_ww_ctx *ww, 2873 void **vaddr) 2874 { 2875 return lrc_pre_pin(ce, engine, ww, vaddr); 2876 } 2877 2878 static int __guc_context_pin(struct intel_context *ce, 2879 struct intel_engine_cs *engine, 2880 void *vaddr) 2881 { 2882 if (i915_ggtt_offset(ce->state) != 2883 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2884 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2885 2886 /* 2887 * GuC context gets pinned in guc_request_alloc. See that function for 2888 * explaination of why. 2889 */ 2890 2891 return lrc_pin(ce, engine, vaddr); 2892 } 2893 2894 static int guc_context_pre_pin(struct intel_context *ce, 2895 struct i915_gem_ww_ctx *ww, 2896 void **vaddr) 2897 { 2898 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2899 } 2900 2901 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2902 { 2903 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2904 2905 if (likely(!ret && !intel_context_is_barrier(ce))) 2906 intel_engine_pm_get(ce->engine); 2907 2908 return ret; 2909 } 2910 2911 static void guc_context_unpin(struct intel_context *ce) 2912 { 2913 struct intel_guc *guc = ce_to_guc(ce); 2914 2915 __guc_context_update_stats(ce); 2916 unpin_guc_id(guc, ce); 2917 lrc_unpin(ce); 2918 2919 if (likely(!intel_context_is_barrier(ce))) 2920 intel_engine_pm_put_async(ce->engine); 2921 } 2922 2923 static void guc_context_post_unpin(struct intel_context *ce) 2924 { 2925 lrc_post_unpin(ce); 2926 } 2927 2928 static void __guc_context_sched_enable(struct intel_guc *guc, 2929 struct intel_context *ce) 2930 { 2931 u32 action[] = { 2932 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2933 ce->guc_id.id, 2934 GUC_CONTEXT_ENABLE 2935 }; 2936 2937 trace_intel_context_sched_enable(ce); 2938 2939 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2940 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2941 } 2942 2943 static void __guc_context_sched_disable(struct intel_guc *guc, 2944 struct intel_context *ce, 2945 u16 guc_id) 2946 { 2947 u32 action[] = { 2948 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2949 guc_id, /* ce->guc_id.id not stable */ 2950 GUC_CONTEXT_DISABLE 2951 }; 2952 2953 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2954 2955 GEM_BUG_ON(intel_context_is_child(ce)); 2956 trace_intel_context_sched_disable(ce); 2957 2958 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2959 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2960 } 2961 2962 static void guc_blocked_fence_complete(struct intel_context *ce) 2963 { 2964 lockdep_assert_held(&ce->guc_state.lock); 2965 2966 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2967 i915_sw_fence_complete(&ce->guc_state.blocked); 2968 } 2969 2970 static void guc_blocked_fence_reinit(struct intel_context *ce) 2971 { 2972 lockdep_assert_held(&ce->guc_state.lock); 2973 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2974 2975 /* 2976 * This fence is always complete unless a pending schedule disable is 2977 
* outstanding. We arm the fence here and complete it when we receive 2978 * the pending schedule disable complete message. 2979 */ 2980 i915_sw_fence_fini(&ce->guc_state.blocked); 2981 i915_sw_fence_reinit(&ce->guc_state.blocked); 2982 i915_sw_fence_await(&ce->guc_state.blocked); 2983 i915_sw_fence_commit(&ce->guc_state.blocked); 2984 } 2985 2986 static u16 prep_context_pending_disable(struct intel_context *ce) 2987 { 2988 lockdep_assert_held(&ce->guc_state.lock); 2989 2990 set_context_pending_disable(ce); 2991 clr_context_enabled(ce); 2992 guc_blocked_fence_reinit(ce); 2993 intel_context_get(ce); 2994 2995 return ce->guc_id.id; 2996 } 2997 2998 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2999 { 3000 struct intel_guc *guc = ce_to_guc(ce); 3001 unsigned long flags; 3002 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3003 intel_wakeref_t wakeref; 3004 u16 guc_id; 3005 bool enabled; 3006 3007 GEM_BUG_ON(intel_context_is_child(ce)); 3008 3009 spin_lock_irqsave(&ce->guc_state.lock, flags); 3010 3011 incr_context_blocked(ce); 3012 3013 enabled = context_enabled(ce); 3014 if (unlikely(!enabled || submission_disabled(guc))) { 3015 if (enabled) 3016 clr_context_enabled(ce); 3017 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3018 return &ce->guc_state.blocked; 3019 } 3020 3021 /* 3022 * We add +2 here as the schedule disable complete CTB handler calls 3023 * intel_context_sched_disable_unpin (-2 to pin_count). 3024 */ 3025 atomic_add(2, &ce->pin_count); 3026 3027 guc_id = prep_context_pending_disable(ce); 3028 3029 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3030 3031 with_intel_runtime_pm(runtime_pm, wakeref) 3032 __guc_context_sched_disable(guc, ce, guc_id); 3033 3034 return &ce->guc_state.blocked; 3035 } 3036 3037 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 3038 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 3039 #define SCHED_STATE_NO_UNBLOCK \ 3040 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 3041 SCHED_STATE_PENDING_DISABLE | \ 3042 SCHED_STATE_BANNED) 3043 3044 static bool context_cant_unblock(struct intel_context *ce) 3045 { 3046 lockdep_assert_held(&ce->guc_state.lock); 3047 3048 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 3049 context_guc_id_invalid(ce) || 3050 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 3051 !intel_context_is_pinned(ce); 3052 } 3053 3054 static void guc_context_unblock(struct intel_context *ce) 3055 { 3056 struct intel_guc *guc = ce_to_guc(ce); 3057 unsigned long flags; 3058 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3059 intel_wakeref_t wakeref; 3060 bool enable; 3061 3062 GEM_BUG_ON(context_enabled(ce)); 3063 GEM_BUG_ON(intel_context_is_child(ce)); 3064 3065 spin_lock_irqsave(&ce->guc_state.lock, flags); 3066 3067 if (unlikely(submission_disabled(guc) || 3068 context_cant_unblock(ce))) { 3069 enable = false; 3070 } else { 3071 enable = true; 3072 set_context_pending_enable(ce); 3073 set_context_enabled(ce); 3074 intel_context_get(ce); 3075 } 3076 3077 decr_context_blocked(ce); 3078 3079 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3080 3081 if (enable) { 3082 with_intel_runtime_pm(runtime_pm, wakeref) 3083 __guc_context_sched_enable(guc, ce); 3084 } 3085 } 3086 3087 static void guc_context_cancel_request(struct intel_context *ce, 3088 struct i915_request *rq) 3089 { 3090 struct intel_context *block_context = 3091 request_to_scheduling_context(rq); 3092 3093 if (i915_sw_fence_signaled(&rq->submit)) { 3094 struct i915_sw_fence *fence; 3095 3096 intel_context_get(ce); 3097 
fence = guc_context_block(block_context); 3098 i915_sw_fence_wait(fence); 3099 if (!i915_request_completed(rq)) { 3100 __i915_request_skip(rq); 3101 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 3102 true); 3103 } 3104 3105 guc_context_unblock(block_context); 3106 intel_context_put(ce); 3107 } 3108 } 3109 3110 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 3111 u16 guc_id, 3112 u32 preemption_timeout) 3113 { 3114 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3115 struct context_policy policy; 3116 3117 __guc_context_policy_start_klv(&policy, guc_id); 3118 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 3119 __guc_context_set_context_policies(guc, &policy, true); 3120 } else { 3121 u32 action[] = { 3122 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 3123 guc_id, 3124 preemption_timeout 3125 }; 3126 3127 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3128 } 3129 } 3130 3131 static void 3132 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 3133 unsigned int preempt_timeout_ms) 3134 { 3135 struct intel_guc *guc = ce_to_guc(ce); 3136 struct intel_runtime_pm *runtime_pm = 3137 &ce->engine->gt->i915->runtime_pm; 3138 intel_wakeref_t wakeref; 3139 unsigned long flags; 3140 3141 GEM_BUG_ON(intel_context_is_child(ce)); 3142 3143 guc_flush_submissions(guc); 3144 3145 spin_lock_irqsave(&ce->guc_state.lock, flags); 3146 set_context_banned(ce); 3147 3148 if (submission_disabled(guc) || 3149 (!context_enabled(ce) && !context_pending_disable(ce))) { 3150 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3151 3152 guc_cancel_context_requests(ce); 3153 intel_engine_signal_breadcrumbs(ce->engine); 3154 } else if (!context_pending_disable(ce)) { 3155 u16 guc_id; 3156 3157 /* 3158 * We add +2 here as the schedule disable complete CTB handler 3159 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3160 */ 3161 atomic_add(2, &ce->pin_count); 3162 3163 guc_id = prep_context_pending_disable(ce); 3164 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3165 3166 /* 3167 * In addition to disabling scheduling, set the preemption 3168 * timeout to the minimum value (1 us) so the banned context 3169 * gets kicked off the HW ASAP. 
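* Note the schedule disable still completes asynchronously through the
* usual G2H path; the shortened preemption timeout only limits how long
* the banned context can hold the engine before the GuC preempts it.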
3170 */ 3171 with_intel_runtime_pm(runtime_pm, wakeref) { 3172 __guc_context_set_preemption_timeout(guc, guc_id, 3173 preempt_timeout_ms); 3174 __guc_context_sched_disable(guc, ce, guc_id); 3175 } 3176 } else { 3177 if (!context_guc_id_invalid(ce)) 3178 with_intel_runtime_pm(runtime_pm, wakeref) 3179 __guc_context_set_preemption_timeout(guc, 3180 ce->guc_id.id, 3181 preempt_timeout_ms); 3182 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3183 } 3184 } 3185 3186 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3187 unsigned long flags) 3188 __releases(ce->guc_state.lock) 3189 { 3190 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3191 intel_wakeref_t wakeref; 3192 u16 guc_id; 3193 3194 lockdep_assert_held(&ce->guc_state.lock); 3195 guc_id = prep_context_pending_disable(ce); 3196 3197 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3198 3199 with_intel_runtime_pm(runtime_pm, wakeref) 3200 __guc_context_sched_disable(guc, ce, guc_id); 3201 } 3202 3203 static bool bypass_sched_disable(struct intel_guc *guc, 3204 struct intel_context *ce) 3205 { 3206 lockdep_assert_held(&ce->guc_state.lock); 3207 GEM_BUG_ON(intel_context_is_child(ce)); 3208 3209 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3210 !ctx_id_mapped(guc, ce->guc_id.id)) { 3211 clr_context_enabled(ce); 3212 return true; 3213 } 3214 3215 return !context_enabled(ce); 3216 } 3217 3218 static void __delay_sched_disable(struct work_struct *wrk) 3219 { 3220 struct intel_context *ce = 3221 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3222 struct intel_guc *guc = ce_to_guc(ce); 3223 unsigned long flags; 3224 3225 spin_lock_irqsave(&ce->guc_state.lock, flags); 3226 3227 if (bypass_sched_disable(guc, ce)) { 3228 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3229 intel_context_sched_disable_unpin(ce); 3230 } else { 3231 do_sched_disable(guc, ce, flags); 3232 } 3233 } 3234 3235 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3236 { 3237 /* 3238 * parent contexts are perma-pinned, if we are unpinning do schedule 3239 * disable immediately. 3240 */ 3241 if (intel_context_is_parent(ce)) 3242 return true; 3243 3244 /* 3245 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
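*
* As a rough sketch (summarising guc_context_sched_disable() below with
* locking elided, not additional behaviour):
*
*   if (bypass_sched_disable(guc, ce))
*           intel_context_sched_disable_unpin(ce);
*   else if (!intel_context_is_closed(ce) &&
*            !guc_id_pressure(guc, ce) && delay)
*           mod_delayed_work(system_unbound_wq,
*                            &ce->guc_state.sched_disable_delay_work,
*                            msecs_to_jiffies(delay));
*   else
*           do_sched_disable(guc, ce, flags);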
3246 */ 3247 return guc->submission_state.guc_ids_in_use > 3248 guc->submission_state.sched_disable_gucid_threshold; 3249 } 3250 3251 static void guc_context_sched_disable(struct intel_context *ce) 3252 { 3253 struct intel_guc *guc = ce_to_guc(ce); 3254 u64 delay = guc->submission_state.sched_disable_delay_ms; 3255 unsigned long flags; 3256 3257 spin_lock_irqsave(&ce->guc_state.lock, flags); 3258 3259 if (bypass_sched_disable(guc, ce)) { 3260 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3261 intel_context_sched_disable_unpin(ce); 3262 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3263 delay) { 3264 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3265 mod_delayed_work(system_unbound_wq, 3266 &ce->guc_state.sched_disable_delay_work, 3267 msecs_to_jiffies(delay)); 3268 } else { 3269 do_sched_disable(guc, ce, flags); 3270 } 3271 } 3272 3273 static void guc_context_close(struct intel_context *ce) 3274 { 3275 unsigned long flags; 3276 3277 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3278 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3279 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3280 3281 spin_lock_irqsave(&ce->guc_state.lock, flags); 3282 set_context_close_done(ce); 3283 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3284 } 3285 3286 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3287 { 3288 struct intel_guc *guc = ce_to_guc(ce); 3289 struct intel_gt *gt = guc_to_gt(guc); 3290 unsigned long flags; 3291 bool disabled; 3292 3293 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3294 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3295 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3296 GEM_BUG_ON(context_enabled(ce)); 3297 3298 /* Seal race with Reset */ 3299 spin_lock_irqsave(&ce->guc_state.lock, flags); 3300 disabled = submission_disabled(guc); 3301 if (likely(!disabled)) { 3302 __intel_gt_pm_get(gt); 3303 set_context_destroyed(ce); 3304 clr_context_registered(ce); 3305 } 3306 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3307 if (unlikely(disabled)) { 3308 release_guc_id(guc, ce); 3309 __guc_context_destroy(ce); 3310 return; 3311 } 3312 3313 deregister_context(ce, ce->guc_id.id); 3314 } 3315 3316 static void __guc_context_destroy(struct intel_context *ce) 3317 { 3318 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3319 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3320 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3321 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3322 3323 lrc_fini(ce); 3324 intel_context_fini(ce); 3325 3326 if (intel_engine_is_virtual(ce->engine)) { 3327 struct guc_virtual_engine *ve = 3328 container_of(ce, typeof(*ve), context); 3329 3330 if (ve->base.breadcrumbs) 3331 intel_breadcrumbs_put(ve->base.breadcrumbs); 3332 3333 kfree(ve); 3334 } else { 3335 intel_context_free(ce); 3336 } 3337 } 3338 3339 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3340 { 3341 struct intel_context *ce; 3342 unsigned long flags; 3343 3344 GEM_BUG_ON(!submission_disabled(guc) && 3345 guc_submission_initialized(guc)); 3346 3347 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3348 spin_lock_irqsave(&guc->submission_state.lock, flags); 3349 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3350 struct intel_context, 3351 destroyed_link); 3352 if (ce) 3353 list_del_init(&ce->destroyed_link); 3354 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3355 3356 if (!ce) 3357 break; 
3358 3359 release_guc_id(guc, ce); 3360 __guc_context_destroy(ce); 3361 } 3362 } 3363 3364 static void deregister_destroyed_contexts(struct intel_guc *guc) 3365 { 3366 struct intel_context *ce; 3367 unsigned long flags; 3368 3369 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3370 spin_lock_irqsave(&guc->submission_state.lock, flags); 3371 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3372 struct intel_context, 3373 destroyed_link); 3374 if (ce) 3375 list_del_init(&ce->destroyed_link); 3376 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3377 3378 if (!ce) 3379 break; 3380 3381 guc_lrc_desc_unpin(ce); 3382 } 3383 } 3384 3385 static void destroyed_worker_func(struct work_struct *w) 3386 { 3387 struct intel_guc *guc = container_of(w, struct intel_guc, 3388 submission_state.destroyed_worker); 3389 struct intel_gt *gt = guc_to_gt(guc); 3390 intel_wakeref_t wakeref; 3391 3392 with_intel_gt_pm(gt, wakeref) 3393 deregister_destroyed_contexts(guc); 3394 } 3395 3396 static void guc_context_destroy(struct kref *kref) 3397 { 3398 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3399 struct intel_guc *guc = ce_to_guc(ce); 3400 unsigned long flags; 3401 bool destroy; 3402 3403 /* 3404 * If the guc_id is invalid this context has been stolen and we can free 3405 * it immediately. Also can be freed immediately if the context is not 3406 * registered with the GuC or the GuC is in the middle of a reset. 3407 */ 3408 spin_lock_irqsave(&guc->submission_state.lock, flags); 3409 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3410 !ctx_id_mapped(guc, ce->guc_id.id); 3411 if (likely(!destroy)) { 3412 if (!list_empty(&ce->guc_id.link)) 3413 list_del_init(&ce->guc_id.link); 3414 list_add_tail(&ce->destroyed_link, 3415 &guc->submission_state.destroyed_contexts); 3416 } else { 3417 __release_guc_id(guc, ce); 3418 } 3419 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3420 if (unlikely(destroy)) { 3421 __guc_context_destroy(ce); 3422 return; 3423 } 3424 3425 /* 3426 * We use a worker to issue the H2G to deregister the context as we can 3427 * take the GT PM for the first time which isn't allowed from an atomic 3428 * context. 
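* The context was parked on guc->submission_state.destroyed_contexts
* above; destroyed_worker_func() then takes a GT PM wakeref and walks
* that list via deregister_destroyed_contexts(), unpinning and
* deregistering one context at a time.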
3429 */ 3430 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3431 } 3432 3433 static int guc_context_alloc(struct intel_context *ce) 3434 { 3435 return lrc_alloc(ce, ce->engine); 3436 } 3437 3438 static void __guc_context_set_prio(struct intel_guc *guc, 3439 struct intel_context *ce) 3440 { 3441 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3442 struct context_policy policy; 3443 3444 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3445 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3446 __guc_context_set_context_policies(guc, &policy, true); 3447 } else { 3448 u32 action[] = { 3449 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3450 ce->guc_id.id, 3451 ce->guc_state.prio, 3452 }; 3453 3454 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3455 } 3456 } 3457 3458 static void guc_context_set_prio(struct intel_guc *guc, 3459 struct intel_context *ce, 3460 u8 prio) 3461 { 3462 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3463 prio > GUC_CLIENT_PRIORITY_NORMAL); 3464 lockdep_assert_held(&ce->guc_state.lock); 3465 3466 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3467 !context_registered(ce)) { 3468 ce->guc_state.prio = prio; 3469 return; 3470 } 3471 3472 ce->guc_state.prio = prio; 3473 __guc_context_set_prio(guc, ce); 3474 3475 trace_intel_context_set_prio(ce); 3476 } 3477 3478 static inline u8 map_i915_prio_to_guc_prio(int prio) 3479 { 3480 if (prio == I915_PRIORITY_NORMAL) 3481 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3482 else if (prio < I915_PRIORITY_NORMAL) 3483 return GUC_CLIENT_PRIORITY_NORMAL; 3484 else if (prio < I915_PRIORITY_DISPLAY) 3485 return GUC_CLIENT_PRIORITY_HIGH; 3486 else 3487 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3488 } 3489 3490 static inline void add_context_inflight_prio(struct intel_context *ce, 3491 u8 guc_prio) 3492 { 3493 lockdep_assert_held(&ce->guc_state.lock); 3494 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3495 3496 ++ce->guc_state.prio_count[guc_prio]; 3497 3498 /* Overflow protection */ 3499 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3500 } 3501 3502 static inline void sub_context_inflight_prio(struct intel_context *ce, 3503 u8 guc_prio) 3504 { 3505 lockdep_assert_held(&ce->guc_state.lock); 3506 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3507 3508 /* Underflow protection */ 3509 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3510 3511 --ce->guc_state.prio_count[guc_prio]; 3512 } 3513 3514 static inline void update_context_prio(struct intel_context *ce) 3515 { 3516 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3517 int i; 3518 3519 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3520 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3521 3522 lockdep_assert_held(&ce->guc_state.lock); 3523 3524 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3525 if (ce->guc_state.prio_count[i]) { 3526 guc_context_set_prio(guc, ce, i); 3527 break; 3528 } 3529 } 3530 } 3531 3532 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3533 { 3534 /* Lower value is higher priority */ 3535 return new_guc_prio < old_guc_prio; 3536 } 3537 3538 static void add_to_context(struct i915_request *rq) 3539 { 3540 struct intel_context *ce = request_to_scheduling_context(rq); 3541 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3542 3543 GEM_BUG_ON(intel_context_is_child(ce)); 3544 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3545 3546 spin_lock(&ce->guc_state.lock); 3547 
list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3548 3549 if (rq->guc_prio == GUC_PRIO_INIT) { 3550 rq->guc_prio = new_guc_prio; 3551 add_context_inflight_prio(ce, rq->guc_prio); 3552 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3553 sub_context_inflight_prio(ce, rq->guc_prio); 3554 rq->guc_prio = new_guc_prio; 3555 add_context_inflight_prio(ce, rq->guc_prio); 3556 } 3557 update_context_prio(ce); 3558 3559 spin_unlock(&ce->guc_state.lock); 3560 } 3561 3562 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3563 { 3564 lockdep_assert_held(&ce->guc_state.lock); 3565 3566 if (rq->guc_prio != GUC_PRIO_INIT && 3567 rq->guc_prio != GUC_PRIO_FINI) { 3568 sub_context_inflight_prio(ce, rq->guc_prio); 3569 update_context_prio(ce); 3570 } 3571 rq->guc_prio = GUC_PRIO_FINI; 3572 } 3573 3574 static void remove_from_context(struct i915_request *rq) 3575 { 3576 struct intel_context *ce = request_to_scheduling_context(rq); 3577 3578 GEM_BUG_ON(intel_context_is_child(ce)); 3579 3580 spin_lock_irq(&ce->guc_state.lock); 3581 3582 list_del_init(&rq->sched.link); 3583 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3584 3585 /* Prevent further __await_execution() registering a cb, then flush */ 3586 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3587 3588 guc_prio_fini(rq, ce); 3589 3590 spin_unlock_irq(&ce->guc_state.lock); 3591 3592 atomic_dec(&ce->guc_id.ref); 3593 i915_request_notify_execute_cb_imm(rq); 3594 } 3595 3596 static const struct intel_context_ops guc_context_ops = { 3597 .flags = COPS_RUNTIME_CYCLES, 3598 .alloc = guc_context_alloc, 3599 3600 .close = guc_context_close, 3601 3602 .pre_pin = guc_context_pre_pin, 3603 .pin = guc_context_pin, 3604 .unpin = guc_context_unpin, 3605 .post_unpin = guc_context_post_unpin, 3606 3607 .revoke = guc_context_revoke, 3608 3609 .cancel_request = guc_context_cancel_request, 3610 3611 .enter = intel_context_enter_engine, 3612 .exit = intel_context_exit_engine, 3613 3614 .sched_disable = guc_context_sched_disable, 3615 3616 .update_stats = guc_context_update_stats, 3617 3618 .reset = lrc_reset, 3619 .destroy = guc_context_destroy, 3620 3621 .create_virtual = guc_create_virtual, 3622 .create_parallel = guc_create_parallel, 3623 }; 3624 3625 static void submit_work_cb(struct irq_work *wrk) 3626 { 3627 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3628 3629 might_lock(&rq->engine->sched_engine->lock); 3630 i915_sw_fence_complete(&rq->submit); 3631 } 3632 3633 static void __guc_signal_context_fence(struct intel_context *ce) 3634 { 3635 struct i915_request *rq, *rn; 3636 3637 lockdep_assert_held(&ce->guc_state.lock); 3638 3639 if (!list_empty(&ce->guc_state.fences)) 3640 trace_intel_context_fence_release(ce); 3641 3642 /* 3643 * Use an IRQ to ensure locking order of sched_engine->lock -> 3644 * ce->guc_state.lock is preserved. 
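* Completing rq->submit directly here would run submission under
* ce->guc_state.lock, inverting that ordering; instead submit_work_cb()
* is queued as irq_work for each request so the fence is completed
* outside of this lock.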
3645 */ 3646 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3647 guc_fence_link) { 3648 list_del(&rq->guc_fence_link); 3649 irq_work_queue(&rq->submit_work); 3650 } 3651 3652 INIT_LIST_HEAD(&ce->guc_state.fences); 3653 } 3654 3655 static void guc_signal_context_fence(struct intel_context *ce) 3656 { 3657 unsigned long flags; 3658 3659 GEM_BUG_ON(intel_context_is_child(ce)); 3660 3661 spin_lock_irqsave(&ce->guc_state.lock, flags); 3662 clr_context_wait_for_deregister_to_register(ce); 3663 __guc_signal_context_fence(ce); 3664 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3665 } 3666 3667 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3668 { 3669 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3670 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3671 !submission_disabled(ce_to_guc(ce)); 3672 } 3673 3674 static void guc_context_init(struct intel_context *ce) 3675 { 3676 const struct i915_gem_context *ctx; 3677 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3678 3679 rcu_read_lock(); 3680 ctx = rcu_dereference(ce->gem_context); 3681 if (ctx) 3682 prio = ctx->sched.priority; 3683 rcu_read_unlock(); 3684 3685 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3686 3687 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3688 __delay_sched_disable); 3689 3690 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3691 } 3692 3693 static int guc_request_alloc(struct i915_request *rq) 3694 { 3695 struct intel_context *ce = request_to_scheduling_context(rq); 3696 struct intel_guc *guc = ce_to_guc(ce); 3697 unsigned long flags; 3698 int ret; 3699 3700 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3701 3702 /* 3703 * Flush enough space to reduce the likelihood of waiting after 3704 * we start building the request - in which case we will just 3705 * have to repeat work. 3706 */ 3707 rq->reserved_space += GUC_REQUEST_SIZE; 3708 3709 /* 3710 * Note that after this point, we have committed to using 3711 * this request as it is being used to both track the 3712 * state of engine initialisation and liveness of the 3713 * golden renderstate above. Think twice before you try 3714 * to cancel/unwind this request now. 3715 */ 3716 3717 /* Unconditionally invalidate GPU caches and TLBs. */ 3718 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3719 if (ret) 3720 return ret; 3721 3722 rq->reserved_space -= GUC_REQUEST_SIZE; 3723 3724 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3725 guc_context_init(ce); 3726 3727 /* 3728 * If the context gets closed while the execbuf is ongoing, the context 3729 * close code will race with the below code to cancel the delayed work. 3730 * If the context close wins the race and cancels the work, it will 3731 * immediately call the sched disable (see guc_context_close), so there 3732 * is a chance we can get past this check while the sched_disable code 3733 * is being executed. To make sure that code completes before we check 3734 * the status further down, we wait for the close process to complete. 3735 * Else, this code path could send a request down thinking that the 3736 * context is still in a schedule-enable mode while the GuC ends up 3737 * dropping the request completely because the disable did go from the 3738 * context_close path right to GuC just prior. In the event the CT is 3739 * full, we could potentially need to wait up to 1.5 seconds. 
3740 	 */
3741 	if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
3742 		intel_context_sched_disable_unpin(ce);
3743 	else if (intel_context_is_closed(ce))
3744 		if (wait_for(context_close_done(ce), 1500))
3745 			guc_warn(guc, "timed out waiting on context sched close before realloc\n");
3746 	/*
3747 	 * Call pin_guc_id here rather than in the pinning step as with
3748 	 * dma_resv, contexts can be repeatedly pinned / unpinned thrashing the
3749 	 * guc_id and creating horrible race conditions. This is especially bad
3750 	 * when guc_ids are being stolen due to over subscription. By the time
3751 	 * this function is reached, it is guaranteed that the guc_id will be
3752 	 * persistent until the generated request is retired, thus sealing these
3753 	 * race conditions. It is still safe to fail here if guc_ids are
3754 	 * exhausted and return -EAGAIN to the user indicating that they can try
3755 	 * again in the future.
3756 	 *
3757 	 * There is no need for a lock here as the timeline mutex ensures at
3758 	 * most one context can be executing this code path at once. The
3759 	 * guc_id_ref is incremented once for every request in flight and
3760 	 * decremented on each retire. When it is zero, a lock around the
3761 	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3762 	 */
3763 	if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3764 		goto out;
3765 
3766 	ret = pin_guc_id(guc, ce);	/* returns 1 if new guc_id assigned */
3767 	if (unlikely(ret < 0))
3768 		return ret;
3769 	if (context_needs_register(ce, !!ret)) {
3770 		ret = try_context_registration(ce, true);
3771 		if (unlikely(ret)) {	/* unwind */
3772 			if (ret == -EPIPE) {
3773 				disable_submission(guc);
3774 				goto out;	/* GPU will be reset */
3775 			}
3776 			atomic_dec(&ce->guc_id.ref);
3777 			unpin_guc_id(guc, ce);
3778 			return ret;
3779 		}
3780 	}
3781 
3782 	clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3783 
3784 out:
3785 	/*
3786 	 * We block all requests on this context if a G2H is pending for a
3787 	 * schedule disable or context deregistration as the GuC will fail a
3788 	 * schedule enable or context registration if either G2H is pending
3789 	 * respectively. Once a G2H returns, the fence is released that is
3790 	 * blocking these requests (see guc_signal_context_fence).
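	 *
	 * Both the check and the list_add_tail() below are performed under
	 * ce->guc_state.lock, the same lock __guc_signal_context_fence()
	 * holds while draining ce->guc_state.fences, so the request either
	 * parks itself on the fences list or sees the already-cleared state;
	 * the wakeup cannot be missed.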
3791 */ 3792 spin_lock_irqsave(&ce->guc_state.lock, flags); 3793 if (context_wait_for_deregister_to_register(ce) || 3794 context_pending_disable(ce)) { 3795 init_irq_work(&rq->submit_work, submit_work_cb); 3796 i915_sw_fence_await(&rq->submit); 3797 3798 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3799 } 3800 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3801 3802 return 0; 3803 } 3804 3805 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3806 struct i915_gem_ww_ctx *ww, 3807 void **vaddr) 3808 { 3809 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3810 3811 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3812 } 3813 3814 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3815 { 3816 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3817 int ret = __guc_context_pin(ce, engine, vaddr); 3818 intel_engine_mask_t tmp, mask = ce->engine->mask; 3819 3820 if (likely(!ret)) 3821 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3822 intel_engine_pm_get(engine); 3823 3824 return ret; 3825 } 3826 3827 static void guc_virtual_context_unpin(struct intel_context *ce) 3828 { 3829 intel_engine_mask_t tmp, mask = ce->engine->mask; 3830 struct intel_engine_cs *engine; 3831 struct intel_guc *guc = ce_to_guc(ce); 3832 3833 GEM_BUG_ON(context_enabled(ce)); 3834 GEM_BUG_ON(intel_context_is_barrier(ce)); 3835 3836 unpin_guc_id(guc, ce); 3837 lrc_unpin(ce); 3838 3839 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3840 intel_engine_pm_put_async(engine); 3841 } 3842 3843 static void guc_virtual_context_enter(struct intel_context *ce) 3844 { 3845 intel_engine_mask_t tmp, mask = ce->engine->mask; 3846 struct intel_engine_cs *engine; 3847 3848 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3849 intel_engine_pm_get(engine); 3850 3851 intel_timeline_enter(ce->timeline); 3852 } 3853 3854 static void guc_virtual_context_exit(struct intel_context *ce) 3855 { 3856 intel_engine_mask_t tmp, mask = ce->engine->mask; 3857 struct intel_engine_cs *engine; 3858 3859 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3860 intel_engine_pm_put(engine); 3861 3862 intel_timeline_exit(ce->timeline); 3863 } 3864 3865 static int guc_virtual_context_alloc(struct intel_context *ce) 3866 { 3867 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3868 3869 return lrc_alloc(ce, engine); 3870 } 3871 3872 static const struct intel_context_ops virtual_guc_context_ops = { 3873 .flags = COPS_RUNTIME_CYCLES, 3874 .alloc = guc_virtual_context_alloc, 3875 3876 .close = guc_context_close, 3877 3878 .pre_pin = guc_virtual_context_pre_pin, 3879 .pin = guc_virtual_context_pin, 3880 .unpin = guc_virtual_context_unpin, 3881 .post_unpin = guc_context_post_unpin, 3882 3883 .revoke = guc_context_revoke, 3884 3885 .cancel_request = guc_context_cancel_request, 3886 3887 .enter = guc_virtual_context_enter, 3888 .exit = guc_virtual_context_exit, 3889 3890 .sched_disable = guc_context_sched_disable, 3891 .update_stats = guc_context_update_stats, 3892 3893 .destroy = guc_context_destroy, 3894 3895 .get_sibling = guc_virtual_get_sibling, 3896 }; 3897 3898 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3899 { 3900 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3901 struct intel_guc *guc = ce_to_guc(ce); 3902 int ret; 3903 3904 GEM_BUG_ON(!intel_context_is_parent(ce)); 3905 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3906 3907 ret = 
pin_guc_id(guc, ce); 3908 if (unlikely(ret < 0)) 3909 return ret; 3910 3911 return __guc_context_pin(ce, engine, vaddr); 3912 } 3913 3914 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3915 { 3916 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3917 3918 GEM_BUG_ON(!intel_context_is_child(ce)); 3919 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3920 3921 __intel_context_pin(ce->parallel.parent); 3922 return __guc_context_pin(ce, engine, vaddr); 3923 } 3924 3925 static void guc_parent_context_unpin(struct intel_context *ce) 3926 { 3927 struct intel_guc *guc = ce_to_guc(ce); 3928 3929 GEM_BUG_ON(context_enabled(ce)); 3930 GEM_BUG_ON(intel_context_is_barrier(ce)); 3931 GEM_BUG_ON(!intel_context_is_parent(ce)); 3932 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3933 3934 unpin_guc_id(guc, ce); 3935 lrc_unpin(ce); 3936 } 3937 3938 static void guc_child_context_unpin(struct intel_context *ce) 3939 { 3940 GEM_BUG_ON(context_enabled(ce)); 3941 GEM_BUG_ON(intel_context_is_barrier(ce)); 3942 GEM_BUG_ON(!intel_context_is_child(ce)); 3943 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3944 3945 lrc_unpin(ce); 3946 } 3947 3948 static void guc_child_context_post_unpin(struct intel_context *ce) 3949 { 3950 GEM_BUG_ON(!intel_context_is_child(ce)); 3951 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3952 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3953 3954 lrc_post_unpin(ce); 3955 intel_context_unpin(ce->parallel.parent); 3956 } 3957 3958 static void guc_child_context_destroy(struct kref *kref) 3959 { 3960 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3961 3962 __guc_context_destroy(ce); 3963 } 3964 3965 static const struct intel_context_ops virtual_parent_context_ops = { 3966 .alloc = guc_virtual_context_alloc, 3967 3968 .close = guc_context_close, 3969 3970 .pre_pin = guc_context_pre_pin, 3971 .pin = guc_parent_context_pin, 3972 .unpin = guc_parent_context_unpin, 3973 .post_unpin = guc_context_post_unpin, 3974 3975 .revoke = guc_context_revoke, 3976 3977 .cancel_request = guc_context_cancel_request, 3978 3979 .enter = guc_virtual_context_enter, 3980 .exit = guc_virtual_context_exit, 3981 3982 .sched_disable = guc_context_sched_disable, 3983 3984 .destroy = guc_context_destroy, 3985 3986 .get_sibling = guc_virtual_get_sibling, 3987 }; 3988 3989 static const struct intel_context_ops virtual_child_context_ops = { 3990 .alloc = guc_virtual_context_alloc, 3991 3992 .pre_pin = guc_context_pre_pin, 3993 .pin = guc_child_context_pin, 3994 .unpin = guc_child_context_unpin, 3995 .post_unpin = guc_child_context_post_unpin, 3996 3997 .cancel_request = guc_context_cancel_request, 3998 3999 .enter = guc_virtual_context_enter, 4000 .exit = guc_virtual_context_exit, 4001 4002 .destroy = guc_child_context_destroy, 4003 4004 .get_sibling = guc_virtual_get_sibling, 4005 }; 4006 4007 /* 4008 * The below override of the breadcrumbs is enabled when the user configures a 4009 * context for parallel submission (multi-lrc, parent-child). 4010 * 4011 * The overridden breadcrumbs implements an algorithm which allows the GuC to 4012 * safely preempt all the hw contexts configured for parallel submission 4013 * between each BB. The contract between the i915 and GuC is if the parent 4014 * context can be preempted, all the children can be preempted, and the GuC will 4015 * always try to preempt the parent before the children. 
A handshake between the 4016 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 4017 * creating a window to preempt between each set of BBs. 4018 */ 4019 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4020 u64 offset, u32 len, 4021 const unsigned int flags); 4022 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4023 u64 offset, u32 len, 4024 const unsigned int flags); 4025 static u32 * 4026 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4027 u32 *cs); 4028 static u32 * 4029 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4030 u32 *cs); 4031 4032 static struct intel_context * 4033 guc_create_parallel(struct intel_engine_cs **engines, 4034 unsigned int num_siblings, 4035 unsigned int width) 4036 { 4037 struct intel_engine_cs **siblings = NULL; 4038 struct intel_context *parent = NULL, *ce, *err; 4039 int i, j; 4040 4041 siblings = kmalloc_array(num_siblings, 4042 sizeof(*siblings), 4043 GFP_KERNEL); 4044 if (!siblings) 4045 return ERR_PTR(-ENOMEM); 4046 4047 for (i = 0; i < width; ++i) { 4048 for (j = 0; j < num_siblings; ++j) 4049 siblings[j] = engines[i * num_siblings + j]; 4050 4051 ce = intel_engine_create_virtual(siblings, num_siblings, 4052 FORCE_VIRTUAL); 4053 if (IS_ERR(ce)) { 4054 err = ERR_CAST(ce); 4055 goto unwind; 4056 } 4057 4058 if (i == 0) { 4059 parent = ce; 4060 parent->ops = &virtual_parent_context_ops; 4061 } else { 4062 ce->ops = &virtual_child_context_ops; 4063 intel_context_bind_parent_child(parent, ce); 4064 } 4065 } 4066 4067 parent->parallel.fence_context = dma_fence_context_alloc(1); 4068 4069 parent->engine->emit_bb_start = 4070 emit_bb_start_parent_no_preempt_mid_batch; 4071 parent->engine->emit_fini_breadcrumb = 4072 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 4073 parent->engine->emit_fini_breadcrumb_dw = 4074 12 + 4 * parent->parallel.number_children; 4075 for_each_child(parent, ce) { 4076 ce->engine->emit_bb_start = 4077 emit_bb_start_child_no_preempt_mid_batch; 4078 ce->engine->emit_fini_breadcrumb = 4079 emit_fini_breadcrumb_child_no_preempt_mid_batch; 4080 ce->engine->emit_fini_breadcrumb_dw = 16; 4081 } 4082 4083 kfree(siblings); 4084 return parent; 4085 4086 unwind: 4087 if (parent) 4088 intel_context_put(parent); 4089 kfree(siblings); 4090 return err; 4091 } 4092 4093 static bool 4094 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 4095 { 4096 struct intel_engine_cs *sibling; 4097 intel_engine_mask_t tmp, mask = b->engine_mask; 4098 bool result = false; 4099 4100 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4101 result |= intel_engine_irq_enable(sibling); 4102 4103 return result; 4104 } 4105 4106 static void 4107 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 4108 { 4109 struct intel_engine_cs *sibling; 4110 intel_engine_mask_t tmp, mask = b->engine_mask; 4111 4112 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4113 intel_engine_irq_disable(sibling); 4114 } 4115 4116 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 4117 { 4118 int i; 4119 4120 /* 4121 * In GuC submission mode we do not know which physical engine a request 4122 * will be scheduled on, this creates a problem because the breadcrumb 4123 * interrupt is per physical engine. To work around this we attach 4124 * requests and direct all breadcrumb interrupts to the first instance 4125 * of an engine per class. 
In addition all breadcrumb interrupts are 4126 * enabled / disabled across an engine class in unison. 4127 */ 4128 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 4129 struct intel_engine_cs *sibling = 4130 engine->gt->engine_class[engine->class][i]; 4131 4132 if (sibling) { 4133 if (engine->breadcrumbs != sibling->breadcrumbs) { 4134 intel_breadcrumbs_put(engine->breadcrumbs); 4135 engine->breadcrumbs = 4136 intel_breadcrumbs_get(sibling->breadcrumbs); 4137 } 4138 break; 4139 } 4140 } 4141 4142 if (engine->breadcrumbs) { 4143 engine->breadcrumbs->engine_mask |= engine->mask; 4144 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 4145 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 4146 } 4147 } 4148 4149 static void guc_bump_inflight_request_prio(struct i915_request *rq, 4150 int prio) 4151 { 4152 struct intel_context *ce = request_to_scheduling_context(rq); 4153 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 4154 4155 /* Short circuit function */ 4156 if (prio < I915_PRIORITY_NORMAL || 4157 rq->guc_prio == GUC_PRIO_FINI || 4158 (rq->guc_prio != GUC_PRIO_INIT && 4159 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 4160 return; 4161 4162 spin_lock(&ce->guc_state.lock); 4163 if (rq->guc_prio != GUC_PRIO_FINI) { 4164 if (rq->guc_prio != GUC_PRIO_INIT) 4165 sub_context_inflight_prio(ce, rq->guc_prio); 4166 rq->guc_prio = new_guc_prio; 4167 add_context_inflight_prio(ce, rq->guc_prio); 4168 update_context_prio(ce); 4169 } 4170 spin_unlock(&ce->guc_state.lock); 4171 } 4172 4173 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4174 { 4175 struct intel_context *ce = request_to_scheduling_context(rq); 4176 4177 spin_lock(&ce->guc_state.lock); 4178 guc_prio_fini(rq, ce); 4179 spin_unlock(&ce->guc_state.lock); 4180 } 4181 4182 static void sanitize_hwsp(struct intel_engine_cs *engine) 4183 { 4184 struct intel_timeline *tl; 4185 4186 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4187 intel_timeline_reset_seqno(tl); 4188 } 4189 4190 static void guc_sanitize(struct intel_engine_cs *engine) 4191 { 4192 /* 4193 * Poison residual state on resume, in case the suspend didn't! 4194 * 4195 * We have to assume that across suspend/resume (or other loss 4196 * of control) that the contents of our pinned buffers has been 4197 * lost, replaced by garbage. Since this doesn't always happen, 4198 * let's poison such state so that we more quickly spot when 4199 * we falsely assume it has been preserved. 4200 */ 4201 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4202 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4203 4204 /* 4205 * The kernel_context HWSP is stored in the status_page. As above, 4206 * that may be lost on resume/initialisation, and so we need to 4207 * reset the value in the HWSP. 
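	 * sanitize_hwsp() below walks every timeline still attached to the
	 * engine's status page and calls intel_timeline_reset_seqno() on
	 * each, re-establishing a known seqno value in place of whatever was
	 * lost (or poisoned above) across the suspend.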
4208 */ 4209 sanitize_hwsp(engine); 4210 4211 /* And scrub the dirty cachelines for the HWSP */ 4212 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4213 4214 intel_engine_reset_pinned_contexts(engine); 4215 } 4216 4217 static void setup_hwsp(struct intel_engine_cs *engine) 4218 { 4219 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4220 4221 ENGINE_WRITE_FW(engine, 4222 RING_HWS_PGA, 4223 i915_ggtt_offset(engine->status_page.vma)); 4224 } 4225 4226 static void start_engine(struct intel_engine_cs *engine) 4227 { 4228 ENGINE_WRITE_FW(engine, 4229 RING_MODE_GEN7, 4230 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4231 4232 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4233 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4234 } 4235 4236 static int guc_resume(struct intel_engine_cs *engine) 4237 { 4238 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4239 4240 intel_mocs_init_engine(engine); 4241 4242 intel_breadcrumbs_reset(engine->breadcrumbs); 4243 4244 setup_hwsp(engine); 4245 start_engine(engine); 4246 4247 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4248 xehp_enable_ccs_engines(engine); 4249 4250 return 0; 4251 } 4252 4253 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4254 { 4255 return !sched_engine->tasklet.callback; 4256 } 4257 4258 static void guc_set_default_submission(struct intel_engine_cs *engine) 4259 { 4260 engine->submit_request = guc_submit_request; 4261 } 4262 4263 static inline int guc_kernel_context_pin(struct intel_guc *guc, 4264 struct intel_context *ce) 4265 { 4266 int ret; 4267 4268 /* 4269 * Note: we purposefully do not check the returns below because 4270 * the registration can only fail if a reset is just starting. 4271 * This is called at the end of reset so presumably another reset 4272 * isn't happening and even it did this code would be run again. 4273 */ 4274 4275 if (context_guc_id_invalid(ce)) { 4276 ret = pin_guc_id(guc, ce); 4277 4278 if (ret < 0) 4279 return ret; 4280 } 4281 4282 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) 4283 guc_context_init(ce); 4284 4285 ret = try_context_registration(ce, true); 4286 if (ret) 4287 unpin_guc_id(guc, ce); 4288 4289 return ret; 4290 } 4291 4292 static inline int guc_init_submission(struct intel_guc *guc) 4293 { 4294 struct intel_gt *gt = guc_to_gt(guc); 4295 struct intel_engine_cs *engine; 4296 enum intel_engine_id id; 4297 4298 /* make sure all descriptors are clean... */ 4299 xa_destroy(&guc->context_lookup); 4300 4301 /* 4302 * A reset might have occurred while we had a pending stalled request, 4303 * so make sure we clean that up. 4304 */ 4305 guc->stalled_request = NULL; 4306 guc->submission_stall_reason = STALL_NONE; 4307 4308 /* 4309 * Some contexts might have been pinned before we enabled GuC 4310 * submission, so we need to add them to the GuC bookeeping. 4311 * Also, after a reset the of the GuC we want to make sure that the 4312 * information shared with GuC is properly reset. The kernel LRCs are 4313 * not attached to the gem_context, so they need to be added separately. 
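	 * The loop below therefore walks each engine's pinned_contexts_list
	 * and runs guc_kernel_context_pin() on every entry, which (re)assigns
	 * a guc_id if the current one is invalid and re-registers the context
	 * with the GuC. Any failure is simply propagated; as noted at the
	 * error path, i915 will wedge on failure anyway.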
4314 */ 4315 for_each_engine(engine, gt, id) { 4316 struct intel_context *ce; 4317 4318 list_for_each_entry(ce, &engine->pinned_contexts_list, 4319 pinned_contexts_link) { 4320 int ret = guc_kernel_context_pin(guc, ce); 4321 4322 if (ret) { 4323 /* No point in trying to clean up as i915 will wedge on failure */ 4324 return ret; 4325 } 4326 } 4327 } 4328 4329 return 0; 4330 } 4331 4332 static void guc_release(struct intel_engine_cs *engine) 4333 { 4334 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4335 4336 intel_engine_cleanup_common(engine); 4337 lrc_fini_wa_ctx(engine); 4338 } 4339 4340 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4341 { 4342 struct intel_engine_cs *e; 4343 intel_engine_mask_t tmp, mask = engine->mask; 4344 4345 for_each_engine_masked(e, engine->gt, mask, tmp) 4346 e->serial++; 4347 } 4348 4349 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4350 { 4351 /* Default vfuncs which can be overridden by each engine. */ 4352 4353 engine->resume = guc_resume; 4354 4355 engine->cops = &guc_context_ops; 4356 engine->request_alloc = guc_request_alloc; 4357 engine->add_active_request = add_to_context; 4358 engine->remove_active_request = remove_from_context; 4359 4360 engine->sched_engine->schedule = i915_schedule; 4361 4362 engine->reset.prepare = guc_engine_reset_prepare; 4363 engine->reset.rewind = guc_rewind_nop; 4364 engine->reset.cancel = guc_reset_nop; 4365 engine->reset.finish = guc_reset_nop; 4366 4367 engine->emit_flush = gen8_emit_flush_xcs; 4368 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4369 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4370 if (GRAPHICS_VER(engine->i915) >= 12) { 4371 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4372 engine->emit_flush = gen12_emit_flush_xcs; 4373 } 4374 engine->set_default_submission = guc_set_default_submission; 4375 engine->busyness = guc_engine_busyness; 4376 4377 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4378 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4379 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4380 4381 /* Wa_14014475959:dg2 */ 4382 if (engine->class == COMPUTE_CLASS) 4383 if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || 4384 IS_DG2(engine->i915)) 4385 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4386 4387 /* 4388 * TODO: GuC supports timeslicing and semaphores as well, but they're 4389 * handled by the firmware so some minor tweaks are required before 4390 * enabling. 
4391 * 4392 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4393 */ 4394 4395 engine->emit_bb_start = gen8_emit_bb_start; 4396 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4397 engine->emit_bb_start = xehp_emit_bb_start; 4398 } 4399 4400 static void rcs_submission_override(struct intel_engine_cs *engine) 4401 { 4402 switch (GRAPHICS_VER(engine->i915)) { 4403 case 12: 4404 engine->emit_flush = gen12_emit_flush_rcs; 4405 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4406 break; 4407 case 11: 4408 engine->emit_flush = gen11_emit_flush_rcs; 4409 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4410 break; 4411 default: 4412 engine->emit_flush = gen8_emit_flush_rcs; 4413 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4414 break; 4415 } 4416 } 4417 4418 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4419 { 4420 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4421 intel_engine_set_irq_handler(engine, cs_irq_handler); 4422 } 4423 4424 static void guc_sched_engine_destroy(struct kref *kref) 4425 { 4426 struct i915_sched_engine *sched_engine = 4427 container_of(kref, typeof(*sched_engine), ref); 4428 struct intel_guc *guc = sched_engine->private_data; 4429 4430 guc->sched_engine = NULL; 4431 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4432 kfree(sched_engine); 4433 } 4434 4435 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4436 { 4437 struct drm_i915_private *i915 = engine->i915; 4438 struct intel_guc *guc = &engine->gt->uc.guc; 4439 4440 /* 4441 * The setup relies on several assumptions (e.g. irqs always enabled) 4442 * that are only valid on gen11+ 4443 */ 4444 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4445 4446 if (!guc->sched_engine) { 4447 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4448 if (!guc->sched_engine) 4449 return -ENOMEM; 4450 4451 guc->sched_engine->schedule = i915_schedule; 4452 guc->sched_engine->disabled = guc_sched_engine_disabled; 4453 guc->sched_engine->private_data = guc; 4454 guc->sched_engine->destroy = guc_sched_engine_destroy; 4455 guc->sched_engine->bump_inflight_request_prio = 4456 guc_bump_inflight_request_prio; 4457 guc->sched_engine->retire_inflight_request_prio = 4458 guc_retire_inflight_request_prio; 4459 tasklet_setup(&guc->sched_engine->tasklet, 4460 guc_submission_tasklet); 4461 } 4462 i915_sched_engine_put(engine->sched_engine); 4463 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4464 4465 guc_default_vfuncs(engine); 4466 guc_default_irqs(engine); 4467 guc_init_breadcrumbs(engine); 4468 4469 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4470 rcs_submission_override(engine); 4471 4472 lrc_init_wa_ctx(engine); 4473 4474 /* Finally, take ownership and responsibility for cleanup! 
*/ 4475 engine->sanitize = guc_sanitize; 4476 engine->release = guc_release; 4477 4478 return 0; 4479 } 4480 4481 struct scheduling_policy { 4482 /* internal data */ 4483 u32 max_words, num_words; 4484 u32 count; 4485 /* API data */ 4486 struct guc_update_scheduling_policy h2g; 4487 }; 4488 4489 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4490 { 4491 u32 *start = (void *)&policy->h2g; 4492 u32 *end = policy->h2g.data + policy->num_words; 4493 size_t delta = end - start; 4494 4495 return delta; 4496 } 4497 4498 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4499 { 4500 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4501 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4502 policy->num_words = 0; 4503 policy->count = 0; 4504 4505 return policy; 4506 } 4507 4508 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4509 u32 action, u32 *data, u32 len) 4510 { 4511 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4512 4513 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4514 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4515 FIELD_PREP(GUC_KLV_0_LEN, len); 4516 memcpy(klv_ptr, data, sizeof(u32) * len); 4517 policy->num_words += 1 + len; 4518 policy->count++; 4519 } 4520 4521 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4522 struct scheduling_policy *policy) 4523 { 4524 int ret; 4525 4526 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4527 __guc_scheduling_policy_action_size(policy)); 4528 if (ret < 0) { 4529 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n", 4530 ERR_PTR(ret)); 4531 return ret; 4532 } 4533 4534 if (ret != policy->count) { 4535 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!", 4536 ret, policy->count); 4537 if (ret > policy->count) 4538 return -EPROTO; 4539 } 4540 4541 return 0; 4542 } 4543 4544 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4545 { 4546 struct scheduling_policy policy; 4547 struct intel_gt *gt = guc_to_gt(guc); 4548 intel_wakeref_t wakeref; 4549 int ret; 4550 4551 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 4552 return 0; 4553 4554 __guc_scheduling_policy_start_klv(&policy); 4555 4556 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 4557 u32 yield[] = { 4558 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4559 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4560 }; 4561 4562 __guc_scheduling_policy_add_klv(&policy, 4563 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4564 yield, ARRAY_SIZE(yield)); 4565 4566 ret = __guc_action_set_scheduling_policies(guc, &policy); 4567 } 4568 4569 return ret; 4570 } 4571 4572 static void guc_route_semaphores(struct intel_guc *guc, bool to_guc) 4573 { 4574 struct intel_gt *gt = guc_to_gt(guc); 4575 u32 val; 4576 4577 if (GRAPHICS_VER(gt->i915) < 12) 4578 return; 4579 4580 if (to_guc) 4581 val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL; 4582 else 4583 val = 0; 4584 4585 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val); 4586 } 4587 4588 int intel_guc_submission_enable(struct intel_guc *guc) 4589 { 4590 int ret; 4591 4592 /* Semaphore interrupt enable and route to GuC */ 4593 guc_route_semaphores(guc, true); 4594 4595 ret = guc_init_submission(guc); 4596 if (ret) 4597 goto fail_sem; 4598 4599 ret = guc_init_engine_stats(guc); 4600 if (ret) 4601 goto fail_sem; 4602 4603 ret = guc_init_global_schedule_policy(guc); 4604 if (ret) 4605 goto fail_stats; 4606 
4607 return 0; 4608 4609 fail_stats: 4610 guc_fini_engine_stats(guc); 4611 fail_sem: 4612 guc_route_semaphores(guc, false); 4613 return ret; 4614 } 4615 4616 /* Note: By the time we're here, GuC may have already been reset */ 4617 void intel_guc_submission_disable(struct intel_guc *guc) 4618 { 4619 guc_cancel_busyness_worker(guc); 4620 4621 /* Semaphore interrupt disable and route to host */ 4622 guc_route_semaphores(guc, false); 4623 } 4624 4625 static bool __guc_submission_supported(struct intel_guc *guc) 4626 { 4627 /* GuC submission is unavailable for pre-Gen11 */ 4628 return intel_guc_is_supported(guc) && 4629 GRAPHICS_VER(guc_to_i915(guc)) >= 11; 4630 } 4631 4632 static bool __guc_submission_selected(struct intel_guc *guc) 4633 { 4634 struct drm_i915_private *i915 = guc_to_i915(guc); 4635 4636 if (!intel_guc_submission_is_supported(guc)) 4637 return false; 4638 4639 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4640 } 4641 4642 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) 4643 { 4644 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); 4645 } 4646 4647 /* 4648 * This default value of 33 milisecs (+1 milisec round up) ensures 30fps or higher 4649 * workloads are able to enjoy the latency reduction when delaying the schedule-disable 4650 * operation. This matches the 30fps game-render + encode (real world) workload this 4651 * knob was tested against. 4652 */ 4653 #define SCHED_DISABLE_DELAY_MS 34 4654 4655 /* 4656 * A threshold of 75% is a reasonable starting point considering that real world apps 4657 * generally don't get anywhere near this. 4658 */ 4659 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ 4660 (((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4) 4661 4662 void intel_guc_submission_init_early(struct intel_guc *guc) 4663 { 4664 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4665 4666 spin_lock_init(&guc->submission_state.lock); 4667 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4668 ida_init(&guc->submission_state.guc_ids); 4669 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4670 INIT_WORK(&guc->submission_state.destroyed_worker, 4671 destroyed_worker_func); 4672 INIT_WORK(&guc->submission_state.reset_fail_worker, 4673 reset_fail_worker_func); 4674 4675 spin_lock_init(&guc->timestamp.lock); 4676 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4677 4678 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; 4679 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4680 guc->submission_state.sched_disable_gucid_threshold = 4681 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); 4682 guc->submission_supported = __guc_submission_supported(guc); 4683 guc->submission_selected = __guc_submission_selected(guc); 4684 } 4685 4686 static inline struct intel_context * 4687 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4688 { 4689 struct intel_context *ce; 4690 4691 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4692 guc_err(guc, "Invalid ctx_id %u\n", ctx_id); 4693 return NULL; 4694 } 4695 4696 ce = __get_context(guc, ctx_id); 4697 if (unlikely(!ce)) { 4698 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id); 4699 return NULL; 4700 } 4701 4702 if (unlikely(intel_context_is_child(ce))) { 4703 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id); 4704 return NULL; 4705 } 4706 4707 return ce; 4708 } 4709 4710 static void wait_wake_outstanding_tlb_g2h(struct intel_guc *guc, u32 seqno) 4711 { 4712 struct intel_guc_tlb_wait *wait; 4713 unsigned 
long flags; 4714 4715 xa_lock_irqsave(&guc->tlb_lookup, flags); 4716 wait = xa_load(&guc->tlb_lookup, seqno); 4717 4718 if (wait) 4719 wake_up(&wait->wq); 4720 else 4721 guc_dbg(guc, 4722 "Stale TLB invalidation response with seqno %d\n", seqno); 4723 4724 xa_unlock_irqrestore(&guc->tlb_lookup, flags); 4725 } 4726 4727 int intel_guc_tlb_invalidation_done(struct intel_guc *guc, 4728 const u32 *payload, u32 len) 4729 { 4730 if (len < 1) 4731 return -EPROTO; 4732 4733 wait_wake_outstanding_tlb_g2h(guc, payload[0]); 4734 return 0; 4735 } 4736 4737 static long must_wait_woken(struct wait_queue_entry *wq_entry, long timeout) 4738 { 4739 /* 4740 * This is equivalent to wait_woken() with the exception that 4741 * we do not wake up early if the kthread task has been completed. 4742 * As we are called from page reclaim in any task context, 4743 * we may be invoked from stopped kthreads, but we *must* 4744 * complete the wait from the HW. 4745 */ 4746 do { 4747 set_current_state(TASK_UNINTERRUPTIBLE); 4748 if (wq_entry->flags & WQ_FLAG_WOKEN) 4749 break; 4750 4751 timeout = schedule_timeout(timeout); 4752 } while (timeout); 4753 4754 /* See wait_woken() and woken_wake_function() */ 4755 __set_current_state(TASK_RUNNING); 4756 smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); 4757 4758 return timeout; 4759 } 4760 4761 static bool intel_gt_is_enabled(const struct intel_gt *gt) 4762 { 4763 /* Check if GT is wedged or suspended */ 4764 if (intel_gt_is_wedged(gt) || !intel_irqs_enabled(gt->i915)) 4765 return false; 4766 return true; 4767 } 4768 4769 static int guc_send_invalidate_tlb(struct intel_guc *guc, 4770 enum intel_guc_tlb_invalidation_type type) 4771 { 4772 struct intel_guc_tlb_wait _wq, *wq = &_wq; 4773 struct intel_gt *gt = guc_to_gt(guc); 4774 DEFINE_WAIT_FUNC(wait, woken_wake_function); 4775 int err; 4776 u32 seqno; 4777 u32 action[] = { 4778 INTEL_GUC_ACTION_TLB_INVALIDATION, 4779 0, 4780 REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK, type) | 4781 REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK, 4782 INTEL_GUC_TLB_INVAL_MODE_HEAVY) | 4783 INTEL_GUC_TLB_INVAL_FLUSH_CACHE, 4784 }; 4785 u32 size = ARRAY_SIZE(action); 4786 4787 /* 4788 * Early guard against GT enablement. TLB invalidation should not be 4789 * attempted if the GT is disabled due to suspend/wedge. 4790 */ 4791 if (!intel_gt_is_enabled(gt)) 4792 return -EINVAL; 4793 4794 init_waitqueue_head(&_wq.wq); 4795 4796 if (xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, wq, 4797 xa_limit_32b, &guc->next_seqno, 4798 GFP_ATOMIC | __GFP_NOWARN) < 0) { 4799 /* Under severe memory pressure? Serialise TLB allocations */ 4800 xa_lock_irq(&guc->tlb_lookup); 4801 wq = xa_load(&guc->tlb_lookup, guc->serial_slot); 4802 wait_event_lock_irq(wq->wq, 4803 !READ_ONCE(wq->busy), 4804 guc->tlb_lookup.xa_lock); 4805 /* 4806 * Update wq->busy under lock to ensure only one waiter can 4807 * issue the TLB invalidation command using the serial slot at a 4808 * time. The condition is set to true before releasing the lock 4809 * so that other caller continue to wait until woken up again. 4810 */ 4811 wq->busy = true; 4812 xa_unlock_irq(&guc->tlb_lookup); 4813 4814 seqno = guc->serial_slot; 4815 } 4816 4817 action[1] = seqno; 4818 4819 add_wait_queue(&wq->wq, &wait); 4820 4821 /* This is a critical reclaim path and thus we must loop here. */ 4822 err = intel_guc_send_busy_loop(guc, action, size, G2H_LEN_DW_INVALIDATE_TLB, true); 4823 if (err) 4824 goto out; 4825 4826 /* 4827 * Late guard against GT enablement. 
It is not an error for the TLB
4828 	 * invalidation to time out if the GT is disabled during the process
4829 	 * due to suspend/wedge. In fact, the TLB invalidation is cancelled
4830 	 * in this case.
4831 	 */
4832 	if (!must_wait_woken(&wait, intel_guc_ct_max_queue_time_jiffies()) &&
4833 	    intel_gt_is_enabled(gt)) {
4834 		guc_err(guc,
4835 			"TLB invalidation response timed out for seqno %u\n", seqno);
4836 		err = -ETIME;
4837 	}
4838 out:
4839 	remove_wait_queue(&wq->wq, &wait);
4840 	if (seqno != guc->serial_slot)
4841 		xa_erase_irq(&guc->tlb_lookup, seqno);
4842 
4843 	return err;
4844 }
4845 
4846 /* Send a H2G command to invalidate the TLBs at engine level and beyond. */
4847 int intel_guc_invalidate_tlb_engines(struct intel_guc *guc)
4848 {
4849 	return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_ENGINES);
4850 }
4851 
4852 /* Send a H2G command to invalidate the GuC's internal TLB. */
4853 int intel_guc_invalidate_tlb_guc(struct intel_guc *guc)
4854 {
4855 	return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_GUC);
4856 }
4857 
4858 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4859 					  const u32 *msg,
4860 					  u32 len)
4861 {
4862 	struct intel_context *ce;
4863 	u32 ctx_id;
4864 
4865 	if (unlikely(len < 1)) {
4866 		guc_err(guc, "Invalid length %u\n", len);
4867 		return -EPROTO;
4868 	}
4869 	ctx_id = msg[0];
4870 
4871 	ce = g2h_context_lookup(guc, ctx_id);
4872 	if (unlikely(!ce))
4873 		return -EPROTO;
4874 
4875 	trace_intel_context_deregister_done(ce);
4876 
4877 #ifdef CONFIG_DRM_I915_SELFTEST
4878 	if (unlikely(ce->drop_deregister)) {
4879 		ce->drop_deregister = false;
4880 		return 0;
4881 	}
4882 #endif
4883 
4884 	if (context_wait_for_deregister_to_register(ce)) {
4885 		struct intel_runtime_pm *runtime_pm =
4886 			&ce->engine->gt->i915->runtime_pm;
4887 		intel_wakeref_t wakeref;
4888 
4889 		/*
4890 		 * Previous owner of this guc_id has been deregistered, now safe
4891 		 * to register this context.
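		 * register_context() is issued here, under a runtime PM wakeref,
		 * because the original registration had to wait for the old
		 * owner's deregister G2H. guc_signal_context_fence() then
		 * releases any requests that were parked on ce->guc_state.fences
		 * while the guc_id was in this transitional state.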
4892 */ 4893 with_intel_runtime_pm(runtime_pm, wakeref) 4894 register_context(ce, true); 4895 guc_signal_context_fence(ce); 4896 intel_context_put(ce); 4897 } else if (context_destroyed(ce)) { 4898 /* Context has been destroyed */ 4899 intel_gt_pm_put_async_untracked(guc_to_gt(guc)); 4900 release_guc_id(guc, ce); 4901 __guc_context_destroy(ce); 4902 } 4903 4904 decr_outstanding_submission_g2h(guc); 4905 4906 return 0; 4907 } 4908 4909 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4910 const u32 *msg, 4911 u32 len) 4912 { 4913 struct intel_context *ce; 4914 unsigned long flags; 4915 u32 ctx_id; 4916 4917 if (unlikely(len < 2)) { 4918 guc_err(guc, "Invalid length %u\n", len); 4919 return -EPROTO; 4920 } 4921 ctx_id = msg[0]; 4922 4923 ce = g2h_context_lookup(guc, ctx_id); 4924 if (unlikely(!ce)) 4925 return -EPROTO; 4926 4927 if (unlikely(context_destroyed(ce) || 4928 (!context_pending_enable(ce) && 4929 !context_pending_disable(ce)))) { 4930 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n", 4931 ce->guc_state.sched_state, ctx_id); 4932 return -EPROTO; 4933 } 4934 4935 trace_intel_context_sched_done(ce); 4936 4937 if (context_pending_enable(ce)) { 4938 #ifdef CONFIG_DRM_I915_SELFTEST 4939 if (unlikely(ce->drop_schedule_enable)) { 4940 ce->drop_schedule_enable = false; 4941 return 0; 4942 } 4943 #endif 4944 4945 spin_lock_irqsave(&ce->guc_state.lock, flags); 4946 clr_context_pending_enable(ce); 4947 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4948 } else if (context_pending_disable(ce)) { 4949 bool banned; 4950 4951 #ifdef CONFIG_DRM_I915_SELFTEST 4952 if (unlikely(ce->drop_schedule_disable)) { 4953 ce->drop_schedule_disable = false; 4954 return 0; 4955 } 4956 #endif 4957 4958 /* 4959 * Unpin must be done before __guc_signal_context_fence, 4960 * otherwise a race exists between the requests getting 4961 * submitted + retired before this unpin completes resulting in 4962 * the pin_count going to zero and the context still being 4963 * enabled. 
4964 */ 4965 intel_context_sched_disable_unpin(ce); 4966 4967 spin_lock_irqsave(&ce->guc_state.lock, flags); 4968 banned = context_banned(ce); 4969 clr_context_banned(ce); 4970 clr_context_pending_disable(ce); 4971 __guc_signal_context_fence(ce); 4972 guc_blocked_fence_complete(ce); 4973 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4974 4975 if (banned) { 4976 guc_cancel_context_requests(ce); 4977 intel_engine_signal_breadcrumbs(ce->engine); 4978 } 4979 } 4980 4981 decr_outstanding_submission_g2h(guc); 4982 intel_context_put(ce); 4983 4984 return 0; 4985 } 4986 4987 static void capture_error_state(struct intel_guc *guc, 4988 struct intel_context *ce) 4989 { 4990 struct intel_gt *gt = guc_to_gt(guc); 4991 struct drm_i915_private *i915 = gt->i915; 4992 intel_wakeref_t wakeref; 4993 intel_engine_mask_t engine_mask; 4994 4995 if (intel_engine_is_virtual(ce->engine)) { 4996 struct intel_engine_cs *e; 4997 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask; 4998 4999 engine_mask = 0; 5000 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) { 5001 bool match = intel_guc_capture_is_matching_engine(gt, ce, e); 5002 5003 if (match) { 5004 intel_engine_set_hung_context(e, ce); 5005 engine_mask |= e->mask; 5006 i915_increase_reset_engine_count(&i915->gpu_error, 5007 e); 5008 } 5009 } 5010 5011 if (!engine_mask) { 5012 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s", 5013 ce->guc_id.id, ce->engine->name); 5014 engine_mask = ~0U; 5015 } 5016 } else { 5017 intel_engine_set_hung_context(ce->engine, ce); 5018 engine_mask = ce->engine->mask; 5019 i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); 5020 } 5021 5022 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 5023 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 5024 } 5025 5026 static void guc_context_replay(struct intel_context *ce) 5027 { 5028 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 5029 5030 __guc_reset_context(ce, ce->engine->mask); 5031 tasklet_hi_schedule(&sched_engine->tasklet); 5032 } 5033 5034 static void guc_handle_context_reset(struct intel_guc *guc, 5035 struct intel_context *ce) 5036 { 5037 bool capture = intel_context_is_schedulable(ce); 5038 5039 trace_intel_context_reset(ce); 5040 5041 guc_dbg(guc, "%s context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n", 5042 capture ? "Got" : "Ignoring", 5043 ce->guc_id.id, ce->engine->name, 5044 str_yes_no(intel_context_is_exiting(ce)), 5045 str_yes_no(intel_context_is_banned(ce))); 5046 5047 if (capture) { 5048 capture_error_state(guc, ce); 5049 guc_context_replay(ce); 5050 } 5051 } 5052 5053 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 5054 const u32 *msg, u32 len) 5055 { 5056 struct intel_context *ce; 5057 unsigned long flags; 5058 int ctx_id; 5059 5060 if (unlikely(len != 1)) { 5061 guc_err(guc, "Invalid length %u", len); 5062 return -EPROTO; 5063 } 5064 5065 ctx_id = msg[0]; 5066 5067 /* 5068 * The context lookup uses the xarray but lookups only require an RCU lock 5069 * not the full spinlock. So take the lock explicitly and keep it until the 5070 * context has been reference count locked to ensure it can't be destroyed 5071 * asynchronously until the reset is done. 
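	 * Taking a reference with intel_context_get() while the xa_lock is
	 * still held below is what provides that guarantee: a concurrent
	 * deregistration may remove the entry from the xarray, but the
	 * context itself stays alive until the matching intel_context_put()
	 * after guc_handle_context_reset() has run.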
5072 */ 5073 xa_lock_irqsave(&guc->context_lookup, flags); 5074 ce = g2h_context_lookup(guc, ctx_id); 5075 if (ce) 5076 intel_context_get(ce); 5077 xa_unlock_irqrestore(&guc->context_lookup, flags); 5078 5079 if (unlikely(!ce)) 5080 return -EPROTO; 5081 5082 guc_handle_context_reset(guc, ce); 5083 intel_context_put(ce); 5084 5085 return 0; 5086 } 5087 5088 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 5089 const u32 *msg, u32 len) 5090 { 5091 u32 status; 5092 5093 if (unlikely(len != 1)) { 5094 guc_dbg(guc, "Invalid length %u", len); 5095 return -EPROTO; 5096 } 5097 5098 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 5099 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 5100 guc_warn(guc, "No space for error capture"); 5101 5102 intel_guc_capture_process(guc); 5103 5104 return 0; 5105 } 5106 5107 struct intel_engine_cs * 5108 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 5109 { 5110 struct intel_gt *gt = guc_to_gt(guc); 5111 u8 engine_class = guc_class_to_engine_class(guc_class); 5112 5113 /* Class index is checked in class converter */ 5114 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 5115 5116 return gt->engine_class[engine_class][instance]; 5117 } 5118 5119 static void reset_fail_worker_func(struct work_struct *w) 5120 { 5121 struct intel_guc *guc = container_of(w, struct intel_guc, 5122 submission_state.reset_fail_worker); 5123 struct intel_gt *gt = guc_to_gt(guc); 5124 intel_engine_mask_t reset_fail_mask; 5125 unsigned long flags; 5126 5127 spin_lock_irqsave(&guc->submission_state.lock, flags); 5128 reset_fail_mask = guc->submission_state.reset_fail_mask; 5129 guc->submission_state.reset_fail_mask = 0; 5130 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 5131 5132 if (likely(reset_fail_mask)) { 5133 struct intel_engine_cs *engine; 5134 enum intel_engine_id id; 5135 5136 /* 5137 * GuC is toast at this point - it dead loops after sending the failed 5138 * reset notification. So need to manually determine the guilty context. 5139 * Note that it should be reliable to do this here because the GuC is 5140 * toast and will not be scheduling behind the KMD's back. 5141 */ 5142 for_each_engine_masked(engine, gt, reset_fail_mask, id) 5143 intel_guc_find_hung_context(engine); 5144 5145 intel_gt_handle_error(gt, reset_fail_mask, 5146 I915_ERROR_CAPTURE, 5147 "GuC failed to reset engine mask=0x%x", 5148 reset_fail_mask); 5149 } 5150 } 5151 5152 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 5153 const u32 *msg, u32 len) 5154 { 5155 struct intel_engine_cs *engine; 5156 u8 guc_class, instance; 5157 u32 reason; 5158 unsigned long flags; 5159 5160 if (unlikely(len != 3)) { 5161 guc_err(guc, "Invalid length %u", len); 5162 return -EPROTO; 5163 } 5164 5165 guc_class = msg[0]; 5166 instance = msg[1]; 5167 reason = msg[2]; 5168 5169 engine = intel_guc_lookup_engine(guc, guc_class, instance); 5170 if (unlikely(!engine)) { 5171 guc_err(guc, "Invalid engine %d:%d", guc_class, instance); 5172 return -EPROTO; 5173 } 5174 5175 /* 5176 * This is an unexpected failure of a hardware feature. So, log a real 5177 * error message not just the informational that comes with the reset. 
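	 * Recovery itself is deferred to reset_fail_worker_func(): the engine
	 * is only added to submission_state.reset_fail_mask here, and the
	 * worker later escalates to a GT reset, since (as noted there) the
	 * GuC dead loops after sending this notification and a GT reset
	 * cannot be triggered from this G2H handler directly.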
5178 */ 5179 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X", 5180 guc_class, instance, engine->name, reason); 5181 5182 spin_lock_irqsave(&guc->submission_state.lock, flags); 5183 guc->submission_state.reset_fail_mask |= engine->mask; 5184 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 5185 5186 /* 5187 * A GT reset flushes this worker queue (G2H handler) so we must use 5188 * another worker to trigger a GT reset. 5189 */ 5190 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 5191 5192 return 0; 5193 } 5194 5195 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 5196 { 5197 struct intel_guc *guc = &engine->gt->uc.guc; 5198 struct intel_context *ce; 5199 struct i915_request *rq; 5200 unsigned long index; 5201 unsigned long flags; 5202 5203 /* Reset called during driver load? GuC not yet initialised! */ 5204 if (unlikely(!guc_submission_initialized(guc))) 5205 return; 5206 5207 xa_lock_irqsave(&guc->context_lookup, flags); 5208 xa_for_each(&guc->context_lookup, index, ce) { 5209 bool found; 5210 5211 if (!kref_get_unless_zero(&ce->ref)) 5212 continue; 5213 5214 xa_unlock(&guc->context_lookup); 5215 5216 if (!intel_context_is_pinned(ce)) 5217 goto next; 5218 5219 if (intel_engine_is_virtual(ce->engine)) { 5220 if (!(ce->engine->mask & engine->mask)) 5221 goto next; 5222 } else { 5223 if (ce->engine != engine) 5224 goto next; 5225 } 5226 5227 found = false; 5228 spin_lock(&ce->guc_state.lock); 5229 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 5230 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 5231 continue; 5232 5233 found = true; 5234 break; 5235 } 5236 spin_unlock(&ce->guc_state.lock); 5237 5238 if (found) { 5239 intel_engine_set_hung_context(engine, ce); 5240 5241 /* Can only cope with one hang at a time... */ 5242 intel_context_put(ce); 5243 xa_lock(&guc->context_lookup); 5244 goto done; 5245 } 5246 5247 next: 5248 intel_context_put(ce); 5249 xa_lock(&guc->context_lookup); 5250 } 5251 done: 5252 xa_unlock_irqrestore(&guc->context_lookup, flags); 5253 } 5254 5255 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 5256 struct i915_request *hung_rq, 5257 struct drm_printer *m) 5258 { 5259 struct intel_guc *guc = &engine->gt->uc.guc; 5260 struct intel_context *ce; 5261 unsigned long index; 5262 unsigned long flags; 5263 5264 /* Reset called during driver load? GuC not yet initialised! 
*/ 5265 if (unlikely(!guc_submission_initialized(guc))) 5266 return; 5267 5268 xa_lock_irqsave(&guc->context_lookup, flags); 5269 xa_for_each(&guc->context_lookup, index, ce) { 5270 if (!kref_get_unless_zero(&ce->ref)) 5271 continue; 5272 5273 xa_unlock(&guc->context_lookup); 5274 5275 if (!intel_context_is_pinned(ce)) 5276 goto next; 5277 5278 if (intel_engine_is_virtual(ce->engine)) { 5279 if (!(ce->engine->mask & engine->mask)) 5280 goto next; 5281 } else { 5282 if (ce->engine != engine) 5283 goto next; 5284 } 5285 5286 spin_lock(&ce->guc_state.lock); 5287 intel_engine_dump_active_requests(&ce->guc_state.requests, 5288 hung_rq, m); 5289 spin_unlock(&ce->guc_state.lock); 5290 5291 next: 5292 intel_context_put(ce); 5293 xa_lock(&guc->context_lookup); 5294 } 5295 xa_unlock_irqrestore(&guc->context_lookup, flags); 5296 } 5297 5298 void intel_guc_submission_print_info(struct intel_guc *guc, 5299 struct drm_printer *p) 5300 { 5301 struct i915_sched_engine *sched_engine = guc->sched_engine; 5302 struct rb_node *rb; 5303 unsigned long flags; 5304 5305 if (!sched_engine) 5306 return; 5307 5308 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", 5309 guc->submission_version.major, guc->submission_version.minor, 5310 guc->submission_version.patch); 5311 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 5312 atomic_read(&guc->outstanding_submission_g2h)); 5313 drm_printf(p, "GuC tasklet count: %u\n", 5314 atomic_read(&sched_engine->tasklet.count)); 5315 5316 spin_lock_irqsave(&sched_engine->lock, flags); 5317 drm_printf(p, "Requests in GuC submit tasklet:\n"); 5318 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 5319 struct i915_priolist *pl = to_priolist(rb); 5320 struct i915_request *rq; 5321 5322 priolist_for_each_request(rq, pl) 5323 drm_printf(p, "guc_id=%u, seqno=%llu\n", 5324 rq->context->guc_id.id, 5325 rq->fence.seqno); 5326 } 5327 spin_unlock_irqrestore(&sched_engine->lock, flags); 5328 drm_printf(p, "\n"); 5329 } 5330 5331 static inline void guc_log_context_priority(struct drm_printer *p, 5332 struct intel_context *ce) 5333 { 5334 int i; 5335 5336 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 5337 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 5338 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 5339 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 5340 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 5341 i, ce->guc_state.prio_count[i]); 5342 } 5343 drm_printf(p, "\n"); 5344 } 5345 5346 static inline void guc_log_context(struct drm_printer *p, 5347 struct intel_context *ce) 5348 { 5349 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 5350 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 5351 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 5352 ce->ring->head, 5353 ce->lrc_reg_state[CTX_RING_HEAD]); 5354 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 5355 ce->ring->tail, 5356 ce->lrc_reg_state[CTX_RING_TAIL]); 5357 drm_printf(p, "\t\tContext Pin Count: %u\n", 5358 atomic_read(&ce->pin_count)); 5359 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 5360 atomic_read(&ce->guc_id.ref)); 5361 drm_printf(p, "\t\tSchedule State: 0x%x\n", 5362 ce->guc_state.sched_state); 5363 } 5364 5365 void intel_guc_submission_print_context_info(struct intel_guc *guc, 5366 struct drm_printer *p) 5367 { 5368 struct intel_context *ce; 5369 unsigned long index; 5370 unsigned long flags; 5371 5372 xa_lock_irqsave(&guc->context_lookup, flags); 5373 xa_for_each(&guc->context_lookup, index, ce) { 5374 
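		/*
		 * Only parent or non-parallel contexts are expected in the
		 * lookup xarray, hence the assert below; child contexts are
		 * reached via for_each_child() further down and printed
		 * alongside their parent.
		 */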
GEM_BUG_ON(intel_context_is_child(ce)); 5375 5376 guc_log_context(p, ce); 5377 guc_log_context_priority(p, ce); 5378 5379 if (intel_context_is_parent(ce)) { 5380 struct intel_context *child; 5381 5382 drm_printf(p, "\t\tNumber children: %u\n", 5383 ce->parallel.number_children); 5384 5385 if (ce->parallel.guc.wq_status) { 5386 drm_printf(p, "\t\tWQI Head: %u\n", 5387 READ_ONCE(*ce->parallel.guc.wq_head)); 5388 drm_printf(p, "\t\tWQI Tail: %u\n", 5389 READ_ONCE(*ce->parallel.guc.wq_tail)); 5390 drm_printf(p, "\t\tWQI Status: %u\n", 5391 READ_ONCE(*ce->parallel.guc.wq_status)); 5392 } 5393 5394 if (ce->engine->emit_bb_start == 5395 emit_bb_start_parent_no_preempt_mid_batch) { 5396 u8 i; 5397 5398 drm_printf(p, "\t\tChildren Go: %u\n", 5399 get_children_go_value(ce)); 5400 for (i = 0; i < ce->parallel.number_children; ++i) 5401 drm_printf(p, "\t\tChildren Join: %u\n", 5402 get_children_join_value(ce, i)); 5403 } 5404 5405 for_each_child(ce, child) 5406 guc_log_context(p, child); 5407 } 5408 } 5409 xa_unlock_irqrestore(&guc->context_lookup, flags); 5410 } 5411 5412 static inline u32 get_children_go_addr(struct intel_context *ce) 5413 { 5414 GEM_BUG_ON(!intel_context_is_parent(ce)); 5415 5416 return i915_ggtt_offset(ce->state) + 5417 __get_parent_scratch_offset(ce) + 5418 offsetof(struct parent_scratch, go.semaphore); 5419 } 5420 5421 static inline u32 get_children_join_addr(struct intel_context *ce, 5422 u8 child_index) 5423 { 5424 GEM_BUG_ON(!intel_context_is_parent(ce)); 5425 5426 return i915_ggtt_offset(ce->state) + 5427 __get_parent_scratch_offset(ce) + 5428 offsetof(struct parent_scratch, join[child_index].semaphore); 5429 } 5430 5431 #define PARENT_GO_BB 1 5432 #define PARENT_GO_FINI_BREADCRUMB 0 5433 #define CHILD_GO_BB 1 5434 #define CHILD_GO_FINI_BREADCRUMB 0 5435 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 5436 u64 offset, u32 len, 5437 const unsigned int flags) 5438 { 5439 struct intel_context *ce = rq->context; 5440 u32 *cs; 5441 u8 i; 5442 5443 GEM_BUG_ON(!intel_context_is_parent(ce)); 5444 5445 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 5446 if (IS_ERR(cs)) 5447 return PTR_ERR(cs); 5448 5449 /* Wait on children */ 5450 for (i = 0; i < ce->parallel.number_children; ++i) { 5451 *cs++ = (MI_SEMAPHORE_WAIT | 5452 MI_SEMAPHORE_GLOBAL_GTT | 5453 MI_SEMAPHORE_POLL | 5454 MI_SEMAPHORE_SAD_EQ_SDD); 5455 *cs++ = PARENT_GO_BB; 5456 *cs++ = get_children_join_addr(ce, i); 5457 *cs++ = 0; 5458 } 5459 5460 /* Turn off preemption */ 5461 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5462 *cs++ = MI_NOOP; 5463 5464 /* Tell children go */ 5465 cs = gen8_emit_ggtt_write(cs, 5466 CHILD_GO_BB, 5467 get_children_go_addr(ce), 5468 0); 5469 5470 /* Jump to batch */ 5471 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5472 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8));
5473 	*cs++ = lower_32_bits(offset);
5474 	*cs++ = upper_32_bits(offset);
5475 	*cs++ = MI_NOOP;
5476 
5477 	intel_ring_advance(rq, cs);
5478 
5479 	return 0;
5480 }
5481 
5482 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
5483 						     u64 offset, u32 len,
5484 						     const unsigned int flags)
5485 {
5486 	struct intel_context *ce = rq->context;
5487 	struct intel_context *parent = intel_context_to_parent(ce);
5488 	u32 *cs;
5489 
5490 	GEM_BUG_ON(!intel_context_is_child(ce));
5491 
5492 	cs = intel_ring_begin(rq, 12);
5493 	if (IS_ERR(cs))
5494 		return PTR_ERR(cs);
5495 
5496 	/* Signal parent */
5497 	cs = gen8_emit_ggtt_write(cs,
5498 				  PARENT_GO_BB,
5499 				  get_children_join_addr(parent,
5500 							 ce->parallel.child_index),
5501 				  0);
5502 
5503 	/* Wait on parent for go */
5504 	*cs++ = (MI_SEMAPHORE_WAIT |
5505 		 MI_SEMAPHORE_GLOBAL_GTT |
5506 		 MI_SEMAPHORE_POLL |
5507 		 MI_SEMAPHORE_SAD_EQ_SDD);
5508 	*cs++ = CHILD_GO_BB;
5509 	*cs++ = get_children_go_addr(parent);
5510 	*cs++ = 0;
5511 
5512 	/* Turn off preemption */
5513 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5514 
5515 	/* Jump to batch */
5516 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
5517 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
5518 	*cs++ = lower_32_bits(offset);
5519 	*cs++ = upper_32_bits(offset);
5520 
5521 	intel_ring_advance(rq, cs);
5522 
5523 	return 0;
5524 }
5525 
5526 static u32 *
5527 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
5528 						    u32 *cs)
5529 {
5530 	struct intel_context *ce = rq->context;
5531 	u8 i;
5532 
5533 	GEM_BUG_ON(!intel_context_is_parent(ce));
5534 
5535 	/* Wait on children */
5536 	for (i = 0; i < ce->parallel.number_children; ++i) {
5537 		*cs++ = (MI_SEMAPHORE_WAIT |
5538 			 MI_SEMAPHORE_GLOBAL_GTT |
5539 			 MI_SEMAPHORE_POLL |
5540 			 MI_SEMAPHORE_SAD_EQ_SDD);
5541 		*cs++ = PARENT_GO_FINI_BREADCRUMB;
5542 		*cs++ = get_children_join_addr(ce, i);
5543 		*cs++ = 0;
5544 	}
5545 
5546 	/* Turn on preemption */
5547 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5548 	*cs++ = MI_NOOP;
5549 
5550 	/* Tell children go */
5551 	cs = gen8_emit_ggtt_write(cs,
5552 				  CHILD_GO_FINI_BREADCRUMB,
5553 				  get_children_go_addr(ce),
5554 				  0);
5555 
5556 	return cs;
5557 }
5558 
5559 /*
5560  * If this is true, a submission of multi-lrc requests had an error and the
5561  * requests need to be skipped. The front end (execbuf IOCTL) should've called
5562  * i915_request_skip which squashes the BB but we still need to emit the fini
5563  * breadcrumb seqno write. At this point we don't know how many of the
5564  * requests in the multi-lrc submission were generated so we can't do the
5565  * handshake between the parent and children (e.g. if 4 requests should be
5566  * generated but 2nd hit an error only 1 would be seen by the GuC backend).
5567  * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
5568  * has occurred on any of the requests in submission / relationship.
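 *
 * The part that is always emitted is the seqno write plus the user
 * interrupt, which together account for NON_SKIP_LEN (6) dwords; that is
 * why the skip path NOPs out emit_fini_breadcrumb_dw - NON_SKIP_LEN dwords
 * and why the GEM_BUG_ON()s below double check the arithmetic.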
5569 */ 5570 static inline bool skip_handshake(struct i915_request *rq) 5571 { 5572 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 5573 } 5574 5575 #define NON_SKIP_LEN 6 5576 static u32 * 5577 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5578 u32 *cs) 5579 { 5580 struct intel_context *ce = rq->context; 5581 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5582 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5583 5584 GEM_BUG_ON(!intel_context_is_parent(ce)); 5585 5586 if (unlikely(skip_handshake(rq))) { 5587 /* 5588 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 5589 * the NON_SKIP_LEN comes from the length of the emits below. 5590 */ 5591 memset(cs, 0, sizeof(u32) * 5592 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5593 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5594 } else { 5595 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 5596 } 5597 5598 /* Emit fini breadcrumb */ 5599 before_fini_breadcrumb_user_interrupt_cs = cs; 5600 cs = gen8_emit_ggtt_write(cs, 5601 rq->fence.seqno, 5602 i915_request_active_timeline(rq)->hwsp_offset, 5603 0); 5604 5605 /* User interrupt */ 5606 *cs++ = MI_USER_INTERRUPT; 5607 *cs++ = MI_NOOP; 5608 5609 /* Ensure our math for skip + emit is correct */ 5610 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5611 cs); 5612 GEM_BUG_ON(start_fini_breadcrumb_cs + 5613 ce->engine->emit_fini_breadcrumb_dw != cs); 5614 5615 rq->tail = intel_ring_offset(rq, cs); 5616 5617 return cs; 5618 } 5619 5620 static u32 * 5621 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5622 u32 *cs) 5623 { 5624 struct intel_context *ce = rq->context; 5625 struct intel_context *parent = intel_context_to_parent(ce); 5626 5627 GEM_BUG_ON(!intel_context_is_child(ce)); 5628 5629 /* Turn on preemption */ 5630 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5631 *cs++ = MI_NOOP; 5632 5633 /* Signal parent */ 5634 cs = gen8_emit_ggtt_write(cs, 5635 PARENT_GO_FINI_BREADCRUMB, 5636 get_children_join_addr(parent, 5637 ce->parallel.child_index), 5638 0); 5639 5640 /* Wait parent on for go */ 5641 *cs++ = (MI_SEMAPHORE_WAIT | 5642 MI_SEMAPHORE_GLOBAL_GTT | 5643 MI_SEMAPHORE_POLL | 5644 MI_SEMAPHORE_SAD_EQ_SDD); 5645 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5646 *cs++ = get_children_go_addr(parent); 5647 *cs++ = 0; 5648 5649 return cs; 5650 } 5651 5652 static u32 * 5653 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5654 u32 *cs) 5655 { 5656 struct intel_context *ce = rq->context; 5657 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5658 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5659 5660 GEM_BUG_ON(!intel_context_is_child(ce)); 5661 5662 if (unlikely(skip_handshake(rq))) { 5663 /* 5664 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 5665 * the NON_SKIP_LEN comes from the length of the emits below. 
static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						u32 *cs)
{
	struct intel_context *ce = rq->context;
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
		 * the NON_SKIP_LEN comes from the length of the emits below.
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
	} else {
		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	before_fini_breadcrumb_user_interrupt_cs = cs;
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	/* Ensure our math for skip + emit is correct */
	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
		   cs);
	GEM_BUG_ON(start_fini_breadcrumb_cs +
		   ce->engine->emit_fini_breadcrumb_dw != cs);

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}

#undef NON_SKIP_LEN
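
/*
 * With the GuC backend the load balancing between siblings is performed by
 * the firmware, so the virtual engine created here is largely software glue:
 * it accumulates the sibling masks for the GuC to choose from, borrows the
 * emission vfuncs and scheduling properties from the first sibling, and
 * rejects duplicate siblings or mixed engine classes.
 */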
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags)
{
	struct guc_virtual_engine *ve;
	struct intel_guc *guc;
	unsigned int n;
	int err;

	ve = kzalloc(sizeof(*ve), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	guc = &siblings[0]->gt->uc.guc;

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);

	ve->base.cops = &virtual_guc_context_ops;
	ve->base.request_alloc = guc_request_alloc;
	ve->base.bump_serial = virtual_guc_bump_serial;

	ve->base.submit_request = guc_submit_request;

	ve->base.flags = I915_ENGINE_IS_VIRTUAL;

	BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
	ve->base.mask = VIRTUAL_ENGINES;

	intel_context_init(&ve->context, &ve->base);

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			guc_dbg(guc, "duplicate %s entry in load balancer\n",
				sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		ve->base.mask |= sibling->mask;
		ve->base.logical_mask |= sibling->logical_mask;

		if (n != 0 && ve->base.class != sibling->class) {
			guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
				sibling->class, ve->base.class);
			err = -EINVAL;
			goto err_put;
		} else if (n == 0) {
			ve->base.class = sibling->class;
			ve->base.uabi_class = sibling->uabi_class;
			snprintf(ve->base.name, sizeof(ve->base.name),
				 "v%dx%d", ve->base.class, count);
			ve->base.context_size = sibling->context_size;

			ve->base.add_active_request =
				sibling->add_active_request;
			ve->base.remove_active_request =
				sibling->remove_active_request;
			ve->base.emit_bb_start = sibling->emit_bb_start;
			ve->base.emit_flush = sibling->emit_flush;
			ve->base.emit_init_breadcrumb =
				sibling->emit_init_breadcrumb;
			ve->base.emit_fini_breadcrumb =
				sibling->emit_fini_breadcrumb;
			ve->base.emit_fini_breadcrumb_dw =
				sibling->emit_fini_breadcrumb_dw;
			ve->base.breadcrumbs =
				intel_breadcrumbs_get(sibling->breadcrumbs);

			ve->base.flags |= sibling->flags;

			ve->base.props.timeslice_duration_ms =
				sibling->props.timeslice_duration_ms;
			ve->base.props.preempt_timeout_ms =
				sibling->props.preempt_timeout_ms;
		}
	}

	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}

/*
 * A virtual engine is considered to have a heartbeat if any of its physical
 * siblings has a non-zero heartbeat interval configured.
 */
bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, mask = ve->mask;

	for_each_engine_masked(engine, ve->gt, mask, tmp)
		if (READ_ONCE(engine->props.heartbeat_interval_ms))
			return true;

	return false;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
#include "selftest_guc_multi_lrc.c"
#include "selftest_guc_hangcheck.c"
#endif