// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_irq.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and cleanup appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space.
We believe the number of multi-lrc contexts in 158 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for 159 * multi-lrc. 160 */ 161 #define NUMBER_MULTI_LRC_GUC_ID(guc) \ 162 ((guc)->submission_state.num_guc_ids / 16) 163 164 /* 165 * Below is a set of functions which control the GuC scheduling state which 166 * require a lock. 167 */ 168 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 169 #define SCHED_STATE_DESTROYED BIT(1) 170 #define SCHED_STATE_PENDING_DISABLE BIT(2) 171 #define SCHED_STATE_BANNED BIT(3) 172 #define SCHED_STATE_ENABLED BIT(4) 173 #define SCHED_STATE_PENDING_ENABLE BIT(5) 174 #define SCHED_STATE_REGISTERED BIT(6) 175 #define SCHED_STATE_POLICY_REQUIRED BIT(7) 176 #define SCHED_STATE_CLOSED BIT(8) 177 #define SCHED_STATE_BLOCKED_SHIFT 9 178 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 179 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 180 181 static inline void init_sched_state(struct intel_context *ce) 182 { 183 lockdep_assert_held(&ce->guc_state.lock); 184 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 185 } 186 187 /* 188 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend. 189 * A context close can race with the submission path, so SCHED_STATE_CLOSED 190 * can be set immediately before we try to register. 191 */ 192 #define SCHED_STATE_VALID_INIT \ 193 (SCHED_STATE_BLOCKED_MASK | \ 194 SCHED_STATE_CLOSED | \ 195 SCHED_STATE_REGISTERED) 196 197 __maybe_unused 198 static bool sched_state_is_init(struct intel_context *ce) 199 { 200 return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT); 201 } 202 203 static inline bool 204 context_wait_for_deregister_to_register(struct intel_context *ce) 205 { 206 return ce->guc_state.sched_state & 207 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 208 } 209 210 static inline void 211 set_context_wait_for_deregister_to_register(struct intel_context *ce) 212 { 213 lockdep_assert_held(&ce->guc_state.lock); 214 ce->guc_state.sched_state |= 215 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 216 } 217 218 static inline void 219 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 220 { 221 lockdep_assert_held(&ce->guc_state.lock); 222 ce->guc_state.sched_state &= 223 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 224 } 225 226 static inline bool 227 context_destroyed(struct intel_context *ce) 228 { 229 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 230 } 231 232 static inline void 233 set_context_destroyed(struct intel_context *ce) 234 { 235 lockdep_assert_held(&ce->guc_state.lock); 236 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 237 } 238 239 static inline void 240 clr_context_destroyed(struct intel_context *ce) 241 { 242 lockdep_assert_held(&ce->guc_state.lock); 243 ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED; 244 } 245 246 static inline bool context_pending_disable(struct intel_context *ce) 247 { 248 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 249 } 250 251 static inline void set_context_pending_disable(struct intel_context *ce) 252 { 253 lockdep_assert_held(&ce->guc_state.lock); 254 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 255 } 256 257 static inline void clr_context_pending_disable(struct intel_context *ce) 258 { 259 lockdep_assert_held(&ce->guc_state.lock); 260 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 261 } 262 263 static inline bool context_banned(struct intel_context *ce) 264 { 265 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 266 } 
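
/*
 * Illustrative sketch only, not used by the driver: every query or update of
 * ce->guc_state.sched_state built from the helpers above is only meaningful
 * while ce->guc_state.lock is held, so a hypothetical combined check would be
 * written along these lines (the function name is made up for illustration).
 */
__maybe_unused
static inline bool example_context_teardown_in_flight(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	return context_destroyed(ce) ||
	       context_wait_for_deregister_to_register(ce);
}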
267 268 static inline void set_context_banned(struct intel_context *ce) 269 { 270 lockdep_assert_held(&ce->guc_state.lock); 271 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 272 } 273 274 static inline void clr_context_banned(struct intel_context *ce) 275 { 276 lockdep_assert_held(&ce->guc_state.lock); 277 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 278 } 279 280 static inline bool context_enabled(struct intel_context *ce) 281 { 282 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 283 } 284 285 static inline void set_context_enabled(struct intel_context *ce) 286 { 287 lockdep_assert_held(&ce->guc_state.lock); 288 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 289 } 290 291 static inline void clr_context_enabled(struct intel_context *ce) 292 { 293 lockdep_assert_held(&ce->guc_state.lock); 294 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 295 } 296 297 static inline bool context_pending_enable(struct intel_context *ce) 298 { 299 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 300 } 301 302 static inline void set_context_pending_enable(struct intel_context *ce) 303 { 304 lockdep_assert_held(&ce->guc_state.lock); 305 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 306 } 307 308 static inline void clr_context_pending_enable(struct intel_context *ce) 309 { 310 lockdep_assert_held(&ce->guc_state.lock); 311 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 312 } 313 314 static inline bool context_registered(struct intel_context *ce) 315 { 316 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 317 } 318 319 static inline void set_context_registered(struct intel_context *ce) 320 { 321 lockdep_assert_held(&ce->guc_state.lock); 322 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 323 } 324 325 static inline void clr_context_registered(struct intel_context *ce) 326 { 327 lockdep_assert_held(&ce->guc_state.lock); 328 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 329 } 330 331 static inline bool context_policy_required(struct intel_context *ce) 332 { 333 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; 334 } 335 336 static inline void set_context_policy_required(struct intel_context *ce) 337 { 338 lockdep_assert_held(&ce->guc_state.lock); 339 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; 340 } 341 342 static inline void clr_context_policy_required(struct intel_context *ce) 343 { 344 lockdep_assert_held(&ce->guc_state.lock); 345 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; 346 } 347 348 static inline bool context_close_done(struct intel_context *ce) 349 { 350 return ce->guc_state.sched_state & SCHED_STATE_CLOSED; 351 } 352 353 static inline void set_context_close_done(struct intel_context *ce) 354 { 355 lockdep_assert_held(&ce->guc_state.lock); 356 ce->guc_state.sched_state |= SCHED_STATE_CLOSED; 357 } 358 359 static inline u32 context_blocked(struct intel_context *ce) 360 { 361 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 362 SCHED_STATE_BLOCKED_SHIFT; 363 } 364 365 static inline void incr_context_blocked(struct intel_context *ce) 366 { 367 lockdep_assert_held(&ce->guc_state.lock); 368 369 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 370 371 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 372 } 373 374 static inline void decr_context_blocked(struct intel_context *ce) 375 { 376 lockdep_assert_held(&ce->guc_state.lock); 377 378 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 379 380 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 381 } 382 383 static 
struct intel_context * 384 request_to_scheduling_context(struct i915_request *rq) 385 { 386 return intel_context_to_parent(rq->context); 387 } 388 389 static inline bool context_guc_id_invalid(struct intel_context *ce) 390 { 391 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID; 392 } 393 394 static inline void set_context_guc_id_invalid(struct intel_context *ce) 395 { 396 ce->guc_id.id = GUC_INVALID_CONTEXT_ID; 397 } 398 399 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 400 { 401 return &ce->engine->gt->uc.guc; 402 } 403 404 static inline struct i915_priolist *to_priolist(struct rb_node *rb) 405 { 406 return rb_entry(rb, struct i915_priolist, node); 407 } 408 409 /* 410 * When using multi-lrc submission a scratch memory area is reserved in the 411 * parent's context state for the process descriptor, work queue, and handshake 412 * between the parent + children contexts to insert safe preemption points 413 * between each of the BBs. Currently the scratch area is sized to a page. 414 * 415 * The layout of this scratch area is below: 416 * 0 guc_process_desc 417 * + sizeof(struct guc_process_desc) child go 418 * + CACHELINE_BYTES child join[0] 419 * ... 420 * + CACHELINE_BYTES child join[n - 1] 421 * ... unused 422 * PARENT_SCRATCH_SIZE / 2 work queue start 423 * ... work queue 424 * PARENT_SCRATCH_SIZE - 1 work queue end 425 */ 426 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2) 427 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE) 428 429 struct sync_semaphore { 430 u32 semaphore; 431 u8 unused[CACHELINE_BYTES - sizeof(u32)]; 432 }; 433 434 struct parent_scratch { 435 union guc_descs { 436 struct guc_sched_wq_desc wq_desc; 437 struct guc_process_desc_v69 pdesc; 438 } descs; 439 440 struct sync_semaphore go; 441 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; 442 443 u8 unused[WQ_OFFSET - sizeof(union guc_descs) - 444 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; 445 446 u32 wq[WQ_SIZE / sizeof(u32)]; 447 }; 448 449 static u32 __get_parent_scratch_offset(struct intel_context *ce) 450 { 451 GEM_BUG_ON(!ce->parallel.guc.parent_page); 452 453 return ce->parallel.guc.parent_page * PAGE_SIZE; 454 } 455 456 static u32 __get_wq_offset(struct intel_context *ce) 457 { 458 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET); 459 460 return __get_parent_scratch_offset(ce) + WQ_OFFSET; 461 } 462 463 static struct parent_scratch * 464 __get_parent_scratch(struct intel_context *ce) 465 { 466 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE); 467 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES); 468 469 /* 470 * Need to subtract LRC_STATE_OFFSET here as the 471 * parallel.guc.parent_page is the offset into ce->state while 472 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET. 473 */ 474 return (struct parent_scratch *) 475 (ce->lrc_reg_state + 476 ((__get_parent_scratch_offset(ce) - 477 LRC_STATE_OFFSET) / sizeof(u32))); 478 } 479 480 static struct guc_process_desc_v69 * 481 __get_process_desc_v69(struct intel_context *ce) 482 { 483 struct parent_scratch *ps = __get_parent_scratch(ce); 484 485 return &ps->descs.pdesc; 486 } 487 488 static struct guc_sched_wq_desc * 489 __get_wq_desc_v70(struct intel_context *ce) 490 { 491 struct parent_scratch *ps = __get_parent_scratch(ce); 492 493 return &ps->descs.wq_desc; 494 } 495 496 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size) 497 { 498 /* 499 * Check for space in work queue. 
Caching a value of head pointer in 500 * intel_context structure in order reduce the number accesses to shared 501 * GPU memory which may be across a PCIe bus. 502 */ 503 #define AVAILABLE_SPACE \ 504 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 505 if (wqi_size > AVAILABLE_SPACE) { 506 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); 507 508 if (wqi_size > AVAILABLE_SPACE) 509 return NULL; 510 } 511 #undef AVAILABLE_SPACE 512 513 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 514 } 515 516 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 517 { 518 struct intel_context *ce = xa_load(&guc->context_lookup, id); 519 520 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID); 521 522 return ce; 523 } 524 525 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) 526 { 527 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; 528 529 if (!base) 530 return NULL; 531 532 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); 533 534 return &base[index]; 535 } 536 537 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) 538 { 539 u32 size; 540 int ret; 541 542 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * 543 GUC_MAX_CONTEXT_ID); 544 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, 545 (void **)&guc->lrc_desc_pool_vaddr_v69); 546 if (ret) 547 return ret; 548 549 return 0; 550 } 551 552 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) 553 { 554 if (!guc->lrc_desc_pool_vaddr_v69) 555 return; 556 557 guc->lrc_desc_pool_vaddr_v69 = NULL; 558 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); 559 } 560 561 static inline bool guc_submission_initialized(struct intel_guc *guc) 562 { 563 return guc->submission_initialized; 564 } 565 566 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) 567 { 568 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); 569 570 if (desc) 571 memset(desc, 0, sizeof(*desc)); 572 } 573 574 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) 575 { 576 return __get_context(guc, id); 577 } 578 579 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id, 580 struct intel_context *ce) 581 { 582 unsigned long flags; 583 584 /* 585 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 586 * lower level functions directly. 587 */ 588 xa_lock_irqsave(&guc->context_lookup, flags); 589 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 590 xa_unlock_irqrestore(&guc->context_lookup, flags); 591 } 592 593 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) 594 { 595 unsigned long flags; 596 597 if (unlikely(!guc_submission_initialized(guc))) 598 return; 599 600 _reset_lrc_desc_v69(guc, id); 601 602 /* 603 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 604 * the lower level functions directly. 605 */ 606 xa_lock_irqsave(&guc->context_lookup, flags); 607 __xa_erase(&guc->context_lookup, id); 608 xa_unlock_irqrestore(&guc->context_lookup, flags); 609 } 610 611 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 612 { 613 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 614 wake_up_all(&guc->ct.wq); 615 } 616 617 static int guc_submission_send_busy_loop(struct intel_guc *guc, 618 const u32 *action, 619 u32 len, 620 u32 g2h_len_dw, 621 bool loop) 622 { 623 int ret; 624 625 /* 626 * We always loop when a send requires a reply (i.e. 
g2h_len_dw > 0), 627 * so we don't handle the case where we don't get a reply because we 628 * aborted the send due to the channel being busy. 629 */ 630 GEM_BUG_ON(g2h_len_dw && !loop); 631 632 if (g2h_len_dw) 633 atomic_inc(&guc->outstanding_submission_g2h); 634 635 ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 636 if (ret) 637 atomic_dec(&guc->outstanding_submission_g2h); 638 639 return ret; 640 } 641 642 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 643 atomic_t *wait_var, 644 bool interruptible, 645 long timeout) 646 { 647 const int state = interruptible ? 648 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 649 DEFINE_WAIT(wait); 650 651 might_sleep(); 652 GEM_BUG_ON(timeout < 0); 653 654 if (!atomic_read(wait_var)) 655 return 0; 656 657 if (!timeout) 658 return -ETIME; 659 660 for (;;) { 661 prepare_to_wait(&guc->ct.wq, &wait, state); 662 663 if (!atomic_read(wait_var)) 664 break; 665 666 if (signal_pending_state(state, current)) { 667 timeout = -EINTR; 668 break; 669 } 670 671 if (!timeout) { 672 timeout = -ETIME; 673 break; 674 } 675 676 timeout = io_schedule_timeout(timeout); 677 } 678 finish_wait(&guc->ct.wq, &wait); 679 680 return (timeout < 0) ? timeout : 0; 681 } 682 683 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 684 { 685 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 686 return 0; 687 688 return intel_guc_wait_for_pending_msg(guc, 689 &guc->outstanding_submission_g2h, 690 true, timeout); 691 } 692 693 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); 694 static int try_context_registration(struct intel_context *ce, bool loop); 695 696 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 697 { 698 int err = 0; 699 struct intel_context *ce = request_to_scheduling_context(rq); 700 u32 action[3]; 701 int len = 0; 702 u32 g2h_len_dw = 0; 703 bool enabled; 704 705 lockdep_assert_held(&rq->engine->sched_engine->lock); 706 707 /* 708 * Corner case where requests were sitting in the priority list or a 709 * request resubmitted after the context was banned. 710 */ 711 if (unlikely(!intel_context_is_schedulable(ce))) { 712 i915_request_put(i915_request_mark_eio(rq)); 713 intel_engine_signal_breadcrumbs(ce->engine); 714 return 0; 715 } 716 717 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 718 GEM_BUG_ON(context_guc_id_invalid(ce)); 719 720 if (context_policy_required(ce)) { 721 err = guc_context_policy_init_v70(ce, false); 722 if (err) 723 return err; 724 } 725 726 spin_lock(&ce->guc_state.lock); 727 728 /* 729 * The request / context will be run on the hardware when scheduling 730 * gets enabled in the unblock. For multi-lrc we still submit the 731 * context to move the LRC tails. 
732 */ 733 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 734 goto out; 735 736 enabled = context_enabled(ce) || context_blocked(ce); 737 738 if (!enabled) { 739 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 740 action[len++] = ce->guc_id.id; 741 action[len++] = GUC_CONTEXT_ENABLE; 742 set_context_pending_enable(ce); 743 intel_context_get(ce); 744 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 745 } else { 746 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 747 action[len++] = ce->guc_id.id; 748 } 749 750 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 751 if (!enabled && !err) { 752 trace_intel_context_sched_enable(ce); 753 atomic_inc(&guc->outstanding_submission_g2h); 754 set_context_enabled(ce); 755 756 /* 757 * Without multi-lrc KMD does the submission step (moving the 758 * lrc tail) so enabling scheduling is sufficient to submit the 759 * context. This isn't the case in multi-lrc submission as the 760 * GuC needs to move the tails, hence the need for another H2G 761 * to submit a multi-lrc context after enabling scheduling. 762 */ 763 if (intel_context_is_parent(ce)) { 764 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 765 err = intel_guc_send_nb(guc, action, len - 1, 0); 766 } 767 } else if (!enabled) { 768 clr_context_pending_enable(ce); 769 intel_context_put(ce); 770 } 771 if (likely(!err)) 772 trace_i915_request_guc_submit(rq); 773 774 out: 775 spin_unlock(&ce->guc_state.lock); 776 return err; 777 } 778 779 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 780 { 781 int ret = __guc_add_request(guc, rq); 782 783 if (unlikely(ret == -EBUSY)) { 784 guc->stalled_request = rq; 785 guc->submission_stall_reason = STALL_ADD_REQUEST; 786 } 787 788 return ret; 789 } 790 791 static inline void guc_set_lrc_tail(struct i915_request *rq) 792 { 793 rq->context->lrc_reg_state[CTX_RING_TAIL] = 794 intel_ring_set_tail(rq->ring, rq->tail); 795 } 796 797 static inline int rq_prio(const struct i915_request *rq) 798 { 799 return rq->sched.attr.priority; 800 } 801 802 static bool is_multi_lrc_rq(struct i915_request *rq) 803 { 804 return intel_context_is_parallel(rq->context); 805 } 806 807 static bool can_merge_rq(struct i915_request *rq, 808 struct i915_request *last) 809 { 810 return request_to_scheduling_context(rq) == 811 request_to_scheduling_context(last); 812 } 813 814 static u32 wq_space_until_wrap(struct intel_context *ce) 815 { 816 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 817 } 818 819 static void write_wqi(struct intel_context *ce, u32 wqi_size) 820 { 821 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 822 823 /* 824 * Ensure WQI are visible before updating tail 825 */ 826 intel_guc_write_barrier(ce_to_guc(ce)); 827 828 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 829 (WQ_SIZE - 1); 830 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); 831 } 832 833 static int guc_wq_noop_append(struct intel_context *ce) 834 { 835 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); 836 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 837 838 if (!wqi) 839 return -EBUSY; 840 841 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 842 843 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 844 FIELD_PREP(WQ_LEN_MASK, len_dw); 845 ce->parallel.guc.wqi_tail = 0; 846 847 return 0; 848 } 849 850 static int __guc_wq_item_append(struct i915_request *rq) 851 { 852 struct intel_context *ce = request_to_scheduling_context(rq); 853 struct intel_context *child; 854 unsigned int wqi_size = (ce->parallel.number_children + 4) * 
855 sizeof(u32); 856 u32 *wqi; 857 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 858 int ret; 859 860 /* Ensure context is in correct state updating work queue */ 861 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 862 GEM_BUG_ON(context_guc_id_invalid(ce)); 863 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 864 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); 865 866 /* Insert NOOP if this work queue item will wrap the tail pointer. */ 867 if (wqi_size > wq_space_until_wrap(ce)) { 868 ret = guc_wq_noop_append(ce); 869 if (ret) 870 return ret; 871 } 872 873 wqi = get_wq_pointer(ce, wqi_size); 874 if (!wqi) 875 return -EBUSY; 876 877 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 878 879 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 880 FIELD_PREP(WQ_LEN_MASK, len_dw); 881 *wqi++ = ce->lrc.lrca; 882 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 883 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 884 *wqi++ = 0; /* fence_id */ 885 for_each_child(ce, child) 886 *wqi++ = child->ring->tail / sizeof(u64); 887 888 write_wqi(ce, wqi_size); 889 890 return 0; 891 } 892 893 static int guc_wq_item_append(struct intel_guc *guc, 894 struct i915_request *rq) 895 { 896 struct intel_context *ce = request_to_scheduling_context(rq); 897 int ret; 898 899 if (unlikely(!intel_context_is_schedulable(ce))) 900 return 0; 901 902 ret = __guc_wq_item_append(rq); 903 if (unlikely(ret == -EBUSY)) { 904 guc->stalled_request = rq; 905 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 906 } 907 908 return ret; 909 } 910 911 static bool multi_lrc_submit(struct i915_request *rq) 912 { 913 struct intel_context *ce = request_to_scheduling_context(rq); 914 915 intel_ring_set_tail(rq->ring, rq->tail); 916 917 /* 918 * We expect the front end (execbuf IOCTL) to set this flag on the last 919 * request generated from a multi-BB submission. This indicates to the 920 * backend (GuC interface) that we should submit this context thus 921 * submitting all the requests generated in parallel. 922 */ 923 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 924 !intel_context_is_schedulable(ce); 925 } 926 927 static int guc_dequeue_one_context(struct intel_guc *guc) 928 { 929 struct i915_sched_engine * const sched_engine = guc->sched_engine; 930 struct i915_request *last = NULL; 931 bool submit = false; 932 struct rb_node *rb; 933 int ret; 934 935 lockdep_assert_held(&sched_engine->lock); 936 937 if (guc->stalled_request) { 938 submit = true; 939 last = guc->stalled_request; 940 941 switch (guc->submission_stall_reason) { 942 case STALL_REGISTER_CONTEXT: 943 goto register_context; 944 case STALL_MOVE_LRC_TAIL: 945 goto move_lrc_tail; 946 case STALL_ADD_REQUEST: 947 goto add_request; 948 default: 949 MISSING_CASE(guc->submission_stall_reason); 950 } 951 } 952 953 while ((rb = rb_first_cached(&sched_engine->queue))) { 954 struct i915_priolist *p = to_priolist(rb); 955 struct i915_request *rq, *rn; 956 957 priolist_for_each_request_consume(rq, rn, p) { 958 if (last && !can_merge_rq(rq, last)) 959 goto register_context; 960 961 list_del_init(&rq->sched.link); 962 963 __i915_request_submit(rq); 964 965 trace_i915_request_in(rq, 0); 966 last = rq; 967 968 if (is_multi_lrc_rq(rq)) { 969 /* 970 * We need to coalesce all multi-lrc requests in 971 * a relationship into a single H2G. We are 972 * guaranteed that all of these requests will be 973 * submitted sequentially. 
974 */ 975 if (multi_lrc_submit(rq)) { 976 submit = true; 977 goto register_context; 978 } 979 } else { 980 submit = true; 981 } 982 } 983 984 rb_erase_cached(&p->node, &sched_engine->queue); 985 i915_priolist_free(p); 986 } 987 988 register_context: 989 if (submit) { 990 struct intel_context *ce = request_to_scheduling_context(last); 991 992 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && 993 intel_context_is_schedulable(ce))) { 994 ret = try_context_registration(ce, false); 995 if (unlikely(ret == -EPIPE)) { 996 goto deadlk; 997 } else if (ret == -EBUSY) { 998 guc->stalled_request = last; 999 guc->submission_stall_reason = 1000 STALL_REGISTER_CONTEXT; 1001 goto schedule_tasklet; 1002 } else if (ret != 0) { 1003 GEM_WARN_ON(ret); /* Unexpected */ 1004 goto deadlk; 1005 } 1006 } 1007 1008 move_lrc_tail: 1009 if (is_multi_lrc_rq(last)) { 1010 ret = guc_wq_item_append(guc, last); 1011 if (ret == -EBUSY) { 1012 goto schedule_tasklet; 1013 } else if (ret != 0) { 1014 GEM_WARN_ON(ret); /* Unexpected */ 1015 goto deadlk; 1016 } 1017 } else { 1018 guc_set_lrc_tail(last); 1019 } 1020 1021 add_request: 1022 ret = guc_add_request(guc, last); 1023 if (unlikely(ret == -EPIPE)) { 1024 goto deadlk; 1025 } else if (ret == -EBUSY) { 1026 goto schedule_tasklet; 1027 } else if (ret != 0) { 1028 GEM_WARN_ON(ret); /* Unexpected */ 1029 goto deadlk; 1030 } 1031 } 1032 1033 guc->stalled_request = NULL; 1034 guc->submission_stall_reason = STALL_NONE; 1035 return submit; 1036 1037 deadlk: 1038 sched_engine->tasklet.callback = NULL; 1039 tasklet_disable_nosync(&sched_engine->tasklet); 1040 return false; 1041 1042 schedule_tasklet: 1043 tasklet_schedule(&sched_engine->tasklet); 1044 return false; 1045 } 1046 1047 static void guc_submission_tasklet(struct tasklet_struct *t) 1048 { 1049 struct i915_sched_engine *sched_engine = 1050 from_tasklet(sched_engine, t, tasklet); 1051 unsigned long flags; 1052 bool loop; 1053 1054 spin_lock_irqsave(&sched_engine->lock, flags); 1055 1056 do { 1057 loop = guc_dequeue_one_context(sched_engine->private_data); 1058 } while (loop); 1059 1060 i915_sched_engine_reset_on_empty(sched_engine); 1061 1062 spin_unlock_irqrestore(&sched_engine->lock, flags); 1063 } 1064 1065 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 1066 { 1067 if (iir & GT_RENDER_USER_INTERRUPT) 1068 intel_engine_signal_breadcrumbs(engine); 1069 } 1070 1071 static void __guc_context_destroy(struct intel_context *ce); 1072 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1073 static void guc_signal_context_fence(struct intel_context *ce); 1074 static void guc_cancel_context_requests(struct intel_context *ce); 1075 static void guc_blocked_fence_complete(struct intel_context *ce); 1076 1077 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1078 { 1079 struct intel_context *ce; 1080 unsigned long index, flags; 1081 bool pending_disable, pending_enable, deregister, destroyed, banned; 1082 1083 xa_lock_irqsave(&guc->context_lookup, flags); 1084 xa_for_each(&guc->context_lookup, index, ce) { 1085 /* 1086 * Corner case where the ref count on the object is zero but and 1087 * deregister G2H was lost. In this case we don't touch the ref 1088 * count and finish the destroy of the context. 
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
		    (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
			/* successful cancel so jump straight to close it */
			intel_context_sched_disable_unpin(ce);
		}

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async_untracked(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically at
 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
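 *
 * As a rough worked example (assuming the 19.2 MHz gt clock quoted above), the
 * 32 bit timestamp wraps after about 2^32 / 19.2e6 ~= 224 seconds, and an
 * eighth of that gives the ~27 second worker period mentioned above.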
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since the truncated values are still non-zero, it is
 * non-trivial to determine their validity. Instead we read the values multiple
 * times until they are consistent. In test runs, 3 attempts result in
 * consistent values. The upper bound is set to 6 attempts and may need to be
 * tuned as per any new occurrences.
1227 */ 1228 static void __get_engine_usage_record(struct intel_engine_cs *engine, 1229 u32 *last_in, u32 *id, u32 *total) 1230 { 1231 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); 1232 int i = 0; 1233 1234 do { 1235 *last_in = record_read(&rec_map, last_switch_in_stamp); 1236 *id = record_read(&rec_map, current_context_index); 1237 *total = record_read(&rec_map, total_runtime); 1238 1239 if (record_read(&rec_map, last_switch_in_stamp) == *last_in && 1240 record_read(&rec_map, current_context_index) == *id && 1241 record_read(&rec_map, total_runtime) == *total) 1242 break; 1243 } while (++i < 6); 1244 } 1245 1246 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) 1247 { 1248 struct intel_engine_guc_stats *stats = &engine->stats.guc; 1249 struct intel_guc *guc = &engine->gt->uc.guc; 1250 u32 last_switch, ctx_id, total; 1251 1252 lockdep_assert_held(&guc->timestamp.lock); 1253 1254 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); 1255 1256 stats->running = ctx_id != ~0U && last_switch; 1257 if (stats->running) 1258 __extend_last_switch(guc, &stats->start_gt_clk, last_switch); 1259 1260 /* 1261 * Instead of adjusting the total for overflow, just add the 1262 * difference from previous sample stats->total_gt_clks 1263 */ 1264 if (total && total != ~0U) { 1265 stats->total_gt_clks += (u32)(total - stats->prev_total); 1266 stats->prev_total = total; 1267 } 1268 } 1269 1270 static u32 gpm_timestamp_shift(struct intel_gt *gt) 1271 { 1272 intel_wakeref_t wakeref; 1273 u32 reg, shift; 1274 1275 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 1276 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); 1277 1278 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> 1279 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; 1280 1281 return 3 - shift; 1282 } 1283 1284 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) 1285 { 1286 struct intel_gt *gt = guc_to_gt(guc); 1287 u32 gt_stamp_lo, gt_stamp_hi; 1288 u64 gpm_ts; 1289 1290 lockdep_assert_held(&guc->timestamp.lock); 1291 1292 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1293 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, 1294 MISC_STATUS1) >> guc->timestamp.shift; 1295 gt_stamp_lo = lower_32_bits(gpm_ts); 1296 *now = ktime_get(); 1297 1298 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) 1299 gt_stamp_hi++; 1300 1301 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; 1302 } 1303 1304 /* 1305 * Unlike the execlist mode of submission total and active times are in terms of 1306 * gt clocks. The *now parameter is retained to return the cpu time at which the 1307 * busyness was sampled. 1308 */ 1309 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) 1310 { 1311 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; 1312 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; 1313 struct intel_gt *gt = engine->gt; 1314 struct intel_guc *guc = >->uc.guc; 1315 u64 total, gt_stamp_saved; 1316 unsigned long flags; 1317 u32 reset_count; 1318 bool in_reset; 1319 intel_wakeref_t wakeref; 1320 1321 spin_lock_irqsave(&guc->timestamp.lock, flags); 1322 1323 /* 1324 * If a reset happened, we risk reading partially updated engine 1325 * busyness from GuC, so we just use the driver stored copy of busyness. 1326 * Synchronize with gt reset using reset_count and the 1327 * I915_RESET_BACKOFF flag. 
	 * Note that reset flow updates the reset_count after the
	 * I915_RESET_BACKOFF flag is set, so ensure that the reset_count is
	 * usable by checking the flag afterwards.
	 */
	reset_count = i915_reset_count(gpu_error);
	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

	*now = ktime_get();

	/*
	 * The active busyness depends on start_gt_clk and gt_stamp.
	 * gt_stamp is updated by i915 only when gt is awake and the
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
	wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
	if (wakeref) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
		/*
		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
		 * start_gt_clk' calculation below for active engines.
		 */
		guc_update_engine_gt_clks(engine);
		guc_update_pm_timestamp(guc, now);
		intel_gt_pm_put_async(gt, wakeref);
		if (i915_reset_count(gpu_error) != reset_count) {
			*stats = stats_saved;
			guc->timestamp.gt_stamp = gt_stamp_saved;
		}
	}

	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
	if (stats->running) {
		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

		total += intel_gt_clock_interval_to_ns(gt, clk);
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);

	return ns_to_ktime(total);
}

static void guc_enable_busyness_worker(struct intel_guc *guc)
{
	mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay);
}

static void guc_cancel_busyness_worker(struct intel_guc *guc)
{
	/*
	 * There are many different call stacks that can get here. Some of them
	 * hold the reset mutex. The busyness worker also attempts to acquire the
	 * reset mutex. Synchronously flushing a worker thread requires acquiring
	 * the worker mutex. Lockdep sees this as a conflict. It thinks that the
	 * flush can deadlock because it holds the worker mutex while waiting for
	 * the reset mutex, but another thread is holding the reset mutex and might
	 * attempt to use other worker functions.
	 *
	 * In practice, this scenario does not exist because the busyness worker
	 * does not block waiting for the reset mutex. It does a try-lock on it and
	 * immediately exits if the lock is already held. Unfortunately, the mutex
	 * in question (I915_RESET_BACKOFF) is an i915 implementation which has
	 * lockdep annotation but not to the extent of explaining that the 'might
	 * lock' is actually a 'does not need to lock'. So one option would be to
	 * add more complex lockdep annotations to ignore the issue (if at all
	 * possible). A simpler option is to just not flush synchronously when a
	 * reset is in progress. Given that the worker will just early exit and
	 * re-schedule itself anyway, there is no advantage to running it
	 * immediately.
	 *
	 * If a reset is not in progress, then the synchronous flush may be
	 * required. As noted many call stacks lead here, some during suspend and
	 * driver unload which do require a synchronous flush to make sure the
	 * worker is stopped before memory is freed.
	 *
	 * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
	 * every possible call stack is unfeasible.
It would be too intrusive to many 1405 * areas that really don't care about the GuC backend. However, there is the 1406 * 'reset_in_progress' flag available, so just use that. 1407 * 1408 * And note that in the case of a reset occurring during driver unload 1409 * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set 1410 * is fine because there is another cancel in _finish (when the reset flag is 1411 * not). 1412 */ 1413 if (guc_to_gt(guc)->uc.reset_in_progress) 1414 cancel_delayed_work(&guc->timestamp.work); 1415 else 1416 cancel_delayed_work_sync(&guc->timestamp.work); 1417 } 1418 1419 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1420 { 1421 struct intel_gt *gt = guc_to_gt(guc); 1422 struct intel_engine_cs *engine; 1423 enum intel_engine_id id; 1424 unsigned long flags; 1425 ktime_t unused; 1426 1427 guc_cancel_busyness_worker(guc); 1428 1429 spin_lock_irqsave(&guc->timestamp.lock, flags); 1430 1431 guc_update_pm_timestamp(guc, &unused); 1432 for_each_engine(engine, gt, id) { 1433 guc_update_engine_gt_clks(engine); 1434 engine->stats.guc.prev_total = 0; 1435 } 1436 1437 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1438 } 1439 1440 static void __update_guc_busyness_stats(struct intel_guc *guc) 1441 { 1442 struct intel_gt *gt = guc_to_gt(guc); 1443 struct intel_engine_cs *engine; 1444 enum intel_engine_id id; 1445 unsigned long flags; 1446 ktime_t unused; 1447 1448 guc->timestamp.last_stat_jiffies = jiffies; 1449 1450 spin_lock_irqsave(&guc->timestamp.lock, flags); 1451 1452 guc_update_pm_timestamp(guc, &unused); 1453 for_each_engine(engine, gt, id) 1454 guc_update_engine_gt_clks(engine); 1455 1456 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1457 } 1458 1459 static void __guc_context_update_stats(struct intel_context *ce) 1460 { 1461 struct intel_guc *guc = ce_to_guc(ce); 1462 unsigned long flags; 1463 1464 spin_lock_irqsave(&guc->timestamp.lock, flags); 1465 lrc_update_runtime(ce); 1466 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1467 } 1468 1469 static void guc_context_update_stats(struct intel_context *ce) 1470 { 1471 if (!intel_context_pin_if_active(ce)) 1472 return; 1473 1474 __guc_context_update_stats(ce); 1475 intel_context_unpin(ce); 1476 } 1477 1478 static void guc_timestamp_ping(struct work_struct *wrk) 1479 { 1480 struct intel_guc *guc = container_of(wrk, typeof(*guc), 1481 timestamp.work.work); 1482 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1483 struct intel_gt *gt = guc_to_gt(guc); 1484 struct intel_context *ce; 1485 intel_wakeref_t wakeref; 1486 unsigned long index; 1487 int srcu, ret; 1488 1489 /* 1490 * Ideally the busyness worker should take a gt pm wakeref because the 1491 * worker only needs to be active while gt is awake. However, the 1492 * gt_park path cancels the worker synchronously and this complicates 1493 * the flow if the worker is also running at the same time. The cancel 1494 * waits for the worker and when the worker releases the wakeref, that 1495 * would call gt_park and would lead to a deadlock. 1496 * 1497 * The resolution is to take the global pm wakeref if runtime pm is 1498 * already active. If not, we don't need to update the busyness stats as 1499 * the stats would already be updated when the gt was parked. 1500 * 1501 * Note: 1502 * - We do not requeue the worker if we cannot take a reference to runtime 1503 * pm since intel_guc_busyness_unpark would requeue the worker in the 1504 * resume path. 
1505 * 1506 * - If the gt was parked longer than time taken for GT timestamp to roll 1507 * over, we ignore those rollovers since we don't care about tracking 1508 * the exact GT time. We only care about roll overs when the gt is 1509 * active and running workloads. 1510 * 1511 * - There is a window of time between gt_park and runtime suspend, 1512 * where the worker may run. This is acceptable since the worker will 1513 * not find any new data to update busyness. 1514 */ 1515 wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); 1516 if (!wakeref) 1517 return; 1518 1519 /* 1520 * Synchronize with gt reset to make sure the worker does not 1521 * corrupt the engine/guc stats. NB: can't actually block waiting 1522 * for a reset to complete as the reset requires flushing out 1523 * this worker thread if started. So waiting would deadlock. 1524 */ 1525 ret = intel_gt_reset_trylock(gt, &srcu); 1526 if (ret) 1527 goto err_trylock; 1528 1529 __update_guc_busyness_stats(guc); 1530 1531 /* adjust context stats for overflow */ 1532 xa_for_each(&guc->context_lookup, index, ce) 1533 guc_context_update_stats(ce); 1534 1535 intel_gt_reset_unlock(gt, srcu); 1536 1537 guc_enable_busyness_worker(guc); 1538 1539 err_trylock: 1540 intel_runtime_pm_put(>->i915->runtime_pm, wakeref); 1541 } 1542 1543 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1544 { 1545 u32 offset = intel_guc_engine_usage_offset(guc); 1546 u32 action[] = { 1547 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1548 offset, 1549 0, 1550 }; 1551 1552 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1553 } 1554 1555 static int guc_init_engine_stats(struct intel_guc *guc) 1556 { 1557 struct intel_gt *gt = guc_to_gt(guc); 1558 intel_wakeref_t wakeref; 1559 int ret; 1560 1561 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1562 ret = guc_action_enable_usage_stats(guc); 1563 1564 if (ret) 1565 guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); 1566 else 1567 guc_enable_busyness_worker(guc); 1568 1569 return ret; 1570 } 1571 1572 static void guc_fini_engine_stats(struct intel_guc *guc) 1573 { 1574 guc_cancel_busyness_worker(guc); 1575 } 1576 1577 void intel_guc_busyness_park(struct intel_gt *gt) 1578 { 1579 struct intel_guc *guc = >->uc.guc; 1580 1581 if (!guc_submission_initialized(guc)) 1582 return; 1583 1584 /* 1585 * There is a race with suspend flow where the worker runs after suspend 1586 * and causes an unclaimed register access warning. Cancel the worker 1587 * synchronously here. 1588 */ 1589 guc_cancel_busyness_worker(guc); 1590 1591 /* 1592 * Before parking, we should sample engine busyness stats if we need to. 1593 * We can skip it if we are less than half a ping from the last time we 1594 * sampled the busyness stats. 
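	 * (For example, with the ~27 second ping period described earlier, a
	 * sample taken within the last ~13 seconds is considered fresh enough
	 * and parking does not resample.)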
1595 */ 1596 if (guc->timestamp.last_stat_jiffies && 1597 !time_after(jiffies, guc->timestamp.last_stat_jiffies + 1598 (guc->timestamp.ping_delay / 2))) 1599 return; 1600 1601 __update_guc_busyness_stats(guc); 1602 } 1603 1604 void intel_guc_busyness_unpark(struct intel_gt *gt) 1605 { 1606 struct intel_guc *guc = >->uc.guc; 1607 unsigned long flags; 1608 ktime_t unused; 1609 1610 if (!guc_submission_initialized(guc)) 1611 return; 1612 1613 spin_lock_irqsave(&guc->timestamp.lock, flags); 1614 guc_update_pm_timestamp(guc, &unused); 1615 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1616 guc_enable_busyness_worker(guc); 1617 } 1618 1619 static inline bool 1620 submission_disabled(struct intel_guc *guc) 1621 { 1622 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1623 1624 return unlikely(!sched_engine || 1625 !__tasklet_is_enabled(&sched_engine->tasklet) || 1626 intel_gt_is_wedged(guc_to_gt(guc))); 1627 } 1628 1629 static void disable_submission(struct intel_guc *guc) 1630 { 1631 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1632 1633 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1634 GEM_BUG_ON(!guc->ct.enabled); 1635 __tasklet_disable_sync_once(&sched_engine->tasklet); 1636 sched_engine->tasklet.callback = NULL; 1637 } 1638 } 1639 1640 static void enable_submission(struct intel_guc *guc) 1641 { 1642 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1643 unsigned long flags; 1644 1645 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1646 sched_engine->tasklet.callback = guc_submission_tasklet; 1647 wmb(); /* Make sure callback visible */ 1648 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1649 __tasklet_enable(&sched_engine->tasklet)) { 1650 GEM_BUG_ON(!guc->ct.enabled); 1651 1652 /* And kick in case we missed a new request submission. */ 1653 tasklet_hi_schedule(&sched_engine->tasklet); 1654 } 1655 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1656 } 1657 1658 static void guc_flush_submissions(struct intel_guc *guc) 1659 { 1660 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1661 unsigned long flags; 1662 1663 spin_lock_irqsave(&sched_engine->lock, flags); 1664 spin_unlock_irqrestore(&sched_engine->lock, flags); 1665 } 1666 1667 void intel_guc_submission_flush_work(struct intel_guc *guc) 1668 { 1669 flush_work(&guc->submission_state.destroyed_worker); 1670 } 1671 1672 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1673 1674 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1675 { 1676 if (unlikely(!guc_submission_initialized(guc))) { 1677 /* Reset called during driver load? GuC not yet initialised! 
*/ 1678 return; 1679 } 1680 1681 intel_gt_park_heartbeats(guc_to_gt(guc)); 1682 disable_submission(guc); 1683 guc->interrupts.disable(guc); 1684 __reset_guc_busyness_stats(guc); 1685 1686 /* Flush IRQ handler */ 1687 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1688 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1689 1690 guc_flush_submissions(guc); 1691 guc_flush_destroyed_contexts(guc); 1692 flush_work(&guc->ct.requests.worker); 1693 1694 scrub_guc_desc_for_outstanding_g2h(guc); 1695 } 1696 1697 static struct intel_engine_cs * 1698 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1699 { 1700 struct intel_engine_cs *engine; 1701 intel_engine_mask_t tmp, mask = ve->mask; 1702 unsigned int num_siblings = 0; 1703 1704 for_each_engine_masked(engine, ve->gt, mask, tmp) 1705 if (num_siblings++ == sibling) 1706 return engine; 1707 1708 return NULL; 1709 } 1710 1711 static inline struct intel_engine_cs * 1712 __context_to_physical_engine(struct intel_context *ce) 1713 { 1714 struct intel_engine_cs *engine = ce->engine; 1715 1716 if (intel_engine_is_virtual(engine)) 1717 engine = guc_virtual_get_sibling(engine, 0); 1718 1719 return engine; 1720 } 1721 1722 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1723 { 1724 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1725 1726 if (!intel_context_is_schedulable(ce)) 1727 return; 1728 1729 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1730 1731 /* 1732 * We want a simple context + ring to execute the breadcrumb update. 1733 * We cannot rely on the context being intact across the GPU hang, 1734 * so clear it and rebuild just what we need for the breadcrumb. 1735 * All pending requests for this context will be zapped, and any 1736 * future request will be after userspace has had the opportunity 1737 * to recreate its own state. 1738 */ 1739 if (scrub) 1740 lrc_init_regs(ce, engine, true); 1741 1742 /* Rerun the request; its payload has been neutered (if guilty). */ 1743 lrc_update_regs(ce, engine, head); 1744 } 1745 1746 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1747 { 1748 /* 1749 * Wa_22011802037: In addition to stopping the cs, we need 1750 * to wait for any pending mi force wakeups 1751 */ 1752 if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { 1753 intel_engine_stop_cs(engine); 1754 intel_engine_wait_for_pending_mi_fw(engine); 1755 } 1756 } 1757 1758 static void guc_reset_nop(struct intel_engine_cs *engine) 1759 { 1760 } 1761 1762 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1763 { 1764 } 1765 1766 static void 1767 __unwind_incomplete_requests(struct intel_context *ce) 1768 { 1769 struct i915_request *rq, *rn; 1770 struct list_head *pl; 1771 int prio = I915_PRIORITY_INVALID; 1772 struct i915_sched_engine * const sched_engine = 1773 ce->engine->sched_engine; 1774 unsigned long flags; 1775 1776 spin_lock_irqsave(&sched_engine->lock, flags); 1777 spin_lock(&ce->guc_state.lock); 1778 list_for_each_entry_safe_reverse(rq, rn, 1779 &ce->guc_state.requests, 1780 sched.link) { 1781 if (i915_request_completed(rq)) 1782 continue; 1783 1784 list_del_init(&rq->sched.link); 1785 __i915_request_unsubmit(rq); 1786 1787 /* Push the request back into the queue for later resubmission. 
*/ 1788 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1789 if (rq_prio(rq) != prio) { 1790 prio = rq_prio(rq); 1791 pl = i915_sched_lookup_priolist(sched_engine, prio); 1792 } 1793 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1794 1795 list_add(&rq->sched.link, pl); 1796 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1797 } 1798 spin_unlock(&ce->guc_state.lock); 1799 spin_unlock_irqrestore(&sched_engine->lock, flags); 1800 } 1801 1802 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1803 { 1804 bool guilty; 1805 struct i915_request *rq; 1806 unsigned long flags; 1807 u32 head; 1808 int i, number_children = ce->parallel.number_children; 1809 struct intel_context *parent = ce; 1810 1811 GEM_BUG_ON(intel_context_is_child(ce)); 1812 1813 intel_context_get(ce); 1814 1815 /* 1816 * GuC will implicitly mark the context as non-schedulable when it sends 1817 * the reset notification. Make sure our state reflects this change. The 1818 * context will be marked enabled on resubmission. 1819 */ 1820 spin_lock_irqsave(&ce->guc_state.lock, flags); 1821 clr_context_enabled(ce); 1822 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1823 1824 /* 1825 * For each context in the relationship find the hanging request 1826 * resetting each context / request as needed 1827 */ 1828 for (i = 0; i < number_children + 1; ++i) { 1829 if (!intel_context_is_pinned(ce)) 1830 goto next_context; 1831 1832 guilty = false; 1833 rq = intel_context_get_active_request(ce); 1834 if (!rq) { 1835 head = ce->ring->tail; 1836 goto out_replay; 1837 } 1838 1839 if (i915_request_started(rq)) 1840 guilty = stalled & ce->engine->mask; 1841 1842 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1843 head = intel_ring_wrap(ce->ring, rq->head); 1844 1845 __i915_request_reset(rq, guilty); 1846 i915_request_put(rq); 1847 out_replay: 1848 guc_reset_state(ce, head, guilty); 1849 next_context: 1850 if (i != number_children) 1851 ce = list_next_entry(ce, parallel.child_link); 1852 } 1853 1854 __unwind_incomplete_requests(parent); 1855 intel_context_put(parent); 1856 } 1857 1858 void wake_up_all_tlb_invalidate(struct intel_guc *guc) 1859 { 1860 struct intel_guc_tlb_wait *wait; 1861 unsigned long i; 1862 1863 if (!intel_guc_tlb_invalidation_is_available(guc)) 1864 return; 1865 1866 xa_lock_irq(&guc->tlb_lookup); 1867 xa_for_each(&guc->tlb_lookup, i, wait) 1868 wake_up(&wait->wq); 1869 xa_unlock_irq(&guc->tlb_lookup); 1870 } 1871 1872 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1873 { 1874 struct intel_context *ce; 1875 unsigned long index; 1876 unsigned long flags; 1877 1878 if (unlikely(!guc_submission_initialized(guc))) { 1879 /* Reset called during driver load? GuC not yet initialised! 
*/ 1880 return; 1881 } 1882 1883 xa_lock_irqsave(&guc->context_lookup, flags); 1884 xa_for_each(&guc->context_lookup, index, ce) { 1885 if (!kref_get_unless_zero(&ce->ref)) 1886 continue; 1887 1888 xa_unlock(&guc->context_lookup); 1889 1890 if (intel_context_is_pinned(ce) && 1891 !intel_context_is_child(ce)) 1892 __guc_reset_context(ce, stalled); 1893 1894 intel_context_put(ce); 1895 1896 xa_lock(&guc->context_lookup); 1897 } 1898 xa_unlock_irqrestore(&guc->context_lookup, flags); 1899 1900 /* GuC is blown away, drop all references to contexts */ 1901 xa_destroy(&guc->context_lookup); 1902 } 1903 1904 static void guc_cancel_context_requests(struct intel_context *ce) 1905 { 1906 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1907 struct i915_request *rq; 1908 unsigned long flags; 1909 1910 /* Mark all executing requests as skipped. */ 1911 spin_lock_irqsave(&sched_engine->lock, flags); 1912 spin_lock(&ce->guc_state.lock); 1913 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1914 i915_request_put(i915_request_mark_eio(rq)); 1915 spin_unlock(&ce->guc_state.lock); 1916 spin_unlock_irqrestore(&sched_engine->lock, flags); 1917 } 1918 1919 static void 1920 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1921 { 1922 struct i915_request *rq, *rn; 1923 struct rb_node *rb; 1924 unsigned long flags; 1925 1926 /* Can be called during boot if GuC fails to load */ 1927 if (!sched_engine) 1928 return; 1929 1930 /* 1931 * Before we call engine->cancel_requests(), we should have exclusive 1932 * access to the submission state. This is arranged for us by the 1933 * caller disabling the interrupt generation, the tasklet and other 1934 * threads that may then access the same state, giving us a free hand 1935 * to reset state. However, we still need to let lockdep be aware that 1936 * we know this state may be accessed in hardirq context, so we 1937 * disable the irq around this manipulation and we want to keep 1938 * the spinlock focused on its duties and not accidentally conflate 1939 * coverage to the submission's irq state. (Similarly, although we 1940 * shouldn't need to disable irq around the manipulation of the 1941 * submission's irq state, we also wish to remind ourselves that 1942 * it is irq state.) 1943 */ 1944 spin_lock_irqsave(&sched_engine->lock, flags); 1945 1946 /* Flush the queued requests to the timeline list (for retiring). 
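 * Each queued request is submitted so it reaches its timeline, marked with
 * -EIO and released, and the emptied priority lists are freed as we go.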
*/ 1947 while ((rb = rb_first_cached(&sched_engine->queue))) { 1948 struct i915_priolist *p = to_priolist(rb); 1949 1950 priolist_for_each_request_consume(rq, rn, p) { 1951 list_del_init(&rq->sched.link); 1952 1953 __i915_request_submit(rq); 1954 1955 i915_request_put(i915_request_mark_eio(rq)); 1956 } 1957 1958 rb_erase_cached(&p->node, &sched_engine->queue); 1959 i915_priolist_free(p); 1960 } 1961 1962 /* Remaining _unready_ requests will be nop'ed when submitted */ 1963 1964 sched_engine->queue_priority_hint = INT_MIN; 1965 sched_engine->queue = RB_ROOT_CACHED; 1966 1967 spin_unlock_irqrestore(&sched_engine->lock, flags); 1968 } 1969 1970 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1971 { 1972 struct intel_context *ce; 1973 unsigned long index; 1974 unsigned long flags; 1975 1976 xa_lock_irqsave(&guc->context_lookup, flags); 1977 xa_for_each(&guc->context_lookup, index, ce) { 1978 if (!kref_get_unless_zero(&ce->ref)) 1979 continue; 1980 1981 xa_unlock(&guc->context_lookup); 1982 1983 if (intel_context_is_pinned(ce) && 1984 !intel_context_is_child(ce)) 1985 guc_cancel_context_requests(ce); 1986 1987 intel_context_put(ce); 1988 1989 xa_lock(&guc->context_lookup); 1990 } 1991 xa_unlock_irqrestore(&guc->context_lookup, flags); 1992 1993 guc_cancel_sched_engine_requests(guc->sched_engine); 1994 1995 /* GuC is blown away, drop all references to contexts */ 1996 xa_destroy(&guc->context_lookup); 1997 1998 /* 1999 * Wedged GT won't respond to any TLB invalidation request. Simply 2000 * release all the blocked waiters. 2001 */ 2002 wake_up_all_tlb_invalidate(guc); 2003 } 2004 2005 void intel_guc_submission_reset_finish(struct intel_guc *guc) 2006 { 2007 /* 2008 * Ensure the busyness worker gets cancelled even on a fatal wedge. 2009 * Note that reset_prepare is not allowed to because it confuses lockdep. 2010 */ 2011 if (guc_submission_initialized(guc)) 2012 guc_cancel_busyness_worker(guc); 2013 2014 /* Reset called during driver load or during wedge? */ 2015 if (unlikely(!guc_submission_initialized(guc) || 2016 !intel_guc_is_fw_running(guc) || 2017 intel_gt_is_wedged(guc_to_gt(guc)))) { 2018 return; 2019 } 2020 2021 /* 2022 * Technically possible for either of these values to be non-zero here, 2023 * but very unlikely + harmless. Regardless let's add a warn so we can 2024 * see in CI if this happens frequently / a precursor to taking down the 2025 * machine. 2026 */ 2027 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 2028 atomic_set(&guc->outstanding_submission_g2h, 0); 2029 2030 intel_guc_global_policies_update(guc); 2031 enable_submission(guc); 2032 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 2033 2034 /* 2035 * The full GT reset will have cleared the TLB caches and flushed the 2036 * G2H message queue; we can release all the blocked waiters. 
2037 */ 2038 wake_up_all_tlb_invalidate(guc); 2039 } 2040 2041 static void destroyed_worker_func(struct work_struct *w); 2042 static void reset_fail_worker_func(struct work_struct *w); 2043 2044 bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc) 2045 { 2046 return HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915) && 2047 intel_guc_is_ready(guc); 2048 } 2049 2050 static int init_tlb_lookup(struct intel_guc *guc) 2051 { 2052 struct intel_guc_tlb_wait *wait; 2053 int err; 2054 2055 if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) 2056 return 0; 2057 2058 xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC); 2059 2060 wait = kzalloc(sizeof(*wait), GFP_KERNEL); 2061 if (!wait) 2062 return -ENOMEM; 2063 2064 init_waitqueue_head(&wait->wq); 2065 2066 /* Preallocate a shared id for use under memory pressure. */ 2067 err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait, 2068 xa_limit_32b, &guc->next_seqno, GFP_KERNEL); 2069 if (err < 0) { 2070 kfree(wait); 2071 return err; 2072 } 2073 2074 return 0; 2075 } 2076 2077 static void fini_tlb_lookup(struct intel_guc *guc) 2078 { 2079 struct intel_guc_tlb_wait *wait; 2080 2081 if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) 2082 return; 2083 2084 wait = xa_load(&guc->tlb_lookup, guc->serial_slot); 2085 if (wait && wait->busy) 2086 guc_err(guc, "Unexpected busy item in tlb_lookup on fini\n"); 2087 kfree(wait); 2088 2089 xa_destroy(&guc->tlb_lookup); 2090 } 2091 2092 /* 2093 * Set up the memory resources to be shared with the GuC (via the GGTT) 2094 * at firmware loading time. 2095 */ 2096 int intel_guc_submission_init(struct intel_guc *guc) 2097 { 2098 struct intel_gt *gt = guc_to_gt(guc); 2099 int ret; 2100 2101 if (guc->submission_initialized) 2102 return 0; 2103 2104 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { 2105 ret = guc_lrc_desc_pool_create_v69(guc); 2106 if (ret) 2107 return ret; 2108 } 2109 2110 ret = init_tlb_lookup(guc); 2111 if (ret) 2112 goto destroy_pool; 2113 2114 guc->submission_state.guc_ids_bitmap = 2115 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 2116 if (!guc->submission_state.guc_ids_bitmap) { 2117 ret = -ENOMEM; 2118 goto destroy_tlb; 2119 } 2120 2121 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 2122 guc->timestamp.shift = gpm_timestamp_shift(gt); 2123 guc->submission_initialized = true; 2124 2125 return 0; 2126 2127 destroy_tlb: 2128 fini_tlb_lookup(guc); 2129 destroy_pool: 2130 guc_lrc_desc_pool_destroy_v69(guc); 2131 return ret; 2132 } 2133 2134 void intel_guc_submission_fini(struct intel_guc *guc) 2135 { 2136 if (!guc->submission_initialized) 2137 return; 2138 2139 guc_flush_destroyed_contexts(guc); 2140 guc_lrc_desc_pool_destroy_v69(guc); 2141 i915_sched_engine_put(guc->sched_engine); 2142 bitmap_free(guc->submission_state.guc_ids_bitmap); 2143 fini_tlb_lookup(guc); 2144 guc->submission_initialized = false; 2145 } 2146 2147 static inline void queue_request(struct i915_sched_engine *sched_engine, 2148 struct i915_request *rq, 2149 int prio) 2150 { 2151 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2152 list_add_tail(&rq->sched.link, 2153 i915_sched_lookup_priolist(sched_engine, prio)); 2154 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2155 tasklet_hi_schedule(&sched_engine->tasklet); 2156 } 2157 2158 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 2159 struct i915_request *rq) 2160 { 2161 int ret = 0; 2162 2163 __i915_request_submit(rq); 2164 2165 trace_i915_request_in(rq, 0); 2166 2167 if (is_multi_lrc_rq(rq)) { 2168 if 
(multi_lrc_submit(rq)) { 2169 ret = guc_wq_item_append(guc, rq); 2170 if (!ret) 2171 ret = guc_add_request(guc, rq); 2172 } 2173 } else { 2174 guc_set_lrc_tail(rq); 2175 ret = guc_add_request(guc, rq); 2176 } 2177 2178 if (unlikely(ret == -EPIPE)) 2179 disable_submission(guc); 2180 2181 return ret; 2182 } 2183 2184 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 2185 { 2186 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2187 struct intel_context *ce = request_to_scheduling_context(rq); 2188 2189 return submission_disabled(guc) || guc->stalled_request || 2190 !i915_sched_engine_is_empty(sched_engine) || 2191 !ctx_id_mapped(guc, ce->guc_id.id); 2192 } 2193 2194 static void guc_submit_request(struct i915_request *rq) 2195 { 2196 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2197 struct intel_guc *guc = &rq->engine->gt->uc.guc; 2198 unsigned long flags; 2199 2200 /* Will be called from irq-context when using foreign fences. */ 2201 spin_lock_irqsave(&sched_engine->lock, flags); 2202 2203 if (need_tasklet(guc, rq)) 2204 queue_request(sched_engine, rq, rq_prio(rq)); 2205 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 2206 tasklet_hi_schedule(&sched_engine->tasklet); 2207 2208 spin_unlock_irqrestore(&sched_engine->lock, flags); 2209 } 2210 2211 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 2212 { 2213 int ret; 2214 2215 GEM_BUG_ON(intel_context_is_child(ce)); 2216 2217 if (intel_context_is_parent(ce)) 2218 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2219 NUMBER_MULTI_LRC_GUC_ID(guc), 2220 order_base_2(ce->parallel.number_children 2221 + 1)); 2222 else 2223 ret = ida_simple_get(&guc->submission_state.guc_ids, 2224 NUMBER_MULTI_LRC_GUC_ID(guc), 2225 guc->submission_state.num_guc_ids, 2226 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2227 __GFP_NOWARN); 2228 if (unlikely(ret < 0)) 2229 return ret; 2230 2231 if (!intel_context_is_parent(ce)) 2232 ++guc->submission_state.guc_ids_in_use; 2233 2234 ce->guc_id.id = ret; 2235 return 0; 2236 } 2237 2238 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2239 { 2240 GEM_BUG_ON(intel_context_is_child(ce)); 2241 2242 if (!context_guc_id_invalid(ce)) { 2243 if (intel_context_is_parent(ce)) { 2244 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2245 ce->guc_id.id, 2246 order_base_2(ce->parallel.number_children 2247 + 1)); 2248 } else { 2249 --guc->submission_state.guc_ids_in_use; 2250 ida_simple_remove(&guc->submission_state.guc_ids, 2251 ce->guc_id.id); 2252 } 2253 clr_ctx_id_mapping(guc, ce->guc_id.id); 2254 set_context_guc_id_invalid(ce); 2255 } 2256 if (!list_empty(&ce->guc_id.link)) 2257 list_del_init(&ce->guc_id.link); 2258 } 2259 2260 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2261 { 2262 unsigned long flags; 2263 2264 spin_lock_irqsave(&guc->submission_state.lock, flags); 2265 __release_guc_id(guc, ce); 2266 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2267 } 2268 2269 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2270 { 2271 struct intel_context *cn; 2272 2273 lockdep_assert_held(&guc->submission_state.lock); 2274 GEM_BUG_ON(intel_context_is_child(ce)); 2275 GEM_BUG_ON(intel_context_is_parent(ce)); 2276 2277 if (!list_empty(&guc->submission_state.guc_id_list)) { 2278 cn = list_first_entry(&guc->submission_state.guc_id_list, 2279 struct intel_context, 2280 guc_id.link); 2281 2282 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2283 
GEM_BUG_ON(context_guc_id_invalid(cn)); 2284 GEM_BUG_ON(intel_context_is_child(cn)); 2285 GEM_BUG_ON(intel_context_is_parent(cn)); 2286 2287 list_del_init(&cn->guc_id.link); 2288 ce->guc_id.id = cn->guc_id.id; 2289 2290 spin_lock(&cn->guc_state.lock); 2291 clr_context_registered(cn); 2292 spin_unlock(&cn->guc_state.lock); 2293 2294 set_context_guc_id_invalid(cn); 2295 2296 #ifdef CONFIG_DRM_I915_SELFTEST 2297 guc->number_guc_id_stolen++; 2298 #endif 2299 2300 return 0; 2301 } else { 2302 return -EAGAIN; 2303 } 2304 } 2305 2306 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2307 { 2308 int ret; 2309 2310 lockdep_assert_held(&guc->submission_state.lock); 2311 GEM_BUG_ON(intel_context_is_child(ce)); 2312 2313 ret = new_guc_id(guc, ce); 2314 if (unlikely(ret < 0)) { 2315 if (intel_context_is_parent(ce)) 2316 return -ENOSPC; 2317 2318 ret = steal_guc_id(guc, ce); 2319 if (ret < 0) 2320 return ret; 2321 } 2322 2323 if (intel_context_is_parent(ce)) { 2324 struct intel_context *child; 2325 int i = 1; 2326 2327 for_each_child(ce, child) 2328 child->guc_id.id = ce->guc_id.id + i++; 2329 } 2330 2331 return 0; 2332 } 2333 2334 #define PIN_GUC_ID_TRIES 4 2335 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2336 { 2337 int ret = 0; 2338 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2339 2340 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2341 2342 try_again: 2343 spin_lock_irqsave(&guc->submission_state.lock, flags); 2344 2345 might_lock(&ce->guc_state.lock); 2346 2347 if (context_guc_id_invalid(ce)) { 2348 ret = assign_guc_id(guc, ce); 2349 if (ret) 2350 goto out_unlock; 2351 ret = 1; /* Indicates newly assigned guc_id */ 2352 } 2353 if (!list_empty(&ce->guc_id.link)) 2354 list_del_init(&ce->guc_id.link); 2355 atomic_inc(&ce->guc_id.ref); 2356 2357 out_unlock: 2358 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2359 2360 /* 2361 * -EAGAIN indicates no guc_ids are available, let's retire any 2362 * outstanding requests to see if that frees up a guc_id. If the first 2363 * retire didn't help, insert a sleep with the timeslice duration before 2364 * attempting to retire more requests. Double the sleep period each 2365 * subsequent pass before finally giving up. The sleep period has a max of 2366 * 100ms and a minimum of 1ms.
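 *
 * As a worked example (assuming, purely for illustration, a 5ms timeslice
 * and PIN_GUC_ID_TRIES == 4): the first retry only retires requests, the
 * second sleeps ~5ms and the third sleeps ~10ms before retiring, after
 * which we give up and return -EAGAIN to the caller.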
2367 */ 2368 if (ret == -EAGAIN && --tries) { 2369 if (PIN_GUC_ID_TRIES - tries > 1) { 2370 unsigned int timeslice_shifted = 2371 ce->engine->props.timeslice_duration_ms << 2372 (PIN_GUC_ID_TRIES - tries - 2); 2373 unsigned int max = min_t(unsigned int, 100, 2374 timeslice_shifted); 2375 2376 msleep(max_t(unsigned int, max, 1)); 2377 } 2378 intel_gt_retire_requests(guc_to_gt(guc)); 2379 goto try_again; 2380 } 2381 2382 return ret; 2383 } 2384 2385 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2386 { 2387 unsigned long flags; 2388 2389 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2390 GEM_BUG_ON(intel_context_is_child(ce)); 2391 2392 if (unlikely(context_guc_id_invalid(ce) || 2393 intel_context_is_parent(ce))) 2394 return; 2395 2396 spin_lock_irqsave(&guc->submission_state.lock, flags); 2397 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2398 !atomic_read(&ce->guc_id.ref)) 2399 list_add_tail(&ce->guc_id.link, 2400 &guc->submission_state.guc_id_list); 2401 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2402 } 2403 2404 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2405 struct intel_context *ce, 2406 u32 guc_id, 2407 u32 offset, 2408 bool loop) 2409 { 2410 struct intel_context *child; 2411 u32 action[4 + MAX_ENGINE_INSTANCE]; 2412 int len = 0; 2413 2414 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2415 2416 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2417 action[len++] = guc_id; 2418 action[len++] = ce->parallel.number_children + 1; 2419 action[len++] = offset; 2420 for_each_child(ce, child) { 2421 offset += sizeof(struct guc_lrc_desc_v69); 2422 action[len++] = offset; 2423 } 2424 2425 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2426 } 2427 2428 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2429 struct intel_context *ce, 2430 struct guc_ctxt_registration_info *info, 2431 bool loop) 2432 { 2433 struct intel_context *child; 2434 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; 2435 int len = 0; 2436 u32 next_id; 2437 2438 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2439 2440 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2441 action[len++] = info->flags; 2442 action[len++] = info->context_idx; 2443 action[len++] = info->engine_class; 2444 action[len++] = info->engine_submit_mask; 2445 action[len++] = info->wq_desc_lo; 2446 action[len++] = info->wq_desc_hi; 2447 action[len++] = info->wq_base_lo; 2448 action[len++] = info->wq_base_hi; 2449 action[len++] = info->wq_size; 2450 action[len++] = ce->parallel.number_children + 1; 2451 action[len++] = info->hwlrca_lo; 2452 action[len++] = info->hwlrca_hi; 2453 2454 next_id = info->context_idx + 1; 2455 for_each_child(ce, child) { 2456 GEM_BUG_ON(next_id++ != child->guc_id.id); 2457 2458 /* 2459 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2460 * only supports 32 bit currently. 
2461 */ 2462 action[len++] = lower_32_bits(child->lrc.lrca); 2463 action[len++] = upper_32_bits(child->lrc.lrca); 2464 } 2465 2466 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2467 2468 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2469 } 2470 2471 static int __guc_action_register_context_v69(struct intel_guc *guc, 2472 u32 guc_id, 2473 u32 offset, 2474 bool loop) 2475 { 2476 u32 action[] = { 2477 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2478 guc_id, 2479 offset, 2480 }; 2481 2482 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2483 0, loop); 2484 } 2485 2486 static int __guc_action_register_context_v70(struct intel_guc *guc, 2487 struct guc_ctxt_registration_info *info, 2488 bool loop) 2489 { 2490 u32 action[] = { 2491 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2492 info->flags, 2493 info->context_idx, 2494 info->engine_class, 2495 info->engine_submit_mask, 2496 info->wq_desc_lo, 2497 info->wq_desc_hi, 2498 info->wq_base_lo, 2499 info->wq_base_hi, 2500 info->wq_size, 2501 info->hwlrca_lo, 2502 info->hwlrca_hi, 2503 }; 2504 2505 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2506 0, loop); 2507 } 2508 2509 static void prepare_context_registration_info_v69(struct intel_context *ce); 2510 static void prepare_context_registration_info_v70(struct intel_context *ce, 2511 struct guc_ctxt_registration_info *info); 2512 2513 static int 2514 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2515 { 2516 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2517 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2518 2519 prepare_context_registration_info_v69(ce); 2520 2521 if (intel_context_is_parent(ce)) 2522 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2523 offset, loop); 2524 else 2525 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2526 offset, loop); 2527 } 2528 2529 static int 2530 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2531 { 2532 struct guc_ctxt_registration_info info; 2533 2534 prepare_context_registration_info_v70(ce, &info); 2535 2536 if (intel_context_is_parent(ce)) 2537 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2538 else 2539 return __guc_action_register_context_v70(guc, &info, loop); 2540 } 2541 2542 static int register_context(struct intel_context *ce, bool loop) 2543 { 2544 struct intel_guc *guc = ce_to_guc(ce); 2545 int ret; 2546 2547 GEM_BUG_ON(intel_context_is_child(ce)); 2548 trace_intel_context_register(ce); 2549 2550 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2551 ret = register_context_v70(guc, ce, loop); 2552 else 2553 ret = register_context_v69(guc, ce, loop); 2554 2555 if (likely(!ret)) { 2556 unsigned long flags; 2557 2558 spin_lock_irqsave(&ce->guc_state.lock, flags); 2559 set_context_registered(ce); 2560 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2561 2562 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2563 guc_context_policy_init_v70(ce, loop); 2564 } 2565 2566 return ret; 2567 } 2568 2569 static int __guc_action_deregister_context(struct intel_guc *guc, 2570 u32 guc_id) 2571 { 2572 u32 action[] = { 2573 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2574 guc_id, 2575 }; 2576 2577 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2578 G2H_LEN_DW_DEREGISTER_CONTEXT, 2579 true); 2580 } 2581 2582 static int deregister_context(struct intel_context *ce, u32 guc_id) 2583 { 2584 struct intel_guc *guc = ce_to_guc(ce); 2585 2586 GEM_BUG_ON(intel_context_is_child(ce)); 
2587 trace_intel_context_deregister(ce); 2588 2589 return __guc_action_deregister_context(guc, guc_id); 2590 } 2591 2592 static inline void clear_children_join_go_memory(struct intel_context *ce) 2593 { 2594 struct parent_scratch *ps = __get_parent_scratch(ce); 2595 int i; 2596 2597 ps->go.semaphore = 0; 2598 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2599 ps->join[i].semaphore = 0; 2600 } 2601 2602 static inline u32 get_children_go_value(struct intel_context *ce) 2603 { 2604 return __get_parent_scratch(ce)->go.semaphore; 2605 } 2606 2607 static inline u32 get_children_join_value(struct intel_context *ce, 2608 u8 child_index) 2609 { 2610 return __get_parent_scratch(ce)->join[child_index].semaphore; 2611 } 2612 2613 struct context_policy { 2614 u32 count; 2615 struct guc_update_context_policy h2g; 2616 }; 2617 2618 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2619 { 2620 size_t bytes = sizeof(policy->h2g.header) + 2621 (sizeof(policy->h2g.klv[0]) * policy->count); 2622 2623 return bytes / sizeof(u32); 2624 } 2625 2626 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2627 { 2628 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2629 policy->h2g.header.ctx_id = guc_id; 2630 policy->count = 0; 2631 } 2632 2633 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2634 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2635 { \ 2636 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2637 policy->h2g.klv[policy->count].kl = \ 2638 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2639 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2640 policy->h2g.klv[policy->count].value = data; \ 2641 policy->count++; \ 2642 } 2643 2644 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2645 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2646 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2647 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2648 2649 #undef MAKE_CONTEXT_POLICY_ADD 2650 2651 static int __guc_context_set_context_policies(struct intel_guc *guc, 2652 struct context_policy *policy, 2653 bool loop) 2654 { 2655 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2656 __guc_context_policy_action_size(policy), 2657 0, loop); 2658 } 2659 2660 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2661 { 2662 struct intel_engine_cs *engine = ce->engine; 2663 struct intel_guc *guc = &engine->gt->uc.guc; 2664 struct context_policy policy; 2665 u32 execution_quantum; 2666 u32 preemption_timeout; 2667 unsigned long flags; 2668 int ret; 2669 2670 /* NB: For both of these, zero means disabled. 
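 * The engine properties are specified in milliseconds; the values handed to
 * the GuC below are scaled by 1000 (microseconds), so e.g. a hypothetical
 * 5ms timeslice becomes an execution quantum of 5000.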
*/ 2671 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2672 execution_quantum)); 2673 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2674 preemption_timeout)); 2675 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2676 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2677 2678 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2679 2680 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2681 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2682 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2683 2684 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2685 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2686 2687 ret = __guc_context_set_context_policies(guc, &policy, loop); 2688 2689 spin_lock_irqsave(&ce->guc_state.lock, flags); 2690 if (ret != 0) 2691 set_context_policy_required(ce); 2692 else 2693 clr_context_policy_required(ce); 2694 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2695 2696 return ret; 2697 } 2698 2699 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2700 struct guc_lrc_desc_v69 *desc) 2701 { 2702 desc->policy_flags = 0; 2703 2704 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2705 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2706 2707 /* NB: For both of these, zero means disabled. */ 2708 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2709 desc->execution_quantum)); 2710 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2711 desc->preemption_timeout)); 2712 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2713 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2714 } 2715 2716 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2717 { 2718 /* 2719 * this matches the mapping we do in map_i915_prio_to_guc_prio() 2720 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2721 */ 2722 switch (prio) { 2723 default: 2724 MISSING_CASE(prio); 2725 fallthrough; 2726 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2727 return GEN12_CTX_PRIORITY_NORMAL; 2728 case GUC_CLIENT_PRIORITY_NORMAL: 2729 return GEN12_CTX_PRIORITY_LOW; 2730 case GUC_CLIENT_PRIORITY_HIGH: 2731 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2732 return GEN12_CTX_PRIORITY_HIGH; 2733 } 2734 } 2735 2736 static void prepare_context_registration_info_v69(struct intel_context *ce) 2737 { 2738 struct intel_engine_cs *engine = ce->engine; 2739 struct intel_guc *guc = &engine->gt->uc.guc; 2740 u32 ctx_id = ce->guc_id.id; 2741 struct guc_lrc_desc_v69 *desc; 2742 struct intel_context *child; 2743 2744 GEM_BUG_ON(!engine->mask); 2745 2746 /* 2747 * Ensure LRC + CT vmas are in the same region as write barrier is done 2748 * based on CT vma region. 2749 */ 2750 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2751 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2752 2753 desc = __get_lrc_desc_v69(guc, ctx_id); 2754 GEM_BUG_ON(!desc); 2755 desc->engine_class = engine_class_to_guc_class(engine->class); 2756 desc->engine_submit_mask = engine->logical_mask; 2757 desc->hw_context_desc = ce->lrc.lrca; 2758 desc->priority = ce->guc_state.prio; 2759 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2760 guc_context_policy_init_v69(engine, desc); 2761 2762 /* 2763 * If context is a parent, we need to register a process descriptor 2764 * describing a work queue and register all child contexts.
*/ 2766 if (intel_context_is_parent(ce)) { 2767 struct guc_process_desc_v69 *pdesc; 2768 2769 ce->parallel.guc.wqi_tail = 0; 2770 ce->parallel.guc.wqi_head = 0; 2771 2772 desc->process_desc = i915_ggtt_offset(ce->state) + 2773 __get_parent_scratch_offset(ce); 2774 desc->wq_addr = i915_ggtt_offset(ce->state) + 2775 __get_wq_offset(ce); 2776 desc->wq_size = WQ_SIZE; 2777 2778 pdesc = __get_process_desc_v69(ce); 2779 memset(pdesc, 0, sizeof(*(pdesc))); 2780 pdesc->stage_id = ce->guc_id.id; 2781 pdesc->wq_base_addr = desc->wq_addr; 2782 pdesc->wq_size_bytes = desc->wq_size; 2783 pdesc->wq_status = WQ_STATUS_ACTIVE; 2784 2785 ce->parallel.guc.wq_head = &pdesc->head; 2786 ce->parallel.guc.wq_tail = &pdesc->tail; 2787 ce->parallel.guc.wq_status = &pdesc->wq_status; 2788 2789 for_each_child(ce, child) { 2790 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2791 2792 desc->engine_class = 2793 engine_class_to_guc_class(engine->class); 2794 desc->hw_context_desc = child->lrc.lrca; 2795 desc->priority = ce->guc_state.prio; 2796 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2797 guc_context_policy_init_v69(engine, desc); 2798 } 2799 2800 clear_children_join_go_memory(ce); 2801 } 2802 } 2803 2804 static void prepare_context_registration_info_v70(struct intel_context *ce, 2805 struct guc_ctxt_registration_info *info) 2806 { 2807 struct intel_engine_cs *engine = ce->engine; 2808 struct intel_guc *guc = &engine->gt->uc.guc; 2809 u32 ctx_id = ce->guc_id.id; 2810 2811 GEM_BUG_ON(!engine->mask); 2812 2813 /* 2814 * Ensure LRC + CT vmas are in the same region as write barrier is done 2815 * based on CT vma region. 2816 */ 2817 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2818 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2819 2820 memset(info, 0, sizeof(*info)); 2821 info->context_idx = ctx_id; 2822 info->engine_class = engine_class_to_guc_class(engine->class); 2823 info->engine_submit_mask = engine->logical_mask; 2824 /* 2825 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2826 * only supports 32 bit currently. 2827 */ 2828 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2829 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2830 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2831 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2832 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2833 2834 /* 2835 * If context is a parent, we need to register a process descriptor 2836 * describing a work queue and register all child contexts.
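 * The scheduling work queue descriptor lives in the parent's scratch page
 * (__get_parent_scratch_offset()) with the work queue itself at
 * __get_wq_offset(); both are passed to the GuC below as GGTT addresses
 * split into lo/hi dwords.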
2837 */ 2838 if (intel_context_is_parent(ce)) { 2839 struct guc_sched_wq_desc *wq_desc; 2840 u64 wq_desc_offset, wq_base_offset; 2841 2842 ce->parallel.guc.wqi_tail = 0; 2843 ce->parallel.guc.wqi_head = 0; 2844 2845 wq_desc_offset = i915_ggtt_offset(ce->state) + 2846 __get_parent_scratch_offset(ce); 2847 wq_base_offset = i915_ggtt_offset(ce->state) + 2848 __get_wq_offset(ce); 2849 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2850 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2851 info->wq_base_lo = lower_32_bits(wq_base_offset); 2852 info->wq_base_hi = upper_32_bits(wq_base_offset); 2853 info->wq_size = WQ_SIZE; 2854 2855 wq_desc = __get_wq_desc_v70(ce); 2856 memset(wq_desc, 0, sizeof(*wq_desc)); 2857 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2858 2859 ce->parallel.guc.wq_head = &wq_desc->head; 2860 ce->parallel.guc.wq_tail = &wq_desc->tail; 2861 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2862 2863 clear_children_join_go_memory(ce); 2864 } 2865 } 2866 2867 static int try_context_registration(struct intel_context *ce, bool loop) 2868 { 2869 struct intel_engine_cs *engine = ce->engine; 2870 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2871 struct intel_guc *guc = &engine->gt->uc.guc; 2872 intel_wakeref_t wakeref; 2873 u32 ctx_id = ce->guc_id.id; 2874 bool context_registered; 2875 int ret = 0; 2876 2877 GEM_BUG_ON(!sched_state_is_init(ce)); 2878 2879 context_registered = ctx_id_mapped(guc, ctx_id); 2880 2881 clr_ctx_id_mapping(guc, ctx_id); 2882 set_ctx_id_mapping(guc, ctx_id, ce); 2883 2884 /* 2885 * The context_lookup xarray is used to determine if the hardware 2886 * context is currently registered. There are two cases in which it 2887 * could be registered either the guc_id has been stolen from another 2888 * context or the lrc descriptor address of this context has changed. In 2889 * either case the context needs to be deregistered with the GuC before 2890 * registering this context. 2891 */ 2892 if (context_registered) { 2893 bool disabled; 2894 unsigned long flags; 2895 2896 trace_intel_context_steal_guc_id(ce); 2897 GEM_BUG_ON(!loop); 2898 2899 /* Seal race with Reset */ 2900 spin_lock_irqsave(&ce->guc_state.lock, flags); 2901 disabled = submission_disabled(guc); 2902 if (likely(!disabled)) { 2903 set_context_wait_for_deregister_to_register(ce); 2904 intel_context_get(ce); 2905 } 2906 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2907 if (unlikely(disabled)) { 2908 clr_ctx_id_mapping(guc, ctx_id); 2909 return 0; /* Will get registered later */ 2910 } 2911 2912 /* 2913 * If stealing the guc_id, this ce has the same guc_id as the 2914 * context whose guc_id was stolen. 
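 * Deregister the old instance here; registration of this context is then
 * deferred until the corresponding deregistration-complete G2H is processed
 * (hence the wait_for_deregister_to_register state set above).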
2915 */ 2916 with_intel_runtime_pm(runtime_pm, wakeref) 2917 ret = deregister_context(ce, ce->guc_id.id); 2918 if (unlikely(ret == -ENODEV)) 2919 ret = 0; /* Will get registered later */ 2920 } else { 2921 with_intel_runtime_pm(runtime_pm, wakeref) 2922 ret = register_context(ce, loop); 2923 if (unlikely(ret == -EBUSY)) { 2924 clr_ctx_id_mapping(guc, ctx_id); 2925 } else if (unlikely(ret == -ENODEV)) { 2926 clr_ctx_id_mapping(guc, ctx_id); 2927 ret = 0; /* Will get registered later */ 2928 } 2929 } 2930 2931 return ret; 2932 } 2933 2934 static int __guc_context_pre_pin(struct intel_context *ce, 2935 struct intel_engine_cs *engine, 2936 struct i915_gem_ww_ctx *ww, 2937 void **vaddr) 2938 { 2939 return lrc_pre_pin(ce, engine, ww, vaddr); 2940 } 2941 2942 static int __guc_context_pin(struct intel_context *ce, 2943 struct intel_engine_cs *engine, 2944 void *vaddr) 2945 { 2946 if (i915_ggtt_offset(ce->state) != 2947 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2948 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2949 2950 /* 2951 * GuC context gets pinned in guc_request_alloc. See that function for 2952 * explanation of why. 2953 */ 2954 2955 return lrc_pin(ce, engine, vaddr); 2956 } 2957 2958 static int guc_context_pre_pin(struct intel_context *ce, 2959 struct i915_gem_ww_ctx *ww, 2960 void **vaddr) 2961 { 2962 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2963 } 2964 2965 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2966 { 2967 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2968 2969 if (likely(!ret && !intel_context_is_barrier(ce))) 2970 intel_engine_pm_get(ce->engine); 2971 2972 return ret; 2973 } 2974 2975 static void guc_context_unpin(struct intel_context *ce) 2976 { 2977 struct intel_guc *guc = ce_to_guc(ce); 2978 2979 __guc_context_update_stats(ce); 2980 unpin_guc_id(guc, ce); 2981 lrc_unpin(ce); 2982 2983 if (likely(!intel_context_is_barrier(ce))) 2984 intel_engine_pm_put_async(ce->engine); 2985 } 2986 2987 static void guc_context_post_unpin(struct intel_context *ce) 2988 { 2989 lrc_post_unpin(ce); 2990 } 2991 2992 static void __guc_context_sched_enable(struct intel_guc *guc, 2993 struct intel_context *ce) 2994 { 2995 u32 action[] = { 2996 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2997 ce->guc_id.id, 2998 GUC_CONTEXT_ENABLE 2999 }; 3000 3001 trace_intel_context_sched_enable(ce); 3002 3003 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 3004 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 3005 } 3006 3007 static void __guc_context_sched_disable(struct intel_guc *guc, 3008 struct intel_context *ce, 3009 u16 guc_id) 3010 { 3011 u32 action[] = { 3012 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 3013 guc_id, /* ce->guc_id.id not stable */ 3014 GUC_CONTEXT_DISABLE 3015 }; 3016 3017 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 3018 3019 GEM_BUG_ON(intel_context_is_child(ce)); 3020 trace_intel_context_sched_disable(ce); 3021 3022 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 3023 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 3024 } 3025 3026 static void guc_blocked_fence_complete(struct intel_context *ce) 3027 { 3028 lockdep_assert_held(&ce->guc_state.lock); 3029 3030 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 3031 i915_sw_fence_complete(&ce->guc_state.blocked); 3032 } 3033 3034 static void guc_blocked_fence_reinit(struct intel_context *ce) 3035 { 3036 lockdep_assert_held(&ce->guc_state.lock); 3037 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 3038 3039 /* 3040 * This fence is always complete unless a pending schedule disable is 3041
* outstanding. We arm the fence here and complete it when we receive 3042 * the pending schedule disable complete message. 3043 */ 3044 i915_sw_fence_fini(&ce->guc_state.blocked); 3045 i915_sw_fence_reinit(&ce->guc_state.blocked); 3046 i915_sw_fence_await(&ce->guc_state.blocked); 3047 i915_sw_fence_commit(&ce->guc_state.blocked); 3048 } 3049 3050 static u16 prep_context_pending_disable(struct intel_context *ce) 3051 { 3052 lockdep_assert_held(&ce->guc_state.lock); 3053 3054 set_context_pending_disable(ce); 3055 clr_context_enabled(ce); 3056 guc_blocked_fence_reinit(ce); 3057 intel_context_get(ce); 3058 3059 return ce->guc_id.id; 3060 } 3061 3062 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 3063 { 3064 struct intel_guc *guc = ce_to_guc(ce); 3065 unsigned long flags; 3066 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3067 intel_wakeref_t wakeref; 3068 u16 guc_id; 3069 bool enabled; 3070 3071 GEM_BUG_ON(intel_context_is_child(ce)); 3072 3073 spin_lock_irqsave(&ce->guc_state.lock, flags); 3074 3075 incr_context_blocked(ce); 3076 3077 enabled = context_enabled(ce); 3078 if (unlikely(!enabled || submission_disabled(guc))) { 3079 if (enabled) 3080 clr_context_enabled(ce); 3081 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3082 return &ce->guc_state.blocked; 3083 } 3084 3085 /* 3086 * We add +2 here as the schedule disable complete CTB handler calls 3087 * intel_context_sched_disable_unpin (-2 to pin_count). 3088 */ 3089 atomic_add(2, &ce->pin_count); 3090 3091 guc_id = prep_context_pending_disable(ce); 3092 3093 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3094 3095 with_intel_runtime_pm(runtime_pm, wakeref) 3096 __guc_context_sched_disable(guc, ce, guc_id); 3097 3098 return &ce->guc_state.blocked; 3099 } 3100 3101 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 3102 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 3103 #define SCHED_STATE_NO_UNBLOCK \ 3104 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 3105 SCHED_STATE_PENDING_DISABLE | \ 3106 SCHED_STATE_BANNED) 3107 3108 static bool context_cant_unblock(struct intel_context *ce) 3109 { 3110 lockdep_assert_held(&ce->guc_state.lock); 3111 3112 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 3113 context_guc_id_invalid(ce) || 3114 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 3115 !intel_context_is_pinned(ce); 3116 } 3117 3118 static void guc_context_unblock(struct intel_context *ce) 3119 { 3120 struct intel_guc *guc = ce_to_guc(ce); 3121 unsigned long flags; 3122 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3123 intel_wakeref_t wakeref; 3124 bool enable; 3125 3126 GEM_BUG_ON(context_enabled(ce)); 3127 GEM_BUG_ON(intel_context_is_child(ce)); 3128 3129 spin_lock_irqsave(&ce->guc_state.lock, flags); 3130 3131 if (unlikely(submission_disabled(guc) || 3132 context_cant_unblock(ce))) { 3133 enable = false; 3134 } else { 3135 enable = true; 3136 set_context_pending_enable(ce); 3137 set_context_enabled(ce); 3138 intel_context_get(ce); 3139 } 3140 3141 decr_context_blocked(ce); 3142 3143 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3144 3145 if (enable) { 3146 with_intel_runtime_pm(runtime_pm, wakeref) 3147 __guc_context_sched_enable(guc, ce); 3148 } 3149 } 3150 3151 static void guc_context_cancel_request(struct intel_context *ce, 3152 struct i915_request *rq) 3153 { 3154 struct intel_context *block_context = 3155 request_to_scheduling_context(rq); 3156 3157 if (i915_sw_fence_signaled(&rq->submit)) { 3158 struct i915_sw_fence *fence; 3159 3160 intel_context_get(ce); 3161 
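		/*
		 * Block the context (schedule disable) so the request can be
		 * safely skipped and the ring head rewound before unblocking.
		 */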
fence = guc_context_block(block_context); 3162 i915_sw_fence_wait(fence); 3163 if (!i915_request_completed(rq)) { 3164 __i915_request_skip(rq); 3165 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 3166 true); 3167 } 3168 3169 guc_context_unblock(block_context); 3170 intel_context_put(ce); 3171 } 3172 } 3173 3174 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 3175 u16 guc_id, 3176 u32 preemption_timeout) 3177 { 3178 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3179 struct context_policy policy; 3180 3181 __guc_context_policy_start_klv(&policy, guc_id); 3182 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 3183 __guc_context_set_context_policies(guc, &policy, true); 3184 } else { 3185 u32 action[] = { 3186 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 3187 guc_id, 3188 preemption_timeout 3189 }; 3190 3191 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3192 } 3193 } 3194 3195 static void 3196 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 3197 unsigned int preempt_timeout_ms) 3198 { 3199 struct intel_guc *guc = ce_to_guc(ce); 3200 struct intel_runtime_pm *runtime_pm = 3201 &ce->engine->gt->i915->runtime_pm; 3202 intel_wakeref_t wakeref; 3203 unsigned long flags; 3204 3205 GEM_BUG_ON(intel_context_is_child(ce)); 3206 3207 guc_flush_submissions(guc); 3208 3209 spin_lock_irqsave(&ce->guc_state.lock, flags); 3210 set_context_banned(ce); 3211 3212 if (submission_disabled(guc) || 3213 (!context_enabled(ce) && !context_pending_disable(ce))) { 3214 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3215 3216 guc_cancel_context_requests(ce); 3217 intel_engine_signal_breadcrumbs(ce->engine); 3218 } else if (!context_pending_disable(ce)) { 3219 u16 guc_id; 3220 3221 /* 3222 * We add +2 here as the schedule disable complete CTB handler 3223 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3224 */ 3225 atomic_add(2, &ce->pin_count); 3226 3227 guc_id = prep_context_pending_disable(ce); 3228 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3229 3230 /* 3231 * In addition to disabling scheduling, set the preemption 3232 * timeout to the minimum value (1 us) so the banned context 3233 * gets kicked off the HW ASAP. 
3234 */ 3235 with_intel_runtime_pm(runtime_pm, wakeref) { 3236 __guc_context_set_preemption_timeout(guc, guc_id, 3237 preempt_timeout_ms); 3238 __guc_context_sched_disable(guc, ce, guc_id); 3239 } 3240 } else { 3241 if (!context_guc_id_invalid(ce)) 3242 with_intel_runtime_pm(runtime_pm, wakeref) 3243 __guc_context_set_preemption_timeout(guc, 3244 ce->guc_id.id, 3245 preempt_timeout_ms); 3246 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3247 } 3248 } 3249 3250 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3251 unsigned long flags) 3252 __releases(ce->guc_state.lock) 3253 { 3254 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3255 intel_wakeref_t wakeref; 3256 u16 guc_id; 3257 3258 lockdep_assert_held(&ce->guc_state.lock); 3259 guc_id = prep_context_pending_disable(ce); 3260 3261 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3262 3263 with_intel_runtime_pm(runtime_pm, wakeref) 3264 __guc_context_sched_disable(guc, ce, guc_id); 3265 } 3266 3267 static bool bypass_sched_disable(struct intel_guc *guc, 3268 struct intel_context *ce) 3269 { 3270 lockdep_assert_held(&ce->guc_state.lock); 3271 GEM_BUG_ON(intel_context_is_child(ce)); 3272 3273 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3274 !ctx_id_mapped(guc, ce->guc_id.id)) { 3275 clr_context_enabled(ce); 3276 return true; 3277 } 3278 3279 return !context_enabled(ce); 3280 } 3281 3282 static void __delay_sched_disable(struct work_struct *wrk) 3283 { 3284 struct intel_context *ce = 3285 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3286 struct intel_guc *guc = ce_to_guc(ce); 3287 unsigned long flags; 3288 3289 spin_lock_irqsave(&ce->guc_state.lock, flags); 3290 3291 if (bypass_sched_disable(guc, ce)) { 3292 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3293 intel_context_sched_disable_unpin(ce); 3294 } else { 3295 do_sched_disable(guc, ce, flags); 3296 } 3297 } 3298 3299 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3300 { 3301 /* 3302 * parent contexts are perma-pinned, if we are unpinning do schedule 3303 * disable immediately. 3304 */ 3305 if (intel_context_is_parent(ce)) 3306 return true; 3307 3308 /* 3309 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
*/ 3311 return guc->submission_state.guc_ids_in_use > 3312 guc->submission_state.sched_disable_gucid_threshold; 3313 } 3314 3315 static void guc_context_sched_disable(struct intel_context *ce) 3316 { 3317 struct intel_guc *guc = ce_to_guc(ce); 3318 u64 delay = guc->submission_state.sched_disable_delay_ms; 3319 unsigned long flags; 3320 3321 spin_lock_irqsave(&ce->guc_state.lock, flags); 3322 3323 if (bypass_sched_disable(guc, ce)) { 3324 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3325 intel_context_sched_disable_unpin(ce); 3326 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3327 delay) { 3328 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3329 mod_delayed_work(system_unbound_wq, 3330 &ce->guc_state.sched_disable_delay_work, 3331 msecs_to_jiffies(delay)); 3332 } else { 3333 do_sched_disable(guc, ce, flags); 3334 } 3335 } 3336 3337 static void guc_context_close(struct intel_context *ce) 3338 { 3339 unsigned long flags; 3340 3341 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3342 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3343 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3344 3345 spin_lock_irqsave(&ce->guc_state.lock, flags); 3346 set_context_close_done(ce); 3347 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3348 } 3349 3350 static inline int guc_lrc_desc_unpin(struct intel_context *ce) 3351 { 3352 struct intel_guc *guc = ce_to_guc(ce); 3353 struct intel_gt *gt = guc_to_gt(guc); 3354 unsigned long flags; 3355 bool disabled; 3356 int ret; 3357 3358 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3359 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3360 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3361 GEM_BUG_ON(context_enabled(ce)); 3362 3363 /* Seal race with Reset */ 3364 spin_lock_irqsave(&ce->guc_state.lock, flags); 3365 disabled = submission_disabled(guc); 3366 if (likely(!disabled)) { 3367 /* 3368 * Take a gt-pm ref and change context state to be destroyed. 3369 * NOTE: a G2H IRQ that comes after will put this gt-pm ref back 3370 */ 3371 __intel_gt_pm_get(gt); 3372 set_context_destroyed(ce); 3373 clr_context_registered(ce); 3374 } 3375 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3376 3377 if (unlikely(disabled)) { 3378 release_guc_id(guc, ce); 3379 __guc_context_destroy(ce); 3380 return 0; 3381 } 3382 3383 /* 3384 * GuC is active, let's destroy this context, but at this point we can still be racing 3385 * with suspend, so we undo everything if the H2G fails in deregister_context so 3386 * that GuC reset will find this context during clean up. 3387 */ 3388 ret = deregister_context(ce, ce->guc_id.id); 3389 if (ret) { 3390 spin_lock(&ce->guc_state.lock); 3391 set_context_registered(ce); 3392 clr_context_destroyed(ce); 3393 spin_unlock(&ce->guc_state.lock); 3394 /* 3395 * As gt-pm is awake at function entry, intel_wakeref_put_async() merely decrements 3396 * the wakeref immediately; per its usage contract we still call it after dropping the lock.
*/ 3398 intel_wakeref_put_async(&gt->wakeref); 3399 } 3400 3401 return ret; 3402 } 3403 3404 static void __guc_context_destroy(struct intel_context *ce) 3405 { 3406 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3407 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3408 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3409 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3410 3411 lrc_fini(ce); 3412 intel_context_fini(ce); 3413 3414 if (intel_engine_is_virtual(ce->engine)) { 3415 struct guc_virtual_engine *ve = 3416 container_of(ce, typeof(*ve), context); 3417 3418 if (ve->base.breadcrumbs) 3419 intel_breadcrumbs_put(ve->base.breadcrumbs); 3420 3421 kfree(ve); 3422 } else { 3423 intel_context_free(ce); 3424 } 3425 } 3426 3427 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3428 { 3429 struct intel_context *ce; 3430 unsigned long flags; 3431 3432 GEM_BUG_ON(!submission_disabled(guc) && 3433 guc_submission_initialized(guc)); 3434 3435 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3436 spin_lock_irqsave(&guc->submission_state.lock, flags); 3437 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3438 struct intel_context, 3439 destroyed_link); 3440 if (ce) 3441 list_del_init(&ce->destroyed_link); 3442 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3443 3444 if (!ce) 3445 break; 3446 3447 release_guc_id(guc, ce); 3448 __guc_context_destroy(ce); 3449 } 3450 } 3451 3452 static void deregister_destroyed_contexts(struct intel_guc *guc) 3453 { 3454 struct intel_context *ce; 3455 unsigned long flags; 3456 3457 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3458 spin_lock_irqsave(&guc->submission_state.lock, flags); 3459 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3460 struct intel_context, 3461 destroyed_link); 3462 if (ce) 3463 list_del_init(&ce->destroyed_link); 3464 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3465 3466 if (!ce) 3467 break; 3468 3469 if (guc_lrc_desc_unpin(ce)) { 3470 /* 3471 * This means the GuC's CT link got severed mid-way, which could happen 3472 * in suspend-resume corner cases. In this case, put the 3473 * context back into the destroyed_contexts list which will 3474 * get picked up on the next context deregistration event or 3475 * purged in a GuC sanitization event (reset/unload/wedged/...). 3476 */ 3477 spin_lock_irqsave(&guc->submission_state.lock, flags); 3478 list_add_tail(&ce->destroyed_link, 3479 &guc->submission_state.destroyed_contexts); 3480 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3481 /* Bail now since the list might never be emptied if h2gs fail */ 3482 break; 3483 } 3484 3485 } 3486 } 3487 3488 static void destroyed_worker_func(struct work_struct *w) 3489 { 3490 struct intel_guc *guc = container_of(w, struct intel_guc, 3491 submission_state.destroyed_worker); 3492 struct intel_gt *gt = guc_to_gt(guc); 3493 intel_wakeref_t wakeref; 3494 3495 /* 3496 * In rare cases we can get here via async context-free fence-signals that 3497 * come very late in suspend flow or very early in resume flows. In these 3498 * cases, GuC won't be ready but just skipping it here is fine as these 3499 * pending-destroy-contexts get destroyed totally at GuC reset time at the 3500 end of suspend, or
this worker can be picked up later on the next 3501 * context destruction trigger after resume-completes 3502 */ 3503 if (!intel_guc_is_ready(guc)) 3504 return; 3505 3506 with_intel_gt_pm(gt, wakeref) 3507 deregister_destroyed_contexts(guc); 3508 } 3509 3510 static void guc_context_destroy(struct kref *kref) 3511 { 3512 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3513 struct intel_guc *guc = ce_to_guc(ce); 3514 unsigned long flags; 3515 bool destroy; 3516 3517 /* 3518 * If the guc_id is invalid this context has been stolen and we can free 3519 * it immediately. Also can be freed immediately if the context is not 3520 * registered with the GuC or the GuC is in the middle of a reset. 3521 */ 3522 spin_lock_irqsave(&guc->submission_state.lock, flags); 3523 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3524 !ctx_id_mapped(guc, ce->guc_id.id); 3525 if (likely(!destroy)) { 3526 if (!list_empty(&ce->guc_id.link)) 3527 list_del_init(&ce->guc_id.link); 3528 list_add_tail(&ce->destroyed_link, 3529 &guc->submission_state.destroyed_contexts); 3530 } else { 3531 __release_guc_id(guc, ce); 3532 } 3533 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3534 if (unlikely(destroy)) { 3535 __guc_context_destroy(ce); 3536 return; 3537 } 3538 3539 /* 3540 * We use a worker to issue the H2G to deregister the context as we can 3541 * take the GT PM for the first time which isn't allowed from an atomic 3542 * context. 3543 */ 3544 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3545 } 3546 3547 static int guc_context_alloc(struct intel_context *ce) 3548 { 3549 return lrc_alloc(ce, ce->engine); 3550 } 3551 3552 static void __guc_context_set_prio(struct intel_guc *guc, 3553 struct intel_context *ce) 3554 { 3555 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3556 struct context_policy policy; 3557 3558 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3559 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3560 __guc_context_set_context_policies(guc, &policy, true); 3561 } else { 3562 u32 action[] = { 3563 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3564 ce->guc_id.id, 3565 ce->guc_state.prio, 3566 }; 3567 3568 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3569 } 3570 } 3571 3572 static void guc_context_set_prio(struct intel_guc *guc, 3573 struct intel_context *ce, 3574 u8 prio) 3575 { 3576 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3577 prio > GUC_CLIENT_PRIORITY_NORMAL); 3578 lockdep_assert_held(&ce->guc_state.lock); 3579 3580 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3581 !context_registered(ce)) { 3582 ce->guc_state.prio = prio; 3583 return; 3584 } 3585 3586 ce->guc_state.prio = prio; 3587 __guc_context_set_prio(guc, ce); 3588 3589 trace_intel_context_set_prio(ce); 3590 } 3591 3592 static inline u8 map_i915_prio_to_guc_prio(int prio) 3593 { 3594 if (prio == I915_PRIORITY_NORMAL) 3595 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3596 else if (prio < I915_PRIORITY_NORMAL) 3597 return GUC_CLIENT_PRIORITY_NORMAL; 3598 else if (prio < I915_PRIORITY_DISPLAY) 3599 return GUC_CLIENT_PRIORITY_HIGH; 3600 else 3601 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3602 } 3603 3604 static inline void add_context_inflight_prio(struct intel_context *ce, 3605 u8 guc_prio) 3606 { 3607 lockdep_assert_held(&ce->guc_state.lock); 3608 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3609 3610 ++ce->guc_state.prio_count[guc_prio]; 3611 3612 /* Overflow protection */ 3613 
GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3614 } 3615 3616 static inline void sub_context_inflight_prio(struct intel_context *ce, 3617 u8 guc_prio) 3618 { 3619 lockdep_assert_held(&ce->guc_state.lock); 3620 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3621 3622 /* Underflow protection */ 3623 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3624 3625 --ce->guc_state.prio_count[guc_prio]; 3626 } 3627 3628 static inline void update_context_prio(struct intel_context *ce) 3629 { 3630 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3631 int i; 3632 3633 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3634 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3635 3636 lockdep_assert_held(&ce->guc_state.lock); 3637 3638 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3639 if (ce->guc_state.prio_count[i]) { 3640 guc_context_set_prio(guc, ce, i); 3641 break; 3642 } 3643 } 3644 } 3645 3646 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3647 { 3648 /* Lower value is higher priority */ 3649 return new_guc_prio < old_guc_prio; 3650 } 3651 3652 static void add_to_context(struct i915_request *rq) 3653 { 3654 struct intel_context *ce = request_to_scheduling_context(rq); 3655 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3656 3657 GEM_BUG_ON(intel_context_is_child(ce)); 3658 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3659 3660 spin_lock(&ce->guc_state.lock); 3661 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3662 3663 if (rq->guc_prio == GUC_PRIO_INIT) { 3664 rq->guc_prio = new_guc_prio; 3665 add_context_inflight_prio(ce, rq->guc_prio); 3666 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3667 sub_context_inflight_prio(ce, rq->guc_prio); 3668 rq->guc_prio = new_guc_prio; 3669 add_context_inflight_prio(ce, rq->guc_prio); 3670 } 3671 update_context_prio(ce); 3672 3673 spin_unlock(&ce->guc_state.lock); 3674 } 3675 3676 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3677 { 3678 lockdep_assert_held(&ce->guc_state.lock); 3679 3680 if (rq->guc_prio != GUC_PRIO_INIT && 3681 rq->guc_prio != GUC_PRIO_FINI) { 3682 sub_context_inflight_prio(ce, rq->guc_prio); 3683 update_context_prio(ce); 3684 } 3685 rq->guc_prio = GUC_PRIO_FINI; 3686 } 3687 3688 static void remove_from_context(struct i915_request *rq) 3689 { 3690 struct intel_context *ce = request_to_scheduling_context(rq); 3691 3692 GEM_BUG_ON(intel_context_is_child(ce)); 3693 3694 spin_lock_irq(&ce->guc_state.lock); 3695 3696 list_del_init(&rq->sched.link); 3697 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3698 3699 /* Prevent further __await_execution() registering a cb, then flush */ 3700 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3701 3702 guc_prio_fini(rq, ce); 3703 3704 spin_unlock_irq(&ce->guc_state.lock); 3705 3706 atomic_dec(&ce->guc_id.ref); 3707 i915_request_notify_execute_cb_imm(rq); 3708 } 3709 3710 static const struct intel_context_ops guc_context_ops = { 3711 .flags = COPS_RUNTIME_CYCLES, 3712 .alloc = guc_context_alloc, 3713 3714 .close = guc_context_close, 3715 3716 .pre_pin = guc_context_pre_pin, 3717 .pin = guc_context_pin, 3718 .unpin = guc_context_unpin, 3719 .post_unpin = guc_context_post_unpin, 3720 3721 .revoke = guc_context_revoke, 3722 3723 .cancel_request = guc_context_cancel_request, 3724 3725 .enter = intel_context_enter_engine, 3726 .exit = intel_context_exit_engine, 3727 3728 .sched_disable = guc_context_sched_disable, 3729 3730 .update_stats = 
guc_context_update_stats, 3731 3732 .reset = lrc_reset, 3733 .destroy = guc_context_destroy, 3734 3735 .create_virtual = guc_create_virtual, 3736 .create_parallel = guc_create_parallel, 3737 }; 3738 3739 static void submit_work_cb(struct irq_work *wrk) 3740 { 3741 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3742 3743 might_lock(&rq->engine->sched_engine->lock); 3744 i915_sw_fence_complete(&rq->submit); 3745 } 3746 3747 static void __guc_signal_context_fence(struct intel_context *ce) 3748 { 3749 struct i915_request *rq, *rn; 3750 3751 lockdep_assert_held(&ce->guc_state.lock); 3752 3753 if (!list_empty(&ce->guc_state.fences)) 3754 trace_intel_context_fence_release(ce); 3755 3756 /* 3757 * Use an IRQ to ensure locking order of sched_engine->lock -> 3758 * ce->guc_state.lock is preserved. 3759 */ 3760 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3761 guc_fence_link) { 3762 list_del(&rq->guc_fence_link); 3763 irq_work_queue(&rq->submit_work); 3764 } 3765 3766 INIT_LIST_HEAD(&ce->guc_state.fences); 3767 } 3768 3769 static void guc_signal_context_fence(struct intel_context *ce) 3770 { 3771 unsigned long flags; 3772 3773 GEM_BUG_ON(intel_context_is_child(ce)); 3774 3775 spin_lock_irqsave(&ce->guc_state.lock, flags); 3776 clr_context_wait_for_deregister_to_register(ce); 3777 __guc_signal_context_fence(ce); 3778 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3779 } 3780 3781 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3782 { 3783 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3784 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3785 !submission_disabled(ce_to_guc(ce)); 3786 } 3787 3788 static void guc_context_init(struct intel_context *ce) 3789 { 3790 const struct i915_gem_context *ctx; 3791 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3792 3793 rcu_read_lock(); 3794 ctx = rcu_dereference(ce->gem_context); 3795 if (ctx) 3796 prio = ctx->sched.priority; 3797 rcu_read_unlock(); 3798 3799 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3800 3801 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3802 __delay_sched_disable); 3803 3804 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3805 } 3806 3807 static int guc_request_alloc(struct i915_request *rq) 3808 { 3809 struct intel_context *ce = request_to_scheduling_context(rq); 3810 struct intel_guc *guc = ce_to_guc(ce); 3811 unsigned long flags; 3812 int ret; 3813 3814 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3815 3816 /* 3817 * Flush enough space to reduce the likelihood of waiting after 3818 * we start building the request - in which case we will just 3819 * have to repeat work. 3820 */ 3821 rq->reserved_space += GUC_REQUEST_SIZE; 3822 3823 /* 3824 * Note that after this point, we have committed to using 3825 * this request as it is being used to both track the 3826 * state of engine initialisation and liveness of the 3827 * golden renderstate above. Think twice before you try 3828 * to cancel/unwind this request now. 3829 */ 3830 3831 /* Unconditionally invalidate GPU caches and TLBs. */ 3832 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3833 if (ret) 3834 return ret; 3835 3836 rq->reserved_space -= GUC_REQUEST_SIZE; 3837 3838 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3839 guc_context_init(ce); 3840 3841 /* 3842 * If the context gets closed while the execbuf is ongoing, the context 3843 * close code will race with the below code to cancel the delayed work. 
* If the context close wins the race and cancels the work, it will 3845 * immediately call the sched disable (see guc_context_close), so there 3846 * is a chance we can get past this check while the sched_disable code 3847 * is being executed. To make sure that code completes before we check 3848 * the status further down, we wait for the close process to complete. 3849 * Otherwise, this code path could send a request down thinking that the 3850 * context is still in schedule-enable mode while the GuC ends up 3851 * dropping the request completely because the disable was sent from the 3852 * context_close path to the GuC just prior. In the event the CT is 3853 * full, we could potentially need to wait up to 1.5 seconds. 3854 */ 3855 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) 3856 intel_context_sched_disable_unpin(ce); 3857 else if (intel_context_is_closed(ce)) 3858 if (wait_for(context_close_done(ce), 1500)) 3859 guc_warn(guc, "timed out waiting on context sched close before realloc\n"); 3860 /* 3861 * Call pin_guc_id here rather than in the pinning step as with 3862 * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing the 3863 * guc_id and creating horrible race conditions. This is especially bad 3864 * when guc_ids are being stolen due to over subscription. By the time 3865 * this function is reached, it is guaranteed that the guc_id will be 3866 * persistent until the generated request is retired, thus sealing these 3867 * race conditions. It is still safe to fail here if guc_ids are 3868 * exhausted and return -EAGAIN to the user, indicating that they can try 3869 * again in the future. 3870 * 3871 * There is no need for a lock here as the timeline mutex ensures at 3872 * most one context can be executing this code path at once. The 3873 * guc_id_ref is incremented once for every request in flight and 3874 * decremented on each retire. When it is zero, a lock around the 3875 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3876 */ 3877 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3878 goto out; 3879 3880 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3881 if (unlikely(ret < 0)) 3882 return ret; 3883 if (context_needs_register(ce, !!ret)) { 3884 ret = try_context_registration(ce, true); 3885 if (unlikely(ret)) { /* unwind */ 3886 if (ret == -EPIPE) { 3887 disable_submission(guc); 3888 goto out; /* GPU will be reset */ 3889 } 3890 atomic_dec(&ce->guc_id.ref); 3891 unpin_guc_id(guc, ce); 3892 return ret; 3893 } 3894 } 3895 3896 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3897 3898 out: 3899 /* 3900 * We block all requests on this context if a G2H is pending for a 3901 * schedule disable or context deregistration, as the GuC will fail a 3902 * schedule enable or context registration if either G2H is pending, 3903 * respectively. Once a G2H returns, the fence that is blocking these 3904 * requests is released (see guc_signal_context_fence). 
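 *
 * Condensed sketch of that gate, for orientation only (both halves already
 * exist in this file; this is not a separate mechanism):
 *
 *	// request side, under ce->guc_state.lock (the code just below):
 *	i915_sw_fence_await(&rq->submit);
 *	list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
 *
 *	// G2H side, __guc_signal_context_fence():
 *	list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, guc_fence_link)
 *		irq_work_queue(&rq->submit_work);	// -> i915_sw_fence_complete()
 *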
3905 */ 3906 spin_lock_irqsave(&ce->guc_state.lock, flags); 3907 if (context_wait_for_deregister_to_register(ce) || 3908 context_pending_disable(ce)) { 3909 init_irq_work(&rq->submit_work, submit_work_cb); 3910 i915_sw_fence_await(&rq->submit); 3911 3912 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3913 } 3914 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3915 3916 return 0; 3917 } 3918 3919 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3920 struct i915_gem_ww_ctx *ww, 3921 void **vaddr) 3922 { 3923 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3924 3925 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3926 } 3927 3928 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3929 { 3930 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3931 int ret = __guc_context_pin(ce, engine, vaddr); 3932 intel_engine_mask_t tmp, mask = ce->engine->mask; 3933 3934 if (likely(!ret)) 3935 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3936 intel_engine_pm_get(engine); 3937 3938 return ret; 3939 } 3940 3941 static void guc_virtual_context_unpin(struct intel_context *ce) 3942 { 3943 intel_engine_mask_t tmp, mask = ce->engine->mask; 3944 struct intel_engine_cs *engine; 3945 struct intel_guc *guc = ce_to_guc(ce); 3946 3947 GEM_BUG_ON(context_enabled(ce)); 3948 GEM_BUG_ON(intel_context_is_barrier(ce)); 3949 3950 unpin_guc_id(guc, ce); 3951 lrc_unpin(ce); 3952 3953 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3954 intel_engine_pm_put_async(engine); 3955 } 3956 3957 static void guc_virtual_context_enter(struct intel_context *ce) 3958 { 3959 intel_engine_mask_t tmp, mask = ce->engine->mask; 3960 struct intel_engine_cs *engine; 3961 3962 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3963 intel_engine_pm_get(engine); 3964 3965 intel_timeline_enter(ce->timeline); 3966 } 3967 3968 static void guc_virtual_context_exit(struct intel_context *ce) 3969 { 3970 intel_engine_mask_t tmp, mask = ce->engine->mask; 3971 struct intel_engine_cs *engine; 3972 3973 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3974 intel_engine_pm_put(engine); 3975 3976 intel_timeline_exit(ce->timeline); 3977 } 3978 3979 static int guc_virtual_context_alloc(struct intel_context *ce) 3980 { 3981 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3982 3983 return lrc_alloc(ce, engine); 3984 } 3985 3986 static const struct intel_context_ops virtual_guc_context_ops = { 3987 .flags = COPS_RUNTIME_CYCLES, 3988 .alloc = guc_virtual_context_alloc, 3989 3990 .close = guc_context_close, 3991 3992 .pre_pin = guc_virtual_context_pre_pin, 3993 .pin = guc_virtual_context_pin, 3994 .unpin = guc_virtual_context_unpin, 3995 .post_unpin = guc_context_post_unpin, 3996 3997 .revoke = guc_context_revoke, 3998 3999 .cancel_request = guc_context_cancel_request, 4000 4001 .enter = guc_virtual_context_enter, 4002 .exit = guc_virtual_context_exit, 4003 4004 .sched_disable = guc_context_sched_disable, 4005 .update_stats = guc_context_update_stats, 4006 4007 .destroy = guc_context_destroy, 4008 4009 .get_sibling = guc_virtual_get_sibling, 4010 }; 4011 4012 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 4013 { 4014 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 4015 struct intel_guc *guc = ce_to_guc(ce); 4016 int ret; 4017 4018 GEM_BUG_ON(!intel_context_is_parent(ce)); 4019 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4020 4021 ret = 
pin_guc_id(guc, ce); 4022 if (unlikely(ret < 0)) 4023 return ret; 4024 4025 return __guc_context_pin(ce, engine, vaddr); 4026 } 4027 4028 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 4029 { 4030 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 4031 4032 GEM_BUG_ON(!intel_context_is_child(ce)); 4033 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4034 4035 __intel_context_pin(ce->parallel.parent); 4036 return __guc_context_pin(ce, engine, vaddr); 4037 } 4038 4039 static void guc_parent_context_unpin(struct intel_context *ce) 4040 { 4041 struct intel_guc *guc = ce_to_guc(ce); 4042 4043 GEM_BUG_ON(context_enabled(ce)); 4044 GEM_BUG_ON(intel_context_is_barrier(ce)); 4045 GEM_BUG_ON(!intel_context_is_parent(ce)); 4046 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4047 4048 unpin_guc_id(guc, ce); 4049 lrc_unpin(ce); 4050 } 4051 4052 static void guc_child_context_unpin(struct intel_context *ce) 4053 { 4054 GEM_BUG_ON(context_enabled(ce)); 4055 GEM_BUG_ON(intel_context_is_barrier(ce)); 4056 GEM_BUG_ON(!intel_context_is_child(ce)); 4057 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4058 4059 lrc_unpin(ce); 4060 } 4061 4062 static void guc_child_context_post_unpin(struct intel_context *ce) 4063 { 4064 GEM_BUG_ON(!intel_context_is_child(ce)); 4065 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 4066 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4067 4068 lrc_post_unpin(ce); 4069 intel_context_unpin(ce->parallel.parent); 4070 } 4071 4072 static void guc_child_context_destroy(struct kref *kref) 4073 { 4074 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 4075 4076 __guc_context_destroy(ce); 4077 } 4078 4079 static const struct intel_context_ops virtual_parent_context_ops = { 4080 .alloc = guc_virtual_context_alloc, 4081 4082 .close = guc_context_close, 4083 4084 .pre_pin = guc_context_pre_pin, 4085 .pin = guc_parent_context_pin, 4086 .unpin = guc_parent_context_unpin, 4087 .post_unpin = guc_context_post_unpin, 4088 4089 .revoke = guc_context_revoke, 4090 4091 .cancel_request = guc_context_cancel_request, 4092 4093 .enter = guc_virtual_context_enter, 4094 .exit = guc_virtual_context_exit, 4095 4096 .sched_disable = guc_context_sched_disable, 4097 4098 .destroy = guc_context_destroy, 4099 4100 .get_sibling = guc_virtual_get_sibling, 4101 }; 4102 4103 static const struct intel_context_ops virtual_child_context_ops = { 4104 .alloc = guc_virtual_context_alloc, 4105 4106 .pre_pin = guc_context_pre_pin, 4107 .pin = guc_child_context_pin, 4108 .unpin = guc_child_context_unpin, 4109 .post_unpin = guc_child_context_post_unpin, 4110 4111 .cancel_request = guc_context_cancel_request, 4112 4113 .enter = guc_virtual_context_enter, 4114 .exit = guc_virtual_context_exit, 4115 4116 .destroy = guc_child_context_destroy, 4117 4118 .get_sibling = guc_virtual_get_sibling, 4119 }; 4120 4121 /* 4122 * The below override of the breadcrumbs is enabled when the user configures a 4123 * context for parallel submission (multi-lrc, parent-child). 4124 * 4125 * The overridden breadcrumbs implements an algorithm which allows the GuC to 4126 * safely preempt all the hw contexts configured for parallel submission 4127 * between each BB. The contract between the i915 and GuC is if the parent 4128 * context can be preempted, all the children can be preempted, and the GuC will 4129 * always try to preempt the parent before the children. 
A handshake between the 4130 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 4131 * creating a window to preempt between each set of BBs. 4132 */ 4133 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4134 u64 offset, u32 len, 4135 const unsigned int flags); 4136 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4137 u64 offset, u32 len, 4138 const unsigned int flags); 4139 static u32 * 4140 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4141 u32 *cs); 4142 static u32 * 4143 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4144 u32 *cs); 4145 4146 static struct intel_context * 4147 guc_create_parallel(struct intel_engine_cs **engines, 4148 unsigned int num_siblings, 4149 unsigned int width) 4150 { 4151 struct intel_engine_cs **siblings = NULL; 4152 struct intel_context *parent = NULL, *ce, *err; 4153 int i, j; 4154 4155 siblings = kmalloc_array(num_siblings, 4156 sizeof(*siblings), 4157 GFP_KERNEL); 4158 if (!siblings) 4159 return ERR_PTR(-ENOMEM); 4160 4161 for (i = 0; i < width; ++i) { 4162 for (j = 0; j < num_siblings; ++j) 4163 siblings[j] = engines[i * num_siblings + j]; 4164 4165 ce = intel_engine_create_virtual(siblings, num_siblings, 4166 FORCE_VIRTUAL); 4167 if (IS_ERR(ce)) { 4168 err = ERR_CAST(ce); 4169 goto unwind; 4170 } 4171 4172 if (i == 0) { 4173 parent = ce; 4174 parent->ops = &virtual_parent_context_ops; 4175 } else { 4176 ce->ops = &virtual_child_context_ops; 4177 intel_context_bind_parent_child(parent, ce); 4178 } 4179 } 4180 4181 parent->parallel.fence_context = dma_fence_context_alloc(1); 4182 4183 parent->engine->emit_bb_start = 4184 emit_bb_start_parent_no_preempt_mid_batch; 4185 parent->engine->emit_fini_breadcrumb = 4186 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 4187 parent->engine->emit_fini_breadcrumb_dw = 4188 12 + 4 * parent->parallel.number_children; 4189 for_each_child(parent, ce) { 4190 ce->engine->emit_bb_start = 4191 emit_bb_start_child_no_preempt_mid_batch; 4192 ce->engine->emit_fini_breadcrumb = 4193 emit_fini_breadcrumb_child_no_preempt_mid_batch; 4194 ce->engine->emit_fini_breadcrumb_dw = 16; 4195 } 4196 4197 kfree(siblings); 4198 return parent; 4199 4200 unwind: 4201 if (parent) 4202 intel_context_put(parent); 4203 kfree(siblings); 4204 return err; 4205 } 4206 4207 static bool 4208 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 4209 { 4210 struct intel_engine_cs *sibling; 4211 intel_engine_mask_t tmp, mask = b->engine_mask; 4212 bool result = false; 4213 4214 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4215 result |= intel_engine_irq_enable(sibling); 4216 4217 return result; 4218 } 4219 4220 static void 4221 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 4222 { 4223 struct intel_engine_cs *sibling; 4224 intel_engine_mask_t tmp, mask = b->engine_mask; 4225 4226 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4227 intel_engine_irq_disable(sibling); 4228 } 4229 4230 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 4231 { 4232 int i; 4233 4234 /* 4235 * In GuC submission mode we do not know which physical engine a request 4236 * will be scheduled on, this creates a problem because the breadcrumb 4237 * interrupt is per physical engine. To work around this we attach 4238 * requests and direct all breadcrumb interrupts to the first instance 4239 * of an engine per class. 
In addition all breadcrumb interrupts are 4240 * enabled / disabled across an engine class in unison. 4241 */ 4242 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 4243 struct intel_engine_cs *sibling = 4244 engine->gt->engine_class[engine->class][i]; 4245 4246 if (sibling) { 4247 if (engine->breadcrumbs != sibling->breadcrumbs) { 4248 intel_breadcrumbs_put(engine->breadcrumbs); 4249 engine->breadcrumbs = 4250 intel_breadcrumbs_get(sibling->breadcrumbs); 4251 } 4252 break; 4253 } 4254 } 4255 4256 if (engine->breadcrumbs) { 4257 engine->breadcrumbs->engine_mask |= engine->mask; 4258 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 4259 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 4260 } 4261 } 4262 4263 static void guc_bump_inflight_request_prio(struct i915_request *rq, 4264 int prio) 4265 { 4266 struct intel_context *ce = request_to_scheduling_context(rq); 4267 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 4268 4269 /* Short circuit function */ 4270 if (prio < I915_PRIORITY_NORMAL || 4271 rq->guc_prio == GUC_PRIO_FINI || 4272 (rq->guc_prio != GUC_PRIO_INIT && 4273 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 4274 return; 4275 4276 spin_lock(&ce->guc_state.lock); 4277 if (rq->guc_prio != GUC_PRIO_FINI) { 4278 if (rq->guc_prio != GUC_PRIO_INIT) 4279 sub_context_inflight_prio(ce, rq->guc_prio); 4280 rq->guc_prio = new_guc_prio; 4281 add_context_inflight_prio(ce, rq->guc_prio); 4282 update_context_prio(ce); 4283 } 4284 spin_unlock(&ce->guc_state.lock); 4285 } 4286 4287 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4288 { 4289 struct intel_context *ce = request_to_scheduling_context(rq); 4290 4291 spin_lock(&ce->guc_state.lock); 4292 guc_prio_fini(rq, ce); 4293 spin_unlock(&ce->guc_state.lock); 4294 } 4295 4296 static void sanitize_hwsp(struct intel_engine_cs *engine) 4297 { 4298 struct intel_timeline *tl; 4299 4300 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4301 intel_timeline_reset_seqno(tl); 4302 } 4303 4304 static void guc_sanitize(struct intel_engine_cs *engine) 4305 { 4306 /* 4307 * Poison residual state on resume, in case the suspend didn't! 4308 * 4309 * We have to assume that across suspend/resume (or other loss 4310 * of control) that the contents of our pinned buffers has been 4311 * lost, replaced by garbage. Since this doesn't always happen, 4312 * let's poison such state so that we more quickly spot when 4313 * we falsely assume it has been preserved. 4314 */ 4315 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4316 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4317 4318 /* 4319 * The kernel_context HWSP is stored in the status_page. As above, 4320 * that may be lost on resume/initialisation, and so we need to 4321 * reset the value in the HWSP. 
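 *
 * Illustrative only (the HWSP pointer name below is schematic): a stale slot
 * left over from before suspend could make completion checks lie, e.g.
 *
 *	i915_seqno_passed(READ_ONCE(*hwsp_slot), tl->seqno)
 *
 * might claim work has completed that never ran, which is why each timeline
 * seqno is written back into the HWSP below before anything trusts it.
 *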
4322 */ 4323 sanitize_hwsp(engine); 4324 4325 /* And scrub the dirty cachelines for the HWSP */ 4326 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4327 4328 intel_engine_reset_pinned_contexts(engine); 4329 } 4330 4331 static void setup_hwsp(struct intel_engine_cs *engine) 4332 { 4333 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4334 4335 ENGINE_WRITE_FW(engine, 4336 RING_HWS_PGA, 4337 i915_ggtt_offset(engine->status_page.vma)); 4338 } 4339 4340 static void start_engine(struct intel_engine_cs *engine) 4341 { 4342 ENGINE_WRITE_FW(engine, 4343 RING_MODE_GEN7, 4344 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4345 4346 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4347 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4348 } 4349 4350 static int guc_resume(struct intel_engine_cs *engine) 4351 { 4352 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4353 4354 intel_mocs_init_engine(engine); 4355 4356 intel_breadcrumbs_reset(engine->breadcrumbs); 4357 4358 setup_hwsp(engine); 4359 start_engine(engine); 4360 4361 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4362 xehp_enable_ccs_engines(engine); 4363 4364 return 0; 4365 } 4366 4367 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4368 { 4369 return !sched_engine->tasklet.callback; 4370 } 4371 4372 static void guc_set_default_submission(struct intel_engine_cs *engine) 4373 { 4374 engine->submit_request = guc_submit_request; 4375 } 4376 4377 static inline int guc_kernel_context_pin(struct intel_guc *guc, 4378 struct intel_context *ce) 4379 { 4380 int ret; 4381 4382 /* 4383 * Note: we purposefully do not check the returns below because 4384 * the registration can only fail if a reset is just starting. 4385 * This is called at the end of reset so presumably another reset 4386 * isn't happening and even if it did this code would be run again. 4387 */ 4388 4389 if (context_guc_id_invalid(ce)) { 4390 ret = pin_guc_id(guc, ce); 4391 4392 if (ret < 0) 4393 return ret; 4394 } 4395 4396 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) 4397 guc_context_init(ce); 4398 4399 ret = try_context_registration(ce, true); 4400 if (ret) 4401 unpin_guc_id(guc, ce); 4402 4403 return ret; 4404 } 4405 4406 static inline int guc_init_submission(struct intel_guc *guc) 4407 { 4408 struct intel_gt *gt = guc_to_gt(guc); 4409 struct intel_engine_cs *engine; 4410 enum intel_engine_id id; 4411 4412 /* make sure all descriptors are clean... */ 4413 xa_destroy(&guc->context_lookup); 4414 4415 /* 4416 * A reset might have occurred while we had a pending stalled request, 4417 * so make sure we clean that up. 4418 */ 4419 guc->stalled_request = NULL; 4420 guc->submission_stall_reason = STALL_NONE; 4421 4422 /* 4423 * Some contexts might have been pinned before we enabled GuC 4424 * submission, so we need to add them to the GuC bookkeeping. 4425 * Also, after a reset of the GuC we want to make sure that the 4426 * information shared with the GuC is properly reset. The kernel LRCs are 4427 * not attached to the gem_context, so they need to be added separately. 
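 *
 * In summary (a condensed restatement of guc_kernel_context_pin() above,
 * not a separate path), re-adding one such pinned context boils down to:
 *
 *	if (context_guc_id_invalid(ce))
 *		pin_guc_id(guc, ce);		// (re)acquire a guc_id
 *	try_context_registration(ce, true);	// describe the LRC to the GuC
 *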
4428 */ 4429 for_each_engine(engine, gt, id) { 4430 struct intel_context *ce; 4431 4432 list_for_each_entry(ce, &engine->pinned_contexts_list, 4433 pinned_contexts_link) { 4434 int ret = guc_kernel_context_pin(guc, ce); 4435 4436 if (ret) { 4437 /* No point in trying to clean up as i915 will wedge on failure */ 4438 return ret; 4439 } 4440 } 4441 } 4442 4443 return 0; 4444 } 4445 4446 static void guc_release(struct intel_engine_cs *engine) 4447 { 4448 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4449 4450 intel_engine_cleanup_common(engine); 4451 lrc_fini_wa_ctx(engine); 4452 } 4453 4454 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4455 { 4456 struct intel_engine_cs *e; 4457 intel_engine_mask_t tmp, mask = engine->mask; 4458 4459 for_each_engine_masked(e, engine->gt, mask, tmp) 4460 e->serial++; 4461 } 4462 4463 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4464 { 4465 /* Default vfuncs which can be overridden by each engine. */ 4466 4467 engine->resume = guc_resume; 4468 4469 engine->cops = &guc_context_ops; 4470 engine->request_alloc = guc_request_alloc; 4471 engine->add_active_request = add_to_context; 4472 engine->remove_active_request = remove_from_context; 4473 4474 engine->sched_engine->schedule = i915_schedule; 4475 4476 engine->reset.prepare = guc_engine_reset_prepare; 4477 engine->reset.rewind = guc_rewind_nop; 4478 engine->reset.cancel = guc_reset_nop; 4479 engine->reset.finish = guc_reset_nop; 4480 4481 engine->emit_flush = gen8_emit_flush_xcs; 4482 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4483 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4484 if (GRAPHICS_VER(engine->i915) >= 12) { 4485 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4486 engine->emit_flush = gen12_emit_flush_xcs; 4487 } 4488 engine->set_default_submission = guc_set_default_submission; 4489 engine->busyness = guc_engine_busyness; 4490 4491 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4492 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4493 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4494 4495 /* Wa_14014475959:dg2 */ 4496 if (engine->class == COMPUTE_CLASS) 4497 if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || 4498 IS_DG2(engine->i915)) 4499 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4500 4501 /* 4502 * TODO: GuC supports timeslicing and semaphores as well, but they're 4503 * handled by the firmware so some minor tweaks are required before 4504 * enabling. 
4505 * 4506 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4507 */ 4508 4509 engine->emit_bb_start = gen8_emit_bb_start; 4510 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4511 engine->emit_bb_start = xehp_emit_bb_start; 4512 } 4513 4514 static void rcs_submission_override(struct intel_engine_cs *engine) 4515 { 4516 switch (GRAPHICS_VER(engine->i915)) { 4517 case 12: 4518 engine->emit_flush = gen12_emit_flush_rcs; 4519 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4520 break; 4521 case 11: 4522 engine->emit_flush = gen11_emit_flush_rcs; 4523 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4524 break; 4525 default: 4526 engine->emit_flush = gen8_emit_flush_rcs; 4527 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4528 break; 4529 } 4530 } 4531 4532 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4533 { 4534 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4535 intel_engine_set_irq_handler(engine, cs_irq_handler); 4536 } 4537 4538 static void guc_sched_engine_destroy(struct kref *kref) 4539 { 4540 struct i915_sched_engine *sched_engine = 4541 container_of(kref, typeof(*sched_engine), ref); 4542 struct intel_guc *guc = sched_engine->private_data; 4543 4544 guc->sched_engine = NULL; 4545 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4546 kfree(sched_engine); 4547 } 4548 4549 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4550 { 4551 struct drm_i915_private *i915 = engine->i915; 4552 struct intel_guc *guc = &engine->gt->uc.guc; 4553 4554 /* 4555 * The setup relies on several assumptions (e.g. irqs always enabled) 4556 * that are only valid on gen11+ 4557 */ 4558 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4559 4560 if (!guc->sched_engine) { 4561 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4562 if (!guc->sched_engine) 4563 return -ENOMEM; 4564 4565 guc->sched_engine->schedule = i915_schedule; 4566 guc->sched_engine->disabled = guc_sched_engine_disabled; 4567 guc->sched_engine->private_data = guc; 4568 guc->sched_engine->destroy = guc_sched_engine_destroy; 4569 guc->sched_engine->bump_inflight_request_prio = 4570 guc_bump_inflight_request_prio; 4571 guc->sched_engine->retire_inflight_request_prio = 4572 guc_retire_inflight_request_prio; 4573 tasklet_setup(&guc->sched_engine->tasklet, 4574 guc_submission_tasklet); 4575 } 4576 i915_sched_engine_put(engine->sched_engine); 4577 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4578 4579 guc_default_vfuncs(engine); 4580 guc_default_irqs(engine); 4581 guc_init_breadcrumbs(engine); 4582 4583 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4584 rcs_submission_override(engine); 4585 4586 lrc_init_wa_ctx(engine); 4587 4588 /* Finally, take ownership and responsibility for cleanup! 
*/ 4589 engine->sanitize = guc_sanitize; 4590 engine->release = guc_release; 4591 4592 return 0; 4593 } 4594 4595 struct scheduling_policy { 4596 /* internal data */ 4597 u32 max_words, num_words; 4598 u32 count; 4599 /* API data */ 4600 struct guc_update_scheduling_policy h2g; 4601 }; 4602 4603 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4604 { 4605 u32 *start = (void *)&policy->h2g; 4606 u32 *end = policy->h2g.data + policy->num_words; 4607 size_t delta = end - start; 4608 4609 return delta; 4610 } 4611 4612 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4613 { 4614 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4615 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4616 policy->num_words = 0; 4617 policy->count = 0; 4618 4619 return policy; 4620 } 4621 4622 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4623 u32 action, u32 *data, u32 len) 4624 { 4625 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4626 4627 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4628 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4629 FIELD_PREP(GUC_KLV_0_LEN, len); 4630 memcpy(klv_ptr, data, sizeof(u32) * len); 4631 policy->num_words += 1 + len; 4632 policy->count++; 4633 } 4634 4635 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4636 struct scheduling_policy *policy) 4637 { 4638 int ret; 4639 4640 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4641 __guc_scheduling_policy_action_size(policy)); 4642 if (ret < 0) { 4643 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n", 4644 ERR_PTR(ret)); 4645 return ret; 4646 } 4647 4648 if (ret != policy->count) { 4649 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!", 4650 ret, policy->count); 4651 if (ret > policy->count) 4652 return -EPROTO; 4653 } 4654 4655 return 0; 4656 } 4657 4658 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4659 { 4660 struct scheduling_policy policy; 4661 struct intel_gt *gt = guc_to_gt(guc); 4662 intel_wakeref_t wakeref; 4663 int ret; 4664 4665 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 4666 return 0; 4667 4668 __guc_scheduling_policy_start_klv(&policy); 4669 4670 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 4671 u32 yield[] = { 4672 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4673 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4674 }; 4675 4676 __guc_scheduling_policy_add_klv(&policy, 4677 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4678 yield, ARRAY_SIZE(yield)); 4679 4680 ret = __guc_action_set_scheduling_policies(guc, &policy); 4681 } 4682 4683 return ret; 4684 } 4685 4686 static void guc_route_semaphores(struct intel_guc *guc, bool to_guc) 4687 { 4688 struct intel_gt *gt = guc_to_gt(guc); 4689 u32 val; 4690 4691 if (GRAPHICS_VER(gt->i915) < 12) 4692 return; 4693 4694 if (to_guc) 4695 val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL; 4696 else 4697 val = 0; 4698 4699 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val); 4700 } 4701 4702 int intel_guc_submission_enable(struct intel_guc *guc) 4703 { 4704 int ret; 4705 4706 /* Semaphore interrupt enable and route to GuC */ 4707 guc_route_semaphores(guc, true); 4708 4709 ret = guc_init_submission(guc); 4710 if (ret) 4711 goto fail_sem; 4712 4713 ret = guc_init_engine_stats(guc); 4714 if (ret) 4715 goto fail_sem; 4716 4717 ret = guc_init_global_schedule_policy(guc); 4718 if (ret) 4719 goto fail_stats; 4720 
4721 return 0; 4722 4723 fail_stats: 4724 guc_fini_engine_stats(guc); 4725 fail_sem: 4726 guc_route_semaphores(guc, false); 4727 return ret; 4728 } 4729 4730 /* Note: By the time we're here, GuC may have already been reset */ 4731 void intel_guc_submission_disable(struct intel_guc *guc) 4732 { 4733 guc_cancel_busyness_worker(guc); 4734 4735 /* Semaphore interrupt disable and route to host */ 4736 guc_route_semaphores(guc, false); 4737 } 4738 4739 static bool __guc_submission_supported(struct intel_guc *guc) 4740 { 4741 /* GuC submission is unavailable for pre-Gen11 */ 4742 return intel_guc_is_supported(guc) && 4743 GRAPHICS_VER(guc_to_i915(guc)) >= 11; 4744 } 4745 4746 static bool __guc_submission_selected(struct intel_guc *guc) 4747 { 4748 struct drm_i915_private *i915 = guc_to_i915(guc); 4749 4750 if (!intel_guc_submission_is_supported(guc)) 4751 return false; 4752 4753 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4754 } 4755 4756 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) 4757 { 4758 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); 4759 } 4760 4761 /* 4762 * This default value of 33 milliseconds (+1 millisecond round up) ensures 30fps or higher 4763 * workloads are able to enjoy the latency reduction when delaying the schedule-disable 4764 * operation. This matches the 30fps game-render + encode (real world) workload this 4765 * knob was tested against. 4766 */ 4767 #define SCHED_DISABLE_DELAY_MS 34 4768 4769 /* 4770 * A threshold of 75% is a reasonable starting point considering that real world apps 4771 * generally don't get anywhere near this. 4772 */ 4773 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ 4774 (((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4) 4775 4776 void intel_guc_submission_init_early(struct intel_guc *guc) 4777 { 4778 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4779 4780 spin_lock_init(&guc->submission_state.lock); 4781 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4782 ida_init(&guc->submission_state.guc_ids); 4783 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4784 INIT_WORK(&guc->submission_state.destroyed_worker, 4785 destroyed_worker_func); 4786 INIT_WORK(&guc->submission_state.reset_fail_worker, 4787 reset_fail_worker_func); 4788 4789 spin_lock_init(&guc->timestamp.lock); 4790 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4791 4792 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; 4793 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4794 guc->submission_state.sched_disable_gucid_threshold = 4795 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); 4796 guc->submission_supported = __guc_submission_supported(guc); 4797 guc->submission_selected = __guc_submission_selected(guc); 4798 } 4799 4800 static inline struct intel_context * 4801 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4802 { 4803 struct intel_context *ce; 4804 4805 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4806 guc_err(guc, "Invalid ctx_id %u\n", ctx_id); 4807 return NULL; 4808 } 4809 4810 ce = __get_context(guc, ctx_id); 4811 if (unlikely(!ce)) { 4812 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id); 4813 return NULL; 4814 } 4815 4816 if (unlikely(intel_context_is_child(ce))) { 4817 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id); 4818 return NULL; 4819 } 4820 4821 return ce; 4822 } 4823 4824 static void wait_wake_outstanding_tlb_g2h(struct intel_guc *guc, u32 seqno) 4825 { 4826 struct intel_guc_tlb_wait *wait; 4827 unsigned 
long flags; 4828 4829 xa_lock_irqsave(&guc->tlb_lookup, flags); 4830 wait = xa_load(&guc->tlb_lookup, seqno); 4831 4832 if (wait) 4833 wake_up(&wait->wq); 4834 else 4835 guc_dbg(guc, 4836 "Stale TLB invalidation response with seqno %d\n", seqno); 4837 4838 xa_unlock_irqrestore(&guc->tlb_lookup, flags); 4839 } 4840 4841 int intel_guc_tlb_invalidation_done(struct intel_guc *guc, 4842 const u32 *payload, u32 len) 4843 { 4844 if (len < 1) 4845 return -EPROTO; 4846 4847 wait_wake_outstanding_tlb_g2h(guc, payload[0]); 4848 return 0; 4849 } 4850 4851 static long must_wait_woken(struct wait_queue_entry *wq_entry, long timeout) 4852 { 4853 /* 4854 * This is equivalent to wait_woken() with the exception that 4855 * we do not wake up early if the kthread task has been completed. 4856 * As we are called from page reclaim in any task context, 4857 * we may be invoked from stopped kthreads, but we *must* 4858 * complete the wait from the HW. 4859 */ 4860 do { 4861 set_current_state(TASK_UNINTERRUPTIBLE); 4862 if (wq_entry->flags & WQ_FLAG_WOKEN) 4863 break; 4864 4865 timeout = schedule_timeout(timeout); 4866 } while (timeout); 4867 4868 /* See wait_woken() and woken_wake_function() */ 4869 __set_current_state(TASK_RUNNING); 4870 smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); 4871 4872 return timeout; 4873 } 4874 4875 static bool intel_gt_is_enabled(const struct intel_gt *gt) 4876 { 4877 /* Check if GT is wedged or suspended */ 4878 if (intel_gt_is_wedged(gt) || !intel_irqs_enabled(gt->i915)) 4879 return false; 4880 return true; 4881 } 4882 4883 static int guc_send_invalidate_tlb(struct intel_guc *guc, 4884 enum intel_guc_tlb_invalidation_type type) 4885 { 4886 struct intel_guc_tlb_wait _wq, *wq = &_wq; 4887 struct intel_gt *gt = guc_to_gt(guc); 4888 DEFINE_WAIT_FUNC(wait, woken_wake_function); 4889 int err; 4890 u32 seqno; 4891 u32 action[] = { 4892 INTEL_GUC_ACTION_TLB_INVALIDATION, 4893 0, 4894 REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK, type) | 4895 REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK, 4896 INTEL_GUC_TLB_INVAL_MODE_HEAVY) | 4897 INTEL_GUC_TLB_INVAL_FLUSH_CACHE, 4898 }; 4899 u32 size = ARRAY_SIZE(action); 4900 4901 /* 4902 * Early guard against GT enablement. TLB invalidation should not be 4903 * attempted if the GT is disabled due to suspend/wedge. 4904 */ 4905 if (!intel_gt_is_enabled(gt)) 4906 return -EINVAL; 4907 4908 init_waitqueue_head(&_wq.wq); 4909 4910 if (xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, wq, 4911 xa_limit_32b, &guc->next_seqno, 4912 GFP_ATOMIC | __GFP_NOWARN) < 0) { 4913 /* Under severe memory pressure? Serialise TLB allocations */ 4914 xa_lock_irq(&guc->tlb_lookup); 4915 wq = xa_load(&guc->tlb_lookup, guc->serial_slot); 4916 wait_event_lock_irq(wq->wq, 4917 !READ_ONCE(wq->busy), 4918 guc->tlb_lookup.xa_lock); 4919 /* 4920 * Update wq->busy under lock to ensure only one waiter can 4921 * issue the TLB invalidation command using the serial slot at a 4922 * time. The condition is set to true before releasing the lock 4923 * so that other caller continue to wait until woken up again. 4924 */ 4925 wq->busy = true; 4926 xa_unlock_irq(&guc->tlb_lookup); 4927 4928 seqno = guc->serial_slot; 4929 } 4930 4931 action[1] = seqno; 4932 4933 add_wait_queue(&wq->wq, &wait); 4934 4935 /* This is a critical reclaim path and thus we must loop here. */ 4936 err = intel_guc_send_busy_loop(guc, action, size, G2H_LEN_DW_INVALIDATE_TLB, true); 4937 if (err) 4938 goto out; 4939 4940 /* 4941 * Late guard against GT enablement. 
It is not an error for the TLB 4942 * invalidation to time out if the GT is disabled during the process 4943 * due to suspend/wedge. In fact, the TLB invalidation is cancelled 4944 * in this case. 4945 */ 4946 if (!must_wait_woken(&wait, intel_guc_ct_max_queue_time_jiffies()) && 4947 intel_gt_is_enabled(gt)) { 4948 guc_err(guc, 4949 "TLB invalidation response timed out for seqno %u\n", seqno); 4950 err = -ETIME; 4951 } 4952 out: 4953 remove_wait_queue(&wq->wq, &wait); 4954 if (seqno != guc->serial_slot) 4955 xa_erase_irq(&guc->tlb_lookup, seqno); 4956 4957 return err; 4958 } 4959 4960 /* Send a H2G command to invalidate the TLBs at engine level and beyond. */ 4961 int intel_guc_invalidate_tlb_engines(struct intel_guc *guc) 4962 { 4963 return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_ENGINES); 4964 } 4965 4966 /* Send a H2G command to invalidate the GuC's internal TLB. */ 4967 int intel_guc_invalidate_tlb_guc(struct intel_guc *guc) 4968 { 4969 return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_GUC); 4970 } 4971 4972 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4973 const u32 *msg, 4974 u32 len) 4975 { 4976 struct intel_context *ce; 4977 u32 ctx_id; 4978 4979 if (unlikely(len < 1)) { 4980 guc_err(guc, "Invalid length %u\n", len); 4981 return -EPROTO; 4982 } 4983 ctx_id = msg[0]; 4984 4985 ce = g2h_context_lookup(guc, ctx_id); 4986 if (unlikely(!ce)) 4987 return -EPROTO; 4988 4989 trace_intel_context_deregister_done(ce); 4990 4991 #ifdef CONFIG_DRM_I915_SELFTEST 4992 if (unlikely(ce->drop_deregister)) { 4993 ce->drop_deregister = false; 4994 return 0; 4995 } 4996 #endif 4997 4998 if (context_wait_for_deregister_to_register(ce)) { 4999 struct intel_runtime_pm *runtime_pm = 5000 &ce->engine->gt->i915->runtime_pm; 5001 intel_wakeref_t wakeref; 5002 5003 /* 5004 * The previous owner of this guc_id has been deregistered, so it is 5005 * now safe to register this context. 
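 *
 * Sketch of the guc_id-steal sequence that typically lands us here (helper
 * names in the first half are schematic; only register_context() and
 * guc_signal_context_fence() below are the real calls):
 *
 *	// request creation: no free guc_ids, so steal one from an unpinned
 *	// context A and hand it to context B
 *	B.guc_id = A.guc_id;
 *	mark B as wait_for_deregister_to_register;
 *	send a deregister H2G for that guc_id;
 *	// ... this handler runs once the deregister-done G2H arrives ...
 *	register_context(B, true);
 *	guc_signal_context_fence(B);	// release B's stalled requests
 *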
5006 */ 5007 with_intel_runtime_pm(runtime_pm, wakeref) 5008 register_context(ce, true); 5009 guc_signal_context_fence(ce); 5010 intel_context_put(ce); 5011 } else if (context_destroyed(ce)) { 5012 /* Context has been destroyed */ 5013 intel_gt_pm_put_async_untracked(guc_to_gt(guc)); 5014 release_guc_id(guc, ce); 5015 __guc_context_destroy(ce); 5016 } 5017 5018 decr_outstanding_submission_g2h(guc); 5019 5020 return 0; 5021 } 5022 5023 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 5024 const u32 *msg, 5025 u32 len) 5026 { 5027 struct intel_context *ce; 5028 unsigned long flags; 5029 u32 ctx_id; 5030 5031 if (unlikely(len < 2)) { 5032 guc_err(guc, "Invalid length %u\n", len); 5033 return -EPROTO; 5034 } 5035 ctx_id = msg[0]; 5036 5037 ce = g2h_context_lookup(guc, ctx_id); 5038 if (unlikely(!ce)) 5039 return -EPROTO; 5040 5041 if (unlikely(context_destroyed(ce) || 5042 (!context_pending_enable(ce) && 5043 !context_pending_disable(ce)))) { 5044 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n", 5045 ce->guc_state.sched_state, ctx_id); 5046 return -EPROTO; 5047 } 5048 5049 trace_intel_context_sched_done(ce); 5050 5051 if (context_pending_enable(ce)) { 5052 #ifdef CONFIG_DRM_I915_SELFTEST 5053 if (unlikely(ce->drop_schedule_enable)) { 5054 ce->drop_schedule_enable = false; 5055 return 0; 5056 } 5057 #endif 5058 5059 spin_lock_irqsave(&ce->guc_state.lock, flags); 5060 clr_context_pending_enable(ce); 5061 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 5062 } else if (context_pending_disable(ce)) { 5063 bool banned; 5064 5065 #ifdef CONFIG_DRM_I915_SELFTEST 5066 if (unlikely(ce->drop_schedule_disable)) { 5067 ce->drop_schedule_disable = false; 5068 return 0; 5069 } 5070 #endif 5071 5072 /* 5073 * Unpin must be done before __guc_signal_context_fence, 5074 * otherwise a race exists between the requests getting 5075 * submitted + retired before this unpin completes resulting in 5076 * the pin_count going to zero and the context still being 5077 * enabled. 
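 *
 * Illustrative interleaving of the race this ordering avoids (i.e. what
 * could happen if we signalled the fences first and unpinned afterwards):
 *
 *	this G2H handler                     submitter / retire
 *	----------------                     ------------------
 *	__guc_signal_context_fence(ce)
 *	                                     blocked request submitted,
 *	                                     completes and retires,
 *	                                     pin_count drops towards zero
 *	intel_context_sched_disable_unpin(ce)
 *	                                     ... too late: the context hit a
 *	                                     zero pin_count while still enabled
 *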
5078 */ 5079 intel_context_sched_disable_unpin(ce); 5080 5081 spin_lock_irqsave(&ce->guc_state.lock, flags); 5082 banned = context_banned(ce); 5083 clr_context_banned(ce); 5084 clr_context_pending_disable(ce); 5085 __guc_signal_context_fence(ce); 5086 guc_blocked_fence_complete(ce); 5087 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 5088 5089 if (banned) { 5090 guc_cancel_context_requests(ce); 5091 intel_engine_signal_breadcrumbs(ce->engine); 5092 } 5093 } 5094 5095 decr_outstanding_submission_g2h(guc); 5096 intel_context_put(ce); 5097 5098 return 0; 5099 } 5100 5101 static void capture_error_state(struct intel_guc *guc, 5102 struct intel_context *ce) 5103 { 5104 struct intel_gt *gt = guc_to_gt(guc); 5105 struct drm_i915_private *i915 = gt->i915; 5106 intel_wakeref_t wakeref; 5107 intel_engine_mask_t engine_mask; 5108 5109 if (intel_engine_is_virtual(ce->engine)) { 5110 struct intel_engine_cs *e; 5111 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask; 5112 5113 engine_mask = 0; 5114 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) { 5115 bool match = intel_guc_capture_is_matching_engine(gt, ce, e); 5116 5117 if (match) { 5118 intel_engine_set_hung_context(e, ce); 5119 engine_mask |= e->mask; 5120 i915_increase_reset_engine_count(&i915->gpu_error, 5121 e); 5122 } 5123 } 5124 5125 if (!engine_mask) { 5126 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s", 5127 ce->guc_id.id, ce->engine->name); 5128 engine_mask = ~0U; 5129 } 5130 } else { 5131 intel_engine_set_hung_context(ce->engine, ce); 5132 engine_mask = ce->engine->mask; 5133 i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); 5134 } 5135 5136 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 5137 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 5138 } 5139 5140 static void guc_context_replay(struct intel_context *ce) 5141 { 5142 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 5143 5144 __guc_reset_context(ce, ce->engine->mask); 5145 tasklet_hi_schedule(&sched_engine->tasklet); 5146 } 5147 5148 static void guc_handle_context_reset(struct intel_guc *guc, 5149 struct intel_context *ce) 5150 { 5151 bool capture = intel_context_is_schedulable(ce); 5152 5153 trace_intel_context_reset(ce); 5154 5155 guc_dbg(guc, "%s context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n", 5156 capture ? "Got" : "Ignoring", 5157 ce->guc_id.id, ce->engine->name, 5158 str_yes_no(intel_context_is_exiting(ce)), 5159 str_yes_no(intel_context_is_banned(ce))); 5160 5161 if (capture) { 5162 capture_error_state(guc, ce); 5163 guc_context_replay(ce); 5164 } 5165 } 5166 5167 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 5168 const u32 *msg, u32 len) 5169 { 5170 struct intel_context *ce; 5171 unsigned long flags; 5172 int ctx_id; 5173 5174 if (unlikely(len != 1)) { 5175 guc_err(guc, "Invalid length %u", len); 5176 return -EPROTO; 5177 } 5178 5179 ctx_id = msg[0]; 5180 5181 /* 5182 * The context lookup uses the xarray but lookups only require an RCU lock 5183 * not the full spinlock. So take the lock explicitly and keep it until the 5184 * context has been reference count locked to ensure it can't be destroyed 5185 * asynchronously until the reset is done. 
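 *
 * The idiom below in miniature, with the unsafe ordering it replaces:
 *
 *	// not done: lookup without holding the xarray lock
 *	ce = g2h_context_lookup(guc, ctx_id);
 *	intel_context_get(ce);		// ce could already be on its way out
 *
 * Holding guc->context_lookup's lock across the lookup and the
 * intel_context_get() (as the code below does) closes that window.
 *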
5186 */ 5187 xa_lock_irqsave(&guc->context_lookup, flags); 5188 ce = g2h_context_lookup(guc, ctx_id); 5189 if (ce) 5190 intel_context_get(ce); 5191 xa_unlock_irqrestore(&guc->context_lookup, flags); 5192 5193 if (unlikely(!ce)) 5194 return -EPROTO; 5195 5196 guc_handle_context_reset(guc, ce); 5197 intel_context_put(ce); 5198 5199 return 0; 5200 } 5201 5202 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 5203 const u32 *msg, u32 len) 5204 { 5205 u32 status; 5206 5207 if (unlikely(len != 1)) { 5208 guc_dbg(guc, "Invalid length %u", len); 5209 return -EPROTO; 5210 } 5211 5212 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 5213 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 5214 guc_warn(guc, "No space for error capture"); 5215 5216 intel_guc_capture_process(guc); 5217 5218 return 0; 5219 } 5220 5221 struct intel_engine_cs * 5222 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 5223 { 5224 struct intel_gt *gt = guc_to_gt(guc); 5225 u8 engine_class = guc_class_to_engine_class(guc_class); 5226 5227 /* Class index is checked in class converter */ 5228 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 5229 5230 return gt->engine_class[engine_class][instance]; 5231 } 5232 5233 static void reset_fail_worker_func(struct work_struct *w) 5234 { 5235 struct intel_guc *guc = container_of(w, struct intel_guc, 5236 submission_state.reset_fail_worker); 5237 struct intel_gt *gt = guc_to_gt(guc); 5238 intel_engine_mask_t reset_fail_mask; 5239 unsigned long flags; 5240 5241 spin_lock_irqsave(&guc->submission_state.lock, flags); 5242 reset_fail_mask = guc->submission_state.reset_fail_mask; 5243 guc->submission_state.reset_fail_mask = 0; 5244 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 5245 5246 if (likely(reset_fail_mask)) { 5247 struct intel_engine_cs *engine; 5248 enum intel_engine_id id; 5249 5250 /* 5251 * GuC is toast at this point - it dead loops after sending the failed 5252 * reset notification. So need to manually determine the guilty context. 5253 * Note that it should be reliable to do this here because the GuC is 5254 * toast and will not be scheduling behind the KMD's back. 5255 */ 5256 for_each_engine_masked(engine, gt, reset_fail_mask, id) 5257 intel_guc_find_hung_context(engine); 5258 5259 intel_gt_handle_error(gt, reset_fail_mask, 5260 I915_ERROR_CAPTURE, 5261 "GuC failed to reset engine mask=0x%x", 5262 reset_fail_mask); 5263 } 5264 } 5265 5266 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 5267 const u32 *msg, u32 len) 5268 { 5269 struct intel_engine_cs *engine; 5270 u8 guc_class, instance; 5271 u32 reason; 5272 unsigned long flags; 5273 5274 if (unlikely(len != 3)) { 5275 guc_err(guc, "Invalid length %u", len); 5276 return -EPROTO; 5277 } 5278 5279 guc_class = msg[0]; 5280 instance = msg[1]; 5281 reason = msg[2]; 5282 5283 engine = intel_guc_lookup_engine(guc, guc_class, instance); 5284 if (unlikely(!engine)) { 5285 guc_err(guc, "Invalid engine %d:%d", guc_class, instance); 5286 return -EPROTO; 5287 } 5288 5289 /* 5290 * This is an unexpected failure of a hardware feature. So, log a real 5291 * error message not just the informational that comes with the reset. 
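 *
 * For reference, the G2H payload decoded above is:
 *
 *	msg[0] = GuC engine class
 *	msg[1] = engine instance
 *	msg[2] = failure reason
 *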
5292 */ 5293 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X", 5294 guc_class, instance, engine->name, reason); 5295 5296 spin_lock_irqsave(&guc->submission_state.lock, flags); 5297 guc->submission_state.reset_fail_mask |= engine->mask; 5298 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 5299 5300 /* 5301 * A GT reset flushes this worker queue (G2H handler) so we must use 5302 * another worker to trigger a GT reset. 5303 */ 5304 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 5305 5306 return 0; 5307 } 5308 5309 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 5310 { 5311 struct intel_guc *guc = &engine->gt->uc.guc; 5312 struct intel_context *ce; 5313 struct i915_request *rq; 5314 unsigned long index; 5315 unsigned long flags; 5316 5317 /* Reset called during driver load? GuC not yet initialised! */ 5318 if (unlikely(!guc_submission_initialized(guc))) 5319 return; 5320 5321 xa_lock_irqsave(&guc->context_lookup, flags); 5322 xa_for_each(&guc->context_lookup, index, ce) { 5323 bool found; 5324 5325 if (!kref_get_unless_zero(&ce->ref)) 5326 continue; 5327 5328 xa_unlock(&guc->context_lookup); 5329 5330 if (!intel_context_is_pinned(ce)) 5331 goto next; 5332 5333 if (intel_engine_is_virtual(ce->engine)) { 5334 if (!(ce->engine->mask & engine->mask)) 5335 goto next; 5336 } else { 5337 if (ce->engine != engine) 5338 goto next; 5339 } 5340 5341 found = false; 5342 spin_lock(&ce->guc_state.lock); 5343 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 5344 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 5345 continue; 5346 5347 found = true; 5348 break; 5349 } 5350 spin_unlock(&ce->guc_state.lock); 5351 5352 if (found) { 5353 intel_engine_set_hung_context(engine, ce); 5354 5355 /* Can only cope with one hang at a time... */ 5356 intel_context_put(ce); 5357 xa_lock(&guc->context_lookup); 5358 goto done; 5359 } 5360 5361 next: 5362 intel_context_put(ce); 5363 xa_lock(&guc->context_lookup); 5364 } 5365 done: 5366 xa_unlock_irqrestore(&guc->context_lookup, flags); 5367 } 5368 5369 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 5370 struct i915_request *hung_rq, 5371 struct drm_printer *m) 5372 { 5373 struct intel_guc *guc = &engine->gt->uc.guc; 5374 struct intel_context *ce; 5375 unsigned long index; 5376 unsigned long flags; 5377 5378 /* Reset called during driver load? GuC not yet initialised! 
*/ 5379 if (unlikely(!guc_submission_initialized(guc))) 5380 return; 5381 5382 xa_lock_irqsave(&guc->context_lookup, flags); 5383 xa_for_each(&guc->context_lookup, index, ce) { 5384 if (!kref_get_unless_zero(&ce->ref)) 5385 continue; 5386 5387 xa_unlock(&guc->context_lookup); 5388 5389 if (!intel_context_is_pinned(ce)) 5390 goto next; 5391 5392 if (intel_engine_is_virtual(ce->engine)) { 5393 if (!(ce->engine->mask & engine->mask)) 5394 goto next; 5395 } else { 5396 if (ce->engine != engine) 5397 goto next; 5398 } 5399 5400 spin_lock(&ce->guc_state.lock); 5401 intel_engine_dump_active_requests(&ce->guc_state.requests, 5402 hung_rq, m); 5403 spin_unlock(&ce->guc_state.lock); 5404 5405 next: 5406 intel_context_put(ce); 5407 xa_lock(&guc->context_lookup); 5408 } 5409 xa_unlock_irqrestore(&guc->context_lookup, flags); 5410 } 5411 5412 void intel_guc_submission_print_info(struct intel_guc *guc, 5413 struct drm_printer *p) 5414 { 5415 struct i915_sched_engine *sched_engine = guc->sched_engine; 5416 struct rb_node *rb; 5417 unsigned long flags; 5418 5419 if (!sched_engine) 5420 return; 5421 5422 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", 5423 guc->submission_version.major, guc->submission_version.minor, 5424 guc->submission_version.patch); 5425 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 5426 atomic_read(&guc->outstanding_submission_g2h)); 5427 drm_printf(p, "GuC tasklet count: %u\n", 5428 atomic_read(&sched_engine->tasklet.count)); 5429 5430 spin_lock_irqsave(&sched_engine->lock, flags); 5431 drm_printf(p, "Requests in GuC submit tasklet:\n"); 5432 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 5433 struct i915_priolist *pl = to_priolist(rb); 5434 struct i915_request *rq; 5435 5436 priolist_for_each_request(rq, pl) 5437 drm_printf(p, "guc_id=%u, seqno=%llu\n", 5438 rq->context->guc_id.id, 5439 rq->fence.seqno); 5440 } 5441 spin_unlock_irqrestore(&sched_engine->lock, flags); 5442 drm_printf(p, "\n"); 5443 } 5444 5445 static inline void guc_log_context_priority(struct drm_printer *p, 5446 struct intel_context *ce) 5447 { 5448 int i; 5449 5450 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 5451 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 5452 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 5453 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 5454 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 5455 i, ce->guc_state.prio_count[i]); 5456 } 5457 drm_printf(p, "\n"); 5458 } 5459 5460 static inline void guc_log_context(struct drm_printer *p, 5461 struct intel_context *ce) 5462 { 5463 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 5464 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 5465 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 5466 ce->ring->head, 5467 ce->lrc_reg_state[CTX_RING_HEAD]); 5468 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 5469 ce->ring->tail, 5470 ce->lrc_reg_state[CTX_RING_TAIL]); 5471 drm_printf(p, "\t\tContext Pin Count: %u\n", 5472 atomic_read(&ce->pin_count)); 5473 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 5474 atomic_read(&ce->guc_id.ref)); 5475 drm_printf(p, "\t\tSchedule State: 0x%x\n", 5476 ce->guc_state.sched_state); 5477 } 5478 5479 void intel_guc_submission_print_context_info(struct intel_guc *guc, 5480 struct drm_printer *p) 5481 { 5482 struct intel_context *ce; 5483 unsigned long index; 5484 unsigned long flags; 5485 5486 xa_lock_irqsave(&guc->context_lookup, flags); 5487 xa_for_each(&guc->context_lookup, index, ce) { 5488 
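		/*
		 * Child contexts are not expected in the lookup xarray (hence
		 * the assert just below); they are printed indirectly via
		 * for_each_child() under their parent.
		 */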
static inline u32 get_children_go_addr(struct intel_context *ce)
{
        GEM_BUG_ON(!intel_context_is_parent(ce));

        return i915_ggtt_offset(ce->state) +
                __get_parent_scratch_offset(ce) +
                offsetof(struct parent_scratch, go.semaphore);
}

static inline u32 get_children_join_addr(struct intel_context *ce,
                                         u8 child_index)
{
        GEM_BUG_ON(!intel_context_is_parent(ce));

        return i915_ggtt_offset(ce->state) +
                __get_parent_scratch_offset(ce) +
                offsetof(struct parent_scratch, join[child_index].semaphore);
}

#define PARENT_GO_BB 1
#define PARENT_GO_FINI_BREADCRUMB 0
#define CHILD_GO_BB 1
#define CHILD_GO_FINI_BREADCRUMB 0
static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
                                                     u64 offset, u32 len,
                                                     const unsigned int flags)
{
        struct intel_context *ce = rq->context;
        u32 *cs;
        u8 i;

        GEM_BUG_ON(!intel_context_is_parent(ce));

        cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Wait on children */
        for (i = 0; i < ce->parallel.number_children; ++i) {
                *cs++ = (MI_SEMAPHORE_WAIT |
                         MI_SEMAPHORE_GLOBAL_GTT |
                         MI_SEMAPHORE_POLL |
                         MI_SEMAPHORE_SAD_EQ_SDD);
                *cs++ = PARENT_GO_BB;
                *cs++ = get_children_join_addr(ce, i);
                *cs++ = 0;
        }

        /* Turn off preemption */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
        *cs++ = MI_NOOP;

        /* Tell children go */
        cs = gen8_emit_ggtt_write(cs,
                                  CHILD_GO_BB,
                                  get_children_go_addr(ce),
                                  0);

        /* Jump to batch */
        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        return 0;
}
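
/*
 * Ring-space accounting for the two BB-start emitters, derived by counting
 * the emits above and below: the parent allocates 10 + 4 * number_children
 * dwords (one 4-dword semaphore wait per child, 2 for arbitration off plus a
 * NOOP, 4 for the ggtt "go" write and 4 for the batch buffer start), while
 * each child allocates the fixed 12 dwords used below (4 for the join write,
 * 4 for the semaphore wait, 1 for arbitration off and 3 for the batch buffer
 * start).
 */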
static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
                                                    u64 offset, u32 len,
                                                    const unsigned int flags)
{
        struct intel_context *ce = rq->context;
        struct intel_context *parent = intel_context_to_parent(ce);
        u32 *cs;

        GEM_BUG_ON(!intel_context_is_child(ce));

        cs = intel_ring_begin(rq, 12);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Signal parent */
        cs = gen8_emit_ggtt_write(cs,
                                  PARENT_GO_BB,
                                  get_children_join_addr(parent,
                                                         ce->parallel.child_index),
                                  0);

        /* Wait on parent for go */
        *cs++ = (MI_SEMAPHORE_WAIT |
                 MI_SEMAPHORE_GLOBAL_GTT |
                 MI_SEMAPHORE_POLL |
                 MI_SEMAPHORE_SAD_EQ_SDD);
        *cs++ = CHILD_GO_BB;
        *cs++ = get_children_go_addr(parent);
        *cs++ = 0;

        /* Turn off preemption */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* Jump to batch */
        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        intel_ring_advance(rq, cs);

        return 0;
}

static u32 *
__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
                                                   u32 *cs)
{
        struct intel_context *ce = rq->context;
        u8 i;

        GEM_BUG_ON(!intel_context_is_parent(ce));

        /* Wait on children */
        for (i = 0; i < ce->parallel.number_children; ++i) {
                *cs++ = (MI_SEMAPHORE_WAIT |
                         MI_SEMAPHORE_GLOBAL_GTT |
                         MI_SEMAPHORE_POLL |
                         MI_SEMAPHORE_SAD_EQ_SDD);
                *cs++ = PARENT_GO_FINI_BREADCRUMB;
                *cs++ = get_children_join_addr(ce, i);
                *cs++ = 0;
        }

        /* Turn on preemption */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        *cs++ = MI_NOOP;

        /* Tell children go */
        cs = gen8_emit_ggtt_write(cs,
                                  CHILD_GO_FINI_BREADCRUMB,
                                  get_children_go_addr(ce),
                                  0);

        return cs;
}

/*
 * If this is true, a submission of multi-lrc requests had an error and the
 * requests need to be skipped. The front end (execbuf IOCTL) should've called
 * i915_request_skip which squashes the BB but we still need to emit the fini
 * breadcrumb seqno write. At this point we don't know how many of the
 * requests in the multi-lrc submission were generated so we can't do the
 * handshake between the parent and children (e.g. if 4 requests should be
 * generated but the 2nd hit an error, only 1 would be seen by the GuC
 * backend). Simply skip the handshake, but still emit the breadcrumb seqno,
 * if an error has occurred on any of the requests in the submission /
 * relationship.
 */
static inline bool skip_handshake(struct i915_request *rq)
{
        return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
}
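
/*
 * NON_SKIP_LEN is the number of dwords emitted even when the handshake is
 * skipped: the 4-dword gen8_emit_ggtt_write() of the breadcrumb seqno plus
 * MI_USER_INTERRUPT and MI_NOOP. The GEM_BUG_ONs in the
 * emit_fini_breadcrumb_*() functions below verify this accounting.
 */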
#define NON_SKIP_LEN 6
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
                                                 u32 *cs)
{
        struct intel_context *ce = rq->context;
        __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
        __maybe_unused u32 *start_fini_breadcrumb_cs = cs;

        GEM_BUG_ON(!intel_context_is_parent(ce));

        if (unlikely(skip_handshake(rq))) {
                /*
                 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch;
                 * the NON_SKIP_LEN comes from the length of the emits below.
                 */
                memset(cs, 0, sizeof(u32) *
                       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
                cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
        } else {
                cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
        }

        /* Emit fini breadcrumb */
        before_fini_breadcrumb_user_interrupt_cs = cs;
        cs = gen8_emit_ggtt_write(cs,
                                  rq->fence.seqno,
                                  i915_request_active_timeline(rq)->hwsp_offset,
                                  0);

        /* User interrupt */
        *cs++ = MI_USER_INTERRUPT;
        *cs++ = MI_NOOP;

        /* Ensure our math for skip + emit is correct */
        GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
                   cs);
        GEM_BUG_ON(start_fini_breadcrumb_cs +
                   ce->engine->emit_fini_breadcrumb_dw != cs);

        rq->tail = intel_ring_offset(rq, cs);

        return cs;
}

static u32 *
__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
                                                  u32 *cs)
{
        struct intel_context *ce = rq->context;
        struct intel_context *parent = intel_context_to_parent(ce);

        GEM_BUG_ON(!intel_context_is_child(ce));

        /* Turn on preemption */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        *cs++ = MI_NOOP;

        /* Signal parent */
        cs = gen8_emit_ggtt_write(cs,
                                  PARENT_GO_FINI_BREADCRUMB,
                                  get_children_join_addr(parent,
                                                         ce->parallel.child_index),
                                  0);

        /* Wait on parent for go */
        *cs++ = (MI_SEMAPHORE_WAIT |
                 MI_SEMAPHORE_GLOBAL_GTT |
                 MI_SEMAPHORE_POLL |
                 MI_SEMAPHORE_SAD_EQ_SDD);
        *cs++ = CHILD_GO_FINI_BREADCRUMB;
        *cs++ = get_children_go_addr(parent);
        *cs++ = 0;

        return cs;
}
static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
                                                u32 *cs)
{
        struct intel_context *ce = rq->context;
        __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
        __maybe_unused u32 *start_fini_breadcrumb_cs = cs;

        GEM_BUG_ON(!intel_context_is_child(ce));

        if (unlikely(skip_handshake(rq))) {
                /*
                 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch;
                 * the NON_SKIP_LEN comes from the length of the emits below.
                 */
                memset(cs, 0, sizeof(u32) *
                       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
                cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
        } else {
                cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
        }

        /* Emit fini breadcrumb */
        before_fini_breadcrumb_user_interrupt_cs = cs;
        cs = gen8_emit_ggtt_write(cs,
                                  rq->fence.seqno,
                                  i915_request_active_timeline(rq)->hwsp_offset,
                                  0);

        /* User interrupt */
        *cs++ = MI_USER_INTERRUPT;
        *cs++ = MI_NOOP;

        /* Ensure our math for skip + emit is correct */
        GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
                   cs);
        GEM_BUG_ON(start_fini_breadcrumb_cs +
                   ce->engine->emit_fini_breadcrumb_dw != cs);

        rq->tail = intel_ring_offset(rq, cs);

        return cs;
}

#undef NON_SKIP_LEN
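
/*
 * guc_create_virtual() builds a single composite engine on top of the
 * supplied physical siblings: the virtual engine accumulates the sibling
 * masks on top of the VIRTUAL_ENGINES bit, all siblings must be of the same
 * engine class, and the emit/breadcrumb vfuncs and scheduling properties are
 * inherited from the first sibling. The actual placement of work onto a
 * specific physical instance is left to the GuC.
 */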
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
                   unsigned long flags)
{
        struct guc_virtual_engine *ve;
        struct intel_guc *guc;
        unsigned int n;
        int err;

        ve = kzalloc(sizeof(*ve), GFP_KERNEL);
        if (!ve)
                return ERR_PTR(-ENOMEM);

        guc = &siblings[0]->gt->uc.guc;

        ve->base.i915 = siblings[0]->i915;
        ve->base.gt = siblings[0]->gt;
        ve->base.uncore = siblings[0]->uncore;
        ve->base.id = -1;

        ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
        ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
        ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
        ve->base.saturated = ALL_ENGINES;

        snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

        ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);

        ve->base.cops = &virtual_guc_context_ops;
        ve->base.request_alloc = guc_request_alloc;
        ve->base.bump_serial = virtual_guc_bump_serial;

        ve->base.submit_request = guc_submit_request;

        ve->base.flags = I915_ENGINE_IS_VIRTUAL;

        BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
        ve->base.mask = VIRTUAL_ENGINES;

        intel_context_init(&ve->context, &ve->base);

        for (n = 0; n < count; n++) {
                struct intel_engine_cs *sibling = siblings[n];

                GEM_BUG_ON(!is_power_of_2(sibling->mask));
                if (sibling->mask & ve->base.mask) {
                        guc_dbg(guc, "duplicate %s entry in load balancer\n",
                                sibling->name);
                        err = -EINVAL;
                        goto err_put;
                }

                ve->base.mask |= sibling->mask;
                ve->base.logical_mask |= sibling->logical_mask;

                if (n != 0 && ve->base.class != sibling->class) {
                        guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
                                sibling->class, ve->base.class);
                        err = -EINVAL;
                        goto err_put;
                } else if (n == 0) {
                        ve->base.class = sibling->class;
                        ve->base.uabi_class = sibling->uabi_class;
                        snprintf(ve->base.name, sizeof(ve->base.name),
                                 "v%dx%d", ve->base.class, count);
                        ve->base.context_size = sibling->context_size;

                        ve->base.add_active_request =
                                sibling->add_active_request;
                        ve->base.remove_active_request =
                                sibling->remove_active_request;
                        ve->base.emit_bb_start = sibling->emit_bb_start;
                        ve->base.emit_flush = sibling->emit_flush;
                        ve->base.emit_init_breadcrumb =
                                sibling->emit_init_breadcrumb;
                        ve->base.emit_fini_breadcrumb =
                                sibling->emit_fini_breadcrumb;
                        ve->base.emit_fini_breadcrumb_dw =
                                sibling->emit_fini_breadcrumb_dw;
                        ve->base.breadcrumbs =
                                intel_breadcrumbs_get(sibling->breadcrumbs);

                        ve->base.flags |= sibling->flags;

                        ve->base.props.timeslice_duration_ms =
                                sibling->props.timeslice_duration_ms;
                        ve->base.props.preempt_timeout_ms =
                                sibling->props.preempt_timeout_ms;
                }
        }

        return &ve->context;

err_put:
        intel_context_put(&ve->context);
        return ERR_PTR(err);
}

bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
{
        struct intel_engine_cs *engine;
        intel_engine_mask_t tmp, mask = ve->mask;

        for_each_engine_masked(engine, ve->gt, mask, tmp)
                if (READ_ONCE(engine->props.heartbeat_interval_ms))
                        return true;

        return false;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
#include "selftest_guc_multi_lrc.c"
#include "selftest_guc_hangcheck.c"
#endif