1 /* 2 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Zhi Wang <zhi.a.wang@intel.com> 25 * 26 * Contributors: 27 * Ping Gao <ping.a.gao@intel.com> 28 * Tina Zhang <tina.zhang@intel.com> 29 * Chanbin Du <changbin.du@intel.com> 30 * Min He <min.he@intel.com> 31 * Bing Niu <bing.niu@intel.com> 32 * Zhenyu Wang <zhenyuw@linux.intel.com> 33 * 34 */ 35 36 #include <linux/kthread.h> 37 38 #include <drm/drm_print.h> 39 40 #include "gem/i915_gem_pm.h" 41 42 #include "gt/intel_context.h" 43 #include "gt/intel_engine_regs.h" 44 #include "gt/intel_execlists_submission.h" 45 #include "gt/intel_gt_regs.h" 46 #include "gt/intel_lrc.h" 47 #include "gt/intel_ring.h" 48 49 #include "gvt.h" 50 #include "i915_drv.h" 51 #include "i915_gem_gtt.h" 52 #include "i915_perf_oa_regs.h" 53 #include "sched_policy.h" 54 55 #define RING_CTX_OFF(x) \ 56 offsetof(struct execlist_ring_context, x) 57 58 #define IS_RESTORE_INHIBIT(a) \ 59 IS_MASKED_BITS_ENABLED(a, CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) 60 61 static void set_context_pdp_root_pointer( 62 struct execlist_ring_context *ring_context, 63 u32 pdp[8]) 64 { 65 int i; 66 67 for (i = 0; i < 8; i++) 68 ring_context->pdps[i].val = pdp[7 - i]; 69 } 70 71 static void update_shadow_pdps(struct intel_vgpu_workload *workload) 72 { 73 struct execlist_ring_context *shadow_ring_context; 74 struct intel_context *ctx = workload->req->context; 75 76 if (WARN_ON(!workload->shadow_mm)) 77 return; 78 79 if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount))) 80 return; 81 82 shadow_ring_context = (struct execlist_ring_context *)ctx->lrc_reg_state; 83 set_context_pdp_root_pointer(shadow_ring_context, 84 (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps); 85 } 86 87 /* 88 * When populating shadow ctx from guest, we should not override oa related 89 * registers, so that they will not be overlapped by guest oa configs. Thus 90 * made it possible to capture oa data from host for both host and guests. 91 */ 92 static void sr_oa_regs(struct intel_vgpu_workload *workload, 93 u32 *reg_state, bool save) 94 { 95 struct drm_i915_private *dev_priv = workload->vgpu->gvt->gt->i915; 96 u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset; 97 u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset; 98 int i = 0; 99 u32 flex_mmio[] = { 100 i915_mmio_reg_offset(EU_PERF_CNTL0), 101 i915_mmio_reg_offset(EU_PERF_CNTL1), 102 i915_mmio_reg_offset(EU_PERF_CNTL2), 103 i915_mmio_reg_offset(EU_PERF_CNTL3), 104 i915_mmio_reg_offset(EU_PERF_CNTL4), 105 i915_mmio_reg_offset(EU_PERF_CNTL5), 106 i915_mmio_reg_offset(EU_PERF_CNTL6), 107 }; 108 109 if (workload->engine->id != RCS0) 110 return; 111 112 if (save) { 113 workload->oactxctrl = reg_state[ctx_oactxctrl + 1]; 114 115 for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { 116 u32 state_offset = ctx_flexeu0 + i * 2; 117 118 workload->flex_mmio[i] = reg_state[state_offset + 1]; 119 } 120 } else { 121 reg_state[ctx_oactxctrl] = 122 i915_mmio_reg_offset(GEN8_OACTXCONTROL); 123 reg_state[ctx_oactxctrl + 1] = workload->oactxctrl; 124 125 for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) { 126 u32 state_offset = ctx_flexeu0 + i * 2; 127 u32 mmio = flex_mmio[i]; 128 129 reg_state[state_offset] = mmio; 130 reg_state[state_offset + 1] = workload->flex_mmio[i]; 131 } 132 } 133 } 134 135 static int populate_shadow_context(struct intel_vgpu_workload *workload) 136 { 137 struct intel_vgpu *vgpu = workload->vgpu; 138 struct intel_gvt *gvt = vgpu->gvt; 139 struct intel_context *ctx = workload->req->context; 140 struct execlist_ring_context *shadow_ring_context; 141 void *dst; 142 void *context_base; 143 unsigned long context_gpa, context_page_num; 144 unsigned long gpa_base; /* first gpa of consecutive GPAs */ 145 unsigned long gpa_size; /* size of consecutive GPAs */ 146 struct intel_vgpu_submission *s = &vgpu->submission; 147 int i; 148 bool skip = false; 149 int ring_id = workload->engine->id; 150 int ret; 151 152 GEM_BUG_ON(!intel_context_is_pinned(ctx)); 153 154 context_base = (void *) ctx->lrc_reg_state - 155 (LRC_STATE_PN << I915_GTT_PAGE_SHIFT); 156 157 shadow_ring_context = (void *) ctx->lrc_reg_state; 158 159 sr_oa_regs(workload, (u32 *)shadow_ring_context, true); 160 #define COPY_REG(name) \ 161 intel_gvt_read_gpa(vgpu, workload->ring_context_gpa \ 162 + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) 163 #define COPY_REG_MASKED(name) {\ 164 intel_gvt_read_gpa(vgpu, workload->ring_context_gpa \ 165 + RING_CTX_OFF(name.val),\ 166 &shadow_ring_context->name.val, 4);\ 167 shadow_ring_context->name.val |= 0xffff << 16;\ 168 } 169 170 COPY_REG_MASKED(ctx_ctrl); 171 COPY_REG(ctx_timestamp); 172 173 if (workload->engine->id == RCS0) { 174 COPY_REG(bb_per_ctx_ptr); 175 COPY_REG(rcs_indirect_ctx); 176 COPY_REG(rcs_indirect_ctx_offset); 177 } else if (workload->engine->id == BCS0) 178 intel_gvt_read_gpa(vgpu, 179 workload->ring_context_gpa + 180 BCS_TILE_REGISTER_VAL_OFFSET, 181 (void *)shadow_ring_context + 182 BCS_TILE_REGISTER_VAL_OFFSET, 4); 183 #undef COPY_REG 184 #undef COPY_REG_MASKED 185 186 /* don't copy Ring Context (the first 0x50 dwords), 187 * only copy the Engine Context part from guest 188 */ 189 intel_gvt_read_gpa(vgpu, 190 workload->ring_context_gpa + 191 RING_CTX_SIZE, 192 (void *)shadow_ring_context + 193 RING_CTX_SIZE, 194 I915_GTT_PAGE_SIZE - RING_CTX_SIZE); 195 196 sr_oa_regs(workload, (u32 *)shadow_ring_context, false); 197 198 gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx", 199 workload->engine->name, workload->ctx_desc.lrca, 200 workload->ctx_desc.context_id, 201 workload->ring_context_gpa); 202 203 /* only need to ensure this context is not pinned/unpinned during the 204 * period from last submission to this this submission. 205 * Upon reaching this function, the currently submitted context is not 206 * supposed to get unpinned. If a misbehaving guest driver ever does 207 * this, it would corrupt itself. 208 */ 209 if (s->last_ctx[ring_id].valid && 210 (s->last_ctx[ring_id].lrca == 211 workload->ctx_desc.lrca) && 212 (s->last_ctx[ring_id].ring_context_gpa == 213 workload->ring_context_gpa)) 214 skip = true; 215 216 s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca; 217 s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa; 218 219 if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip) 220 return 0; 221 222 s->last_ctx[ring_id].valid = false; 223 context_page_num = workload->engine->context_size; 224 context_page_num = context_page_num >> PAGE_SHIFT; 225 226 if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0) 227 context_page_num = 19; 228 229 /* find consecutive GPAs from gma until the first inconsecutive GPA. 230 * read from the continuous GPAs into dst virtual address 231 */ 232 gpa_size = 0; 233 for (i = 2; i < context_page_num; i++) { 234 context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, 235 (u32)((workload->ctx_desc.lrca + i) << 236 I915_GTT_PAGE_SHIFT)); 237 if (context_gpa == INTEL_GVT_INVALID_ADDR) { 238 gvt_vgpu_err("Invalid guest context descriptor\n"); 239 return -EFAULT; 240 } 241 242 if (gpa_size == 0) { 243 gpa_base = context_gpa; 244 dst = context_base + (i << I915_GTT_PAGE_SHIFT); 245 } else if (context_gpa != gpa_base + gpa_size) 246 goto read; 247 248 gpa_size += I915_GTT_PAGE_SIZE; 249 250 if (i == context_page_num - 1) 251 goto read; 252 253 continue; 254 255 read: 256 intel_gvt_read_gpa(vgpu, gpa_base, dst, gpa_size); 257 gpa_base = context_gpa; 258 gpa_size = I915_GTT_PAGE_SIZE; 259 dst = context_base + (i << I915_GTT_PAGE_SHIFT); 260 } 261 ret = intel_gvt_scan_engine_context(workload); 262 if (ret) { 263 gvt_vgpu_err("invalid cmd found in guest context pages\n"); 264 return ret; 265 } 266 s->last_ctx[ring_id].valid = true; 267 return 0; 268 } 269 270 static inline bool is_gvt_request(struct i915_request *rq) 271 { 272 return intel_context_force_single_submission(rq->context); 273 } 274 275 static void save_ring_hw_state(struct intel_vgpu *vgpu, 276 const struct intel_engine_cs *engine) 277 { 278 struct intel_uncore *uncore = engine->uncore; 279 i915_reg_t reg; 280 281 reg = RING_INSTDONE(engine->mmio_base); 282 vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = 283 intel_uncore_read(uncore, reg); 284 285 reg = RING_ACTHD(engine->mmio_base); 286 vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = 287 intel_uncore_read(uncore, reg); 288 289 reg = RING_ACTHD_UDW(engine->mmio_base); 290 vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = 291 intel_uncore_read(uncore, reg); 292 } 293 294 static int shadow_context_status_change(struct notifier_block *nb, 295 unsigned long action, void *data) 296 { 297 struct i915_request *rq = data; 298 struct intel_gvt *gvt = container_of(nb, struct intel_gvt, 299 shadow_ctx_notifier_block[rq->engine->id]); 300 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 301 enum intel_engine_id ring_id = rq->engine->id; 302 struct intel_vgpu_workload *workload; 303 unsigned long flags; 304 305 if (!is_gvt_request(rq)) { 306 spin_lock_irqsave(&scheduler->mmio_context_lock, flags); 307 if (action == INTEL_CONTEXT_SCHEDULE_IN && 308 scheduler->engine_owner[ring_id]) { 309 /* Switch ring from vGPU to host. */ 310 intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], 311 NULL, rq->engine); 312 scheduler->engine_owner[ring_id] = NULL; 313 } 314 spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); 315 316 return NOTIFY_OK; 317 } 318 319 workload = scheduler->current_workload[ring_id]; 320 if (unlikely(!workload)) 321 return NOTIFY_OK; 322 323 switch (action) { 324 case INTEL_CONTEXT_SCHEDULE_IN: 325 spin_lock_irqsave(&scheduler->mmio_context_lock, flags); 326 if (workload->vgpu != scheduler->engine_owner[ring_id]) { 327 /* Switch ring from host to vGPU or vGPU to vGPU. */ 328 intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], 329 workload->vgpu, rq->engine); 330 scheduler->engine_owner[ring_id] = workload->vgpu; 331 } else 332 gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n", 333 ring_id, workload->vgpu->id); 334 spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); 335 atomic_set(&workload->shadow_ctx_active, 1); 336 break; 337 case INTEL_CONTEXT_SCHEDULE_OUT: 338 save_ring_hw_state(workload->vgpu, rq->engine); 339 atomic_set(&workload->shadow_ctx_active, 0); 340 break; 341 case INTEL_CONTEXT_SCHEDULE_PREEMPTED: 342 save_ring_hw_state(workload->vgpu, rq->engine); 343 break; 344 default: 345 WARN_ON(1); 346 return NOTIFY_OK; 347 } 348 wake_up(&workload->shadow_ctx_status_wq); 349 return NOTIFY_OK; 350 } 351 352 static void 353 shadow_context_descriptor_update(struct intel_context *ce, 354 struct intel_vgpu_workload *workload) 355 { 356 u64 desc = ce->lrc.desc; 357 358 /* 359 * Update bits 0-11 of the context descriptor which includes flags 360 * like GEN8_CTX_* cached in desc_template 361 */ 362 desc &= ~(0x3ull << GEN8_CTX_ADDRESSING_MODE_SHIFT); 363 desc |= (u64)workload->ctx_desc.addressing_mode << 364 GEN8_CTX_ADDRESSING_MODE_SHIFT; 365 366 ce->lrc.desc = desc; 367 } 368 369 static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) 370 { 371 struct intel_vgpu *vgpu = workload->vgpu; 372 struct i915_request *req = workload->req; 373 void *shadow_ring_buffer_va; 374 u32 *cs; 375 int err; 376 377 if (GRAPHICS_VER(req->engine->i915) == 9 && is_inhibit_context(req->context)) 378 intel_vgpu_restore_inhibit_context(vgpu, req); 379 380 /* 381 * To track whether a request has started on HW, we can emit a 382 * breadcrumb at the beginning of the request and check its 383 * timeline's HWSP to see if the breadcrumb has advanced past the 384 * start of this request. Actually, the request must have the 385 * init_breadcrumb if its timeline set has_init_bread_crumb, or the 386 * scheduler might get a wrong state of it during reset. Since the 387 * requests from gvt always set the has_init_breadcrumb flag, here 388 * need to do the emit_init_breadcrumb for all the requests. 389 */ 390 if (req->engine->emit_init_breadcrumb) { 391 err = req->engine->emit_init_breadcrumb(req); 392 if (err) { 393 gvt_vgpu_err("fail to emit init breadcrumb\n"); 394 return err; 395 } 396 } 397 398 /* allocate shadow ring buffer */ 399 cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); 400 if (IS_ERR(cs)) { 401 gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n", 402 workload->rb_len); 403 return PTR_ERR(cs); 404 } 405 406 shadow_ring_buffer_va = workload->shadow_ring_buffer_va; 407 408 /* get shadow ring buffer va */ 409 workload->shadow_ring_buffer_va = cs; 410 411 memcpy(cs, shadow_ring_buffer_va, 412 workload->rb_len); 413 414 cs += workload->rb_len / sizeof(u32); 415 intel_ring_advance(workload->req, cs); 416 417 return 0; 418 } 419 420 static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) 421 { 422 if (!wa_ctx->indirect_ctx.obj) 423 return; 424 425 i915_gem_object_lock(wa_ctx->indirect_ctx.obj, NULL); 426 i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj); 427 i915_gem_object_unlock(wa_ctx->indirect_ctx.obj); 428 i915_gem_object_put(wa_ctx->indirect_ctx.obj); 429 430 wa_ctx->indirect_ctx.obj = NULL; 431 wa_ctx->indirect_ctx.shadow_va = NULL; 432 } 433 434 static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr) 435 { 436 struct scatterlist *sg = pd->pt.base->mm.pages->sgl; 437 438 /* This is not a good idea */ 439 sg->dma_address = addr; 440 } 441 442 static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, 443 struct intel_context *ce) 444 { 445 struct intel_vgpu_mm *mm = workload->shadow_mm; 446 struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm); 447 int i = 0; 448 449 if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { 450 set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]); 451 } else { 452 for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) { 453 struct i915_page_directory * const pd = 454 i915_pd_entry(ppgtt->pd, i); 455 /* skip now as current i915 ppgtt alloc won't allocate 456 top level pdp for non 4-level table, won't impact 457 shadow ppgtt. */ 458 if (!pd) 459 break; 460 461 set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]); 462 } 463 } 464 } 465 466 static int 467 intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) 468 { 469 struct intel_vgpu *vgpu = workload->vgpu; 470 struct intel_vgpu_submission *s = &vgpu->submission; 471 struct i915_request *rq; 472 473 if (workload->req) 474 return 0; 475 476 rq = i915_request_create(s->shadow[workload->engine->id]); 477 if (IS_ERR(rq)) { 478 gvt_vgpu_err("fail to allocate gem request\n"); 479 return PTR_ERR(rq); 480 } 481 482 workload->req = i915_request_get(rq); 483 return 0; 484 } 485 486 /** 487 * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and 488 * shadow it as well, include ringbuffer,wa_ctx and ctx. 489 * @workload: an abstract entity for each execlist submission. 490 * 491 * This function is called before the workload submitting to i915, to make 492 * sure the content of the workload is valid. 493 */ 494 int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) 495 { 496 struct intel_vgpu *vgpu = workload->vgpu; 497 struct intel_vgpu_submission *s = &vgpu->submission; 498 int ret; 499 500 lockdep_assert_held(&vgpu->vgpu_lock); 501 502 if (workload->shadow) 503 return 0; 504 505 if (!test_and_set_bit(workload->engine->id, s->shadow_ctx_desc_updated)) 506 shadow_context_descriptor_update(s->shadow[workload->engine->id], 507 workload); 508 509 ret = intel_gvt_scan_and_shadow_ringbuffer(workload); 510 if (ret) 511 return ret; 512 513 if (workload->engine->id == RCS0 && 514 workload->wa_ctx.indirect_ctx.size) { 515 ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); 516 if (ret) 517 goto err_shadow; 518 } 519 520 workload->shadow = true; 521 return 0; 522 523 err_shadow: 524 release_shadow_wa_ctx(&workload->wa_ctx); 525 return ret; 526 } 527 528 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload); 529 530 static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) 531 { 532 struct intel_gvt *gvt = workload->vgpu->gvt; 533 const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd; 534 struct intel_vgpu_shadow_bb *bb; 535 struct i915_gem_ww_ctx ww; 536 int ret; 537 538 list_for_each_entry(bb, &workload->shadow_bb, list) { 539 /* 540 * For privilege batch buffer and not wa_ctx, the bb_start_cmd_va 541 * is only updated into ring_scan_buffer, not real ring address 542 * allocated in later copy_workload_to_ring_buffer. Please be noted 543 * shadow_ring_buffer_va is now pointed to real ring buffer va 544 * in copy_workload_to_ring_buffer. 545 */ 546 547 if (bb->bb_offset) 548 bb->bb_start_cmd_va = workload->shadow_ring_buffer_va 549 + bb->bb_offset; 550 551 /* 552 * For non-priv bb, scan&shadow is only for 553 * debugging purpose, so the content of shadow bb 554 * is the same as original bb. Therefore, 555 * here, rather than switch to shadow bb's gma 556 * address, we directly use original batch buffer's 557 * gma address, and send original bb to hardware 558 * directly. 559 */ 560 if (!bb->ppgtt) { 561 i915_gem_ww_ctx_init(&ww, false); 562 retry: 563 i915_gem_object_lock(bb->obj, &ww); 564 565 bb->vma = i915_gem_object_ggtt_pin_ww(bb->obj, &ww, 566 NULL, 0, 0, 0); 567 if (IS_ERR(bb->vma)) { 568 ret = PTR_ERR(bb->vma); 569 if (ret == -EDEADLK) { 570 ret = i915_gem_ww_ctx_backoff(&ww); 571 if (!ret) 572 goto retry; 573 } 574 goto err; 575 } 576 577 /* relocate shadow batch buffer */ 578 bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); 579 if (gmadr_bytes == 8) 580 bb->bb_start_cmd_va[2] = 0; 581 582 ret = i915_vma_move_to_active(bb->vma, workload->req, 583 __EXEC_OBJECT_NO_REQUEST_AWAIT); 584 if (ret) 585 goto err; 586 587 /* No one is going to touch shadow bb from now on. */ 588 i915_gem_object_flush_map(bb->obj); 589 i915_gem_ww_ctx_fini(&ww); 590 } 591 } 592 return 0; 593 err: 594 i915_gem_ww_ctx_fini(&ww); 595 release_shadow_batch_buffer(workload); 596 return ret; 597 } 598 599 static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) 600 { 601 struct intel_vgpu_workload *workload = 602 container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx); 603 struct i915_request *rq = workload->req; 604 struct execlist_ring_context *shadow_ring_context = 605 (struct execlist_ring_context *)rq->context->lrc_reg_state; 606 607 shadow_ring_context->bb_per_ctx_ptr.val = 608 (shadow_ring_context->bb_per_ctx_ptr.val & 609 (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma; 610 shadow_ring_context->rcs_indirect_ctx.val = 611 (shadow_ring_context->rcs_indirect_ctx.val & 612 (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; 613 } 614 615 static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) 616 { 617 struct i915_vma *vma; 618 unsigned char *per_ctx_va = 619 (unsigned char *)wa_ctx->indirect_ctx.shadow_va + 620 wa_ctx->indirect_ctx.size; 621 struct i915_gem_ww_ctx ww; 622 int ret; 623 624 if (wa_ctx->indirect_ctx.size == 0) 625 return 0; 626 627 i915_gem_ww_ctx_init(&ww, false); 628 retry: 629 i915_gem_object_lock(wa_ctx->indirect_ctx.obj, &ww); 630 631 vma = i915_gem_object_ggtt_pin_ww(wa_ctx->indirect_ctx.obj, &ww, NULL, 632 0, CACHELINE_BYTES, 0); 633 if (IS_ERR(vma)) { 634 ret = PTR_ERR(vma); 635 if (ret == -EDEADLK) { 636 ret = i915_gem_ww_ctx_backoff(&ww); 637 if (!ret) 638 goto retry; 639 } 640 return ret; 641 } 642 643 i915_gem_ww_ctx_fini(&ww); 644 645 /* FIXME: we are not tracking our pinned VMA leaving it 646 * up to the core to fix up the stray pin_count upon 647 * free. 648 */ 649 650 wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma); 651 652 wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1); 653 memset(per_ctx_va, 0, CACHELINE_BYTES); 654 655 update_wa_ctx_2_shadow_ctx(wa_ctx); 656 return 0; 657 } 658 659 static void update_vreg_in_ctx(struct intel_vgpu_workload *workload) 660 { 661 vgpu_vreg_t(workload->vgpu, RING_START(workload->engine->mmio_base)) = 662 workload->rb_start; 663 } 664 665 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) 666 { 667 struct intel_vgpu_shadow_bb *bb, *pos; 668 669 if (list_empty(&workload->shadow_bb)) 670 return; 671 672 bb = list_first_entry(&workload->shadow_bb, 673 struct intel_vgpu_shadow_bb, list); 674 675 list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { 676 if (bb->obj) { 677 i915_gem_object_lock(bb->obj, NULL); 678 if (bb->va && !IS_ERR(bb->va)) 679 i915_gem_object_unpin_map(bb->obj); 680 681 if (bb->vma && !IS_ERR(bb->vma)) 682 i915_vma_unpin(bb->vma); 683 684 i915_gem_object_unlock(bb->obj); 685 i915_gem_object_put(bb->obj); 686 } 687 list_del(&bb->list); 688 kfree(bb); 689 } 690 } 691 692 static int 693 intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) 694 { 695 struct intel_vgpu *vgpu = workload->vgpu; 696 struct intel_vgpu_mm *m; 697 int ret = 0; 698 699 ret = intel_vgpu_pin_mm(workload->shadow_mm); 700 if (ret) { 701 gvt_vgpu_err("fail to vgpu pin mm\n"); 702 return ret; 703 } 704 705 if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || 706 !workload->shadow_mm->ppgtt_mm.shadowed) { 707 intel_vgpu_unpin_mm(workload->shadow_mm); 708 gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); 709 return -EINVAL; 710 } 711 712 if (!list_empty(&workload->lri_shadow_mm)) { 713 list_for_each_entry(m, &workload->lri_shadow_mm, 714 ppgtt_mm.link) { 715 ret = intel_vgpu_pin_mm(m); 716 if (ret) { 717 list_for_each_entry_from_reverse(m, 718 &workload->lri_shadow_mm, 719 ppgtt_mm.link) 720 intel_vgpu_unpin_mm(m); 721 gvt_vgpu_err("LRI shadow ppgtt fail to pin\n"); 722 break; 723 } 724 } 725 } 726 727 if (ret) 728 intel_vgpu_unpin_mm(workload->shadow_mm); 729 730 return ret; 731 } 732 733 static void 734 intel_vgpu_shadow_mm_unpin(struct intel_vgpu_workload *workload) 735 { 736 struct intel_vgpu_mm *m; 737 738 if (!list_empty(&workload->lri_shadow_mm)) { 739 list_for_each_entry(m, &workload->lri_shadow_mm, 740 ppgtt_mm.link) 741 intel_vgpu_unpin_mm(m); 742 } 743 intel_vgpu_unpin_mm(workload->shadow_mm); 744 } 745 746 static int prepare_workload(struct intel_vgpu_workload *workload) 747 { 748 struct intel_vgpu *vgpu = workload->vgpu; 749 struct intel_vgpu_submission *s = &vgpu->submission; 750 int ret = 0; 751 752 ret = intel_vgpu_shadow_mm_pin(workload); 753 if (ret) { 754 gvt_vgpu_err("fail to pin shadow mm\n"); 755 return ret; 756 } 757 758 update_shadow_pdps(workload); 759 760 set_context_ppgtt_from_shadow(workload, s->shadow[workload->engine->id]); 761 762 ret = intel_vgpu_sync_oos_pages(workload->vgpu); 763 if (ret) { 764 gvt_vgpu_err("fail to vgpu sync oos pages\n"); 765 goto err_unpin_mm; 766 } 767 768 ret = intel_vgpu_flush_post_shadow(workload->vgpu); 769 if (ret) { 770 gvt_vgpu_err("fail to flush post shadow\n"); 771 goto err_unpin_mm; 772 } 773 774 ret = copy_workload_to_ring_buffer(workload); 775 if (ret) { 776 gvt_vgpu_err("fail to generate request\n"); 777 goto err_unpin_mm; 778 } 779 780 ret = prepare_shadow_batch_buffer(workload); 781 if (ret) { 782 gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); 783 goto err_unpin_mm; 784 } 785 786 ret = prepare_shadow_wa_ctx(&workload->wa_ctx); 787 if (ret) { 788 gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n"); 789 goto err_shadow_batch; 790 } 791 792 if (workload->prepare) { 793 ret = workload->prepare(workload); 794 if (ret) 795 goto err_shadow_wa_ctx; 796 } 797 798 return 0; 799 err_shadow_wa_ctx: 800 release_shadow_wa_ctx(&workload->wa_ctx); 801 err_shadow_batch: 802 release_shadow_batch_buffer(workload); 803 err_unpin_mm: 804 intel_vgpu_shadow_mm_unpin(workload); 805 return ret; 806 } 807 808 static int dispatch_workload(struct intel_vgpu_workload *workload) 809 { 810 struct intel_vgpu *vgpu = workload->vgpu; 811 struct i915_request *rq; 812 int ret; 813 814 gvt_dbg_sched("ring id %s prepare to dispatch workload %p\n", 815 workload->engine->name, workload); 816 817 mutex_lock(&vgpu->vgpu_lock); 818 819 ret = intel_gvt_workload_req_alloc(workload); 820 if (ret) 821 goto err_req; 822 823 ret = intel_gvt_scan_and_shadow_workload(workload); 824 if (ret) 825 goto out; 826 827 ret = populate_shadow_context(workload); 828 if (ret) { 829 release_shadow_wa_ctx(&workload->wa_ctx); 830 goto out; 831 } 832 833 ret = prepare_workload(workload); 834 out: 835 if (ret) { 836 /* We might still need to add request with 837 * clean ctx to retire it properly.. 838 */ 839 rq = fetch_and_zero(&workload->req); 840 i915_request_put(rq); 841 } 842 843 if (!IS_ERR_OR_NULL(workload->req)) { 844 gvt_dbg_sched("ring id %s submit workload to i915 %p\n", 845 workload->engine->name, workload->req); 846 i915_request_add(workload->req); 847 workload->dispatched = true; 848 } 849 err_req: 850 if (ret) 851 workload->status = ret; 852 mutex_unlock(&vgpu->vgpu_lock); 853 return ret; 854 } 855 856 static struct intel_vgpu_workload * 857 pick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine) 858 { 859 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 860 struct intel_vgpu_workload *workload = NULL; 861 862 mutex_lock(&gvt->sched_lock); 863 864 /* 865 * no current vgpu / will be scheduled out / no workload 866 * bail out 867 */ 868 if (!scheduler->current_vgpu) { 869 gvt_dbg_sched("ring %s stop - no current vgpu\n", engine->name); 870 goto out; 871 } 872 873 if (scheduler->need_reschedule) { 874 gvt_dbg_sched("ring %s stop - will reschedule\n", engine->name); 875 goto out; 876 } 877 878 if (!test_bit(INTEL_VGPU_STATUS_ACTIVE, 879 scheduler->current_vgpu->status) || 880 list_empty(workload_q_head(scheduler->current_vgpu, engine))) 881 goto out; 882 883 /* 884 * still have current workload, maybe the workload disptacher 885 * fail to submit it for some reason, resubmit it. 886 */ 887 if (scheduler->current_workload[engine->id]) { 888 workload = scheduler->current_workload[engine->id]; 889 gvt_dbg_sched("ring %s still have current workload %p\n", 890 engine->name, workload); 891 goto out; 892 } 893 894 /* 895 * pick a workload as current workload 896 * once current workload is set, schedule policy routines 897 * will wait the current workload is finished when trying to 898 * schedule out a vgpu. 899 */ 900 scheduler->current_workload[engine->id] = 901 list_first_entry(workload_q_head(scheduler->current_vgpu, 902 engine), 903 struct intel_vgpu_workload, list); 904 905 workload = scheduler->current_workload[engine->id]; 906 907 gvt_dbg_sched("ring %s pick new workload %p\n", engine->name, workload); 908 909 atomic_inc(&workload->vgpu->submission.running_workload_num); 910 out: 911 mutex_unlock(&gvt->sched_lock); 912 return workload; 913 } 914 915 static void update_guest_pdps(struct intel_vgpu *vgpu, 916 u64 ring_context_gpa, u32 pdp[8]) 917 { 918 u64 gpa; 919 int i; 920 921 gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); 922 923 for (i = 0; i < 8; i++) 924 intel_gvt_write_gpa(vgpu, gpa + i * 8, &pdp[7 - i], 4); 925 } 926 927 static __maybe_unused bool 928 check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) 929 { 930 if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { 931 u64 shadow_pdp = c->pdps[7].val | (u64) c->pdps[6].val << 32; 932 933 if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) { 934 gvt_dbg_mm("4-level context ppgtt not match LRI command\n"); 935 return false; 936 } 937 return true; 938 } else { 939 /* see comment in LRI handler in cmd_parser.c */ 940 gvt_dbg_mm("invalid shadow mm type\n"); 941 return false; 942 } 943 } 944 945 static void update_guest_context(struct intel_vgpu_workload *workload) 946 { 947 struct i915_request *rq = workload->req; 948 struct intel_vgpu *vgpu = workload->vgpu; 949 struct execlist_ring_context *shadow_ring_context; 950 struct intel_context *ctx = workload->req->context; 951 void *context_base; 952 void *src; 953 unsigned long context_gpa, context_page_num; 954 unsigned long gpa_base; /* first gpa of consecutive GPAs */ 955 unsigned long gpa_size; /* size of consecutive GPAs*/ 956 int i; 957 u32 ring_base; 958 u32 head, tail; 959 u16 wrap_count; 960 961 gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id, 962 workload->ctx_desc.lrca); 963 964 GEM_BUG_ON(!intel_context_is_pinned(ctx)); 965 966 head = workload->rb_head; 967 tail = workload->rb_tail; 968 wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF; 969 970 if (tail < head) { 971 if (wrap_count == RB_HEAD_WRAP_CNT_MAX) 972 wrap_count = 0; 973 else 974 wrap_count += 1; 975 } 976 977 head = (wrap_count << RB_HEAD_WRAP_CNT_OFF) | tail; 978 979 ring_base = rq->engine->mmio_base; 980 vgpu_vreg_t(vgpu, RING_TAIL(ring_base)) = tail; 981 vgpu_vreg_t(vgpu, RING_HEAD(ring_base)) = head; 982 983 context_page_num = rq->engine->context_size; 984 context_page_num = context_page_num >> PAGE_SHIFT; 985 986 if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0) 987 context_page_num = 19; 988 989 context_base = (void *) ctx->lrc_reg_state - 990 (LRC_STATE_PN << I915_GTT_PAGE_SHIFT); 991 992 /* find consecutive GPAs from gma until the first inconsecutive GPA. 993 * write to the consecutive GPAs from src virtual address 994 */ 995 gpa_size = 0; 996 for (i = 2; i < context_page_num; i++) { 997 context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, 998 (u32)((workload->ctx_desc.lrca + i) << 999 I915_GTT_PAGE_SHIFT)); 1000 if (context_gpa == INTEL_GVT_INVALID_ADDR) { 1001 gvt_vgpu_err("invalid guest context descriptor\n"); 1002 return; 1003 } 1004 1005 if (gpa_size == 0) { 1006 gpa_base = context_gpa; 1007 src = context_base + (i << I915_GTT_PAGE_SHIFT); 1008 } else if (context_gpa != gpa_base + gpa_size) 1009 goto write; 1010 1011 gpa_size += I915_GTT_PAGE_SIZE; 1012 1013 if (i == context_page_num - 1) 1014 goto write; 1015 1016 continue; 1017 1018 write: 1019 intel_gvt_write_gpa(vgpu, gpa_base, src, gpa_size); 1020 gpa_base = context_gpa; 1021 gpa_size = I915_GTT_PAGE_SIZE; 1022 src = context_base + (i << I915_GTT_PAGE_SHIFT); 1023 } 1024 1025 intel_gvt_write_gpa(vgpu, workload->ring_context_gpa + 1026 RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4); 1027 1028 shadow_ring_context = (void *) ctx->lrc_reg_state; 1029 1030 if (!list_empty(&workload->lri_shadow_mm)) { 1031 struct intel_vgpu_mm *m = list_last_entry(&workload->lri_shadow_mm, 1032 struct intel_vgpu_mm, 1033 ppgtt_mm.link); 1034 GEM_BUG_ON(!check_shadow_context_ppgtt(shadow_ring_context, m)); 1035 update_guest_pdps(vgpu, workload->ring_context_gpa, 1036 (void *)m->ppgtt_mm.guest_pdps); 1037 } 1038 1039 #define COPY_REG(name) \ 1040 intel_gvt_write_gpa(vgpu, workload->ring_context_gpa + \ 1041 RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) 1042 1043 COPY_REG(ctx_ctrl); 1044 COPY_REG(ctx_timestamp); 1045 1046 #undef COPY_REG 1047 1048 intel_gvt_write_gpa(vgpu, 1049 workload->ring_context_gpa + 1050 sizeof(*shadow_ring_context), 1051 (void *)shadow_ring_context + 1052 sizeof(*shadow_ring_context), 1053 I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); 1054 } 1055 1056 void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, 1057 intel_engine_mask_t engine_mask) 1058 { 1059 struct intel_vgpu_submission *s = &vgpu->submission; 1060 struct intel_engine_cs *engine; 1061 struct intel_vgpu_workload *pos, *n; 1062 intel_engine_mask_t tmp; 1063 1064 /* free the unsubmitted workloads in the queues. */ 1065 for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) { 1066 list_for_each_entry_safe(pos, n, 1067 &s->workload_q_head[engine->id], list) { 1068 list_del_init(&pos->list); 1069 intel_vgpu_destroy_workload(pos); 1070 } 1071 clear_bit(engine->id, s->shadow_ctx_desc_updated); 1072 } 1073 } 1074 1075 static void complete_current_workload(struct intel_gvt *gvt, int ring_id) 1076 { 1077 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 1078 struct intel_vgpu_workload *workload = 1079 scheduler->current_workload[ring_id]; 1080 struct intel_vgpu *vgpu = workload->vgpu; 1081 struct intel_vgpu_submission *s = &vgpu->submission; 1082 struct i915_request *rq = workload->req; 1083 int event; 1084 1085 mutex_lock(&vgpu->vgpu_lock); 1086 mutex_lock(&gvt->sched_lock); 1087 1088 /* For the workload w/ request, needs to wait for the context 1089 * switch to make sure request is completed. 1090 * For the workload w/o request, directly complete the workload. 1091 */ 1092 if (rq) { 1093 wait_event(workload->shadow_ctx_status_wq, 1094 !atomic_read(&workload->shadow_ctx_active)); 1095 1096 /* If this request caused GPU hang, req->fence.error will 1097 * be set to -EIO. Use -EIO to set workload status so 1098 * that when this request caused GPU hang, didn't trigger 1099 * context switch interrupt to guest. 1100 */ 1101 if (likely(workload->status == -EINPROGRESS)) { 1102 if (workload->req->fence.error == -EIO) 1103 workload->status = -EIO; 1104 else 1105 workload->status = 0; 1106 } 1107 1108 if (!workload->status && 1109 !(vgpu->resetting_eng & BIT(ring_id))) { 1110 update_guest_context(workload); 1111 1112 for_each_set_bit(event, workload->pending_events, 1113 INTEL_GVT_EVENT_MAX) 1114 intel_vgpu_trigger_virtual_event(vgpu, event); 1115 } 1116 1117 i915_request_put(fetch_and_zero(&workload->req)); 1118 } 1119 1120 gvt_dbg_sched("ring id %d complete workload %p status %d\n", 1121 ring_id, workload, workload->status); 1122 1123 scheduler->current_workload[ring_id] = NULL; 1124 1125 list_del_init(&workload->list); 1126 1127 if (workload->status || vgpu->resetting_eng & BIT(ring_id)) { 1128 /* if workload->status is not successful means HW GPU 1129 * has occurred GPU hang or something wrong with i915/GVT, 1130 * and GVT won't inject context switch interrupt to guest. 1131 * So this error is a vGPU hang actually to the guest. 1132 * According to this we should emunlate a vGPU hang. If 1133 * there are pending workloads which are already submitted 1134 * from guest, we should clean them up like HW GPU does. 1135 * 1136 * if it is in middle of engine resetting, the pending 1137 * workloads won't be submitted to HW GPU and will be 1138 * cleaned up during the resetting process later, so doing 1139 * the workload clean up here doesn't have any impact. 1140 **/ 1141 intel_vgpu_clean_workloads(vgpu, BIT(ring_id)); 1142 } 1143 1144 workload->complete(workload); 1145 1146 intel_vgpu_shadow_mm_unpin(workload); 1147 intel_vgpu_destroy_workload(workload); 1148 1149 atomic_dec(&s->running_workload_num); 1150 wake_up(&scheduler->workload_complete_wq); 1151 1152 if (gvt->scheduler.need_reschedule) 1153 intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); 1154 1155 mutex_unlock(&gvt->sched_lock); 1156 mutex_unlock(&vgpu->vgpu_lock); 1157 } 1158 1159 static int workload_thread(void *arg) 1160 { 1161 struct intel_engine_cs *engine = arg; 1162 const bool need_force_wake = GRAPHICS_VER(engine->i915) >= 9; 1163 struct intel_gvt *gvt = engine->i915->gvt; 1164 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 1165 struct intel_vgpu_workload *workload = NULL; 1166 struct intel_vgpu *vgpu = NULL; 1167 int ret; 1168 DEFINE_WAIT_FUNC(wait, woken_wake_function); 1169 1170 gvt_dbg_core("workload thread for ring %s started\n", engine->name); 1171 1172 while (!kthread_should_stop()) { 1173 intel_wakeref_t wakeref; 1174 1175 add_wait_queue(&scheduler->waitq[engine->id], &wait); 1176 do { 1177 workload = pick_next_workload(gvt, engine); 1178 if (workload) 1179 break; 1180 wait_woken(&wait, TASK_INTERRUPTIBLE, 1181 MAX_SCHEDULE_TIMEOUT); 1182 } while (!kthread_should_stop()); 1183 remove_wait_queue(&scheduler->waitq[engine->id], &wait); 1184 1185 if (!workload) 1186 break; 1187 1188 gvt_dbg_sched("ring %s next workload %p vgpu %d\n", 1189 engine->name, workload, 1190 workload->vgpu->id); 1191 1192 wakeref = intel_runtime_pm_get(engine->uncore->rpm); 1193 1194 gvt_dbg_sched("ring %s will dispatch workload %p\n", 1195 engine->name, workload); 1196 1197 if (need_force_wake) 1198 intel_uncore_forcewake_get(engine->uncore, 1199 FORCEWAKE_ALL); 1200 /* 1201 * Update the vReg of the vGPU which submitted this 1202 * workload. The vGPU may use these registers for checking 1203 * the context state. The value comes from GPU commands 1204 * in this workload. 1205 */ 1206 update_vreg_in_ctx(workload); 1207 1208 ret = dispatch_workload(workload); 1209 1210 if (ret) { 1211 vgpu = workload->vgpu; 1212 gvt_vgpu_err("fail to dispatch workload, skip\n"); 1213 goto complete; 1214 } 1215 1216 gvt_dbg_sched("ring %s wait workload %p\n", 1217 engine->name, workload); 1218 i915_request_wait(workload->req, 0, MAX_SCHEDULE_TIMEOUT); 1219 1220 complete: 1221 gvt_dbg_sched("will complete workload %p, status: %d\n", 1222 workload, workload->status); 1223 1224 complete_current_workload(gvt, engine->id); 1225 1226 if (need_force_wake) 1227 intel_uncore_forcewake_put(engine->uncore, 1228 FORCEWAKE_ALL); 1229 1230 intel_runtime_pm_put(engine->uncore->rpm, wakeref); 1231 if (ret && (vgpu_is_vm_unhealthy(ret))) 1232 enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); 1233 } 1234 return 0; 1235 } 1236 1237 void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu) 1238 { 1239 struct intel_vgpu_submission *s = &vgpu->submission; 1240 struct intel_gvt *gvt = vgpu->gvt; 1241 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 1242 1243 if (atomic_read(&s->running_workload_num)) { 1244 gvt_dbg_sched("wait vgpu idle\n"); 1245 1246 wait_event(scheduler->workload_complete_wq, 1247 !atomic_read(&s->running_workload_num)); 1248 } 1249 } 1250 1251 void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt) 1252 { 1253 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 1254 struct intel_engine_cs *engine; 1255 enum intel_engine_id i; 1256 1257 gvt_dbg_core("clean workload scheduler\n"); 1258 1259 for_each_engine(engine, gvt->gt, i) { 1260 atomic_notifier_chain_unregister( 1261 &engine->context_status_notifier, 1262 &gvt->shadow_ctx_notifier_block[i]); 1263 kthread_stop(scheduler->thread[i]); 1264 } 1265 } 1266 1267 int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) 1268 { 1269 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 1270 struct intel_engine_cs *engine; 1271 enum intel_engine_id i; 1272 int ret; 1273 1274 gvt_dbg_core("init workload scheduler\n"); 1275 1276 init_waitqueue_head(&scheduler->workload_complete_wq); 1277 1278 for_each_engine(engine, gvt->gt, i) { 1279 init_waitqueue_head(&scheduler->waitq[i]); 1280 1281 scheduler->thread[i] = kthread_run(workload_thread, engine, 1282 "gvt:%s", engine->name); 1283 if (IS_ERR(scheduler->thread[i])) { 1284 gvt_err("fail to create workload thread\n"); 1285 ret = PTR_ERR(scheduler->thread[i]); 1286 goto err; 1287 } 1288 1289 gvt->shadow_ctx_notifier_block[i].notifier_call = 1290 shadow_context_status_change; 1291 atomic_notifier_chain_register(&engine->context_status_notifier, 1292 &gvt->shadow_ctx_notifier_block[i]); 1293 } 1294 1295 return 0; 1296 1297 err: 1298 intel_gvt_clean_workload_scheduler(gvt); 1299 return ret; 1300 } 1301 1302 static void 1303 i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, 1304 struct i915_ppgtt *ppgtt) 1305 { 1306 int i; 1307 1308 if (i915_vm_is_4lvl(&ppgtt->vm)) { 1309 set_dma_address(ppgtt->pd, s->i915_context_pml4); 1310 } else { 1311 for (i = 0; i < GEN8_3LVL_PDPES; i++) { 1312 struct i915_page_directory * const pd = 1313 i915_pd_entry(ppgtt->pd, i); 1314 1315 set_dma_address(pd, s->i915_context_pdps[i]); 1316 } 1317 } 1318 } 1319 1320 /** 1321 * intel_vgpu_clean_submission - free submission-related resource for vGPU 1322 * @vgpu: a vGPU 1323 * 1324 * This function is called when a vGPU is being destroyed. 1325 * 1326 */ 1327 void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) 1328 { 1329 struct intel_vgpu_submission *s = &vgpu->submission; 1330 struct intel_engine_cs *engine; 1331 enum intel_engine_id id; 1332 1333 intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); 1334 1335 i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm)); 1336 for_each_engine(engine, vgpu->gvt->gt, id) 1337 intel_context_put(s->shadow[id]); 1338 1339 kmem_cache_destroy(s->workloads); 1340 } 1341 1342 1343 /** 1344 * intel_vgpu_reset_submission - reset submission-related resource for vGPU 1345 * @vgpu: a vGPU 1346 * @engine_mask: engines expected to be reset 1347 * 1348 * This function is called when a vGPU is being destroyed. 1349 * 1350 */ 1351 void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, 1352 intel_engine_mask_t engine_mask) 1353 { 1354 struct intel_vgpu_submission *s = &vgpu->submission; 1355 1356 if (!s->active) 1357 return; 1358 1359 intel_vgpu_clean_workloads(vgpu, engine_mask); 1360 s->ops->reset(vgpu, engine_mask); 1361 } 1362 1363 static void 1364 i915_context_ppgtt_root_save(struct intel_vgpu_submission *s, 1365 struct i915_ppgtt *ppgtt) 1366 { 1367 int i; 1368 1369 if (i915_vm_is_4lvl(&ppgtt->vm)) { 1370 s->i915_context_pml4 = px_dma(ppgtt->pd); 1371 } else { 1372 for (i = 0; i < GEN8_3LVL_PDPES; i++) { 1373 struct i915_page_directory * const pd = 1374 i915_pd_entry(ppgtt->pd, i); 1375 1376 s->i915_context_pdps[i] = px_dma(pd); 1377 } 1378 } 1379 } 1380 1381 /** 1382 * intel_vgpu_setup_submission - setup submission-related resource for vGPU 1383 * @vgpu: a vGPU 1384 * 1385 * This function is called when a vGPU is being created. 1386 * 1387 * Returns: 1388 * Zero on success, negative error code if failed. 1389 * 1390 */ 1391 int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) 1392 { 1393 struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 1394 struct intel_vgpu_submission *s = &vgpu->submission; 1395 struct intel_engine_cs *engine; 1396 struct i915_ppgtt *ppgtt; 1397 enum intel_engine_id i; 1398 int ret; 1399 1400 ppgtt = i915_ppgtt_create(to_gt(i915), I915_BO_ALLOC_PM_EARLY); 1401 if (IS_ERR(ppgtt)) 1402 return PTR_ERR(ppgtt); 1403 1404 i915_context_ppgtt_root_save(s, ppgtt); 1405 1406 for_each_engine(engine, vgpu->gvt->gt, i) { 1407 struct intel_context *ce; 1408 1409 INIT_LIST_HEAD(&s->workload_q_head[i]); 1410 s->shadow[i] = ERR_PTR(-EINVAL); 1411 1412 ce = intel_context_create(engine); 1413 if (IS_ERR(ce)) { 1414 ret = PTR_ERR(ce); 1415 goto out_shadow_ctx; 1416 } 1417 1418 i915_vm_put(ce->vm); 1419 ce->vm = i915_vm_get(&ppgtt->vm); 1420 intel_context_set_single_submission(ce); 1421 1422 /* Max ring buffer size */ 1423 if (!intel_uc_wants_guc_submission(&engine->gt->uc)) 1424 ce->ring_size = SZ_2M; 1425 1426 s->shadow[i] = ce; 1427 } 1428 1429 bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); 1430 1431 s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", 1432 sizeof(struct intel_vgpu_workload), 0, 1433 SLAB_HWCACHE_ALIGN, 1434 offsetof(struct intel_vgpu_workload, rb_tail), 1435 sizeof_field(struct intel_vgpu_workload, rb_tail), 1436 NULL); 1437 1438 if (!s->workloads) { 1439 ret = -ENOMEM; 1440 goto out_shadow_ctx; 1441 } 1442 1443 atomic_set(&s->running_workload_num, 0); 1444 bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); 1445 1446 memset(s->last_ctx, 0, sizeof(s->last_ctx)); 1447 1448 i915_vm_put(&ppgtt->vm); 1449 return 0; 1450 1451 out_shadow_ctx: 1452 i915_context_ppgtt_root_restore(s, ppgtt); 1453 for_each_engine(engine, vgpu->gvt->gt, i) { 1454 if (IS_ERR(s->shadow[i])) 1455 break; 1456 1457 intel_context_put(s->shadow[i]); 1458 } 1459 i915_vm_put(&ppgtt->vm); 1460 return ret; 1461 } 1462 1463 /** 1464 * intel_vgpu_select_submission_ops - select virtual submission interface 1465 * @vgpu: a vGPU 1466 * @engine_mask: either ALL_ENGINES or target engine mask 1467 * @interface: expected vGPU virtual submission interface 1468 * 1469 * This function is called when guest configures submission interface. 1470 * 1471 * Returns: 1472 * Zero on success, negative error code if failed. 1473 * 1474 */ 1475 int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, 1476 intel_engine_mask_t engine_mask, 1477 unsigned int interface) 1478 { 1479 struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 1480 struct intel_vgpu_submission *s = &vgpu->submission; 1481 const struct intel_vgpu_submission_ops *ops[] = { 1482 [INTEL_VGPU_EXECLIST_SUBMISSION] = 1483 &intel_vgpu_execlist_submission_ops, 1484 }; 1485 int ret; 1486 1487 if (drm_WARN_ON(&i915->drm, interface >= ARRAY_SIZE(ops))) 1488 return -EINVAL; 1489 1490 if (drm_WARN_ON(&i915->drm, 1491 interface == 0 && engine_mask != ALL_ENGINES)) 1492 return -EINVAL; 1493 1494 if (s->active) 1495 s->ops->clean(vgpu, engine_mask); 1496 1497 if (interface == 0) { 1498 s->ops = NULL; 1499 s->virtual_submission_interface = 0; 1500 s->active = false; 1501 gvt_dbg_core("vgpu%d: remove submission ops\n", vgpu->id); 1502 return 0; 1503 } 1504 1505 ret = ops[interface]->init(vgpu, engine_mask); 1506 if (ret) 1507 return ret; 1508 1509 s->ops = ops[interface]; 1510 s->virtual_submission_interface = interface; 1511 s->active = true; 1512 1513 gvt_dbg_core("vgpu%d: activate ops [ %s ]\n", 1514 vgpu->id, s->ops->name); 1515 1516 return 0; 1517 } 1518 1519 /** 1520 * intel_vgpu_destroy_workload - destroy a vGPU workload 1521 * @workload: workload to destroy 1522 * 1523 * This function is called when destroy a vGPU workload. 1524 * 1525 */ 1526 void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) 1527 { 1528 struct intel_vgpu_submission *s = &workload->vgpu->submission; 1529 1530 intel_context_unpin(s->shadow[workload->engine->id]); 1531 release_shadow_batch_buffer(workload); 1532 release_shadow_wa_ctx(&workload->wa_ctx); 1533 1534 if (!list_empty(&workload->lri_shadow_mm)) { 1535 struct intel_vgpu_mm *m, *mm; 1536 list_for_each_entry_safe(m, mm, &workload->lri_shadow_mm, 1537 ppgtt_mm.link) { 1538 list_del(&m->ppgtt_mm.link); 1539 intel_vgpu_mm_put(m); 1540 } 1541 } 1542 1543 GEM_BUG_ON(!list_empty(&workload->lri_shadow_mm)); 1544 if (workload->shadow_mm) 1545 intel_vgpu_mm_put(workload->shadow_mm); 1546 1547 kmem_cache_free(s->workloads, workload); 1548 } 1549 1550 static struct intel_vgpu_workload * 1551 alloc_workload(struct intel_vgpu *vgpu) 1552 { 1553 struct intel_vgpu_submission *s = &vgpu->submission; 1554 struct intel_vgpu_workload *workload; 1555 1556 workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL); 1557 if (!workload) 1558 return ERR_PTR(-ENOMEM); 1559 1560 INIT_LIST_HEAD(&workload->list); 1561 INIT_LIST_HEAD(&workload->shadow_bb); 1562 INIT_LIST_HEAD(&workload->lri_shadow_mm); 1563 1564 init_waitqueue_head(&workload->shadow_ctx_status_wq); 1565 atomic_set(&workload->shadow_ctx_active, 0); 1566 1567 workload->status = -EINPROGRESS; 1568 workload->vgpu = vgpu; 1569 1570 return workload; 1571 } 1572 1573 #define RING_CTX_OFF(x) \ 1574 offsetof(struct execlist_ring_context, x) 1575 1576 static void read_guest_pdps(struct intel_vgpu *vgpu, 1577 u64 ring_context_gpa, u32 pdp[8]) 1578 { 1579 u64 gpa; 1580 int i; 1581 1582 gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); 1583 1584 for (i = 0; i < 8; i++) 1585 intel_gvt_read_gpa(vgpu, 1586 gpa + i * 8, &pdp[7 - i], 4); 1587 } 1588 1589 static int prepare_mm(struct intel_vgpu_workload *workload) 1590 { 1591 struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; 1592 struct intel_vgpu_mm *mm; 1593 struct intel_vgpu *vgpu = workload->vgpu; 1594 enum intel_gvt_gtt_type root_entry_type; 1595 u64 pdps[GVT_RING_CTX_NR_PDPS]; 1596 1597 switch (desc->addressing_mode) { 1598 case 1: /* legacy 32-bit */ 1599 root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; 1600 break; 1601 case 3: /* legacy 64-bit */ 1602 root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; 1603 break; 1604 default: 1605 gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); 1606 return -EINVAL; 1607 } 1608 1609 read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps); 1610 1611 mm = intel_vgpu_get_ppgtt_mm(workload->vgpu, root_entry_type, pdps); 1612 if (IS_ERR(mm)) 1613 return PTR_ERR(mm); 1614 1615 workload->shadow_mm = mm; 1616 return 0; 1617 } 1618 1619 #define same_context(a, b) (((a)->context_id == (b)->context_id) && \ 1620 ((a)->lrca == (b)->lrca)) 1621 1622 /** 1623 * intel_vgpu_create_workload - create a vGPU workload 1624 * @vgpu: a vGPU 1625 * @engine: the engine 1626 * @desc: a guest context descriptor 1627 * 1628 * This function is called when creating a vGPU workload. 1629 * 1630 * Returns: 1631 * struct intel_vgpu_workload * on success, negative error code in 1632 * pointer if failed. 1633 * 1634 */ 1635 struct intel_vgpu_workload * 1636 intel_vgpu_create_workload(struct intel_vgpu *vgpu, 1637 const struct intel_engine_cs *engine, 1638 struct execlist_ctx_descriptor_format *desc) 1639 { 1640 struct intel_vgpu_submission *s = &vgpu->submission; 1641 struct list_head *q = workload_q_head(vgpu, engine); 1642 struct intel_vgpu_workload *last_workload = NULL; 1643 struct intel_vgpu_workload *workload = NULL; 1644 u64 ring_context_gpa; 1645 u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; 1646 u32 guest_head; 1647 int ret; 1648 1649 ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, 1650 (u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT)); 1651 if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { 1652 gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); 1653 return ERR_PTR(-EINVAL); 1654 } 1655 1656 intel_gvt_read_gpa(vgpu, ring_context_gpa + 1657 RING_CTX_OFF(ring_header.val), &head, 4); 1658 1659 intel_gvt_read_gpa(vgpu, ring_context_gpa + 1660 RING_CTX_OFF(ring_tail.val), &tail, 4); 1661 1662 guest_head = head; 1663 1664 head &= RB_HEAD_OFF_MASK; 1665 tail &= RB_TAIL_OFF_MASK; 1666 1667 list_for_each_entry_reverse(last_workload, q, list) { 1668 1669 if (same_context(&last_workload->ctx_desc, desc)) { 1670 gvt_dbg_el("ring %s cur workload == last\n", 1671 engine->name); 1672 gvt_dbg_el("ctx head %x real head %lx\n", head, 1673 last_workload->rb_tail); 1674 /* 1675 * cannot use guest context head pointer here, 1676 * as it might not be updated at this time 1677 */ 1678 head = last_workload->rb_tail; 1679 break; 1680 } 1681 } 1682 1683 gvt_dbg_el("ring %s begin a new workload\n", engine->name); 1684 1685 /* record some ring buffer register values for scan and shadow */ 1686 intel_gvt_read_gpa(vgpu, ring_context_gpa + 1687 RING_CTX_OFF(rb_start.val), &start, 4); 1688 intel_gvt_read_gpa(vgpu, ring_context_gpa + 1689 RING_CTX_OFF(rb_ctrl.val), &ctl, 4); 1690 intel_gvt_read_gpa(vgpu, ring_context_gpa + 1691 RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); 1692 1693 if (!intel_gvt_ggtt_validate_range(vgpu, start, 1694 _RING_CTL_BUF_SIZE(ctl))) { 1695 gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start); 1696 return ERR_PTR(-EINVAL); 1697 } 1698 1699 workload = alloc_workload(vgpu); 1700 if (IS_ERR(workload)) 1701 return workload; 1702 1703 workload->engine = engine; 1704 workload->ctx_desc = *desc; 1705 workload->ring_context_gpa = ring_context_gpa; 1706 workload->rb_head = head; 1707 workload->guest_rb_head = guest_head; 1708 workload->rb_tail = tail; 1709 workload->rb_start = start; 1710 workload->rb_ctl = ctl; 1711 1712 if (engine->id == RCS0) { 1713 intel_gvt_read_gpa(vgpu, ring_context_gpa + 1714 RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); 1715 intel_gvt_read_gpa(vgpu, ring_context_gpa + 1716 RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4); 1717 1718 workload->wa_ctx.indirect_ctx.guest_gma = 1719 indirect_ctx & INDIRECT_CTX_ADDR_MASK; 1720 workload->wa_ctx.indirect_ctx.size = 1721 (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * 1722 CACHELINE_BYTES; 1723 1724 if (workload->wa_ctx.indirect_ctx.size != 0) { 1725 if (!intel_gvt_ggtt_validate_range(vgpu, 1726 workload->wa_ctx.indirect_ctx.guest_gma, 1727 workload->wa_ctx.indirect_ctx.size)) { 1728 gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n", 1729 workload->wa_ctx.indirect_ctx.guest_gma); 1730 kmem_cache_free(s->workloads, workload); 1731 return ERR_PTR(-EINVAL); 1732 } 1733 } 1734 1735 workload->wa_ctx.per_ctx.guest_gma = 1736 per_ctx & PER_CTX_ADDR_MASK; 1737 workload->wa_ctx.per_ctx.valid = per_ctx & 1; 1738 if (workload->wa_ctx.per_ctx.valid) { 1739 if (!intel_gvt_ggtt_validate_range(vgpu, 1740 workload->wa_ctx.per_ctx.guest_gma, 1741 CACHELINE_BYTES)) { 1742 gvt_vgpu_err("invalid per_ctx at: 0x%lx\n", 1743 workload->wa_ctx.per_ctx.guest_gma); 1744 kmem_cache_free(s->workloads, workload); 1745 return ERR_PTR(-EINVAL); 1746 } 1747 } 1748 } 1749 1750 gvt_dbg_el("workload %p ring %s head %x tail %x start %x ctl %x\n", 1751 workload, engine->name, head, tail, start, ctl); 1752 1753 ret = prepare_mm(workload); 1754 if (ret) { 1755 kmem_cache_free(s->workloads, workload); 1756 return ERR_PTR(ret); 1757 } 1758 1759 /* Only scan and shadow the first workload in the queue 1760 * as there is only one pre-allocated buf-obj for shadow. 1761 */ 1762 if (list_empty(q)) { 1763 intel_wakeref_t wakeref; 1764 1765 with_intel_runtime_pm(engine->gt->uncore->rpm, wakeref) 1766 ret = intel_gvt_scan_and_shadow_workload(workload); 1767 } 1768 1769 if (ret) { 1770 if (vgpu_is_vm_unhealthy(ret)) 1771 enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); 1772 intel_vgpu_destroy_workload(workload); 1773 return ERR_PTR(ret); 1774 } 1775 1776 ret = intel_context_pin(s->shadow[engine->id]); 1777 if (ret) { 1778 intel_vgpu_destroy_workload(workload); 1779 return ERR_PTR(ret); 1780 } 1781 1782 return workload; 1783 } 1784 1785 /** 1786 * intel_vgpu_queue_workload - Queue a vGPU workload 1787 * @workload: the workload to queue in 1788 */ 1789 void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload) 1790 { 1791 list_add_tail(&workload->list, 1792 workload_q_head(workload->vgpu, workload->engine)); 1793 intel_gvt_kick_schedule(workload->vgpu->gvt); 1794 wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->engine->id]); 1795 } 1796