/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"

static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * validate a gm address and related range size
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
		gvt_err("vgpu%d: invalid range gmadr 0x%llx size 0x%x\n",
				vgpu->id, addr, size);
		return false;
	}
	return true;
}

/* translate a guest gmadr to host gmadr */
int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
{
	if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
		 "invalid guest gmadr %llx\n", g_addr))
		return -EACCES;

	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
		*h_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (g_addr - vgpu_aperture_offset(vgpu));
	else
		*h_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (g_addr - vgpu_hidden_offset(vgpu));
	return 0;
}

/* translate a host gmadr to guest gmadr */
int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
{
	if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
		 "invalid host gmadr %llx\n", h_addr))
		return -EACCES;

	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
		*g_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
	else
		*g_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
	return 0;
}

int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
			     unsigned long *h_index)
{
	u64 h_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << GTT_PAGE_SHIFT,
				       &h_addr);
	if (ret)
		return ret;

	*h_index = h_addr >> GTT_PAGE_SHIFT;
	return 0;
}

int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
			     unsigned long *g_index)
{
	u64 g_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << GTT_PAGE_SHIFT,
				       &g_addr);
	if (ret)
		return ret;

	*g_index = g_addr >> GTT_PAGE_SHIFT;
	return 0;
}

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

enum {
	GTT_TYPE_INVALID = -1,

	GTT_TYPE_GGTT_PTE,

	GTT_TYPE_PPGTT_PTE_4K_ENTRY,
	GTT_TYPE_PPGTT_PTE_2M_ENTRY,
	GTT_TYPE_PPGTT_PTE_1G_ENTRY,

	GTT_TYPE_PPGTT_PTE_ENTRY,

	GTT_TYPE_PPGTT_PDE_ENTRY,
	GTT_TYPE_PPGTT_PDP_ENTRY,
	GTT_TYPE_PPGTT_PML4_ENTRY,

	GTT_TYPE_PPGTT_ROOT_ENTRY,

	GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
	GTT_TYPE_PPGTT_ROOT_L4_ENTRY,

	GTT_TYPE_PPGTT_ENTRY,

	GTT_TYPE_PPGTT_PTE_PT,
	GTT_TYPE_PPGTT_PDE_PT,
	GTT_TYPE_PPGTT_PDP_PT,
	GTT_TYPE_PPGTT_PML4_PT,

	GTT_TYPE_MAX,
};

/*
 * Mappings between GTT_TYPE* enumerations.
 * The following information can be looked up for a given type:
 * - the type of the next-level page table
 * - the type of an entry inside this level of page table
 * - the type of an entry with the PSE bit set
 *
 * If the given type doesn't carry a particular kind of information,
 * GTT_TYPE_INVALID is returned. For example, an L4 root entry has no PSE
 * type, and a PTE page table has no next-level page table type, so asking
 * for either of those returns GTT_TYPE_INVALID. This is useful when
 * traversing a page table.
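 *
 * For example, the table below maps GTT_TYPE_PPGTT_PDE_PT to
 * GTT_TYPE_PPGTT_PDE_ENTRY as its entry type, GTT_TYPE_PPGTT_PTE_PT as its
 * next-level page table type, and GTT_TYPE_PPGTT_PTE_2M_ENTRY as its entry
 * type when the PSE bit is set.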
 */

struct gtt_type_table_entry {
	int entry_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
	u64 pte;

#ifdef readq
	pte = readq(addr);
#else
	pte = ioread32(addr);
	pte |= (u64)ioread32(addr + 4) << 32;
#endif
	return pte;
}

static void write_pte64(struct drm_i915_private *dev_priv,
		unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return e;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		WARN_ON(ret);
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return e;
}

static inline struct intel_gvt_gtt_entry *gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return e;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		WARN_ON(ret);
	} else if (!pt) {
		write_pte64(vgpu->gvt->dev_priv, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return e;
}

#define GTT_HAW 46

#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30)
#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21)
#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12)

static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
	unsigned long pfn;

	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
		pfn = (e->val64 & ADDR_1G_MASK) >> 12;
	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
		pfn = (e->val64 & ADDR_2M_MASK) >> 12;
	else
		pfn = (e->val64 & ADDR_4K_MASK) >> 12;
	return pfn;
}

static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
{
	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
		e->val64 &= ~ADDR_1G_MASK;
		pfn &= (ADDR_1G_MASK >> 12);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
		e->val64 &= ~ADDR_2M_MASK;
		pfn &= (ADDR_2M_MASK >> 12);
	} else {
		e->val64 &= ~ADDR_4K_MASK;
		pfn &= (ADDR_4K_MASK >> 12);
	}

	e->val64 |= (pfn << 12);
}

static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	/* Entry doesn't have PSE bit. */
	if (get_pse_type(e->type) == GTT_TYPE_INVALID)
		return false;

	e->type = get_entry_type(e->type);
	if (!(e->val64 & (1 << 7)))
		return false;

	e->type = get_pse_type(e->type);
	return true;
}

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes PDP root pointer registers without the present bit
	 * set and that still works, so root pointer entries need to be
	 * treated specially.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & (1 << 0));
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~(1 << 0);
}

/*
 * Per-platform GMA routines.
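 *
 * On gen8, a graphics memory address (GMA) is decoded by the index helpers
 * below: bits 39-47 select the PML4 entry, bits 30-38 the PDP entry (only
 * bits 30-31 for a legacy 3-level table), bits 21-29 the PDE entry and
 * bits 12-20 the PTE; bits 0-11 are the offset within the 4K page.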
 */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));

static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};

static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p,
		struct intel_gvt_gtt_entry *m)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long gfn, mfn;

	*m = *p;

	if (!ops->test_present(p))
		return 0;

	gfn = ops->get_pfn(p);

	mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
	if (mfn == INTEL_GVT_INVALID_ADDR) {
		gvt_err("fail to translate gfn: 0x%lx\n", gfn);
		return -ENXIO;
	}

	ops->set_pfn(m, mfn);
	return 0;
}

/*
 * MM helpers.
 */
struct intel_gvt_gtt_entry *intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm,
		void *page_table, struct intel_gvt_gtt_entry *e,
		unsigned long index)
{
	struct intel_gvt *gvt = mm->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	e->type = mm->page_table_entry_type;

	ops->get_entry(page_table, e, index, false, 0, mm->vgpu);
	ops->test_pse(e);
	return e;
}

struct intel_gvt_gtt_entry *intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm,
		void *page_table, struct intel_gvt_gtt_entry *e,
		unsigned long index)
{
	struct intel_gvt *gvt = mm->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	return ops->set_entry(page_table, e, index, false, 0, mm->vgpu);
}

/*
 * PPGTT shadow page table helpers.
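 *
 * Each shadow page table page (spt) pairs a tracked guest page table page
 * with a shadow copy: "guest" entries are accessed through hypervisor GPA
 * reads/writes against the guest page, while "shadow" entries live in the
 * shadow page's own mapping (shadow_page.vaddr) and carry host MFNs.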
 */
static inline struct intel_gvt_gtt_entry *ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return e;

	ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << GTT_PAGE_SHIFT,
			spt->vgpu);
	ops->test_pse(e);
	return e;
}

static inline struct intel_gvt_gtt_entry *ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return e;

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page_type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page_type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

/**
 * intel_vgpu_init_guest_page - init a guest page data structure
 * @vgpu: a vGPU
 * @p: a guest page data structure
 * @gfn: guest memory page frame number
 * @handler: the function to be called when the target guest memory page
 * has been modified.
 * @data: private data stored in the guest page data structure
 *
 * This function is called when user wants to track a guest memory page.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_init_guest_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *p,
		unsigned long gfn,
		int (*handler)(void *, u64, void *, int),
		void *data)
{
	INIT_HLIST_NODE(&p->node);

	p->writeprotection = false;
	p->gfn = gfn;
	p->handler = handler;
	p->data = data;
	p->oos_page = NULL;
	p->write_cnt = 0;

	hash_add(vgpu->gtt.guest_page_hash_table, &p->node, p->gfn);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

/**
 * intel_vgpu_clean_guest_page - release the resource owned by guest page data
 * structure
 * @vgpu: a vGPU
 * @p: a tracked guest page
 *
 * This function is called when user tries to stop tracking a guest memory
 * page.
 */
void intel_vgpu_clean_guest_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *p)
{
	if (!hlist_unhashed(&p->node))
		hash_del(&p->node);

	if (p->oos_page)
		detach_oos_page(vgpu, p->oos_page);

	if (p->writeprotection)
		intel_gvt_hypervisor_unset_wp_page(vgpu, p);
}

/**
 * intel_vgpu_find_guest_page - find a guest page data structure by GFN.
 * @vgpu: a vGPU
 * @gfn: guest memory page frame number
 *
 * This function is called when emulation logic wants to know if a trapped GFN
 * is a tracked guest page.
 *
 * Returns:
 * Pointer to guest page data structure, NULL if failed.
 */
struct intel_vgpu_guest_page *intel_vgpu_find_guest_page(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_guest_page *p;

	hash_for_each_possible(vgpu->gtt.guest_page_hash_table,
		p, node, gfn) {
		if (p->gfn == gfn)
			return p;
	}
	return NULL;
}

static inline int init_shadow_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_shadow_page *p, int type)
{
	p->vaddr = page_address(p->page);
	p->type = type;

	INIT_HLIST_NODE(&p->node);

	p->mfn = intel_gvt_hypervisor_virt_to_mfn(p->vaddr);
	if (p->mfn == INTEL_GVT_INVALID_ADDR)
		return -EFAULT;

	hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn);
	return 0;
}

static inline void clean_shadow_page(struct intel_vgpu_shadow_page *p)
{
	if (!hlist_unhashed(&p->node))
		hash_del(&p->node);
}

static inline struct intel_vgpu_shadow_page *find_shadow_page(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	struct intel_vgpu_shadow_page *p;

	hash_for_each_possible(vgpu->gtt.shadow_page_hash_table,
		p, node, mfn) {
		if (p->mfn == mfn)
			return p;
	}
	return NULL;
}

#define guest_page_to_ppgtt_spt(ptr) \
	container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page)

#define shadow_page_to_ppgtt_spt(ptr) \
	container_of(ptr, struct intel_vgpu_ppgtt_spt, shadow_page)

static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc(sizeof(*spt), gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type);

	clean_shadow_page(&spt->shadow_page);
	intel_vgpu_clean_guest_page(spt->vgpu, &spt->guest_page);
	list_del_init(&spt->post_shadow_list);

	free_spt(spt);
}

static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu)
{
	struct hlist_node *n;
	struct intel_vgpu_shadow_page *sp;
	int i;

	hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, sp, node)
		ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp));
}

static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(void *gp, u64 pa,
		void *p_data, int bytes)
{
	struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	if (!gpt->writeprotection)
		return -EINVAL;

	ret = ppgtt_handle_guest_write_page_table_bytes(gp,
		pa, p_data, bytes);
	return ret;
}

static int reclaim_one_mm(struct intel_gvt *gvt);

static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page(
		struct intel_vgpu *vgpu, int type, unsigned long gfn)
{
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	int ret;
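
	/*
	 * If the allocation fails, reclaim one unpinned, already-shadowed
	 * mm (see reclaim_one_mm()) and retry before giving up.
	 */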

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_mm(vgpu->gvt))
			goto retry;

		gvt_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	spt->guest_page_type = type;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * TODO: guest page type may be different from shadow page type,
	 * when we support PSE pages in the future.
	 */
	ret = init_shadow_page(vgpu, &spt->shadow_page, type);
	if (ret) {
		gvt_err("fail to initialize shadow page for spt\n");
		goto err;
	}

	ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page,
			gfn, ppgtt_write_protection_handler, NULL);
	if (ret) {
		gvt_err("fail to initialize guest page for spt\n");
		goto err;
	}

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
	return spt;
err:
	ppgtt_free_shadow_page(spt);
	return ERR_PTR(ret);
}

static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	struct intel_vgpu_shadow_page *p = find_shadow_page(vgpu, mfn);

	if (p)
		return shadow_page_to_ppgtt_spt(p);

	gvt_err("vgpu%d: fail to find ppgtt shadow page: 0x%lx\n",
			vgpu->id, mfn);
	return NULL;
}

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
			ppgtt_get_guest_entry(spt, e, i)))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
			ppgtt_get_shadow_entry(spt, e, i)))

static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));

	atomic_inc(&spt->refcount);
}

static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;

	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type))))
		return -EINVAL;

	if (ops->get_pfn(e) == vgpu->gtt.scratch_page_mfn)
		return 0;

	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n",
				vgpu->id, ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_shadow_page(s);
}

static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;
	int v = atomic_read(&spt->refcount);

	trace_spt_change(spt->vgpu->id, "die", spt,
			spt->guest_page.gfn, spt->shadow_page.type);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));

	if (atomic_dec_return(&spt->refcount) > 0)
		return 0;

	if (gtt_type_is_pte_pt(spt->shadow_page.type))
		goto release;

	for_each_present_shadow_entry(spt, &e, index) {
		if (!gtt_type_is_pt(get_next_pt_type(e.type))) {
			gvt_err("GVT doesn't support pse bit for now\n");
			return -EINVAL;
		}
		ret = ppgtt_invalidate_shadow_page_by_shadow_entry(
				spt->vgpu, &e);
		if (ret)
			goto fail;
	}
release:
	trace_spt_change(spt->vgpu->id, "release", spt,
			spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_shadow_page(spt);
	return 0;
fail:
	gvt_err("vgpu%d: fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt->vgpu->id, spt, e.val64, e.type);
	return ret;
}

static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s = NULL;
	struct intel_vgpu_guest_page *g;
	int ret;

	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) {
		ret = -EINVAL;
		goto fail;
	}

	g = intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we));
	if (g) {
		s = guest_page_to_ppgtt_spt(g);
		ppgtt_get_shadow_page(s);
	} else {
		int type = get_next_pt_type(we->type);

		s = ppgtt_alloc_shadow_page(vgpu, type, ops->get_pfn(we));
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}

		ret = intel_gvt_hypervisor_set_wp_page(vgpu, &s->guest_page);
		if (ret)
			goto fail;

		ret = ppgtt_populate_shadow_page(s);
		if (ret)
			goto fail;

		trace_spt_change(vgpu->id, "new", s, s->guest_page.gfn,
			s->shadow_page.type);
	}
	return s;
fail:
	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
			vgpu->id, s, we->val64, we->type);
	return ERR_PTR(ret);
}

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	ops->set_pfn(se, s->shadow_page.mfn);
}

static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			spt->guest_page.gfn, spt->shadow_page.type);

	if (gtt_type_is_pte_pt(spt->shadow_page.type)) {
		for_each_present_guest_entry(spt, &ge, i) {
			ret = gtt_entry_p2m(vgpu, &ge, &se);
			if (ret)
				goto fail;
			ppgtt_set_shadow_entry(spt, &se, i);
		}
		return 0;
	}

	for_each_present_guest_entry(spt, &ge, i) {
		if (!gtt_type_is_pt(get_next_pt_type(ge.type))) {
			gvt_err("GVT doesn't support pse bit now\n");
			ret = -EINVAL;
			goto fail;
		}

		s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &se, i);
		ppgtt_generate_shadow_entry(&se, s, &ge);
		ppgtt_set_shadow_entry(spt, &se, i);
	}
	return 0;
fail:
	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
			vgpu->id, spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry e;
	int ret;

	trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type,
			we->val64, index);

	ppgtt_get_shadow_entry(spt, &e, index);
	if (!ops->test_present(&e))
		return 0;

	if (ops->get_pfn(&e) == vgpu->gtt.scratch_page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		struct intel_vgpu_guest_page *g =
			intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we));
		if (!g) {
			gvt_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_shadow_page(guest_page_to_ppgtt_spt(g));
		if (ret)
			goto fail;
	}
	ops->set_pfn(&e, vgpu->gtt.scratch_page_mfn);
	ppgtt_set_shadow_entry(spt, &e, index);
	return 0;
fail:
	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
			vgpu->id, spt, we->val64, we->type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_gpt_change(spt->vgpu->id, "add", spt, sp->type,
			we->val64, index);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = gtt_entry_p2m(vgpu, we, &m);
		if (ret)
			goto fail;
		ppgtt_set_shadow_entry(spt, &m, index);
	}
	return 0;
fail:
	gvt_err("vgpu%d: fail: spt %p guest entry 0x%llx type %d\n", vgpu->id,
		spt, we->val64, we->type);
	return ret;
}

static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt =
		guest_page_to_ppgtt_spt(oos_page->guest_page);
	struct intel_gvt_gtt_entry old, new, m;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			oos_page->guest_page, spt->guest_page_type);

	old.type = new.type = get_entry_type(spt->guest_page_type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (GTT_PAGE_SIZE >> info->gtt_entry_size_shift);
		index++) {
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			oos_page->guest_page->gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
				oos_page->guest_page, spt->guest_page_type,
				new.val64, index);

		ret = gtt_entry_p2m(vgpu, &new, &m);
		if (ret)
			return ret;

		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
		ppgtt_set_shadow_entry(spt, &m, index);
	}

	oos_page->guest_page->write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_ppgtt_spt *spt =
		guest_page_to_ppgtt_spt(oos_page->guest_page);

	trace_oos_change(vgpu->id, "detach", oos_page->id,
			oos_page->guest_page, spt->guest_page_type);

	oos_page->guest_page->write_cnt = 0;
	oos_page->guest_page->oos_page = NULL;
	oos_page->guest_page = NULL;

	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);

	return 0;
}

static int attach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_gvt *gvt = vgpu->gvt;
	int ret;

	ret = intel_gvt_hypervisor_read_gpa(vgpu, gpt->gfn << GTT_PAGE_SHIFT,
		oos_page->mem, GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->guest_page = gpt;
	gpt->oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(vgpu->id, "attach", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	int ret;

	ret = intel_gvt_hypervisor_set_wp_page(vgpu, gpt);
	if (ret)
		return ret;

	trace_oos_change(vgpu->id, "set page sync", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);

	list_del_init(&gpt->oos_page->vm_list);
	return sync_oos_page(vgpu, gpt->oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = gpt->oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
			struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
		if (ret)
			return ret;
		ret = detach_oos_page(vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
			struct intel_vgpu_oos_page, list);
	return attach_oos_page(vgpu, oos_page, gpt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_vgpu_oos_page *oos_page = gpt->oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(vgpu->id, "set page out of sync", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);

	list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head);
	return intel_gvt_hypervisor_unset_wp_page(vgpu, gpt);
}

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to sync all the out-of-sync shadow pages of a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of PPGTT shadow page table.
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_guest_page *gpt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry ge;

	int old_present, new_present;
	int ret;

	ppgtt_get_guest_entry(spt, &ge, index);

	old_present = ops->test_present(&ge);
	new_present = ops->test_present(we);

	ppgtt_set_guest_entry(spt, we, index);

	if (old_present) {
		ret = ppgtt_handle_guest_entry_removal(gpt, &ge, index);
		if (ret)
			goto fail;
	}
	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(gpt, we, index);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d.\n",
			vgpu->id, spt, we->val64, we->type);
	return ret;
}

static inline bool can_do_out_of_sync(struct intel_vgpu_guest_page *gpt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(
			guest_page_to_ppgtt_spt(gpt)->guest_page_type)
		&& gpt->write_cnt >= 2;
}

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
			&spt->vgpu->gtt.post_shadow_list_head);
}

/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to flush all the post shadows for a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge, e;
	unsigned long index;
	int ret;

	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
				post_shadow_list);

		for_each_set_bit(index, spt->post_shadow_bitmap,
				GTT_ENTRY_NUM_IN_ONE_PAGE) {
			ppgtt_get_guest_entry(spt, &ge, index);
			e = ge;
			e.val64 = 0;
			ppgtt_set_guest_entry(spt, &e, index);

			ret = ppgtt_handle_guest_write_page_table(
					&spt->guest_page, &ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
	}
	return 0;
}

static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
		u64 pa, void *p_data, int bytes)
{
	struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp;
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt_gtt_entry we;
	unsigned long index;
	int ret;

	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;

	ppgtt_get_guest_entry(spt, &we, index);
	memcpy((void *)&we.val64 + (pa & (info->gtt_entry_size - 1)),
		p_data, bytes);

	ops->test_pse(&we);

	if (bytes == info->gtt_entry_size) {
		ret = ppgtt_handle_guest_write_page_table(gpt, &we, index);
		if (ret)
			return ret;
	} else {
		struct intel_gvt_gtt_entry ge;

		ppgtt_get_guest_entry(spt, &ge, index);

		if (!test_bit(index, spt->post_shadow_bitmap)) {
			ret = ppgtt_handle_guest_entry_removal(gpt,
					&ge, index);
			if (ret)
				return ret;
		}

		ppgtt_set_post_shadow(spt, index);
		ppgtt_set_guest_entry(spt, &we, index);
	}

	if (!enable_out_of_sync)
		return 0;

	gpt->write_cnt++;

	if (gpt->oos_page)
		ops->set_entry(gpt->oos_page->mem, &we, index,
				false, 0, vgpu);

	if (can_do_out_of_sync(gpt)) {
		if (!gpt->oos_page)
			ppgtt_allocate_oos_page(vgpu, gpt);

		ret = ppgtt_set_guest_page_oos(vgpu, gpt);
		if (ret < 0)
			return ret;
	}
	return 0;
}

/*
 * mm page table allocation policy for bdw+
 *  - for ggtt, only virtual page table will be allocated.
 *  - for ppgtt, dedicated virtual/shadow page table will be allocated.
 */
static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	void *mem;

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		mm->page_table_entry_cnt = 4;
		mm->page_table_entry_size = mm->page_table_entry_cnt *
			info->gtt_entry_size;
		mem = kzalloc(mm->has_shadow_page_table ?
			mm->page_table_entry_size * 2
				: mm->page_table_entry_size,
			GFP_ATOMIC);
		if (!mem)
			return -ENOMEM;
		mm->virtual_page_table = mem;
		if (!mm->has_shadow_page_table)
			return 0;
		mm->shadow_page_table = mem + mm->page_table_entry_size;
	} else if (mm->type == INTEL_GVT_MM_GGTT) {
		mm->page_table_entry_cnt =
			(gvt_ggtt_gm_sz(gvt) >> GTT_PAGE_SHIFT);
		mm->page_table_entry_size = mm->page_table_entry_cnt *
			info->gtt_entry_size;
		mem = vzalloc(mm->page_table_entry_size);
		if (!mem)
			return -ENOMEM;
		mm->virtual_page_table = mem;
	}
	return 0;
}

static void gen8_mm_free_page_table(struct intel_vgpu_mm *mm)
{
	if (mm->type == INTEL_GVT_MM_PPGTT) {
		kfree(mm->virtual_page_table);
	} else if (mm->type == INTEL_GVT_MM_GGTT) {
		if (mm->virtual_page_table)
			vfree(mm->virtual_page_table);
	}
	mm->virtual_page_table = mm->shadow_page_table = NULL;
}

static void invalidate_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_gvt_gtt_entry se;
	int i;

	if (WARN_ON(!mm->has_shadow_page_table || !mm->shadowed))
		return;

	for (i = 0; i < mm->page_table_entry_cnt; i++) {
		ppgtt_get_shadow_root_entry(mm, &se, i);
		if (!ops->test_present(&se))
			continue;
		ppgtt_invalidate_shadow_page_by_shadow_entry(
				vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, i);

		trace_gpt_change(vgpu->id, "destroy root pointer",
				NULL, se.type, se.val64, i);
	}
	mm->shadowed = false;
}

/**
 * intel_vgpu_destroy_mm - destroy a mm object
 * @mm_ref: a kref object
 *
 * This function is used to destroy a mm object for vGPU
 *
 */
void intel_vgpu_destroy_mm(struct kref *mm_ref)
{
	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;

	if (!mm->initialized)
		goto out;

	list_del(&mm->list);
	list_del(&mm->lru_list);

	if (mm->has_shadow_page_table)
		invalidate_mm(mm);

	gtt->mm_free_page_table(mm);
out:
	kfree(mm);
}

static int shadow_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge, se;
	int i;
	int ret;

	if (WARN_ON(!mm->has_shadow_page_table || mm->shadowed))
		return 0;

	mm->shadowed = true;

	for (i = 0; i < mm->page_table_entry_cnt; i++) {
		ppgtt_get_guest_root_entry(mm, &ge, i);
		if (!ops->test_present(&ge))
			continue;

		trace_gpt_change(vgpu->id, __func__, NULL,
				ge.type, ge.val64, i);

		spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
			goto fail;
		}
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, i);

		trace_gpt_change(vgpu->id, "populate root pointer",
				NULL, se.type, se.val64, i);
	}
	return 0;
fail:
	invalidate_mm(mm);
	return ret;
}

/**
 * intel_vgpu_create_mm - create a mm object for a vGPU
 * @vgpu: a vGPU
 * @mm_type: mm object type, should be PPGTT or GGTT
 * @virtual_page_table: page table root pointers. Could be NULL if user wants
 *	to populate shadow later.
 * @page_table_level: describe the page table level of the mm object
 * @pde_base_index: pde root pointer base in GGTT MMIO.
 *
 * This function is used to create a mm object for a vGPU.
 *
 * Returns:
 * Pointer to the mm object on success, ERR_PTR() encoded negative error
 * code if failed.
 */
struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
		int mm_type, void *virtual_page_table, int page_table_level,
		u32 pde_base_index)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_mm *mm;
	int ret;

	mm = kzalloc(sizeof(*mm), GFP_ATOMIC);
	if (!mm) {
		ret = -ENOMEM;
		goto fail;
	}

	mm->type = mm_type;

	if (page_table_level == 1)
		mm->page_table_entry_type = GTT_TYPE_GGTT_PTE;
	else if (page_table_level == 3)
		mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
	else if (page_table_level == 4)
		mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
	else {
		WARN_ON(1);
		ret = -EINVAL;
		goto fail;
	}

	mm->page_table_level = page_table_level;
	mm->pde_base_index = pde_base_index;

	mm->vgpu = vgpu;
	mm->has_shadow_page_table = !!(mm_type == INTEL_GVT_MM_PPGTT);

	kref_init(&mm->ref);
	atomic_set(&mm->pincount, 0);
	INIT_LIST_HEAD(&mm->list);
	INIT_LIST_HEAD(&mm->lru_list);
	list_add_tail(&mm->list, &vgpu->gtt.mm_list_head);

	ret = gtt->mm_alloc_page_table(mm);
	if (ret) {
		gvt_err("fail to allocate page table for mm\n");
		goto fail;
	}

	mm->initialized = true;

	if (virtual_page_table)
		memcpy(mm->virtual_page_table, virtual_page_table,
				mm->page_table_entry_size);

	if (mm->has_shadow_page_table) {
		ret = shadow_mm(mm);
		if (ret)
			goto fail;
		list_add_tail(&mm->lru_list, &gvt->gtt.mm_lru_list_head);
	}
	return mm;
fail:
	gvt_err("fail to create mm\n");
	if (mm)
		intel_gvt_mm_unreference(mm);
	return ERR_PTR(ret);
}

/**
 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when user doesn't want to use a vGPU mm object
 */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
		return;

	atomic_dec(&mm->pincount);
}

/**
 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when user wants to use a vGPU mm object. If this
 * mm object hasn't been shadowed yet, the shadow will be populated at this
 * time.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
{
	int ret;

	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
		return 0;

	atomic_inc(&mm->pincount);

	if (!mm->shadowed) {
		ret = shadow_mm(mm);
		if (ret)
			return ret;
	}

	list_del_init(&mm->lru_list);
	list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head);
	return 0;
}

static int reclaim_one_mm(struct intel_gvt *gvt)
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos, *n;

	list_for_each_safe(pos, n, &gvt->gtt.mm_lru_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, lru_list);

		if (mm->type != INTEL_GVT_MM_PPGTT)
			continue;
		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->lru_list);
		invalidate_mm(mm);
		return 1;
	}
	return 0;
}

/*
 * GMA translation APIs.
 */
static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;

	if (WARN_ON(!mm->has_shadow_page_table))
		return -EINVAL;

	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
	if (!s)
		return -ENXIO;

	if (!guest)
		ppgtt_get_shadow_entry(s, e, index);
	else
		ppgtt_get_guest_entry(s, e, index);
	return 0;
}

/**
 * intel_vgpu_gma_to_gpa - translate a gma to GPA
 * @mm: mm object. Could be a PPGTT or GGTT mm object
 * @gma: graphics memory address in this mm object
 *
 * This function is used to translate a graphics memory address in a specific
 * graphics memory space to a guest physical address.
 *
 * Returns:
 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
 */
unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
	struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
	unsigned long gma_index[4];
	struct intel_gvt_gtt_entry e;
	int i, index;
	int ret;

	if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
		return INTEL_GVT_INVALID_ADDR;

	if (mm->type == INTEL_GVT_MM_GGTT) {
		if (!vgpu_gmadr_is_valid(vgpu, gma))
			goto err;

		ggtt_get_guest_entry(mm, &e,
			gma_ops->gma_to_ggtt_pte_index(gma));
		gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
			+ (gma & ~GTT_PAGE_MASK);

		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
		return gpa;
	}

	switch (mm->page_table_level) {
	case 4:
		ppgtt_get_shadow_root_entry(mm, &e, 0);
		gma_index[0] = gma_ops->gma_to_pml4_index(gma);
		gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
		gma_index[2] = gma_ops->gma_to_pde_index(gma);
		gma_index[3] = gma_ops->gma_to_pte_index(gma);
		index = 4;
		break;
	case 3:
		ppgtt_get_shadow_root_entry(mm, &e,
				gma_ops->gma_to_l3_pdp_index(gma));
		gma_index[0] = gma_ops->gma_to_pde_index(gma);
		gma_index[1] = gma_ops->gma_to_pte_index(gma);
		index = 2;
		break;
	case 2:
		ppgtt_get_shadow_root_entry(mm, &e,
				gma_ops->gma_to_pde_index(gma));
		gma_index[0] = gma_ops->gma_to_pte_index(gma);
		index = 1;
		break;
	default:
		WARN_ON(1);
		goto err;
	}

	/* walk into the shadow page table and get gpa from guest entry */
	for (i = 0; i < index; i++) {
		ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
			(i == index - 1));
		if (ret)
			goto err;
	}

	gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
		+ (gma & ~GTT_PAGE_MASK);

	trace_gma_translate(vgpu->id, "ppgtt", 0,
			mm->page_table_level, gma, gpa);
	return gpa;
err:
	gvt_err("invalid mm type: %d gma %lx\n", mm->type, gma);
	return INTEL_GVT_INVALID_ADDR;
}

static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu,
	unsigned int off, void *p_data, unsigned int bytes)
{
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned long index = off >> info->gtt_entry_size_shift;
	struct intel_gvt_gtt_entry e;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ggtt_get_guest_entry(ggtt_mm, &e, index);
	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
			bytes);
	return 0;
}

/**
 * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data will be returned to guest
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register read
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_gtt_mmio_read(vgpu, off, p_data, bytes);
	return ret;
}

static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma;
	struct intel_gvt_gtt_entry e, m;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma),
				"vgpu%d: found oob ggtt write, offset %x\n",
				vgpu->id, off)) {
		return 0;
	}

	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);

	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
			bytes);

	if (ops->test_present(&e)) {
		ret = gtt_entry_p2m(vgpu, &e, &m);
		if (ret) {
			gvt_err("vgpu%d: fail to translate guest gtt entry\n",
					vgpu->id);
			return ret;
		}
	} else {
		m = e;
		m.val64 = 0;
	}

	ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
	return 0;
}

/**
 * intel_vgpu_emulate_gtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_gtt_mmio_write(vgpu, off, p_data, bytes);
	return ret;
}

static int create_scratch_page(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	void *p;
	void *vaddr;
	unsigned long mfn;

	gtt->scratch_page = alloc_page(GFP_KERNEL);
	if (!gtt->scratch_page) {
		gvt_err("Failed to allocate scratch page.\n");
		return -ENOMEM;
	}

	/* set to zero */
	p = kmap_atomic(gtt->scratch_page);
	memset(p, 0, PAGE_SIZE);
	kunmap_atomic(p);

	/* translate page to mfn */
	vaddr = page_address(gtt->scratch_page);
	mfn = intel_gvt_hypervisor_virt_to_mfn(vaddr);

	if (mfn == INTEL_GVT_INVALID_ADDR) {
		gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr);
		__free_page(gtt->scratch_page);
		gtt->scratch_page = NULL;
		return -ENXIO;
	}

	gtt->scratch_page_mfn = mfn;
	gvt_dbg_core("vgpu%d create scratch page: mfn=0x%lx\n", vgpu->id, mfn);
	return 0;
}

static void release_scratch_page(struct intel_vgpu *vgpu)
{
	if (vgpu->gtt.scratch_page != NULL) {
		__free_page(vgpu->gtt.scratch_page);
		vgpu->gtt.scratch_page = NULL;
		vgpu->gtt.scratch_page_mfn = 0;
	}
}

/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_vgpu_mm *ggtt_mm;

	hash_init(gtt->guest_page_hash_table);
	hash_init(gtt->shadow_page_hash_table);

	INIT_LIST_HEAD(&gtt->mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);

	ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT,
			NULL, 1, 0);
	if (IS_ERR(ggtt_mm)) {
		gvt_err("fail to create mm for ggtt.\n");
		return PTR_ERR(ggtt_mm);
	}

	gtt->ggtt_mm = ggtt_mm;

	return create_scratch_page(vgpu);
}

/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	ppgtt_free_all_shadow_page(vgpu);
	release_scratch_page(vgpu);

	list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, list);
		vgpu->gvt->gtt.mm_free_page_table(mm);
		list_del(&mm->list);
		list_del(&mm->lru_list);
		kfree(mm);
	}
}

static void clean_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;

	WARN(!list_empty(&gtt->oos_page_use_list_head),
		"someone is still using oos page\n");

	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		kfree(oos_page);
	}
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page;
	int i;
	int ret;

	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

	for (i = 0; i < preallocated_oos_pages; i++) {
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			gvt_err("fail to pre-allocate oos page\n");
			ret = -ENOMEM;
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
	}

	gvt_dbg_mm("%d oos pages preallocated\n", i);

	return 0;
fail:
	clean_spt_oos(gvt);
	return ret;
}

/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @page_table_level: PPGTT page table level
 * @root_entry: PPGTT page table root pointers
 *
 * This function is used to find a PPGTT mm object from the mm object pool.
 *
 * Returns:
 * pointer to mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
		int page_table_level, void *root_entry)
{
	struct list_head *pos;
	struct intel_vgpu_mm *mm;
	u64 *src, *dst;

	list_for_each(pos, &vgpu->gtt.mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, list);
		if (mm->type != INTEL_GVT_MM_PPGTT)
			continue;

		if (mm->page_table_level != page_table_level)
			continue;

		src = root_entry;
		dst = mm->virtual_page_table;

		if (page_table_level == 3) {
			if (src[0] == dst[0]
					&& src[1] == dst[1]
					&& src[2] == dst[2]
					&& src[3] == dst[3])
				return mm;
		} else {
			if (src[0] == dst[0])
				return mm;
		}
	}
	return NULL;
}
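/*
 * Example of how the lookup above is driven (a sketch, modelled on the g2v
 * handlers below): the guest publishes its PPGTT root pointers through the
 * PV info page, and the same four-quadword block serves both layouts.  For
 * a 4-level table only pdp[0] is meaningful and compared; for a 3-level
 * table all four PDP entries are compared:
 *
 *	u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
 *
 *	mm = intel_vgpu_find_ppgtt_mm(vgpu, 4, pdp);	// matches pdp[0]
 *	mm = intel_vgpu_find_ppgtt_mm(vgpu, 3, pdp);	// matches pdp[0..3]
 */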
/**
 * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from
 * g2v notification
 * @vgpu: a vGPU
 * @page_table_level: PPGTT page table level
 *
 * This function is used to create a PPGTT mm object from a guest to GVT-g
 * notification.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
		int page_table_level)
{
	u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
	struct intel_vgpu_mm *mm;

	if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
		return -EINVAL;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
	if (mm) {
		intel_gvt_mm_reference(mm);
	} else {
		mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT,
				pdp, page_table_level, 0);
		if (IS_ERR(mm)) {
			gvt_err("fail to create mm\n");
			return PTR_ERR(mm);
		}
	}
	return 0;
}

/**
 * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from
 * g2v notification
 * @vgpu: a vGPU
 * @page_table_level: PPGTT page table level
 *
 * This function is used to destroy a PPGTT mm object from a guest to GVT-g
 * notification.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
		int page_table_level)
{
	u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
	struct intel_vgpu_mm *mm;

	if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
		return -EINVAL;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
	if (!mm) {
		gvt_err("fail to find ppgtt instance.\n");
		return -EINVAL;
	}
	intel_gvt_mm_unreference(mm);
	return 0;
}

/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;

	gvt_dbg_core("init gtt\n");

	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
		gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
		gvt->gtt.mm_free_page_table = gen8_mm_free_page_table;
	} else {
		return -ENODEV;
	}

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.mm_lru_list_head);
	return 0;
}

/**
 * intel_gvt_clean_gtt - clean up mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the driver unloading stage, to clean up
 * the mm components of a GVT device.
 */
void intel_gvt_clean_gtt(struct intel_gvt *gvt)
{
	if (enable_out_of_sync)
		clean_spt_oos(gvt);
}
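/*
 * Illustrative sketch of the device-level lifecycle (the call site and the
 * error label shown are hypothetical): intel_gvt_init_gtt() is expected to
 * be called once while bringing up a GVT device, before any vGPU is
 * created, and intel_gvt_clean_gtt() on the unload path.  The OOS page
 * pool is only populated when enable_out_of_sync is set, and clean_spt_oos()
 * is likewise skipped otherwise:
 *
 *	ret = intel_gvt_init_gtt(gvt);
 *	if (ret)
 *		goto out_err;
 *	...
 *	intel_gvt_clean_gtt(gvt);
 */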