1 /* 2 * GTT virtualization 3 * 4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Zhi Wang <zhi.a.wang@intel.com> 27 * Zhenyu Wang <zhenyuw@linux.intel.com> 28 * Xiao Zheng <xiao.zheng@intel.com> 29 * 30 * Contributors: 31 * Min He <min.he@intel.com> 32 * Bing Niu <bing.niu@intel.com> 33 * 34 */ 35 36 #include <linux/vmalloc.h> 37 38 #include <drm/drm_print.h> 39 40 #include "gt/intel_gt_regs.h" 41 42 #include "gvt.h" 43 #include "i915_drv.h" 44 #include "i915_pvinfo.h" 45 #include "trace.h" 46 47 #if defined(VERBOSE_DEBUG) 48 #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) 49 #else 50 #define gvt_vdbg_mm(fmt, args...) 51 #endif 52 53 static bool enable_out_of_sync = false; 54 static int preallocated_oos_pages = 8192; 55 56 /* 57 * validate a gm address and related range size, 58 * translate it to host gm address 59 */ 60 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size) 61 { 62 if (size == 0) 63 return vgpu_gmadr_is_valid(vgpu, addr); 64 65 if (vgpu_gmadr_is_aperture(vgpu, addr) && 66 vgpu_gmadr_is_aperture(vgpu, addr + size - 1)) 67 return true; 68 else if (vgpu_gmadr_is_hidden(vgpu, addr) && 69 vgpu_gmadr_is_hidden(vgpu, addr + size - 1)) 70 return true; 71 72 gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n", 73 addr, size); 74 return false; 75 } 76 77 #define gtt_type_is_entry(type) \ 78 (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \ 79 && type != GTT_TYPE_PPGTT_PTE_ENTRY \ 80 && type != GTT_TYPE_PPGTT_ROOT_ENTRY) 81 82 #define gtt_type_is_pt(type) \ 83 (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) 84 85 #define gtt_type_is_pte_pt(type) \ 86 (type == GTT_TYPE_PPGTT_PTE_PT) 87 88 #define gtt_type_is_root_pointer(type) \ 89 (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY) 90 91 #define gtt_init_entry(e, t, p, v) do { \ 92 (e)->type = t; \ 93 (e)->pdev = p; \ 94 memcpy(&(e)->val64, &v, sizeof(v)); \ 95 } while (0) 96 97 /* 98 * Mappings between GTT_TYPE* enumerations. 99 * Following information can be found according to the given type: 100 * - type of next level page table 101 * - type of entry inside this level page table 102 * - type of entry with PSE set 103 * 104 * If the given type doesn't have such a kind of information, 105 * e.g. give a l4 root entry type, then request to get its PSE type, 106 * give a PTE page table type, then request to get its next level page 107 * table type, as we know l4 root entry doesn't have a PSE bit, 108 * and a PTE page table doesn't have a next level page table type, 109 * GTT_TYPE_INVALID will be returned. This is useful when traversing a 110 * page table. 111 */ 112 113 struct gtt_type_table_entry { 114 int entry_type; 115 int pt_type; 116 int next_pt_type; 117 int pse_entry_type; 118 }; 119 120 #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \ 121 [type] = { \ 122 .entry_type = e_type, \ 123 .pt_type = cpt_type, \ 124 .next_pt_type = npt_type, \ 125 .pse_entry_type = pse_type, \ 126 } 127 128 static const struct gtt_type_table_entry gtt_type_table[] = { 129 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 130 GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 131 GTT_TYPE_INVALID, 132 GTT_TYPE_PPGTT_PML4_PT, 133 GTT_TYPE_INVALID), 134 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT, 135 GTT_TYPE_PPGTT_PML4_ENTRY, 136 GTT_TYPE_PPGTT_PML4_PT, 137 GTT_TYPE_PPGTT_PDP_PT, 138 GTT_TYPE_INVALID), 139 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY, 140 GTT_TYPE_PPGTT_PML4_ENTRY, 141 GTT_TYPE_PPGTT_PML4_PT, 142 GTT_TYPE_PPGTT_PDP_PT, 143 GTT_TYPE_INVALID), 144 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT, 145 GTT_TYPE_PPGTT_PDP_ENTRY, 146 GTT_TYPE_PPGTT_PDP_PT, 147 GTT_TYPE_PPGTT_PDE_PT, 148 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 149 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 150 GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 151 GTT_TYPE_INVALID, 152 GTT_TYPE_PPGTT_PDE_PT, 153 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 154 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY, 155 GTT_TYPE_PPGTT_PDP_ENTRY, 156 GTT_TYPE_PPGTT_PDP_PT, 157 GTT_TYPE_PPGTT_PDE_PT, 158 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 159 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT, 160 GTT_TYPE_PPGTT_PDE_ENTRY, 161 GTT_TYPE_PPGTT_PDE_PT, 162 GTT_TYPE_PPGTT_PTE_PT, 163 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 164 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY, 165 GTT_TYPE_PPGTT_PDE_ENTRY, 166 GTT_TYPE_PPGTT_PDE_PT, 167 GTT_TYPE_PPGTT_PTE_PT, 168 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 169 /* We take IPS bit as 'PSE' for PTE level. */ 170 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, 171 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 172 GTT_TYPE_PPGTT_PTE_PT, 173 GTT_TYPE_INVALID, 174 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 175 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, 176 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 177 GTT_TYPE_PPGTT_PTE_PT, 178 GTT_TYPE_INVALID, 179 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 180 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY, 181 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 182 GTT_TYPE_PPGTT_PTE_PT, 183 GTT_TYPE_INVALID, 184 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 185 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, 186 GTT_TYPE_PPGTT_PDE_ENTRY, 187 GTT_TYPE_PPGTT_PDE_PT, 188 GTT_TYPE_INVALID, 189 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 190 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY, 191 GTT_TYPE_PPGTT_PDP_ENTRY, 192 GTT_TYPE_PPGTT_PDP_PT, 193 GTT_TYPE_INVALID, 194 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 195 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE, 196 GTT_TYPE_GGTT_PTE, 197 GTT_TYPE_INVALID, 198 GTT_TYPE_INVALID, 199 GTT_TYPE_INVALID), 200 }; 201 202 static inline int get_next_pt_type(int type) 203 { 204 return gtt_type_table[type].next_pt_type; 205 } 206 207 static inline int get_entry_type(int type) 208 { 209 return gtt_type_table[type].entry_type; 210 } 211 212 static inline int get_pse_type(int type) 213 { 214 return gtt_type_table[type].pse_entry_type; 215 } 216 217 static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index) 218 { 219 void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index; 220 221 return readq(addr); 222 } 223 224 static void ggtt_invalidate(struct intel_gt *gt) 225 { 226 intel_wakeref_t wakeref; 227 228 wakeref = mmio_hw_access_pre(gt); 229 intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 230 mmio_hw_access_post(gt, wakeref); 231 } 232 233 static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte) 234 { 235 void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index; 236 237 writeq(pte, addr); 238 } 239 240 static inline int gtt_get_entry64(void *pt, 241 struct intel_gvt_gtt_entry *e, 242 unsigned long index, bool hypervisor_access, unsigned long gpa, 243 struct intel_vgpu *vgpu) 244 { 245 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 246 int ret; 247 248 if (WARN_ON(info->gtt_entry_size != 8)) 249 return -EINVAL; 250 251 if (hypervisor_access) { 252 ret = intel_gvt_read_gpa(vgpu, gpa + 253 (index << info->gtt_entry_size_shift), 254 &e->val64, 8); 255 if (WARN_ON(ret)) 256 return ret; 257 } else if (!pt) { 258 e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index); 259 } else { 260 e->val64 = *((u64 *)pt + index); 261 } 262 return 0; 263 } 264 265 static inline int gtt_set_entry64(void *pt, 266 struct intel_gvt_gtt_entry *e, 267 unsigned long index, bool hypervisor_access, unsigned long gpa, 268 struct intel_vgpu *vgpu) 269 { 270 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 271 int ret; 272 273 if (WARN_ON(info->gtt_entry_size != 8)) 274 return -EINVAL; 275 276 if (hypervisor_access) { 277 ret = intel_gvt_write_gpa(vgpu, gpa + 278 (index << info->gtt_entry_size_shift), 279 &e->val64, 8); 280 if (WARN_ON(ret)) 281 return ret; 282 } else if (!pt) { 283 write_pte64(vgpu->gvt->gt->ggtt, index, e->val64); 284 } else { 285 *((u64 *)pt + index) = e->val64; 286 } 287 return 0; 288 } 289 290 #define GTT_HAW 46 291 292 #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) 293 #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) 294 #define ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16) 295 #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) 296 297 #define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52) 298 #define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */ 299 300 #define GTT_64K_PTE_STRIDE 16 301 302 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) 303 { 304 unsigned long pfn; 305 306 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) 307 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; 308 else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) 309 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; 310 else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) 311 pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT; 312 else 313 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; 314 return pfn; 315 } 316 317 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) 318 { 319 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { 320 e->val64 &= ~ADDR_1G_MASK; 321 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT); 322 } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { 323 e->val64 &= ~ADDR_2M_MASK; 324 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); 325 } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) { 326 e->val64 &= ~ADDR_64K_MASK; 327 pfn &= (ADDR_64K_MASK >> PAGE_SHIFT); 328 } else { 329 e->val64 &= ~ADDR_4K_MASK; 330 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); 331 } 332 333 e->val64 |= (pfn << PAGE_SHIFT); 334 } 335 336 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) 337 { 338 return !!(e->val64 & _PAGE_PSE); 339 } 340 341 static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e) 342 { 343 if (gen8_gtt_test_pse(e)) { 344 switch (e->type) { 345 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 346 e->val64 &= ~_PAGE_PSE; 347 e->type = GTT_TYPE_PPGTT_PDE_ENTRY; 348 break; 349 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 350 e->type = GTT_TYPE_PPGTT_PDP_ENTRY; 351 e->val64 &= ~_PAGE_PSE; 352 break; 353 default: 354 WARN_ON(1); 355 } 356 } 357 } 358 359 static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e) 360 { 361 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) 362 return false; 363 364 return !!(e->val64 & GEN8_PDE_IPS_64K); 365 } 366 367 static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e) 368 { 369 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) 370 return; 371 372 e->val64 &= ~GEN8_PDE_IPS_64K; 373 } 374 375 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) 376 { 377 /* 378 * i915 writes PDP root pointer registers without present bit, 379 * it also works, so we need to treat root pointer entry 380 * specifically. 381 */ 382 if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY 383 || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 384 return (e->val64 != 0); 385 else 386 return (e->val64 & GEN8_PAGE_PRESENT); 387 } 388 389 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) 390 { 391 e->val64 &= ~GEN8_PAGE_PRESENT; 392 } 393 394 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) 395 { 396 e->val64 |= GEN8_PAGE_PRESENT; 397 } 398 399 static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) 400 { 401 return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED); 402 } 403 404 static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e) 405 { 406 e->val64 |= GTT_SPTE_FLAG_64K_SPLITED; 407 } 408 409 static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e) 410 { 411 e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED; 412 } 413 414 /* 415 * Per-platform GMA routines. 416 */ 417 static unsigned long gma_to_ggtt_pte_index(unsigned long gma) 418 { 419 unsigned long x = (gma >> I915_GTT_PAGE_SHIFT); 420 421 trace_gma_index(__func__, gma, x); 422 return x; 423 } 424 425 #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \ 426 static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \ 427 { \ 428 unsigned long x = (exp); \ 429 trace_gma_index(__func__, gma, x); \ 430 return x; \ 431 } 432 433 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff)); 434 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff)); 435 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3)); 436 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff)); 437 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff)); 438 439 static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { 440 .get_entry = gtt_get_entry64, 441 .set_entry = gtt_set_entry64, 442 .clear_present = gtt_entry_clear_present, 443 .set_present = gtt_entry_set_present, 444 .test_present = gen8_gtt_test_present, 445 .test_pse = gen8_gtt_test_pse, 446 .clear_pse = gen8_gtt_clear_pse, 447 .clear_ips = gen8_gtt_clear_ips, 448 .test_ips = gen8_gtt_test_ips, 449 .clear_64k_splited = gen8_gtt_clear_64k_splited, 450 .set_64k_splited = gen8_gtt_set_64k_splited, 451 .test_64k_splited = gen8_gtt_test_64k_splited, 452 .get_pfn = gen8_gtt_get_pfn, 453 .set_pfn = gen8_gtt_set_pfn, 454 }; 455 456 static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { 457 .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index, 458 .gma_to_pte_index = gen8_gma_to_pte_index, 459 .gma_to_pde_index = gen8_gma_to_pde_index, 460 .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index, 461 .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index, 462 .gma_to_pml4_index = gen8_gma_to_pml4_index, 463 }; 464 465 /* Update entry type per pse and ips bit. */ 466 static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops, 467 struct intel_gvt_gtt_entry *entry, bool ips) 468 { 469 switch (entry->type) { 470 case GTT_TYPE_PPGTT_PDE_ENTRY: 471 case GTT_TYPE_PPGTT_PDP_ENTRY: 472 if (pte_ops->test_pse(entry)) 473 entry->type = get_pse_type(entry->type); 474 break; 475 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 476 if (ips) 477 entry->type = get_pse_type(entry->type); 478 break; 479 default: 480 GEM_BUG_ON(!gtt_type_is_entry(entry->type)); 481 } 482 483 GEM_BUG_ON(entry->type == GTT_TYPE_INVALID); 484 } 485 486 /* 487 * MM helpers. 488 */ 489 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, 490 struct intel_gvt_gtt_entry *entry, unsigned long index, 491 bool guest) 492 { 493 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 494 495 GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT); 496 497 entry->type = mm->ppgtt_mm.root_entry_type; 498 pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps : 499 mm->ppgtt_mm.shadow_pdps, 500 entry, index, false, 0, mm->vgpu); 501 update_entry_type_for_real(pte_ops, entry, false); 502 } 503 504 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, 505 struct intel_gvt_gtt_entry *entry, unsigned long index) 506 { 507 _ppgtt_get_root_entry(mm, entry, index, true); 508 } 509 510 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm, 511 struct intel_gvt_gtt_entry *entry, unsigned long index) 512 { 513 _ppgtt_get_root_entry(mm, entry, index, false); 514 } 515 516 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, 517 struct intel_gvt_gtt_entry *entry, unsigned long index, 518 bool guest) 519 { 520 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 521 522 pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps : 523 mm->ppgtt_mm.shadow_pdps, 524 entry, index, false, 0, mm->vgpu); 525 } 526 527 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, 528 struct intel_gvt_gtt_entry *entry, unsigned long index) 529 { 530 _ppgtt_set_root_entry(mm, entry, index, false); 531 } 532 533 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm, 534 struct intel_gvt_gtt_entry *entry, unsigned long index) 535 { 536 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 537 538 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 539 540 entry->type = GTT_TYPE_GGTT_PTE; 541 pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 542 false, 0, mm->vgpu); 543 } 544 545 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, 546 struct intel_gvt_gtt_entry *entry, unsigned long index) 547 { 548 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 549 550 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 551 552 pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 553 false, 0, mm->vgpu); 554 } 555 556 static void ggtt_get_host_entry(struct intel_vgpu_mm *mm, 557 struct intel_gvt_gtt_entry *entry, unsigned long index) 558 { 559 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 560 561 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 562 563 pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu); 564 } 565 566 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, 567 struct intel_gvt_gtt_entry *entry, unsigned long index) 568 { 569 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 570 unsigned long offset = index; 571 572 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 573 574 if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) { 575 offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT); 576 mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64; 577 } else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) { 578 offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT); 579 mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64; 580 } 581 582 pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu); 583 } 584 585 /* 586 * PPGTT shadow page table helpers. 587 */ 588 static inline int ppgtt_spt_get_entry( 589 struct intel_vgpu_ppgtt_spt *spt, 590 void *page_table, int type, 591 struct intel_gvt_gtt_entry *e, unsigned long index, 592 bool guest) 593 { 594 struct intel_gvt *gvt = spt->vgpu->gvt; 595 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 596 int ret; 597 598 e->type = get_entry_type(type); 599 600 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 601 return -EINVAL; 602 603 ret = ops->get_entry(page_table, e, index, guest, 604 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 605 spt->vgpu); 606 if (ret) 607 return ret; 608 609 update_entry_type_for_real(ops, e, guest ? 610 spt->guest_page.pde_ips : false); 611 612 gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 613 type, e->type, index, e->val64); 614 return 0; 615 } 616 617 static inline int ppgtt_spt_set_entry( 618 struct intel_vgpu_ppgtt_spt *spt, 619 void *page_table, int type, 620 struct intel_gvt_gtt_entry *e, unsigned long index, 621 bool guest) 622 { 623 struct intel_gvt *gvt = spt->vgpu->gvt; 624 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 625 626 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 627 return -EINVAL; 628 629 gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 630 type, e->type, index, e->val64); 631 632 return ops->set_entry(page_table, e, index, guest, 633 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 634 spt->vgpu); 635 } 636 637 #define ppgtt_get_guest_entry(spt, e, index) \ 638 ppgtt_spt_get_entry(spt, NULL, \ 639 spt->guest_page.type, e, index, true) 640 641 #define ppgtt_set_guest_entry(spt, e, index) \ 642 ppgtt_spt_set_entry(spt, NULL, \ 643 spt->guest_page.type, e, index, true) 644 645 #define ppgtt_get_shadow_entry(spt, e, index) \ 646 ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \ 647 spt->shadow_page.type, e, index, false) 648 649 #define ppgtt_set_shadow_entry(spt, e, index) \ 650 ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ 651 spt->shadow_page.type, e, index, false) 652 653 static void *alloc_spt(gfp_t gfp_mask) 654 { 655 struct intel_vgpu_ppgtt_spt *spt; 656 657 spt = kzalloc_obj(*spt, gfp_mask); 658 if (!spt) 659 return NULL; 660 661 spt->shadow_page.page = alloc_page(gfp_mask); 662 if (!spt->shadow_page.page) { 663 kfree(spt); 664 return NULL; 665 } 666 return spt; 667 } 668 669 static void free_spt(struct intel_vgpu_ppgtt_spt *spt) 670 { 671 __free_page(spt->shadow_page.page); 672 kfree(spt); 673 } 674 675 static int detach_oos_page(struct intel_vgpu *vgpu, 676 struct intel_vgpu_oos_page *oos_page); 677 678 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) 679 { 680 struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev; 681 682 trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type); 683 684 dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, 685 DMA_BIDIRECTIONAL); 686 687 radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); 688 689 if (spt->guest_page.gfn) { 690 if (spt->guest_page.oos_page) 691 detach_oos_page(spt->vgpu, spt->guest_page.oos_page); 692 693 intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); 694 } 695 696 list_del_init(&spt->post_shadow_list); 697 free_spt(spt); 698 } 699 700 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) 701 { 702 struct intel_vgpu_ppgtt_spt *spt, *spn; 703 struct radix_tree_iter iter; 704 LIST_HEAD(all_spt); 705 void __rcu **slot; 706 707 rcu_read_lock(); 708 radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { 709 spt = radix_tree_deref_slot(slot); 710 list_move(&spt->post_shadow_list, &all_spt); 711 } 712 rcu_read_unlock(); 713 714 list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list) 715 ppgtt_free_spt(spt); 716 } 717 718 static int ppgtt_handle_guest_write_page_table_bytes( 719 struct intel_vgpu_ppgtt_spt *spt, 720 u64 pa, void *p_data, int bytes); 721 722 static int ppgtt_write_protection_handler( 723 struct intel_vgpu_page_track *page_track, 724 u64 gpa, void *data, int bytes) 725 { 726 struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data; 727 728 int ret; 729 730 if (bytes != 4 && bytes != 8) 731 return -EINVAL; 732 733 ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes); 734 if (ret) 735 return ret; 736 return ret; 737 } 738 739 /* Find a spt by guest gfn. */ 740 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( 741 struct intel_vgpu *vgpu, unsigned long gfn) 742 { 743 struct intel_vgpu_page_track *track; 744 745 track = intel_vgpu_find_page_track(vgpu, gfn); 746 if (track && track->handler == ppgtt_write_protection_handler) 747 return track->priv_data; 748 749 return NULL; 750 } 751 752 /* Find the spt by shadow page mfn. */ 753 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( 754 struct intel_vgpu *vgpu, unsigned long mfn) 755 { 756 return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn); 757 } 758 759 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); 760 761 /* Allocate shadow page table without guest page. */ 762 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( 763 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type) 764 { 765 struct device *kdev = vgpu->gvt->gt->i915->drm.dev; 766 struct intel_vgpu_ppgtt_spt *spt = NULL; 767 dma_addr_t daddr; 768 int ret; 769 770 retry: 771 spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); 772 if (!spt) { 773 if (reclaim_one_ppgtt_mm(vgpu->gvt)) 774 goto retry; 775 776 gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); 777 return ERR_PTR(-ENOMEM); 778 } 779 780 spt->vgpu = vgpu; 781 atomic_set(&spt->refcount, 1); 782 INIT_LIST_HEAD(&spt->post_shadow_list); 783 784 /* 785 * Init shadow_page. 786 */ 787 spt->shadow_page.type = type; 788 daddr = dma_map_page(kdev, spt->shadow_page.page, 789 0, 4096, DMA_BIDIRECTIONAL); 790 if (dma_mapping_error(kdev, daddr)) { 791 gvt_vgpu_err("fail to map dma addr\n"); 792 ret = -EINVAL; 793 goto err_free_spt; 794 } 795 spt->shadow_page.vaddr = page_address(spt->shadow_page.page); 796 spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; 797 798 ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); 799 if (ret) 800 goto err_unmap_dma; 801 802 return spt; 803 804 err_unmap_dma: 805 dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL); 806 err_free_spt: 807 free_spt(spt); 808 return ERR_PTR(ret); 809 } 810 811 /* Allocate shadow page table associated with specific gfn. */ 812 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( 813 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type, 814 unsigned long gfn, bool guest_pde_ips) 815 { 816 struct intel_vgpu_ppgtt_spt *spt; 817 int ret; 818 819 spt = ppgtt_alloc_spt(vgpu, type); 820 if (IS_ERR(spt)) 821 return spt; 822 823 /* 824 * Init guest_page. 825 */ 826 ret = intel_vgpu_register_page_track(vgpu, gfn, 827 ppgtt_write_protection_handler, spt); 828 if (ret) { 829 ppgtt_free_spt(spt); 830 return ERR_PTR(ret); 831 } 832 833 spt->guest_page.type = type; 834 spt->guest_page.gfn = gfn; 835 spt->guest_page.pde_ips = guest_pde_ips; 836 837 trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); 838 839 return spt; 840 } 841 842 #define pt_entry_size_shift(spt) \ 843 ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) 844 845 #define pt_entries(spt) \ 846 (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) 847 848 #define for_each_present_guest_entry(spt, e, i) \ 849 for (i = 0; i < pt_entries(spt); \ 850 i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ 851 if (!ppgtt_get_guest_entry(spt, e, i) && \ 852 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 853 854 #define for_each_present_shadow_entry(spt, e, i) \ 855 for (i = 0; i < pt_entries(spt); \ 856 i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ 857 if (!ppgtt_get_shadow_entry(spt, e, i) && \ 858 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 859 860 #define for_each_shadow_entry(spt, e, i) \ 861 for (i = 0; i < pt_entries(spt); \ 862 i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \ 863 if (!ppgtt_get_shadow_entry(spt, e, i)) 864 865 static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) 866 { 867 int v = atomic_read(&spt->refcount); 868 869 trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1)); 870 atomic_inc(&spt->refcount); 871 } 872 873 static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt) 874 { 875 int v = atomic_read(&spt->refcount); 876 877 trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); 878 return atomic_dec_return(&spt->refcount); 879 } 880 881 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); 882 883 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, 884 struct intel_gvt_gtt_entry *e) 885 { 886 struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 887 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 888 struct intel_vgpu_ppgtt_spt *s; 889 enum intel_gvt_gtt_type cur_pt_type; 890 891 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); 892 893 if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY 894 && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { 895 cur_pt_type = get_next_pt_type(e->type); 896 897 if (!gtt_type_is_pt(cur_pt_type) || 898 !gtt_type_is_pt(cur_pt_type + 1)) { 899 drm_WARN(&i915->drm, 1, 900 "Invalid page table type, cur_pt_type is: %d\n", 901 cur_pt_type); 902 return -EINVAL; 903 } 904 905 cur_pt_type += 1; 906 907 if (ops->get_pfn(e) == 908 vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) 909 return 0; 910 } 911 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 912 if (!s) { 913 gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", 914 ops->get_pfn(e)); 915 return -ENXIO; 916 } 917 return ppgtt_invalidate_spt(s); 918 } 919 920 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, 921 struct intel_gvt_gtt_entry *entry) 922 { 923 struct intel_vgpu *vgpu = spt->vgpu; 924 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 925 unsigned long pfn; 926 int type; 927 928 pfn = ops->get_pfn(entry); 929 type = spt->shadow_page.type; 930 931 /* Uninitialized spte or unshadowed spte. */ 932 if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn) 933 return; 934 935 intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); 936 } 937 938 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) 939 { 940 struct intel_vgpu *vgpu = spt->vgpu; 941 struct intel_gvt_gtt_entry e; 942 unsigned long index; 943 int ret; 944 945 trace_spt_change(spt->vgpu->id, "die", spt, 946 spt->guest_page.gfn, spt->shadow_page.type); 947 948 if (ppgtt_put_spt(spt) > 0) 949 return 0; 950 951 for_each_present_shadow_entry(spt, &e, index) { 952 switch (e.type) { 953 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 954 gvt_vdbg_mm("invalidate 4K entry\n"); 955 ppgtt_invalidate_pte(spt, &e); 956 break; 957 case GTT_TYPE_PPGTT_PTE_64K_ENTRY: 958 /* We don't setup 64K shadow entry so far. */ 959 WARN(1, "suspicious 64K gtt entry\n"); 960 continue; 961 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 962 gvt_vdbg_mm("invalidate 2M entry\n"); 963 continue; 964 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 965 WARN(1, "GVT doesn't support 1GB page\n"); 966 continue; 967 case GTT_TYPE_PPGTT_PML4_ENTRY: 968 case GTT_TYPE_PPGTT_PDP_ENTRY: 969 case GTT_TYPE_PPGTT_PDE_ENTRY: 970 gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n"); 971 ret = ppgtt_invalidate_spt_by_shadow_entry( 972 spt->vgpu, &e); 973 if (ret) 974 goto fail; 975 break; 976 default: 977 GEM_BUG_ON(1); 978 } 979 } 980 981 trace_spt_change(spt->vgpu->id, "release", spt, 982 spt->guest_page.gfn, spt->shadow_page.type); 983 ppgtt_free_spt(spt); 984 return 0; 985 fail: 986 gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n", 987 spt, e.val64, e.type); 988 return ret; 989 } 990 991 static bool vgpu_ips_enabled(struct intel_vgpu *vgpu) 992 { 993 struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; 994 995 if (GRAPHICS_VER(dev_priv) == 9) { 996 u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) & 997 GAMW_ECO_ENABLE_64K_IPS_FIELD; 998 999 return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD; 1000 } else if (GRAPHICS_VER(dev_priv) >= 11) { 1001 /* 64K paging only controlled by IPS bit in PTE now. */ 1002 return true; 1003 } else 1004 return false; 1005 } 1006 1007 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); 1008 1009 static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( 1010 struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we) 1011 { 1012 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1013 struct intel_vgpu_ppgtt_spt *spt = NULL; 1014 bool ips = false; 1015 int ret; 1016 1017 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); 1018 1019 if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY) 1020 ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we); 1021 1022 spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); 1023 if (spt) { 1024 ppgtt_get_spt(spt); 1025 1026 if (ips != spt->guest_page.pde_ips) { 1027 spt->guest_page.pde_ips = ips; 1028 1029 gvt_dbg_mm("reshadow PDE since ips changed\n"); 1030 clear_page(spt->shadow_page.vaddr); 1031 ret = ppgtt_populate_spt(spt); 1032 if (ret) { 1033 ppgtt_put_spt(spt); 1034 goto err; 1035 } 1036 } 1037 } else { 1038 int type = get_next_pt_type(we->type); 1039 1040 if (!gtt_type_is_pt(type)) { 1041 ret = -EINVAL; 1042 goto err; 1043 } 1044 1045 spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips); 1046 if (IS_ERR(spt)) { 1047 ret = PTR_ERR(spt); 1048 goto err; 1049 } 1050 1051 ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); 1052 if (ret) 1053 goto err_free_spt; 1054 1055 ret = ppgtt_populate_spt(spt); 1056 if (ret) 1057 goto err_free_spt; 1058 1059 trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn, 1060 spt->shadow_page.type); 1061 } 1062 return spt; 1063 1064 err_free_spt: 1065 ppgtt_free_spt(spt); 1066 spt = NULL; 1067 err: 1068 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1069 spt, we->val64, we->type); 1070 return ERR_PTR(ret); 1071 } 1072 1073 static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, 1074 struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge) 1075 { 1076 const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops; 1077 1078 se->type = ge->type; 1079 se->val64 = ge->val64; 1080 1081 /* Because we always split 64KB pages, so clear IPS in shadow PDE. */ 1082 if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY) 1083 ops->clear_ips(se); 1084 1085 ops->set_pfn(se, s->shadow_page.mfn); 1086 } 1087 1088 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, 1089 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1090 struct intel_gvt_gtt_entry *se) 1091 { 1092 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1093 struct intel_vgpu_ppgtt_spt *sub_spt; 1094 struct intel_gvt_gtt_entry sub_se; 1095 unsigned long start_gfn; 1096 dma_addr_t dma_addr; 1097 unsigned long sub_index; 1098 int ret; 1099 1100 gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index); 1101 1102 start_gfn = ops->get_pfn(se); 1103 1104 sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT); 1105 if (IS_ERR(sub_spt)) 1106 return PTR_ERR(sub_spt); 1107 1108 for_each_shadow_entry(sub_spt, &sub_se, sub_index) { 1109 ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index, 1110 PAGE_SIZE, &dma_addr); 1111 if (ret) 1112 goto err; 1113 sub_se.val64 = se->val64; 1114 1115 /* Copy the PAT field from PDE. */ 1116 sub_se.val64 &= ~_PAGE_PAT; 1117 sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5; 1118 1119 ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT); 1120 ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index); 1121 } 1122 1123 /* Clear dirty field. */ 1124 se->val64 &= ~_PAGE_DIRTY; 1125 1126 ops->clear_pse(se); 1127 ops->clear_ips(se); 1128 ops->set_pfn(se, sub_spt->shadow_page.mfn); 1129 ppgtt_set_shadow_entry(spt, se, index); 1130 return 0; 1131 err: 1132 /* Cancel the existing address mappings of DMA addr. */ 1133 for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { 1134 gvt_vdbg_mm("invalidate 4K entry\n"); 1135 ppgtt_invalidate_pte(sub_spt, &sub_se); 1136 } 1137 /* Release the new allocated spt. */ 1138 trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, 1139 sub_spt->guest_page.gfn, sub_spt->shadow_page.type); 1140 ppgtt_free_spt(sub_spt); 1141 return ret; 1142 } 1143 1144 static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, 1145 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1146 struct intel_gvt_gtt_entry *se) 1147 { 1148 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1149 struct intel_gvt_gtt_entry entry = *se; 1150 unsigned long start_gfn; 1151 dma_addr_t dma_addr; 1152 int i, ret; 1153 1154 gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index); 1155 1156 GEM_BUG_ON(index % GTT_64K_PTE_STRIDE); 1157 1158 start_gfn = ops->get_pfn(se); 1159 1160 entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY; 1161 ops->set_64k_splited(&entry); 1162 1163 for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { 1164 ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i, 1165 PAGE_SIZE, &dma_addr); 1166 if (ret) 1167 return ret; 1168 1169 ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT); 1170 ppgtt_set_shadow_entry(spt, &entry, index + i); 1171 } 1172 return 0; 1173 } 1174 1175 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, 1176 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1177 struct intel_gvt_gtt_entry *ge) 1178 { 1179 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 1180 struct intel_gvt_gtt_entry se = *ge; 1181 unsigned long gfn; 1182 dma_addr_t dma_addr; 1183 int ret; 1184 1185 if (!pte_ops->test_present(ge)) 1186 return 0; 1187 1188 gfn = pte_ops->get_pfn(ge); 1189 1190 switch (ge->type) { 1191 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 1192 gvt_vdbg_mm("shadow 4K gtt entry\n"); 1193 ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr); 1194 if (ret) 1195 return -ENXIO; 1196 break; 1197 case GTT_TYPE_PPGTT_PTE_64K_ENTRY: 1198 gvt_vdbg_mm("shadow 64K gtt entry\n"); 1199 /* 1200 * The layout of 64K page is special, the page size is 1201 * controlled by upper PDE. To be simple, we always split 1202 * 64K page to smaller 4K pages in shadow PT. 1203 */ 1204 return split_64KB_gtt_entry(vgpu, spt, index, &se); 1205 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 1206 gvt_vdbg_mm("shadow 2M gtt entry\n"); 1207 if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) || 1208 intel_gvt_dma_map_guest_page(vgpu, gfn, 1209 I915_GTT_PAGE_SIZE_2M, &dma_addr)) 1210 return split_2MB_gtt_entry(vgpu, spt, index, &se); 1211 break; 1212 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 1213 gvt_vgpu_err("GVT doesn't support 1GB entry\n"); 1214 return -EINVAL; 1215 default: 1216 GEM_BUG_ON(1); 1217 return -EINVAL; 1218 } 1219 1220 /* Successfully shadowed a 4K or 2M page (without splitting). */ 1221 pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT); 1222 ppgtt_set_shadow_entry(spt, &se, index); 1223 return 0; 1224 } 1225 1226 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) 1227 { 1228 struct intel_vgpu *vgpu = spt->vgpu; 1229 struct intel_vgpu_ppgtt_spt *s; 1230 struct intel_gvt_gtt_entry se, ge; 1231 unsigned long i; 1232 int ret; 1233 1234 trace_spt_change(spt->vgpu->id, "born", spt, 1235 spt->guest_page.gfn, spt->shadow_page.type); 1236 1237 for_each_present_guest_entry(spt, &ge, i) { 1238 if (gtt_type_is_pt(get_next_pt_type(ge.type))) { 1239 s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1240 if (IS_ERR(s)) { 1241 ret = PTR_ERR(s); 1242 goto fail; 1243 } 1244 ppgtt_get_shadow_entry(spt, &se, i); 1245 ppgtt_generate_shadow_entry(&se, s, &ge); 1246 ppgtt_set_shadow_entry(spt, &se, i); 1247 } else { 1248 ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge); 1249 if (ret) 1250 goto fail; 1251 } 1252 } 1253 return 0; 1254 fail: 1255 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1256 spt, ge.val64, ge.type); 1257 return ret; 1258 } 1259 1260 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, 1261 struct intel_gvt_gtt_entry *se, unsigned long index) 1262 { 1263 struct intel_vgpu *vgpu = spt->vgpu; 1264 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1265 int ret; 1266 1267 trace_spt_guest_change(spt->vgpu->id, "remove", spt, 1268 spt->shadow_page.type, se->val64, index); 1269 1270 gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n", 1271 se->type, index, se->val64); 1272 1273 if (!ops->test_present(se)) 1274 return 0; 1275 1276 if (ops->get_pfn(se) == 1277 vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn) 1278 return 0; 1279 1280 if (gtt_type_is_pt(get_next_pt_type(se->type))) { 1281 struct intel_vgpu_ppgtt_spt *s = 1282 intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se)); 1283 if (!s) { 1284 gvt_vgpu_err("fail to find guest page\n"); 1285 ret = -ENXIO; 1286 goto fail; 1287 } 1288 ret = ppgtt_invalidate_spt(s); 1289 if (ret) 1290 goto fail; 1291 } else { 1292 /* We don't setup 64K shadow entry so far. */ 1293 WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY, 1294 "suspicious 64K entry\n"); 1295 ppgtt_invalidate_pte(spt, se); 1296 } 1297 1298 return 0; 1299 fail: 1300 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1301 spt, se->val64, se->type); 1302 return ret; 1303 } 1304 1305 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, 1306 struct intel_gvt_gtt_entry *we, unsigned long index) 1307 { 1308 struct intel_vgpu *vgpu = spt->vgpu; 1309 struct intel_gvt_gtt_entry m; 1310 struct intel_vgpu_ppgtt_spt *s; 1311 int ret; 1312 1313 trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type, 1314 we->val64, index); 1315 1316 gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n", 1317 we->type, index, we->val64); 1318 1319 if (gtt_type_is_pt(get_next_pt_type(we->type))) { 1320 s = ppgtt_populate_spt_by_guest_entry(vgpu, we); 1321 if (IS_ERR(s)) { 1322 ret = PTR_ERR(s); 1323 goto fail; 1324 } 1325 ppgtt_get_shadow_entry(spt, &m, index); 1326 ppgtt_generate_shadow_entry(&m, s, we); 1327 ppgtt_set_shadow_entry(spt, &m, index); 1328 } else { 1329 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we); 1330 if (ret) 1331 goto fail; 1332 } 1333 return 0; 1334 fail: 1335 gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n", 1336 spt, we->val64, we->type); 1337 return ret; 1338 } 1339 1340 static int sync_oos_page(struct intel_vgpu *vgpu, 1341 struct intel_vgpu_oos_page *oos_page) 1342 { 1343 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1344 struct intel_gvt *gvt = vgpu->gvt; 1345 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 1346 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1347 struct intel_gvt_gtt_entry old, new; 1348 int index; 1349 int ret; 1350 1351 trace_oos_change(vgpu->id, "sync", oos_page->id, 1352 spt, spt->guest_page.type); 1353 1354 old.type = new.type = get_entry_type(spt->guest_page.type); 1355 old.val64 = new.val64 = 0; 1356 1357 for (index = 0; index < (I915_GTT_PAGE_SIZE >> 1358 info->gtt_entry_size_shift); index++) { 1359 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); 1360 ops->get_entry(NULL, &new, index, true, 1361 spt->guest_page.gfn << PAGE_SHIFT, vgpu); 1362 1363 if (old.val64 == new.val64 1364 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) 1365 continue; 1366 1367 trace_oos_sync(vgpu->id, oos_page->id, 1368 spt, spt->guest_page.type, 1369 new.val64, index); 1370 1371 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new); 1372 if (ret) 1373 return ret; 1374 1375 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); 1376 } 1377 1378 spt->guest_page.write_cnt = 0; 1379 list_del_init(&spt->post_shadow_list); 1380 return 0; 1381 } 1382 1383 static int detach_oos_page(struct intel_vgpu *vgpu, 1384 struct intel_vgpu_oos_page *oos_page) 1385 { 1386 struct intel_gvt *gvt = vgpu->gvt; 1387 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1388 1389 trace_oos_change(vgpu->id, "detach", oos_page->id, 1390 spt, spt->guest_page.type); 1391 1392 spt->guest_page.write_cnt = 0; 1393 spt->guest_page.oos_page = NULL; 1394 oos_page->spt = NULL; 1395 1396 list_del_init(&oos_page->vm_list); 1397 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head); 1398 1399 return 0; 1400 } 1401 1402 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page, 1403 struct intel_vgpu_ppgtt_spt *spt) 1404 { 1405 struct intel_gvt *gvt = spt->vgpu->gvt; 1406 int ret; 1407 1408 ret = intel_gvt_read_gpa(spt->vgpu, 1409 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 1410 oos_page->mem, I915_GTT_PAGE_SIZE); 1411 if (ret) 1412 return ret; 1413 1414 oos_page->spt = spt; 1415 spt->guest_page.oos_page = oos_page; 1416 1417 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head); 1418 1419 trace_oos_change(spt->vgpu->id, "attach", oos_page->id, 1420 spt, spt->guest_page.type); 1421 return 0; 1422 } 1423 1424 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) 1425 { 1426 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1427 int ret; 1428 1429 ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn); 1430 if (ret) 1431 return ret; 1432 1433 trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id, 1434 spt, spt->guest_page.type); 1435 1436 list_del_init(&oos_page->vm_list); 1437 return sync_oos_page(spt->vgpu, oos_page); 1438 } 1439 1440 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) 1441 { 1442 struct intel_gvt *gvt = spt->vgpu->gvt; 1443 struct intel_gvt_gtt *gtt = &gvt->gtt; 1444 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1445 int ret; 1446 1447 WARN(oos_page, "shadow PPGTT page has already has a oos page\n"); 1448 1449 if (list_empty(>t->oos_page_free_list_head)) { 1450 oos_page = container_of(gtt->oos_page_use_list_head.next, 1451 struct intel_vgpu_oos_page, list); 1452 ret = ppgtt_set_guest_page_sync(oos_page->spt); 1453 if (ret) 1454 return ret; 1455 ret = detach_oos_page(spt->vgpu, oos_page); 1456 if (ret) 1457 return ret; 1458 } else 1459 oos_page = container_of(gtt->oos_page_free_list_head.next, 1460 struct intel_vgpu_oos_page, list); 1461 return attach_oos_page(oos_page, spt); 1462 } 1463 1464 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) 1465 { 1466 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1467 1468 if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n")) 1469 return -EINVAL; 1470 1471 trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id, 1472 spt, spt->guest_page.type); 1473 1474 list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head); 1475 return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn); 1476 } 1477 1478 /** 1479 * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU 1480 * @vgpu: a vGPU 1481 * 1482 * This function is called before submitting a guest workload to host, 1483 * to sync all the out-of-synced shadow for vGPU 1484 * 1485 * Returns: 1486 * Zero on success, negative error code if failed. 1487 */ 1488 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) 1489 { 1490 struct list_head *pos, *n; 1491 struct intel_vgpu_oos_page *oos_page; 1492 int ret; 1493 1494 if (!enable_out_of_sync) 1495 return 0; 1496 1497 list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) { 1498 oos_page = container_of(pos, 1499 struct intel_vgpu_oos_page, vm_list); 1500 ret = ppgtt_set_guest_page_sync(oos_page->spt); 1501 if (ret) 1502 return ret; 1503 } 1504 return 0; 1505 } 1506 1507 /* 1508 * The heart of PPGTT shadow page table. 1509 */ 1510 static int ppgtt_handle_guest_write_page_table( 1511 struct intel_vgpu_ppgtt_spt *spt, 1512 struct intel_gvt_gtt_entry *we, unsigned long index) 1513 { 1514 struct intel_vgpu *vgpu = spt->vgpu; 1515 int type = spt->shadow_page.type; 1516 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1517 struct intel_gvt_gtt_entry old_se; 1518 int new_present; 1519 int i, ret; 1520 1521 new_present = ops->test_present(we); 1522 1523 /* 1524 * Adding the new entry first and then removing the old one, that can 1525 * guarantee the ppgtt table is validated during the window between 1526 * adding and removal. 1527 */ 1528 ppgtt_get_shadow_entry(spt, &old_se, index); 1529 1530 if (new_present) { 1531 ret = ppgtt_handle_guest_entry_add(spt, we, index); 1532 if (ret) 1533 goto fail; 1534 } 1535 1536 ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index); 1537 if (ret) 1538 goto fail; 1539 1540 if (!new_present) { 1541 /* For 64KB splited entries, we need clear them all. */ 1542 if (ops->test_64k_splited(&old_se) && 1543 !(index % GTT_64K_PTE_STRIDE)) { 1544 gvt_vdbg_mm("remove splited 64K shadow entries\n"); 1545 for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { 1546 ops->clear_64k_splited(&old_se); 1547 ops->set_pfn(&old_se, 1548 vgpu->gtt.scratch_pt[type].page_mfn); 1549 ppgtt_set_shadow_entry(spt, &old_se, index + i); 1550 } 1551 } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY || 1552 old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { 1553 ops->clear_pse(&old_se); 1554 ops->set_pfn(&old_se, 1555 vgpu->gtt.scratch_pt[type].page_mfn); 1556 ppgtt_set_shadow_entry(spt, &old_se, index); 1557 } else { 1558 ops->set_pfn(&old_se, 1559 vgpu->gtt.scratch_pt[type].page_mfn); 1560 ppgtt_set_shadow_entry(spt, &old_se, index); 1561 } 1562 } 1563 1564 return 0; 1565 fail: 1566 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n", 1567 spt, we->val64, we->type); 1568 return ret; 1569 } 1570 1571 1572 1573 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt) 1574 { 1575 return enable_out_of_sync 1576 && gtt_type_is_pte_pt(spt->guest_page.type) 1577 && spt->guest_page.write_cnt >= 2; 1578 } 1579 1580 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt, 1581 unsigned long index) 1582 { 1583 set_bit(index, spt->post_shadow_bitmap); 1584 if (!list_empty(&spt->post_shadow_list)) 1585 return; 1586 1587 list_add_tail(&spt->post_shadow_list, 1588 &spt->vgpu->gtt.post_shadow_list_head); 1589 } 1590 1591 /** 1592 * intel_vgpu_flush_post_shadow - flush the post shadow transactions 1593 * @vgpu: a vGPU 1594 * 1595 * This function is called before submitting a guest workload to host, 1596 * to flush all the post shadows for a vGPU. 1597 * 1598 * Returns: 1599 * Zero on success, negative error code if failed. 1600 */ 1601 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) 1602 { 1603 struct list_head *pos, *n; 1604 struct intel_vgpu_ppgtt_spt *spt; 1605 struct intel_gvt_gtt_entry ge; 1606 unsigned long index; 1607 int ret; 1608 1609 list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) { 1610 spt = container_of(pos, struct intel_vgpu_ppgtt_spt, 1611 post_shadow_list); 1612 1613 for_each_set_bit(index, spt->post_shadow_bitmap, 1614 GTT_ENTRY_NUM_IN_ONE_PAGE) { 1615 ppgtt_get_guest_entry(spt, &ge, index); 1616 1617 ret = ppgtt_handle_guest_write_page_table(spt, 1618 &ge, index); 1619 if (ret) 1620 return ret; 1621 clear_bit(index, spt->post_shadow_bitmap); 1622 } 1623 list_del_init(&spt->post_shadow_list); 1624 } 1625 return 0; 1626 } 1627 1628 static int ppgtt_handle_guest_write_page_table_bytes( 1629 struct intel_vgpu_ppgtt_spt *spt, 1630 u64 pa, void *p_data, int bytes) 1631 { 1632 struct intel_vgpu *vgpu = spt->vgpu; 1633 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1634 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1635 struct intel_gvt_gtt_entry we, se; 1636 unsigned long index; 1637 int ret; 1638 1639 index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift; 1640 1641 ppgtt_get_guest_entry(spt, &we, index); 1642 1643 /* 1644 * For page table which has 64K gtt entry, only PTE#0, PTE#16, 1645 * PTE#32, ... PTE#496 are used. Unused PTEs update should be 1646 * ignored. 1647 */ 1648 if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY && 1649 (index % GTT_64K_PTE_STRIDE)) { 1650 gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n", 1651 index); 1652 return 0; 1653 } 1654 1655 if (bytes == info->gtt_entry_size) { 1656 ret = ppgtt_handle_guest_write_page_table(spt, &we, index); 1657 if (ret) 1658 return ret; 1659 } else { 1660 if (!test_bit(index, spt->post_shadow_bitmap)) { 1661 int type = spt->shadow_page.type; 1662 1663 ppgtt_get_shadow_entry(spt, &se, index); 1664 ret = ppgtt_handle_guest_entry_removal(spt, &se, index); 1665 if (ret) 1666 return ret; 1667 ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); 1668 ppgtt_set_shadow_entry(spt, &se, index); 1669 } 1670 ppgtt_set_post_shadow(spt, index); 1671 } 1672 1673 if (!enable_out_of_sync) 1674 return 0; 1675 1676 spt->guest_page.write_cnt++; 1677 1678 if (spt->guest_page.oos_page) 1679 ops->set_entry(spt->guest_page.oos_page->mem, &we, index, 1680 false, 0, vgpu); 1681 1682 if (can_do_out_of_sync(spt)) { 1683 if (!spt->guest_page.oos_page) 1684 ppgtt_allocate_oos_page(spt); 1685 1686 ret = ppgtt_set_guest_page_oos(spt); 1687 if (ret < 0) 1688 return ret; 1689 } 1690 return 0; 1691 } 1692 1693 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) 1694 { 1695 struct intel_vgpu *vgpu = mm->vgpu; 1696 struct intel_gvt *gvt = vgpu->gvt; 1697 struct intel_gvt_gtt *gtt = &gvt->gtt; 1698 const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1699 struct intel_gvt_gtt_entry se; 1700 int index; 1701 1702 if (!mm->ppgtt_mm.shadowed) 1703 return; 1704 1705 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { 1706 ppgtt_get_shadow_root_entry(mm, &se, index); 1707 1708 if (!ops->test_present(&se)) 1709 continue; 1710 1711 ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se); 1712 se.val64 = 0; 1713 ppgtt_set_shadow_root_entry(mm, &se, index); 1714 1715 trace_spt_guest_change(vgpu->id, "destroy root pointer", 1716 NULL, se.type, se.val64, index); 1717 } 1718 1719 mm->ppgtt_mm.shadowed = false; 1720 } 1721 1722 1723 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) 1724 { 1725 struct intel_vgpu *vgpu = mm->vgpu; 1726 struct intel_gvt *gvt = vgpu->gvt; 1727 struct intel_gvt_gtt *gtt = &gvt->gtt; 1728 const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1729 struct intel_vgpu_ppgtt_spt *spt; 1730 struct intel_gvt_gtt_entry ge, se; 1731 int index, ret; 1732 1733 if (mm->ppgtt_mm.shadowed) 1734 return 0; 1735 1736 if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) 1737 return -EINVAL; 1738 1739 mm->ppgtt_mm.shadowed = true; 1740 1741 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { 1742 ppgtt_get_guest_root_entry(mm, &ge, index); 1743 1744 if (!ops->test_present(&ge)) 1745 continue; 1746 1747 trace_spt_guest_change(vgpu->id, __func__, NULL, 1748 ge.type, ge.val64, index); 1749 1750 spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1751 if (IS_ERR(spt)) { 1752 gvt_vgpu_err("fail to populate guest root pointer\n"); 1753 ret = PTR_ERR(spt); 1754 goto fail; 1755 } 1756 ppgtt_generate_shadow_entry(&se, spt, &ge); 1757 ppgtt_set_shadow_root_entry(mm, &se, index); 1758 1759 trace_spt_guest_change(vgpu->id, "populate root pointer", 1760 NULL, se.type, se.val64, index); 1761 } 1762 1763 return 0; 1764 fail: 1765 invalidate_ppgtt_mm(mm); 1766 return ret; 1767 } 1768 1769 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu) 1770 { 1771 struct intel_vgpu_mm *mm; 1772 1773 mm = kzalloc_obj(*mm); 1774 if (!mm) 1775 return NULL; 1776 1777 mm->vgpu = vgpu; 1778 kref_init(&mm->ref); 1779 atomic_set(&mm->pincount, 0); 1780 1781 return mm; 1782 } 1783 1784 static void vgpu_free_mm(struct intel_vgpu_mm *mm) 1785 { 1786 kfree(mm); 1787 } 1788 1789 /** 1790 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU 1791 * @vgpu: a vGPU 1792 * @root_entry_type: ppgtt root entry type 1793 * @pdps: guest pdps. 1794 * 1795 * This function is used to create a ppgtt mm object for a vGPU. 1796 * 1797 * Returns: 1798 * Zero on success, negative error code in pointer if failed. 1799 */ 1800 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, 1801 enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) 1802 { 1803 struct intel_gvt *gvt = vgpu->gvt; 1804 struct intel_vgpu_mm *mm; 1805 int ret; 1806 1807 mm = vgpu_alloc_mm(vgpu); 1808 if (!mm) 1809 return ERR_PTR(-ENOMEM); 1810 1811 mm->type = INTEL_GVT_MM_PPGTT; 1812 1813 GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && 1814 root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY); 1815 mm->ppgtt_mm.root_entry_type = root_entry_type; 1816 1817 INIT_LIST_HEAD(&mm->ppgtt_mm.list); 1818 INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); 1819 INIT_LIST_HEAD(&mm->ppgtt_mm.link); 1820 1821 if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 1822 mm->ppgtt_mm.guest_pdps[0] = pdps[0]; 1823 else 1824 memcpy(mm->ppgtt_mm.guest_pdps, pdps, 1825 sizeof(mm->ppgtt_mm.guest_pdps)); 1826 1827 ret = shadow_ppgtt_mm(mm); 1828 if (ret) { 1829 gvt_vgpu_err("failed to shadow ppgtt mm\n"); 1830 vgpu_free_mm(mm); 1831 return ERR_PTR(ret); 1832 } 1833 1834 list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head); 1835 1836 mutex_lock(&gvt->gtt.ppgtt_mm_lock); 1837 list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head); 1838 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1839 1840 return mm; 1841 } 1842 1843 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) 1844 { 1845 struct intel_vgpu_mm *mm; 1846 unsigned long nr_entries; 1847 1848 mm = vgpu_alloc_mm(vgpu); 1849 if (!mm) 1850 return ERR_PTR(-ENOMEM); 1851 1852 mm->type = INTEL_GVT_MM_GGTT; 1853 1854 nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT; 1855 mm->ggtt_mm.virtual_ggtt = 1856 vzalloc(array_size(nr_entries, 1857 vgpu->gvt->device_info.gtt_entry_size)); 1858 if (!mm->ggtt_mm.virtual_ggtt) { 1859 vgpu_free_mm(mm); 1860 return ERR_PTR(-ENOMEM); 1861 } 1862 1863 mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64)); 1864 if (!mm->ggtt_mm.host_ggtt_aperture) { 1865 vfree(mm->ggtt_mm.virtual_ggtt); 1866 vgpu_free_mm(mm); 1867 return ERR_PTR(-ENOMEM); 1868 } 1869 1870 mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64)); 1871 if (!mm->ggtt_mm.host_ggtt_hidden) { 1872 vfree(mm->ggtt_mm.host_ggtt_aperture); 1873 vfree(mm->ggtt_mm.virtual_ggtt); 1874 vgpu_free_mm(mm); 1875 return ERR_PTR(-ENOMEM); 1876 } 1877 1878 return mm; 1879 } 1880 1881 /** 1882 * _intel_vgpu_mm_release - destroy a mm object 1883 * @mm_ref: a kref object 1884 * 1885 * This function is used to destroy a mm object for vGPU 1886 * 1887 */ 1888 void _intel_vgpu_mm_release(struct kref *mm_ref) 1889 { 1890 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); 1891 1892 if (GEM_WARN_ON(atomic_read(&mm->pincount))) 1893 gvt_err("vgpu mm pin count bug detected\n"); 1894 1895 if (mm->type == INTEL_GVT_MM_PPGTT) { 1896 list_del(&mm->ppgtt_mm.list); 1897 1898 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1899 list_del(&mm->ppgtt_mm.lru_list); 1900 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1901 1902 invalidate_ppgtt_mm(mm); 1903 } else { 1904 vfree(mm->ggtt_mm.virtual_ggtt); 1905 vfree(mm->ggtt_mm.host_ggtt_aperture); 1906 vfree(mm->ggtt_mm.host_ggtt_hidden); 1907 } 1908 1909 vgpu_free_mm(mm); 1910 } 1911 1912 /** 1913 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object 1914 * @mm: a vGPU mm object 1915 * 1916 * This function is called when user doesn't want to use a vGPU mm object 1917 */ 1918 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) 1919 { 1920 atomic_dec_if_positive(&mm->pincount); 1921 } 1922 1923 /** 1924 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object 1925 * @mm: target vgpu mm 1926 * 1927 * This function is called when user wants to use a vGPU mm object. If this 1928 * mm object hasn't been shadowed yet, the shadow will be populated at this 1929 * time. 1930 * 1931 * Returns: 1932 * Zero on success, negative error code if failed. 1933 */ 1934 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) 1935 { 1936 int ret; 1937 1938 atomic_inc(&mm->pincount); 1939 1940 if (mm->type == INTEL_GVT_MM_PPGTT) { 1941 ret = shadow_ppgtt_mm(mm); 1942 if (ret) 1943 return ret; 1944 1945 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1946 list_move_tail(&mm->ppgtt_mm.lru_list, 1947 &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head); 1948 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1949 } 1950 1951 return 0; 1952 } 1953 1954 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) 1955 { 1956 struct intel_vgpu_mm *mm; 1957 struct list_head *pos, *n; 1958 1959 mutex_lock(&gvt->gtt.ppgtt_mm_lock); 1960 1961 list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { 1962 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); 1963 1964 if (atomic_read(&mm->pincount)) 1965 continue; 1966 1967 list_del_init(&mm->ppgtt_mm.lru_list); 1968 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1969 invalidate_ppgtt_mm(mm); 1970 return 1; 1971 } 1972 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1973 return 0; 1974 } 1975 1976 /* 1977 * GMA translation APIs. 1978 */ 1979 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, 1980 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest) 1981 { 1982 struct intel_vgpu *vgpu = mm->vgpu; 1983 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1984 struct intel_vgpu_ppgtt_spt *s; 1985 1986 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 1987 if (!s) 1988 return -ENXIO; 1989 1990 if (!guest) 1991 ppgtt_get_shadow_entry(s, e, index); 1992 else 1993 ppgtt_get_guest_entry(s, e, index); 1994 return 0; 1995 } 1996 1997 /** 1998 * intel_vgpu_gma_to_gpa - translate a gma to GPA 1999 * @mm: mm object. could be a PPGTT or GGTT mm object 2000 * @gma: graphics memory address in this mm object 2001 * 2002 * This function is used to translate a graphics memory address in specific 2003 * graphics memory space to guest physical address. 2004 * 2005 * Returns: 2006 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed. 2007 */ 2008 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) 2009 { 2010 struct intel_vgpu *vgpu = mm->vgpu; 2011 struct intel_gvt *gvt = vgpu->gvt; 2012 const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops; 2013 const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops; 2014 unsigned long gpa = INTEL_GVT_INVALID_ADDR; 2015 unsigned long gma_index[4]; 2016 struct intel_gvt_gtt_entry e; 2017 int i, levels = 0; 2018 int ret; 2019 2020 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT && 2021 mm->type != INTEL_GVT_MM_PPGTT); 2022 2023 if (mm->type == INTEL_GVT_MM_GGTT) { 2024 if (!vgpu_gmadr_is_valid(vgpu, gma)) 2025 goto err; 2026 2027 ggtt_get_guest_entry(mm, &e, 2028 gma_ops->gma_to_ggtt_pte_index(gma)); 2029 2030 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) 2031 + (gma & ~I915_GTT_PAGE_MASK); 2032 2033 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); 2034 } else { 2035 switch (mm->ppgtt_mm.root_entry_type) { 2036 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: 2037 ppgtt_get_shadow_root_entry(mm, &e, 0); 2038 2039 gma_index[0] = gma_ops->gma_to_pml4_index(gma); 2040 gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); 2041 gma_index[2] = gma_ops->gma_to_pde_index(gma); 2042 gma_index[3] = gma_ops->gma_to_pte_index(gma); 2043 levels = 4; 2044 break; 2045 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: 2046 ppgtt_get_shadow_root_entry(mm, &e, 2047 gma_ops->gma_to_l3_pdp_index(gma)); 2048 2049 gma_index[0] = gma_ops->gma_to_pde_index(gma); 2050 gma_index[1] = gma_ops->gma_to_pte_index(gma); 2051 levels = 2; 2052 break; 2053 default: 2054 GEM_BUG_ON(1); 2055 } 2056 2057 /* walk the shadow page table and get gpa from guest entry */ 2058 for (i = 0; i < levels; i++) { 2059 ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], 2060 (i == levels - 1)); 2061 if (ret) 2062 goto err; 2063 2064 if (!pte_ops->test_present(&e)) { 2065 gvt_dbg_core("GMA 0x%lx is not present\n", gma); 2066 goto err; 2067 } 2068 } 2069 2070 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + 2071 (gma & ~I915_GTT_PAGE_MASK); 2072 trace_gma_translate(vgpu->id, "ppgtt", 0, 2073 mm->ppgtt_mm.root_entry_type, gma, gpa); 2074 } 2075 2076 return gpa; 2077 err: 2078 gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); 2079 return INTEL_GVT_INVALID_ADDR; 2080 } 2081 2082 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, 2083 unsigned int off, void *p_data, unsigned int bytes) 2084 { 2085 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 2086 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2087 unsigned long index = off >> info->gtt_entry_size_shift; 2088 unsigned long gma; 2089 struct intel_gvt_gtt_entry e; 2090 2091 if (bytes != 4 && bytes != 8) 2092 return -EINVAL; 2093 2094 gma = index << I915_GTT_PAGE_SHIFT; 2095 if (!intel_gvt_ggtt_validate_range(vgpu, 2096 gma, 1 << I915_GTT_PAGE_SHIFT)) { 2097 gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma); 2098 memset(p_data, 0, bytes); 2099 return 0; 2100 } 2101 2102 ggtt_get_guest_entry(ggtt_mm, &e, index); 2103 memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), 2104 bytes); 2105 return 0; 2106 } 2107 2108 /** 2109 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read 2110 * @vgpu: a vGPU 2111 * @off: register offset 2112 * @p_data: data will be returned to guest 2113 * @bytes: data length 2114 * 2115 * This function is used to emulate the GTT MMIO register read 2116 * 2117 * Returns: 2118 * Zero on success, error code if failed. 2119 */ 2120 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, 2121 void *p_data, unsigned int bytes) 2122 { 2123 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2124 int ret; 2125 2126 if (bytes != 4 && bytes != 8) 2127 return -EINVAL; 2128 2129 off -= info->gtt_start_offset; 2130 ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes); 2131 return ret; 2132 } 2133 2134 static void ggtt_invalidate_pte(struct intel_vgpu *vgpu, 2135 struct intel_gvt_gtt_entry *entry) 2136 { 2137 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2138 unsigned long pfn; 2139 2140 pfn = pte_ops->get_pfn(entry); 2141 if (pfn != vgpu->gvt->gtt.scratch_mfn) 2142 intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); 2143 } 2144 2145 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, 2146 void *p_data, unsigned int bytes) 2147 { 2148 struct intel_gvt *gvt = vgpu->gvt; 2149 const struct intel_gvt_device_info *info = &gvt->device_info; 2150 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 2151 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 2152 unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; 2153 unsigned long gma, gfn; 2154 struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; 2155 struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; 2156 dma_addr_t dma_addr; 2157 int ret; 2158 struct intel_gvt_partial_pte *partial_pte, *pos, *n; 2159 bool partial_update = false; 2160 2161 if (bytes != 4 && bytes != 8) 2162 return -EINVAL; 2163 2164 gma = g_gtt_index << I915_GTT_PAGE_SHIFT; 2165 2166 /* the VM may configure the whole GM space when ballooning is used */ 2167 if (!vgpu_gmadr_is_valid(vgpu, gma)) 2168 return 0; 2169 2170 e.type = GTT_TYPE_GGTT_PTE; 2171 memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, 2172 bytes); 2173 2174 /* If ggtt entry size is 8 bytes, and it's split into two 4 bytes 2175 * write, save the first 4 bytes in a list and update virtual 2176 * PTE. Only update shadow PTE when the second 4 bytes comes. 2177 */ 2178 if (bytes < info->gtt_entry_size) { 2179 bool found = false; 2180 2181 list_for_each_entry_safe(pos, n, 2182 &ggtt_mm->ggtt_mm.partial_pte_list, list) { 2183 if (g_gtt_index == pos->offset >> 2184 info->gtt_entry_size_shift) { 2185 if (off != pos->offset) { 2186 /* the second partial part*/ 2187 int last_off = pos->offset & 2188 (info->gtt_entry_size - 1); 2189 2190 memcpy((void *)&e.val64 + last_off, 2191 (void *)&pos->data + last_off, 2192 bytes); 2193 2194 list_del(&pos->list); 2195 kfree(pos); 2196 found = true; 2197 break; 2198 } 2199 2200 /* update of the first partial part */ 2201 pos->data = e.val64; 2202 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); 2203 return 0; 2204 } 2205 } 2206 2207 if (!found) { 2208 /* the first partial part */ 2209 partial_pte = kzalloc_obj(*partial_pte); 2210 if (!partial_pte) 2211 return -ENOMEM; 2212 partial_pte->offset = off; 2213 partial_pte->data = e.val64; 2214 list_add_tail(&partial_pte->list, 2215 &ggtt_mm->ggtt_mm.partial_pte_list); 2216 partial_update = true; 2217 } 2218 } 2219 2220 if (!partial_update && (ops->test_present(&e))) { 2221 gfn = ops->get_pfn(&e); 2222 m.val64 = e.val64; 2223 m.type = e.type; 2224 2225 ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, 2226 &dma_addr); 2227 if (ret) { 2228 gvt_vgpu_err("fail to populate guest ggtt entry\n"); 2229 /* guest driver may read/write the entry when partial 2230 * update the entry in this situation p2m will fail 2231 * setting the shadow entry to point to a scratch page 2232 */ 2233 ops->set_pfn(&m, gvt->gtt.scratch_mfn); 2234 } else 2235 ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); 2236 } else { 2237 ops->set_pfn(&m, gvt->gtt.scratch_mfn); 2238 ops->clear_present(&m); 2239 } 2240 2241 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); 2242 2243 ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index); 2244 ggtt_invalidate_pte(vgpu, &e); 2245 2246 ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); 2247 ggtt_invalidate(gvt->gt); 2248 return 0; 2249 } 2250 2251 /* 2252 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write 2253 * @vgpu: a vGPU 2254 * @off: register offset 2255 * @p_data: data from guest write 2256 * @bytes: data length 2257 * 2258 * This function is used to emulate the GTT MMIO register write 2259 * 2260 * Returns: 2261 * Zero on success, error code if failed. 2262 */ 2263 int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, 2264 unsigned int off, void *p_data, unsigned int bytes) 2265 { 2266 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2267 int ret; 2268 struct intel_vgpu_submission *s = &vgpu->submission; 2269 struct intel_engine_cs *engine; 2270 int i; 2271 2272 if (bytes != 4 && bytes != 8) 2273 return -EINVAL; 2274 2275 off -= info->gtt_start_offset; 2276 ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes); 2277 2278 /* if ggtt of last submitted context is written, 2279 * that context is probably got unpinned. 2280 * Set last shadowed ctx to invalid. 2281 */ 2282 for_each_engine(engine, vgpu->gvt->gt, i) { 2283 if (!s->last_ctx[i].valid) 2284 continue; 2285 2286 if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift)) 2287 s->last_ctx[i].valid = false; 2288 } 2289 return ret; 2290 } 2291 2292 static int alloc_scratch_pages(struct intel_vgpu *vgpu, 2293 enum intel_gvt_gtt_type type) 2294 { 2295 struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 2296 struct intel_vgpu_gtt *gtt = &vgpu->gtt; 2297 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 2298 int page_entry_num = I915_GTT_PAGE_SIZE >> 2299 vgpu->gvt->device_info.gtt_entry_size_shift; 2300 void *scratch_pt; 2301 int i; 2302 struct device *dev = vgpu->gvt->gt->i915->drm.dev; 2303 dma_addr_t daddr; 2304 2305 if (drm_WARN_ON(&i915->drm, 2306 type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX)) 2307 return -EINVAL; 2308 2309 scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); 2310 if (!scratch_pt) { 2311 gvt_vgpu_err("fail to allocate scratch page\n"); 2312 return -ENOMEM; 2313 } 2314 2315 daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL); 2316 if (dma_mapping_error(dev, daddr)) { 2317 gvt_vgpu_err("fail to dmamap scratch_pt\n"); 2318 __free_page(virt_to_page(scratch_pt)); 2319 return -ENOMEM; 2320 } 2321 gtt->scratch_pt[type].page_mfn = 2322 (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); 2323 gtt->scratch_pt[type].page = virt_to_page(scratch_pt); 2324 gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", 2325 vgpu->id, type, gtt->scratch_pt[type].page_mfn); 2326 2327 /* Build the tree by full filled the scratch pt with the entries which 2328 * point to the next level scratch pt or scratch page. The 2329 * scratch_pt[type] indicate the scratch pt/scratch page used by the 2330 * 'type' pt. 2331 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by 2332 * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self 2333 * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn. 2334 */ 2335 if (type > GTT_TYPE_PPGTT_PTE_PT) { 2336 struct intel_gvt_gtt_entry se; 2337 2338 memset(&se, 0, sizeof(struct intel_gvt_gtt_entry)); 2339 se.type = get_entry_type(type - 1); 2340 ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn); 2341 2342 /* The entry parameters like present/writeable/cache type 2343 * set to the same as i915's scratch page tree. 2344 */ 2345 se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; 2346 if (type == GTT_TYPE_PPGTT_PDE_PT) 2347 se.val64 |= PPAT_CACHED; 2348 2349 for (i = 0; i < page_entry_num; i++) 2350 ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); 2351 } 2352 2353 return 0; 2354 } 2355 2356 static int release_scratch_page_tree(struct intel_vgpu *vgpu) 2357 { 2358 int i; 2359 struct device *dev = vgpu->gvt->gt->i915->drm.dev; 2360 dma_addr_t daddr; 2361 2362 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { 2363 if (vgpu->gtt.scratch_pt[i].page != NULL) { 2364 daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << 2365 I915_GTT_PAGE_SHIFT); 2366 dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); 2367 __free_page(vgpu->gtt.scratch_pt[i].page); 2368 vgpu->gtt.scratch_pt[i].page = NULL; 2369 vgpu->gtt.scratch_pt[i].page_mfn = 0; 2370 } 2371 } 2372 2373 return 0; 2374 } 2375 2376 static int create_scratch_page_tree(struct intel_vgpu *vgpu) 2377 { 2378 int i, ret; 2379 2380 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { 2381 ret = alloc_scratch_pages(vgpu, i); 2382 if (ret) 2383 goto err; 2384 } 2385 2386 return 0; 2387 2388 err: 2389 release_scratch_page_tree(vgpu); 2390 return ret; 2391 } 2392 2393 /** 2394 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization 2395 * @vgpu: a vGPU 2396 * 2397 * This function is used to initialize per-vGPU graphics memory virtualization 2398 * components. 2399 * 2400 * Returns: 2401 * Zero on success, error code if failed. 2402 */ 2403 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) 2404 { 2405 struct intel_vgpu_gtt *gtt = &vgpu->gtt; 2406 2407 INIT_RADIX_TREE(>t->spt_tree, GFP_KERNEL); 2408 2409 INIT_LIST_HEAD(>t->ppgtt_mm_list_head); 2410 INIT_LIST_HEAD(>t->oos_page_list_head); 2411 INIT_LIST_HEAD(>t->post_shadow_list_head); 2412 2413 gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu); 2414 if (IS_ERR(gtt->ggtt_mm)) { 2415 gvt_vgpu_err("fail to create mm for ggtt.\n"); 2416 return PTR_ERR(gtt->ggtt_mm); 2417 } 2418 2419 intel_vgpu_reset_ggtt(vgpu, false); 2420 2421 INIT_LIST_HEAD(>t->ggtt_mm->ggtt_mm.partial_pte_list); 2422 2423 return create_scratch_page_tree(vgpu); 2424 } 2425 2426 void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) 2427 { 2428 struct list_head *pos, *n; 2429 struct intel_vgpu_mm *mm; 2430 2431 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { 2432 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2433 intel_vgpu_destroy_mm(mm); 2434 } 2435 2436 if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) 2437 gvt_err("vgpu ppgtt mm is not fully destroyed\n"); 2438 2439 if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) { 2440 gvt_err("Why we still has spt not freed?\n"); 2441 ppgtt_free_all_spt(vgpu); 2442 } 2443 } 2444 2445 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu) 2446 { 2447 struct intel_gvt_partial_pte *pos, *next; 2448 2449 list_for_each_entry_safe(pos, next, 2450 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, 2451 list) { 2452 gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n", 2453 pos->offset, pos->data); 2454 kfree(pos); 2455 } 2456 intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm); 2457 vgpu->gtt.ggtt_mm = NULL; 2458 } 2459 2460 /** 2461 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization 2462 * @vgpu: a vGPU 2463 * 2464 * This function is used to clean up per-vGPU graphics memory virtualization 2465 * components. 2466 * 2467 * Returns: 2468 * Zero on success, error code if failed. 2469 */ 2470 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) 2471 { 2472 intel_vgpu_destroy_all_ppgtt_mm(vgpu); 2473 intel_vgpu_destroy_ggtt_mm(vgpu); 2474 release_scratch_page_tree(vgpu); 2475 } 2476 2477 static void clean_spt_oos(struct intel_gvt *gvt) 2478 { 2479 struct intel_gvt_gtt *gtt = &gvt->gtt; 2480 struct list_head *pos, *n; 2481 struct intel_vgpu_oos_page *oos_page; 2482 2483 WARN(!list_empty(>t->oos_page_use_list_head), 2484 "someone is still using oos page\n"); 2485 2486 list_for_each_safe(pos, n, >t->oos_page_free_list_head) { 2487 oos_page = container_of(pos, struct intel_vgpu_oos_page, list); 2488 list_del(&oos_page->list); 2489 free_page((unsigned long)oos_page->mem); 2490 kfree(oos_page); 2491 } 2492 } 2493 2494 static int setup_spt_oos(struct intel_gvt *gvt) 2495 { 2496 struct intel_gvt_gtt *gtt = &gvt->gtt; 2497 struct intel_vgpu_oos_page *oos_page; 2498 int i; 2499 int ret; 2500 2501 INIT_LIST_HEAD(>t->oos_page_free_list_head); 2502 INIT_LIST_HEAD(>t->oos_page_use_list_head); 2503 2504 for (i = 0; i < preallocated_oos_pages; i++) { 2505 oos_page = kzalloc_obj(*oos_page); 2506 if (!oos_page) { 2507 ret = -ENOMEM; 2508 goto fail; 2509 } 2510 oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0); 2511 if (!oos_page->mem) { 2512 ret = -ENOMEM; 2513 kfree(oos_page); 2514 goto fail; 2515 } 2516 2517 INIT_LIST_HEAD(&oos_page->list); 2518 INIT_LIST_HEAD(&oos_page->vm_list); 2519 oos_page->id = i; 2520 list_add_tail(&oos_page->list, >t->oos_page_free_list_head); 2521 } 2522 2523 gvt_dbg_mm("%d oos pages preallocated\n", i); 2524 2525 return 0; 2526 fail: 2527 clean_spt_oos(gvt); 2528 return ret; 2529 } 2530 2531 /** 2532 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object 2533 * @vgpu: a vGPU 2534 * @pdps: pdp root array 2535 * 2536 * This function is used to find a PPGTT mm object from mm object pool 2537 * 2538 * Returns: 2539 * pointer to mm object on success, NULL if failed. 2540 */ 2541 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, 2542 u64 pdps[]) 2543 { 2544 struct intel_vgpu_mm *mm; 2545 struct list_head *pos; 2546 2547 list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) { 2548 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2549 2550 switch (mm->ppgtt_mm.root_entry_type) { 2551 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: 2552 if (pdps[0] == mm->ppgtt_mm.guest_pdps[0]) 2553 return mm; 2554 break; 2555 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: 2556 if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps, 2557 sizeof(mm->ppgtt_mm.guest_pdps))) 2558 return mm; 2559 break; 2560 default: 2561 GEM_BUG_ON(1); 2562 } 2563 } 2564 return NULL; 2565 } 2566 2567 /** 2568 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object. 2569 * @vgpu: a vGPU 2570 * @root_entry_type: ppgtt root entry type 2571 * @pdps: guest pdps 2572 * 2573 * This function is used to find or create a PPGTT mm object from a guest. 2574 * 2575 * Returns: 2576 * Zero on success, negative error code if failed. 2577 */ 2578 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, 2579 enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) 2580 { 2581 struct intel_vgpu_mm *mm; 2582 2583 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); 2584 if (mm) { 2585 intel_vgpu_mm_get(mm); 2586 } else { 2587 mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps); 2588 if (IS_ERR(mm)) 2589 gvt_vgpu_err("fail to create mm\n"); 2590 } 2591 return mm; 2592 } 2593 2594 /** 2595 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object. 2596 * @vgpu: a vGPU 2597 * @pdps: guest pdps 2598 * 2599 * This function is used to find a PPGTT mm object from a guest and destroy it. 2600 * 2601 * Returns: 2602 * Zero on success, negative error code if failed. 2603 */ 2604 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]) 2605 { 2606 struct intel_vgpu_mm *mm; 2607 2608 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); 2609 if (!mm) { 2610 gvt_vgpu_err("fail to find ppgtt instance.\n"); 2611 return -EINVAL; 2612 } 2613 intel_vgpu_mm_put(mm); 2614 return 0; 2615 } 2616 2617 /** 2618 * intel_gvt_init_gtt - initialize mm components of a GVT device 2619 * @gvt: GVT device 2620 * 2621 * This function is called at the initialization stage, to initialize 2622 * the mm components of a GVT device. 2623 * 2624 * Returns: 2625 * zero on success, negative error code if failed. 2626 */ 2627 int intel_gvt_init_gtt(struct intel_gvt *gvt) 2628 { 2629 int ret; 2630 void *page; 2631 struct device *dev = gvt->gt->i915->drm.dev; 2632 dma_addr_t daddr; 2633 2634 gvt_dbg_core("init gtt\n"); 2635 2636 gvt->gtt.pte_ops = &gen8_gtt_pte_ops; 2637 gvt->gtt.gma_ops = &gen8_gtt_gma_ops; 2638 2639 page = (void *)get_zeroed_page(GFP_KERNEL); 2640 if (!page) { 2641 gvt_err("fail to allocate scratch ggtt page\n"); 2642 return -ENOMEM; 2643 } 2644 2645 daddr = dma_map_page(dev, virt_to_page(page), 0, 2646 4096, DMA_BIDIRECTIONAL); 2647 if (dma_mapping_error(dev, daddr)) { 2648 gvt_err("fail to dmamap scratch ggtt page\n"); 2649 __free_page(virt_to_page(page)); 2650 return -ENOMEM; 2651 } 2652 2653 gvt->gtt.scratch_page = virt_to_page(page); 2654 gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); 2655 2656 if (enable_out_of_sync) { 2657 ret = setup_spt_oos(gvt); 2658 if (ret) { 2659 gvt_err("fail to initialize SPT oos\n"); 2660 dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); 2661 __free_page(gvt->gtt.scratch_page); 2662 return ret; 2663 } 2664 } 2665 INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head); 2666 mutex_init(&gvt->gtt.ppgtt_mm_lock); 2667 return 0; 2668 } 2669 2670 /** 2671 * intel_gvt_clean_gtt - clean up mm components of a GVT device 2672 * @gvt: GVT device 2673 * 2674 * This function is called at the driver unloading stage, to clean up 2675 * the mm components of a GVT device. 2676 * 2677 */ 2678 void intel_gvt_clean_gtt(struct intel_gvt *gvt) 2679 { 2680 struct device *dev = gvt->gt->i915->drm.dev; 2681 dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn << 2682 I915_GTT_PAGE_SHIFT); 2683 2684 dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); 2685 2686 __free_page(gvt->gtt.scratch_page); 2687 2688 if (enable_out_of_sync) 2689 clean_spt_oos(gvt); 2690 } 2691 2692 /** 2693 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances 2694 * @vgpu: a vGPU 2695 * 2696 * This function is called when invalidate all PPGTT instances of a vGPU. 2697 * 2698 */ 2699 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) 2700 { 2701 struct list_head *pos, *n; 2702 struct intel_vgpu_mm *mm; 2703 2704 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { 2705 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2706 if (mm->type == INTEL_GVT_MM_PPGTT) { 2707 mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock); 2708 list_del_init(&mm->ppgtt_mm.lru_list); 2709 mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock); 2710 if (mm->ppgtt_mm.shadowed) 2711 invalidate_ppgtt_mm(mm); 2712 } 2713 } 2714 } 2715 2716 /** 2717 * intel_vgpu_reset_ggtt - reset the GGTT entry 2718 * @vgpu: a vGPU 2719 * @invalidate_old: invalidate old entries 2720 * 2721 * This function is called at the vGPU create stage 2722 * to reset all the GGTT entries. 2723 * 2724 */ 2725 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) 2726 { 2727 struct intel_gvt *gvt = vgpu->gvt; 2728 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2729 struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; 2730 struct intel_gvt_gtt_entry old_entry; 2731 u32 index; 2732 u32 num_entries; 2733 2734 pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); 2735 pte_ops->set_present(&entry); 2736 2737 index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2738 num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2739 while (num_entries--) { 2740 if (invalidate_old) { 2741 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2742 ggtt_invalidate_pte(vgpu, &old_entry); 2743 } 2744 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2745 } 2746 2747 index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2748 num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2749 while (num_entries--) { 2750 if (invalidate_old) { 2751 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2752 ggtt_invalidate_pte(vgpu, &old_entry); 2753 } 2754 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2755 } 2756 2757 ggtt_invalidate(gvt->gt); 2758 } 2759 2760 /** 2761 * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries 2762 * @gvt: intel gvt device 2763 * 2764 * This function is called at driver resume stage to restore 2765 * GGTT entries of every vGPU. 2766 * 2767 */ 2768 void intel_gvt_restore_ggtt(struct intel_gvt *gvt) 2769 { 2770 struct intel_vgpu *vgpu; 2771 struct intel_vgpu_mm *mm; 2772 int id; 2773 gen8_pte_t pte; 2774 u32 idx, num_low, num_hi, offset; 2775 2776 /* Restore dirty host ggtt for all vGPUs */ 2777 idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) { 2778 mm = vgpu->gtt.ggtt_mm; 2779 2780 num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2781 offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2782 for (idx = 0; idx < num_low; idx++) { 2783 pte = mm->ggtt_mm.host_ggtt_aperture[idx]; 2784 if (pte & GEN8_PAGE_PRESENT) 2785 write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); 2786 } 2787 2788 num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2789 offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2790 for (idx = 0; idx < num_hi; idx++) { 2791 pte = mm->ggtt_mm.host_ggtt_hidden[idx]; 2792 if (pte & GEN8_PAGE_PRESENT) 2793 write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); 2794 } 2795 } 2796 } 2797