/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include <drm/drm_print.h>

#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"

#include "gt/intel_gt_regs.h"
#include <linux/vmalloc.h>

#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
#else
#define gvt_vdbg_mm(fmt, args...)
#endif

static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * validate a gm address and related range size,
 * translate it to host gm address
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if (size == 0)
		return vgpu_gmadr_is_valid(vgpu, addr);

	if (vgpu_gmadr_is_aperture(vgpu, addr) &&
	    vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
		return true;
	else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
		 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
		return true;

	gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
		   addr, size);
	return false;
}

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

/*
 * Mappings between GTT_TYPE* enumerations.
 * The following information can be looked up for a given type:
 * - type of the next level page table
 * - type of an entry inside this level of page table
 * - type of the entry with the PSE bit set
 *
 * If the given type doesn't carry such information, GTT_TYPE_INVALID is
 * returned; e.g. asking for the PSE type of an L4 root entry, or for the
 * next level page table type of a PTE page table, since an L4 root entry
 * has no PSE bit and a PTE page table has no next level. This is useful
 * when traversing a page table.
 */

struct gtt_type_table_entry {
	int entry_type;
	int pt_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static const struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	/* We take IPS bit as 'PSE' for PTE level. */
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;

	return readq(addr);
}

static void ggtt_invalidate(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	wakeref = mmio_hw_access_pre(gt);
	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(gt, wakeref);
}

static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;

	writeq(pte, addr);
}

static inline int gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return 0;
}

static inline int gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return 0;
}

#define GTT_HAW 46

#define ADDR_1G_MASK	GENMASK_ULL(GTT_HAW - 1, 30)
#define ADDR_2M_MASK	GENMASK_ULL(GTT_HAW - 1, 21)
#define ADDR_64K_MASK	GENMASK_ULL(GTT_HAW - 1, 16)
#define ADDR_4K_MASK	GENMASK_ULL(GTT_HAW - 1, 12)
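
/*
 * Note: with GTT_HAW == 46, ADDR_4K_MASK selects bits 45:12 of an entry,
 * ADDR_64K_MASK bits 45:16, ADDR_2M_MASK bits 45:21 and ADDR_1G_MASK
 * bits 45:30.  gen8_gtt_get_pfn() below masks by entry type and shifts by
 * PAGE_SHIFT, so the returned pfn is always expressed in 4K units
 * (a 2M entry simply has its low pfn bits zero).
 */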
#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* split 64K gtt entry */

#define GTT_64K_PTE_STRIDE 16

static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
	unsigned long pfn;

	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
	else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
		pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
	else
		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
	return pfn;
}

static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
{
	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
		e->val64 &= ~ADDR_1G_MASK;
		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
		e->val64 &= ~ADDR_2M_MASK;
		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
		e->val64 &= ~ADDR_64K_MASK;
		pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
	} else {
		e->val64 &= ~ADDR_4K_MASK;
		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
	}

	e->val64 |= (pfn << PAGE_SHIFT);
}

static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	return !!(e->val64 & _PAGE_PSE);
}

static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
{
	if (gen8_gtt_test_pse(e)) {
		switch (e->type) {
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			e->val64 &= ~_PAGE_PSE;
			e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
			break;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
			e->val64 &= ~_PAGE_PSE;
			break;
		default:
			WARN_ON(1);
		}
	}
}

static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
{
	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
		return false;

	return !!(e->val64 & GEN8_PDE_IPS_64K);
}

static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
{
	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
		return;

	e->val64 &= ~GEN8_PDE_IPS_64K;
}

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes the PDP root pointer registers without the present
	 * bit set; that still works, so root pointer entries need to be
	 * treated specially here.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & GEN8_PAGE_PRESENT);
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~GEN8_PAGE_PRESENT;
}

static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= GEN8_PAGE_PRESENT;
}

static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
{
	return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
}

static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
}

static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
}

/*
 * Per-platform GMA routines.
 */
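
/*
 * Note: the DEFINE_PPGTT_GMA_TO_INDEX() users below decode a gen8 graphics
 * memory address as, from high to low: PML4 index in bits 47:39, PDP index
 * in bits 38:30 (only bits 31:30 for the legacy L3 layout), PDE index in
 * bits 29:21 and PTE index in bits 20:12; bits 11:0 are the page offset.
 * The GGTT is a single-level table, so its index is simply
 * gma >> I915_GTT_PAGE_SHIFT.
 */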
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
	static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
	{ \
		unsigned long x = (exp); \
		trace_gma_index(__func__, gma, x); \
		return x; \
	}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));

static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.clear_pse = gen8_gtt_clear_pse,
	.clear_ips = gen8_gtt_clear_ips,
	.test_ips = gen8_gtt_test_ips,
	.clear_64k_splited = gen8_gtt_clear_64k_splited,
	.set_64k_splited = gen8_gtt_set_64k_splited,
	.test_64k_splited = gen8_gtt_test_64k_splited,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};

/* Update entry type per pse and ips bit. */
static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
		struct intel_gvt_gtt_entry *entry, bool ips)
{
	switch (entry->type) {
	case GTT_TYPE_PPGTT_PDE_ENTRY:
	case GTT_TYPE_PPGTT_PDP_ENTRY:
		if (pte_ops->test_pse(entry))
			entry->type = get_pse_type(entry->type);
		break;
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		if (ips)
			entry->type = get_pse_type(entry->type);
		break;
	default:
		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
	}

	GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
}

/*
 * MM helpers.
 */
static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);

	entry->type = mm->ppgtt_mm.root_entry_type;
	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
	update_entry_type_for_real(pte_ops, entry, false);
}

static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, true);
}

static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, false);
}

static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
}

static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, false);
}

static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	entry->type = GTT_TYPE_GGTT_PTE;
	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
}

static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
	unsigned long offset = index;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
		offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
		mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
	} else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
		offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
		mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
	}

	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
}

/*
 * PPGTT shadow page table helpers.
 */
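
/*
 * A shadow page table (SPT) pairs one guest page-table page, identified by
 * its gfn and write-protected through the page-track interface, with one
 * host shadow page whose entries point at DMA-mapped host memory.  Guest
 * entries are read via ppgtt_get_guest_entry() and their translated
 * counterparts are written back via ppgtt_set_shadow_entry().
 */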
static inline int ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	int ret;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	ret = ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
	if (ret)
		return ret;

	update_entry_type_for_real(ops, e, guest ?
				   spt->guest_page.pde_ips : false);

	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);
	return 0;
}

static inline int ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc(sizeof(*spt), gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;

	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);

	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
		       DMA_BIDIRECTIONAL);

	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);

	if (spt->guest_page.gfn) {
		if (spt->guest_page.oos_page)
			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);

		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
	}

	list_del_init(&spt->post_shadow_list);
	free_spt(spt);
}

static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_ppgtt_spt *spt, *spn;
	struct radix_tree_iter iter;
	LIST_HEAD(all_spt);
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
		spt = radix_tree_deref_slot(slot);
		list_move(&spt->post_shadow_list, &all_spt);
	}
	rcu_read_unlock();

	list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
		ppgtt_free_spt(spt);
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(
		struct intel_vgpu_page_track *page_track,
		u64 gpa, void *data, int bytes)
{
	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;

	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
	if (ret)
		return ret;
	return ret;
}

/* Find a spt by guest gfn. */
static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_page_track *track;

	track = intel_vgpu_find_page_track(vgpu, gfn);
	if (track && track->handler == ppgtt_write_protection_handler)
		return track->priv_data;

	return NULL;
}

/* Find the spt by shadow page mfn. */
static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
}

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);

/* Allocate shadow page table without guest page. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
{
	struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	dma_addr_t daddr;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * Init shadow_page.
	 */
	spt->shadow_page.type = type;
	daddr = dma_map_page(kdev, spt->shadow_page.page,
			     0, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		ret = -EINVAL;
		goto err_free_spt;
	}
	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;

	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
	if (ret)
		goto err_unmap_dma;

	return spt;

err_unmap_dma:
	dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL);
err_free_spt:
	free_spt(spt);
	return ERR_PTR(ret);
}

/* Allocate shadow page table associated with specific gfn. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
		unsigned long gfn, bool guest_pde_ips)
{
	struct intel_vgpu_ppgtt_spt *spt;
	int ret;

	spt = ppgtt_alloc_spt(vgpu, type);
	if (IS_ERR(spt))
		return spt;

	/*
	 * Init guest_page.
	 */
	ret = intel_vgpu_register_page_track(vgpu, gfn,
			ppgtt_write_protection_handler, spt);
	if (ret) {
		ppgtt_free_spt(spt);
		return ERR_PTR(ret);
	}

	spt->guest_page.type = type;
	spt->guest_page.gfn = gfn;
	spt->guest_page.pde_ips = guest_pde_ips;

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);

	return spt;
}

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
		if (!ppgtt_get_shadow_entry(spt, e, i))

static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
	atomic_inc(&spt->refcount);
}

static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
	return atomic_dec_return(&spt->refcount);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	enum intel_gvt_gtt_type cur_pt_type;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type);

		if (!gtt_type_is_pt(cur_pt_type) ||
		    !gtt_type_is_pt(cur_pt_type + 1)) {
			drm_WARN(&i915->drm, 1,
				 "Invalid page table type, cur_pt_type is: %d\n",
				 cur_pt_type);
			return -EINVAL;
		}

		cur_pt_type += 1;

		if (ops->get_pfn(e) ==
			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
			     ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_spt(s);
}

static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *entry)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;
	int type;

	pfn = ops->get_pfn(entry);
	type = spt->shadow_page.type;

	/* Uninitialized spte or unshadowed spte. */
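	/*
	 * Entries that were never shadowed, or that already point at the
	 * per-type scratch page, have no DMA mapping to release, so skip
	 * the unmap below.
	 */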
	if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
		return;

	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;

	trace_spt_change(spt->vgpu->id, "die", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	if (ppgtt_put_spt(spt) > 0)
		return 0;

	for_each_present_shadow_entry(spt, &e, index) {
		switch (e.type) {
		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
			gvt_vdbg_mm("invalidate 4K entry\n");
			ppgtt_invalidate_pte(spt, &e);
			break;
		case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
			/* We don't set up 64K shadow entries so far. */
			WARN(1, "suspicious 64K gtt entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			gvt_vdbg_mm("invalidate 2M entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
		     spt, e.val64, e.type);
	return ret;
}

static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;

	if (GRAPHICS_VER(dev_priv) == 9) {
		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
			GAMW_ECO_ENABLE_64K_IPS_FIELD;

		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
	} else if (GRAPHICS_VER(dev_priv) >= 11) {
		/* 64K paging only controlled by IPS bit in PTE now. */
		return true;
	} else
		return false;
}
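
/*
 * Shadowing a guest entry that points at a lower-level page table either
 * reuses an existing SPT looked up by the guest pfn (taking a reference
 * and reshadowing it if the PDE's IPS bit changed), or allocates a new
 * SPT, write-protects the guest page and populates the shadow recursively.
 */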
static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	bool ips = false;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt) {
		ppgtt_get_spt(spt);

		if (ips != spt->guest_page.pde_ips) {
			spt->guest_page.pde_ips = ips;

			gvt_dbg_mm("reshadow PDE since ips changed\n");
			clear_page(spt->shadow_page.vaddr);
			ret = ppgtt_populate_spt(spt);
			if (ret) {
				ppgtt_put_spt(spt);
				goto err;
			}
		}
	} else {
		int type = get_next_pt_type(we->type);

		if (!gtt_type_is_pt(type)) {
			ret = -EINVAL;
			goto err;
		}

		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto err;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto err_free_spt;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto err_free_spt;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;

err_free_spt:
	ppgtt_free_spt(spt);
	spt = NULL;
err:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	/* We always split 64KB pages, so clear IPS in the shadow PDE. */
	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ops->clear_ips(se);

	ops->set_pfn(se, s->shadow_page.mfn);
}

static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
		struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
		struct intel_gvt_gtt_entry *se)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *sub_spt;
	struct intel_gvt_gtt_entry sub_se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	unsigned long sub_index;
	int ret;

	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);

	start_gfn = ops->get_pfn(se);

	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
	if (IS_ERR(sub_spt))
		return PTR_ERR(sub_spt);

	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
						   PAGE_SIZE, &dma_addr);
		if (ret)
			goto err;
		sub_se.val64 = se->val64;

		/* Copy the PAT field from PDE. */
		sub_se.val64 &= ~_PAGE_PAT;
		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;

		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
	}
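	/*
	 * The 2M entry now becomes a PDE pointing at the new sub page
	 * table: below, PSE/IPS are cleared and the pfn is retargeted at
	 * sub_spt's shadow page before the entry is written back.
	 */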
	/* Clear the dirty field. */
	se->val64 &= ~_PAGE_DIRTY;

	ops->clear_pse(se);
	ops->clear_ips(se);
	ops->set_pfn(se, sub_spt->shadow_page.mfn);
	ppgtt_set_shadow_entry(spt, se, index);
	return 0;
err:
	/* Cancel the DMA mappings that were already set up. */
	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
		gvt_vdbg_mm("invalidate 4K entry\n");
		ppgtt_invalidate_pte(sub_spt, &sub_se);
	}
	/* Release the newly allocated spt. */
	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
			 sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
	ppgtt_free_spt(sub_spt);
	return ret;
}

static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
		struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
		struct intel_gvt_gtt_entry *se)
{
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = *se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	int i, ret;

	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);

	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);

	start_gfn = ops->get_pfn(se);

	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
	ops->set_64k_splited(&entry);

	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
						   PAGE_SIZE, &dma_addr);
		if (ret)
			return ret;

		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(spt, &entry, index + i);
	}
	return 0;
}

static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
		struct intel_gvt_gtt_entry *ge)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se = *ge;
	unsigned long gfn;
	dma_addr_t dma_addr;
	int ret;

	if (!pte_ops->test_present(ge))
		return 0;

	gfn = pte_ops->get_pfn(ge);

	switch (ge->type) {
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		gvt_vdbg_mm("shadow 4K gtt entry\n");
		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
		if (ret)
			return -ENXIO;
		break;
	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
		gvt_vdbg_mm("shadow 64K gtt entry\n");
		/*
		 * The layout of a 64K page is special: the page size is
		 * controlled by the upper PDE. To keep it simple, we always
		 * split a 64K page into 4K pages in the shadow PT.
		 */
		return split_64KB_gtt_entry(vgpu, spt, index, &se);
	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
		gvt_vdbg_mm("shadow 2M gtt entry\n");
		if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
		    intel_gvt_dma_map_guest_page(vgpu, gfn,
						 I915_GTT_PAGE_SIZE_2M, &dma_addr))
			return split_2MB_gtt_entry(vgpu, spt, index, &se);
		break;
	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
		gvt_vgpu_err("GVT doesn't support 1GB entry\n");
		return -EINVAL;
	default:
		GEM_BUG_ON(1);
		return -EINVAL;
	}

	/* Successfully shadowed a 4K or 2M page (without splitting). */
	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
	ppgtt_set_shadow_entry(spt, &se, index);
	return 0;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	for_each_present_guest_entry(spt, &ge, i) {
		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
			if (IS_ERR(s)) {
				ret = PTR_ERR(s);
				goto fail;
			}
			ppgtt_get_shadow_entry(spt, &se, i);
			ppgtt_generate_shadow_entry(&se, s, &ge);
			ppgtt_set_shadow_entry(spt, &se, i);
		} else {
			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *se, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
			       spt->shadow_page.type, se->val64, index);

	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
		    se->type, index, se->val64);

	if (!ops->test_present(se))
		return 0;

	if (ops->get_pfn(se) ==
	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
		struct intel_vgpu_ppgtt_spt *s =
			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
		if (!s) {
			gvt_vgpu_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_spt(s);
		if (ret)
			goto fail;
	} else {
		/* We don't set up 64K shadow entries so far. */
		WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
		     "suspicious 64K entry\n");
		ppgtt_invalidate_pte(spt, se);
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, se->val64, se->type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
			       we->val64, index);

	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
		    we->type, index, we->val64);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ret;
}

static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
	struct intel_gvt_gtt_entry old, new;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			 spt, spt->guest_page.type);

	old.type = new.type = get_entry_type(spt->guest_page.type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
				 info->gtt_entry_size_shift); index++) {
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
			       spt, spt->guest_page.type,
			       new.val64, index);

		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
		if (ret)
			return ret;

		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
	}

	spt->guest_page.write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;

	trace_oos_change(vgpu->id, "detach", oos_page->id,
			 spt, spt->guest_page.type);

	spt->guest_page.write_cnt = 0;
	spt->guest_page.oos_page = NULL;
	oos_page->spt = NULL;

	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);

	return 0;
}

static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	int ret;

	ret = intel_gvt_read_gpa(spt->vgpu,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->spt = spt;
	spt->guest_page.oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
			 spt, spt->guest_page.type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
	if (ret)
		return ret;

	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_del_init(&oos_page->vm_list);
	return sync_oos_page(spt->vgpu, oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
					struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
		ret = detach_oos_page(spt->vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
					struct intel_vgpu_oos_page, list);
	return attach_oos_page(oos_page, spt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
}

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to sync all the out-of-sync shadow pages for the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of the PPGTT shadow page table.
 */
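
/*
 * Guest writes to a write-protected page-table page land here via the
 * page-track handler.  A write covering a full entry is shadowed
 * immediately; partial writes are accumulated in post_shadow_bitmap and
 * replayed by intel_vgpu_flush_post_shadow() before the next workload is
 * submitted.
 */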
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	int type = spt->shadow_page.type;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry old_se;
	int new_present;
	int i, ret;

	new_present = ops->test_present(we);

	/*
	 * Add the new entry first and then remove the old one, so that the
	 * ppgtt table stays valid during the window between the addition
	 * and the removal.
	 */
	ppgtt_get_shadow_entry(spt, &old_se, index);

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(spt, we, index);
		if (ret)
			goto fail;
	}

	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
	if (ret)
		goto fail;

	if (!new_present) {
		/* For split 64KB entries, we need to clear all of them. */
		if (ops->test_64k_splited(&old_se) &&
		    !(index % GTT_64K_PTE_STRIDE)) {
			gvt_vdbg_mm("remove splited 64K shadow entries\n");
			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
				ops->clear_64k_splited(&old_se);
				ops->set_pfn(&old_se,
					vgpu->gtt.scratch_pt[type].page_mfn);
				ppgtt_set_shadow_entry(spt, &old_se, index + i);
			}
		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
			ops->clear_pse(&old_se);
			ops->set_pfn(&old_se,
				     vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &old_se, index);
		} else {
			ops->set_pfn(&old_se,
				     vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &old_se, index);
		}
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
		     spt, we->val64, we->type);
	return ret;
}

static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(spt->guest_page.type)
		&& spt->guest_page.write_cnt >= 2;
}

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
		      &spt->vgpu->gtt.post_shadow_list_head);
}
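
/*
 * Out-of-sync handling: once a PTE page table has seen repeated guest
 * writes (see can_do_out_of_sync()), its write protection is dropped and
 * a copy of the guest page is cached in an oos_page.  sync_oos_page()
 * compares that copy against the real guest page and re-shadows the
 * entries that differ, which avoids trapping every PTE write.
 */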
/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to flush all the post shadows for the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge;
	unsigned long index;
	int ret;

	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
				   post_shadow_list);

		for_each_set_bit(index, spt->post_shadow_bitmap,
				 GTT_ENTRY_NUM_IN_ONE_PAGE) {
			ppgtt_get_guest_entry(spt, &ge, index);

			ret = ppgtt_handle_guest_write_page_table(spt,
							&ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
	}
	return 0;
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt_gtt_entry we, se;
	unsigned long index;
	int ret;

	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;

	ppgtt_get_guest_entry(spt, &we, index);

	/*
	 * For a page table using 64K GTT entries, only PTE#0, PTE#16,
	 * PTE#32, ... PTE#496 are used. Updates to the unused PTEs should
	 * be ignored.
	 */
	if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
	    (index % GTT_64K_PTE_STRIDE)) {
		gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
			    index);
		return 0;
	}

	if (bytes == info->gtt_entry_size) {
		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
		if (ret)
			return ret;
	} else {
		if (!test_bit(index, spt->post_shadow_bitmap)) {
			int type = spt->shadow_page.type;

			ppgtt_get_shadow_entry(spt, &se, index);
			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
			if (ret)
				return ret;
			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &se, index);
		}
		ppgtt_set_post_shadow(spt, index);
	}

	if (!enable_out_of_sync)
		return 0;

	spt->guest_page.write_cnt++;

	if (spt->guest_page.oos_page)
		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
			       false, 0, vgpu);

	if (can_do_out_of_sync(spt)) {
		if (!spt->guest_page.oos_page)
			ppgtt_allocate_oos_page(spt);

		ret = ppgtt_set_guest_page_oos(spt);
		if (ret < 0)
			return ret;
	}
	return 0;
}

static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_gvt_gtt_entry se;
	int index;

	if (!mm->ppgtt_mm.shadowed)
		return;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
		ppgtt_get_shadow_root_entry(mm, &se, index);

		if (!ops->test_present(&se))
			continue;

		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "destroy root pointer",
				       NULL, se.type, se.val64, index);
	}

	mm->ppgtt_mm.shadowed = false;
}

static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge, se;
	int index, ret;

	if (mm->ppgtt_mm.shadowed)
		return 0;

	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
		return -EINVAL;

	mm->ppgtt_mm.shadowed = true;

	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
		ppgtt_get_guest_root_entry(mm, &ge, index);

		if (!ops->test_present(&ge))
			continue;

		trace_spt_guest_change(vgpu->id, __func__, NULL,
				       ge.type, ge.val64, index);

		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_vgpu_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
			goto fail;
		}
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "populate root pointer",
				       NULL, se.type, se.val64, index);
	}

	return 0;
fail:
	invalidate_ppgtt_mm(mm);
	return ret;
}

static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;

	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
	if (!mm)
		return NULL;

	mm->vgpu = vgpu;
	kref_init(&mm->ref);
	atomic_set(&mm->pincount, 0);

	return mm;
}

static void vgpu_free_mm(struct intel_vgpu_mm *mm)
{
	kfree(mm);
}
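
/*
 * A PPGTT mm is created with either a single L4 root (one PML4 pointer in
 * pdps[0]) or the legacy L3 layout with four PDP pointers; shadowing is
 * done immediately in shadow_ppgtt_mm() and again on demand from
 * intel_vgpu_pin_mm() if the shadow was invalidated in the meantime.
 */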
/**
 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps.
 *
 * This function is used to create a ppgtt mm object for a vGPU.
 *
 * Returns:
 * The ppgtt mm object on success, an ERR_PTR()-encoded negative error code
 * if failed.
 */
struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_mm *mm;
	int ret;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_PPGTT;

	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
	mm->ppgtt_mm.root_entry_type = root_entry_type;

	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.link);

	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
	else
		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
		       sizeof(mm->ppgtt_mm.guest_pdps));

	ret = shadow_ppgtt_mm(mm);
	if (ret) {
		gvt_vgpu_err("failed to shadow ppgtt mm\n");
		vgpu_free_mm(mm);
		return ERR_PTR(ret);
	}

	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);

	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);

	return mm;
}

static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;
	unsigned long nr_entries;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_GGTT;

	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
	mm->ggtt_mm.virtual_ggtt =
		vzalloc(array_size(nr_entries,
				   vgpu->gvt->device_info.gtt_entry_size));
	if (!mm->ggtt_mm.virtual_ggtt) {
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
	if (!mm->ggtt_mm.host_ggtt_aperture) {
		vfree(mm->ggtt_mm.virtual_ggtt);
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
	if (!mm->ggtt_mm.host_ggtt_hidden) {
		vfree(mm->ggtt_mm.host_ggtt_aperture);
		vfree(mm->ggtt_mm.virtual_ggtt);
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	return mm;
}

/**
 * _intel_vgpu_mm_release - destroy a mm object
 * @mm_ref: a kref object
 *
 * This function is used to destroy a mm object for vGPU
 *
 */
void _intel_vgpu_mm_release(struct kref *mm_ref)
{
	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);

	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
		gvt_err("vgpu mm pin count bug detected\n");

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		list_del(&mm->ppgtt_mm.list);

		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
		list_del(&mm->ppgtt_mm.lru_list);
		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);

		invalidate_ppgtt_mm(mm);
	} else {
		vfree(mm->ggtt_mm.virtual_ggtt);
		vfree(mm->ggtt_mm.host_ggtt_aperture);
		vfree(mm->ggtt_mm.host_ggtt_hidden);
	}

	vgpu_free_mm(mm);
}

/**
 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when the user no longer needs a vGPU mm object
 */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	atomic_dec_if_positive(&mm->pincount);
}
1918 { 1919 atomic_dec_if_positive(&mm->pincount); 1920 } 1921 1922 /** 1923 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object 1924 * @mm: target vgpu mm 1925 * 1926 * This function is called when user wants to use a vGPU mm object. If this 1927 * mm object hasn't been shadowed yet, the shadow will be populated at this 1928 * time. 1929 * 1930 * Returns: 1931 * Zero on success, negative error code if failed. 1932 */ 1933 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) 1934 { 1935 int ret; 1936 1937 atomic_inc(&mm->pincount); 1938 1939 if (mm->type == INTEL_GVT_MM_PPGTT) { 1940 ret = shadow_ppgtt_mm(mm); 1941 if (ret) 1942 return ret; 1943 1944 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1945 list_move_tail(&mm->ppgtt_mm.lru_list, 1946 &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head); 1947 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1948 } 1949 1950 return 0; 1951 } 1952 1953 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) 1954 { 1955 struct intel_vgpu_mm *mm; 1956 struct list_head *pos, *n; 1957 1958 mutex_lock(&gvt->gtt.ppgtt_mm_lock); 1959 1960 list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { 1961 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); 1962 1963 if (atomic_read(&mm->pincount)) 1964 continue; 1965 1966 list_del_init(&mm->ppgtt_mm.lru_list); 1967 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1968 invalidate_ppgtt_mm(mm); 1969 return 1; 1970 } 1971 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1972 return 0; 1973 } 1974 1975 /* 1976 * GMA translation APIs. 1977 */ 1978 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, 1979 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest) 1980 { 1981 struct intel_vgpu *vgpu = mm->vgpu; 1982 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1983 struct intel_vgpu_ppgtt_spt *s; 1984 1985 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 1986 if (!s) 1987 return -ENXIO; 1988 1989 if (!guest) 1990 ppgtt_get_shadow_entry(s, e, index); 1991 else 1992 ppgtt_get_guest_entry(s, e, index); 1993 return 0; 1994 } 1995 1996 /** 1997 * intel_vgpu_gma_to_gpa - translate a gma to GPA 1998 * @mm: mm object. could be a PPGTT or GGTT mm object 1999 * @gma: graphics memory address in this mm object 2000 * 2001 * This function is used to translate a graphics memory address in specific 2002 * graphics memory space to guest physical address. 2003 * 2004 * Returns: 2005 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed. 
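 *
 * Example (an illustrative sketch, not lifted from a real caller; "gma" is
 * a hypothetical guest graphics address and "mm" an already-pinned mm):
 *
 *        gpa = intel_vgpu_gma_to_gpa(mm, gma);
 *        if (gpa == INTEL_GVT_INVALID_ADDR)
 *                return -EFAULT;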
2006 */ 2007 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) 2008 { 2009 struct intel_vgpu *vgpu = mm->vgpu; 2010 struct intel_gvt *gvt = vgpu->gvt; 2011 const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops; 2012 const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops; 2013 unsigned long gpa = INTEL_GVT_INVALID_ADDR; 2014 unsigned long gma_index[4]; 2015 struct intel_gvt_gtt_entry e; 2016 int i, levels = 0; 2017 int ret; 2018 2019 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT && 2020 mm->type != INTEL_GVT_MM_PPGTT); 2021 2022 if (mm->type == INTEL_GVT_MM_GGTT) { 2023 if (!vgpu_gmadr_is_valid(vgpu, gma)) 2024 goto err; 2025 2026 ggtt_get_guest_entry(mm, &e, 2027 gma_ops->gma_to_ggtt_pte_index(gma)); 2028 2029 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) 2030 + (gma & ~I915_GTT_PAGE_MASK); 2031 2032 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); 2033 } else { 2034 switch (mm->ppgtt_mm.root_entry_type) { 2035 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: 2036 ppgtt_get_shadow_root_entry(mm, &e, 0); 2037 2038 gma_index[0] = gma_ops->gma_to_pml4_index(gma); 2039 gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); 2040 gma_index[2] = gma_ops->gma_to_pde_index(gma); 2041 gma_index[3] = gma_ops->gma_to_pte_index(gma); 2042 levels = 4; 2043 break; 2044 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: 2045 ppgtt_get_shadow_root_entry(mm, &e, 2046 gma_ops->gma_to_l3_pdp_index(gma)); 2047 2048 gma_index[0] = gma_ops->gma_to_pde_index(gma); 2049 gma_index[1] = gma_ops->gma_to_pte_index(gma); 2050 levels = 2; 2051 break; 2052 default: 2053 GEM_BUG_ON(1); 2054 } 2055 2056 /* walk the shadow page table and get gpa from guest entry */ 2057 for (i = 0; i < levels; i++) { 2058 ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], 2059 (i == levels - 1)); 2060 if (ret) 2061 goto err; 2062 2063 if (!pte_ops->test_present(&e)) { 2064 gvt_dbg_core("GMA 0x%lx is not present\n", gma); 2065 goto err; 2066 } 2067 } 2068 2069 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + 2070 (gma & ~I915_GTT_PAGE_MASK); 2071 trace_gma_translate(vgpu->id, "ppgtt", 0, 2072 mm->ppgtt_mm.root_entry_type, gma, gpa); 2073 } 2074 2075 return gpa; 2076 err: 2077 gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); 2078 return INTEL_GVT_INVALID_ADDR; 2079 } 2080 2081 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, 2082 unsigned int off, void *p_data, unsigned int bytes) 2083 { 2084 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 2085 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2086 unsigned long index = off >> info->gtt_entry_size_shift; 2087 unsigned long gma; 2088 struct intel_gvt_gtt_entry e; 2089 2090 if (bytes != 4 && bytes != 8) 2091 return -EINVAL; 2092 2093 gma = index << I915_GTT_PAGE_SHIFT; 2094 if (!intel_gvt_ggtt_validate_range(vgpu, 2095 gma, 1 << I915_GTT_PAGE_SHIFT)) { 2096 gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma); 2097 memset(p_data, 0, bytes); 2098 return 0; 2099 } 2100 2101 ggtt_get_guest_entry(ggtt_mm, &e, index); 2102 memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), 2103 bytes); 2104 return 0; 2105 } 2106 2107 /** 2108 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read 2109 * @vgpu: a vGPU 2110 * @off: register offset 2111 * @p_data: data will be returned to guest 2112 * @bytes: data length 2113 * 2114 * This function is used to emulate the GTT MMIO register read 2115 * 2116 * Returns: 2117 * Zero on success, error code if failed. 
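 *
 * For example (with a hypothetical offset), a 4-byte read at offset 0x10008
 * into the GTT range (after gtt_start_offset has been subtracted) maps to
 * entry index 0x10008 >> 3 = 0x2001 for 8-byte entries and returns the low
 * dword of that guest PTE.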
2118 */ 2119 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, 2120 void *p_data, unsigned int bytes) 2121 { 2122 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2123 int ret; 2124 2125 if (bytes != 4 && bytes != 8) 2126 return -EINVAL; 2127 2128 off -= info->gtt_start_offset; 2129 ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes); 2130 return ret; 2131 } 2132 2133 static void ggtt_invalidate_pte(struct intel_vgpu *vgpu, 2134 struct intel_gvt_gtt_entry *entry) 2135 { 2136 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2137 unsigned long pfn; 2138 2139 pfn = pte_ops->get_pfn(entry); 2140 if (pfn != vgpu->gvt->gtt.scratch_mfn) 2141 intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); 2142 } 2143 2144 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, 2145 void *p_data, unsigned int bytes) 2146 { 2147 struct intel_gvt *gvt = vgpu->gvt; 2148 const struct intel_gvt_device_info *info = &gvt->device_info; 2149 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 2150 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 2151 unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; 2152 unsigned long gma, gfn; 2153 struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; 2154 struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; 2155 dma_addr_t dma_addr; 2156 int ret; 2157 struct intel_gvt_partial_pte *partial_pte, *pos, *n; 2158 bool partial_update = false; 2159 2160 if (bytes != 4 && bytes != 8) 2161 return -EINVAL; 2162 2163 gma = g_gtt_index << I915_GTT_PAGE_SHIFT; 2164 2165 /* the VM may configure the whole GM space when ballooning is used */ 2166 if (!vgpu_gmadr_is_valid(vgpu, gma)) 2167 return 0; 2168 2169 e.type = GTT_TYPE_GGTT_PTE; 2170 memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, 2171 bytes); 2172 2173 /* If ggtt entry size is 8 bytes, and it's split into two 4 bytes 2174 * write, save the first 4 bytes in a list and update virtual 2175 * PTE. Only update shadow PTE when the second 4 bytes comes. 
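         * For example (hypothetical offsets): a guest writing an 8-byte PTE
         * as two 4-byte writes at 0x100000 and then 0x100004 hits the same
         * GTT index (0x100000 >> 3 == 0x100004 >> 3 == 0x20000); the first
         * write is parked on partial_pte_list and only the second one
         * reaches the shadow PTE below.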
         */
        if (bytes < info->gtt_entry_size) {
                bool found = false;

                list_for_each_entry_safe(pos, n,
                                &ggtt_mm->ggtt_mm.partial_pte_list, list) {
                        if (g_gtt_index == pos->offset >>
                                        info->gtt_entry_size_shift) {
                                if (off != pos->offset) {
                                        /* the second partial part */
                                        int last_off = pos->offset &
                                                (info->gtt_entry_size - 1);

                                        memcpy((void *)&e.val64 + last_off,
                                                (void *)&pos->data + last_off,
                                                bytes);

                                        list_del(&pos->list);
                                        kfree(pos);
                                        found = true;
                                        break;
                                }

                                /* update of the first partial part */
                                pos->data = e.val64;
                                ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
                                return 0;
                        }
                }

                if (!found) {
                        /* the first partial part */
                        partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
                        if (!partial_pte)
                                return -ENOMEM;
                        partial_pte->offset = off;
                        partial_pte->data = e.val64;
                        list_add_tail(&partial_pte->list,
                                &ggtt_mm->ggtt_mm.partial_pte_list);
                        partial_update = true;
                }
        }

        if (!partial_update && (ops->test_present(&e))) {
                gfn = ops->get_pfn(&e);
                m.val64 = e.val64;
                m.type = e.type;

                ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
                                                   &dma_addr);
                if (ret) {
                        gvt_vgpu_err("fail to populate guest ggtt entry\n");
                        /* The guest driver may read/write the entry while it
                         * is only partially updated; in that case the p2m
                         * mapping fails, so point the shadow entry at a
                         * scratch page instead.
                         */
                        ops->set_pfn(&m, gvt->gtt.scratch_mfn);
                } else
                        ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
        } else {
                ops->set_pfn(&m, gvt->gtt.scratch_mfn);
                ops->clear_present(&m);
        }

        ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);

        ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
        ggtt_invalidate_pte(vgpu, &e);

        ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
        ggtt_invalidate(gvt->gt);
        return 0;
}

/**
 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
                unsigned int off, void *p_data, unsigned int bytes)
{
        const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
        int ret;
        struct intel_vgpu_submission *s = &vgpu->submission;
        struct intel_engine_cs *engine;
        int i;

        if (bytes != 4 && bytes != 8)
                return -EINVAL;

        off -= info->gtt_start_offset;
        ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);

        /* If the GGTT entry of the last submitted context is written, that
         * context probably got unpinned. Mark the last shadowed context
         * invalid.
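         * For example (an illustrative value), if last_ctx[i].lrca is 0x2000,
         * a write landing on GGTT entry index 0x2000 (off == 0x2000 << 3 for
         * 8-byte entries) invalidates that engine's last shadowed context so
         * it is re-shadowed on the next submission.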
         */
        for_each_engine(engine, vgpu->gvt->gt, i) {
                if (!s->last_ctx[i].valid)
                        continue;

                if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
                        s->last_ctx[i].valid = false;
        }
        return ret;
}

static int alloc_scratch_pages(struct intel_vgpu *vgpu,
                enum intel_gvt_gtt_type type)
{
        struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
        struct intel_vgpu_gtt *gtt = &vgpu->gtt;
        const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
        int page_entry_num = I915_GTT_PAGE_SIZE >>
                                vgpu->gvt->device_info.gtt_entry_size_shift;
        void *scratch_pt;
        int i;
        struct device *dev = vgpu->gvt->gt->i915->drm.dev;
        dma_addr_t daddr;

        if (drm_WARN_ON(&i915->drm,
                        type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
                return -EINVAL;

        scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
        if (!scratch_pt) {
                gvt_vgpu_err("fail to allocate scratch page\n");
                return -ENOMEM;
        }

        daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
        if (dma_mapping_error(dev, daddr)) {
                gvt_vgpu_err("fail to dmamap scratch_pt\n");
                __free_page(virt_to_page(scratch_pt));
                return -ENOMEM;
        }
        gtt->scratch_pt[type].page_mfn =
                (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
        gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
        gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
                        vgpu->id, type, gtt->scratch_pt[type].page_mfn);

        /* Build the tree by fully filling the scratch page table with entries
         * which point to the next level scratch page table or the scratch
         * page. scratch_pt[type] indicates the scratch page table/scratch
         * page used by a page table of the given 'type'.
         * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
         * GTT_TYPE_PPGTT_PDE_PT level page table, which means this scratch_pt
         * itself is of type GTT_TYPE_PPGTT_PTE_PT and is fully filled with
         * the scratch page mfn.
         */
        if (type > GTT_TYPE_PPGTT_PTE_PT) {
                struct intel_gvt_gtt_entry se;

                memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
                se.type = get_entry_type(type - 1);
                ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

                /* The entry parameters (present/writeable/cache type) are set
                 * to the same values as in i915's scratch page tree.
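                 * For example (illustrative, using the bit names below), an
                 * entry written into a scratch table ends up roughly as
                 * (scratch_pt[type - 1].page_mfn << 12) | GEN8_PAGE_PRESENT |
                 * GEN8_PAGE_RW (plus PPAT_CACHED at the PDE level), so any
                 * walk through scratch tables resolves to the scratch page.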
                 */
                se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
                if (type == GTT_TYPE_PPGTT_PDE_PT)
                        se.val64 |= PPAT_CACHED;

                for (i = 0; i < page_entry_num; i++)
                        ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
        }

        return 0;
}

static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{
        int i;
        struct device *dev = vgpu->gvt->gt->i915->drm.dev;
        dma_addr_t daddr;

        for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
                if (vgpu->gtt.scratch_pt[i].page != NULL) {
                        daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
                                        I915_GTT_PAGE_SHIFT);
                        dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
                        __free_page(vgpu->gtt.scratch_pt[i].page);
                        vgpu->gtt.scratch_pt[i].page = NULL;
                        vgpu->gtt.scratch_pt[i].page_mfn = 0;
                }
        }

        return 0;
}

static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
        int i, ret;

        for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
                ret = alloc_scratch_pages(vgpu, i);
                if (ret)
                        goto err;
        }

        return 0;

err:
        release_scratch_page_tree(vgpu);
        return ret;
}

/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
        struct intel_vgpu_gtt *gtt = &vgpu->gtt;

        INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);

        INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
        INIT_LIST_HEAD(&gtt->oos_page_list_head);
        INIT_LIST_HEAD(&gtt->post_shadow_list_head);

        gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
        if (IS_ERR(gtt->ggtt_mm)) {
                gvt_vgpu_err("fail to create mm for ggtt.\n");
                return PTR_ERR(gtt->ggtt_mm);
        }

        intel_vgpu_reset_ggtt(vgpu, false);

        INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);

        return create_scratch_page_tree(vgpu);
}

void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
{
        struct list_head *pos, *n;
        struct intel_vgpu_mm *mm;

        list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
                mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
                intel_vgpu_destroy_mm(mm);
        }

        if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
                gvt_err("vgpu ppgtt mm is not fully destroyed\n");

        if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
                gvt_err("Why do we still have spt not freed?\n");
                ppgtt_free_all_spt(vgpu);
        }
}

static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
{
        struct intel_gvt_partial_pte *pos, *next;

        list_for_each_entry_safe(pos, next,
                                 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
                                 list) {
                gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
                           pos->offset, pos->data);
                kfree(pos);
        }
        intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
        vgpu->gtt.ggtt_mm = NULL;
}

/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 *
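 * A minimal lifecycle sketch (an assumed pairing for illustration, not a
 * verbatim caller): each successful intel_vgpu_init_gtt() at vGPU creation
 * is expected to be balanced by this call on the destroy path:
 *
 *        if (intel_vgpu_init_gtt(vgpu))
 *                goto err;
 *        ...
 *        intel_vgpu_clean_gtt(vgpu);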
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
        intel_vgpu_destroy_all_ppgtt_mm(vgpu);
        intel_vgpu_destroy_ggtt_mm(vgpu);
        release_scratch_page_tree(vgpu);
}

static void clean_spt_oos(struct intel_gvt *gvt)
{
        struct intel_gvt_gtt *gtt = &gvt->gtt;
        struct list_head *pos, *n;
        struct intel_vgpu_oos_page *oos_page;

        WARN(!list_empty(&gtt->oos_page_use_list_head),
             "someone is still using oos page\n");

        list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
                oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
                list_del(&oos_page->list);
                free_page((unsigned long)oos_page->mem);
                kfree(oos_page);
        }
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
        struct intel_gvt_gtt *gtt = &gvt->gtt;
        struct intel_vgpu_oos_page *oos_page;
        int i;
        int ret;

        INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
        INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

        for (i = 0; i < preallocated_oos_pages; i++) {
                oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
                if (!oos_page) {
                        ret = -ENOMEM;
                        goto fail;
                }
                oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
                if (!oos_page->mem) {
                        ret = -ENOMEM;
                        kfree(oos_page);
                        goto fail;
                }

                INIT_LIST_HEAD(&oos_page->list);
                INIT_LIST_HEAD(&oos_page->vm_list);
                oos_page->id = i;
                list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
        }

        gvt_dbg_mm("%d oos pages preallocated\n", i);

        return 0;
fail:
        clean_spt_oos(gvt);
        return ret;
}

/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @pdps: pdp root array
 *
 * This function is used to find a PPGTT mm object from the mm object pool
 *
 * Returns:
 * pointer to mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
                u64 pdps[])
{
        struct intel_vgpu_mm *mm;
        struct list_head *pos;

        list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
                mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);

                switch (mm->ppgtt_mm.root_entry_type) {
                case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
                        if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
                                return mm;
                        break;
                case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
                        if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
                                    sizeof(mm->ppgtt_mm.guest_pdps)))
                                return mm;
                        break;
                default:
                        GEM_BUG_ON(1);
                }
        }
        return NULL;
}

/**
 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps
 *
 * This function is used to find or create a PPGTT mm object from a guest.
 *
 * Returns:
 * Pointer to the mm object on success, ERR_PTR if failed.
 */
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
                enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{
        struct intel_vgpu_mm *mm;

        mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
        if (mm) {
                intel_vgpu_mm_get(mm);
        } else {
                mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
                if (IS_ERR(mm))
                        gvt_vgpu_err("fail to create mm\n");
        }
        return mm;
}

/**
 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
2595 * @vgpu: a vGPU 2596 * @pdps: guest pdps 2597 * 2598 * This function is used to find a PPGTT mm object from a guest and destroy it. 2599 * 2600 * Returns: 2601 * Zero on success, negative error code if failed. 2602 */ 2603 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]) 2604 { 2605 struct intel_vgpu_mm *mm; 2606 2607 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); 2608 if (!mm) { 2609 gvt_vgpu_err("fail to find ppgtt instance.\n"); 2610 return -EINVAL; 2611 } 2612 intel_vgpu_mm_put(mm); 2613 return 0; 2614 } 2615 2616 /** 2617 * intel_gvt_init_gtt - initialize mm components of a GVT device 2618 * @gvt: GVT device 2619 * 2620 * This function is called at the initialization stage, to initialize 2621 * the mm components of a GVT device. 2622 * 2623 * Returns: 2624 * zero on success, negative error code if failed. 2625 */ 2626 int intel_gvt_init_gtt(struct intel_gvt *gvt) 2627 { 2628 int ret; 2629 void *page; 2630 struct device *dev = gvt->gt->i915->drm.dev; 2631 dma_addr_t daddr; 2632 2633 gvt_dbg_core("init gtt\n"); 2634 2635 gvt->gtt.pte_ops = &gen8_gtt_pte_ops; 2636 gvt->gtt.gma_ops = &gen8_gtt_gma_ops; 2637 2638 page = (void *)get_zeroed_page(GFP_KERNEL); 2639 if (!page) { 2640 gvt_err("fail to allocate scratch ggtt page\n"); 2641 return -ENOMEM; 2642 } 2643 2644 daddr = dma_map_page(dev, virt_to_page(page), 0, 2645 4096, DMA_BIDIRECTIONAL); 2646 if (dma_mapping_error(dev, daddr)) { 2647 gvt_err("fail to dmamap scratch ggtt page\n"); 2648 __free_page(virt_to_page(page)); 2649 return -ENOMEM; 2650 } 2651 2652 gvt->gtt.scratch_page = virt_to_page(page); 2653 gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); 2654 2655 if (enable_out_of_sync) { 2656 ret = setup_spt_oos(gvt); 2657 if (ret) { 2658 gvt_err("fail to initialize SPT oos\n"); 2659 dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); 2660 __free_page(gvt->gtt.scratch_page); 2661 return ret; 2662 } 2663 } 2664 INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head); 2665 mutex_init(&gvt->gtt.ppgtt_mm_lock); 2666 return 0; 2667 } 2668 2669 /** 2670 * intel_gvt_clean_gtt - clean up mm components of a GVT device 2671 * @gvt: GVT device 2672 * 2673 * This function is called at the driver unloading stage, to clean up 2674 * the mm components of a GVT device. 2675 * 2676 */ 2677 void intel_gvt_clean_gtt(struct intel_gvt *gvt) 2678 { 2679 struct device *dev = gvt->gt->i915->drm.dev; 2680 dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn << 2681 I915_GTT_PAGE_SHIFT); 2682 2683 dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); 2684 2685 __free_page(gvt->gtt.scratch_page); 2686 2687 if (enable_out_of_sync) 2688 clean_spt_oos(gvt); 2689 } 2690 2691 /** 2692 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances 2693 * @vgpu: a vGPU 2694 * 2695 * This function is called when invalidate all PPGTT instances of a vGPU. 
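 *
 * A typical (assumed) trigger is vGPU reset, e.g.:
 *
 *        intel_vgpu_invalidate_ppgtt(vgpu);
 *
 * The mm objects themselves stay on ppgtt_mm_list_head, so a later
 * intel_vgpu_pin_mm() can re-shadow them for the next workload.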
2696 * 2697 */ 2698 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) 2699 { 2700 struct list_head *pos, *n; 2701 struct intel_vgpu_mm *mm; 2702 2703 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { 2704 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2705 if (mm->type == INTEL_GVT_MM_PPGTT) { 2706 mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock); 2707 list_del_init(&mm->ppgtt_mm.lru_list); 2708 mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock); 2709 if (mm->ppgtt_mm.shadowed) 2710 invalidate_ppgtt_mm(mm); 2711 } 2712 } 2713 } 2714 2715 /** 2716 * intel_vgpu_reset_ggtt - reset the GGTT entry 2717 * @vgpu: a vGPU 2718 * @invalidate_old: invalidate old entries 2719 * 2720 * This function is called at the vGPU create stage 2721 * to reset all the GGTT entries. 2722 * 2723 */ 2724 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) 2725 { 2726 struct intel_gvt *gvt = vgpu->gvt; 2727 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2728 struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; 2729 struct intel_gvt_gtt_entry old_entry; 2730 u32 index; 2731 u32 num_entries; 2732 2733 pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); 2734 pte_ops->set_present(&entry); 2735 2736 index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2737 num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2738 while (num_entries--) { 2739 if (invalidate_old) { 2740 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2741 ggtt_invalidate_pte(vgpu, &old_entry); 2742 } 2743 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2744 } 2745 2746 index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2747 num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2748 while (num_entries--) { 2749 if (invalidate_old) { 2750 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2751 ggtt_invalidate_pte(vgpu, &old_entry); 2752 } 2753 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2754 } 2755 2756 ggtt_invalidate(gvt->gt); 2757 } 2758 2759 /** 2760 * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries 2761 * @gvt: intel gvt device 2762 * 2763 * This function is called at driver resume stage to restore 2764 * GGTT entries of every vGPU. 2765 * 2766 */ 2767 void intel_gvt_restore_ggtt(struct intel_gvt *gvt) 2768 { 2769 struct intel_vgpu *vgpu; 2770 struct intel_vgpu_mm *mm; 2771 int id; 2772 gen8_pte_t pte; 2773 u32 idx, num_low, num_hi, offset; 2774 2775 /* Restore dirty host ggtt for all vGPUs */ 2776 idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) { 2777 mm = vgpu->gtt.ggtt_mm; 2778 2779 num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2780 offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2781 for (idx = 0; idx < num_low; idx++) { 2782 pte = mm->ggtt_mm.host_ggtt_aperture[idx]; 2783 if (pte & GEN8_PAGE_PRESENT) 2784 write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); 2785 } 2786 2787 num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2788 offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2789 for (idx = 0; idx < num_hi; idx++) { 2790 pte = mm->ggtt_mm.host_ggtt_hidden[idx]; 2791 if (pte & GEN8_PAGE_PRESENT) 2792 write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); 2793 } 2794 } 2795 } 2796
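/*
 * Usage sketch (illustrative only; the real call sites live outside this
 * file and are assumed here): over the life of the device and of each vGPU,
 * the GTT component is expected to be driven roughly as follows.
 *
 *        intel_gvt_init_gtt(gvt);        - driver load
 *        intel_vgpu_init_gtt(vgpu);      - vGPU creation
 *        ...                             - GGTT/PPGTT emulation
 *        intel_gvt_restore_ggtt(gvt);    - driver resume
 *        intel_vgpu_clean_gtt(vgpu);     - vGPU destruction
 *        intel_gvt_clean_gtt(gvt);       - driver unload
 */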