1 /* 2 * GTT virtualization 3 * 4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Zhi Wang <zhi.a.wang@intel.com> 27 * Zhenyu Wang <zhenyuw@linux.intel.com> 28 * Xiao Zheng <xiao.zheng@intel.com> 29 * 30 * Contributors: 31 * Min He <min.he@intel.com> 32 * Bing Niu <bing.niu@intel.com> 33 * 34 */ 35 36 #include "i915_drv.h" 37 #include "gvt.h" 38 #include "i915_pvinfo.h" 39 #include "trace.h" 40 41 #include "gt/intel_gt_regs.h" 42 #include <linux/vmalloc.h> 43 44 #if defined(VERBOSE_DEBUG) 45 #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) 46 #else 47 #define gvt_vdbg_mm(fmt, args...) 48 #endif 49 50 static bool enable_out_of_sync = false; 51 static int preallocated_oos_pages = 8192; 52 53 /* 54 * validate a gm address and related range size, 55 * translate it to host gm address 56 */ 57 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size) 58 { 59 if (size == 0) 60 return vgpu_gmadr_is_valid(vgpu, addr); 61 62 if (vgpu_gmadr_is_aperture(vgpu, addr) && 63 vgpu_gmadr_is_aperture(vgpu, addr + size - 1)) 64 return true; 65 else if (vgpu_gmadr_is_hidden(vgpu, addr) && 66 vgpu_gmadr_is_hidden(vgpu, addr + size - 1)) 67 return true; 68 69 gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n", 70 addr, size); 71 return false; 72 } 73 74 #define gtt_type_is_entry(type) \ 75 (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \ 76 && type != GTT_TYPE_PPGTT_PTE_ENTRY \ 77 && type != GTT_TYPE_PPGTT_ROOT_ENTRY) 78 79 #define gtt_type_is_pt(type) \ 80 (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) 81 82 #define gtt_type_is_pte_pt(type) \ 83 (type == GTT_TYPE_PPGTT_PTE_PT) 84 85 #define gtt_type_is_root_pointer(type) \ 86 (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY) 87 88 #define gtt_init_entry(e, t, p, v) do { \ 89 (e)->type = t; \ 90 (e)->pdev = p; \ 91 memcpy(&(e)->val64, &v, sizeof(v)); \ 92 } while (0) 93 94 /* 95 * Mappings between GTT_TYPE* enumerations. 96 * Following information can be found according to the given type: 97 * - type of next level page table 98 * - type of entry inside this level page table 99 * - type of entry with PSE set 100 * 101 * If the given type doesn't have such a kind of information, 102 * e.g. 
give a l4 root entry type, then request to get its PSE type, 103 * give a PTE page table type, then request to get its next level page 104 * table type, as we know l4 root entry doesn't have a PSE bit, 105 * and a PTE page table doesn't have a next level page table type, 106 * GTT_TYPE_INVALID will be returned. This is useful when traversing a 107 * page table. 108 */ 109 110 struct gtt_type_table_entry { 111 int entry_type; 112 int pt_type; 113 int next_pt_type; 114 int pse_entry_type; 115 }; 116 117 #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \ 118 [type] = { \ 119 .entry_type = e_type, \ 120 .pt_type = cpt_type, \ 121 .next_pt_type = npt_type, \ 122 .pse_entry_type = pse_type, \ 123 } 124 125 static const struct gtt_type_table_entry gtt_type_table[] = { 126 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 127 GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 128 GTT_TYPE_INVALID, 129 GTT_TYPE_PPGTT_PML4_PT, 130 GTT_TYPE_INVALID), 131 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT, 132 GTT_TYPE_PPGTT_PML4_ENTRY, 133 GTT_TYPE_PPGTT_PML4_PT, 134 GTT_TYPE_PPGTT_PDP_PT, 135 GTT_TYPE_INVALID), 136 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY, 137 GTT_TYPE_PPGTT_PML4_ENTRY, 138 GTT_TYPE_PPGTT_PML4_PT, 139 GTT_TYPE_PPGTT_PDP_PT, 140 GTT_TYPE_INVALID), 141 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT, 142 GTT_TYPE_PPGTT_PDP_ENTRY, 143 GTT_TYPE_PPGTT_PDP_PT, 144 GTT_TYPE_PPGTT_PDE_PT, 145 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 146 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 147 GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 148 GTT_TYPE_INVALID, 149 GTT_TYPE_PPGTT_PDE_PT, 150 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 151 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY, 152 GTT_TYPE_PPGTT_PDP_ENTRY, 153 GTT_TYPE_PPGTT_PDP_PT, 154 GTT_TYPE_PPGTT_PDE_PT, 155 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 156 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT, 157 GTT_TYPE_PPGTT_PDE_ENTRY, 158 GTT_TYPE_PPGTT_PDE_PT, 159 GTT_TYPE_PPGTT_PTE_PT, 160 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 161 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY, 162 GTT_TYPE_PPGTT_PDE_ENTRY, 163 GTT_TYPE_PPGTT_PDE_PT, 164 GTT_TYPE_PPGTT_PTE_PT, 165 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 166 /* We take IPS bit as 'PSE' for PTE level. 
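* When IPS is set in the PDE above a PTE page table, the 4K entries in that table are treated as 64K entries, which is why the PSE-equivalent type recorded below for the PTE level is GTT_TYPE_PPGTT_PTE_64K_ENTRY.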
*/ 167 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, 168 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 169 GTT_TYPE_PPGTT_PTE_PT, 170 GTT_TYPE_INVALID, 171 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 172 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, 173 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 174 GTT_TYPE_PPGTT_PTE_PT, 175 GTT_TYPE_INVALID, 176 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 177 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY, 178 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 179 GTT_TYPE_PPGTT_PTE_PT, 180 GTT_TYPE_INVALID, 181 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 182 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, 183 GTT_TYPE_PPGTT_PDE_ENTRY, 184 GTT_TYPE_PPGTT_PDE_PT, 185 GTT_TYPE_INVALID, 186 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 187 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY, 188 GTT_TYPE_PPGTT_PDP_ENTRY, 189 GTT_TYPE_PPGTT_PDP_PT, 190 GTT_TYPE_INVALID, 191 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 192 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE, 193 GTT_TYPE_GGTT_PTE, 194 GTT_TYPE_INVALID, 195 GTT_TYPE_INVALID, 196 GTT_TYPE_INVALID), 197 }; 198 199 static inline int get_next_pt_type(int type) 200 { 201 return gtt_type_table[type].next_pt_type; 202 } 203 204 static inline int get_entry_type(int type) 205 { 206 return gtt_type_table[type].entry_type; 207 } 208 209 static inline int get_pse_type(int type) 210 { 211 return gtt_type_table[type].pse_entry_type; 212 } 213 214 static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index) 215 { 216 void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index; 217 218 return readq(addr); 219 } 220 221 static void ggtt_invalidate(struct intel_gt *gt) 222 { 223 intel_wakeref_t wakeref; 224 225 wakeref = mmio_hw_access_pre(gt); 226 intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 227 mmio_hw_access_post(gt, wakeref); 228 } 229 230 static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte) 231 { 232 void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index; 233 234 writeq(pte, addr); 235 } 236 237 static inline int gtt_get_entry64(void *pt, 238 struct intel_gvt_gtt_entry *e, 239 unsigned long index, bool hypervisor_access, unsigned long gpa, 240 struct intel_vgpu *vgpu) 241 { 242 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 243 int ret; 244 245 if (WARN_ON(info->gtt_entry_size != 8)) 246 return -EINVAL; 247 248 if (hypervisor_access) { 249 ret = intel_gvt_read_gpa(vgpu, gpa + 250 (index << info->gtt_entry_size_shift), 251 &e->val64, 8); 252 if (WARN_ON(ret)) 253 return ret; 254 } else if (!pt) { 255 e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index); 256 } else { 257 e->val64 = *((u64 *)pt + index); 258 } 259 return 0; 260 } 261 262 static inline int gtt_set_entry64(void *pt, 263 struct intel_gvt_gtt_entry *e, 264 unsigned long index, bool hypervisor_access, unsigned long gpa, 265 struct intel_vgpu *vgpu) 266 { 267 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 268 int ret; 269 270 if (WARN_ON(info->gtt_entry_size != 8)) 271 return -EINVAL; 272 273 if (hypervisor_access) { 274 ret = intel_gvt_write_gpa(vgpu, gpa + 275 (index << info->gtt_entry_size_shift), 276 &e->val64, 8); 277 if (WARN_ON(ret)) 278 return ret; 279 } else if (!pt) { 280 write_pte64(vgpu->gvt->gt->ggtt, index, e->val64); 281 } else { 282 *((u64 *)pt + index) = e->val64; 283 } 284 return 0; 285 } 286 287 #define GTT_HAW 46 288 289 #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) 290 #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) 291 #define ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16) 292 #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) 293 294 #define 
GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52) 295 #define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */ 296 297 #define GTT_64K_PTE_STRIDE 16 298 299 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) 300 { 301 unsigned long pfn; 302 303 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) 304 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; 305 else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) 306 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; 307 else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) 308 pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT; 309 else 310 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; 311 return pfn; 312 } 313 314 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) 315 { 316 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { 317 e->val64 &= ~ADDR_1G_MASK; 318 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT); 319 } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { 320 e->val64 &= ~ADDR_2M_MASK; 321 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); 322 } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) { 323 e->val64 &= ~ADDR_64K_MASK; 324 pfn &= (ADDR_64K_MASK >> PAGE_SHIFT); 325 } else { 326 e->val64 &= ~ADDR_4K_MASK; 327 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); 328 } 329 330 e->val64 |= (pfn << PAGE_SHIFT); 331 } 332 333 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) 334 { 335 return !!(e->val64 & _PAGE_PSE); 336 } 337 338 static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e) 339 { 340 if (gen8_gtt_test_pse(e)) { 341 switch (e->type) { 342 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 343 e->val64 &= ~_PAGE_PSE; 344 e->type = GTT_TYPE_PPGTT_PDE_ENTRY; 345 break; 346 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 347 e->type = GTT_TYPE_PPGTT_PDP_ENTRY; 348 e->val64 &= ~_PAGE_PSE; 349 break; 350 default: 351 WARN_ON(1); 352 } 353 } 354 } 355 356 static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e) 357 { 358 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) 359 return false; 360 361 return !!(e->val64 & GEN8_PDE_IPS_64K); 362 } 363 364 static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e) 365 { 366 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) 367 return; 368 369 e->val64 &= ~GEN8_PDE_IPS_64K; 370 } 371 372 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) 373 { 374 /* 375 * i915 writes PDP root pointer registers without present bit, 376 * it also works, so we need to treat root pointer entry 377 * specifically. 378 */ 379 if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY 380 || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 381 return (e->val64 != 0); 382 else 383 return (e->val64 & GEN8_PAGE_PRESENT); 384 } 385 386 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) 387 { 388 e->val64 &= ~GEN8_PAGE_PRESENT; 389 } 390 391 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) 392 { 393 e->val64 |= GEN8_PAGE_PRESENT; 394 } 395 396 static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) 397 { 398 return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED); 399 } 400 401 static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e) 402 { 403 e->val64 |= GTT_SPTE_FLAG_64K_SPLITED; 404 } 405 406 static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e) 407 { 408 e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED; 409 } 410 411 /* 412 * Per-platform GMA routines. 
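* The gen8 macros below decompose a graphics memory address as: bits 47:39 PML4 index, bits 38:30 PDP index (only 31:30 for a 3-level table), bits 29:21 PDE index and bits 20:12 PTE index; the GGTT simply uses gma >> 12 as its PTE index.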
413 */ 414 static unsigned long gma_to_ggtt_pte_index(unsigned long gma) 415 { 416 unsigned long x = (gma >> I915_GTT_PAGE_SHIFT); 417 418 trace_gma_index(__func__, gma, x); 419 return x; 420 } 421 422 #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \ 423 static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \ 424 { \ 425 unsigned long x = (exp); \ 426 trace_gma_index(__func__, gma, x); \ 427 return x; \ 428 } 429 430 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff)); 431 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff)); 432 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3)); 433 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff)); 434 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff)); 435 436 static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { 437 .get_entry = gtt_get_entry64, 438 .set_entry = gtt_set_entry64, 439 .clear_present = gtt_entry_clear_present, 440 .set_present = gtt_entry_set_present, 441 .test_present = gen8_gtt_test_present, 442 .test_pse = gen8_gtt_test_pse, 443 .clear_pse = gen8_gtt_clear_pse, 444 .clear_ips = gen8_gtt_clear_ips, 445 .test_ips = gen8_gtt_test_ips, 446 .clear_64k_splited = gen8_gtt_clear_64k_splited, 447 .set_64k_splited = gen8_gtt_set_64k_splited, 448 .test_64k_splited = gen8_gtt_test_64k_splited, 449 .get_pfn = gen8_gtt_get_pfn, 450 .set_pfn = gen8_gtt_set_pfn, 451 }; 452 453 static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { 454 .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index, 455 .gma_to_pte_index = gen8_gma_to_pte_index, 456 .gma_to_pde_index = gen8_gma_to_pde_index, 457 .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index, 458 .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index, 459 .gma_to_pml4_index = gen8_gma_to_pml4_index, 460 }; 461 462 /* Update entry type per pse and ips bit. */ 463 static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops, 464 struct intel_gvt_gtt_entry *entry, bool ips) 465 { 466 switch (entry->type) { 467 case GTT_TYPE_PPGTT_PDE_ENTRY: 468 case GTT_TYPE_PPGTT_PDP_ENTRY: 469 if (pte_ops->test_pse(entry)) 470 entry->type = get_pse_type(entry->type); 471 break; 472 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 473 if (ips) 474 entry->type = get_pse_type(entry->type); 475 break; 476 default: 477 GEM_BUG_ON(!gtt_type_is_entry(entry->type)); 478 } 479 480 GEM_BUG_ON(entry->type == GTT_TYPE_INVALID); 481 } 482 483 /* 484 * MM helpers. 485 */ 486 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, 487 struct intel_gvt_gtt_entry *entry, unsigned long index, 488 bool guest) 489 { 490 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 491 492 GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT); 493 494 entry->type = mm->ppgtt_mm.root_entry_type; 495 pte_ops->get_entry(guest ? 
mm->ppgtt_mm.guest_pdps : 496 mm->ppgtt_mm.shadow_pdps, 497 entry, index, false, 0, mm->vgpu); 498 update_entry_type_for_real(pte_ops, entry, false); 499 } 500 501 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, 502 struct intel_gvt_gtt_entry *entry, unsigned long index) 503 { 504 _ppgtt_get_root_entry(mm, entry, index, true); 505 } 506 507 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm, 508 struct intel_gvt_gtt_entry *entry, unsigned long index) 509 { 510 _ppgtt_get_root_entry(mm, entry, index, false); 511 } 512 513 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, 514 struct intel_gvt_gtt_entry *entry, unsigned long index, 515 bool guest) 516 { 517 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 518 519 pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps : 520 mm->ppgtt_mm.shadow_pdps, 521 entry, index, false, 0, mm->vgpu); 522 } 523 524 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, 525 struct intel_gvt_gtt_entry *entry, unsigned long index) 526 { 527 _ppgtt_set_root_entry(mm, entry, index, false); 528 } 529 530 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm, 531 struct intel_gvt_gtt_entry *entry, unsigned long index) 532 { 533 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 534 535 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 536 537 entry->type = GTT_TYPE_GGTT_PTE; 538 pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 539 false, 0, mm->vgpu); 540 } 541 542 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, 543 struct intel_gvt_gtt_entry *entry, unsigned long index) 544 { 545 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 546 547 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 548 549 pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 550 false, 0, mm->vgpu); 551 } 552 553 static void ggtt_get_host_entry(struct intel_vgpu_mm *mm, 554 struct intel_gvt_gtt_entry *entry, unsigned long index) 555 { 556 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 557 558 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 559 560 pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu); 561 } 562 563 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, 564 struct intel_gvt_gtt_entry *entry, unsigned long index) 565 { 566 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 567 unsigned long offset = index; 568 569 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 570 571 if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) { 572 offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT); 573 mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64; 574 } else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) { 575 offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT); 576 mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64; 577 } 578 579 pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu); 580 } 581 582 /* 583 * PPGTT shadow page table helpers. 
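* Each guest page-table page is mirrored by an spt: guest_page identifies the write-protected guest frame (gfn), while shadow_page holds the host-side copy whose entries point at DMA-mapped host pages or at lower-level shadow tables.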
584 */ 585 static inline int ppgtt_spt_get_entry( 586 struct intel_vgpu_ppgtt_spt *spt, 587 void *page_table, int type, 588 struct intel_gvt_gtt_entry *e, unsigned long index, 589 bool guest) 590 { 591 struct intel_gvt *gvt = spt->vgpu->gvt; 592 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 593 int ret; 594 595 e->type = get_entry_type(type); 596 597 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 598 return -EINVAL; 599 600 ret = ops->get_entry(page_table, e, index, guest, 601 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 602 spt->vgpu); 603 if (ret) 604 return ret; 605 606 update_entry_type_for_real(ops, e, guest ? 607 spt->guest_page.pde_ips : false); 608 609 gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 610 type, e->type, index, e->val64); 611 return 0; 612 } 613 614 static inline int ppgtt_spt_set_entry( 615 struct intel_vgpu_ppgtt_spt *spt, 616 void *page_table, int type, 617 struct intel_gvt_gtt_entry *e, unsigned long index, 618 bool guest) 619 { 620 struct intel_gvt *gvt = spt->vgpu->gvt; 621 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 622 623 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 624 return -EINVAL; 625 626 gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 627 type, e->type, index, e->val64); 628 629 return ops->set_entry(page_table, e, index, guest, 630 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 631 spt->vgpu); 632 } 633 634 #define ppgtt_get_guest_entry(spt, e, index) \ 635 ppgtt_spt_get_entry(spt, NULL, \ 636 spt->guest_page.type, e, index, true) 637 638 #define ppgtt_set_guest_entry(spt, e, index) \ 639 ppgtt_spt_set_entry(spt, NULL, \ 640 spt->guest_page.type, e, index, true) 641 642 #define ppgtt_get_shadow_entry(spt, e, index) \ 643 ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \ 644 spt->shadow_page.type, e, index, false) 645 646 #define ppgtt_set_shadow_entry(spt, e, index) \ 647 ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ 648 spt->shadow_page.type, e, index, false) 649 650 static void *alloc_spt(gfp_t gfp_mask) 651 { 652 struct intel_vgpu_ppgtt_spt *spt; 653 654 spt = kzalloc(sizeof(*spt), gfp_mask); 655 if (!spt) 656 return NULL; 657 658 spt->shadow_page.page = alloc_page(gfp_mask); 659 if (!spt->shadow_page.page) { 660 kfree(spt); 661 return NULL; 662 } 663 return spt; 664 } 665 666 static void free_spt(struct intel_vgpu_ppgtt_spt *spt) 667 { 668 __free_page(spt->shadow_page.page); 669 kfree(spt); 670 } 671 672 static int detach_oos_page(struct intel_vgpu *vgpu, 673 struct intel_vgpu_oos_page *oos_page); 674 675 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) 676 { 677 struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev; 678 679 trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type); 680 681 dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, 682 DMA_BIDIRECTIONAL); 683 684 radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); 685 686 if (spt->guest_page.gfn) { 687 if (spt->guest_page.oos_page) 688 detach_oos_page(spt->vgpu, spt->guest_page.oos_page); 689 690 intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); 691 } 692 693 list_del_init(&spt->post_shadow_list); 694 free_spt(spt); 695 } 696 697 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) 698 { 699 struct intel_vgpu_ppgtt_spt *spt, *spn; 700 struct radix_tree_iter iter; 701 LIST_HEAD(all_spt); 702 void __rcu **slot; 703 704 rcu_read_lock(); 705 radix_tree_for_each_slot(slot, 
&vgpu->gtt.spt_tree, &iter, 0) { 706 spt = radix_tree_deref_slot(slot); 707 list_move(&spt->post_shadow_list, &all_spt); 708 } 709 rcu_read_unlock(); 710 711 list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list) 712 ppgtt_free_spt(spt); 713 } 714 715 static int ppgtt_handle_guest_write_page_table_bytes( 716 struct intel_vgpu_ppgtt_spt *spt, 717 u64 pa, void *p_data, int bytes); 718 719 static int ppgtt_write_protection_handler( 720 struct intel_vgpu_page_track *page_track, 721 u64 gpa, void *data, int bytes) 722 { 723 struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data; 724 725 int ret; 726 727 if (bytes != 4 && bytes != 8) 728 return -EINVAL; 729 730 ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes); 731 if (ret) 732 return ret; 733 return ret; 734 } 735 736 /* Find a spt by guest gfn. */ 737 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( 738 struct intel_vgpu *vgpu, unsigned long gfn) 739 { 740 struct intel_vgpu_page_track *track; 741 742 track = intel_vgpu_find_page_track(vgpu, gfn); 743 if (track && track->handler == ppgtt_write_protection_handler) 744 return track->priv_data; 745 746 return NULL; 747 } 748 749 /* Find the spt by shadow page mfn. */ 750 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( 751 struct intel_vgpu *vgpu, unsigned long mfn) 752 { 753 return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn); 754 } 755 756 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); 757 758 /* Allocate shadow page table without guest page. */ 759 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( 760 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type) 761 { 762 struct device *kdev = vgpu->gvt->gt->i915->drm.dev; 763 struct intel_vgpu_ppgtt_spt *spt = NULL; 764 dma_addr_t daddr; 765 int ret; 766 767 retry: 768 spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); 769 if (!spt) { 770 if (reclaim_one_ppgtt_mm(vgpu->gvt)) 771 goto retry; 772 773 gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); 774 return ERR_PTR(-ENOMEM); 775 } 776 777 spt->vgpu = vgpu; 778 atomic_set(&spt->refcount, 1); 779 INIT_LIST_HEAD(&spt->post_shadow_list); 780 781 /* 782 * Init shadow_page. 783 */ 784 spt->shadow_page.type = type; 785 daddr = dma_map_page(kdev, spt->shadow_page.page, 786 0, 4096, DMA_BIDIRECTIONAL); 787 if (dma_mapping_error(kdev, daddr)) { 788 gvt_vgpu_err("fail to map dma addr\n"); 789 ret = -EINVAL; 790 goto err_free_spt; 791 } 792 spt->shadow_page.vaddr = page_address(spt->shadow_page.page); 793 spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; 794 795 ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); 796 if (ret) 797 goto err_unmap_dma; 798 799 return spt; 800 801 err_unmap_dma: 802 dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL); 803 err_free_spt: 804 free_spt(spt); 805 return ERR_PTR(ret); 806 } 807 808 /* Allocate shadow page table associated with specific gfn. */ 809 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( 810 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type, 811 unsigned long gfn, bool guest_pde_ips) 812 { 813 struct intel_vgpu_ppgtt_spt *spt; 814 int ret; 815 816 spt = ppgtt_alloc_spt(vgpu, type); 817 if (IS_ERR(spt)) 818 return spt; 819 820 /* 821 * Init guest_page. 
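* Registering the gfn for page tracking write-protects the guest page table page, so guest updates to it are trapped and shadowed.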
822 */ 823 ret = intel_vgpu_register_page_track(vgpu, gfn, 824 ppgtt_write_protection_handler, spt); 825 if (ret) { 826 ppgtt_free_spt(spt); 827 return ERR_PTR(ret); 828 } 829 830 spt->guest_page.type = type; 831 spt->guest_page.gfn = gfn; 832 spt->guest_page.pde_ips = guest_pde_ips; 833 834 trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); 835 836 return spt; 837 } 838 839 #define pt_entry_size_shift(spt) \ 840 ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) 841 842 #define pt_entries(spt) \ 843 (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) 844 845 #define for_each_present_guest_entry(spt, e, i) \ 846 for (i = 0; i < pt_entries(spt); \ 847 i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ 848 if (!ppgtt_get_guest_entry(spt, e, i) && \ 849 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 850 851 #define for_each_present_shadow_entry(spt, e, i) \ 852 for (i = 0; i < pt_entries(spt); \ 853 i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ 854 if (!ppgtt_get_shadow_entry(spt, e, i) && \ 855 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 856 857 #define for_each_shadow_entry(spt, e, i) \ 858 for (i = 0; i < pt_entries(spt); \ 859 i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \ 860 if (!ppgtt_get_shadow_entry(spt, e, i)) 861 862 static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) 863 { 864 int v = atomic_read(&spt->refcount); 865 866 trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1)); 867 atomic_inc(&spt->refcount); 868 } 869 870 static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt) 871 { 872 int v = atomic_read(&spt->refcount); 873 874 trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); 875 return atomic_dec_return(&spt->refcount); 876 } 877 878 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); 879 880 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, 881 struct intel_gvt_gtt_entry *e) 882 { 883 struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 884 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 885 struct intel_vgpu_ppgtt_spt *s; 886 enum intel_gvt_gtt_type cur_pt_type; 887 888 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); 889 890 if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY 891 && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { 892 cur_pt_type = get_next_pt_type(e->type); 893 894 if (!gtt_type_is_pt(cur_pt_type) || 895 !gtt_type_is_pt(cur_pt_type + 1)) { 896 drm_WARN(&i915->drm, 1, 897 "Invalid page table type, cur_pt_type is: %d\n", 898 cur_pt_type); 899 return -EINVAL; 900 } 901 902 cur_pt_type += 1; 903 904 if (ops->get_pfn(e) == 905 vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) 906 return 0; 907 } 908 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 909 if (!s) { 910 gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", 911 ops->get_pfn(e)); 912 return -ENXIO; 913 } 914 return ppgtt_invalidate_spt(s); 915 } 916 917 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, 918 struct intel_gvt_gtt_entry *entry) 919 { 920 struct intel_vgpu *vgpu = spt->vgpu; 921 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 922 unsigned long pfn; 923 int type; 924 925 pfn = ops->get_pfn(entry); 926 type = spt->shadow_page.type; 927 928 /* Uninitialized spte or unshadowed spte. 
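* A zero pfn, or one that still points at the per-type scratch page, was never mapped to a guest page, so there is nothing to unmap.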
*/ 929 if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn) 930 return; 931 932 intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); 933 } 934 935 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) 936 { 937 struct intel_vgpu *vgpu = spt->vgpu; 938 struct intel_gvt_gtt_entry e; 939 unsigned long index; 940 int ret; 941 942 trace_spt_change(spt->vgpu->id, "die", spt, 943 spt->guest_page.gfn, spt->shadow_page.type); 944 945 if (ppgtt_put_spt(spt) > 0) 946 return 0; 947 948 for_each_present_shadow_entry(spt, &e, index) { 949 switch (e.type) { 950 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 951 gvt_vdbg_mm("invalidate 4K entry\n"); 952 ppgtt_invalidate_pte(spt, &e); 953 break; 954 case GTT_TYPE_PPGTT_PTE_64K_ENTRY: 955 /* We don't setup 64K shadow entry so far. */ 956 WARN(1, "suspicious 64K gtt entry\n"); 957 continue; 958 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 959 gvt_vdbg_mm("invalidate 2M entry\n"); 960 continue; 961 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 962 WARN(1, "GVT doesn't support 1GB page\n"); 963 continue; 964 case GTT_TYPE_PPGTT_PML4_ENTRY: 965 case GTT_TYPE_PPGTT_PDP_ENTRY: 966 case GTT_TYPE_PPGTT_PDE_ENTRY: 967 gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n"); 968 ret = ppgtt_invalidate_spt_by_shadow_entry( 969 spt->vgpu, &e); 970 if (ret) 971 goto fail; 972 break; 973 default: 974 GEM_BUG_ON(1); 975 } 976 } 977 978 trace_spt_change(spt->vgpu->id, "release", spt, 979 spt->guest_page.gfn, spt->shadow_page.type); 980 ppgtt_free_spt(spt); 981 return 0; 982 fail: 983 gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n", 984 spt, e.val64, e.type); 985 return ret; 986 } 987 988 static bool vgpu_ips_enabled(struct intel_vgpu *vgpu) 989 { 990 struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; 991 992 if (GRAPHICS_VER(dev_priv) == 9) { 993 u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) & 994 GAMW_ECO_ENABLE_64K_IPS_FIELD; 995 996 return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD; 997 } else if (GRAPHICS_VER(dev_priv) >= 11) { 998 /* 64K paging only controlled by IPS bit in PTE now. 
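* (unlike gen9 there is no global GAMW enable bit to check, so IPS handling is reported as available unconditionally)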
*/ 999 return true; 1000 } else 1001 return false; 1002 } 1003 1004 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); 1005 1006 static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( 1007 struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we) 1008 { 1009 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1010 struct intel_vgpu_ppgtt_spt *spt = NULL; 1011 bool ips = false; 1012 int ret; 1013 1014 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); 1015 1016 if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY) 1017 ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we); 1018 1019 spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); 1020 if (spt) { 1021 ppgtt_get_spt(spt); 1022 1023 if (ips != spt->guest_page.pde_ips) { 1024 spt->guest_page.pde_ips = ips; 1025 1026 gvt_dbg_mm("reshadow PDE since ips changed\n"); 1027 clear_page(spt->shadow_page.vaddr); 1028 ret = ppgtt_populate_spt(spt); 1029 if (ret) { 1030 ppgtt_put_spt(spt); 1031 goto err; 1032 } 1033 } 1034 } else { 1035 int type = get_next_pt_type(we->type); 1036 1037 if (!gtt_type_is_pt(type)) { 1038 ret = -EINVAL; 1039 goto err; 1040 } 1041 1042 spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips); 1043 if (IS_ERR(spt)) { 1044 ret = PTR_ERR(spt); 1045 goto err; 1046 } 1047 1048 ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); 1049 if (ret) 1050 goto err_free_spt; 1051 1052 ret = ppgtt_populate_spt(spt); 1053 if (ret) 1054 goto err_free_spt; 1055 1056 trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn, 1057 spt->shadow_page.type); 1058 } 1059 return spt; 1060 1061 err_free_spt: 1062 ppgtt_free_spt(spt); 1063 spt = NULL; 1064 err: 1065 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1066 spt, we->val64, we->type); 1067 return ERR_PTR(ret); 1068 } 1069 1070 static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, 1071 struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge) 1072 { 1073 const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops; 1074 1075 se->type = ge->type; 1076 se->val64 = ge->val64; 1077 1078 /* Because we always split 64KB pages, so clear IPS in shadow PDE. */ 1079 if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY) 1080 ops->clear_ips(se); 1081 1082 ops->set_pfn(se, s->shadow_page.mfn); 1083 } 1084 1085 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, 1086 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1087 struct intel_gvt_gtt_entry *se) 1088 { 1089 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1090 struct intel_vgpu_ppgtt_spt *sub_spt; 1091 struct intel_gvt_gtt_entry sub_se; 1092 unsigned long start_gfn; 1093 dma_addr_t dma_addr; 1094 unsigned long sub_index; 1095 int ret; 1096 1097 gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index); 1098 1099 start_gfn = ops->get_pfn(se); 1100 1101 sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT); 1102 if (IS_ERR(sub_spt)) 1103 return PTR_ERR(sub_spt); 1104 1105 for_each_shadow_entry(sub_spt, &sub_se, sub_index) { 1106 ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index, 1107 PAGE_SIZE, &dma_addr); 1108 if (ret) 1109 goto err; 1110 sub_se.val64 = se->val64; 1111 1112 /* Copy the PAT field from PDE. */ 1113 sub_se.val64 &= ~_PAGE_PAT; 1114 sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5; 1115 1116 ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT); 1117 ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index); 1118 } 1119 1120 /* Clear dirty field. 
*/ 1121 se->val64 &= ~_PAGE_DIRTY; 1122 1123 ops->clear_pse(se); 1124 ops->clear_ips(se); 1125 ops->set_pfn(se, sub_spt->shadow_page.mfn); 1126 ppgtt_set_shadow_entry(spt, se, index); 1127 return 0; 1128 err: 1129 /* Cancel the existing address mappings of DMA addr. */ 1130 for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { 1131 gvt_vdbg_mm("invalidate 4K entry\n"); 1132 ppgtt_invalidate_pte(sub_spt, &sub_se); 1133 } 1134 /* Release the new allocated spt. */ 1135 trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, 1136 sub_spt->guest_page.gfn, sub_spt->shadow_page.type); 1137 ppgtt_free_spt(sub_spt); 1138 return ret; 1139 } 1140 1141 static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, 1142 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1143 struct intel_gvt_gtt_entry *se) 1144 { 1145 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1146 struct intel_gvt_gtt_entry entry = *se; 1147 unsigned long start_gfn; 1148 dma_addr_t dma_addr; 1149 int i, ret; 1150 1151 gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index); 1152 1153 GEM_BUG_ON(index % GTT_64K_PTE_STRIDE); 1154 1155 start_gfn = ops->get_pfn(se); 1156 1157 entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY; 1158 ops->set_64k_splited(&entry); 1159 1160 for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { 1161 ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i, 1162 PAGE_SIZE, &dma_addr); 1163 if (ret) 1164 return ret; 1165 1166 ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT); 1167 ppgtt_set_shadow_entry(spt, &entry, index + i); 1168 } 1169 return 0; 1170 } 1171 1172 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, 1173 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1174 struct intel_gvt_gtt_entry *ge) 1175 { 1176 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 1177 struct intel_gvt_gtt_entry se = *ge; 1178 unsigned long gfn; 1179 dma_addr_t dma_addr; 1180 int ret; 1181 1182 if (!pte_ops->test_present(ge)) 1183 return 0; 1184 1185 gfn = pte_ops->get_pfn(ge); 1186 1187 switch (ge->type) { 1188 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 1189 gvt_vdbg_mm("shadow 4K gtt entry\n"); 1190 ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr); 1191 if (ret) 1192 return -ENXIO; 1193 break; 1194 case GTT_TYPE_PPGTT_PTE_64K_ENTRY: 1195 gvt_vdbg_mm("shadow 64K gtt entry\n"); 1196 /* 1197 * The layout of 64K page is special, the page size is 1198 * controlled by upper PDE. To be simple, we always split 1199 * 64K page to smaller 4K pages in shadow PT. 1200 */ 1201 return split_64KB_gtt_entry(vgpu, spt, index, &se); 1202 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 1203 gvt_vdbg_mm("shadow 2M gtt entry\n"); 1204 if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) || 1205 intel_gvt_dma_map_guest_page(vgpu, gfn, 1206 I915_GTT_PAGE_SIZE_2M, &dma_addr)) 1207 return split_2MB_gtt_entry(vgpu, spt, index, &se); 1208 break; 1209 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 1210 gvt_vgpu_err("GVT doesn't support 1GB entry\n"); 1211 return -EINVAL; 1212 default: 1213 GEM_BUG_ON(1); 1214 return -EINVAL; 1215 } 1216 1217 /* Successfully shadowed a 4K or 2M page (without splitting). 
*/ 1218 pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT); 1219 ppgtt_set_shadow_entry(spt, &se, index); 1220 return 0; 1221 } 1222 1223 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) 1224 { 1225 struct intel_vgpu *vgpu = spt->vgpu; 1226 struct intel_vgpu_ppgtt_spt *s; 1227 struct intel_gvt_gtt_entry se, ge; 1228 unsigned long i; 1229 int ret; 1230 1231 trace_spt_change(spt->vgpu->id, "born", spt, 1232 spt->guest_page.gfn, spt->shadow_page.type); 1233 1234 for_each_present_guest_entry(spt, &ge, i) { 1235 if (gtt_type_is_pt(get_next_pt_type(ge.type))) { 1236 s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1237 if (IS_ERR(s)) { 1238 ret = PTR_ERR(s); 1239 goto fail; 1240 } 1241 ppgtt_get_shadow_entry(spt, &se, i); 1242 ppgtt_generate_shadow_entry(&se, s, &ge); 1243 ppgtt_set_shadow_entry(spt, &se, i); 1244 } else { 1245 ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge); 1246 if (ret) 1247 goto fail; 1248 } 1249 } 1250 return 0; 1251 fail: 1252 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1253 spt, ge.val64, ge.type); 1254 return ret; 1255 } 1256 1257 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, 1258 struct intel_gvt_gtt_entry *se, unsigned long index) 1259 { 1260 struct intel_vgpu *vgpu = spt->vgpu; 1261 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1262 int ret; 1263 1264 trace_spt_guest_change(spt->vgpu->id, "remove", spt, 1265 spt->shadow_page.type, se->val64, index); 1266 1267 gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n", 1268 se->type, index, se->val64); 1269 1270 if (!ops->test_present(se)) 1271 return 0; 1272 1273 if (ops->get_pfn(se) == 1274 vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn) 1275 return 0; 1276 1277 if (gtt_type_is_pt(get_next_pt_type(se->type))) { 1278 struct intel_vgpu_ppgtt_spt *s = 1279 intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se)); 1280 if (!s) { 1281 gvt_vgpu_err("fail to find guest page\n"); 1282 ret = -ENXIO; 1283 goto fail; 1284 } 1285 ret = ppgtt_invalidate_spt(s); 1286 if (ret) 1287 goto fail; 1288 } else { 1289 /* We don't setup 64K shadow entry so far. 
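* (64K guest pages are always split into 4K shadow entries, so a 64K-typed shadow entry here is unexpected)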
*/ 1290 WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY, 1291 "suspicious 64K entry\n"); 1292 ppgtt_invalidate_pte(spt, se); 1293 } 1294 1295 return 0; 1296 fail: 1297 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1298 spt, se->val64, se->type); 1299 return ret; 1300 } 1301 1302 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, 1303 struct intel_gvt_gtt_entry *we, unsigned long index) 1304 { 1305 struct intel_vgpu *vgpu = spt->vgpu; 1306 struct intel_gvt_gtt_entry m; 1307 struct intel_vgpu_ppgtt_spt *s; 1308 int ret; 1309 1310 trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type, 1311 we->val64, index); 1312 1313 gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n", 1314 we->type, index, we->val64); 1315 1316 if (gtt_type_is_pt(get_next_pt_type(we->type))) { 1317 s = ppgtt_populate_spt_by_guest_entry(vgpu, we); 1318 if (IS_ERR(s)) { 1319 ret = PTR_ERR(s); 1320 goto fail; 1321 } 1322 ppgtt_get_shadow_entry(spt, &m, index); 1323 ppgtt_generate_shadow_entry(&m, s, we); 1324 ppgtt_set_shadow_entry(spt, &m, index); 1325 } else { 1326 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we); 1327 if (ret) 1328 goto fail; 1329 } 1330 return 0; 1331 fail: 1332 gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n", 1333 spt, we->val64, we->type); 1334 return ret; 1335 } 1336 1337 static int sync_oos_page(struct intel_vgpu *vgpu, 1338 struct intel_vgpu_oos_page *oos_page) 1339 { 1340 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1341 struct intel_gvt *gvt = vgpu->gvt; 1342 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 1343 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1344 struct intel_gvt_gtt_entry old, new; 1345 int index; 1346 int ret; 1347 1348 trace_oos_change(vgpu->id, "sync", oos_page->id, 1349 spt, spt->guest_page.type); 1350 1351 old.type = new.type = get_entry_type(spt->guest_page.type); 1352 old.val64 = new.val64 = 0; 1353 1354 for (index = 0; index < (I915_GTT_PAGE_SIZE >> 1355 info->gtt_entry_size_shift); index++) { 1356 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); 1357 ops->get_entry(NULL, &new, index, true, 1358 spt->guest_page.gfn << PAGE_SHIFT, vgpu); 1359 1360 if (old.val64 == new.val64 1361 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) 1362 continue; 1363 1364 trace_oos_sync(vgpu->id, oos_page->id, 1365 spt, spt->guest_page.type, 1366 new.val64, index); 1367 1368 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new); 1369 if (ret) 1370 return ret; 1371 1372 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); 1373 } 1374 1375 spt->guest_page.write_cnt = 0; 1376 list_del_init(&spt->post_shadow_list); 1377 return 0; 1378 } 1379 1380 static int detach_oos_page(struct intel_vgpu *vgpu, 1381 struct intel_vgpu_oos_page *oos_page) 1382 { 1383 struct intel_gvt *gvt = vgpu->gvt; 1384 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1385 1386 trace_oos_change(vgpu->id, "detach", oos_page->id, 1387 spt, spt->guest_page.type); 1388 1389 spt->guest_page.write_cnt = 0; 1390 spt->guest_page.oos_page = NULL; 1391 oos_page->spt = NULL; 1392 1393 list_del_init(&oos_page->vm_list); 1394 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head); 1395 1396 return 0; 1397 } 1398 1399 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page, 1400 struct intel_vgpu_ppgtt_spt *spt) 1401 { 1402 struct intel_gvt *gvt = spt->vgpu->gvt; 1403 int ret; 1404 1405 ret = intel_gvt_read_gpa(spt->vgpu, 1406 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 
1407 oos_page->mem, I915_GTT_PAGE_SIZE); 1408 if (ret) 1409 return ret; 1410 1411 oos_page->spt = spt; 1412 spt->guest_page.oos_page = oos_page; 1413 1414 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head); 1415 1416 trace_oos_change(spt->vgpu->id, "attach", oos_page->id, 1417 spt, spt->guest_page.type); 1418 return 0; 1419 } 1420 1421 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) 1422 { 1423 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1424 int ret; 1425 1426 ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn); 1427 if (ret) 1428 return ret; 1429 1430 trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id, 1431 spt, spt->guest_page.type); 1432 1433 list_del_init(&oos_page->vm_list); 1434 return sync_oos_page(spt->vgpu, oos_page); 1435 } 1436 1437 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) 1438 { 1439 struct intel_gvt *gvt = spt->vgpu->gvt; 1440 struct intel_gvt_gtt *gtt = &gvt->gtt; 1441 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1442 int ret; 1443 1444 WARN(oos_page, "shadow PPGTT page already has an oos page\n"); 1445 1446 if (list_empty(&gtt->oos_page_free_list_head)) { 1447 oos_page = container_of(gtt->oos_page_use_list_head.next, 1448 struct intel_vgpu_oos_page, list); 1449 ret = ppgtt_set_guest_page_sync(oos_page->spt); 1450 if (ret) 1451 return ret; 1452 ret = detach_oos_page(spt->vgpu, oos_page); 1453 if (ret) 1454 return ret; 1455 } else 1456 oos_page = container_of(gtt->oos_page_free_list_head.next, 1457 struct intel_vgpu_oos_page, list); 1458 return attach_oos_page(oos_page, spt); 1459 } 1460 1461 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) 1462 { 1463 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1464 1465 if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n")) 1466 return -EINVAL; 1467 1468 trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id, 1469 spt, spt->guest_page.type); 1470 1471 list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head); 1472 return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn); 1473 } 1474 1475 /** 1476 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU 1477 * @vgpu: a vGPU 1478 * 1479 * This function is called before submitting a guest workload to the host, 1480 * to sync all the out-of-sync shadow pages for the vGPU. 1481 * 1482 * Returns: 1483 * Zero on success, negative error code if failed. 1484 */ 1485 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) 1486 { 1487 struct list_head *pos, *n; 1488 struct intel_vgpu_oos_page *oos_page; 1489 int ret; 1490 1491 if (!enable_out_of_sync) 1492 return 0; 1493 1494 list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) { 1495 oos_page = container_of(pos, 1496 struct intel_vgpu_oos_page, vm_list); 1497 ret = ppgtt_set_guest_page_sync(oos_page->spt); 1498 if (ret) 1499 return ret; 1500 } 1501 return 0; 1502 } 1503 1504 /* 1505 * The heart of PPGTT shadow page table.
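* Guest writes to write-protected page-table pages are trapped by the page-track handler and replayed here against the shadow tables, either immediately for full-entry writes or deferred via the post-shadow and out-of-sync paths for partial writes.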
*/ 1507 static int ppgtt_handle_guest_write_page_table( 1508 struct intel_vgpu_ppgtt_spt *spt, 1509 struct intel_gvt_gtt_entry *we, unsigned long index) 1510 { 1511 struct intel_vgpu *vgpu = spt->vgpu; 1512 int type = spt->shadow_page.type; 1513 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1514 struct intel_gvt_gtt_entry old_se; 1515 int new_present; 1516 int i, ret; 1517 1518 new_present = ops->test_present(we); 1519 1520 /* 1521 * Add the new entry first and only then remove the old one, so that 1522 * the ppgtt table remains valid during the window between the two 1523 * updates. 1524 */ 1525 ppgtt_get_shadow_entry(spt, &old_se, index); 1526 1527 if (new_present) { 1528 ret = ppgtt_handle_guest_entry_add(spt, we, index); 1529 if (ret) 1530 goto fail; 1531 } 1532 1533 ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index); 1534 if (ret) 1535 goto fail; 1536 1537 if (!new_present) { 1538 /* For split 64KB entries, all of them need to be cleared. */ 1539 if (ops->test_64k_splited(&old_se) && 1540 !(index % GTT_64K_PTE_STRIDE)) { 1541 gvt_vdbg_mm("remove split 64K shadow entries\n"); 1542 for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { 1543 ops->clear_64k_splited(&old_se); 1544 ops->set_pfn(&old_se, 1545 vgpu->gtt.scratch_pt[type].page_mfn); 1546 ppgtt_set_shadow_entry(spt, &old_se, index + i); 1547 } 1548 } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY || 1549 old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { 1550 ops->clear_pse(&old_se); 1551 ops->set_pfn(&old_se, 1552 vgpu->gtt.scratch_pt[type].page_mfn); 1553 ppgtt_set_shadow_entry(spt, &old_se, index); 1554 } else { 1555 ops->set_pfn(&old_se, 1556 vgpu->gtt.scratch_pt[type].page_mfn); 1557 ppgtt_set_shadow_entry(spt, &old_se, index); 1558 } 1559 } 1560 1561 return 0; 1562 fail: 1563 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n", 1564 spt, we->val64, we->type); 1565 return ret; 1566 } 1567 1568 1569 1570 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt) 1571 { 1572 return enable_out_of_sync 1573 && gtt_type_is_pte_pt(spt->guest_page.type) 1574 && spt->guest_page.write_cnt >= 2; 1575 } 1576 1577 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt, 1578 unsigned long index) 1579 { 1580 set_bit(index, spt->post_shadow_bitmap); 1581 if (!list_empty(&spt->post_shadow_list)) 1582 return; 1583 1584 list_add_tail(&spt->post_shadow_list, 1585 &spt->vgpu->gtt.post_shadow_list_head); 1586 } 1587 1588 /** 1589 * intel_vgpu_flush_post_shadow - flush the post shadow transactions 1590 * @vgpu: a vGPU 1591 * 1592 * This function is called before submitting a guest workload to the host, 1593 * to flush all the post shadow transactions for a vGPU. 1594 * 1595 * Returns: 1596 * Zero on success, negative error code if failed.
1597 */ 1598 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) 1599 { 1600 struct list_head *pos, *n; 1601 struct intel_vgpu_ppgtt_spt *spt; 1602 struct intel_gvt_gtt_entry ge; 1603 unsigned long index; 1604 int ret; 1605 1606 list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) { 1607 spt = container_of(pos, struct intel_vgpu_ppgtt_spt, 1608 post_shadow_list); 1609 1610 for_each_set_bit(index, spt->post_shadow_bitmap, 1611 GTT_ENTRY_NUM_IN_ONE_PAGE) { 1612 ppgtt_get_guest_entry(spt, &ge, index); 1613 1614 ret = ppgtt_handle_guest_write_page_table(spt, 1615 &ge, index); 1616 if (ret) 1617 return ret; 1618 clear_bit(index, spt->post_shadow_bitmap); 1619 } 1620 list_del_init(&spt->post_shadow_list); 1621 } 1622 return 0; 1623 } 1624 1625 static int ppgtt_handle_guest_write_page_table_bytes( 1626 struct intel_vgpu_ppgtt_spt *spt, 1627 u64 pa, void *p_data, int bytes) 1628 { 1629 struct intel_vgpu *vgpu = spt->vgpu; 1630 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1631 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1632 struct intel_gvt_gtt_entry we, se; 1633 unsigned long index; 1634 int ret; 1635 1636 index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift; 1637 1638 ppgtt_get_guest_entry(spt, &we, index); 1639 1640 /* 1641 * For page table which has 64K gtt entry, only PTE#0, PTE#16, 1642 * PTE#32, ... PTE#496 are used. Unused PTEs update should be 1643 * ignored. 1644 */ 1645 if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY && 1646 (index % GTT_64K_PTE_STRIDE)) { 1647 gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n", 1648 index); 1649 return 0; 1650 } 1651 1652 if (bytes == info->gtt_entry_size) { 1653 ret = ppgtt_handle_guest_write_page_table(spt, &we, index); 1654 if (ret) 1655 return ret; 1656 } else { 1657 if (!test_bit(index, spt->post_shadow_bitmap)) { 1658 int type = spt->shadow_page.type; 1659 1660 ppgtt_get_shadow_entry(spt, &se, index); 1661 ret = ppgtt_handle_guest_entry_removal(spt, &se, index); 1662 if (ret) 1663 return ret; 1664 ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); 1665 ppgtt_set_shadow_entry(spt, &se, index); 1666 } 1667 ppgtt_set_post_shadow(spt, index); 1668 } 1669 1670 if (!enable_out_of_sync) 1671 return 0; 1672 1673 spt->guest_page.write_cnt++; 1674 1675 if (spt->guest_page.oos_page) 1676 ops->set_entry(spt->guest_page.oos_page->mem, &we, index, 1677 false, 0, vgpu); 1678 1679 if (can_do_out_of_sync(spt)) { 1680 if (!spt->guest_page.oos_page) 1681 ppgtt_allocate_oos_page(spt); 1682 1683 ret = ppgtt_set_guest_page_oos(spt); 1684 if (ret < 0) 1685 return ret; 1686 } 1687 return 0; 1688 } 1689 1690 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) 1691 { 1692 struct intel_vgpu *vgpu = mm->vgpu; 1693 struct intel_gvt *gvt = vgpu->gvt; 1694 struct intel_gvt_gtt *gtt = &gvt->gtt; 1695 const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1696 struct intel_gvt_gtt_entry se; 1697 int index; 1698 1699 if (!mm->ppgtt_mm.shadowed) 1700 return; 1701 1702 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { 1703 ppgtt_get_shadow_root_entry(mm, &se, index); 1704 1705 if (!ops->test_present(&se)) 1706 continue; 1707 1708 ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se); 1709 se.val64 = 0; 1710 ppgtt_set_shadow_root_entry(mm, &se, index); 1711 1712 trace_spt_guest_change(vgpu->id, "destroy root pointer", 1713 NULL, se.type, se.val64, index); 1714 } 1715 1716 mm->ppgtt_mm.shadowed = false; 1717 } 1718 1719 1720 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) 1721 { 
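/* Lazily build the shadow tree: for each present guest root entry, populate shadow page tables and install their mfns as the shadow root entries. */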
1722 struct intel_vgpu *vgpu = mm->vgpu; 1723 struct intel_gvt *gvt = vgpu->gvt; 1724 struct intel_gvt_gtt *gtt = &gvt->gtt; 1725 const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1726 struct intel_vgpu_ppgtt_spt *spt; 1727 struct intel_gvt_gtt_entry ge, se; 1728 int index, ret; 1729 1730 if (mm->ppgtt_mm.shadowed) 1731 return 0; 1732 1733 if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) 1734 return -EINVAL; 1735 1736 mm->ppgtt_mm.shadowed = true; 1737 1738 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { 1739 ppgtt_get_guest_root_entry(mm, &ge, index); 1740 1741 if (!ops->test_present(&ge)) 1742 continue; 1743 1744 trace_spt_guest_change(vgpu->id, __func__, NULL, 1745 ge.type, ge.val64, index); 1746 1747 spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1748 if (IS_ERR(spt)) { 1749 gvt_vgpu_err("fail to populate guest root pointer\n"); 1750 ret = PTR_ERR(spt); 1751 goto fail; 1752 } 1753 ppgtt_generate_shadow_entry(&se, spt, &ge); 1754 ppgtt_set_shadow_root_entry(mm, &se, index); 1755 1756 trace_spt_guest_change(vgpu->id, "populate root pointer", 1757 NULL, se.type, se.val64, index); 1758 } 1759 1760 return 0; 1761 fail: 1762 invalidate_ppgtt_mm(mm); 1763 return ret; 1764 } 1765 1766 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu) 1767 { 1768 struct intel_vgpu_mm *mm; 1769 1770 mm = kzalloc(sizeof(*mm), GFP_KERNEL); 1771 if (!mm) 1772 return NULL; 1773 1774 mm->vgpu = vgpu; 1775 kref_init(&mm->ref); 1776 atomic_set(&mm->pincount, 0); 1777 1778 return mm; 1779 } 1780 1781 static void vgpu_free_mm(struct intel_vgpu_mm *mm) 1782 { 1783 kfree(mm); 1784 } 1785 1786 /** 1787 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU 1788 * @vgpu: a vGPU 1789 * @root_entry_type: ppgtt root entry type 1790 * @pdps: guest pdps. 1791 * 1792 * This function is used to create a ppgtt mm object for a vGPU. 1793 * 1794 * Returns: 1795 * Zero on success, negative error code in pointer if failed. 
1796 */ 1797 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, 1798 enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) 1799 { 1800 struct intel_gvt *gvt = vgpu->gvt; 1801 struct intel_vgpu_mm *mm; 1802 int ret; 1803 1804 mm = vgpu_alloc_mm(vgpu); 1805 if (!mm) 1806 return ERR_PTR(-ENOMEM); 1807 1808 mm->type = INTEL_GVT_MM_PPGTT; 1809 1810 GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && 1811 root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY); 1812 mm->ppgtt_mm.root_entry_type = root_entry_type; 1813 1814 INIT_LIST_HEAD(&mm->ppgtt_mm.list); 1815 INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); 1816 INIT_LIST_HEAD(&mm->ppgtt_mm.link); 1817 1818 if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 1819 mm->ppgtt_mm.guest_pdps[0] = pdps[0]; 1820 else 1821 memcpy(mm->ppgtt_mm.guest_pdps, pdps, 1822 sizeof(mm->ppgtt_mm.guest_pdps)); 1823 1824 ret = shadow_ppgtt_mm(mm); 1825 if (ret) { 1826 gvt_vgpu_err("failed to shadow ppgtt mm\n"); 1827 vgpu_free_mm(mm); 1828 return ERR_PTR(ret); 1829 } 1830 1831 list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head); 1832 1833 mutex_lock(&gvt->gtt.ppgtt_mm_lock); 1834 list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head); 1835 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1836 1837 return mm; 1838 } 1839 1840 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) 1841 { 1842 struct intel_vgpu_mm *mm; 1843 unsigned long nr_entries; 1844 1845 mm = vgpu_alloc_mm(vgpu); 1846 if (!mm) 1847 return ERR_PTR(-ENOMEM); 1848 1849 mm->type = INTEL_GVT_MM_GGTT; 1850 1851 nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT; 1852 mm->ggtt_mm.virtual_ggtt = 1853 vzalloc(array_size(nr_entries, 1854 vgpu->gvt->device_info.gtt_entry_size)); 1855 if (!mm->ggtt_mm.virtual_ggtt) { 1856 vgpu_free_mm(mm); 1857 return ERR_PTR(-ENOMEM); 1858 } 1859 1860 mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64)); 1861 if (!mm->ggtt_mm.host_ggtt_aperture) { 1862 vfree(mm->ggtt_mm.virtual_ggtt); 1863 vgpu_free_mm(mm); 1864 return ERR_PTR(-ENOMEM); 1865 } 1866 1867 mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64)); 1868 if (!mm->ggtt_mm.host_ggtt_hidden) { 1869 vfree(mm->ggtt_mm.host_ggtt_aperture); 1870 vfree(mm->ggtt_mm.virtual_ggtt); 1871 vgpu_free_mm(mm); 1872 return ERR_PTR(-ENOMEM); 1873 } 1874 1875 return mm; 1876 } 1877 1878 /** 1879 * _intel_vgpu_mm_release - destroy a mm object 1880 * @mm_ref: a kref object 1881 * 1882 * This function is used to destroy a mm object for vGPU 1883 * 1884 */ 1885 void _intel_vgpu_mm_release(struct kref *mm_ref) 1886 { 1887 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); 1888 1889 if (GEM_WARN_ON(atomic_read(&mm->pincount))) 1890 gvt_err("vgpu mm pin count bug detected\n"); 1891 1892 if (mm->type == INTEL_GVT_MM_PPGTT) { 1893 list_del(&mm->ppgtt_mm.list); 1894 1895 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1896 list_del(&mm->ppgtt_mm.lru_list); 1897 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1898 1899 invalidate_ppgtt_mm(mm); 1900 } else { 1901 vfree(mm->ggtt_mm.virtual_ggtt); 1902 vfree(mm->ggtt_mm.host_ggtt_aperture); 1903 vfree(mm->ggtt_mm.host_ggtt_hidden); 1904 } 1905 1906 vgpu_free_mm(mm); 1907 } 1908 1909 /** 1910 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object 1911 * @mm: a vGPU mm object 1912 * 1913 * This function is called when user doesn't want to use a vGPU mm object 1914 */ 1915 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) 
1916 { 1917 atomic_dec_if_positive(&mm->pincount); 1918 } 1919 1920 /** 1921 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object 1922 * @mm: target vgpu mm 1923 * 1924 * This function is called when user wants to use a vGPU mm object. If this 1925 * mm object hasn't been shadowed yet, the shadow will be populated at this 1926 * time. 1927 * 1928 * Returns: 1929 * Zero on success, negative error code if failed. 1930 */ 1931 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) 1932 { 1933 int ret; 1934 1935 atomic_inc(&mm->pincount); 1936 1937 if (mm->type == INTEL_GVT_MM_PPGTT) { 1938 ret = shadow_ppgtt_mm(mm); 1939 if (ret) 1940 return ret; 1941 1942 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1943 list_move_tail(&mm->ppgtt_mm.lru_list, 1944 &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head); 1945 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1946 } 1947 1948 return 0; 1949 } 1950 1951 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) 1952 { 1953 struct intel_vgpu_mm *mm; 1954 struct list_head *pos, *n; 1955 1956 mutex_lock(&gvt->gtt.ppgtt_mm_lock); 1957 1958 list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { 1959 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); 1960 1961 if (atomic_read(&mm->pincount)) 1962 continue; 1963 1964 list_del_init(&mm->ppgtt_mm.lru_list); 1965 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1966 invalidate_ppgtt_mm(mm); 1967 return 1; 1968 } 1969 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1970 return 0; 1971 } 1972 1973 /* 1974 * GMA translation APIs. 1975 */ 1976 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, 1977 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest) 1978 { 1979 struct intel_vgpu *vgpu = mm->vgpu; 1980 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1981 struct intel_vgpu_ppgtt_spt *s; 1982 1983 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 1984 if (!s) 1985 return -ENXIO; 1986 1987 if (!guest) 1988 ppgtt_get_shadow_entry(s, e, index); 1989 else 1990 ppgtt_get_guest_entry(s, e, index); 1991 return 0; 1992 } 1993 1994 /** 1995 * intel_vgpu_gma_to_gpa - translate a gma to GPA 1996 * @mm: mm object. could be a PPGTT or GGTT mm object 1997 * @gma: graphics memory address in this mm object 1998 * 1999 * This function is used to translate a graphics memory address in specific 2000 * graphics memory space to guest physical address. 2001 * 2002 * Returns: 2003 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed. 
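 *
 * A minimal usage sketch (illustrative only, not quoted from a caller in
 * this file); the error value returned is up to the caller:
 *
 *	gpa = intel_vgpu_gma_to_gpa(mm, gma);
 *	if (gpa == INTEL_GVT_INVALID_ADDR)
 *		return -EFAULT;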
2004 */ 2005 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) 2006 { 2007 struct intel_vgpu *vgpu = mm->vgpu; 2008 struct intel_gvt *gvt = vgpu->gvt; 2009 const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops; 2010 const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops; 2011 unsigned long gpa = INTEL_GVT_INVALID_ADDR; 2012 unsigned long gma_index[4]; 2013 struct intel_gvt_gtt_entry e; 2014 int i, levels = 0; 2015 int ret; 2016 2017 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT && 2018 mm->type != INTEL_GVT_MM_PPGTT); 2019 2020 if (mm->type == INTEL_GVT_MM_GGTT) { 2021 if (!vgpu_gmadr_is_valid(vgpu, gma)) 2022 goto err; 2023 2024 ggtt_get_guest_entry(mm, &e, 2025 gma_ops->gma_to_ggtt_pte_index(gma)); 2026 2027 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) 2028 + (gma & ~I915_GTT_PAGE_MASK); 2029 2030 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); 2031 } else { 2032 switch (mm->ppgtt_mm.root_entry_type) { 2033 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: 2034 ppgtt_get_shadow_root_entry(mm, &e, 0); 2035 2036 gma_index[0] = gma_ops->gma_to_pml4_index(gma); 2037 gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); 2038 gma_index[2] = gma_ops->gma_to_pde_index(gma); 2039 gma_index[3] = gma_ops->gma_to_pte_index(gma); 2040 levels = 4; 2041 break; 2042 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: 2043 ppgtt_get_shadow_root_entry(mm, &e, 2044 gma_ops->gma_to_l3_pdp_index(gma)); 2045 2046 gma_index[0] = gma_ops->gma_to_pde_index(gma); 2047 gma_index[1] = gma_ops->gma_to_pte_index(gma); 2048 levels = 2; 2049 break; 2050 default: 2051 GEM_BUG_ON(1); 2052 } 2053 2054 /* walk the shadow page table and get gpa from guest entry */ 2055 for (i = 0; i < levels; i++) { 2056 ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], 2057 (i == levels - 1)); 2058 if (ret) 2059 goto err; 2060 2061 if (!pte_ops->test_present(&e)) { 2062 gvt_dbg_core("GMA 0x%lx is not present\n", gma); 2063 goto err; 2064 } 2065 } 2066 2067 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + 2068 (gma & ~I915_GTT_PAGE_MASK); 2069 trace_gma_translate(vgpu->id, "ppgtt", 0, 2070 mm->ppgtt_mm.root_entry_type, gma, gpa); 2071 } 2072 2073 return gpa; 2074 err: 2075 gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); 2076 return INTEL_GVT_INVALID_ADDR; 2077 } 2078 2079 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, 2080 unsigned int off, void *p_data, unsigned int bytes) 2081 { 2082 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 2083 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2084 unsigned long index = off >> info->gtt_entry_size_shift; 2085 unsigned long gma; 2086 struct intel_gvt_gtt_entry e; 2087 2088 if (bytes != 4 && bytes != 8) 2089 return -EINVAL; 2090 2091 gma = index << I915_GTT_PAGE_SHIFT; 2092 if (!intel_gvt_ggtt_validate_range(vgpu, 2093 gma, 1 << I915_GTT_PAGE_SHIFT)) { 2094 gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma); 2095 memset(p_data, 0, bytes); 2096 return 0; 2097 } 2098 2099 ggtt_get_guest_entry(ggtt_mm, &e, index); 2100 memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), 2101 bytes); 2102 return 0; 2103 } 2104 2105 /** 2106 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read 2107 * @vgpu: a vGPU 2108 * @off: register offset 2109 * @p_data: data will be returned to guest 2110 * @bytes: data length 2111 * 2112 * This function is used to emulate the GTT MMIO register read 2113 * 2114 * Returns: 2115 * Zero on success, error code if failed. 
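 *
 * Worked example (illustrative, assuming 8-byte GGTT entries, i.e.
 * gtt_entry_size_shift == 3): a 4-byte guest read at GTT offset 0x1004 is
 * served from the upper half of virtual GGTT entry 0x200:
 *
 *	index = 0x1004 >> 3;                      == 0x200
 *	memcpy(p_data, (void *)&e.val64 + (0x1004 & 0x7), 4);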
 */
int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
	return ret;
}

static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *entry)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;

	pfn = pte_ops->get_pfn(entry);
	if (pfn != vgpu->gvt->gtt.scratch_mfn)
		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
}

static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma, gfn;
	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
	dma_addr_t dma_addr;
	int ret;
	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
	bool partial_update = false;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (!vgpu_gmadr_is_valid(vgpu, gma))
		return 0;

	e.type = GTT_TYPE_GGTT_PTE;
	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
			bytes);

	/* If the GGTT entry size is 8 bytes and the guest splits the update
	 * into two 4-byte writes, save the first 4 bytes in a list and update
	 * only the virtual PTE. The shadow PTE is updated once the second
	 * 4 bytes arrive.
	 */
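	/*
	 * Illustrative sequence (a sketch, assuming an 8-byte entry at GTT
	 * offset 0x100): the guest first writes 4 bytes at 0x100; that half
	 * is cached on partial_pte_list and only the virtual PTE is updated.
	 * A later 4-byte write at 0x104 completes the entry: the cached half
	 * is merged, the list node is freed and the shadow (host) PTE is
	 * finally populated below.
	 */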
	if (bytes < info->gtt_entry_size) {
		bool found = false;

		list_for_each_entry_safe(pos, n,
					 &ggtt_mm->ggtt_mm.partial_pte_list, list) {
			if (g_gtt_index == pos->offset >>
					info->gtt_entry_size_shift) {
				if (off != pos->offset) {
					/* the second partial part */
					int last_off = pos->offset &
						(info->gtt_entry_size - 1);

					memcpy((void *)&e.val64 + last_off,
						(void *)&pos->data + last_off,
						bytes);

					list_del(&pos->list);
					kfree(pos);
					found = true;
					break;
				}

				/* update of the first partial part */
				pos->data = e.val64;
				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
				return 0;
			}
		}

		if (!found) {
			/* the first partial part */
			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
			if (!partial_pte)
				return -ENOMEM;
			partial_pte->offset = off;
			partial_pte->data = e.val64;
			list_add_tail(&partial_pte->list,
				      &ggtt_mm->ggtt_mm.partial_pte_list);
			partial_update = true;
		}
	}

	if (!partial_update && (ops->test_present(&e))) {
		gfn = ops->get_pfn(&e);
		m.val64 = e.val64;
		m.type = e.type;

		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
						   &dma_addr);
		if (ret) {
			gvt_vgpu_err("fail to populate guest ggtt entry\n");
			/* The guest driver may read/write the entry while it
			 * is still only partially updated; the p2m mapping
			 * can fail in that situation, so point the shadow
			 * entry at a scratch page instead.
			 */
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		} else
			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
	} else {
		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		ops->clear_present(&m);
	}

	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);

	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
	ggtt_invalidate_pte(vgpu, &e);

	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_invalidate(gvt->gt);
	return 0;
}

/**
 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
		unsigned int off, void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_engine_cs *engine;
	int i;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);

	/* If the GGTT entry of the last submitted context is written, that
	 * context probably got unpinned. Mark the last shadowed context
	 * invalid.
	 */
	for_each_engine(engine, vgpu->gvt->gt, i) {
		if (!s->last_ctx[i].valid)
			continue;

		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
			s->last_ctx[i].valid = false;
	}
	return ret;
}
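/*
 * Caller sketch (hypothetical, for illustration only): the MMIO emulation
 * layer is expected to forward trapped accesses in the GTT range to the
 * handler above, e.g. for a 4-byte write of the low dword of a PTE
 * ("gtt_mmio_offset" is a placeholder for an offset inside the GTT range):
 *
 *	u32 lo = lower_32_bits(pte);
 *
 *	intel_vgpu_emulate_ggtt_mmio_write(vgpu, gtt_mmio_offset, &lo, 4);
 */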
static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		enum intel_gvt_gtt_type type)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = I915_GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	void *scratch_pt;
	int i;
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
	dma_addr_t daddr;

	if (drm_WARN_ON(&i915->drm,
			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
	if (!scratch_pt) {
		gvt_vgpu_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_vgpu_err("fail to dmamap scratch_pt\n");
		__free_page(virt_to_page(scratch_pt));
		return -ENOMEM;
	}
	gtt->scratch_pt[type].page_mfn =
		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
		   vgpu->id, type, gtt->scratch_pt[type].page_mfn);

	/* Build the tree by filling the scratch pt with entries that point to
	 * the next-level scratch pt or scratch page. scratch_pt[type]
	 * indicates the scratch pt/scratch page used by a page table of the
	 * given 'type'.
	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
	 * GTT_TYPE_PPGTT_PDE_PT level pt; that means this scratch_pt itself
	 * is a GTT_TYPE_PPGTT_PTE_PT and is fully filled with the scratch
	 * page mfn.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/* The entry parameters (present/writable/cache type) are set
		 * to match i915's scratch page tree.
		 */
		se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
	}

	return 0;
}
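/*
 * Resulting scratch chain (a sketch of what the fill loop above builds; the
 * PTE-level scratch page is simply left zeroed):
 *
 *	scratch_pt[GTT_TYPE_PPGTT_PML4_PT] entries -> scratch_pt[GTT_TYPE_PPGTT_PDP_PT]
 *	scratch_pt[GTT_TYPE_PPGTT_PDP_PT]  entries -> scratch_pt[GTT_TYPE_PPGTT_PDE_PT]
 *	scratch_pt[GTT_TYPE_PPGTT_PDE_PT]  entries -> scratch_pt[GTT_TYPE_PPGTT_PTE_PT]
 */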
static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i;
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
	dma_addr_t daddr;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		if (vgpu->gtt.scratch_pt[i].page != NULL) {
			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
					I915_GTT_PAGE_SHIFT);
			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
			__free_page(vgpu->gtt.scratch_pt[i].page);
			vgpu->gtt.scratch_pt[i].page = NULL;
			vgpu->gtt.scratch_pt[i].page_mfn = 0;
		}
	}

	return 0;
}

static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i, ret;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		ret = alloc_scratch_pages(vgpu, i);
		if (ret)
			goto err;
	}

	return 0;

err:
	release_scratch_page_tree(vgpu);
	return ret;
}

/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;

	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);

	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);

	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
	if (IS_ERR(gtt->ggtt_mm)) {
		gvt_vgpu_err("fail to create mm for ggtt.\n");
		return PTR_ERR(gtt->ggtt_mm);
	}

	intel_vgpu_reset_ggtt(vgpu, false);

	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);

	return create_scratch_page_tree(vgpu);
}
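/*
 * Lifecycle sketch (illustrative): intel_vgpu_init_gtt() above is paired with
 * intel_vgpu_clean_gtt() below; a vGPU create/destroy path is expected to do
 * roughly:
 *
 *	ret = intel_vgpu_init_gtt(vgpu);
 *	if (ret)
 *		return ret;
 *	...
 *	intel_vgpu_clean_gtt(vgpu);	(on vGPU destroy)
 */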
void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		intel_vgpu_destroy_mm(mm);
	}

	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
		gvt_err("vgpu ppgtt mm is not fully destroyed\n");

	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
		gvt_err("Why do we still have spt not freed?\n");
		ppgtt_free_all_spt(vgpu);
	}
}

static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_gvt_partial_pte *pos, *next;

	list_for_each_entry_safe(pos, next,
				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
				 list) {
		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
			   pos->offset, pos->data);
		kfree(pos);
	}
	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
	vgpu->gtt.ggtt_mm = NULL;
}

/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_destroy_ggtt_mm(vgpu);
	release_scratch_page_tree(vgpu);
}

static void clean_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;

	WARN(!list_empty(&gtt->oos_page_use_list_head),
	     "someone is still using oos page\n");

	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		free_page((unsigned long)oos_page->mem);
		kfree(oos_page);
	}
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page;
	int i;
	int ret;

	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

	for (i = 0; i < preallocated_oos_pages; i++) {
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			ret = -ENOMEM;
			goto fail;
		}
		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
		if (!oos_page->mem) {
			ret = -ENOMEM;
			kfree(oos_page);
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
	}

	gvt_dbg_mm("%d oos pages preallocated\n", i);

	return 0;
fail:
	clean_spt_oos(gvt);
	return ret;
}

/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @pdps: pdp root array
 *
 * This function is used to find a PPGTT mm object from the mm object pool.
 *
 * Returns:
 * pointer to mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
		u64 pdps[])
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos;

	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);

		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
				return mm;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
				    sizeof(mm->ppgtt_mm.guest_pdps)))
				return mm;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}
	return NULL;
}

/**
 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps
 *
 * This function is used to find or create a PPGTT mm object from a guest.
 *
 * Returns:
 * pointer to mm object on success, ERR_PTR() on failure.
 */
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (mm) {
		intel_vgpu_mm_get(mm);
	} else {
		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
		if (IS_ERR(mm))
			gvt_vgpu_err("fail to create mm\n");
	}
	return mm;
}
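/*
 * Pairing sketch (illustrative): a guest PPGTT "create" notification is
 * expected to use intel_vgpu_get_ppgtt_mm() and the matching "destroy"
 * notification intel_vgpu_put_ppgtt_mm() with the same pdps, e.g.:
 *
 *	mm = intel_vgpu_get_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY, pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 *	...
 *	intel_vgpu_put_ppgtt_mm(vgpu, pdps);
 */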
/**
 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
 * @vgpu: a vGPU
 * @pdps: guest pdps
 *
 * This function is used to find the PPGTT mm object of a guest and drop a
 * reference on it; the object is destroyed once the last reference is gone.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (!mm) {
		gvt_vgpu_err("fail to find ppgtt instance.\n");
		return -EINVAL;
	}
	intel_vgpu_mm_put(mm);
	return 0;
}

/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;
	void *page;
	struct device *dev = gvt->gt->i915->drm.dev;
	dma_addr_t daddr;

	gvt_dbg_core("init gtt\n");

	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;

	page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!page) {
		gvt_err("fail to allocate scratch ggtt page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(page), 0,
			     4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_err("fail to dmamap scratch ggtt page\n");
		__free_page(virt_to_page(page));
		return -ENOMEM;
	}

	gvt->gtt.scratch_page = virt_to_page(page);
	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
			__free_page(gvt->gtt.scratch_page);
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
	mutex_init(&gvt->gtt.ppgtt_mm_lock);
	return 0;
}

/**
 * intel_gvt_clean_gtt - clean up mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the driver unloading stage, to clean up
 * the mm components of a GVT device.
 *
 */
void intel_gvt_clean_gtt(struct intel_gvt *gvt)
{
	struct device *dev = gvt->gt->i915->drm.dev;
	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
					I915_GTT_PAGE_SHIFT);

	dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);

	__free_page(gvt->gtt.scratch_page);

	if (enable_out_of_sync)
		clean_spt_oos(gvt);
}

/**
 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
 * @vgpu: a vGPU
 *
 * This function is called to invalidate all PPGTT instances of a vGPU.
2694 * 2695 */ 2696 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) 2697 { 2698 struct list_head *pos, *n; 2699 struct intel_vgpu_mm *mm; 2700 2701 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { 2702 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2703 if (mm->type == INTEL_GVT_MM_PPGTT) { 2704 mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock); 2705 list_del_init(&mm->ppgtt_mm.lru_list); 2706 mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock); 2707 if (mm->ppgtt_mm.shadowed) 2708 invalidate_ppgtt_mm(mm); 2709 } 2710 } 2711 } 2712 2713 /** 2714 * intel_vgpu_reset_ggtt - reset the GGTT entry 2715 * @vgpu: a vGPU 2716 * @invalidate_old: invalidate old entries 2717 * 2718 * This function is called at the vGPU create stage 2719 * to reset all the GGTT entries. 2720 * 2721 */ 2722 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) 2723 { 2724 struct intel_gvt *gvt = vgpu->gvt; 2725 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2726 struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; 2727 struct intel_gvt_gtt_entry old_entry; 2728 u32 index; 2729 u32 num_entries; 2730 2731 pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); 2732 pte_ops->set_present(&entry); 2733 2734 index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2735 num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2736 while (num_entries--) { 2737 if (invalidate_old) { 2738 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2739 ggtt_invalidate_pte(vgpu, &old_entry); 2740 } 2741 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2742 } 2743 2744 index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2745 num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2746 while (num_entries--) { 2747 if (invalidate_old) { 2748 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2749 ggtt_invalidate_pte(vgpu, &old_entry); 2750 } 2751 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2752 } 2753 2754 ggtt_invalidate(gvt->gt); 2755 } 2756 2757 /** 2758 * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries 2759 * @gvt: intel gvt device 2760 * 2761 * This function is called at driver resume stage to restore 2762 * GGTT entries of every vGPU. 2763 * 2764 */ 2765 void intel_gvt_restore_ggtt(struct intel_gvt *gvt) 2766 { 2767 struct intel_vgpu *vgpu; 2768 struct intel_vgpu_mm *mm; 2769 int id; 2770 gen8_pte_t pte; 2771 u32 idx, num_low, num_hi, offset; 2772 2773 /* Restore dirty host ggtt for all vGPUs */ 2774 idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) { 2775 mm = vgpu->gtt.ggtt_mm; 2776 2777 num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2778 offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2779 for (idx = 0; idx < num_low; idx++) { 2780 pte = mm->ggtt_mm.host_ggtt_aperture[idx]; 2781 if (pte & GEN8_PAGE_PRESENT) 2782 write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); 2783 } 2784 2785 num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2786 offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2787 for (idx = 0; idx < num_hi; idx++) { 2788 pte = mm->ggtt_mm.host_ggtt_hidden[idx]; 2789 if (pte & GEN8_PAGE_PRESENT) 2790 write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); 2791 } 2792 } 2793 } 2794
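/*
 * Resume sketch (illustrative): the loops above only replay host GGTT PTEs
 * that were recorded as present, so a driver resume path simply needs to call
 * this once the host GGTT itself is usable again:
 *
 *	intel_gvt_restore_ggtt(gvt);
 */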