1 /* 2 * GTT virtualization 3 * 4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Zhi Wang <zhi.a.wang@intel.com> 27 * Zhenyu Wang <zhenyuw@linux.intel.com> 28 * Xiao Zheng <xiao.zheng@intel.com> 29 * 30 * Contributors: 31 * Min He <min.he@intel.com> 32 * Bing Niu <bing.niu@intel.com> 33 * 34 */ 35 36 #include "i915_drv.h" 37 #include "gvt.h" 38 #include "i915_pvinfo.h" 39 #include "trace.h" 40 41 #include "gt/intel_gt_regs.h" 42 #include <linux/vmalloc.h> 43 44 #if defined(VERBOSE_DEBUG) 45 #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) 46 #else 47 #define gvt_vdbg_mm(fmt, args...) 48 #endif 49 50 static bool enable_out_of_sync = false; 51 static int preallocated_oos_pages = 8192; 52 53 /* 54 * validate a gm address and related range size, 55 * translate it to host gm address 56 */ 57 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size) 58 { 59 if (size == 0) 60 return vgpu_gmadr_is_valid(vgpu, addr); 61 62 if (vgpu_gmadr_is_aperture(vgpu, addr) && 63 vgpu_gmadr_is_aperture(vgpu, addr + size - 1)) 64 return true; 65 else if (vgpu_gmadr_is_hidden(vgpu, addr) && 66 vgpu_gmadr_is_hidden(vgpu, addr + size - 1)) 67 return true; 68 69 gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n", 70 addr, size); 71 return false; 72 } 73 74 #define gtt_type_is_entry(type) \ 75 (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \ 76 && type != GTT_TYPE_PPGTT_PTE_ENTRY \ 77 && type != GTT_TYPE_PPGTT_ROOT_ENTRY) 78 79 #define gtt_type_is_pt(type) \ 80 (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) 81 82 #define gtt_type_is_pte_pt(type) \ 83 (type == GTT_TYPE_PPGTT_PTE_PT) 84 85 #define gtt_type_is_root_pointer(type) \ 86 (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY) 87 88 #define gtt_init_entry(e, t, p, v) do { \ 89 (e)->type = t; \ 90 (e)->pdev = p; \ 91 memcpy(&(e)->val64, &v, sizeof(v)); \ 92 } while (0) 93 94 /* 95 * Mappings between GTT_TYPE* enumerations. 96 * Following information can be found according to the given type: 97 * - type of next level page table 98 * - type of entry inside this level page table 99 * - type of entry with PSE set 100 * 101 * If the given type doesn't have such a kind of information, 102 * e.g. give a l4 root entry type, then request to get its PSE type, 103 * give a PTE page table type, then request to get its next level page 104 * table type, as we know l4 root entry doesn't have a PSE bit, 105 * and a PTE page table doesn't have a next level page table type, 106 * GTT_TYPE_INVALID will be returned. This is useful when traversing a 107 * page table. 108 */ 109 110 struct gtt_type_table_entry { 111 int entry_type; 112 int pt_type; 113 int next_pt_type; 114 int pse_entry_type; 115 }; 116 117 #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \ 118 [type] = { \ 119 .entry_type = e_type, \ 120 .pt_type = cpt_type, \ 121 .next_pt_type = npt_type, \ 122 .pse_entry_type = pse_type, \ 123 } 124 125 static const struct gtt_type_table_entry gtt_type_table[] = { 126 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 127 GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 128 GTT_TYPE_INVALID, 129 GTT_TYPE_PPGTT_PML4_PT, 130 GTT_TYPE_INVALID), 131 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT, 132 GTT_TYPE_PPGTT_PML4_ENTRY, 133 GTT_TYPE_PPGTT_PML4_PT, 134 GTT_TYPE_PPGTT_PDP_PT, 135 GTT_TYPE_INVALID), 136 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY, 137 GTT_TYPE_PPGTT_PML4_ENTRY, 138 GTT_TYPE_PPGTT_PML4_PT, 139 GTT_TYPE_PPGTT_PDP_PT, 140 GTT_TYPE_INVALID), 141 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT, 142 GTT_TYPE_PPGTT_PDP_ENTRY, 143 GTT_TYPE_PPGTT_PDP_PT, 144 GTT_TYPE_PPGTT_PDE_PT, 145 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 146 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 147 GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 148 GTT_TYPE_INVALID, 149 GTT_TYPE_PPGTT_PDE_PT, 150 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 151 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY, 152 GTT_TYPE_PPGTT_PDP_ENTRY, 153 GTT_TYPE_PPGTT_PDP_PT, 154 GTT_TYPE_PPGTT_PDE_PT, 155 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 156 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT, 157 GTT_TYPE_PPGTT_PDE_ENTRY, 158 GTT_TYPE_PPGTT_PDE_PT, 159 GTT_TYPE_PPGTT_PTE_PT, 160 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 161 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY, 162 GTT_TYPE_PPGTT_PDE_ENTRY, 163 GTT_TYPE_PPGTT_PDE_PT, 164 GTT_TYPE_PPGTT_PTE_PT, 165 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 166 /* We take IPS bit as 'PSE' for PTE level. */ 167 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, 168 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 169 GTT_TYPE_PPGTT_PTE_PT, 170 GTT_TYPE_INVALID, 171 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 172 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, 173 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 174 GTT_TYPE_PPGTT_PTE_PT, 175 GTT_TYPE_INVALID, 176 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 177 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY, 178 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 179 GTT_TYPE_PPGTT_PTE_PT, 180 GTT_TYPE_INVALID, 181 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 182 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, 183 GTT_TYPE_PPGTT_PDE_ENTRY, 184 GTT_TYPE_PPGTT_PDE_PT, 185 GTT_TYPE_INVALID, 186 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 187 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY, 188 GTT_TYPE_PPGTT_PDP_ENTRY, 189 GTT_TYPE_PPGTT_PDP_PT, 190 GTT_TYPE_INVALID, 191 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 192 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE, 193 GTT_TYPE_GGTT_PTE, 194 GTT_TYPE_INVALID, 195 GTT_TYPE_INVALID, 196 GTT_TYPE_INVALID), 197 }; 198 199 static inline int get_next_pt_type(int type) 200 { 201 return gtt_type_table[type].next_pt_type; 202 } 203 204 static inline int get_entry_type(int type) 205 { 206 return gtt_type_table[type].entry_type; 207 } 208 209 static inline int get_pse_type(int type) 210 { 211 return gtt_type_table[type].pse_entry_type; 212 } 213 214 static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index) 215 { 216 void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index; 217 218 return readq(addr); 219 } 220 221 static void ggtt_invalidate(struct intel_gt *gt) 222 { 223 mmio_hw_access_pre(gt); 224 intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 225 mmio_hw_access_post(gt); 226 } 227 228 static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte) 229 { 230 void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index; 231 232 writeq(pte, addr); 233 } 234 235 static inline int gtt_get_entry64(void *pt, 236 struct intel_gvt_gtt_entry *e, 237 unsigned long index, bool hypervisor_access, unsigned long gpa, 238 struct intel_vgpu *vgpu) 239 { 240 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 241 int ret; 242 243 if (WARN_ON(info->gtt_entry_size != 8)) 244 return -EINVAL; 245 246 if (hypervisor_access) { 247 ret = intel_gvt_read_gpa(vgpu, gpa + 248 (index << info->gtt_entry_size_shift), 249 &e->val64, 8); 250 if (WARN_ON(ret)) 251 return ret; 252 } else if (!pt) { 253 e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index); 254 } else { 255 e->val64 = *((u64 *)pt + index); 256 } 257 return 0; 258 } 259 260 static inline int gtt_set_entry64(void *pt, 261 struct intel_gvt_gtt_entry *e, 262 unsigned long index, bool hypervisor_access, unsigned long gpa, 263 struct intel_vgpu *vgpu) 264 { 265 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 266 int ret; 267 268 if (WARN_ON(info->gtt_entry_size != 8)) 269 return -EINVAL; 270 271 if (hypervisor_access) { 272 ret = intel_gvt_write_gpa(vgpu, gpa + 273 (index << info->gtt_entry_size_shift), 274 &e->val64, 8); 275 if (WARN_ON(ret)) 276 return ret; 277 } else if (!pt) { 278 write_pte64(vgpu->gvt->gt->ggtt, index, e->val64); 279 } else { 280 *((u64 *)pt + index) = e->val64; 281 } 282 return 0; 283 } 284 285 #define GTT_HAW 46 286 287 #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) 288 #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) 289 #define ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16) 290 #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) 291 292 #define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52) 293 #define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */ 294 295 #define GTT_64K_PTE_STRIDE 16 296 297 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) 298 { 299 unsigned long pfn; 300 301 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) 302 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; 303 else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) 304 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; 305 else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) 306 pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT; 307 else 308 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; 309 return pfn; 310 } 311 312 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) 313 { 314 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { 315 e->val64 &= ~ADDR_1G_MASK; 316 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT); 317 } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { 318 e->val64 &= ~ADDR_2M_MASK; 319 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); 320 } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) { 321 e->val64 &= ~ADDR_64K_MASK; 322 pfn &= (ADDR_64K_MASK >> PAGE_SHIFT); 323 } else { 324 e->val64 &= ~ADDR_4K_MASK; 325 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); 326 } 327 328 e->val64 |= (pfn << PAGE_SHIFT); 329 } 330 331 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) 332 { 333 return !!(e->val64 & _PAGE_PSE); 334 } 335 336 static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e) 337 { 338 if (gen8_gtt_test_pse(e)) { 339 switch (e->type) { 340 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 341 e->val64 &= ~_PAGE_PSE; 342 e->type = GTT_TYPE_PPGTT_PDE_ENTRY; 343 break; 344 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 345 e->type = GTT_TYPE_PPGTT_PDP_ENTRY; 346 e->val64 &= ~_PAGE_PSE; 347 break; 348 default: 349 WARN_ON(1); 350 } 351 } 352 } 353 354 static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e) 355 { 356 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) 357 return false; 358 359 return !!(e->val64 & GEN8_PDE_IPS_64K); 360 } 361 362 static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e) 363 { 364 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) 365 return; 366 367 e->val64 &= ~GEN8_PDE_IPS_64K; 368 } 369 370 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) 371 { 372 /* 373 * i915 writes PDP root pointer registers without present bit, 374 * it also works, so we need to treat root pointer entry 375 * specifically. 376 */ 377 if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY 378 || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 379 return (e->val64 != 0); 380 else 381 return (e->val64 & GEN8_PAGE_PRESENT); 382 } 383 384 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) 385 { 386 e->val64 &= ~GEN8_PAGE_PRESENT; 387 } 388 389 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) 390 { 391 e->val64 |= GEN8_PAGE_PRESENT; 392 } 393 394 static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) 395 { 396 return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED); 397 } 398 399 static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e) 400 { 401 e->val64 |= GTT_SPTE_FLAG_64K_SPLITED; 402 } 403 404 static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e) 405 { 406 e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED; 407 } 408 409 /* 410 * Per-platform GMA routines. 411 */ 412 static unsigned long gma_to_ggtt_pte_index(unsigned long gma) 413 { 414 unsigned long x = (gma >> I915_GTT_PAGE_SHIFT); 415 416 trace_gma_index(__func__, gma, x); 417 return x; 418 } 419 420 #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \ 421 static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \ 422 { \ 423 unsigned long x = (exp); \ 424 trace_gma_index(__func__, gma, x); \ 425 return x; \ 426 } 427 428 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff)); 429 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff)); 430 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3)); 431 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff)); 432 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff)); 433 434 static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { 435 .get_entry = gtt_get_entry64, 436 .set_entry = gtt_set_entry64, 437 .clear_present = gtt_entry_clear_present, 438 .set_present = gtt_entry_set_present, 439 .test_present = gen8_gtt_test_present, 440 .test_pse = gen8_gtt_test_pse, 441 .clear_pse = gen8_gtt_clear_pse, 442 .clear_ips = gen8_gtt_clear_ips, 443 .test_ips = gen8_gtt_test_ips, 444 .clear_64k_splited = gen8_gtt_clear_64k_splited, 445 .set_64k_splited = gen8_gtt_set_64k_splited, 446 .test_64k_splited = gen8_gtt_test_64k_splited, 447 .get_pfn = gen8_gtt_get_pfn, 448 .set_pfn = gen8_gtt_set_pfn, 449 }; 450 451 static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { 452 .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index, 453 .gma_to_pte_index = gen8_gma_to_pte_index, 454 .gma_to_pde_index = gen8_gma_to_pde_index, 455 .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index, 456 .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index, 457 .gma_to_pml4_index = gen8_gma_to_pml4_index, 458 }; 459 460 /* Update entry type per pse and ips bit. */ 461 static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops, 462 struct intel_gvt_gtt_entry *entry, bool ips) 463 { 464 switch (entry->type) { 465 case GTT_TYPE_PPGTT_PDE_ENTRY: 466 case GTT_TYPE_PPGTT_PDP_ENTRY: 467 if (pte_ops->test_pse(entry)) 468 entry->type = get_pse_type(entry->type); 469 break; 470 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 471 if (ips) 472 entry->type = get_pse_type(entry->type); 473 break; 474 default: 475 GEM_BUG_ON(!gtt_type_is_entry(entry->type)); 476 } 477 478 GEM_BUG_ON(entry->type == GTT_TYPE_INVALID); 479 } 480 481 /* 482 * MM helpers. 483 */ 484 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, 485 struct intel_gvt_gtt_entry *entry, unsigned long index, 486 bool guest) 487 { 488 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 489 490 GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT); 491 492 entry->type = mm->ppgtt_mm.root_entry_type; 493 pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps : 494 mm->ppgtt_mm.shadow_pdps, 495 entry, index, false, 0, mm->vgpu); 496 update_entry_type_for_real(pte_ops, entry, false); 497 } 498 499 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, 500 struct intel_gvt_gtt_entry *entry, unsigned long index) 501 { 502 _ppgtt_get_root_entry(mm, entry, index, true); 503 } 504 505 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm, 506 struct intel_gvt_gtt_entry *entry, unsigned long index) 507 { 508 _ppgtt_get_root_entry(mm, entry, index, false); 509 } 510 511 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, 512 struct intel_gvt_gtt_entry *entry, unsigned long index, 513 bool guest) 514 { 515 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 516 517 pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps : 518 mm->ppgtt_mm.shadow_pdps, 519 entry, index, false, 0, mm->vgpu); 520 } 521 522 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, 523 struct intel_gvt_gtt_entry *entry, unsigned long index) 524 { 525 _ppgtt_set_root_entry(mm, entry, index, false); 526 } 527 528 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm, 529 struct intel_gvt_gtt_entry *entry, unsigned long index) 530 { 531 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 532 533 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 534 535 entry->type = GTT_TYPE_GGTT_PTE; 536 pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 537 false, 0, mm->vgpu); 538 } 539 540 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, 541 struct intel_gvt_gtt_entry *entry, unsigned long index) 542 { 543 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 544 545 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 546 547 pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 548 false, 0, mm->vgpu); 549 } 550 551 static void ggtt_get_host_entry(struct intel_vgpu_mm *mm, 552 struct intel_gvt_gtt_entry *entry, unsigned long index) 553 { 554 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 555 556 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 557 558 pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu); 559 } 560 561 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, 562 struct intel_gvt_gtt_entry *entry, unsigned long index) 563 { 564 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 565 unsigned long offset = index; 566 567 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 568 569 if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) { 570 offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT); 571 mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64; 572 } else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) { 573 offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT); 574 mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64; 575 } 576 577 pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu); 578 } 579 580 /* 581 * PPGTT shadow page table helpers. 582 */ 583 static inline int ppgtt_spt_get_entry( 584 struct intel_vgpu_ppgtt_spt *spt, 585 void *page_table, int type, 586 struct intel_gvt_gtt_entry *e, unsigned long index, 587 bool guest) 588 { 589 struct intel_gvt *gvt = spt->vgpu->gvt; 590 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 591 int ret; 592 593 e->type = get_entry_type(type); 594 595 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 596 return -EINVAL; 597 598 ret = ops->get_entry(page_table, e, index, guest, 599 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 600 spt->vgpu); 601 if (ret) 602 return ret; 603 604 update_entry_type_for_real(ops, e, guest ? 605 spt->guest_page.pde_ips : false); 606 607 gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 608 type, e->type, index, e->val64); 609 return 0; 610 } 611 612 static inline int ppgtt_spt_set_entry( 613 struct intel_vgpu_ppgtt_spt *spt, 614 void *page_table, int type, 615 struct intel_gvt_gtt_entry *e, unsigned long index, 616 bool guest) 617 { 618 struct intel_gvt *gvt = spt->vgpu->gvt; 619 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 620 621 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 622 return -EINVAL; 623 624 gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 625 type, e->type, index, e->val64); 626 627 return ops->set_entry(page_table, e, index, guest, 628 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 629 spt->vgpu); 630 } 631 632 #define ppgtt_get_guest_entry(spt, e, index) \ 633 ppgtt_spt_get_entry(spt, NULL, \ 634 spt->guest_page.type, e, index, true) 635 636 #define ppgtt_set_guest_entry(spt, e, index) \ 637 ppgtt_spt_set_entry(spt, NULL, \ 638 spt->guest_page.type, e, index, true) 639 640 #define ppgtt_get_shadow_entry(spt, e, index) \ 641 ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \ 642 spt->shadow_page.type, e, index, false) 643 644 #define ppgtt_set_shadow_entry(spt, e, index) \ 645 ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ 646 spt->shadow_page.type, e, index, false) 647 648 static void *alloc_spt(gfp_t gfp_mask) 649 { 650 struct intel_vgpu_ppgtt_spt *spt; 651 652 spt = kzalloc(sizeof(*spt), gfp_mask); 653 if (!spt) 654 return NULL; 655 656 spt->shadow_page.page = alloc_page(gfp_mask); 657 if (!spt->shadow_page.page) { 658 kfree(spt); 659 return NULL; 660 } 661 return spt; 662 } 663 664 static void free_spt(struct intel_vgpu_ppgtt_spt *spt) 665 { 666 __free_page(spt->shadow_page.page); 667 kfree(spt); 668 } 669 670 static int detach_oos_page(struct intel_vgpu *vgpu, 671 struct intel_vgpu_oos_page *oos_page); 672 673 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) 674 { 675 struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev; 676 677 trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type); 678 679 dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, 680 DMA_BIDIRECTIONAL); 681 682 radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); 683 684 if (spt->guest_page.gfn) { 685 if (spt->guest_page.oos_page) 686 detach_oos_page(spt->vgpu, spt->guest_page.oos_page); 687 688 intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); 689 } 690 691 list_del_init(&spt->post_shadow_list); 692 free_spt(spt); 693 } 694 695 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) 696 { 697 struct intel_vgpu_ppgtt_spt *spt, *spn; 698 struct radix_tree_iter iter; 699 LIST_HEAD(all_spt); 700 void __rcu **slot; 701 702 rcu_read_lock(); 703 radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { 704 spt = radix_tree_deref_slot(slot); 705 list_move(&spt->post_shadow_list, &all_spt); 706 } 707 rcu_read_unlock(); 708 709 list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list) 710 ppgtt_free_spt(spt); 711 } 712 713 static int ppgtt_handle_guest_write_page_table_bytes( 714 struct intel_vgpu_ppgtt_spt *spt, 715 u64 pa, void *p_data, int bytes); 716 717 static int ppgtt_write_protection_handler( 718 struct intel_vgpu_page_track *page_track, 719 u64 gpa, void *data, int bytes) 720 { 721 struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data; 722 723 int ret; 724 725 if (bytes != 4 && bytes != 8) 726 return -EINVAL; 727 728 ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes); 729 if (ret) 730 return ret; 731 return ret; 732 } 733 734 /* Find a spt by guest gfn. */ 735 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( 736 struct intel_vgpu *vgpu, unsigned long gfn) 737 { 738 struct intel_vgpu_page_track *track; 739 740 track = intel_vgpu_find_page_track(vgpu, gfn); 741 if (track && track->handler == ppgtt_write_protection_handler) 742 return track->priv_data; 743 744 return NULL; 745 } 746 747 /* Find the spt by shadow page mfn. */ 748 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( 749 struct intel_vgpu *vgpu, unsigned long mfn) 750 { 751 return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn); 752 } 753 754 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); 755 756 /* Allocate shadow page table without guest page. */ 757 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( 758 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type) 759 { 760 struct device *kdev = vgpu->gvt->gt->i915->drm.dev; 761 struct intel_vgpu_ppgtt_spt *spt = NULL; 762 dma_addr_t daddr; 763 int ret; 764 765 retry: 766 spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); 767 if (!spt) { 768 if (reclaim_one_ppgtt_mm(vgpu->gvt)) 769 goto retry; 770 771 gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); 772 return ERR_PTR(-ENOMEM); 773 } 774 775 spt->vgpu = vgpu; 776 atomic_set(&spt->refcount, 1); 777 INIT_LIST_HEAD(&spt->post_shadow_list); 778 779 /* 780 * Init shadow_page. 781 */ 782 spt->shadow_page.type = type; 783 daddr = dma_map_page(kdev, spt->shadow_page.page, 784 0, 4096, DMA_BIDIRECTIONAL); 785 if (dma_mapping_error(kdev, daddr)) { 786 gvt_vgpu_err("fail to map dma addr\n"); 787 ret = -EINVAL; 788 goto err_free_spt; 789 } 790 spt->shadow_page.vaddr = page_address(spt->shadow_page.page); 791 spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; 792 793 ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); 794 if (ret) 795 goto err_unmap_dma; 796 797 return spt; 798 799 err_unmap_dma: 800 dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL); 801 err_free_spt: 802 free_spt(spt); 803 return ERR_PTR(ret); 804 } 805 806 /* Allocate shadow page table associated with specific gfn. */ 807 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( 808 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type, 809 unsigned long gfn, bool guest_pde_ips) 810 { 811 struct intel_vgpu_ppgtt_spt *spt; 812 int ret; 813 814 spt = ppgtt_alloc_spt(vgpu, type); 815 if (IS_ERR(spt)) 816 return spt; 817 818 /* 819 * Init guest_page. 820 */ 821 ret = intel_vgpu_register_page_track(vgpu, gfn, 822 ppgtt_write_protection_handler, spt); 823 if (ret) { 824 ppgtt_free_spt(spt); 825 return ERR_PTR(ret); 826 } 827 828 spt->guest_page.type = type; 829 spt->guest_page.gfn = gfn; 830 spt->guest_page.pde_ips = guest_pde_ips; 831 832 trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); 833 834 return spt; 835 } 836 837 #define pt_entry_size_shift(spt) \ 838 ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) 839 840 #define pt_entries(spt) \ 841 (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) 842 843 #define for_each_present_guest_entry(spt, e, i) \ 844 for (i = 0; i < pt_entries(spt); \ 845 i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ 846 if (!ppgtt_get_guest_entry(spt, e, i) && \ 847 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 848 849 #define for_each_present_shadow_entry(spt, e, i) \ 850 for (i = 0; i < pt_entries(spt); \ 851 i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ 852 if (!ppgtt_get_shadow_entry(spt, e, i) && \ 853 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 854 855 #define for_each_shadow_entry(spt, e, i) \ 856 for (i = 0; i < pt_entries(spt); \ 857 i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \ 858 if (!ppgtt_get_shadow_entry(spt, e, i)) 859 860 static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) 861 { 862 int v = atomic_read(&spt->refcount); 863 864 trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1)); 865 atomic_inc(&spt->refcount); 866 } 867 868 static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt) 869 { 870 int v = atomic_read(&spt->refcount); 871 872 trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); 873 return atomic_dec_return(&spt->refcount); 874 } 875 876 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); 877 878 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, 879 struct intel_gvt_gtt_entry *e) 880 { 881 struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 882 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 883 struct intel_vgpu_ppgtt_spt *s; 884 enum intel_gvt_gtt_type cur_pt_type; 885 886 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); 887 888 if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY 889 && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { 890 cur_pt_type = get_next_pt_type(e->type); 891 892 if (!gtt_type_is_pt(cur_pt_type) || 893 !gtt_type_is_pt(cur_pt_type + 1)) { 894 drm_WARN(&i915->drm, 1, 895 "Invalid page table type, cur_pt_type is: %d\n", 896 cur_pt_type); 897 return -EINVAL; 898 } 899 900 cur_pt_type += 1; 901 902 if (ops->get_pfn(e) == 903 vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) 904 return 0; 905 } 906 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 907 if (!s) { 908 gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", 909 ops->get_pfn(e)); 910 return -ENXIO; 911 } 912 return ppgtt_invalidate_spt(s); 913 } 914 915 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, 916 struct intel_gvt_gtt_entry *entry) 917 { 918 struct intel_vgpu *vgpu = spt->vgpu; 919 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 920 unsigned long pfn; 921 int type; 922 923 pfn = ops->get_pfn(entry); 924 type = spt->shadow_page.type; 925 926 /* Uninitialized spte or unshadowed spte. */ 927 if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn) 928 return; 929 930 intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); 931 } 932 933 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) 934 { 935 struct intel_vgpu *vgpu = spt->vgpu; 936 struct intel_gvt_gtt_entry e; 937 unsigned long index; 938 int ret; 939 940 trace_spt_change(spt->vgpu->id, "die", spt, 941 spt->guest_page.gfn, spt->shadow_page.type); 942 943 if (ppgtt_put_spt(spt) > 0) 944 return 0; 945 946 for_each_present_shadow_entry(spt, &e, index) { 947 switch (e.type) { 948 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 949 gvt_vdbg_mm("invalidate 4K entry\n"); 950 ppgtt_invalidate_pte(spt, &e); 951 break; 952 case GTT_TYPE_PPGTT_PTE_64K_ENTRY: 953 /* We don't setup 64K shadow entry so far. */ 954 WARN(1, "suspicious 64K gtt entry\n"); 955 continue; 956 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 957 gvt_vdbg_mm("invalidate 2M entry\n"); 958 continue; 959 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 960 WARN(1, "GVT doesn't support 1GB page\n"); 961 continue; 962 case GTT_TYPE_PPGTT_PML4_ENTRY: 963 case GTT_TYPE_PPGTT_PDP_ENTRY: 964 case GTT_TYPE_PPGTT_PDE_ENTRY: 965 gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n"); 966 ret = ppgtt_invalidate_spt_by_shadow_entry( 967 spt->vgpu, &e); 968 if (ret) 969 goto fail; 970 break; 971 default: 972 GEM_BUG_ON(1); 973 } 974 } 975 976 trace_spt_change(spt->vgpu->id, "release", spt, 977 spt->guest_page.gfn, spt->shadow_page.type); 978 ppgtt_free_spt(spt); 979 return 0; 980 fail: 981 gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n", 982 spt, e.val64, e.type); 983 return ret; 984 } 985 986 static bool vgpu_ips_enabled(struct intel_vgpu *vgpu) 987 { 988 struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; 989 990 if (GRAPHICS_VER(dev_priv) == 9) { 991 u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) & 992 GAMW_ECO_ENABLE_64K_IPS_FIELD; 993 994 return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD; 995 } else if (GRAPHICS_VER(dev_priv) >= 11) { 996 /* 64K paging only controlled by IPS bit in PTE now. */ 997 return true; 998 } else 999 return false; 1000 } 1001 1002 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); 1003 1004 static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( 1005 struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we) 1006 { 1007 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1008 struct intel_vgpu_ppgtt_spt *spt = NULL; 1009 bool ips = false; 1010 int ret; 1011 1012 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); 1013 1014 if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY) 1015 ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we); 1016 1017 spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); 1018 if (spt) { 1019 ppgtt_get_spt(spt); 1020 1021 if (ips != spt->guest_page.pde_ips) { 1022 spt->guest_page.pde_ips = ips; 1023 1024 gvt_dbg_mm("reshadow PDE since ips changed\n"); 1025 clear_page(spt->shadow_page.vaddr); 1026 ret = ppgtt_populate_spt(spt); 1027 if (ret) { 1028 ppgtt_put_spt(spt); 1029 goto err; 1030 } 1031 } 1032 } else { 1033 int type = get_next_pt_type(we->type); 1034 1035 if (!gtt_type_is_pt(type)) { 1036 ret = -EINVAL; 1037 goto err; 1038 } 1039 1040 spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips); 1041 if (IS_ERR(spt)) { 1042 ret = PTR_ERR(spt); 1043 goto err; 1044 } 1045 1046 ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); 1047 if (ret) 1048 goto err_free_spt; 1049 1050 ret = ppgtt_populate_spt(spt); 1051 if (ret) 1052 goto err_free_spt; 1053 1054 trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn, 1055 spt->shadow_page.type); 1056 } 1057 return spt; 1058 1059 err_free_spt: 1060 ppgtt_free_spt(spt); 1061 spt = NULL; 1062 err: 1063 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1064 spt, we->val64, we->type); 1065 return ERR_PTR(ret); 1066 } 1067 1068 static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, 1069 struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge) 1070 { 1071 const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops; 1072 1073 se->type = ge->type; 1074 se->val64 = ge->val64; 1075 1076 /* Because we always split 64KB pages, so clear IPS in shadow PDE. */ 1077 if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY) 1078 ops->clear_ips(se); 1079 1080 ops->set_pfn(se, s->shadow_page.mfn); 1081 } 1082 1083 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, 1084 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1085 struct intel_gvt_gtt_entry *se) 1086 { 1087 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1088 struct intel_vgpu_ppgtt_spt *sub_spt; 1089 struct intel_gvt_gtt_entry sub_se; 1090 unsigned long start_gfn; 1091 dma_addr_t dma_addr; 1092 unsigned long sub_index; 1093 int ret; 1094 1095 gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index); 1096 1097 start_gfn = ops->get_pfn(se); 1098 1099 sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT); 1100 if (IS_ERR(sub_spt)) 1101 return PTR_ERR(sub_spt); 1102 1103 for_each_shadow_entry(sub_spt, &sub_se, sub_index) { 1104 ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index, 1105 PAGE_SIZE, &dma_addr); 1106 if (ret) 1107 goto err; 1108 sub_se.val64 = se->val64; 1109 1110 /* Copy the PAT field from PDE. */ 1111 sub_se.val64 &= ~_PAGE_PAT; 1112 sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5; 1113 1114 ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT); 1115 ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index); 1116 } 1117 1118 /* Clear dirty field. */ 1119 se->val64 &= ~_PAGE_DIRTY; 1120 1121 ops->clear_pse(se); 1122 ops->clear_ips(se); 1123 ops->set_pfn(se, sub_spt->shadow_page.mfn); 1124 ppgtt_set_shadow_entry(spt, se, index); 1125 return 0; 1126 err: 1127 /* Cancel the existing address mappings of DMA addr. */ 1128 for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { 1129 gvt_vdbg_mm("invalidate 4K entry\n"); 1130 ppgtt_invalidate_pte(sub_spt, &sub_se); 1131 } 1132 /* Release the new allocated spt. */ 1133 trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, 1134 sub_spt->guest_page.gfn, sub_spt->shadow_page.type); 1135 ppgtt_free_spt(sub_spt); 1136 return ret; 1137 } 1138 1139 static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, 1140 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1141 struct intel_gvt_gtt_entry *se) 1142 { 1143 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1144 struct intel_gvt_gtt_entry entry = *se; 1145 unsigned long start_gfn; 1146 dma_addr_t dma_addr; 1147 int i, ret; 1148 1149 gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index); 1150 1151 GEM_BUG_ON(index % GTT_64K_PTE_STRIDE); 1152 1153 start_gfn = ops->get_pfn(se); 1154 1155 entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY; 1156 ops->set_64k_splited(&entry); 1157 1158 for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { 1159 ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i, 1160 PAGE_SIZE, &dma_addr); 1161 if (ret) 1162 return ret; 1163 1164 ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT); 1165 ppgtt_set_shadow_entry(spt, &entry, index + i); 1166 } 1167 return 0; 1168 } 1169 1170 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, 1171 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 1172 struct intel_gvt_gtt_entry *ge) 1173 { 1174 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 1175 struct intel_gvt_gtt_entry se = *ge; 1176 unsigned long gfn; 1177 dma_addr_t dma_addr; 1178 int ret; 1179 1180 if (!pte_ops->test_present(ge)) 1181 return 0; 1182 1183 gfn = pte_ops->get_pfn(ge); 1184 1185 switch (ge->type) { 1186 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 1187 gvt_vdbg_mm("shadow 4K gtt entry\n"); 1188 ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr); 1189 if (ret) 1190 return -ENXIO; 1191 break; 1192 case GTT_TYPE_PPGTT_PTE_64K_ENTRY: 1193 gvt_vdbg_mm("shadow 64K gtt entry\n"); 1194 /* 1195 * The layout of 64K page is special, the page size is 1196 * controlled by upper PDE. To be simple, we always split 1197 * 64K page to smaller 4K pages in shadow PT. 1198 */ 1199 return split_64KB_gtt_entry(vgpu, spt, index, &se); 1200 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 1201 gvt_vdbg_mm("shadow 2M gtt entry\n"); 1202 if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) || 1203 intel_gvt_dma_map_guest_page(vgpu, gfn, 1204 I915_GTT_PAGE_SIZE_2M, &dma_addr)) 1205 return split_2MB_gtt_entry(vgpu, spt, index, &se); 1206 break; 1207 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 1208 gvt_vgpu_err("GVT doesn't support 1GB entry\n"); 1209 return -EINVAL; 1210 default: 1211 GEM_BUG_ON(1); 1212 return -EINVAL; 1213 } 1214 1215 /* Successfully shadowed a 4K or 2M page (without splitting). */ 1216 pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT); 1217 ppgtt_set_shadow_entry(spt, &se, index); 1218 return 0; 1219 } 1220 1221 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) 1222 { 1223 struct intel_vgpu *vgpu = spt->vgpu; 1224 struct intel_vgpu_ppgtt_spt *s; 1225 struct intel_gvt_gtt_entry se, ge; 1226 unsigned long i; 1227 int ret; 1228 1229 trace_spt_change(spt->vgpu->id, "born", spt, 1230 spt->guest_page.gfn, spt->shadow_page.type); 1231 1232 for_each_present_guest_entry(spt, &ge, i) { 1233 if (gtt_type_is_pt(get_next_pt_type(ge.type))) { 1234 s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1235 if (IS_ERR(s)) { 1236 ret = PTR_ERR(s); 1237 goto fail; 1238 } 1239 ppgtt_get_shadow_entry(spt, &se, i); 1240 ppgtt_generate_shadow_entry(&se, s, &ge); 1241 ppgtt_set_shadow_entry(spt, &se, i); 1242 } else { 1243 ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge); 1244 if (ret) 1245 goto fail; 1246 } 1247 } 1248 return 0; 1249 fail: 1250 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1251 spt, ge.val64, ge.type); 1252 return ret; 1253 } 1254 1255 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, 1256 struct intel_gvt_gtt_entry *se, unsigned long index) 1257 { 1258 struct intel_vgpu *vgpu = spt->vgpu; 1259 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1260 int ret; 1261 1262 trace_spt_guest_change(spt->vgpu->id, "remove", spt, 1263 spt->shadow_page.type, se->val64, index); 1264 1265 gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n", 1266 se->type, index, se->val64); 1267 1268 if (!ops->test_present(se)) 1269 return 0; 1270 1271 if (ops->get_pfn(se) == 1272 vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn) 1273 return 0; 1274 1275 if (gtt_type_is_pt(get_next_pt_type(se->type))) { 1276 struct intel_vgpu_ppgtt_spt *s = 1277 intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se)); 1278 if (!s) { 1279 gvt_vgpu_err("fail to find guest page\n"); 1280 ret = -ENXIO; 1281 goto fail; 1282 } 1283 ret = ppgtt_invalidate_spt(s); 1284 if (ret) 1285 goto fail; 1286 } else { 1287 /* We don't setup 64K shadow entry so far. */ 1288 WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY, 1289 "suspicious 64K entry\n"); 1290 ppgtt_invalidate_pte(spt, se); 1291 } 1292 1293 return 0; 1294 fail: 1295 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1296 spt, se->val64, se->type); 1297 return ret; 1298 } 1299 1300 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, 1301 struct intel_gvt_gtt_entry *we, unsigned long index) 1302 { 1303 struct intel_vgpu *vgpu = spt->vgpu; 1304 struct intel_gvt_gtt_entry m; 1305 struct intel_vgpu_ppgtt_spt *s; 1306 int ret; 1307 1308 trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type, 1309 we->val64, index); 1310 1311 gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n", 1312 we->type, index, we->val64); 1313 1314 if (gtt_type_is_pt(get_next_pt_type(we->type))) { 1315 s = ppgtt_populate_spt_by_guest_entry(vgpu, we); 1316 if (IS_ERR(s)) { 1317 ret = PTR_ERR(s); 1318 goto fail; 1319 } 1320 ppgtt_get_shadow_entry(spt, &m, index); 1321 ppgtt_generate_shadow_entry(&m, s, we); 1322 ppgtt_set_shadow_entry(spt, &m, index); 1323 } else { 1324 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we); 1325 if (ret) 1326 goto fail; 1327 } 1328 return 0; 1329 fail: 1330 gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n", 1331 spt, we->val64, we->type); 1332 return ret; 1333 } 1334 1335 static int sync_oos_page(struct intel_vgpu *vgpu, 1336 struct intel_vgpu_oos_page *oos_page) 1337 { 1338 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1339 struct intel_gvt *gvt = vgpu->gvt; 1340 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 1341 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1342 struct intel_gvt_gtt_entry old, new; 1343 int index; 1344 int ret; 1345 1346 trace_oos_change(vgpu->id, "sync", oos_page->id, 1347 spt, spt->guest_page.type); 1348 1349 old.type = new.type = get_entry_type(spt->guest_page.type); 1350 old.val64 = new.val64 = 0; 1351 1352 for (index = 0; index < (I915_GTT_PAGE_SIZE >> 1353 info->gtt_entry_size_shift); index++) { 1354 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); 1355 ops->get_entry(NULL, &new, index, true, 1356 spt->guest_page.gfn << PAGE_SHIFT, vgpu); 1357 1358 if (old.val64 == new.val64 1359 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) 1360 continue; 1361 1362 trace_oos_sync(vgpu->id, oos_page->id, 1363 spt, spt->guest_page.type, 1364 new.val64, index); 1365 1366 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new); 1367 if (ret) 1368 return ret; 1369 1370 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); 1371 } 1372 1373 spt->guest_page.write_cnt = 0; 1374 list_del_init(&spt->post_shadow_list); 1375 return 0; 1376 } 1377 1378 static int detach_oos_page(struct intel_vgpu *vgpu, 1379 struct intel_vgpu_oos_page *oos_page) 1380 { 1381 struct intel_gvt *gvt = vgpu->gvt; 1382 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1383 1384 trace_oos_change(vgpu->id, "detach", oos_page->id, 1385 spt, spt->guest_page.type); 1386 1387 spt->guest_page.write_cnt = 0; 1388 spt->guest_page.oos_page = NULL; 1389 oos_page->spt = NULL; 1390 1391 list_del_init(&oos_page->vm_list); 1392 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head); 1393 1394 return 0; 1395 } 1396 1397 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page, 1398 struct intel_vgpu_ppgtt_spt *spt) 1399 { 1400 struct intel_gvt *gvt = spt->vgpu->gvt; 1401 int ret; 1402 1403 ret = intel_gvt_read_gpa(spt->vgpu, 1404 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 1405 oos_page->mem, I915_GTT_PAGE_SIZE); 1406 if (ret) 1407 return ret; 1408 1409 oos_page->spt = spt; 1410 spt->guest_page.oos_page = oos_page; 1411 1412 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head); 1413 1414 trace_oos_change(spt->vgpu->id, "attach", oos_page->id, 1415 spt, spt->guest_page.type); 1416 return 0; 1417 } 1418 1419 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) 1420 { 1421 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1422 int ret; 1423 1424 ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn); 1425 if (ret) 1426 return ret; 1427 1428 trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id, 1429 spt, spt->guest_page.type); 1430 1431 list_del_init(&oos_page->vm_list); 1432 return sync_oos_page(spt->vgpu, oos_page); 1433 } 1434 1435 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) 1436 { 1437 struct intel_gvt *gvt = spt->vgpu->gvt; 1438 struct intel_gvt_gtt *gtt = &gvt->gtt; 1439 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1440 int ret; 1441 1442 WARN(oos_page, "shadow PPGTT page has already has a oos page\n"); 1443 1444 if (list_empty(>t->oos_page_free_list_head)) { 1445 oos_page = container_of(gtt->oos_page_use_list_head.next, 1446 struct intel_vgpu_oos_page, list); 1447 ret = ppgtt_set_guest_page_sync(oos_page->spt); 1448 if (ret) 1449 return ret; 1450 ret = detach_oos_page(spt->vgpu, oos_page); 1451 if (ret) 1452 return ret; 1453 } else 1454 oos_page = container_of(gtt->oos_page_free_list_head.next, 1455 struct intel_vgpu_oos_page, list); 1456 return attach_oos_page(oos_page, spt); 1457 } 1458 1459 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) 1460 { 1461 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1462 1463 if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n")) 1464 return -EINVAL; 1465 1466 trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id, 1467 spt, spt->guest_page.type); 1468 1469 list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head); 1470 return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn); 1471 } 1472 1473 /** 1474 * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU 1475 * @vgpu: a vGPU 1476 * 1477 * This function is called before submitting a guest workload to host, 1478 * to sync all the out-of-synced shadow for vGPU 1479 * 1480 * Returns: 1481 * Zero on success, negative error code if failed. 1482 */ 1483 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) 1484 { 1485 struct list_head *pos, *n; 1486 struct intel_vgpu_oos_page *oos_page; 1487 int ret; 1488 1489 if (!enable_out_of_sync) 1490 return 0; 1491 1492 list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) { 1493 oos_page = container_of(pos, 1494 struct intel_vgpu_oos_page, vm_list); 1495 ret = ppgtt_set_guest_page_sync(oos_page->spt); 1496 if (ret) 1497 return ret; 1498 } 1499 return 0; 1500 } 1501 1502 /* 1503 * The heart of PPGTT shadow page table. 1504 */ 1505 static int ppgtt_handle_guest_write_page_table( 1506 struct intel_vgpu_ppgtt_spt *spt, 1507 struct intel_gvt_gtt_entry *we, unsigned long index) 1508 { 1509 struct intel_vgpu *vgpu = spt->vgpu; 1510 int type = spt->shadow_page.type; 1511 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1512 struct intel_gvt_gtt_entry old_se; 1513 int new_present; 1514 int i, ret; 1515 1516 new_present = ops->test_present(we); 1517 1518 /* 1519 * Adding the new entry first and then removing the old one, that can 1520 * guarantee the ppgtt table is validated during the window between 1521 * adding and removal. 1522 */ 1523 ppgtt_get_shadow_entry(spt, &old_se, index); 1524 1525 if (new_present) { 1526 ret = ppgtt_handle_guest_entry_add(spt, we, index); 1527 if (ret) 1528 goto fail; 1529 } 1530 1531 ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index); 1532 if (ret) 1533 goto fail; 1534 1535 if (!new_present) { 1536 /* For 64KB splited entries, we need clear them all. */ 1537 if (ops->test_64k_splited(&old_se) && 1538 !(index % GTT_64K_PTE_STRIDE)) { 1539 gvt_vdbg_mm("remove splited 64K shadow entries\n"); 1540 for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { 1541 ops->clear_64k_splited(&old_se); 1542 ops->set_pfn(&old_se, 1543 vgpu->gtt.scratch_pt[type].page_mfn); 1544 ppgtt_set_shadow_entry(spt, &old_se, index + i); 1545 } 1546 } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY || 1547 old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { 1548 ops->clear_pse(&old_se); 1549 ops->set_pfn(&old_se, 1550 vgpu->gtt.scratch_pt[type].page_mfn); 1551 ppgtt_set_shadow_entry(spt, &old_se, index); 1552 } else { 1553 ops->set_pfn(&old_se, 1554 vgpu->gtt.scratch_pt[type].page_mfn); 1555 ppgtt_set_shadow_entry(spt, &old_se, index); 1556 } 1557 } 1558 1559 return 0; 1560 fail: 1561 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n", 1562 spt, we->val64, we->type); 1563 return ret; 1564 } 1565 1566 1567 1568 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt) 1569 { 1570 return enable_out_of_sync 1571 && gtt_type_is_pte_pt(spt->guest_page.type) 1572 && spt->guest_page.write_cnt >= 2; 1573 } 1574 1575 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt, 1576 unsigned long index) 1577 { 1578 set_bit(index, spt->post_shadow_bitmap); 1579 if (!list_empty(&spt->post_shadow_list)) 1580 return; 1581 1582 list_add_tail(&spt->post_shadow_list, 1583 &spt->vgpu->gtt.post_shadow_list_head); 1584 } 1585 1586 /** 1587 * intel_vgpu_flush_post_shadow - flush the post shadow transactions 1588 * @vgpu: a vGPU 1589 * 1590 * This function is called before submitting a guest workload to host, 1591 * to flush all the post shadows for a vGPU. 1592 * 1593 * Returns: 1594 * Zero on success, negative error code if failed. 1595 */ 1596 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) 1597 { 1598 struct list_head *pos, *n; 1599 struct intel_vgpu_ppgtt_spt *spt; 1600 struct intel_gvt_gtt_entry ge; 1601 unsigned long index; 1602 int ret; 1603 1604 list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) { 1605 spt = container_of(pos, struct intel_vgpu_ppgtt_spt, 1606 post_shadow_list); 1607 1608 for_each_set_bit(index, spt->post_shadow_bitmap, 1609 GTT_ENTRY_NUM_IN_ONE_PAGE) { 1610 ppgtt_get_guest_entry(spt, &ge, index); 1611 1612 ret = ppgtt_handle_guest_write_page_table(spt, 1613 &ge, index); 1614 if (ret) 1615 return ret; 1616 clear_bit(index, spt->post_shadow_bitmap); 1617 } 1618 list_del_init(&spt->post_shadow_list); 1619 } 1620 return 0; 1621 } 1622 1623 static int ppgtt_handle_guest_write_page_table_bytes( 1624 struct intel_vgpu_ppgtt_spt *spt, 1625 u64 pa, void *p_data, int bytes) 1626 { 1627 struct intel_vgpu *vgpu = spt->vgpu; 1628 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1629 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1630 struct intel_gvt_gtt_entry we, se; 1631 unsigned long index; 1632 int ret; 1633 1634 index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift; 1635 1636 ppgtt_get_guest_entry(spt, &we, index); 1637 1638 /* 1639 * For page table which has 64K gtt entry, only PTE#0, PTE#16, 1640 * PTE#32, ... PTE#496 are used. Unused PTEs update should be 1641 * ignored. 1642 */ 1643 if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY && 1644 (index % GTT_64K_PTE_STRIDE)) { 1645 gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n", 1646 index); 1647 return 0; 1648 } 1649 1650 if (bytes == info->gtt_entry_size) { 1651 ret = ppgtt_handle_guest_write_page_table(spt, &we, index); 1652 if (ret) 1653 return ret; 1654 } else { 1655 if (!test_bit(index, spt->post_shadow_bitmap)) { 1656 int type = spt->shadow_page.type; 1657 1658 ppgtt_get_shadow_entry(spt, &se, index); 1659 ret = ppgtt_handle_guest_entry_removal(spt, &se, index); 1660 if (ret) 1661 return ret; 1662 ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); 1663 ppgtt_set_shadow_entry(spt, &se, index); 1664 } 1665 ppgtt_set_post_shadow(spt, index); 1666 } 1667 1668 if (!enable_out_of_sync) 1669 return 0; 1670 1671 spt->guest_page.write_cnt++; 1672 1673 if (spt->guest_page.oos_page) 1674 ops->set_entry(spt->guest_page.oos_page->mem, &we, index, 1675 false, 0, vgpu); 1676 1677 if (can_do_out_of_sync(spt)) { 1678 if (!spt->guest_page.oos_page) 1679 ppgtt_allocate_oos_page(spt); 1680 1681 ret = ppgtt_set_guest_page_oos(spt); 1682 if (ret < 0) 1683 return ret; 1684 } 1685 return 0; 1686 } 1687 1688 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) 1689 { 1690 struct intel_vgpu *vgpu = mm->vgpu; 1691 struct intel_gvt *gvt = vgpu->gvt; 1692 struct intel_gvt_gtt *gtt = &gvt->gtt; 1693 const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1694 struct intel_gvt_gtt_entry se; 1695 int index; 1696 1697 if (!mm->ppgtt_mm.shadowed) 1698 return; 1699 1700 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { 1701 ppgtt_get_shadow_root_entry(mm, &se, index); 1702 1703 if (!ops->test_present(&se)) 1704 continue; 1705 1706 ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se); 1707 se.val64 = 0; 1708 ppgtt_set_shadow_root_entry(mm, &se, index); 1709 1710 trace_spt_guest_change(vgpu->id, "destroy root pointer", 1711 NULL, se.type, se.val64, index); 1712 } 1713 1714 mm->ppgtt_mm.shadowed = false; 1715 } 1716 1717 1718 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) 1719 { 1720 struct intel_vgpu *vgpu = mm->vgpu; 1721 struct intel_gvt *gvt = vgpu->gvt; 1722 struct intel_gvt_gtt *gtt = &gvt->gtt; 1723 const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1724 struct intel_vgpu_ppgtt_spt *spt; 1725 struct intel_gvt_gtt_entry ge, se; 1726 int index, ret; 1727 1728 if (mm->ppgtt_mm.shadowed) 1729 return 0; 1730 1731 if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) 1732 return -EINVAL; 1733 1734 mm->ppgtt_mm.shadowed = true; 1735 1736 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { 1737 ppgtt_get_guest_root_entry(mm, &ge, index); 1738 1739 if (!ops->test_present(&ge)) 1740 continue; 1741 1742 trace_spt_guest_change(vgpu->id, __func__, NULL, 1743 ge.type, ge.val64, index); 1744 1745 spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1746 if (IS_ERR(spt)) { 1747 gvt_vgpu_err("fail to populate guest root pointer\n"); 1748 ret = PTR_ERR(spt); 1749 goto fail; 1750 } 1751 ppgtt_generate_shadow_entry(&se, spt, &ge); 1752 ppgtt_set_shadow_root_entry(mm, &se, index); 1753 1754 trace_spt_guest_change(vgpu->id, "populate root pointer", 1755 NULL, se.type, se.val64, index); 1756 } 1757 1758 return 0; 1759 fail: 1760 invalidate_ppgtt_mm(mm); 1761 return ret; 1762 } 1763 1764 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu) 1765 { 1766 struct intel_vgpu_mm *mm; 1767 1768 mm = kzalloc(sizeof(*mm), GFP_KERNEL); 1769 if (!mm) 1770 return NULL; 1771 1772 mm->vgpu = vgpu; 1773 kref_init(&mm->ref); 1774 atomic_set(&mm->pincount, 0); 1775 1776 return mm; 1777 } 1778 1779 static void vgpu_free_mm(struct intel_vgpu_mm *mm) 1780 { 1781 kfree(mm); 1782 } 1783 1784 /** 1785 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU 1786 * @vgpu: a vGPU 1787 * @root_entry_type: ppgtt root entry type 1788 * @pdps: guest pdps. 1789 * 1790 * This function is used to create a ppgtt mm object for a vGPU. 1791 * 1792 * Returns: 1793 * Zero on success, negative error code in pointer if failed. 1794 */ 1795 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, 1796 enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) 1797 { 1798 struct intel_gvt *gvt = vgpu->gvt; 1799 struct intel_vgpu_mm *mm; 1800 int ret; 1801 1802 mm = vgpu_alloc_mm(vgpu); 1803 if (!mm) 1804 return ERR_PTR(-ENOMEM); 1805 1806 mm->type = INTEL_GVT_MM_PPGTT; 1807 1808 GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && 1809 root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY); 1810 mm->ppgtt_mm.root_entry_type = root_entry_type; 1811 1812 INIT_LIST_HEAD(&mm->ppgtt_mm.list); 1813 INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); 1814 INIT_LIST_HEAD(&mm->ppgtt_mm.link); 1815 1816 if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 1817 mm->ppgtt_mm.guest_pdps[0] = pdps[0]; 1818 else 1819 memcpy(mm->ppgtt_mm.guest_pdps, pdps, 1820 sizeof(mm->ppgtt_mm.guest_pdps)); 1821 1822 ret = shadow_ppgtt_mm(mm); 1823 if (ret) { 1824 gvt_vgpu_err("failed to shadow ppgtt mm\n"); 1825 vgpu_free_mm(mm); 1826 return ERR_PTR(ret); 1827 } 1828 1829 list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head); 1830 1831 mutex_lock(&gvt->gtt.ppgtt_mm_lock); 1832 list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head); 1833 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1834 1835 return mm; 1836 } 1837 1838 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) 1839 { 1840 struct intel_vgpu_mm *mm; 1841 unsigned long nr_entries; 1842 1843 mm = vgpu_alloc_mm(vgpu); 1844 if (!mm) 1845 return ERR_PTR(-ENOMEM); 1846 1847 mm->type = INTEL_GVT_MM_GGTT; 1848 1849 nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT; 1850 mm->ggtt_mm.virtual_ggtt = 1851 vzalloc(array_size(nr_entries, 1852 vgpu->gvt->device_info.gtt_entry_size)); 1853 if (!mm->ggtt_mm.virtual_ggtt) { 1854 vgpu_free_mm(mm); 1855 return ERR_PTR(-ENOMEM); 1856 } 1857 1858 mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64)); 1859 if (!mm->ggtt_mm.host_ggtt_aperture) { 1860 vfree(mm->ggtt_mm.virtual_ggtt); 1861 vgpu_free_mm(mm); 1862 return ERR_PTR(-ENOMEM); 1863 } 1864 1865 mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64)); 1866 if (!mm->ggtt_mm.host_ggtt_hidden) { 1867 vfree(mm->ggtt_mm.host_ggtt_aperture); 1868 vfree(mm->ggtt_mm.virtual_ggtt); 1869 vgpu_free_mm(mm); 1870 return ERR_PTR(-ENOMEM); 1871 } 1872 1873 return mm; 1874 } 1875 1876 /** 1877 * _intel_vgpu_mm_release - destroy a mm object 1878 * @mm_ref: a kref object 1879 * 1880 * This function is used to destroy a mm object for vGPU 1881 * 1882 */ 1883 void _intel_vgpu_mm_release(struct kref *mm_ref) 1884 { 1885 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); 1886 1887 if (GEM_WARN_ON(atomic_read(&mm->pincount))) 1888 gvt_err("vgpu mm pin count bug detected\n"); 1889 1890 if (mm->type == INTEL_GVT_MM_PPGTT) { 1891 list_del(&mm->ppgtt_mm.list); 1892 1893 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1894 list_del(&mm->ppgtt_mm.lru_list); 1895 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1896 1897 invalidate_ppgtt_mm(mm); 1898 } else { 1899 vfree(mm->ggtt_mm.virtual_ggtt); 1900 vfree(mm->ggtt_mm.host_ggtt_aperture); 1901 vfree(mm->ggtt_mm.host_ggtt_hidden); 1902 } 1903 1904 vgpu_free_mm(mm); 1905 } 1906 1907 /** 1908 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object 1909 * @mm: a vGPU mm object 1910 * 1911 * This function is called when user doesn't want to use a vGPU mm object 1912 */ 1913 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) 1914 { 1915 atomic_dec_if_positive(&mm->pincount); 1916 } 1917 1918 /** 1919 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object 1920 * @mm: target vgpu mm 1921 * 1922 * This function is called when user wants to use a vGPU mm object. If this 1923 * mm object hasn't been shadowed yet, the shadow will be populated at this 1924 * time. 1925 * 1926 * Returns: 1927 * Zero on success, negative error code if failed. 1928 */ 1929 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) 1930 { 1931 int ret; 1932 1933 atomic_inc(&mm->pincount); 1934 1935 if (mm->type == INTEL_GVT_MM_PPGTT) { 1936 ret = shadow_ppgtt_mm(mm); 1937 if (ret) 1938 return ret; 1939 1940 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1941 list_move_tail(&mm->ppgtt_mm.lru_list, 1942 &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head); 1943 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); 1944 } 1945 1946 return 0; 1947 } 1948 1949 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) 1950 { 1951 struct intel_vgpu_mm *mm; 1952 struct list_head *pos, *n; 1953 1954 mutex_lock(&gvt->gtt.ppgtt_mm_lock); 1955 1956 list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { 1957 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); 1958 1959 if (atomic_read(&mm->pincount)) 1960 continue; 1961 1962 list_del_init(&mm->ppgtt_mm.lru_list); 1963 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1964 invalidate_ppgtt_mm(mm); 1965 return 1; 1966 } 1967 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1968 return 0; 1969 } 1970 1971 /* 1972 * GMA translation APIs. 1973 */ 1974 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, 1975 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest) 1976 { 1977 struct intel_vgpu *vgpu = mm->vgpu; 1978 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1979 struct intel_vgpu_ppgtt_spt *s; 1980 1981 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 1982 if (!s) 1983 return -ENXIO; 1984 1985 if (!guest) 1986 ppgtt_get_shadow_entry(s, e, index); 1987 else 1988 ppgtt_get_guest_entry(s, e, index); 1989 return 0; 1990 } 1991 1992 /** 1993 * intel_vgpu_gma_to_gpa - translate a gma to GPA 1994 * @mm: mm object. could be a PPGTT or GGTT mm object 1995 * @gma: graphics memory address in this mm object 1996 * 1997 * This function is used to translate a graphics memory address in specific 1998 * graphics memory space to guest physical address. 1999 * 2000 * Returns: 2001 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed. 2002 */ 2003 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) 2004 { 2005 struct intel_vgpu *vgpu = mm->vgpu; 2006 struct intel_gvt *gvt = vgpu->gvt; 2007 const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops; 2008 const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops; 2009 unsigned long gpa = INTEL_GVT_INVALID_ADDR; 2010 unsigned long gma_index[4]; 2011 struct intel_gvt_gtt_entry e; 2012 int i, levels = 0; 2013 int ret; 2014 2015 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT && 2016 mm->type != INTEL_GVT_MM_PPGTT); 2017 2018 if (mm->type == INTEL_GVT_MM_GGTT) { 2019 if (!vgpu_gmadr_is_valid(vgpu, gma)) 2020 goto err; 2021 2022 ggtt_get_guest_entry(mm, &e, 2023 gma_ops->gma_to_ggtt_pte_index(gma)); 2024 2025 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) 2026 + (gma & ~I915_GTT_PAGE_MASK); 2027 2028 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); 2029 } else { 2030 switch (mm->ppgtt_mm.root_entry_type) { 2031 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: 2032 ppgtt_get_shadow_root_entry(mm, &e, 0); 2033 2034 gma_index[0] = gma_ops->gma_to_pml4_index(gma); 2035 gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); 2036 gma_index[2] = gma_ops->gma_to_pde_index(gma); 2037 gma_index[3] = gma_ops->gma_to_pte_index(gma); 2038 levels = 4; 2039 break; 2040 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: 2041 ppgtt_get_shadow_root_entry(mm, &e, 2042 gma_ops->gma_to_l3_pdp_index(gma)); 2043 2044 gma_index[0] = gma_ops->gma_to_pde_index(gma); 2045 gma_index[1] = gma_ops->gma_to_pte_index(gma); 2046 levels = 2; 2047 break; 2048 default: 2049 GEM_BUG_ON(1); 2050 } 2051 2052 /* walk the shadow page table and get gpa from guest entry */ 2053 for (i = 0; i < levels; i++) { 2054 ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], 2055 (i == levels - 1)); 2056 if (ret) 2057 goto err; 2058 2059 if (!pte_ops->test_present(&e)) { 2060 gvt_dbg_core("GMA 0x%lx is not present\n", gma); 2061 goto err; 2062 } 2063 } 2064 2065 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + 2066 (gma & ~I915_GTT_PAGE_MASK); 2067 trace_gma_translate(vgpu->id, "ppgtt", 0, 2068 mm->ppgtt_mm.root_entry_type, gma, gpa); 2069 } 2070 2071 return gpa; 2072 err: 2073 gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); 2074 return INTEL_GVT_INVALID_ADDR; 2075 } 2076 2077 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, 2078 unsigned int off, void *p_data, unsigned int bytes) 2079 { 2080 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 2081 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2082 unsigned long index = off >> info->gtt_entry_size_shift; 2083 unsigned long gma; 2084 struct intel_gvt_gtt_entry e; 2085 2086 if (bytes != 4 && bytes != 8) 2087 return -EINVAL; 2088 2089 gma = index << I915_GTT_PAGE_SHIFT; 2090 if (!intel_gvt_ggtt_validate_range(vgpu, 2091 gma, 1 << I915_GTT_PAGE_SHIFT)) { 2092 gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma); 2093 memset(p_data, 0, bytes); 2094 return 0; 2095 } 2096 2097 ggtt_get_guest_entry(ggtt_mm, &e, index); 2098 memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), 2099 bytes); 2100 return 0; 2101 } 2102 2103 /** 2104 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read 2105 * @vgpu: a vGPU 2106 * @off: register offset 2107 * @p_data: data will be returned to guest 2108 * @bytes: data length 2109 * 2110 * This function is used to emulate the GTT MMIO register read 2111 * 2112 * Returns: 2113 * Zero on success, error code if failed. 2114 */ 2115 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, 2116 void *p_data, unsigned int bytes) 2117 { 2118 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2119 int ret; 2120 2121 if (bytes != 4 && bytes != 8) 2122 return -EINVAL; 2123 2124 off -= info->gtt_start_offset; 2125 ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes); 2126 return ret; 2127 } 2128 2129 static void ggtt_invalidate_pte(struct intel_vgpu *vgpu, 2130 struct intel_gvt_gtt_entry *entry) 2131 { 2132 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2133 unsigned long pfn; 2134 2135 pfn = pte_ops->get_pfn(entry); 2136 if (pfn != vgpu->gvt->gtt.scratch_mfn) 2137 intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); 2138 } 2139 2140 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, 2141 void *p_data, unsigned int bytes) 2142 { 2143 struct intel_gvt *gvt = vgpu->gvt; 2144 const struct intel_gvt_device_info *info = &gvt->device_info; 2145 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 2146 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 2147 unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; 2148 unsigned long gma, gfn; 2149 struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; 2150 struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE}; 2151 dma_addr_t dma_addr; 2152 int ret; 2153 struct intel_gvt_partial_pte *partial_pte, *pos, *n; 2154 bool partial_update = false; 2155 2156 if (bytes != 4 && bytes != 8) 2157 return -EINVAL; 2158 2159 gma = g_gtt_index << I915_GTT_PAGE_SHIFT; 2160 2161 /* the VM may configure the whole GM space when ballooning is used */ 2162 if (!vgpu_gmadr_is_valid(vgpu, gma)) 2163 return 0; 2164 2165 e.type = GTT_TYPE_GGTT_PTE; 2166 memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, 2167 bytes); 2168 2169 /* If ggtt entry size is 8 bytes, and it's split into two 4 bytes 2170 * write, save the first 4 bytes in a list and update virtual 2171 * PTE. Only update shadow PTE when the second 4 bytes comes. 2172 */ 2173 if (bytes < info->gtt_entry_size) { 2174 bool found = false; 2175 2176 list_for_each_entry_safe(pos, n, 2177 &ggtt_mm->ggtt_mm.partial_pte_list, list) { 2178 if (g_gtt_index == pos->offset >> 2179 info->gtt_entry_size_shift) { 2180 if (off != pos->offset) { 2181 /* the second partial part*/ 2182 int last_off = pos->offset & 2183 (info->gtt_entry_size - 1); 2184 2185 memcpy((void *)&e.val64 + last_off, 2186 (void *)&pos->data + last_off, 2187 bytes); 2188 2189 list_del(&pos->list); 2190 kfree(pos); 2191 found = true; 2192 break; 2193 } 2194 2195 /* update of the first partial part */ 2196 pos->data = e.val64; 2197 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); 2198 return 0; 2199 } 2200 } 2201 2202 if (!found) { 2203 /* the first partial part */ 2204 partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL); 2205 if (!partial_pte) 2206 return -ENOMEM; 2207 partial_pte->offset = off; 2208 partial_pte->data = e.val64; 2209 list_add_tail(&partial_pte->list, 2210 &ggtt_mm->ggtt_mm.partial_pte_list); 2211 partial_update = true; 2212 } 2213 } 2214 2215 if (!partial_update && (ops->test_present(&e))) { 2216 gfn = ops->get_pfn(&e); 2217 m.val64 = e.val64; 2218 m.type = e.type; 2219 2220 ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, 2221 &dma_addr); 2222 if (ret) { 2223 gvt_vgpu_err("fail to populate guest ggtt entry\n"); 2224 /* guest driver may read/write the entry when partial 2225 * update the entry in this situation p2m will fail 2226 * setting the shadow entry to point to a scratch page 2227 */ 2228 ops->set_pfn(&m, gvt->gtt.scratch_mfn); 2229 } else 2230 ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); 2231 } else { 2232 ops->set_pfn(&m, gvt->gtt.scratch_mfn); 2233 ops->clear_present(&m); 2234 } 2235 2236 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); 2237 2238 ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index); 2239 ggtt_invalidate_pte(vgpu, &e); 2240 2241 ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); 2242 ggtt_invalidate(gvt->gt); 2243 return 0; 2244 } 2245 2246 /* 2247 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write 2248 * @vgpu: a vGPU 2249 * @off: register offset 2250 * @p_data: data from guest write 2251 * @bytes: data length 2252 * 2253 * This function is used to emulate the GTT MMIO register write 2254 * 2255 * Returns: 2256 * Zero on success, error code if failed. 2257 */ 2258 int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, 2259 unsigned int off, void *p_data, unsigned int bytes) 2260 { 2261 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 2262 int ret; 2263 struct intel_vgpu_submission *s = &vgpu->submission; 2264 struct intel_engine_cs *engine; 2265 int i; 2266 2267 if (bytes != 4 && bytes != 8) 2268 return -EINVAL; 2269 2270 off -= info->gtt_start_offset; 2271 ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes); 2272 2273 /* if ggtt of last submitted context is written, 2274 * that context is probably got unpinned. 2275 * Set last shadowed ctx to invalid. 2276 */ 2277 for_each_engine(engine, vgpu->gvt->gt, i) { 2278 if (!s->last_ctx[i].valid) 2279 continue; 2280 2281 if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift)) 2282 s->last_ctx[i].valid = false; 2283 } 2284 return ret; 2285 } 2286 2287 static int alloc_scratch_pages(struct intel_vgpu *vgpu, 2288 enum intel_gvt_gtt_type type) 2289 { 2290 struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 2291 struct intel_vgpu_gtt *gtt = &vgpu->gtt; 2292 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 2293 int page_entry_num = I915_GTT_PAGE_SIZE >> 2294 vgpu->gvt->device_info.gtt_entry_size_shift; 2295 void *scratch_pt; 2296 int i; 2297 struct device *dev = vgpu->gvt->gt->i915->drm.dev; 2298 dma_addr_t daddr; 2299 2300 if (drm_WARN_ON(&i915->drm, 2301 type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX)) 2302 return -EINVAL; 2303 2304 scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); 2305 if (!scratch_pt) { 2306 gvt_vgpu_err("fail to allocate scratch page\n"); 2307 return -ENOMEM; 2308 } 2309 2310 daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL); 2311 if (dma_mapping_error(dev, daddr)) { 2312 gvt_vgpu_err("fail to dmamap scratch_pt\n"); 2313 __free_page(virt_to_page(scratch_pt)); 2314 return -ENOMEM; 2315 } 2316 gtt->scratch_pt[type].page_mfn = 2317 (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); 2318 gtt->scratch_pt[type].page = virt_to_page(scratch_pt); 2319 gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", 2320 vgpu->id, type, gtt->scratch_pt[type].page_mfn); 2321 2322 /* Build the tree by full filled the scratch pt with the entries which 2323 * point to the next level scratch pt or scratch page. The 2324 * scratch_pt[type] indicate the scratch pt/scratch page used by the 2325 * 'type' pt. 2326 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by 2327 * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self 2328 * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn. 2329 */ 2330 if (type > GTT_TYPE_PPGTT_PTE_PT) { 2331 struct intel_gvt_gtt_entry se; 2332 2333 memset(&se, 0, sizeof(struct intel_gvt_gtt_entry)); 2334 se.type = get_entry_type(type - 1); 2335 ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn); 2336 2337 /* The entry parameters like present/writeable/cache type 2338 * set to the same as i915's scratch page tree. 2339 */ 2340 se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; 2341 if (type == GTT_TYPE_PPGTT_PDE_PT) 2342 se.val64 |= PPAT_CACHED; 2343 2344 for (i = 0; i < page_entry_num; i++) 2345 ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); 2346 } 2347 2348 return 0; 2349 } 2350 2351 static int release_scratch_page_tree(struct intel_vgpu *vgpu) 2352 { 2353 int i; 2354 struct device *dev = vgpu->gvt->gt->i915->drm.dev; 2355 dma_addr_t daddr; 2356 2357 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { 2358 if (vgpu->gtt.scratch_pt[i].page != NULL) { 2359 daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << 2360 I915_GTT_PAGE_SHIFT); 2361 dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); 2362 __free_page(vgpu->gtt.scratch_pt[i].page); 2363 vgpu->gtt.scratch_pt[i].page = NULL; 2364 vgpu->gtt.scratch_pt[i].page_mfn = 0; 2365 } 2366 } 2367 2368 return 0; 2369 } 2370 2371 static int create_scratch_page_tree(struct intel_vgpu *vgpu) 2372 { 2373 int i, ret; 2374 2375 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { 2376 ret = alloc_scratch_pages(vgpu, i); 2377 if (ret) 2378 goto err; 2379 } 2380 2381 return 0; 2382 2383 err: 2384 release_scratch_page_tree(vgpu); 2385 return ret; 2386 } 2387 2388 /** 2389 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization 2390 * @vgpu: a vGPU 2391 * 2392 * This function is used to initialize per-vGPU graphics memory virtualization 2393 * components. 2394 * 2395 * Returns: 2396 * Zero on success, error code if failed. 2397 */ 2398 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) 2399 { 2400 struct intel_vgpu_gtt *gtt = &vgpu->gtt; 2401 2402 INIT_RADIX_TREE(>t->spt_tree, GFP_KERNEL); 2403 2404 INIT_LIST_HEAD(>t->ppgtt_mm_list_head); 2405 INIT_LIST_HEAD(>t->oos_page_list_head); 2406 INIT_LIST_HEAD(>t->post_shadow_list_head); 2407 2408 gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu); 2409 if (IS_ERR(gtt->ggtt_mm)) { 2410 gvt_vgpu_err("fail to create mm for ggtt.\n"); 2411 return PTR_ERR(gtt->ggtt_mm); 2412 } 2413 2414 intel_vgpu_reset_ggtt(vgpu, false); 2415 2416 INIT_LIST_HEAD(>t->ggtt_mm->ggtt_mm.partial_pte_list); 2417 2418 return create_scratch_page_tree(vgpu); 2419 } 2420 2421 void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) 2422 { 2423 struct list_head *pos, *n; 2424 struct intel_vgpu_mm *mm; 2425 2426 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { 2427 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2428 intel_vgpu_destroy_mm(mm); 2429 } 2430 2431 if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) 2432 gvt_err("vgpu ppgtt mm is not fully destroyed\n"); 2433 2434 if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) { 2435 gvt_err("Why we still has spt not freed?\n"); 2436 ppgtt_free_all_spt(vgpu); 2437 } 2438 } 2439 2440 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu) 2441 { 2442 struct intel_gvt_partial_pte *pos, *next; 2443 2444 list_for_each_entry_safe(pos, next, 2445 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, 2446 list) { 2447 gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n", 2448 pos->offset, pos->data); 2449 kfree(pos); 2450 } 2451 intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm); 2452 vgpu->gtt.ggtt_mm = NULL; 2453 } 2454 2455 /** 2456 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization 2457 * @vgpu: a vGPU 2458 * 2459 * This function is used to clean up per-vGPU graphics memory virtualization 2460 * components. 2461 * 2462 * Returns: 2463 * Zero on success, error code if failed. 2464 */ 2465 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) 2466 { 2467 intel_vgpu_destroy_all_ppgtt_mm(vgpu); 2468 intel_vgpu_destroy_ggtt_mm(vgpu); 2469 release_scratch_page_tree(vgpu); 2470 } 2471 2472 static void clean_spt_oos(struct intel_gvt *gvt) 2473 { 2474 struct intel_gvt_gtt *gtt = &gvt->gtt; 2475 struct list_head *pos, *n; 2476 struct intel_vgpu_oos_page *oos_page; 2477 2478 WARN(!list_empty(>t->oos_page_use_list_head), 2479 "someone is still using oos page\n"); 2480 2481 list_for_each_safe(pos, n, >t->oos_page_free_list_head) { 2482 oos_page = container_of(pos, struct intel_vgpu_oos_page, list); 2483 list_del(&oos_page->list); 2484 free_page((unsigned long)oos_page->mem); 2485 kfree(oos_page); 2486 } 2487 } 2488 2489 static int setup_spt_oos(struct intel_gvt *gvt) 2490 { 2491 struct intel_gvt_gtt *gtt = &gvt->gtt; 2492 struct intel_vgpu_oos_page *oos_page; 2493 int i; 2494 int ret; 2495 2496 INIT_LIST_HEAD(>t->oos_page_free_list_head); 2497 INIT_LIST_HEAD(>t->oos_page_use_list_head); 2498 2499 for (i = 0; i < preallocated_oos_pages; i++) { 2500 oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL); 2501 if (!oos_page) { 2502 ret = -ENOMEM; 2503 goto fail; 2504 } 2505 oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0); 2506 if (!oos_page->mem) { 2507 ret = -ENOMEM; 2508 kfree(oos_page); 2509 goto fail; 2510 } 2511 2512 INIT_LIST_HEAD(&oos_page->list); 2513 INIT_LIST_HEAD(&oos_page->vm_list); 2514 oos_page->id = i; 2515 list_add_tail(&oos_page->list, >t->oos_page_free_list_head); 2516 } 2517 2518 gvt_dbg_mm("%d oos pages preallocated\n", i); 2519 2520 return 0; 2521 fail: 2522 clean_spt_oos(gvt); 2523 return ret; 2524 } 2525 2526 /** 2527 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object 2528 * @vgpu: a vGPU 2529 * @pdps: pdp root array 2530 * 2531 * This function is used to find a PPGTT mm object from mm object pool 2532 * 2533 * Returns: 2534 * pointer to mm object on success, NULL if failed. 2535 */ 2536 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, 2537 u64 pdps[]) 2538 { 2539 struct intel_vgpu_mm *mm; 2540 struct list_head *pos; 2541 2542 list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) { 2543 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2544 2545 switch (mm->ppgtt_mm.root_entry_type) { 2546 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: 2547 if (pdps[0] == mm->ppgtt_mm.guest_pdps[0]) 2548 return mm; 2549 break; 2550 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: 2551 if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps, 2552 sizeof(mm->ppgtt_mm.guest_pdps))) 2553 return mm; 2554 break; 2555 default: 2556 GEM_BUG_ON(1); 2557 } 2558 } 2559 return NULL; 2560 } 2561 2562 /** 2563 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object. 2564 * @vgpu: a vGPU 2565 * @root_entry_type: ppgtt root entry type 2566 * @pdps: guest pdps 2567 * 2568 * This function is used to find or create a PPGTT mm object from a guest. 2569 * 2570 * Returns: 2571 * Zero on success, negative error code if failed. 2572 */ 2573 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, 2574 enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) 2575 { 2576 struct intel_vgpu_mm *mm; 2577 2578 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); 2579 if (mm) { 2580 intel_vgpu_mm_get(mm); 2581 } else { 2582 mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps); 2583 if (IS_ERR(mm)) 2584 gvt_vgpu_err("fail to create mm\n"); 2585 } 2586 return mm; 2587 } 2588 2589 /** 2590 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object. 2591 * @vgpu: a vGPU 2592 * @pdps: guest pdps 2593 * 2594 * This function is used to find a PPGTT mm object from a guest and destroy it. 2595 * 2596 * Returns: 2597 * Zero on success, negative error code if failed. 2598 */ 2599 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]) 2600 { 2601 struct intel_vgpu_mm *mm; 2602 2603 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); 2604 if (!mm) { 2605 gvt_vgpu_err("fail to find ppgtt instance.\n"); 2606 return -EINVAL; 2607 } 2608 intel_vgpu_mm_put(mm); 2609 return 0; 2610 } 2611 2612 /** 2613 * intel_gvt_init_gtt - initialize mm components of a GVT device 2614 * @gvt: GVT device 2615 * 2616 * This function is called at the initialization stage, to initialize 2617 * the mm components of a GVT device. 2618 * 2619 * Returns: 2620 * zero on success, negative error code if failed. 2621 */ 2622 int intel_gvt_init_gtt(struct intel_gvt *gvt) 2623 { 2624 int ret; 2625 void *page; 2626 struct device *dev = gvt->gt->i915->drm.dev; 2627 dma_addr_t daddr; 2628 2629 gvt_dbg_core("init gtt\n"); 2630 2631 gvt->gtt.pte_ops = &gen8_gtt_pte_ops; 2632 gvt->gtt.gma_ops = &gen8_gtt_gma_ops; 2633 2634 page = (void *)get_zeroed_page(GFP_KERNEL); 2635 if (!page) { 2636 gvt_err("fail to allocate scratch ggtt page\n"); 2637 return -ENOMEM; 2638 } 2639 2640 daddr = dma_map_page(dev, virt_to_page(page), 0, 2641 4096, DMA_BIDIRECTIONAL); 2642 if (dma_mapping_error(dev, daddr)) { 2643 gvt_err("fail to dmamap scratch ggtt page\n"); 2644 __free_page(virt_to_page(page)); 2645 return -ENOMEM; 2646 } 2647 2648 gvt->gtt.scratch_page = virt_to_page(page); 2649 gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); 2650 2651 if (enable_out_of_sync) { 2652 ret = setup_spt_oos(gvt); 2653 if (ret) { 2654 gvt_err("fail to initialize SPT oos\n"); 2655 dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); 2656 __free_page(gvt->gtt.scratch_page); 2657 return ret; 2658 } 2659 } 2660 INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head); 2661 mutex_init(&gvt->gtt.ppgtt_mm_lock); 2662 return 0; 2663 } 2664 2665 /** 2666 * intel_gvt_clean_gtt - clean up mm components of a GVT device 2667 * @gvt: GVT device 2668 * 2669 * This function is called at the driver unloading stage, to clean up 2670 * the mm components of a GVT device. 2671 * 2672 */ 2673 void intel_gvt_clean_gtt(struct intel_gvt *gvt) 2674 { 2675 struct device *dev = gvt->gt->i915->drm.dev; 2676 dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn << 2677 I915_GTT_PAGE_SHIFT); 2678 2679 dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL); 2680 2681 __free_page(gvt->gtt.scratch_page); 2682 2683 if (enable_out_of_sync) 2684 clean_spt_oos(gvt); 2685 } 2686 2687 /** 2688 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances 2689 * @vgpu: a vGPU 2690 * 2691 * This function is called when invalidate all PPGTT instances of a vGPU. 2692 * 2693 */ 2694 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) 2695 { 2696 struct list_head *pos, *n; 2697 struct intel_vgpu_mm *mm; 2698 2699 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { 2700 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2701 if (mm->type == INTEL_GVT_MM_PPGTT) { 2702 mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock); 2703 list_del_init(&mm->ppgtt_mm.lru_list); 2704 mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock); 2705 if (mm->ppgtt_mm.shadowed) 2706 invalidate_ppgtt_mm(mm); 2707 } 2708 } 2709 } 2710 2711 /** 2712 * intel_vgpu_reset_ggtt - reset the GGTT entry 2713 * @vgpu: a vGPU 2714 * @invalidate_old: invalidate old entries 2715 * 2716 * This function is called at the vGPU create stage 2717 * to reset all the GGTT entries. 2718 * 2719 */ 2720 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) 2721 { 2722 struct intel_gvt *gvt = vgpu->gvt; 2723 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2724 struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; 2725 struct intel_gvt_gtt_entry old_entry; 2726 u32 index; 2727 u32 num_entries; 2728 2729 pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); 2730 pte_ops->set_present(&entry); 2731 2732 index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2733 num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2734 while (num_entries--) { 2735 if (invalidate_old) { 2736 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2737 ggtt_invalidate_pte(vgpu, &old_entry); 2738 } 2739 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2740 } 2741 2742 index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2743 num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2744 while (num_entries--) { 2745 if (invalidate_old) { 2746 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2747 ggtt_invalidate_pte(vgpu, &old_entry); 2748 } 2749 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2750 } 2751 2752 ggtt_invalidate(gvt->gt); 2753 } 2754 2755 /** 2756 * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries 2757 * @gvt: intel gvt device 2758 * 2759 * This function is called at driver resume stage to restore 2760 * GGTT entries of every vGPU. 2761 * 2762 */ 2763 void intel_gvt_restore_ggtt(struct intel_gvt *gvt) 2764 { 2765 struct intel_vgpu *vgpu; 2766 struct intel_vgpu_mm *mm; 2767 int id; 2768 gen8_pte_t pte; 2769 u32 idx, num_low, num_hi, offset; 2770 2771 /* Restore dirty host ggtt for all vGPUs */ 2772 idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) { 2773 mm = vgpu->gtt.ggtt_mm; 2774 2775 num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2776 offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2777 for (idx = 0; idx < num_low; idx++) { 2778 pte = mm->ggtt_mm.host_ggtt_aperture[idx]; 2779 if (pte & GEN8_PAGE_PRESENT) 2780 write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); 2781 } 2782 2783 num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2784 offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2785 for (idx = 0; idx < num_hi; idx++) { 2786 pte = mm->ggtt_mm.host_ggtt_hidden[idx]; 2787 if (pte & GEN8_PAGE_PRESENT) 2788 write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte); 2789 } 2790 } 2791 } 2792