/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * KVM/MIPS MMU handling in the KVM module.
 *
 * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
 * Authors: Sanjay Lal <sanjayl@kymasys.com>
 */

#include <linux/highmem.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
 * for which pages need to be cached.
 */
#if defined(__PAGETABLE_PMD_FOLDED)
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

/**
 * kvm_pgd_init() - Initialise KVM GPA page directory.
 * @page:	Pointer to page directory (PGD) for KVM GPA.
 *
 * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
 * representing no mappings. This is similar to pgd_init(), however it
 * initialises all the page directory pointers, not just the ones corresponding
 * to the userland address space (since it is for the guest physical address
 * space rather than a virtual address space).
 */
static void kvm_pgd_init(void *page)
{
	unsigned long *p, *end;
	unsigned long entry;

#ifdef __PAGETABLE_PMD_FOLDED
	entry = (unsigned long)invalid_pte_table;
#else
	entry = (unsigned long)invalid_pmd_table;
#endif

	p = (unsigned long *)page;
	end = p + PTRS_PER_PGD;

	do {
		p[0] = entry;
		p[1] = entry;
		p[2] = entry;
		p[3] = entry;
		p[4] = entry;
		p += 8;
		p[-3] = entry;
		p[-2] = entry;
		p[-1] = entry;
	} while (p != end);
}

/**
 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
 *
 * Allocate a blank KVM GPA page directory (PGD) for representing guest
 * physical to host physical page mappings.
 *
 * Returns:	Pointer to new KVM GPA page directory.
 *		NULL on allocation failure.
 */
pgd_t *kvm_pgd_alloc(void)
{
	pgd_t *ret;

	ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER);
	if (ret)
		kvm_pgd_init(ret);

	return ret;
}
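/*
 * Illustrative use only (a sketch, not upstream code from this file): the PGD
 * allocated here is the one the GPA walkers below expect to find in
 * kvm->arch.gpa_mm.pgd, so a VM init path would typically do something like:
 *
 *	kvm->arch.gpa_mm.pgd = kvm_pgd_alloc();
 *	if (!kvm->arch.gpa_mm.pgd)
 *		return -ENOMEM;
 */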
/**
 * kvm_mips_walk_pgd() - Walk page table with optional allocation.
 * @pgd:	Page directory pointer.
 * @addr:	Address to index page table using.
 * @cache:	MMU page cache to allocate new page tables from, or NULL.
 *
 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
 * address @addr. If page tables don't exist for @addr, they will be created
 * from the MMU cache if @cache is not NULL.
 *
 * Returns:	Pointer to pte_t corresponding to @addr.
 *		NULL if a page table doesn't exist for @addr and !@cache.
 *		NULL if a page table allocation failed.
 */
static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
				unsigned long addr)
{
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd += pgd_index(addr);
	if (pgd_none(*pgd)) {
		/* Not used on MIPS yet */
		BUG();
		return NULL;
	}
	p4d = p4d_offset(pgd, addr);
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		pmd_t *new_pmd;

		if (!cache)
			return NULL;
		new_pmd = kvm_mmu_memory_cache_alloc(cache);
		pmd_init((unsigned long)new_pmd,
			 (unsigned long)invalid_pte_table);
		pud_populate(NULL, pud, new_pmd);
	}
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		pte_t *new_pte;

		if (!cache)
			return NULL;
		new_pte = kvm_mmu_memory_cache_alloc(cache);
		clear_page(new_pte);
		pmd_populate_kernel(NULL, pmd, new_pte);
	}
	return pte_offset_kernel(pmd, addr);
}

/* Caller must hold kvm->mmu_lock */
static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
				   struct kvm_mmu_memory_cache *cache,
				   unsigned long addr)
{
	return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
}
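/*
 * Minimal usage sketch (illustrative; it mirrors the fast-path lookup further
 * down in this file): a read-only lookup passes a NULL cache so nothing is
 * allocated, and must run under the GPA page table lock:
 *
 *	spin_lock(&kvm->mmu_lock);
 *	ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
 *	if (ptep && pte_present(*ptep))
 *		entry = *ptep;
 *	spin_unlock(&kvm->mmu_lock);
 */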
/*
 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest physical address space from the VM's GPA page tables.
 */

static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	int i_min = pte_index(start_gpa);
	int i_max = pte_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
	int i;

	for (i = i_min; i <= i_max; ++i) {
		if (!pte_present(pte[i]))
			continue;

		set_pte(pte + i, __pte(0));
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pte_t *pte;
	unsigned long end = ~0ul;
	int i_min = pmd_index(start_gpa);
	int i_max = pmd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pmd_present(pmd[i]))
			continue;

		pte = pte_offset_kernel(pmd + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
			pmd_clear(pmd + i);
			pte_free_kernel(NULL, pte);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pmd_t *pmd;
	unsigned long end = ~0ul;
	int i_min = pud_index(start_gpa);
	int i_max = pud_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pud_present(pud[i]))
			continue;

		pmd = pmd_offset(pud + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
			pud_clear(pud + i);
			pmd_free(NULL, pmd);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	p4d_t *p4d;
	pud_t *pud;
	unsigned long end = ~0ul;
	int i_min = pgd_index(start_gpa);
	int i_max = pgd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		p4d = p4d_offset(pgd, 0);
		pud = pud_offset(p4d + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
			pgd_clear(pgd + i);
			pud_free(NULL, pud);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

/**
 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Flushes a range of GPA mappings from the GPA page tables.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether it's safe to remove the top level page directory because
 *		all lower levels have been removed.
 */
bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
				      start_gfn << PAGE_SHIFT,
				      end_gfn << PAGE_SHIFT);
}

#define BUILD_PTE_RANGE_OP(name, op)					\
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	int i_min = pte_index(start);					\
	int i_max = pte_index(end);					\
	int i;								\
	pte_t old, new;							\
									\
	for (i = i_min; i <= i_max; ++i) {				\
		if (!pte_present(pte[i]))				\
			continue;					\
									\
		old = pte[i];						\
		new = op(old);						\
		if (pte_val(new) == pte_val(old))			\
			continue;					\
		set_pte(pte + i, new);					\
		ret = 1;						\
	}								\
	return ret;							\
}									\
									\
/* returns true if anything was done */					\
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	pte_t *pte;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pmd_index(start);					\
	int i_max = pmd_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pmd_present(pmd[i]))				\
			continue;					\
									\
		pte = pte_offset_kernel(pmd + i, 0);			\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pte(pte, start, cur_end);	\
	}								\
	return ret;							\
}									\
									\
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	pmd_t *pmd;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pud_index(start);					\
	int i_max = pud_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pud_present(pud[i]))				\
			continue;					\
									\
		pmd = pmd_offset(pud + i, 0);				\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pmd(pmd, start, cur_end);	\
	}								\
	return ret;							\
}									\
									\
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int ret = 0;							\
	p4d_t *p4d;							\
	pud_t *pud;							\
	unsigned long cur_end = ~0ul;					\
	int i_min = pgd_index(start);					\
	int i_max = pgd_index(end);					\
	int i;								\
									\
	for (i = i_min; i <= i_max; ++i, start = 0) {			\
		if (!pgd_present(pgd[i]))				\
			continue;					\
									\
		p4d = p4d_offset(pgd, 0);				\
		pud = pud_offset(p4d + i, 0);				\
		if (i == i_max)						\
			cur_end = end;					\
									\
		ret |= kvm_mips_##name##_pud(pud, start, cur_end);	\
	}								\
	return ret;							\
}
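/*
 * Note (added for clarity): BUILD_PTE_RANGE_OP(name, op) above expands into a
 * family of range walkers, kvm_mips_<name>_{pte,pmd,pud,pgd}(), which apply
 * @op to every present PTE in the given range and return whether any entry
 * was actually changed. It is instantiated twice below: with pte_mkclean for
 * dirty page logging and with pte_mkold for idle page tracking.
 */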
/*
 * kvm_mips_mkclean_gpa_pt.
 * Mark a range of guest physical address space clean (writes fault) in the
 * VM's GPA page table to allow dirty page tracking.
 */

BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)

/**
 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
 * @kvm:	KVM pointer.
 * @start_gfn:	Guest frame number of first page in GPA range to flush.
 * @end_gfn:	Guest frame number of last page in GPA range to flush.
 *
 * Make a range of GPA mappings clean so that guest writes will fault and
 * trigger dirty page logging.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns:	Whether any GPA mappings were modified, which would require
 *		derived mappings (GVA page tables & TLB entries) to be
 *		invalidated.
 */
int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
	return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
				    start_gfn << PAGE_SHIFT,
				    end_gfn << PAGE_SHIFT);
}

/**
 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks the bits set in @mask and write protects the associated PTEs. The
 * caller must acquire @kvm->mmu_lock.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	gfn_t base_gfn = slot->base_gfn + gfn_offset;
	gfn_t start = base_gfn + __ffs(mask);
	gfn_t end = base_gfn + __fls(mask);

	kvm_mips_mkclean_gpa_pt(kvm, start, end);
}
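/*
 * Worked example (illustrative): with gfn_offset == 0 and mask == 0x3c,
 * __ffs(mask) == 2 and __fls(mask) == 5, so the inclusive GFN range
 * base_gfn + 2 .. base_gfn + 5 is made clean. Any clear bits between the
 * first and last set bit fall inside that range and are write protected as
 * well, since the range walk above does not consult individual mask bits.
 */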
/*
 * kvm_mips_mkold_gpa_pt.
 * Mark a range of guest physical address space old (all accesses fault) in the
 * VM's GPA page table to allow detection of commonly used pages.
 */

BUILD_PTE_RANGE_OP(mkold, pte_mkold)

static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
				 gfn_t end_gfn)
{
	return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
				  start_gfn << PAGE_SHIFT,
				  end_gfn << PAGE_SHIFT);
}

static int handle_hva_to_gpa(struct kvm *kvm,
			     unsigned long start,
			     unsigned long end,
			     int (*handler)(struct kvm *kvm, gfn_t gfn,
					    gpa_t gfn_end,
					    struct kvm_memory_slot *memslot,
					    void *data),
			     void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = 0;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

		ret |= handler(kvm, gfn, gfn_end, memslot, data);
	}

	return ret;
}

static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
				 struct kvm_memory_slot *memslot, void *data)
{
	kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end);
	return 1;
}

int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
			unsigned flags)
{
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);

	kvm_mips_callbacks->flush_shadow_all(kvm);
	return 0;
}

static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
				struct kvm_memory_slot *memslot, void *data)
{
	gpa_t gpa = gfn << PAGE_SHIFT;
	pte_t hva_pte = *(pte_t *)data;
	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
	pte_t old_pte;

	if (!gpa_pte)
		return 0;

	/* Mapping may need adjusting depending on memslot flags */
	old_pte = *gpa_pte;
	if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
		hva_pte = pte_mkclean(hva_pte);
	else if (memslot->flags & KVM_MEM_READONLY)
		hva_pte = pte_wrprotect(hva_pte);

	set_pte(gpa_pte, hva_pte);

	/* Replacing an absent or old page doesn't need flushes */
	if (!pte_present(old_pte) || !pte_young(old_pte))
		return 0;

	/* Pages swapped, aged, moved, or cleaned require flushes */
	return !pte_present(hva_pte) ||
	       !pte_young(hva_pte) ||
	       pte_pfn(old_pte) != pte_pfn(hva_pte) ||
	       (pte_dirty(old_pte) && !pte_dirty(hva_pte));
}

int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	int ret;

	ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte);
	if (ret)
		kvm_mips_callbacks->flush_shadow_all(kvm);
	return 0;
}

static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
			       struct kvm_memory_slot *memslot, void *data)
{
	return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end);
}

static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
				    struct kvm_memory_slot *memslot, void *data)
{
	gpa_t gpa = gfn << PAGE_SHIFT;
	pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);

	if (!gpa_pte)
		return 0;
	return pte_young(*gpa_pte);
}

int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}
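/*
 * Note (added for clarity): the functions above are the MIPS backends for the
 * core MMU notifier hooks. kvm_unmap_hva_range() drops the affected GPA
 * mappings entirely, kvm_set_spte_hva() mirrors a changed host PTE into the
 * GPA table, and kvm_age_hva()/kvm_test_age_hva() implement idle page
 * tracking via pte_mkold(). Each resolves HVAs to GFN ranges through
 * handle_hva_to_gpa(), and the unmap and set_spte paths invalidate derived
 * GVA/TLB state through kvm_mips_callbacks->flush_shadow_all() when needed.
 */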
/**
 * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write_fault:	Whether the fault was due to a write.
 * @out_entry:		New PTE for @gpa (written on success unless NULL).
 * @out_buddy:		New PTE for @gpa's buddy (written on success unless
 *			NULL).
 *
 * Perform fast path GPA fault handling, doing all that can be done without
 * falling back to the slow path (no GFN to PFN lookup and no page table
 * allocation). This handles marking old pages young (for idle page tracking),
 * and dirtying of clean pages (for dirty page logging).
 *
 * Returns:	0 on success, in which case we can update derived mappings and
 *		resume guest execution.
 *		-EFAULT on failure due to absent GPA mapping or write to
 *		read-only page, in which case the slow path in
 *		kvm_mips_map_page() must take over.
 */
static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
				   bool write_fault,
				   pte_t *out_entry, pte_t *out_buddy)
{
	struct kvm *kvm = vcpu->kvm;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	pte_t *ptep;
	kvm_pfn_t pfn = 0;	/* silence bogus GCC warning */
	bool pfn_valid = false;
	int ret = 0;

	spin_lock(&kvm->mmu_lock);

	/* Fast path - just check GPA page table for an existing entry */
	ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
	if (!ptep || !pte_present(*ptep)) {
		ret = -EFAULT;
		goto out;
	}

	/* Track access to pages marked old */
	if (!pte_young(*ptep)) {
		set_pte(ptep, pte_mkyoung(*ptep));
		pfn = pte_pfn(*ptep);
		pfn_valid = true;
		/* call kvm_set_pfn_accessed() after unlock */
	}
	if (write_fault && !pte_dirty(*ptep)) {
		if (!pte_write(*ptep)) {
			ret = -EFAULT;
			goto out;
		}

		/* Track dirtying of writeable pages */
		set_pte(ptep, pte_mkdirty(*ptep));
		pfn = pte_pfn(*ptep);
		mark_page_dirty(kvm, gfn);
		kvm_set_pfn_dirty(pfn);
	}

	if (out_entry)
		*out_entry = *ptep;
	if (out_buddy)
		*out_buddy = *ptep_buddy(ptep);

out:
	spin_unlock(&kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
	return ret;
}
/**
 * kvm_mips_map_page() - Map a guest physical page.
 * @vcpu:		VCPU pointer.
 * @gpa:		Guest physical address of fault.
 * @write_fault:	Whether the fault was due to a write.
 * @out_entry:		New PTE for @gpa (written on success unless NULL).
 * @out_buddy:		New PTE for @gpa's buddy (written on success unless
 *			NULL).
 *
 * Handle GPA faults by creating a new GPA mapping (or updating an existing
 * one).
 *
 * This takes care of marking pages young or dirty (idle/dirty page tracking),
 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
 * caller.
 *
 * Returns:	0 on success, in which case the caller may use the @out_entry
 *		and @out_buddy PTEs to update derived mappings and resume guest
 *		execution.
 *		-EFAULT if there is no memory region at @gpa or a write was
 *		attempted to a read-only memory region. This is usually handled
 *		as an MMIO access.
 */
static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
			     bool write_fault,
			     pte_t *out_entry, pte_t *out_buddy)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int srcu_idx, err;
	kvm_pfn_t pfn;
	pte_t *ptep, entry, old_pte;
	bool writeable;
	unsigned long prot_bits;
	unsigned long mmu_seq;

	/* Try the fast path to handle old / clean pages */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
				      out_buddy);
	if (!err)
		goto out;

	/* We need a minimum of cached pages ready for page table creation */
	err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
	if (err)
		goto out;

retry:
	/*
	 * Used to check for invalidations in progress, of the pfn that is
	 * returned by gfn_to_pfn_prot() below.
	 */
	mmu_seq = kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
	 * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
	 * risk the page we get a reference to getting unmapped before we have a
	 * chance to grab the mmu_lock without mmu_notifier_retry() noticing.
	 *
	 * This smp_rmb() pairs with the effective smp_wmb() of the combination
	 * of the pte_unmap_unlock() after the PTE is zapped, and the
	 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
	 * mmu_notifier_seq is incremented.
	 */
	smp_rmb();

	/* Slow path - ask KVM core whether we can access this GPA */
	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
	if (is_error_noslot_pfn(pfn)) {
		err = -EFAULT;
		goto out;
	}

	spin_lock(&kvm->mmu_lock);
	/* Check if an invalidation has taken place since we got pfn */
	if (mmu_notifier_retry(kvm, mmu_seq)) {
		/*
		 * This can happen when mappings are changed asynchronously, but
		 * also synchronously if a COW is triggered by
		 * gfn_to_pfn_prot().
		 */
		spin_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		goto retry;
	}

	/* Ensure page tables are allocated */
	ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);

	/* Set up the PTE */
	prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
	if (writeable) {
		prot_bits |= _PAGE_WRITE;
		if (write_fault) {
			prot_bits |= __WRITEABLE;
			mark_page_dirty(kvm, gfn);
			kvm_set_pfn_dirty(pfn);
		}
	}
	entry = pfn_pte(pfn, __pgprot(prot_bits));

	/* Write the PTE */
	old_pte = *ptep;
	set_pte(ptep, entry);

	err = 0;
	if (out_entry)
		*out_entry = *ptep;
	if (out_buddy)
		*out_buddy = *ptep_buddy(ptep);

	spin_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	kvm_set_pfn_accessed(pfn);
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return err;
}

static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu,
					unsigned long addr)
{
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	pgd_t *pgdp;
	int ret;

	/* We need a minimum of cached pages ready for page table creation */
	ret = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
	if (ret)
		return NULL;

	if (KVM_GUEST_KERNEL_MODE(vcpu))
		pgdp = vcpu->arch.guest_kernel_mm.pgd;
	else
		pgdp = vcpu->arch.guest_user_mm.pgd;

	return kvm_mips_walk_pgd(pgdp, memcache, addr);
}

void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr,
				  bool user)
{
	pgd_t *pgdp;
	pte_t *ptep;

	addr &= PAGE_MASK << 1;

	pgdp = vcpu->arch.guest_kernel_mm.pgd;
	ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
	if (ptep) {
		ptep[0] = pfn_pte(0, __pgprot(0));
		ptep[1] = pfn_pte(0, __pgprot(0));
	}

	if (user) {
		pgdp = vcpu->arch.guest_user_mm.pgd;
		ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
		if (ptep) {
			ptep[0] = pfn_pte(0, __pgprot(0));
			ptep[1] = pfn_pte(0, __pgprot(0));
		}
	}
}
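/*
 * Note (added for clarity): a MIPS TLB entry always maps an aligned pair of
 * pages through EntryLo0/EntryLo1, which is why the invalidation above first
 * rounds @addr down with "addr &= PAGE_MASK << 1" and then zaps both ptep[0]
 * and ptep[1]; clearing only one half would leave a stale mapping reachable
 * through the same TLB entry.
 */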
/*
 * kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest virtual address space from the VM's GVA page tables.
 */

static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva,
				   unsigned long end_gva)
{
	int i_min = pte_index(start_gva);
	int i_max = pte_index(end_gva);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
	int i;

	/*
	 * There's no freeing to do, so there's no point clearing individual
	 * entries unless only part of the last level page table needs flushing.
	 */
	if (safe_to_remove)
		return true;

	for (i = i_min; i <= i_max; ++i) {
		if (!pte_present(pte[i]))
			continue;

		set_pte(pte + i, __pte(0));
	}
	return false;
}

static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva,
				   unsigned long end_gva)
{
	pte_t *pte;
	unsigned long end = ~0ul;
	int i_min = pmd_index(start_gva);
	int i_max = pmd_index(end_gva);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gva = 0) {
		if (!pmd_present(pmd[i]))
			continue;

		pte = pte_offset_kernel(pmd + i, 0);
		if (i == i_max)
			end = end_gva;

		if (kvm_mips_flush_gva_pte(pte, start_gva, end)) {
			pmd_clear(pmd + i);
			pte_free_kernel(NULL, pte);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
				   unsigned long end_gva)
{
	pmd_t *pmd;
	unsigned long end = ~0ul;
	int i_min = pud_index(start_gva);
	int i_max = pud_index(end_gva);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gva = 0) {
		if (!pud_present(pud[i]))
			continue;

		pmd = pmd_offset(pud + i, 0);
		if (i == i_max)
			end = end_gva;

		if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) {
			pud_clear(pud + i);
			pmd_free(NULL, pmd);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
				   unsigned long end_gva)
{
	p4d_t *p4d;
	pud_t *pud;
	unsigned long end = ~0ul;
	int i_min = pgd_index(start_gva);
	int i_max = pgd_index(end_gva);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gva = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		p4d = p4d_offset(pgd, 0);
		pud = pud_offset(p4d + i, 0);
		if (i == i_max)
			end = end_gva;

		if (kvm_mips_flush_gva_pud(pud, start_gva, end)) {
			pgd_clear(pgd + i);
			pud_free(NULL, pud);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}

void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags)
{
	if (flags & KMF_GPA) {
		/* all of guest virtual address space could be affected */
		if (flags & KMF_KERN)
			/* useg, kseg0, seg2/3 */
			kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff);
		else
			/* useg */
			kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
	} else {
		/* useg */
		kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);

		/* kseg2/3 */
		if (flags & KMF_KERN)
			kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff);
	}
}
static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte)
{
	/*
	 * Don't leak writeable but clean entries from GPA page tables. We don't
	 * want the normal Linux tlbmod handler to handle dirtying when KVM
	 * accesses guest memory.
	 */
	if (!pte_dirty(pte))
		pte = pte_wrprotect(pte);

	return pte;
}

static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo)
{
	/* Guest EntryLo overrides host EntryLo */
	if (!(entrylo & ENTRYLO_D))
		pte = pte_mkclean(pte);

	return kvm_mips_gpa_pte_to_gva_unmapped(pte);
}

#ifdef CONFIG_KVM_MIPS_VZ
int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
				      struct kvm_vcpu *vcpu,
				      bool write_fault)
{
	int ret;

	ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
	if (ret)
		return ret;

	/* Invalidate this entry in the TLB */
	return kvm_vz_host_tlb_inv(vcpu, badvaddr);
}
#endif

/* XXXKYMA: Must be called with interrupts disabled */
int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
				    struct kvm_vcpu *vcpu,
				    bool write_fault)
{
	unsigned long gpa;
	pte_t pte_gpa[2], *ptep_gva;
	int idx;

	if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
		kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
		kvm_mips_dump_host_tlbs();
		return -1;
	}

	/* Get the GPA page table entry */
	gpa = KVM_GUEST_CPHYSADDR(badvaddr);
	idx = (badvaddr >> PAGE_SHIFT) & 1;
	if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx],
			      &pte_gpa[!idx]) < 0)
		return -1;

	/* Get the GVA page table entry */
	ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE);
	if (!ptep_gva) {
		kvm_err("No ptep for gva %lx\n", badvaddr);
		return -1;
	}

	/* Copy a pair of entries from GPA page table to GVA page table */
	ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]);
	ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]);

	/* Invalidate this entry in the TLB, guest kernel ASID only */
	kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
	return 0;
}
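/*
 * Note (added for clarity): the handler above fills both halves of the
 * even/odd page pair in one go. "idx = (badvaddr >> PAGE_SHIFT) & 1" selects
 * which half actually faulted, kvm_mips_map_page() returns that PTE plus its
 * buddy, and both are copied into the GVA table so that the single host TLB
 * entry covering the pair is complete when it is refilled.
 */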
int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
					 struct kvm_mips_tlb *tlb,
					 unsigned long gva,
					 bool write_fault)
{
	struct kvm *kvm = vcpu->kvm;
	long tlb_lo[2];
	pte_t pte_gpa[2], *ptep_buddy, *ptep_gva;
	unsigned int idx = TLB_LO_IDX(*tlb, gva);
	bool kernel = KVM_GUEST_KERNEL_MODE(vcpu);

	tlb_lo[0] = tlb->tlb_lo[0];
	tlb_lo[1] = tlb->tlb_lo[1];

	/*
	 * The commpage address must not be mapped to anything else if the guest
	 * TLB contains entries nearby, or commpage accesses will break.
	 */
	if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1)))
		tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0;

	/* Get the GPA page table entry */
	if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]),
			      write_fault, &pte_gpa[idx], NULL) < 0)
		return -1;

	/* And its GVA buddy's GPA page table entry if it also exists */
	pte_gpa[!idx] = pfn_pte(0, __pgprot(0));
	if (tlb_lo[!idx] & ENTRYLO_V) {
		spin_lock(&kvm->mmu_lock);
		ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL,
					mips3_tlbpfn_to_paddr(tlb_lo[!idx]));
		if (ptep_buddy)
			pte_gpa[!idx] = *ptep_buddy;
		spin_unlock(&kvm->mmu_lock);
	}

	/* Get the GVA page table entry pair */
	ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE);
	if (!ptep_gva) {
		kvm_err("No ptep for gva %lx\n", gva);
		return -1;
	}

	/* Copy a pair of entries from GPA page table to GVA page table */
	ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]);
	ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]);

	/* Invalidate this entry in the TLB, current guest mode ASID only */
	kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel);

	kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
		  tlb->tlb_lo[0], tlb->tlb_lo[1]);

	return 0;
}

int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
				       struct kvm_vcpu *vcpu)
{
	kvm_pfn_t pfn;
	pte_t *ptep;
	pgprot_t prot;

	ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr);
	if (!ptep) {
		kvm_err("No ptep for commpage %lx\n", badvaddr);
		return -1;
	}

	pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage));
	/* Also set valid and dirty, so refill handler doesn't have to */
	prot = vm_get_page_prot(VM_READ|VM_WRITE|VM_SHARED);
	*ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, prot)));

	/* Invalidate this entry in the TLB, guest kernel ASID only */
	kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
	return 0;
}

/**
 * kvm_mips_migrate_count() - Migrate timer.
 * @vcpu:	Virtual CPU.
 *
 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
 * if it was running prior to being cancelled.
 *
 * Must be called when the VCPU is migrated to a different CPU to ensure that
 * timer expiry during guest execution interrupts the guest and causes the
 * interrupt to be delivered in a timely manner.
 */
static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
{
	if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
		hrtimer_restart(&vcpu->arch.comparecount_timer);
}
/* Restore ASID once we are scheduled back after preemption */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	unsigned long flags;

	kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);

	local_irq_save(flags);

	vcpu->cpu = cpu;
	if (vcpu->arch.last_sched_cpu != cpu) {
		kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
			  vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
		/*
		 * Migrate the timer interrupt to the current CPU so that it
		 * always interrupts the guest and synchronously triggers a
		 * guest timer interrupt.
		 */
		kvm_mips_migrate_count(vcpu);
	}

	/* restore guest state to registers */
	kvm_mips_callbacks->vcpu_load(vcpu, cpu);

	local_irq_restore(flags);
}

/* ASID can change if another task is scheduled during preemption */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	unsigned long flags;
	int cpu;

	local_irq_save(flags);

	cpu = smp_processor_id();
	vcpu->arch.last_sched_cpu = cpu;
	vcpu->cpu = -1;

	/* save guest state in registers */
	kvm_mips_callbacks->vcpu_put(vcpu, cpu);

	local_irq_restore(flags);
}

/**
 * kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault.
 * @vcpu:	Virtual CPU.
 * @gva:	Guest virtual address to be accessed.
 * @write:	True if write attempted (must be dirtied and made writable).
 *
 * Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and
 * dirtying the page if @write so that guest instructions can be modified.
 *
 * Returns:	KVM_MIPS_MAPPED on success.
 *		KVM_MIPS_GVA if bad guest virtual address.
 *		KVM_MIPS_GPA if bad guest physical address.
 *		KVM_MIPS_TLB if guest TLB not present.
 *		KVM_MIPS_TLBINV if guest TLB present but not valid.
 *		KVM_MIPS_TLBMOD if guest TLB read only.
 */
enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
						   unsigned long gva,
						   bool write)
{
	struct mips_coproc *cop0 = vcpu->arch.cop0;
	struct kvm_mips_tlb *tlb;
	int index;

	if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) {
		if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0)
			return KVM_MIPS_GPA;
	} else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) ||
		   KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) {
		/* Address should be in the guest TLB */
		index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) |
			  (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID));
		if (index < 0)
			return KVM_MIPS_TLB;
		tlb = &vcpu->arch.guest_tlb[index];

		/* Entry should be valid, and dirty for writes */
		if (!TLB_IS_VALID(*tlb, gva))
			return KVM_MIPS_TLBINV;
		if (write && !TLB_IS_DIRTY(*tlb, gva))
			return KVM_MIPS_TLBMOD;

		if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write))
			return KVM_MIPS_GPA;
	} else {
		return KVM_MIPS_GVA;
	}

	return KVM_MIPS_MAPPED;
}

int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
	int err;

	if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ),
		 "Expect BadInstr/BadInstrP registers to be used with VZ\n"))
		return -EINVAL;

retry:
	kvm_trap_emul_gva_lockless_begin(vcpu);
	err = get_user(*out, opc);
	kvm_trap_emul_gva_lockless_end(vcpu);

	if (unlikely(err)) {
		/*
		 * Try to handle the fault, maybe we just raced with a GVA
		 * invalidation.
		 */
		err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc,
					      false);
		if (unlikely(err)) {
			kvm_err("%s: illegal address: %p\n",
				__func__, opc);
			return -EFAULT;
		}

		/* Hopefully it'll work now */
		goto retry;
	}
	return 0;
}