1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 5 */ 6 7 #include <linux/types.h> 8 #include <linux/string.h> 9 #include <linux/kvm.h> 10 #include <linux/kvm_host.h> 11 #include <linux/hugetlb.h> 12 #include <linux/module.h> 13 #include <linux/log2.h> 14 #include <linux/sizes.h> 15 16 #include <asm/trace.h> 17 #include <asm/kvm_ppc.h> 18 #include <asm/kvm_book3s.h> 19 #include <asm/book3s/64/mmu-hash.h> 20 #include <asm/hvcall.h> 21 #include <asm/synch.h> 22 #include <asm/ppc-opcode.h> 23 #include <asm/pte-walk.h> 24 25 /* Translate address of a vmalloc'd thing to a linear map address */ 26 static void *real_vmalloc_addr(void *x) 27 { 28 unsigned long addr = (unsigned long) x; 29 pte_t *p; 30 /* 31 * assume we don't have huge pages in vmalloc space... 32 * So don't worry about THP collapse/split. Called 33 * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. 34 */ 35 p = find_init_mm_pte(addr, NULL); 36 if (!p || !pte_present(*p)) 37 return NULL; 38 addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); 39 return __va(addr); 40 } 41 42 /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ 43 static int global_invalidates(struct kvm *kvm) 44 { 45 int global; 46 int cpu; 47 48 /* 49 * If there is only one vcore, and it's currently running, 50 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, 51 * we can use tlbiel as long as we mark all other physical 52 * cores as potentially having stale TLB entries for this lpid. 53 * Otherwise, don't use tlbiel. 54 */ 55 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) 56 global = 0; 57 else 58 global = 1; 59 60 if (!global) { 61 /* any other core might now have stale TLB entries... */ 62 smp_wmb(); 63 cpumask_setall(&kvm->arch.need_tlb_flush); 64 cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; 65 /* 66 * On POWER9, threads are independent but the TLB is shared, 67 * so use the bit for the first thread to represent the core. 68 */ 69 if (cpu_has_feature(CPU_FTR_ARCH_300)) 70 cpu = cpu_first_thread_sibling(cpu); 71 cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); 72 } 73 74 return global; 75 } 76 77 /* 78 * Add this HPTE into the chain for the real page. 79 * Must be called with the chain locked; it unlocks the chain. 80 */ 81 void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 82 unsigned long *rmap, long pte_index, int realmode) 83 { 84 struct revmap_entry *head, *tail; 85 unsigned long i; 86 87 if (*rmap & KVMPPC_RMAP_PRESENT) { 88 i = *rmap & KVMPPC_RMAP_INDEX; 89 head = &kvm->arch.hpt.rev[i]; 90 if (realmode) 91 head = real_vmalloc_addr(head); 92 tail = &kvm->arch.hpt.rev[head->back]; 93 if (realmode) 94 tail = real_vmalloc_addr(tail); 95 rev->forw = i; 96 rev->back = head->back; 97 tail->forw = pte_index; 98 head->back = pte_index; 99 } else { 100 rev->forw = rev->back = pte_index; 101 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | 102 pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT; 103 } 104 unlock_rmap(rmap); 105 } 106 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 107 108 /* Update the dirty bitmap of a memslot */ 109 void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, 110 unsigned long gfn, unsigned long psize) 111 { 112 unsigned long npages; 113 114 if (!psize || !memslot->dirty_bitmap) 115 return; 116 npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE; 117 gfn -= memslot->base_gfn; 118 set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages); 119 } 120 EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map); 121 122 static void kvmppc_set_dirty_from_hpte(struct kvm *kvm, 123 unsigned long hpte_v, unsigned long hpte_gr) 124 { 125 struct kvm_memory_slot *memslot; 126 unsigned long gfn; 127 unsigned long psize; 128 129 psize = kvmppc_actual_pgsz(hpte_v, hpte_gr); 130 gfn = hpte_rpn(hpte_gr, psize); 131 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 132 if (memslot && memslot->dirty_bitmap) 133 kvmppc_update_dirty_map(memslot, gfn, psize); 134 } 135 136 /* Returns a pointer to the revmap entry for the page mapped by a HPTE */ 137 static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, 138 unsigned long hpte_gr, 139 struct kvm_memory_slot **memslotp, 140 unsigned long *gfnp) 141 { 142 struct kvm_memory_slot *memslot; 143 unsigned long *rmap; 144 unsigned long gfn; 145 146 gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr)); 147 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 148 if (memslotp) 149 *memslotp = memslot; 150 if (gfnp) 151 *gfnp = gfn; 152 if (!memslot) 153 return NULL; 154 155 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); 156 return rmap; 157 } 158 159 /* Remove this HPTE from the chain for a real page */ 160 static void remove_revmap_chain(struct kvm *kvm, long pte_index, 161 struct revmap_entry *rev, 162 unsigned long hpte_v, unsigned long hpte_r) 163 { 164 struct revmap_entry *next, *prev; 165 unsigned long ptel, head; 166 unsigned long *rmap; 167 unsigned long rcbits; 168 struct kvm_memory_slot *memslot; 169 unsigned long gfn; 170 171 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 172 ptel = rev->guest_rpte |= rcbits; 173 rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn); 174 if (!rmap) 175 return; 176 lock_rmap(rmap); 177 178 head = *rmap & KVMPPC_RMAP_INDEX; 179 next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]); 180 prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]); 181 next->back = rev->back; 182 prev->forw = rev->forw; 183 if (head == pte_index) { 184 head = rev->forw; 185 if (head == pte_index) 186 *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); 187 else 188 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; 189 } 190 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; 191 if (rcbits & HPTE_R_C) 192 kvmppc_update_dirty_map(memslot, gfn, 193 kvmppc_actual_pgsz(hpte_v, hpte_r)); 194 unlock_rmap(rmap); 195 } 196 197 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, 198 long pte_index, unsigned long pteh, unsigned long ptel, 199 pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) 200 { 201 unsigned long i, pa, gpa, gfn, psize; 202 unsigned long slot_fn, hva; 203 __be64 *hpte; 204 struct revmap_entry *rev; 205 unsigned long g_ptel; 206 struct kvm_memory_slot *memslot; 207 unsigned hpage_shift; 208 bool is_ci; 209 unsigned long *rmap; 210 pte_t *ptep; 211 unsigned int writing; 212 unsigned long mmu_seq; 213 unsigned long rcbits, irq_flags = 0; 214 215 if (kvm_is_radix(kvm)) 216 return H_FUNCTION; 217 psize = kvmppc_actual_pgsz(pteh, ptel); 218 if (!psize) 219 return H_PARAMETER; 220 writing = hpte_is_writable(ptel); 221 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 222 ptel &= ~HPTE_GR_RESERVED; 223 g_ptel = ptel; 224 225 /* used later to detect if we might have been invalidated */ 226 mmu_seq = kvm->mmu_notifier_seq; 227 smp_rmb(); 228 229 /* Find the memslot (if any) for this address */ 230 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 231 gfn = gpa >> PAGE_SHIFT; 232 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 233 pa = 0; 234 is_ci = false; 235 rmap = NULL; 236 if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { 237 /* Emulated MMIO - mark this with key=31 */ 238 pteh |= HPTE_V_ABSENT; 239 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; 240 goto do_insert; 241 } 242 243 /* Check if the requested page fits entirely in the memslot. */ 244 if (!slot_is_aligned(memslot, psize)) 245 return H_PARAMETER; 246 slot_fn = gfn - memslot->base_gfn; 247 rmap = &memslot->arch.rmap[slot_fn]; 248 249 /* Translate to host virtual address */ 250 hva = __gfn_to_hva_memslot(memslot, gfn); 251 /* 252 * If we had a page table table change after lookup, we would 253 * retry via mmu_notifier_retry. 254 */ 255 if (!realmode) 256 local_irq_save(irq_flags); 257 /* 258 * If called in real mode we have MSR_EE = 0. Otherwise 259 * we disable irq above. 260 */ 261 ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); 262 if (ptep) { 263 pte_t pte; 264 unsigned int host_pte_size; 265 266 if (hpage_shift) 267 host_pte_size = 1ul << hpage_shift; 268 else 269 host_pte_size = PAGE_SIZE; 270 /* 271 * We should always find the guest page size 272 * to <= host page size, if host is using hugepage 273 */ 274 if (host_pte_size < psize) { 275 if (!realmode) 276 local_irq_restore(flags); 277 return H_PARAMETER; 278 } 279 pte = kvmppc_read_update_linux_pte(ptep, writing); 280 if (pte_present(pte) && !pte_protnone(pte)) { 281 if (writing && !__pte_write(pte)) 282 /* make the actual HPTE be read-only */ 283 ptel = hpte_make_readonly(ptel); 284 is_ci = pte_ci(pte); 285 pa = pte_pfn(pte) << PAGE_SHIFT; 286 pa |= hva & (host_pte_size - 1); 287 pa |= gpa & ~PAGE_MASK; 288 } 289 } 290 if (!realmode) 291 local_irq_restore(irq_flags); 292 293 ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); 294 ptel |= pa; 295 296 if (pa) 297 pteh |= HPTE_V_VALID; 298 else { 299 pteh |= HPTE_V_ABSENT; 300 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); 301 } 302 303 /*If we had host pte mapping then Check WIMG */ 304 if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { 305 if (is_ci) 306 return H_PARAMETER; 307 /* 308 * Allow guest to map emulated device memory as 309 * uncacheable, but actually make it cacheable. 310 */ 311 ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G); 312 ptel |= HPTE_R_M; 313 } 314 315 /* Find and lock the HPTEG slot to use */ 316 do_insert: 317 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 318 return H_PARAMETER; 319 if (likely((flags & H_EXACT) == 0)) { 320 pte_index &= ~7UL; 321 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 322 for (i = 0; i < 8; ++i) { 323 if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && 324 try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 325 HPTE_V_ABSENT)) 326 break; 327 hpte += 2; 328 } 329 if (i == 8) { 330 /* 331 * Since try_lock_hpte doesn't retry (not even stdcx. 332 * failures), it could be that there is a free slot 333 * but we transiently failed to lock it. Try again, 334 * actually locking each slot and checking it. 335 */ 336 hpte -= 16; 337 for (i = 0; i < 8; ++i) { 338 u64 pte; 339 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 340 cpu_relax(); 341 pte = be64_to_cpu(hpte[0]); 342 if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) 343 break; 344 __unlock_hpte(hpte, pte); 345 hpte += 2; 346 } 347 if (i == 8) 348 return H_PTEG_FULL; 349 } 350 pte_index += i; 351 } else { 352 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 353 if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 354 HPTE_V_ABSENT)) { 355 /* Lock the slot and check again */ 356 u64 pte; 357 358 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 359 cpu_relax(); 360 pte = be64_to_cpu(hpte[0]); 361 if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { 362 __unlock_hpte(hpte, pte); 363 return H_PTEG_FULL; 364 } 365 } 366 } 367 368 /* Save away the guest's idea of the second HPTE dword */ 369 rev = &kvm->arch.hpt.rev[pte_index]; 370 if (realmode) 371 rev = real_vmalloc_addr(rev); 372 if (rev) { 373 rev->guest_rpte = g_ptel; 374 note_hpte_modification(kvm, rev); 375 } 376 377 /* Link HPTE into reverse-map chain */ 378 if (pteh & HPTE_V_VALID) { 379 if (realmode) 380 rmap = real_vmalloc_addr(rmap); 381 lock_rmap(rmap); 382 /* Check for pending invalidations under the rmap chain lock */ 383 if (mmu_notifier_retry(kvm, mmu_seq)) { 384 /* inval in progress, write a non-present HPTE */ 385 pteh |= HPTE_V_ABSENT; 386 pteh &= ~HPTE_V_VALID; 387 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); 388 unlock_rmap(rmap); 389 } else { 390 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, 391 realmode); 392 /* Only set R/C in real HPTE if already set in *rmap */ 393 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; 394 ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C); 395 } 396 } 397 398 /* Convert to new format on P9 */ 399 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 400 ptel = hpte_old_to_new_r(pteh, ptel); 401 pteh = hpte_old_to_new_v(pteh); 402 } 403 hpte[1] = cpu_to_be64(ptel); 404 405 /* Write the first HPTE dword, unlocking the HPTE and making it valid */ 406 eieio(); 407 __unlock_hpte(hpte, pteh); 408 asm volatile("ptesync" : : : "memory"); 409 410 *pte_idx_ret = pte_index; 411 return H_SUCCESS; 412 } 413 EXPORT_SYMBOL_GPL(kvmppc_do_h_enter); 414 415 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 416 long pte_index, unsigned long pteh, unsigned long ptel) 417 { 418 return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel, 419 vcpu->arch.pgdir, true, 420 &vcpu->arch.regs.gpr[4]); 421 } 422 423 #ifdef __BIG_ENDIAN__ 424 #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) 425 #else 426 #define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) 427 #endif 428 429 static inline int is_mmio_hpte(unsigned long v, unsigned long r) 430 { 431 return ((v & HPTE_V_ABSENT) && 432 (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 433 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); 434 } 435 436 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, 437 long npages, int global, bool need_sync) 438 { 439 long i; 440 441 /* 442 * We use the POWER9 5-operand versions of tlbie and tlbiel here. 443 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores 444 * the RS field, this is backwards-compatible with P7 and P8. 445 */ 446 if (global) { 447 if (need_sync) 448 asm volatile("ptesync" : : : "memory"); 449 for (i = 0; i < npages; ++i) { 450 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 451 "r" (rbvalues[i]), "r" (kvm->arch.lpid)); 452 } 453 454 if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { 455 /* 456 * Need the extra ptesync to make sure we don't 457 * re-order the tlbie 458 */ 459 asm volatile("ptesync": : :"memory"); 460 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 461 "r" (rbvalues[0]), "r" (kvm->arch.lpid)); 462 } 463 464 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 465 } else { 466 if (need_sync) 467 asm volatile("ptesync" : : : "memory"); 468 for (i = 0; i < npages; ++i) { 469 asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : 470 "r" (rbvalues[i]), "r" (0)); 471 } 472 asm volatile("ptesync" : : : "memory"); 473 } 474 } 475 476 long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, 477 unsigned long pte_index, unsigned long avpn, 478 unsigned long *hpret) 479 { 480 __be64 *hpte; 481 unsigned long v, r, rb; 482 struct revmap_entry *rev; 483 u64 pte, orig_pte, pte_r; 484 485 if (kvm_is_radix(kvm)) 486 return H_FUNCTION; 487 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 488 return H_PARAMETER; 489 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 490 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 491 cpu_relax(); 492 pte = orig_pte = be64_to_cpu(hpte[0]); 493 pte_r = be64_to_cpu(hpte[1]); 494 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 495 pte = hpte_new_to_old_v(pte, pte_r); 496 pte_r = hpte_new_to_old_r(pte_r); 497 } 498 if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 499 ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || 500 ((flags & H_ANDCOND) && (pte & avpn) != 0)) { 501 __unlock_hpte(hpte, orig_pte); 502 return H_NOT_FOUND; 503 } 504 505 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 506 v = pte & ~HPTE_V_HVLOCK; 507 if (v & HPTE_V_VALID) { 508 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 509 rb = compute_tlbie_rb(v, pte_r, pte_index); 510 do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 511 /* 512 * The reference (R) and change (C) bits in a HPT 513 * entry can be set by hardware at any time up until 514 * the HPTE is invalidated and the TLB invalidation 515 * sequence has completed. This means that when 516 * removing a HPTE, we need to re-read the HPTE after 517 * the invalidation sequence has completed in order to 518 * obtain reliable values of R and C. 519 */ 520 remove_revmap_chain(kvm, pte_index, rev, v, 521 be64_to_cpu(hpte[1])); 522 } 523 r = rev->guest_rpte & ~HPTE_GR_RESERVED; 524 note_hpte_modification(kvm, rev); 525 unlock_hpte(hpte, 0); 526 527 if (is_mmio_hpte(v, pte_r)) 528 atomic64_inc(&kvm->arch.mmio_update); 529 530 if (v & HPTE_V_ABSENT) 531 v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; 532 hpret[0] = v; 533 hpret[1] = r; 534 return H_SUCCESS; 535 } 536 EXPORT_SYMBOL_GPL(kvmppc_do_h_remove); 537 538 long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, 539 unsigned long pte_index, unsigned long avpn) 540 { 541 return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn, 542 &vcpu->arch.regs.gpr[4]); 543 } 544 545 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) 546 { 547 struct kvm *kvm = vcpu->kvm; 548 unsigned long *args = &vcpu->arch.regs.gpr[4]; 549 __be64 *hp, *hptes[4]; 550 unsigned long tlbrb[4]; 551 long int i, j, k, n, found, indexes[4]; 552 unsigned long flags, req, pte_index, rcbits; 553 int global; 554 long int ret = H_SUCCESS; 555 struct revmap_entry *rev, *revs[4]; 556 u64 hp0, hp1; 557 558 if (kvm_is_radix(kvm)) 559 return H_FUNCTION; 560 global = global_invalidates(kvm); 561 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 562 n = 0; 563 for (; i < 4; ++i) { 564 j = i * 2; 565 pte_index = args[j]; 566 flags = pte_index >> 56; 567 pte_index &= ((1ul << 56) - 1); 568 req = flags >> 6; 569 flags &= 3; 570 if (req == 3) { /* no more requests */ 571 i = 4; 572 break; 573 } 574 if (req != 1 || flags == 3 || 575 pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) { 576 /* parameter error */ 577 args[j] = ((0xa0 | flags) << 56) + pte_index; 578 ret = H_PARAMETER; 579 break; 580 } 581 hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4)); 582 /* to avoid deadlock, don't spin except for first */ 583 if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { 584 if (n) 585 break; 586 while (!try_lock_hpte(hp, HPTE_V_HVLOCK)) 587 cpu_relax(); 588 } 589 found = 0; 590 hp0 = be64_to_cpu(hp[0]); 591 hp1 = be64_to_cpu(hp[1]); 592 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 593 hp0 = hpte_new_to_old_v(hp0, hp1); 594 hp1 = hpte_new_to_old_r(hp1); 595 } 596 if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { 597 switch (flags & 3) { 598 case 0: /* absolute */ 599 found = 1; 600 break; 601 case 1: /* andcond */ 602 if (!(hp0 & args[j + 1])) 603 found = 1; 604 break; 605 case 2: /* AVPN */ 606 if ((hp0 & ~0x7fUL) == args[j + 1]) 607 found = 1; 608 break; 609 } 610 } 611 if (!found) { 612 hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); 613 args[j] = ((0x90 | flags) << 56) + pte_index; 614 continue; 615 } 616 617 args[j] = ((0x80 | flags) << 56) + pte_index; 618 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 619 note_hpte_modification(kvm, rev); 620 621 if (!(hp0 & HPTE_V_VALID)) { 622 /* insert R and C bits from PTE */ 623 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 624 args[j] |= rcbits << (56 - 5); 625 hp[0] = 0; 626 if (is_mmio_hpte(hp0, hp1)) 627 atomic64_inc(&kvm->arch.mmio_update); 628 continue; 629 } 630 631 /* leave it locked */ 632 hp[0] &= ~cpu_to_be64(HPTE_V_VALID); 633 tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index); 634 indexes[n] = j; 635 hptes[n] = hp; 636 revs[n] = rev; 637 ++n; 638 } 639 640 if (!n) 641 break; 642 643 /* Now that we've collected a batch, do the tlbies */ 644 do_tlbies(kvm, tlbrb, n, global, true); 645 646 /* Read PTE low words after tlbie to get final R/C values */ 647 for (k = 0; k < n; ++k) { 648 j = indexes[k]; 649 pte_index = args[j] & ((1ul << 56) - 1); 650 hp = hptes[k]; 651 rev = revs[k]; 652 remove_revmap_chain(kvm, pte_index, rev, 653 be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); 654 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 655 args[j] |= rcbits << (56 - 5); 656 __unlock_hpte(hp, 0); 657 } 658 } 659 660 return ret; 661 } 662 663 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, 664 unsigned long pte_index, unsigned long avpn, 665 unsigned long va) 666 { 667 struct kvm *kvm = vcpu->kvm; 668 __be64 *hpte; 669 struct revmap_entry *rev; 670 unsigned long v, r, rb, mask, bits; 671 u64 pte_v, pte_r; 672 673 if (kvm_is_radix(kvm)) 674 return H_FUNCTION; 675 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 676 return H_PARAMETER; 677 678 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 679 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 680 cpu_relax(); 681 v = pte_v = be64_to_cpu(hpte[0]); 682 if (cpu_has_feature(CPU_FTR_ARCH_300)) 683 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1])); 684 if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 685 ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) { 686 __unlock_hpte(hpte, pte_v); 687 return H_NOT_FOUND; 688 } 689 690 pte_r = be64_to_cpu(hpte[1]); 691 bits = (flags << 55) & HPTE_R_PP0; 692 bits |= (flags << 48) & HPTE_R_KEY_HI; 693 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); 694 695 /* Update guest view of 2nd HPTE dword */ 696 mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | 697 HPTE_R_KEY_HI | HPTE_R_KEY_LO; 698 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 699 if (rev) { 700 r = (rev->guest_rpte & ~mask) | bits; 701 rev->guest_rpte = r; 702 note_hpte_modification(kvm, rev); 703 } 704 705 /* Update HPTE */ 706 if (v & HPTE_V_VALID) { 707 /* 708 * If the page is valid, don't let it transition from 709 * readonly to writable. If it should be writable, we'll 710 * take a trap and let the page fault code sort it out. 711 */ 712 r = (pte_r & ~mask) | bits; 713 if (hpte_is_writable(r) && !hpte_is_writable(pte_r)) 714 r = hpte_make_readonly(r); 715 /* If the PTE is changing, invalidate it first */ 716 if (r != pte_r) { 717 rb = compute_tlbie_rb(v, r, pte_index); 718 hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | 719 HPTE_V_ABSENT); 720 do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 721 /* Don't lose R/C bit updates done by hardware */ 722 r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); 723 hpte[1] = cpu_to_be64(r); 724 } 725 } 726 unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK); 727 asm volatile("ptesync" : : : "memory"); 728 if (is_mmio_hpte(v, pte_r)) 729 atomic64_inc(&kvm->arch.mmio_update); 730 731 return H_SUCCESS; 732 } 733 734 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, 735 unsigned long pte_index) 736 { 737 struct kvm *kvm = vcpu->kvm; 738 __be64 *hpte; 739 unsigned long v, r; 740 int i, n = 1; 741 struct revmap_entry *rev = NULL; 742 743 if (kvm_is_radix(kvm)) 744 return H_FUNCTION; 745 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 746 return H_PARAMETER; 747 if (flags & H_READ_4) { 748 pte_index &= ~3; 749 n = 4; 750 } 751 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 752 for (i = 0; i < n; ++i, ++pte_index) { 753 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 754 v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 755 r = be64_to_cpu(hpte[1]); 756 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 757 v = hpte_new_to_old_v(v, r); 758 r = hpte_new_to_old_r(r); 759 } 760 if (v & HPTE_V_ABSENT) { 761 v &= ~HPTE_V_ABSENT; 762 v |= HPTE_V_VALID; 763 } 764 if (v & HPTE_V_VALID) { 765 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); 766 r &= ~HPTE_GR_RESERVED; 767 } 768 vcpu->arch.regs.gpr[4 + i * 2] = v; 769 vcpu->arch.regs.gpr[5 + i * 2] = r; 770 } 771 return H_SUCCESS; 772 } 773 774 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, 775 unsigned long pte_index) 776 { 777 struct kvm *kvm = vcpu->kvm; 778 __be64 *hpte; 779 unsigned long v, r, gr; 780 struct revmap_entry *rev; 781 unsigned long *rmap; 782 long ret = H_NOT_FOUND; 783 784 if (kvm_is_radix(kvm)) 785 return H_FUNCTION; 786 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 787 return H_PARAMETER; 788 789 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 790 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 791 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 792 cpu_relax(); 793 v = be64_to_cpu(hpte[0]); 794 r = be64_to_cpu(hpte[1]); 795 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 796 goto out; 797 798 gr = rev->guest_rpte; 799 if (rev->guest_rpte & HPTE_R_R) { 800 rev->guest_rpte &= ~HPTE_R_R; 801 note_hpte_modification(kvm, rev); 802 } 803 if (v & HPTE_V_VALID) { 804 gr |= r & (HPTE_R_R | HPTE_R_C); 805 if (r & HPTE_R_R) { 806 kvmppc_clear_ref_hpte(kvm, hpte, pte_index); 807 rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL); 808 if (rmap) { 809 lock_rmap(rmap); 810 *rmap |= KVMPPC_RMAP_REFERENCED; 811 unlock_rmap(rmap); 812 } 813 } 814 } 815 vcpu->arch.regs.gpr[4] = gr; 816 ret = H_SUCCESS; 817 out: 818 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 819 return ret; 820 } 821 822 long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, 823 unsigned long pte_index) 824 { 825 struct kvm *kvm = vcpu->kvm; 826 __be64 *hpte; 827 unsigned long v, r, gr; 828 struct revmap_entry *rev; 829 long ret = H_NOT_FOUND; 830 831 if (kvm_is_radix(kvm)) 832 return H_FUNCTION; 833 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 834 return H_PARAMETER; 835 836 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 837 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 838 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 839 cpu_relax(); 840 v = be64_to_cpu(hpte[0]); 841 r = be64_to_cpu(hpte[1]); 842 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 843 goto out; 844 845 gr = rev->guest_rpte; 846 if (gr & HPTE_R_C) { 847 rev->guest_rpte &= ~HPTE_R_C; 848 note_hpte_modification(kvm, rev); 849 } 850 if (v & HPTE_V_VALID) { 851 /* need to make it temporarily absent so C is stable */ 852 hpte[0] |= cpu_to_be64(HPTE_V_ABSENT); 853 kvmppc_invalidate_hpte(kvm, hpte, pte_index); 854 r = be64_to_cpu(hpte[1]); 855 gr |= r & (HPTE_R_R | HPTE_R_C); 856 if (r & HPTE_R_C) { 857 hpte[1] = cpu_to_be64(r & ~HPTE_R_C); 858 eieio(); 859 kvmppc_set_dirty_from_hpte(kvm, v, gr); 860 } 861 } 862 vcpu->arch.regs.gpr[4] = gr; 863 ret = H_SUCCESS; 864 out: 865 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 866 return ret; 867 } 868 869 static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, 870 int writing, unsigned long *hpa, 871 struct kvm_memory_slot **memslot_p) 872 { 873 struct kvm *kvm = vcpu->kvm; 874 struct kvm_memory_slot *memslot; 875 unsigned long gfn, hva, pa, psize = PAGE_SHIFT; 876 unsigned int shift; 877 pte_t *ptep, pte; 878 879 /* Find the memslot for this address */ 880 gfn = gpa >> PAGE_SHIFT; 881 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 882 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 883 return H_PARAMETER; 884 885 /* Translate to host virtual address */ 886 hva = __gfn_to_hva_memslot(memslot, gfn); 887 888 /* Try to find the host pte for that virtual address */ 889 ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); 890 if (!ptep) 891 return H_TOO_HARD; 892 pte = kvmppc_read_update_linux_pte(ptep, writing); 893 if (!pte_present(pte)) 894 return H_TOO_HARD; 895 896 /* Convert to a physical address */ 897 if (shift) 898 psize = 1UL << shift; 899 pa = pte_pfn(pte) << PAGE_SHIFT; 900 pa |= hva & (psize - 1); 901 pa |= gpa & ~PAGE_MASK; 902 903 if (hpa) 904 *hpa = pa; 905 if (memslot_p) 906 *memslot_p = memslot; 907 908 return H_SUCCESS; 909 } 910 911 static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, 912 unsigned long dest) 913 { 914 struct kvm_memory_slot *memslot; 915 struct kvm *kvm = vcpu->kvm; 916 unsigned long pa, mmu_seq; 917 long ret = H_SUCCESS; 918 int i; 919 920 /* Used later to detect if we might have been invalidated */ 921 mmu_seq = kvm->mmu_notifier_seq; 922 smp_rmb(); 923 924 ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); 925 if (ret != H_SUCCESS) 926 return ret; 927 928 /* Check if we've been invalidated */ 929 raw_spin_lock(&kvm->mmu_lock.rlock); 930 if (mmu_notifier_retry(kvm, mmu_seq)) { 931 ret = H_TOO_HARD; 932 goto out_unlock; 933 } 934 935 /* Zero the page */ 936 for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) 937 dcbz((void *)pa); 938 kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); 939 940 out_unlock: 941 raw_spin_unlock(&kvm->mmu_lock.rlock); 942 return ret; 943 } 944 945 static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, 946 unsigned long dest, unsigned long src) 947 { 948 unsigned long dest_pa, src_pa, mmu_seq; 949 struct kvm_memory_slot *dest_memslot; 950 struct kvm *kvm = vcpu->kvm; 951 long ret = H_SUCCESS; 952 953 /* Used later to detect if we might have been invalidated */ 954 mmu_seq = kvm->mmu_notifier_seq; 955 smp_rmb(); 956 957 ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); 958 if (ret != H_SUCCESS) 959 return ret; 960 ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); 961 if (ret != H_SUCCESS) 962 return ret; 963 964 /* Check if we've been invalidated */ 965 raw_spin_lock(&kvm->mmu_lock.rlock); 966 if (mmu_notifier_retry(kvm, mmu_seq)) { 967 ret = H_TOO_HARD; 968 goto out_unlock; 969 } 970 971 /* Copy the page */ 972 memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); 973 974 kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); 975 976 out_unlock: 977 raw_spin_unlock(&kvm->mmu_lock.rlock); 978 return ret; 979 } 980 981 long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, 982 unsigned long dest, unsigned long src) 983 { 984 struct kvm *kvm = vcpu->kvm; 985 u64 pg_mask = SZ_4K - 1; /* 4K page size */ 986 long ret = H_SUCCESS; 987 988 /* Don't handle radix mode here, go up to the virtual mode handler */ 989 if (kvm_is_radix(kvm)) 990 return H_TOO_HARD; 991 992 /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ 993 if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | 994 H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) 995 return H_PARAMETER; 996 997 /* dest (and src if copy_page flag set) must be page aligned */ 998 if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) 999 return H_PARAMETER; 1000 1001 /* zero and/or copy the page as determined by the flags */ 1002 if (flags & H_COPY_PAGE) 1003 ret = kvmppc_do_h_page_init_copy(vcpu, dest, src); 1004 else if (flags & H_ZERO_PAGE) 1005 ret = kvmppc_do_h_page_init_zero(vcpu, dest); 1006 1007 /* We can ignore the other flags */ 1008 1009 return ret; 1010 } 1011 1012 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 1013 unsigned long pte_index) 1014 { 1015 unsigned long rb; 1016 u64 hp0, hp1; 1017 1018 hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); 1019 hp0 = be64_to_cpu(hptep[0]); 1020 hp1 = be64_to_cpu(hptep[1]); 1021 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1022 hp0 = hpte_new_to_old_v(hp0, hp1); 1023 hp1 = hpte_new_to_old_r(hp1); 1024 } 1025 rb = compute_tlbie_rb(hp0, hp1, pte_index); 1026 do_tlbies(kvm, &rb, 1, 1, true); 1027 } 1028 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); 1029 1030 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, 1031 unsigned long pte_index) 1032 { 1033 unsigned long rb; 1034 unsigned char rbyte; 1035 u64 hp0, hp1; 1036 1037 hp0 = be64_to_cpu(hptep[0]); 1038 hp1 = be64_to_cpu(hptep[1]); 1039 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1040 hp0 = hpte_new_to_old_v(hp0, hp1); 1041 hp1 = hpte_new_to_old_r(hp1); 1042 } 1043 rb = compute_tlbie_rb(hp0, hp1, pte_index); 1044 rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; 1045 /* modify only the second-last byte, which contains the ref bit */ 1046 *((char *)hptep + 14) = rbyte; 1047 do_tlbies(kvm, &rb, 1, 1, false); 1048 } 1049 EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); 1050 1051 static int slb_base_page_shift[4] = { 1052 24, /* 16M */ 1053 16, /* 64k */ 1054 34, /* 16G */ 1055 20, /* 1M, unsupported */ 1056 }; 1057 1058 static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu, 1059 unsigned long eaddr, unsigned long slb_v, long mmio_update) 1060 { 1061 struct mmio_hpte_cache_entry *entry = NULL; 1062 unsigned int pshift; 1063 unsigned int i; 1064 1065 for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) { 1066 entry = &vcpu->arch.mmio_cache.entry[i]; 1067 if (entry->mmio_update == mmio_update) { 1068 pshift = entry->slb_base_pshift; 1069 if ((entry->eaddr >> pshift) == (eaddr >> pshift) && 1070 entry->slb_v == slb_v) 1071 return entry; 1072 } 1073 } 1074 return NULL; 1075 } 1076 1077 static struct mmio_hpte_cache_entry * 1078 next_mmio_cache_entry(struct kvm_vcpu *vcpu) 1079 { 1080 unsigned int index = vcpu->arch.mmio_cache.index; 1081 1082 vcpu->arch.mmio_cache.index++; 1083 if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE) 1084 vcpu->arch.mmio_cache.index = 0; 1085 1086 return &vcpu->arch.mmio_cache.entry[index]; 1087 } 1088 1089 /* When called from virtmode, this func should be protected by 1090 * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK 1091 * can trigger deadlock issue. 1092 */ 1093 long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, 1094 unsigned long valid) 1095 { 1096 unsigned int i; 1097 unsigned int pshift; 1098 unsigned long somask; 1099 unsigned long vsid, hash; 1100 unsigned long avpn; 1101 __be64 *hpte; 1102 unsigned long mask, val; 1103 unsigned long v, r, orig_v; 1104 1105 /* Get page shift, work out hash and AVPN etc. */ 1106 mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; 1107 val = 0; 1108 pshift = 12; 1109 if (slb_v & SLB_VSID_L) { 1110 mask |= HPTE_V_LARGE; 1111 val |= HPTE_V_LARGE; 1112 pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4]; 1113 } 1114 if (slb_v & SLB_VSID_B_1T) { 1115 somask = (1UL << 40) - 1; 1116 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T; 1117 vsid ^= vsid << 25; 1118 } else { 1119 somask = (1UL << 28) - 1; 1120 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; 1121 } 1122 hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt); 1123 avpn = slb_v & ~(somask >> 16); /* also includes B */ 1124 avpn |= (eaddr & somask) >> 16; 1125 1126 if (pshift >= 24) 1127 avpn &= ~((1UL << (pshift - 16)) - 1); 1128 else 1129 avpn &= ~0x7fUL; 1130 val |= avpn; 1131 1132 for (;;) { 1133 hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7)); 1134 1135 for (i = 0; i < 16; i += 2) { 1136 /* Read the PTE racily */ 1137 v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 1138 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1139 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1])); 1140 1141 /* Check valid/absent, hash, segment size and AVPN */ 1142 if (!(v & valid) || (v & mask) != val) 1143 continue; 1144 1145 /* Lock the PTE and read it under the lock */ 1146 while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) 1147 cpu_relax(); 1148 v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 1149 r = be64_to_cpu(hpte[i+1]); 1150 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1151 v = hpte_new_to_old_v(v, r); 1152 r = hpte_new_to_old_r(r); 1153 } 1154 1155 /* 1156 * Check the HPTE again, including base page size 1157 */ 1158 if ((v & valid) && (v & mask) == val && 1159 kvmppc_hpte_base_page_shift(v, r) == pshift) 1160 /* Return with the HPTE still locked */ 1161 return (hash << 3) + (i >> 1); 1162 1163 __unlock_hpte(&hpte[i], orig_v); 1164 } 1165 1166 if (val & HPTE_V_SECONDARY) 1167 break; 1168 val |= HPTE_V_SECONDARY; 1169 hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt); 1170 } 1171 return -1; 1172 } 1173 EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte); 1174 1175 /* 1176 * Called in real mode to check whether an HPTE not found fault 1177 * is due to accessing a paged-out page or an emulated MMIO page, 1178 * or if a protection fault is due to accessing a page that the 1179 * guest wanted read/write access to but which we made read-only. 1180 * Returns a possibly modified status (DSISR) value if not 1181 * (i.e. pass the interrupt to the guest), 1182 * -1 to pass the fault up to host kernel mode code, -2 to do that 1183 * and also load the instruction word (for MMIO emulation), 1184 * or 0 if we should make the guest retry the access. 1185 */ 1186 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, 1187 unsigned long slb_v, unsigned int status, bool data) 1188 { 1189 struct kvm *kvm = vcpu->kvm; 1190 long int index; 1191 unsigned long v, r, gr, orig_v; 1192 __be64 *hpte; 1193 unsigned long valid; 1194 struct revmap_entry *rev; 1195 unsigned long pp, key; 1196 struct mmio_hpte_cache_entry *cache_entry = NULL; 1197 long mmio_update = 0; 1198 1199 /* For protection fault, expect to find a valid HPTE */ 1200 valid = HPTE_V_VALID; 1201 if (status & DSISR_NOHPTE) { 1202 valid |= HPTE_V_ABSENT; 1203 mmio_update = atomic64_read(&kvm->arch.mmio_update); 1204 cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update); 1205 } 1206 if (cache_entry) { 1207 index = cache_entry->pte_index; 1208 v = cache_entry->hpte_v; 1209 r = cache_entry->hpte_r; 1210 gr = cache_entry->rpte; 1211 } else { 1212 index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); 1213 if (index < 0) { 1214 if (status & DSISR_NOHPTE) 1215 return status; /* there really was no HPTE */ 1216 return 0; /* for prot fault, HPTE disappeared */ 1217 } 1218 hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); 1219 v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 1220 r = be64_to_cpu(hpte[1]); 1221 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1222 v = hpte_new_to_old_v(v, r); 1223 r = hpte_new_to_old_r(r); 1224 } 1225 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]); 1226 gr = rev->guest_rpte; 1227 1228 unlock_hpte(hpte, orig_v); 1229 } 1230 1231 /* For not found, if the HPTE is valid by now, retry the instruction */ 1232 if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) 1233 return 0; 1234 1235 /* Check access permissions to the page */ 1236 pp = gr & (HPTE_R_PP0 | HPTE_R_PP); 1237 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; 1238 status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ 1239 if (!data) { 1240 if (gr & (HPTE_R_N | HPTE_R_G)) 1241 return status | SRR1_ISI_N_OR_G; 1242 if (!hpte_read_permission(pp, slb_v & key)) 1243 return status | SRR1_ISI_PROT; 1244 } else if (status & DSISR_ISSTORE) { 1245 /* check write permission */ 1246 if (!hpte_write_permission(pp, slb_v & key)) 1247 return status | DSISR_PROTFAULT; 1248 } else { 1249 if (!hpte_read_permission(pp, slb_v & key)) 1250 return status | DSISR_PROTFAULT; 1251 } 1252 1253 /* Check storage key, if applicable */ 1254 if (data && (vcpu->arch.shregs.msr & MSR_DR)) { 1255 unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr); 1256 if (status & DSISR_ISSTORE) 1257 perm >>= 1; 1258 if (perm & 1) 1259 return status | DSISR_KEYFAULT; 1260 } 1261 1262 /* Save HPTE info for virtual-mode handler */ 1263 vcpu->arch.pgfault_addr = addr; 1264 vcpu->arch.pgfault_index = index; 1265 vcpu->arch.pgfault_hpte[0] = v; 1266 vcpu->arch.pgfault_hpte[1] = r; 1267 vcpu->arch.pgfault_cache = cache_entry; 1268 1269 /* Check the storage key to see if it is possibly emulated MMIO */ 1270 if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 1271 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { 1272 if (!cache_entry) { 1273 unsigned int pshift = 12; 1274 unsigned int pshift_index; 1275 1276 if (slb_v & SLB_VSID_L) { 1277 pshift_index = ((slb_v & SLB_VSID_LP) >> 4); 1278 pshift = slb_base_page_shift[pshift_index]; 1279 } 1280 cache_entry = next_mmio_cache_entry(vcpu); 1281 cache_entry->eaddr = addr; 1282 cache_entry->slb_base_pshift = pshift; 1283 cache_entry->pte_index = index; 1284 cache_entry->hpte_v = v; 1285 cache_entry->hpte_r = r; 1286 cache_entry->rpte = gr; 1287 cache_entry->slb_v = slb_v; 1288 cache_entry->mmio_update = mmio_update; 1289 } 1290 if (data && (vcpu->arch.shregs.msr & MSR_IR)) 1291 return -2; /* MMIO emulation - load instr word */ 1292 } 1293 1294 return -1; /* send fault up to host kernel mode */ 1295 } 1296