1 /* 2 * This program is free software; you can redistribute it and/or modify 3 * it under the terms of the GNU General Public License, version 2, as 4 * published by the Free Software Foundation. 5 * 6 * This program is distributed in the hope that it will be useful, 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 * GNU General Public License for more details. 10 * 11 * You should have received a copy of the GNU General Public License 12 * along with this program; if not, write to the Free Software 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 * 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 17 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> 18 */ 19 20 #include <linux/types.h> 21 #include <linux/string.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/highmem.h> 25 #include <linux/gfp.h> 26 #include <linux/slab.h> 27 #include <linux/sched/signal.h> 28 #include <linux/hugetlb.h> 29 #include <linux/list.h> 30 #include <linux/anon_inodes.h> 31 #include <linux/iommu.h> 32 #include <linux/file.h> 33 34 #include <asm/tlbflush.h> 35 #include <asm/kvm_ppc.h> 36 #include <asm/kvm_book3s.h> 37 #include <asm/book3s/64/mmu-hash.h> 38 #include <asm/hvcall.h> 39 #include <asm/synch.h> 40 #include <asm/ppc-opcode.h> 41 #include <asm/kvm_host.h> 42 #include <asm/udbg.h> 43 #include <asm/iommu.h> 44 #include <asm/tce.h> 45 #include <asm/mmu_context.h> 46 47 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) 48 { 49 return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; 50 } 51 52 static unsigned long kvmppc_stt_pages(unsigned long tce_pages) 53 { 54 unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + 55 (tce_pages * sizeof(struct page *)); 56 57 return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; 58 } 59 60 static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) 61 { 62 long ret = 0; 63 64 if (!current || !current->mm) 65 return ret; /* process exited */ 66 67 down_write(¤t->mm->mmap_sem); 68 69 if (inc) { 70 unsigned long locked, lock_limit; 71 72 locked = current->mm->locked_vm + stt_pages; 73 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 74 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) 75 ret = -ENOMEM; 76 else 77 current->mm->locked_vm += stt_pages; 78 } else { 79 if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) 80 stt_pages = current->mm->locked_vm; 81 82 current->mm->locked_vm -= stt_pages; 83 } 84 85 pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, 86 inc ? '+' : '-', 87 stt_pages << PAGE_SHIFT, 88 current->mm->locked_vm << PAGE_SHIFT, 89 rlimit(RLIMIT_MEMLOCK), 90 ret ? " - exceeded" : ""); 91 92 up_write(¤t->mm->mmap_sem); 93 94 return ret; 95 } 96 97 static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) 98 { 99 struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, 100 struct kvmppc_spapr_tce_iommu_table, rcu); 101 102 iommu_tce_table_put(stit->tbl); 103 104 kfree(stit); 105 } 106 107 static void kvm_spapr_tce_liobn_put(struct kref *kref) 108 { 109 struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref, 110 struct kvmppc_spapr_tce_iommu_table, kref); 111 112 list_del_rcu(&stit->next); 113 114 call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); 115 } 116 117 extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, 118 struct iommu_group *grp) 119 { 120 int i; 121 struct kvmppc_spapr_tce_table *stt; 122 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 123 struct iommu_table_group *table_group = NULL; 124 125 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 126 127 table_group = iommu_group_get_iommudata(grp); 128 if (WARN_ON(!table_group)) 129 continue; 130 131 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 132 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 133 if (table_group->tables[i] != stit->tbl) 134 continue; 135 136 kref_put(&stit->kref, kvm_spapr_tce_liobn_put); 137 return; 138 } 139 } 140 } 141 } 142 143 extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, 144 struct iommu_group *grp) 145 { 146 struct kvmppc_spapr_tce_table *stt = NULL; 147 bool found = false; 148 struct iommu_table *tbl = NULL; 149 struct iommu_table_group *table_group; 150 long i; 151 struct kvmppc_spapr_tce_iommu_table *stit; 152 struct fd f; 153 154 f = fdget(tablefd); 155 if (!f.file) 156 return -EBADF; 157 158 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 159 if (stt == f.file->private_data) { 160 found = true; 161 break; 162 } 163 } 164 165 fdput(f); 166 167 if (!found) 168 return -EINVAL; 169 170 table_group = iommu_group_get_iommudata(grp); 171 if (WARN_ON(!table_group)) 172 return -EFAULT; 173 174 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 175 struct iommu_table *tbltmp = table_group->tables[i]; 176 177 if (!tbltmp) 178 continue; 179 /* 180 * Make sure hardware table parameters are exactly the same; 181 * this is used in the TCE handlers where boundary checks 182 * use only the first attached table. 183 */ 184 if ((tbltmp->it_page_shift == stt->page_shift) && 185 (tbltmp->it_offset == stt->offset) && 186 (tbltmp->it_size == stt->size)) { 187 /* 188 * Reference the table to avoid races with 189 * add/remove DMA windows. 190 */ 191 tbl = iommu_tce_table_get(tbltmp); 192 break; 193 } 194 } 195 if (!tbl) 196 return -EINVAL; 197 198 list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { 199 if (tbl != stit->tbl) 200 continue; 201 202 if (!kref_get_unless_zero(&stit->kref)) { 203 /* stit is being destroyed */ 204 iommu_tce_table_put(tbl); 205 return -ENOTTY; 206 } 207 /* 208 * The table is already known to this KVM, we just increased 209 * its KVM reference counter and can return. 210 */ 211 return 0; 212 } 213 214 stit = kzalloc(sizeof(*stit), GFP_KERNEL); 215 if (!stit) { 216 iommu_tce_table_put(tbl); 217 return -ENOMEM; 218 } 219 220 stit->tbl = tbl; 221 kref_init(&stit->kref); 222 223 list_add_rcu(&stit->next, &stt->iommu_tables); 224 225 return 0; 226 } 227 228 static void release_spapr_tce_table(struct rcu_head *head) 229 { 230 struct kvmppc_spapr_tce_table *stt = container_of(head, 231 struct kvmppc_spapr_tce_table, rcu); 232 unsigned long i, npages = kvmppc_tce_pages(stt->size); 233 234 for (i = 0; i < npages; i++) 235 __free_page(stt->pages[i]); 236 237 kfree(stt); 238 } 239 240 static int kvm_spapr_tce_fault(struct vm_fault *vmf) 241 { 242 struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; 243 struct page *page; 244 245 if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) 246 return VM_FAULT_SIGBUS; 247 248 page = stt->pages[vmf->pgoff]; 249 get_page(page); 250 vmf->page = page; 251 return 0; 252 } 253 254 static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { 255 .fault = kvm_spapr_tce_fault, 256 }; 257 258 static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) 259 { 260 vma->vm_ops = &kvm_spapr_tce_vm_ops; 261 return 0; 262 } 263 264 static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) 265 { 266 struct kvmppc_spapr_tce_table *stt = filp->private_data; 267 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 268 struct kvm *kvm = stt->kvm; 269 270 mutex_lock(&kvm->lock); 271 list_del_rcu(&stt->list); 272 mutex_unlock(&kvm->lock); 273 274 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 275 WARN_ON(!kref_read(&stit->kref)); 276 while (1) { 277 if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put)) 278 break; 279 } 280 } 281 282 kvm_put_kvm(stt->kvm); 283 284 kvmppc_account_memlimit( 285 kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); 286 call_rcu(&stt->rcu, release_spapr_tce_table); 287 288 return 0; 289 } 290 291 static const struct file_operations kvm_spapr_tce_fops = { 292 .mmap = kvm_spapr_tce_mmap, 293 .release = kvm_spapr_tce_release, 294 }; 295 296 long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 297 struct kvm_create_spapr_tce_64 *args) 298 { 299 struct kvmppc_spapr_tce_table *stt = NULL; 300 struct kvmppc_spapr_tce_table *siter; 301 unsigned long npages, size; 302 int ret = -ENOMEM; 303 int i; 304 305 if (!args->size) 306 return -EINVAL; 307 308 size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); 309 npages = kvmppc_tce_pages(size); 310 ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); 311 if (ret) 312 return ret; 313 314 ret = -ENOMEM; 315 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), 316 GFP_KERNEL); 317 if (!stt) 318 goto fail_acct; 319 320 stt->liobn = args->liobn; 321 stt->page_shift = args->page_shift; 322 stt->offset = args->offset; 323 stt->size = size; 324 stt->kvm = kvm; 325 INIT_LIST_HEAD_RCU(&stt->iommu_tables); 326 327 for (i = 0; i < npages; i++) { 328 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); 329 if (!stt->pages[i]) 330 goto fail; 331 } 332 333 mutex_lock(&kvm->lock); 334 335 /* Check this LIOBN hasn't been previously allocated */ 336 ret = 0; 337 list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { 338 if (siter->liobn == args->liobn) { 339 ret = -EBUSY; 340 break; 341 } 342 } 343 344 if (!ret) 345 ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, 346 stt, O_RDWR | O_CLOEXEC); 347 348 if (ret >= 0) { 349 list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); 350 kvm_get_kvm(kvm); 351 } 352 353 mutex_unlock(&kvm->lock); 354 355 if (ret >= 0) 356 return ret; 357 358 fail: 359 for (i = 0; i < npages; i++) 360 if (stt->pages[i]) 361 __free_page(stt->pages[i]); 362 363 kfree(stt); 364 fail_acct: 365 kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); 366 return ret; 367 } 368 369 static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) 370 { 371 unsigned long hpa = 0; 372 enum dma_data_direction dir = DMA_NONE; 373 374 iommu_tce_xchg(tbl, entry, &hpa, &dir); 375 } 376 377 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, 378 struct iommu_table *tbl, unsigned long entry) 379 { 380 struct mm_iommu_table_group_mem_t *mem = NULL; 381 const unsigned long pgsize = 1ULL << tbl->it_page_shift; 382 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 383 384 if (!pua) 385 /* it_userspace allocation might be delayed */ 386 return H_TOO_HARD; 387 388 mem = mm_iommu_lookup(kvm->mm, *pua, pgsize); 389 if (!mem) 390 return H_TOO_HARD; 391 392 mm_iommu_mapped_dec(mem); 393 394 *pua = 0; 395 396 return H_SUCCESS; 397 } 398 399 static long kvmppc_tce_iommu_unmap(struct kvm *kvm, 400 struct iommu_table *tbl, unsigned long entry) 401 { 402 enum dma_data_direction dir = DMA_NONE; 403 unsigned long hpa = 0; 404 long ret; 405 406 if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) 407 return H_HARDWARE; 408 409 if (dir == DMA_NONE) 410 return H_SUCCESS; 411 412 ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 413 if (ret != H_SUCCESS) 414 iommu_tce_xchg(tbl, entry, &hpa, &dir); 415 416 return ret; 417 } 418 419 long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, 420 unsigned long entry, unsigned long ua, 421 enum dma_data_direction dir) 422 { 423 long ret; 424 unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 425 struct mm_iommu_table_group_mem_t *mem; 426 427 if (!pua) 428 /* it_userspace allocation might be delayed */ 429 return H_TOO_HARD; 430 431 mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift); 432 if (!mem) 433 /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ 434 return H_TOO_HARD; 435 436 if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) 437 return H_HARDWARE; 438 439 if (mm_iommu_mapped_inc(mem)) 440 return H_CLOSED; 441 442 ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); 443 if (WARN_ON_ONCE(ret)) { 444 mm_iommu_mapped_dec(mem); 445 return H_HARDWARE; 446 } 447 448 if (dir != DMA_NONE) 449 kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 450 451 *pua = ua; 452 453 return 0; 454 } 455 456 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 457 unsigned long ioba, unsigned long tce) 458 { 459 struct kvmppc_spapr_tce_table *stt; 460 long ret, idx; 461 struct kvmppc_spapr_tce_iommu_table *stit; 462 unsigned long entry, ua = 0; 463 enum dma_data_direction dir; 464 465 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ 466 /* liobn, ioba, tce); */ 467 468 stt = kvmppc_find_table(vcpu->kvm, liobn); 469 if (!stt) 470 return H_TOO_HARD; 471 472 ret = kvmppc_ioba_validate(stt, ioba, 1); 473 if (ret != H_SUCCESS) 474 return ret; 475 476 ret = kvmppc_tce_validate(stt, tce); 477 if (ret != H_SUCCESS) 478 return ret; 479 480 dir = iommu_tce_direction(tce); 481 482 idx = srcu_read_lock(&vcpu->kvm->srcu); 483 484 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, 485 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) { 486 ret = H_PARAMETER; 487 goto unlock_exit; 488 } 489 490 entry = ioba >> stt->page_shift; 491 492 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 493 if (dir == DMA_NONE) 494 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, 495 stit->tbl, entry); 496 else 497 ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, 498 entry, ua, dir); 499 500 if (ret == H_SUCCESS) 501 continue; 502 503 if (ret == H_TOO_HARD) 504 goto unlock_exit; 505 506 WARN_ON_ONCE(1); 507 kvmppc_clear_tce(stit->tbl, entry); 508 } 509 510 kvmppc_tce_put(stt, entry, tce); 511 512 unlock_exit: 513 srcu_read_unlock(&vcpu->kvm->srcu, idx); 514 515 return ret; 516 } 517 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); 518 519 long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, 520 unsigned long liobn, unsigned long ioba, 521 unsigned long tce_list, unsigned long npages) 522 { 523 struct kvmppc_spapr_tce_table *stt; 524 long i, ret = H_SUCCESS, idx; 525 unsigned long entry, ua = 0; 526 u64 __user *tces; 527 u64 tce; 528 struct kvmppc_spapr_tce_iommu_table *stit; 529 530 stt = kvmppc_find_table(vcpu->kvm, liobn); 531 if (!stt) 532 return H_TOO_HARD; 533 534 entry = ioba >> stt->page_shift; 535 /* 536 * SPAPR spec says that the maximum size of the list is 512 TCEs 537 * so the whole table fits in 4K page 538 */ 539 if (npages > 512) 540 return H_PARAMETER; 541 542 if (tce_list & (SZ_4K - 1)) 543 return H_PARAMETER; 544 545 ret = kvmppc_ioba_validate(stt, ioba, npages); 546 if (ret != H_SUCCESS) 547 return ret; 548 549 idx = srcu_read_lock(&vcpu->kvm->srcu); 550 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { 551 ret = H_TOO_HARD; 552 goto unlock_exit; 553 } 554 tces = (u64 __user *) ua; 555 556 for (i = 0; i < npages; ++i) { 557 if (get_user(tce, tces + i)) { 558 ret = H_TOO_HARD; 559 goto unlock_exit; 560 } 561 tce = be64_to_cpu(tce); 562 563 ret = kvmppc_tce_validate(stt, tce); 564 if (ret != H_SUCCESS) 565 goto unlock_exit; 566 567 if (kvmppc_gpa_to_ua(vcpu->kvm, 568 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), 569 &ua, NULL)) 570 return H_PARAMETER; 571 572 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 573 ret = kvmppc_tce_iommu_map(vcpu->kvm, 574 stit->tbl, entry + i, ua, 575 iommu_tce_direction(tce)); 576 577 if (ret == H_SUCCESS) 578 continue; 579 580 if (ret == H_TOO_HARD) 581 goto unlock_exit; 582 583 WARN_ON_ONCE(1); 584 kvmppc_clear_tce(stit->tbl, entry); 585 } 586 587 kvmppc_tce_put(stt, entry + i, tce); 588 } 589 590 unlock_exit: 591 srcu_read_unlock(&vcpu->kvm->srcu, idx); 592 593 return ret; 594 } 595 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); 596 597 long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, 598 unsigned long liobn, unsigned long ioba, 599 unsigned long tce_value, unsigned long npages) 600 { 601 struct kvmppc_spapr_tce_table *stt; 602 long i, ret; 603 struct kvmppc_spapr_tce_iommu_table *stit; 604 605 stt = kvmppc_find_table(vcpu->kvm, liobn); 606 if (!stt) 607 return H_TOO_HARD; 608 609 ret = kvmppc_ioba_validate(stt, ioba, npages); 610 if (ret != H_SUCCESS) 611 return ret; 612 613 /* Check permission bits only to allow userspace poison TCE for debug */ 614 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) 615 return H_PARAMETER; 616 617 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 618 unsigned long entry = ioba >> stit->tbl->it_page_shift; 619 620 for (i = 0; i < npages; ++i) { 621 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, 622 stit->tbl, entry + i); 623 624 if (ret == H_SUCCESS) 625 continue; 626 627 if (ret == H_TOO_HARD) 628 return ret; 629 630 WARN_ON_ONCE(1); 631 kvmppc_clear_tce(stit->tbl, entry); 632 } 633 } 634 635 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) 636 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); 637 638 return H_SUCCESS; 639 } 640 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); 641