1 /* 2 * This program is free software; you can redistribute it and/or modify 3 * it under the terms of the GNU General Public License, version 2, as 4 * published by the Free Software Foundation. 5 * 6 * This program is distributed in the hope that it will be useful, 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 * GNU General Public License for more details. 10 * 11 * You should have received a copy of the GNU General Public License 12 * along with this program; if not, write to the Free Software 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 * 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> 17 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com> 18 */ 19 20 #include <linux/types.h> 21 #include <linux/string.h> 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/highmem.h> 25 #include <linux/gfp.h> 26 #include <linux/slab.h> 27 #include <linux/sched/signal.h> 28 #include <linux/hugetlb.h> 29 #include <linux/list.h> 30 #include <linux/anon_inodes.h> 31 #include <linux/iommu.h> 32 #include <linux/file.h> 33 34 #include <asm/tlbflush.h> 35 #include <asm/kvm_ppc.h> 36 #include <asm/kvm_book3s.h> 37 #include <asm/book3s/64/mmu-hash.h> 38 #include <asm/hvcall.h> 39 #include <asm/synch.h> 40 #include <asm/ppc-opcode.h> 41 #include <asm/kvm_host.h> 42 #include <asm/udbg.h> 43 #include <asm/iommu.h> 44 #include <asm/tce.h> 45 #include <asm/mmu_context.h> 46 47 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) 48 { 49 return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; 50 } 51 52 static unsigned long kvmppc_stt_pages(unsigned long tce_pages) 53 { 54 unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) + 55 (tce_pages * sizeof(struct page *)); 56 57 return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; 58 } 59 60 static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) 61 { 62 long ret = 0; 63 64 if (!current || !current->mm) 65 return ret; /* process exited */ 66 67 down_write(¤t->mm->mmap_sem); 68 69 if (inc) { 70 unsigned long locked, lock_limit; 71 72 locked = current->mm->locked_vm + stt_pages; 73 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 74 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) 75 ret = -ENOMEM; 76 else 77 current->mm->locked_vm += stt_pages; 78 } else { 79 if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) 80 stt_pages = current->mm->locked_vm; 81 82 current->mm->locked_vm -= stt_pages; 83 } 84 85 pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, 86 inc ? '+' : '-', 87 stt_pages << PAGE_SHIFT, 88 current->mm->locked_vm << PAGE_SHIFT, 89 rlimit(RLIMIT_MEMLOCK), 90 ret ? " - exceeded" : ""); 91 92 up_write(¤t->mm->mmap_sem); 93 94 return ret; 95 } 96 97 static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) 98 { 99 struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, 100 struct kvmppc_spapr_tce_iommu_table, rcu); 101 102 iommu_tce_table_put(stit->tbl); 103 104 kfree(stit); 105 } 106 107 static void kvm_spapr_tce_liobn_put(struct kref *kref) 108 { 109 struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref, 110 struct kvmppc_spapr_tce_iommu_table, kref); 111 112 list_del_rcu(&stit->next); 113 114 call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); 115 } 116 117 extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, 118 struct iommu_group *grp) 119 { 120 int i; 121 struct kvmppc_spapr_tce_table *stt; 122 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 123 struct iommu_table_group *table_group = NULL; 124 125 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 126 127 table_group = iommu_group_get_iommudata(grp); 128 if (WARN_ON(!table_group)) 129 continue; 130 131 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 132 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 133 if (table_group->tables[i] != stit->tbl) 134 continue; 135 136 kref_put(&stit->kref, kvm_spapr_tce_liobn_put); 137 return; 138 } 139 } 140 } 141 } 142 143 extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, 144 struct iommu_group *grp) 145 { 146 struct kvmppc_spapr_tce_table *stt = NULL; 147 bool found = false; 148 struct iommu_table *tbl = NULL; 149 struct iommu_table_group *table_group; 150 long i; 151 struct kvmppc_spapr_tce_iommu_table *stit; 152 struct fd f; 153 154 f = fdget(tablefd); 155 if (!f.file) 156 return -EBADF; 157 158 list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { 159 if (stt == f.file->private_data) { 160 found = true; 161 break; 162 } 163 } 164 165 fdput(f); 166 167 if (!found) 168 return -EINVAL; 169 170 table_group = iommu_group_get_iommudata(grp); 171 if (WARN_ON(!table_group)) 172 return -EFAULT; 173 174 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { 175 struct iommu_table *tbltmp = table_group->tables[i]; 176 177 if (!tbltmp) 178 continue; 179 /* 180 * Make sure hardware table parameters are exactly the same; 181 * this is used in the TCE handlers where boundary checks 182 * use only the first attached table. 183 */ 184 if ((tbltmp->it_page_shift == stt->page_shift) && 185 (tbltmp->it_offset == stt->offset) && 186 (tbltmp->it_size == stt->size)) { 187 /* 188 * Reference the table to avoid races with 189 * add/remove DMA windows. 190 */ 191 tbl = iommu_tce_table_get(tbltmp); 192 break; 193 } 194 } 195 if (!tbl) 196 return -EINVAL; 197 198 list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { 199 if (tbl != stit->tbl) 200 continue; 201 202 if (!kref_get_unless_zero(&stit->kref)) { 203 /* stit is being destroyed */ 204 iommu_tce_table_put(tbl); 205 return -ENOTTY; 206 } 207 /* 208 * The table is already known to this KVM, we just increased 209 * its KVM reference counter and can return. 210 */ 211 return 0; 212 } 213 214 stit = kzalloc(sizeof(*stit), GFP_KERNEL); 215 if (!stit) { 216 iommu_tce_table_put(tbl); 217 return -ENOMEM; 218 } 219 220 stit->tbl = tbl; 221 kref_init(&stit->kref); 222 223 list_add_rcu(&stit->next, &stt->iommu_tables); 224 225 return 0; 226 } 227 228 static void release_spapr_tce_table(struct rcu_head *head) 229 { 230 struct kvmppc_spapr_tce_table *stt = container_of(head, 231 struct kvmppc_spapr_tce_table, rcu); 232 unsigned long i, npages = kvmppc_tce_pages(stt->size); 233 234 for (i = 0; i < npages; i++) 235 __free_page(stt->pages[i]); 236 237 kfree(stt); 238 } 239 240 static int kvm_spapr_tce_fault(struct vm_fault *vmf) 241 { 242 struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; 243 struct page *page; 244 245 if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) 246 return VM_FAULT_SIGBUS; 247 248 page = stt->pages[vmf->pgoff]; 249 get_page(page); 250 vmf->page = page; 251 return 0; 252 } 253 254 static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { 255 .fault = kvm_spapr_tce_fault, 256 }; 257 258 static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) 259 { 260 vma->vm_ops = &kvm_spapr_tce_vm_ops; 261 return 0; 262 } 263 264 static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) 265 { 266 struct kvmppc_spapr_tce_table *stt = filp->private_data; 267 struct kvmppc_spapr_tce_iommu_table *stit, *tmp; 268 269 list_del_rcu(&stt->list); 270 271 list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { 272 WARN_ON(!kref_read(&stit->kref)); 273 while (1) { 274 if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put)) 275 break; 276 } 277 } 278 279 kvm_put_kvm(stt->kvm); 280 281 kvmppc_account_memlimit( 282 kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); 283 call_rcu(&stt->rcu, release_spapr_tce_table); 284 285 return 0; 286 } 287 288 static const struct file_operations kvm_spapr_tce_fops = { 289 .mmap = kvm_spapr_tce_mmap, 290 .release = kvm_spapr_tce_release, 291 }; 292 293 long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 294 struct kvm_create_spapr_tce_64 *args) 295 { 296 struct kvmppc_spapr_tce_table *stt = NULL; 297 unsigned long npages, size; 298 int ret = -ENOMEM; 299 int i; 300 301 if (!args->size) 302 return -EINVAL; 303 304 /* Check this LIOBN hasn't been previously allocated */ 305 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { 306 if (stt->liobn == args->liobn) 307 return -EBUSY; 308 } 309 310 size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); 311 npages = kvmppc_tce_pages(size); 312 ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); 313 if (ret) { 314 stt = NULL; 315 goto fail; 316 } 317 318 ret = -ENOMEM; 319 stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), 320 GFP_KERNEL); 321 if (!stt) 322 goto fail; 323 324 stt->liobn = args->liobn; 325 stt->page_shift = args->page_shift; 326 stt->offset = args->offset; 327 stt->size = size; 328 stt->kvm = kvm; 329 INIT_LIST_HEAD_RCU(&stt->iommu_tables); 330 331 for (i = 0; i < npages; i++) { 332 stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); 333 if (!stt->pages[i]) 334 goto fail; 335 } 336 337 kvm_get_kvm(kvm); 338 339 mutex_lock(&kvm->lock); 340 list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); 341 342 mutex_unlock(&kvm->lock); 343 344 return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, 345 stt, O_RDWR | O_CLOEXEC); 346 347 fail: 348 if (stt) { 349 for (i = 0; i < npages; i++) 350 if (stt->pages[i]) 351 __free_page(stt->pages[i]); 352 353 kfree(stt); 354 } 355 return ret; 356 } 357 358 static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) 359 { 360 unsigned long hpa = 0; 361 enum dma_data_direction dir = DMA_NONE; 362 363 iommu_tce_xchg(tbl, entry, &hpa, &dir); 364 } 365 366 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, 367 struct iommu_table *tbl, unsigned long entry) 368 { 369 struct mm_iommu_table_group_mem_t *mem = NULL; 370 const unsigned long pgsize = 1ULL << tbl->it_page_shift; 371 unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 372 373 if (!pua) 374 /* it_userspace allocation might be delayed */ 375 return H_TOO_HARD; 376 377 mem = mm_iommu_lookup(kvm->mm, *pua, pgsize); 378 if (!mem) 379 return H_TOO_HARD; 380 381 mm_iommu_mapped_dec(mem); 382 383 *pua = 0; 384 385 return H_SUCCESS; 386 } 387 388 static long kvmppc_tce_iommu_unmap(struct kvm *kvm, 389 struct iommu_table *tbl, unsigned long entry) 390 { 391 enum dma_data_direction dir = DMA_NONE; 392 unsigned long hpa = 0; 393 long ret; 394 395 if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) 396 return H_HARDWARE; 397 398 if (dir == DMA_NONE) 399 return H_SUCCESS; 400 401 ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 402 if (ret != H_SUCCESS) 403 iommu_tce_xchg(tbl, entry, &hpa, &dir); 404 405 return ret; 406 } 407 408 long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, 409 unsigned long entry, unsigned long ua, 410 enum dma_data_direction dir) 411 { 412 long ret; 413 unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 414 struct mm_iommu_table_group_mem_t *mem; 415 416 if (!pua) 417 /* it_userspace allocation might be delayed */ 418 return H_TOO_HARD; 419 420 mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift); 421 if (!mem) 422 /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ 423 return H_TOO_HARD; 424 425 if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) 426 return H_HARDWARE; 427 428 if (mm_iommu_mapped_inc(mem)) 429 return H_CLOSED; 430 431 ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); 432 if (WARN_ON_ONCE(ret)) { 433 mm_iommu_mapped_dec(mem); 434 return H_HARDWARE; 435 } 436 437 if (dir != DMA_NONE) 438 kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); 439 440 *pua = ua; 441 442 return 0; 443 } 444 445 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 446 unsigned long ioba, unsigned long tce) 447 { 448 struct kvmppc_spapr_tce_table *stt; 449 long ret, idx; 450 struct kvmppc_spapr_tce_iommu_table *stit; 451 unsigned long entry, ua = 0; 452 enum dma_data_direction dir; 453 454 /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ 455 /* liobn, ioba, tce); */ 456 457 stt = kvmppc_find_table(vcpu->kvm, liobn); 458 if (!stt) 459 return H_TOO_HARD; 460 461 ret = kvmppc_ioba_validate(stt, ioba, 1); 462 if (ret != H_SUCCESS) 463 return ret; 464 465 ret = kvmppc_tce_validate(stt, tce); 466 if (ret != H_SUCCESS) 467 return ret; 468 469 dir = iommu_tce_direction(tce); 470 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, 471 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) 472 return H_PARAMETER; 473 474 entry = ioba >> stt->page_shift; 475 476 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 477 if (dir == DMA_NONE) { 478 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, 479 stit->tbl, entry); 480 } else { 481 idx = srcu_read_lock(&vcpu->kvm->srcu); 482 ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, 483 entry, ua, dir); 484 srcu_read_unlock(&vcpu->kvm->srcu, idx); 485 } 486 487 if (ret == H_SUCCESS) 488 continue; 489 490 if (ret == H_TOO_HARD) 491 return ret; 492 493 WARN_ON_ONCE(1); 494 kvmppc_clear_tce(stit->tbl, entry); 495 } 496 497 kvmppc_tce_put(stt, entry, tce); 498 499 return H_SUCCESS; 500 } 501 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); 502 503 long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, 504 unsigned long liobn, unsigned long ioba, 505 unsigned long tce_list, unsigned long npages) 506 { 507 struct kvmppc_spapr_tce_table *stt; 508 long i, ret = H_SUCCESS, idx; 509 unsigned long entry, ua = 0; 510 u64 __user *tces; 511 u64 tce; 512 struct kvmppc_spapr_tce_iommu_table *stit; 513 514 stt = kvmppc_find_table(vcpu->kvm, liobn); 515 if (!stt) 516 return H_TOO_HARD; 517 518 entry = ioba >> stt->page_shift; 519 /* 520 * SPAPR spec says that the maximum size of the list is 512 TCEs 521 * so the whole table fits in 4K page 522 */ 523 if (npages > 512) 524 return H_PARAMETER; 525 526 if (tce_list & (SZ_4K - 1)) 527 return H_PARAMETER; 528 529 ret = kvmppc_ioba_validate(stt, ioba, npages); 530 if (ret != H_SUCCESS) 531 return ret; 532 533 idx = srcu_read_lock(&vcpu->kvm->srcu); 534 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { 535 ret = H_TOO_HARD; 536 goto unlock_exit; 537 } 538 tces = (u64 __user *) ua; 539 540 for (i = 0; i < npages; ++i) { 541 if (get_user(tce, tces + i)) { 542 ret = H_TOO_HARD; 543 goto unlock_exit; 544 } 545 tce = be64_to_cpu(tce); 546 547 ret = kvmppc_tce_validate(stt, tce); 548 if (ret != H_SUCCESS) 549 goto unlock_exit; 550 551 if (kvmppc_gpa_to_ua(vcpu->kvm, 552 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), 553 &ua, NULL)) 554 return H_PARAMETER; 555 556 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 557 ret = kvmppc_tce_iommu_map(vcpu->kvm, 558 stit->tbl, entry + i, ua, 559 iommu_tce_direction(tce)); 560 561 if (ret == H_SUCCESS) 562 continue; 563 564 if (ret == H_TOO_HARD) 565 goto unlock_exit; 566 567 WARN_ON_ONCE(1); 568 kvmppc_clear_tce(stit->tbl, entry); 569 } 570 571 kvmppc_tce_put(stt, entry + i, tce); 572 } 573 574 unlock_exit: 575 srcu_read_unlock(&vcpu->kvm->srcu, idx); 576 577 return ret; 578 } 579 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect); 580 581 long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, 582 unsigned long liobn, unsigned long ioba, 583 unsigned long tce_value, unsigned long npages) 584 { 585 struct kvmppc_spapr_tce_table *stt; 586 long i, ret; 587 struct kvmppc_spapr_tce_iommu_table *stit; 588 589 stt = kvmppc_find_table(vcpu->kvm, liobn); 590 if (!stt) 591 return H_TOO_HARD; 592 593 ret = kvmppc_ioba_validate(stt, ioba, npages); 594 if (ret != H_SUCCESS) 595 return ret; 596 597 /* Check permission bits only to allow userspace poison TCE for debug */ 598 if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) 599 return H_PARAMETER; 600 601 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 602 unsigned long entry = ioba >> stit->tbl->it_page_shift; 603 604 for (i = 0; i < npages; ++i) { 605 ret = kvmppc_tce_iommu_unmap(vcpu->kvm, 606 stit->tbl, entry + i); 607 608 if (ret == H_SUCCESS) 609 continue; 610 611 if (ret == H_TOO_HARD) 612 return ret; 613 614 WARN_ON_ONCE(1); 615 kvmppc_clear_tce(stit->tbl, entry); 616 } 617 } 618 619 for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) 620 kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); 621 622 return H_SUCCESS; 623 } 624 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); 625