// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

/*
 * A batch of pending host mmap/munmap/mprotect operations.  Operations are
 * accumulated in ops[], merged with their predecessor where possible, and
 * issued against the host by do_ops().
 */
struct host_vm_change {
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];
	int userspace;
	int index;
	struct mm_struct *mm;
	void *data;
	int force;
};

#define INIT_HVC(mm, force, userspace) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } }, \
	   .mm		= mm, \
	   .data	= NULL, \
	   .userspace	= userspace, \
	   .index	= 0, \
	   .force	= force })

static void report_enomem(void)
{
	printk(KERN_ERR "UML ran out of memory on the host side! "
	       "This can happen due to a memory limitation or because "
	       "vm.max_map_count has been reached.\n");
}

/* Issue the first 'end' queued operations against the host address space. */
static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
		switch (op->type) {
		case MMAP:
			if (hvc->userspace)
				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
					  op->u.mmap.len, op->u.mmap.prot,
					  op->u.mmap.fd,
					  op->u.mmap.offset, finished,
					  &hvc->data);
			else
				map_memory(op->u.mmap.addr, op->u.mmap.offset,
					   op->u.mmap.len, 1, 1, 1);
			break;
		case MUNMAP:
			if (hvc->userspace)
				ret = unmap(&hvc->mm->context.id,
					    op->u.munmap.addr,
					    op->u.munmap.len, finished,
					    &hvc->data);
			else
				ret = os_unmap_memory(
					(void *) op->u.munmap.addr,
					op->u.munmap.len);
			break;
		case MPROTECT:
			if (hvc->userspace)
				ret = protect(&hvc->mm->context.id,
					      op->u.mprotect.addr,
					      op->u.mprotect.len,
					      op->u.mprotect.prot,
					      finished, &hvc->data);
			else
				ret = os_protect_memory(
					(void *) op->u.mprotect.addr,
					op->u.mprotect.len,
					1, 1, 1);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			BUG();
			break;
		}
	}

	if (ret == -ENOMEM)
		report_enomem();

	return ret;
}

/*
 * Queue a host mmap of len bytes at virtual address virt, backed by the
 * physical memory at phys.  Contiguous requests are merged into the
 * previous MMAP entry; a full ops[] array is flushed first via do_ops().
 */
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
		    unsigned int prot, struct host_vm_change *hvc)
{
	__u64 offset;
	struct host_vm_op *last;
	int fd = -1, ret = 0;

	if (hvc->userspace)
		fd = phys_mapping(phys, &offset);
	else
		offset = phys;
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MMAP) &&
		    (last->u.mmap.addr + last->u.mmap.len == virt) &&
		    (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
		    (last->u.mmap.offset + last->u.mmap.len == offset)) {
			last->u.mmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
					 } });
	return ret;
}

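/*
 * Queue a host munmap of len bytes at addr, merging with a preceding
 * contiguous MUNMAP entry when possible.  The stub pages are never
 * unmapped, so requests inside [STUB_START, STUB_END) are rejected.
 */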
static int add_munmap(unsigned long addr, unsigned long len,
		      struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if ((addr >= STUB_START) && (addr < STUB_END))
		return -EINVAL;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MUNMAP) &&
		    (last->u.munmap.addr + last->u.munmap.len == addr)) {
			last->u.munmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
				    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
	return ret;
}

/*
 * Queue a host mprotect of len bytes at addr, merging with a preceding
 * contiguous MPROTECT entry that requests the same protection.
 */
static int add_mprotect(unsigned long addr, unsigned long len,
			unsigned int prot, struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MPROTECT) &&
		    (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
		    (last->u.mprotect.prot == prot)) {
			last->u.mprotect.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
				    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
	return ret;
}

/* Round n up to the next multiple of inc (inc must be a power of two). */
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

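/*
 * Walk the PTEs covering [addr, end) and queue the host operations needed
 * to bring the host mappings in sync: new pages are mapped (or unmapped if
 * no longer present), protection changes become mprotects, and each PTE is
 * marked up to date.  Stub pages are skipped.
 */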
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte;
	int r, w, x, prot, ret = 0;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if ((addr >= STUB_START) && (addr < STUB_END))
			continue;

		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte))
			w = 0;

		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
		if (hvc->force || pte_newpage(*pte)) {
			if (pte_present(*pte)) {
				if (pte_newpage(*pte))
					ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
						       PAGE_SIZE, prot, hvc);
			} else
				ret = add_munmap(addr, PAGE_SIZE, hvc);
		} else if (pte_newprot(*pte))
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
	return ret;
}

static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				pmd_mkuptodate(*pmd);
			}
		} else
			ret = update_pte_range(pmd, addr, next, hvc);
	} while (pmd++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				pud_mkuptodate(*pud);
			}
		} else
			ret = update_pmd_range(pud, addr, next, hvc);
	} while (pud++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	p4d_t *p4d;
	unsigned long next;
	int ret = 0;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (!p4d_present(*p4d)) {
			if (hvc->force || p4d_newpage(*p4d)) {
				ret = add_munmap(addr, next - addr, hvc);
				p4d_mkuptodate(*p4d);
			}
		} else
			ret = update_pud_range(p4d, addr, next, hvc);
	} while (p4d++, addr = next, ((addr < end) && !ret));
	return ret;
}

/*
 * Walk the page tables of mm over [start_addr, end_addr) and replay any
 * pending changes into the host address space.  On failure the current
 * process is killed, as its host address space may be left inconsistent.
 */
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *pgd;
	struct host_vm_change hvc;
	unsigned long addr = start_addr, next;
	int ret = 0, userspace = 1;

	hvc = INIT_HVC(mm, force, userspace);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
				ret = add_munmap(addr, next - addr, &hvc);
				pgd_mkuptodate(*pgd);
			}
		} else
			ret = update_p4d_range(pgd, addr, next, &hvc);
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));

	if (!ret)
		ret = do_ops(&hvc, hvc.index, 1);

	/* This is not an else because ret is modified above */
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process: %d\n", task_tgid_vnr(current));
		/* We are under mmap_lock, release it such that current can terminate */
		mmap_write_unlock(current->mm);
		force_sig(SIGKILL);
		do_signal(&current->thread.regs);
	}
}

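/*
 * Sync the host's view of the kernel mappings in [start, end) against
 * init_mm's page tables, skipping whole unpopulated table levels.  Any
 * failure here is fatal to the UML kernel, so errors end in panic().
 * Returns nonzero if any host mapping was changed.
 */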
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err = 0, force = 0, userspace = 0;
	struct host_vm_change hvc;

	mm = &init_mm;
	hvc = INIT_HVC(mm, force, userspace);
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = ADD_ROUND(addr, PGDIR_SIZE);
			if (last > end)
				last = end;
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		p4d = p4d_offset(pgd, addr);
		if (!p4d_present(*p4d)) {
			last = ADD_ROUND(addr, P4D_SIZE);
			if (last > end)
				last = end;
			if (p4d_newpage(*p4d)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(p4d, addr);
		if (!pud_present(*pud)) {
			last = ADD_ROUND(addr, PUD_SIZE);
			if (last > end)
				last = end;
			if (pud_newpage(*pud)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = ADD_ROUND(addr, PMD_SIZE);
			if (last > end)
				last = end;
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = add_munmap(addr, PAGE_SIZE, &hvc);
			if (err < 0)
				panic("munmap failed, errno = %d\n",
				      -err);
			if (pte_present(*pte))
				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, 0, &hvc);
		} else if (pte_newprot(*pte)) {
			updated = 1;
			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
		}
		addr += PAGE_SIZE;
	}
	if (!err)
		err = do_ops(&hvc, hvc.index, 1);

	if (err < 0)
		panic("flush_tlb_kernel failed, errno = %d\n", err);
	return updated;
}

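/*
 * Sync the host mapping of the single user page at 'address' in vma->vm_mm.
 * The operation is issued immediately rather than batched; if it fails,
 * the current process is killed.
 */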
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto kill;

	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		goto kill;

	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		goto kill;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

	mm_id = &mm->context.id;
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		} else
			err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	} else if (pte_newprot(*pte))
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

	if (err) {
		if (err == -ENOMEM)
			report_enomem();

		goto kill;
	}

	*pte = pte_mkuptodate(*pte);

	return;

kill:
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
	force_sig(SIGKILL);
}

void flush_tlb_all(void)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&current->mm->mm_users) == 0)
		return;

	flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else
		fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
		vma = vma->vm_next;
	}
}

void force_flush_all(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
}

struct page_change_data {
	unsigned int set_mask, clear_mask;
};

static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
	struct page_change_data *cdata = data;
	pte_t pte = READ_ONCE(*ptep);

	pte_clear_bits(pte, cdata->clear_mask);
	pte_set_bits(pte, cdata->set_mask);

	set_pte(ptep, pte);
	return 0;
}

static int change_memory(unsigned long start, unsigned long pages,
			 unsigned int set_mask, unsigned int clear_mask)
{
	unsigned long size = pages * PAGE_SIZE;
	struct page_change_data data;
	int ret;

	data.set_mask = set_mask;
	data.clear_mask = clear_mask;

	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
				  &data);

	flush_tlb_kernel_range(start, start + size);

	return ret;
}

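/*
 * set_memory_*() toggle _PAGE_RW on kernel mappings; execute permission
 * cannot be changed here, so the NX/X variants report -EOPNOTSUPP.
 */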
int set_memory_ro(unsigned long addr, int numpages)
{
	return change_memory(addr, numpages, 0, _PAGE_RW);
}

int set_memory_rw(unsigned long addr, int numpages)
{
	return change_memory(addr, numpages, _PAGE_RW, 0);
}

int set_memory_nx(unsigned long addr, int numpages)
{
	return -EOPNOTSUPP;
}

int set_memory_x(unsigned long addr, int numpages)
{
	return -EOPNOTSUPP;
}