// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

struct host_vm_change {
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];
	int userspace;
	int index;
	struct mm_struct *mm;
	void *data;
	int force;
};

#define INIT_HVC(mm, force, userspace) \
	((struct host_vm_change) \
	 { .ops = { { .type = NONE } }, \
	   .mm = mm, \
	   .data = NULL, \
	   .userspace = userspace, \
	   .index = 0, \
	   .force = force })

static void report_enomem(void)
{
	printk(KERN_ERR "UML ran out of memory on the host side! "
			"This can happen due to a memory limitation or "
			"vm.max_map_count has been reached.\n");
}

static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
		switch (op->type) {
		case MMAP:
			if (hvc->userspace)
				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
					  op->u.mmap.len, op->u.mmap.prot,
					  op->u.mmap.fd,
					  op->u.mmap.offset, finished,
					  &hvc->data);
			else
				map_memory(op->u.mmap.addr, op->u.mmap.offset,
					   op->u.mmap.len, 1, 1, 1);
			break;
		case MUNMAP:
			if (hvc->userspace)
				ret = unmap(&hvc->mm->context.id,
					    op->u.munmap.addr,
					    op->u.munmap.len, finished,
					    &hvc->data);
			else
				ret = os_unmap_memory(
					(void *) op->u.munmap.addr,
					op->u.munmap.len);

			break;
		case MPROTECT:
			if (hvc->userspace)
				ret = protect(&hvc->mm->context.id,
					      op->u.mprotect.addr,
					      op->u.mprotect.len,
					      op->u.mprotect.prot,
					      finished, &hvc->data);
			else
				ret = os_protect_memory(
					(void *) op->u.mprotect.addr,
					op->u.mprotect.len,
					1, 1, 1);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			BUG();
			break;
		}
	}

	if (ret == -ENOMEM)
		report_enomem();

	return ret;
}

static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
		    unsigned int prot, struct host_vm_change *hvc)
{
	__u64 offset;
	struct host_vm_op *last;
	int fd = -1, ret = 0;

	if (hvc->userspace)
		fd = phys_mapping(phys, &offset);
	else
		offset = phys;
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MMAP) &&
		    (last->u.mmap.addr + last->u.mmap.len == virt) &&
		    (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
		    (last->u.mmap.offset + last->u.mmap.len == offset)) {
			last->u.mmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type = MMAP,
				    .u = { .mmap = { .addr = virt,
						     .len = len,
						     .prot = prot,
						     .fd = fd,
						     .offset = offset }
					 } });
	return ret;
}

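/*
 * Queue a host munmap request for [addr, addr + len).  The stub area is
 * never unmapped; a request that falls inside it is rejected with -EINVAL.
 * A request that extends the previous MUNMAP op is merged into it, and the
 * op queue is flushed with do_ops() when it is full.
 */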
static int add_munmap(unsigned long addr, unsigned long len,
		      struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if ((addr >= STUB_START) && (addr < STUB_END))
		return -EINVAL;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MUNMAP) &&
		    (last->u.munmap.addr + last->u.munmap.len == addr)) {
			last->u.munmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type = MUNMAP,
				    .u = { .munmap = { .addr = addr,
						       .len = len } } });
	return ret;
}

static int add_mprotect(unsigned long addr, unsigned long len,
			unsigned int prot, struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MPROTECT) &&
		    (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
		    (last->u.mprotect.prot == prot)) {
			last->u.mprotect.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type = MPROTECT,
				    .u = { .mprotect = { .addr = addr,
							 .len = len,
							 .prot = prot } } });
	return ret;
}

#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

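/*
 * Walk the PTEs of one PMD and queue the host operations needed to bring
 * the host mappings in line with the page tables: mmap for newly present
 * pages, munmap for pages that went away, and mprotect when only the
 * permissions changed.  The accessed/dirty bits are folded into the host
 * protection, and each PTE is marked up to date once it has been handled.
 */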
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte;
	int r, w, x, prot, ret = 0;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if ((addr >= STUB_START) && (addr < STUB_END))
			continue;

		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte))
			w = 0;

		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
		if (hvc->force || pte_newpage(*pte)) {
			if (pte_present(*pte)) {
				if (pte_newpage(*pte))
					ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
						       PAGE_SIZE, prot, hvc);
			} else
				ret = add_munmap(addr, PAGE_SIZE, hvc);
		} else if (pte_newprot(*pte))
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
	return ret;
}

static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				pmd_mkuptodate(*pmd);
			}
		}
		else ret = update_pte_range(pmd, addr, next, hvc);
	} while (pmd++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				pud_mkuptodate(*pud);
			}
		}
		else ret = update_pmd_range(pud, addr, next, hvc);
	} while (pud++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	p4d_t *p4d;
	unsigned long next;
	int ret = 0;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (!p4d_present(*p4d)) {
			if (hvc->force || p4d_newpage(*p4d)) {
				ret = add_munmap(addr, next - addr, hvc);
				p4d_mkuptodate(*p4d);
			}
		} else
			ret = update_pud_range(p4d, addr, next, hvc);
	} while (p4d++, addr = next, ((addr < end) && !ret));
	return ret;
}

void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *pgd;
	struct host_vm_change hvc;
	unsigned long addr = start_addr, next;
	int ret = 0, userspace = 1;

	hvc = INIT_HVC(mm, force, userspace);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
				ret = add_munmap(addr, next - addr, &hvc);
				pgd_mkuptodate(*pgd);
			}
		} else
			ret = update_p4d_range(pgd, addr, next, &hvc);
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));

	if (!ret)
		ret = do_ops(&hvc, hvc.index, 1);

	/* This is not an else because ret is modified above */
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process: %d\n", task_tgid_vnr(current));
		/* We are under mmap_sem, release it such that current can terminate */
		up_write(&current->mm->mmap_sem);
		force_sig(SIGKILL);
		do_signal(&current->thread.regs);
	}
}

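/*
 * Bring the host mappings for a kernel address range in init_mm back in
 * sync with the kernel page tables.  Any host-side failure here is fatal,
 * so errors from the queued operations end in panic().  Returns nonzero
 * if any mapping was actually changed.
 */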
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err = 0, force = 0, userspace = 0;
	struct host_vm_change hvc;

	mm = &init_mm;
	hvc = INIT_HVC(mm, force, userspace);
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = ADD_ROUND(addr, PGDIR_SIZE);
			if (last > end)
				last = end;
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		p4d = p4d_offset(pgd, addr);
		if (!p4d_present(*p4d)) {
			last = ADD_ROUND(addr, P4D_SIZE);
			if (last > end)
				last = end;
			if (p4d_newpage(*p4d)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(p4d, addr);
		if (!pud_present(*pud)) {
			last = ADD_ROUND(addr, PUD_SIZE);
			if (last > end)
				last = end;
			if (pud_newpage(*pud)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = ADD_ROUND(addr, PMD_SIZE);
			if (last > end)
				last = end;
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = add_munmap(addr, PAGE_SIZE, &hvc);
			if (err < 0)
				panic("munmap failed, errno = %d\n",
				      -err);
			if (pte_present(*pte))
				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, 0, &hvc);
		}
		else if (pte_newprot(*pte)) {
			updated = 1;
			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
		}
		addr += PAGE_SIZE;
	}
	if (!err)
		err = do_ops(&hvc, hvc.index, 1);

	if (err < 0)
		panic("flush_tlb_kernel failed, errno = %d\n", err);
	return updated;
}

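/*
 * Flush a single user page: look the address up in the process page tables
 * and issue the matching map/unmap/protect call directly to the host,
 * without going through a host_vm_change queue.  On any failure the process
 * is killed, since its host mapping can no longer be trusted.
 */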
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto kill;

	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		goto kill;

	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		goto kill;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

	mm_id = &mm->context.id;
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
	else if (pte_newprot(*pte))
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

	if (err) {
		if (err == -ENOMEM)
			report_enomem();

		goto kill;
	}

	*pte = pte_mkuptodate(*pte);

	return;

kill:
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
	force_sig(SIGKILL);
}

void flush_tlb_all(void)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&current->mm->mm_users) == 0)
		return;

	flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
		vma = vma->vm_next;
	}
}

void force_flush_all(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
}