// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

struct host_vm_change {
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];
	int userspace;
	int index;
	struct mm_struct *mm;
	void *data;
	int force;
};

#define INIT_HVC(mm, force, userspace) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .mm		= mm, \
	   .data	= NULL, \
	   .userspace	= userspace, \
	   .index	= 0, \
	   .force	= force })

static void report_enomem(void)
{
	printk(KERN_ERR "UML ran out of memory on the host side! "
			"This can happen due to a memory limitation or "
			"because vm.max_map_count has been reached.\n");
}

static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
		switch (op->type) {
		case MMAP:
			if (hvc->userspace)
				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
					  op->u.mmap.len, op->u.mmap.prot,
					  op->u.mmap.fd,
					  op->u.mmap.offset, finished,
					  &hvc->data);
			else
				map_memory(op->u.mmap.addr, op->u.mmap.offset,
					   op->u.mmap.len, 1, 1, 1);
			break;
		case MUNMAP:
			if (hvc->userspace)
				ret = unmap(&hvc->mm->context.id,
					    op->u.munmap.addr,
					    op->u.munmap.len, finished,
					    &hvc->data);
			else
				ret = os_unmap_memory(
					(void *) op->u.munmap.addr,
					op->u.munmap.len);

			break;
		case MPROTECT:
			if (hvc->userspace)
				ret = protect(&hvc->mm->context.id,
					      op->u.mprotect.addr,
					      op->u.mprotect.len,
					      op->u.mprotect.prot,
					      finished, &hvc->data);
			else
				ret = os_protect_memory(
					(void *) op->u.mprotect.addr,
					op->u.mprotect.len,
					1, 1, 1);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			BUG();
			break;
		}
	}

	if (ret == -ENOMEM)
		report_enomem();

	return ret;
}

static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
		    unsigned int prot, struct host_vm_change *hvc)
{
	__u64 offset;
	struct host_vm_op *last;
	int fd = -1, ret = 0;

	if (hvc->userspace)
		fd = phys_mapping(phys, &offset);
	else
		offset = phys;
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MMAP) &&
		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
			last->u.mmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
			   } });
	return ret;
}

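/*
 * add_munmap - queue a host munmap of [addr, addr + len).
 *
 * As in add_mmap() above, the request is merged into the previous queued
 * op when that op is an adjacent MUNMAP; when the op buffer is full it is
 * flushed through do_ops() (with finished == 0) before a new entry is
 * queued.
 */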
static int add_munmap(unsigned long addr, unsigned long len,
		      struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MUNMAP) &&
		   (last->u.munmap.addr + last->u.munmap.len == addr)) {
			last->u.munmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
				    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
	return ret;
}

static int add_mprotect(unsigned long addr, unsigned long len,
			unsigned int prot, struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MPROTECT) &&
		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
		   (last->u.mprotect.prot == prot)) {
			last->u.mprotect.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
				    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
	return ret;
}

#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

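/*
 * update_pte_range() and the update_*_range() helpers below walk one level
 * of the page tables for [addr, end) and batch the required host updates
 * into *hvc.  PTE protection is derived from the read/write/exec bits and
 * downgraded for pages that are not young (no access) or not dirty (no
 * write).  A present PTE marked "newpage" is re-mmapped, a non-present PTE
 * is munmapped (if marked "newpage" or if hvc->force is set), and a PTE
 * that only changed protection ("newprot") gets an mprotect.  Non-present
 * upper-level entries marked "newpage" (or walked with hvc->force) become
 * a munmap of the entire range they cover.
 */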
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte;
	int r, w, x, prot, ret = 0;

	pte = pte_offset_kernel(pmd, addr);
	do {
		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte))
			w = 0;

		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
		if (hvc->force || pte_newpage(*pte)) {
			if (pte_present(*pte)) {
				if (pte_newpage(*pte))
					ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
						       PAGE_SIZE, prot, hvc);
			} else
				ret = add_munmap(addr, PAGE_SIZE, hvc);
		} else if (pte_newprot(*pte))
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
	return ret;
}

static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				pmd_mkuptodate(*pmd);
			}
		} else
			ret = update_pte_range(pmd, addr, next, hvc);
	} while (pmd++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				pud_mkuptodate(*pud);
			}
		} else
			ret = update_pmd_range(pud, addr, next, hvc);
	} while (pud++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	p4d_t *p4d;
	unsigned long next;
	int ret = 0;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (!p4d_present(*p4d)) {
			if (hvc->force || p4d_newpage(*p4d)) {
				ret = add_munmap(addr, next - addr, hvc);
				p4d_mkuptodate(*p4d);
			}
		} else
			ret = update_pud_range(p4d, addr, next, hvc);
	} while (p4d++, addr = next, ((addr < end) && !ret));
	return ret;
}

static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
			     unsigned long end_addr, int force)
{
	pgd_t *pgd;
	struct host_vm_change hvc;
	unsigned long addr = start_addr, next;
	int ret = 0, userspace = 1;

	hvc = INIT_HVC(mm, force, userspace);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
				ret = add_munmap(addr, next - addr, &hvc);
				pgd_mkuptodate(*pgd);
			}
		} else
			ret = update_p4d_range(pgd, addr, next, &hvc);
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));

	if (!ret)
		ret = do_ops(&hvc, hvc.index, 1);

	/* This is not an else because ret is modified above */
	if (ret) {
		struct mm_id *mm_idp = &current->mm->context.id;

		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process: %d\n", task_tgid_vnr(current));
		mm_idp->kill = 1;
	}
}

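/*
 * flush_tlb_kernel_range_common - bring the host mappings for a range of
 * kernel addresses back in sync with init_mm's page tables.
 *
 * Ranges under non-present upper-level entries are skipped to the next
 * boundary of that level (and munmapped if the entry is marked "newpage");
 * individual PTEs are munmapped, re-mmapped or mprotected as needed.  A
 * failing host operation is fatal and panics.  Returns nonzero if any
 * update was required.
 */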
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err = 0, force = 0, userspace = 0;
	struct host_vm_change hvc;

	mm = &init_mm;
	hvc = INIT_HVC(mm, force, userspace);
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = ADD_ROUND(addr, PGDIR_SIZE);
			if (last > end)
				last = end;
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		p4d = p4d_offset(pgd, addr);
		if (!p4d_present(*p4d)) {
			last = ADD_ROUND(addr, P4D_SIZE);
			if (last > end)
				last = end;
			if (p4d_newpage(*p4d)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(p4d, addr);
		if (!pud_present(*pud)) {
			last = ADD_ROUND(addr, PUD_SIZE);
			if (last > end)
				last = end;
			if (pud_newpage(*pud)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = ADD_ROUND(addr, PMD_SIZE);
			if (last > end)
				last = end;
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = add_munmap(addr, PAGE_SIZE, &hvc);
			if (err < 0)
				panic("munmap failed, errno = %d\n",
				      -err);
			if (pte_present(*pte))
				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, 0, &hvc);
		} else if (pte_newprot(*pte)) {
			updated = 1;
			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
		}
		addr += PAGE_SIZE;
	}
	if (!err)
		err = do_ops(&hvc, hvc.index, 1);

	if (err < 0)
		panic("flush_tlb_kernel failed, errno = %d\n", err);
	return updated;
}

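/*
 * flush_tlb_page - propagate a single userspace PTE to the host.
 *
 * The protection handed to the host is downgraded the same way as in
 * update_pte_range(): no access for a page that is not young, no write
 * for one that is not dirty.  If the page tables cannot be walked or the
 * host update fails, the current process is killed with SIGKILL.
 */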
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto kill;

	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		goto kill;

	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		goto kill;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

	mm_id = &mm->context.id;
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		} else
			err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	} else if (pte_newprot(*pte))
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

	if (err) {
		if (err == -ENOMEM)
			report_enomem();

		goto kill;
	}

	*pte = pte_mkuptodate(*pte);

	return;

kill:
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
	force_sig(SIGKILL);
}

void flush_tlb_all(void)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&current->mm->mm_users) == 0)
		return;

	flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else
		fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma)
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
}

void force_flush_all(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_read_lock(mm);
	for_each_vma(vmi, vma)
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
	mmap_read_unlock(mm);
}