/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $Id: vm_mmap.c,v 1.25 1995/07/09 06:58:01 davidg Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_prot.h>

void pmap_object_init_pt();

struct sbrk_args {
	int incr;
};

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct sstk_args {
	int incr;
};

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
struct getpagesize_args {
	int dummy;
};

/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 || COMPAT_SUNOS */

struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	/*
	 * Address (if FIXED) must be page aligned.  Size is implicitly
	 * rounded to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after the
	 * end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation.  Obtain vnode and
		 * make sure it is of appropriate type.
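		 * Only regular files (VREG) and character devices (VCHR)
		 * are accepted; anything else is rejected below.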
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory
		 * (ala SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t) vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, (vm_offset_t) uap->pos);
	if (error == 0)
		*retval = (int) addr;
	return (error);
}

#ifdef COMPAT_43
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif				/* COMPAT_43 */

struct msync_args {
	caddr_t addr;
	int len;
	int flags;
};
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int flags;
	vm_map_t map;
	int rv;

	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t) uap->addr;
	size = (vm_size_t) uap->len;
	flags = uap->flags;

	if (((int) addr & PAGE_MASK) || addr + size < addr ||
	    (flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}

struct munmap_args {
	caddr_t addr;
	int len;
};
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	if (size == 0)
		return (0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

struct mprotect_args {
	caddr_t addr;
	int len;
	int prot;
};
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = (vm_size_t) uap->len;
	prot = uap->prot & VM_PROT_ALL;

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

struct madvise_args {
	caddr_t addr;
	int len;
	int behav;
};

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mincore_args {
	caddr_t addr;
	int len;
	char *vec;
};

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mlock_args {
	caddr_t addr;
	size_t len;
};
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

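	/*
	 * Reject unaligned or wrapping ranges, then bound locked memory
	 * by the global vm_page_max_wired limit and, where
	 * pmap_wired_count is available, by the per-process
	 * RLIMIT_MEMLOCK; otherwise mlock is restricted to the superuser.
	 */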
	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	size = round_page((vm_size_t) uap->len);
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

struct munlock_args {
	caddr_t addr;
	size_t len;
};
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif
	size = round_page((vm_size_t) uap->len);

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_size_t objsize;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmaping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_SWAP;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (caddr_t) vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = vat.va_size;
			type = OBJT_VNODE;
		}
	}
	object = vm_pager_allocate(type, handle, objsize, prot, foff);
	if (object == NULL)
		return (type == OBJT_DEVICE ? EINVAL : ENOMEM);

	/*
	 * Anonymous memory, shared file, or character special file.
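	 * These map the pager object directly; only the private
	 * (copy-on-write) regular-file case below needs a shadow object.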
619 */ 620 if ((flags & (MAP_ANON|MAP_SHARED)) || (type == OBJT_DEVICE)) { 621 rv = vm_map_find(map, object, foff, addr, size, fitit); 622 if (rv != KERN_SUCCESS) { 623 /* 624 * Lose the object reference. Will destroy the 625 * object if it's an unnamed anonymous mapping 626 * or named anonymous without other references. 627 */ 628 vm_object_deallocate(object); 629 goto out; 630 } 631 } 632 /* 633 * mmap a COW regular file 634 */ 635 else { 636 vm_map_entry_t entry; 637 vm_object_t private_object; 638 639 /* 640 * Create a new object and make the original object 641 * the backing object. NOTE: the object reference gained 642 * above is now changed into the reference held by 643 * private_object. Since we don't map 'object', we want 644 * only this one reference. 645 */ 646 private_object = vm_object_allocate(OBJT_DEFAULT, object->size); 647 private_object->backing_object = object; 648 TAILQ_INSERT_TAIL(&object->shadow_head, 649 private_object, shadow_list); 650 651 rv = vm_map_find(map, private_object, foff, addr, size, fitit); 652 if (rv != KERN_SUCCESS) { 653 vm_object_deallocate(private_object); 654 goto out; 655 } 656 657 if (!vm_map_lookup_entry(map, *addr, &entry)) { 658 panic("vm_mmap: missing map entry!!!"); 659 } 660 entry->copy_on_write = TRUE; 661 662 /* 663 * set pages COW and protect for read access only 664 */ 665 vm_object_pmap_copy(object, foff, foff + size); 666 667 } 668 669 /* 670 * "Pre-fault" resident pages. 671 */ 672 if ((type == OBJT_VNODE) && (map->pmap != NULL)) { 673 pmap_object_init_pt(map->pmap, *addr, object, foff, size); 674 } 675 676 /* 677 * Correct protection (default is VM_PROT_ALL). If maxprot is 678 * different than prot, we must set both explicitly. 679 */ 680 rv = KERN_SUCCESS; 681 if (maxprot != VM_PROT_ALL) 682 rv = vm_map_protect(map, *addr, *addr + size, maxprot, TRUE); 683 if (rv == KERN_SUCCESS && prot != maxprot) 684 rv = vm_map_protect(map, *addr, *addr + size, prot, FALSE); 685 if (rv != KERN_SUCCESS) { 686 (void) vm_map_remove(map, *addr, *addr + size); 687 goto out; 688 } 689 /* 690 * Shared memory is also shared with children. 691 */ 692 if (flags & MAP_SHARED) { 693 rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE); 694 if (rv != KERN_SUCCESS) { 695 (void) vm_map_remove(map, *addr, *addr + size); 696 goto out; 697 } 698 } 699 out: 700 switch (rv) { 701 case KERN_SUCCESS: 702 return (0); 703 case KERN_INVALID_ADDRESS: 704 case KERN_NO_SPACE: 705 return (ENOMEM); 706 case KERN_PROTECTION_FAILURE: 707 return (EACCES); 708 default: 709 return (EINVAL); 710 } 711 } 712