/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $Id: vm_mmap.c,v 1.27 1995/10/21 17:42:28 dyson Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_prot.h>

void pmap_object_init_pt();

struct sbrk_args {
	int incr;
};

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct sstk_args {
	int incr;
};

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
struct getpagesize_args {
	int dummy;
};

/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif	/* COMPAT_43 || COMPAT_SUNOS */

struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	/*
	 * Address (if FIXED) must be page aligned. Size is implicitly rounded
	 * to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	/*
	 * Check for illegal addresses. Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after the
	 * end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
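		/*
		 * For example, a typical file-backed request from user
		 * level looks roughly like
		 *
		 *	fd = open("/some/file", O_RDWR);
		 *	addr = mmap(0, len, PROT_READ | PROT_WRITE,
		 *	    MAP_SHARED, fd, 0);
		 *
		 * (illustrative only; the path and variable names are made
		 * up). The descriptor must refer to a vnode of type VREG or
		 * VCHR, and for shared mappings maxprot is limited below by
		 * the mode the file was opened with.
		 */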
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible. Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead? Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t) vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, (vm_offset_t) uap->pos);
	if (error == 0)
		*retval = (int) addr;
	return (error);
}

#ifdef COMPAT_43
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif	/* COMPAT_43 */


struct msync_args {
	caddr_t addr;
	int len;
	int flags;
};
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int flags;
	vm_map_t map;
	int rv;

	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t) uap->addr;
	size = (vm_size_t) uap->len;
	flags = uap->flags;

	if (((int) addr & PAGE_MASK) || addr + size < addr ||
	    (flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
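	/*
	 * E.g. msync(addr, 0, 0) issued against an address inside a large
	 * mapping cleans the whole map entry containing addr, not just the
	 * page at addr.
	 */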
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}

struct munmap_args {
	caddr_t addr;
	int len;
};
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	if (size == 0)
		return (0);
	/*
	 * Check for illegal addresses. Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

struct mprotect_args {
	caddr_t addr;
	int len;
	int prot;
};
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = (vm_size_t) uap->len;
	prot = uap->prot & VM_PROT_ALL;

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

struct madvise_args {
	caddr_t addr;
	int len;
	int behav;
};

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mincore_args {
	caddr_t addr;
	int len;
	char *vec;
};

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_offset_t end;
	char *vec;

	addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + round_page((vm_size_t) uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

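	/*
	 * Walk the range a page at a time: for each page, probe the pmap
	 * for a resident mapping and store one status byte (1 resident,
	 * 0 not) into the user-supplied vector via subyte().
	 */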
	vec = uap->vec;
	while (addr < end) {
		int error;

		if (pmap_extract(&p->p_vmspace->vm_pmap, addr)) {
			error = subyte(vec, 1);
		} else {
			error = subyte(vec, 0);
		}
		if (error)
			return (EFAULT);
		vec++;
		addr += PAGE_SIZE;
	}
	return (0);
}

struct mlock_args {
	caddr_t addr;
	size_t len;
};
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	size = round_page((vm_size_t) uap->len);
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

struct munlock_args {
	caddr_t addr;
	size_t len;
};
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif
	size = round_page((vm_size_t) uap->len);

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_size_t objsize;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_SWAP;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (caddr_t) vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = vat.va_size;
			type = OBJT_VNODE;
		}
	}
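	/*
	 * At this point "type" selects the pager used below: OBJT_SWAP
	 * for anonymous memory, OBJT_DEVICE keyed on the character
	 * device, or OBJT_VNODE sized from the vnode's attributes.
	 */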
	object = vm_pager_allocate(type, handle, objsize, prot, foff);
	if (object == NULL)
		return (type == OBJT_DEVICE ? EINVAL : ENOMEM);

	rv = vm_map_find(map, object, foff, addr, size, fitit);
	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * mmap a COW regular file
	 */
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0 && (type != OBJT_DEVICE)) {
		vm_map_entry_t entry;

		if (!vm_map_lookup_entry(map, *addr, &entry)) {
			panic("vm_mmap: missing map entry!!!");
		}
		entry->copy_on_write = TRUE;
		/*
		 * This will create the process's private object on
		 * an as-needed basis.
		 */
		entry->needs_copy = TRUE;

		/*
		 * set pages COW and protect for read access only
		 */
		vm_object_pmap_copy(object, foff, foff + size);

	}

	/*
	 * "Pre-fault" resident pages.
	 */
	if ((type == OBJT_VNODE) && (map->pmap != NULL)) {
		pmap_object_init_pt(map->pmap, *addr, object, foff, size);
	}

	/*
	 * Correct protection (default is VM_PROT_ALL). If maxprot is
	 * different than prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr + size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr + size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_map_remove(map, *addr, *addr + size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
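/*
 * Example use of the internal interface (a rough sketch only, mirroring
 * the call made by mmap() above; "len" is a placeholder): an anonymous
 * mapping can be established with
 *
 *	vm_offset_t addr = 0;
 *	int error;
 *
 *	error = vm_mmap(&p->p_vmspace->vm_map, &addr, round_page(len),
 *	    VM_PROT_ALL, VM_PROT_ALL, MAP_ANON, (caddr_t) NULL, 0);
 *
 * while a file-backed caller passes the vnode pointer as the handle and
 * a page-aligned file offset as foff.
 */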