/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

int
brk(caddr_t nva)
{
	int error;
	proc_t *p = curproc;

	/*
	 * Serialize brk operations on an address space.
	 * This also serves as the lock protecting p_brksize
	 * and p_brkpageszc.
	 */
	as_rangelock(p->p_as);
	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		error = brk_lpg(nva);
	} else {
		error = brk_internal(nva, p->p_brkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((error != 0 ? set_errno(error) : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr, brkend;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;
	int err;

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If failed, back off to base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	err = brk_internal(nva, szc);
	/* If using szc failed, map with base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	/*
	 * Round up brk base to a large page boundary and remap
	 * anything in the segment already faulted in beyond that
	 * point.
	 */
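	/*
	 * Illustrative example (hypothetical addresses, assuming a 4 MB
	 * large page): if p_bssbase is 0x400000 and the break already
	 * extends to 0xc00000, then addr below rounds up to 0x400000 and
	 * len is 0x800000, which is large-page aligned, so the existing
	 * range is converted with as_setpagesize() and p_brkpageszc is
	 * updated.
	 */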
	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
	brkend = brkbase + p->p_brksize;
	len = brkend - addr;
	/* Check that len is not negative. Update page size code for heap. */
	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_brkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed, p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE, calculate the
	 * new heap size by rounding the requested break up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing the heap pagesize (e.g. some old code), and also if
	 * the heap pagesize changes we can update p_brkpageszc but delay
	 * adding the new mapping yet still know from p_brksize where the heap
	 * really ends. The user-requested heap end is stored in a libc
	 * variable.
	 */
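	/*
	 * Worked example (hypothetical addresses): with p_brkbase at
	 * 0x110000, nva == 0x112345 and a 4 MB large page, tnva below
	 * becomes P2ROUNDUP(0x112345, 0x400000) == 0x400000, so size is
	 * 0x400000 - 0x110000 == 0x2f0000 even though the caller only
	 * asked for the break to reach 0x112345.
	 */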
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * use PAGESIZE to roundup ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc changes since the last
	 * p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ?
				    p->p_brkpageszc : AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	return (0);
}

/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t oldsize = p->p_stksize;
	size_t newsize;
	int err;

	/*
	 * Serialize grow operations on an address space.
	 * This also serves as the lock protecting p_stksize
	 * and p_stkpageszc.
	 */
	as_rangelock(as);
	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		err = grow_lpg(sp);
	} else {
		err = grow_internal(sp, p->p_stkpageszc);
	}
	as_rangeunlock(as);

	if (err == 0 && (newsize = p->p_stksize) > oldsize) {
		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
		/*
		 * Set up translations so the process doesn't have to fault in
		 * the stack pages we just gave it.
		 */
		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
		    newsize - oldsize, F_INVAL, S_WRITE);
	}
	return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, saddr;
	caddr_t growend;
	int oszc, szc;
	int err;

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* failed, fall back to base page size */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size.
	 * So we are going to remap the whole segment with the new page size.
	 */
	err = grow_internal(sp, szc);
	/* The grow with szc failed, so fall back to base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * Round up stack pointer to a large page boundary and remap
	 * any pgsz pages in the segment already faulted in beyond that
	 * point.
	 */
	saddr = p->p_usrstack - p->p_stksize;
	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
	len = growend - addr;
	/* Check that len is not negative. Update page size code for stack. */
	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_stkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	size_t newsize;
	size_t oldsize;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);
	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
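	/*
	 * Illustrative example (hypothetical values): with p_usrstack
	 * aligned to a 4 MB large page, growszc selecting 4 MB pages,
	 * sp 5 MB below p_usrstack and a stack limit (p_stk_ctl) of 6 MB,
	 * aligning the new stack bottom down to 4 MB would imply an 8 MB
	 * stack and exceed the limit, so the code below falls back to
	 * PAGESIZE and grows the stack to 5 MB instead.
	 */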
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = p->p_usrstack -
		    (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
		newsize = p->p_usrstack - sp;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls,
		    p, RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the proposed new growszc, which is different
	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
	 * if not aligned to szc's pgsz.
	 */
	if (szc > 0) {
		caddr_t oldsp = p->p_usrstack - oldsize;
		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
		    pgsz);

		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
			    AS_MAP_NO_LPOOB;
		} else if (oldsp == austk) {
			crargs.szc = szc;
		} else {
			crargs.szc = AS_MAP_STACK;
		}
	} else {
		crargs.szc = AS_MAP_NO_LPOOB;
	}
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(p->p_as, p->p_usrstack - newsize,
	    newsize - oldsize, segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;
	return (0);
}

/*
 * Find address for user to map.
 * If MAP_FIXED is not specified, we can pick any address we want, but we will
 * first try the value in *addrp if it is non-NULL.  Thus this is implementing
 * a way to try and get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
	caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
	size_t lenp = len;

	ASSERT(AS_ISCLAIMGAP(as));	/* searches should be serialized */
	if (flags & MAP_FIXED) {
		(void) as_unmap(as, *addrp, len);
		return (0);
	} else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
	    !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
		/* User supplied address was available */
		*addrp = basep;
	} else {
		/*
		 * No user supplied address or the address supplied was not
		 * available.
		 */
		map_addr(addrp, len, off, vacalign, flags);
	}
	if (*addrp == NULL)
		return (ENOMEM);
	return (0);
}


/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs vn_a;
	int error;

	if (((PROT_ALL & uprot) != uprot))
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}
	/*
	 * No need to worry about vac alignment for anonymous
	 * pages since this is a "clone" object that doesn't
	 * yet exist.
	 */
	error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
	if (error != 0) {
		return (error);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	vn_a.vp = NULL;
	vn_a.offset = 0;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = uprot;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = CRED();
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
	struct vnode *vp;
	struct as *as = curproc->p_as;
	uint_t uprot, maxprot, type;
	int error;
	int in_crit = 0;

	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
	    MAP_TEXT | MAP_INITDATA)) != 0) {
		/* | MAP_RENAME */	/* not implemented, let user know */
		return (EINVAL);
	}

	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
		return (EINVAL);
	}

	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
		return (EINVAL);
	}

#if defined(__sparc)
	/*
	 * See if this is an "old mmap call".  If so, remember this
	 * fact and convert the flags value given to mmap to indicate
	 * the specified address in the system call must be used.
	 * _MAP_NEW is set by all new uses of mmap.
	 */
	if ((flags & _MAP_NEW) == 0)
		flags |= MAP_FIXED;
#endif
	flags &= ~_MAP_NEW;

	type = flags & MAP_TYPE;
	if (type != MAP_PRIVATE && type != MAP_SHARED)
		return (EINVAL);

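	/*
	 * With MAP_ALIGN the addr argument carries the requested alignment
	 * rather than an address; e.g. (hypothetically) an application
	 * asking for a 1 MB aligned mapping passes (caddr_t)0x100000 as
	 * addr, or 0 to let the system choose any suitable alignment.
	 */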
	if (flags & MAP_ALIGN) {

		if (flags & MAP_FIXED)
			return (EINVAL);

		/* alignment needs to be a power of 2 >= page size */
		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
		    !ISP2((uintptr_t)*addrp))
			return (EINVAL);
	}
	/*
	 * Check for bad lengths and file position.
	 * We let the VOP_MAP routine check for negative lengths
	 * since on some vnode types this might be appropriate.
	 */
	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
		return (EINVAL);

	maxprot = PROT_ALL;		/* start out allowing all accesses */
	uprot = prot | PROT_USER;

	if (fp == NULL) {
		ASSERT(flags & MAP_ANON);
		/* discard lwpchan mappings, like munmap() */
		if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
			lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
		as_rangelock(as);
		error = zmap(as, addrp, len, uprot, flags, pos);
		as_rangeunlock(as);
		return (error);
	} else if ((flags & MAP_ANON) != 0)
		return (EINVAL);

	vp = fp->f_vnode;

	/* Can't execute code from "noexec" mounted filesystem. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
		maxprot &= ~PROT_EXEC;

	/*
	 * These checks were added as part of large files.
	 *
	 * Return ENXIO if the initial position is negative; return EOVERFLOW
	 * if (offset + len) would overflow the maximum allowed offset for the
	 * type of file descriptor being used.
	 */
	if (vp->v_type == VREG) {
		if (pos < 0)
			return (ENXIO);
		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
			return (EOVERFLOW);
	}

	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
		/* no write access allowed */
		maxprot &= ~PROT_WRITE;
	}

	/*
	 * XXX - Do we also adjust maxprot based on protections
	 * of the vnode?  E.g. if no execute permission is given
	 * on the vnode for the current user, maxprot probably
	 * should disallow PROT_EXEC also?  This is different
	 * from the write access as this would be a per vnode
	 * test as opposed to a per fd test for writability.
	 */

	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections.  Also test to make sure
	 * that the file descriptor allows for read access, since
	 * "write only" mappings are hard to do: normally we do the read
	 * from the file before the page can be written.
	 */
	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
		return (EACCES);

	/*
	 * If the user specified an address, do some simple checks here
	 */
	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}

	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
	    nbl_need_check(vp)) {
		int svmand;
		nbl_op_t nop;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto done;
		if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
			if (prot & (PROT_READ | PROT_EXEC)) {
				nop = NBL_READWRITE;
			} else {
				nop = NBL_WRITE;
			}
		} else {
			nop = NBL_READ;
		}
		if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	/* discard lwpchan mappings, like munmap() */
	if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
		lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

	/*
	 * Ok, now let the vnode map routine do its thing to set things up.
	 */
	error = VOP_MAP(vp, pos, as,
	    addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

	if (error == 0) {
		if (vp->v_type == VREG &&
		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
			/*
			 * Mark this as an executable vnode
			 */
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VVMEXEC;
			mutex_exit(&vp->v_lock);
		}
	}

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file *fp;
	int error;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&addr, len, prot, flags,
		    fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif	/* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file *fp;
	int error;
	caddr_t a = (caddr_t)(uintptr_t)addr;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

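/*
 * For example (hypothetical offset): a 64-bit file offset of 0x123456000
 * arrives as two 32-bit cells, offhi and offlo; the #ifdef in smmaplf32()
 * below decides, based on the byte order of the machine, which cell
 * supplies the upper 32 bits (0x1) and which the lower 32 bits (0x23456000)
 * when the offset is reassembled.
 */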
struct mmaplf32a {
	caddr_t addr;
	size_t len;
#ifdef _LP64
	/*
	 * 32-bit contents, 64-bit cells
	 */
	uint64_t prot;
	uint64_t flags;
	uint64_t fd;
	uint64_t offhi;
	uint64_t offlo;
#else
	/*
	 * 32-bit contents, 32-bit cells
	 */
	uint32_t prot;
	uint32_t flags;
	uint32_t fd;
	uint32_t offhi;
	uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file *fp;
	int error;
	caddr_t a = uap->addr;
	int flags = (int)uap->flags;
	int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, NULL, off);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, fp, off);
		releasef(fd);
	} else
		error = EBADF;

	if (error == 0)
		rvp->r_val1 = (uintptr_t)a;
	return (error);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

int
munmap(caddr_t addr, size_t len)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(EINVAL));

	/*
	 * Discard lwpchan mappings.
	 */
	if (p->p_lcp != NULL)
		lwpchan_delete_mapping(p, addr, addr + len);
	if (as_unmap(as, addr, len) != 0)
		return (set_errno(EINVAL));

	return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
	struct as *as = curproc->p_as;
	uint_t uprot = prot | PROT_USER;
	int error;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
	case RANGE_OKAY:
		break;
	case RANGE_BADPROT:
		return (set_errno(ENOTSUP));
	case RANGE_BADADDR:
	default:
		return (set_errno(ENOMEM));
	}

	error = as_setprot(as, addr, len, uprot);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
	struct as *as = curproc->p_as;
	caddr_t ea;			/* end address of loop */
	size_t rl;			/* inner result length */
	char vec[MC_CACHE];		/* local vector cache */
	int error;
	model_t model;
	long llen;

	model = get_udatamodel();
	/*
	 * Validate form of address parameters.
	 */
	if (model == DATAMODEL_NATIVE) {
		llen = (long)len;
	} else {
		llen = (int32_t)(size32_t)len;
	}
	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(ENOMEM));

	/*
	 * Loop over subranges of interval [addr : addr + len), recovering
	 * results internally and then copying them out to caller.  Subrange
	 * is based on the size of MC_CACHE, defined above.
	 */
	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
		error = as_incore(as, addr,
		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
		if (rl != 0) {
			rl = (rl + PAGESIZE - 1) / PAGESIZE;
			if (copyout(vec, vecp, rl) != 0)
				return (set_errno(EFAULT));
			vecp += rl;
		}
		if (error != 0)
			return (set_errno(ENOMEM));
	}
	return (0);
}