/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

int
brk(caddr_t nva)
{
	int error;
	proc_t *p = curproc;

	/*
	 * Serialize brk operations on an address space.
	 * This also serves as the lock protecting p_brksize
	 * and p_brkpageszc.
	 */
	as_rangelock(p->p_as);
	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		error = brk_lpg(nva);
	} else {
		error = brk_internal(nva, p->p_brkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((error != 0 ? set_errno(error) : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr, brkend;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;
	int err;

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If failed, back off to base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	err = brk_internal(nva, szc);
	/* If using szc failed, map with base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	/*
	 * Round up brk base to a large page boundary and remap
	 * anything in the segment already faulted in beyond that
	 * point.
	 */
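	/*
	 * For example, assuming a 4 MB large page (pgsz == 0x400000) and
	 * p_bssbase == 0x8062000, addr below becomes 0x8400000, so only
	 * the part of the heap at or above that boundary is remapped to
	 * the larger page size.  (Addresses are illustrative only.)
	 */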
	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
	brkend = brkbase + p->p_brksize;
	len = brkend - addr;
	/* Check that len is not negative. Update page size code for heap. */
	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_brkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);		/* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE, calculate the
	 * real new heap size by rounding it up to pgsz. This is useful since
	 * we may want to know where the heap ends without knowing the heap
	 * pagesize (e.g. some old code), and also if the heap pagesize
	 * changes we can update p_brkpageszc but delay adding the new
	 * mapping, yet still know from p_brksize where the heap really ends.
	 * The user-requested heap end is stored in a libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * Use PAGESIZE to round up ova because we want to know the real
	 * value of the current heap end in case p_brkpageszc changed since
	 * the last time p_brksize was computed.
	 */
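	/*
	 * P2ROUNDUP(x, align) rounds x up to the next multiple of align
	 * (a power of two); e.g. P2ROUNDUP(0x2001, 0x1000) == 0x3000, while
	 * P2ROUNDUP(0x3000, 0x1000) == 0x3000, so an already-aligned break
	 * address is left unchanged.
	 */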
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
				    AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	return (0);
}

/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t oldsize = p->p_stksize;
	size_t newsize;
	int err;

	/*
	 * Serialize grow operations on an address space.
	 * This also serves as the lock protecting p_stksize
	 * and p_stkpageszc.
	 */
	as_rangelock(as);
	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		err = grow_lpg(sp);
	} else {
		err = grow_internal(sp, p->p_stkpageszc);
	}
	as_rangeunlock(as);

	if (err == 0 && (newsize = p->p_stksize) > oldsize) {
		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
		/*
		 * Set up translations so the process doesn't have to fault in
		 * the stack pages we just gave it.
		 */
		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
		    newsize - oldsize, F_INVAL, S_WRITE);
	}
	return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, saddr;
	caddr_t growend;
	int oszc, szc;
	int err;

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* failed, fall back to base page size */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size.
	 * So we are going to remap the whole segment with the new page size.
	 */
	err = grow_internal(sp, szc);
	/* The grow with szc failed, so fall back to base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * Round up stack pointer to a large page boundary and remap
	 * any pgsz pages in the segment already faulted in beyond that
	 * point.
	 */
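	/*
	 * For example, assuming a 4 MB large page, p_usrstack == 0xff000000
	 * and p_stksize == 0x900000: saddr is 0xfe700000, addr rounds up to
	 * 0xfe800000 and growend aligns down to 0xff000000, so the 8 MB that
	 * is fully covered by large pages is remapped, while the partially
	 * covered region below addr keeps the base page size.  (Addresses
	 * are illustrative only.)
	 */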
	saddr = p->p_usrstack - p->p_stksize;
	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
	len = growend - addr;
	/* Check that len is not negative. Update page size code for stack. */
	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_stkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);		/* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	size_t newsize;
	size_t oldsize;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);
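	/*
	 * sp is the requested stack address and may not be page aligned;
	 * truncate it down to its page.  For example, with PAGESIZE ==
	 * 0x1000, P2ALIGN(0xff7fedf8, 0x1000) == 0xff7fe000.  (The address
	 * is illustrative only.)
	 */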
	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
		newsize = p->p_usrstack - sp;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the proposed new growszc, which is different
	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
	 * if not aligned to szc's pgsz.
	 */
	if (szc > 0) {
		caddr_t oldsp = p->p_usrstack - oldsize;
		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
		    pgsz);

		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
			    AS_MAP_NO_LPOOB;
		} else if (oldsp == austk) {
			crargs.szc = szc;
		} else {
			crargs.szc = AS_MAP_STACK;
		}
	} else {
		crargs.szc = AS_MAP_NO_LPOOB;
	}
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;
	return (0);
}

/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs vn_a;

	if (((PROT_ALL & uprot) != uprot))
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
		(void) as_unmap(as, *addrp, len);
	} else {
		/*
		 * No need to worry about vac alignment for anonymous
		 * pages since this is a "clone" object that doesn't
		 * yet exist.
		 */
		map_addr(addrp, len, pos, 0, flags);
		if (*addrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	vn_a.vp = NULL;
	vn_a.offset = 0;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = uprot;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = CRED();
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

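/*
 * Common code for the mmap family of system calls: validates the flags,
 * length and protections, resolves the mapping address, and hands the
 * actual mapping off to zmap() for anonymous memory or to the vnode's
 * VOP_MAP() routine otherwise.
 */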
static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
	struct vnode *vp;
	struct as *as = curproc->p_as;
	uint_t uprot, maxprot, type;
	int error;

	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
	    MAP_TEXT | MAP_INITDATA)) != 0) {
		/* | MAP_RENAME */	/* not implemented, let user know */
		return (EINVAL);
	}

	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
		return (EINVAL);
	}

	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
		return (EINVAL);
	}

#if defined(__sparc)
	/*
	 * See if this is an "old mmap call".  If so, remember this
	 * fact and convert the flags value given to mmap to indicate
	 * the specified address in the system call must be used.
	 * _MAP_NEW is set by all new uses of mmap.
	 */
	if ((flags & _MAP_NEW) == 0)
		flags |= MAP_FIXED;
#endif
	flags &= ~_MAP_NEW;

	type = flags & MAP_TYPE;
	if (type != MAP_PRIVATE && type != MAP_SHARED)
		return (EINVAL);


	if (flags & MAP_ALIGN) {

		if (flags & MAP_FIXED)
			return (EINVAL);

		/* alignment needs to be a power of 2 >= page size */
		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
		    !ISP2((uintptr_t)*addrp))
			return (EINVAL);
	}
	/*
	 * Check for bad lengths and file position.
	 * We let the VOP_MAP routine check for negative lengths
	 * since on some vnode types this might be appropriate.
	 */
	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
		return (EINVAL);

	maxprot = PROT_ALL;		/* start out allowing all accesses */
	uprot = prot | PROT_USER;

	if (fp == NULL) {
		ASSERT(flags & MAP_ANON);
		as_rangelock(as);
		error = zmap(as, addrp, len, uprot, flags, pos);
		as_rangeunlock(as);
		return (error);
	} else if ((flags & MAP_ANON) != 0)
		return (EINVAL);

	vp = fp->f_vnode;

	/* Can't execute code from "noexec" mounted filesystem. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
		maxprot &= ~PROT_EXEC;

	/*
	 * These checks were added as part of large files.
	 *
	 * Return ENXIO if the initial position is negative; return EOVERFLOW
	 * if (offset + len) would overflow the maximum allowed offset for the
	 * type of file descriptor being used.
	 */
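	/*
	 * For example, assuming OFFSET_MAX(fp) is 2 GB - 1 for a
	 * non-largefile descriptor, a request with pos == 0x7ffff000 and
	 * len == 0x2000 fails with EOVERFLOW because the end of the mapping
	 * would pass that limit.  (Values are illustrative only.)
	 */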
	if (vp->v_type == VREG) {
		if (pos < 0)
			return (ENXIO);
		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
			return (EOVERFLOW);
	}

	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
		/* no write access allowed */
		maxprot &= ~PROT_WRITE;
	}

	/*
	 * XXX - Do we also adjust maxprot based on protections
	 * of the vnode?  E.g. if no execute permission is given
	 * on the vnode for the current user, maxprot probably
	 * should disallow PROT_EXEC also?  This is different
	 * from the write access as this would be a per vnode
	 * test as opposed to a per fd test for writability.
	 */

	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections.  Also test to make sure
	 * that the file descriptor allows for read access, since
	 * "write only" mappings are hard to do: normally we do the
	 * read from the file before the page can be written.
	 */
	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
		return (EACCES);

	/*
	 * If the user specified an address, do some simple checks here
	 */
	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}


	/*
	 * Ok, now let the vnode map routine do its thing to set things up.
	 */
	error = VOP_MAP(vp, pos, as,
	    addrp, len, uprot, maxprot, flags, fp->f_cred);

	if (error == 0) {
		if (vp->v_type == VREG &&
		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
			/*
			 * Mark this as an executable vnode
			 */
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VVMEXEC;
			mutex_exit(&vp->v_lock);
		}
	}

	return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file *fp;
	int error;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&addr, len, prot, flags,
		    fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif	/* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file *fp;
	int error;
	caddr_t a = (caddr_t)(uintptr_t)addr;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
	caddr_t addr;
	size_t len;
#ifdef _LP64
	/*
	 * 32-bit contents, 64-bit cells
	 */
	uint64_t prot;
	uint64_t flags;
	uint64_t fd;
	uint64_t offhi;
	uint64_t offlo;
#else
	/*
	 * 32-bit contents, 32-bit cells
	 */
	uint32_t prot;
	uint32_t flags;
	uint32_t fd;
	uint32_t offhi;
	uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file *fp;
	int error;
	caddr_t a = uap->addr;
	int flags = (int)uap->flags;
	int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, NULL, off);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, fp, off);
		releasef(fd);
	} else
		error = EBADF;

	if (error == 0)
		rvp->r_val1 = (uintptr_t)a;
	return (error);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

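/*
 * munmap(2): remove the mappings covering [addr, addr + len).  Any lwpchan
 * entries (used to track process-shared synchronization objects) that fall
 * in the range are discarded first.
 */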
int
munmap(caddr_t addr, size_t len)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(EINVAL));

	/*
	 * Discard lwpchan mappings.
	 */
	if (p->p_lcp != NULL)
		lwpchan_delete_mapping(p, addr, addr + len);
	if (as_unmap(as, addr, len) != 0)
		return (set_errno(EINVAL));

	return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
	struct as *as = curproc->p_as;
	uint_t uprot = prot | PROT_USER;
	int error;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
	case RANGE_OKAY:
		break;
	case RANGE_BADPROT:
		return (set_errno(ENOTSUP));
	case RANGE_BADADDR:
	default:
		return (set_errno(ENOMEM));
	}

	error = as_setprot(as, addr, len, uprot);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
	struct as *as = curproc->p_as;
	caddr_t ea;			/* end address of loop */
	size_t rl;			/* inner result length */
	char vec[MC_CACHE];		/* local vector cache */
	int error;
	model_t model;
	long llen;

	model = get_udatamodel();
	/*
	 * Validate form of address parameters.
	 */
	if (model == DATAMODEL_NATIVE) {
		llen = (long)len;
	} else {
		llen = (int32_t)(size32_t)len;
	}
	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(ENOMEM));

	/*
	 * Loop over subranges of interval [addr : addr + len), recovering
	 * results internally and then copying them out to caller.  Subrange
	 * is based on the size of MC_CACHE, defined above.
	 */
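	/*
	 * For example, assuming PAGESIZE is 4 KB, MC_QUANTUM is 512 KB, so
	 * a 2 MB request is handled in four passes, each copying out at
	 * most MC_CACHE (128) result bytes.
	 */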
	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
		error = as_incore(as, addr,
		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
		if (rl != 0) {
			rl = (rl + PAGESIZE - 1) / PAGESIZE;
			if (copyout(vec, vecp, rl) != 0)
				return (set_errno(EFAULT));
			vecp += rl;
		}
		if (error != 0)
			return (set_errno(ENOMEM));
	}
	return (0);
}