/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/*        All Rights Reserved */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;

/*
 * If set, we will not randomize mappings where the 'addr' argument is
 * non-NULL and not an alignment.
 */
int aslr_respect_mmap_hint = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

intptr_t
brk(caddr_t nva)
{
        int error;
        proc_t *p = curproc;

        /*
         * Serialize brk operations on an address space.
         * This also serves as the lock protecting p_brksize
         * and p_brkpageszc.
         */
        as_rangelock(p->p_as);

        /*
         * As a special case to aid the implementation of sbrk(3C), if given a
         * new brk of 0, return the current brk.  We'll hide this in brk(3C).
         */
        if (nva == 0) {
                intptr_t base = (intptr_t)(p->p_brkbase + p->p_brksize);
                as_rangeunlock(p->p_as);
                return (base);
        }

        if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
                error = brk_lpg(nva);
        } else {
                error = brk_internal(nva, p->p_brkpageszc);
        }
        as_rangeunlock(p->p_as);
        return ((error != 0 ? set_errno(error) : 0));
}
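
/*
 * Hedged illustration (not the actual libc source): a user-level sbrk(3C)
 * wrapper could use the brk(0) special case above to learn the current
 * break before computing the new one.  The _brk_raw() name below is
 * hypothetical and stands in for whatever raw system call stub libc uses.
 *
 *	void *
 *	sbrk_sketch(intptr_t incr)
 *	{
 *		char *oldbrk = (char *)_brk_raw(0);	// brk(0): current break
 *
 *		if (incr != 0 && _brk_raw(oldbrk + incr) != 0)
 *			return ((void *)-1);		// errno already set
 *		return (oldbrk);
 *	}
 */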

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
        struct proc *p = curproc;
        size_t pgsz, len;
        caddr_t addr, brkend;
        caddr_t bssbase = p->p_bssbase;
        caddr_t brkbase = p->p_brkbase;
        int oszc, szc;
        int err;

        oszc = p->p_brkpageszc;

        /*
         * If p_brkbase has not yet been set, the first call
         * to brk_internal() will initialize it.
         */
        if (brkbase == 0) {
                return (brk_internal(nva, oszc));
        }

        len = nva - bssbase;

        pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
        szc = page_szc(pgsz);

        /*
         * Covers two cases:
         * 1. page_szc() returns -1 for invalid page size, so we want to
         * ignore it in that case.
         * 2. By design we never decrease page size, as it is more stable.
         */
        if (szc <= oszc) {
                err = brk_internal(nva, oszc);
                /* If failed, back off to base page size. */
                if (err != 0 && oszc != 0) {
                        err = brk_internal(nva, 0);
                }
                return (err);
        }

        err = brk_internal(nva, szc);
        /* If using szc failed, map with base page size and return. */
        if (err != 0) {
                if (szc != 0) {
                        err = brk_internal(nva, 0);
                }
                return (err);
        }

        /*
         * Round up brk base to a large page boundary and remap
         * anything in the segment already faulted in beyond that
         * point.
         */
        addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
        brkend = brkbase + p->p_brksize;
        len = brkend - addr;
        /* Check that len is not negative. Update page size code for heap. */
        if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
                (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
                p->p_brkpageszc = szc;
        }

        ASSERT(err == 0);
        return (err);   /* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
        caddr_t ova;                    /* current break address */
        size_t size;
        int error;
        struct proc *p = curproc;
        struct as *as = p->p_as;
        size_t pgsz;
        uint_t szc;
        rctl_qty_t as_rctl;

        /*
         * extend heap to brkszc alignment but use current p->p_brkpageszc
         * for the newly created segment. This allows the new extension
         * segment to be concatenated successfully with the existing brk
         * segment.
         */
        if ((szc = brkszc) != 0) {
                pgsz = page_get_pagesize(szc);
                ASSERT(pgsz > PAGESIZE);
        } else {
                pgsz = PAGESIZE;
        }

        mutex_enter(&p->p_lock);
        as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
            p->p_rctls, p);
        mutex_exit(&p->p_lock);

        /*
         * If p_brkbase has not yet been set, the first call
         * to brk() will initialize it.
         */
        if (p->p_brkbase == 0)
                p->p_brkbase = nva;

        /*
         * Before multiple page size support existed p_brksize was the value
         * not rounded to the pagesize (i.e. it stored the exact user request
         * for heap size). If pgsz is greater than PAGESIZE calculate the
         * heap size as the real new heap size by rounding it up to pgsz.
         * This is useful since we may want to know where the heap ends
         * without knowing heap pagesize (e.g. some old code) and also if
         * heap pagesize changes we can update p_brkpageszc but delay adding
         * new mapping yet still know from p_brksize where the heap really
         * ends. The user-requested heap end is stored in a libc variable.
         */
        if (pgsz > PAGESIZE) {
                caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
                size = tnva - p->p_brkbase;
                if (tnva < p->p_brkbase || (size > p->p_brksize &&
                    size > (size_t)as_rctl)) {
                        szc = 0;
                        pgsz = PAGESIZE;
                        size = nva - p->p_brkbase;
                }
        } else {
                size = nva - p->p_brkbase;
        }

        /*
         * use PAGESIZE to roundup ova because we want to know the real value
         * of the current heap end in case p_brkpageszc changes since the last
         * p_brksize was computed.
         */
        nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
        ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
            PAGESIZE);

        if ((nva < p->p_brkbase) || (size > p->p_brksize &&
            size > as_rctl)) {
                mutex_enter(&p->p_lock);
                (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
                    RCA_SAFE);
                mutex_exit(&p->p_lock);
                return (ENOMEM);
        }

        if (nva > ova) {
                struct segvn_crargs crargs =
                    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

                if (!(p->p_datprot & PROT_EXEC)) {
                        crargs.prot &= ~PROT_EXEC;
                }

                /*
                 * Add new zfod mapping to extend UNIX data segment
                 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
                 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
                 * page sizes if ova is not aligned to szc's pgsz.
                 */
                if (szc > 0) {
                        caddr_t rbss;

                        rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
                            pgsz);
                        if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
                                crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
                                    AS_MAP_NO_LPOOB;
                        } else if (ova == rbss) {
                                crargs.szc = szc;
                        } else {
                                crargs.szc = AS_MAP_HEAP;
                        }
                } else {
                        crargs.szc = AS_MAP_NO_LPOOB;
                }
                crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
                error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
                    &crargs);
                if (error) {
                        return (error);
                }

        } else if (nva < ova) {
                /*
                 * Release mapping to shrink UNIX data segment.
                 */
                (void) as_unmap(as, nva, (size_t)(ova - nva));
        }
        p->p_brksize = size;
        return (0);
}

/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
        struct proc *p = curproc;
        struct as *as = p->p_as;
        size_t oldsize = p->p_stksize;
        size_t newsize;
        int err;

        /*
         * Serialize grow operations on an address space.
         * This also serves as the lock protecting p_stksize
         * and p_stkpageszc.
         */
        as_rangelock(as);
        if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
                err = grow_lpg(sp);
        } else {
                err = grow_internal(sp, p->p_stkpageszc);
        }
        as_rangeunlock(as);

        if (err == 0 && (newsize = p->p_stksize) > oldsize) {
                ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
                ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
                /*
                 * Set up translations so the process doesn't have to fault in
                 * the stack pages we just gave it.
                 */
                (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
                    newsize - oldsize, F_INVAL, S_WRITE);
        }
        return ((err == 0 ? 1 : 0));
}
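
/*
 * Hedged note: grow() reports 1/0 ("grew"/"did not grow") rather than an
 * errno, which suits callers in the page fault path that only need to know
 * whether retrying the faulting access is worthwhile.  A caller sketch
 * (illustrative names, not the actual trap handler):
 *
 *	if (fault_addr < p->p_usrstack && grow(fault_addr))
 *		return;		// mapping now exists; retry the access
 */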

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
        struct proc *p = curproc;
        size_t pgsz;
        size_t len, newsize;
        caddr_t addr, saddr;
        caddr_t growend;
        int oszc, szc;
        int err;

        newsize = p->p_usrstack - sp;

        oszc = p->p_stkpageszc;
        pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
        szc = page_szc(pgsz);

        /*
         * Covers two cases:
         * 1. page_szc() returns -1 for invalid page size, so we want to
         * ignore it in that case.
         * 2. By design we never decrease page size, as it is more stable.
         * This shouldn't happen as the stack never shrinks.
         */
        if (szc <= oszc) {
                err = grow_internal(sp, oszc);
                /* failed, fall back to base page size */
                if (err != 0 && oszc != 0) {
                        err = grow_internal(sp, 0);
                }
                return (err);
        }

        /*
         * We've grown sufficiently to switch to a new page size.
         * So we are going to remap the whole segment with the new page size.
         */
        err = grow_internal(sp, szc);
        /* The grow with szc failed, so fall back to base page size. */
        if (err != 0) {
                if (szc != 0) {
                        err = grow_internal(sp, 0);
                }
                return (err);
        }

        /*
         * Round up stack pointer to a large page boundary and remap
         * any pgsz pages in the segment already faulted in beyond that
         * point.
         */
        saddr = p->p_usrstack - p->p_stksize;
        addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
        growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
        len = growend - addr;
        /* Check that len is not negative. Update page size code for stack. */
        if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
                (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
                p->p_stkpageszc = szc;
        }

        ASSERT(err == 0);
        return (err);   /* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
        struct proc *p = curproc;
        size_t newsize;
        size_t oldsize;
        int error;
        size_t pgsz;
        uint_t szc;
        struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

        ASSERT(sp < p->p_usrstack);
        sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

        /*
         * grow to growszc alignment but use current p->p_stkpageszc for
         * the segvn_crargs szc passed to segvn_create. For memcntl to
         * increase the szc, this allows the new extension segment to be
         * concatenated successfully with the existing stack segment.
         */
        if ((szc = growszc) != 0) {
                pgsz = page_get_pagesize(szc);
                ASSERT(pgsz > PAGESIZE);
                newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
                if (newsize > (size_t)p->p_stk_ctl) {
                        szc = 0;
                        pgsz = PAGESIZE;
                        newsize = p->p_usrstack - sp;
                }
        } else {
                pgsz = PAGESIZE;
                newsize = p->p_usrstack - sp;
        }

        if (newsize > (size_t)p->p_stk_ctl) {
                (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
                    RCA_UNSAFE_ALL);

                return (ENOMEM);
        }

        oldsize = p->p_stksize;
        ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

        if (newsize <= oldsize) {       /* prevent the stack from shrinking */
                return (0);
        }

        if (!(p->p_stkprot & PROT_EXEC)) {
                crargs.prot &= ~PROT_EXEC;
        }
        /*
         * extend stack with the proposed new growszc, which is different
         * than p_stkpageszc only on a memcntl to increase the stack pagesize.
         * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
         * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
         * if not aligned to szc's pgsz.
         */
        if (szc > 0) {
                caddr_t oldsp = p->p_usrstack - oldsize;
                caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
                    pgsz);

                if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
                        crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
                            AS_MAP_NO_LPOOB;
                } else if (oldsp == austk) {
                        crargs.szc = szc;
                } else {
                        crargs.szc = AS_MAP_STACK;
                }
        } else {
                crargs.szc = AS_MAP_NO_LPOOB;
        }
        crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

        if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
            segvn_create, &crargs)) != 0) {
                if (error == EAGAIN) {
                        cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
                            "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
                }
                return (error);
        }
        p->p_stksize = newsize;
        return (0);
}

/*
 * Find address for user to map.  If MAP_FIXED is not specified, we can pick
 * any address we want, but we will first try the value in *addrp if it is
 * non-NULL and _MAP_RANDOMIZE is not set.  Thus this is implementing a way to
 * try and get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
        caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
        size_t lenp = len;

        ASSERT(AS_ISCLAIMGAP(as));      /* searches should be serialized */
        if (flags & MAP_FIXED) {
                (void) as_unmap(as, *addrp, len);
                return (0);
        } else if (basep != NULL &&
            ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
            !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
                /* User supplied address was available */
                *addrp = basep;
        } else {
                /*
                 * No user supplied address or the address supplied was not
                 * available.
                 */
                map_addr(addrp, len, off, vacalign, flags);
        }
        if (*addrp == NULL)
                return (ENOMEM);
        return (0);
}


/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
        struct segvn_crargs vn_a;
        int error;

        if (((PROT_ALL & uprot) != uprot))
                return (EACCES);

        if ((flags & MAP_FIXED) != 0) {
                caddr_t userlimit;

                /*
                 * Use the user address.  First verify that
                 * the address to be used is page aligned.
                 * Then make some simple bounds checks.
                 */
                if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
                        return (EINVAL);

                userlimit = flags & _MAP_LOW32 ?
                    (caddr_t)USERLIMIT32 : as->a_userlimit;
                switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
                case RANGE_OKAY:
                        break;
                case RANGE_BADPROT:
                        return (ENOTSUP);
                case RANGE_BADADDR:
                default:
                        return (ENOMEM);
                }
        }
        /*
         * No need to worry about vac alignment for anonymous
         * pages since this is a "clone" object that doesn't
         * yet exist.
         */
        error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
        if (error != 0) {
                return (error);
        }

        /*
         * Use the seg_vn segment driver; passing in the NULL amp
         * gives the desired "cloning" effect.
         */
        vn_a.vp = NULL;
        vn_a.offset = 0;
        vn_a.type = flags & MAP_TYPE;
        vn_a.prot = uprot;
        vn_a.maxprot = PROT_ALL;
        vn_a.flags = flags & ~MAP_TYPE;
        vn_a.cred = CRED();
        vn_a.amp = NULL;
        vn_a.szc = 0;
        vn_a.lgrp_mem_policy_flags = 0;

        return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

#define RANDOMIZABLE_MAPPING(addr, flags) (((flags & MAP_FIXED) == 0) && \
        !(((flags & MAP_ALIGN) == 0) && (addr != 0) && aslr_respect_mmap_hint))

static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
        struct vnode *vp;
        struct as *as = curproc->p_as;
        uint_t uprot, maxprot, type;
        int error;
        int in_crit = 0;

        if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
            _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
            MAP_TEXT | MAP_INITDATA)) != 0) {
                /* | MAP_RENAME */      /* not implemented, let user know */
                return (EINVAL);
        }

        if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
                return (EINVAL);
        }

        if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
                return (EINVAL);
        }

        if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
            (MAP_FIXED | _MAP_RANDOMIZE)) {
                return (EINVAL);
        }

        /*
         * If it's not a fixed allocation and mmap ASLR is enabled, randomize
         * it.
         */
        if (RANDOMIZABLE_MAPPING(*addrp, flags) &&
            secflag_enabled(curproc, PROC_SEC_ASLR))
                flags |= _MAP_RANDOMIZE;

#if defined(__sparc)
        /*
         * See if this is an "old mmap call".  If so, remember this
         * fact and convert the flags value given to mmap to indicate
         * the specified address in the system call must be used.
         * _MAP_NEW is set by all new uses of mmap.
         */
        if ((flags & _MAP_NEW) == 0)
                flags |= MAP_FIXED;
#endif
        flags &= ~_MAP_NEW;

        type = flags & MAP_TYPE;
        if (type != MAP_PRIVATE && type != MAP_SHARED)
                return (EINVAL);


        if (flags & MAP_ALIGN) {
                if (flags & MAP_FIXED)
                        return (EINVAL);

                /* alignment needs to be a power of 2 >= page size */
                if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
                    !ISP2((uintptr_t)*addrp))
                        return (EINVAL);
        }
        /*
         * Check for bad lengths and file position.
         * We let the VOP_MAP routine check for negative lengths
         * since on some vnode types this might be appropriate.
         */
        if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
                return (EINVAL);

        maxprot = PROT_ALL;             /* start out allowing all accesses */
        uprot = prot | PROT_USER;

        if (fp == NULL) {
                ASSERT(flags & MAP_ANON);
                /* discard lwpchan mappings, like munmap() */
                if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
                        lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
                as_rangelock(as);
                error = zmap(as, addrp, len, uprot, flags, pos);
                as_rangeunlock(as);
                /*
                 * Tell machine specific code that lwp has mapped shared memory
                 */
                if (error == 0 && (flags & MAP_SHARED)) {
                        /* EMPTY */
                        LWP_MMODEL_SHARED_AS(*addrp, len);
                }
                return (error);
        } else if ((flags & MAP_ANON) != 0)
                return (EINVAL);

        vp = fp->f_vnode;

        /* Can't execute code from "noexec" mounted filesystem. */
        if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
                maxprot &= ~PROT_EXEC;

        /*
         * These checks were added as part of large files.
         *
         * Return ENXIO if the initial position is negative; return EOVERFLOW
         * if (offset + len) would overflow the maximum allowed offset for the
         * type of file descriptor being used.
         */
        if (vp->v_type == VREG) {
                if (pos < 0)
                        return (ENXIO);
                if ((offset_t)len > (OFFSET_MAX(fp) - pos))
                        return (EOVERFLOW);
        }

        if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
                /* no write access allowed */
                maxprot &= ~PROT_WRITE;
        }

        /*
         * XXX - Do we also adjust maxprot based on protections
         * of the vnode?  E.g. if no execute permission is given
         * on the vnode for the current user, maxprot probably
         * should disallow PROT_EXEC also?  This is different
         * from the write access as this would be a per vnode
         * test as opposed to a per fd test for writability.
         */

        /*
         * Verify that the specified protections are not greater than
         * the maximum allowable protections.  Also test to make sure
         * that the file descriptor allows read access, since "write only"
         * mappings are hard to do: normally we read from the file before
         * the page can be written.
         */
        if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
                return (EACCES);

        /*
         * If the user specified an address, do some simple checks here
         */
        if ((flags & MAP_FIXED) != 0) {
                caddr_t userlimit;

                /*
                 * Use the user address.  First verify that
                 * the address to be used is page aligned.
                 * Then make some simple bounds checks.
                 */
                if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
                        return (EINVAL);

                userlimit = flags & _MAP_LOW32 ?
                    (caddr_t)USERLIMIT32 : as->a_userlimit;
                switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
                case RANGE_OKAY:
                        break;
                case RANGE_BADPROT:
                        return (ENOTSUP);
                case RANGE_BADADDR:
                default:
                        return (ENOMEM);
                }
        }

        if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
            nbl_need_check(vp)) {
                int svmand;
                nbl_op_t nop;

                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                error = nbl_svmand(vp, fp->f_cred, &svmand);
                if (error != 0)
                        goto done;
                if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
                        if (prot & (PROT_READ | PROT_EXEC)) {
                                nop = NBL_READWRITE;
                        } else {
                                nop = NBL_WRITE;
                        }
                } else {
                        nop = NBL_READ;
                }
                if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
                        error = EACCES;
                        goto done;
                }
        }

        /* discard lwpchan mappings, like munmap() */
        if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
                lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

        /*
         * Ok, now let the vnode map routine do its thing to set things up.
         */
        error = VOP_MAP(vp, pos, as,
            addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

        if (error == 0) {
                /*
                 * Tell machine specific code that lwp has mapped shared memory
                 */
                if (flags & MAP_SHARED) {
                        /* EMPTY */
                        LWP_MMODEL_SHARED_AS(*addrp, len);
                }
                if (vp->v_type == VREG &&
                    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
                        /*
                         * Mark this as an executable vnode
                         */
                        mutex_enter(&vp->v_lock);
                        vp->v_flag |= VVMEXEC;
                        mutex_exit(&vp->v_lock);
                }
        }

done:
        if (in_crit)
                nbl_end_crit(vp);
        return (error);
}
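
/*
 * Hedged user-level illustration (an assumption, not part of this file):
 * with MAP_ALIGN the addr argument is reinterpreted as a requested
 * alignment, which is why smmap_common() above requires it to be 0 or a
 * power of two no smaller than PAGESIZE and rejects MAP_ALIGN together
 * with MAP_FIXED.
 *
 *	// Ask for an anonymous mapping aligned to a 4 MB boundary.
 *	void *va = mmap((void *)(4UL * 1024 * 1024), len,
 *	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_ALIGN,
 *	    -1, 0);
 *	if (va == MAP_FAILED)
 *		perror("mmap");
 */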

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
        struct file *fp;
        int error;

        if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&addr, len, prot, flags,
                    NULL, (offset_t)pos);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&addr, len, prot, flags,
                    fp, (offset_t)pos);
                releasef(fd);
        } else
                error = EBADF;

        return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif  /* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
        struct file *fp;
        int error;
        caddr_t a = (caddr_t)(uintptr_t)addr;

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&a, (size_t)len, prot,
                    flags | _MAP_LOW32, NULL, (offset_t)pos);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&a, (size_t)len, prot,
                    flags | _MAP_LOW32, fp, (offset_t)pos);
                releasef(fd);
        } else
                error = EBADF;

        ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

        return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
        caddr_t addr;
        size_t len;
#ifdef _LP64
        /*
         * 32-bit contents, 64-bit cells
         */
        uint64_t prot;
        uint64_t flags;
        uint64_t fd;
        uint64_t offhi;
        uint64_t offlo;
#else
        /*
         * 32-bit contents, 32-bit cells
         */
        uint32_t prot;
        uint32_t flags;
        uint32_t fd;
        uint32_t offhi;
        uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
        struct file *fp;
        int error;
        caddr_t a = uap->addr;
        int flags = (int)uap->flags;
        int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
        offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
        offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&a, uap->len, (int)uap->prot,
                    flags | _MAP_LOW32, NULL, off);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&a, uap->len, (int)uap->prot,
                    flags | _MAP_LOW32, fp, off);
                releasef(fd);
        } else
                error = EBADF;

        if (error == 0)
                rvp->r_val1 = (uintptr_t)a;
        return (error);
}

#endif  /* _SYSCALL32_IMPL || _ILP32 */

int
munmap(caddr_t addr, size_t len)
{
        struct proc *p = curproc;
        struct as *as = p->p_as;

        if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
                return (set_errno(EINVAL));

        if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
                return (set_errno(EINVAL));

        /*
         * Discard lwpchan mappings.
         */
        if (p->p_lcp != NULL)
                lwpchan_delete_mapping(p, addr, addr + len);
        if (as_unmap(as, addr, len) != 0)
                return (set_errno(EINVAL));

        return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
        struct as *as = curproc->p_as;
        uint_t uprot = prot | PROT_USER;
        int error;

        if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
                return (set_errno(EINVAL));

        switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
        case RANGE_OKAY:
                break;
        case RANGE_BADPROT:
                return (set_errno(ENOTSUP));
        case RANGE_BADADDR:
        default:
                return (set_errno(ENOMEM));
        }

        error = as_setprot(as, addr, len, uprot);
        if (error)
                return (set_errno(error));
        return (0);
}

#define MC_CACHE        128                     /* internal result buffer */
#define MC_QUANTUM      (MC_CACHE * PAGESIZE)   /* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
        struct as *as = curproc->p_as;
        caddr_t ea;                     /* end address of loop */
        size_t rl;                      /* inner result length */
        char vec[MC_CACHE];             /* local vector cache */
        int error;
        model_t model;
        long llen;

        model = get_udatamodel();
        /*
         * Validate form of address parameters.
         */
        if (model == DATAMODEL_NATIVE) {
                llen = (long)len;
        } else {
                llen = (int32_t)(size32_t)len;
        }
        if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
                return (set_errno(EINVAL));

        if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
                return (set_errno(ENOMEM));

        /*
         * Loop over subranges of interval [addr : addr + len), recovering
         * results internally and then copying them out to caller.  Subrange
         * is based on the size of MC_CACHE, defined above.
         */
        for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
                error = as_incore(as, addr,
                    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
                if (rl != 0) {
                        rl = (rl + PAGESIZE - 1) / PAGESIZE;
                        if (copyout(vec, vecp, rl) != 0)
                                return (set_errno(EFAULT));
                        vecp += rl;
                }
                if (error != 0)
                        return (set_errno(ENOMEM));
        }
        return (0);
}
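
/*
 * Hedged user-level illustration (an assumption, not part of this file):
 * a mincore(3C) caller supplies one result byte per page, so the vector
 * must hold (len + PAGESIZE - 1) / PAGESIZE entries; the loop above fills
 * it MC_QUANTUM bytes of address space (MC_CACHE pages) at a time.  The
 * pagesize/resident locals below are illustrative.
 *
 *	size_t pages = (len + pagesize - 1) / pagesize;
 *	char *vec = malloc(pages);
 *	size_t resident = 0;
 *
 *	if (vec != NULL && mincore(addr, len, vec) == 0) {
 *		for (size_t i = 0; i < pages; i++)
 *			resident += (vec[i] & 1);
 *	}
 */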