/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved.
 * Copyright 2017 Joyent, Inc.
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;

/*
 * If set, we will not randomize mappings where the 'addr' argument is
 * non-NULL and not an alignment.
 */
int aslr_respect_mmap_hint = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

intptr_t
brk(caddr_t nva)
{
	int error;
	proc_t *p = curproc;

	/*
	 * Serialize brk operations on an address space.
	 * This also serves as the lock protecting p_brksize
	 * and p_brkpageszc.
	 */
	as_rangelock(p->p_as);

	/*
	 * As a special case to aid the implementation of sbrk(3C), if given a
	 * new brk of 0, return the current brk.  We'll hide this in brk(3C).
	 */
	if (nva == 0) {
		intptr_t base = (intptr_t)(p->p_brkbase + p->p_brksize);
		as_rangeunlock(p->p_as);
		return (base);
	}

	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		error = brk_lpg(nva);
	} else {
		error = brk_internal(nva, p->p_brkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((error != 0 ? set_errno(error) : 0));
}
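
/*
 * Illustrative sketch only (the real sbrk(3C) lives in libc and caches the
 * break locally): given a hypothetical userland wrapper __raw_brk() that
 * issues the system call above directly and returns 0 on success, the
 * brk(0) special case above lets an sbrk-style emulation be written roughly
 * as
 *
 *	void *
 *	simple_sbrk(intptr_t incr)
 *	{
 *		caddr_t oldbrk = (caddr_t)__raw_brk(0);
 *
 *		if (incr != 0 && __raw_brk(oldbrk + incr) != 0)
 *			return ((void *)-1);
 *		return (oldbrk);
 *	}
 *
 * i.e. brk(0) reports the current break, while a nonzero brk() either
 * succeeds or fails with errno set (typically ENOMEM).
 */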

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr, brkend;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;
	int err;

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If that failed, back off to the base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	err = brk_internal(nva, szc);
	/* If using szc failed, map with base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	/*
	 * Round up brk base to a large page boundary and remap
	 * anything in the segment already faulted in beyond that
	 * point.
	 */
	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
	brkend = brkbase + p->p_brksize;
	len = brkend - addr;
	/* Check that len is not negative. Update page size code for heap. */
	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_brkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing heap pagesize (e.g. some old code) and also if
	 * heap pagesize changes we can update p_brkpageszc but delay adding
	 * new mapping yet still know from p_brksize where the heap really
	 * ends. The user-requested heap end is stored in a libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * use PAGESIZE to round up ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc changes since the last
	 * p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
				    AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	return (0);
}

/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t oldsize = p->p_stksize;
	size_t newsize;
	int err;

	/*
	 * Serialize grow operations on an address space.
	 * This also serves as the lock protecting p_stksize
	 * and p_stkpageszc.
	 */
	as_rangelock(as);
	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		err = grow_lpg(sp);
	} else {
		err = grow_internal(sp, p->p_stkpageszc);
	}
	newsize = p->p_stksize;
	as_rangeunlock(as);

	if (err == 0 && newsize > oldsize) {
		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
		/*
		 * Set up translations so the process doesn't have to fault in
		 * the stack pages we just gave it.
		 */
		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
		    newsize - oldsize, F_INVAL, S_WRITE);
	}
	return ((err == 0 ? 1 : 0));
}
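
/*
 * Worked example of the large-page trimming done in brk_lpg() above and
 * grow_lpg() below (illustrative numbers, assuming a 4K base page and a
 * 2MB large page, i.e. pgsz == 0x200000): for a segment that currently
 * spans [0xfe8a3000, 0xff000000), P2ROUNDUP(0xfe8a3000, pgsz) == 0xfea00000
 * and P2ALIGN(0xff000000, pgsz) == 0xff000000, so only the fully aligned
 * interior [0xfea00000, 0xff000000) is handed to as_setpagesize(); the
 * unaligned head of the segment stays at the base page size, and if the
 * remaining length is not a multiple of pgsz the remap is skipped entirely.
 */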

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, saddr;
	caddr_t growend;
	int oszc, szc;
	int err;

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* failed, fall back to base page size */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size.
	 * So we are going to remap the whole segment with the new page size.
	 */
	err = grow_internal(sp, szc);
	/* The grow with szc failed, so fall back to base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * Round up stack pointer to a large page boundary and remap
	 * any pgsz pages in the segment already faulted in beyond that
	 * point.
	 */
	saddr = p->p_usrstack - p->p_stksize;
	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
	len = growend - addr;
	/* Check that len is not negative. Update page size code for stack. */
	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_stkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	size_t newsize;
	size_t oldsize;
	uintptr_t new_start;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);
	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
		newsize = p->p_usrstack - sp;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the proposed new growszc, which is different
	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
	 * if not aligned to szc's pgsz.
	 */
	if (szc > 0) {
		caddr_t oldsp = p->p_usrstack - oldsize;
		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
		    pgsz);

		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
			    AS_MAP_NO_LPOOB;
		} else if (oldsp == austk) {
			crargs.szc = szc;
		} else {
			crargs.szc = AS_MAP_STACK;
		}
	} else {
		crargs.szc = AS_MAP_NO_LPOOB;
	}
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	/*
	 * The stack is about to grow into its guard.  This can be acceptable
	 * if the size restriction on the stack has been expanded since its
	 * initialization during exec().  In such cases, the guard segment will
	 * be shrunk, provided the new size is reasonable.
	 */
	new_start = (uintptr_t)p->p_usrstack - newsize;
	if (p->p_stkg_start != 0 && new_start > p->p_stkg_start &&
	    new_start < p->p_stkg_end) {
		const size_t unmap_sz = p->p_stkg_end - new_start;
		const size_t remain_sz = new_start - p->p_stkg_start;
		extern size_t stack_guard_min_sz;

		/* Do not allow the guard to shrink below minimum size */
		if (remain_sz < stack_guard_min_sz) {
			return (ENOMEM);
		}

		error = as_unmap(p->p_as, (caddr_t)new_start, unmap_sz);
		if (error != 0) {
			return (error);
		}
		p->p_stkg_end -= unmap_sz;
	}

	if ((error = as_map(p->p_as, (caddr_t)new_start, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;
	return (0);
}

/*
 * Find address for user to map.  If MAP_FIXED is not specified, we can pick
 * any address we want, but we will first try the value in *addrp if it is
 * non-NULL and _MAP_RANDOMIZE is not set.  Thus this is implementing a way to
 * try and get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
	caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
	size_t lenp = len;

	ASSERT(AS_ISCLAIMGAP(as));	/* searches should be serialized */
	if (flags & MAP_FIXED) {
		(void) as_unmap(as, *addrp, len);
		return (0);
	} else if (basep != NULL &&
	    ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
	    !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
		/* User supplied address was available */
		*addrp = basep;
	} else {
		/*
		 * No user supplied address or the address supplied was not
		 * available.
		 */
		map_addr(addrp, len, off, vacalign, flags);
	}
	if (*addrp == NULL)
		return (ENOMEM);
	return (0);
}


/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs vn_a;
	int error;

	if (((PROT_ALL & uprot) != uprot))
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}
	/*
	 * No need to worry about vac alignment for anonymous
	 * pages since this is a "clone" object that doesn't
	 * yet exist.
	 */
	error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
	if (error != 0) {
		return (error);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	vn_a.vp = NULL;
	vn_a.offset = 0;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = uprot;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = CRED();
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

#define	RANDOMIZABLE_MAPPING(addr, flags) (((flags & MAP_FIXED) == 0) && \
	!(((flags & MAP_ALIGN) == 0) && (addr != 0) && aslr_respect_mmap_hint))

static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
	struct vnode *vp;
	struct as *as = curproc->p_as;
	uint_t uprot, maxprot, type;
	int error;
	int in_crit = 0;

	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
	    MAP_TEXT | MAP_INITDATA)) != 0) {
		/* | MAP_RENAME */	/* not implemented, let user know */
		return (EINVAL);
	}

	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
		return (EINVAL);
	}

	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
		return (EINVAL);
	}

	if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
	    (MAP_FIXED | _MAP_RANDOMIZE)) {
		return (EINVAL);
	}

	/*
	 * If it's not a fixed allocation and mmap ASLR is enabled, randomize
	 * it.
	 */
	if (RANDOMIZABLE_MAPPING(*addrp, flags) &&
	    secflag_enabled(curproc, PROC_SEC_ASLR))
		flags |= _MAP_RANDOMIZE;

#if defined(__sparc)
	/*
	 * See if this is an "old mmap call".  If so, remember this
	 * fact and convert the flags value given to mmap to indicate
	 * the specified address in the system call must be used.
	 * _MAP_NEW is set by all new uses of mmap.
	 */
	if ((flags & _MAP_NEW) == 0)
		flags |= MAP_FIXED;
#endif
	flags &= ~_MAP_NEW;

	type = flags & MAP_TYPE;
	if (type != MAP_PRIVATE && type != MAP_SHARED)
		return (EINVAL);


	if (flags & MAP_ALIGN) {
		if (flags & MAP_FIXED)
			return (EINVAL);

		/* alignment needs to be a power of 2 >= page size */
		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
		    !ISP2((uintptr_t)*addrp))
			return (EINVAL);
	}
	/*
	 * Check for bad lengths and file position.
	 * We let the VOP_MAP routine check for negative lengths
	 * since on some vnode types this might be appropriate.
	 */
	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
		return (EINVAL);

	maxprot = PROT_ALL;		/* start out allowing all accesses */
	uprot = prot | PROT_USER;

	if (fp == NULL) {
		ASSERT(flags & MAP_ANON);
		/* discard lwpchan mappings, like munmap() */
		if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
			lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
		as_rangelock(as);
		error = zmap(as, addrp, len, uprot, flags, pos);
		as_rangeunlock(as);
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (error == 0 && (flags & MAP_SHARED)) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		return (error);
	} else if ((flags & MAP_ANON) != 0)
		return (EINVAL);

	vp = fp->f_vnode;

	/* Can't execute code from "noexec" mounted filesystem. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
		maxprot &= ~PROT_EXEC;

	/*
	 * These checks were added as part of large files.
	 *
	 * Return ENXIO if the initial position is negative; return EOVERFLOW
	 * if (offset + len) would overflow the maximum allowed offset for the
	 * type of file descriptor being used.
	 */
	if (vp->v_type == VREG) {
		if (pos < 0)
			return (ENXIO);
		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
			return (EOVERFLOW);
	}

	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
		/* no write access allowed */
		maxprot &= ~PROT_WRITE;
	}

	/*
	 * XXX - Do we also adjust maxprot based on protections
	 * of the vnode?  E.g. if no execute permission is given
	 * on the vnode for the current user, maxprot probably
	 * should disallow PROT_EXEC also?  This is different
	 * from the write access as this would be a per vnode
	 * test as opposed to a per fd test for writability.
	 */

	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections.  Also test to make sure
	 * that the file descriptor allows read access, since "write only"
	 * mappings are hard to do: normally we do the read from the file
	 * before the page can be written.
	 */
	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
		return (EACCES);

	/*
	 * If the user specified an address, do some simple checks here
	 */
	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}

	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
	    nbl_need_check(vp)) {
		int svmand;
		nbl_op_t nop;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto done;
		if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
			if (prot & (PROT_READ | PROT_EXEC)) {
				nop = NBL_READWRITE;
			} else {
				nop = NBL_WRITE;
			}
		} else {
			nop = NBL_READ;
		}
		if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	/* discard lwpchan mappings, like munmap() */
	if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
		lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

	/*
	 * Ok, now let the vnode map routine do its thing to set things up.
	 */
	error = VOP_MAP(vp, pos, as,
	    addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

	if (error == 0) {
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (flags & MAP_SHARED) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		if (vp->v_type == VREG &&
		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
			/*
			 * Mark this as an executable vnode
			 */
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VVMEXEC;
			mutex_exit(&vp->v_lock);
		}
	}

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file *fp;
	int error;

	if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&addr, len, prot, flags,
		    fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif	/* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file *fp;
	int error;
	caddr_t a = (caddr_t)(uintptr_t)addr;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}
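
/*
 * Worked example (illustrative) of the offset reassembly done by
 * smmaplf32() below: a 32-bit caller mapping at file offset 0x123456000
 * passes the 64-bit offset as two 32-bit cells.  On a big-endian system
 * the high word arrives first, so offhi == 0x1, offlo == 0x23456000, and
 * off == (offhi << 32) | offlo; on a little-endian system the two cells
 * arrive in the opposite order, which is why the #ifdef _BIG_ENDIAN in
 * smmaplf32() swaps the operands.
 */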

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
	caddr_t addr;
	size_t len;
#ifdef _LP64
	/*
	 * 32-bit contents, 64-bit cells
	 */
	uint64_t prot;
	uint64_t flags;
	uint64_t fd;
	uint64_t offhi;
	uint64_t offlo;
#else
	/*
	 * 32-bit contents, 32-bit cells
	 */
	uint32_t prot;
	uint32_t flags;
	uint32_t fd;
	uint32_t offhi;
	uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file *fp;
	int error;
	caddr_t a = uap->addr;
	int flags = (int)uap->flags;
	int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, NULL, off);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, fp, off);
		releasef(fd);
	} else
		error = EBADF;

	if (error == 0)
		rvp->r_val1 = (uintptr_t)a;
	return (error);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

int
munmap(caddr_t addr, size_t len)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(EINVAL));

	/*
	 * Discard lwpchan mappings.
	 */
	if (p->p_lcp != NULL)
		lwpchan_delete_mapping(p, addr, addr + len);
	if (as_unmap(as, addr, len) != 0)
		return (set_errno(EINVAL));

	return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
	struct as *as = curproc->p_as;
	uint_t uprot = prot | PROT_USER;
	int error;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
	case RANGE_OKAY:
		break;
	case RANGE_BADPROT:
		return (set_errno(ENOTSUP));
	case RANGE_BADADDR:
	default:
		return (set_errno(ENOMEM));
	}

	error = as_setprot(as, addr, len, uprot);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */
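
/*
 * With the common 4K base page, each pass of the mincore() loop below
 * therefore covers MC_QUANTUM = 128 * 4096 = 512K of address space and
 * copies out at most MC_CACHE (128) result bytes, one byte per page;
 * a caller's vector must hold (len + PAGESIZE - 1) / PAGESIZE bytes in
 * total.
 */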

int
mincore(caddr_t addr, size_t len, char *vecp)
{
	struct as *as = curproc->p_as;
	caddr_t ea;			/* end address of loop */
	size_t rl;			/* inner result length */
	char vec[MC_CACHE];		/* local vector cache */
	int error;
	model_t model;
	long	llen;

	model = get_udatamodel();
	/*
	 * Validate form of address parameters.
	 */
	if (model == DATAMODEL_NATIVE) {
		llen = (long)len;
	} else {
		llen = (int32_t)(size32_t)len;
	}
	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(ENOMEM));

	/*
	 * Loop over subranges of interval [addr : addr + len), recovering
	 * results internally and then copying them out to caller.  Subrange
	 * is based on the size of MC_CACHE, defined above.
	 */
	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
		error = as_incore(as, addr,
		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
		if (rl != 0) {
			rl = (rl + PAGESIZE - 1) / PAGESIZE;
			if (copyout(vec, vecp, rl) != 0)
				return (set_errno(EFAULT));
			vecp += rl;
		}
		if (error != 0)
			return (set_errno(ENOMEM));
	}
	return (0);
}