/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;
int use_zmap_lpg = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

int
brk(caddr_t nva)
{
        int error;
        proc_t *p = curproc;

        /*
         * Serialize brk operations on an address space.
         * This also serves as the lock protecting p_brksize
         * and p_brkpageszc.
         */
        as_rangelock(p->p_as);
        if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
                error = brk_lpg(nva);
        } else {
                error = brk_internal(nva, p->p_brkpageszc);
        }
        as_rangeunlock(p->p_as);
        return ((error != 0 ? set_errno(error) : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
        struct proc *p = curproc;
        size_t pgsz, len;
        caddr_t addr;
        caddr_t bssbase = p->p_bssbase;
        caddr_t brkbase = p->p_brkbase;
        int oszc, szc;
        int err;
        int remap = 0;

        oszc = p->p_brkpageszc;

        /*
         * If p_brkbase has not yet been set, the first call
         * to brk_internal() will initialize it.
         */
        if (brkbase == 0) {
                return (brk_internal(nva, oszc));
        }

        len = nva - bssbase;

        pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, &remap);
        szc = page_szc(pgsz);

        /*
         * Covers two cases:
         * 1. page_szc() returns -1 for invalid page size, so we want to
         * ignore it in that case.
         * 2. By design we never decrease page size, as it is more stable.
         */
        if (szc <= oszc) {
                err = brk_internal(nva, oszc);
                /* If failed, back off to base page size. */
                if (err != 0 && oszc != 0) {
                        err = brk_internal(nva, 0);
                }
                return (err);
        }

        if (remap == 0) {
                /*
                 * Map from the current brk end up to the new page size
                 * alignment using the current page size.
                 */
                addr = brkbase + p->p_brksize;
                addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
                if (addr < nva) {
                        err = brk_internal(addr, oszc);
                        /*
                         * In failure case, try again if oszc is not base page
                         * size, then return err.
                         */
                        if (err != 0) {
                                if (oszc != 0) {
                                        err = brk_internal(nva, 0);
                                }
                                return (err);
                        }
                }
        }

        err = brk_internal(nva, szc);
        /* If using szc failed, map with base page size and return. */
        if (err != 0) {
                if (szc != 0) {
                        err = brk_internal(nva, 0);
                }
                return (err);
        }

        if (remap != 0) {
                /*
                 * Round up brk base to a large page boundary and remap
                 * anything in the segment already faulted in beyond that
                 * point.
                 */
                addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
                len = (brkbase + p->p_brksize) - addr;
                /* advisory, so ignore errors */
                (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
        }

        ASSERT(err == 0);
        return (err);           /* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
        caddr_t ova;                    /* current break address */
        size_t size;
        int error;
        struct proc *p = curproc;
        struct as *as = p->p_as;
        size_t pgsz;
        uint_t szc;
        rctl_qty_t as_rctl;

        /*
         * extend heap to brkszc alignment but use current p->p_brkpageszc
         * for the newly created segment.  This allows the new extension
         * segment to be concatenated successfully with the existing brk
         * segment.
         */
        if ((szc = brkszc) != 0) {
                pgsz = page_get_pagesize(szc);
                ASSERT(pgsz > PAGESIZE);
        } else {
                pgsz = PAGESIZE;
        }

        mutex_enter(&p->p_lock);
        as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
            p->p_rctls, p);
        mutex_exit(&p->p_lock);

        /*
         * If p_brkbase has not yet been set, the first call
         * to brk() will initialize it.
         */
        if (p->p_brkbase == 0)
                p->p_brkbase = nva;

        /*
         * Before multiple page size support existed, p_brksize stored the
         * exact user request for heap size, not rounded to the page size.
         * If pgsz is greater than PAGESIZE, calculate the real new heap
         * size by rounding the request up to pgsz.  This is useful because
         * we may want to know where the heap ends without knowing the heap
         * page size (e.g. some old code), and also because, if the heap
         * page size changes, we can update p_brkpageszc but delay adding the
         * new mapping while still knowing from p_brksize where the heap
         * really ends.  The user-requested heap end is stored in a libc
         * variable.
         */
        if (pgsz > PAGESIZE) {
                caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
                size = tnva - p->p_brkbase;
                if (tnva < p->p_brkbase || (size > p->p_brksize &&
                    size > (size_t)as_rctl)) {
                        szc = 0;
                        pgsz = PAGESIZE;
                        size = nva - p->p_brkbase;
                }
        } else {
                size = nva - p->p_brkbase;
        }

        /*
         * Use PAGESIZE to round up ova because we want the real value of the
         * current heap end, in case p_brkpageszc has changed since p_brksize
         * was last computed.
         */
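        /*
         * For example (hypothetical values, assuming an 8K base page and a
         * 4M pgsz): a requested nva of 0x20500000 rounds up to 0x20800000,
         * while ova is rounded only to the next 8K boundary above the
         * current heap end.
         */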
        nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
        ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
            PAGESIZE);

        if ((nva < p->p_brkbase) || (size > p->p_brksize &&
            size > as_rctl)) {
                mutex_enter(&p->p_lock);
                (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
                    RCA_SAFE);
                mutex_exit(&p->p_lock);
                return (ENOMEM);
        }

        if (nva > ova) {
                struct segvn_crargs crargs =
                    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

                if (!(p->p_datprot & PROT_EXEC)) {
                        crargs.prot &= ~PROT_EXEC;
                }

                /*
                 * Add new zfod mapping to extend UNIX data segment
                 */
                crargs.szc = szc;
                crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
                error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
                    &crargs);
                if (error) {
                        return (error);
                }

        } else if (nva < ova) {
                /*
                 * Release mapping to shrink UNIX data segment.
                 */
                (void) as_unmap(as, nva, (size_t)(ova - nva));
        }
        p->p_brksize = size;
        p->p_brkpageszc = szc;
        return (0);
}

/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
        struct proc *p = curproc;
        int err;

        /*
         * Serialize grow operations on an address space.
         * This also serves as the lock protecting p_stksize
         * and p_stkpageszc.
         */
        as_rangelock(p->p_as);
        if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
                err = grow_lpg(sp);
        } else {
                err = grow_internal(sp, p->p_stkpageszc);
        }
        as_rangeunlock(p->p_as);
        return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
        struct proc *p = curproc;
        size_t pgsz;
        size_t len, newsize;
        caddr_t addr, oldsp;
        int oszc, szc;
        int err;
        int remap = 0;

        newsize = p->p_usrstack - sp;

        oszc = p->p_stkpageszc;
        pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, &remap);
        szc = page_szc(pgsz);

        /*
         * Covers two cases:
         * 1. page_szc() returns -1 for invalid page size, so we want to
         * ignore it in that case.
         * 2. By design we never decrease page size, as it is more stable.
         * This shouldn't happen as the stack never shrinks.
         */
        if (szc <= oszc) {
                err = grow_internal(sp, oszc);
                /* failed, fall back to base page size */
                if (err != 0 && oszc != 0) {
                        err = grow_internal(sp, 0);
                }
                return (err);
        }

        /*
         * We've grown sufficiently to switch to a new page size.
         * If we're not going to remap the whole segment with the new
         * page size, split the grow into two operations: map to the new
         * page size alignment boundary with the existing page size, then
         * map the rest with the new page size.
         */
        err = 0;
        if (remap == 0) {
                oldsp = p->p_usrstack - p->p_stksize;
                addr = (caddr_t)P2ALIGN((uintptr_t)oldsp, pgsz);
                if (addr > sp) {
                        err = grow_internal(addr, oszc);
                        /*
                         * In this case, grow with oszc failed, so grow all the
                         * way to sp with base page size.
                         */
                        if (err != 0) {
                                if (oszc != 0) {
                                        err = grow_internal(sp, 0);
                                }
                                return (err);
                        }
                }
        }

        err = grow_internal(sp, szc);
        /* If the grow with szc failed, fall back to base page size. */
        if (err != 0) {
                if (szc != 0) {
                        err = grow_internal(sp, 0);
                }
                return (err);
        }

        if (remap) {
                /*
                 * Round up stack pointer to a large page boundary and remap
                 * any pgsz pages in the segment already faulted in beyond that
                 * point.
                 */
                addr = p->p_usrstack - p->p_stksize;
                addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
                len = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz) - addr;
                /* advisory, so ignore errors */
                (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
        }

        /* Update page size code for stack. */
        p->p_stkpageszc = szc;

        ASSERT(err == 0);
        return (err);           /* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
        struct proc *p = curproc;
        struct as *as = p->p_as;
        size_t newsize = p->p_usrstack - sp;
        size_t oldsize;
        int error;
        size_t pgsz;
        uint_t szc;
        struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

        ASSERT(sp < p->p_usrstack);

        /*
         * grow to growszc alignment but use current p->p_stkpageszc for
         * the segvn_crargs szc passed to segvn_create.  For memcntl to
         * increase the szc, this allows the new extension segment to be
         * concatenated successfully with the existing stack segment.
         */
        if ((szc = growszc) != 0) {
                pgsz = page_get_pagesize(szc);
                ASSERT(pgsz > PAGESIZE);
                newsize = P2ROUNDUP(newsize, pgsz);
                if (newsize > (size_t)p->p_stk_ctl) {
                        szc = 0;
                        pgsz = PAGESIZE;
                        newsize = p->p_usrstack - sp;
                }
        } else {
                pgsz = PAGESIZE;
        }

        if (newsize > (size_t)p->p_stk_ctl) {
                (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
                    RCA_UNSAFE_ALL);

                return (ENOMEM);
        }

        oldsize = p->p_stksize;
        newsize = P2ROUNDUP(newsize, pgsz);
        ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

        if (newsize <= oldsize) {       /* prevent the stack from shrinking */
                return (0);
        }

        if (!(p->p_stkprot & PROT_EXEC)) {
                crargs.prot &= ~PROT_EXEC;
        }
        /*
         * extend stack with the p_stkpageszc.  growszc is different than
         * p_stkpageszc only on a memcntl to increase the stack pagesize.
         */
        crargs.szc = p->p_stkpageszc;
        crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

        if ((error = as_map(as, p->p_usrstack - newsize, newsize - oldsize,
            segvn_create, &crargs)) != 0) {
                if (error == EAGAIN) {
                        cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
                            "for pid %d (%s)", p->p_pid, u.u_comm);
                }
                return (error);
        }
        p->p_stksize = newsize;


        /*
         * Set up translations so the process doesn't have to fault in
         * the stack pages we just gave it.
         */
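        /*
         * (The return value is intentionally discarded: if the pre-fault
         * fails, the pages are simply faulted in on first access.)
         */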
        (void) as_fault(as->a_hat, as,
            p->p_usrstack - newsize, newsize - oldsize, F_INVAL, S_WRITE);

        return (0);
}

/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
        struct segvn_crargs a, b;
        struct proc *p = curproc;
        int err;
        size_t pgsz;
        size_t l0, l1, l2, l3, l4;      /* 0th through 4th chunks */
        caddr_t ruaddr, ruaddr0;        /* rounded up addresses */
        extern size_t auto_lpg_va_default;

        if (((PROT_ALL & uprot) != uprot))
                return (EACCES);

        if ((flags & MAP_FIXED) != 0) {
                caddr_t userlimit;

                /*
                 * Use the user address.  First verify that
                 * the address to be used is page aligned.
                 * Then make some simple bounds checks.
                 */
                if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
                        return (EINVAL);

                userlimit = flags & _MAP_LOW32 ?
                    (caddr_t)USERLIMIT32 : as->a_userlimit;
                switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
                case RANGE_OKAY:
                        break;
                case RANGE_BADPROT:
                        return (ENOTSUP);
                case RANGE_BADADDR:
                default:
                        return (ENOMEM);
                }
                (void) as_unmap(as, *addrp, len);
        } else {
                /*
                 * No need to worry about vac alignment for anonymous
                 * pages since this is a "clone" object that doesn't
                 * yet exist.
                 */
                map_addr(addrp, len, pos, 0, flags);
                if (*addrp == NULL)
                        return (ENOMEM);
        }

        /*
         * Use the seg_vn segment driver; passing in the NULL amp
         * gives the desired "cloning" effect.
         */
        a.vp = NULL;
        a.offset = 0;
        a.type = flags & MAP_TYPE;
        a.prot = uprot;
        a.maxprot = PROT_ALL;
        a.flags = flags & ~MAP_TYPE;
        a.cred = CRED();
        a.amp = NULL;
        a.szc = 0;
        a.lgrp_mem_policy_flags = 0;

        /*
         * Call arch-specific map_pgsz routine to pick best page size to map
         * this segment, and break the mapping up into parts if required.
         *
         * The parts work like this:
         *
         * addr         ---------
         *              |       | l0
         *              ---------
         *              |       | l1
         *              ---------
         *              |       | l2
         *              ---------
         *              |       | l3
         *              ---------
         *              |       | l4
         *              ---------
         * addr+len
         *
         * Starting from the middle, l2 is the number of bytes mapped by the
         * selected large page.  l1 and l3 are mapped by auto_lpg_va_default
         * page size pages, and l0 and l4 are mapped by base page size pages.
         * If auto_lpg_va_default is the base page size, then l0 == l4 == 0.
         * If the requested address or length is aligned to the selected large
         * page size, l1 or l3 may also be 0.
         */
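        /*
         * A worked example with hypothetical sizes (8K base page,
         * auto_lpg_va_default = 64K, selected pgsz = 4M), for
         * *addrp = 0x2003a000 and len = 0x900000:
         *
         *      ruaddr0 = P2ROUNDUP(0x2003a000, 64K) = 0x20040000, l0 = 0x6000
         *      ruaddr  = P2ROUNDUP(0x2003a000, 4M)  = 0x20400000, l1 = 0x3c0000
         *      l3 = P2PHASE(len - l0 - l1, 4M)      = 0x13a000
         *      l4 = P2PHASE(l3, 64K) = 0xa000,  l3 -= l4  ->  l3 = 0x130000
         *      l2 = len - l0 - l1 - l3 - l4         = 0x400000 (one 4M page)
         *
         * The numbers above are illustrative only; the actual sizes come
         * from map_pgsz() and auto_lpg_va_default on the running system.
         */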
        if (use_zmap_lpg) {

                pgsz = map_pgsz(MAPPGSZ_VA, p, *addrp, len, NULL);
                if (pgsz <= PAGESIZE || len < pgsz) {
                        return (as_map(as, *addrp, len, segvn_create, &a));
                }

                ruaddr = (caddr_t)P2ROUNDUP((uintptr_t)*addrp, pgsz);
                if (auto_lpg_va_default != MMU_PAGESIZE) {
                        ruaddr0 = (caddr_t)P2ROUNDUP((uintptr_t)*addrp,
                            auto_lpg_va_default);
                        l0 = ruaddr0 - *addrp;
                } else {
                        l0 = 0;
                        ruaddr0 = *addrp;
                }
                l1 = ruaddr - ruaddr0;
                l3 = P2PHASE(len - l0 - l1, pgsz);
                if (auto_lpg_va_default == MMU_PAGESIZE) {
                        l4 = 0;
                } else {
                        l4 = P2PHASE(l3, auto_lpg_va_default);
                        l3 -= l4;
                }
                l2 = len - l0 - l1 - l3 - l4;

                if (l0) {
                        b = a;
                        err = as_map(as, *addrp, l0, segvn_create, &b);
                        if (err) {
                                return (err);
                        }
                }

                if (l1) {
                        b = a;
                        b.szc = page_szc(auto_lpg_va_default);
                        err = as_map(as, ruaddr0, l1, segvn_create, &b);
                        if (err) {
                                goto error1;
                        }
                }

                if (l2) {
                        b = a;
                        b.szc = page_szc(pgsz);
                        err = as_map(as, ruaddr, l2, segvn_create, &b);
                        if (err) {
                                goto error2;
                        }
                }

                if (l3) {
                        b = a;
                        b.szc = page_szc(auto_lpg_va_default);
                        err = as_map(as, ruaddr + l2, l3, segvn_create, &b);
                        if (err) {
                                goto error3;
                        }
                }
                if (l4) {
                        err = as_map(as, ruaddr + l2 + l3, l4, segvn_create,
                            &a);
                        if (err) {
error3:
                                if (l3) {
                                        (void) as_unmap(as, ruaddr + l2, l3);
                                }
error2:
                                if (l2) {
                                        (void) as_unmap(as, ruaddr, l2);
                                }
error1:
                                if (l1) {
                                        (void) as_unmap(as, ruaddr0, l1);
                                }
                                if (l0) {
                                        (void) as_unmap(as, *addrp, l0);
                                }
                                return (err);
                        }
                }

                return (0);
        }

        return (as_map(as, *addrp, len, segvn_create, &a));
}

static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
        struct vnode *vp;
        struct as *as = curproc->p_as;
        uint_t uprot, maxprot, type;
        int error;

        if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
            _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
            MAP_TEXT | MAP_INITDATA)) != 0) {
                /* | MAP_RENAME */      /* not implemented, let user know */
                return (EINVAL);
        }

        if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
                return (EINVAL);
        }

        if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
                return (EINVAL);
        }

#if defined(__sparc)
        /*
         * See if this is an "old mmap call".  If so, remember this
         * fact and convert the flags value given to mmap to indicate
         * the specified address in the system call must be used.
         * _MAP_NEW is set by all new uses of mmap.
         */
        if ((flags & _MAP_NEW) == 0)
                flags |= MAP_FIXED;
#endif
        flags &= ~_MAP_NEW;

        type = flags & MAP_TYPE;
        if (type != MAP_PRIVATE && type != MAP_SHARED)
                return (EINVAL);


        if (flags & MAP_ALIGN) {

                if (flags & MAP_FIXED)
                        return (EINVAL);

                /* alignment needs to be a power of 2 >= page size */
                if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
                    !ISP2((uintptr_t)*addrp))
                        return (EINVAL);
        }
        /*
         * Check for bad lengths and file position.
         * We let the VOP_MAP routine check for negative lengths
         * since on some vnode types this might be appropriate.
         */
        if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
                return (EINVAL);

        maxprot = PROT_ALL;             /* start out allowing all accesses */
        uprot = prot | PROT_USER;

        if (fp == NULL) {
                ASSERT(flags & MAP_ANON);
                as_rangelock(as);
                error = zmap(as, addrp, len, uprot, flags, pos);
                as_rangeunlock(as);
                return (error);
        } else if ((flags & MAP_ANON) != 0)
                return (EINVAL);

        vp = fp->f_vnode;

        /* Can't execute code from "noexec" mounted filesystem. */
        if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
                maxprot &= ~PROT_EXEC;

        /*
         * These checks were added as part of large files.
         *
         * Return EINVAL if the initial position is negative; return EOVERFLOW
         * if (offset + len) would overflow the maximum allowed offset for the
         * type of file descriptor being used.
         */
        if (vp->v_type == VREG) {
                if (pos < (offset_t)0)
                        return (EINVAL);
                if ((offset_t)len > (OFFSET_MAX(fp) - pos))
                        return (EOVERFLOW);
        }

        if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
                /* no write access allowed */
                maxprot &= ~PROT_WRITE;
        }

        /*
         * XXX - Do we also adjust maxprot based on protections
         * of the vnode?  E.g. if no execute permission is given
         * on the vnode for the current user, maxprot probably
         * should disallow PROT_EXEC also?  This is different
         * from the write access as this would be a per vnode
         * test as opposed to a per fd test for writability.
         */

        /*
         * Verify that the specified protections are not greater than
         * the maximum allowable protections.  Also test to make sure
         * that the file descriptor allows read access, since "write only"
         * mappings are hard to do: normally we do the read from the file
         * before the page can be written.
         */
        if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
                return (EACCES);

        /*
         * If the user specified an address, do some simple checks here
         */
        if ((flags & MAP_FIXED) != 0) {
                caddr_t userlimit;

                /*
                 * Use the user address.  First verify that
                 * the address to be used is page aligned.
                 * Then make some simple bounds checks.
                 */
                if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
                        return (EINVAL);

                userlimit = flags & _MAP_LOW32 ?
                    (caddr_t)USERLIMIT32 : as->a_userlimit;
                switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
                case RANGE_OKAY:
                        break;
                case RANGE_BADPROT:
                        return (ENOTSUP);
                case RANGE_BADADDR:
                default:
                        return (ENOMEM);
                }
        }


        /*
         * Ok, now let the vnode map routine do its thing to set things up.
         */
        error = VOP_MAP(vp, pos, as,
            addrp, len, uprot, maxprot, flags, fp->f_cred);

        if (error == 0) {
                if (vp->v_type == VREG &&
                    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
                        /*
                         * Mark this as an executable vnode
                         */
                        mutex_enter(&vp->v_lock);
                        vp->v_flag |= VVMEXEC;
                        mutex_exit(&vp->v_lock);
                }
        }

        return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
        struct file *fp;
        int error;

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&addr, len, prot, flags,
                    NULL, (offset_t)pos);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&addr, len, prot, flags,
                    fp, (offset_t)pos);
                releasef(fd);
        } else
                error = EBADF;

        return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif  /* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
        struct file *fp;
        int error;
        caddr_t a = (caddr_t)(uintptr_t)addr;

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&a, (size_t)len, prot,
                    flags | _MAP_LOW32, NULL, (offset_t)pos);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&a, (size_t)len, prot,
                    flags | _MAP_LOW32, fp, (offset_t)pos);
                releasef(fd);
        } else
                error = EBADF;

        ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

        return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
        caddr_t addr;
        size_t len;
#ifdef _LP64
        /*
         * 32-bit contents, 64-bit cells
         */
        uint64_t prot;
        uint64_t flags;
        uint64_t fd;
        uint64_t offhi;
        uint64_t offlo;
#else
        /*
         * 32-bit contents, 32-bit cells
         */
        uint32_t prot;
        uint32_t flags;
        uint32_t fd;
        uint32_t offhi;
        uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
        struct file *fp;
        int error;
        caddr_t a = uap->addr;
        int flags = (int)uap->flags;
        int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
        offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
        offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&a, uap->len, (int)uap->prot,
                    flags | _MAP_LOW32, NULL, off);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&a, uap->len, (int)uap->prot,
                    flags | _MAP_LOW32, fp, off);
                releasef(fd);
        } else
                error = EBADF;

        if (error == 0)
                rvp->r_val1 = (uintptr_t)a;
        return (error);
}

#endif  /* _SYSCALL32_IMPL || _ILP32 */

int
munmap(caddr_t addr, size_t len)
{
        struct proc *p = curproc;
        struct as *as = p->p_as;

        if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
                return (set_errno(EINVAL));

        if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
                return (set_errno(EINVAL));

        /*
         * Discard lwpchan mappings.
         */
        if (p->p_lcp != NULL)
                lwpchan_delete_mapping(p, addr, addr + len);
        if (as_unmap(as, addr, len) != 0)
                return (set_errno(EINVAL));

        return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
        struct as *as = curproc->p_as;
        uint_t uprot = prot | PROT_USER;
        int error;

        if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
                return (set_errno(EINVAL));

        switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
        case RANGE_OKAY:
                break;
        case RANGE_BADPROT:
                return (set_errno(ENOTSUP));
        case RANGE_BADADDR:
        default:
                return (set_errno(ENOMEM));
        }

        error = as_setprot(as, addr, len, uprot);
        if (error)
                return (set_errno(error));
        return (0);
}

#define MC_CACHE        128                     /* internal result buffer */
#define MC_QUANTUM      (MC_CACHE * PAGESIZE)   /* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
        struct as *as = curproc->p_as;
        caddr_t ea;                     /* end address of loop */
        size_t rl;                      /* inner result length */
        char vec[MC_CACHE];             /* local vector cache */
        int error;
        model_t model;
        long llen;

        model = get_udatamodel();
        /*
         * Validate form of address parameters.
         */
        if (model == DATAMODEL_NATIVE) {
                llen = (long)len;
        } else {
                llen = (int32_t)(size32_t)len;
        }
        if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
                return (set_errno(EINVAL));

        if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
                return (set_errno(ENOMEM));

        /*
         * Loop over subranges of interval [addr : addr + len), recovering
         * results internally and then copying them out to caller.  Subrange
         * is based on the size of MC_CACHE, defined above.
         */
        for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
                error = as_incore(as, addr,
                    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
                if (rl != 0) {
                        rl = (rl + PAGESIZE - 1) / PAGESIZE;
                        if (copyout(vec, vecp, rl) != 0)
                                return (set_errno(EFAULT));
                        vecp += rl;
                }
                if (error != 0)
                        return (set_errno(ENOMEM));
        }
        return (0);
}
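
/*
 * Illustrative userland sketch (not part of this file): the anonymous
 * mapping path above is what services a call such as
 *
 *	void *p = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *	...
 *	(void) munmap(p, 8192);
 *
 * libc traps into smmap64()/smmap32(), which call smmap_common(); with
 * fd == -1 and MAP_ANON set, smmap_common() hands the request to zmap().
 */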