/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;
int use_zmap_lpg = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

int
brk(caddr_t nva)
{
	int error;
	proc_t *p = curproc;

	/*
	 * Serialize brk operations on an address space.
	 * This also serves as the lock protecting p_brksize
	 * and p_brkpageszc.
	 */
	as_rangelock(p->p_as);
	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		error = brk_lpg(nva);
	} else {
		error = brk_internal(nva, p->p_brkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((error != 0 ? set_errno(error) : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;
	int err;
	int remap = 0;

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, &remap);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If failed, back off to base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	if (remap == 0) {
		/*
		 * Map from the current brk end up to the new page size
		 * alignment using the current page size.
		 */
		addr = brkbase + p->p_brksize;
		addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		if (addr < nva) {
			err = brk_internal(addr, oszc);
			/*
			 * In failure case, try again if oszc is not base page
			 * size, then return err.
			 */
			if (err != 0) {
				if (oszc != 0) {
					err = brk_internal(nva, 0);
				}
				return (err);
			}
		}
	}

	err = brk_internal(nva, szc);
	/* If using szc failed, map with base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	if (remap != 0) {
		/*
		 * Round up brk base to a large page boundary and remap
		 * anything in the segment already faulted in beyond that
		 * point.
		 */
		addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
		len = (brkbase + p->p_brksize) - addr;
		/* advisory, so ignore errors */
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
	}

	ASSERT(err == 0);
	return (err);		/* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing heap pagesize (e.g. some old code) and also if
	 * heap pagesize changes we can update p_brkpageszc but delay adding
	 * new mapping yet still know from p_brksize where the heap really
	 * ends. The user-requested heap end is stored in a libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * Use PAGESIZE to round up ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc has changed since the
	 * last time p_brksize was computed.
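	 *
	 * (P2ROUNDUP(x, align) rounds x up to the next multiple of the
	 * power-of-two "align"; for example, P2ROUNDUP(0x2345, 0x2000)
	 * is 0x4000, and an already-aligned value is left unchanged.)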
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 */
		crargs.szc = szc;
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	p->p_brkpageszc = szc;
	return (0);
}

/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
	struct proc *p = curproc;
	int err;

	/*
	 * Serialize grow operations on an address space.
	 * This also serves as the lock protecting p_stksize
	 * and p_stkpageszc.
	 */
	as_rangelock(p->p_as);
	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		err = grow_lpg(sp);
	} else {
		err = grow_internal(sp, p->p_stkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, oldsp;
	int oszc, szc;
	int err;
	int remap = 0;

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, &remap);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* failed, fall back to base page size */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size.
	 * If we're not going to remap the whole segment with the new
	 * page size, split the grow into two operations: map to the new
	 * page size alignment boundary with the existing page size, then
	 * map the rest with the new page size.
	 */
	err = 0;
	if (remap == 0) {
		oldsp = p->p_usrstack - p->p_stksize;
		addr = (caddr_t)P2ALIGN((uintptr_t)oldsp, pgsz);
		if (addr > sp) {
			err = grow_internal(addr, oszc);
			/*
			 * In this case, grow with oszc failed, so grow all the
			 * way to sp with base page size.
			 */
			if (err != 0) {
				if (oszc != 0) {
					err = grow_internal(sp, 0);
				}
				return (err);
			}
		}
	}

	err = grow_internal(sp, szc);
	/* The grow with szc failed, so fall back to base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	if (remap) {
		/*
		 * Round up stack pointer to a large page boundary and remap
		 * any pgsz pages in the segment already faulted in beyond that
		 * point.
		 */
		addr = p->p_usrstack - p->p_stksize;
		addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		len = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz) - addr;
		/* advisory, so ignore errors */
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
	}

	/* Update page size code for stack. */
	p->p_stkpageszc = szc;

	ASSERT(err == 0);
	return (err);		/* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t newsize = p->p_usrstack - sp;
	size_t oldsize;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = P2ROUNDUP(newsize, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls,
		    p, RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	newsize = P2ROUNDUP(newsize, pgsz);
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the p_stkpageszc. growszc is different than
	 * p_stkpageszc only on a memcntl to increase the stack pagesize.
	 */
	crargs.szc = p->p_stkpageszc;
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(as, p->p_usrstack - newsize, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, u.u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;


	/*
	 * Set up translations so the process doesn't have to fault in
	 * the stack pages we just gave it.
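	 *
	 * (The new range is zero-fill-on-demand; pre-faulting it with
	 * S_WRITE here means the caller does not immediately take another
	 * fault on its first store into the new stack pages.)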
	 */
	(void) as_fault(as->a_hat, as,
	    p->p_usrstack - newsize, newsize - oldsize, F_INVAL, S_WRITE);

	return (0);
}

/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs a, b;
	struct proc *p = curproc;
	int err;
	size_t pgsz;
	size_t l0, l1, l2, l3, l4;	/* 0th through 4th chunks */
	caddr_t ruaddr, ruaddr0;	/* rounded up addresses */
	extern size_t auto_lpg_va_default;

	if (((PROT_ALL & uprot) != uprot))
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
		(void) as_unmap(as, *addrp, len);
	} else {
		/*
		 * No need to worry about vac alignment for anonymous
		 * pages since this is a "clone" object that doesn't
		 * yet exist.
		 */
		map_addr(addrp, len, pos, 0, flags);
		if (*addrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	a.vp = NULL;
	a.offset = 0;
	a.type = flags & MAP_TYPE;
	a.prot = uprot;
	a.maxprot = PROT_ALL;
	a.flags = flags & ~MAP_TYPE;
	a.cred = CRED();
	a.amp = NULL;
	a.szc = 0;
	a.lgrp_mem_policy_flags = 0;

	/*
	 * Call arch-specific map_pgsz routine to pick best page size to map
	 * this segment, and break the mapping up into parts if required.
	 *
	 * The parts work like this:
	 *
	 * addr		---------
	 *		|	| l0
	 *		---------
	 *		|	| l1
	 *		---------
	 *		|	| l2
	 *		---------
	 *		|	| l3
	 *		---------
	 *		|	| l4
	 *		---------
	 * addr+len
	 *
	 * Starting from the middle, l2 is the number of bytes mapped by the
	 * selected large page. l1 and l3 are mapped by auto_lpg_va_default
	 * page size pages, and l0 and l4 are mapped by base page size pages.
	 * If auto_lpg_va_default is the base page size, then l0 == l4 == 0.
	 * If the requested address or length are aligned to the selected large
	 * page size, l1 or l3 may also be 0.
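	 *
	 * For example (illustrative sizes only): with an 8K base page,
	 * a 64K auto_lpg_va_default and a 4M page picked by map_pgsz(),
	 * an unaligned request is carved up so that l0 pads the start
	 * out to the next 64K boundary, l1 continues to the next 4M
	 * boundary, l2 covers the 4M-mapped middle, l3 covers the
	 * trailing 64K-mapped piece, and l4 holds whatever sub-64K
	 * remainder is left at the end.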
	 */
	if (use_zmap_lpg && a.type == MAP_PRIVATE) {

		pgsz = map_pgsz(MAPPGSZ_VA, p, *addrp, len, NULL);
		if (pgsz <= PAGESIZE || len < pgsz) {
			return (as_map(as, *addrp, len, segvn_create, &a));
		}

		ruaddr = (caddr_t)P2ROUNDUP((uintptr_t)*addrp, pgsz);
		if (auto_lpg_va_default != MMU_PAGESIZE) {
			ruaddr0 = (caddr_t)P2ROUNDUP((uintptr_t)*addrp,
			    auto_lpg_va_default);
			l0 = ruaddr0 - *addrp;
		} else {
			l0 = 0;
			ruaddr0 = *addrp;
		}
		l1 = ruaddr - ruaddr0;
		l3 = P2PHASE(len - l0 - l1, pgsz);
		if (auto_lpg_va_default == MMU_PAGESIZE) {
			l4 = 0;
		} else {
			l4 = P2PHASE(l3, auto_lpg_va_default);
			l3 -= l4;
		}
		l2 = len - l0 - l1 - l3 - l4;

		if (l0) {
			b = a;
			err = as_map(as, *addrp, l0, segvn_create, &b);
			if (err) {
				return (err);
			}
		}

		if (l1) {
			b = a;
			b.szc = page_szc(auto_lpg_va_default);
			err = as_map(as, ruaddr0, l1, segvn_create, &b);
			if (err) {
				goto error1;
			}
		}

		if (l2) {
			b = a;
			b.szc = page_szc(pgsz);
			err = as_map(as, ruaddr, l2, segvn_create, &b);
			if (err) {
				goto error2;
			}
		}

		if (l3) {
			b = a;
			b.szc = page_szc(auto_lpg_va_default);
			err = as_map(as, ruaddr + l2, l3, segvn_create, &b);
			if (err) {
				goto error3;
			}
		}
		if (l4) {
			err = as_map(as, ruaddr + l2 + l3, l4, segvn_create,
			    &a);
			if (err) {
error3:
				if (l3) {
					(void) as_unmap(as, ruaddr + l2, l3);
				}
error2:
				if (l2) {
					(void) as_unmap(as, ruaddr, l2);
				}
error1:
				if (l1) {
					(void) as_unmap(as, ruaddr0, l1);
				}
				if (l0) {
					(void) as_unmap(as, *addrp, l0);
				}
				return (err);
			}
		}

		return (0);
	}

	return (as_map(as, *addrp, len, segvn_create, &a));
}

static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
	struct vnode *vp;
	struct as *as = curproc->p_as;
	uint_t uprot, maxprot, type;
	int error;

	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
	    MAP_TEXT | MAP_INITDATA)) != 0) {
		/* | MAP_RENAME */	/* not implemented, let user know */
		return (EINVAL);
	}

	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
		return (EINVAL);
	}

	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
		return (EINVAL);
	}

#if defined(__sparc)
	/*
	 * See if this is an "old mmap call".  If so, remember this
	 * fact and convert the flags value given to mmap to indicate
	 * the specified address in the system call must be used.
	 * _MAP_NEW is set by all new uses of mmap.
	 */
	if ((flags & _MAP_NEW) == 0)
		flags |= MAP_FIXED;
#endif
	flags &= ~_MAP_NEW;

	type = flags & MAP_TYPE;
	if (type != MAP_PRIVATE && type != MAP_SHARED)
		return (EINVAL);


	if (flags & MAP_ALIGN) {

		if (flags & MAP_FIXED)
			return (EINVAL);

		/* alignment needs to be a power of 2 >= page size */
		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
		    !ISP2((uintptr_t)*addrp))
			return (EINVAL);
	}
	/*
	 * Check for bad lengths and file position.
	 * We let the VOP_MAP routine check for negative lengths
	 * since on some vnode types this might be appropriate.
	 */
	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
		return (EINVAL);

	maxprot = PROT_ALL;		/* start out allowing all accesses */
	uprot = prot | PROT_USER;

	if (fp == NULL) {
		ASSERT(flags & MAP_ANON);
		as_rangelock(as);
		error = zmap(as, addrp, len, uprot, flags, pos);
		as_rangeunlock(as);
		return (error);
	} else if ((flags & MAP_ANON) != 0)
		return (EINVAL);

	vp = fp->f_vnode;

	/* Can't execute code from "noexec" mounted filesystem. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
		maxprot &= ~PROT_EXEC;

	/*
	 * These checks were added as part of large file support.
	 *
	 * Return ENXIO if the initial position is negative; return EOVERFLOW
	 * if (offset + len) would overflow the maximum allowed offset for the
	 * type of file descriptor being used.
	 */
	if (vp->v_type == VREG) {
		if (pos < 0)
			return (ENXIO);
		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
			return (EOVERFLOW);
	}

	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
		/* no write access allowed */
		maxprot &= ~PROT_WRITE;
	}

	/*
	 * XXX - Do we also adjust maxprot based on protections
	 * of the vnode?  E.g. if no execute permission is given
	 * on the vnode for the current user, maxprot probably
	 * should disallow PROT_EXEC also?  This is different
	 * from the write access as this would be a per vnode
	 * test as opposed to a per fd test for writability.
	 */

	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections.  Also test to make sure
	 * that the file descriptor allows for read access, since
	 * "write only" mappings are hard to do: normally we do the
	 * read from the file before the page can be written.
	 */
	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
		return (EACCES);

	/*
	 * If the user specified an address, do some simple checks here
	 */
	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}


	/*
	 * Ok, now let the vnode map routine do its thing to set things up.
	 */
	error = VOP_MAP(vp, pos, as,
	    addrp, len, uprot, maxprot, flags, fp->f_cred);

	if (error == 0) {
		if (vp->v_type == VREG &&
		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
			/*
			 * Mark this as an executable vnode
			 */
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VVMEXEC;
			mutex_exit(&vp->v_lock);
		}
	}

	return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
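 *
 * A typical user-level call that ends up here (illustrative only):
 *
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *
 * which reaches smmap_common() with fp == NULL and is satisfied by the
 * anonymous-memory zmap() path above.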
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file *fp;
	int error;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&addr, len, prot, flags,
		    fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif	/* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file *fp;
	int error;
	caddr_t a = (caddr_t)(uintptr_t)addr;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
	caddr_t addr;
	size_t len;
#ifdef _LP64
	/*
	 * 32-bit contents, 64-bit cells
	 */
	uint64_t prot;
	uint64_t flags;
	uint64_t fd;
	uint64_t offhi;
	uint64_t offlo;
#else
	/*
	 * 32-bit contents, 32-bit cells
	 */
	uint32_t prot;
	uint32_t flags;
	uint32_t fd;
	uint32_t offhi;
	uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file *fp;
	int error;
	caddr_t a = uap->addr;
	int flags = (int)uap->flags;
	int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, NULL, off);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, fp, off);
		releasef(fd);
	} else
		error = EBADF;

	if (error == 0)
		rvp->r_val1 = (uintptr_t)a;
	return (error);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

int
munmap(caddr_t addr, size_t len)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(EINVAL));

	/*
	 * Discard lwpchan mappings.
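	 *
	 * (The lwpchan cache holds translations for user-level
	 * synchronization objects; entries that fall within the range
	 * being unmapped are deleted so they cannot go stale.)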
	 */
	if (p->p_lcp != NULL)
		lwpchan_delete_mapping(p, addr, addr + len);
	if (as_unmap(as, addr, len) != 0)
		return (set_errno(EINVAL));

	return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
	struct as *as = curproc->p_as;
	uint_t uprot = prot | PROT_USER;
	int error;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
	case RANGE_OKAY:
		break;
	case RANGE_BADPROT:
		return (set_errno(ENOTSUP));
	case RANGE_BADADDR:
	default:
		return (set_errno(ENOMEM));
	}

	error = as_setprot(as, addr, len, uprot);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
	struct as *as = curproc->p_as;
	caddr_t ea;			/* end address of loop */
	size_t rl;			/* inner result length */
	char vec[MC_CACHE];		/* local vector cache */
	int error;
	model_t model;
	long llen;

	model = get_udatamodel();
	/*
	 * Validate form of address parameters.
	 */
	if (model == DATAMODEL_NATIVE) {
		llen = (long)len;
	} else {
		llen = (int32_t)(size32_t)len;
	}
	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(ENOMEM));

	/*
	 * Loop over subranges of interval [addr : addr + len), recovering
	 * results internally and then copying them out to caller.  Subrange
	 * is based on the size of MC_CACHE, defined above.
	 */
	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
		error = as_incore(as, addr,
		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
		if (rl != 0) {
			rl = (rl + PAGESIZE - 1) / PAGESIZE;
			if (copyout(vec, vecp, rl) != 0)
				return (set_errno(EFAULT));
			vecp += rl;
		}
		if (error != 0)
			return (set_errno(ENOMEM));
	}
	return (0);
}