/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static uint_t mem_getpgszc(size_t); /* * Memory control operations */ int memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask) { struct as *as = ttoproc(curthread)->p_as; struct proc *p = ttoproc(curthread); size_t pgsz; uint_t szc, oszc, pgcmd; int error = 0; faultcode_t fc; uintptr_t iarg; STRUCT_DECL(memcntl_mha, mha); if (mask) return (set_errno(EINVAL)); if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) { if ((addr != 0) || (len != 0)) { return (set_errno(EINVAL)); } } else if (cmd != MC_HAT_ADVISE) { if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) { return (set_errno(EINVAL)); } /* * We're only concerned with the address range * here, not the protections. The protections * are only used as a "filter" in this code, * they aren't set or modified here. */ if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY) { return (set_errno(ENOMEM)); } } if (cmd == MC_HAT_ADVISE) { if (attr != 0 || mask != 0) { return (set_errno(EINVAL)); } } else { if ((VALID_ATTR & attr) != attr) { return (set_errno(EINVAL)); } if ((attr & SHARED) && (attr & PRIVATE)) { return (set_errno(EINVAL)); } if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) || (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) && (error = secpolicy_lock_memory(CRED())) != 0) return (set_errno(error)); } if (attr) { attr |= PROT_USER; } switch (cmd) { case MC_SYNC: /* * MS_SYNC used to be defined to be zero but is now non-zero. * For binary compatibility we still accept zero * (the absence of MS_ASYNC) to mean the same thing. */ iarg = (uintptr_t)arg; if ((iarg & ~MS_INVALIDATE) == 0) iarg |= MS_SYNC; if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) || ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) { error = set_errno(EINVAL); } else { error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0); if (error) { (void) set_errno(error); } } return (error); case MC_LOCKAS: if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) || (uintptr_t)arg == 0) { return (set_errno(EINVAL)); } break; case MC_LOCK: case MC_UNLOCKAS: case MC_UNLOCK: break; case MC_HAT_ADVISE: /* * Set prefered page size. */ STRUCT_INIT(mha, get_udatamodel()); if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) { return (set_errno(EFAULT)); } pgcmd = STRUCT_FGET(mha, mha_cmd); /* * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK * and MHA_MAPSIZE_BSSBRK are supported. Only one * command may be specified at a time. */ if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) & pgcmd) || pgcmd == 0 || !ISP2(pgcmd) || STRUCT_FGET(mha, mha_flags)) return (set_errno(EINVAL)); pgsz = STRUCT_FGET(mha, mha_pagesize); /* * call platform specific map_pgsz() routine to get the * optimal pgsz if pgsz is 0. * * For stack and heap operations addr and len must be zero. */ if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) { if (addr != NULL || len != 0) { return (set_errno(EINVAL)); } /* * Disable autompss for this process unless pgsz == 0, * which means the system should pick. In the * pgsz == 0 case, leave the SAUTOLPG setting alone, as * we don't want to enable it when someone has * disabled automatic large page selection for the * whole system. */ mutex_enter(&p->p_lock); if (pgsz != 0) { p->p_flag &= ~SAUTOLPG; } mutex_exit(&p->p_lock); as_rangelock(as); if (pgsz == 0) { int type; if (pgcmd == MHA_MAPSIZE_BSSBRK) type = MAPPGSZ_HEAP; else type = MAPPGSZ_STK; pgsz = map_pgsz(type, p, 0, 0, 1); } } else { /* * addr and len must be valid for range specified. */ if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY) { return (set_errno(ENOMEM)); } /* * Note that we don't disable automatic large page * selection for anon segments based on use of * memcntl(). */ if (pgsz == 0) { error = as_set_default_lpsize(as, addr, len); if (error) { (void) set_errno(error); } return (error); } /* * addr and len must be prefered page size aligned */ if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) { return (set_errno(EINVAL)); } } szc = mem_getpgszc(pgsz); if (szc == (uint_t)-1) { if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) { as_rangeunlock(as); } return (set_errno(EINVAL)); } /* * For stack and heap operations we first need to pad * out existing range (create new mappings) to the new * prefered page size boundary. Also the start of the * .bss for the heap or user's stack base may not be on * the new prefered page size boundary. For these cases * we align the base of the request on the new prefered * page size. */ if (pgcmd & MHA_MAPSIZE_BSSBRK) { if (szc == p->p_brkpageszc) { as_rangeunlock(as); return (0); } if (szc > p->p_brkpageszc) { error = brk_internal(p->p_brkbase + p->p_brksize, szc); if (error) { as_rangeunlock(as); return (set_errno(error)); } } /* * It is possible for brk_internal to silently fail to * promote the heap size, so don't panic or ASSERT. */ if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) { as_rangeunlock(as); return (set_errno(ENOMEM)); } oszc = p->p_brkpageszc; p->p_brkpageszc = szc; addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz); len = (p->p_brkbase + p->p_brksize) - addr; ASSERT(IS_P2ALIGNED(len, pgsz)); /* * Perhaps no existing pages to promote. */ if (len == 0) { as_rangeunlock(as); return (0); } } /* * The code below, as does grow.c, assumes stacks always grow * downward. */ if (pgcmd & MHA_MAPSIZE_STACK) { /* * Some boxes (x86) have a top of stack that * is not large page aligned. Since stacks are * usually small we'll just return and do nothing * for theses cases. Prefeered page size is advisory * so no need to return an error. */ if (szc == p->p_stkpageszc || !IS_P2ALIGNED(p->p_usrstack, pgsz)) { as_rangeunlock(as); return (0); } if (szc > p->p_stkpageszc) { error = grow_internal(p->p_usrstack - p->p_stksize, szc); if (error) { as_rangeunlock(as); return (set_errno(error)); } } /* * It is possible for grow_internal to silently fail to * promote the stack size, so don't panic or ASSERT. */ if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) { as_rangeunlock(as); return (set_errno(ENOMEM)); } oszc = p->p_stkpageszc; p->p_stkpageszc = szc; addr = p->p_usrstack - p->p_stksize; len = p->p_stksize; /* * Perhaps nothing to promote, we wrapped around * or grow did not not grow the stack to a large * page boundary. */ if (!IS_P2ALIGNED(len, pgsz) || len == 0 || addr >= p->p_usrstack || (addr + len) < addr) { as_rangeunlock(as); return (0); } } ASSERT(IS_P2ALIGNED(addr, pgsz)); ASSERT(IS_P2ALIGNED(len, pgsz)); error = as_setpagesize(as, addr, len, szc, B_TRUE); /* * On stack or heap failures restore original * pg size code. */ if (error) { if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) { p->p_brkpageszc = oszc; } if ((pgcmd & MHA_MAPSIZE_STACK) != 0) { p->p_stkpageszc = oszc; } (void) set_errno(error); } if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) { as_rangeunlock(as); } return (error); case MC_ADVISE: if ((uintptr_t)arg == MADV_FREE) { len &= PAGEMASK; } switch ((uintptr_t)arg) { case MADV_WILLNEED: fc = as_faulta(as, addr, len); if (fc) { if (FC_CODE(fc) == FC_OBJERR) error = set_errno(FC_ERRNO(fc)); else if (FC_CODE(fc) == FC_NOMAP) error = set_errno(ENOMEM); else error = set_errno(EINVAL); return (error); } break; case MADV_DONTNEED: /* * For now, don't need is turned into an as_ctl(MC_SYNC) * operation flagged for async invalidate. */ error = as_ctl(as, addr, len, MC_SYNC, attr, MS_ASYNC | MS_INVALIDATE, NULL, 0); if (error) (void) set_errno(error); return (error); default: error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0); if (error) (void) set_errno(error); return (error); } break; default: return (set_errno(EINVAL)); } error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0); if (error) (void) set_errno(error); return (error); } /* * Return page size code for page size passed in. If * matching page size not found or supported, return -1. */ static uint_t mem_getpgszc(size_t pgsz) { return ((uint_t)page_szc_user_filtered(pgsz)); }