/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/tuneable.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/policy.h>

#include <vm/as.h>
#include <vm/seg.h>

static uint_t mem_getpgszc(size_t);

/*
 * Memory control operations
 */
int
memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
{
	struct as *as = ttoproc(curthread)->p_as;
	struct proc *p = ttoproc(curthread);
	size_t pgsz;
	uint_t szc, oszc, pgcmd;
	int error = 0;
	faultcode_t fc;
	uintptr_t iarg;
	STRUCT_DECL(memcntl_mha, mha);

	if (mask)
		return (set_errno(EINVAL));
	if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) {
		if ((addr != 0) || (len != 0)) {
			return (set_errno(EINVAL));
		}
	} else if (cmd != MC_HAT_ADVISE) {
		if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) {
			return (set_errno(EINVAL));
		}
		/*
		 * We're only concerned with the address range
		 * here, not the protections. The protections
		 * are only used as a "filter" in this code,
		 * they aren't set or modified here.
		 */
		if (valid_usr_range(addr, len, 0, as,
		    as->a_userlimit) != RANGE_OKAY) {
			return (set_errno(ENOMEM));
		}
	}

	if (cmd == MC_HAT_ADVISE) {
		if (attr != 0 || mask != 0) {
			return (set_errno(EINVAL));
		}

	} else {
		if ((VALID_ATTR & attr) != attr) {
			return (set_errno(EINVAL));
		}
		if ((attr & SHARED) && (attr & PRIVATE)) {
			return (set_errno(EINVAL));
		}
		if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) ||
		    (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) &&
		    (error = secpolicy_lock_memory(CRED())) != 0)
			return (set_errno(error));
	}
	if (attr) {
		attr |= PROT_USER;
	}

	switch (cmd) {
	case MC_SYNC:
		/*
		 * MS_SYNC used to be defined to be zero but is now non-zero.
		 * For binary compatibility we still accept zero
		 * (the absence of MS_ASYNC) to mean the same thing.
		 */
		iarg = (uintptr_t)arg;
		if ((iarg & ~MS_INVALIDATE) == 0)
			iarg |= MS_SYNC;

		if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
		    ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
			error = set_errno(EINVAL);
		} else {
			error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
			if (error) {
				(void) set_errno(error);
			}
		}
		return (error);
	case MC_LOCKAS:
		if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) ||
		    (uintptr_t)arg == 0) {
			return (set_errno(EINVAL));
		}
		break;
	case MC_LOCK:
	case MC_UNLOCKAS:
	case MC_UNLOCK:
		break;
	case MC_HAT_ADVISE:
		/*
		 * Set the preferred page size.
		 */
		STRUCT_INIT(mha, get_udatamodel());
		if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) {
			return (set_errno(EFAULT));
		}

		pgcmd = STRUCT_FGET(mha, mha_cmd);

		/*
		 * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK
		 * and MHA_MAPSIZE_BSSBRK are supported. Only one
		 * command may be specified at a time.
		 */
		if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) &
		    pgcmd) || pgcmd == 0 || !ISP2(pgcmd) ||
		    STRUCT_FGET(mha, mha_flags))
			return (set_errno(EINVAL));

		pgsz = STRUCT_FGET(mha, mha_pagesize);

		/*
		 * Call the platform-specific map_pgsz() routine to get the
		 * optimal pgsz if pgsz is 0.
		 *
		 * For stack and heap operations addr and len must be zero.
		 */
		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
			if (addr != NULL || len != 0) {
				return (set_errno(EINVAL));
			}

			/*
			 * Disable autompss for this process unless pgsz == 0,
			 * which means the system should pick. In the
			 * pgsz == 0 case, leave the SAUTOLPG setting alone, as
			 * we don't want to enable it when someone has
			 * disabled automatic large page selection for the
			 * whole system.
			 */
			mutex_enter(&p->p_lock);
			if (pgsz != 0) {
				p->p_flag &= ~SAUTOLPG;
			}
			mutex_exit(&p->p_lock);

			as_rangelock(as);

			if (pgsz == 0) {
				int type;

				if (pgcmd == MHA_MAPSIZE_BSSBRK)
					type = MAPPGSZ_HEAP;
				else
					type = MAPPGSZ_STK;

				pgsz = map_pgsz(type, p, 0, 0, NULL);
			}
		} else {
			/*
			 * Note that we don't disable automatic large page
			 * selection for anon segments based on use of
			 * memcntl().
			 */
			if (pgsz == 0) {
				pgsz = map_pgsz(MAPPGSZ_VA, p, addr, len,
				    NULL);
			}

			/*
			 * addr and len must be preferred page size aligned
			 * and valid for the range specified.
			 */
			if (!IS_P2ALIGNED(addr, pgsz) ||
			    !IS_P2ALIGNED(len, pgsz)) {
				return (set_errno(EINVAL));
			}
			if (valid_usr_range(addr, len, 0, as,
			    as->a_userlimit) != RANGE_OKAY) {
				return (set_errno(ENOMEM));
			}
		}

		szc = mem_getpgszc(pgsz);
		if (szc == (uint_t)-1) {
			if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK))
			    != 0) {
				as_rangeunlock(as);
			}
			return (set_errno(EINVAL));
		}

		/*
		 * For stack and heap operations we first need to pad
		 * out the existing range (create new mappings) to the new
		 * preferred page size boundary. Also the start of the
		 * .bss for the heap or the user's stack base may not be on
		 * the new preferred page size boundary. For these cases
		 * we align the base of the request on the new preferred
		 * page size.
		 */
		if (pgcmd & MHA_MAPSIZE_BSSBRK) {
			if (szc == p->p_brkpageszc) {
				as_rangeunlock(as);
				return (0);
			}
			if (szc > p->p_brkpageszc) {
				error = brk_internal(p->p_brkbase
				    + p->p_brksize, szc);
				if (error) {
					as_rangeunlock(as);
					return (set_errno(error));
				}
			}
			oszc = p->p_brkpageszc;
			p->p_brkpageszc = szc;

			ASSERT(IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz));
			addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			len = (p->p_brkbase + p->p_brksize) - addr;
			ASSERT(IS_P2ALIGNED(len, pgsz));
			/*
			 * Perhaps no existing pages to promote.
			 */
			if (len == 0) {
				as_rangeunlock(as);
				return (0);
			}
		}
		/*
		 * The code below, as does grow.c, assumes stacks always grow
		 * downward.
		 */
		if (pgcmd & MHA_MAPSIZE_STACK) {
			/*
			 * Some boxes (x86) have a top of stack that
			 * is not large page aligned. Since stacks are
			 * usually small we'll just return and do nothing
			 * for these cases. Preferred page size is advisory
			 * so there is no need to return an error.
			 */
			if (szc == p->p_stkpageszc ||
			    !IS_P2ALIGNED(p->p_usrstack, pgsz)) {
				as_rangeunlock(as);
				return (0);
			}

			if (szc > p->p_stkpageszc) {
				error = grow_internal(p->p_usrstack
				    - p->p_stksize, szc);
				if (error) {
					as_rangeunlock(as);
					return (set_errno(error));
				}
			}
			oszc = p->p_stkpageszc;
			p->p_stkpageszc = szc;

			ASSERT(IS_P2ALIGNED(p->p_usrstack, pgsz));
			addr = p->p_usrstack - p->p_stksize;
			len = p->p_stksize;

			/*
			 * Perhaps nothing to promote: we wrapped around
			 * or grow did not grow the stack to a large
			 * page boundary.
			 */
			if (!IS_P2ALIGNED(len, pgsz) || len == 0 ||
			    addr >= p->p_usrstack || (addr + len) < addr) {
				as_rangeunlock(as);
				return (0);
			}
		}
		ASSERT(IS_P2ALIGNED(addr, pgsz));
		ASSERT(IS_P2ALIGNED(len, pgsz));
		error = as_setpagesize(as, addr, len, szc, B_TRUE);

		/*
		 * On stack or heap failures restore the original
		 * page size code.
		 */
		if (error) {
			if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) {
				p->p_brkpageszc = oszc;
			}
			if ((pgcmd & MHA_MAPSIZE_STACK) != 0) {
				p->p_stkpageszc = oszc;
			}
			(void) set_errno(error);
		}
		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
			as_rangeunlock(as);
		}
		return (error);
	case MC_ADVISE:
		switch ((uintptr_t)arg) {
		case MADV_WILLNEED:
			fc = as_faulta(as, addr, len);
			if (fc) {
				if (FC_CODE(fc) == FC_OBJERR)
					error = set_errno(FC_ERRNO(fc));
				else if (FC_CODE(fc) == FC_NOMAP)
					error = set_errno(ENOMEM);
				else
					error = set_errno(EINVAL);
				return (error);
			}
			break;

		case MADV_DONTNEED:
			/*
			 * For now, MADV_DONTNEED is turned into an
			 * as_ctl(MC_SYNC) operation flagged for async
			 * invalidate.
			 */
			error = as_ctl(as, addr, len, MC_SYNC, attr,
			    MS_ASYNC | MS_INVALIDATE, NULL, 0);
			if (error)
				(void) set_errno(error);
			return (error);

		default:
			error = as_ctl(as, addr, len, cmd, attr,
			    (uintptr_t)arg, NULL, 0);
			if (error)
				(void) set_errno(error);
			return (error);
		}
		break;
	default:
		return (set_errno(EINVAL));
	}

	error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0);

	if (error)
		(void) set_errno(error);
	return (error);
}

/*
 * Return the page size code for the page size passed in. If a
 * matching page size is not found or supported, return -1.
 */
static uint_t
mem_getpgszc(size_t pgsz) {
	return ((uint_t)page_szc_user_filtered(pgsz));
}
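
/*
 * Illustrative userland usage sketch (an assumption, not part of this
 * file's code): based on the MC_HAT_ADVISE handling above, a process
 * could ask for a preferred heap page size roughly as follows. The
 * function name and the 4M value below are only illustrative; the
 * requested size must be one mem_getpgszc() accepts on the platform,
 * and 0 lets the system choose. For heap and stack advice addr, len,
 * attr and mask must all be zero.
 *
 *	#include <sys/types.h>
 *	#include <sys/mman.h>
 *	#include <stdio.h>
 *
 *	int
 *	request_heap_pagesize(void)
 *	{
 *		struct memcntl_mha mha;
 *
 *		mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
 *		mha.mha_flags = 0;
 *		mha.mha_pagesize = 4 * 1024 * 1024;
 *		if (memcntl(NULL, 0, MC_HAT_ADVISE,
 *		    (caddr_t)&mha, 0, 0) == -1) {
 *			perror("memcntl");
 *			return (-1);
 *		}
 *		return (0);
 *	}
 */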