/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/tuneable.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/policy.h>

#include <vm/as.h>
#include <vm/seg.h>

static uint_t mem_getpgszc(size_t);

/*
 * Memory control operations
 *
 * System call entry point for memcntl(2).  Validates the (addr, len,
 * cmd, arg, attr, mask) tuple for the calling process's address space
 * and dispatches on cmd:
 *
 *	MC_SYNC			- msync-style flush, via as_ctl()
 *	MC_LOCK/MC_UNLOCK	- (un)lock a range, via as_ctl()
 *	MC_LOCKAS/MC_UNLOCKAS	- (un)lock the whole address space
 *	MC_ADVISE		- madvise-style hints, via as_faulta()/as_ctl()
 *	MC_HAT_ADVISE		- set preferred page size for a mapped range,
 *				  the heap, or the stack
 *
 * Returns 0 on success; on failure sets the thread errno via
 * set_errno() and returns the (nonzero) error value.
 */
int
memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
{
	struct as *as = ttoproc(curthread)->p_as;
	struct proc *p = ttoproc(curthread);
	size_t pgsz;
	uint_t szc, oszc, pgcmd;
	int error = 0;
	faultcode_t fc;
	uintptr_t iarg;
	STRUCT_DECL(memcntl_mha, mha);

	/* The mask argument is reserved and must be zero. */
	if (mask)
		return (set_errno(EINVAL));
	if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) {
		/* Whole-address-space ops take no range. */
		if ((addr != 0) || (len != 0)) {
			return (set_errno(EINVAL));
		}
	} else if (cmd != MC_HAT_ADVISE) {
		/*
		 * All other commands operate on a page-aligned, nonempty
		 * range.  (MC_HAT_ADVISE does its own range checks below,
		 * since heap/stack forms require addr == len == 0.)
		 */
		if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) {
			return (set_errno(EINVAL));
		}
		/*
		 * We're only concerned with the address range
		 * here, not the protections.  The protections
		 * are only used as a "filter" in this code,
		 * they aren't set or modified here.
		 */
		if (valid_usr_range(addr, len, 0, as,
		    as->a_userlimit) != RANGE_OKAY) {
			return (set_errno(ENOMEM));
		}
	}

	if (cmd == MC_HAT_ADVISE) {
		if (attr != 0 || mask != 0) {
			return (set_errno(EINVAL));
		}

	} else {
		if ((VALID_ATTR & attr) != attr) {
			return (set_errno(EINVAL));
		}
		/* SHARED and PRIVATE are mutually exclusive filters. */
		if ((attr & SHARED) && (attr & PRIVATE)) {
			return (set_errno(EINVAL));
		}
		/* Locking memory requires privilege. */
		if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) ||
		    (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) &&
		    (error = secpolicy_lock_memory(CRED())) != 0)
			return (set_errno(error));
	}
	/*
	 * A nonzero attr filter describes user mappings; fold in
	 * PROT_USER so downstream comparisons match user permissions.
	 */
	if (attr) {
		attr |= PROT_USER;
	}

	switch (cmd) {
	case MC_SYNC:
		/*
		 * MS_SYNC used to be defined to be zero but is now non-zero.
		 * For binary compatibility we still accept zero
		 * (the absence of MS_ASYNC) to mean the same thing.
		 */
		iarg = (uintptr_t)arg;
		if ((iarg & ~MS_INVALIDATE) == 0)
			iarg |= MS_SYNC;

		/* MS_SYNC and MS_ASYNC are mutually exclusive. */
		if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
		    ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
			error = set_errno(EINVAL);
		} else {
			error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
			if (error) {
				(void) set_errno(error);
			}
		}
		return (error);
	case MC_LOCKAS:
		/* Exactly the MCL_CURRENT/MCL_FUTURE bits, at least one set. */
		if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) ||
		    (uintptr_t)arg == 0) {
			return (set_errno(EINVAL));
		}
		break;
	case MC_LOCK:
	case MC_UNLOCKAS:
	case MC_UNLOCK:
		break;
	case MC_HAT_ADVISE:
		/*
		 * Set prefered page size.
		 */
		/* STRUCT_* macros handle 32-/64-bit user data models. */
		STRUCT_INIT(mha, get_udatamodel());
		if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) {
			return (set_errno(EFAULT));
		}

		pgcmd = STRUCT_FGET(mha, mha_cmd);

		/*
		 * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK
		 * and MHA_MAPSIZE_BSSBRK are supported. Only one
		 * command may be specified at a time.
		 */
		if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) &
		    pgcmd) || pgcmd == 0 || !ISP2(pgcmd) ||
		    STRUCT_FGET(mha, mha_flags))
			return (set_errno(EINVAL));

		pgsz = STRUCT_FGET(mha, mha_pagesize);

		/*
		 * call platform specific map_pgsz() routine to get the
		 * optimal pgsz if pgsz is 0.
		 *
		 * For stack and heap operations addr and len must be zero.
		 */
		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
			if (addr != NULL || len != 0) {
				return (set_errno(EINVAL));
			}

			/*
			 * Disable autompss for this process unless pgsz == 0,
			 * which means the system should pick.  In the
			 * pgsz == 0 case, leave the SAUTOLPG setting alone, as
			 * we don't want to enable it when someone has
			 * disabled automatic large page selection for the
			 * whole system.
			 */
			mutex_enter(&p->p_lock);
			if (pgsz != 0) {
				p->p_flag &= ~SAUTOLPG;
			}
			mutex_exit(&p->p_lock);

			/*
			 * NOTE: the range lock taken here is held across
			 * the heap/stack resizing below and must be dropped
			 * on every return path from this case.
			 */
			as_rangelock(as);

			if (pgsz == 0) {
				int type;

				if (pgcmd == MHA_MAPSIZE_BSSBRK)
					type = MAPPGSZ_HEAP;
				else
					type = MAPPGSZ_STK;

				pgsz = map_pgsz(type, p, 0, 0, 1);
			}
		} else {
			/*
			 * addr and len must be valid for range specified.
			 */
			if (valid_usr_range(addr, len, 0, as,
			    as->a_userlimit) != RANGE_OKAY) {
				return (set_errno(ENOMEM));
			}
			/*
			 * Note that we don't disable automatic large page
			 * selection for anon segments based on use of
			 * memcntl().
			 */
			if (pgsz == 0) {
				error = as_set_default_lpsize(as, addr, len);
				if (error) {
					(void) set_errno(error);
				}
				return (error);
			}

			/*
			 * addr and len must be prefered page size aligned
			 */
			if (!IS_P2ALIGNED(addr, pgsz) ||
			    !IS_P2ALIGNED(len, pgsz)) {
				return (set_errno(EINVAL));
			}
		}

		szc = mem_getpgszc(pgsz);
		if (szc == (uint_t)-1) {
			/* Unsupported page size; drop range lock if held. */
			if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK))
			    != 0) {
				as_rangeunlock(as);
			}
			return (set_errno(EINVAL));
		}

		/*
		 * For stack and heap operations we first need to pad
		 * out existing range (create new mappings) to the new
		 * prefered page size boundary. Also the start of the
		 * .bss for the heap or user's stack base may not be on
		 * the new prefered page size boundary. For these cases
		 * we align the base of the request on the new prefered
		 * page size.
		 */
		if (pgcmd & MHA_MAPSIZE_BSSBRK) {
			if (szc == p->p_brkpageszc) {
				as_rangeunlock(as);
				return (0);
			}
			if (szc > p->p_brkpageszc) {
				/* Grow the heap out to the new boundary. */
				error = brk_internal(p->p_brkbase
				    + p->p_brksize, szc);
				if (error) {
					as_rangeunlock(as);
					return (set_errno(error));
				}
			}
			/*
			 * It is possible for brk_internal to silently fail to
			 * promote the heap size, so don't panic or ASSERT.
			 */
			if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) {
				as_rangeunlock(as);
				return (set_errno(ENOMEM));
			}
			/* Save the old szc so it can be restored on error. */
			oszc = p->p_brkpageszc;
			p->p_brkpageszc = szc;

			/*
			 * Promote only whole large pages of existing heap:
			 * round the base up and measure to the (aligned) brk.
			 */
			addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			len = (p->p_brkbase + p->p_brksize) - addr;
			ASSERT(IS_P2ALIGNED(len, pgsz));
			/*
			 * Perhaps no existing pages to promote.
			 */
			if (len == 0) {
				as_rangeunlock(as);
				return (0);
			}
		}
		/*
		 * The code below, as does grow.c, assumes stacks always grow
		 * downward.
		 */
		if (pgcmd & MHA_MAPSIZE_STACK) {
			if (szc == p->p_stkpageszc) {
				as_rangeunlock(as);
				return (0);
			}

			if (szc > p->p_stkpageszc) {
				/* Grow the stack out to the new boundary. */
				error = grow_internal(p->p_usrstack -
				    p->p_stksize, szc);
				if (error) {
					as_rangeunlock(as);
					return (set_errno(error));
				}
			}
			/*
			 * It is possible for grow_internal to silently fail to
			 * promote the stack size, so don't panic or ASSERT.
			 */
			if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) {
				as_rangeunlock(as);
				return (set_errno(ENOMEM));
			}
			/* Save the old szc so it can be restored on error. */
			oszc = p->p_stkpageszc;
			p->p_stkpageszc = szc;

			addr = p->p_usrstack - p->p_stksize;
			len = P2ALIGN(p->p_stksize, pgsz);

			/*
			 * Perhaps nothing to promote.
			 */
			if (len == 0 || addr >= p->p_usrstack ||
			    (addr + len) < addr) {
				as_rangeunlock(as);
				return (0);
			}
		}
		ASSERT(IS_P2ALIGNED(addr, pgsz));
		ASSERT(IS_P2ALIGNED(len, pgsz));
		error = as_setpagesize(as, addr, len, szc, B_TRUE);

		/*
		 * On stack or heap failures restore original
		 * pg size code.
		 */
		if (error) {
			if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) {
				p->p_brkpageszc = oszc;
			}
			if ((pgcmd & MHA_MAPSIZE_STACK) != 0) {
				p->p_stkpageszc = oszc;
			}
			(void) set_errno(error);
		}
		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
			as_rangeunlock(as);
		}
		return (error);
	case MC_ADVISE:
		/* MADV_FREE silently truncates len to a page boundary. */
		if ((uintptr_t)arg == MADV_FREE) {
			len &= PAGEMASK;
		}
		switch ((uintptr_t)arg) {
		case MADV_WILLNEED:
			/* Fault the range in asynchronously. */
			fc = as_faulta(as, addr, len);
			if (fc) {
				if (FC_CODE(fc) == FC_OBJERR)
					error = set_errno(FC_ERRNO(fc));
				else if (FC_CODE(fc) == FC_NOMAP)
					error = set_errno(ENOMEM);
				else
					error = set_errno(EINVAL);
				return (error);
			}
			break;

		case MADV_DONTNEED:
			/*
			 * For now, don't need is turned into an as_ctl(MC_SYNC)
			 * operation flagged for async invalidate.
			 */
			error = as_ctl(as, addr, len, MC_SYNC, attr,
			    MS_ASYNC | MS_INVALIDATE, NULL, 0);
			if (error)
				(void) set_errno(error);
			return (error);

		default:
			/* Remaining advice values are handled by as_ctl(). */
			error = as_ctl(as, addr, len, cmd, attr,
			    (uintptr_t)arg, NULL, 0);
			if (error)
				(void) set_errno(error);
			return (error);
		}
		break;
	default:
		return (set_errno(EINVAL));
	}

	/*
	 * Common exit for commands (MC_LOCK, MC_LOCKAS, MC_UNLOCK,
	 * MC_UNLOCKAS, MADV_WILLNEED) that fall out of the switch.
	 */
	error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0);

	if (error)
		(void) set_errno(error);
	return (error);
}

/*
 * Return page size code for page size passed in. If
 * matching page size not found or supported, return -1.
 */
static uint_t
mem_getpgszc(size_t pgsz) {
	return ((uint_t)page_szc_user_filtered(pgsz));
}