1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright (c) 2015 Joyent, Inc. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 31 #include <sys/types.h> 32 #include <sys/bitmap.h> 33 #include <sys/sysmacros.h> 34 #include <sys/kmem.h> 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/user.h> 38 #include <sys/unistd.h> 39 #include <sys/errno.h> 40 #include <sys/proc.h> 41 #include <sys/mman.h> 42 #include <sys/tuneable.h> 43 #include <sys/cmn_err.h> 44 #include <sys/cred.h> 45 #include <sys/vmsystm.h> 46 #include <sys/debug.h> 47 #include <sys/policy.h> 48 49 #include <vm/as.h> 50 #include <vm/seg.h> 51 52 static uint_t mem_getpgszc(size_t); 53 54 /* 55 * Memory control operations 56 */ 57 int 58 memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask) 59 { 60 struct as *as = ttoproc(curthread)->p_as; 61 struct proc *p = ttoproc(curthread); 62 size_t pgsz; 63 uint_t szc, oszc, pgcmd; 64 int error = 0; 65 faultcode_t fc; 66 uintptr_t iarg; 67 STRUCT_DECL(memcntl_mha, mha); 68 69 if (mask) 70 return (set_errno(EINVAL)); 71 if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) { 72 if ((addr != 0) || (len != 0)) { 73 return (set_errno(EINVAL)); 74 } 75 } else if (cmd != MC_HAT_ADVISE) { 76 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) { 77 return (set_errno(EINVAL)); 78 } 79 /* 80 * We're only concerned with the address range 81 * here, not the protections. The protections 82 * are only used as a "filter" in this code, 83 * they aren't set or modified here. 84 */ 85 if (valid_usr_range(addr, len, 0, as, 86 as->a_userlimit) != RANGE_OKAY) { 87 return (set_errno(ENOMEM)); 88 } 89 } 90 91 if (cmd == MC_HAT_ADVISE) { 92 if (attr != 0 || mask != 0) { 93 return (set_errno(EINVAL)); 94 } 95 96 } else { 97 if ((VALID_ATTR & attr) != attr) { 98 return (set_errno(EINVAL)); 99 } 100 if ((attr & SHARED) && (attr & PRIVATE)) { 101 return (set_errno(EINVAL)); 102 } 103 if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) || 104 (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) && 105 (error = secpolicy_lock_memory(CRED())) != 0) 106 return (set_errno(error)); 107 } 108 if (attr) { 109 attr |= PROT_USER; 110 } 111 112 switch (cmd) { 113 case MC_SYNC: 114 /* 115 * MS_SYNC used to be defined to be zero but is now non-zero. 116 * For binary compatibility we still accept zero 117 * (the absence of MS_ASYNC) to mean the same thing. 118 */ 119 iarg = (uintptr_t)arg; 120 if ((iarg & ~MS_INVALIDATE) == 0) 121 iarg |= MS_SYNC; 122 123 if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) || 124 ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) { 125 error = set_errno(EINVAL); 126 } else { 127 error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0); 128 if (error) { 129 (void) set_errno(error); 130 } 131 } 132 return (error); 133 case MC_LOCKAS: 134 if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) || 135 (uintptr_t)arg == 0) { 136 return (set_errno(EINVAL)); 137 } 138 break; 139 case MC_LOCK: 140 case MC_UNLOCKAS: 141 case MC_UNLOCK: 142 break; 143 case MC_HAT_ADVISE: 144 /* 145 * Set prefered page size. 146 */ 147 STRUCT_INIT(mha, get_udatamodel()); 148 if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) { 149 return (set_errno(EFAULT)); 150 } 151 152 pgcmd = STRUCT_FGET(mha, mha_cmd); 153 154 /* 155 * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK 156 * and MHA_MAPSIZE_BSSBRK are supported. Only one 157 * command may be specified at a time. 158 */ 159 if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) & 160 pgcmd) || pgcmd == 0 || !ISP2(pgcmd) || 161 STRUCT_FGET(mha, mha_flags)) 162 return (set_errno(EINVAL)); 163 164 pgsz = STRUCT_FGET(mha, mha_pagesize); 165 166 /* 167 * call platform specific map_pgsz() routine to get the 168 * optimal pgsz if pgsz is 0. 169 * 170 * For stack and heap operations addr and len must be zero. 171 */ 172 if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) { 173 if (addr != NULL || len != 0) { 174 return (set_errno(EINVAL)); 175 } 176 177 /* 178 * Disable autompss for this process unless pgsz == 0, 179 * which means the system should pick. In the 180 * pgsz == 0 case, leave the SAUTOLPG setting alone, as 181 * we don't want to enable it when someone has 182 * disabled automatic large page selection for the 183 * whole system. 184 */ 185 mutex_enter(&p->p_lock); 186 if (pgsz != 0) { 187 p->p_flag &= ~SAUTOLPG; 188 } 189 mutex_exit(&p->p_lock); 190 191 as_rangelock(as); 192 193 if (pgsz == 0) { 194 int type; 195 196 if (pgcmd == MHA_MAPSIZE_BSSBRK) 197 type = MAPPGSZ_HEAP; 198 else 199 type = MAPPGSZ_STK; 200 201 pgsz = map_pgsz(type, p, 0, 0, 1); 202 } 203 } else { 204 /* 205 * addr and len must be valid for range specified. 206 */ 207 if (valid_usr_range(addr, len, 0, as, 208 as->a_userlimit) != RANGE_OKAY) { 209 return (set_errno(ENOMEM)); 210 } 211 /* 212 * Note that we don't disable automatic large page 213 * selection for anon segments based on use of 214 * memcntl(). 215 */ 216 if (pgsz == 0) { 217 error = as_set_default_lpsize(as, addr, len); 218 if (error) { 219 (void) set_errno(error); 220 } 221 return (error); 222 } 223 224 /* 225 * addr and len must be prefered page size aligned 226 */ 227 if (!IS_P2ALIGNED(addr, pgsz) || 228 !IS_P2ALIGNED(len, pgsz)) { 229 return (set_errno(EINVAL)); 230 } 231 } 232 233 szc = mem_getpgszc(pgsz); 234 if (szc == (uint_t)-1) { 235 if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) 236 != 0) { 237 as_rangeunlock(as); 238 } 239 return (set_errno(EINVAL)); 240 } 241 242 /* 243 * For stack and heap operations we first need to pad 244 * out existing range (create new mappings) to the new 245 * prefered page size boundary. Also the start of the 246 * .bss for the heap or user's stack base may not be on 247 * the new prefered page size boundary. For these cases 248 * we align the base of the request on the new prefered 249 * page size. 250 */ 251 if (pgcmd & MHA_MAPSIZE_BSSBRK) { 252 if (szc == p->p_brkpageszc) { 253 as_rangeunlock(as); 254 return (0); 255 } 256 if (szc > p->p_brkpageszc) { 257 error = brk_internal(p->p_brkbase 258 + p->p_brksize, szc); 259 if (error) { 260 as_rangeunlock(as); 261 return (set_errno(error)); 262 } 263 } 264 /* 265 * It is possible for brk_internal to silently fail to 266 * promote the heap size, so don't panic or ASSERT. 267 */ 268 if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) { 269 as_rangeunlock(as); 270 return (set_errno(ENOMEM)); 271 } 272 oszc = p->p_brkpageszc; 273 p->p_brkpageszc = szc; 274 275 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, 276 pgsz); 277 len = (p->p_brkbase + p->p_brksize) - addr; 278 ASSERT(IS_P2ALIGNED(len, pgsz)); 279 /* 280 * Perhaps no existing pages to promote. 281 */ 282 if (len == 0) { 283 as_rangeunlock(as); 284 return (0); 285 } 286 } 287 /* 288 * The code below, as does grow.c, assumes stacks always grow 289 * downward. 290 */ 291 if (pgcmd & MHA_MAPSIZE_STACK) { 292 if (szc == p->p_stkpageszc) { 293 as_rangeunlock(as); 294 return (0); 295 } 296 297 if (szc > p->p_stkpageszc) { 298 error = grow_internal(p->p_usrstack - 299 p->p_stksize, szc); 300 if (error) { 301 as_rangeunlock(as); 302 return (set_errno(error)); 303 } 304 } 305 /* 306 * It is possible for grow_internal to silently fail to 307 * promote the stack size, so don't panic or ASSERT. 308 */ 309 if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) { 310 as_rangeunlock(as); 311 return (set_errno(ENOMEM)); 312 } 313 oszc = p->p_stkpageszc; 314 p->p_stkpageszc = szc; 315 316 addr = p->p_usrstack - p->p_stksize; 317 len = P2ALIGN(p->p_stksize, pgsz); 318 319 /* 320 * Perhaps nothing to promote. 321 */ 322 if (len == 0 || addr >= p->p_usrstack || 323 (addr + len) < addr) { 324 as_rangeunlock(as); 325 return (0); 326 } 327 } 328 ASSERT(IS_P2ALIGNED(addr, pgsz)); 329 ASSERT(IS_P2ALIGNED(len, pgsz)); 330 error = as_setpagesize(as, addr, len, szc, B_TRUE); 331 332 /* 333 * On stack or heap failures restore original 334 * pg size code. 335 */ 336 if (error) { 337 if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) { 338 p->p_brkpageszc = oszc; 339 } 340 if ((pgcmd & MHA_MAPSIZE_STACK) != 0) { 341 p->p_stkpageszc = oszc; 342 } 343 (void) set_errno(error); 344 } 345 if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) { 346 as_rangeunlock(as); 347 } 348 return (error); 349 case MC_ADVISE: 350 if ((uintptr_t)arg == MADV_FREE) { 351 len &= PAGEMASK; 352 } 353 switch ((uintptr_t)arg) { 354 case MADV_WILLNEED: 355 fc = as_faulta(as, addr, len); 356 if (fc) { 357 if (FC_CODE(fc) == FC_OBJERR) 358 error = set_errno(FC_ERRNO(fc)); 359 else if (FC_CODE(fc) == FC_NOMAP) 360 error = set_errno(ENOMEM); 361 else 362 error = set_errno(EINVAL); 363 return (error); 364 } 365 break; 366 367 case MADV_DONTNEED: 368 /* 369 * For now, don't need is turned into an as_ctl(MC_SYNC) 370 * operation flagged for async invalidate. 371 */ 372 error = as_ctl(as, addr, len, MC_SYNC, attr, 373 MS_ASYNC | MS_INVALIDATE, NULL, 0); 374 if (error) 375 (void) set_errno(error); 376 return (error); 377 378 default: 379 error = as_ctl(as, addr, len, cmd, attr, 380 (uintptr_t)arg, NULL, 0); 381 if (error) 382 (void) set_errno(error); 383 return (error); 384 } 385 break; 386 case MC_INHERIT_ZERO: 387 if (arg != 0 || attr != 0 || mask != 0) 388 return (set_errno(EINVAL)); 389 break; 390 default: 391 return (set_errno(EINVAL)); 392 } 393 394 error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0); 395 396 if (error) 397 (void) set_errno(error); 398 return (error); 399 } 400 401 /* 402 * Return page size code for page size passed in. If 403 * matching page size not found or supported, return -1. 404 */ 405 static uint_t 406 mem_getpgszc(size_t pgsz) { 407 return ((uint_t)page_szc_user_filtered(pgsz)); 408 } 409