/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2015 Joyent, Inc.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */


#include <sys/types.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/tuneable.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/policy.h>

#include <vm/as.h>
#include <vm/seg.h>

static uint_t mem_getpgszc(size_t);

/*
 * Memory control operations
 */
/*
 * memcntl(2): memory management control operations on the calling
 * process's address space.  Dispatches on 'cmd':
 *
 *	MC_SYNC		sync/invalidate mapped pages (msync(3C))
 *	MC_LOCK/MC_UNLOCK
 *			lock/unlock a page range (mlock/munlock)
 *	MC_LOCKAS/MC_UNLOCKAS
 *			lock/unlock the whole address space
 *	MC_ADVISE	usage advice (madvise(3C))
 *	MC_HAT_ADVISE	set preferred page size for a range, the heap,
 *			or the stack
 *	MC_INHERIT_ZERO	mark a range to be zeroed in the child on fork
 *
 *	addr/len	range to operate on; must be 0/0 for the *AS
 *			commands, and is ignored by the heap/stack forms
 *			of MC_HAT_ADVISE
 *	arg		per-command argument (flag word, advice value, or
 *			a user pointer to a memcntl_mha structure for
 *			MC_HAT_ADVISE)
 *	attr		protection attributes used only as a filter for
 *			which mappings are affected
 *	mask		reserved; must be zero
 *
 * Returns 0 on success; on failure sets the thread's errno via
 * set_errno() and returns the non-zero error value.
 */
int
memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
{
	struct as *as = ttoproc(curthread)->p_as;
	struct proc *p = ttoproc(curthread);
	size_t pgsz;
	uint_t szc, oszc, pgcmd;
	int error = 0;
	faultcode_t fc;
	uintptr_t iarg;
	STRUCT_DECL(memcntl_mha, mha);	/* data-model-aware copyin buffer */

	/* mask is reserved for future use and must be zero. */
	if (mask)
		return (set_errno(EINVAL));
	if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) {
		/* Whole-address-space operations take no range. */
		if ((addr != 0) || (len != 0)) {
			return (set_errno(EINVAL));
		}
	} else if (cmd != MC_HAT_ADVISE) {
		/* Range must be page-aligned and non-empty. */
		if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) {
			return (set_errno(EINVAL));
		}
		/*
		 * We're only concerned with the address range
		 * here, not the protections. The protections
		 * are only used as a "filter" in this code,
		 * they aren't set or modified here.
		 */
		if (valid_usr_range(addr, len, 0, as,
		    as->a_userlimit) != RANGE_OKAY) {
			return (set_errno(ENOMEM));
		}
	}

	if (cmd == MC_HAT_ADVISE) {
		/* MC_HAT_ADVISE carries all its inputs in the mha struct. */
		if (attr != 0 || mask != 0) {
			return (set_errno(EINVAL));
		}

	} else {
		if ((VALID_ATTR & attr) != attr) {
			return (set_errno(EINVAL));
		}
		/* SHARED and PRIVATE filters are mutually exclusive. */
		if ((attr & SHARED) && (attr & PRIVATE)) {
			return (set_errno(EINVAL));
		}
		/* Locking/unlocking memory is a privileged operation. */
		if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) ||
		    (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) &&
		    (error = secpolicy_lock_memory(CRED())) != 0)
			return (set_errno(error));
	}
	/* A non-zero attr filter implies user-accessible mappings. */
	if (attr) {
		attr |= PROT_USER;
	}

	oszc = 0;
	switch (cmd) {
	case MC_SYNC:
		/*
		 * MS_SYNC used to be defined to be zero but is now non-zero.
		 * For binary compatibility we still accept zero
		 * (the absence of MS_ASYNC) to mean the same thing.
		 */
		iarg = (uintptr_t)arg;
		if ((iarg & ~MS_INVALIDATE) == 0)
			iarg |= MS_SYNC;

		/* MS_SYNC and MS_ASYNC may not both be specified. */
		if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
		    ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
			error = set_errno(EINVAL);
		} else {
			error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
			if (error) {
				(void) set_errno(error);
			}
		}
		return (error);
	case MC_LOCKAS:
		/* At least one of MCL_CURRENT/MCL_FUTURE must be requested. */
		if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) ||
		    (uintptr_t)arg == 0) {
			return (set_errno(EINVAL));
		}
		break;
	case MC_LOCK:
	case MC_UNLOCKAS:
	case MC_UNLOCK:
		break;
	case MC_HAT_ADVISE:
		/*
		 * Set prefered page size.
		 */
		STRUCT_INIT(mha, get_udatamodel());
		if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) {
			return (set_errno(EFAULT));
		}

		pgcmd = STRUCT_FGET(mha, mha_cmd);

		/*
		 * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK
		 * and MHA_MAPSIZE_BSSBRK are supported. Only one
		 * command may be specified at a time.
		 */
		if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) &
		    pgcmd) || pgcmd == 0 || !ISP2(pgcmd) ||
		    STRUCT_FGET(mha, mha_flags))
			return (set_errno(EINVAL));

		pgsz = STRUCT_FGET(mha, mha_pagesize);

		/*
		 * call platform specific map_pgsz() routine to get the
		 * optimal pgsz if pgsz is 0.
		 *
		 * For stack and heap operations addr and len must be zero.
		 */
		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
			if (addr != NULL || len != 0) {
				return (set_errno(EINVAL));
			}

			/*
			 * Disable autompss for this process unless pgsz == 0,
			 * which means the system should pick. In the
			 * pgsz == 0 case, leave the SAUTOLPG setting alone, as
			 * we don't want to enable it when someone has
			 * disabled automatic large page selection for the
			 * whole system.
			 */
			mutex_enter(&p->p_lock);
			if (pgsz != 0) {
				p->p_flag &= ~SAUTOLPG;
			}
			mutex_exit(&p->p_lock);

			/*
			 * Held for the remainder of the heap/stack resize;
			 * every return path below must drop it.
			 */
			as_rangelock(as);

			if (pgsz == 0) {
				int type;

				if (pgcmd == MHA_MAPSIZE_BSSBRK)
					type = MAPPGSZ_HEAP;
				else
					type = MAPPGSZ_STK;

				pgsz = map_pgsz(type, p, 0, 0, 1);
			}
		} else {
			/*
			 * addr and len must be valid for range specified.
			 */
			if (valid_usr_range(addr, len, 0, as,
			    as->a_userlimit) != RANGE_OKAY) {
				return (set_errno(ENOMEM));
			}
			/*
			 * Note that we don't disable automatic large page
			 * selection for anon segments based on use of
			 * memcntl().
			 */
			if (pgsz == 0) {
				error = as_set_default_lpsize(as, addr, len);
				if (error) {
					(void) set_errno(error);
				}
				return (error);
			}

			/*
			 * addr and len must be prefered page size aligned
			 */
			if (!IS_P2ALIGNED(addr, pgsz) ||
			    !IS_P2ALIGNED(len, pgsz)) {
				return (set_errno(EINVAL));
			}
		}

		szc = mem_getpgszc(pgsz);
		if (szc == (uint_t)-1) {
			/* Drop the range lock taken for heap/stack ops. */
			if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK))
			    != 0) {
				as_rangeunlock(as);
			}
			return (set_errno(EINVAL));
		}

		/*
		 * For stack and heap operations we first need to pad
		 * out existing range (create new mappings) to the new
		 * prefered page size boundary. Also the start of the
		 * .bss for the heap or user's stack base may not be on
		 * the new prefered page size boundary. For these cases
		 * we align the base of the request on the new prefered
		 * page size.
		 */
		if (pgcmd & MHA_MAPSIZE_BSSBRK) {
			if (szc == p->p_brkpageszc) {
				/* Already at the requested size: no-op. */
				as_rangeunlock(as);
				return (0);
			}
			if (szc > p->p_brkpageszc) {
				/* Pad the heap out to the new boundary. */
				error = brk_internal(p->p_brkbase
				    + p->p_brksize, szc);
				if (error) {
					as_rangeunlock(as);
					return (set_errno(error));
				}
			}
			/*
			 * It is possible for brk_internal to silently fail to
			 * promote the heap size, so don't panic or ASSERT.
			 */
			if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) {
				as_rangeunlock(as);
				return (set_errno(ENOMEM));
			}
			/* Save the old size code for rollback on failure. */
			oszc = p->p_brkpageszc;
			p->p_brkpageszc = szc;

			/*
			 * Round the start of .bss up to the new page size so
			 * only whole new-size pages are promoted.
			 */
			addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			len = (p->p_brkbase + p->p_brksize) - addr;
			ASSERT(IS_P2ALIGNED(len, pgsz));
			/*
			 * Perhaps no existing pages to promote.
			 */
			if (len == 0) {
				as_rangeunlock(as);
				return (0);
			}
		}
		/*
		 * The code below, as does grow.c, assumes stacks always grow
		 * downward.
		 */
		if (pgcmd & MHA_MAPSIZE_STACK) {
			if (szc == p->p_stkpageszc) {
				/* Already at the requested size: no-op. */
				as_rangeunlock(as);
				return (0);
			}

			if (szc > p->p_stkpageszc) {
				/* Pad the stack out to the new boundary. */
				error = grow_internal(p->p_usrstack -
				    p->p_stksize, szc);
				if (error) {
					as_rangeunlock(as);
					return (set_errno(error));
				}
			}
			/*
			 * It is possible for grow_internal to silently fail to
			 * promote the stack size, so don't panic or ASSERT.
			 */
			if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) {
				as_rangeunlock(as);
				return (set_errno(ENOMEM));
			}
			/* Save the old size code for rollback on failure. */
			oszc = p->p_stkpageszc;
			p->p_stkpageszc = szc;

			addr = p->p_usrstack - p->p_stksize;
			len = P2ALIGN(p->p_stksize, pgsz);

			/*
			 * Perhaps nothing to promote.
			 */
			if (len == 0 || addr >= p->p_usrstack ||
			    (addr + len) < addr) {
				as_rangeunlock(as);
				return (0);
			}
		}
		ASSERT(IS_P2ALIGNED(addr, pgsz));
		ASSERT(IS_P2ALIGNED(len, pgsz));
		error = as_setpagesize(as, addr, len, szc, B_TRUE);

		/*
		 * On stack or heap failures restore original
		 * pg size code.
		 */
		if (error) {
			if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) {
				p->p_brkpageszc = oszc;
			}
			if ((pgcmd & MHA_MAPSIZE_STACK) != 0) {
				p->p_stkpageszc = oszc;
			}
			(void) set_errno(error);
		}
		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
			as_rangeunlock(as);
		}
		return (error);
	case MC_ADVISE:
		/*
		 * MADV_FREE/MADV_PURGE operate on whole pages; quietly
		 * truncate len to a page boundary.
		 */
		if ((uintptr_t)arg == MADV_FREE ||
		    (uintptr_t)arg == MADV_PURGE) {
			len &= PAGEMASK;
		}
		switch ((uintptr_t)arg) {
		case MADV_WILLNEED:
			/*
			 * Fault the range in ahead of use; a failure here is
			 * translated from the fault code to an errno.  On
			 * success we fall through to the common as_ctl()
			 * below so the advice also reaches the segments.
			 */
			fc = as_faulta(as, addr, len);
			if (fc) {
				if (FC_CODE(fc) == FC_OBJERR)
					error = set_errno(FC_ERRNO(fc));
				else if (FC_CODE(fc) == FC_NOMAP)
					error = set_errno(ENOMEM);
				else
					error = set_errno(EINVAL);
				return (error);
			}
			break;

		case MADV_DONTNEED:
			/*
			 * For now, don't need is turned into an as_ctl(MC_SYNC)
			 * operation flagged for async invalidate.
			 */
			error = as_ctl(as, addr, len, MC_SYNC, attr,
			    MS_ASYNC | MS_INVALIDATE, NULL, 0);
			if (error)
				(void) set_errno(error);
			return (error);

		default:
			/* All other advice is handled by the segment layer. */
			error = as_ctl(as, addr, len, cmd, attr,
			    (uintptr_t)arg, NULL, 0);
			if (error)
				(void) set_errno(error);
			return (error);
		}
		break;
	case MC_INHERIT_ZERO:
		/* Takes no argument, attribute filter, or mask. */
		if (arg != 0 || attr != 0 || mask != 0)
			return (set_errno(EINVAL));
		break;
	default:
		return (set_errno(EINVAL));
	}

	/*
	 * Common path for MC_LOCK[AS], MC_UNLOCK[AS], MC_INHERIT_ZERO and
	 * MC_ADVISE/MADV_WILLNEED: hand the operation to the AS layer.
	 */
	error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0);

	if (error)
		(void) set_errno(error);
	return (error);
}
402
403 /*
404 * Return page size code for page size passed in. If
405 * matching page size not found or supported, return -1.
406 */
407 static uint_t
mem_getpgszc(size_t pgsz)408 mem_getpgszc(size_t pgsz) {
409 return ((uint_t)page_szc_user_filtered(pgsz));
410 }
411