xref: /illumos-gate/usr/src/uts/common/syscall/rctlsys.c (revision 129b3e6c5b0ac55b5021a4c38db6387b6acdaaf1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 
28 #include <sys/cmn_err.h>
29 #include <sys/cred.h>
30 #include <sys/errno.h>
31 #include <sys/rctl.h>
32 #include <sys/rctl_impl.h>
33 #include <sys/strlog.h>
34 #include <sys/syslog.h>
35 #include <sys/sysmacros.h>
36 #include <sys/systm.h>
37 #include <sys/policy.h>
38 #include <sys/proc.h>
39 #include <sys/task.h>
40 
41 /*
42  * setrctl(2), getrctl(2), and private rctlsys(2*) system calls
43  *
44  * Resource control block (rctlblk_ptr_t, rctl_opaque_t)
45  *   The resource control system call interfaces present the resource control
46  *   values and flags via the resource control block abstraction, made manifest
47  *   via an opaque data type with strict type definitions.  Keeping the formal
48  *   definitions in the rcontrol block allows us to be clever in the kernel,
49  *   combining attributes where appropriate in the current implementation while
50  *   preserving binary compatibility in the face of implementation changes.
51  */
52 
/*
 * Flags for rctlsys_rblk_xfrm().
 *
 * Direction: RBX_FROM_BLK converts an opaque block into the kernel structures.
 * rctlsys_rblk_xfrm() only tests RBX_FROM_BLK; when it is absent the block is
 * filled in from the kernel structures, so RBX_TO_BLK serves to make call
 * sites self-describing.
 *
 * Selection: RBX_VAL transfers the local value fields (rctl_val_t), RBX_CTL
 * transfers the global control fields (rctl_dict_entry_t).  Both may be set.
 */
#define	RBX_TO_BLK	0x1
#define	RBX_FROM_BLK	0x2
#define	RBX_VAL		0x4
#define	RBX_CTL		0x8
57 
58 static void
59 rctlsys_rblk_xfrm(rctl_opaque_t *blk, rctl_dict_entry_t *rde,
60     rctl_val_t *val, int flags)
61 {
62 	if (flags & RBX_FROM_BLK) {
63 		if (flags & RBX_VAL) {
64 			/*
65 			 * Firing time cannot be set.
66 			 */
67 			val->rcv_privilege = blk->rcq_privilege;
68 			val->rcv_value = blk->rcq_value;
69 			val->rcv_flagaction = blk->rcq_local_flagaction;
70 			val->rcv_action_signal = blk->rcq_local_signal;
71 			val->rcv_action_recip_pid =
72 			    blk->rcq_local_recipient_pid;
73 		}
74 		if (flags & RBX_CTL) {
75 			rde->rcd_flagaction = blk->rcq_global_flagaction;
76 			rde->rcd_syslog_level = blk->rcq_global_syslog_level;
77 
78 			/*
79 			 * Because the strlog() interface supports fewer options
80 			 * than are made available via the syslog() interface to
81 			 * userland, we map the syslog level down to a smaller
82 			 * set of distinct logging behaviours.
83 			 */
84 			rde->rcd_strlog_flags = 0;
85 			switch (blk->rcq_global_syslog_level) {
86 				case LOG_EMERG:
87 				case LOG_ALERT:
88 				case LOG_CRIT:
89 					rde->rcd_strlog_flags |= SL_CONSOLE;
90 					/*FALLTHROUGH*/
91 				case LOG_ERR:
92 					rde->rcd_strlog_flags |= SL_ERROR;
93 					/*FALLTHROUGH*/
94 				case LOG_WARNING:
95 					rde->rcd_strlog_flags |= SL_WARN;
96 					break;
97 				case LOG_NOTICE:
98 					rde->rcd_strlog_flags |= SL_CONSOLE;
99 					/*FALLTHROUGH*/
100 				case LOG_INFO:	/* informational */
101 				case LOG_DEBUG:	/* debug-level messages */
102 				default:
103 					rde->rcd_strlog_flags |= SL_NOTE;
104 					break;
105 			}
106 		}
107 	} else {
108 		bzero(blk,  sizeof (rctl_opaque_t));
109 		if (flags & RBX_VAL) {
110 			blk->rcq_privilege = val->rcv_privilege;
111 			blk->rcq_value = val->rcv_value;
112 			blk->rcq_enforced_value = rctl_model_value(rde,
113 			    curproc, val->rcv_value);
114 			blk->rcq_local_flagaction = val->rcv_flagaction;
115 			blk->rcq_local_signal = val->rcv_action_signal;
116 			blk->rcq_firing_time = val->rcv_firing_time;
117 			blk->rcq_local_recipient_pid =
118 			    val->rcv_action_recip_pid;
119 		}
120 		if (flags & RBX_CTL) {
121 			blk->rcq_global_flagaction = rde->rcd_flagaction;
122 			blk->rcq_global_syslog_level = rde->rcd_syslog_level;
123 		}
124 	}
125 }
126 
127 /*
128  * int rctl_invalid_value(rctl_dict_entry_t *, rctl_val_t *)
129  *
130  * Overview
131  *   Perform basic validation of proposed new resource control value against the
132  *   global properties set on the control.  Any system call operation presented
133  *   with an invalid resource control value should return -1 and set errno to
134  *   EINVAL.
135  *
136  * Return values
137  *   0 if valid, 1 if invalid.
138  *
139  * Caller's context
140  *   No restriction on context.
141  */
142 int
143 rctl_invalid_value(rctl_dict_entry_t *rde, rctl_val_t *rval)
144 {
145 	rctl_val_t *sys_rval;
146 
147 	if (rval->rcv_privilege != RCPRIV_BASIC &&
148 	    rval->rcv_privilege != RCPRIV_PRIVILEGED &&
149 	    rval->rcv_privilege != RCPRIV_SYSTEM)
150 		return (1);
151 
152 	if (rval->rcv_flagaction & ~RCTL_LOCAL_MASK)
153 		return (1);
154 
155 	if (rval->rcv_privilege == RCPRIV_BASIC &&
156 	    (rde->rcd_flagaction & RCTL_GLOBAL_NOBASIC) != 0)
157 		return (1);
158 
159 	if ((rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0 &&
160 	    (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) != 0)
161 		return (1);
162 
163 	if ((rval->rcv_flagaction & RCTL_LOCAL_DENY) &&
164 	    (rde->rcd_flagaction & RCTL_GLOBAL_DENY_NEVER))
165 		return (1);
166 
167 	if ((rval->rcv_flagaction & RCTL_LOCAL_SIGNAL) &&
168 	    (rde->rcd_flagaction & RCTL_GLOBAL_SIGNAL_NEVER))
169 		return (1);
170 
171 	if ((rval->rcv_flagaction & RCTL_LOCAL_SIGNAL) &&
172 	    rval->rcv_action_signal == 0)
173 		return (1);
174 
175 	if (rval->rcv_action_signal == SIGXCPU &&
176 	    (rde->rcd_flagaction & RCTL_GLOBAL_CPU_TIME) == 0)
177 		return (1);
178 	else if (rval->rcv_action_signal == SIGXFSZ &&
179 	    (rde->rcd_flagaction & RCTL_GLOBAL_FILE_SIZE) == 0)
180 		return (1);
181 	else if (rval->rcv_action_signal != SIGHUP &&
182 	    rval->rcv_action_signal != SIGABRT &&
183 	    rval->rcv_action_signal != SIGKILL &&
184 	    rval->rcv_action_signal != SIGTERM &&
185 	    rval->rcv_action_signal != SIGSTOP &&
186 	    rval->rcv_action_signal != SIGXCPU &&
187 	    rval->rcv_action_signal != SIGXFSZ &&
188 	    rval->rcv_action_signal != SIGXRES &&
189 	    rval->rcv_action_signal != 0)	/* That is, no signal is ok. */
190 		return (1);
191 
192 	sys_rval = rde->rcd_default_value;
193 	while (sys_rval->rcv_privilege != RCPRIV_SYSTEM)
194 		sys_rval = sys_rval->rcv_next;
195 
196 	if (rval->rcv_value > sys_rval->rcv_value)
197 		return (1);
198 
199 	return (0);
200 }
201 
202 /*
203  * static long rctlsys_get(char *name, rctl_opaque_t *old_rblk,
204  *   rctl_opaque_t *new_rblk, int flags)
205  *
206  * Overview
207  *   rctlsys_get() is the implementation of the core logic of getrctl(2), the
208  *   public system call for fetching resource control values.  Three mutually
209  *   exclusive flag values are supported: RCTL_USAGE, RCTL_FIRST and RCTL_NEXT.
210  *   When RCTL_USAGE is presented, the current usage for the resource control
211  *   is returned in new_blk if the resource control provides an implementation
212  *   of the usage operation.  When RCTL_FIRST is presented, the value of
213  *   old_rblk is ignored, and the first value in the resource control value
214  *   sequence for the named control is transformed and placed in the user
215  *   memory location at new_rblk.  In the RCTL_NEXT case, the value of old_rblk
216  *   is examined, and the next value in the sequence is transformed and placed
217  *   at new_rblk.
218  */
219 static long
220 rctlsys_get(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
221     int flags)
222 {
223 	rctl_val_t *nval;
224 	rctl_opaque_t *nblk;
225 	rctl_hndl_t hndl;
226 	char *kname;
227 	size_t klen;
228 	rctl_dict_entry_t *krde;
229 	int ret;
230 	int action = flags & (~RCTLSYS_ACTION_MASK);
231 
232 	if (flags & (~RCTLSYS_MASK))
233 		return (set_errno(EINVAL));
234 
235 	if (action != RCTL_FIRST && action != RCTL_NEXT &&
236 	    action != RCTL_USAGE)
237 		return (set_errno(EINVAL));
238 
239 	if (new_rblk == NULL || name == NULL)
240 		return (set_errno(EFAULT));
241 
242 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
243 	krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
244 
245 	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
246 		kmem_free(kname, MAXPATHLEN);
247 		kmem_free(krde, sizeof (rctl_dict_entry_t));
248 		return (set_errno(EFAULT));
249 	}
250 
251 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
252 		kmem_free(kname, MAXPATHLEN);
253 		kmem_free(krde, sizeof (rctl_dict_entry_t));
254 		return (set_errno(EINVAL));
255 	}
256 
257 	if (rctl_global_get(kname, krde) == -1) {
258 		kmem_free(kname, MAXPATHLEN);
259 		kmem_free(krde, sizeof (rctl_dict_entry_t));
260 		return (set_errno(ESRCH));
261 	}
262 
263 	kmem_free(kname, MAXPATHLEN);
264 
265 	if (action != RCTL_USAGE)
266 		nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
267 
268 	if (action == RCTL_USAGE) {
269 		rctl_set_t *rset;
270 		rctl_t *rctl;
271 		rctl_qty_t usage;
272 
273 		mutex_enter(&curproc->p_lock);
274 		if ((rset = rctl_entity_obtain_rset(krde, curproc)) == NULL) {
275 			mutex_exit(&curproc->p_lock);
276 			kmem_free(krde, sizeof (rctl_dict_entry_t));
277 			return (set_errno(ESRCH));
278 		}
279 		mutex_enter(&rset->rcs_lock);
280 		if (rctl_set_find(rset, hndl, &rctl) == -1) {
281 			mutex_exit(&rset->rcs_lock);
282 			mutex_exit(&curproc->p_lock);
283 			kmem_free(krde, sizeof (rctl_dict_entry_t));
284 			return (set_errno(ESRCH));
285 		}
286 		if (RCTLOP_NO_USAGE(rctl)) {
287 			mutex_exit(&rset->rcs_lock);
288 			mutex_exit(&curproc->p_lock);
289 			kmem_free(krde, sizeof (rctl_dict_entry_t));
290 			return (set_errno(ENOTSUP));
291 		}
292 		usage = RCTLOP_GET_USAGE(rctl, curproc);
293 		mutex_exit(&rset->rcs_lock);
294 		mutex_exit(&curproc->p_lock);
295 
296 		nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
297 		bzero(nblk, sizeof (rctl_opaque_t));
298 		nblk->rcq_value = usage;
299 
300 		ret = copyout(nblk, new_rblk, sizeof (rctl_opaque_t));
301 		kmem_free(nblk, sizeof (rctl_opaque_t));
302 		kmem_free(krde, sizeof (rctl_dict_entry_t));
303 		return (ret == 0 ? 0 : set_errno(EFAULT));
304 	} else if (action == RCTL_FIRST) {
305 
306 		mutex_enter(&curproc->p_lock);
307 		if (ret = rctl_local_get(hndl, NULL, nval, curproc)) {
308 			mutex_exit(&curproc->p_lock);
309 			kmem_cache_free(rctl_val_cache, nval);
310 			kmem_free(krde, sizeof (rctl_dict_entry_t));
311 			return (set_errno(ret));
312 		}
313 		mutex_exit(&curproc->p_lock);
314 	} else {
315 		/*
316 		 * RCTL_NEXT
317 		 */
318 		rctl_val_t *oval;
319 		rctl_opaque_t *oblk;
320 
321 		oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
322 
323 		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
324 			kmem_cache_free(rctl_val_cache, nval);
325 			kmem_free(oblk, sizeof (rctl_opaque_t));
326 			kmem_free(krde, sizeof (rctl_dict_entry_t));
327 			return (set_errno(EFAULT));
328 		}
329 
330 		oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
331 
332 		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);
333 		mutex_enter(&curproc->p_lock);
334 		ret = rctl_local_get(hndl, oval, nval, curproc);
335 		mutex_exit(&curproc->p_lock);
336 
337 		kmem_cache_free(rctl_val_cache, oval);
338 		kmem_free(oblk, sizeof (rctl_opaque_t));
339 
340 		if (ret != 0) {
341 			kmem_cache_free(rctl_val_cache, nval);
342 			kmem_free(krde, sizeof (rctl_dict_entry_t));
343 			return (set_errno(ret));
344 		}
345 	}
346 
347 	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
348 
349 	rctlsys_rblk_xfrm(nblk, krde, nval, RBX_TO_BLK | RBX_VAL | RBX_CTL);
350 
351 	kmem_free(krde, sizeof (rctl_dict_entry_t));
352 	kmem_cache_free(rctl_val_cache, nval);
353 
354 	if (copyout(nblk, new_rblk, sizeof (rctl_opaque_t)) == -1) {
355 		kmem_free(nblk, sizeof (rctl_opaque_t));
356 		return (set_errno(EFAULT));
357 	}
358 
359 	kmem_free(nblk, sizeof (rctl_opaque_t));
360 
361 	return (0);
362 }
363 
364 /*
365  * static long rctlsys_set(char *name, rctl_opaque_t *old_rblk,
366  *   rctl_opaque_t *new_rblk, int flags)
367  *
368  * Overview
369  *   rctlsys_set() is the implementation of the core login of setrctl(2), which
370  *   allows the establishment of resource control values.  Flags may take on any
371  *   of three exclusive values:  RCTL_INSERT, RCTL_DELETE, and RCTL_REPLACE.
372  *   RCTL_INSERT ignores old_rblk and inserts the value in the appropriate
373  *   position in the ordered sequence of resource control values.  RCTL_DELETE
374  *   ignores old_rblk and deletes the first resource control value matching
375  *   (value, priority) in the given resource block.  If no matching value is
376  *   found, -1 is returned and errno is set to ENOENT.  Finally, in the case of
377  *   RCTL_REPLACE, old_rblk is used to match (value, priority); the matching
378  *   resource control value in the sequence is replaced with the contents of
379  *   new_rblk.  Again, if no match is found, -1 is returned and errno is set to
380  *   ENOENT.
381  *
382  *   rctlsys_set() causes a cursor test, which can reactivate resource controls
383  *   that have previously fired.
384  */
385 static long
386 rctlsys_set(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
387     int flags)
388 {
389 	rctl_val_t *nval;
390 	rctl_dict_entry_t *rde;
391 	rctl_opaque_t *nblk;
392 	rctl_hndl_t hndl;
393 	char *kname;
394 	size_t klen;
395 	long ret = 0;
396 	proc_t *pp = NULL;
397 	pid_t pid;
398 	int action = flags & (~RCTLSYS_ACTION_MASK);
399 	rctl_val_t *oval;
400 	rctl_val_t *rval1;
401 	rctl_val_t *rval2;
402 	rctl_val_t *tval;
403 	rctl_opaque_t *oblk;
404 
405 	if (flags & (~RCTLSYS_MASK))
406 		return (set_errno(EINVAL));
407 
408 	if (action != RCTL_INSERT &&
409 	    action != RCTL_DELETE &&
410 	    action != RCTL_REPLACE)
411 		return (set_errno(EINVAL));
412 
413 	if (new_rblk == NULL || name == NULL)
414 		return (set_errno(EFAULT));
415 
416 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
417 	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
418 		kmem_free(kname, MAXPATHLEN);
419 		return (set_errno(EFAULT));
420 	}
421 
422 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
423 		kmem_free(kname, MAXPATHLEN);
424 		return (set_errno(EINVAL));
425 	}
426 
427 	kmem_free(kname, MAXPATHLEN);
428 
429 	rde = rctl_dict_lookup_hndl(hndl);
430 
431 	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
432 
433 	if (copyin(new_rblk, nblk, sizeof (rctl_opaque_t)) == -1) {
434 		kmem_free(nblk, sizeof (rctl_opaque_t));
435 		return (set_errno(EFAULT));
436 	}
437 
438 	nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
439 
440 	rctlsys_rblk_xfrm(nblk, NULL, nval, RBX_FROM_BLK | RBX_VAL);
441 
442 	if (rctl_invalid_value(rde, nval)) {
443 		kmem_free(nblk, sizeof (rctl_opaque_t));
444 		kmem_cache_free(rctl_val_cache, nval);
445 		return (set_errno(EINVAL));
446 	}
447 
448 	/* allocate what we might need before potentially grabbing p_lock */
449 	oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
450 	oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
451 	rval1 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
452 	rval2 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
453 
454 	if (nval->rcv_privilege == RCPRIV_BASIC) {
455 		if (flags & RCTL_USE_RECIPIENT_PID) {
456 			pid = nval->rcv_action_recip_pid;
457 
458 			/* case for manipulating rctl values on other procs */
459 			if (pid != curproc->p_pid) {
460 				/* cannot be other pid on process rctls */
461 				if (rde->rcd_entity == RCENTITY_PROCESS) {
462 					ret = set_errno(EINVAL);
463 					goto rctlsys_out;
464 				}
465 				/*
466 				 * must have privilege to manipulate controls
467 				 * on other processes
468 				 */
469 				if (secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
470 					ret = set_errno(EACCES);
471 					goto rctlsys_out;
472 				}
473 
474 				pid = nval->rcv_action_recip_pid;
475 				mutex_enter(&pidlock);
476 				pp = prfind(pid);
477 				if (!pp) {
478 					mutex_exit(&pidlock);
479 					ret = set_errno(ESRCH);
480 					goto rctlsys_out;
481 				}
482 
483 				/*
484 				 * idle or zombie procs have either not yet
485 				 * set up their rctls or have already done
486 				 * their rctl_set_tearoff's.
487 				 */
488 				if (pp->p_stat == SZOMB ||
489 				    pp->p_stat == SIDL) {
490 					mutex_exit(&pidlock);
491 					ret = set_errno(ESRCH);
492 					goto rctlsys_out;
493 				}
494 
495 				/*
496 				 * hold this pp's p_lock to ensure that
497 				 * it does not do it's rctl_set_tearoff
498 				 * If we did not do this, we could
499 				 * potentially add rctls to the entity
500 				 * with a recipient that is a process
501 				 * that has exited.
502 				 */
503 				mutex_enter(&pp->p_lock);
504 				mutex_exit(&pidlock);
505 
506 				/*
507 				 * We know that curproc's task, project,
508 				 * and zone pointers will not change
509 				 * because functions that change them
510 				 * call holdlwps(SHOLDFORK1) first.
511 				 */
512 
513 				/*
514 				 * verify that the found pp is in the
515 				 * current task.  If it is, then it
516 				 * is also within the current project
517 				 * and zone.
518 				 */
519 				if (rde->rcd_entity == RCENTITY_TASK &&
520 				    pp->p_task != curproc->p_task) {
521 					ret = set_errno(ESRCH);
522 					goto rctlsys_out;
523 				}
524 
525 				ASSERT(pp->p_task->tk_proj ==
526 				    curproc->p_task->tk_proj);
527 				ASSERT(pp->p_zone == curproc->p_zone);
528 
529 
530 				nval->rcv_action_recipient = pp;
531 				nval->rcv_action_recip_pid = pid;
532 
533 			} else {
534 				/* for manipulating rctl values on this proc */
535 				mutex_enter(&curproc->p_lock);
536 				pp = curproc;
537 				nval->rcv_action_recipient = curproc;
538 				nval->rcv_action_recip_pid = curproc->p_pid;
539 			}
540 
541 		} else {
542 			/* RCTL_USE_RECIPIENT_PID not set, use this proc */
543 			mutex_enter(&curproc->p_lock);
544 			pp = curproc;
545 			nval->rcv_action_recipient = curproc;
546 			nval->rcv_action_recip_pid = curproc->p_pid;
547 		}
548 
549 	} else {
550 		/* privileged controls have no recipient pid */
551 		mutex_enter(&curproc->p_lock);
552 		pp = curproc;
553 		nval->rcv_action_recipient = NULL;
554 		nval->rcv_action_recip_pid = -1;
555 	}
556 
557 	nval->rcv_firing_time = 0;
558 
559 	if (action == RCTL_REPLACE) {
560 
561 		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
562 			ret = set_errno(EFAULT);
563 			goto rctlsys_out;
564 		}
565 
566 		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);
567 
568 		if (rctl_invalid_value(rde, oval)) {
569 			ret = set_errno(EINVAL);
570 			goto rctlsys_out;
571 		}
572 
573 		if (oval->rcv_privilege == RCPRIV_BASIC) {
574 			if (!(flags & RCTL_USE_RECIPIENT_PID)) {
575 				oval->rcv_action_recipient = curproc;
576 				oval->rcv_action_recip_pid = curproc->p_pid;
577 			}
578 		} else {
579 			oval->rcv_action_recipient = NULL;
580 			oval->rcv_action_recip_pid = -1;
581 		}
582 
583 		/*
584 		 * Find the real value we're attempting to replace on the
585 		 * sequence, rather than trusting the one delivered from
586 		 * userland.
587 		 */
588 		if (ret = rctl_local_get(hndl, NULL, rval1, pp)) {
589 			(void) set_errno(ret);
590 			goto rctlsys_out;
591 		}
592 
593 		do {
594 			if (rval1->rcv_privilege == RCPRIV_SYSTEM ||
595 			    rctl_val_cmp(oval, rval1, 0) == 0)
596 				break;
597 
598 			tval = rval1;
599 			rval1 = rval2;
600 			rval2 = tval;
601 		} while (rctl_local_get(hndl, rval2, rval1, pp) == 0);
602 
603 		if (rval1->rcv_privilege == RCPRIV_SYSTEM) {
604 			if (rctl_val_cmp(oval, rval1, 1) == 0)
605 				ret = set_errno(EPERM);
606 			else
607 				ret = set_errno(ESRCH);
608 
609 			goto rctlsys_out;
610 		}
611 
612 		bcopy(rval1, oval, sizeof (rctl_val_t));
613 
614 		/*
615 		 * System controls are immutable.
616 		 */
617 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
618 			ret = set_errno(EPERM);
619 			goto rctlsys_out;
620 		}
621 
622 		/*
623 		 * Only privileged processes in the global zone can modify
624 		 * privileged rctls of type RCENTITY_ZONE; replacing privileged
625 		 * controls with basic ones are not allowed either.  Lowering a
626 		 * lowerable one might be OK for privileged processes in a
627 		 * non-global zone, but lowerable rctls probably don't make
628 		 * sense for zones (hence, not modifiable from within a zone).
629 		 */
630 		if (rde->rcd_entity == RCENTITY_ZONE &&
631 		    (nval->rcv_privilege == RCPRIV_PRIVILEGED ||
632 		    oval->rcv_privilege == RCPRIV_PRIVILEGED) &&
633 		    secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
634 			ret = set_errno(EACCES);
635 			goto rctlsys_out;
636 		}
637 
638 		/*
639 		 * Must be privileged to replace a privileged control with
640 		 * a basic one.
641 		 */
642 		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
643 		    nval->rcv_privilege != RCPRIV_PRIVILEGED &&
644 		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
645 			ret = set_errno(EACCES);
646 			goto rctlsys_out;
647 		}
648 
649 		/*
650 		 * Must have lowerable global property for non-privileged
651 		 * to lower the value of a privileged control; otherwise must
652 		 * have sufficient privileges to modify privileged controls
653 		 * at all.
654 		 */
655 		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
656 		    nval->rcv_privilege == RCPRIV_PRIVILEGED &&
657 		    ((((rde->rcd_flagaction & RCTL_GLOBAL_LOWERABLE) == 0) ||
658 		    oval->rcv_flagaction != nval->rcv_flagaction ||
659 		    oval->rcv_action_signal != nval->rcv_action_signal ||
660 		    oval->rcv_value < nval->rcv_value)) &&
661 		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
662 			ret = set_errno(EACCES);
663 			goto rctlsys_out;
664 		}
665 
666 		if (ret = rctl_local_replace(hndl, oval, nval, pp)) {
667 			(void) set_errno(ret);
668 			goto rctlsys_out;
669 		}
670 
671 		/* ensure that nval is not freed */
672 		nval = NULL;
673 
674 	} else if (action == RCTL_INSERT) {
675 		/*
676 		 * System controls are immutable.
677 		 */
678 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
679 			ret = set_errno(EPERM);
680 			goto rctlsys_out;
681 		}
682 
683 		/*
684 		 * Only privileged processes in the global zone may add
685 		 * privileged zone.* rctls.  Only privileged processes
686 		 * may add other privileged rctls.
687 		 */
688 		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
689 			if ((rde->rcd_entity == RCENTITY_ZONE &&
690 			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
691 			    (rde->rcd_entity != RCENTITY_ZONE &&
692 			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
693 				ret = set_errno(EACCES);
694 				goto rctlsys_out;
695 			}
696 		}
697 
698 		/*
699 		 * Only one basic control is allowed per rctl.
700 		 * If a basic control is being inserted, delete
701 		 * any other basic control.
702 		 */
703 		if ((nval->rcv_privilege == RCPRIV_BASIC) &&
704 		    (rctl_local_get(hndl, NULL, rval1, pp) == 0)) {
705 			do {
706 				if (rval1->rcv_privilege == RCPRIV_BASIC &&
707 				    rval1->rcv_action_recipient == curproc) {
708 					(void) rctl_local_delete(hndl, rval1,
709 					    pp);
710 					if (rctl_local_get(hndl, NULL, rval1,
711 					    pp) != 0)
712 						break;
713 				}
714 
715 				tval = rval1;
716 				rval1 = rval2;
717 				rval2 = tval;
718 			} while (rctl_local_get(hndl, rval2, rval1, pp)
719 			    == 0);
720 		}
721 
722 
723 		if (ret = rctl_local_insert(hndl, nval, pp)) {
724 			(void) set_errno(ret);
725 			goto rctlsys_out;
726 		}
727 
728 		/* ensure that nval is not freed */
729 		nval = NULL;
730 
731 	} else {
732 		/*
733 		 * RCTL_DELETE
734 		 */
735 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
736 			ret = set_errno(EPERM);
737 			goto rctlsys_out;
738 		}
739 
740 		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
741 			if ((rde->rcd_entity == RCENTITY_ZONE &&
742 			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
743 			    (rde->rcd_entity != RCENTITY_ZONE &&
744 			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
745 				ret = set_errno(EACCES);
746 				goto rctlsys_out;
747 			}
748 		}
749 
750 		if (ret = rctl_local_delete(hndl, nval, pp)) {
751 			(void) set_errno(ret);
752 			goto rctlsys_out;
753 		}
754 	}
755 
756 rctlsys_out:
757 
758 	if (pp)
759 		mutex_exit(&pp->p_lock);
760 
761 	kmem_free(nblk, sizeof (rctl_opaque_t));
762 	kmem_free(oblk, sizeof (rctl_opaque_t));
763 
764 	/* only free nval if we did not rctl_local_insert it */
765 	if (nval)
766 		kmem_cache_free(rctl_val_cache, nval);
767 
768 	kmem_cache_free(rctl_val_cache, oval);
769 	kmem_cache_free(rctl_val_cache, rval1);
770 	kmem_cache_free(rctl_val_cache, rval2);
771 
772 	return (ret);
773 }
774 
/*
 * static long rctlsys_lst(char *ubuf, size_t ubufsz)
 *
 * Overview
 *   Private call backing rctl_walk(3C).  Builds the buffer of resource
 *   control names with rctl_build_name_buf() and, if it fits within ubufsz
 *   bytes, copies it out to the user buffer ubuf.  When the user buffer is
 *   too small nothing is copied, but the size is still returned.
 *
 * Return values
 *   The size of the name buffer on success; otherwise errno is set to EFAULT
 *   because the copy to ubuf failed.
 */
static long
rctlsys_lst(char *ubuf, size_t ubufsz)
{
	char *names;
	size_t namesz;
	int fault = 0;

	namesz = rctl_build_name_buf(&names);

	if (namesz <= ubufsz)
		fault = (copyout(names, ubuf, namesz) != 0);

	/* The kernel copy is no longer needed whichever way that went. */
	kmem_free(names, namesz);

	if (fault)
		return (set_errno(EFAULT));

	return (namesz);
}
793 
794 static long
795 rctlsys_ctl(char *name, rctl_opaque_t *rblk, int flags)
796 {
797 	rctl_dict_entry_t *krde;
798 	rctl_opaque_t *krblk;
799 	char *kname;
800 	size_t klen;
801 
802 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
803 
804 	if (name == NULL || copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
805 		kmem_free(kname, MAXPATHLEN);
806 		return (set_errno(EFAULT));
807 	}
808 
809 	switch (flags) {
810 	case RCTLCTL_GET:
811 		krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
812 		krblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);
813 
814 		if (rctl_global_get(kname, krde) == -1) {
815 			kmem_free(krde, sizeof (rctl_dict_entry_t));
816 			kmem_free(krblk, sizeof (rctl_opaque_t));
817 			kmem_free(kname, MAXPATHLEN);
818 			return (set_errno(ESRCH));
819 		}
820 
821 		rctlsys_rblk_xfrm(krblk, krde, NULL, RBX_TO_BLK | RBX_CTL);
822 
823 		if (copyout(krblk, rblk, sizeof (rctl_opaque_t)) != 0) {
824 			kmem_free(krde, sizeof (rctl_dict_entry_t));
825 			kmem_free(krblk, sizeof (rctl_opaque_t));
826 			kmem_free(kname, MAXPATHLEN);
827 			return (set_errno(EFAULT));
828 		}
829 
830 		kmem_free(krde, sizeof (rctl_dict_entry_t));
831 		kmem_free(krblk, sizeof (rctl_opaque_t));
832 		kmem_free(kname, MAXPATHLEN);
833 		break;
834 	case RCTLCTL_SET:
835 		if (secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
836 			kmem_free(kname, MAXPATHLEN);
837 			return (set_errno(EPERM));
838 		}
839 
840 		krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
841 		krblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);
842 
843 		if (rctl_global_get(kname, krde) == -1) {
844 			kmem_free(krde, sizeof (rctl_dict_entry_t));
845 			kmem_free(krblk, sizeof (rctl_opaque_t));
846 			kmem_free(kname, MAXPATHLEN);
847 			return (set_errno(ESRCH));
848 		}
849 
850 		if (copyin(rblk, krblk, sizeof (rctl_opaque_t)) != 0) {
851 			kmem_free(krde, sizeof (rctl_dict_entry_t));
852 			kmem_free(krblk, sizeof (rctl_opaque_t));
853 			kmem_free(kname, MAXPATHLEN);
854 			return (set_errno(EFAULT));
855 		}
856 
857 		rctlsys_rblk_xfrm(krblk, krde, NULL, RBX_FROM_BLK | RBX_CTL);
858 
859 		if (rctl_global_set(kname, krde) == -1) {
860 			kmem_free(krde, sizeof (rctl_dict_entry_t));
861 			kmem_free(krblk, sizeof (rctl_opaque_t));
862 			kmem_free(kname, MAXPATHLEN);
863 			return (set_errno(ESRCH));
864 		}
865 
866 		kmem_free(krde, sizeof (rctl_dict_entry_t));
867 		kmem_free(krblk, sizeof (rctl_opaque_t));
868 		kmem_free(kname, MAXPATHLEN);
869 
870 		break;
871 	default:
872 		kmem_free(kname, MAXPATHLEN);
873 		return (set_errno(EINVAL));
874 	}
875 
876 	return (0);
877 }
878 
/*
 * The arbitrary maximum number of rctl_opaque_t blocks that may be passed to
 * rctlsys_projset() in a single call.  Requests for more are rejected with
 * EINVAL; the limit also bounds the kernel allocation made to hold the
 * caller's array.
 */
#define	RCTL_PROJSET_MAXSIZE	1024
884 
885 static long
886 rctlsys_projset(char *name, rctl_opaque_t *rblk, size_t size, int flags)
887 {
888 	rctl_dict_entry_t *krde;
889 	rctl_opaque_t *krblk;
890 	char *kname;
891 	size_t klen;
892 	rctl_hndl_t hndl;
893 	rctl_val_t *new_values = NULL;
894 	rctl_val_t *alloc_values = NULL;
895 	rctl_val_t *new_val;
896 	rctl_val_t *alloc_val;
897 	int error = 0;
898 	int count;
899 
900 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
901 
902 	if (name == NULL || copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
903 		kmem_free(kname, MAXPATHLEN);
904 		return (set_errno(EFAULT));
905 	}
906 
907 	if (size > RCTL_PROJSET_MAXSIZE) {
908 		kmem_free(kname, MAXPATHLEN);
909 		return (set_errno(EINVAL));
910 	}
911 
912 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
913 		kmem_free(kname, MAXPATHLEN);
914 		return (set_errno(EINVAL));
915 	}
916 
917 	krde = rctl_dict_lookup_hndl(hndl);
918 
919 	/* If not a project entity then exit */
920 	if ((krde->rcd_entity != RCENTITY_PROJECT) || (size <= 0)) {
921 		kmem_free(kname, MAXPATHLEN);
922 		return (set_errno(EINVAL));
923 	}
924 
925 	if (secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
926 		kmem_free(kname, MAXPATHLEN);
927 		return (set_errno(EPERM));
928 	}
929 
930 	/* Allocate an array large enough for all resource control blocks */
931 	krblk = kmem_zalloc(sizeof (rctl_opaque_t) * size, KM_SLEEP);
932 
933 	if (copyin(rblk, krblk, sizeof (rctl_opaque_t) * size) == 0) {
934 
935 		for (count = 0; (count < size) && (error == 0); count++) {
936 			new_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
937 			alloc_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
938 
939 			rctlsys_rblk_xfrm(&krblk[count], NULL, new_val,
940 			    RBX_FROM_BLK | RBX_VAL);
941 
942 			/*
943 			 * Project entity resource control values should always
944 			 * be privileged
945 			 */
946 			if (new_val->rcv_privilege != RCPRIV_PRIVILEGED) {
947 				kmem_cache_free(rctl_val_cache, new_val);
948 				kmem_cache_free(rctl_val_cache, alloc_val);
949 
950 				error = EPERM;
951 			} else if (rctl_invalid_value(krde, new_val) == 0) {
952 
953 				/*
954 				 * This is a project entity; we do not set
955 				 * rcv_action_recipient or rcv_action_recip_pid
956 				 */
957 				new_val->rcv_action_recipient = NULL;
958 				new_val->rcv_action_recip_pid = -1;
959 				new_val->rcv_flagaction |= RCTL_LOCAL_PROJDB;
960 				new_val->rcv_firing_time = 0;
961 
962 				new_val->rcv_prev = NULL;
963 				new_val->rcv_next = new_values;
964 				new_values = new_val;
965 
966 				/*
967 				 * alloc_val is left largely uninitialized, it
968 				 * is a pre-allocated rctl_val_t which is used
969 				 * later in rctl_local_replace_all() /
970 				 * rctl_local_insert_all().
971 				 */
972 				alloc_val->rcv_prev = NULL;
973 				alloc_val->rcv_next = alloc_values;
974 				alloc_values = alloc_val;
975 			} else {
976 				kmem_cache_free(rctl_val_cache, new_val);
977 				kmem_cache_free(rctl_val_cache, alloc_val);
978 
979 				error = EINVAL;
980 			}
981 		}
982 
983 	} else {
984 		error = EFAULT;
985 	}
986 
987 	kmem_free(krblk, sizeof (rctl_opaque_t) * size);
988 	kmem_free(kname, MAXPATHLEN);
989 
990 	if (error) {
991 		/*
992 		 * We will have the same number of items in the alloc_values
993 		 * linked list, as we have in new_values.  However, we remain
994 		 * cautious, and teardown the linked lists individually.
995 		 */
996 		while (new_values != NULL) {
997 			new_val = new_values;
998 			new_values = new_values->rcv_next;
999 			kmem_cache_free(rctl_val_cache, new_val);
1000 		}
1001 
1002 		while (alloc_values != NULL) {
1003 			alloc_val = alloc_values;
1004 			alloc_values = alloc_values->rcv_next;
1005 			kmem_cache_free(rctl_val_cache, alloc_val);
1006 		}
1007 
1008 		return (set_errno(error));
1009 	}
1010 
1011 	/*
1012 	 * We take the p_lock here to maintain consistency with other functions
1013 	 * - rctlsys_get() and rctlsys_set()
1014 	 */
1015 	mutex_enter(&curproc->p_lock);
1016 	if (flags & TASK_PROJ_PURGE)  {
1017 		(void) rctl_local_replace_all(hndl, new_values, alloc_values,
1018 		    curproc);
1019 	} else {
1020 		(void) rctl_local_insert_all(hndl, new_values, alloc_values,
1021 		    curproc);
1022 	}
1023 	mutex_exit(&curproc->p_lock);
1024 
1025 	return (0);
1026 }
1027 
/*
 * long rctlsys(int code, char *name, void *obuf, void *nbuf, size_t obufsz,
 *   int flags)
 *
 * Overview
 *   Entry point of the rctlsys system call.  code selects the operation and
 *   determines how the remaining arguments are interpreted:
 *     0  rctlsys_get(name, obuf, nbuf, flags)
 *     1  rctlsys_set(name, obuf, nbuf, flags)
 *     2  rctlsys_lst(obuf, obufsz)             private, for rctl_walk(3C)
 *     3  rctlsys_ctl(name, obuf, flags)        private, for rctladm(1M)
 *     4  rctlsys_projset(name, nbuf, obufsz, flags)
 *                                              private, for
 *                                              setproject(3PROJECT)
 *
 * Return values
 *   Whatever the selected operation returns; an unknown code sets errno to
 *   EINVAL.
 */
long
rctlsys(int code, char *name, void *obuf, void *nbuf, size_t obufsz, int flags)
{
	long rv;

	switch (code) {
	case 0:
		rv = rctlsys_get(name, obuf, nbuf, flags);
		break;
	case 1:
		rv = rctlsys_set(name, obuf, nbuf, flags);
		break;
	case 2:
		rv = rctlsys_lst(obuf, obufsz);
		break;
	case 3:
		rv = rctlsys_ctl(name, obuf, flags);
		break;
	case 4:
		rv = rctlsys_projset(name, nbuf, obufsz, flags);
		break;
	default:
		rv = set_errno(EINVAL);
		break;
	}

	return (rv);
}
1059