xref: /illumos-gate/usr/src/uts/common/syscall/rctlsys.c (revision ddb365bfc9e868ad24ccdcb0dc91af18b10df082)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 
28 #include <sys/cmn_err.h>
29 #include <sys/cred.h>
30 #include <sys/errno.h>
31 #include <sys/rctl.h>
32 #include <sys/rctl_impl.h>
33 #include <sys/strlog.h>
34 #include <sys/syslog.h>
35 #include <sys/sysmacros.h>
36 #include <sys/systm.h>
37 #include <sys/policy.h>
38 #include <sys/proc.h>
39 #include <sys/task.h>
40 
41 /*
42  * setrctl(2), getrctl(2), and private rctlsys(2*) system calls
43  *
44  * Resource control block (rctlblk_ptr_t, rctl_opaque_t)
45  *   The resource control system call interfaces present the resource control
46  *   values and flags via the resource control block abstraction, made manifest
47  *   via an opaque data type with strict type definitions.  Keeping the formal
48  *   definitions in the rcontrol block allows us to be clever in the kernel,
49  *   combining attributes where appropriate in the current implementation while
50  *   preserving binary compatibility in the face of implementation changes.
51  */
52 
53 #define	RBX_TO_BLK	0x1
54 #define	RBX_FROM_BLK	0x2
55 #define	RBX_VAL		0x4
56 #define	RBX_CTL		0x8
57 
58 static void
59 rctlsys_rblk_xfrm(rctl_opaque_t *blk, rctl_dict_entry_t *rde,
60     rctl_val_t *val, int flags)
61 {
62 	if (flags & RBX_FROM_BLK) {
63 		if (flags & RBX_VAL) {
64 			/*
65 			 * Firing time cannot be set.
66 			 */
67 			val->rcv_privilege = blk->rcq_privilege;
68 			val->rcv_value = blk->rcq_value;
69 			val->rcv_flagaction = blk->rcq_local_flagaction;
70 			val->rcv_action_signal = blk->rcq_local_signal;
71 			val->rcv_action_recip_pid =
72 			    blk->rcq_local_recipient_pid;
73 		}
74 		if (flags & RBX_CTL) {
75 			rde->rcd_flagaction = blk->rcq_global_flagaction;
76 			rde->rcd_syslog_level = blk->rcq_global_syslog_level;
77 
78 			/*
79 			 * Because the strlog() interface supports fewer options
80 			 * than are made available via the syslog() interface to
81 			 * userland, we map the syslog level down to a smaller
82 			 * set of distinct logging behaviours.
83 			 */
84 			rde->rcd_strlog_flags = 0;
85 			switch (blk->rcq_global_syslog_level) {
86 				case LOG_EMERG:
87 				case LOG_ALERT:
88 				case LOG_CRIT:
89 					rde->rcd_strlog_flags |= SL_CONSOLE;
90 					/*FALLTHROUGH*/
91 				case LOG_ERR:
92 					rde->rcd_strlog_flags |= SL_ERROR;
93 					/*FALLTHROUGH*/
94 				case LOG_WARNING:
95 					rde->rcd_strlog_flags |= SL_WARN;
96 					break;
97 				case LOG_NOTICE:
98 					rde->rcd_strlog_flags |= SL_CONSOLE;
99 					/*FALLTHROUGH*/
100 				case LOG_INFO:	/* informational */
101 				case LOG_DEBUG:	/* debug-level messages */
102 				default:
103 					rde->rcd_strlog_flags |= SL_NOTE;
104 					break;
105 			}
106 		}
107 	} else {
108 		bzero(blk,  sizeof (rctl_opaque_t));
109 		if (flags & RBX_VAL) {
110 			blk->rcq_privilege = val->rcv_privilege;
111 			blk->rcq_value = val->rcv_value;
112 			blk->rcq_enforced_value = rctl_model_value(rde,
113 			    curproc, val->rcv_value);
114 			blk->rcq_local_flagaction = val->rcv_flagaction;
115 			blk->rcq_local_signal = val->rcv_action_signal;
116 			blk->rcq_firing_time = val->rcv_firing_time;
117 			blk->rcq_local_recipient_pid =
118 			    val->rcv_action_recip_pid;
119 		}
120 		if (flags & RBX_CTL) {
121 			blk->rcq_global_flagaction = rde->rcd_flagaction;
122 			blk->rcq_global_syslog_level = rde->rcd_syslog_level;
123 		}
124 	}
125 }
126 
127 /*
128  * int rctl_invalid_value(rctl_dict_entry_t *, rctl_val_t *)
129  *
130  * Overview
131  *   Perform basic validation of proposed new resource control value against the
132  *   global properties set on the control.  Any system call operation presented
133  *   with an invalid resource control value should return -1 and set errno to
134  *   EINVAL.
135  *
136  * Return values
137  *   0 if valid, 1 if invalid.
138  *
139  * Caller's context
140  *   No restriction on context.
141  */
142 int
143 rctl_invalid_value(rctl_dict_entry_t *rde, rctl_val_t *rval)
144 {
145 	rctl_val_t *sys_rval;
146 
147 	if (rval->rcv_privilege != RCPRIV_BASIC &&
148 	    rval->rcv_privilege != RCPRIV_PRIVILEGED &&
149 	    rval->rcv_privilege != RCPRIV_SYSTEM)
150 		return (1);
151 
152 	if (rval->rcv_flagaction & ~RCTL_LOCAL_MASK)
153 		return (1);
154 
155 	if (rval->rcv_privilege == RCPRIV_BASIC &&
156 	    (rde->rcd_flagaction & RCTL_GLOBAL_NOBASIC) != 0)
157 		return (1);
158 
159 	if ((rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0 &&
160 	    (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) != 0)
161 		return (1);
162 
163 	if ((rval->rcv_flagaction & RCTL_LOCAL_DENY) &&
164 	    (rde->rcd_flagaction & RCTL_GLOBAL_DENY_NEVER))
165 		return (1);
166 
167 	if ((rval->rcv_flagaction & RCTL_LOCAL_SIGNAL) &&
168 	    (rde->rcd_flagaction & RCTL_GLOBAL_SIGNAL_NEVER))
169 		return (1);
170 
171 	if ((rval->rcv_flagaction & RCTL_LOCAL_SIGNAL) &&
172 	    rval->rcv_action_signal == 0)
173 		return (1);
174 
175 	if (rval->rcv_action_signal == SIGXCPU &&
176 	    (rde->rcd_flagaction & RCTL_GLOBAL_CPU_TIME) == 0)
177 		return (1);
178 	else if (rval->rcv_action_signal == SIGXFSZ &&
179 	    (rde->rcd_flagaction & RCTL_GLOBAL_FILE_SIZE) == 0)
180 		return (1);
181 	else if (rval->rcv_action_signal != SIGHUP &&
182 	    rval->rcv_action_signal != SIGABRT &&
183 	    rval->rcv_action_signal != SIGKILL &&
184 	    rval->rcv_action_signal != SIGTERM &&
185 	    rval->rcv_action_signal != SIGSTOP &&
186 	    rval->rcv_action_signal != SIGXCPU &&
187 	    rval->rcv_action_signal != SIGXFSZ &&
188 	    rval->rcv_action_signal != SIGXRES &&
189 	    rval->rcv_action_signal != 0)	/* That is, no signal is ok. */
190 		return (1);
191 
192 	sys_rval = rde->rcd_default_value;
193 	while (sys_rval->rcv_privilege != RCPRIV_SYSTEM)
194 		sys_rval = sys_rval->rcv_next;
195 
196 	if (rval->rcv_value > sys_rval->rcv_value)
197 		return (1);
198 
199 	return (0);
200 }
201 
202 /*
203  * static long rctlsys_get(char *name, rctl_opaque_t *old_rblk,
204  *   rctl_opaque_t *new_rblk, int flags)
205  *
206  * Overview
207  *   rctlsys_get() is the implementation of the core logic of getrctl(2), the
208  *   public system call for fetching resource control values.  Three mutually
209  *   exclusive flag values are supported: RCTL_USAGE, RCTL_FIRST and RCTL_NEXT.
210  *   When RCTL_USAGE is presented, the current usage for the resource control
211  *   is returned in new_blk if the resource control provides an implementation
212  *   of the usage operation.  When RCTL_FIRST is presented, the value of
213  *   old_rblk is ignored, and the first value in the resource control value
214  *   sequence for the named control is transformed and placed in the user
215  *   memory location at new_rblk.  In the RCTL_NEXT case, the value of old_rblk
216  *   is examined, and the next value in the sequence is transformed and placed
217  *   at new_rblk.
218  */
219 static long
220 rctlsys_get(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
221     int flags)
222 {
223 	rctl_val_t *nval;
224 	rctl_opaque_t *nblk;
225 	rctl_hndl_t hndl;
226 	char *kname;
227 	size_t klen;
228 	rctl_dict_entry_t *krde;
229 	int ret;
230 	int action = flags & (~RCTLSYS_ACTION_MASK);
231 
232 	if (flags & (~RCTLSYS_MASK))
233 		return (set_errno(EINVAL));
234 
235 	if (action != RCTL_FIRST && action != RCTL_NEXT &&
236 	    action != RCTL_USAGE)
237 		return (set_errno(EINVAL));
238 
239 	if (new_rblk == NULL || name == NULL)
240 		return (set_errno(EFAULT));
241 
242 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
243 	krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
244 
245 	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
246 		kmem_free(kname, MAXPATHLEN);
247 		kmem_free(krde, sizeof (rctl_dict_entry_t));
248 		return (set_errno(EFAULT));
249 	}
250 
251 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
252 		kmem_free(kname, MAXPATHLEN);
253 		kmem_free(krde, sizeof (rctl_dict_entry_t));
254 		return (set_errno(EINVAL));
255 	}
256 
257 	if (rctl_global_get(kname, krde) == -1) {
258 		kmem_free(kname, MAXPATHLEN);
259 		kmem_free(krde, sizeof (rctl_dict_entry_t));
260 		return (set_errno(ESRCH));
261 	}
262 
263 	kmem_free(kname, MAXPATHLEN);
264 
265 	if (action != RCTL_USAGE)
266 		nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
267 
268 	if (action == RCTL_USAGE) {
269 		rctl_set_t *rset;
270 		rctl_t *rctl;
271 		rctl_qty_t usage;
272 
273 		mutex_enter(&curproc->p_lock);
274 		if ((rset = rctl_entity_obtain_rset(krde, curproc)) == NULL) {
275 			mutex_exit(&curproc->p_lock);
276 			kmem_free(krde, sizeof (rctl_dict_entry_t));
277 			return (set_errno(ESRCH));
278 		}
279 		mutex_enter(&rset->rcs_lock);
280 		if (rctl_set_find(rset, hndl, &rctl) == -1) {
281 			mutex_exit(&rset->rcs_lock);
282 			mutex_exit(&curproc->p_lock);
283 			kmem_free(krde, sizeof (rctl_dict_entry_t));
284 			return (set_errno(ESRCH));
285 		}
286 		if (RCTLOP_NO_USAGE(rctl)) {
287 			mutex_exit(&rset->rcs_lock);
288 			mutex_exit(&curproc->p_lock);
289 			kmem_free(krde, sizeof (rctl_dict_entry_t));
290 			return (set_errno(ENOTSUP));
291 		}
292 		usage = RCTLOP_GET_USAGE(rctl, curproc);
293 		mutex_exit(&rset->rcs_lock);
294 		mutex_exit(&curproc->p_lock);
295 
296 		nblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);
297 		nblk->rcq_value = usage;
298 
299 		ret = copyout(nblk, new_rblk, sizeof (rctl_opaque_t));
300 		kmem_free(nblk, sizeof (rctl_opaque_t));
301 		kmem_free(krde, sizeof (rctl_dict_entry_t));
302 		return (ret == 0 ? 0 : set_errno(EFAULT));
303 	} else if (action == RCTL_FIRST) {
304 
305 		mutex_enter(&curproc->p_lock);
306 		if (ret = rctl_local_get(hndl, NULL, nval, curproc)) {
307 			mutex_exit(&curproc->p_lock);
308 			kmem_cache_free(rctl_val_cache, nval);
309 			kmem_free(krde, sizeof (rctl_dict_entry_t));
310 			return (set_errno(ret));
311 		}
312 		mutex_exit(&curproc->p_lock);
313 	} else {
314 		/*
315 		 * RCTL_NEXT
316 		 */
317 		rctl_val_t *oval;
318 		rctl_opaque_t *oblk;
319 
320 		oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
321 
322 		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
323 			kmem_cache_free(rctl_val_cache, nval);
324 			kmem_free(oblk, sizeof (rctl_opaque_t));
325 			kmem_free(krde, sizeof (rctl_dict_entry_t));
326 			return (set_errno(EFAULT));
327 		}
328 
329 		oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
330 
331 		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);
332 		mutex_enter(&curproc->p_lock);
333 		ret = rctl_local_get(hndl, oval, nval, curproc);
334 		mutex_exit(&curproc->p_lock);
335 
336 		kmem_cache_free(rctl_val_cache, oval);
337 		kmem_free(oblk, sizeof (rctl_opaque_t));
338 
339 		if (ret != 0) {
340 			kmem_cache_free(rctl_val_cache, nval);
341 			kmem_free(krde, sizeof (rctl_dict_entry_t));
342 			return (set_errno(ret));
343 		}
344 	}
345 
346 	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
347 
348 	rctlsys_rblk_xfrm(nblk, krde, nval, RBX_TO_BLK | RBX_VAL | RBX_CTL);
349 
350 	kmem_free(krde, sizeof (rctl_dict_entry_t));
351 	kmem_cache_free(rctl_val_cache, nval);
352 
353 	if (copyout(nblk, new_rblk, sizeof (rctl_opaque_t)) == -1) {
354 		kmem_free(nblk, sizeof (rctl_opaque_t));
355 		return (set_errno(EFAULT));
356 	}
357 
358 	kmem_free(nblk, sizeof (rctl_opaque_t));
359 
360 	return (0);
361 }
362 
363 /*
364  * static long rctlsys_set(char *name, rctl_opaque_t *old_rblk,
365  *   rctl_opaque_t *new_rblk, int flags)
366  *
367  * Overview
368  *   rctlsys_set() is the implementation of the core login of setrctl(2), which
369  *   allows the establishment of resource control values.  Flags may take on any
370  *   of three exclusive values:  RCTL_INSERT, RCTL_DELETE, and RCTL_REPLACE.
371  *   RCTL_INSERT ignores old_rblk and inserts the value in the appropriate
372  *   position in the ordered sequence of resource control values.  RCTL_DELETE
373  *   ignores old_rblk and deletes the first resource control value matching
374  *   (value, priority) in the given resource block.  If no matching value is
375  *   found, -1 is returned and errno is set to ENOENT.  Finally, in the case of
376  *   RCTL_REPLACE, old_rblk is used to match (value, priority); the matching
377  *   resource control value in the sequence is replaced with the contents of
378  *   new_rblk.  Again, if no match is found, -1 is returned and errno is set to
379  *   ENOENT.
380  *
381  *   rctlsys_set() causes a cursor test, which can reactivate resource controls
382  *   that have previously fired.
383  */
384 static long
385 rctlsys_set(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
386     int flags)
387 {
388 	rctl_val_t *nval;
389 	rctl_dict_entry_t *rde;
390 	rctl_opaque_t *nblk;
391 	rctl_hndl_t hndl;
392 	char *kname;
393 	size_t klen;
394 	long ret = 0;
395 	proc_t *pp = NULL;
396 	pid_t pid;
397 	int action = flags & (~RCTLSYS_ACTION_MASK);
398 	rctl_val_t *oval;
399 	rctl_val_t *rval1;
400 	rctl_val_t *rval2;
401 	rctl_val_t *tval;
402 	rctl_opaque_t *oblk;
403 
404 	if (flags & (~RCTLSYS_MASK))
405 		return (set_errno(EINVAL));
406 
407 	if (action != RCTL_INSERT &&
408 	    action != RCTL_DELETE &&
409 	    action != RCTL_REPLACE)
410 		return (set_errno(EINVAL));
411 
412 	if (new_rblk == NULL || name == NULL)
413 		return (set_errno(EFAULT));
414 
415 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
416 	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
417 		kmem_free(kname, MAXPATHLEN);
418 		return (set_errno(EFAULT));
419 	}
420 
421 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
422 		kmem_free(kname, MAXPATHLEN);
423 		return (set_errno(EINVAL));
424 	}
425 
426 	kmem_free(kname, MAXPATHLEN);
427 
428 	rde = rctl_dict_lookup_hndl(hndl);
429 
430 	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
431 
432 	if (copyin(new_rblk, nblk, sizeof (rctl_opaque_t)) == -1) {
433 		kmem_free(nblk, sizeof (rctl_opaque_t));
434 		return (set_errno(EFAULT));
435 	}
436 
437 	nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
438 
439 	rctlsys_rblk_xfrm(nblk, NULL, nval, RBX_FROM_BLK | RBX_VAL);
440 
441 	if (rctl_invalid_value(rde, nval)) {
442 		kmem_free(nblk, sizeof (rctl_opaque_t));
443 		kmem_cache_free(rctl_val_cache, nval);
444 		return (set_errno(EINVAL));
445 	}
446 
447 	/* allocate what we might need before potentially grabbing p_lock */
448 	oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
449 	oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
450 	rval1 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
451 	rval2 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
452 
453 	if (nval->rcv_privilege == RCPRIV_BASIC) {
454 		if (flags & RCTL_USE_RECIPIENT_PID) {
455 			pid = nval->rcv_action_recip_pid;
456 
457 			/* case for manipulating rctl values on other procs */
458 			if (pid != curproc->p_pid) {
459 				/* cannot be other pid on process rctls */
460 				if (rde->rcd_entity == RCENTITY_PROCESS) {
461 					ret = set_errno(EINVAL);
462 					goto rctlsys_out;
463 				}
464 				/*
465 				 * must have privilege to manipulate controls
466 				 * on other processes
467 				 */
468 				if (secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
469 					ret = set_errno(EACCES);
470 					goto rctlsys_out;
471 				}
472 
473 				pid = nval->rcv_action_recip_pid;
474 				mutex_enter(&pidlock);
475 				pp = prfind(pid);
476 				if (!pp) {
477 					mutex_exit(&pidlock);
478 					ret = set_errno(ESRCH);
479 					goto rctlsys_out;
480 				}
481 
482 				/*
483 				 * idle or zombie procs have either not yet
484 				 * set up their rctls or have already done
485 				 * their rctl_set_tearoff's.
486 				 */
487 				if (pp->p_stat == SZOMB ||
488 				    pp->p_stat == SIDL) {
489 					mutex_exit(&pidlock);
490 					ret = set_errno(ESRCH);
491 					goto rctlsys_out;
492 				}
493 
494 				/*
495 				 * hold this pp's p_lock to ensure that
496 				 * it does not do it's rctl_set_tearoff
497 				 * If we did not do this, we could
498 				 * potentially add rctls to the entity
499 				 * with a recipient that is a process
500 				 * that has exited.
501 				 */
502 				mutex_enter(&pp->p_lock);
503 				mutex_exit(&pidlock);
504 
505 				/*
506 				 * We know that curproc's task, project,
507 				 * and zone pointers will not change
508 				 * because functions that change them
509 				 * call holdlwps(SHOLDFORK1) first.
510 				 */
511 
512 				/*
513 				 * verify that the found pp is in the
514 				 * current task.  If it is, then it
515 				 * is also within the current project
516 				 * and zone.
517 				 */
518 				if (rde->rcd_entity == RCENTITY_TASK &&
519 				    pp->p_task != curproc->p_task) {
520 					ret = set_errno(ESRCH);
521 					goto rctlsys_out;
522 				}
523 
524 				ASSERT(pp->p_task->tk_proj ==
525 				    curproc->p_task->tk_proj);
526 				ASSERT(pp->p_zone == curproc->p_zone);
527 
528 
529 				nval->rcv_action_recipient = pp;
530 				nval->rcv_action_recip_pid = pid;
531 
532 			} else {
533 				/* for manipulating rctl values on this proc */
534 				mutex_enter(&curproc->p_lock);
535 				pp = curproc;
536 				nval->rcv_action_recipient = curproc;
537 				nval->rcv_action_recip_pid = curproc->p_pid;
538 			}
539 
540 		} else {
541 			/* RCTL_USE_RECIPIENT_PID not set, use this proc */
542 			mutex_enter(&curproc->p_lock);
543 			pp = curproc;
544 			nval->rcv_action_recipient = curproc;
545 			nval->rcv_action_recip_pid = curproc->p_pid;
546 		}
547 
548 	} else {
549 		/* privileged controls have no recipient pid */
550 		mutex_enter(&curproc->p_lock);
551 		pp = curproc;
552 		nval->rcv_action_recipient = NULL;
553 		nval->rcv_action_recip_pid = -1;
554 	}
555 
556 	nval->rcv_firing_time = 0;
557 
558 	if (action == RCTL_REPLACE) {
559 
560 		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
561 			ret = set_errno(EFAULT);
562 			goto rctlsys_out;
563 		}
564 
565 		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);
566 
567 		if (rctl_invalid_value(rde, oval)) {
568 			ret = set_errno(EINVAL);
569 			goto rctlsys_out;
570 		}
571 
572 		if (oval->rcv_privilege == RCPRIV_BASIC) {
573 			if (!(flags & RCTL_USE_RECIPIENT_PID)) {
574 				oval->rcv_action_recipient = curproc;
575 				oval->rcv_action_recip_pid = curproc->p_pid;
576 			}
577 		} else {
578 			oval->rcv_action_recipient = NULL;
579 			oval->rcv_action_recip_pid = -1;
580 		}
581 
582 		/*
583 		 * Find the real value we're attempting to replace on the
584 		 * sequence, rather than trusting the one delivered from
585 		 * userland.
586 		 */
587 		if (ret = rctl_local_get(hndl, NULL, rval1, pp)) {
588 			(void) set_errno(ret);
589 			goto rctlsys_out;
590 		}
591 
592 		do {
593 			if (rval1->rcv_privilege == RCPRIV_SYSTEM ||
594 			    rctl_val_cmp(oval, rval1, 0) == 0)
595 				break;
596 
597 			tval = rval1;
598 			rval1 = rval2;
599 			rval2 = tval;
600 		} while (rctl_local_get(hndl, rval2, rval1, pp) == 0);
601 
602 		if (rval1->rcv_privilege == RCPRIV_SYSTEM) {
603 			if (rctl_val_cmp(oval, rval1, 1) == 0)
604 				ret = set_errno(EPERM);
605 			else
606 				ret = set_errno(ESRCH);
607 
608 			goto rctlsys_out;
609 		}
610 
611 		bcopy(rval1, oval, sizeof (rctl_val_t));
612 
613 		/*
614 		 * System controls are immutable.
615 		 */
616 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
617 			ret = set_errno(EPERM);
618 			goto rctlsys_out;
619 		}
620 
621 		/*
622 		 * Only privileged processes in the global zone can modify
623 		 * privileged rctls of type RCENTITY_ZONE; replacing privileged
624 		 * controls with basic ones are not allowed either.  Lowering a
625 		 * lowerable one might be OK for privileged processes in a
626 		 * non-global zone, but lowerable rctls probably don't make
627 		 * sense for zones (hence, not modifiable from within a zone).
628 		 */
629 		if (rde->rcd_entity == RCENTITY_ZONE &&
630 		    (nval->rcv_privilege == RCPRIV_PRIVILEGED ||
631 		    oval->rcv_privilege == RCPRIV_PRIVILEGED) &&
632 		    secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
633 			ret = set_errno(EACCES);
634 			goto rctlsys_out;
635 		}
636 
637 		/*
638 		 * Must be privileged to replace a privileged control with
639 		 * a basic one.
640 		 */
641 		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
642 		    nval->rcv_privilege != RCPRIV_PRIVILEGED &&
643 		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
644 			ret = set_errno(EACCES);
645 			goto rctlsys_out;
646 		}
647 
648 		/*
649 		 * Must have lowerable global property for non-privileged
650 		 * to lower the value of a privileged control; otherwise must
651 		 * have sufficient privileges to modify privileged controls
652 		 * at all.
653 		 */
654 		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
655 		    nval->rcv_privilege == RCPRIV_PRIVILEGED &&
656 		    ((((rde->rcd_flagaction & RCTL_GLOBAL_LOWERABLE) == 0) ||
657 		    oval->rcv_flagaction != nval->rcv_flagaction ||
658 		    oval->rcv_action_signal != nval->rcv_action_signal ||
659 		    oval->rcv_value < nval->rcv_value)) &&
660 		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
661 			ret = set_errno(EACCES);
662 			goto rctlsys_out;
663 		}
664 
665 		if (ret = rctl_local_replace(hndl, oval, nval, pp)) {
666 			(void) set_errno(ret);
667 			goto rctlsys_out;
668 		}
669 
670 		/* ensure that nval is not freed */
671 		nval = NULL;
672 
673 	} else if (action == RCTL_INSERT) {
674 		/*
675 		 * System controls are immutable.
676 		 */
677 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
678 			ret = set_errno(EPERM);
679 			goto rctlsys_out;
680 		}
681 
682 		/*
683 		 * Only privileged processes in the global zone may add
684 		 * privileged zone.* rctls.  Only privileged processes
685 		 * may add other privileged rctls.
686 		 */
687 		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
688 			if ((rde->rcd_entity == RCENTITY_ZONE &&
689 			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
690 			    (rde->rcd_entity != RCENTITY_ZONE &&
691 			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
692 				ret = set_errno(EACCES);
693 				goto rctlsys_out;
694 			}
695 		}
696 
697 		/*
698 		 * Only one basic control is allowed per rctl.
699 		 * If a basic control is being inserted, delete
700 		 * any other basic control.
701 		 */
702 		if ((nval->rcv_privilege == RCPRIV_BASIC) &&
703 		    (rctl_local_get(hndl, NULL, rval1, pp) == 0)) {
704 			do {
705 				if (rval1->rcv_privilege == RCPRIV_BASIC &&
706 				    rval1->rcv_action_recipient == curproc) {
707 					(void) rctl_local_delete(hndl, rval1,
708 					    pp);
709 					if (rctl_local_get(hndl, NULL, rval1,
710 					    pp) != 0)
711 						break;
712 				}
713 
714 				tval = rval1;
715 				rval1 = rval2;
716 				rval2 = tval;
717 			} while (rctl_local_get(hndl, rval2, rval1, pp)
718 			    == 0);
719 		}
720 
721 
722 		if (ret = rctl_local_insert(hndl, nval, pp)) {
723 			(void) set_errno(ret);
724 			goto rctlsys_out;
725 		}
726 
727 		/* ensure that nval is not freed */
728 		nval = NULL;
729 
730 	} else {
731 		/*
732 		 * RCTL_DELETE
733 		 */
734 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
735 			ret = set_errno(EPERM);
736 			goto rctlsys_out;
737 		}
738 
739 		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
740 			if ((rde->rcd_entity == RCENTITY_ZONE &&
741 			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
742 			    (rde->rcd_entity != RCENTITY_ZONE &&
743 			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
744 				ret = set_errno(EACCES);
745 				goto rctlsys_out;
746 			}
747 		}
748 
749 		if (ret = rctl_local_delete(hndl, nval, pp)) {
750 			(void) set_errno(ret);
751 			goto rctlsys_out;
752 		}
753 	}
754 
755 rctlsys_out:
756 
757 	if (pp)
758 		mutex_exit(&pp->p_lock);
759 
760 	kmem_free(nblk, sizeof (rctl_opaque_t));
761 	kmem_free(oblk, sizeof (rctl_opaque_t));
762 
763 	/* only free nval if we did not rctl_local_insert it */
764 	if (nval)
765 		kmem_cache_free(rctl_val_cache, nval);
766 
767 	kmem_cache_free(rctl_val_cache, oval);
768 	kmem_cache_free(rctl_val_cache, rval1);
769 	kmem_cache_free(rctl_val_cache, rval2);
770 
771 	return (ret);
772 }
773 
774 static long
775 rctlsys_lst(char *ubuf, size_t ubufsz)
776 {
777 	char *kbuf;
778 	size_t kbufsz;
779 
780 	kbufsz = rctl_build_name_buf(&kbuf);
781 
782 	if (kbufsz <= ubufsz &&
783 	    copyout(kbuf, ubuf, kbufsz) != 0) {
784 		kmem_free(kbuf, kbufsz);
785 		return (set_errno(EFAULT));
786 	}
787 
788 	kmem_free(kbuf, kbufsz);
789 
790 	return (kbufsz);
791 }
792 
793 static long
794 rctlsys_ctl(char *name, rctl_opaque_t *rblk, int flags)
795 {
796 	rctl_dict_entry_t *krde;
797 	rctl_opaque_t *krblk;
798 	char *kname;
799 	size_t klen;
800 
801 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
802 
803 	if (name == NULL || copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
804 		kmem_free(kname, MAXPATHLEN);
805 		return (set_errno(EFAULT));
806 	}
807 
808 	switch (flags) {
809 	case RCTLCTL_GET:
810 		krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
811 		krblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);
812 
813 		if (rctl_global_get(kname, krde) == -1) {
814 			kmem_free(krde, sizeof (rctl_dict_entry_t));
815 			kmem_free(krblk, sizeof (rctl_opaque_t));
816 			kmem_free(kname, MAXPATHLEN);
817 			return (set_errno(ESRCH));
818 		}
819 
820 		rctlsys_rblk_xfrm(krblk, krde, NULL, RBX_TO_BLK | RBX_CTL);
821 
822 		if (copyout(krblk, rblk, sizeof (rctl_opaque_t)) != 0) {
823 			kmem_free(krde, sizeof (rctl_dict_entry_t));
824 			kmem_free(krblk, sizeof (rctl_opaque_t));
825 			kmem_free(kname, MAXPATHLEN);
826 			return (set_errno(EFAULT));
827 		}
828 
829 		kmem_free(krde, sizeof (rctl_dict_entry_t));
830 		kmem_free(krblk, sizeof (rctl_opaque_t));
831 		kmem_free(kname, MAXPATHLEN);
832 		break;
833 	case RCTLCTL_SET:
834 		if (secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
835 			kmem_free(kname, MAXPATHLEN);
836 			return (set_errno(EPERM));
837 		}
838 
839 		krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
840 		krblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);
841 
842 		if (rctl_global_get(kname, krde) == -1) {
843 			kmem_free(krde, sizeof (rctl_dict_entry_t));
844 			kmem_free(krblk, sizeof (rctl_opaque_t));
845 			kmem_free(kname, MAXPATHLEN);
846 			return (set_errno(ESRCH));
847 		}
848 
849 		if (copyin(rblk, krblk, sizeof (rctl_opaque_t)) != 0) {
850 			kmem_free(krde, sizeof (rctl_dict_entry_t));
851 			kmem_free(krblk, sizeof (rctl_opaque_t));
852 			kmem_free(kname, MAXPATHLEN);
853 			return (set_errno(EFAULT));
854 		}
855 
856 		rctlsys_rblk_xfrm(krblk, krde, NULL, RBX_FROM_BLK | RBX_CTL);
857 
858 		if (rctl_global_set(kname, krde) == -1) {
859 			kmem_free(krde, sizeof (rctl_dict_entry_t));
860 			kmem_free(krblk, sizeof (rctl_opaque_t));
861 			kmem_free(kname, MAXPATHLEN);
862 			return (set_errno(ESRCH));
863 		}
864 
865 		kmem_free(krde, sizeof (rctl_dict_entry_t));
866 		kmem_free(krblk, sizeof (rctl_opaque_t));
867 		kmem_free(kname, MAXPATHLEN);
868 
869 		break;
870 	default:
871 		kmem_free(kname, MAXPATHLEN);
872 		return (set_errno(EINVAL));
873 	}
874 
875 	return (0);
876 }
877 
878 /*
879  * The arbitrary maximum number of rctl_opaque_t that we can pass to
880  * rctl_projset().
881  */
882 #define	RCTL_PROJSET_MAXSIZE	1024
883 
884 static long
885 rctlsys_projset(char *name, rctl_opaque_t *rblk, size_t size, int flags)
886 {
887 	rctl_dict_entry_t *krde;
888 	rctl_opaque_t *krblk;
889 	char *kname;
890 	size_t klen;
891 	rctl_hndl_t hndl;
892 	rctl_val_t *new_values = NULL;
893 	rctl_val_t *alloc_values = NULL;
894 	rctl_val_t *new_val;
895 	rctl_val_t *alloc_val;
896 	int error = 0;
897 	int count;
898 
899 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
900 
901 	if (name == NULL || copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
902 		kmem_free(kname, MAXPATHLEN);
903 		return (set_errno(EFAULT));
904 	}
905 
906 	if (size > RCTL_PROJSET_MAXSIZE) {
907 		kmem_free(kname, MAXPATHLEN);
908 		return (set_errno(EINVAL));
909 	}
910 
911 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
912 		kmem_free(kname, MAXPATHLEN);
913 		return (set_errno(EINVAL));
914 	}
915 
916 	krde = rctl_dict_lookup_hndl(hndl);
917 
918 	/* If not a project entity then exit */
919 	if ((krde->rcd_entity != RCENTITY_PROJECT) || (size <= 0)) {
920 		kmem_free(kname, MAXPATHLEN);
921 		return (set_errno(EINVAL));
922 	}
923 
924 	if (secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
925 		kmem_free(kname, MAXPATHLEN);
926 		return (set_errno(EPERM));
927 	}
928 
929 	/* Allocate an array large enough for all resource control blocks */
930 	krblk = kmem_zalloc(sizeof (rctl_opaque_t) * size, KM_SLEEP);
931 
932 	if (copyin(rblk, krblk, sizeof (rctl_opaque_t) * size) == 0) {
933 
934 		for (count = 0; (count < size) && (error == 0); count++) {
935 			new_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
936 			alloc_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
937 
938 			rctlsys_rblk_xfrm(&krblk[count], NULL, new_val,
939 			    RBX_FROM_BLK | RBX_VAL);
940 
941 			/*
942 			 * Project entity resource control values should always
943 			 * be privileged
944 			 */
945 			if (new_val->rcv_privilege != RCPRIV_PRIVILEGED) {
946 				kmem_cache_free(rctl_val_cache, new_val);
947 				kmem_cache_free(rctl_val_cache, alloc_val);
948 
949 				error = EPERM;
950 			} else if (rctl_invalid_value(krde, new_val) == 0) {
951 
952 				/*
953 				 * This is a project entity; we do not set
954 				 * rcv_action_recipient or rcv_action_recip_pid
955 				 */
956 				new_val->rcv_action_recipient = NULL;
957 				new_val->rcv_action_recip_pid = -1;
958 				new_val->rcv_flagaction |= RCTL_LOCAL_PROJDB;
959 				new_val->rcv_firing_time = 0;
960 
961 				new_val->rcv_prev = NULL;
962 				new_val->rcv_next = new_values;
963 				new_values = new_val;
964 
965 				/*
966 				 * alloc_val is left largely uninitialized, it
967 				 * is a pre-allocated rctl_val_t which is used
968 				 * later in rctl_local_replace_all() /
969 				 * rctl_local_insert_all().
970 				 */
971 				alloc_val->rcv_prev = NULL;
972 				alloc_val->rcv_next = alloc_values;
973 				alloc_values = alloc_val;
974 			} else {
975 				kmem_cache_free(rctl_val_cache, new_val);
976 				kmem_cache_free(rctl_val_cache, alloc_val);
977 
978 				error = EINVAL;
979 			}
980 		}
981 
982 	} else {
983 		error = EFAULT;
984 	}
985 
986 	kmem_free(krblk, sizeof (rctl_opaque_t) * size);
987 	kmem_free(kname, MAXPATHLEN);
988 
989 	if (error) {
990 		/*
991 		 * We will have the same number of items in the alloc_values
992 		 * linked list, as we have in new_values.  However, we remain
993 		 * cautious, and teardown the linked lists individually.
994 		 */
995 		while (new_values != NULL) {
996 			new_val = new_values;
997 			new_values = new_values->rcv_next;
998 			kmem_cache_free(rctl_val_cache, new_val);
999 		}
1000 
1001 		while (alloc_values != NULL) {
1002 			alloc_val = alloc_values;
1003 			alloc_values = alloc_values->rcv_next;
1004 			kmem_cache_free(rctl_val_cache, alloc_val);
1005 		}
1006 
1007 		return (set_errno(error));
1008 	}
1009 
1010 	/*
1011 	 * We take the p_lock here to maintain consistency with other functions
1012 	 * - rctlsys_get() and rctlsys_set()
1013 	 */
1014 	mutex_enter(&curproc->p_lock);
1015 	if (flags & TASK_PROJ_PURGE)  {
1016 		(void) rctl_local_replace_all(hndl, new_values, alloc_values,
1017 		    curproc);
1018 	} else {
1019 		(void) rctl_local_insert_all(hndl, new_values, alloc_values,
1020 		    curproc);
1021 	}
1022 	mutex_exit(&curproc->p_lock);
1023 
1024 	return (0);
1025 }
1026 
1027 long
1028 rctlsys(int code, char *name, void *obuf, void *nbuf, size_t obufsz, int flags)
1029 {
1030 	switch (code) {
1031 	case 0:
1032 		return (rctlsys_get(name, obuf, nbuf, flags));
1033 
1034 	case 1:
1035 		return (rctlsys_set(name, obuf, nbuf, flags));
1036 
1037 	case 2:
1038 		/*
1039 		 * Private call for rctl_walk(3C).
1040 		 */
1041 		return (rctlsys_lst(obuf, obufsz));
1042 
1043 	case 3:
1044 		/*
1045 		 * Private code for rctladm(8):  "rctlctl".
1046 		 */
1047 		return (rctlsys_ctl(name, obuf, flags));
1048 	case 4:
1049 		/*
1050 		 * Private code for setproject(3PROJECT).
1051 		 */
1052 		return (rctlsys_projset(name, nbuf, obufsz, flags));
1053 
1054 	default:
1055 		return (set_errno(EINVAL));
1056 	}
1057 }
1058