xref: /titanic_44/usr/src/uts/common/syscall/rctlsys.c (revision 2a9459bdd821c1cf59590a7a9069ac9c591e8a6b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 
30 #include <sys/cmn_err.h>
31 #include <sys/cred.h>
32 #include <sys/errno.h>
33 #include <sys/rctl.h>
34 #include <sys/rctl_impl.h>
35 #include <sys/strlog.h>
36 #include <sys/syslog.h>
37 #include <sys/sysmacros.h>
38 #include <sys/systm.h>
39 #include <sys/policy.h>
40 #include <sys/proc.h>
41 #include <sys/task.h>
42 
43 /*
44  * setrctl(2), getrctl(2), and private rctlsys(2*) system calls
45  *
46  * Resource control block (rctlblk_ptr_t, rctl_opaque_t)
47  *   The resource control system call interfaces present the resource control
48  *   values and flags via the resource control block abstraction, made manifest
49  *   via an opaque data type with strict type definitions.  Keeping the formal
50  *   definitions in the rcontrol block allows us to be clever in the kernel,
51  *   combining attributes where appropriate in the current implementation while
52  *   preserving binary compatibility in the face of implementation changes.
53  */
54 
55 #define	RBX_TO_BLK	0x1
56 #define	RBX_FROM_BLK	0x2
57 #define	RBX_VAL		0x4
58 #define	RBX_CTL		0x8
59 
60 static void
61 rctlsys_rblk_xfrm(rctl_opaque_t *blk, rctl_dict_entry_t *rde,
62     rctl_val_t *val, int flags)
63 {
64 	if (flags & RBX_FROM_BLK) {
65 		if (flags & RBX_VAL) {
66 			/*
67 			 * Firing time cannot be set.
68 			 */
69 			val->rcv_privilege = blk->rcq_privilege;
70 			val->rcv_value = blk->rcq_value;
71 			val->rcv_flagaction = blk->rcq_local_flagaction;
72 			val->rcv_action_signal = blk->rcq_local_signal;
73 			val->rcv_action_recip_pid =
74 			    blk->rcq_local_recipient_pid;
75 		}
76 		if (flags & RBX_CTL) {
77 			rde->rcd_flagaction = blk->rcq_global_flagaction;
78 			rde->rcd_syslog_level = blk->rcq_global_syslog_level;
79 
80 			/*
81 			 * Because the strlog() interface supports fewer options
82 			 * than are made available via the syslog() interface to
83 			 * userland, we map the syslog level down to a smaller
84 			 * set of distinct logging behaviours.
85 			 */
86 			rde->rcd_strlog_flags = 0;
87 			switch (blk->rcq_global_syslog_level) {
88 				case LOG_EMERG:
89 				case LOG_ALERT:
90 				case LOG_CRIT:
91 					rde->rcd_strlog_flags |= SL_CONSOLE;
92 					/*FALLTHROUGH*/
93 				case LOG_ERR:
94 					rde->rcd_strlog_flags |= SL_ERROR;
95 					/*FALLTHROUGH*/
96 				case LOG_WARNING:
97 					rde->rcd_strlog_flags |= SL_WARN;
98 					break;
99 				case LOG_NOTICE:
100 					rde->rcd_strlog_flags |= SL_CONSOLE;
101 					/*FALLTHROUGH*/
102 				case LOG_INFO:	/* informational */
103 				case LOG_DEBUG:	/* debug-level messages */
104 				default:
105 					rde->rcd_strlog_flags |= SL_NOTE;
106 					break;
107 			}
108 		}
109 	} else {
110 		bzero(blk,  sizeof (rctl_opaque_t));
111 		if (flags & RBX_VAL) {
112 			blk->rcq_privilege = val->rcv_privilege;
113 			blk->rcq_value = val->rcv_value;
114 			blk->rcq_enforced_value = rctl_model_value(rde,
115 			    curproc, val->rcv_value);
116 			blk->rcq_local_flagaction = val->rcv_flagaction;
117 			blk->rcq_local_signal = val->rcv_action_signal;
118 			blk->rcq_firing_time = val->rcv_firing_time;
119 			blk->rcq_local_recipient_pid =
120 			    val->rcv_action_recip_pid;
121 		}
122 		if (flags & RBX_CTL) {
123 			blk->rcq_global_flagaction = rde->rcd_flagaction;
124 			blk->rcq_global_syslog_level = rde->rcd_syslog_level;
125 		}
126 	}
127 }
128 
129 /*
130  * int rctl_invalid_value(rctl_dict_entry_t *, rctl_val_t *)
131  *
132  * Overview
133  *   Perform basic validation of proposed new resource control value against the
134  *   global properties set on the control.  Any system call operation presented
135  *   with an invalid resource control value should return -1 and set errno to
136  *   EINVAL.
137  *
138  * Return values
139  *   0 if valid, 1 if invalid.
140  *
141  * Caller's context
142  *   No restriction on context.
143  */
144 int
145 rctl_invalid_value(rctl_dict_entry_t *rde, rctl_val_t *rval)
146 {
147 	rctl_val_t *sys_rval;
148 
149 	if (rval->rcv_privilege != RCPRIV_BASIC &&
150 	    rval->rcv_privilege != RCPRIV_PRIVILEGED &&
151 	    rval->rcv_privilege != RCPRIV_SYSTEM)
152 		return (1);
153 
154 	if (rval->rcv_flagaction & ~RCTL_LOCAL_MASK)
155 		return (1);
156 
157 	if (rval->rcv_privilege == RCPRIV_BASIC &&
158 	    (rde->rcd_flagaction & RCTL_GLOBAL_NOBASIC) != 0)
159 		return (1);
160 
161 	if ((rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0 &&
162 	    (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) != 0)
163 		return (1);
164 
165 	if ((rval->rcv_flagaction & RCTL_LOCAL_DENY) &&
166 	    (rde->rcd_flagaction & RCTL_GLOBAL_DENY_NEVER))
167 		return (1);
168 
169 	if ((rval->rcv_flagaction & RCTL_LOCAL_SIGNAL) &&
170 	    (rde->rcd_flagaction & RCTL_GLOBAL_SIGNAL_NEVER))
171 		return (1);
172 
173 	if ((rval->rcv_flagaction & RCTL_LOCAL_SIGNAL) &&
174 	    rval->rcv_action_signal == 0)
175 		return (1);
176 
177 	if (rval->rcv_action_signal == SIGXCPU &&
178 	    (rde->rcd_flagaction & RCTL_GLOBAL_CPU_TIME) == 0)
179 		return (1);
180 	else if (rval->rcv_action_signal == SIGXFSZ &&
181 	    (rde->rcd_flagaction & RCTL_GLOBAL_FILE_SIZE) == 0)
182 		return (1);
183 	else if (rval->rcv_action_signal != SIGHUP &&
184 	    rval->rcv_action_signal != SIGABRT &&
185 	    rval->rcv_action_signal != SIGKILL &&
186 	    rval->rcv_action_signal != SIGTERM &&
187 	    rval->rcv_action_signal != SIGSTOP &&
188 	    rval->rcv_action_signal != SIGXCPU &&
189 	    rval->rcv_action_signal != SIGXFSZ &&
190 	    rval->rcv_action_signal != SIGXRES &&
191 	    rval->rcv_action_signal != 0)	/* That is, no signal is ok. */
192 		return (1);
193 
194 	sys_rval = rde->rcd_default_value;
195 	while (sys_rval->rcv_privilege != RCPRIV_SYSTEM)
196 		sys_rval = sys_rval->rcv_next;
197 
198 	if (rval->rcv_value > sys_rval->rcv_value)
199 		return (1);
200 
201 	return (0);
202 }
203 
204 /*
205  * static long rctlsys_get(char *name, rctl_opaque_t *old_rblk,
206  *   rctl_opaque_t *new_rblk, int flags)
207  *
208  * Overview
209  *   rctlsys_get() is the implementation of the core logic of getrctl(2), the
210  *   public system call for fetching resource control values.  Two mutually
211  *   exclusive flag values are supported:  RCTL_FIRST and RCTL_NEXT.  When
212  *   RCTL_FIRST is presented, the value of old_rblk is ignored, and the first
213  *   value in the resource control value sequence for the named control is
214  *   transformed and placed in the user memory location at new_rblk.  In the
215  *   RCTL_NEXT case, the value of old_rblk is examined, and the next value in
216  *   the sequence is transformed and placed at new_rblk.
217  */
218 static long
219 rctlsys_get(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
220     int flags)
221 {
222 	rctl_val_t *nval;
223 	rctl_opaque_t *nblk;
224 	rctl_hndl_t hndl;
225 	char *kname;
226 	size_t klen;
227 	rctl_dict_entry_t *krde;
228 	int ret;
229 	int action = flags & (~RCTLSYS_ACTION_MASK);
230 
231 	if (flags & (~RCTLSYS_MASK))
232 		return (set_errno(EINVAL));
233 
234 	if (action != RCTL_FIRST && action != RCTL_NEXT &&
235 	    action != RCTL_USAGE)
236 		return (set_errno(EINVAL));
237 
238 	if (new_rblk == NULL || name == NULL)
239 		return (set_errno(EFAULT));
240 
241 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
242 	krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
243 
244 	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
245 		kmem_free(kname, MAXPATHLEN);
246 		kmem_free(krde, sizeof (rctl_dict_entry_t));
247 		return (set_errno(EFAULT));
248 	}
249 
250 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
251 		kmem_free(kname, MAXPATHLEN);
252 		kmem_free(krde, sizeof (rctl_dict_entry_t));
253 		return (set_errno(EINVAL));
254 	}
255 
256 	if (rctl_global_get(kname, krde) == -1) {
257 		kmem_free(kname, MAXPATHLEN);
258 		kmem_free(krde, sizeof (rctl_dict_entry_t));
259 		return (set_errno(ESRCH));
260 	}
261 
262 	kmem_free(kname, MAXPATHLEN);
263 
264 	nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
265 
266 	if (action == RCTL_USAGE) {
267 		kmem_cache_free(rctl_val_cache, nval);
268 		kmem_free(krde, sizeof (rctl_dict_entry_t));
269 		return (set_errno(ENOTSUP));
270 	} else if (action == RCTL_FIRST) {
271 
272 		mutex_enter(&curproc->p_lock);
273 		if (ret = rctl_local_get(hndl, NULL, nval, curproc)) {
274 			mutex_exit(&curproc->p_lock);
275 			kmem_cache_free(rctl_val_cache, nval);
276 			kmem_free(krde, sizeof (rctl_dict_entry_t));
277 			return (set_errno(ret));
278 		}
279 		mutex_exit(&curproc->p_lock);
280 	} else {
281 		/*
282 		 * RCTL_NEXT
283 		 */
284 		rctl_val_t *oval;
285 		rctl_opaque_t *oblk;
286 
287 		oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
288 
289 		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
290 			kmem_cache_free(rctl_val_cache, nval);
291 			kmem_free(oblk, sizeof (rctl_opaque_t));
292 			kmem_free(krde, sizeof (rctl_dict_entry_t));
293 			return (set_errno(EFAULT));
294 		}
295 
296 		oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
297 
298 		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);
299 		mutex_enter(&curproc->p_lock);
300 		ret = rctl_local_get(hndl, oval, nval, curproc);
301 		mutex_exit(&curproc->p_lock);
302 
303 		kmem_cache_free(rctl_val_cache, oval);
304 		kmem_free(oblk, sizeof (rctl_opaque_t));
305 
306 		if (ret != 0) {
307 			kmem_cache_free(rctl_val_cache, nval);
308 			kmem_free(krde, sizeof (rctl_dict_entry_t));
309 			return (set_errno(ret));
310 		}
311 	}
312 
313 	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
314 
315 	rctlsys_rblk_xfrm(nblk, krde, nval, RBX_TO_BLK | RBX_VAL | RBX_CTL);
316 
317 	kmem_free(krde, sizeof (rctl_dict_entry_t));
318 	kmem_cache_free(rctl_val_cache, nval);
319 
320 	if (copyout(nblk, new_rblk, sizeof (rctl_opaque_t)) == -1) {
321 		kmem_free(nblk, sizeof (rctl_opaque_t));
322 		return (set_errno(EFAULT));
323 	}
324 
325 	kmem_free(nblk, sizeof (rctl_opaque_t));
326 
327 	return (0);
328 }
329 
330 /*
331  * static long rctlsys_set(char *name, rctl_opaque_t *old_rblk,
332  *   rctl_opaque_t *new_rblk, int flags)
333  *
334  * Overview
335  *   rctlsys_set() is the implementation of the core login of setrctl(2), which
336  *   allows the establishment of resource control values.  Flags may take on any
337  *   of three exclusive values:  RCTL_INSERT, RCTL_DELETE, and RCTL_REPLACE.
338  *   RCTL_INSERT ignores old_rblk and inserts the value in the appropriate
339  *   position in the ordered sequence of resource control values.  RCTL_DELETE
340  *   ignores old_rblk and deletes the first resource control value matching
341  *   (value, priority) in the given resource block.  If no matching value is
342  *   found, -1 is returned and errno is set to ENOENT.  Finally, in the case of
343  *   RCTL_REPLACE, old_rblk is used to match (value, priority); the matching
344  *   resource control value in the sequence is replaced with the contents of
345  *   new_rblk.  Again, if no match is found, -1 is returned and errno is set to
346  *   ENOENT.
347  *
348  *   rctlsys_set() causes a cursor test, which can reactivate resource controls
349  *   that have previously fired.
350  */
351 static long
352 rctlsys_set(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
353     int flags)
354 {
355 	rctl_val_t *nval;
356 	rctl_dict_entry_t *rde;
357 	rctl_opaque_t *nblk;
358 	rctl_hndl_t hndl;
359 	char *kname;
360 	size_t klen;
361 	long ret = 0;
362 	proc_t *pp = NULL;
363 	pid_t pid;
364 	int action = flags & (~RCTLSYS_ACTION_MASK);
365 	rctl_val_t *oval;
366 	rctl_val_t *rval1;
367 	rctl_val_t *rval2;
368 	rctl_val_t *tval;
369 	rctl_opaque_t *oblk;
370 
371 	if (flags & (~RCTLSYS_MASK))
372 		return (set_errno(EINVAL));
373 
374 	if (action != RCTL_INSERT &&
375 	    action != RCTL_DELETE &&
376 	    action != RCTL_REPLACE)
377 		return (set_errno(EINVAL));
378 
379 	if (new_rblk == NULL || name == NULL)
380 		return (set_errno(EFAULT));
381 
382 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
383 	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
384 		kmem_free(kname, MAXPATHLEN);
385 		return (set_errno(EFAULT));
386 	}
387 
388 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
389 		kmem_free(kname, MAXPATHLEN);
390 		return (set_errno(EINVAL));
391 	}
392 
393 	kmem_free(kname, MAXPATHLEN);
394 
395 	rde = rctl_dict_lookup_hndl(hndl);
396 
397 	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
398 
399 	if (copyin(new_rblk, nblk, sizeof (rctl_opaque_t)) == -1) {
400 		kmem_free(nblk, sizeof (rctl_opaque_t));
401 		return (set_errno(EFAULT));
402 	}
403 
404 	nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
405 
406 	rctlsys_rblk_xfrm(nblk, NULL, nval, RBX_FROM_BLK | RBX_VAL);
407 
408 	if (rctl_invalid_value(rde, nval)) {
409 		kmem_free(nblk, sizeof (rctl_opaque_t));
410 		kmem_cache_free(rctl_val_cache, nval);
411 		return (set_errno(EINVAL));
412 	}
413 
414 	/* allocate what we might need before potentially grabbing p_lock */
415 	oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
416 	oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
417 	rval1 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
418 	rval2 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
419 
420 	if (nval->rcv_privilege == RCPRIV_BASIC) {
421 		if (flags & RCTL_USE_RECIPIENT_PID) {
422 			pid = nval->rcv_action_recip_pid;
423 
424 			/* case for manipulating rctl values on other procs */
425 			if (pid != curproc->p_pid) {
426 				/* cannot be other pid on process rctls */
427 				if (rde->rcd_entity == RCENTITY_PROCESS) {
428 					ret = set_errno(EINVAL);
429 					goto rctlsys_out;
430 				}
431 				/*
432 				 * must have privilege to manipulate controls
433 				 * on other processes
434 				 */
435 				if (secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
436 					ret = set_errno(EACCES);
437 					goto rctlsys_out;
438 				}
439 
440 				pid = nval->rcv_action_recip_pid;
441 				mutex_enter(&pidlock);
442 				pp = prfind(pid);
443 				if (!pp) {
444 					mutex_exit(&pidlock);
445 					ret = set_errno(ESRCH);
446 					goto rctlsys_out;
447 				}
448 
449 				/*
450 				 * idle or zombie procs have either not yet
451 				 * set up their rctls or have already done
452 				 * their rctl_set_tearoff's.
453 				 */
454 				if (pp->p_stat == SZOMB ||
455 				    pp->p_stat == SIDL) {
456 					mutex_exit(&pidlock);
457 					ret = set_errno(ESRCH);
458 					goto rctlsys_out;
459 				}
460 
461 				/*
462 				 * hold this pp's p_lock to ensure that
463 				 * it does not do it's rctl_set_tearoff
464 				 * If we did not do this, we could
465 				 * potentially add rctls to the entity
466 				 * with a recipient that is a process
467 				 * that has exited.
468 				 */
469 				mutex_enter(&pp->p_lock);
470 				mutex_exit(&pidlock);
471 
472 				/*
473 				 * We know that curproc's task, project,
474 				 * and zone pointers will not change
475 				 * because functions that change them
476 				 * call holdlwps(SHOLDFORK1) first.
477 				 */
478 
479 				/*
480 				 * verify that the found pp is in the
481 				 * current task.  If it is, then it
482 				 * is also within the current project
483 				 * and zone.
484 				 */
485 				if (rde->rcd_entity == RCENTITY_TASK &&
486 				    pp->p_task != curproc->p_task) {
487 					ret = set_errno(ESRCH);
488 					goto rctlsys_out;
489 				}
490 
491 				ASSERT(pp->p_task->tk_proj ==
492 				    curproc->p_task->tk_proj);
493 				ASSERT(pp->p_zone == curproc->p_zone);
494 
495 
496 				nval->rcv_action_recipient = pp;
497 				nval->rcv_action_recip_pid = pid;
498 
499 			} else {
500 				/* for manipulating rctl values on this proc */
501 				mutex_enter(&curproc->p_lock);
502 				pp = curproc;
503 				nval->rcv_action_recipient = curproc;
504 				nval->rcv_action_recip_pid = curproc->p_pid;
505 			}
506 
507 		} else {
508 			/* RCTL_USE_RECIPIENT_PID not set, use this proc */
509 			mutex_enter(&curproc->p_lock);
510 			pp = curproc;
511 			nval->rcv_action_recipient = curproc;
512 			nval->rcv_action_recip_pid = curproc->p_pid;
513 		}
514 
515 	} else {
516 		/* privileged controls have no recipient pid */
517 		mutex_enter(&curproc->p_lock);
518 		pp = curproc;
519 		nval->rcv_action_recipient = NULL;
520 		nval->rcv_action_recip_pid = -1;
521 	}
522 
523 	nval->rcv_firing_time = 0;
524 
525 	if (action == RCTL_REPLACE) {
526 
527 		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
528 			ret = set_errno(EFAULT);
529 			goto rctlsys_out;
530 		}
531 
532 		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);
533 
534 		if (rctl_invalid_value(rde, oval)) {
535 			ret = set_errno(EINVAL);
536 			goto rctlsys_out;
537 		}
538 
539 		if (oval->rcv_privilege == RCPRIV_BASIC) {
540 			if (!(flags & RCTL_USE_RECIPIENT_PID)) {
541 				oval->rcv_action_recipient = curproc;
542 				oval->rcv_action_recip_pid = curproc->p_pid;
543 			}
544 		} else {
545 			oval->rcv_action_recipient = NULL;
546 			oval->rcv_action_recip_pid = -1;
547 		}
548 
549 		/*
550 		 * Find the real value we're attempting to replace on the
551 		 * sequence, rather than trusting the one delivered from
552 		 * userland.
553 		 */
554 		if (ret = rctl_local_get(hndl, NULL, rval1, pp)) {
555 			(void) set_errno(ret);
556 			goto rctlsys_out;
557 		}
558 
559 		do {
560 			if (rval1->rcv_privilege == RCPRIV_SYSTEM ||
561 			    rctl_val_cmp(oval, rval1, 0) == 0)
562 				break;
563 
564 			tval = rval1;
565 			rval1 = rval2;
566 			rval2 = tval;
567 		} while (rctl_local_get(hndl, rval2, rval1, pp) == 0);
568 
569 		if (rval1->rcv_privilege == RCPRIV_SYSTEM) {
570 			if (rctl_val_cmp(oval, rval1, 1) == 0)
571 				ret = set_errno(EPERM);
572 			else
573 				ret = set_errno(ESRCH);
574 
575 			goto rctlsys_out;
576 		}
577 
578 		bcopy(rval1, oval, sizeof (rctl_val_t));
579 
580 		/*
581 		 * System controls are immutable.
582 		 */
583 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
584 			ret = set_errno(EPERM);
585 			goto rctlsys_out;
586 		}
587 
588 		/*
589 		 * Only privileged processes in the global zone can modify
590 		 * privileged rctls of type RCENTITY_ZONE; replacing privileged
591 		 * controls with basic ones are not allowed either.  Lowering a
592 		 * lowerable one might be OK for privileged processes in a
593 		 * non-global zone, but lowerable rctls probably don't make
594 		 * sense for zones (hence, not modifiable from within a zone).
595 		 */
596 		if (rde->rcd_entity == RCENTITY_ZONE &&
597 		    (nval->rcv_privilege == RCPRIV_PRIVILEGED ||
598 		    oval->rcv_privilege == RCPRIV_PRIVILEGED) &&
599 		    secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
600 			ret = set_errno(EACCES);
601 			goto rctlsys_out;
602 		}
603 
604 		/*
605 		 * Must be privileged to replace a privileged control with
606 		 * a basic one.
607 		 */
608 		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
609 		    nval->rcv_privilege != RCPRIV_PRIVILEGED &&
610 		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
611 			ret = set_errno(EACCES);
612 			goto rctlsys_out;
613 		}
614 
615 		/*
616 		 * Must have lowerable global property for non-privileged
617 		 * to lower the value of a privileged control; otherwise must
618 		 * have sufficient privileges to modify privileged controls
619 		 * at all.
620 		 */
621 		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
622 		    nval->rcv_privilege == RCPRIV_PRIVILEGED &&
623 		    ((((rde->rcd_flagaction & RCTL_GLOBAL_LOWERABLE) == 0) ||
624 		    oval->rcv_flagaction != nval->rcv_flagaction ||
625 		    oval->rcv_action_signal != nval->rcv_action_signal ||
626 		    oval->rcv_value < nval->rcv_value)) &&
627 		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
628 			ret = set_errno(EACCES);
629 			goto rctlsys_out;
630 		}
631 
632 		if (ret = rctl_local_replace(hndl, oval, nval, pp)) {
633 			(void) set_errno(ret);
634 			goto rctlsys_out;
635 		}
636 
637 		/* ensure that nval is not freed */
638 		nval = NULL;
639 
640 	} else if (action == RCTL_INSERT) {
641 		/*
642 		 * System controls are immutable.
643 		 */
644 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
645 			ret = set_errno(EPERM);
646 			goto rctlsys_out;
647 		}
648 
649 		/*
650 		 * Only privileged processes in the global zone may add
651 		 * privileged zone.* rctls.  Only privileged processes
652 		 * may add other privileged rctls.
653 		 */
654 		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
655 			if ((rde->rcd_entity == RCENTITY_ZONE &&
656 			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
657 			    (rde->rcd_entity != RCENTITY_ZONE &&
658 			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
659 				ret = set_errno(EACCES);
660 				goto rctlsys_out;
661 			}
662 		}
663 
664 		/*
665 		 * Only one basic control is allowed per rctl.
666 		 * If a basic control is being inserted, delete
667 		 * any other basic control.
668 		 */
669 		if ((nval->rcv_privilege == RCPRIV_BASIC) &&
670 		    (rctl_local_get(hndl, NULL, rval1, pp) == 0)) {
671 			do {
672 				if (rval1->rcv_privilege == RCPRIV_BASIC &&
673 				    rval1->rcv_action_recipient == curproc) {
674 					(void) rctl_local_delete(hndl, rval1,
675 					    pp);
676 					if (rctl_local_get(hndl, NULL, rval1,
677 					    pp) != 0)
678 						break;
679 				}
680 
681 				tval = rval1;
682 				rval1 = rval2;
683 				rval2 = tval;
684 			} while (rctl_local_get(hndl, rval2, rval1, pp)
685 			    == 0);
686 		}
687 
688 
689 		if (ret = rctl_local_insert(hndl, nval, pp)) {
690 			(void) set_errno(ret);
691 			goto rctlsys_out;
692 		}
693 
694 		/* ensure that nval is not freed */
695 		nval = NULL;
696 
697 	} else {
698 		/*
699 		 * RCTL_DELETE
700 		 */
701 		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
702 			ret = set_errno(EPERM);
703 			goto rctlsys_out;
704 		}
705 
706 		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
707 			if ((rde->rcd_entity == RCENTITY_ZONE &&
708 			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
709 			    (rde->rcd_entity != RCENTITY_ZONE &&
710 			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
711 				ret = set_errno(EACCES);
712 				goto rctlsys_out;
713 			}
714 		}
715 
716 		if (ret = rctl_local_delete(hndl, nval, pp)) {
717 			(void) set_errno(ret);
718 			goto rctlsys_out;
719 		}
720 	}
721 
722 rctlsys_out:
723 
724 	if (pp)
725 		mutex_exit(&pp->p_lock);
726 
727 	kmem_free(nblk, sizeof (rctl_opaque_t));
728 	kmem_free(oblk, sizeof (rctl_opaque_t));
729 
730 	/* only free nval if we did not rctl_local_insert it */
731 	if (nval)
732 		kmem_cache_free(rctl_val_cache, nval);
733 
734 	kmem_cache_free(rctl_val_cache, oval);
735 	kmem_cache_free(rctl_val_cache, rval1);
736 	kmem_cache_free(rctl_val_cache, rval2);
737 
738 	return (ret);
739 }
740 
741 static long
742 rctlsys_lst(char *ubuf, size_t ubufsz)
743 {
744 	char *kbuf;
745 	size_t kbufsz;
746 
747 	kbufsz = rctl_build_name_buf(&kbuf);
748 
749 	if (kbufsz <= ubufsz &&
750 	    copyout(kbuf, ubuf, kbufsz) != 0) {
751 		kmem_free(kbuf, kbufsz);
752 		return (set_errno(EFAULT));
753 	}
754 
755 	kmem_free(kbuf, kbufsz);
756 
757 	return (kbufsz);
758 }
759 
760 static long
761 rctlsys_ctl(char *name, rctl_opaque_t *rblk, int flags)
762 {
763 	rctl_dict_entry_t *krde;
764 	rctl_opaque_t *krblk;
765 	char *kname;
766 	size_t klen;
767 
768 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
769 
770 	if (name == NULL || copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
771 		kmem_free(kname, MAXPATHLEN);
772 		return (set_errno(EFAULT));
773 	}
774 
775 	switch (flags) {
776 	case RCTLCTL_GET:
777 		krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
778 		krblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);
779 
780 		if (rctl_global_get(kname, krde) == -1) {
781 			kmem_free(krde, sizeof (rctl_dict_entry_t));
782 			kmem_free(krblk, sizeof (rctl_opaque_t));
783 			kmem_free(kname, MAXPATHLEN);
784 			return (set_errno(ESRCH));
785 		}
786 
787 		rctlsys_rblk_xfrm(krblk, krde, NULL, RBX_TO_BLK | RBX_CTL);
788 
789 		if (copyout(krblk, rblk, sizeof (rctl_opaque_t)) != 0) {
790 			kmem_free(krde, sizeof (rctl_dict_entry_t));
791 			kmem_free(krblk, sizeof (rctl_opaque_t));
792 			kmem_free(kname, MAXPATHLEN);
793 			return (set_errno(EFAULT));
794 		}
795 
796 		kmem_free(krde, sizeof (rctl_dict_entry_t));
797 		kmem_free(krblk, sizeof (rctl_opaque_t));
798 		kmem_free(kname, MAXPATHLEN);
799 		break;
800 	case RCTLCTL_SET:
801 		if (secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
802 			kmem_free(kname, MAXPATHLEN);
803 			return (set_errno(EPERM));
804 		}
805 
806 		krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
807 		krblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);
808 
809 		if (rctl_global_get(kname, krde) == -1) {
810 			kmem_free(krde, sizeof (rctl_dict_entry_t));
811 			kmem_free(krblk, sizeof (rctl_opaque_t));
812 			kmem_free(kname, MAXPATHLEN);
813 			return (set_errno(ESRCH));
814 		}
815 
816 		if (copyin(rblk, krblk, sizeof (rctl_opaque_t)) != 0) {
817 			kmem_free(krde, sizeof (rctl_dict_entry_t));
818 			kmem_free(krblk, sizeof (rctl_opaque_t));
819 			kmem_free(kname, MAXPATHLEN);
820 			return (set_errno(EFAULT));
821 		}
822 
823 		rctlsys_rblk_xfrm(krblk, krde, NULL, RBX_FROM_BLK | RBX_CTL);
824 
825 		if (rctl_global_set(kname, krde) == -1) {
826 			kmem_free(krde, sizeof (rctl_dict_entry_t));
827 			kmem_free(krblk, sizeof (rctl_opaque_t));
828 			kmem_free(kname, MAXPATHLEN);
829 			return (set_errno(ESRCH));
830 		}
831 
832 		kmem_free(krde, sizeof (rctl_dict_entry_t));
833 		kmem_free(krblk, sizeof (rctl_opaque_t));
834 		kmem_free(kname, MAXPATHLEN);
835 
836 		break;
837 	default:
838 		kmem_free(kname, MAXPATHLEN);
839 		return (set_errno(EINVAL));
840 	}
841 
842 	return (0);
843 }
844 
845 /*
846  * The arbitrary maximum number of rctl_opaque_t that we can pass to
847  * rctl_projset().
848  */
849 #define	RCTL_PROJSET_MAXSIZE	1024
850 
851 static long
852 rctlsys_projset(char *name, rctl_opaque_t *rblk, size_t size, int flags)
853 {
854 	rctl_dict_entry_t *krde;
855 	rctl_opaque_t *krblk;
856 	char *kname;
857 	size_t klen;
858 	rctl_hndl_t hndl;
859 	rctl_val_t *new_values = NULL;
860 	rctl_val_t *alloc_values = NULL;
861 	rctl_val_t *new_val;
862 	rctl_val_t *alloc_val;
863 	int error = 0;
864 	int count;
865 
866 	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
867 
868 	if (name == NULL || copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
869 		kmem_free(kname, MAXPATHLEN);
870 		return (set_errno(EFAULT));
871 	}
872 
873 	if (size > RCTL_PROJSET_MAXSIZE) {
874 		kmem_free(kname, MAXPATHLEN);
875 		return (set_errno(EINVAL));
876 	}
877 
878 	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
879 		kmem_free(kname, MAXPATHLEN);
880 		return (set_errno(EINVAL));
881 	}
882 
883 	krde = rctl_dict_lookup_hndl(hndl);
884 
885 	/* If not a project entity then exit */
886 	if ((krde->rcd_entity != RCENTITY_PROJECT) || (size <= 0)) {
887 		kmem_free(kname, MAXPATHLEN);
888 		return (set_errno(EINVAL));
889 	}
890 
891 	if (secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
892 		kmem_free(kname, MAXPATHLEN);
893 		return (set_errno(EPERM));
894 	}
895 
896 	/* Allocate an array large enough for all resource control blocks */
897 	krblk = kmem_zalloc(sizeof (rctl_opaque_t) * size, KM_SLEEP);
898 
899 	if (copyin(rblk, krblk, sizeof (rctl_opaque_t) * size) == 0) {
900 
901 		for (count = 0; (count < size) && (error == 0); count++) {
902 			new_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
903 			alloc_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
904 
905 			rctlsys_rblk_xfrm(&krblk[count], NULL, new_val,
906 			    RBX_FROM_BLK | RBX_VAL);
907 
908 			/*
909 			 * Project entity resource control values should always
910 			 * be privileged
911 			 */
912 			if (new_val->rcv_privilege != RCPRIV_PRIVILEGED) {
913 				kmem_cache_free(rctl_val_cache, new_val);
914 				kmem_cache_free(rctl_val_cache, alloc_val);
915 
916 				error = EPERM;
917 			} else if (rctl_invalid_value(krde, new_val) == 0) {
918 
919 				/*
920 				 * This is a project entity; we do not set
921 				 * rcv_action_recipient or rcv_action_recip_pid
922 				 */
923 				new_val->rcv_action_recipient = NULL;
924 				new_val->rcv_action_recip_pid = -1;
925 				new_val->rcv_flagaction |= RCTL_LOCAL_PROJDB;
926 				new_val->rcv_firing_time = 0;
927 
928 				new_val->rcv_prev = NULL;
929 				new_val->rcv_next = new_values;
930 				new_values = new_val;
931 
932 				/*
933 				 * alloc_val is left largely uninitialized, it
934 				 * is a pre-allocated rctl_val_t which is used
935 				 * later in rctl_local_replace_all() /
936 				 * rctl_local_insert_all().
937 				 */
938 				alloc_val->rcv_prev = NULL;
939 				alloc_val->rcv_next = alloc_values;
940 				alloc_values = alloc_val;
941 			} else {
942 				kmem_cache_free(rctl_val_cache, new_val);
943 				kmem_cache_free(rctl_val_cache, alloc_val);
944 
945 				error = EINVAL;
946 			}
947 		}
948 
949 	} else {
950 		error = EFAULT;
951 	}
952 
953 	kmem_free(krblk, sizeof (rctl_opaque_t) * size);
954 	kmem_free(kname, MAXPATHLEN);
955 
956 	if (error) {
957 		/*
958 		 * We will have the same number of items in the alloc_values
959 		 * linked list, as we have in new_values.  However, we remain
960 		 * cautious, and teardown the linked lists individually.
961 		 */
962 		while (new_values != NULL) {
963 			new_val = new_values;
964 			new_values = new_values->rcv_next;
965 			kmem_cache_free(rctl_val_cache, new_val);
966 		}
967 
968 		while (alloc_values != NULL) {
969 			alloc_val = alloc_values;
970 			alloc_values = alloc_values->rcv_next;
971 			kmem_cache_free(rctl_val_cache, alloc_val);
972 		}
973 
974 		return (set_errno(error));
975 	}
976 
977 	/*
978 	 * We take the p_lock here to maintain consistency with other functions
979 	 * - rctlsys_get() and rctlsys_set()
980 	 */
981 	mutex_enter(&curproc->p_lock);
982 	if (flags & TASK_PROJ_PURGE)  {
983 		(void) rctl_local_replace_all(hndl, new_values, alloc_values,
984 		    curproc);
985 	} else {
986 		(void) rctl_local_insert_all(hndl, new_values, alloc_values,
987 		    curproc);
988 	}
989 	mutex_exit(&curproc->p_lock);
990 
991 	return (0);
992 }
993 
994 long
995 rctlsys(int code, char *name, void *obuf, void *nbuf, size_t obufsz, int flags)
996 {
997 	switch (code) {
998 	case 0:
999 		return (rctlsys_get(name, obuf, nbuf, flags));
1000 
1001 	case 1:
1002 		return (rctlsys_set(name, obuf, nbuf, flags));
1003 
1004 	case 2:
1005 		/*
1006 		 * Private call for rctl_walk(3C).
1007 		 */
1008 		return (rctlsys_lst(obuf, obufsz));
1009 
1010 	case 3:
1011 		/*
1012 		 * Private code for rctladm(1M):  "rctlctl".
1013 		 */
1014 		return (rctlsys_ctl(name, obuf, flags));
1015 	case 4:
1016 		/*
1017 		 * Private code for setproject(3PROJECT).
1018 		 */
1019 		return (rctlsys_projset(name, nbuf, obufsz, flags));
1020 
1021 	default:
1022 		return (set_errno(EINVAL));
1023 	}
1024 }
1025