xref: /titanic_52/usr/src/uts/common/disp/priocntl.c (revision 18c4e255539c8eac2a18c73be8729ec1f6fa818a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
33 #include <sys/signal.h>
34 #include <sys/pcb.h>
35 #include <sys/user.h>
36 #include <sys/systm.h>
37 #include <sys/sysinfo.h>
38 #include <sys/var.h>
39 #include <sys/errno.h>
40 #include <sys/cred.h>
41 #include <sys/proc.h>
42 #include <sys/procset.h>
43 #include <sys/debug.h>
44 #include <sys/inline.h>
45 #include <sys/priocntl.h>
46 #include <sys/disp.h>
47 #include <sys/class.h>
48 #include <sys/modctl.h>
49 #include <sys/t_lock.h>
50 #include <sys/uadmin.h>
51 #include <sys/cmn_err.h>
52 #include <sys/policy.h>
53 #include <sys/schedctl.h>
54 
55 /*
56  * Structure used to pass arguments to the proccmp() function.
57  * The arguments must be passed in a structure because proccmp()
58  * is called indirectly through the dotoprocs() function which
59  * will only pass through a single one word argument.
60  */
61 struct pcmpargs {
62 	id_t	*pcmp_cidp;
63 	int	*pcmp_cntp;
64 	kthread_t **pcmp_retthreadp;
65 };
66 
/*
 * Argument bundle for setparms(), which is invoked indirectly through
 * dotoprocs() and therefore receives a single opaque argument.
 */
struct stprmargs {
	/* Parameters to apply to every thread of each target process. */
	struct pcparms	*stp_parmsp;
	/*
	 * Side channel for errors: setparms() records EPERM here and
	 * reports success to its caller rather than returning EPERM.
	 */
	int		stp_error;
};
75 
76 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
77 /*
78  * A vaparm_t is an int followed by a long long -- this packs differently
79  * between the 64-bit kernel ABI and the 32-bit user ABI.
80  */
81 static int
82 copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg)
83 {
84 	pc_vaparms32_t vaparms32;
85 	pc_vaparm32_t *src;
86 	pc_vaparm_t *dst;
87 	uint_t cnt;
88 
89 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
90 
91 	if ((seg == UIO_USERSPACE ? copyin : kcopy)(arg, &vaparms32,
92 	    sizeof (vaparms32)))
93 		return (EFAULT);
94 
95 	vap->pc_vaparmscnt = vaparms32.pc_vaparmscnt;
96 	if ((cnt = vaparms32.pc_vaparmscnt) > PC_VAPARMCNT)
97 		cnt = PC_VAPARMCNT;
98 	for (src = vaparms32.pc_parms, dst = vap->pc_parms;
99 	    cnt--; src++, dst++) {
100 		dst->pc_key = src->pc_key;
101 		dst->pc_parm = src->pc_parm;
102 	}
103 	return (0);
104 }
105 
106 #define	COPYIN_VAPARMS(arg, vap, size, seg)	\
107 	(get_udatamodel() == DATAMODEL_NATIVE ?	\
108 	(*copyinfn)(arg, vap, size) : copyin_vaparms32(arg, vap, seg))
109 
110 #else
111 
112 #define	COPYIN_VAPARMS(arg, vap, size, seg)	(*copyinfn)(arg, vap, size)
113 
114 #endif
115 
116 static int donice(procset_t *, pcnice_t *);
117 static int doprio(procset_t *, pcprio_t *);
118 static int proccmp(proc_t *, struct pcmpargs *);
119 static int setparms(proc_t *, struct stprmargs *);
120 extern int threadcmp(struct pcmpargs *, kthread_t *);
121 
122 /*
123  * The priocntl system call.
124  */
125 long
126 priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
127     caddr_t arg2, uio_seg_t seg)
128 {
129 	pcinfo_t		pcinfo;
130 	pcparms_t		pcparms;
131 	pcnice_t		pcnice;
132 	pcprio_t		pcprio;
133 	pcadmin_t		pcadmin;
134 	pcpri_t			pcpri;
135 	procset_t		procset;
136 	struct stprmargs	stprmargs;
137 	struct pcmpargs		pcmpargs;
138 	pc_vaparms_t		vaparms;
139 	char			clname[PC_CLNMSZ];
140 	char			*outstr;
141 	int			count;
142 	kthread_t		*retthreadp;
143 	proc_t			*initpp;
144 	int			clnullflag;
145 	int			error = 0;
146 	int			error1 = 0;
147 	int			rv = 0;
148 	pid_t			saved_pid;
149 	id_t			classid;
150 	int			size;
151 	int (*copyinfn)(const void *, void *, size_t);
152 	int (*copyoutfn)(const void *, void *, size_t);
153 
154 	/*
155 	 * First just check the version number. Right now there is only
156 	 * one version we know about and support.  If we get some other
157 	 * version number from the application it may be that the
158 	 * application was built with some future version and is trying
159 	 * to run on an old release of the system (that's us).  In any
160 	 * case if we don't recognize the version number all we can do is
161 	 * return error.
162 	 */
163 	if (pc_version != PC_VERSION)
164 		return (set_errno(EINVAL));
165 
166 	if (seg == UIO_USERSPACE) {
167 		copyinfn = copyin;
168 		copyoutfn = copyout;
169 	} else {
170 		copyinfn = kcopy;
171 		copyoutfn = kcopy;
172 	}
173 
174 	switch (cmd) {
175 	case PC_GETCID:
176 		/*
177 		 * If the arg pointer is NULL, the user just wants to
178 		 * know the number of classes. If non-NULL, the pointer
179 		 * should point to a valid user pcinfo buffer.  In the
180 		 * dynamic world we need to return the number of loaded
181 		 * classes, not the max number of available classes that
182 		 * can be loaded.
183 		 */
184 		if (arg == NULL) {
185 			rv = loaded_classes;
186 			break;
187 		} else {
188 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
189 				return (set_errno(EFAULT));
190 		}
191 
192 		pcinfo.pc_clname[PC_CLNMSZ-1] = '\0';
193 
194 		/*
195 		 * Get the class ID corresponding to user supplied name.
196 		 */
197 		error = getcid(pcinfo.pc_clname, &pcinfo.pc_cid);
198 		if (error)
199 			return (set_errno(error));
200 
201 		/*
202 		 * Can't get info about the sys class.
203 		 */
204 		if (pcinfo.pc_cid == 0)
205 			return (set_errno(EINVAL));
206 
207 		/*
208 		 * Get the class specific information.
209 		 * we MUST make sure that the class has not already
210 		 * been unloaded before we try the CL_GETCLINFO.
211 		 * If it has then we need to load it.
212 		 */
213 		error =
214 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
215 		if (error)
216 			return (set_errno(error));
217 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
218 		if (error)
219 			return (set_errno(error));
220 
221 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
222 			return (set_errno(EFAULT));
223 
224 		rv = loaded_classes;
225 
226 		break;
227 
228 	case PC_GETCLINFO:
229 		/*
230 		 * If the arg pointer is NULL, the user just wants to know
231 		 * the number of classes. If non-NULL, the pointer should
232 		 * point to a valid user pcinfo buffer.
233 		 */
234 		if (arg == NULL) {
235 			rv = loaded_classes;
236 			break;
237 		} else {
238 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
239 				return (set_errno(EFAULT));
240 		}
241 
242 		if (pcinfo.pc_cid >= loaded_classes || pcinfo.pc_cid < 1)
243 			return (set_errno(EINVAL));
244 
245 		(void) strncpy(pcinfo.pc_clname, sclass[pcinfo.pc_cid].cl_name,
246 		    PC_CLNMSZ);
247 
248 		/*
249 		 * Get the class specific information.  we MUST make sure
250 		 * that the class has not already been unloaded before we
251 		 * try the CL_GETCLINFO.  If it has then we need to load
252 		 * it.
253 		 */
254 		error =
255 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
256 		if (error)
257 			return (set_errno(error));
258 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
259 		if (error)
260 			return (set_errno(error));
261 
262 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
263 			return (set_errno(EFAULT));
264 
265 		rv = loaded_classes;
266 		break;
267 
268 	case PC_SETPARMS:
269 	case PC_SETXPARMS:
270 		/*
271 		 * First check the validity of the parameters we got from
272 		 * the user.  We don't do any permissions checking here
273 		 * because it's done on a per thread basis by parmsset().
274 		 */
275 		if (cmd == PC_SETPARMS) {
276 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
277 				return (set_errno(EFAULT));
278 
279 			error = parmsin(&pcparms, NULL);
280 		} else {
281 			if ((*copyinfn)(arg, clname, PC_CLNMSZ) ||
282 			    COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
283 			    seg))
284 				return (set_errno(EFAULT));
285 			clname[PC_CLNMSZ-1] = '\0';
286 
287 			if (getcid(clname, &pcparms.pc_cid))
288 				return (set_errno(EINVAL));
289 
290 			error = parmsin(&pcparms, &vaparms);
291 		}
292 
293 		if (error)
294 			return (set_errno(error));
295 
296 		/*
297 		 * Get the procset from the user.
298 		 */
299 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
300 			return (set_errno(EFAULT));
301 
302 		/*
303 		 * For performance we do a quick check here to catch
304 		 * common cases where the current thread is the only one
305 		 * in the set.  In such cases we can call parmsset()
306 		 * directly, avoiding the relatively lengthy path through
307 		 * dotoprocs().  The underlying classes expect pidlock to
308 		 * be held.
309 		 */
310 		if (cur_inset_only(&procset) == B_TRUE) {
311 			/* do a single LWP */
312 			if ((procset.p_lidtype == P_LWPID) ||
313 			    (procset.p_ridtype == P_LWPID)) {
314 				mutex_enter(&pidlock);
315 				mutex_enter(&curproc->p_lock);
316 				error = parmsset(&pcparms, curthread);
317 				mutex_exit(&curproc->p_lock);
318 				mutex_exit(&pidlock);
319 			} else {
320 				/* do the entire process otherwise */
321 				stprmargs.stp_parmsp = &pcparms;
322 				stprmargs.stp_error = 0;
323 				mutex_enter(&pidlock);
324 				error = setparms(curproc, &stprmargs);
325 				mutex_exit(&pidlock);
326 				if (error == 0 && stprmargs.stp_error != 0)
327 					error = stprmargs.stp_error;
328 			}
329 			if (error)
330 				return (set_errno(error));
331 		} else {
332 			stprmargs.stp_parmsp = &pcparms;
333 			stprmargs.stp_error = 0;
334 
335 			error1 = error = ESRCH;
336 
337 			/*
338 			 * The dotoprocs() call below will cause
339 			 * setparms() to be called for each thread in the
340 			 * specified procset. setparms() will in turn
341 			 * call parmsset() (which does the real work).
342 			 */
343 			if ((procset.p_lidtype != P_LWPID) ||
344 			    (procset.p_ridtype != P_LWPID)) {
345 				error1 = dotoprocs(&procset, setparms,
346 				    (char *)&stprmargs);
347 			}
348 
349 			/*
350 			 * take care of the case when any of the
351 			 * operands happen to be LWP's
352 			 */
353 
354 			if ((procset.p_lidtype == P_LWPID) ||
355 			    (procset.p_ridtype == P_LWPID)) {
356 				error = dotolwp(&procset, parmsset,
357 				    (char *)&pcparms);
358 				/*
359 				 * Dotolwp() returns with p_lock held.
360 				 * This is required for the GETPARMS case
361 				 * below. So, here we just release the
362 				 * p_lock.
363 				 */
364 				if (MUTEX_HELD(&curproc->p_lock))
365 					mutex_exit(&curproc->p_lock);
366 			}
367 
368 			/*
369 			 * If setparms() encounters a permissions error
370 			 * for one or more of the threads it returns
371 			 * EPERM in stp_error so dotoprocs() will
372 			 * continue through the thread set.  If
373 			 * dotoprocs() returned an error above, it was
374 			 * more serious than permissions and dotoprocs
375 			 * quit when the error was encountered.  We
376 			 * return the more serious error if there was
377 			 * one, otherwise we return EPERM if we got that
378 			 * back.
379 			 */
380 			if (error1 != ESRCH)
381 				error = error1;
382 			if (error == 0 && stprmargs.stp_error != 0)
383 				error = stprmargs.stp_error;
384 		}
385 		break;
386 
387 	case PC_GETPARMS:
388 	case PC_GETXPARMS:
389 		if (cmd == PC_GETPARMS) {
390 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
391 				return (set_errno(EFAULT));
392 		} else {
393 			if (arg != NULL) {
394 				if ((*copyinfn)(arg, clname, PC_CLNMSZ))
395 					return (set_errno(EFAULT));
396 
397 				clname[PC_CLNMSZ-1] = '\0';
398 
399 				if (getcid(clname, &pcparms.pc_cid))
400 					return (set_errno(EINVAL));
401 			} else
402 				pcparms.pc_cid = PC_CLNULL;
403 
404 			if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
405 			    seg))
406 				return (set_errno(EFAULT));
407 		}
408 
409 		if (pcparms.pc_cid >= loaded_classes ||
410 		    (pcparms.pc_cid < 1 && pcparms.pc_cid != PC_CLNULL))
411 			return (set_errno(EINVAL));
412 
413 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
414 			return (set_errno(EFAULT));
415 
416 		/*
417 		 * Check to see if the current thread is the only one
418 		 * in the set. If not we must go through the whole set
419 		 * to select a thread.
420 		 */
421 		if (cur_inset_only(&procset) == B_TRUE) {
422 			/* do a single LWP */
423 			if ((procset.p_lidtype == P_LWPID) ||
424 			    (procset.p_ridtype == P_LWPID)) {
425 				if (pcparms.pc_cid != PC_CLNULL &&
426 				    pcparms.pc_cid != curthread->t_cid) {
427 					/*
428 					 * Specified thread not in
429 					 * specified class.
430 					 */
431 					return (set_errno(ESRCH));
432 				} else {
433 					mutex_enter(&curproc->p_lock);
434 					retthreadp = curthread;
435 				}
436 			} else {
437 				count = 0;
438 				retthreadp = NULL;
439 				pcmpargs.pcmp_cidp = &pcparms.pc_cid;
440 				pcmpargs.pcmp_cntp = &count;
441 				pcmpargs.pcmp_retthreadp = &retthreadp;
442 				/*
443 				 * Specified thread not in specified class.
444 				 */
445 				if (pcparms.pc_cid != PC_CLNULL &&
446 				    pcparms.pc_cid != curthread->t_cid)
447 					return (set_errno(ESRCH));
448 				error = proccmp(curproc, &pcmpargs);
449 				if (error) {
450 					if (retthreadp != NULL)
451 						mutex_exit(&(curproc->p_lock));
452 					return (set_errno(error));
453 				}
454 			}
455 		} else {
456 			/*
457 			 * get initpp early to avoid lock ordering problems
458 			 * (we cannot get pidlock while holding any p_lock).
459 			 */
460 			mutex_enter(&pidlock);
461 			initpp = prfind(P_INITPID);
462 			mutex_exit(&pidlock);
463 
464 			/*
465 			 * Select the thread (from the set) whose
466 			 * parameters we are going to return.  First we
467 			 * set up some locations for return values, then
468 			 * we call proccmp() indirectly through
469 			 * dotoprocs().  proccmp() will call a class
470 			 * specific routine which actually does the
471 			 * selection.  To understand how this works take
472 			 * a careful look at the code below, the
473 			 * dotoprocs() function, the proccmp() function,
474 			 * and the class specific cl_proccmp() functions.
475 			 */
476 			if (pcparms.pc_cid == PC_CLNULL)
477 				clnullflag = 1;
478 			else
479 				clnullflag = 0;
480 			count = 0;
481 			retthreadp = NULL;
482 			pcmpargs.pcmp_cidp = &pcparms.pc_cid;
483 			pcmpargs.pcmp_cntp = &count;
484 			pcmpargs.pcmp_retthreadp = &retthreadp;
485 			error1 = error = ESRCH;
486 
487 			if ((procset.p_lidtype != P_LWPID) ||
488 			    (procset.p_ridtype != P_LWPID)) {
489 				error1 = dotoprocs(&procset, proccmp,
490 				    (char *)&pcmpargs);
491 			}
492 
493 			/*
494 			 * take care of combination of LWP and process
495 			 * set case in a procset
496 			 */
497 			if ((procset.p_lidtype == P_LWPID) ||
498 			    (procset.p_ridtype == P_LWPID)) {
499 				error = dotolwp(&procset, threadcmp,
500 				    (char *)&pcmpargs);
501 			}
502 
503 			/*
504 			 * Both proccmp() and threadcmp() return with the
505 			 * p_lock held for the ttoproc(retthreadp). This
506 			 * is required to make sure that the process we
507 			 * chose as the winner doesn't go away
508 			 * i.e. retthreadp has to be a valid pointer.
509 			 *
510 			 * The case below can only happen if the thread
511 			 * with the highest priority was not in your
512 			 * process.  In that case, dotolwp will return
513 			 * holding p_lock for both your process as well
514 			 * as the process in which retthreadp is a
515 			 * thread.
516 			 */
517 			if ((retthreadp != NULL) &&
518 			    (ttoproc(retthreadp) != curproc) &&
519 			    MUTEX_HELD(&(curproc)->p_lock))
520 				mutex_exit(&(curproc)->p_lock);
521 
522 			ASSERT(retthreadp == NULL ||
523 			    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
524 			if (error1 != ESRCH)
525 				error = error1;
526 			if (error) {
527 				if (retthreadp != NULL)
528 				    /* CSTYLED */
529 				    mutex_exit(&(ttoproc(retthreadp)->p_lock));
530 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
531 				return (set_errno(error));
532 			}
533 			/*
534 			 * dotoprocs() ignores the init process if it is
535 			 * in the set, unless it was the only process found.
536 			 * Since we are getting parameters here rather than
537 			 * setting them, we want to make sure init is not
538 			 * excluded if it is in the set.
539 			 */
540 			if (initpp != NULL && retthreadp != NULL &&
541 			    ttoproc(retthreadp) != initpp) {
542 				mutex_enter(&initpp->p_lock);
543 				if (procinset(initpp, &procset)) {
544 					mutex_exit(&initpp->p_lock);
545 					(void) proccmp(initpp, &pcmpargs);
546 				} else {
547 					mutex_exit(&initpp->p_lock);
548 				}
549 			}
550 
551 			/*
552 			 * If dotoprocs returned success it found at least
553 			 * one thread in the set.  If proccmp() failed to
554 			 * select a thread it is because the user specified
555 			 * a class and none of the threads in the set
556 			 * belonged to that class, or because the process
557 			 * specified was in the middle of exiting and had
558 			 * cleared its thread list.
559 			 */
560 			if (retthreadp == NULL) {
561 				/*
562 				 * Might be here and still holding p_lock
563 				 * if we did a dotolwp on an lwp that
564 				 * existed but was in the wrong class.
565 				 */
566 				if (MUTEX_HELD(&(curproc)->p_lock))
567 					mutex_exit(&(curproc)->p_lock);
568 				return (set_errno(ESRCH));
569 			}
570 
571 			/*
572 			 * User can only use PC_CLNULL with one thread in set.
573 			 */
574 			if (clnullflag && count > 1) {
575 				if (retthreadp != NULL)
576 					mutex_exit(
577 					    &(ttoproc(retthreadp)->p_lock));
578 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
579 				return (set_errno(EINVAL));
580 			}
581 		}
582 
583 		ASSERT(retthreadp == NULL ||
584 		    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
585 		/*
586 		 * It is possible to have retthreadp == NULL. Proccmp()
587 		 * in the rare case (p_tlist == NULL) could return without
588 		 * setting a value for retthreadp.
589 		 */
590 		if (retthreadp == NULL) {
591 			ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
592 			return (set_errno(ESRCH));
593 		}
594 		/*
595 		 * We've selected a thread so now get the parameters.
596 		 */
597 		parmsget(retthreadp, &pcparms);
598 
599 		/*
600 		 * Prepare to return parameters to the user
601 		 */
602 		error = parmsout(&pcparms,
603 		    (cmd == PC_GETPARMS ? NULL : &vaparms));
604 
605 		/*
606 		 * Save pid of selected thread before dropping p_lock.
607 		 */
608 		saved_pid = ttoproc(retthreadp)->p_pid;
609 		mutex_exit(&(ttoproc(retthreadp)->p_lock));
610 		ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
611 
612 		if (error)
613 			return (set_errno(error));
614 
615 		if (cmd == PC_GETPARMS) {
616 			if ((*copyoutfn)(&pcparms, arg, sizeof (pcparms)))
617 				return (set_errno(EFAULT));
618 		} else if ((error = vaparmsout(arg, &pcparms, &vaparms,
619 		    seg)) != 0)
620 			return (set_errno(error));
621 
622 		/*
623 		 * And finally, return the pid of the selected thread.
624 		 */
625 		rv = saved_pid;
626 		break;
627 
628 	case PC_ADMIN:
629 		if (get_udatamodel() == DATAMODEL_NATIVE) {
630 			if ((*copyinfn)(arg, &pcadmin, sizeof (pcadmin_t)))
631 				return (set_errno(EFAULT));
632 #ifdef _SYSCALL32_IMPL
633 		} else {
634 			/* pcadmin struct from ILP32 callers */
635 			pcadmin32_t pcadmin32;
636 
637 			if ((*copyinfn)(arg, &pcadmin32, sizeof (pcadmin32_t)))
638 				return (set_errno(EFAULT));
639 			pcadmin.pc_cid = pcadmin32.pc_cid;
640 			pcadmin.pc_cladmin = (caddr_t)(uintptr_t)
641 			    pcadmin32.pc_cladmin;
642 #endif /* _SYSCALL32_IMPL */
643 		}
644 
645 		if (pcadmin.pc_cid >= loaded_classes ||
646 		    pcadmin.pc_cid < 1)
647 			return (set_errno(EINVAL));
648 
649 		/*
650 		 * Have the class do whatever the user is requesting.
651 		 */
652 		mutex_enter(&ualock);
653 		error = CL_ADMIN(&sclass[pcadmin.pc_cid], pcadmin.pc_cladmin,
654 		    CRED());
655 		mutex_exit(&ualock);
656 		break;
657 
658 	case PC_GETPRIRANGE:
659 		if ((*copyinfn)(arg, &pcpri, sizeof (pcpri_t)))
660 			return (set_errno(EFAULT));
661 
662 		if (pcpri.pc_cid >= loaded_classes || pcpri.pc_cid < 0)
663 			return (set_errno(EINVAL));
664 
665 		error = CL_GETCLPRI(&sclass[pcpri.pc_cid], &pcpri);
666 		if (!error) {
667 			if ((*copyoutfn)(&pcpri, arg, sizeof (pcpri)))
668 				return (set_errno(EFAULT));
669 		}
670 		break;
671 
672 	case PC_DONICE:
673 		/*
674 		 * Get pcnice and procset structures from the user.
675 		 */
676 		if ((*copyinfn)(arg, &pcnice, sizeof (pcnice)) ||
677 		    (*copyinfn)(psp, &procset, sizeof (procset)))
678 			return (set_errno(EFAULT));
679 
680 		error = donice(&procset, &pcnice);
681 
682 		if (!error && (pcnice.pc_op == PC_GETNICE)) {
683 			if ((*copyoutfn)(&pcnice, arg, sizeof (pcnice)))
684 				return (set_errno(EFAULT));
685 		}
686 		break;
687 
688 	case PC_DOPRIO:
689 		/*
690 		 * Get pcprio and procset structures from the user.
691 		 */
692 		if ((*copyinfn)(arg, &pcprio, sizeof (pcprio)) ||
693 		    (*copyinfn)(psp, &procset, sizeof (procset)))
694 			return (set_errno(EFAULT));
695 
696 		error = doprio(&procset, &pcprio);
697 
698 		if (!error && (pcprio.pc_op == PC_GETPRIO)) {
699 			if ((*copyoutfn)(&pcprio, arg, sizeof (pcprio)))
700 				return (set_errno(EFAULT));
701 		}
702 		break;
703 
704 	case PC_SETDFLCL:
705 		if (secpolicy_dispadm(CRED()) != 0)
706 			return (set_errno(EPERM));
707 
708 		if (copyin(arg, (caddr_t)clname, PC_CLNMSZ) != 0)
709 			return (set_errno(EFAULT));
710 		clname[PC_CLNMSZ-1] = '\0';
711 
712 		if (getcid(clname, &classid) != 0)
713 			return (set_errno(EINVAL));
714 		if (CLASS_KERNEL(classid))
715 			return (set_errno(EINVAL));
716 		defaultcid = classid;
717 		ASSERT(defaultcid > 0 && defaultcid < loaded_classes);
718 		break;
719 
720 	case PC_GETDFLCL:
721 		mutex_enter(&class_lock);
722 
723 		if (defaultcid >= loaded_classes)
724 			outstr = "";
725 		else
726 			outstr = sclass[defaultcid].cl_name;
727 		size = strlen(outstr) + 1;
728 		if (arg != NULL)
729 			if ((*copyoutfn)(outstr, arg, size) != 0)
730 				error = EFAULT;
731 
732 		mutex_exit(&class_lock);
733 		break;
734 
735 	default:
736 		error = EINVAL;
737 		break;
738 	}
739 	return (error ? (set_errno(error)) : rv);
740 }
741 
742 long
743 priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
744 {
745 	return (priocntl_common(pc_version, psp, cmd, arg, arg2,
746 	    UIO_USERSPACE));
747 }
748 
749 /*
750  * The proccmp() function is part of the implementation of the
751  * PC_GETPARMS command of the priocntl system call.  This function works
752  * with the system call code and with the class specific cl_globpri()
753  * function to select one thread from a specified procset based on class
754  * specific criteria. proccmp() is called indirectly from the priocntl
755  * code through the dotoprocs function.  Basic strategy is dotoprocs()
756  * calls us once for each thread in the set.  We in turn call the class
757  * specific function to compare the current thread from dotoprocs to the
758  * "best" (according to the class criteria) found so far.  We keep the
759  * "best" thread in *pcmp_retthreadp.
760  */
761 static int
762 proccmp(proc_t *pp, struct pcmpargs *argp)
763 {
764 	kthread_t	*tx;
765 	kthread_t	*ty;
766 	int		last_pri = -1;
767 	int		tx_pri;
768 	int		found = 0;
769 
770 	mutex_enter(&pp->p_lock);
771 
772 	if (pp->p_tlist == NULL) {
773 		mutex_exit(&pp->p_lock);
774 		return (0);
775 	}
776 	(*argp->pcmp_cntp)++;	/* Increment count of procs in the set */
777 
778 	if (*argp->pcmp_cidp == PC_CLNULL) {
779 		/*
780 		 * If no cid is specified, then lets just pick the first one.
781 		 * It doesn't matter because if the number of processes in the
782 		 * set are more than 1, then we return EINVAL in priocntlsys.
783 		 */
784 		*argp->pcmp_cidp = pp->p_tlist->t_cid;
785 	}
786 	ty = tx = pp->p_tlist;
787 	do {
788 		if (tx->t_cid == *argp->pcmp_cidp) {
789 			/*
790 			 * We found one which matches the required cid.
791 			 */
792 			found = 1;
793 			if ((tx_pri = CL_GLOBPRI(tx)) > last_pri) {
794 				last_pri = tx_pri;
795 				ty = tx;
796 			}
797 		}
798 	} while ((tx = tx->t_forw) != pp->p_tlist);
799 	if (found) {
800 		if (*argp->pcmp_retthreadp == NULL) {
801 			/*
802 			 * First time through for this set.
803 			 * keep the mutex held. He might be the one!
804 			 */
805 			*argp->pcmp_retthreadp = ty;
806 		} else {
807 			tx = *argp->pcmp_retthreadp;
808 			if (CL_GLOBPRI(ty) <= CL_GLOBPRI(tx)) {
809 				mutex_exit(&pp->p_lock);
810 			} else {
811 				mutex_exit(&(ttoproc(tx)->p_lock));
812 				*argp->pcmp_retthreadp = ty;
813 			}
814 		}
815 	} else {
816 		/*
817 		 * We actually didn't find anything of the same cid in
818 		 * this process.
819 		 */
820 		mutex_exit(&pp->p_lock);
821 	}
822 	return (0);
823 }
824 
825 
826 int
827 threadcmp(struct pcmpargs *argp, kthread_t *tp)
828 {
829 	kthread_t	*tx;
830 	proc_t		*pp;
831 
832 	ASSERT(MUTEX_HELD(&(ttoproc(tp))->p_lock));
833 
834 	(*argp->pcmp_cntp)++;   /* Increment count of procs in the set */
835 	if (*argp->pcmp_cidp == PC_CLNULL) {
836 		/*
837 		 * If no cid is specified, then lets just pick the first one.
838 		 * It doesn't matter because if the number of threads in the
839 		 * set are more than 1, then we return EINVAL in priocntlsys.
840 		 */
841 		*argp->pcmp_cidp = tp->t_cid;
842 	}
843 	if (tp->t_cid == *argp->pcmp_cidp) {
844 		if (*argp->pcmp_retthreadp == NULL) {
845 			/*
846 			 * First time through for this set.
847 			 */
848 			*argp->pcmp_retthreadp = tp;
849 		} else {
850 			tx = *argp->pcmp_retthreadp;
851 			if (CL_GLOBPRI(tp) > CL_GLOBPRI(tx)) {
852 				/*
853 				 * Unlike proccmp(), we don't release the
854 				 * p_lock of the ttoproc(tp) if tp's global
855 				 * priority is less than tx's. We need to go
856 				 * through the entire list before we can do
857 				 * that. The p_lock is released by the caller
858 				 * of dotolwp().
859 				 */
860 				pp = ttoproc(tx);
861 				ASSERT(MUTEX_HELD(&pp->p_lock));
862 				if (pp != curproc) {
863 					mutex_exit(&pp->p_lock);
864 				}
865 				*argp->pcmp_retthreadp = tp;
866 			}
867 		}
868 	}
869 	return (0);
870 }
871 
872 
873 /*
874  * The setparms() function is called indirectly by priocntlsys()
875  * through the dotoprocs() function.  setparms() acts as an
876  * intermediary between dotoprocs() and the parmsset() function,
877  * calling parmsset() for each thread in the set and handling
878  * the error returns on their way back up to dotoprocs().
879  */
880 static int
881 setparms(proc_t *targpp, struct stprmargs *stprmp)
882 {
883 	int error = 0;
884 	kthread_t *t;
885 	int err;
886 
887 	mutex_enter(&targpp->p_lock);
888 	if ((t = targpp->p_tlist) == NULL) {
889 		mutex_exit(&targpp->p_lock);
890 		return (0);
891 	}
892 	do {
893 		err = parmsset(stprmp->stp_parmsp, t);
894 		if (error == 0)
895 			error = err;
896 	} while ((t = t->t_forw) != targpp->p_tlist);
897 	mutex_exit(&targpp->p_lock);
898 	if (error) {
899 		if (error == EPERM) {
900 			stprmp->stp_error = EPERM;
901 			return (0);
902 		} else {
903 			return (error);
904 		}
905 	} else
906 		return (0);
907 }
908 
909 int
910 setthreadnice(pcnice_t *pcnice, kthread_t *tp)
911 {
912 	int error;
913 	int nice;
914 	int inc;
915 	id_t rtcid;
916 
917 	ASSERT(MUTEX_HELD(&pidlock));
918 	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
919 
920 	/*
921 	 * The XPG5 standard requires that any realtime process or thread
922 	 * must be unaffected by a call to setpriority().
923 	 */
924 	error = getcidbyname("RT", &rtcid);
925 	if (error == 0 && tp->t_cid == rtcid) {
926 		if (pcnice->pc_op == PC_SETNICE)
927 			return (0);
928 	}
929 
930 	if ((error = CL_DONICE(tp, CRED(), 0, &nice)) != 0)
931 		return (error);
932 
933 	if (pcnice->pc_op == PC_GETNICE) {
934 		/*
935 		 * If there is no change to priority, we should return the
936 		 * highest priority (lowest numerical value) pertaining to
937 		 * any of the specified threads.
938 		 */
939 		if (nice < pcnice->pc_val)
940 			pcnice->pc_val = nice;
941 	} else {
942 		ASSERT(pcnice->pc_op == PC_SETNICE);
943 		/*
944 		 * Try to change the nice value of the thread.
945 		 */
946 		inc = pcnice->pc_val - nice;
947 
948 		error = CL_DONICE(tp, CRED(), inc, &inc);
949 		schedctl_set_cidpri(tp);
950 	}
951 
952 	return (error);
953 }
954 
955 int
956 setprocnice(proc_t *pp, pcnice_t *pcnice)
957 {
958 	kthread_t *tp;
959 	int retval = 0;
960 	int error;
961 
962 	ASSERT(MUTEX_HELD(&pidlock));
963 	mutex_enter(&pp->p_lock);
964 
965 	if ((tp = pp->p_tlist) == NULL) {
966 		mutex_exit(&pp->p_lock);
967 		return (ESRCH);
968 	}
969 
970 	/*
971 	 * Check permissions before changing the nice value.
972 	 */
973 	if (pcnice->pc_op == PC_SETNICE) {
974 		if (!prochasprocperm(pp, curproc, CRED())) {
975 			mutex_exit(&pp->p_lock);
976 			return (EPERM);
977 		}
978 	}
979 
980 	do {
981 		error = setthreadnice(pcnice, tp);
982 		if (error)
983 			retval = error;
984 	} while ((tp = tp->t_forw) != pp->p_tlist);
985 
986 	mutex_exit(&pp->p_lock);
987 	return (retval);
988 }
989 
990 /*
991  * Update the nice value of the specified LWP or set of processes.
992  */
993 static int
994 donice(procset_t *procset, pcnice_t *pcnice)
995 {
996 	int err_proc = 0;
997 	int err_thread = 0;
998 	int err = 0;
999 
1000 	/*
1001 	 * Sanity check.
1002 	 */
1003 	if (pcnice->pc_op != PC_GETNICE && pcnice->pc_op != PC_SETNICE)
1004 		return (EINVAL);
1005 
1006 	/*
1007 	 * If it is PC_GETNICE operation then set pc_val to the largest
1008 	 * possible nice value to help us find the lowest nice value
1009 	 * pertaining to any of the specified processes.
1010 	 */
1011 	if (pcnice->pc_op == PC_GETNICE)
1012 		pcnice->pc_val = NZERO;
1013 
1014 	if (procset->p_lidtype != P_LWPID ||
1015 	    procset->p_ridtype != P_LWPID)
1016 		err_proc = dotoprocs(procset, setprocnice, (char *)pcnice);
1017 
1018 	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
1019 		err_thread = dotolwp(procset, setthreadnice, (char *)pcnice);
1020 		/*
1021 		 * dotolwp() can return with p_lock held.  This is required
1022 		 * for the priocntl GETPARMS case.  So, here we just release
1023 		 * the p_lock.
1024 		 */
1025 		if (MUTEX_HELD(&curproc->p_lock))
1026 			mutex_exit(&curproc->p_lock);
1027 
1028 		/*
1029 		 * If we were called for a single LWP, then ignore ESRCH
1030 		 * returned by the previous dotoprocs() call.
1031 		 */
1032 		if (err_proc == ESRCH)
1033 			err_proc = 0;
1034 	}
1035 
1036 	/*
1037 	 * dotoprocs() ignores the init process if it is in the set, unless
1038 	 * it was the only process found. We want to make sure init is not
1039 	 * excluded if we're going PC_GETNICE operation.
1040 	 */
1041 	if (pcnice->pc_op == PC_GETNICE) {
1042 		proc_t *initpp;
1043 
1044 		mutex_enter(&pidlock);
1045 		if ((initpp = prfind(P_INITPID)) != NULL) {
1046 			mutex_enter(&initpp->p_lock);
1047 			if (procinset(initpp, procset)) {
1048 				mutex_exit(&initpp->p_lock);
1049 				err = setprocnice(initpp, pcnice);
1050 			} else {
1051 				mutex_exit(&initpp->p_lock);
1052 			}
1053 		}
1054 		mutex_exit(&pidlock);
1055 	}
1056 
1057 	/*
1058 	 * We're returning the latest error here that we've got back from
1059 	 * the setthreadnice() or setprocnice(). That is, err_thread and/or
1060 	 * err_proc can be replaced by err.
1061 	 */
1062 	if (!err)
1063 		err = err_thread ? err_thread : err_proc;
1064 
1065 	return (err);
1066 }
1067 
1068 int
1069 setthreadprio(pcprio_t *pcprio, kthread_t *tp)
1070 {
1071 	int prio = 0;
1072 	int incr;
1073 	int error;
1074 
1075 	ASSERT(MUTEX_HELD(&pidlock));
1076 	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
1077 
1078 	if (pcprio->pc_op == PC_SETPRIO && pcprio->pc_cid != tp->t_cid) {
1079 		/*
1080 		 * Target thread must change to new class.
1081 		 * See comments in parmsset(), from where this code was copied.
1082 		 */
1083 		void *bufp = NULL;
1084 		caddr_t clprocp = (caddr_t)tp->t_cldata;
1085 		id_t oldcid = tp->t_cid;
1086 
1087 		error = CL_CANEXIT(tp, NULL);
1088 		if (error)
1089 			return (error);
1090 		if (CL_ALLOC(&bufp, pcprio->pc_cid, KM_NOSLEEP) != 0)
1091 			return (ENOMEM);
1092 		error = CL_ENTERCLASS(tp, pcprio->pc_cid, NULL, CRED(), bufp);
1093 		if (error) {
1094 			CL_FREE(pcprio->pc_cid, bufp);
1095 			return (error);
1096 		}
1097 		CL_EXITCLASS(oldcid, clprocp);
1098 		schedctl_set_cidpri(tp);
1099 	}
1100 
1101 	if ((error = CL_DOPRIO(tp, CRED(), 0, &prio)) != 0)
1102 		return (error);
1103 
1104 	if (pcprio->pc_op == PC_GETPRIO) {
1105 		/*
1106 		 * If we are not setting the priority, we should return the
1107 		 * highest priority pertaining to any of the specified threads.
1108 		 */
1109 		if (prio > pcprio->pc_val) {
1110 			pcprio->pc_cid = tp->t_cid;
1111 			pcprio->pc_val = prio;
1112 		}
1113 	} else if (prio != pcprio->pc_val) {
1114 		/*
1115 		 * Try to change the priority of the thread.
1116 		 */
1117 		incr = pcprio->pc_val - prio;
1118 		error = CL_DOPRIO(tp, CRED(), incr, &prio);
1119 		schedctl_set_cidpri(tp);
1120 	}
1121 
1122 	return (error);
1123 }
1124 
1125 int
1126 setprocprio(proc_t *pp, pcprio_t *pcprio)
1127 {
1128 	kthread_t *tp;
1129 	int retval = 0;
1130 	int error;
1131 
1132 	ASSERT(MUTEX_HELD(&pidlock));
1133 	mutex_enter(&pp->p_lock);
1134 
1135 	if ((tp = pp->p_tlist) == NULL) {
1136 		mutex_exit(&pp->p_lock);
1137 		return (ESRCH);
1138 	}
1139 
1140 	/*
1141 	 * Check permissions before changing the prio value.
1142 	 */
1143 	if (pcprio->pc_op == PC_SETPRIO) {
1144 		if (!prochasprocperm(pp, curproc, CRED())) {
1145 			mutex_exit(&pp->p_lock);
1146 			return (EPERM);
1147 		}
1148 	}
1149 
1150 	do {
1151 		error = setthreadprio(pcprio, tp);
1152 		if (error)
1153 			retval = error;
1154 	} while ((tp = tp->t_forw) != pp->p_tlist);
1155 
1156 	mutex_exit(&pp->p_lock);
1157 	return (retval);
1158 }
1159 
1160 /*
1161  * Set the class and priority of the specified LWP or set of processes.
1162  */
1163 static int
1164 doprio(procset_t *procset, pcprio_t *pcprio)
1165 {
1166 	int err_proc = 0;
1167 	int err_thread = 0;
1168 	int err = 0;
1169 
1170 	/*
1171 	 * Sanity check.
1172 	 */
1173 	if (pcprio->pc_op != PC_GETPRIO && pcprio->pc_op != PC_SETPRIO)
1174 		return (EINVAL);
1175 	if (pcprio->pc_op == PC_SETPRIO &&
1176 	    (pcprio->pc_cid >= loaded_classes || pcprio->pc_cid < 1))
1177 		return (EINVAL);
1178 
1179 	/*
1180 	 * If it is a PC_GETPRIO operation then set pc_val to the smallest
1181 	 * possible prio value to help us find the highest priority
1182 	 * pertaining to any of the specified processes.
1183 	 */
1184 	if (pcprio->pc_op == PC_GETPRIO)
1185 		pcprio->pc_val = SHRT_MIN;
1186 
1187 	if (procset->p_lidtype != P_LWPID ||
1188 	    procset->p_ridtype != P_LWPID)
1189 		err_proc = dotoprocs(procset, setprocprio, (char *)pcprio);
1190 
1191 	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
1192 		err_thread = dotolwp(procset, setthreadprio, (char *)pcprio);
1193 		/*
1194 		 * dotolwp() can return with p_lock held.  This is required
1195 		 * for the priocntl GETPARMS case.  So, here we just release
1196 		 * the p_lock.
1197 		 */
1198 		if (MUTEX_HELD(&curproc->p_lock))
1199 			mutex_exit(&curproc->p_lock);
1200 
1201 		/*
1202 		 * If we were called for a single LWP, then ignore ESRCH
1203 		 * returned by the previous dotoprocs() call.
1204 		 */
1205 		if (err_proc == ESRCH)
1206 			err_proc = 0;
1207 	}
1208 
1209 	/*
1210 	 * dotoprocs() ignores the init process if it is in the set, unless
1211 	 * it was the only process found. We want to make sure init is not
1212 	 * excluded if we're going PC_GETPRIO operation.
1213 	 */
1214 	if (pcprio->pc_op == PC_GETPRIO) {
1215 		proc_t *initpp;
1216 
1217 		mutex_enter(&pidlock);
1218 		if ((initpp = prfind(P_INITPID)) != NULL) {
1219 			mutex_enter(&initpp->p_lock);
1220 			if (procinset(initpp, procset)) {
1221 				mutex_exit(&initpp->p_lock);
1222 				err = setprocprio(initpp, pcprio);
1223 			} else {
1224 				mutex_exit(&initpp->p_lock);
1225 			}
1226 		}
1227 		mutex_exit(&pidlock);
1228 	}
1229 
1230 	/*
1231 	 * We're returning the latest error here that we've got back from
1232 	 * the setthreadprio() or setprocprio(). That is, err_thread and/or
1233 	 * err_proc can be replaced by err.
1234 	 */
1235 	if (!err)
1236 		err = err_thread ? err_thread : err_proc;
1237 
1238 	return (err);
1239 }
1240