xref: /illumos-gate/usr/src/uts/common/disp/priocntl.c (revision 4c28a617e3922d92a58e813a5b955eb526b9c386)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright (c) 2016 by Delphix. All rights reserved.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/sysmacros.h>
34 #include <sys/signal.h>
35 #include <sys/pcb.h>
36 #include <sys/user.h>
37 #include <sys/systm.h>
38 #include <sys/sysinfo.h>
39 #include <sys/var.h>
40 #include <sys/errno.h>
41 #include <sys/cred.h>
42 #include <sys/proc.h>
43 #include <sys/procset.h>
44 #include <sys/debug.h>
45 #include <sys/inline.h>
46 #include <sys/priocntl.h>
47 #include <sys/disp.h>
48 #include <sys/class.h>
49 #include <sys/modctl.h>
50 #include <sys/t_lock.h>
51 #include <sys/uadmin.h>
52 #include <sys/cmn_err.h>
53 #include <sys/policy.h>
54 #include <sys/schedctl.h>
55 
56 /*
57  * Structure used to pass arguments to the proccmp() function.
58  * The arguments must be passed in a structure because proccmp()
59  * is called indirectly through the dotoprocs() function which
60  * will only pass through a single one word argument.
61  */
62 struct pcmpargs {
63 	id_t	*pcmp_cidp;
64 	int	*pcmp_cntp;
65 	kthread_t **pcmp_retthreadp;
66 };
67 
/*
 * Argument bundle handed to setparms(), which dotoprocs() invokes
 * once per process in the set.
 */
struct stprmargs {
	struct pcparms	*stp_parmsp;	/* parameters to apply to each thread */
	int		stp_error;	/* EPERM is reported here by setparms() */
};
76 
77 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
78 /*
79  * A vaparm_t is an int followed by a long long -- this packs differently
80  * between the 64-bit kernel ABI and the 32-bit user ABI.
81  */
82 static int
83 copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg)
84 {
85 	pc_vaparms32_t vaparms32;
86 	pc_vaparm32_t *src;
87 	pc_vaparm_t *dst;
88 	uint_t cnt;
89 
90 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
91 
92 	if ((seg == UIO_USERSPACE ? copyin : kcopy)(arg, &vaparms32,
93 	    sizeof (vaparms32)))
94 		return (EFAULT);
95 
96 	vap->pc_vaparmscnt = vaparms32.pc_vaparmscnt;
97 	if ((cnt = vaparms32.pc_vaparmscnt) > PC_VAPARMCNT)
98 		cnt = PC_VAPARMCNT;
99 	for (src = vaparms32.pc_parms, dst = vap->pc_parms;
100 	    cnt--; src++, dst++) {
101 		dst->pc_key = src->pc_key;
102 		dst->pc_parm = src->pc_parm;
103 	}
104 	return (0);
105 }
106 
/*
 * Fetch a pc_vaparms_t from the caller.  Native callers are served by
 * the `copyinfn' function pointer, which must be in scope at the point
 * of use; ILP32 callers go through copyin_vaparms32().
 */
#define	COPYIN_VAPARMS(arg, vap, size, seg)		\
	((get_udatamodel() == DATAMODEL_NATIVE) ?	\
	(*copyinfn)((arg), (vap), (size)) :		\
	copyin_vaparms32((arg), (vap), (seg)))
110 
111 #else
112 
/* No ILP32 conversion needed: always use the in-scope `copyinfn'. */
#define	COPYIN_VAPARMS(arg, vap, size, seg)	\
	(*copyinfn)((arg), (vap), (size))
114 
115 #endif
116 
117 static int donice(procset_t *, pcnice_t *);
118 static int doprio(procset_t *, pcprio_t *);
119 static int proccmp(proc_t *, struct pcmpargs *);
120 static int setparms(proc_t *, struct stprmargs *);
121 extern int threadcmp(struct pcmpargs *, kthread_t *);
122 
123 /*
124  * The priocntl system call.
125  */
126 long
127 priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
128     caddr_t arg2, uio_seg_t seg)
129 {
130 	pcinfo_t		pcinfo;
131 	pcparms_t		pcparms;
132 	pcnice_t		pcnice;
133 	pcprio_t		pcprio;
134 	pcadmin_t		pcadmin;
135 	pcpri_t			pcpri;
136 	procset_t		procset;
137 	struct stprmargs	stprmargs;
138 	struct pcmpargs		pcmpargs;
139 	pc_vaparms_t		vaparms;
140 	char			clname[PC_CLNMSZ];
141 	char			*outstr;
142 	int			count;
143 	kthread_t		*retthreadp;
144 	proc_t			*initpp;
145 	int			clnullflag;
146 	int			error = 0;
147 	int			error1 = 0;
148 	int			rv = 0;
149 	pid_t			saved_pid;
150 	id_t			classid;
151 	int			size;
152 	int (*copyinfn)(const void *, void *, size_t);
153 	int (*copyoutfn)(const void *, void *, size_t);
154 
155 	/*
156 	 * First just check the version number. Right now there is only
157 	 * one version we know about and support.  If we get some other
158 	 * version number from the application it may be that the
159 	 * application was built with some future version and is trying
160 	 * to run on an old release of the system (that's us).  In any
161 	 * case if we don't recognize the version number all we can do is
162 	 * return error.
163 	 */
164 	if (pc_version != PC_VERSION)
165 		return (set_errno(EINVAL));
166 
167 	if (seg == UIO_USERSPACE) {
168 		copyinfn = copyin;
169 		copyoutfn = copyout;
170 	} else {
171 		copyinfn = kcopy;
172 		copyoutfn = kcopy;
173 	}
174 
175 	switch (cmd) {
176 	case PC_GETCID:
177 		/*
178 		 * If the arg pointer is NULL, the user just wants to
179 		 * know the number of classes. If non-NULL, the pointer
180 		 * should point to a valid user pcinfo buffer.  In the
181 		 * dynamic world we need to return the number of loaded
182 		 * classes, not the max number of available classes that
183 		 * can be loaded.
184 		 */
185 		if (arg == NULL) {
186 			rv = loaded_classes;
187 			break;
188 		} else {
189 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
190 				return (set_errno(EFAULT));
191 		}
192 
193 		pcinfo.pc_clname[PC_CLNMSZ-1] = '\0';
194 
195 		/*
196 		 * Get the class ID corresponding to user supplied name.
197 		 */
198 		error = getcid(pcinfo.pc_clname, &pcinfo.pc_cid);
199 		if (error)
200 			return (set_errno(error));
201 
202 		/*
203 		 * Can't get info about the sys class.
204 		 */
205 		if (pcinfo.pc_cid == 0)
206 			return (set_errno(EINVAL));
207 
208 		/*
209 		 * Get the class specific information.
210 		 * we MUST make sure that the class has not already
211 		 * been unloaded before we try the CL_GETCLINFO.
212 		 * If it has then we need to load it.
213 		 */
214 		error =
215 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
216 		if (error)
217 			return (set_errno(error));
218 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
219 		if (error)
220 			return (set_errno(error));
221 
222 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
223 			return (set_errno(EFAULT));
224 
225 		rv = loaded_classes;
226 
227 		break;
228 
229 	case PC_GETCLINFO:
230 		/*
231 		 * If the arg pointer is NULL, the user just wants to know
232 		 * the number of classes. If non-NULL, the pointer should
233 		 * point to a valid user pcinfo buffer.
234 		 */
235 		if (arg == NULL) {
236 			rv = loaded_classes;
237 			break;
238 		} else {
239 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
240 				return (set_errno(EFAULT));
241 		}
242 
243 		if (pcinfo.pc_cid >= loaded_classes || pcinfo.pc_cid < 1)
244 			return (set_errno(EINVAL));
245 
246 		(void) strncpy(pcinfo.pc_clname, sclass[pcinfo.pc_cid].cl_name,
247 		    PC_CLNMSZ);
248 
249 		/*
250 		 * Get the class specific information.  we MUST make sure
251 		 * that the class has not already been unloaded before we
252 		 * try the CL_GETCLINFO.  If it has then we need to load
253 		 * it.
254 		 */
255 		error =
256 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
257 		if (error)
258 			return (set_errno(error));
259 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
260 		if (error)
261 			return (set_errno(error));
262 
263 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
264 			return (set_errno(EFAULT));
265 
266 		rv = loaded_classes;
267 		break;
268 
269 	case PC_SETPARMS:
270 	case PC_SETXPARMS:
271 		/*
272 		 * First check the validity of the parameters we got from
273 		 * the user.  We don't do any permissions checking here
274 		 * because it's done on a per thread basis by parmsset().
275 		 */
276 		if (cmd == PC_SETPARMS) {
277 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
278 				return (set_errno(EFAULT));
279 
280 			error = parmsin(&pcparms, NULL);
281 		} else {
282 			if ((*copyinfn)(arg, clname, PC_CLNMSZ) ||
283 			    COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
284 			    seg))
285 				return (set_errno(EFAULT));
286 			clname[PC_CLNMSZ-1] = '\0';
287 
288 			if (getcid(clname, &pcparms.pc_cid))
289 				return (set_errno(EINVAL));
290 
291 			error = parmsin(&pcparms, &vaparms);
292 		}
293 
294 		if (error)
295 			return (set_errno(error));
296 
297 		/*
298 		 * Get the procset from the user.
299 		 */
300 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
301 			return (set_errno(EFAULT));
302 
303 		/*
304 		 * For performance we do a quick check here to catch
305 		 * common cases where the current thread is the only one
306 		 * in the set.  In such cases we can call parmsset()
307 		 * directly, avoiding the relatively lengthy path through
308 		 * dotoprocs().  The underlying classes expect pidlock to
309 		 * be held.
310 		 */
311 		if (cur_inset_only(&procset) == B_TRUE) {
312 			/* do a single LWP */
313 			if ((procset.p_lidtype == P_LWPID) ||
314 			    (procset.p_ridtype == P_LWPID)) {
315 				mutex_enter(&pidlock);
316 				mutex_enter(&curproc->p_lock);
317 				error = parmsset(&pcparms, curthread);
318 				mutex_exit(&curproc->p_lock);
319 				mutex_exit(&pidlock);
320 			} else {
321 				/* do the entire process otherwise */
322 				stprmargs.stp_parmsp = &pcparms;
323 				stprmargs.stp_error = 0;
324 				mutex_enter(&pidlock);
325 				error = setparms(curproc, &stprmargs);
326 				mutex_exit(&pidlock);
327 				if (error == 0 && stprmargs.stp_error != 0)
328 					error = stprmargs.stp_error;
329 			}
330 			if (error)
331 				return (set_errno(error));
332 		} else {
333 			stprmargs.stp_parmsp = &pcparms;
334 			stprmargs.stp_error = 0;
335 
336 			error1 = error = ESRCH;
337 
338 			/*
339 			 * The dotoprocs() call below will cause
340 			 * setparms() to be called for each thread in the
341 			 * specified procset. setparms() will in turn
342 			 * call parmsset() (which does the real work).
343 			 */
344 			if ((procset.p_lidtype != P_LWPID) ||
345 			    (procset.p_ridtype != P_LWPID)) {
346 				error1 = dotoprocs(&procset, setparms,
347 				    (char *)&stprmargs);
348 			}
349 
350 			/*
351 			 * take care of the case when any of the
352 			 * operands happen to be LWP's
353 			 */
354 
355 			if ((procset.p_lidtype == P_LWPID) ||
356 			    (procset.p_ridtype == P_LWPID)) {
357 				error = dotolwp(&procset, parmsset,
358 				    (char *)&pcparms);
359 				/*
360 				 * Dotolwp() returns with p_lock held.
361 				 * This is required for the GETPARMS case
362 				 * below. So, here we just release the
363 				 * p_lock.
364 				 */
365 				if (MUTEX_HELD(&curproc->p_lock))
366 					mutex_exit(&curproc->p_lock);
367 			}
368 
369 			/*
370 			 * If setparms() encounters a permissions error
371 			 * for one or more of the threads it returns
372 			 * EPERM in stp_error so dotoprocs() will
373 			 * continue through the thread set.  If
374 			 * dotoprocs() returned an error above, it was
375 			 * more serious than permissions and dotoprocs
376 			 * quit when the error was encountered.  We
377 			 * return the more serious error if there was
378 			 * one, otherwise we return EPERM if we got that
379 			 * back.
380 			 */
381 			if (error1 != ESRCH)
382 				error = error1;
383 			if (error == 0 && stprmargs.stp_error != 0)
384 				error = stprmargs.stp_error;
385 		}
386 		break;
387 
388 	case PC_GETPARMS:
389 	case PC_GETXPARMS:
390 		if (cmd == PC_GETPARMS) {
391 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
392 				return (set_errno(EFAULT));
393 		} else {
394 			if (arg != NULL) {
395 				if ((*copyinfn)(arg, clname, PC_CLNMSZ))
396 					return (set_errno(EFAULT));
397 
398 				clname[PC_CLNMSZ-1] = '\0';
399 
400 				if (getcid(clname, &pcparms.pc_cid))
401 					return (set_errno(EINVAL));
402 			} else
403 				pcparms.pc_cid = PC_CLNULL;
404 
405 			if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
406 			    seg))
407 				return (set_errno(EFAULT));
408 		}
409 
410 		if (pcparms.pc_cid >= loaded_classes ||
411 		    (pcparms.pc_cid < 1 && pcparms.pc_cid != PC_CLNULL))
412 			return (set_errno(EINVAL));
413 
414 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
415 			return (set_errno(EFAULT));
416 
417 		/*
418 		 * Check to see if the current thread is the only one
419 		 * in the set. If not we must go through the whole set
420 		 * to select a thread.
421 		 */
422 		if (cur_inset_only(&procset) == B_TRUE) {
423 			/* do a single LWP */
424 			if ((procset.p_lidtype == P_LWPID) ||
425 			    (procset.p_ridtype == P_LWPID)) {
426 				if (pcparms.pc_cid != PC_CLNULL &&
427 				    pcparms.pc_cid != curthread->t_cid) {
428 					/*
429 					 * Specified thread not in
430 					 * specified class.
431 					 */
432 					return (set_errno(ESRCH));
433 				} else {
434 					mutex_enter(&curproc->p_lock);
435 					retthreadp = curthread;
436 				}
437 			} else {
438 				count = 0;
439 				retthreadp = NULL;
440 				pcmpargs.pcmp_cidp = &pcparms.pc_cid;
441 				pcmpargs.pcmp_cntp = &count;
442 				pcmpargs.pcmp_retthreadp = &retthreadp;
443 				/*
444 				 * Specified thread not in specified class.
445 				 */
446 				if (pcparms.pc_cid != PC_CLNULL &&
447 				    pcparms.pc_cid != curthread->t_cid)
448 					return (set_errno(ESRCH));
449 				error = proccmp(curproc, &pcmpargs);
450 				if (error) {
451 					if (retthreadp != NULL)
452 						mutex_exit(&(curproc->p_lock));
453 					return (set_errno(error));
454 				}
455 			}
456 		} else {
457 			/*
458 			 * get initpp early to avoid lock ordering problems
459 			 * (we cannot get pidlock while holding any p_lock).
460 			 */
461 			mutex_enter(&pidlock);
462 			initpp = prfind(P_INITPID);
463 			mutex_exit(&pidlock);
464 
465 			/*
466 			 * Select the thread (from the set) whose
467 			 * parameters we are going to return.  First we
468 			 * set up some locations for return values, then
469 			 * we call proccmp() indirectly through
470 			 * dotoprocs().  proccmp() will call a class
471 			 * specific routine which actually does the
472 			 * selection.  To understand how this works take
473 			 * a careful look at the code below, the
474 			 * dotoprocs() function, the proccmp() function,
475 			 * and the class specific cl_proccmp() functions.
476 			 */
477 			if (pcparms.pc_cid == PC_CLNULL)
478 				clnullflag = 1;
479 			else
480 				clnullflag = 0;
481 			count = 0;
482 			retthreadp = NULL;
483 			pcmpargs.pcmp_cidp = &pcparms.pc_cid;
484 			pcmpargs.pcmp_cntp = &count;
485 			pcmpargs.pcmp_retthreadp = &retthreadp;
486 			error1 = error = ESRCH;
487 
488 			if ((procset.p_lidtype != P_LWPID) ||
489 			    (procset.p_ridtype != P_LWPID)) {
490 				error1 = dotoprocs(&procset, proccmp,
491 				    (char *)&pcmpargs);
492 			}
493 
494 			/*
495 			 * take care of combination of LWP and process
496 			 * set case in a procset
497 			 */
498 			if ((procset.p_lidtype == P_LWPID) ||
499 			    (procset.p_ridtype == P_LWPID)) {
500 				error = dotolwp(&procset, threadcmp,
501 				    (char *)&pcmpargs);
502 			}
503 
504 			/*
505 			 * Both proccmp() and threadcmp() return with the
506 			 * p_lock held for the ttoproc(retthreadp). This
507 			 * is required to make sure that the process we
508 			 * chose as the winner doesn't go away
509 			 * i.e. retthreadp has to be a valid pointer.
510 			 *
511 			 * The case below can only happen if the thread
512 			 * with the highest priority was not in your
513 			 * process.  In that case, dotolwp will return
514 			 * holding p_lock for both your process as well
515 			 * as the process in which retthreadp is a
516 			 * thread.
517 			 */
518 			if ((retthreadp != NULL) &&
519 			    (ttoproc(retthreadp) != curproc) &&
520 			    MUTEX_HELD(&(curproc)->p_lock))
521 				mutex_exit(&(curproc)->p_lock);
522 
523 			ASSERT(retthreadp == NULL ||
524 			    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
525 			if (error1 != ESRCH)
526 				error = error1;
527 			if (error) {
528 				if (retthreadp != NULL)
529 				    /* CSTYLED */
530 				    mutex_exit(&(ttoproc(retthreadp)->p_lock));
531 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
532 				return (set_errno(error));
533 			}
534 			/*
535 			 * dotoprocs() ignores the init process if it is
536 			 * in the set, unless it was the only process found.
537 			 * Since we are getting parameters here rather than
538 			 * setting them, we want to make sure init is not
539 			 * excluded if it is in the set.
540 			 */
541 			if (initpp != NULL && retthreadp != NULL &&
542 			    ttoproc(retthreadp) != initpp) {
543 				mutex_enter(&initpp->p_lock);
544 				if (procinset(initpp, &procset)) {
545 					mutex_exit(&initpp->p_lock);
546 					(void) proccmp(initpp, &pcmpargs);
547 				} else {
548 					mutex_exit(&initpp->p_lock);
549 				}
550 			}
551 
552 			/*
553 			 * If dotoprocs returned success it found at least
554 			 * one thread in the set.  If proccmp() failed to
555 			 * select a thread it is because the user specified
556 			 * a class and none of the threads in the set
557 			 * belonged to that class, or because the process
558 			 * specified was in the middle of exiting and had
559 			 * cleared its thread list.
560 			 */
561 			if (retthreadp == NULL) {
562 				/*
563 				 * Might be here and still holding p_lock
564 				 * if we did a dotolwp on an lwp that
565 				 * existed but was in the wrong class.
566 				 */
567 				if (MUTEX_HELD(&(curproc)->p_lock))
568 					mutex_exit(&(curproc)->p_lock);
569 				return (set_errno(ESRCH));
570 			}
571 
572 			/*
573 			 * User can only use PC_CLNULL with one thread in set.
574 			 */
575 			if (clnullflag && count > 1) {
576 				if (retthreadp != NULL)
577 					mutex_exit(
578 					    &(ttoproc(retthreadp)->p_lock));
579 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
580 				return (set_errno(EINVAL));
581 			}
582 		}
583 
584 		ASSERT(retthreadp == NULL ||
585 		    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
586 		/*
587 		 * It is possible to have retthreadp == NULL. Proccmp()
588 		 * in the rare case (p_tlist == NULL) could return without
589 		 * setting a value for retthreadp.
590 		 */
591 		if (retthreadp == NULL) {
592 			ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
593 			return (set_errno(ESRCH));
594 		}
595 		/*
596 		 * We've selected a thread so now get the parameters.
597 		 */
598 		parmsget(retthreadp, &pcparms);
599 
600 		/*
601 		 * Prepare to return parameters to the user
602 		 */
603 		error = parmsout(&pcparms,
604 		    (cmd == PC_GETPARMS ? NULL : &vaparms));
605 
606 		/*
607 		 * Save pid of selected thread before dropping p_lock.
608 		 */
609 		saved_pid = ttoproc(retthreadp)->p_pid;
610 		mutex_exit(&(ttoproc(retthreadp)->p_lock));
611 		ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
612 
613 		if (error)
614 			return (set_errno(error));
615 
616 		if (cmd == PC_GETPARMS) {
617 			if ((*copyoutfn)(&pcparms, arg, sizeof (pcparms)))
618 				return (set_errno(EFAULT));
619 		} else if ((error = vaparmsout(arg, &pcparms, &vaparms,
620 		    seg)) != 0)
621 			return (set_errno(error));
622 
623 		/*
624 		 * And finally, return the pid of the selected thread.
625 		 */
626 		rv = saved_pid;
627 		break;
628 
629 	case PC_ADMIN:
630 		if (get_udatamodel() == DATAMODEL_NATIVE) {
631 			if ((*copyinfn)(arg, &pcadmin, sizeof (pcadmin_t)))
632 				return (set_errno(EFAULT));
633 #ifdef _SYSCALL32_IMPL
634 		} else {
635 			/* pcadmin struct from ILP32 callers */
636 			pcadmin32_t pcadmin32;
637 
638 			if ((*copyinfn)(arg, &pcadmin32, sizeof (pcadmin32_t)))
639 				return (set_errno(EFAULT));
640 			pcadmin.pc_cid = pcadmin32.pc_cid;
641 			pcadmin.pc_cladmin = (caddr_t)(uintptr_t)
642 			    pcadmin32.pc_cladmin;
643 #endif /* _SYSCALL32_IMPL */
644 		}
645 
646 		if (pcadmin.pc_cid >= loaded_classes ||
647 		    pcadmin.pc_cid < 1)
648 			return (set_errno(EINVAL));
649 
650 		/*
651 		 * Have the class do whatever the user is requesting.
652 		 */
653 		mutex_enter(&ualock);
654 		error = CL_ADMIN(&sclass[pcadmin.pc_cid], pcadmin.pc_cladmin,
655 		    CRED());
656 		mutex_exit(&ualock);
657 		break;
658 
659 	case PC_GETPRIRANGE:
660 		if ((*copyinfn)(arg, &pcpri, sizeof (pcpri_t)))
661 			return (set_errno(EFAULT));
662 
663 		if (pcpri.pc_cid >= loaded_classes || pcpri.pc_cid < 0)
664 			return (set_errno(EINVAL));
665 
666 		error = CL_GETCLPRI(&sclass[pcpri.pc_cid], &pcpri);
667 		if (!error) {
668 			if ((*copyoutfn)(&pcpri, arg, sizeof (pcpri)))
669 				return (set_errno(EFAULT));
670 		}
671 		break;
672 
673 	case PC_DONICE:
674 		/*
675 		 * Get pcnice and procset structures from the user.
676 		 */
677 		if ((*copyinfn)(arg, &pcnice, sizeof (pcnice)) ||
678 		    (*copyinfn)(psp, &procset, sizeof (procset)))
679 			return (set_errno(EFAULT));
680 
681 		error = donice(&procset, &pcnice);
682 
683 		if (!error && (pcnice.pc_op == PC_GETNICE)) {
684 			if ((*copyoutfn)(&pcnice, arg, sizeof (pcnice)))
685 				return (set_errno(EFAULT));
686 		}
687 		break;
688 
689 	case PC_DOPRIO:
690 		/*
691 		 * Get pcprio and procset structures from the user.
692 		 */
693 		if ((*copyinfn)(arg, &pcprio, sizeof (pcprio)) ||
694 		    (*copyinfn)(psp, &procset, sizeof (procset)))
695 			return (set_errno(EFAULT));
696 
697 		error = doprio(&procset, &pcprio);
698 
699 		if (!error && (pcprio.pc_op == PC_GETPRIO)) {
700 			if ((*copyoutfn)(&pcprio, arg, sizeof (pcprio)))
701 				return (set_errno(EFAULT));
702 		}
703 		break;
704 
705 	case PC_SETDFLCL:
706 		if (secpolicy_dispadm(CRED()) != 0)
707 			return (set_errno(EPERM));
708 
709 		if (copyin(arg, (caddr_t)clname, PC_CLNMSZ) != 0)
710 			return (set_errno(EFAULT));
711 		clname[PC_CLNMSZ-1] = '\0';
712 
713 		if (getcid(clname, &classid) != 0)
714 			return (set_errno(EINVAL));
715 		if (CLASS_KERNEL(classid))
716 			return (set_errno(EINVAL));
717 		defaultcid = classid;
718 		ASSERT(defaultcid > 0 && defaultcid < loaded_classes);
719 		break;
720 
721 	case PC_GETDFLCL:
722 		mutex_enter(&class_lock);
723 
724 		if (defaultcid >= loaded_classes)
725 			outstr = "";
726 		else
727 			outstr = sclass[defaultcid].cl_name;
728 		size = strlen(outstr) + 1;
729 		if (arg != NULL)
730 			if ((*copyoutfn)(outstr, arg, size) != 0)
731 				error = EFAULT;
732 
733 		mutex_exit(&class_lock);
734 		break;
735 
736 	default:
737 		error = EINVAL;
738 		break;
739 	}
740 	return (error ? (set_errno(error)) : rv);
741 }
742 
743 long
744 priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
745 {
746 	return (priocntl_common(pc_version, psp, cmd, arg, arg2,
747 	    UIO_USERSPACE));
748 }
749 
750 /*
751  * The proccmp() function is part of the implementation of the
752  * PC_GETPARMS command of the priocntl system call.  This function works
753  * with the system call code and with the class specific cl_globpri()
754  * function to select one thread from a specified procset based on class
755  * specific criteria. proccmp() is called indirectly from the priocntl
756  * code through the dotoprocs function.  Basic strategy is dotoprocs()
757  * calls us once for each thread in the set.  We in turn call the class
758  * specific function to compare the current thread from dotoprocs to the
759  * "best" (according to the class criteria) found so far.  We keep the
760  * "best" thread in *pcmp_retthreadp.
761  */
762 static int
763 proccmp(proc_t *pp, struct pcmpargs *argp)
764 {
765 	kthread_t	*tx;
766 	kthread_t	*ty;
767 	int		last_pri = -1;
768 	int		tx_pri;
769 	int		found = 0;
770 
771 	mutex_enter(&pp->p_lock);
772 
773 	if (pp->p_tlist == NULL) {
774 		mutex_exit(&pp->p_lock);
775 		return (0);
776 	}
777 	(*argp->pcmp_cntp)++;	/* Increment count of procs in the set */
778 
779 	if (*argp->pcmp_cidp == PC_CLNULL) {
780 		/*
781 		 * If no cid is specified, then lets just pick the first one.
782 		 * It doesn't matter because if the number of processes in the
783 		 * set are more than 1, then we return EINVAL in priocntlsys.
784 		 */
785 		*argp->pcmp_cidp = pp->p_tlist->t_cid;
786 	}
787 	ty = tx = pp->p_tlist;
788 	do {
789 		if (tx->t_cid == *argp->pcmp_cidp) {
790 			/*
791 			 * We found one which matches the required cid.
792 			 */
793 			found = 1;
794 			if ((tx_pri = CL_GLOBPRI(tx)) > last_pri) {
795 				last_pri = tx_pri;
796 				ty = tx;
797 			}
798 		}
799 	} while ((tx = tx->t_forw) != pp->p_tlist);
800 	if (found) {
801 		if (*argp->pcmp_retthreadp == NULL) {
802 			/*
803 			 * First time through for this set.
804 			 * keep the mutex held. It might be the one!
805 			 */
806 			*argp->pcmp_retthreadp = ty;
807 		} else {
808 			tx = *argp->pcmp_retthreadp;
809 			if (CL_GLOBPRI(ty) <= CL_GLOBPRI(tx)) {
810 				mutex_exit(&pp->p_lock);
811 			} else {
812 				mutex_exit(&(ttoproc(tx)->p_lock));
813 				*argp->pcmp_retthreadp = ty;
814 			}
815 		}
816 	} else {
817 		/*
818 		 * We actually didn't find anything of the same cid in
819 		 * this process.
820 		 */
821 		mutex_exit(&pp->p_lock);
822 	}
823 	return (0);
824 }
825 
826 
827 int
828 threadcmp(struct pcmpargs *argp, kthread_t *tp)
829 {
830 	kthread_t	*tx;
831 	proc_t		*pp;
832 
833 	ASSERT(MUTEX_HELD(&(ttoproc(tp))->p_lock));
834 
835 	(*argp->pcmp_cntp)++;   /* Increment count of procs in the set */
836 	if (*argp->pcmp_cidp == PC_CLNULL) {
837 		/*
838 		 * If no cid is specified, then lets just pick the first one.
839 		 * It doesn't matter because if the number of threads in the
840 		 * set are more than 1, then we return EINVAL in priocntlsys.
841 		 */
842 		*argp->pcmp_cidp = tp->t_cid;
843 	}
844 	if (tp->t_cid == *argp->pcmp_cidp) {
845 		if (*argp->pcmp_retthreadp == NULL) {
846 			/*
847 			 * First time through for this set.
848 			 */
849 			*argp->pcmp_retthreadp = tp;
850 		} else {
851 			tx = *argp->pcmp_retthreadp;
852 			if (CL_GLOBPRI(tp) > CL_GLOBPRI(tx)) {
853 				/*
854 				 * Unlike proccmp(), we don't release the
855 				 * p_lock of the ttoproc(tp) if tp's global
856 				 * priority is less than tx's. We need to go
857 				 * through the entire list before we can do
858 				 * that. The p_lock is released by the caller
859 				 * of dotolwp().
860 				 */
861 				pp = ttoproc(tx);
862 				ASSERT(MUTEX_HELD(&pp->p_lock));
863 				if (pp != curproc) {
864 					mutex_exit(&pp->p_lock);
865 				}
866 				*argp->pcmp_retthreadp = tp;
867 			}
868 		}
869 	}
870 	return (0);
871 }
872 
873 
874 /*
875  * The setparms() function is called indirectly by priocntlsys()
876  * through the dotoprocs() function.  setparms() acts as an
877  * intermediary between dotoprocs() and the parmsset() function,
878  * calling parmsset() for each thread in the set and handling
879  * the error returns on their way back up to dotoprocs().
880  */
881 static int
882 setparms(proc_t *targpp, struct stprmargs *stprmp)
883 {
884 	int error = 0;
885 	kthread_t *t;
886 	int err;
887 
888 	mutex_enter(&targpp->p_lock);
889 	if ((t = targpp->p_tlist) == NULL) {
890 		mutex_exit(&targpp->p_lock);
891 		return (0);
892 	}
893 	do {
894 		err = parmsset(stprmp->stp_parmsp, t);
895 		if (error == 0)
896 			error = err;
897 	} while ((t = t->t_forw) != targpp->p_tlist);
898 	mutex_exit(&targpp->p_lock);
899 	if (error) {
900 		if (error == EPERM) {
901 			stprmp->stp_error = EPERM;
902 			return (0);
903 		} else {
904 			return (error);
905 		}
906 	} else
907 		return (0);
908 }
909 
910 int
911 setthreadnice(pcnice_t *pcnice, kthread_t *tp)
912 {
913 	int error;
914 	int nice;
915 	int inc;
916 	id_t rtcid;
917 
918 	ASSERT(MUTEX_HELD(&pidlock));
919 	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
920 
921 	/*
922 	 * The XPG5 standard requires that any realtime process or thread
923 	 * must be unaffected by a call to setpriority().
924 	 */
925 	error = getcidbyname("RT", &rtcid);
926 	if (error == 0 && tp->t_cid == rtcid) {
927 		if (pcnice->pc_op == PC_SETNICE)
928 			return (0);
929 	}
930 
931 	if ((error = CL_DONICE(tp, CRED(), 0, &nice)) != 0)
932 		return (error);
933 
934 	if (pcnice->pc_op == PC_GETNICE) {
935 		/*
936 		 * If there is no change to priority, we should return the
937 		 * highest priority (lowest numerical value) pertaining to
938 		 * any of the specified threads.
939 		 */
940 		if (nice < pcnice->pc_val)
941 			pcnice->pc_val = nice;
942 	} else {
943 		ASSERT(pcnice->pc_op == PC_SETNICE);
944 		/*
945 		 * Try to change the nice value of the thread.
946 		 */
947 		inc = pcnice->pc_val - nice;
948 
949 		error = CL_DONICE(tp, CRED(), inc, &inc);
950 		schedctl_set_cidpri(tp);
951 	}
952 
953 	return (error);
954 }
955 
956 int
957 setprocnice(proc_t *pp, pcnice_t *pcnice)
958 {
959 	kthread_t *tp;
960 	int retval = 0;
961 	int error;
962 
963 	ASSERT(MUTEX_HELD(&pidlock));
964 	mutex_enter(&pp->p_lock);
965 
966 	if ((tp = pp->p_tlist) == NULL) {
967 		mutex_exit(&pp->p_lock);
968 		return (ESRCH);
969 	}
970 
971 	/*
972 	 * Check permissions before changing the nice value.
973 	 */
974 	if (pcnice->pc_op == PC_SETNICE) {
975 		if (!prochasprocperm(pp, curproc, CRED())) {
976 			mutex_exit(&pp->p_lock);
977 			return (EPERM);
978 		}
979 	}
980 
981 	do {
982 		error = setthreadnice(pcnice, tp);
983 		if (error)
984 			retval = error;
985 	} while ((tp = tp->t_forw) != pp->p_tlist);
986 
987 	mutex_exit(&pp->p_lock);
988 	return (retval);
989 }
990 
991 /*
992  * Update the nice value of the specified LWP or set of processes.
993  */
994 static int
995 donice(procset_t *procset, pcnice_t *pcnice)
996 {
997 	int err_proc = 0;
998 	int err_thread = 0;
999 	int err = 0;
1000 
1001 	/*
1002 	 * Sanity check.
1003 	 */
1004 	if (pcnice->pc_op != PC_GETNICE && pcnice->pc_op != PC_SETNICE)
1005 		return (EINVAL);
1006 
1007 	/*
1008 	 * If it is PC_GETNICE operation then set pc_val to the largest
1009 	 * possible nice value to help us find the lowest nice value
1010 	 * pertaining to any of the specified processes.
1011 	 */
1012 	if (pcnice->pc_op == PC_GETNICE)
1013 		pcnice->pc_val = NZERO;
1014 
1015 	if (procset->p_lidtype != P_LWPID ||
1016 	    procset->p_ridtype != P_LWPID)
1017 		err_proc = dotoprocs(procset, setprocnice, (char *)pcnice);
1018 
1019 	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
1020 		err_thread = dotolwp(procset, setthreadnice, (char *)pcnice);
1021 		/*
1022 		 * dotolwp() can return with p_lock held.  This is required
1023 		 * for the priocntl GETPARMS case.  So, here we just release
1024 		 * the p_lock.
1025 		 */
1026 		if (MUTEX_HELD(&curproc->p_lock))
1027 			mutex_exit(&curproc->p_lock);
1028 
1029 		/*
1030 		 * If we were called for a single LWP, then ignore ESRCH
1031 		 * returned by the previous dotoprocs() call.
1032 		 */
1033 		if (err_proc == ESRCH)
1034 			err_proc = 0;
1035 	}
1036 
1037 	/*
1038 	 * dotoprocs() ignores the init process if it is in the set, unless
1039 	 * it was the only process found. We want to make sure init is not
1040 	 * excluded if we're going PC_GETNICE operation.
1041 	 */
1042 	if (pcnice->pc_op == PC_GETNICE) {
1043 		proc_t *initpp;
1044 
1045 		mutex_enter(&pidlock);
1046 		if ((initpp = prfind(P_INITPID)) != NULL) {
1047 			mutex_enter(&initpp->p_lock);
1048 			if (procinset(initpp, procset)) {
1049 				mutex_exit(&initpp->p_lock);
1050 				err = setprocnice(initpp, pcnice);
1051 			} else {
1052 				mutex_exit(&initpp->p_lock);
1053 			}
1054 		}
1055 		mutex_exit(&pidlock);
1056 	}
1057 
1058 	/*
1059 	 * We're returning the latest error here that we've got back from
1060 	 * the setthreadnice() or setprocnice(). That is, err_thread and/or
1061 	 * err_proc can be replaced by err.
1062 	 */
1063 	if (!err)
1064 		err = err_thread ? err_thread : err_proc;
1065 
1066 	return (err);
1067 }
1068 
1069 int
1070 setthreadprio(pcprio_t *pcprio, kthread_t *tp)
1071 {
1072 	int prio = 0;
1073 	int incr;
1074 	int error;
1075 
1076 	ASSERT(MUTEX_HELD(&pidlock));
1077 	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
1078 
1079 	if (pcprio->pc_op == PC_SETPRIO && pcprio->pc_cid != tp->t_cid) {
1080 		/*
1081 		 * Target thread must change to new class.
1082 		 * See comments in parmsset(), from where this code was copied.
1083 		 */
1084 		void *bufp = NULL;
1085 		caddr_t clprocp = (caddr_t)tp->t_cldata;
1086 		id_t oldcid = tp->t_cid;
1087 
1088 		error = CL_CANEXIT(tp, NULL);
1089 		if (error)
1090 			return (error);
1091 		if (CL_ALLOC(&bufp, pcprio->pc_cid, KM_NOSLEEP) != 0)
1092 			return (ENOMEM);
1093 		error = CL_ENTERCLASS(tp, pcprio->pc_cid, NULL, CRED(), bufp);
1094 		if (error) {
1095 			CL_FREE(pcprio->pc_cid, bufp);
1096 			return (error);
1097 		}
1098 		CL_EXITCLASS(oldcid, clprocp);
1099 		schedctl_set_cidpri(tp);
1100 	}
1101 
1102 	if ((error = CL_DOPRIO(tp, CRED(), 0, &prio)) != 0)
1103 		return (error);
1104 
1105 	if (pcprio->pc_op == PC_GETPRIO) {
1106 		/*
1107 		 * If we are not setting the priority, we should return the
1108 		 * highest priority pertaining to any of the specified threads.
1109 		 */
1110 		if (prio > pcprio->pc_val) {
1111 			pcprio->pc_cid = tp->t_cid;
1112 			pcprio->pc_val = prio;
1113 		}
1114 	} else if (prio != pcprio->pc_val) {
1115 		/*
1116 		 * Try to change the priority of the thread.
1117 		 */
1118 		incr = pcprio->pc_val - prio;
1119 		error = CL_DOPRIO(tp, CRED(), incr, &prio);
1120 		schedctl_set_cidpri(tp);
1121 	}
1122 
1123 	return (error);
1124 }
1125 
1126 int
1127 setprocprio(proc_t *pp, pcprio_t *pcprio)
1128 {
1129 	kthread_t *tp;
1130 	int retval = 0;
1131 	int error;
1132 
1133 	ASSERT(MUTEX_HELD(&pidlock));
1134 	mutex_enter(&pp->p_lock);
1135 
1136 	if ((tp = pp->p_tlist) == NULL) {
1137 		mutex_exit(&pp->p_lock);
1138 		return (ESRCH);
1139 	}
1140 
1141 	/*
1142 	 * Check permissions before changing the prio value.
1143 	 */
1144 	if (pcprio->pc_op == PC_SETPRIO) {
1145 		if (!prochasprocperm(pp, curproc, CRED())) {
1146 			mutex_exit(&pp->p_lock);
1147 			return (EPERM);
1148 		}
1149 	}
1150 
1151 	do {
1152 		error = setthreadprio(pcprio, tp);
1153 		if (error)
1154 			retval = error;
1155 	} while ((tp = tp->t_forw) != pp->p_tlist);
1156 
1157 	mutex_exit(&pp->p_lock);
1158 	return (retval);
1159 }
1160 
1161 /*
1162  * Set the class and priority of the specified LWP or set of processes.
1163  */
1164 static int
1165 doprio(procset_t *procset, pcprio_t *pcprio)
1166 {
1167 	int err_proc = 0;
1168 	int err_thread = 0;
1169 	int err = 0;
1170 
1171 	/*
1172 	 * Sanity check.
1173 	 */
1174 	if (pcprio->pc_op != PC_GETPRIO && pcprio->pc_op != PC_SETPRIO)
1175 		return (EINVAL);
1176 	if (pcprio->pc_op == PC_SETPRIO &&
1177 	    (pcprio->pc_cid >= loaded_classes || pcprio->pc_cid < 1))
1178 		return (EINVAL);
1179 
1180 	/*
1181 	 * If it is a PC_GETPRIO operation then set pc_val to the smallest
1182 	 * possible prio value to help us find the highest priority
1183 	 * pertaining to any of the specified processes.
1184 	 */
1185 	if (pcprio->pc_op == PC_GETPRIO)
1186 		pcprio->pc_val = SHRT_MIN;
1187 
1188 	if (procset->p_lidtype != P_LWPID ||
1189 	    procset->p_ridtype != P_LWPID)
1190 		err_proc = dotoprocs(procset, setprocprio, (char *)pcprio);
1191 
1192 	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
1193 		err_thread = dotolwp(procset, setthreadprio, (char *)pcprio);
1194 		/*
1195 		 * dotolwp() can return with p_lock held.  This is required
1196 		 * for the priocntl GETPARMS case.  So, here we just release
1197 		 * the p_lock.
1198 		 */
1199 		if (MUTEX_HELD(&curproc->p_lock))
1200 			mutex_exit(&curproc->p_lock);
1201 
1202 		/*
1203 		 * If we were called for a single LWP, then ignore ESRCH
1204 		 * returned by the previous dotoprocs() call.
1205 		 */
1206 		if (err_proc == ESRCH)
1207 			err_proc = 0;
1208 	}
1209 
1210 	/*
1211 	 * dotoprocs() ignores the init process if it is in the set, unless
1212 	 * it was the only process found. We want to make sure init is not
1213 	 * excluded if we're going PC_GETPRIO operation.
1214 	 */
1215 	if (pcprio->pc_op == PC_GETPRIO) {
1216 		proc_t *initpp;
1217 
1218 		mutex_enter(&pidlock);
1219 		if ((initpp = prfind(P_INITPID)) != NULL) {
1220 			mutex_enter(&initpp->p_lock);
1221 			if (procinset(initpp, procset)) {
1222 				mutex_exit(&initpp->p_lock);
1223 				err = setprocprio(initpp, pcprio);
1224 			} else {
1225 				mutex_exit(&initpp->p_lock);
1226 			}
1227 		}
1228 		mutex_exit(&pidlock);
1229 	}
1230 
1231 	/*
1232 	 * We're returning the latest error here that we've got back from
1233 	 * the setthreadprio() or setprocprio(). That is, err_thread and/or
1234 	 * err_proc can be replaced by err.
1235 	 */
1236 	if (!err)
1237 		err = err_thread ? err_thread : err_proc;
1238 
1239 	return (err);
1240 }
1241