xref: /illumos-gate/usr/src/uts/common/disp/priocntl.c (revision 60a3f738d56f92ae8b80e4b62a2331c6e1f2311f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/sysmacros.h>
35 #include <sys/signal.h>
36 #include <sys/pcb.h>
37 #include <sys/user.h>
38 #include <sys/systm.h>
39 #include <sys/sysinfo.h>
40 #include <sys/var.h>
41 #include <sys/errno.h>
42 #include <sys/cred.h>
43 #include <sys/proc.h>
44 #include <sys/procset.h>
45 #include <sys/debug.h>
46 #include <sys/inline.h>
47 #include <sys/priocntl.h>
48 #include <sys/disp.h>
49 #include <sys/class.h>
50 #include <sys/modctl.h>
51 #include <sys/t_lock.h>
52 #include <sys/uadmin.h>
53 #include <sys/cmn_err.h>
54 #include <sys/policy.h>
55 
56 /*
57  * Structure used to pass arguments to the proccmp() function.
58  * The arguments must be passed in a structure because proccmp()
59  * is called indirectly through the dotoprocs() function which
60  * will only pass through a single one word argument.
61  */
62 struct pcmpargs {
63 	id_t	*pcmp_cidp;
64 	int	*pcmp_cntp;
65 	kthread_id_t	*pcmp_retthreadp;
66 };
67 
/*
 * Argument bundle for setparms(), which is invoked once per process by
 * dotoprocs() and therefore receives its inputs through a single pointer.
 */
struct stprmargs {
	struct pcparms	*stp_parmsp;	/* parameters to apply to each thread */
	int		stp_error;	/* EPERM is reported back through here */
};
76 
#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
/*
 * Fetch a pc_vaparms_t supplied by a 32-bit caller.
 *
 * Each vaparm is an int followed by a long long, and that pair is laid out
 * differently by the 64-bit kernel ABI and the 32-bit user ABI.  The
 * structure is therefore read in its 32-bit form and converted one entry
 * at a time.
 *
 * The caller-supplied count is stored in vap->pc_vaparmscnt unmodified;
 * only the number of entries actually converted is capped at PC_VAPARMCNT.
 *
 * Returns 0 on success or EFAULT if the source could not be read.
 */
static int
copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg)
{
	pc_vaparms32_t parms32;
	uint_t nparms;
	uint_t i;
	int failed;

	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	/* Pick the copy routine that matches the address space of arg. */
	if (seg == UIO_USERSPACE)
		failed = copyin(arg, &parms32, sizeof (parms32));
	else
		failed = kcopy(arg, &parms32, sizeof (parms32));
	if (failed)
		return (EFAULT);

	vap->pc_vaparmscnt = parms32.pc_vaparmscnt;

	nparms = parms32.pc_vaparmscnt;
	if (nparms > PC_VAPARMCNT)
		nparms = PC_VAPARMCNT;

	for (i = 0; i < nparms; i++) {
		vap->pc_parms[i].pc_key = parms32.pc_parms[i].pc_key;
		vap->pc_parms[i].pc_parm = parms32.pc_parms[i].pc_parm;
	}
	return (0);
}

/*
 * Native callers can be copied in directly with the caller's copyinfn;
 * ILP32 callers need the converting routine above.
 */
#define	COPYIN_VAPARMS(arg, vap, size, seg)	\
	(get_udatamodel() == DATAMODEL_NATIVE ?	\
	(*copyinfn)(arg, vap, size) : copyin_vaparms32(arg, vap, seg))

#else

/* No layout difference to worry about: always a straight copy. */
#define	COPYIN_VAPARMS(arg, vap, size, seg)	(*copyinfn)(arg, vap, size)

#endif
116 
117 static int donice(procset_t *, pcnice_t *);
118 static int proccmp(proc_t *, struct pcmpargs *);
119 static int setparms(proc_t *, struct stprmargs *);
120 extern int threadcmp(struct pcmpargs *, kthread_id_t);
121 
122 /*
123  * The priocntl system call.
124  */
125 long
126 priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
127     caddr_t arg2, uio_seg_t seg)
128 {
129 	pcinfo_t		pcinfo;
130 	pcparms_t		pcparms;
131 	pcnice_t		pcnice;
132 	pcadmin_t		pcadmin;
133 	pcpri_t			pcpri;
134 	procset_t		procset;
135 	struct stprmargs	stprmargs;
136 	struct pcmpargs		pcmpargs;
137 	pc_vaparms_t		vaparms;
138 	char			clname[PC_CLNMSZ];
139 	int			count;
140 	kthread_id_t		retthreadp;
141 	proc_t			*initpp;
142 	int			clnullflag;
143 	int			error = 0;
144 	int			error1 = 0;
145 	int			rv = 0;
146 	pid_t			saved_pid;
147 	id_t			classid;
148 	int (*copyinfn)(const void *, void *, size_t);
149 	int (*copyoutfn)(const void *, void *, size_t);
150 
151 	/*
152 	 * First just check the version number. Right now there is only
153 	 * one version we know about and support.  If we get some other
154 	 * version number from the application it may be that the
155 	 * application was built with some future version and is trying
156 	 * to run on an old release of the system (that's us).  In any
157 	 * case if we don't recognize the version number all we can do is
158 	 * return error.
159 	 */
160 	if (pc_version != PC_VERSION)
161 		return (set_errno(EINVAL));
162 
163 	if (seg == UIO_USERSPACE) {
164 		copyinfn = copyin;
165 		copyoutfn = copyout;
166 	} else {
167 		copyinfn = kcopy;
168 		copyoutfn = kcopy;
169 	}
170 
171 	switch (cmd) {
172 	case PC_GETCID:
173 		/*
174 		 * If the arg pointer is NULL, the user just wants to
175 		 * know the number of classes. If non-NULL, the pointer
176 		 * should point to a valid user pcinfo buffer.  In the
177 		 * dynamic world we need to return the number of loaded
178 		 * classes, not the max number of available classes that
179 		 * can be loaded.
180 		 */
181 		if (arg == NULL) {
182 			rv = loaded_classes;
183 			break;
184 		} else {
185 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
186 				return (set_errno(EFAULT));
187 		}
188 
189 		pcinfo.pc_clname[PC_CLNMSZ-1] = '\0';
190 
191 		/*
192 		 * Get the class ID corresponding to user supplied name.
193 		 */
194 		error = getcid(pcinfo.pc_clname, &pcinfo.pc_cid);
195 		if (error)
196 			return (set_errno(error));
197 
198 		/*
199 		 * Can't get info about the sys class.
200 		 */
201 		if (pcinfo.pc_cid == 0)
202 			return (set_errno(EINVAL));
203 
204 		/*
205 		 * Get the class specific information.
206 		 * we MUST make sure that the class has not already
207 		 * been unloaded before we try the CL_GETCLINFO.
208 		 * If it has then we need to load it.
209 		 */
210 		error =
211 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
212 		if (error)
213 			return (set_errno(error));
214 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
215 		if (error)
216 			return (set_errno(error));
217 
218 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
219 			return (set_errno(EFAULT));
220 
221 		rv = loaded_classes;
222 
223 		break;
224 
225 	case PC_GETCLINFO:
226 		/*
227 		 * If the arg pointer is NULL, the user just wants to know
228 		 * the number of classes. If non-NULL, the pointer should
229 		 * point to a valid user pcinfo buffer.
230 		 */
231 		if (arg == NULL) {
232 			rv = loaded_classes;
233 			break;
234 		} else {
235 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
236 				return (set_errno(EFAULT));
237 		}
238 
239 		if (pcinfo.pc_cid >= loaded_classes || pcinfo.pc_cid < 1)
240 			return (set_errno(EINVAL));
241 
242 		(void) strncpy(pcinfo.pc_clname, sclass[pcinfo.pc_cid].cl_name,
243 		    PC_CLNMSZ);
244 
245 		/*
246 		 * Get the class specific information.  we MUST make sure
247 		 * that the class has not already been unloaded before we
248 		 * try the CL_GETCLINFO.  If it has then we need to load
249 		 * it.
250 		 */
251 		error =
252 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
253 		if (error)
254 			return (set_errno(error));
255 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
256 		if (error)
257 			return (set_errno(error));
258 
259 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
260 			return (set_errno(EFAULT));
261 
262 		rv = loaded_classes;
263 		break;
264 
265 	case PC_SETPARMS:
266 	case PC_SETXPARMS:
267 		/*
268 		 * First check the validity of the parameters we got from
269 		 * the user.  We don't do any permissions checking here
270 		 * because it's done on a per thread basis by parmsset().
271 		 */
272 		if (cmd == PC_SETPARMS) {
273 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
274 				return (set_errno(EFAULT));
275 
276 			error = parmsin(&pcparms, NULL);
277 		} else {
278 			if ((*copyinfn)(arg, clname, PC_CLNMSZ) ||
279 			    COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
280 			    seg))
281 				return (set_errno(EFAULT));
282 			clname[PC_CLNMSZ-1] = '\0';
283 
284 			if (getcid(clname, &pcparms.pc_cid))
285 				return (set_errno(EINVAL));
286 
287 			error = parmsin(&pcparms, &vaparms);
288 		}
289 
290 		if (error)
291 			return (set_errno(error));
292 
293 		/*
294 		 * Get the procset from the user.
295 		 */
296 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
297 			return (set_errno(EFAULT));
298 
299 		/*
300 		 * For performance we do a quick check here to catch
301 		 * common cases where the current thread is the only one
302 		 * in the set.  In such cases we can call parmsset()
303 		 * directly, avoiding the relatively lengthy path through
304 		 * dotoprocs().  The underlying classes expect pidlock to
305 		 * be held.
306 		 */
307 		if (cur_inset_only(&procset) == B_TRUE) {
308 			/* do a single LWP */
309 			if ((procset.p_lidtype == P_LWPID) ||
310 			    (procset.p_ridtype == P_LWPID)) {
311 				mutex_enter(&pidlock);
312 				mutex_enter(&curproc->p_lock);
313 				error = parmsset(&pcparms, curthread);
314 				mutex_exit(&curproc->p_lock);
315 				mutex_exit(&pidlock);
316 			} else {
317 				/* do the entire process otherwise */
318 				stprmargs.stp_parmsp = &pcparms;
319 				stprmargs.stp_error = 0;
320 				mutex_enter(&pidlock);
321 				error = setparms(curproc, &stprmargs);
322 				mutex_exit(&pidlock);
323 				if (error == 0 && stprmargs.stp_error != 0)
324 					error = stprmargs.stp_error;
325 			}
326 			if (error)
327 				return (set_errno(error));
328 		} else {
329 			stprmargs.stp_parmsp = &pcparms;
330 			stprmargs.stp_error = 0;
331 
332 			error1 = error = ESRCH;
333 
334 			/*
335 			 * The dotoprocs() call below will cause
336 			 * setparms() to be called for each thread in the
337 			 * specified procset. setparms() will in turn
338 			 * call parmsset() (which does the real work).
339 			 */
340 			if ((procset.p_lidtype != P_LWPID) ||
341 				(procset.p_ridtype != P_LWPID)) {
342 				error1 = dotoprocs(&procset, setparms,
343 				    (char *)&stprmargs);
344 			}
345 
346 			/*
347 			 * take care of the case when any of the
348 			 * operands happen to be LWP's
349 			 */
350 
351 			if ((procset.p_lidtype == P_LWPID) ||
352 			    (procset.p_ridtype == P_LWPID)) {
353 				error = dotolwp(&procset, parmsset,
354 				    (char *)&pcparms);
355 				/*
356 				 * Dotolwp() returns with p_lock held.
357 				 * This is required for the GETPARMS case
358 				 * below. So, here we just release the
359 				 * p_lock.
360 				 */
361 				if (MUTEX_HELD(&curproc->p_lock))
362 					mutex_exit(&curproc->p_lock);
363 			}
364 
365 			/*
366 			 * If setparms() encounters a permissions error
367 			 * for one or more of the threads it returns
368 			 * EPERM in stp_error so dotoprocs() will
369 			 * continue through the thread set.  If
370 			 * dotoprocs() returned an error above, it was
371 			 * more serious than permissions and dotoprocs
372 			 * quit when the error was encountered.  We
373 			 * return the more serious error if there was
374 			 * one, otherwise we return EPERM if we got that
375 			 * back.
376 			 */
377 			if (error1 != ESRCH)
378 				error = error1;
379 			if (error == 0 && stprmargs.stp_error != 0)
380 				error = stprmargs.stp_error;
381 		}
382 		break;
383 
384 	case PC_GETPARMS:
385 	case PC_GETXPARMS:
386 		if (cmd == PC_GETPARMS) {
387 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
388 				return (set_errno(EFAULT));
389 		} else {
390 			if (arg != NULL) {
391 				if ((*copyinfn)(arg, clname, PC_CLNMSZ))
392 					return (set_errno(EFAULT));
393 
394 				clname[PC_CLNMSZ-1] = '\0';
395 
396 				if (getcid(clname, &pcparms.pc_cid))
397 					return (set_errno(EINVAL));
398 			} else
399 				pcparms.pc_cid = PC_CLNULL;
400 
401 			if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
402 			    seg))
403 				return (set_errno(EFAULT));
404 		}
405 
406 		if (pcparms.pc_cid >= loaded_classes ||
407 		    (pcparms.pc_cid < 1 && pcparms.pc_cid != PC_CLNULL))
408 			return (set_errno(EINVAL));
409 
410 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
411 			return (set_errno(EFAULT));
412 
413 		/*
414 		 * Check to see if the current thread is the only one
415 		 * in the set. If not we must go through the whole set
416 		 * to select a thread.
417 		 */
418 		if (cur_inset_only(&procset) == B_TRUE) {
419 			/* do a single LWP */
420 			if ((procset.p_lidtype == P_LWPID) ||
421 			    (procset.p_ridtype == P_LWPID)) {
422 				if (pcparms.pc_cid != PC_CLNULL &&
423 				    pcparms.pc_cid != curthread->t_cid) {
424 					/*
425 					 * Specified thread not in
426 					 * specified class.
427 					 */
428 					return (set_errno(ESRCH));
429 				} else {
430 					mutex_enter(&curproc->p_lock);
431 					retthreadp = curthread;
432 				}
433 			} else {
434 				count = 0;
435 				retthreadp = NULL;
436 				pcmpargs.pcmp_cidp = &pcparms.pc_cid;
437 				pcmpargs.pcmp_cntp = &count;
438 				pcmpargs.pcmp_retthreadp = &retthreadp;
439 				/*
440 				 * Specified thread not in specified class.
441 				 */
442 				if (pcparms.pc_cid != PC_CLNULL &&
443 				    pcparms.pc_cid != curthread->t_cid)
444 					return (set_errno(ESRCH));
445 				error = proccmp(curproc, &pcmpargs);
446 				if (error) {
447 					if (retthreadp != NULL)
448 						mutex_exit(&(curproc->p_lock));
449 					return (set_errno(error));
450 				}
451 			}
452 		} else {
453 			/*
454 			 * get initpp early to avoid lock ordering problems
455 			 * (we cannot get pidlock while holding any p_lock).
456 			 */
457 			mutex_enter(&pidlock);
458 			initpp = prfind(P_INITPID);
459 			mutex_exit(&pidlock);
460 
461 			/*
462 			 * Select the thread (from the set) whose
463 			 * parameters we are going to return.  First we
464 			 * set up some locations for return values, then
465 			 * we call proccmp() indirectly through
466 			 * dotoprocs().  proccmp() will call a class
467 			 * specific routine which actually does the
468 			 * selection.  To understand how this works take
469 			 * a careful look at the code below, the
470 			 * dotoprocs() function, the proccmp() function,
471 			 * and the class specific cl_proccmp() functions.
472 			 */
473 			if (pcparms.pc_cid == PC_CLNULL)
474 				clnullflag = 1;
475 			else
476 				clnullflag = 0;
477 			count = 0;
478 			retthreadp = NULL;
479 			pcmpargs.pcmp_cidp = &pcparms.pc_cid;
480 			pcmpargs.pcmp_cntp = &count;
481 			pcmpargs.pcmp_retthreadp = &retthreadp;
482 			error1 = error = ESRCH;
483 
484 			if ((procset.p_lidtype != P_LWPID) ||
485 			    (procset.p_ridtype != P_LWPID)) {
486 				error1 = dotoprocs(&procset, proccmp,
487 				    (char *)&pcmpargs);
488 			}
489 
490 			/*
491 			 * take care of combination of LWP and process
492 			 * set case in a procset
493 			 */
494 			if ((procset.p_lidtype == P_LWPID) ||
495 			    (procset.p_ridtype == P_LWPID)) {
496 				error = dotolwp(&procset, threadcmp,
497 				    (char *)&pcmpargs);
498 			}
499 
500 			/*
501 			 * Both proccmp() and threadcmp() return with the
502 			 * p_lock held for the ttoproc(retthreadp). This
503 			 * is required to make sure that the process we
504 			 * chose as the winner doesn't go away
505 			 * i.e. retthreadp has to be a valid pointer.
506 			 *
507 			 * The case below can only happen if the thread
508 			 * with the highest priority was not in your
509 			 * process.  In that case, dotolwp will return
510 			 * holding p_lock for both your process as well
511 			 * as the process in which retthreadp is a
512 			 * thread.
513 			 */
514 			if ((retthreadp != NULL) &&
515 			    (ttoproc(retthreadp) != curproc) &&
516 			    MUTEX_HELD(&(curproc)->p_lock))
517 				mutex_exit(&(curproc)->p_lock);
518 
519 			ASSERT(retthreadp == NULL ||
520 			    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
521 			if (error1 != ESRCH)
522 				error = error1;
523 			if (error) {
524 				if (retthreadp != NULL)
525 				    mutex_exit(&(ttoproc(retthreadp)->p_lock));
526 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
527 				return (set_errno(error));
528 			}
529 			/*
530 			 * dotoprocs() ignores the init process if it is
531 			 * in the set, unless it was the only process found.
532 			 * Since we are getting parameters here rather than
533 			 * setting them, we want to make sure init is not
534 			 * excluded if it is in the set.
535 			 */
536 			if (initpp != NULL &&
537 			    procinset(initpp, &procset) &&
538 			    (retthreadp != NULL) &&
539 			    ttoproc(retthreadp) != initpp)
540 				(void) proccmp(initpp, &pcmpargs);
541 
542 			/*
543 			 * If dotoprocs returned success it found at least
544 			 * one thread in the set.  If proccmp() failed to
545 			 * select a thread it is because the user specified
546 			 * a class and none of the threads in the set
547 			 * belonged to that class, or because the process
548 			 * specified was in the middle of exiting and had
549 			 * cleared its thread list.
550 			 */
551 			if (retthreadp == NULL) {
552 				/*
553 				 * Might be here and still holding p_lock
554 				 * if we did a dotolwp on an lwp that
555 				 * existed but was in the wrong class.
556 				 */
557 				if (MUTEX_HELD(&(curproc)->p_lock))
558 					mutex_exit(&(curproc)->p_lock);
559 				return (set_errno(ESRCH));
560 			}
561 
562 			/*
563 			 * User can only use PC_CLNULL with one thread in set.
564 			 */
565 			if (clnullflag && count > 1) {
566 				if (retthreadp != NULL)
567 					mutex_exit(
568 					    &(ttoproc(retthreadp)->p_lock));
569 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
570 				return (set_errno(EINVAL));
571 			}
572 		}
573 
574 		ASSERT(retthreadp == NULL ||
575 		    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
576 		/*
577 		 * It is possible to have retthreadp == NULL. Proccmp()
578 		 * in the rare case (p_tlist == NULL) could return without
579 		 * setting a value for retthreadp.
580 		 */
581 		if (retthreadp == NULL) {
582 			ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
583 			return (set_errno(ESRCH));
584 		}
585 		/*
586 		 * We've selected a thread so now get the parameters.
587 		 */
588 		parmsget(retthreadp, &pcparms);
589 
590 		/*
591 		 * Prepare to return parameters to the user
592 		 */
593 		error = parmsout(&pcparms,
594 		    (cmd == PC_GETPARMS ? NULL : &vaparms));
595 
596 		/*
597 		 * Save pid of selected thread before dropping p_lock.
598 		 */
599 		saved_pid = ttoproc(retthreadp)->p_pid;
600 		mutex_exit(&(ttoproc(retthreadp)->p_lock));
601 		ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
602 
603 		if (error)
604 			return (set_errno(error));
605 
606 		if (cmd == PC_GETPARMS) {
607 			if ((*copyoutfn)(&pcparms, arg, sizeof (pcparms)))
608 				return (set_errno(EFAULT));
609 		} else if ((error = vaparmsout(arg, &pcparms, &vaparms,
610 		    seg)) != 0)
611 			return (set_errno(error));
612 
613 		/*
614 		 * And finally, return the pid of the selected thread.
615 		 */
616 		rv = saved_pid;
617 		break;
618 
619 	case PC_ADMIN:
620 		if (get_udatamodel() == DATAMODEL_NATIVE) {
621 			if ((*copyinfn)(arg, &pcadmin, sizeof (pcadmin_t)))
622 				return (set_errno(EFAULT));
623 #ifdef _SYSCALL32_IMPL
624 		} else {
625 			/* pcadmin struct from ILP32 callers */
626 			pcadmin32_t pcadmin32;
627 
628 			if ((*copyinfn)(arg, &pcadmin32, sizeof (pcadmin32_t)))
629 				return (set_errno(EFAULT));
630 			pcadmin.pc_cid = pcadmin32.pc_cid;
631 			pcadmin.pc_cladmin = (caddr_t)(uintptr_t)
632 			    pcadmin32.pc_cladmin;
633 #endif /* _SYSCALL32_IMPL */
634 		}
635 
636 		if (pcadmin.pc_cid >= loaded_classes ||
637 		    pcadmin.pc_cid < 1)
638 			return (set_errno(EINVAL));
639 
640 		/*
641 		 * Have the class do whatever the user is requesting.
642 		 */
643 		mutex_enter(&ualock);
644 		error = CL_ADMIN(&sclass[pcadmin.pc_cid], pcadmin.pc_cladmin,
645 				CRED());
646 		mutex_exit(&ualock);
647 		break;
648 
649 	case PC_GETPRIRANGE:
650 		if ((*copyinfn)(arg, &pcpri, sizeof (pcpri_t)))
651 			return (set_errno(EFAULT));
652 
653 		if (pcpri.pc_cid >= loaded_classes || pcpri.pc_cid < 0)
654 			return (set_errno(EINVAL));
655 
656 		error = CL_GETCLPRI(&sclass[pcpri.pc_cid], &pcpri);
657 		if (!error) {
658 			if ((*copyoutfn)(&pcpri, arg, sizeof (pcpri)))
659 				return (set_errno(EFAULT));
660 		}
661 		break;
662 
663 	case PC_DONICE:
664 		/*
665 		 * Get pcnice and procset structures from the user.
666 		 */
667 		if ((*copyinfn)(arg, &pcnice, sizeof (pcnice)) ||
668 		    (*copyinfn)(psp, &procset, sizeof (procset)))
669 			return (set_errno(EFAULT));
670 
671 		error = donice(&procset, &pcnice);
672 
673 		if (!error && (pcnice.pc_op == PC_GETNICE)) {
674 			if ((*copyoutfn)(&pcnice, arg, sizeof (pcnice)))
675 				return (set_errno(EFAULT));
676 		}
677 		break;
678 
679 	case PC_SETDFLCL:
680 		if (secpolicy_dispadm(CRED()) != 0)
681 			return (set_errno(EPERM));
682 
683 		if (copyin(arg, (caddr_t)clname, PC_CLNMSZ) != 0)
684 			return (set_errno(EFAULT));
685 		clname[PC_CLNMSZ-1] = '\0';
686 
687 		if (getcid(clname, &classid) != 0)
688 			return (set_errno(EINVAL));
689 		if (classid == syscid)
690 			return (set_errno(EINVAL));
691 		defaultcid = classid;
692 		ASSERT(defaultcid > 0 && defaultcid < loaded_classes);
693 		break;
694 
695 	default:
696 		error = EINVAL;
697 		break;
698 	}
699 	return (error ? (set_errno(error)) : rv);
700 }
701 
702 long
703 priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
704 {
705 	return (priocntl_common(pc_version, psp, cmd, arg, arg2,
706 	    UIO_USERSPACE));
707 }
708 
709 /*
710  * The proccmp() function is part of the implementation of the
711  * PC_GETPARMS command of the priocntl system call.  This function works
712  * with the system call code and with the class specific cl_globpri()
713  * function to select one thread from a specified procset based on class
714  * specific criteria. proccmp() is called indirectly from the priocntl
715  * code through the dotoprocs function.  Basic strategy is dotoprocs()
716  * calls us once for each thread in the set.  We in turn call the class
717  * specific function to compare the current thread from dotoprocs to the
718  * "best" (according to the class criteria) found so far.  We keep the
719  * "best" thread in *pcmp_retthreadp.
720  */
721 static int
722 proccmp(proc_t *pp, struct pcmpargs *argp)
723 {
724 	kthread_id_t	tx, ty;
725 	int		last_pri = -1;
726 	int		tx_pri;
727 	int		found = 0;
728 
729 	mutex_enter(&pp->p_lock);
730 
731 	if (pp->p_tlist == NULL) {
732 		mutex_exit(&pp->p_lock);
733 		return (0);
734 	}
735 	(*argp->pcmp_cntp)++;	/* Increment count of procs in the set */
736 
737 	if (*argp->pcmp_cidp == PC_CLNULL) {
738 		/*
739 		 * If no cid is specified, then lets just pick the first one.
740 		 * It doesn't matter because if the number of processes in the
741 		 * set are more than 1, then we return EINVAL in priocntlsys.
742 		 */
743 		*argp->pcmp_cidp = pp->p_tlist->t_cid;
744 	}
745 	ty = tx = pp->p_tlist;
746 	do {
747 		if (tx->t_cid == *argp->pcmp_cidp) {
748 			/*
749 			 * We found one which matches the required cid.
750 			 */
751 			found = 1;
752 			if ((tx_pri = CL_GLOBPRI(tx)) > last_pri) {
753 				last_pri = tx_pri;
754 				ty = tx;
755 			}
756 		}
757 	} while ((tx = tx->t_forw) != pp->p_tlist);
758 	if (found) {
759 		if (*argp->pcmp_retthreadp == NULL) {
760 			/*
761 			 * First time through for this set.
762 			 * keep the mutex held. He might be the one!
763 			 */
764 			*argp->pcmp_retthreadp = ty;
765 		} else {
766 			tx = *argp->pcmp_retthreadp;
767 			if (CL_GLOBPRI(ty) <= CL_GLOBPRI(tx)) {
768 				mutex_exit(&pp->p_lock);
769 			} else {
770 				mutex_exit(&(ttoproc(tx)->p_lock));
771 				*argp->pcmp_retthreadp = ty;
772 			}
773 		}
774 	} else {
775 		/*
776 		 * We actually didn't find anything of the same cid in
777 		 * this process.
778 		 */
779 		mutex_exit(&pp->p_lock);
780 	}
781 	return (0);
782 }
783 
784 
785 int
786 threadcmp(struct pcmpargs *argp, kthread_id_t tp)
787 {
788 	kthread_id_t	tx;
789 	proc_t		*pp;
790 
791 	ASSERT(MUTEX_HELD(&(ttoproc(tp))->p_lock));
792 
793 	(*argp->pcmp_cntp)++;   /* Increment count of procs in the set */
794 	if (*argp->pcmp_cidp == PC_CLNULL) {
795 		/*
796 		 * If no cid is specified, then lets just pick the first one.
797 		 * It doesn't matter because if the number of threads in the
798 		 * set are more than 1, then we return EINVAL in priocntlsys.
799 		 */
800 		*argp->pcmp_cidp = tp->t_cid;
801 	}
802 	if (tp->t_cid == *argp->pcmp_cidp) {
803 		if (*argp->pcmp_retthreadp == NULL) {
804 			/*
805 			 * First time through for this set.
806 			 */
807 			*argp->pcmp_retthreadp = tp;
808 		} else {
809 			tx = *argp->pcmp_retthreadp;
810 			if (CL_GLOBPRI(tp) > CL_GLOBPRI(tx)) {
811 				/*
812 				 * Unlike proccmp(), we don't release the
813 				 * p_lock of the ttoproc(tp) if tp's global
814 				 * priority is less than tx's. We need to go
815 				 * through the entire list before we can do
816 				 * that. The p_lock is released by the caller
817 				 * of dotolwp().
818 				 */
819 				pp = ttoproc(tx);
820 				ASSERT(MUTEX_HELD(&pp->p_lock));
821 				if (pp != curproc) {
822 					mutex_exit(&pp->p_lock);
823 				}
824 				*argp->pcmp_retthreadp = tp;
825 			}
826 		}
827 	}
828 	return (0);
829 }
830 
831 
832 /*
833  * The setparms() function is called indirectly by priocntlsys()
834  * through the dotoprocs() function.  setparms() acts as an
835  * intermediary between dotoprocs() and the parmsset() function,
836  * calling parmsset() for each thread in the set and handling
837  * the error returns on their way back up to dotoprocs().
838  */
839 static int
840 setparms(proc_t *targpp, struct stprmargs *stprmp)
841 {
842 	int error = 0;
843 	kthread_id_t t;
844 	int err;
845 
846 	mutex_enter(&targpp->p_lock);
847 	if ((t = targpp->p_tlist) == NULL) {
848 		mutex_exit(&targpp->p_lock);
849 		return (0);
850 	}
851 	do {
852 		err = parmsset(stprmp->stp_parmsp, t);
853 		if (error == 0)
854 			error = err;
855 	} while ((t = t->t_forw) != targpp->p_tlist);
856 	mutex_exit(&targpp->p_lock);
857 	if (error) {
858 		if (error == EPERM) {
859 			stprmp->stp_error = EPERM;
860 			return (0);
861 		} else {
862 			return (error);
863 		}
864 	} else
865 		return (0);
866 }
867 
868 int
869 setthreadnice(pcnice_t *pcnice, kthread_t *tp)
870 {
871 	int error = 0;
872 	int nice;
873 	int inc;
874 	id_t rtcid;
875 
876 	ASSERT(MUTEX_HELD(&pidlock));
877 	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
878 
879 	/*
880 	 * The XPG5 standard requires that any realtime process or thread
881 	 * must be unaffected by a call to setpriority().
882 	 */
883 	error = getcidbyname("RT", &rtcid);
884 	if ((error == 0) && (tp->t_cid == rtcid)) {
885 		if (pcnice->pc_op == PC_SETNICE)
886 			return (error);
887 	}
888 
889 	if ((error = CL_DONICE(tp, CRED(), 0, &nice)) != 0)
890 		return (error);
891 
892 	if (pcnice->pc_op == PC_GETNICE) {
893 		/*
894 		 * If there is no change to priority, we should return the
895 		 * highest priority (lowest numerical value) pertaining to
896 		 * any of the specified threads.
897 		 */
898 		if (nice < pcnice->pc_val)
899 			pcnice->pc_val = nice;
900 	} else {
901 		ASSERT(pcnice->pc_op == PC_SETNICE);
902 		/*
903 		 * Try to change the nice value of the thread.
904 		 */
905 		inc = pcnice->pc_val - nice;
906 
907 		error = CL_DONICE(tp, CRED(), inc, &inc);
908 	}
909 
910 	return (error);
911 }
912 
913 int
914 setprocnice(proc_t *pp, pcnice_t *pcnice)
915 {
916 	kthread_t *tp;
917 	int retval = 0;
918 	int error = 0;
919 
920 	ASSERT(MUTEX_HELD(&pidlock));
921 	mutex_enter(&pp->p_lock);
922 
923 	if ((tp = pp->p_tlist) == NULL) {
924 		mutex_exit(&pp->p_lock);
925 		return (ESRCH);
926 	}
927 
928 	/*
929 	 * Check permissions before changing the nice value.
930 	 */
931 	if (pcnice->pc_op == PC_SETNICE) {
932 		if (!prochasprocperm(pp, curproc, CRED())) {
933 			mutex_exit(&pp->p_lock);
934 			return (EPERM);
935 		}
936 	}
937 
938 	do {
939 		error = setthreadnice(pcnice, tp);
940 		if (error)
941 			retval = error;
942 	} while ((tp = tp->t_forw) != pp->p_tlist);
943 
944 	mutex_exit(&pp->p_lock);
945 	return (retval);
946 }
947 
948 /*
949  * Update the nice value of the specified LWP or set of processes.
950  */
951 static int
952 donice(procset_t *procset, pcnice_t *pcnice)
953 {
954 	int err_proc = 0;
955 	int err_thread = 0;
956 	int err = 0;
957 
958 	/*
959 	 * Sanity check.
960 	 */
961 	if (pcnice->pc_op != PC_GETNICE && pcnice->pc_op != PC_SETNICE)
962 		return (EINVAL);
963 
964 	/*
965 	 * If it is PC_GETNICE operation then set pc_val to the largest
966 	 * possible nice value to help us find the lowest nice value
967 	 * pertaining to any of the specified processes.
968 	 */
969 	if (pcnice->pc_op == PC_GETNICE)
970 		pcnice->pc_val = NZERO;
971 
972 	if (procset->p_lidtype != P_LWPID ||
973 	    procset->p_ridtype != P_LWPID)
974 		err_proc = dotoprocs(procset, setprocnice, (char *)pcnice);
975 
976 	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
977 		err_thread = dotolwp(procset, setthreadnice, (char *)pcnice);
978 		/*
979 		 * dotolwp() can return with p_lock held.  This is required
980 		 * for the priocntl GETPARMS case.  So, here we just release
981 		 * the p_lock.
982 		 */
983 		if (MUTEX_HELD(&curproc->p_lock))
984 			mutex_exit(&curproc->p_lock);
985 
986 		/*
987 		 * If we were called for a single LWP, then ignore ESRCH
988 		 * returned by the previous dotoprocs() call.
989 		 */
990 		if (err_proc == ESRCH)
991 			err_proc = 0;
992 	}
993 
994 	/*
995 	 * dotoprocs() ignores the init process if it is in the set, unless
996 	 * it was the only process found. We want to make sure init is not
997 	 * excluded if we're going PC_GETNICE operation.
998 	 */
999 	if (pcnice->pc_op == PC_GETNICE) {
1000 		proc_t *initpp;
1001 
1002 		mutex_enter(&pidlock);
1003 		initpp = prfind(P_INITPID);
1004 		if (initpp != NULL && procinset(initpp, procset))
1005 			err = setprocnice(initpp, pcnice);
1006 		mutex_exit(&pidlock);
1007 	}
1008 
1009 	/*
1010 	 * We're returning the latest error here that we've got back from
1011 	 * the setthreadnice() or setprocnice(). That is, err_thread and/or
1012 	 * err_proc can be replaced by err.
1013 	 */
1014 	if (!err)
1015 		err = err_thread ? err_thread : err_proc;
1016 
1017 	return (err);
1018 }
1019