xref: /titanic_52/usr/src/uts/common/disp/rt.c (revision 24d819e6779cf7765ce55eee9cf6f0a8fcea4e30)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2013 Joyent, Inc.  All rights reserved.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/sysmacros.h>
34 #include <sys/cred.h>
35 #include <sys/proc.h>
36 #include <sys/pcb.h>
37 #include <sys/signal.h>
38 #include <sys/user.h>
39 #include <sys/priocntl.h>
40 #include <sys/class.h>
41 #include <sys/disp.h>
42 #include <sys/procset.h>
43 #include <sys/cmn_err.h>
44 #include <sys/debug.h>
45 #include <sys/rt.h>
46 #include <sys/rtpriocntl.h>
47 #include <sys/kmem.h>
48 #include <sys/systm.h>
49 #include <sys/schedctl.h>
50 #include <sys/errno.h>
51 #include <sys/cpuvar.h>
52 #include <sys/vmsystm.h>
53 #include <sys/time.h>
54 #include <sys/policy.h>
55 #include <sys/sdt.h>
56 #include <sys/cpupart.h>
57 #include <sys/modctl.h>
58 
59 static pri_t	rt_init(id_t, int, classfuncs_t **);
60 
61 static struct sclass csw = {
62 	"RT",
63 	rt_init,
64 	0
65 };
66 
67 static struct modlsched modlsched = {
68 	&mod_schedops, "realtime scheduling class", &csw
69 };
70 
71 static struct modlinkage modlinkage = {
72 	MODREV_1, (void *)&modlsched, NULL
73 };
74 
75 int
76 _init()
77 {
78 	return (mod_install(&modlinkage));
79 }
80 
/*
 * Module unload entry point.  Removal of the RT class is not supported
 * for now, so the request is always refused with EBUSY.
 */
int
_fini()
{
	int rc = EBUSY;

	return (rc);
}
86 
87 int
88 _info(struct modinfo *modinfop)
89 {
90 	return (mod_info(&modlinkage, modinfop));
91 }
92 
93 
94 /*
95  * Class specific code for the real-time class
96  */
97 
98 /*
99  * Extern declarations for variables defined in the rt master file
100  */
101 #define	RTMAXPRI 59
102 
103 pri_t rt_maxpri = RTMAXPRI;	/* maximum real-time priority */
104 rtdpent_t *rt_dptbl;	  /* real-time dispatcher parameter table */
105 
/*
 * Control flag bits carried in the rt_cflags member of rtkparms_t; each
 * bit says which member of the parameter buffer should be applied.
 */
#define	RT_DOPRI	(1 << 0)	/* apply the priority */
#define	RT_DOTQ		(1 << 1)	/* apply the RT time quantum */
#define	RT_DOSIG	(1 << 2)	/* apply the RT time quantum signal */
112 
113 static int	rt_admin(caddr_t, cred_t *);
114 static int	rt_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
115 static int	rt_fork(kthread_t *, kthread_t *, void *);
116 static int	rt_getclinfo(void *);
117 static int	rt_getclpri(pcpri_t *);
118 static int	rt_parmsin(void *);
119 static int	rt_parmsout(void *, pc_vaparms_t *);
120 static int	rt_vaparmsin(void *, pc_vaparms_t *);
121 static int	rt_vaparmsout(void *, pc_vaparms_t *);
122 static int	rt_parmsset(kthread_t *, void *, id_t, cred_t *);
123 static int	rt_donice(kthread_t *, cred_t *, int, int *);
124 static int	rt_doprio(kthread_t *, cred_t *, int, int *);
125 static void	rt_exitclass(void *);
126 static int	rt_canexit(kthread_t *, cred_t *);
127 static void	rt_forkret(kthread_t *, kthread_t *);
128 static void	rt_nullsys();
129 static void	rt_parmsget(kthread_t *, void *);
130 static void	rt_preempt(kthread_t *);
131 static void	rt_setrun(kthread_t *);
132 static void	rt_tick(kthread_t *);
133 static void	rt_wakeup(kthread_t *);
134 static pri_t	rt_swapin(kthread_t *, int);
135 static pri_t	rt_swapout(kthread_t *, int);
136 static pri_t	rt_globpri(kthread_t *);
137 static void	rt_yield(kthread_t *);
138 static int	rt_alloc(void **, int);
139 static void	rt_free(void *);
140 
141 static void	rt_change_priority(kthread_t *, rtproc_t *);
142 
143 static id_t	rt_cid;		/* real-time class ID */
144 static rtproc_t	rt_plisthead;	/* dummy rtproc at head of rtproc list */
145 static kmutex_t	rt_dptblock;	/* protects realtime dispatch table */
146 static kmutex_t	rt_list_lock;	/* protects RT thread list */
147 
148 extern rtdpent_t *rt_getdptbl(void);
149 
150 static struct classfuncs rt_classfuncs = {
151 	/* class ops */
152 	rt_admin,
153 	rt_getclinfo,
154 	rt_parmsin,
155 	rt_parmsout,
156 	rt_vaparmsin,
157 	rt_vaparmsout,
158 	rt_getclpri,
159 	rt_alloc,
160 	rt_free,
161 	/* thread ops */
162 	rt_enterclass,
163 	rt_exitclass,
164 	rt_canexit,
165 	rt_fork,
166 	rt_forkret,
167 	rt_parmsget,
168 	rt_parmsset,
169 	rt_nullsys,	/* stop */
170 	rt_nullsys,	/* exit */
171 	rt_nullsys,	/* active */
172 	rt_nullsys,	/* inactive */
173 	rt_swapin,
174 	rt_swapout,
175 	rt_nullsys,	/* trapret */
176 	rt_preempt,
177 	rt_setrun,
178 	rt_nullsys,	/* sleep */
179 	rt_tick,
180 	rt_wakeup,
181 	rt_donice,
182 	rt_globpri,
183 	rt_nullsys,	/* set_process_group */
184 	rt_yield,
185 	rt_doprio,
186 };
187 
188 /*
189  * Real-time class initialization. Called by dispinit() at boot time.
190  * We can ignore the clparmsz argument since we know that the smallest
191  * possible parameter buffer is big enough for us.
192  */
193 /* ARGSUSED */
194 pri_t
195 rt_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
196 {
197 	rt_dptbl = rt_getdptbl();
198 	rt_cid = cid;	/* Record our class ID */
199 
200 	/*
201 	 * Initialize the rtproc list.
202 	 */
203 	rt_plisthead.rt_next = rt_plisthead.rt_prev = &rt_plisthead;
204 
205 	/*
206 	 * We're required to return a pointer to our classfuncs
207 	 * structure and the highest global priority value we use.
208 	 */
209 	*clfuncspp = &rt_classfuncs;
210 	mutex_init(&rt_dptblock, NULL, MUTEX_DEFAULT, NULL);
211 	mutex_init(&rt_list_lock, NULL, MUTEX_DEFAULT, NULL);
212 	return (rt_dptbl[rt_maxpri].rt_globpri);
213 }
214 
215 /*
216  * Get or reset the rt_dptbl values per the user's request.
217  */
218 /* ARGSUSED */
219 static int
220 rt_admin(caddr_t uaddr, cred_t *reqpcredp)
221 {
222 	rtadmin_t	rtadmin;
223 	rtdpent_t	*tmpdpp;
224 	size_t		userdpsz;
225 	size_t		rtdpsz;
226 	int		i;
227 
228 	if (get_udatamodel() == DATAMODEL_NATIVE) {
229 		if (copyin(uaddr, &rtadmin, sizeof (rtadmin_t)))
230 			return (EFAULT);
231 	}
232 #ifdef _SYSCALL32_IMPL
233 	else {
234 		/* rtadmin struct from ILP32 callers */
235 		rtadmin32_t rtadmin32;
236 		if (copyin(uaddr, &rtadmin32, sizeof (rtadmin32_t)))
237 			return (EFAULT);
238 		rtadmin.rt_dpents =
239 		    (struct rtdpent *)(uintptr_t)rtadmin32.rt_dpents;
240 		rtadmin.rt_ndpents = rtadmin32.rt_ndpents;
241 		rtadmin.rt_cmd = rtadmin32.rt_cmd;
242 	}
243 #endif /* _SYSCALL32_IMPL */
244 
245 	rtdpsz = (rt_maxpri + 1) * sizeof (rtdpent_t);
246 
247 	switch (rtadmin.rt_cmd) {
248 
249 	case RT_GETDPSIZE:
250 		rtadmin.rt_ndpents = rt_maxpri + 1;
251 
252 		if (get_udatamodel() == DATAMODEL_NATIVE) {
253 			if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
254 				return (EFAULT);
255 		}
256 #ifdef _SYSCALL32_IMPL
257 		else {
258 			/* return rtadmin struct to ILP32 callers */
259 			rtadmin32_t rtadmin32;
260 			rtadmin32.rt_dpents =
261 			    (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
262 			rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
263 			rtadmin32.rt_cmd = rtadmin.rt_cmd;
264 			if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
265 				return (EFAULT);
266 		}
267 #endif /* _SYSCALL32_IMPL */
268 
269 		break;
270 
271 	case RT_GETDPTBL:
272 		userdpsz = MIN(rtadmin.rt_ndpents * sizeof (rtdpent_t),
273 		    rtdpsz);
274 		if (copyout(rt_dptbl, rtadmin.rt_dpents, userdpsz))
275 			return (EFAULT);
276 		rtadmin.rt_ndpents = userdpsz / sizeof (rtdpent_t);
277 
278 		if (get_udatamodel() == DATAMODEL_NATIVE) {
279 			if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
280 				return (EFAULT);
281 		}
282 #ifdef _SYSCALL32_IMPL
283 		else {
284 			/* return rtadmin struct to ILP32 callers */
285 			rtadmin32_t rtadmin32;
286 			rtadmin32.rt_dpents =
287 			    (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
288 			rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
289 			rtadmin32.rt_cmd = rtadmin.rt_cmd;
290 			if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
291 				return (EFAULT);
292 		}
293 #endif /* _SYSCALL32_IMPL */
294 		break;
295 
296 	case RT_SETDPTBL:
297 		/*
298 		 * We require that the requesting process has sufficient
299 		 * priveleges.  We also require that the table supplied by
300 		 * the user exactly match the current rt_dptbl in size.
301 		 */
302 		if (secpolicy_dispadm(reqpcredp) != 0)
303 			return (EPERM);
304 		if (rtadmin.rt_ndpents * sizeof (rtdpent_t) != rtdpsz)
305 			return (EINVAL);
306 
307 		/*
308 		 * We read the user supplied table into a temporary buffer
309 		 * where the time quantum values are validated before
310 		 * being copied to the rt_dptbl.
311 		 */
312 		tmpdpp = kmem_alloc(rtdpsz, KM_SLEEP);
313 		if (copyin(rtadmin.rt_dpents, tmpdpp, rtdpsz)) {
314 			kmem_free(tmpdpp, rtdpsz);
315 			return (EFAULT);
316 		}
317 		for (i = 0; i < rtadmin.rt_ndpents; i++) {
318 
319 			/*
320 			 * Validate the user supplied time quantum values.
321 			 */
322 			if (tmpdpp[i].rt_quantum <= 0 &&
323 			    tmpdpp[i].rt_quantum != RT_TQINF) {
324 				kmem_free(tmpdpp, rtdpsz);
325 				return (EINVAL);
326 			}
327 		}
328 
329 		/*
330 		 * Copy the user supplied values over the current rt_dptbl
331 		 * values.  The rt_globpri member is read-only so we don't
332 		 * overwrite it.
333 		 */
334 		mutex_enter(&rt_dptblock);
335 		for (i = 0; i < rtadmin.rt_ndpents; i++)
336 			rt_dptbl[i].rt_quantum = tmpdpp[i].rt_quantum;
337 		mutex_exit(&rt_dptblock);
338 		kmem_free(tmpdpp, rtdpsz);
339 		break;
340 
341 	default:
342 		return (EINVAL);
343 	}
344 	return (0);
345 }
346 
347 
348 /*
349  * Allocate a real-time class specific proc structure and
350  * initialize it with the parameters supplied. Also move thread
351  * to specified real-time priority.
352  */
353 /* ARGSUSED */
354 static int
355 rt_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
356     void *bufp)
357 {
358 	rtkparms_t *rtkparmsp = (rtkparms_t *)parmsp;
359 	rtproc_t *rtpp;
360 
361 	/*
362 	 * For a thread to enter the real-time class the thread
363 	 * which initiates the request must be privileged.
364 	 * This may have been checked previously but if our
365 	 * caller passed us a credential structure we assume it
366 	 * hasn't and we check it here.
367 	 */
368 	if (reqpcredp != NULL && secpolicy_setpriority(reqpcredp) != 0)
369 		return (EPERM);
370 
371 	rtpp = (rtproc_t *)bufp;
372 	ASSERT(rtpp != NULL);
373 
374 	/*
375 	 * If this thread's lwp is swapped out, it will be brought in
376 	 * when it is put onto the runqueue.
377 	 *
378 	 * Now, Initialize the rtproc structure.
379 	 */
380 	if (rtkparmsp == NULL) {
381 		/*
382 		 * Use default values
383 		 */
384 		rtpp->rt_pri = 0;
385 		rtpp->rt_pquantum = rt_dptbl[0].rt_quantum;
386 		rtpp->rt_tqsignal = 0;
387 	} else {
388 		/*
389 		 * Use supplied values
390 		 */
391 		if ((rtkparmsp->rt_cflags & RT_DOPRI) == 0)
392 			rtpp->rt_pri = 0;
393 		else
394 			rtpp->rt_pri = rtkparmsp->rt_pri;
395 
396 		if (rtkparmsp->rt_tqntm == RT_TQINF)
397 			rtpp->rt_pquantum = RT_TQINF;
398 		else if (rtkparmsp->rt_tqntm == RT_TQDEF ||
399 		    (rtkparmsp->rt_cflags & RT_DOTQ) == 0)
400 			rtpp->rt_pquantum = rt_dptbl[rtpp->rt_pri].rt_quantum;
401 		else
402 			rtpp->rt_pquantum = rtkparmsp->rt_tqntm;
403 
404 		if ((rtkparmsp->rt_cflags & RT_DOSIG) == 0)
405 			rtpp->rt_tqsignal = 0;
406 		else
407 			rtpp->rt_tqsignal = rtkparmsp->rt_tqsig;
408 	}
409 	rtpp->rt_flags = 0;
410 	rtpp->rt_tp = t;
411 	/*
412 	 * Reset thread priority
413 	 */
414 	thread_lock(t);
415 	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
416 	t->t_cid = cid;
417 	t->t_cldata = (void *)rtpp;
418 	t->t_schedflag &= ~TS_RUNQMATCH;
419 	rt_change_priority(t, rtpp);
420 	thread_unlock(t);
421 	/*
422 	 * Link new structure into rtproc list
423 	 */
424 	mutex_enter(&rt_list_lock);
425 	rtpp->rt_next = rt_plisthead.rt_next;
426 	rtpp->rt_prev = &rt_plisthead;
427 	rt_plisthead.rt_next->rt_prev = rtpp;
428 	rt_plisthead.rt_next = rtpp;
429 	mutex_exit(&rt_list_lock);
430 	return (0);
431 }
432 
433 
434 /*
435  * Free rtproc structure of thread.
436  */
437 static void
438 rt_exitclass(void *procp)
439 {
440 	rtproc_t *rtprocp = (rtproc_t *)procp;
441 
442 	mutex_enter(&rt_list_lock);
443 	rtprocp->rt_prev->rt_next = rtprocp->rt_next;
444 	rtprocp->rt_next->rt_prev = rtprocp->rt_prev;
445 	mutex_exit(&rt_list_lock);
446 	kmem_free(rtprocp, sizeof (rtproc_t));
447 }
448 
449 
450 /*
451  * Allocate and initialize real-time class specific
452  * proc structure for child.
453  */
454 /* ARGSUSED */
455 static int
456 rt_fork(kthread_t *t, kthread_t *ct, void *bufp)
457 {
458 	rtproc_t *prtpp;
459 	rtproc_t *crtpp;
460 
461 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
462 
463 	/*
464 	 * Initialize child's rtproc structure
465 	 */
466 	crtpp = (rtproc_t *)bufp;
467 	ASSERT(crtpp != NULL);
468 	prtpp = (rtproc_t *)t->t_cldata;
469 	thread_lock(t);
470 	crtpp->rt_timeleft = crtpp->rt_pquantum = prtpp->rt_pquantum;
471 	crtpp->rt_pri = prtpp->rt_pri;
472 	crtpp->rt_flags = prtpp->rt_flags & ~RTBACKQ;
473 	crtpp->rt_tqsignal = prtpp->rt_tqsignal;
474 
475 	crtpp->rt_tp = ct;
476 	thread_unlock(t);
477 
478 	/*
479 	 * Link new structure into rtproc list
480 	 */
481 	ct->t_cldata = (void *)crtpp;
482 	mutex_enter(&rt_list_lock);
483 	crtpp->rt_next = rt_plisthead.rt_next;
484 	crtpp->rt_prev = &rt_plisthead;
485 	rt_plisthead.rt_next->rt_prev = crtpp;
486 	rt_plisthead.rt_next = crtpp;
487 	mutex_exit(&rt_list_lock);
488 	return (0);
489 }
490 
491 
492 /*
493  * The child goes to the back of its dispatcher queue while the
494  * parent continues to run after a real time thread forks.
495  */
496 /* ARGSUSED */
497 static void
498 rt_forkret(kthread_t *t, kthread_t *ct)
499 {
500 	proc_t *pp = ttoproc(t);
501 	proc_t *cp = ttoproc(ct);
502 
503 	ASSERT(t == curthread);
504 	ASSERT(MUTEX_HELD(&pidlock));
505 
506 	/*
507 	 * Grab the child's p_lock before dropping pidlock to ensure
508 	 * the process does not disappear before we set it running.
509 	 */
510 	mutex_enter(&cp->p_lock);
511 	mutex_exit(&pidlock);
512 	continuelwps(cp);
513 	mutex_exit(&cp->p_lock);
514 
515 	mutex_enter(&pp->p_lock);
516 	continuelwps(pp);
517 	mutex_exit(&pp->p_lock);
518 }
519 
520 
521 /*
522  * Get information about the real-time class into the buffer
523  * pointed to by rtinfop.  The maximum configured real-time
524  * priority is the only information we supply.  We ignore the
525  * class and credential arguments because anyone can have this
526  * information.
527  */
528 /* ARGSUSED */
529 static int
530 rt_getclinfo(void *infop)
531 {
532 	rtinfo_t *rtinfop = (rtinfo_t *)infop;
533 	rtinfop->rt_maxpri = rt_maxpri;
534 	return (0);
535 }
536 
537 /*
538  * Return the user mode scheduling priority range.
539  */
540 static int
541 rt_getclpri(pcpri_t *pcprip)
542 {
543 	pcprip->pc_clpmax = rt_maxpri;
544 	pcprip->pc_clpmin = 0;
545 	return (0);
546 }
547 
/*
 * Do-nothing placeholder used for the class operations that the
 * real-time class does not need to implement.
 */
static void
rt_nullsys()
{
	/* intentionally empty */
}
552 
553 /* ARGSUSED */
554 static int
555 rt_canexit(kthread_t *t, cred_t *cred)
556 {
557 	/*
558 	 * Thread can always leave RT class
559 	 */
560 	return (0);
561 }
562 
563 /*
564  * Get the real-time scheduling parameters of the thread pointed to by
565  * rtprocp into the buffer pointed to by rtkparmsp.
566  */
567 static void
568 rt_parmsget(kthread_t *t, void *parmsp)
569 {
570 	rtproc_t	*rtprocp = (rtproc_t *)t->t_cldata;
571 	rtkparms_t	*rtkparmsp = (rtkparms_t *)parmsp;
572 
573 	rtkparmsp->rt_pri = rtprocp->rt_pri;
574 	rtkparmsp->rt_tqntm = rtprocp->rt_pquantum;
575 	rtkparmsp->rt_tqsig = rtprocp->rt_tqsignal;
576 }
577 
578 
579 
580 /*
581  * Check the validity of the real-time parameters in the buffer
582  * pointed to by rtprmsp.
583  * We convert the rtparms buffer from the user supplied format to
584  * our internal format (i.e. time quantum expressed in ticks).
585  */
586 static int
587 rt_parmsin(void *prmsp)
588 {
589 	rtparms_t *rtprmsp = (rtparms_t *)prmsp;
590 	longlong_t	ticks;
591 	uint_t		cflags;
592 
593 	/*
594 	 * First check the validity of parameters and convert
595 	 * the buffer to kernel format.
596 	 */
597 	if ((rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri) &&
598 	    rtprmsp->rt_pri != RT_NOCHANGE)
599 		return (EINVAL);
600 
601 	cflags = (rtprmsp->rt_pri != RT_NOCHANGE ? RT_DOPRI : 0);
602 
603 	if ((rtprmsp->rt_tqsecs == 0 && rtprmsp->rt_tqnsecs == 0) ||
604 	    rtprmsp->rt_tqnsecs >= NANOSEC)
605 		return (EINVAL);
606 
607 	if (rtprmsp->rt_tqnsecs != RT_NOCHANGE)
608 		cflags |= RT_DOTQ;
609 
610 	if (rtprmsp->rt_tqnsecs >= 0) {
611 		if ((ticks = SEC_TO_TICK((longlong_t)rtprmsp->rt_tqsecs) +
612 		    NSEC_TO_TICK_ROUNDUP(rtprmsp->rt_tqnsecs)) > INT_MAX)
613 			return (ERANGE);
614 
615 		((rtkparms_t *)rtprmsp)->rt_tqntm = (int)ticks;
616 	} else {
617 		if (rtprmsp->rt_tqnsecs != RT_NOCHANGE &&
618 		    rtprmsp->rt_tqnsecs != RT_TQINF &&
619 		    rtprmsp->rt_tqnsecs != RT_TQDEF)
620 			return (EINVAL);
621 
622 		((rtkparms_t *)rtprmsp)->rt_tqntm = rtprmsp->rt_tqnsecs;
623 	}
624 	((rtkparms_t *)rtprmsp)->rt_cflags = cflags;
625 
626 	return (0);
627 }
628 
629 
630 /*
631  * Check the validity of the real-time parameters in the pc_vaparms_t
632  * structure vaparmsp and put them in the buffer pointed to by rtprmsp.
633  * pc_vaparms_t contains (key, value) pairs of parameter.
634  * rt_vaparmsin() is the variable parameter version of rt_parmsin().
635  */
636 static int
637 rt_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
638 {
639 	uint_t		secs = 0;
640 	uint_t		cnt;
641 	int		nsecs = 0;
642 	int		priflag, secflag, nsecflag, sigflag;
643 	longlong_t	ticks;
644 	rtkparms_t	*rtprmsp = (rtkparms_t *)prmsp;
645 	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];
646 
647 
648 	/*
649 	 * First check the validity of parameters and convert them
650 	 * from the user supplied format to the internal format.
651 	 */
652 	priflag = secflag = nsecflag = sigflag = 0;
653 	rtprmsp->rt_cflags = 0;
654 
655 	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
656 		return (EINVAL);
657 
658 	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
659 
660 		switch (vpp->pc_key) {
661 		case RT_KY_PRI:
662 			if (priflag++)
663 				return (EINVAL);
664 			rtprmsp->rt_cflags |= RT_DOPRI;
665 			rtprmsp->rt_pri = (pri_t)vpp->pc_parm;
666 			if (rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri)
667 				return (EINVAL);
668 			break;
669 
670 		case RT_KY_TQSECS:
671 			if (secflag++)
672 				return (EINVAL);
673 			rtprmsp->rt_cflags |= RT_DOTQ;
674 			secs = (uint_t)vpp->pc_parm;
675 			break;
676 
677 		case RT_KY_TQNSECS:
678 			if (nsecflag++)
679 				return (EINVAL);
680 			rtprmsp->rt_cflags |= RT_DOTQ;
681 			nsecs = (int)vpp->pc_parm;
682 			break;
683 
684 		case RT_KY_TQSIG:
685 			if (sigflag++)
686 				return (EINVAL);
687 			rtprmsp->rt_cflags |= RT_DOSIG;
688 			rtprmsp->rt_tqsig = (int)vpp->pc_parm;
689 			if (rtprmsp->rt_tqsig < 0 || rtprmsp->rt_tqsig >= NSIG)
690 				return (EINVAL);
691 			break;
692 
693 		default:
694 			return (EINVAL);
695 		}
696 	}
697 
698 	if (vaparmsp->pc_vaparmscnt == 0) {
699 		/*
700 		 * Use default parameters.
701 		 */
702 		rtprmsp->rt_pri = 0;
703 		rtprmsp->rt_tqntm = RT_TQDEF;
704 		rtprmsp->rt_tqsig = 0;
705 		rtprmsp->rt_cflags = RT_DOPRI | RT_DOTQ | RT_DOSIG;
706 	} else if ((rtprmsp->rt_cflags & RT_DOTQ) != 0) {
707 		if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
708 			return (EINVAL);
709 
710 		if (nsecs >= 0) {
711 			if ((ticks = SEC_TO_TICK((longlong_t)secs) +
712 			    NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
713 				return (ERANGE);
714 
715 			rtprmsp->rt_tqntm = (int)ticks;
716 		} else {
717 			if (nsecs != RT_TQINF && nsecs != RT_TQDEF)
718 				return (EINVAL);
719 			rtprmsp->rt_tqntm = nsecs;
720 		}
721 	}
722 
723 	return (0);
724 }
725 
726 /*
727  * Do required processing on the real-time parameter buffer
728  * before it is copied out to the user.
729  * All we have to do is convert the buffer from kernel to user format
730  * (i.e. convert time quantum from ticks to seconds-nanoseconds).
731  */
732 /* ARGSUSED */
733 static int
734 rt_parmsout(void *prmsp, pc_vaparms_t *vaparmsp)
735 {
736 	rtkparms_t	*rtkprmsp = (rtkparms_t *)prmsp;
737 
738 	if (vaparmsp != NULL)
739 		return (0);
740 
741 	if (rtkprmsp->rt_tqntm < 0) {
742 		/*
743 		 * Quantum field set to special value (e.g. RT_TQINF)
744 		 */
745 		((rtparms_t *)rtkprmsp)->rt_tqnsecs = rtkprmsp->rt_tqntm;
746 		((rtparms_t *)rtkprmsp)->rt_tqsecs = 0;
747 	} else {
748 		/* Convert quantum from ticks to seconds-nanoseconds */
749 
750 		timestruc_t ts;
751 		TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
752 		((rtparms_t *)rtkprmsp)->rt_tqsecs = ts.tv_sec;
753 		((rtparms_t *)rtkprmsp)->rt_tqnsecs = ts.tv_nsec;
754 	}
755 
756 	return (0);
757 }
758 
759 
760 /*
761  * Copy all selected real-time class parameters to the user.
762  * The parameters are specified by a key.
763  */
764 static int
765 rt_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
766 {
767 	rtkparms_t	*rtkprmsp = (rtkparms_t *)prmsp;
768 	timestruc_t	ts;
769 	uint_t		cnt;
770 	uint_t		secs;
771 	int		nsecs;
772 	int		priflag, secflag, nsecflag, sigflag;
773 	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];
774 
775 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
776 
777 	priflag = secflag = nsecflag = sigflag = 0;
778 
779 	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
780 		return (EINVAL);
781 
782 	if (rtkprmsp->rt_tqntm < 0) {
783 		/*
784 		 * Quantum field set to special value (e.g. RT_TQINF).
785 		 */
786 		secs = 0;
787 		nsecs = rtkprmsp->rt_tqntm;
788 	} else {
789 		/*
790 		 * Convert quantum from ticks to seconds-nanoseconds.
791 		 */
792 		TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
793 		secs = ts.tv_sec;
794 		nsecs = ts.tv_nsec;
795 	}
796 
797 
798 	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
799 
800 		switch (vpp->pc_key) {
801 		case RT_KY_PRI:
802 			if (priflag++)
803 				return (EINVAL);
804 			if (copyout(&rtkprmsp->rt_pri,
805 			    (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
806 				return (EFAULT);
807 			break;
808 
809 		case RT_KY_TQSECS:
810 			if (secflag++)
811 				return (EINVAL);
812 			if (copyout(&secs, (caddr_t)(uintptr_t)vpp->pc_parm,
813 			    sizeof (uint_t)))
814 				return (EFAULT);
815 			break;
816 
817 		case RT_KY_TQNSECS:
818 			if (nsecflag++)
819 				return (EINVAL);
820 			if (copyout(&nsecs, (caddr_t)(uintptr_t)vpp->pc_parm,
821 			    sizeof (int)))
822 				return (EFAULT);
823 			break;
824 
825 		case RT_KY_TQSIG:
826 			if (sigflag++)
827 				return (EINVAL);
828 			if (copyout(&rtkprmsp->rt_tqsig,
829 			    (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (int)))
830 				return (EFAULT);
831 			break;
832 
833 		default:
834 			return (EINVAL);
835 		}
836 	}
837 
838 	return (0);
839 }
840 
841 
842 /*
843  * Set the scheduling parameters of the thread pointed to by rtprocp
844  * to those specified in the buffer pointed to by rtkprmsp.
845  * Note that the parameters are expected to be in kernel format
846  * (i.e. time quantm expressed in ticks).  Real time parameters copied
847  * in from the user should be processed by rt_parmsin() before they are
848  * passed to this function.
849  */
850 static int
851 rt_parmsset(kthread_t *tx, void *prmsp, id_t reqpcid, cred_t *reqpcredp)
852 {
853 	rtkparms_t *rtkprmsp = (rtkparms_t *)prmsp;
854 	rtproc_t *rtpp = (rtproc_t *)tx->t_cldata;
855 
856 	ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
857 
858 	/*
859 	 * Basic permissions enforced by generic kernel code
860 	 * for all classes require that a thread attempting
861 	 * to change the scheduling parameters of a target thread
862 	 * be privileged or have a real or effective UID
863 	 * matching that of the target thread. We are not
864 	 * called unless these basic permission checks have
865 	 * already passed. The real-time class requires in addition
866 	 * that the requesting thread be real-time unless it is privileged.
867 	 * This may also have been checked previously but if our caller
868 	 * passes us a credential structure we assume it hasn't and
869 	 * we check it here.
870 	 */
871 	if (reqpcredp != NULL && reqpcid != rt_cid &&
872 	    secpolicy_raisepriority(reqpcredp) != 0)
873 		return (EPERM);
874 
875 	thread_lock(tx);
876 	if ((rtkprmsp->rt_cflags & RT_DOPRI) != 0) {
877 		rtpp->rt_pri = rtkprmsp->rt_pri;
878 		rt_change_priority(tx, rtpp);
879 	}
880 	if (rtkprmsp->rt_tqntm == RT_TQINF)
881 		rtpp->rt_pquantum = RT_TQINF;
882 	else if (rtkprmsp->rt_tqntm == RT_TQDEF)
883 		rtpp->rt_timeleft = rtpp->rt_pquantum =
884 		    rt_dptbl[rtpp->rt_pri].rt_quantum;
885 	else if ((rtkprmsp->rt_cflags & RT_DOTQ) != 0)
886 		rtpp->rt_timeleft = rtpp->rt_pquantum = rtkprmsp->rt_tqntm;
887 
888 	if ((rtkprmsp->rt_cflags & RT_DOSIG) != 0)
889 		rtpp->rt_tqsignal = rtkprmsp->rt_tqsig;
890 
891 	thread_unlock(tx);
892 	return (0);
893 }
894 
895 
896 /*
897  * Arrange for thread to be placed in appropriate location
898  * on dispatcher queue.  Runs at splhi() since the clock
899  * interrupt can cause RTBACKQ to be set.
900  */
901 static void
902 rt_preempt(kthread_t *t)
903 {
904 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
905 	klwp_t *lwp;
906 
907 	ASSERT(THREAD_LOCK_HELD(t));
908 
909 	/*
910 	 * If the state is user I allow swapping because I know I won't
911 	 * be holding any locks.
912 	 */
913 	if ((lwp = curthread->t_lwp) != NULL && lwp->lwp_state == LWP_USER)
914 		t->t_schedflag &= ~TS_DONT_SWAP;
915 	if ((rtpp->rt_flags & RTBACKQ) != 0) {
916 		rtpp->rt_timeleft = rtpp->rt_pquantum;
917 		rtpp->rt_flags &= ~RTBACKQ;
918 		setbackdq(t);
919 	} else
920 		setfrontdq(t);
921 
922 }
923 
924 /*
925  * Return the global priority associated with this rt_pri.
926  */
927 static pri_t
928 rt_globpri(kthread_t *t)
929 {
930 	rtproc_t *rtprocp = (rtproc_t *)t->t_cldata;
931 	return (rt_dptbl[rtprocp->rt_pri].rt_globpri);
932 }
933 
934 static void
935 rt_setrun(kthread_t *t)
936 {
937 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
938 
939 	ASSERT(THREAD_LOCK_HELD(t));
940 
941 	rtpp->rt_timeleft = rtpp->rt_pquantum;
942 	rtpp->rt_flags &= ~RTBACKQ;
943 	setbackdq(t);
944 }
945 
946 /*
947  * Returns the priority of the thread, -1 if the thread is loaded or ineligible
948  * for swapin.
949  *
950  * FX and RT threads are designed so that they don't swapout; however, it
951  * is possible that while the thread is swapped out and in another class, it
952  * can be changed to FX or RT.  Since these threads should be swapped in as
953  * soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin
954  * returns SHRT_MAX - 1, so that it gives deference to any swapped out RT
955  * threads.
956  */
957 /* ARGSUSED */
958 static pri_t
959 rt_swapin(kthread_t *t, int flags)
960 {
961 	pri_t	tpri = -1;
962 
963 	ASSERT(THREAD_LOCK_HELD(t));
964 
965 	if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
966 		tpri = (pri_t)SHRT_MAX;
967 	}
968 
969 	return (tpri);
970 }
971 
972 /*
973  * Return an effective priority for swapout.
974  */
975 /* ARGSUSED */
976 static pri_t
977 rt_swapout(kthread_t *t, int flags)
978 {
979 	ASSERT(THREAD_LOCK_HELD(t));
980 
981 	return (-1);
982 }
983 
984 /*
985  * Check for time slice expiration (unless thread has infinite time
986  * slice).  If time slice has expired arrange for thread to be preempted
987  * and placed on back of queue.
988  */
989 static void
990 rt_tick(kthread_t *t)
991 {
992 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
993 
994 	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
995 
996 	thread_lock(t);
997 	if ((rtpp->rt_pquantum != RT_TQINF && --rtpp->rt_timeleft == 0) ||
998 	    (t->t_state == TS_ONPROC && DISP_MUST_SURRENDER(t))) {
999 		if (rtpp->rt_timeleft == 0 && rtpp->rt_tqsignal) {
1000 			thread_unlock(t);
1001 			sigtoproc(ttoproc(t), t, rtpp->rt_tqsignal);
1002 			thread_lock(t);
1003 		}
1004 		rtpp->rt_flags |= RTBACKQ;
1005 		cpu_surrender(t);
1006 	}
1007 	thread_unlock(t);
1008 }
1009 
1010 
1011 /*
1012  * Place the thread waking up on the dispatcher queue.
1013  */
1014 static void
1015 rt_wakeup(kthread_t *t)
1016 {
1017 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1018 
1019 	ASSERT(THREAD_LOCK_HELD(t));
1020 
1021 	rtpp->rt_timeleft = rtpp->rt_pquantum;
1022 	rtpp->rt_flags &= ~RTBACKQ;
1023 	setbackdq(t);
1024 }
1025 
1026 static void
1027 rt_yield(kthread_t *t)
1028 {
1029 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1030 
1031 	ASSERT(t == curthread);
1032 	ASSERT(THREAD_LOCK_HELD(t));
1033 
1034 	rtpp->rt_flags &= ~RTBACKQ;
1035 	setbackdq(t);
1036 }
1037 
1038 /* ARGSUSED */
1039 static int
1040 rt_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1041 {
1042 	return (EINVAL);
1043 }
1044 
1045 /*
1046  * Increment the priority of the specified thread by incr and
1047  * return the new value in *retvalp.
1048  */
1049 static int
1050 rt_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1051 {
1052 	int newpri;
1053 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1054 	rtkparms_t rtkparms;
1055 
1056 	/* If there's no change to the priority, just return current setting */
1057 	if (incr == 0) {
1058 		*retvalp = rtpp->rt_pri;
1059 		return (0);
1060 	}
1061 
1062 	newpri = rtpp->rt_pri + incr;
1063 	if (newpri > rt_maxpri || newpri < 0)
1064 		return (EINVAL);
1065 
1066 	*retvalp = newpri;
1067 	rtkparms.rt_pri = newpri;
1068 	rtkparms.rt_tqntm = RT_NOCHANGE;
1069 	rtkparms.rt_tqsig = 0;
1070 	rtkparms.rt_cflags = RT_DOPRI;
1071 	return (rt_parmsset(t, &rtkparms, rt_cid, cr));
1072 }
1073 
1074 static int
1075 rt_alloc(void **p, int flag)
1076 {
1077 	void *bufp;
1078 	bufp = kmem_alloc(sizeof (rtproc_t), flag);
1079 	if (bufp == NULL) {
1080 		return (ENOMEM);
1081 	} else {
1082 		*p = bufp;
1083 		return (0);
1084 	}
1085 }
1086 
1087 static void
1088 rt_free(void *bufp)
1089 {
1090 	if (bufp)
1091 		kmem_free(bufp, sizeof (rtproc_t));
1092 }
1093 
1094 static void
1095 rt_change_priority(kthread_t *t, rtproc_t *rtpp)
1096 {
1097 	pri_t new_pri;
1098 
1099 	ASSERT(THREAD_LOCK_HELD(t));
1100 
1101 	new_pri = rt_dptbl[rtpp->rt_pri].rt_globpri;
1102 
1103 	t->t_cpri = rtpp->rt_pri;
1104 	if (t == curthread || t->t_state == TS_ONPROC) {
1105 		cpu_t	*cp = t->t_disp_queue->disp_cpu;
1106 		THREAD_CHANGE_PRI(t, new_pri);
1107 		if (t == cp->cpu_dispthread)
1108 			cp->cpu_dispatch_pri = DISP_PRIO(t);
1109 		if (DISP_MUST_SURRENDER(t)) {
1110 			rtpp->rt_flags |= RTBACKQ;
1111 			cpu_surrender(t);
1112 		} else {
1113 			rtpp->rt_timeleft = rtpp->rt_pquantum;
1114 		}
1115 	} else {
1116 		/*
1117 		 * When the priority of a thread is changed,
1118 		 * it may be necessary to adjust its position
1119 		 * on a sleep queue or dispatch queue.  The
1120 		 * function thread_change_pri() accomplishes this.
1121 		 */
1122 		if (thread_change_pri(t, new_pri, 0)) {
1123 			/*
1124 			 * The thread was on a run queue.
1125 			 * Reset its CPU timeleft.
1126 			 */
1127 			rtpp->rt_timeleft = rtpp->rt_pquantum;
1128 		} else {
1129 			rtpp->rt_flags |= RTBACKQ;
1130 		}
1131 	}
1132 }
1133