xref: /illumos-gate/usr/src/uts/common/disp/rt.c (revision 141040e8a310da49386b596573e5dde5580572ec)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/sysmacros.h>
36 #include <sys/cred.h>
37 #include <sys/proc.h>
38 #include <sys/pcb.h>
39 #include <sys/signal.h>
40 #include <sys/user.h>
41 #include <sys/priocntl.h>
42 #include <sys/class.h>
43 #include <sys/disp.h>
44 #include <sys/procset.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/rt.h>
48 #include <sys/rtpriocntl.h>
49 #include <sys/kmem.h>
50 #include <sys/systm.h>
51 #include <sys/errno.h>
52 #include <sys/cpuvar.h>
53 #include <sys/vmsystm.h>
54 #include <sys/time.h>
55 #include <sys/policy.h>
56 #include <sys/sdt.h>
57 #include <sys/cpupart.h>
58 #include <sys/modctl.h>
59 
/* Class initialization routine; referenced by csw below. */
static pri_t	rt_init(id_t, int, classfuncs_t **);

/*
 * Scheduling class descriptor for this module: the class name "RT"
 * and the routine that initializes the class.
 */
static struct sclass csw = {
	"RT",
	rt_init,
	0
};

/*
 * Scheduling-class module linkage wrapping csw.
 */
static struct modlsched modlsched = {
	&mod_schedops, "realtime scheduling class", &csw
};

/*
 * Top-level module linkage handed to mod_install() and mod_info().
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlsched, NULL
};
75 
76 int
77 _init()
78 {
79 	return (mod_install(&modlinkage));
80 }
81 
82 int
83 _fini()
84 {
85 	return (EBUSY);		/* don't remove RT for now */
86 }
87 
88 int
89 _info(struct modinfo *modinfop)
90 {
91 	return (mod_info(&modlinkage, modinfop));
92 }
93 
94 
95 /*
96  * Class specific code for the real-time class
97  */
98 
/*
 * Real-time priority limit and the dispatcher parameter table.
 * rt_dptbl is assigned in rt_init() from rt_getdptbl() and is indexed
 * by real-time priority, 0 through rt_maxpri.
 */
#define	RTMAXPRI 59

pri_t rt_maxpri = RTMAXPRI;	/* maximum real-time priority */
rtdpent_t *rt_dptbl;	  /* real-time dispatcher parameter table */
106 
/*
 * Control flags (kparms->rt_cflags).  Each bit marks one field of an
 * rtkparms_t request as "to be applied".
 */
#define	RT_DOPRI	0x01	/* change priority */
#define	RT_DOTQ		0x02	/* change RT time quantum */
#define	RT_DOSIG	0x04	/* change RT time quantum signal */
113 
/*
 * Forward declarations for the class operations placed in rt_classfuncs.
 * rt_nullsys is deliberately declared without a prototype because it
 * fills operation slots of several different signatures.
 */
static int	rt_admin(caddr_t, cred_t *);
static int	rt_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
static int	rt_fork(kthread_t *, kthread_t *, void *);
static int	rt_getclinfo(void *);
static int	rt_getclpri(pcpri_t *);
static int	rt_parmsin(void *);
static int	rt_parmsout(void *, pc_vaparms_t *);
static int	rt_vaparmsin(void *, pc_vaparms_t *);
static int	rt_vaparmsout(void *, pc_vaparms_t *);
static int	rt_parmsset(kthread_t *, void *, id_t, cred_t *);
static int	rt_donice(kthread_t *, cred_t *, int, int *);
static void	rt_exitclass(void *);
static int	rt_canexit(kthread_t *, cred_t *);
static void	rt_forkret(kthread_t *, kthread_t *);
static void	rt_nullsys();
static void	rt_parmsget(kthread_t *, void *);
static void	rt_preempt(kthread_t *);
static void	rt_setrun(kthread_t *);
static void	rt_tick(kthread_t *);
static void	rt_wakeup(kthread_t *);
static pri_t	rt_swapin(kthread_t *, int);
static pri_t	rt_swapout(kthread_t *, int);
static pri_t	rt_globpri(kthread_t *);
static void	rt_yield(kthread_t *);
static int	rt_alloc(void **, int);
static void	rt_free(void *);

/* Internal helper: apply rtproc's rt_pri to the thread's priority. */
static void	rt_change_priority(kthread_t *, rtproc_t *);

/*
 * File-scope state.  rt_plisthead is a circular, doubly linked list
 * head (initialized to point at itself in rt_init()); insertions and
 * removals are done under rt_list_lock.
 */
static id_t	rt_cid;		/* real-time class ID */
static rtproc_t	rt_plisthead;	/* dummy rtproc at head of rtproc list */
static kmutex_t	rt_dptblock;	/* protects realtime dispatch table */
static kmutex_t	rt_list_lock;	/* protects RT thread list */

/* Supplies the dispatcher parameter table used to set rt_dptbl. */
extern rtdpent_t *rt_getdptbl(void);
149 
/*
 * Operations vector for the real-time class.  rt_init() returns its
 * address through *clfuncspp.  The initializer is positional, so the
 * order of entries must match the layout of struct classfuncs.
 * Slots holding rt_nullsys are operations for which this class does
 * nothing (rt_nullsys has an empty body).
 */
static struct classfuncs rt_classfuncs = {
	/* class ops */
	rt_admin,
	rt_getclinfo,
	rt_parmsin,
	rt_parmsout,
	rt_vaparmsin,
	rt_vaparmsout,
	rt_getclpri,
	rt_alloc,
	rt_free,
	/* thread ops */
	rt_enterclass,
	rt_exitclass,
	rt_canexit,
	rt_fork,
	rt_forkret,
	rt_parmsget,
	rt_parmsset,
	rt_nullsys,	/* stop */
	rt_nullsys,	/* exit */
	rt_nullsys,	/* active */
	rt_nullsys,	/* inactive */
	rt_swapin,
	rt_swapout,
	rt_nullsys,	/* trapret */
	rt_preempt,
	rt_setrun,
	rt_nullsys,	/* sleep */
	rt_tick,
	rt_wakeup,
	rt_donice,
	rt_globpri,
	rt_nullsys,	/* set_process_group */
	rt_yield,
};
186 
187 /*
188  * Real-time class initialization. Called by dispinit() at boot time.
189  * We can ignore the clparmsz argument since we know that the smallest
190  * possible parameter buffer is big enough for us.
191  */
192 /* ARGSUSED */
193 pri_t
194 rt_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
195 {
196 	rt_dptbl = rt_getdptbl();
197 	rt_cid = cid;	/* Record our class ID */
198 
199 	/*
200 	 * Initialize the rtproc list.
201 	 */
202 	rt_plisthead.rt_next = rt_plisthead.rt_prev = &rt_plisthead;
203 
204 	/*
205 	 * We're required to return a pointer to our classfuncs
206 	 * structure and the highest global priority value we use.
207 	 */
208 	*clfuncspp = &rt_classfuncs;
209 	mutex_init(&rt_dptblock, NULL, MUTEX_DEFAULT, NULL);
210 	mutex_init(&rt_list_lock, NULL, MUTEX_DEFAULT, NULL);
211 	return (rt_dptbl[rt_maxpri].rt_globpri);
212 }
213 
214 /*
215  * Get or reset the rt_dptbl values per the user's request.
216  */
217 /* ARGSUSED */
218 static int
219 rt_admin(caddr_t uaddr, cred_t *reqpcredp)
220 {
221 	rtadmin_t	rtadmin;
222 	rtdpent_t	*tmpdpp;
223 	size_t		userdpsz;
224 	size_t		rtdpsz;
225 	int		i;
226 
227 	if (get_udatamodel() == DATAMODEL_NATIVE) {
228 		if (copyin(uaddr, &rtadmin, sizeof (rtadmin_t)))
229 			return (EFAULT);
230 	}
231 #ifdef _SYSCALL32_IMPL
232 	else {
233 		/* rtadmin struct from ILP32 callers */
234 		rtadmin32_t rtadmin32;
235 		if (copyin(uaddr, &rtadmin32, sizeof (rtadmin32_t)))
236 			return (EFAULT);
237 		rtadmin.rt_dpents =
238 		    (struct rtdpent *)(uintptr_t)rtadmin32.rt_dpents;
239 		rtadmin.rt_ndpents = rtadmin32.rt_ndpents;
240 		rtadmin.rt_cmd = rtadmin32.rt_cmd;
241 	}
242 #endif /* _SYSCALL32_IMPL */
243 
244 	rtdpsz = (rt_maxpri + 1) * sizeof (rtdpent_t);
245 
246 	switch (rtadmin.rt_cmd) {
247 
248 	case RT_GETDPSIZE:
249 		rtadmin.rt_ndpents = rt_maxpri + 1;
250 
251 		if (get_udatamodel() == DATAMODEL_NATIVE) {
252 			if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
253 				return (EFAULT);
254 		}
255 #ifdef _SYSCALL32_IMPL
256 		else {
257 			/* return rtadmin struct to ILP32 callers */
258 			rtadmin32_t rtadmin32;
259 			rtadmin32.rt_dpents =
260 			    (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
261 			rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
262 			rtadmin32.rt_cmd = rtadmin.rt_cmd;
263 			if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
264 				return (EFAULT);
265 		}
266 #endif /* _SYSCALL32_IMPL */
267 
268 		break;
269 
270 	case RT_GETDPTBL:
271 		userdpsz = MIN(rtadmin.rt_ndpents * sizeof (rtdpent_t),
272 		    rtdpsz);
273 		if (copyout(rt_dptbl, rtadmin.rt_dpents, userdpsz))
274 			return (EFAULT);
275 		rtadmin.rt_ndpents = userdpsz / sizeof (rtdpent_t);
276 
277 		if (get_udatamodel() == DATAMODEL_NATIVE) {
278 			if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
279 				return (EFAULT);
280 		}
281 #ifdef _SYSCALL32_IMPL
282 		else {
283 			/* return rtadmin struct to ILP32 callers */
284 			rtadmin32_t rtadmin32;
285 			rtadmin32.rt_dpents =
286 			    (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
287 			rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
288 			rtadmin32.rt_cmd = rtadmin.rt_cmd;
289 			if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
290 				return (EFAULT);
291 		}
292 #endif /* _SYSCALL32_IMPL */
293 		break;
294 
295 	case RT_SETDPTBL:
296 		/*
297 		 * We require that the requesting process has sufficient
298 		 * priveleges.  We also require that the table supplied by
299 		 * the user exactly match the current rt_dptbl in size.
300 		 */
301 		if (secpolicy_dispadm(reqpcredp) != 0)
302 			return (EPERM);
303 		if (rtadmin.rt_ndpents * sizeof (rtdpent_t) != rtdpsz)
304 			return (EINVAL);
305 
306 		/*
307 		 * We read the user supplied table into a temporary buffer
308 		 * where the time quantum values are validated before
309 		 * being copied to the rt_dptbl.
310 		 */
311 		tmpdpp = kmem_alloc(rtdpsz, KM_SLEEP);
312 		if (copyin(rtadmin.rt_dpents, tmpdpp, rtdpsz)) {
313 			kmem_free(tmpdpp, rtdpsz);
314 			return (EFAULT);
315 		}
316 		for (i = 0; i < rtadmin.rt_ndpents; i++) {
317 
318 			/*
319 			 * Validate the user supplied time quantum values.
320 			 */
321 			if (tmpdpp[i].rt_quantum <= 0 &&
322 			    tmpdpp[i].rt_quantum != RT_TQINF) {
323 				kmem_free(tmpdpp, rtdpsz);
324 				return (EINVAL);
325 			}
326 		}
327 
328 		/*
329 		 * Copy the user supplied values over the current rt_dptbl
330 		 * values.  The rt_globpri member is read-only so we don't
331 		 * overwrite it.
332 		 */
333 		mutex_enter(&rt_dptblock);
334 		for (i = 0; i < rtadmin.rt_ndpents; i++)
335 			rt_dptbl[i].rt_quantum = tmpdpp[i].rt_quantum;
336 		mutex_exit(&rt_dptblock);
337 		kmem_free(tmpdpp, rtdpsz);
338 		break;
339 
340 	default:
341 		return (EINVAL);
342 	}
343 	return (0);
344 }
345 
346 
347 /*
348  * Allocate a real-time class specific proc structure and
349  * initialize it with the parameters supplied. Also move thread
350  * to specified real-time priority.
351  */
352 /* ARGSUSED */
353 static int
354 rt_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
355     void *bufp)
356 {
357 	rtkparms_t *rtkparmsp = (rtkparms_t *)parmsp;
358 	rtproc_t *rtpp;
359 
360 	/*
361 	 * For a thread to enter the real-time class the thread
362 	 * which initiates the request must be privileged.
363 	 * This may have been checked previously but if our
364 	 * caller passed us a credential structure we assume it
365 	 * hasn't and we check it here.
366 	 */
367 	if (reqpcredp != NULL && secpolicy_setpriority(reqpcredp) != 0)
368 		return (EPERM);
369 
370 	rtpp = (rtproc_t *)bufp;
371 	ASSERT(rtpp != NULL);
372 
373 	/*
374 	 * If this thread's lwp is swapped out, it will be brought in
375 	 * when it is put onto the runqueue.
376 	 *
377 	 * Now, Initialize the rtproc structure.
378 	 */
379 	if (rtkparmsp == NULL) {
380 		/*
381 		 * Use default values
382 		 */
383 		rtpp->rt_pri = 0;
384 		rtpp->rt_pquantum = rt_dptbl[0].rt_quantum;
385 		rtpp->rt_tqsignal = 0;
386 	} else {
387 		/*
388 		 * Use supplied values
389 		 */
390 		if ((rtkparmsp->rt_cflags & RT_DOPRI) == 0)
391 			rtpp->rt_pri = 0;
392 		else
393 			rtpp->rt_pri = rtkparmsp->rt_pri;
394 
395 		if (rtkparmsp->rt_tqntm == RT_TQINF)
396 			rtpp->rt_pquantum = RT_TQINF;
397 		else if (rtkparmsp->rt_tqntm == RT_TQDEF ||
398 		    (rtkparmsp->rt_cflags & RT_DOTQ) == 0)
399 			rtpp->rt_pquantum = rt_dptbl[rtpp->rt_pri].rt_quantum;
400 		else
401 			rtpp->rt_pquantum = rtkparmsp->rt_tqntm;
402 
403 		if ((rtkparmsp->rt_cflags & RT_DOSIG) == 0)
404 			rtpp->rt_tqsignal = 0;
405 		else
406 			rtpp->rt_tqsignal = rtkparmsp->rt_tqsig;
407 	}
408 	rtpp->rt_flags = 0;
409 	rtpp->rt_tp = t;
410 	/*
411 	 * Reset thread priority
412 	 */
413 	thread_lock(t);
414 	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
415 	t->t_cid = cid;
416 	t->t_cldata = (void *)rtpp;
417 	t->t_schedflag &= ~TS_RUNQMATCH;
418 	rt_change_priority(t, rtpp);
419 	thread_unlock(t);
420 	/*
421 	 * Link new structure into rtproc list
422 	 */
423 	mutex_enter(&rt_list_lock);
424 	rtpp->rt_next = rt_plisthead.rt_next;
425 	rtpp->rt_prev = &rt_plisthead;
426 	rt_plisthead.rt_next->rt_prev = rtpp;
427 	rt_plisthead.rt_next = rtpp;
428 	mutex_exit(&rt_list_lock);
429 	return (0);
430 }
431 
432 
433 /*
434  * Free rtproc structure of thread.
435  */
436 static void
437 rt_exitclass(void *procp)
438 {
439 	rtproc_t *rtprocp = (rtproc_t *)procp;
440 
441 	mutex_enter(&rt_list_lock);
442 	rtprocp->rt_prev->rt_next = rtprocp->rt_next;
443 	rtprocp->rt_next->rt_prev = rtprocp->rt_prev;
444 	mutex_exit(&rt_list_lock);
445 	kmem_free(rtprocp, sizeof (rtproc_t));
446 }
447 
448 
449 /*
450  * Allocate and initialize real-time class specific
451  * proc structure for child.
452  */
453 /* ARGSUSED */
454 static int
455 rt_fork(kthread_t *t, kthread_t *ct, void *bufp)
456 {
457 	rtproc_t *prtpp;
458 	rtproc_t *crtpp;
459 
460 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
461 
462 	/*
463 	 * Initialize child's rtproc structure
464 	 */
465 	crtpp = (rtproc_t *)bufp;
466 	ASSERT(crtpp != NULL);
467 	prtpp = (rtproc_t *)t->t_cldata;
468 	thread_lock(t);
469 	crtpp->rt_timeleft = crtpp->rt_pquantum = prtpp->rt_pquantum;
470 	crtpp->rt_pri = prtpp->rt_pri;
471 	crtpp->rt_flags = prtpp->rt_flags & ~RTBACKQ;
472 	crtpp->rt_tqsignal = prtpp->rt_tqsignal;
473 
474 	crtpp->rt_tp = ct;
475 	thread_unlock(t);
476 
477 	/*
478 	 * Link new structure into rtproc list
479 	 */
480 	ct->t_cldata = (void *)crtpp;
481 	mutex_enter(&rt_list_lock);
482 	crtpp->rt_next = rt_plisthead.rt_next;
483 	crtpp->rt_prev = &rt_plisthead;
484 	rt_plisthead.rt_next->rt_prev = crtpp;
485 	rt_plisthead.rt_next = crtpp;
486 	mutex_exit(&rt_list_lock);
487 	return (0);
488 }
489 
490 
/*
 * The child goes to the back of its dispatcher queue while the
 * parent continues to run after a real time thread forks.
 *
 * t is the forking (current) thread and ct the child thread.
 * Entered with pidlock held; pidlock is released here, after the
 * child's p_lock has been acquired.  continuelwps() is called on the
 * child process first and on the parent process second, each under
 * that process's own p_lock.
 */
/* ARGSUSED */
static void
rt_forkret(kthread_t *t, kthread_t *ct)
{
	proc_t *pp = ttoproc(t);
	proc_t *cp = ttoproc(ct);

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Grab the child's p_lock before dropping pidlock to ensure
	 * the process does not disappear before we set it running.
	 */
	mutex_enter(&cp->p_lock);
	mutex_exit(&pidlock);
	continuelwps(cp);
	mutex_exit(&cp->p_lock);

	/* Child first, parent second. */
	mutex_enter(&pp->p_lock);
	continuelwps(pp);
	mutex_exit(&pp->p_lock);
}
518 
519 
520 /*
521  * Get information about the real-time class into the buffer
522  * pointed to by rtinfop.  The maximum configured real-time
523  * priority is the only information we supply.  We ignore the
524  * class and credential arguments because anyone can have this
525  * information.
526  */
527 /* ARGSUSED */
528 static int
529 rt_getclinfo(void *infop)
530 {
531 	rtinfo_t *rtinfop = (rtinfo_t *)infop;
532 	rtinfop->rt_maxpri = rt_maxpri;
533 	return (0);
534 }
535 
536 /*
537  * Return the global scheduling priority ranges of the realtime
538  * class in pcpri_t structure.
539  */
540 static int
541 rt_getclpri(pcpri_t *pcprip)
542 {
543 	pcprip->pc_clpmax = rt_dptbl[rt_maxpri].rt_globpri;
544 	pcprip->pc_clpmin = rt_dptbl[0].rt_globpri;
545 	return (0);
546 }
/*
 * Do-nothing operation.  Used in rt_classfuncs for every class
 * operation the real-time class has no work to do for.  It is left
 * without a parameter prototype because those slots have differing
 * signatures.
 */
static void
rt_nullsys()
{
}
551 
/*
 * Decide whether thread t may leave the real-time class.  Both
 * arguments are ignored; the answer is always 0 (allowed).
 */
/* ARGSUSED */
static int
rt_canexit(kthread_t *t, cred_t *cred)
{
	/*
	 * Thread can always leave RT class
	 */
	return (0);
}
561 
562 /*
563  * Get the real-time scheduling parameters of the thread pointed to by
564  * rtprocp into the buffer pointed to by rtkparmsp.
565  */
566 static void
567 rt_parmsget(kthread_t *t, void *parmsp)
568 {
569 	rtproc_t	*rtprocp = (rtproc_t *)t->t_cldata;
570 	rtkparms_t	*rtkparmsp = (rtkparms_t *)parmsp;
571 
572 	rtkparmsp->rt_pri = rtprocp->rt_pri;
573 	rtkparmsp->rt_tqntm = rtprocp->rt_pquantum;
574 	rtkparmsp->rt_tqsig = rtprocp->rt_tqsignal;
575 }
576 
577 
578 
/*
 * Check the validity of the real-time parameters in the buffer
 * pointed to by prmsp.
 * We convert the rtparms buffer from the user supplied format to
 * our internal format (i.e. time quantum expressed in ticks).
 *
 * The conversion is done in place: the buffer is read as an rtparms_t
 * and the results are stored through an rtkparms_t view of the same
 * memory, so the order of the reads and writes below matters.
 *
 * Returns 0 on success, EINVAL for an out-of-range priority or an
 * invalid quantum specification, ERANGE if the quantum does not fit
 * in an int number of ticks.
 */
static int
rt_parmsin(void *prmsp)
{
	rtparms_t *rtprmsp = (rtparms_t *)prmsp;
	longlong_t	ticks;
	uint_t		cflags;

	/*
	 * First check the validity of parameters and convert
	 * the buffer to kernel format.
	 */
	if ((rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri) &&
	    rtprmsp->rt_pri != RT_NOCHANGE)
		return (EINVAL);

	/* RT_NOCHANGE means "leave the priority alone". */
	cflags = (rtprmsp->rt_pri != RT_NOCHANGE ? RT_DOPRI : 0);

	/* A zero-length quantum or an oversized nanosecond part is invalid. */
	if ((rtprmsp->rt_tqsecs == 0 && rtprmsp->rt_tqnsecs == 0) ||
	    rtprmsp->rt_tqnsecs >= NANOSEC)
		return (EINVAL);

	if (rtprmsp->rt_tqnsecs != RT_NOCHANGE)
		cflags |= RT_DOTQ;

	if (rtprmsp->rt_tqnsecs >= 0) {
		/* Ordinary quantum: convert seconds + nanoseconds to ticks. */
		if ((ticks = SEC_TO_TICK((longlong_t)rtprmsp->rt_tqsecs) +
		    NSEC_TO_TICK_ROUNDUP(rtprmsp->rt_tqnsecs)) > INT_MAX)
			return (ERANGE);

		((rtkparms_t *)rtprmsp)->rt_tqntm = (int)ticks;
	} else {
		/*
		 * Negative nanoseconds must be one of the special values;
		 * it is passed through unchanged as the quantum.
		 */
		if (rtprmsp->rt_tqnsecs != RT_NOCHANGE &&
		    rtprmsp->rt_tqnsecs != RT_TQINF &&
		    rtprmsp->rt_tqnsecs != RT_TQDEF)
			return (EINVAL);

		((rtkparms_t *)rtprmsp)->rt_tqntm = rtprmsp->rt_tqnsecs;
	}
	/* Written last, after every user-format field has been read. */
	((rtkparms_t *)rtprmsp)->rt_cflags = cflags;

	return (0);
}
627 
628 
629 /*
630  * Check the validity of the real-time parameters in the pc_vaparms_t
631  * structure vaparmsp and put them in the buffer pointed to by rtprmsp.
632  * pc_vaparms_t contains (key, value) pairs of parameter.
633  * rt_vaparmsin() is the variable parameter version of rt_parmsin().
634  */
635 static int
636 rt_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
637 {
638 	uint_t		secs = 0;
639 	uint_t		cnt;
640 	int		nsecs = 0;
641 	int		priflag, secflag, nsecflag, sigflag;
642 	longlong_t	ticks;
643 	rtkparms_t	*rtprmsp = (rtkparms_t *)prmsp;
644 	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];
645 
646 
647 	/*
648 	 * First check the validity of parameters and convert them
649 	 * from the user supplied format to the internal format.
650 	 */
651 	priflag = secflag = nsecflag = sigflag = 0;
652 	rtprmsp->rt_cflags = 0;
653 
654 	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
655 		return (EINVAL);
656 
657 	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
658 
659 		switch (vpp->pc_key) {
660 		case RT_KY_PRI:
661 			if (priflag++)
662 				return (EINVAL);
663 			rtprmsp->rt_cflags |= RT_DOPRI;
664 			rtprmsp->rt_pri = (pri_t)vpp->pc_parm;
665 			if (rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri)
666 				return (EINVAL);
667 			break;
668 
669 		case RT_KY_TQSECS:
670 			if (secflag++)
671 				return (EINVAL);
672 			rtprmsp->rt_cflags |= RT_DOTQ;
673 			secs = (uint_t)vpp->pc_parm;
674 			break;
675 
676 		case RT_KY_TQNSECS:
677 			if (nsecflag++)
678 				return (EINVAL);
679 			rtprmsp->rt_cflags |= RT_DOTQ;
680 			nsecs = (int)vpp->pc_parm;
681 			break;
682 
683 		case RT_KY_TQSIG:
684 			if (sigflag++)
685 				return (EINVAL);
686 			rtprmsp->rt_cflags |= RT_DOSIG;
687 			rtprmsp->rt_tqsig = (int)vpp->pc_parm;
688 			if (rtprmsp->rt_tqsig < 0 || rtprmsp->rt_tqsig >= NSIG)
689 				return (EINVAL);
690 			break;
691 
692 		default:
693 			return (EINVAL);
694 		}
695 	}
696 
697 	if (vaparmsp->pc_vaparmscnt == 0) {
698 		/*
699 		 * Use default parameters.
700 		 */
701 		rtprmsp->rt_pri = 0;
702 		rtprmsp->rt_tqntm = RT_TQDEF;
703 		rtprmsp->rt_tqsig = 0;
704 		rtprmsp->rt_cflags = RT_DOPRI | RT_DOTQ | RT_DOSIG;
705 	} else if ((rtprmsp->rt_cflags & RT_DOTQ) != 0) {
706 		if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
707 			return (EINVAL);
708 
709 		if (nsecs >= 0) {
710 			if ((ticks = SEC_TO_TICK((longlong_t)secs) +
711 			    NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
712 				return (ERANGE);
713 
714 			rtprmsp->rt_tqntm = (int)ticks;
715 		} else {
716 			if (nsecs != RT_TQINF && nsecs != RT_TQDEF)
717 				return (EINVAL);
718 			rtprmsp->rt_tqntm = nsecs;
719 		}
720 	}
721 
722 	return (0);
723 }
724 
/*
 * Do required processing on the real-time parameter buffer
 * before it is copied out to the user.
 * All we have to do is convert the buffer from kernel to user format
 * (i.e. convert time quantum from ticks to seconds-nanoseconds).
 *
 * Nothing is done when vaparmsp is non-NULL.  Otherwise the conversion
 * is done in place: the quantum is read through the rtkparms_t view of
 * the buffer and the result is stored through an rtparms_t view of the
 * same memory, so the order of the statements below matters.
 *
 * Always returns 0.
 */
/* ARGSUSED */
static int
rt_parmsout(void *prmsp, pc_vaparms_t *vaparmsp)
{
	rtkparms_t	*rtkprmsp = (rtkparms_t *)prmsp;

	if (vaparmsp != NULL)
		return (0);

	if (rtkprmsp->rt_tqntm < 0) {
		/*
		 * Quantum field set to special value (e.g. RT_TQINF)
		 */
		((rtparms_t *)rtkprmsp)->rt_tqnsecs = rtkprmsp->rt_tqntm;
		((rtparms_t *)rtkprmsp)->rt_tqsecs = 0;
	} else {
		/* Convert quantum from ticks to seconds-nanoseconds */

		timestruc_t ts;
		TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
		((rtparms_t *)rtkprmsp)->rt_tqsecs = ts.tv_sec;
		((rtparms_t *)rtkprmsp)->rt_tqnsecs = ts.tv_nsec;
	}

	return (0);
}
757 
758 
759 /*
760  * Copy all selected real-time class parameters to the user.
761  * The parameters are specified by a key.
762  */
763 static int
764 rt_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
765 {
766 	rtkparms_t	*rtkprmsp = (rtkparms_t *)prmsp;
767 	timestruc_t	ts;
768 	uint_t		cnt;
769 	uint_t		secs;
770 	int		nsecs;
771 	int		priflag, secflag, nsecflag, sigflag;
772 	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];
773 
774 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
775 
776 	priflag = secflag = nsecflag = sigflag = 0;
777 
778 	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
779 		return (EINVAL);
780 
781 	if (rtkprmsp->rt_tqntm < 0) {
782 		/*
783 		 * Quantum field set to special value (e.g. RT_TQINF).
784 		 */
785 		secs = 0;
786 		nsecs = rtkprmsp->rt_tqntm;
787 	} else {
788 		/*
789 		 * Convert quantum from ticks to seconds-nanoseconds.
790 		 */
791 		TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
792 		secs = ts.tv_sec;
793 		nsecs = ts.tv_nsec;
794 	}
795 
796 
797 	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
798 
799 		switch (vpp->pc_key) {
800 		case RT_KY_PRI:
801 			if (priflag++)
802 				return (EINVAL);
803 			if (copyout(&rtkprmsp->rt_pri,
804 			    (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
805 				return (EFAULT);
806 			break;
807 
808 		case RT_KY_TQSECS:
809 			if (secflag++)
810 				return (EINVAL);
811 			if (copyout(&secs, (caddr_t)(uintptr_t)vpp->pc_parm,
812 			    sizeof (uint_t)))
813 				return (EFAULT);
814 			break;
815 
816 		case RT_KY_TQNSECS:
817 			if (nsecflag++)
818 				return (EINVAL);
819 			if (copyout(&nsecs, (caddr_t)(uintptr_t)vpp->pc_parm,
820 			    sizeof (int)))
821 				return (EFAULT);
822 			break;
823 
824 		case RT_KY_TQSIG:
825 			if (sigflag++)
826 				return (EINVAL);
827 			if (copyout(&rtkprmsp->rt_tqsig,
828 			    (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (int)))
829 				return (EFAULT);
830 			break;
831 
832 		default:
833 			return (EINVAL);
834 		}
835 	}
836 
837 	return (0);
838 }
839 
840 
841 /*
842  * Set the scheduling parameters of the thread pointed to by rtprocp
843  * to those specified in the buffer pointed to by rtkprmsp.
844  * Note that the parameters are expected to be in kernel format
845  * (i.e. time quantm expressed in ticks).  Real time parameters copied
846  * in from the user should be processed by rt_parmsin() before they are
847  * passed to this function.
848  */
849 static int
850 rt_parmsset(kthread_t *tx, void *prmsp, id_t reqpcid, cred_t *reqpcredp)
851 {
852 	rtkparms_t *rtkprmsp = (rtkparms_t *)prmsp;
853 	rtproc_t *rtpp = (rtproc_t *)tx->t_cldata;
854 
855 	ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
856 
857 	/*
858 	 * Basic permissions enforced by generic kernel code
859 	 * for all classes require that a thread attempting
860 	 * to change the scheduling parameters of a target thread
861 	 * be privileged or have a real or effective UID
862 	 * matching that of the target thread. We are not
863 	 * called unless these basic permission checks have
864 	 * already passed. The real-time class requires in addition
865 	 * that the requesting thread be real-time unless it is privileged.
866 	 * This may also have been checked previously but if our caller
867 	 * passes us a credential structure we assume it hasn't and
868 	 * we check it here.
869 	 */
870 	if (reqpcredp != NULL && reqpcid != rt_cid &&
871 	    secpolicy_setpriority(reqpcredp) != 0)
872 		return (EPERM);
873 
874 	thread_lock(tx);
875 	if ((rtkprmsp->rt_cflags & RT_DOPRI) != 0) {
876 		rtpp->rt_pri = rtkprmsp->rt_pri;
877 		rt_change_priority(tx, rtpp);
878 	}
879 	if (rtkprmsp->rt_tqntm == RT_TQINF)
880 		rtpp->rt_pquantum = RT_TQINF;
881 	else if (rtkprmsp->rt_tqntm == RT_TQDEF)
882 		rtpp->rt_timeleft = rtpp->rt_pquantum =
883 		    rt_dptbl[rtpp->rt_pri].rt_quantum;
884 	else if ((rtkprmsp->rt_cflags & RT_DOTQ) != 0)
885 		rtpp->rt_timeleft = rtpp->rt_pquantum = rtkprmsp->rt_tqntm;
886 
887 	if ((rtkprmsp->rt_cflags & RT_DOSIG) != 0)
888 		rtpp->rt_tqsignal = rtkprmsp->rt_tqsig;
889 
890 	thread_unlock(tx);
891 	return (0);
892 }
893 
894 
895 /*
896  * Arrange for thread to be placed in appropriate location
897  * on dispatcher queue.  Runs at splhi() since the clock
898  * interrupt can cause RTBACKQ to be set.
899  */
900 static void
901 rt_preempt(kthread_t *t)
902 {
903 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
904 	klwp_t *lwp;
905 
906 	ASSERT(THREAD_LOCK_HELD(t));
907 
908 	/*
909 	 * If the state is user I allow swapping because I know I won't
910 	 * be holding any locks.
911 	 */
912 	if ((lwp = curthread->t_lwp) != NULL && lwp->lwp_state == LWP_USER)
913 		t->t_schedflag &= ~TS_DONT_SWAP;
914 	if ((rtpp->rt_flags & RTBACKQ) != 0) {
915 		rtpp->rt_timeleft = rtpp->rt_pquantum;
916 		rtpp->rt_flags &= ~RTBACKQ;
917 		setbackdq(t);
918 	} else
919 		setfrontdq(t);
920 
921 }
922 
923 /*
924  * Return the global priority associated with this rt_pri.
925  */
926 static pri_t
927 rt_globpri(kthread_t *t)
928 {
929 	rtproc_t *rtprocp = (rtproc_t *)t->t_cldata;
930 	return (rt_dptbl[rtprocp->rt_pri].rt_globpri);
931 }
932 
933 static void
934 rt_setrun(kthread_t *t)
935 {
936 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
937 
938 	ASSERT(THREAD_LOCK_HELD(t));
939 
940 	rtpp->rt_timeleft = rtpp->rt_pquantum;
941 	rtpp->rt_flags &= ~RTBACKQ;
942 	setbackdq(t);
943 }
944 
945 /*
946  * Returns the priority of the thread, -1 if the thread is loaded or ineligible
947  * for swapin.
948  *
949  * FX and RT threads are designed so that they don't swapout; however, it
950  * is possible that while the thread is swapped out and in another class, it
951  * can be changed to FX or RT.  Since these threads should be swapped in as
952  * soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin
953  * returns SHRT_MAX - 1, so that it gives deference to any swapped out RT
954  * threads.
955  */
956 /* ARGSUSED */
957 static pri_t
958 rt_swapin(kthread_t *t, int flags)
959 {
960 	pri_t	tpri = -1;
961 
962 	ASSERT(THREAD_LOCK_HELD(t));
963 
964 	if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
965 		tpri = (pri_t)SHRT_MAX;
966 	}
967 
968 	return (tpri);
969 }
970 
/*
 * Return an effective priority for swapout.
 *
 * Always returns -1; neither the thread's state nor flags is
 * consulted.  The thread lock must be held.
 */
/* ARGSUSED */
static pri_t
rt_swapout(kthread_t *t, int flags)
{
	ASSERT(THREAD_LOCK_HELD(t));

	return (-1);
}
982 
/*
 * Check for time slice expiration (unless thread has infinite time
 * slice).  If time slice has expired arrange for thread to be preempted
 * and placed on back of queue.
 *
 * The same preemption is arranged when the thread is on a processor
 * and DISP_MUST_SURRENDER() says it must give it up.  If the quantum
 * has run out and a quantum signal is configured, that signal is sent
 * to the thread first.
 *
 * The owning process's p_lock must be held.
 */
static void
rt_tick(kthread_t *t)
{
	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);

	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));

	thread_lock(t);
	/* rt_timeleft is only counted down for finite quanta. */
	if ((rtpp->rt_pquantum != RT_TQINF && --rtpp->rt_timeleft == 0) ||
	    (t->t_state == TS_ONPROC && DISP_MUST_SURRENDER(t))) {
		if (rtpp->rt_timeleft == 0 && rtpp->rt_tqsignal) {
			/*
			 * The thread lock is dropped around the signal
			 * delivery and retaken afterwards.
			 */
			thread_unlock(t);
			sigtoproc(ttoproc(t), t, rtpp->rt_tqsignal);
			thread_lock(t);
		}
		/* Tell rt_preempt() to requeue at the back. */
		rtpp->rt_flags |= RTBACKQ;
		cpu_surrender(t);
	}
	thread_unlock(t);
}
1008 
1009 
1010 /*
1011  * Place the thread waking up on the dispatcher queue.
1012  */
1013 static void
1014 rt_wakeup(kthread_t *t)
1015 {
1016 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1017 
1018 	ASSERT(THREAD_LOCK_HELD(t));
1019 
1020 	rtpp->rt_timeleft = rtpp->rt_pquantum;
1021 	rtpp->rt_flags &= ~RTBACKQ;
1022 	setbackdq(t);
1023 }
1024 
/*
 * Yield the processor: clear RTBACKQ and put the current thread at
 * the back of its dispatcher queue.  Unlike rt_setrun() and
 * rt_wakeup(), the remaining time quantum (rt_timeleft) is left
 * untouched.  t must be the current thread, with its thread lock held.
 */
static void
rt_yield(kthread_t *t)
{
	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	rtpp->rt_flags &= ~RTBACKQ;
	setbackdq(t);
}
1036 
/*
 * Nice-value operation for the real-time class.  All arguments are
 * ignored and the call always fails with EINVAL.
 */
/* ARGSUSED */
static int
rt_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
{
	return (EINVAL);
}
1043 
1044 static int
1045 rt_alloc(void **p, int flag)
1046 {
1047 	void *bufp;
1048 	bufp = kmem_alloc(sizeof (rtproc_t), flag);
1049 	if (bufp == NULL) {
1050 		return (ENOMEM);
1051 	} else {
1052 		*p = bufp;
1053 		return (0);
1054 	}
1055 }
1056 
1057 static void
1058 rt_free(void *bufp)
1059 {
1060 	if (bufp)
1061 		kmem_free(bufp, sizeof (rtproc_t));
1062 }
1063 
/*
 * Bring thread t's dispatch priority in line with rtpp->rt_pri.
 *
 * The new global priority is looked up in rt_dptbl.  How it is applied
 * depends on the thread's state:
 *
 *   - current thread, or a thread on a processor: the priority is
 *     changed directly; if the thread must now surrender its CPU it is
 *     flagged RTBACKQ and preempted, otherwise it gets a full quantum.
 *   - any other thread: thread_change_pri() does the work; a thread
 *     that was on a run queue gets a full quantum, otherwise RTBACKQ
 *     is set.
 *
 * The thread lock must be held.
 */
static void
rt_change_priority(kthread_t *t, rtproc_t *rtpp)
{
	pri_t new_pri;

	ASSERT(THREAD_LOCK_HELD(t));

	new_pri = rt_dptbl[rtpp->rt_pri].rt_globpri;

	if (t == curthread || t->t_state == TS_ONPROC) {
		cpu_t	*cp = t->t_disp_queue->disp_cpu;
		THREAD_CHANGE_PRI(t, new_pri);
		/* Keep the CPU's record of its dispatched priority current. */
		if (t == cp->cpu_dispthread)
			cp->cpu_dispatch_pri = DISP_PRIO(t);
		if (DISP_MUST_SURRENDER(t)) {
			rtpp->rt_flags |= RTBACKQ;
			cpu_surrender(t);
		} else {
			rtpp->rt_timeleft = rtpp->rt_pquantum;
		}
	} else {
		/*
		 * When the priority of a thread is changed,
		 * it may be necessary to adjust its position
		 * on a sleep queue or dispatch queue.  The
		 * function thread_change_pri() accomplishes this.
		 */
		if (thread_change_pri(t, new_pri, 0)) {
			/*
			 * The thread was on a run queue.
			 * Reset its CPU timeleft.
			 */
			rtpp->rt_timeleft = rtpp->rt_pquantum;
		} else {
			rtpp->rt_flags |= RTBACKQ;
		}
	}
}
1102