xref: /titanic_41/usr/src/uts/common/disp/rt.c (revision d24234c24aeaca4ca56ee3ac2794507968f274c4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/sysmacros.h>
35 #include <sys/cred.h>
36 #include <sys/proc.h>
37 #include <sys/pcb.h>
38 #include <sys/signal.h>
39 #include <sys/user.h>
40 #include <sys/priocntl.h>
41 #include <sys/class.h>
42 #include <sys/disp.h>
43 #include <sys/procset.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/rt.h>
47 #include <sys/rtpriocntl.h>
48 #include <sys/kmem.h>
49 #include <sys/systm.h>
50 #include <sys/schedctl.h>
51 #include <sys/errno.h>
52 #include <sys/cpuvar.h>
53 #include <sys/vmsystm.h>
54 #include <sys/time.h>
55 #include <sys/policy.h>
56 #include <sys/sdt.h>
57 #include <sys/cpupart.h>
58 #include <sys/modctl.h>
59 
/*
 * Forward declaration of the class initialization routine.  It is
 * referenced by the class switch entry below, ahead of its definition.
 */
static pri_t	rt_init(id_t, int, classfuncs_t **);

/*
 * Class switch entry for the real-time class: the class name "RT" and
 * the routine that initializes the class.
 */
static struct sclass csw = {
	"RT",
	rt_init,
	0
};

/*
 * Scheduling-class module description; ties the class switch entry
 * above to the mod_schedops module operations.
 */
static struct modlsched modlsched = {
	&mod_schedops, "realtime scheduling class", &csw
};

/*
 * Module linkage structure handed to mod_install() and mod_info().
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlsched, NULL
};
75 
76 int
77 _init()
78 {
79 	return (mod_install(&modlinkage));
80 }
81 
/*
 * Module unload entry point.  The RT class is not removable for now, so
 * every request is refused with EBUSY.
 */
int
_fini()
{
	return (EBUSY);
}
87 
88 int
89 _info(struct modinfo *modinfop)
90 {
91 	return (mod_info(&modlinkage, modinfop));
92 }
93 
94 
/*
 * Class specific code for the real-time class
 */

/*
 * Real-time class configuration.  rt_maxpri is defined here and starts
 * out as RTMAXPRI; rt_dptbl is assigned by rt_init() from the table
 * returned by rt_getdptbl().
 */
#define	RTMAXPRI 59

pri_t rt_maxpri = RTMAXPRI;	/* maximum real-time priority */
rtdpent_t *rt_dptbl;	  /* real-time dispatcher parameter table */

/*
 * control flags (kparms->rt_cflags).
 */
#define	RT_DOPRI	0x01	/* change priority */
#define	RT_DOTQ		0x02	/* change RT time quantum */
#define	RT_DOSIG	0x04	/* change RT time quantum signal */

/*
 * Prototypes for the class and thread operations that populate
 * rt_classfuncs.
 */
static int	rt_admin(caddr_t, cred_t *);
static int	rt_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
static int	rt_fork(kthread_t *, kthread_t *, void *);
static int	rt_getclinfo(void *);
static int	rt_getclpri(pcpri_t *);
static int	rt_parmsin(void *);
static int	rt_parmsout(void *, pc_vaparms_t *);
static int	rt_vaparmsin(void *, pc_vaparms_t *);
static int	rt_vaparmsout(void *, pc_vaparms_t *);
static int	rt_parmsset(kthread_t *, void *, id_t, cred_t *);
static int	rt_donice(kthread_t *, cred_t *, int, int *);
static int	rt_doprio(kthread_t *, cred_t *, int, int *);
static void	rt_exitclass(void *);
static int	rt_canexit(kthread_t *, cred_t *);
static void	rt_forkret(kthread_t *, kthread_t *);
static void	rt_nullsys();
static void	rt_parmsget(kthread_t *, void *);
static void	rt_preempt(kthread_t *);
static void	rt_setrun(kthread_t *);
static void	rt_tick(kthread_t *);
static void	rt_wakeup(kthread_t *);
static pri_t	rt_swapin(kthread_t *, int);
static pri_t	rt_swapout(kthread_t *, int);
static pri_t	rt_globpri(kthread_t *);
static void	rt_yield(kthread_t *);
static int	rt_alloc(void **, int);
static void	rt_free(void *);

/* Internal helper shared by rt_enterclass() and rt_parmsset(). */
static void	rt_change_priority(kthread_t *, rtproc_t *);

static id_t	rt_cid;		/* real-time class ID */
static rtproc_t	rt_plisthead;	/* dummy rtproc at head of rtproc list */
static kmutex_t	rt_dptblock;	/* protects realtime dispatch table */
static kmutex_t	rt_list_lock;	/* protects RT thread list */

/* Defined outside this file; supplies the table stored in rt_dptbl. */
extern rtdpent_t *rt_getdptbl(void);
150 
/*
 * Operations vector for the real-time class.  rt_init() returns a
 * pointer to this structure through its clfuncspp argument.  Slots
 * filled with rt_nullsys are operations for which this class performs
 * no work; the trailing comment on each names the slot.
 */
static struct classfuncs rt_classfuncs = {
	/* class ops */
	rt_admin,
	rt_getclinfo,
	rt_parmsin,
	rt_parmsout,
	rt_vaparmsin,
	rt_vaparmsout,
	rt_getclpri,
	rt_alloc,
	rt_free,
	/* thread ops */
	rt_enterclass,
	rt_exitclass,
	rt_canexit,
	rt_fork,
	rt_forkret,
	rt_parmsget,
	rt_parmsset,
	rt_nullsys,	/* stop */
	rt_nullsys,	/* exit */
	rt_nullsys,	/* active */
	rt_nullsys,	/* inactive */
	rt_swapin,
	rt_swapout,
	rt_nullsys,	/* trapret */
	rt_preempt,
	rt_setrun,
	rt_nullsys,	/* sleep */
	rt_tick,
	rt_wakeup,
	rt_donice,
	rt_globpri,
	rt_nullsys,	/* set_process_group */
	rt_yield,
	rt_doprio,
};
188 
189 /*
190  * Real-time class initialization. Called by dispinit() at boot time.
191  * We can ignore the clparmsz argument since we know that the smallest
192  * possible parameter buffer is big enough for us.
193  */
194 /* ARGSUSED */
195 pri_t
196 rt_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
197 {
198 	rt_dptbl = rt_getdptbl();
199 	rt_cid = cid;	/* Record our class ID */
200 
201 	/*
202 	 * Initialize the rtproc list.
203 	 */
204 	rt_plisthead.rt_next = rt_plisthead.rt_prev = &rt_plisthead;
205 
206 	/*
207 	 * We're required to return a pointer to our classfuncs
208 	 * structure and the highest global priority value we use.
209 	 */
210 	*clfuncspp = &rt_classfuncs;
211 	mutex_init(&rt_dptblock, NULL, MUTEX_DEFAULT, NULL);
212 	mutex_init(&rt_list_lock, NULL, MUTEX_DEFAULT, NULL);
213 	return (rt_dptbl[rt_maxpri].rt_globpri);
214 }
215 
216 /*
217  * Get or reset the rt_dptbl values per the user's request.
218  */
219 /* ARGSUSED */
220 static int
221 rt_admin(caddr_t uaddr, cred_t *reqpcredp)
222 {
223 	rtadmin_t	rtadmin;
224 	rtdpent_t	*tmpdpp;
225 	size_t		userdpsz;
226 	size_t		rtdpsz;
227 	int		i;
228 
229 	if (get_udatamodel() == DATAMODEL_NATIVE) {
230 		if (copyin(uaddr, &rtadmin, sizeof (rtadmin_t)))
231 			return (EFAULT);
232 	}
233 #ifdef _SYSCALL32_IMPL
234 	else {
235 		/* rtadmin struct from ILP32 callers */
236 		rtadmin32_t rtadmin32;
237 		if (copyin(uaddr, &rtadmin32, sizeof (rtadmin32_t)))
238 			return (EFAULT);
239 		rtadmin.rt_dpents =
240 		    (struct rtdpent *)(uintptr_t)rtadmin32.rt_dpents;
241 		rtadmin.rt_ndpents = rtadmin32.rt_ndpents;
242 		rtadmin.rt_cmd = rtadmin32.rt_cmd;
243 	}
244 #endif /* _SYSCALL32_IMPL */
245 
246 	rtdpsz = (rt_maxpri + 1) * sizeof (rtdpent_t);
247 
248 	switch (rtadmin.rt_cmd) {
249 
250 	case RT_GETDPSIZE:
251 		rtadmin.rt_ndpents = rt_maxpri + 1;
252 
253 		if (get_udatamodel() == DATAMODEL_NATIVE) {
254 			if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
255 				return (EFAULT);
256 		}
257 #ifdef _SYSCALL32_IMPL
258 		else {
259 			/* return rtadmin struct to ILP32 callers */
260 			rtadmin32_t rtadmin32;
261 			rtadmin32.rt_dpents =
262 			    (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
263 			rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
264 			rtadmin32.rt_cmd = rtadmin.rt_cmd;
265 			if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
266 				return (EFAULT);
267 		}
268 #endif /* _SYSCALL32_IMPL */
269 
270 		break;
271 
272 	case RT_GETDPTBL:
273 		userdpsz = MIN(rtadmin.rt_ndpents * sizeof (rtdpent_t),
274 		    rtdpsz);
275 		if (copyout(rt_dptbl, rtadmin.rt_dpents, userdpsz))
276 			return (EFAULT);
277 		rtadmin.rt_ndpents = userdpsz / sizeof (rtdpent_t);
278 
279 		if (get_udatamodel() == DATAMODEL_NATIVE) {
280 			if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
281 				return (EFAULT);
282 		}
283 #ifdef _SYSCALL32_IMPL
284 		else {
285 			/* return rtadmin struct to ILP32 callers */
286 			rtadmin32_t rtadmin32;
287 			rtadmin32.rt_dpents =
288 			    (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
289 			rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
290 			rtadmin32.rt_cmd = rtadmin.rt_cmd;
291 			if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
292 				return (EFAULT);
293 		}
294 #endif /* _SYSCALL32_IMPL */
295 		break;
296 
297 	case RT_SETDPTBL:
298 		/*
299 		 * We require that the requesting process has sufficient
300 		 * priveleges.  We also require that the table supplied by
301 		 * the user exactly match the current rt_dptbl in size.
302 		 */
303 		if (secpolicy_dispadm(reqpcredp) != 0)
304 			return (EPERM);
305 		if (rtadmin.rt_ndpents * sizeof (rtdpent_t) != rtdpsz)
306 			return (EINVAL);
307 
308 		/*
309 		 * We read the user supplied table into a temporary buffer
310 		 * where the time quantum values are validated before
311 		 * being copied to the rt_dptbl.
312 		 */
313 		tmpdpp = kmem_alloc(rtdpsz, KM_SLEEP);
314 		if (copyin(rtadmin.rt_dpents, tmpdpp, rtdpsz)) {
315 			kmem_free(tmpdpp, rtdpsz);
316 			return (EFAULT);
317 		}
318 		for (i = 0; i < rtadmin.rt_ndpents; i++) {
319 
320 			/*
321 			 * Validate the user supplied time quantum values.
322 			 */
323 			if (tmpdpp[i].rt_quantum <= 0 &&
324 			    tmpdpp[i].rt_quantum != RT_TQINF) {
325 				kmem_free(tmpdpp, rtdpsz);
326 				return (EINVAL);
327 			}
328 		}
329 
330 		/*
331 		 * Copy the user supplied values over the current rt_dptbl
332 		 * values.  The rt_globpri member is read-only so we don't
333 		 * overwrite it.
334 		 */
335 		mutex_enter(&rt_dptblock);
336 		for (i = 0; i < rtadmin.rt_ndpents; i++)
337 			rt_dptbl[i].rt_quantum = tmpdpp[i].rt_quantum;
338 		mutex_exit(&rt_dptblock);
339 		kmem_free(tmpdpp, rtdpsz);
340 		break;
341 
342 	default:
343 		return (EINVAL);
344 	}
345 	return (0);
346 }
347 
348 
349 /*
350  * Allocate a real-time class specific proc structure and
351  * initialize it with the parameters supplied. Also move thread
352  * to specified real-time priority.
353  */
354 /* ARGSUSED */
355 static int
356 rt_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
357     void *bufp)
358 {
359 	rtkparms_t *rtkparmsp = (rtkparms_t *)parmsp;
360 	rtproc_t *rtpp;
361 
362 	/*
363 	 * For a thread to enter the real-time class the thread
364 	 * which initiates the request must be privileged.
365 	 * This may have been checked previously but if our
366 	 * caller passed us a credential structure we assume it
367 	 * hasn't and we check it here.
368 	 */
369 	if (reqpcredp != NULL && secpolicy_setpriority(reqpcredp) != 0)
370 		return (EPERM);
371 
372 	rtpp = (rtproc_t *)bufp;
373 	ASSERT(rtpp != NULL);
374 
375 	/*
376 	 * If this thread's lwp is swapped out, it will be brought in
377 	 * when it is put onto the runqueue.
378 	 *
379 	 * Now, Initialize the rtproc structure.
380 	 */
381 	if (rtkparmsp == NULL) {
382 		/*
383 		 * Use default values
384 		 */
385 		rtpp->rt_pri = 0;
386 		rtpp->rt_pquantum = rt_dptbl[0].rt_quantum;
387 		rtpp->rt_tqsignal = 0;
388 	} else {
389 		/*
390 		 * Use supplied values
391 		 */
392 		if ((rtkparmsp->rt_cflags & RT_DOPRI) == 0)
393 			rtpp->rt_pri = 0;
394 		else
395 			rtpp->rt_pri = rtkparmsp->rt_pri;
396 
397 		if (rtkparmsp->rt_tqntm == RT_TQINF)
398 			rtpp->rt_pquantum = RT_TQINF;
399 		else if (rtkparmsp->rt_tqntm == RT_TQDEF ||
400 		    (rtkparmsp->rt_cflags & RT_DOTQ) == 0)
401 			rtpp->rt_pquantum = rt_dptbl[rtpp->rt_pri].rt_quantum;
402 		else
403 			rtpp->rt_pquantum = rtkparmsp->rt_tqntm;
404 
405 		if ((rtkparmsp->rt_cflags & RT_DOSIG) == 0)
406 			rtpp->rt_tqsignal = 0;
407 		else
408 			rtpp->rt_tqsignal = rtkparmsp->rt_tqsig;
409 	}
410 	rtpp->rt_flags = 0;
411 	rtpp->rt_tp = t;
412 	/*
413 	 * Reset thread priority
414 	 */
415 	thread_lock(t);
416 	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
417 	t->t_cid = cid;
418 	t->t_cldata = (void *)rtpp;
419 	t->t_schedflag &= ~TS_RUNQMATCH;
420 	rt_change_priority(t, rtpp);
421 	thread_unlock(t);
422 	/*
423 	 * Link new structure into rtproc list
424 	 */
425 	mutex_enter(&rt_list_lock);
426 	rtpp->rt_next = rt_plisthead.rt_next;
427 	rtpp->rt_prev = &rt_plisthead;
428 	rt_plisthead.rt_next->rt_prev = rtpp;
429 	rt_plisthead.rt_next = rtpp;
430 	mutex_exit(&rt_list_lock);
431 	return (0);
432 }
433 
434 
435 /*
436  * Free rtproc structure of thread.
437  */
438 static void
439 rt_exitclass(void *procp)
440 {
441 	rtproc_t *rtprocp = (rtproc_t *)procp;
442 
443 	mutex_enter(&rt_list_lock);
444 	rtprocp->rt_prev->rt_next = rtprocp->rt_next;
445 	rtprocp->rt_next->rt_prev = rtprocp->rt_prev;
446 	mutex_exit(&rt_list_lock);
447 	kmem_free(rtprocp, sizeof (rtproc_t));
448 }
449 
450 
451 /*
452  * Allocate and initialize real-time class specific
453  * proc structure for child.
454  */
455 /* ARGSUSED */
456 static int
457 rt_fork(kthread_t *t, kthread_t *ct, void *bufp)
458 {
459 	rtproc_t *prtpp;
460 	rtproc_t *crtpp;
461 
462 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
463 
464 	/*
465 	 * Initialize child's rtproc structure
466 	 */
467 	crtpp = (rtproc_t *)bufp;
468 	ASSERT(crtpp != NULL);
469 	prtpp = (rtproc_t *)t->t_cldata;
470 	thread_lock(t);
471 	crtpp->rt_timeleft = crtpp->rt_pquantum = prtpp->rt_pquantum;
472 	crtpp->rt_pri = prtpp->rt_pri;
473 	crtpp->rt_flags = prtpp->rt_flags & ~RTBACKQ;
474 	crtpp->rt_tqsignal = prtpp->rt_tqsignal;
475 
476 	crtpp->rt_tp = ct;
477 	thread_unlock(t);
478 
479 	/*
480 	 * Link new structure into rtproc list
481 	 */
482 	ct->t_cldata = (void *)crtpp;
483 	mutex_enter(&rt_list_lock);
484 	crtpp->rt_next = rt_plisthead.rt_next;
485 	crtpp->rt_prev = &rt_plisthead;
486 	rt_plisthead.rt_next->rt_prev = crtpp;
487 	rt_plisthead.rt_next = crtpp;
488 	mutex_exit(&rt_list_lock);
489 	return (0);
490 }
491 
492 
493 /*
494  * The child goes to the back of its dispatcher queue while the
495  * parent continues to run after a real time thread forks.
496  */
497 /* ARGSUSED */
498 static void
499 rt_forkret(kthread_t *t, kthread_t *ct)
500 {
501 	proc_t *pp = ttoproc(t);
502 	proc_t *cp = ttoproc(ct);
503 
504 	ASSERT(t == curthread);
505 	ASSERT(MUTEX_HELD(&pidlock));
506 
507 	/*
508 	 * Grab the child's p_lock before dropping pidlock to ensure
509 	 * the process does not disappear before we set it running.
510 	 */
511 	mutex_enter(&cp->p_lock);
512 	mutex_exit(&pidlock);
513 	continuelwps(cp);
514 	mutex_exit(&cp->p_lock);
515 
516 	mutex_enter(&pp->p_lock);
517 	continuelwps(pp);
518 	mutex_exit(&pp->p_lock);
519 }
520 
521 
522 /*
523  * Get information about the real-time class into the buffer
524  * pointed to by rtinfop.  The maximum configured real-time
525  * priority is the only information we supply.  We ignore the
526  * class and credential arguments because anyone can have this
527  * information.
528  */
529 /* ARGSUSED */
530 static int
531 rt_getclinfo(void *infop)
532 {
533 	rtinfo_t *rtinfop = (rtinfo_t *)infop;
534 	rtinfop->rt_maxpri = rt_maxpri;
535 	return (0);
536 }
537 
538 /*
539  * Return the user mode scheduling priority range.
540  */
541 static int
542 rt_getclpri(pcpri_t *pcprip)
543 {
544 	pcprip->pc_clpmax = rt_maxpri;
545 	pcprip->pc_clpmin = 0;
546 	return (0);
547 }
548 
/*
 * Do-nothing routine.  It fills the rt_classfuncs slots (stop, exit,
 * active, inactive, trapret, sleep, set_process_group) for which the
 * real-time class has no work to perform.
 */
static void
rt_nullsys()
{
}
553 
554 /* ARGSUSED */
555 static int
556 rt_canexit(kthread_t *t, cred_t *cred)
557 {
558 	/*
559 	 * Thread can always leave RT class
560 	 */
561 	return (0);
562 }
563 
564 /*
565  * Get the real-time scheduling parameters of the thread pointed to by
566  * rtprocp into the buffer pointed to by rtkparmsp.
567  */
568 static void
569 rt_parmsget(kthread_t *t, void *parmsp)
570 {
571 	rtproc_t	*rtprocp = (rtproc_t *)t->t_cldata;
572 	rtkparms_t	*rtkparmsp = (rtkparms_t *)parmsp;
573 
574 	rtkparmsp->rt_pri = rtprocp->rt_pri;
575 	rtkparmsp->rt_tqntm = rtprocp->rt_pquantum;
576 	rtkparmsp->rt_tqsig = rtprocp->rt_tqsignal;
577 }
578 
579 
580 
581 /*
582  * Check the validity of the real-time parameters in the buffer
583  * pointed to by rtprmsp.
584  * We convert the rtparms buffer from the user supplied format to
585  * our internal format (i.e. time quantum expressed in ticks).
586  */
587 static int
588 rt_parmsin(void *prmsp)
589 {
590 	rtparms_t *rtprmsp = (rtparms_t *)prmsp;
591 	longlong_t	ticks;
592 	uint_t		cflags;
593 
594 	/*
595 	 * First check the validity of parameters and convert
596 	 * the buffer to kernel format.
597 	 */
598 	if ((rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri) &&
599 	    rtprmsp->rt_pri != RT_NOCHANGE)
600 		return (EINVAL);
601 
602 	cflags = (rtprmsp->rt_pri != RT_NOCHANGE ? RT_DOPRI : 0);
603 
604 	if ((rtprmsp->rt_tqsecs == 0 && rtprmsp->rt_tqnsecs == 0) ||
605 	    rtprmsp->rt_tqnsecs >= NANOSEC)
606 		return (EINVAL);
607 
608 	if (rtprmsp->rt_tqnsecs != RT_NOCHANGE)
609 		cflags |= RT_DOTQ;
610 
611 	if (rtprmsp->rt_tqnsecs >= 0) {
612 		if ((ticks = SEC_TO_TICK((longlong_t)rtprmsp->rt_tqsecs) +
613 		    NSEC_TO_TICK_ROUNDUP(rtprmsp->rt_tqnsecs)) > INT_MAX)
614 			return (ERANGE);
615 
616 		((rtkparms_t *)rtprmsp)->rt_tqntm = (int)ticks;
617 	} else {
618 		if (rtprmsp->rt_tqnsecs != RT_NOCHANGE &&
619 		    rtprmsp->rt_tqnsecs != RT_TQINF &&
620 		    rtprmsp->rt_tqnsecs != RT_TQDEF)
621 			return (EINVAL);
622 
623 		((rtkparms_t *)rtprmsp)->rt_tqntm = rtprmsp->rt_tqnsecs;
624 	}
625 	((rtkparms_t *)rtprmsp)->rt_cflags = cflags;
626 
627 	return (0);
628 }
629 
630 
631 /*
632  * Check the validity of the real-time parameters in the pc_vaparms_t
633  * structure vaparmsp and put them in the buffer pointed to by rtprmsp.
634  * pc_vaparms_t contains (key, value) pairs of parameter.
635  * rt_vaparmsin() is the variable parameter version of rt_parmsin().
636  */
637 static int
638 rt_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
639 {
640 	uint_t		secs = 0;
641 	uint_t		cnt;
642 	int		nsecs = 0;
643 	int		priflag, secflag, nsecflag, sigflag;
644 	longlong_t	ticks;
645 	rtkparms_t	*rtprmsp = (rtkparms_t *)prmsp;
646 	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];
647 
648 
649 	/*
650 	 * First check the validity of parameters and convert them
651 	 * from the user supplied format to the internal format.
652 	 */
653 	priflag = secflag = nsecflag = sigflag = 0;
654 	rtprmsp->rt_cflags = 0;
655 
656 	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
657 		return (EINVAL);
658 
659 	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
660 
661 		switch (vpp->pc_key) {
662 		case RT_KY_PRI:
663 			if (priflag++)
664 				return (EINVAL);
665 			rtprmsp->rt_cflags |= RT_DOPRI;
666 			rtprmsp->rt_pri = (pri_t)vpp->pc_parm;
667 			if (rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri)
668 				return (EINVAL);
669 			break;
670 
671 		case RT_KY_TQSECS:
672 			if (secflag++)
673 				return (EINVAL);
674 			rtprmsp->rt_cflags |= RT_DOTQ;
675 			secs = (uint_t)vpp->pc_parm;
676 			break;
677 
678 		case RT_KY_TQNSECS:
679 			if (nsecflag++)
680 				return (EINVAL);
681 			rtprmsp->rt_cflags |= RT_DOTQ;
682 			nsecs = (int)vpp->pc_parm;
683 			break;
684 
685 		case RT_KY_TQSIG:
686 			if (sigflag++)
687 				return (EINVAL);
688 			rtprmsp->rt_cflags |= RT_DOSIG;
689 			rtprmsp->rt_tqsig = (int)vpp->pc_parm;
690 			if (rtprmsp->rt_tqsig < 0 || rtprmsp->rt_tqsig >= NSIG)
691 				return (EINVAL);
692 			break;
693 
694 		default:
695 			return (EINVAL);
696 		}
697 	}
698 
699 	if (vaparmsp->pc_vaparmscnt == 0) {
700 		/*
701 		 * Use default parameters.
702 		 */
703 		rtprmsp->rt_pri = 0;
704 		rtprmsp->rt_tqntm = RT_TQDEF;
705 		rtprmsp->rt_tqsig = 0;
706 		rtprmsp->rt_cflags = RT_DOPRI | RT_DOTQ | RT_DOSIG;
707 	} else if ((rtprmsp->rt_cflags & RT_DOTQ) != 0) {
708 		if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
709 			return (EINVAL);
710 
711 		if (nsecs >= 0) {
712 			if ((ticks = SEC_TO_TICK((longlong_t)secs) +
713 			    NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
714 				return (ERANGE);
715 
716 			rtprmsp->rt_tqntm = (int)ticks;
717 		} else {
718 			if (nsecs != RT_TQINF && nsecs != RT_TQDEF)
719 				return (EINVAL);
720 			rtprmsp->rt_tqntm = nsecs;
721 		}
722 	}
723 
724 	return (0);
725 }
726 
727 /*
728  * Do required processing on the real-time parameter buffer
729  * before it is copied out to the user.
730  * All we have to do is convert the buffer from kernel to user format
731  * (i.e. convert time quantum from ticks to seconds-nanoseconds).
732  */
733 /* ARGSUSED */
734 static int
735 rt_parmsout(void *prmsp, pc_vaparms_t *vaparmsp)
736 {
737 	rtkparms_t	*rtkprmsp = (rtkparms_t *)prmsp;
738 
739 	if (vaparmsp != NULL)
740 		return (0);
741 
742 	if (rtkprmsp->rt_tqntm < 0) {
743 		/*
744 		 * Quantum field set to special value (e.g. RT_TQINF)
745 		 */
746 		((rtparms_t *)rtkprmsp)->rt_tqnsecs = rtkprmsp->rt_tqntm;
747 		((rtparms_t *)rtkprmsp)->rt_tqsecs = 0;
748 	} else {
749 		/* Convert quantum from ticks to seconds-nanoseconds */
750 
751 		timestruc_t ts;
752 		TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
753 		((rtparms_t *)rtkprmsp)->rt_tqsecs = ts.tv_sec;
754 		((rtparms_t *)rtkprmsp)->rt_tqnsecs = ts.tv_nsec;
755 	}
756 
757 	return (0);
758 }
759 
760 
761 /*
762  * Copy all selected real-time class parameters to the user.
763  * The parameters are specified by a key.
764  */
765 static int
766 rt_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
767 {
768 	rtkparms_t	*rtkprmsp = (rtkparms_t *)prmsp;
769 	timestruc_t	ts;
770 	uint_t		cnt;
771 	uint_t		secs;
772 	int		nsecs;
773 	int		priflag, secflag, nsecflag, sigflag;
774 	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];
775 
776 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
777 
778 	priflag = secflag = nsecflag = sigflag = 0;
779 
780 	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
781 		return (EINVAL);
782 
783 	if (rtkprmsp->rt_tqntm < 0) {
784 		/*
785 		 * Quantum field set to special value (e.g. RT_TQINF).
786 		 */
787 		secs = 0;
788 		nsecs = rtkprmsp->rt_tqntm;
789 	} else {
790 		/*
791 		 * Convert quantum from ticks to seconds-nanoseconds.
792 		 */
793 		TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
794 		secs = ts.tv_sec;
795 		nsecs = ts.tv_nsec;
796 	}
797 
798 
799 	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
800 
801 		switch (vpp->pc_key) {
802 		case RT_KY_PRI:
803 			if (priflag++)
804 				return (EINVAL);
805 			if (copyout(&rtkprmsp->rt_pri,
806 			    (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
807 				return (EFAULT);
808 			break;
809 
810 		case RT_KY_TQSECS:
811 			if (secflag++)
812 				return (EINVAL);
813 			if (copyout(&secs, (caddr_t)(uintptr_t)vpp->pc_parm,
814 			    sizeof (uint_t)))
815 				return (EFAULT);
816 			break;
817 
818 		case RT_KY_TQNSECS:
819 			if (nsecflag++)
820 				return (EINVAL);
821 			if (copyout(&nsecs, (caddr_t)(uintptr_t)vpp->pc_parm,
822 			    sizeof (int)))
823 				return (EFAULT);
824 			break;
825 
826 		case RT_KY_TQSIG:
827 			if (sigflag++)
828 				return (EINVAL);
829 			if (copyout(&rtkprmsp->rt_tqsig,
830 			    (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (int)))
831 				return (EFAULT);
832 			break;
833 
834 		default:
835 			return (EINVAL);
836 		}
837 	}
838 
839 	return (0);
840 }
841 
842 
843 /*
844  * Set the scheduling parameters of the thread pointed to by rtprocp
845  * to those specified in the buffer pointed to by rtkprmsp.
846  * Note that the parameters are expected to be in kernel format
847  * (i.e. time quantm expressed in ticks).  Real time parameters copied
848  * in from the user should be processed by rt_parmsin() before they are
849  * passed to this function.
850  */
851 static int
852 rt_parmsset(kthread_t *tx, void *prmsp, id_t reqpcid, cred_t *reqpcredp)
853 {
854 	rtkparms_t *rtkprmsp = (rtkparms_t *)prmsp;
855 	rtproc_t *rtpp = (rtproc_t *)tx->t_cldata;
856 
857 	ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
858 
859 	/*
860 	 * Basic permissions enforced by generic kernel code
861 	 * for all classes require that a thread attempting
862 	 * to change the scheduling parameters of a target thread
863 	 * be privileged or have a real or effective UID
864 	 * matching that of the target thread. We are not
865 	 * called unless these basic permission checks have
866 	 * already passed. The real-time class requires in addition
867 	 * that the requesting thread be real-time unless it is privileged.
868 	 * This may also have been checked previously but if our caller
869 	 * passes us a credential structure we assume it hasn't and
870 	 * we check it here.
871 	 */
872 	if (reqpcredp != NULL && reqpcid != rt_cid &&
873 	    secpolicy_setpriority(reqpcredp) != 0)
874 		return (EPERM);
875 
876 	thread_lock(tx);
877 	if ((rtkprmsp->rt_cflags & RT_DOPRI) != 0) {
878 		rtpp->rt_pri = rtkprmsp->rt_pri;
879 		rt_change_priority(tx, rtpp);
880 	}
881 	if (rtkprmsp->rt_tqntm == RT_TQINF)
882 		rtpp->rt_pquantum = RT_TQINF;
883 	else if (rtkprmsp->rt_tqntm == RT_TQDEF)
884 		rtpp->rt_timeleft = rtpp->rt_pquantum =
885 		    rt_dptbl[rtpp->rt_pri].rt_quantum;
886 	else if ((rtkprmsp->rt_cflags & RT_DOTQ) != 0)
887 		rtpp->rt_timeleft = rtpp->rt_pquantum = rtkprmsp->rt_tqntm;
888 
889 	if ((rtkprmsp->rt_cflags & RT_DOSIG) != 0)
890 		rtpp->rt_tqsignal = rtkprmsp->rt_tqsig;
891 
892 	thread_unlock(tx);
893 	return (0);
894 }
895 
896 
897 /*
898  * Arrange for thread to be placed in appropriate location
899  * on dispatcher queue.  Runs at splhi() since the clock
900  * interrupt can cause RTBACKQ to be set.
901  */
902 static void
903 rt_preempt(kthread_t *t)
904 {
905 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
906 	klwp_t *lwp;
907 
908 	ASSERT(THREAD_LOCK_HELD(t));
909 
910 	/*
911 	 * If the state is user I allow swapping because I know I won't
912 	 * be holding any locks.
913 	 */
914 	if ((lwp = curthread->t_lwp) != NULL && lwp->lwp_state == LWP_USER)
915 		t->t_schedflag &= ~TS_DONT_SWAP;
916 	if ((rtpp->rt_flags & RTBACKQ) != 0) {
917 		rtpp->rt_timeleft = rtpp->rt_pquantum;
918 		rtpp->rt_flags &= ~RTBACKQ;
919 		setbackdq(t);
920 	} else
921 		setfrontdq(t);
922 
923 }
924 
925 /*
926  * Return the global priority associated with this rt_pri.
927  */
928 static pri_t
929 rt_globpri(kthread_t *t)
930 {
931 	rtproc_t *rtprocp = (rtproc_t *)t->t_cldata;
932 	return (rt_dptbl[rtprocp->rt_pri].rt_globpri);
933 }
934 
935 static void
936 rt_setrun(kthread_t *t)
937 {
938 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
939 
940 	ASSERT(THREAD_LOCK_HELD(t));
941 
942 	rtpp->rt_timeleft = rtpp->rt_pquantum;
943 	rtpp->rt_flags &= ~RTBACKQ;
944 	setbackdq(t);
945 }
946 
947 /*
948  * Returns the priority of the thread, -1 if the thread is loaded or ineligible
949  * for swapin.
950  *
951  * FX and RT threads are designed so that they don't swapout; however, it
952  * is possible that while the thread is swapped out and in another class, it
953  * can be changed to FX or RT.  Since these threads should be swapped in as
954  * soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin
955  * returns SHRT_MAX - 1, so that it gives deference to any swapped out RT
956  * threads.
957  */
958 /* ARGSUSED */
959 static pri_t
960 rt_swapin(kthread_t *t, int flags)
961 {
962 	pri_t	tpri = -1;
963 
964 	ASSERT(THREAD_LOCK_HELD(t));
965 
966 	if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
967 		tpri = (pri_t)SHRT_MAX;
968 	}
969 
970 	return (tpri);
971 }
972 
973 /*
974  * Return an effective priority for swapout.
975  */
976 /* ARGSUSED */
977 static pri_t
978 rt_swapout(kthread_t *t, int flags)
979 {
980 	ASSERT(THREAD_LOCK_HELD(t));
981 
982 	return (-1);
983 }
984 
985 /*
986  * Check for time slice expiration (unless thread has infinite time
987  * slice).  If time slice has expired arrange for thread to be preempted
988  * and placed on back of queue.
989  */
990 static void
991 rt_tick(kthread_t *t)
992 {
993 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
994 
995 	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
996 
997 	thread_lock(t);
998 	if ((rtpp->rt_pquantum != RT_TQINF && --rtpp->rt_timeleft == 0) ||
999 	    (t->t_state == TS_ONPROC && DISP_MUST_SURRENDER(t))) {
1000 		if (rtpp->rt_timeleft == 0 && rtpp->rt_tqsignal) {
1001 			thread_unlock(t);
1002 			sigtoproc(ttoproc(t), t, rtpp->rt_tqsignal);
1003 			thread_lock(t);
1004 		}
1005 		rtpp->rt_flags |= RTBACKQ;
1006 		cpu_surrender(t);
1007 	}
1008 	thread_unlock(t);
1009 }
1010 
1011 
1012 /*
1013  * Place the thread waking up on the dispatcher queue.
1014  */
1015 static void
1016 rt_wakeup(kthread_t *t)
1017 {
1018 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1019 
1020 	ASSERT(THREAD_LOCK_HELD(t));
1021 
1022 	rtpp->rt_timeleft = rtpp->rt_pquantum;
1023 	rtpp->rt_flags &= ~RTBACKQ;
1024 	setbackdq(t);
1025 }
1026 
1027 static void
1028 rt_yield(kthread_t *t)
1029 {
1030 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1031 
1032 	ASSERT(t == curthread);
1033 	ASSERT(THREAD_LOCK_HELD(t));
1034 
1035 	rtpp->rt_flags &= ~RTBACKQ;
1036 	setbackdq(t);
1037 }
1038 
1039 /* ARGSUSED */
1040 static int
1041 rt_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1042 {
1043 	return (EINVAL);
1044 }
1045 
1046 /*
1047  * Increment the priority of the specified thread by incr and
1048  * return the new value in *retvalp.
1049  */
1050 static int
1051 rt_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1052 {
1053 	int newpri;
1054 	rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1055 	rtkparms_t rtkparms;
1056 
1057 	/* If there's no change to the priority, just return current setting */
1058 	if (incr == 0) {
1059 		*retvalp = rtpp->rt_pri;
1060 		return (0);
1061 	}
1062 
1063 	newpri = rtpp->rt_pri + incr;
1064 	if (newpri > rt_maxpri || newpri < 0)
1065 		return (EINVAL);
1066 
1067 	*retvalp = newpri;
1068 	rtkparms.rt_pri = newpri;
1069 	rtkparms.rt_tqntm = RT_NOCHANGE;
1070 	rtkparms.rt_tqsig = 0;
1071 	rtkparms.rt_cflags = RT_DOPRI;
1072 	return (rt_parmsset(t, &rtkparms, rt_cid, cr));
1073 }
1074 
1075 static int
1076 rt_alloc(void **p, int flag)
1077 {
1078 	void *bufp;
1079 	bufp = kmem_alloc(sizeof (rtproc_t), flag);
1080 	if (bufp == NULL) {
1081 		return (ENOMEM);
1082 	} else {
1083 		*p = bufp;
1084 		return (0);
1085 	}
1086 }
1087 
1088 static void
1089 rt_free(void *bufp)
1090 {
1091 	if (bufp)
1092 		kmem_free(bufp, sizeof (rtproc_t));
1093 }
1094 
1095 static void
1096 rt_change_priority(kthread_t *t, rtproc_t *rtpp)
1097 {
1098 	pri_t new_pri;
1099 
1100 	ASSERT(THREAD_LOCK_HELD(t));
1101 
1102 	new_pri = rt_dptbl[rtpp->rt_pri].rt_globpri;
1103 
1104 	t->t_cpri = rtpp->rt_pri;
1105 	if (t == curthread || t->t_state == TS_ONPROC) {
1106 		cpu_t	*cp = t->t_disp_queue->disp_cpu;
1107 		THREAD_CHANGE_PRI(t, new_pri);
1108 		if (t == cp->cpu_dispthread)
1109 			cp->cpu_dispatch_pri = DISP_PRIO(t);
1110 		if (DISP_MUST_SURRENDER(t)) {
1111 			rtpp->rt_flags |= RTBACKQ;
1112 			cpu_surrender(t);
1113 		} else {
1114 			rtpp->rt_timeleft = rtpp->rt_pquantum;
1115 		}
1116 	} else {
1117 		/*
1118 		 * When the priority of a thread is changed,
1119 		 * it may be necessary to adjust its position
1120 		 * on a sleep queue or dispatch queue.  The
1121 		 * function thread_change_pri() accomplishes this.
1122 		 */
1123 		if (thread_change_pri(t, new_pri, 0)) {
1124 			/*
1125 			 * The thread was on a run queue.
1126 			 * Reset its CPU timeleft.
1127 			 */
1128 			rtpp->rt_timeleft = rtpp->rt_pquantum;
1129 		} else {
1130 			rtpp->rt_flags |= RTBACKQ;
1131 		}
1132 	}
1133 }
1134