1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/sysmacros.h> 31 #include <sys/cred.h> 32 #include <sys/proc.h> 33 #include <sys/session.h> 34 #include <sys/strsubr.h> 35 #include <sys/user.h> 36 #include <sys/priocntl.h> 37 #include <sys/class.h> 38 #include <sys/disp.h> 39 #include <sys/procset.h> 40 #include <sys/debug.h> 41 #include <sys/kmem.h> 42 #include <sys/errno.h> 43 #include <sys/fx.h> 44 #include <sys/fxpriocntl.h> 45 #include <sys/cpuvar.h> 46 #include <sys/systm.h> 47 #include <sys/vtrace.h> 48 #include <sys/schedctl.h> 49 #include <sys/tnf_probe.h> 50 #include <sys/sunddi.h> 51 #include <sys/spl.h> 52 #include <sys/modctl.h> 53 #include <sys/policy.h> 54 #include <sys/sdt.h> 55 #include <sys/cpupart.h> 56 #include <sys/cpucaps.h> 57 58 static pri_t fx_init(id_t, int, classfuncs_t **); 59 60 static struct sclass csw = { 61 "FX", 62 fx_init, 63 0 64 }; 65 66 static struct modlsched modlsched = { 67 &mod_schedops, "Fixed priority sched class", &csw 68 }; 69 70 static struct modlinkage modlinkage = { 71 MODREV_1, (void *)&modlsched, NULL 72 }; 73 74 75 /* 76 * control flags (kparms->fx_cflags). 77 */ 78 #define FX_DOUPRILIM 0x01 /* change user priority limit */ 79 #define FX_DOUPRI 0x02 /* change user priority */ 80 #define FX_DOTQ 0x04 /* change FX time quantum */ 81 82 83 #define FXMAXUPRI 60 /* maximum user priority setting */ 84 85 #define FX_MAX_UNPRIV_PRI 0 /* maximum unpriviledge priority */ 86 87 /* 88 * The fxproc_t structures that have a registered callback vector, 89 * are also kept in an array of circular doubly linked lists. A hash on 90 * the thread id (from ddi_get_kt_did()) is used to determine which list 91 * each of such fxproc structures should be placed. Each list has a dummy 92 * "head" which is never removed, so the list is never empty. 93 */ 94 95 #define FX_CB_LISTS 16 /* number of lists, must be power of 2 */ 96 #define FX_CB_LIST_HASH(ktid) ((uint_t)ktid & (FX_CB_LISTS - 1)) 97 98 /* Insert fxproc into callback list */ 99 #define FX_CB_LIST_INSERT(fxpp) \ 100 { \ 101 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); \ 102 kmutex_t *lockp = &fx_cb_list_lock[index]; \ 103 fxproc_t *headp = &fx_cb_plisthead[index]; \ 104 mutex_enter(lockp); \ 105 fxpp->fx_cb_next = headp->fx_cb_next; \ 106 fxpp->fx_cb_prev = headp; \ 107 headp->fx_cb_next->fx_cb_prev = fxpp; \ 108 headp->fx_cb_next = fxpp; \ 109 mutex_exit(lockp); \ 110 } 111 112 /* 113 * Remove thread from callback list. 114 */ 115 #define FX_CB_LIST_DELETE(fxpp) \ 116 { \ 117 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); \ 118 kmutex_t *lockp = &fx_cb_list_lock[index]; \ 119 mutex_enter(lockp); \ 120 fxpp->fx_cb_prev->fx_cb_next = fxpp->fx_cb_next; \ 121 fxpp->fx_cb_next->fx_cb_prev = fxpp->fx_cb_prev; \ 122 mutex_exit(lockp); \ 123 } 124 125 #define FX_HAS_CB(fxpp) (fxpp->fx_callback != NULL) 126 127 /* adjust x to be between 0 and fx_maxumdpri */ 128 129 #define FX_ADJUST_PRI(pri) \ 130 { \ 131 if (pri < 0) \ 132 pri = 0; \ 133 else if (pri > fx_maxumdpri) \ 134 pri = fx_maxumdpri; \ 135 } 136 137 #define FX_ADJUST_QUANTUM(q) \ 138 { \ 139 if (q > INT_MAX) \ 140 q = INT_MAX; \ 141 else if (q <= 0) \ 142 q = FX_TQINF; \ 143 } 144 145 #define FX_ISVALID(pri, quantum) \ 146 (((pri >= 0) || (pri == FX_CB_NOCHANGE)) && \ 147 ((quantum >= 0) || (quantum == FX_NOCHANGE) || \ 148 (quantum == FX_TQDEF) || (quantum == FX_TQINF))) 149 150 151 static id_t fx_cid; /* fixed priority class ID */ 152 static fxdpent_t *fx_dptbl; /* fixed priority disp parameter table */ 153 154 static pri_t fx_maxupri = FXMAXUPRI; 155 static pri_t fx_maxumdpri; /* max user mode fixed priority */ 156 157 static pri_t fx_maxglobpri; /* maximum global priority used by fx class */ 158 static kmutex_t fx_dptblock; /* protects fixed priority dispatch table */ 159 160 161 static kmutex_t fx_cb_list_lock[FX_CB_LISTS]; /* protects list of fxprocs */ 162 /* that have callbacks */ 163 static fxproc_t fx_cb_plisthead[FX_CB_LISTS]; /* dummy fxproc at head of */ 164 /* list of fxprocs with */ 165 /* callbacks */ 166 167 static int fx_admin(caddr_t, cred_t *); 168 static int fx_getclinfo(void *); 169 static int fx_parmsin(void *); 170 static int fx_parmsout(void *, pc_vaparms_t *); 171 static int fx_vaparmsin(void *, pc_vaparms_t *); 172 static int fx_vaparmsout(void *, pc_vaparms_t *); 173 static int fx_getclpri(pcpri_t *); 174 static int fx_alloc(void **, int); 175 static void fx_free(void *); 176 static int fx_enterclass(kthread_t *, id_t, void *, cred_t *, void *); 177 static void fx_exitclass(void *); 178 static int fx_canexit(kthread_t *, cred_t *); 179 static int fx_fork(kthread_t *, kthread_t *, void *); 180 static void fx_forkret(kthread_t *, kthread_t *); 181 static void fx_parmsget(kthread_t *, void *); 182 static int fx_parmsset(kthread_t *, void *, id_t, cred_t *); 183 static void fx_stop(kthread_t *, int, int); 184 static void fx_exit(kthread_t *); 185 static pri_t fx_swapin(kthread_t *, int); 186 static pri_t fx_swapout(kthread_t *, int); 187 static void fx_trapret(kthread_t *); 188 static void fx_preempt(kthread_t *); 189 static void fx_setrun(kthread_t *); 190 static void fx_sleep(kthread_t *); 191 static void fx_tick(kthread_t *); 192 static void fx_wakeup(kthread_t *); 193 static int fx_donice(kthread_t *, cred_t *, int, int *); 194 static pri_t fx_globpri(kthread_t *); 195 static void fx_yield(kthread_t *); 196 static void fx_nullsys(); 197 198 extern fxdpent_t *fx_getdptbl(void); 199 200 static void fx_change_priority(kthread_t *, fxproc_t *); 201 static fxproc_t *fx_list_lookup(kt_did_t); 202 static void fx_list_release(fxproc_t *); 203 204 205 static struct classfuncs fx_classfuncs = { 206 /* class functions */ 207 fx_admin, 208 fx_getclinfo, 209 fx_parmsin, 210 fx_parmsout, 211 fx_vaparmsin, 212 fx_vaparmsout, 213 fx_getclpri, 214 fx_alloc, 215 fx_free, 216 217 /* thread functions */ 218 fx_enterclass, 219 fx_exitclass, 220 fx_canexit, 221 fx_fork, 222 fx_forkret, 223 fx_parmsget, 224 fx_parmsset, 225 fx_stop, 226 fx_exit, 227 fx_nullsys, /* active */ 228 fx_nullsys, /* inactive */ 229 fx_swapin, 230 fx_swapout, 231 fx_trapret, 232 fx_preempt, 233 fx_setrun, 234 fx_sleep, 235 fx_tick, 236 fx_wakeup, 237 fx_donice, 238 fx_globpri, 239 fx_nullsys, /* set_process_group */ 240 fx_yield, 241 }; 242 243 244 int 245 _init() 246 { 247 return (mod_install(&modlinkage)); 248 } 249 250 int 251 _fini() 252 { 253 return (EBUSY); 254 } 255 256 int 257 _info(struct modinfo *modinfop) 258 { 259 return (mod_info(&modlinkage, modinfop)); 260 } 261 262 /* 263 * Fixed priority class initialization. Called by dispinit() at boot time. 264 * We can ignore the clparmsz argument since we know that the smallest 265 * possible parameter buffer is big enough for us. 266 */ 267 /* ARGSUSED */ 268 static pri_t 269 fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp) 270 { 271 int i; 272 extern pri_t fx_getmaxumdpri(void); 273 274 fx_dptbl = fx_getdptbl(); 275 fx_maxumdpri = fx_getmaxumdpri(); 276 fx_maxglobpri = fx_dptbl[fx_maxumdpri].fx_globpri; 277 278 fx_cid = cid; /* Record our class ID */ 279 280 /* 281 * Initialize the hash table for fxprocs with callbacks 282 */ 283 for (i = 0; i < FX_CB_LISTS; i++) { 284 fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev = 285 &fx_cb_plisthead[i]; 286 } 287 288 /* 289 * We're required to return a pointer to our classfuncs 290 * structure and the highest global priority value we use. 291 */ 292 *clfuncspp = &fx_classfuncs; 293 return (fx_maxglobpri); 294 } 295 296 /* 297 * Get or reset the fx_dptbl values per the user's request. 298 */ 299 static int 300 fx_admin(caddr_t uaddr, cred_t *reqpcredp) 301 { 302 fxadmin_t fxadmin; 303 fxdpent_t *tmpdpp; 304 int userdpsz; 305 int i; 306 size_t fxdpsz; 307 308 if (get_udatamodel() == DATAMODEL_NATIVE) { 309 if (copyin(uaddr, &fxadmin, sizeof (fxadmin_t))) 310 return (EFAULT); 311 } 312 #ifdef _SYSCALL32_IMPL 313 else { 314 /* get fxadmin struct from ILP32 caller */ 315 fxadmin32_t fxadmin32; 316 if (copyin(uaddr, &fxadmin32, sizeof (fxadmin32_t))) 317 return (EFAULT); 318 fxadmin.fx_dpents = 319 (struct fxdpent *)(uintptr_t)fxadmin32.fx_dpents; 320 fxadmin.fx_ndpents = fxadmin32.fx_ndpents; 321 fxadmin.fx_cmd = fxadmin32.fx_cmd; 322 } 323 #endif /* _SYSCALL32_IMPL */ 324 325 fxdpsz = (fx_maxumdpri + 1) * sizeof (fxdpent_t); 326 327 switch (fxadmin.fx_cmd) { 328 case FX_GETDPSIZE: 329 fxadmin.fx_ndpents = fx_maxumdpri + 1; 330 331 if (get_udatamodel() == DATAMODEL_NATIVE) { 332 if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t))) 333 return (EFAULT); 334 } 335 #ifdef _SYSCALL32_IMPL 336 else { 337 /* return fxadmin struct to ILP32 caller */ 338 fxadmin32_t fxadmin32; 339 fxadmin32.fx_dpents = 340 (caddr32_t)(uintptr_t)fxadmin.fx_dpents; 341 fxadmin32.fx_ndpents = fxadmin.fx_ndpents; 342 fxadmin32.fx_cmd = fxadmin.fx_cmd; 343 if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t))) 344 return (EFAULT); 345 } 346 #endif /* _SYSCALL32_IMPL */ 347 break; 348 349 case FX_GETDPTBL: 350 userdpsz = MIN(fxadmin.fx_ndpents * sizeof (fxdpent_t), 351 fxdpsz); 352 if (copyout(fx_dptbl, fxadmin.fx_dpents, userdpsz)) 353 return (EFAULT); 354 355 fxadmin.fx_ndpents = userdpsz / sizeof (fxdpent_t); 356 357 if (get_udatamodel() == DATAMODEL_NATIVE) { 358 if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t))) 359 return (EFAULT); 360 } 361 #ifdef _SYSCALL32_IMPL 362 else { 363 /* return fxadmin struct to ILP32 callers */ 364 fxadmin32_t fxadmin32; 365 fxadmin32.fx_dpents = 366 (caddr32_t)(uintptr_t)fxadmin.fx_dpents; 367 fxadmin32.fx_ndpents = fxadmin.fx_ndpents; 368 fxadmin32.fx_cmd = fxadmin.fx_cmd; 369 if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t))) 370 return (EFAULT); 371 } 372 #endif /* _SYSCALL32_IMPL */ 373 break; 374 375 case FX_SETDPTBL: 376 /* 377 * We require that the requesting process has sufficient 378 * privileges. We also require that the table supplied by 379 * the user exactly match the current fx_dptbl in size. 380 */ 381 if (secpolicy_dispadm(reqpcredp) != 0) { 382 return (EPERM); 383 } 384 if (fxadmin.fx_ndpents * sizeof (fxdpent_t) != fxdpsz) { 385 return (EINVAL); 386 } 387 388 /* 389 * We read the user supplied table into a temporary buffer 390 * where it is validated before being copied over the 391 * fx_dptbl. 392 */ 393 tmpdpp = kmem_alloc(fxdpsz, KM_SLEEP); 394 if (copyin(fxadmin.fx_dpents, tmpdpp, fxdpsz)) { 395 kmem_free(tmpdpp, fxdpsz); 396 return (EFAULT); 397 } 398 for (i = 0; i < fxadmin.fx_ndpents; i++) { 399 400 /* 401 * Validate the user supplied values. All we are doing 402 * here is verifying that the values are within their 403 * allowable ranges and will not panic the system. We 404 * make no attempt to ensure that the resulting 405 * configuration makes sense or results in reasonable 406 * performance. 407 */ 408 if (tmpdpp[i].fx_quantum <= 0 && 409 tmpdpp[i].fx_quantum != FX_TQINF) { 410 kmem_free(tmpdpp, fxdpsz); 411 return (EINVAL); 412 } 413 } 414 415 /* 416 * Copy the user supplied values over the current fx_dptbl 417 * values. The fx_globpri member is read-only so we don't 418 * overwrite it. 419 */ 420 mutex_enter(&fx_dptblock); 421 for (i = 0; i < fxadmin.fx_ndpents; i++) { 422 fx_dptbl[i].fx_quantum = tmpdpp[i].fx_quantum; 423 } 424 mutex_exit(&fx_dptblock); 425 kmem_free(tmpdpp, fxdpsz); 426 break; 427 428 default: 429 return (EINVAL); 430 } 431 return (0); 432 } 433 434 /* 435 * Allocate a fixed priority class specific thread structure and 436 * initialize it with the parameters supplied. Also move the thread 437 * to specified priority. 438 */ 439 static int 440 fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp, 441 void *bufp) 442 { 443 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 444 fxproc_t *fxpp; 445 pri_t reqfxupri; 446 pri_t reqfxuprilim; 447 448 fxpp = (fxproc_t *)bufp; 449 ASSERT(fxpp != NULL); 450 451 /* 452 * Initialize the fxproc structure. 453 */ 454 fxpp->fx_flags = 0; 455 fxpp->fx_callback = NULL; 456 fxpp->fx_cookie = NULL; 457 458 if (fxkparmsp == NULL) { 459 /* 460 * Use default values. 461 */ 462 fxpp->fx_pri = fxpp->fx_uprilim = 0; 463 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 464 fxpp->fx_nice = NZERO; 465 } else { 466 /* 467 * Use supplied values. 468 */ 469 470 if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) { 471 reqfxuprilim = 0; 472 } else { 473 if (fxkparmsp->fx_uprilim > FX_MAX_UNPRIV_PRI && 474 secpolicy_setpriority(reqpcredp) != 0) 475 return (EPERM); 476 reqfxuprilim = fxkparmsp->fx_uprilim; 477 FX_ADJUST_PRI(reqfxuprilim); 478 } 479 480 if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) { 481 reqfxupri = reqfxuprilim; 482 } else { 483 if (fxkparmsp->fx_upri > FX_MAX_UNPRIV_PRI && 484 secpolicy_setpriority(reqpcredp) != 0) 485 return (EPERM); 486 /* 487 * Set the user priority to the requested value 488 * or the upri limit, whichever is lower. 489 */ 490 reqfxupri = fxkparmsp->fx_upri; 491 FX_ADJUST_PRI(reqfxupri); 492 493 if (reqfxupri > reqfxuprilim) 494 reqfxupri = reqfxuprilim; 495 } 496 497 498 fxpp->fx_uprilim = reqfxuprilim; 499 fxpp->fx_pri = reqfxupri; 500 501 fxpp->fx_nice = NZERO - (NZERO * reqfxupri) 502 / fx_maxupri; 503 504 if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) || 505 (fxkparmsp->fx_tqntm == FX_TQDEF)) { 506 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 507 } else { 508 if (secpolicy_setpriority(reqpcredp) != 0) 509 return (EPERM); 510 511 if (fxkparmsp->fx_tqntm == FX_TQINF) 512 fxpp->fx_pquantum = FX_TQINF; 513 else { 514 fxpp->fx_pquantum = fxkparmsp->fx_tqntm; 515 } 516 } 517 518 } 519 520 fxpp->fx_timeleft = fxpp->fx_pquantum; 521 cpucaps_sc_init(&fxpp->fx_caps); 522 fxpp->fx_tp = t; 523 524 thread_lock(t); /* get dispatcher lock on thread */ 525 t->t_clfuncs = &(sclass[cid].cl_funcs->thread); 526 t->t_cid = cid; 527 t->t_cldata = (void *)fxpp; 528 t->t_schedflag &= ~TS_RUNQMATCH; 529 fx_change_priority(t, fxpp); 530 thread_unlock(t); 531 532 return (0); 533 } 534 535 /* 536 * The thread is exiting. 537 */ 538 static void 539 fx_exit(kthread_t *t) 540 { 541 fxproc_t *fxpp; 542 543 thread_lock(t); 544 fxpp = (fxproc_t *)(t->t_cldata); 545 546 /* 547 * A thread could be exiting in between clock ticks, so we need to 548 * calculate how much CPU time it used since it was charged last time. 549 * 550 * CPU caps are not enforced on exiting processes - it is usually 551 * desirable to exit as soon as possible to free resources. 552 */ 553 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ONLY); 554 555 if (FX_HAS_CB(fxpp)) { 556 FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie); 557 fxpp->fx_callback = NULL; 558 fxpp->fx_cookie = NULL; 559 thread_unlock(t); 560 FX_CB_LIST_DELETE(fxpp); 561 return; 562 } 563 564 thread_unlock(t); 565 } 566 567 /* 568 * Exiting the class. Free fxproc structure of thread. 569 */ 570 static void 571 fx_exitclass(void *procp) 572 { 573 fxproc_t *fxpp = (fxproc_t *)procp; 574 575 thread_lock(fxpp->fx_tp); 576 if (FX_HAS_CB(fxpp)) { 577 578 FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie); 579 580 fxpp->fx_callback = NULL; 581 fxpp->fx_cookie = NULL; 582 thread_unlock(fxpp->fx_tp); 583 FX_CB_LIST_DELETE(fxpp); 584 } else 585 thread_unlock(fxpp->fx_tp); 586 587 kmem_free(fxpp, sizeof (fxproc_t)); 588 } 589 590 /* ARGSUSED */ 591 static int 592 fx_canexit(kthread_t *t, cred_t *cred) 593 { 594 /* 595 * A thread can always leave the FX class 596 */ 597 return (0); 598 } 599 600 /* 601 * Initialize fixed-priority class specific proc structure for a child. 602 * callbacks are not inherited upon fork. 603 */ 604 static int 605 fx_fork(kthread_t *t, kthread_t *ct, void *bufp) 606 { 607 fxproc_t *pfxpp; /* ptr to parent's fxproc structure */ 608 fxproc_t *cfxpp; /* ptr to child's fxproc structure */ 609 610 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 611 612 cfxpp = (fxproc_t *)bufp; 613 ASSERT(cfxpp != NULL); 614 thread_lock(t); 615 pfxpp = (fxproc_t *)t->t_cldata; 616 /* 617 * Initialize child's fxproc structure. 618 */ 619 cfxpp->fx_timeleft = cfxpp->fx_pquantum = pfxpp->fx_pquantum; 620 cfxpp->fx_pri = pfxpp->fx_pri; 621 cfxpp->fx_uprilim = pfxpp->fx_uprilim; 622 cfxpp->fx_nice = pfxpp->fx_nice; 623 cfxpp->fx_callback = NULL; 624 cfxpp->fx_cookie = NULL; 625 cfxpp->fx_flags = pfxpp->fx_flags & ~(FXBACKQ); 626 cpucaps_sc_init(&cfxpp->fx_caps); 627 628 cfxpp->fx_tp = ct; 629 ct->t_cldata = (void *)cfxpp; 630 thread_unlock(t); 631 632 /* 633 * Link new structure into fxproc list. 634 */ 635 return (0); 636 } 637 638 639 /* 640 * Child is placed at back of dispatcher queue and parent gives 641 * up processor so that the child runs first after the fork. 642 * This allows the child immediately execing to break the multiple 643 * use of copy on write pages with no disk home. The parent will 644 * get to steal them back rather than uselessly copying them. 645 */ 646 static void 647 fx_forkret(kthread_t *t, kthread_t *ct) 648 { 649 proc_t *pp = ttoproc(t); 650 proc_t *cp = ttoproc(ct); 651 fxproc_t *fxpp; 652 653 ASSERT(t == curthread); 654 ASSERT(MUTEX_HELD(&pidlock)); 655 656 /* 657 * Grab the child's p_lock before dropping pidlock to ensure 658 * the process does not disappear before we set it running. 659 */ 660 mutex_enter(&cp->p_lock); 661 mutex_exit(&pidlock); 662 continuelwps(cp); 663 mutex_exit(&cp->p_lock); 664 665 mutex_enter(&pp->p_lock); 666 continuelwps(pp); 667 mutex_exit(&pp->p_lock); 668 669 thread_lock(t); 670 fxpp = (fxproc_t *)(t->t_cldata); 671 t->t_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 672 ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri); 673 THREAD_TRANSITION(t); 674 fx_setrun(t); 675 thread_unlock(t); 676 677 swtch(); 678 } 679 680 681 /* 682 * Get information about the fixed-priority class into the buffer 683 * pointed to by fxinfop. The maximum configured user priority 684 * is the only information we supply. 685 */ 686 static int 687 fx_getclinfo(void *infop) 688 { 689 fxinfo_t *fxinfop = (fxinfo_t *)infop; 690 fxinfop->fx_maxupri = fx_maxupri; 691 return (0); 692 } 693 694 695 696 /* 697 * Return the global scheduling priority ranges for the fixed-priority 698 * class in pcpri_t structure. 699 */ 700 static int 701 fx_getclpri(pcpri_t *pcprip) 702 { 703 pcprip->pc_clpmax = fx_dptbl[fx_maxumdpri].fx_globpri; 704 pcprip->pc_clpmin = fx_dptbl[0].fx_globpri; 705 return (0); 706 } 707 708 709 static void 710 fx_nullsys() 711 {} 712 713 714 /* 715 * Get the fixed-priority parameters of the thread pointed to by 716 * fxprocp into the buffer pointed to by fxparmsp. 717 */ 718 static void 719 fx_parmsget(kthread_t *t, void *parmsp) 720 { 721 fxproc_t *fxpp = (fxproc_t *)t->t_cldata; 722 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 723 724 fxkparmsp->fx_upri = fxpp->fx_pri; 725 fxkparmsp->fx_uprilim = fxpp->fx_uprilim; 726 fxkparmsp->fx_tqntm = fxpp->fx_pquantum; 727 } 728 729 730 731 /* 732 * Check the validity of the fixed-priority parameters in the buffer 733 * pointed to by fxparmsp. 734 */ 735 static int 736 fx_parmsin(void *parmsp) 737 { 738 fxparms_t *fxparmsp = (fxparms_t *)parmsp; 739 uint_t cflags; 740 longlong_t ticks; 741 /* 742 * Check validity of parameters. 743 */ 744 745 if ((fxparmsp->fx_uprilim > fx_maxupri || 746 fxparmsp->fx_uprilim < 0) && 747 fxparmsp->fx_uprilim != FX_NOCHANGE) 748 return (EINVAL); 749 750 if ((fxparmsp->fx_upri > fx_maxupri || 751 fxparmsp->fx_upri < 0) && 752 fxparmsp->fx_upri != FX_NOCHANGE) 753 return (EINVAL); 754 755 if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) || 756 fxparmsp->fx_tqnsecs >= NANOSEC) 757 return (EINVAL); 758 759 cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0); 760 761 if (fxparmsp->fx_uprilim != FX_NOCHANGE) { 762 cflags |= FX_DOUPRILIM; 763 } 764 765 if (fxparmsp->fx_tqnsecs != FX_NOCHANGE) 766 cflags |= FX_DOTQ; 767 768 /* 769 * convert the buffer to kernel format. 770 */ 771 772 if (fxparmsp->fx_tqnsecs >= 0) { 773 if ((ticks = SEC_TO_TICK((longlong_t)fxparmsp->fx_tqsecs) + 774 NSEC_TO_TICK_ROUNDUP(fxparmsp->fx_tqnsecs)) > INT_MAX) 775 return (ERANGE); 776 777 ((fxkparms_t *)fxparmsp)->fx_tqntm = (int)ticks; 778 } else { 779 if ((fxparmsp->fx_tqnsecs != FX_NOCHANGE) && 780 (fxparmsp->fx_tqnsecs != FX_TQINF) && 781 (fxparmsp->fx_tqnsecs != FX_TQDEF)) 782 return (EINVAL); 783 ((fxkparms_t *)fxparmsp)->fx_tqntm = fxparmsp->fx_tqnsecs; 784 } 785 786 ((fxkparms_t *)fxparmsp)->fx_cflags = cflags; 787 788 return (0); 789 } 790 791 792 /* 793 * Check the validity of the fixed-priority parameters in the pc_vaparms_t 794 * structure vaparmsp and put them in the buffer pointed to by fxprmsp. 795 * pc_vaparms_t contains (key, value) pairs of parameter. 796 */ 797 static int 798 fx_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp) 799 { 800 uint_t secs = 0; 801 uint_t cnt; 802 int nsecs = 0; 803 int priflag, secflag, nsecflag, limflag; 804 longlong_t ticks; 805 fxkparms_t *fxprmsp = (fxkparms_t *)prmsp; 806 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0]; 807 808 809 /* 810 * First check the validity of parameters and convert them 811 * from the user supplied format to the internal format. 812 */ 813 priflag = secflag = nsecflag = limflag = 0; 814 815 fxprmsp->fx_cflags = 0; 816 817 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT) 818 return (EINVAL); 819 820 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) { 821 822 switch (vpp->pc_key) { 823 case FX_KY_UPRILIM: 824 if (limflag++) 825 return (EINVAL); 826 fxprmsp->fx_cflags |= FX_DOUPRILIM; 827 fxprmsp->fx_uprilim = (pri_t)vpp->pc_parm; 828 if (fxprmsp->fx_uprilim > fx_maxupri || 829 fxprmsp->fx_uprilim < 0) 830 return (EINVAL); 831 break; 832 833 case FX_KY_UPRI: 834 if (priflag++) 835 return (EINVAL); 836 fxprmsp->fx_cflags |= FX_DOUPRI; 837 fxprmsp->fx_upri = (pri_t)vpp->pc_parm; 838 if (fxprmsp->fx_upri > fx_maxupri || 839 fxprmsp->fx_upri < 0) 840 return (EINVAL); 841 break; 842 843 case FX_KY_TQSECS: 844 if (secflag++) 845 return (EINVAL); 846 fxprmsp->fx_cflags |= FX_DOTQ; 847 secs = (uint_t)vpp->pc_parm; 848 break; 849 850 case FX_KY_TQNSECS: 851 if (nsecflag++) 852 return (EINVAL); 853 fxprmsp->fx_cflags |= FX_DOTQ; 854 nsecs = (int)vpp->pc_parm; 855 break; 856 857 default: 858 return (EINVAL); 859 } 860 } 861 862 if (vaparmsp->pc_vaparmscnt == 0) { 863 /* 864 * Use default parameters. 865 */ 866 fxprmsp->fx_upri = 0; 867 fxprmsp->fx_uprilim = 0; 868 fxprmsp->fx_tqntm = FX_TQDEF; 869 fxprmsp->fx_cflags = FX_DOUPRI | FX_DOUPRILIM | FX_DOTQ; 870 } else if ((fxprmsp->fx_cflags & FX_DOTQ) != 0) { 871 if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC) 872 return (EINVAL); 873 874 if (nsecs >= 0) { 875 if ((ticks = SEC_TO_TICK((longlong_t)secs) + 876 NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX) 877 return (ERANGE); 878 879 fxprmsp->fx_tqntm = (int)ticks; 880 } else { 881 if (nsecs != FX_TQINF && nsecs != FX_TQDEF) 882 return (EINVAL); 883 fxprmsp->fx_tqntm = nsecs; 884 } 885 } 886 887 return (0); 888 } 889 890 891 /* 892 * Nothing to do here but return success. 893 */ 894 /* ARGSUSED */ 895 static int 896 fx_parmsout(void *parmsp, pc_vaparms_t *vaparmsp) 897 { 898 register fxkparms_t *fxkprmsp = (fxkparms_t *)parmsp; 899 900 if (vaparmsp != NULL) 901 return (0); 902 903 if (fxkprmsp->fx_tqntm < 0) { 904 /* 905 * Quantum field set to special value (e.g. FX_TQINF) 906 */ 907 ((fxparms_t *)fxkprmsp)->fx_tqnsecs = fxkprmsp->fx_tqntm; 908 ((fxparms_t *)fxkprmsp)->fx_tqsecs = 0; 909 910 } else { 911 /* Convert quantum from ticks to seconds-nanoseconds */ 912 913 timestruc_t ts; 914 TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts); 915 ((fxparms_t *)fxkprmsp)->fx_tqsecs = ts.tv_sec; 916 ((fxparms_t *)fxkprmsp)->fx_tqnsecs = ts.tv_nsec; 917 } 918 919 return (0); 920 } 921 922 923 /* 924 * Copy all selected fixed-priority class parameters to the user. 925 * The parameters are specified by a key. 926 */ 927 static int 928 fx_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp) 929 { 930 fxkparms_t *fxkprmsp = (fxkparms_t *)prmsp; 931 timestruc_t ts; 932 uint_t cnt; 933 uint_t secs; 934 int nsecs; 935 int priflag, secflag, nsecflag, limflag; 936 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0]; 937 938 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 939 940 priflag = secflag = nsecflag = limflag = 0; 941 942 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT) 943 return (EINVAL); 944 945 if (fxkprmsp->fx_tqntm < 0) { 946 /* 947 * Quantum field set to special value (e.g. FX_TQINF). 948 */ 949 secs = 0; 950 nsecs = fxkprmsp->fx_tqntm; 951 } else { 952 /* 953 * Convert quantum from ticks to seconds-nanoseconds. 954 */ 955 TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts); 956 secs = ts.tv_sec; 957 nsecs = ts.tv_nsec; 958 } 959 960 961 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) { 962 963 switch (vpp->pc_key) { 964 case FX_KY_UPRILIM: 965 if (limflag++) 966 return (EINVAL); 967 if (copyout(&fxkprmsp->fx_uprilim, 968 (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t))) 969 return (EFAULT); 970 break; 971 972 case FX_KY_UPRI: 973 if (priflag++) 974 return (EINVAL); 975 if (copyout(&fxkprmsp->fx_upri, 976 (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t))) 977 return (EFAULT); 978 break; 979 980 case FX_KY_TQSECS: 981 if (secflag++) 982 return (EINVAL); 983 if (copyout(&secs, 984 (void *)(uintptr_t)vpp->pc_parm, sizeof (uint_t))) 985 return (EFAULT); 986 break; 987 988 case FX_KY_TQNSECS: 989 if (nsecflag++) 990 return (EINVAL); 991 if (copyout(&nsecs, 992 (void *)(uintptr_t)vpp->pc_parm, sizeof (int))) 993 return (EFAULT); 994 break; 995 996 default: 997 return (EINVAL); 998 } 999 } 1000 1001 return (0); 1002 } 1003 1004 /* 1005 * Set the scheduling parameters of the thread pointed to by fxprocp 1006 * to those specified in the buffer pointed to by fxparmsp. 1007 */ 1008 /* ARGSUSED */ 1009 static int 1010 fx_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp) 1011 { 1012 char nice; 1013 pri_t reqfxuprilim; 1014 pri_t reqfxupri; 1015 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 1016 fxproc_t *fxpp; 1017 1018 1019 ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock)); 1020 1021 thread_lock(tx); 1022 fxpp = (fxproc_t *)tx->t_cldata; 1023 1024 if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) 1025 reqfxuprilim = fxpp->fx_uprilim; 1026 else 1027 reqfxuprilim = fxkparmsp->fx_uprilim; 1028 1029 /* 1030 * Basic permissions enforced by generic kernel code 1031 * for all classes require that a thread attempting 1032 * to change the scheduling parameters of a target 1033 * thread be privileged or have a real or effective 1034 * UID matching that of the target thread. We are not 1035 * called unless these basic permission checks have 1036 * already passed. The fixed priority class requires in 1037 * addition that the calling thread be privileged if it 1038 * is attempting to raise the pri above its current 1039 * value This may have been checked previously but if our 1040 * caller passed us a non-NULL credential pointer we assume 1041 * it hasn't and we check it here. 1042 */ 1043 1044 if ((reqpcredp != NULL) && 1045 (reqfxuprilim > fxpp->fx_uprilim || 1046 ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)) && 1047 secpolicy_setpriority(reqpcredp) != 0) { 1048 thread_unlock(tx); 1049 return (EPERM); 1050 } 1051 1052 FX_ADJUST_PRI(reqfxuprilim); 1053 1054 if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) 1055 reqfxupri = fxpp->fx_pri; 1056 else 1057 reqfxupri = fxkparmsp->fx_upri; 1058 1059 1060 /* 1061 * Make sure the user priority doesn't exceed the upri limit. 1062 */ 1063 if (reqfxupri > reqfxuprilim) 1064 reqfxupri = reqfxuprilim; 1065 1066 /* 1067 * Set fx_nice to the nice value corresponding to the user 1068 * priority we are setting. Note that setting the nice field 1069 * of the parameter struct won't affect upri or nice. 1070 */ 1071 1072 nice = NZERO - (reqfxupri * NZERO) / fx_maxupri; 1073 1074 if (nice > NZERO) 1075 nice = NZERO; 1076 1077 fxpp->fx_uprilim = reqfxuprilim; 1078 fxpp->fx_pri = reqfxupri; 1079 1080 if (fxkparmsp->fx_tqntm == FX_TQINF) 1081 fxpp->fx_pquantum = FX_TQINF; 1082 else if (fxkparmsp->fx_tqntm == FX_TQDEF) 1083 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1084 else if ((fxkparmsp->fx_cflags & FX_DOTQ) != 0) 1085 fxpp->fx_pquantum = fxkparmsp->fx_tqntm; 1086 1087 fxpp->fx_nice = nice; 1088 1089 fx_change_priority(tx, fxpp); 1090 thread_unlock(tx); 1091 return (0); 1092 } 1093 1094 1095 /* 1096 * Return the global scheduling priority that would be assigned 1097 * to a thread entering the fixed-priority class with the fx_upri. 1098 */ 1099 static pri_t 1100 fx_globpri(kthread_t *t) 1101 { 1102 fxproc_t *fxpp; 1103 1104 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 1105 1106 fxpp = (fxproc_t *)t->t_cldata; 1107 return (fx_dptbl[fxpp->fx_pri].fx_globpri); 1108 1109 } 1110 1111 /* 1112 * Arrange for thread to be placed in appropriate location 1113 * on dispatcher queue. 1114 * 1115 * This is called with the current thread in TS_ONPROC and locked. 1116 */ 1117 static void 1118 fx_preempt(kthread_t *t) 1119 { 1120 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1121 1122 ASSERT(t == curthread); 1123 ASSERT(THREAD_LOCK_HELD(curthread)); 1124 1125 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE); 1126 1127 /* 1128 * Check to see if we're doing "preemption control" here. If 1129 * we are, and if the user has requested that this thread not 1130 * be preempted, and if preemptions haven't been put off for 1131 * too long, let the preemption happen here but try to make 1132 * sure the thread is rescheduled as soon as possible. We do 1133 * this by putting it on the front of the highest priority run 1134 * queue in the FX class. If the preemption has been put off 1135 * for too long, clear the "nopreempt" bit and let the thread 1136 * be preempted. 1137 */ 1138 if (t->t_schedctl && schedctl_get_nopreempt(t)) { 1139 if (fxpp->fx_pquantum == FX_TQINF || 1140 fxpp->fx_timeleft > -SC_MAX_TICKS) { 1141 DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t); 1142 schedctl_set_yield(t, 1); 1143 setfrontdq(t); 1144 return; 1145 } else { 1146 schedctl_set_nopreempt(t, 0); 1147 DTRACE_SCHED1(schedctl__preempt, kthread_t *, t); 1148 TNF_PROBE_2(schedctl_preempt, "schedctl FX fx_preempt", 1149 /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid, 1150 tnf_lwpid, lwpid, t->t_tid); 1151 /* 1152 * Fall through and be preempted below. 1153 */ 1154 } 1155 } 1156 1157 if (FX_HAS_CB(fxpp)) { 1158 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1159 pri_t newpri = fxpp->fx_pri; 1160 FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, 1161 &new_quantum, &newpri); 1162 FX_ADJUST_QUANTUM(new_quantum); 1163 if ((int)new_quantum != fxpp->fx_pquantum) { 1164 fxpp->fx_pquantum = (int)new_quantum; 1165 fxpp->fx_timeleft = fxpp->fx_pquantum; 1166 } 1167 FX_ADJUST_PRI(newpri); 1168 fxpp->fx_pri = newpri; 1169 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1170 } 1171 1172 /* 1173 * This thread may be placed on wait queue by CPU Caps. In this case we 1174 * do not need to do anything until it is removed from the wait queue. 1175 */ 1176 if (CPUCAPS_ENFORCE(t)) { 1177 return; 1178 } 1179 1180 if ((fxpp->fx_flags & (FXBACKQ)) == FXBACKQ) { 1181 fxpp->fx_timeleft = fxpp->fx_pquantum; 1182 fxpp->fx_flags &= ~FXBACKQ; 1183 setbackdq(t); 1184 } else { 1185 setfrontdq(t); 1186 } 1187 } 1188 1189 static void 1190 fx_setrun(kthread_t *t) 1191 { 1192 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1193 1194 ASSERT(THREAD_LOCK_HELD(t)); /* t should be in transition */ 1195 fxpp->fx_flags &= ~FXBACKQ; 1196 1197 if (t->t_disp_time != lbolt) 1198 setbackdq(t); 1199 else 1200 setfrontdq(t); 1201 } 1202 1203 1204 /* 1205 * Prepare thread for sleep. We reset the thread priority so it will 1206 * run at the kernel priority level when it wakes up. 1207 */ 1208 static void 1209 fx_sleep(kthread_t *t) 1210 { 1211 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1212 1213 ASSERT(t == curthread); 1214 ASSERT(THREAD_LOCK_HELD(t)); 1215 1216 /* 1217 * Account for time spent on CPU before going to sleep. 1218 */ 1219 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE); 1220 1221 if (FX_HAS_CB(fxpp)) { 1222 FX_CB_SLEEP(FX_CALLB(fxpp), fxpp->fx_cookie); 1223 } 1224 t->t_stime = lbolt; /* time stamp for the swapper */ 1225 } 1226 1227 1228 /* 1229 * Return Values: 1230 * 1231 * -1 if the thread is loaded or is not eligible to be swapped in. 1232 * 1233 * FX and RT threads are designed so that they don't swapout; however, 1234 * it is possible that while the thread is swapped out and in another class, it 1235 * can be changed to FX or RT. Since these threads should be swapped in 1236 * as soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin 1237 * returns SHRT_MAX - 1, so that it gives deference to any swapped out 1238 * RT threads. 1239 */ 1240 /* ARGSUSED */ 1241 static pri_t 1242 fx_swapin(kthread_t *t, int flags) 1243 { 1244 pri_t tpri = -1; 1245 1246 ASSERT(THREAD_LOCK_HELD(t)); 1247 1248 if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) { 1249 tpri = (pri_t)SHRT_MAX - 1; 1250 } 1251 1252 return (tpri); 1253 } 1254 1255 /* 1256 * Return Values 1257 * -1 if the thread isn't loaded or is not eligible to be swapped out. 1258 */ 1259 /* ARGSUSED */ 1260 static pri_t 1261 fx_swapout(kthread_t *t, int flags) 1262 { 1263 ASSERT(THREAD_LOCK_HELD(t)); 1264 1265 return (-1); 1266 1267 } 1268 1269 /* ARGSUSED */ 1270 static void 1271 fx_stop(kthread_t *t, int why, int what) 1272 { 1273 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1274 1275 ASSERT(THREAD_LOCK_HELD(t)); 1276 1277 if (FX_HAS_CB(fxpp)) { 1278 FX_CB_STOP(FX_CALLB(fxpp), fxpp->fx_cookie); 1279 } 1280 } 1281 1282 /* 1283 * Check for time slice expiration. If time slice has expired 1284 * set runrun to cause preemption. 1285 */ 1286 static void 1287 fx_tick(kthread_t *t) 1288 { 1289 boolean_t call_cpu_surrender = B_FALSE; 1290 fxproc_t *fxpp; 1291 1292 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1293 1294 thread_lock(t); 1295 1296 fxpp = (fxproc_t *)(t->t_cldata); 1297 1298 if (FX_HAS_CB(fxpp)) { 1299 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1300 pri_t newpri = fxpp->fx_pri; 1301 FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie, 1302 &new_quantum, &newpri); 1303 FX_ADJUST_QUANTUM(new_quantum); 1304 if ((int)new_quantum != fxpp->fx_pquantum) { 1305 fxpp->fx_pquantum = (int)new_quantum; 1306 fxpp->fx_timeleft = fxpp->fx_pquantum; 1307 } 1308 FX_ADJUST_PRI(newpri); 1309 if (newpri != fxpp->fx_pri) { 1310 fxpp->fx_pri = newpri; 1311 fx_change_priority(t, fxpp); 1312 } 1313 } 1314 1315 /* 1316 * Keep track of thread's project CPU usage. Note that projects 1317 * get charged even when threads are running in the kernel. 1318 */ 1319 call_cpu_surrender = CPUCAPS_CHARGE(t, &fxpp->fx_caps, 1320 CPUCAPS_CHARGE_ENFORCE); 1321 1322 if ((fxpp->fx_pquantum != FX_TQINF) && 1323 (--fxpp->fx_timeleft <= 0)) { 1324 pri_t new_pri; 1325 1326 /* 1327 * If we're doing preemption control and trying to 1328 * avoid preempting this thread, just note that 1329 * the thread should yield soon and let it keep 1330 * running (unless it's been a while). 1331 */ 1332 if (t->t_schedctl && schedctl_get_nopreempt(t)) { 1333 if (fxpp->fx_timeleft > -SC_MAX_TICKS) { 1334 DTRACE_SCHED1(schedctl__nopreempt, 1335 kthread_t *, t); 1336 schedctl_set_yield(t, 1); 1337 thread_unlock_nopreempt(t); 1338 return; 1339 } 1340 TNF_PROBE_2(schedctl_failsafe, 1341 "schedctl FX fx_tick", /* CSTYLED */, 1342 tnf_pid, pid, ttoproc(t)->p_pid, 1343 tnf_lwpid, lwpid, t->t_tid); 1344 } 1345 new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 1346 ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); 1347 /* 1348 * When the priority of a thread is changed, 1349 * it may be necessary to adjust its position 1350 * on a sleep queue or dispatch queue. Even 1351 * when the priority is not changed, we need 1352 * to preserve round robin on dispatch queue. 1353 * The function thread_change_pri accomplishes 1354 * this. 1355 */ 1356 if (thread_change_pri(t, new_pri, 0)) { 1357 fxpp->fx_timeleft = fxpp->fx_pquantum; 1358 } else { 1359 call_cpu_surrender = B_TRUE; 1360 } 1361 } else if (t->t_state == TS_ONPROC && 1362 t->t_pri < t->t_disp_queue->disp_maxrunpri) { 1363 call_cpu_surrender = B_TRUE; 1364 } 1365 1366 if (call_cpu_surrender) { 1367 fxpp->fx_flags |= FXBACKQ; 1368 cpu_surrender(t); 1369 } 1370 thread_unlock_nopreempt(t); /* clock thread can't be preempted */ 1371 } 1372 1373 1374 static void 1375 fx_trapret(kthread_t *t) 1376 { 1377 cpu_t *cp = CPU; 1378 1379 ASSERT(THREAD_LOCK_HELD(t)); 1380 ASSERT(t == curthread); 1381 ASSERT(cp->cpu_dispthread == t); 1382 ASSERT(t->t_state == TS_ONPROC); 1383 } 1384 1385 1386 /* 1387 * Processes waking up go to the back of their queue. 1388 */ 1389 static void 1390 fx_wakeup(kthread_t *t) 1391 { 1392 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1393 1394 ASSERT(THREAD_LOCK_HELD(t)); 1395 1396 t->t_stime = lbolt; /* time stamp for the swapper */ 1397 if (FX_HAS_CB(fxpp)) { 1398 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1399 pri_t newpri = fxpp->fx_pri; 1400 FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie, 1401 &new_quantum, &newpri); 1402 FX_ADJUST_QUANTUM(new_quantum); 1403 if ((int)new_quantum != fxpp->fx_pquantum) { 1404 fxpp->fx_pquantum = (int)new_quantum; 1405 fxpp->fx_timeleft = fxpp->fx_pquantum; 1406 } 1407 1408 FX_ADJUST_PRI(newpri); 1409 if (newpri != fxpp->fx_pri) { 1410 fxpp->fx_pri = newpri; 1411 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1412 } 1413 } 1414 1415 fxpp->fx_flags &= ~FXBACKQ; 1416 1417 if (t->t_disp_time != lbolt) 1418 setbackdq(t); 1419 else 1420 setfrontdq(t); 1421 } 1422 1423 1424 /* 1425 * When a thread yields, put it on the back of the run queue. 1426 */ 1427 static void 1428 fx_yield(kthread_t *t) 1429 { 1430 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1431 1432 ASSERT(t == curthread); 1433 ASSERT(THREAD_LOCK_HELD(t)); 1434 1435 /* 1436 * Collect CPU usage spent before yielding CPU. 1437 */ 1438 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE); 1439 1440 if (FX_HAS_CB(fxpp)) { 1441 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1442 pri_t newpri = fxpp->fx_pri; 1443 FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, 1444 &new_quantum, &newpri); 1445 FX_ADJUST_QUANTUM(new_quantum); 1446 if ((int)new_quantum != fxpp->fx_pquantum) { 1447 fxpp->fx_pquantum = (int)new_quantum; 1448 fxpp->fx_timeleft = fxpp->fx_pquantum; 1449 } 1450 FX_ADJUST_PRI(newpri); 1451 fxpp->fx_pri = newpri; 1452 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1453 } 1454 1455 /* 1456 * Clear the preemption control "yield" bit since the user is 1457 * doing a yield. 1458 */ 1459 if (t->t_schedctl) 1460 schedctl_set_yield(t, 0); 1461 1462 if (fxpp->fx_timeleft <= 0) { 1463 /* 1464 * Time slice was artificially extended to avoid 1465 * preemption, so pretend we're preempting it now. 1466 */ 1467 DTRACE_SCHED1(schedctl__yield, int, -fxpp->fx_timeleft); 1468 fxpp->fx_timeleft = fxpp->fx_pquantum; 1469 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1470 ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri); 1471 } 1472 1473 fxpp->fx_flags &= ~FXBACKQ; 1474 setbackdq(t); 1475 } 1476 1477 1478 /* 1479 * Increment the nice value of the specified thread by incr and 1480 * return the new value in *retvalp. 1481 */ 1482 static int 1483 fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) 1484 { 1485 int newnice; 1486 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1487 fxkparms_t fxkparms; 1488 1489 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1490 1491 /* If there's no change to priority, just return current setting */ 1492 if (incr == 0) { 1493 if (retvalp) { 1494 *retvalp = fxpp->fx_nice - NZERO; 1495 } 1496 return (0); 1497 } 1498 1499 if ((incr < 0 || incr > 2 * NZERO) && 1500 secpolicy_setpriority(cr) != 0) 1501 return (EPERM); 1502 1503 /* 1504 * Specifying a nice increment greater than the upper limit of 1505 * 2 * NZERO - 1 will result in the thread's nice value being 1506 * set to the upper limit. We check for this before computing 1507 * the new value because otherwise we could get overflow 1508 * if a privileged user specified some ridiculous increment. 1509 */ 1510 if (incr > 2 * NZERO - 1) 1511 incr = 2 * NZERO - 1; 1512 1513 newnice = fxpp->fx_nice + incr; 1514 if (newnice > NZERO) 1515 newnice = NZERO; 1516 else if (newnice < 0) 1517 newnice = 0; 1518 1519 fxkparms.fx_uprilim = fxkparms.fx_upri = 1520 -((newnice - NZERO) * fx_maxupri) / NZERO; 1521 1522 fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI; 1523 1524 fxkparms.fx_tqntm = FX_TQDEF; 1525 1526 /* 1527 * Reset the uprilim and upri values of the thread. Adjust 1528 * time quantum accordingly. 1529 */ 1530 1531 (void) fx_parmsset(t, (void *)&fxkparms, (id_t)0, (cred_t *)NULL); 1532 1533 /* 1534 * Although fx_parmsset already reset fx_nice it may 1535 * not have been set to precisely the value calculated above 1536 * because fx_parmsset determines the nice value from the 1537 * user priority and we may have truncated during the integer 1538 * conversion from nice value to user priority and back. 1539 * We reset fx_nice to the value we calculated above. 1540 */ 1541 fxpp->fx_nice = (char)newnice; 1542 1543 if (retvalp) 1544 *retvalp = newnice - NZERO; 1545 1546 return (0); 1547 } 1548 1549 static void 1550 fx_change_priority(kthread_t *t, fxproc_t *fxpp) 1551 { 1552 pri_t new_pri; 1553 1554 ASSERT(THREAD_LOCK_HELD(t)); 1555 new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 1556 ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); 1557 if (t == curthread || t->t_state == TS_ONPROC) { 1558 /* curthread is always onproc */ 1559 cpu_t *cp = t->t_disp_queue->disp_cpu; 1560 THREAD_CHANGE_PRI(t, new_pri); 1561 if (t == cp->cpu_dispthread) 1562 cp->cpu_dispatch_pri = DISP_PRIO(t); 1563 if (DISP_MUST_SURRENDER(t)) { 1564 fxpp->fx_flags |= FXBACKQ; 1565 cpu_surrender(t); 1566 } else { 1567 fxpp->fx_timeleft = fxpp->fx_pquantum; 1568 } 1569 } else { 1570 /* 1571 * When the priority of a thread is changed, 1572 * it may be necessary to adjust its position 1573 * on a sleep queue or dispatch queue. 1574 * The function thread_change_pri accomplishes 1575 * this. 1576 */ 1577 if (thread_change_pri(t, new_pri, 0)) { 1578 /* 1579 * The thread was on a run queue. Reset 1580 * its CPU timeleft from the quantum 1581 * associated with the new priority. 1582 */ 1583 fxpp->fx_timeleft = fxpp->fx_pquantum; 1584 } else { 1585 fxpp->fx_flags |= FXBACKQ; 1586 } 1587 } 1588 } 1589 1590 static int 1591 fx_alloc(void **p, int flag) 1592 { 1593 void *bufp; 1594 1595 bufp = kmem_alloc(sizeof (fxproc_t), flag); 1596 if (bufp == NULL) { 1597 return (ENOMEM); 1598 } else { 1599 *p = bufp; 1600 return (0); 1601 } 1602 } 1603 1604 static void 1605 fx_free(void *bufp) 1606 { 1607 if (bufp) 1608 kmem_free(bufp, sizeof (fxproc_t)); 1609 } 1610 1611 /* 1612 * Release the callback list mutex after successful lookup 1613 */ 1614 void 1615 fx_list_release(fxproc_t *fxpp) 1616 { 1617 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); 1618 kmutex_t *lockp = &fx_cb_list_lock[index]; 1619 mutex_exit(lockp); 1620 } 1621 1622 fxproc_t * 1623 fx_list_lookup(kt_did_t ktid) 1624 { 1625 int index = FX_CB_LIST_HASH(ktid); 1626 kmutex_t *lockp = &fx_cb_list_lock[index]; 1627 fxproc_t *fxpp; 1628 1629 mutex_enter(lockp); 1630 1631 for (fxpp = fx_cb_plisthead[index].fx_cb_next; 1632 fxpp != &fx_cb_plisthead[index]; fxpp = fxpp->fx_cb_next) { 1633 if (fxpp->fx_tp->t_cid == fx_cid && fxpp->fx_ktid == ktid && 1634 fxpp->fx_callback != NULL) { 1635 /* 1636 * The caller is responsible for calling 1637 * fx_list_release to drop the lock upon 1638 * successful lookup 1639 */ 1640 return (fxpp); 1641 } 1642 } 1643 mutex_exit(lockp); 1644 return ((fxproc_t *)NULL); 1645 } 1646 1647 1648 /* 1649 * register a callback set of routines for current thread 1650 * thread should already be in FX class 1651 */ 1652 int 1653 fx_register_callbacks(fx_callbacks_t *fx_callback, fx_cookie_t cookie, 1654 pri_t pri, clock_t quantum) 1655 { 1656 1657 fxproc_t *fxpp; 1658 1659 if (fx_callback == NULL) 1660 return (EINVAL); 1661 1662 if (secpolicy_dispadm(CRED()) != 0) 1663 return (EPERM); 1664 1665 if (FX_CB_VERSION(fx_callback) != FX_CALLB_REV) 1666 return (EINVAL); 1667 1668 if (!FX_ISVALID(pri, quantum)) 1669 return (EINVAL); 1670 1671 thread_lock(curthread); /* get dispatcher lock on thread */ 1672 1673 if (curthread->t_cid != fx_cid) { 1674 thread_unlock(curthread); 1675 return (EINVAL); 1676 } 1677 1678 fxpp = (fxproc_t *)(curthread->t_cldata); 1679 ASSERT(fxpp != NULL); 1680 if (FX_HAS_CB(fxpp)) { 1681 thread_unlock(curthread); 1682 return (EINVAL); 1683 } 1684 1685 fxpp->fx_callback = fx_callback; 1686 fxpp->fx_cookie = cookie; 1687 1688 if (pri != FX_CB_NOCHANGE) { 1689 fxpp->fx_pri = pri; 1690 FX_ADJUST_PRI(fxpp->fx_pri); 1691 if (quantum == FX_TQDEF) { 1692 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1693 } else if (quantum == FX_TQINF) { 1694 fxpp->fx_pquantum = FX_TQINF; 1695 } else if (quantum != FX_NOCHANGE) { 1696 FX_ADJUST_QUANTUM(quantum); 1697 fxpp->fx_pquantum = quantum; 1698 } 1699 } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) { 1700 if (quantum == FX_TQINF) 1701 fxpp->fx_pquantum = FX_TQINF; 1702 else { 1703 FX_ADJUST_QUANTUM(quantum); 1704 fxpp->fx_pquantum = quantum; 1705 } 1706 } 1707 1708 fxpp->fx_ktid = ddi_get_kt_did(); 1709 1710 fx_change_priority(curthread, fxpp); 1711 1712 thread_unlock(curthread); 1713 1714 /* 1715 * Link new structure into fxproc list. 1716 */ 1717 FX_CB_LIST_INSERT(fxpp); 1718 return (0); 1719 } 1720 1721 /* unregister a callback set of routines for current thread */ 1722 int 1723 fx_unregister_callbacks() 1724 { 1725 fxproc_t *fxpp; 1726 1727 if ((fxpp = fx_list_lookup(ddi_get_kt_did())) == NULL) { 1728 /* 1729 * did not have a registered callback; 1730 */ 1731 return (EINVAL); 1732 } 1733 1734 thread_lock(fxpp->fx_tp); 1735 fxpp->fx_callback = NULL; 1736 fxpp->fx_cookie = NULL; 1737 thread_unlock(fxpp->fx_tp); 1738 fx_list_release(fxpp); 1739 1740 FX_CB_LIST_DELETE(fxpp); 1741 return (0); 1742 } 1743 1744 /* 1745 * modify priority and/or quantum value of a thread with callback 1746 */ 1747 int 1748 fx_modify_priority(kt_did_t ktid, clock_t quantum, pri_t pri) 1749 { 1750 fxproc_t *fxpp; 1751 1752 if (!FX_ISVALID(pri, quantum)) 1753 return (EINVAL); 1754 1755 if ((fxpp = fx_list_lookup(ktid)) == NULL) { 1756 /* 1757 * either thread had exited or did not have a registered 1758 * callback; 1759 */ 1760 return (ESRCH); 1761 } 1762 1763 thread_lock(fxpp->fx_tp); 1764 1765 if (pri != FX_CB_NOCHANGE) { 1766 fxpp->fx_pri = pri; 1767 FX_ADJUST_PRI(fxpp->fx_pri); 1768 if (quantum == FX_TQDEF) { 1769 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1770 } else if (quantum == FX_TQINF) { 1771 fxpp->fx_pquantum = FX_TQINF; 1772 } else if (quantum != FX_NOCHANGE) { 1773 FX_ADJUST_QUANTUM(quantum); 1774 fxpp->fx_pquantum = quantum; 1775 } 1776 } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) { 1777 if (quantum == FX_TQINF) { 1778 fxpp->fx_pquantum = FX_TQINF; 1779 } else { 1780 FX_ADJUST_QUANTUM(quantum); 1781 fxpp->fx_pquantum = quantum; 1782 } 1783 } 1784 1785 fx_change_priority(fxpp->fx_tp, fxpp); 1786 1787 thread_unlock(fxpp->fx_tp); 1788 fx_list_release(fxpp); 1789 return (0); 1790 } 1791 1792 1793 /* 1794 * return an iblock cookie for mutex initialization to be used in callbacks 1795 */ 1796 void * 1797 fx_get_mutex_cookie() 1798 { 1799 return ((void *)(uintptr_t)__ipltospl(DISP_LEVEL)); 1800 } 1801 1802 /* 1803 * return maximum relative priority 1804 */ 1805 pri_t 1806 fx_get_maxpri() 1807 { 1808 return (fx_maxumdpri); 1809 } 1810