/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cred.h>
#include <sys/proc.h>
#include <sys/session.h>
#include <sys/strsubr.h>
#include <sys/user.h>
#include <sys/priocntl.h>
#include <sys/class.h>
#include <sys/disp.h>
#include <sys/procset.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/fx.h>
#include <sys/fxpriocntl.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/vtrace.h>
#include <sys/schedctl.h>
#include <sys/tnf_probe.h>
#include <sys/sunddi.h>
#include <sys/spl.h>
#include <sys/modctl.h>
#include <sys/policy.h>
#include <sys/sdt.h>
#include <sys/cpupart.h>
#include <sys/cpucaps.h>

static pri_t fx_init(id_t, int, classfuncs_t **);

static struct sclass csw = {
	"FX",
	fx_init,
	0
};

static struct modlsched modlsched = {
	&mod_schedops, "Fixed priority sched class", &csw
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlsched, NULL
};


/*
 * control flags (kparms->fx_cflags).
 */
#define	FX_DOUPRILIM	0x01	/* change user priority limit */
#define	FX_DOUPRI	0x02	/* change user priority */
#define	FX_DOTQ		0x04	/* change FX time quantum */


#define	FXMAXUPRI	60	/* maximum user priority setting */

#define	FX_MAX_UNPRIV_PRI	0	/* maximum unprivileged priority */

/*
 * The fxproc_t structures that have a registered callback vector
 * are also kept in an array of circular doubly linked lists. A hash on
 * the thread id (from ddi_get_kt_did()) is used to determine on which
 * list each such fxproc structure should be placed. Each list has a
 * dummy "head" which is never removed, so the list is never empty.
 */

#define	FX_CB_LISTS 16		/* number of lists, must be power of 2 */
#define	FX_CB_LIST_HASH(ktid)	((uint_t)ktid & (FX_CB_LISTS - 1))

/* Insert fxproc into callback list */
#define	FX_CB_LIST_INSERT(fxpp)						\
{									\
	int index = FX_CB_LIST_HASH(fxpp->fx_ktid);			\
	kmutex_t *lockp = &fx_cb_list_lock[index];			\
	fxproc_t *headp = &fx_cb_plisthead[index];			\
	mutex_enter(lockp);						\
	fxpp->fx_cb_next = headp->fx_cb_next;				\
	fxpp->fx_cb_prev = headp;					\
	headp->fx_cb_next->fx_cb_prev = fxpp;				\
	headp->fx_cb_next = fxpp;					\
	mutex_exit(lockp);						\
}
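
/*
 * Illustrative example (not compiled): a thread whose kt_did is 0x1234
 * hashes to list 0x1234 & (FX_CB_LISTS - 1) = 4 and is linked in
 * directly behind that list's dummy head, so the newest registrations
 * are found first when the list is walked from fx_cb_plisthead[4].
 */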

/*
 * Remove thread from callback list.
 */
#define	FX_CB_LIST_DELETE(fxpp)						\
{									\
	int index = FX_CB_LIST_HASH(fxpp->fx_ktid);			\
	kmutex_t *lockp = &fx_cb_list_lock[index];			\
	mutex_enter(lockp);						\
	fxpp->fx_cb_prev->fx_cb_next = fxpp->fx_cb_next;		\
	fxpp->fx_cb_next->fx_cb_prev = fxpp->fx_cb_prev;		\
	mutex_exit(lockp);						\
}

#define	FX_HAS_CB(fxpp)	(fxpp->fx_callback != NULL)

/* adjust pri to be between 0 and fx_maxumdpri */

#define	FX_ADJUST_PRI(pri)						\
{									\
	if (pri < 0)							\
		pri = 0;						\
	else if (pri > fx_maxumdpri)					\
		pri = fx_maxumdpri;					\
}

#define	FX_ADJUST_QUANTUM(q)						\
{									\
	if (q > INT_MAX)						\
		q = INT_MAX;						\
	else if (q <= 0)						\
		q = FX_TQINF;						\
}
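
/*
 * Note that FX_ADJUST_QUANTUM clamps oversized values to INT_MAX and
 * treats any non-positive quantum handed back by a callback as a
 * request for an infinite quantum (FX_TQINF).
 */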

#define	FX_ISVALID(pri, quantum) \
	(((pri >= 0) || (pri == FX_CB_NOCHANGE)) &&	\
	    ((quantum >= 0) || (quantum == FX_NOCHANGE) ||	\
	    (quantum == FX_TQDEF) || (quantum == FX_TQINF)))


static id_t	fx_cid;		/* fixed priority class ID */
static fxdpent_t *fx_dptbl;	/* fixed priority disp parameter table */

static pri_t	fx_maxupri = FXMAXUPRI;
static pri_t	fx_maxumdpri;	/* max user mode fixed priority */

static pri_t	fx_maxglobpri;	/* maximum global priority used by fx class */
static kmutex_t	fx_dptblock;	/* protects fixed priority dispatch table */


static kmutex_t	fx_cb_list_lock[FX_CB_LISTS];	/* protects list of fxprocs */
						/* that have callbacks */
static fxproc_t	fx_cb_plisthead[FX_CB_LISTS];	/* dummy fxproc at head of */
						/* list of fxprocs with */
						/* callbacks */

static int	fx_admin(caddr_t, cred_t *);
static int	fx_getclinfo(void *);
static int	fx_parmsin(void *);
static int	fx_parmsout(void *, pc_vaparms_t *);
static int	fx_vaparmsin(void *, pc_vaparms_t *);
static int	fx_vaparmsout(void *, pc_vaparms_t *);
static int	fx_getclpri(pcpri_t *);
static int	fx_alloc(void **, int);
static void	fx_free(void *);
static int	fx_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
static void	fx_exitclass(void *);
static int	fx_canexit(kthread_t *, cred_t *);
static int	fx_fork(kthread_t *, kthread_t *, void *);
static void	fx_forkret(kthread_t *, kthread_t *);
static void	fx_parmsget(kthread_t *, void *);
static int	fx_parmsset(kthread_t *, void *, id_t, cred_t *);
static void	fx_stop(kthread_t *, int, int);
static void	fx_exit(kthread_t *);
static pri_t	fx_swapin(kthread_t *, int);
static pri_t	fx_swapout(kthread_t *, int);
static void	fx_trapret(kthread_t *);
static void	fx_preempt(kthread_t *);
static void	fx_setrun(kthread_t *);
static void	fx_sleep(kthread_t *);
static void	fx_tick(kthread_t *);
static void	fx_wakeup(kthread_t *);
static int	fx_donice(kthread_t *, cred_t *, int, int *);
static int	fx_doprio(kthread_t *, cred_t *, int, int *);
static pri_t	fx_globpri(kthread_t *);
static void	fx_yield(kthread_t *);
static void	fx_nullsys();

extern fxdpent_t *fx_getdptbl(void);

static void	fx_change_priority(kthread_t *, fxproc_t *);
static fxproc_t	*fx_list_lookup(kt_did_t);
static void	fx_list_release(fxproc_t *);


static struct classfuncs fx_classfuncs = {
	/* class functions */
	fx_admin,
	fx_getclinfo,
	fx_parmsin,
	fx_parmsout,
	fx_vaparmsin,
	fx_vaparmsout,
	fx_getclpri,
	fx_alloc,
	fx_free,

	/* thread functions */
	fx_enterclass,
	fx_exitclass,
	fx_canexit,
	fx_fork,
	fx_forkret,
	fx_parmsget,
	fx_parmsset,
	fx_stop,
	fx_exit,
	fx_nullsys,	/* active */
	fx_nullsys,	/* inactive */
	fx_swapin,
	fx_swapout,
	fx_trapret,
	fx_preempt,
	fx_setrun,
	fx_sleep,
	fx_tick,
	fx_wakeup,
	fx_donice,
	fx_globpri,
	fx_nullsys,	/* set_process_group */
	fx_yield,
	fx_doprio,
};


int
_init()
{
	return (mod_install(&modlinkage));
}
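
/*
 * Once installed, the FX class stays loaded for the life of the system:
 * _fini() below unconditionally returns EBUSY, so modunload() can never
 * remove the module.
 */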
int
_fini()
{
	return (EBUSY);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Fixed priority class initialization. Called by dispinit() at boot time.
 * We can ignore the clparmsz argument since we know that the smallest
 * possible parameter buffer is big enough for us.
 */
/* ARGSUSED */
static pri_t
fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
{
	int i;
	extern pri_t fx_getmaxumdpri(void);

	fx_dptbl = fx_getdptbl();
	fx_maxumdpri = fx_getmaxumdpri();
	fx_maxglobpri = fx_dptbl[fx_maxumdpri].fx_globpri;

	fx_cid = cid;		/* Record our class ID */

	/*
	 * Initialize the hash table for fxprocs with callbacks
	 */
	for (i = 0; i < FX_CB_LISTS; i++) {
		fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev =
		    &fx_cb_plisthead[i];
	}

	/*
	 * We're required to return a pointer to our classfuncs
	 * structure and the highest global priority value we use.
	 */
	*clfuncspp = &fx_classfuncs;
	return (fx_maxglobpri);
}

/*
 * Get or reset the fx_dptbl values per the user's request.
 */
static int
fx_admin(caddr_t uaddr, cred_t *reqpcredp)
{
	fxadmin_t	fxadmin;
	fxdpent_t	*tmpdpp;
	int		userdpsz;
	int		i;
	size_t		fxdpsz;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin(uaddr, &fxadmin, sizeof (fxadmin_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32_IMPL
	else {
		/* get fxadmin struct from ILP32 caller */
		fxadmin32_t fxadmin32;
		if (copyin(uaddr, &fxadmin32, sizeof (fxadmin32_t)))
			return (EFAULT);
		fxadmin.fx_dpents =
		    (struct fxdpent *)(uintptr_t)fxadmin32.fx_dpents;
		fxadmin.fx_ndpents = fxadmin32.fx_ndpents;
		fxadmin.fx_cmd = fxadmin32.fx_cmd;
	}
#endif /* _SYSCALL32_IMPL */

	fxdpsz = (fx_maxumdpri + 1) * sizeof (fxdpent_t);

	switch (fxadmin.fx_cmd) {
	case FX_GETDPSIZE:
		fxadmin.fx_ndpents = fx_maxumdpri + 1;

		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t)))
				return (EFAULT);
		}
#ifdef _SYSCALL32_IMPL
		else {
			/* return fxadmin struct to ILP32 caller */
			fxadmin32_t fxadmin32;
			fxadmin32.fx_dpents =
			    (caddr32_t)(uintptr_t)fxadmin.fx_dpents;
			fxadmin32.fx_ndpents = fxadmin.fx_ndpents;
			fxadmin32.fx_cmd = fxadmin.fx_cmd;
			if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t)))
				return (EFAULT);
		}
#endif /* _SYSCALL32_IMPL */
		break;

	case FX_GETDPTBL:
		userdpsz = MIN(fxadmin.fx_ndpents * sizeof (fxdpent_t),
		    fxdpsz);
		if (copyout(fx_dptbl, fxadmin.fx_dpents, userdpsz))
			return (EFAULT);

		fxadmin.fx_ndpents = userdpsz / sizeof (fxdpent_t);

		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t)))
				return (EFAULT);
		}
#ifdef _SYSCALL32_IMPL
		else {
			/* return fxadmin struct to ILP32 callers */
			fxadmin32_t fxadmin32;
			fxadmin32.fx_dpents =
			    (caddr32_t)(uintptr_t)fxadmin.fx_dpents;
			fxadmin32.fx_ndpents = fxadmin.fx_ndpents;
			fxadmin32.fx_cmd = fxadmin.fx_cmd;
			if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t)))
				return (EFAULT);
		}
#endif /* _SYSCALL32_IMPL */
		break;

	case FX_SETDPTBL:
		/*
		 * We require that the requesting process has sufficient
		 * privileges. We also require that the table supplied by
		 * the user exactly match the current fx_dptbl in size.
		 */
		if (secpolicy_dispadm(reqpcredp) != 0) {
			return (EPERM);
		}
		if (fxadmin.fx_ndpents * sizeof (fxdpent_t) != fxdpsz) {
			return (EINVAL);
		}

		/*
		 * We read the user supplied table into a temporary buffer
		 * where it is validated before being copied over the
		 * fx_dptbl.
		 */
		tmpdpp = kmem_alloc(fxdpsz, KM_SLEEP);
		if (copyin(fxadmin.fx_dpents, tmpdpp, fxdpsz)) {
			kmem_free(tmpdpp, fxdpsz);
			return (EFAULT);
		}
		for (i = 0; i < fxadmin.fx_ndpents; i++) {

			/*
			 * Validate the user supplied values. All we are doing
			 * here is verifying that the values are within their
			 * allowable ranges and will not panic the system. We
			 * make no attempt to ensure that the resulting
			 * configuration makes sense or results in reasonable
			 * performance.
			 */
			if (tmpdpp[i].fx_quantum <= 0 &&
			    tmpdpp[i].fx_quantum != FX_TQINF) {
				kmem_free(tmpdpp, fxdpsz);
				return (EINVAL);
			}
		}

		/*
		 * Copy the user supplied values over the current fx_dptbl
		 * values. The fx_globpri member is read-only so we don't
		 * overwrite it.
		 */
		mutex_enter(&fx_dptblock);
		for (i = 0; i < fxadmin.fx_ndpents; i++) {
			fx_dptbl[i].fx_quantum = tmpdpp[i].fx_quantum;
		}
		mutex_exit(&fx_dptblock);
		kmem_free(tmpdpp, fxdpsz);
		break;

	default:
		return (EINVAL);
	}
	return (0);
}

/*
 * Allocate a fixed-priority class specific thread structure and
 * initialize it with the parameters supplied. Also move the thread
 * to the specified priority.
 */
static int
fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
    void *bufp)
{
	fxkparms_t	*fxkparmsp = (fxkparms_t *)parmsp;
	fxproc_t	*fxpp;
	pri_t		reqfxupri;
	pri_t		reqfxuprilim;

	fxpp = (fxproc_t *)bufp;
	ASSERT(fxpp != NULL);

	/*
	 * Initialize the fxproc structure.
	 */
	fxpp->fx_flags = 0;
	fxpp->fx_callback = NULL;
	fxpp->fx_cookie = NULL;

	if (fxkparmsp == NULL) {
		/*
		 * Use default values.
		 */
		fxpp->fx_pri = fxpp->fx_uprilim = 0;
		fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
		fxpp->fx_nice = NZERO;
	} else {
		/*
		 * Use supplied values.
		 */

		if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) {
			reqfxuprilim = 0;
		} else {
			if (fxkparmsp->fx_uprilim > FX_MAX_UNPRIV_PRI &&
			    secpolicy_setpriority(reqpcredp) != 0)
				return (EPERM);
			reqfxuprilim = fxkparmsp->fx_uprilim;
			FX_ADJUST_PRI(reqfxuprilim);
		}

		if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) {
			reqfxupri = reqfxuprilim;
		} else {
			if (fxkparmsp->fx_upri > FX_MAX_UNPRIV_PRI &&
			    secpolicy_setpriority(reqpcredp) != 0)
				return (EPERM);
			/*
			 * Set the user priority to the requested value
			 * or the upri limit, whichever is lower.
			 */
			reqfxupri = fxkparmsp->fx_upri;
			FX_ADJUST_PRI(reqfxupri);

			if (reqfxupri > reqfxuprilim)
				reqfxupri = reqfxuprilim;
		}


		fxpp->fx_uprilim = reqfxuprilim;
		fxpp->fx_pri = reqfxupri;
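
		/*
		 * Map the user priority to a nice value. For example
		 * (assuming NZERO == 20 and the default fx_maxupri of 60):
		 * upri 0 maps to nice 20, upri 30 to nice 10, and upri 60
		 * to nice 0, so higher FX priorities yield lower (more
		 * favorable) nice values.
		 */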
		fxpp->fx_nice = NZERO - (NZERO * reqfxupri) / fx_maxupri;

		if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) ||
		    (fxkparmsp->fx_tqntm == FX_TQDEF)) {
			fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
		} else {
			if (secpolicy_setpriority(reqpcredp) != 0)
				return (EPERM);

			if (fxkparmsp->fx_tqntm == FX_TQINF)
				fxpp->fx_pquantum = FX_TQINF;
			else {
				fxpp->fx_pquantum = fxkparmsp->fx_tqntm;
			}
		}

	}

	fxpp->fx_timeleft = fxpp->fx_pquantum;
	cpucaps_sc_init(&fxpp->fx_caps);
	fxpp->fx_tp = t;

	thread_lock(t);			/* get dispatcher lock on thread */
	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
	t->t_cid = cid;
	t->t_cldata = (void *)fxpp;
	t->t_schedflag &= ~TS_RUNQMATCH;
	fx_change_priority(t, fxpp);
	thread_unlock(t);

	return (0);
}

/*
 * The thread is exiting.
 */
static void
fx_exit(kthread_t *t)
{
	fxproc_t *fxpp;

	thread_lock(t);
	fxpp = (fxproc_t *)(t->t_cldata);

	/*
	 * A thread could be exiting in between clock ticks, so we need to
	 * calculate how much CPU time it used since it was charged last time.
	 *
	 * CPU caps are not enforced on exiting processes - it is usually
	 * desirable to exit as soon as possible to free resources.
	 */
	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ONLY);

	if (FX_HAS_CB(fxpp)) {
		FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie);
		fxpp->fx_callback = NULL;
		fxpp->fx_cookie = NULL;
		thread_unlock(t);
		FX_CB_LIST_DELETE(fxpp);
		return;
	}

	thread_unlock(t);
}

/*
 * Exiting the class. Free fxproc structure of thread.
 */
static void
fx_exitclass(void *procp)
{
	fxproc_t *fxpp = (fxproc_t *)procp;

	thread_lock(fxpp->fx_tp);
	if (FX_HAS_CB(fxpp)) {

		FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie);

		fxpp->fx_callback = NULL;
		fxpp->fx_cookie = NULL;
		thread_unlock(fxpp->fx_tp);
		FX_CB_LIST_DELETE(fxpp);
	} else
		thread_unlock(fxpp->fx_tp);

	kmem_free(fxpp, sizeof (fxproc_t));
}

/* ARGSUSED */
static int
fx_canexit(kthread_t *t, cred_t *cred)
{
	/*
	 * A thread can always leave the FX class.
	 */
	return (0);
}

/*
 * Initialize fixed-priority class specific proc structure for a child.
 * Callbacks are not inherited upon fork.
 */
static int
fx_fork(kthread_t *t, kthread_t *ct, void *bufp)
{
	fxproc_t	*pfxpp;		/* ptr to parent's fxproc structure */
	fxproc_t	*cfxpp;		/* ptr to child's fxproc structure */

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	cfxpp = (fxproc_t *)bufp;
	ASSERT(cfxpp != NULL);
	thread_lock(t);
	pfxpp = (fxproc_t *)t->t_cldata;
	/*
	 * Initialize child's fxproc structure.
	 */
	cfxpp->fx_timeleft = cfxpp->fx_pquantum = pfxpp->fx_pquantum;
	cfxpp->fx_pri = pfxpp->fx_pri;
	cfxpp->fx_uprilim = pfxpp->fx_uprilim;
	cfxpp->fx_nice = pfxpp->fx_nice;
	cfxpp->fx_callback = NULL;
	cfxpp->fx_cookie = NULL;
	cfxpp->fx_flags = pfxpp->fx_flags & ~(FXBACKQ);
	cpucaps_sc_init(&cfxpp->fx_caps);

	cfxpp->fx_tp = ct;
	ct->t_cldata = (void *)cfxpp;
	thread_unlock(t);

	/*
	 * Link new structure into fxproc list.
	 */
	return (0);
}


/*
 * Child is placed at back of dispatcher queue and parent gives
 * up processor so that the child runs first after the fork.
 * This allows the child immediately execing to break the multiple
 * use of copy on write pages with no disk home. The parent will
 * get to steal them back rather than uselessly copying them.
 */
static void
fx_forkret(kthread_t *t, kthread_t *ct)
{
	proc_t	*pp = ttoproc(t);
	proc_t	*cp = ttoproc(ct);
	fxproc_t *fxpp;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Grab the child's p_lock before dropping pidlock to ensure
	 * the process does not disappear before we set it running.
	 */
	mutex_enter(&cp->p_lock);
	continuelwps(cp);
	mutex_exit(&cp->p_lock);

	mutex_enter(&pp->p_lock);
	mutex_exit(&pidlock);
	continuelwps(pp);

	thread_lock(t);
	fxpp = (fxproc_t *)(t->t_cldata);
	t->t_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
	ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri);
	THREAD_TRANSITION(t);
	fx_setrun(t);
	thread_unlock(t);
	/*
	 * Safe to drop p_lock now since it is safe to change
	 * the scheduling class after this point.
	 */
	mutex_exit(&pp->p_lock);

	swtch();
}


/*
 * Get information about the fixed-priority class into the buffer
 * pointed to by fxinfop. The maximum configured user priority
 * is the only information we supply.
 */
static int
fx_getclinfo(void *infop)
{
	fxinfo_t *fxinfop = (fxinfo_t *)infop;
	fxinfop->fx_maxupri = fx_maxupri;
	return (0);
}



/*
 * Return the user mode scheduling priority range.
 */
static int
fx_getclpri(pcpri_t *pcprip)
{
	pcprip->pc_clpmax = fx_maxupri;
	pcprip->pc_clpmin = 0;
	return (0);
}


static void
fx_nullsys()
{}


/*
 * Get the fixed-priority parameters of the thread pointed to by
 * fxprocp into the buffer pointed to by fxparmsp.
 */
static void
fx_parmsget(kthread_t *t, void *parmsp)
{
	fxproc_t *fxpp = (fxproc_t *)t->t_cldata;
	fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp;

	fxkparmsp->fx_upri = fxpp->fx_pri;
	fxkparmsp->fx_uprilim = fxpp->fx_uprilim;
	fxkparmsp->fx_tqntm = fxpp->fx_pquantum;
}



/*
 * Check the validity of the fixed-priority parameters in the buffer
 * pointed to by fxparmsp.
 */
static int
fx_parmsin(void *parmsp)
{
	fxparms_t	*fxparmsp = (fxparms_t *)parmsp;
	uint_t		cflags;
	longlong_t	ticks;
	/*
	 * Check validity of parameters.
	 */

	if ((fxparmsp->fx_uprilim > fx_maxupri ||
	    fxparmsp->fx_uprilim < 0) &&
	    fxparmsp->fx_uprilim != FX_NOCHANGE)
		return (EINVAL);

	if ((fxparmsp->fx_upri > fx_maxupri ||
	    fxparmsp->fx_upri < 0) &&
	    fxparmsp->fx_upri != FX_NOCHANGE)
		return (EINVAL);

	if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) ||
	    fxparmsp->fx_tqnsecs >= NANOSEC)
		return (EINVAL);

	cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0);

	if (fxparmsp->fx_uprilim != FX_NOCHANGE) {
		cflags |= FX_DOUPRILIM;
	}

	if (fxparmsp->fx_tqnsecs != FX_NOCHANGE)
		cflags |= FX_DOTQ;

	/*
	 * Convert the buffer to kernel format.
	 */
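	/*
	 * For example (assuming hz == 100, i.e. a 10ms tick):
	 * fx_tqsecs = 1 and fx_tqnsecs = 5000000 convert to
	 * SEC_TO_TICK(1) + NSEC_TO_TICK_ROUNDUP(5000000) = 100 + 1 = 101
	 * ticks, since any fractional tick is rounded up.
	 */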

	if (fxparmsp->fx_tqnsecs >= 0) {
		if ((ticks = SEC_TO_TICK((longlong_t)fxparmsp->fx_tqsecs) +
		    NSEC_TO_TICK_ROUNDUP(fxparmsp->fx_tqnsecs)) > INT_MAX)
			return (ERANGE);

		((fxkparms_t *)fxparmsp)->fx_tqntm = (int)ticks;
	} else {
		if ((fxparmsp->fx_tqnsecs != FX_NOCHANGE) &&
		    (fxparmsp->fx_tqnsecs != FX_TQINF) &&
		    (fxparmsp->fx_tqnsecs != FX_TQDEF))
			return (EINVAL);
		((fxkparms_t *)fxparmsp)->fx_tqntm = fxparmsp->fx_tqnsecs;
	}

	((fxkparms_t *)fxparmsp)->fx_cflags = cflags;

	return (0);
}


/*
 * Check the validity of the fixed-priority parameters in the pc_vaparms_t
 * structure vaparmsp and put them in the buffer pointed to by fxprmsp.
 * pc_vaparms_t contains (key, value) pairs of parameters.
 */
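/*
 * For example (hypothetical caller): the pairs (FX_KY_UPRI, 30) and
 * (FX_KY_TQNSECS, FX_TQDEF) produce fx_upri = 30, fx_tqntm = FX_TQDEF
 * and fx_cflags = FX_DOUPRI | FX_DOTQ, i.e. priority 30 with that
 * priority's default time quantum.
 */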
static int
fx_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
{
	uint_t		secs = 0;
	uint_t		cnt;
	int		nsecs = 0;
	int		priflag, secflag, nsecflag, limflag;
	longlong_t	ticks;
	fxkparms_t	*fxprmsp = (fxkparms_t *)prmsp;
	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];


	/*
	 * First check the validity of parameters and convert them
	 * from the user supplied format to the internal format.
	 */
	priflag = secflag = nsecflag = limflag = 0;

	fxprmsp->fx_cflags = 0;

	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
		return (EINVAL);

	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {

		switch (vpp->pc_key) {
		case FX_KY_UPRILIM:
			if (limflag++)
				return (EINVAL);
			fxprmsp->fx_cflags |= FX_DOUPRILIM;
			fxprmsp->fx_uprilim = (pri_t)vpp->pc_parm;
			if (fxprmsp->fx_uprilim > fx_maxupri ||
			    fxprmsp->fx_uprilim < 0)
				return (EINVAL);
			break;

		case FX_KY_UPRI:
			if (priflag++)
				return (EINVAL);
			fxprmsp->fx_cflags |= FX_DOUPRI;
			fxprmsp->fx_upri = (pri_t)vpp->pc_parm;
			if (fxprmsp->fx_upri > fx_maxupri ||
			    fxprmsp->fx_upri < 0)
				return (EINVAL);
			break;

		case FX_KY_TQSECS:
			if (secflag++)
				return (EINVAL);
			fxprmsp->fx_cflags |= FX_DOTQ;
			secs = (uint_t)vpp->pc_parm;
			break;

		case FX_KY_TQNSECS:
			if (nsecflag++)
				return (EINVAL);
			fxprmsp->fx_cflags |= FX_DOTQ;
			nsecs = (int)vpp->pc_parm;
			break;

		default:
			return (EINVAL);
		}
	}

	if (vaparmsp->pc_vaparmscnt == 0) {
		/*
		 * Use default parameters.
		 */
		fxprmsp->fx_upri = 0;
		fxprmsp->fx_uprilim = 0;
		fxprmsp->fx_tqntm = FX_TQDEF;
		fxprmsp->fx_cflags = FX_DOUPRI | FX_DOUPRILIM | FX_DOTQ;
	} else if ((fxprmsp->fx_cflags & FX_DOTQ) != 0) {
		if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
			return (EINVAL);

		if (nsecs >= 0) {
			if ((ticks = SEC_TO_TICK((longlong_t)secs) +
			    NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
				return (ERANGE);

			fxprmsp->fx_tqntm = (int)ticks;
		} else {
			if (nsecs != FX_TQINF && nsecs != FX_TQDEF)
				return (EINVAL);
			fxprmsp->fx_tqntm = nsecs;
		}
	}

	return (0);
}


/*
 * Nothing to do here but return success.
 */
/* ARGSUSED */
static int
fx_parmsout(void *parmsp, pc_vaparms_t *vaparmsp)
{
	register fxkparms_t *fxkprmsp = (fxkparms_t *)parmsp;

	if (vaparmsp != NULL)
		return (0);

	if (fxkprmsp->fx_tqntm < 0) {
		/*
		 * Quantum field set to special value (e.g. FX_TQINF).
		 */
		((fxparms_t *)fxkprmsp)->fx_tqnsecs = fxkprmsp->fx_tqntm;
		((fxparms_t *)fxkprmsp)->fx_tqsecs = 0;

	} else {
		/* Convert quantum from ticks to seconds-nanoseconds */

		timestruc_t ts;
		TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts);
		((fxparms_t *)fxkprmsp)->fx_tqsecs = ts.tv_sec;
		((fxparms_t *)fxkprmsp)->fx_tqnsecs = ts.tv_nsec;
	}
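
	/*
	 * For example (assuming hz == 100): fx_tqntm = 101 ticks converts
	 * back to fx_tqsecs = 1 and fx_tqnsecs = 10000000, mirroring the
	 * rounding done in fx_parmsin().
	 */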

	return (0);
}


/*
 * Copy all selected fixed-priority class parameters to the user.
 * The parameters are specified by a key.
 */
static int
fx_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
{
	fxkparms_t	*fxkprmsp = (fxkparms_t *)prmsp;
	timestruc_t	ts;
	uint_t		cnt;
	uint_t		secs;
	int		nsecs;
	int		priflag, secflag, nsecflag, limflag;
	pc_vaparm_t	*vpp = &vaparmsp->pc_parms[0];

	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));

	priflag = secflag = nsecflag = limflag = 0;

	if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
		return (EINVAL);

	if (fxkprmsp->fx_tqntm < 0) {
		/*
		 * Quantum field set to special value (e.g. FX_TQINF).
		 */
		secs = 0;
		nsecs = fxkprmsp->fx_tqntm;
	} else {
		/*
		 * Convert quantum from ticks to seconds-nanoseconds.
		 */
		TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts);
		secs = ts.tv_sec;
		nsecs = ts.tv_nsec;
	}


	for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {

		switch (vpp->pc_key) {
		case FX_KY_UPRILIM:
			if (limflag++)
				return (EINVAL);
			if (copyout(&fxkprmsp->fx_uprilim,
			    (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
				return (EFAULT);
			break;

		case FX_KY_UPRI:
			if (priflag++)
				return (EINVAL);
			if (copyout(&fxkprmsp->fx_upri,
			    (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
				return (EFAULT);
			break;

		case FX_KY_TQSECS:
			if (secflag++)
				return (EINVAL);
			if (copyout(&secs,
			    (void *)(uintptr_t)vpp->pc_parm, sizeof (uint_t)))
				return (EFAULT);
			break;

		case FX_KY_TQNSECS:
			if (nsecflag++)
				return (EINVAL);
			if (copyout(&nsecs,
			    (void *)(uintptr_t)vpp->pc_parm, sizeof (int)))
				return (EFAULT);
			break;

		default:
			return (EINVAL);
		}
	}

	return (0);
}

/*
 * Set the scheduling parameters of the thread pointed to by fxprocp
 * to those specified in the buffer pointed to by fxparmsp.
 */
/* ARGSUSED */
static int
fx_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
{
	char		nice;
	pri_t		reqfxuprilim;
	pri_t		reqfxupri;
	fxkparms_t	*fxkparmsp = (fxkparms_t *)parmsp;
	fxproc_t	*fxpp;


	ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));

	thread_lock(tx);
	fxpp = (fxproc_t *)tx->t_cldata;

	if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0)
		reqfxuprilim = fxpp->fx_uprilim;
	else
		reqfxuprilim = fxkparmsp->fx_uprilim;

	/*
	 * Basic permissions enforced by generic kernel code
	 * for all classes require that a thread attempting
	 * to change the scheduling parameters of a target
	 * thread be privileged or have a real or effective
	 * UID matching that of the target thread. We are not
	 * called unless these basic permission checks have
	 * already passed. The fixed priority class requires in
	 * addition that the calling thread be privileged if it
	 * is attempting to raise the pri above its current
	 * value. This may have been checked previously but if our
	 * caller passed us a non-NULL credential pointer we assume
	 * it hasn't and we check it here.
	 */

	if ((reqpcredp != NULL) &&
	    (reqfxuprilim > fxpp->fx_uprilim ||
	    ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)) &&
	    secpolicy_setpriority(reqpcredp) != 0) {
		thread_unlock(tx);
		return (EPERM);
	}

	FX_ADJUST_PRI(reqfxuprilim);

	if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0)
		reqfxupri = fxpp->fx_pri;
	else
		reqfxupri = fxkparmsp->fx_upri;


	/*
	 * Make sure the user priority doesn't exceed the upri limit.
	 */
	if (reqfxupri > reqfxuprilim)
		reqfxupri = reqfxuprilim;

	/*
	 * Set fx_nice to the nice value corresponding to the user
	 * priority we are setting. Note that setting the nice field
	 * of the parameter struct won't affect upri or nice.
	 */

	nice = NZERO - (reqfxupri * NZERO) / fx_maxupri;

	if (nice > NZERO)
		nice = NZERO;

	fxpp->fx_uprilim = reqfxuprilim;
	fxpp->fx_pri = reqfxupri;

	if (fxkparmsp->fx_tqntm == FX_TQINF)
		fxpp->fx_pquantum = FX_TQINF;
	else if (fxkparmsp->fx_tqntm == FX_TQDEF)
		fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
	else if ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)
		fxpp->fx_pquantum = fxkparmsp->fx_tqntm;

	fxpp->fx_nice = nice;

	fx_change_priority(tx, fxpp);
	thread_unlock(tx);
	return (0);
}


/*
 * Return the global scheduling priority that would be assigned
 * to a thread entering the fixed-priority class with the fx_upri.
 */
static pri_t
fx_globpri(kthread_t *t)
{
	fxproc_t *fxpp;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	fxpp = (fxproc_t *)t->t_cldata;
	return (fx_dptbl[fxpp->fx_pri].fx_globpri);

}

/*
 * Arrange for thread to be placed in appropriate location
 * on dispatcher queue.
 *
 * This is called with the current thread in TS_ONPROC and locked.
 */
static void
fx_preempt(kthread_t *t)
{
	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(curthread));

	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);

	/*
	 * Check to see if we're doing "preemption control" here. If
	 * we are, and if the user has requested that this thread not
	 * be preempted, and if preemptions haven't been put off for
	 * too long, let the preemption happen here but try to make
	 * sure the thread is rescheduled as soon as possible. We do
	 * this by putting it on the front of the highest priority run
	 * queue in the FX class. If the preemption has been put off
	 * for too long, clear the "nopreempt" bit and let the thread
	 * be preempted.
	 */
	if (t->t_schedctl && schedctl_get_nopreempt(t)) {
		if (fxpp->fx_pquantum == FX_TQINF ||
		    fxpp->fx_timeleft > -SC_MAX_TICKS) {
			DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t);
			schedctl_set_yield(t, 1);
			setfrontdq(t);
			return;
		} else {
			schedctl_set_nopreempt(t, 0);
			DTRACE_SCHED1(schedctl__preempt, kthread_t *, t);
			TNF_PROBE_2(schedctl_preempt, "schedctl FX fx_preempt",
			    /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid,
			    tnf_lwpid, lwpid, t->t_tid);
			/*
			 * Fall through and be preempted below.
			 */
		}
	}

	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum = (clock_t)fxpp->fx_pquantum;
		pri_t	newpri = fxpp->fx_pri;
		FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
		FX_ADJUST_PRI(newpri);
		fxpp->fx_pri = newpri;
		THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
	}

	/*
	 * This thread may be placed on wait queue by CPU Caps. In this case
	 * we do not need to do anything until it is removed from the wait
	 * queue.
	 */
	if (CPUCAPS_ENFORCE(t)) {
		return;
	}

	if ((fxpp->fx_flags & (FXBACKQ)) == FXBACKQ) {
		fxpp->fx_timeleft = fxpp->fx_pquantum;
		fxpp->fx_flags &= ~FXBACKQ;
		setbackdq(t);
	} else {
		setfrontdq(t);
	}
}

static void
fx_setrun(kthread_t *t)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */
	fxpp->fx_flags &= ~FXBACKQ;

	if (t->t_disp_time != ddi_get_lbolt())
		setbackdq(t);
	else
		setfrontdq(t);
}


/*
 * Prepare thread for sleep. We reset the thread priority so it will
 * run at the kernel priority level when it wakes up.
 */
static void
fx_sleep(kthread_t *t)
{
	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	/*
	 * Account for time spent on CPU before going to sleep.
	 */
	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);

	if (FX_HAS_CB(fxpp)) {
		FX_CB_SLEEP(FX_CALLB(fxpp), fxpp->fx_cookie);
	}
	t->t_stime = ddi_get_lbolt();	/* time stamp for the swapper */
}


/*
 * Return Values:
 *
 *	-1 if the thread is loaded or is not eligible to be swapped in.
 *
 * FX and RT threads are designed so that they don't swap out; however,
 * it is possible that while the thread is swapped out and in another class,
 * it can be changed to FX or RT. Since these threads should be swapped in
 * as soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin
 * returns SHRT_MAX - 1, so that it gives deference to any swapped out
 * RT threads.
 */
/* ARGSUSED */
static pri_t
fx_swapin(kthread_t *t, int flags)
{
	pri_t	tpri = -1;

	ASSERT(THREAD_LOCK_HELD(t));

	if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
		tpri = (pri_t)SHRT_MAX - 1;
	}

	return (tpri);
}

/*
 * Return Values:
 *	-1 if the thread isn't loaded or is not eligible to be swapped out.
 */
/* ARGSUSED */
static pri_t
fx_swapout(kthread_t *t, int flags)
{
	ASSERT(THREAD_LOCK_HELD(t));

	return (-1);

}

/* ARGSUSED */
static void
fx_stop(kthread_t *t, int why, int what)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(THREAD_LOCK_HELD(t));

	if (FX_HAS_CB(fxpp)) {
		FX_CB_STOP(FX_CALLB(fxpp), fxpp->fx_cookie);
	}
}

/*
 * Check for time slice expiration. If the time slice has expired, set
 * runrun to cause preemption.
 */
static void
fx_tick(kthread_t *t)
{
	boolean_t call_cpu_surrender = B_FALSE;
	fxproc_t *fxpp;

	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));

	thread_lock(t);

	fxpp = (fxproc_t *)(t->t_cldata);

	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum = (clock_t)fxpp->fx_pquantum;
		pri_t	newpri = fxpp->fx_pri;
		FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
		FX_ADJUST_PRI(newpri);
		if (newpri != fxpp->fx_pri) {
			fxpp->fx_pri = newpri;
			fx_change_priority(t, fxpp);
		}
	}

	/*
	 * Keep track of thread's project CPU usage. Note that projects
	 * get charged even when threads are running in the kernel.
	 */
	call_cpu_surrender = CPUCAPS_CHARGE(t, &fxpp->fx_caps,
	    CPUCAPS_CHARGE_ENFORCE);

	if ((fxpp->fx_pquantum != FX_TQINF) &&
	    (--fxpp->fx_timeleft <= 0)) {
		pri_t	new_pri;

		/*
		 * If we're doing preemption control and trying to
		 * avoid preempting this thread, just note that
		 * the thread should yield soon and let it keep
		 * running (unless it's been a while).
		 */
		if (t->t_schedctl && schedctl_get_nopreempt(t)) {
			if (fxpp->fx_timeleft > -SC_MAX_TICKS) {
				DTRACE_SCHED1(schedctl__nopreempt,
				    kthread_t *, t);
				schedctl_set_yield(t, 1);
				thread_unlock_nopreempt(t);
				return;
			}
			TNF_PROBE_2(schedctl_failsafe,
			    "schedctl FX fx_tick", /* CSTYLED */,
			    tnf_pid, pid, ttoproc(t)->p_pid,
			    tnf_lwpid, lwpid, t->t_tid);
		}
		new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
		ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri);
		/*
		 * When the priority of a thread is changed,
		 * it may be necessary to adjust its position
		 * on a sleep queue or dispatch queue. Even
		 * when the priority is not changed, we need
		 * to preserve round robin on dispatch queue.
		 * The function thread_change_pri accomplishes
		 * this.
		 */
		if (thread_change_pri(t, new_pri, 0)) {
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		} else {
			call_cpu_surrender = B_TRUE;
		}
	} else if (t->t_state == TS_ONPROC &&
	    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
		call_cpu_surrender = B_TRUE;
	}

	if (call_cpu_surrender) {
		fxpp->fx_flags |= FXBACKQ;
		cpu_surrender(t);
	}
	thread_unlock_nopreempt(t);	/* clock thread can't be preempted */
}


static void
fx_trapret(kthread_t *t)
{
	cpu_t *cp = CPU;

	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(t == curthread);
	ASSERT(cp->cpu_dispthread == t);
	ASSERT(t->t_state == TS_ONPROC);
}


/*
 * Processes waking up go to the back of their queue.
 */
static void
fx_wakeup(kthread_t *t)
{
	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(THREAD_LOCK_HELD(t));

	t->t_stime = ddi_get_lbolt();	/* time stamp for the swapper */
	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum = (clock_t)fxpp->fx_pquantum;
		pri_t	newpri = fxpp->fx_pri;
		FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}

		FX_ADJUST_PRI(newpri);
		if (newpri != fxpp->fx_pri) {
			fxpp->fx_pri = newpri;
			THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
		}
	}

	fxpp->fx_flags &= ~FXBACKQ;

	if (t->t_disp_time != ddi_get_lbolt())
		setbackdq(t);
	else
		setfrontdq(t);
}


/*
 * When a thread yields, put it on the back of the run queue.
 */
static void
fx_yield(kthread_t *t)
{
	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	/*
	 * Collect CPU usage spent before yielding CPU.
	 */
	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);

	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum = (clock_t)fxpp->fx_pquantum;
		pri_t	newpri = fxpp->fx_pri;
		FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
		FX_ADJUST_PRI(newpri);
		fxpp->fx_pri = newpri;
		THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
	}

	/*
	 * Clear the preemption control "yield" bit since the user is
	 * doing a yield.
	 */
	if (t->t_schedctl)
		schedctl_set_yield(t, 0);

	if (fxpp->fx_timeleft <= 0) {
		/*
		 * Time slice was artificially extended to avoid
		 * preemption, so pretend we're preempting it now.
		 */
		DTRACE_SCHED1(schedctl__yield, int, -fxpp->fx_timeleft);
		fxpp->fx_timeleft = fxpp->fx_pquantum;
		THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
		ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri);
	}

	fxpp->fx_flags &= ~FXBACKQ;
	setbackdq(t);
}

/*
 * Increment the nice value of the specified thread by incr and
 * return the new value in *retvalp.
 */
static int
fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
{
	int		newnice;
	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);
	fxkparms_t	fxkparms;

	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));

	/* If there's no change to priority, just return current setting */
	if (incr == 0) {
		if (retvalp) {
			*retvalp = fxpp->fx_nice - NZERO;
		}
		return (0);
	}

	if ((incr < 0 || incr > 2 * NZERO) &&
	    secpolicy_setpriority(cr) != 0)
		return (EPERM);

	/*
	 * Specifying a nice increment greater than the upper limit of
	 * 2 * NZERO - 1 will result in the thread's nice value being
	 * set to the upper limit. We check for this before computing
	 * the new value because otherwise we could get overflow
	 * if a privileged user specified some ridiculous increment.
	 */
	if (incr > 2 * NZERO - 1)
		incr = 2 * NZERO - 1;

	newnice = fxpp->fx_nice + incr;
	if (newnice > NZERO)
		newnice = NZERO;
	else if (newnice < 0)
		newnice = 0;
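
	/*
	 * For example (assuming NZERO == 20 and the default fx_maxupri of
	 * 60): newnice 0 maps back to upri 60 and newnice 20 (NZERO) to
	 * upri 0, the inverse of the upri-to-nice mapping used in
	 * fx_enterclass() and fx_parmsset().
	 */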
	fxkparms.fx_uprilim = fxkparms.fx_upri =
	    -((newnice - NZERO) * fx_maxupri) / NZERO;

	fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI;

	fxkparms.fx_tqntm = FX_TQDEF;

	/*
	 * Reset the uprilim and upri values of the thread. Adjust
	 * time quantum accordingly.
	 */

	(void) fx_parmsset(t, (void *)&fxkparms, (id_t)0, (cred_t *)NULL);

	/*
	 * Although fx_parmsset already reset fx_nice it may
	 * not have been set to precisely the value calculated above
	 * because fx_parmsset determines the nice value from the
	 * user priority and we may have truncated during the integer
	 * conversion from nice value to user priority and back.
	 * We reset fx_nice to the value we calculated above.
	 */
	fxpp->fx_nice = (char)newnice;

	if (retvalp)
		*retvalp = newnice - NZERO;

	return (0);
}

/*
 * Increment the priority of the specified thread by incr and
 * return the new value in *retvalp.
 */
static int
fx_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
{
	int		newpri;
	fxproc_t	*fxpp = (fxproc_t *)(t->t_cldata);
	fxkparms_t	fxkparms;

	ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));

	/* If there's no change to priority, just return current setting */
	if (incr == 0) {
		*retvalp = fxpp->fx_pri;
		return (0);
	}

	newpri = fxpp->fx_pri + incr;
	if (newpri > fx_maxupri || newpri < 0)
		return (EINVAL);

	*retvalp = newpri;
	fxkparms.fx_uprilim = fxkparms.fx_upri = newpri;
	fxkparms.fx_tqntm = FX_NOCHANGE;
	fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI;

	/*
	 * Reset the uprilim and upri values of the thread.
	 */
	return (fx_parmsset(t, (void *)&fxkparms, (id_t)0, cr));
}

static void
fx_change_priority(kthread_t *t, fxproc_t *fxpp)
{
	pri_t	new_pri;

	ASSERT(THREAD_LOCK_HELD(t));
	new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
	ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri);
	t->t_cpri = fxpp->fx_pri;
	if (t == curthread || t->t_state == TS_ONPROC) {
		/* curthread is always onproc */
		cpu_t	*cp = t->t_disp_queue->disp_cpu;
		THREAD_CHANGE_PRI(t, new_pri);
		if (t == cp->cpu_dispthread)
			cp->cpu_dispatch_pri = DISP_PRIO(t);
		if (DISP_MUST_SURRENDER(t)) {
			fxpp->fx_flags |= FXBACKQ;
			cpu_surrender(t);
		} else {
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
	} else {
		/*
		 * When the priority of a thread is changed,
		 * it may be necessary to adjust its position
		 * on a sleep queue or dispatch queue.
		 * The function thread_change_pri accomplishes
		 * this.
		 */
		if (thread_change_pri(t, new_pri, 0)) {
			/*
			 * The thread was on a run queue. Reset
			 * its CPU timeleft from the quantum
			 * associated with the new priority.
			 */
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		} else {
			fxpp->fx_flags |= FXBACKQ;
		}
	}
}

static int
fx_alloc(void **p, int flag)
{
	void *bufp;

	bufp = kmem_alloc(sizeof (fxproc_t), flag);
	if (bufp == NULL) {
		return (ENOMEM);
	} else {
		*p = bufp;
		return (0);
	}
}

static void
fx_free(void *bufp)
{
	if (bufp)
		kmem_free(bufp, sizeof (fxproc_t));
}

/*
 * Release the callback list mutex after a successful lookup.
 */
void
fx_list_release(fxproc_t *fxpp)
{
	int index = FX_CB_LIST_HASH(fxpp->fx_ktid);
	kmutex_t *lockp = &fx_cb_list_lock[index];
	mutex_exit(lockp);
}

fxproc_t *
fx_list_lookup(kt_did_t ktid)
{
	int index = FX_CB_LIST_HASH(ktid);
	kmutex_t *lockp = &fx_cb_list_lock[index];
	fxproc_t *fxpp;

	mutex_enter(lockp);

	for (fxpp = fx_cb_plisthead[index].fx_cb_next;
	    fxpp != &fx_cb_plisthead[index]; fxpp = fxpp->fx_cb_next) {
		if (fxpp->fx_tp->t_cid == fx_cid && fxpp->fx_ktid == ktid &&
		    fxpp->fx_callback != NULL) {
			/*
			 * The caller is responsible for calling
			 * fx_list_release to drop the lock upon
			 * successful lookup.
			 */
			return (fxpp);
		}
	}
	mutex_exit(lockp);
	return ((fxproc_t *)NULL);
}
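
/*
 * Hypothetical usage sketch (not part of this file): a kernel thread
 * already running in the FX class could pin itself at the class's
 * maximum priority with an infinite quantum by registering a callback
 * vector, e.g.:
 *
 *	if (fx_register_callbacks(&my_callbacks, (fx_cookie_t)my_state,
 *	    fx_get_maxpri(), FX_TQINF) == 0) {
 *		... callbacks now fire on tick, sleep, wakeup, preempt ...
 *		(void) fx_unregister_callbacks();
 *	}
 *
 * where my_callbacks is an fx_callbacks_t whose version field matches
 * FX_CALLB_REV, and my_state is caller-private data passed back through
 * the cookie argument.
 */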

/*
 * Register a set of callback routines for the current thread.
 * The thread should already be in the FX class.
 */
int
fx_register_callbacks(fx_callbacks_t *fx_callback, fx_cookie_t cookie,
    pri_t pri, clock_t quantum)
{

	fxproc_t	*fxpp;

	if (fx_callback == NULL)
		return (EINVAL);

	if (secpolicy_dispadm(CRED()) != 0)
		return (EPERM);

	if (FX_CB_VERSION(fx_callback) != FX_CALLB_REV)
		return (EINVAL);

	if (!FX_ISVALID(pri, quantum))
		return (EINVAL);

	thread_lock(curthread);		/* get dispatcher lock on thread */

	if (curthread->t_cid != fx_cid) {
		thread_unlock(curthread);
		return (EINVAL);
	}

	fxpp = (fxproc_t *)(curthread->t_cldata);
	ASSERT(fxpp != NULL);
	if (FX_HAS_CB(fxpp)) {
		thread_unlock(curthread);
		return (EINVAL);
	}

	fxpp->fx_callback = fx_callback;
	fxpp->fx_cookie = cookie;

	if (pri != FX_CB_NOCHANGE) {
		fxpp->fx_pri = pri;
		FX_ADJUST_PRI(fxpp->fx_pri);
		if (quantum == FX_TQDEF) {
			fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
		} else if (quantum == FX_TQINF) {
			fxpp->fx_pquantum = FX_TQINF;
		} else if (quantum != FX_NOCHANGE) {
			FX_ADJUST_QUANTUM(quantum);
			fxpp->fx_pquantum = quantum;
		}
	} else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) {
		if (quantum == FX_TQINF)
			fxpp->fx_pquantum = FX_TQINF;
		else {
			FX_ADJUST_QUANTUM(quantum);
			fxpp->fx_pquantum = quantum;
		}
	}

	fxpp->fx_ktid = ddi_get_kt_did();

	fx_change_priority(curthread, fxpp);

	thread_unlock(curthread);

	/*
	 * Link new structure into fxproc list.
	 */
	FX_CB_LIST_INSERT(fxpp);
	return (0);
}

/* Unregister the callback routines for the current thread. */
int
fx_unregister_callbacks()
{
	fxproc_t	*fxpp;

	if ((fxpp = fx_list_lookup(ddi_get_kt_did())) == NULL) {
		/*
		 * The thread did not have a registered callback.
		 */
		return (EINVAL);
	}

	thread_lock(fxpp->fx_tp);
	fxpp->fx_callback = NULL;
	fxpp->fx_cookie = NULL;
	thread_unlock(fxpp->fx_tp);
	fx_list_release(fxpp);

	FX_CB_LIST_DELETE(fxpp);
	return (0);
}

/*
 * Modify the priority and/or quantum value of a thread with a callback.
 */
int
fx_modify_priority(kt_did_t ktid, clock_t quantum, pri_t pri)
{
	fxproc_t	*fxpp;

	if (!FX_ISVALID(pri, quantum))
		return (EINVAL);

	if ((fxpp = fx_list_lookup(ktid)) == NULL) {
		/*
		 * The thread has either exited or did not have a
		 * registered callback.
		 */
		return (ESRCH);
	}

	thread_lock(fxpp->fx_tp);

	if (pri != FX_CB_NOCHANGE) {
		fxpp->fx_pri = pri;
		FX_ADJUST_PRI(fxpp->fx_pri);
		if (quantum == FX_TQDEF) {
			fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
		} else if (quantum == FX_TQINF) {
			fxpp->fx_pquantum = FX_TQINF;
		} else if (quantum != FX_NOCHANGE) {
			FX_ADJUST_QUANTUM(quantum);
			fxpp->fx_pquantum = quantum;
		}
	} else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) {
		if (quantum == FX_TQINF) {
			fxpp->fx_pquantum = FX_TQINF;
		} else {
			FX_ADJUST_QUANTUM(quantum);
			fxpp->fx_pquantum = quantum;
		}
	}

	fx_change_priority(fxpp->fx_tp, fxpp);

	thread_unlock(fxpp->fx_tp);
	fx_list_release(fxpp);
	return (0);
}


/*
 * Return an iblock cookie, for mutex initialization, to be used in
 * callbacks.
 */
void *
fx_get_mutex_cookie()
{
	return ((void *)(uintptr_t)__ipltospl(DISP_LEVEL));
}

/*
 * Return the maximum relative priority.
 */
pri_t
fx_get_maxpri()
{
	return (fx_maxumdpri);
}