1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/sysmacros.h> 32 #include <sys/cred.h> 33 #include <sys/proc.h> 34 #include <sys/session.h> 35 #include <sys/strsubr.h> 36 #include <sys/user.h> 37 #include <sys/priocntl.h> 38 #include <sys/class.h> 39 #include <sys/disp.h> 40 #include <sys/procset.h> 41 #include <sys/debug.h> 42 #include <sys/kmem.h> 43 #include <sys/errno.h> 44 #include <sys/fx.h> 45 #include <sys/fxpriocntl.h> 46 #include <sys/cpuvar.h> 47 #include <sys/systm.h> 48 #include <sys/vtrace.h> 49 #include <sys/schedctl.h> 50 #include <sys/tnf_probe.h> 51 #include <sys/sunddi.h> 52 #include <sys/spl.h> 53 #include <sys/modctl.h> 54 #include <sys/policy.h> 55 #include <sys/sdt.h> 56 #include <sys/cpupart.h> 57 58 static pri_t fx_init(id_t, int, classfuncs_t **); 59 60 static struct sclass csw = { 61 "FX", 62 fx_init, 63 0 64 }; 65 66 static struct modlsched modlsched = { 67 &mod_schedops, "Fixed priority sched class", &csw 68 }; 69 70 static struct modlinkage modlinkage = { 71 MODREV_1, (void *)&modlsched, NULL 72 }; 73 74 75 /* 76 * control flags (kparms->fx_cflags). 77 */ 78 #define FX_DOUPRILIM 0x01 /* change user priority limit */ 79 #define FX_DOUPRI 0x02 /* change user priority */ 80 #define FX_DOTQ 0x04 /* change FX time quantum */ 81 82 83 #define FXMAXUPRI 60 /* maximum user priority setting */ 84 85 #define FX_MAX_UNPRIV_PRI 0 /* maximum unpriviledge priority */ 86 87 /* 88 * The fxproc_t structures are kept in an array of circular doubly linked 89 * lists. A hash on the thread pointer is used to determine which list 90 * each fxproc structure should be placed. Each list has a dummy "head" which 91 * is never removed, so the list is never empty. 92 */ 93 94 #define FX_LISTS 16 /* number of lists, must be power of 2 */ 95 #define FX_LIST_HASH(tp) (((uintptr_t)(tp) >> 9) & (FX_LISTS - 1)) 96 97 #define FX_LIST_INSERT(fxpp) \ 98 { \ 99 int index = FX_LIST_HASH(fxpp->fx_tp); \ 100 kmutex_t *lockp = &fx_list_lock[index]; \ 101 fxproc_t *headp = &fx_plisthead[index]; \ 102 mutex_enter(lockp); \ 103 fxpp->fx_next = headp->fx_next; \ 104 fxpp->fx_prev = headp; \ 105 headp->fx_next->fx_prev = fxpp; \ 106 headp->fx_next = fxpp; \ 107 mutex_exit(lockp); \ 108 } 109 110 #define FX_LIST_DELETE(fxpp) \ 111 { \ 112 int index = FX_LIST_HASH(fxpp->fx_tp); \ 113 kmutex_t *lockp = &fx_list_lock[index]; \ 114 mutex_enter(lockp); \ 115 fxpp->fx_prev->fx_next = fxpp->fx_next; \ 116 fxpp->fx_next->fx_prev = fxpp->fx_prev; \ 117 mutex_exit(lockp); \ 118 } 119 120 121 /* 122 * The fxproc_t structures that have a registered callback vector, 123 * are also kept in an array of circular doubly linked lists. A hash on 124 * the thread id (from ddi_get_kt_did()) is used to determine which list 125 * each of such fxproc structures should be placed. Each list has a dummy 126 * "head" which is never removed, so the list is never empty. 127 */ 128 129 #define FX_CB_LISTS 16 /* number of lists, must be power of 2 */ 130 #define FX_CB_LIST_HASH(ktid) ((uint_t)ktid & (FX_CB_LISTS - 1)) 131 132 /* Insert fxproc into callback list */ 133 #define FX_CB_LIST_INSERT(fxpp) \ 134 { \ 135 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); \ 136 kmutex_t *lockp = &fx_cb_list_lock[index]; \ 137 fxproc_t *headp = &fx_cb_plisthead[index]; \ 138 mutex_enter(lockp); \ 139 fxpp->fx_cb_next = headp->fx_cb_next; \ 140 fxpp->fx_cb_prev = headp; \ 141 headp->fx_cb_next->fx_cb_prev = fxpp; \ 142 headp->fx_cb_next = fxpp; \ 143 mutex_exit(lockp); \ 144 } 145 146 /* 147 * Remove thread from callback list. 148 */ 149 #define FX_CB_LIST_DELETE(fxpp) \ 150 { \ 151 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); \ 152 kmutex_t *lockp = &fx_cb_list_lock[index]; \ 153 mutex_enter(lockp); \ 154 fxpp->fx_cb_prev->fx_cb_next = fxpp->fx_cb_next; \ 155 fxpp->fx_cb_next->fx_cb_prev = fxpp->fx_cb_prev; \ 156 mutex_exit(lockp); \ 157 } 158 159 #define FX_HAS_CB(fxpp) (fxpp->fx_callback != NULL) 160 161 /* adjust x to be between 0 and fx_maxumdpri */ 162 163 #define FX_ADJUST_PRI(pri) \ 164 { \ 165 if (pri < 0) \ 166 pri = 0; \ 167 else if (pri > fx_maxumdpri) \ 168 pri = fx_maxumdpri; \ 169 } 170 171 #define FX_ADJUST_QUANTUM(q) \ 172 { \ 173 if (q > INT_MAX) \ 174 q = INT_MAX; \ 175 else if (q <= 0) \ 176 q = FX_TQINF; \ 177 } 178 179 #define FX_ISVALID(pri, quantum) \ 180 (((pri >= 0) || (pri == FX_CB_NOCHANGE)) && \ 181 ((quantum >= 0) || (quantum == FX_NOCHANGE) || \ 182 (quantum == FX_TQDEF) || (quantum == FX_TQINF))) 183 184 185 static id_t fx_cid; /* fixed priority class ID */ 186 static fxdpent_t *fx_dptbl; /* fixed priority disp parameter table */ 187 188 static pri_t fx_maxupri = FXMAXUPRI; 189 static pri_t fx_maxumdpri; /* max user mode fixed priority */ 190 191 static pri_t fx_maxglobpri; /* maximum global priority used by fx class */ 192 static kmutex_t fx_dptblock; /* protects fixed priority dispatch table */ 193 194 195 static kmutex_t fx_list_lock[FX_LISTS]; /* protects fxproc lists */ 196 static fxproc_t fx_plisthead[FX_LISTS]; /* dummy fxproc at head of lists */ 197 198 199 static kmutex_t fx_cb_list_lock[FX_CB_LISTS]; /* protects list of fxprocs */ 200 /* that have callbacks */ 201 static fxproc_t fx_cb_plisthead[FX_CB_LISTS]; /* dummy fxproc at head of */ 202 /* list of fxprocs with */ 203 /* callbacks */ 204 205 static int fx_admin(caddr_t, cred_t *); 206 static int fx_getclinfo(void *); 207 static int fx_parmsin(void *); 208 static int fx_parmsout(void *, pc_vaparms_t *); 209 static int fx_vaparmsin(void *, pc_vaparms_t *); 210 static int fx_vaparmsout(void *, pc_vaparms_t *); 211 static int fx_getclpri(pcpri_t *); 212 static int fx_alloc(void **, int); 213 static void fx_free(void *); 214 static int fx_enterclass(kthread_t *, id_t, void *, cred_t *, void *); 215 static void fx_exitclass(void *); 216 static int fx_canexit(kthread_t *, cred_t *); 217 static int fx_fork(kthread_t *, kthread_t *, void *); 218 static void fx_forkret(kthread_t *, kthread_t *); 219 static void fx_parmsget(kthread_t *, void *); 220 static int fx_parmsset(kthread_t *, void *, id_t, cred_t *); 221 static void fx_stop(kthread_t *, int, int); 222 static void fx_exit(kthread_t *); 223 static pri_t fx_swapin(kthread_t *, int); 224 static pri_t fx_swapout(kthread_t *, int); 225 static void fx_trapret(kthread_t *); 226 static void fx_preempt(kthread_t *); 227 static void fx_setrun(kthread_t *); 228 static void fx_sleep(kthread_t *); 229 static void fx_tick(kthread_t *); 230 static void fx_wakeup(kthread_t *); 231 static int fx_donice(kthread_t *, cred_t *, int, int *); 232 static pri_t fx_globpri(kthread_t *); 233 static void fx_yield(kthread_t *); 234 static void fx_nullsys(); 235 236 extern fxdpent_t *fx_getdptbl(void); 237 238 static void fx_change_priority(kthread_t *, fxproc_t *); 239 static fxproc_t *fx_list_lookup(kt_did_t); 240 static void fx_list_release(fxproc_t *); 241 242 243 static struct classfuncs fx_classfuncs = { 244 /* class functions */ 245 fx_admin, 246 fx_getclinfo, 247 fx_parmsin, 248 fx_parmsout, 249 fx_vaparmsin, 250 fx_vaparmsout, 251 fx_getclpri, 252 fx_alloc, 253 fx_free, 254 255 /* thread functions */ 256 fx_enterclass, 257 fx_exitclass, 258 fx_canexit, 259 fx_fork, 260 fx_forkret, 261 fx_parmsget, 262 fx_parmsset, 263 fx_stop, 264 fx_exit, 265 fx_nullsys, /* active */ 266 fx_nullsys, /* inactive */ 267 fx_swapin, 268 fx_swapout, 269 fx_trapret, 270 fx_preempt, 271 fx_setrun, 272 fx_sleep, 273 fx_tick, 274 fx_wakeup, 275 fx_donice, 276 fx_globpri, 277 fx_nullsys, /* set_process_group */ 278 fx_yield, 279 }; 280 281 282 int 283 _init() 284 { 285 return (mod_install(&modlinkage)); 286 } 287 288 int 289 _fini() 290 { 291 return (EBUSY); 292 } 293 294 int 295 _info(struct modinfo *modinfop) 296 { 297 return (mod_info(&modlinkage, modinfop)); 298 } 299 300 /* 301 * Fixed priority class initialization. Called by dispinit() at boot time. 302 * We can ignore the clparmsz argument since we know that the smallest 303 * possible parameter buffer is big enough for us. 304 */ 305 /* ARGSUSED */ 306 static pri_t 307 fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp) 308 { 309 int i; 310 extern pri_t fx_getmaxumdpri(void); 311 312 fx_dptbl = fx_getdptbl(); 313 fx_maxumdpri = fx_getmaxumdpri(); 314 fx_maxglobpri = fx_dptbl[fx_maxumdpri].fx_globpri; 315 316 fx_cid = cid; /* Record our class ID */ 317 318 /* 319 * Initialize the fxproc hash table 320 */ 321 for (i = 0; i < FX_LISTS; i++) { 322 fx_plisthead[i].fx_next = fx_plisthead[i].fx_prev = 323 &fx_plisthead[i]; 324 } 325 326 /* 327 * Initialize the hash table for fxprocs with callbacks 328 */ 329 for (i = 0; i < FX_CB_LISTS; i++) { 330 fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev = 331 &fx_cb_plisthead[i]; 332 } 333 334 /* 335 * We're required to return a pointer to our classfuncs 336 * structure and the highest global priority value we use. 337 */ 338 *clfuncspp = &fx_classfuncs; 339 return (fx_maxglobpri); 340 } 341 342 /* 343 * Get or reset the fx_dptbl values per the user's request. 344 */ 345 static int 346 fx_admin(caddr_t uaddr, cred_t *reqpcredp) 347 { 348 fxadmin_t fxadmin; 349 fxdpent_t *tmpdpp; 350 int userdpsz; 351 int i; 352 size_t fxdpsz; 353 354 if (get_udatamodel() == DATAMODEL_NATIVE) { 355 if (copyin(uaddr, &fxadmin, sizeof (fxadmin_t))) 356 return (EFAULT); 357 } 358 #ifdef _SYSCALL32_IMPL 359 else { 360 /* get fxadmin struct from ILP32 caller */ 361 fxadmin32_t fxadmin32; 362 if (copyin(uaddr, &fxadmin32, sizeof (fxadmin32_t))) 363 return (EFAULT); 364 fxadmin.fx_dpents = 365 (struct fxdpent *)(uintptr_t)fxadmin32.fx_dpents; 366 fxadmin.fx_ndpents = fxadmin32.fx_ndpents; 367 fxadmin.fx_cmd = fxadmin32.fx_cmd; 368 } 369 #endif /* _SYSCALL32_IMPL */ 370 371 fxdpsz = (fx_maxumdpri + 1) * sizeof (fxdpent_t); 372 373 switch (fxadmin.fx_cmd) { 374 case FX_GETDPSIZE: 375 fxadmin.fx_ndpents = fx_maxumdpri + 1; 376 377 if (get_udatamodel() == DATAMODEL_NATIVE) { 378 if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t))) 379 return (EFAULT); 380 } 381 #ifdef _SYSCALL32_IMPL 382 else { 383 /* return fxadmin struct to ILP32 caller */ 384 fxadmin32_t fxadmin32; 385 fxadmin32.fx_dpents = 386 (caddr32_t)(uintptr_t)fxadmin.fx_dpents; 387 fxadmin32.fx_ndpents = fxadmin.fx_ndpents; 388 fxadmin32.fx_cmd = fxadmin.fx_cmd; 389 if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t))) 390 return (EFAULT); 391 } 392 #endif /* _SYSCALL32_IMPL */ 393 break; 394 395 case FX_GETDPTBL: 396 userdpsz = MIN(fxadmin.fx_ndpents * sizeof (fxdpent_t), 397 fxdpsz); 398 if (copyout(fx_dptbl, fxadmin.fx_dpents, userdpsz)) 399 return (EFAULT); 400 401 fxadmin.fx_ndpents = userdpsz / sizeof (fxdpent_t); 402 403 if (get_udatamodel() == DATAMODEL_NATIVE) { 404 if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t))) 405 return (EFAULT); 406 } 407 #ifdef _SYSCALL32_IMPL 408 else { 409 /* return fxadmin struct to ILP32 callers */ 410 fxadmin32_t fxadmin32; 411 fxadmin32.fx_dpents = 412 (caddr32_t)(uintptr_t)fxadmin.fx_dpents; 413 fxadmin32.fx_ndpents = fxadmin.fx_ndpents; 414 fxadmin32.fx_cmd = fxadmin.fx_cmd; 415 if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t))) 416 return (EFAULT); 417 } 418 #endif /* _SYSCALL32_IMPL */ 419 break; 420 421 case FX_SETDPTBL: 422 /* 423 * We require that the requesting process has sufficient 424 * privileges. We also require that the table supplied by 425 * the user exactly match the current fx_dptbl in size. 426 */ 427 if (secpolicy_dispadm(reqpcredp) != 0) { 428 return (EPERM); 429 } 430 if (fxadmin.fx_ndpents * sizeof (fxdpent_t) != fxdpsz) { 431 return (EINVAL); 432 } 433 434 /* 435 * We read the user supplied table into a temporary buffer 436 * where it is validated before being copied over the 437 * fx_dptbl. 438 */ 439 tmpdpp = kmem_alloc(fxdpsz, KM_SLEEP); 440 if (copyin(fxadmin.fx_dpents, tmpdpp, fxdpsz)) { 441 kmem_free(tmpdpp, fxdpsz); 442 return (EFAULT); 443 } 444 for (i = 0; i < fxadmin.fx_ndpents; i++) { 445 446 /* 447 * Validate the user supplied values. All we are doing 448 * here is verifying that the values are within their 449 * allowable ranges and will not panic the system. We 450 * make no attempt to ensure that the resulting 451 * configuration makes sense or results in reasonable 452 * performance. 453 */ 454 if (tmpdpp[i].fx_quantum <= 0 && 455 tmpdpp[i].fx_quantum != FX_TQINF) { 456 kmem_free(tmpdpp, fxdpsz); 457 return (EINVAL); 458 } 459 } 460 461 /* 462 * Copy the user supplied values over the current fx_dptbl 463 * values. The fx_globpri member is read-only so we don't 464 * overwrite it. 465 */ 466 mutex_enter(&fx_dptblock); 467 for (i = 0; i < fxadmin.fx_ndpents; i++) { 468 fx_dptbl[i].fx_quantum = tmpdpp[i].fx_quantum; 469 } 470 mutex_exit(&fx_dptblock); 471 kmem_free(tmpdpp, fxdpsz); 472 break; 473 474 default: 475 return (EINVAL); 476 } 477 return (0); 478 } 479 480 481 /* 482 * Allocate a fixed priority class specific thread structure and 483 * initialize it with the parameters supplied. Also move the thread 484 * to specified priority. 485 */ 486 static int 487 fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp, 488 void *bufp) 489 { 490 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 491 fxproc_t *fxpp; 492 pri_t reqfxupri; 493 pri_t reqfxuprilim; 494 495 fxpp = (fxproc_t *)bufp; 496 ASSERT(fxpp != NULL); 497 498 /* 499 * Initialize the fxproc structure. 500 */ 501 fxpp->fx_flags = 0; 502 fxpp->fx_callback = NULL; 503 fxpp->fx_cookie = NULL; 504 505 if (fxkparmsp == NULL) { 506 /* 507 * Use default values. 508 */ 509 fxpp->fx_pri = fxpp->fx_uprilim = 0; 510 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 511 fxpp->fx_nice = NZERO; 512 } else { 513 /* 514 * Use supplied values. 515 */ 516 517 if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) { 518 reqfxuprilim = 0; 519 } else { 520 if (fxkparmsp->fx_uprilim > FX_MAX_UNPRIV_PRI && 521 secpolicy_setpriority(reqpcredp) != 0) 522 return (EPERM); 523 reqfxuprilim = fxkparmsp->fx_uprilim; 524 FX_ADJUST_PRI(reqfxuprilim); 525 } 526 527 if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) { 528 reqfxupri = reqfxuprilim; 529 } else { 530 if (fxkparmsp->fx_upri > FX_MAX_UNPRIV_PRI && 531 secpolicy_setpriority(reqpcredp) != 0) 532 return (EPERM); 533 /* 534 * Set the user priority to the requested value 535 * or the upri limit, whichever is lower. 536 */ 537 reqfxupri = fxkparmsp->fx_upri; 538 FX_ADJUST_PRI(reqfxupri); 539 540 if (reqfxupri > reqfxuprilim) 541 reqfxupri = reqfxuprilim; 542 } 543 544 545 fxpp->fx_uprilim = reqfxuprilim; 546 fxpp->fx_pri = reqfxupri; 547 548 fxpp->fx_nice = NZERO - (NZERO * reqfxupri) 549 / fx_maxupri; 550 551 if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) || 552 (fxkparmsp->fx_tqntm == FX_TQDEF)) { 553 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 554 } else { 555 if (secpolicy_setpriority(reqpcredp) != 0) 556 return (EPERM); 557 558 if (fxkparmsp->fx_tqntm == FX_TQINF) 559 fxpp->fx_pquantum = FX_TQINF; 560 else { 561 fxpp->fx_pquantum = fxkparmsp->fx_tqntm; 562 } 563 } 564 565 } 566 567 fxpp->fx_timeleft = fxpp->fx_pquantum; 568 fxpp->fx_tp = t; 569 570 thread_lock(t); /* get dispatcher lock on thread */ 571 t->t_clfuncs = &(sclass[cid].cl_funcs->thread); 572 t->t_cid = cid; 573 t->t_cldata = (void *)fxpp; 574 t->t_schedflag &= ~TS_RUNQMATCH; 575 fx_change_priority(t, fxpp); 576 thread_unlock(t); 577 578 FX_LIST_INSERT(fxpp); 579 580 return (0); 581 } 582 583 /* 584 * The thread is exiting. 585 */ 586 static void 587 fx_exit(kthread_t *t) 588 { 589 fxproc_t *fxpp; 590 591 thread_lock(t); 592 fxpp = (fxproc_t *)(t->t_cldata); 593 594 if (FX_HAS_CB(fxpp)) { 595 FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie); 596 fxpp->fx_callback = NULL; 597 fxpp->fx_cookie = NULL; 598 thread_unlock(t); 599 FX_CB_LIST_DELETE(fxpp); 600 return; 601 } 602 thread_unlock(t); 603 } 604 605 /* 606 * Exiting the class. Free fxproc structure of thread. 607 */ 608 static void 609 fx_exitclass(void *procp) 610 { 611 fxproc_t *fxpp = (fxproc_t *)procp; 612 613 thread_lock(fxpp->fx_tp); 614 if (FX_HAS_CB(fxpp)) { 615 616 FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie); 617 618 fxpp->fx_callback = NULL; 619 fxpp->fx_cookie = NULL; 620 thread_unlock(fxpp->fx_tp); 621 FX_CB_LIST_DELETE(fxpp); 622 } else 623 thread_unlock(fxpp->fx_tp); 624 FX_LIST_DELETE(fxpp); 625 626 kmem_free(fxpp, sizeof (fxproc_t)); 627 } 628 629 /* ARGSUSED */ 630 static int 631 fx_canexit(kthread_t *t, cred_t *cred) 632 { 633 /* 634 * A thread can always leave the FX class 635 */ 636 return (0); 637 } 638 639 /* 640 * Initialize fixed-priority class specific proc structure for a child. 641 * callbacks are not inherited upon fork. 642 */ 643 static int 644 fx_fork(kthread_t *t, kthread_t *ct, void *bufp) 645 { 646 fxproc_t *pfxpp; /* ptr to parent's fxproc structure */ 647 fxproc_t *cfxpp; /* ptr to child's fxproc structure */ 648 649 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 650 651 cfxpp = (fxproc_t *)bufp; 652 ASSERT(cfxpp != NULL); 653 thread_lock(t); 654 pfxpp = (fxproc_t *)t->t_cldata; 655 /* 656 * Initialize child's fxproc structure. 657 */ 658 cfxpp->fx_timeleft = cfxpp->fx_pquantum = pfxpp->fx_pquantum; 659 cfxpp->fx_pri = pfxpp->fx_pri; 660 cfxpp->fx_uprilim = pfxpp->fx_uprilim; 661 cfxpp->fx_nice = pfxpp->fx_nice; 662 cfxpp->fx_callback = NULL; 663 cfxpp->fx_cookie = NULL; 664 cfxpp->fx_flags = pfxpp->fx_flags & ~(FXBACKQ); 665 666 cfxpp->fx_tp = ct; 667 ct->t_cldata = (void *)cfxpp; 668 thread_unlock(t); 669 670 /* 671 * Link new structure into fxproc list. 672 */ 673 FX_LIST_INSERT(cfxpp); 674 return (0); 675 } 676 677 678 /* 679 * Child is placed at back of dispatcher queue and parent gives 680 * up processor so that the child runs first after the fork. 681 * This allows the child immediately execing to break the multiple 682 * use of copy on write pages with no disk home. The parent will 683 * get to steal them back rather than uselessly copying them. 684 */ 685 static void 686 fx_forkret(kthread_t *t, kthread_t *ct) 687 { 688 proc_t *pp = ttoproc(t); 689 proc_t *cp = ttoproc(ct); 690 fxproc_t *fxpp; 691 692 ASSERT(t == curthread); 693 ASSERT(MUTEX_HELD(&pidlock)); 694 695 /* 696 * Grab the child's p_lock before dropping pidlock to ensure 697 * the process does not disappear before we set it running. 698 */ 699 mutex_enter(&cp->p_lock); 700 mutex_exit(&pidlock); 701 continuelwps(cp); 702 mutex_exit(&cp->p_lock); 703 704 mutex_enter(&pp->p_lock); 705 continuelwps(pp); 706 mutex_exit(&pp->p_lock); 707 708 thread_lock(t); 709 fxpp = (fxproc_t *)(t->t_cldata); 710 t->t_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 711 ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri); 712 THREAD_TRANSITION(t); 713 fx_setrun(t); 714 thread_unlock(t); 715 716 swtch(); 717 } 718 719 720 /* 721 * Get information about the fixed-priority class into the buffer 722 * pointed to by fxinfop. The maximum configured user priority 723 * is the only information we supply. 724 */ 725 static int 726 fx_getclinfo(void *infop) 727 { 728 fxinfo_t *fxinfop = (fxinfo_t *)infop; 729 fxinfop->fx_maxupri = fx_maxupri; 730 return (0); 731 } 732 733 734 735 /* 736 * Return the global scheduling priority ranges for the fixed-priority 737 * class in pcpri_t structure. 738 */ 739 static int 740 fx_getclpri(pcpri_t *pcprip) 741 { 742 pcprip->pc_clpmax = fx_dptbl[fx_maxumdpri].fx_globpri; 743 pcprip->pc_clpmin = fx_dptbl[0].fx_globpri; 744 return (0); 745 } 746 747 748 static void 749 fx_nullsys() 750 {} 751 752 753 /* 754 * Get the fixed-priority parameters of the thread pointed to by 755 * fxprocp into the buffer pointed to by fxparmsp. 756 */ 757 static void 758 fx_parmsget(kthread_t *t, void *parmsp) 759 { 760 fxproc_t *fxpp = (fxproc_t *)t->t_cldata; 761 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 762 763 fxkparmsp->fx_upri = fxpp->fx_pri; 764 fxkparmsp->fx_uprilim = fxpp->fx_uprilim; 765 fxkparmsp->fx_tqntm = fxpp->fx_pquantum; 766 } 767 768 769 770 /* 771 * Check the validity of the fixed-priority parameters in the buffer 772 * pointed to by fxparmsp. 773 */ 774 static int 775 fx_parmsin(void *parmsp) 776 { 777 fxparms_t *fxparmsp = (fxparms_t *)parmsp; 778 uint_t cflags; 779 longlong_t ticks; 780 /* 781 * Check validity of parameters. 782 */ 783 784 if ((fxparmsp->fx_uprilim > fx_maxupri || 785 fxparmsp->fx_uprilim < 0) && 786 fxparmsp->fx_uprilim != FX_NOCHANGE) 787 return (EINVAL); 788 789 if ((fxparmsp->fx_upri > fx_maxupri || 790 fxparmsp->fx_upri < 0) && 791 fxparmsp->fx_upri != FX_NOCHANGE) 792 return (EINVAL); 793 794 if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) || 795 fxparmsp->fx_tqnsecs >= NANOSEC) 796 return (EINVAL); 797 798 cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0); 799 800 if (fxparmsp->fx_uprilim != FX_NOCHANGE) { 801 cflags |= FX_DOUPRILIM; 802 } 803 804 if (fxparmsp->fx_tqnsecs != FX_NOCHANGE) 805 cflags |= FX_DOTQ; 806 807 /* 808 * convert the buffer to kernel format. 809 */ 810 811 if (fxparmsp->fx_tqnsecs >= 0) { 812 if ((ticks = SEC_TO_TICK((longlong_t)fxparmsp->fx_tqsecs) + 813 NSEC_TO_TICK_ROUNDUP(fxparmsp->fx_tqnsecs)) > INT_MAX) 814 return (ERANGE); 815 816 ((fxkparms_t *)fxparmsp)->fx_tqntm = (int)ticks; 817 } else { 818 if ((fxparmsp->fx_tqnsecs != FX_NOCHANGE) && 819 (fxparmsp->fx_tqnsecs != FX_TQINF) && 820 (fxparmsp->fx_tqnsecs != FX_TQDEF)) 821 return (EINVAL); 822 ((fxkparms_t *)fxparmsp)->fx_tqntm = fxparmsp->fx_tqnsecs; 823 } 824 825 ((fxkparms_t *)fxparmsp)->fx_cflags = cflags; 826 827 return (0); 828 } 829 830 831 /* 832 * Check the validity of the fixed-priority parameters in the pc_vaparms_t 833 * structure vaparmsp and put them in the buffer pointed to by fxprmsp. 834 * pc_vaparms_t contains (key, value) pairs of parameter. 835 */ 836 static int 837 fx_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp) 838 { 839 uint_t secs = 0; 840 uint_t cnt; 841 int nsecs = 0; 842 int priflag, secflag, nsecflag, limflag; 843 longlong_t ticks; 844 fxkparms_t *fxprmsp = (fxkparms_t *)prmsp; 845 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0]; 846 847 848 /* 849 * First check the validity of parameters and convert them 850 * from the user supplied format to the internal format. 851 */ 852 priflag = secflag = nsecflag = limflag = 0; 853 854 fxprmsp->fx_cflags = 0; 855 856 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT) 857 return (EINVAL); 858 859 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) { 860 861 switch (vpp->pc_key) { 862 case FX_KY_UPRILIM: 863 if (limflag++) 864 return (EINVAL); 865 fxprmsp->fx_cflags |= FX_DOUPRILIM; 866 fxprmsp->fx_uprilim = (pri_t)vpp->pc_parm; 867 if (fxprmsp->fx_uprilim > fx_maxupri || 868 fxprmsp->fx_uprilim < 0) 869 return (EINVAL); 870 break; 871 872 case FX_KY_UPRI: 873 if (priflag++) 874 return (EINVAL); 875 fxprmsp->fx_cflags |= FX_DOUPRI; 876 fxprmsp->fx_upri = (pri_t)vpp->pc_parm; 877 if (fxprmsp->fx_upri > fx_maxupri || 878 fxprmsp->fx_upri < 0) 879 return (EINVAL); 880 break; 881 882 case FX_KY_TQSECS: 883 if (secflag++) 884 return (EINVAL); 885 fxprmsp->fx_cflags |= FX_DOTQ; 886 secs = (uint_t)vpp->pc_parm; 887 break; 888 889 case FX_KY_TQNSECS: 890 if (nsecflag++) 891 return (EINVAL); 892 fxprmsp->fx_cflags |= FX_DOTQ; 893 nsecs = (int)vpp->pc_parm; 894 break; 895 896 default: 897 return (EINVAL); 898 } 899 } 900 901 if (vaparmsp->pc_vaparmscnt == 0) { 902 /* 903 * Use default parameters. 904 */ 905 fxprmsp->fx_upri = 0; 906 fxprmsp->fx_uprilim = 0; 907 fxprmsp->fx_tqntm = FX_TQDEF; 908 fxprmsp->fx_cflags = FX_DOUPRI | FX_DOUPRILIM | FX_DOTQ; 909 } else if ((fxprmsp->fx_cflags & FX_DOTQ) != 0) { 910 if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC) 911 return (EINVAL); 912 913 if (nsecs >= 0) { 914 if ((ticks = SEC_TO_TICK((longlong_t)secs) + 915 NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX) 916 return (ERANGE); 917 918 fxprmsp->fx_tqntm = (int)ticks; 919 } else { 920 if (nsecs != FX_TQINF && nsecs != FX_TQDEF) 921 return (EINVAL); 922 fxprmsp->fx_tqntm = nsecs; 923 } 924 } 925 926 return (0); 927 } 928 929 930 /* 931 * Nothing to do here but return success. 932 */ 933 /* ARGSUSED */ 934 static int 935 fx_parmsout(void *parmsp, pc_vaparms_t *vaparmsp) 936 { 937 register fxkparms_t *fxkprmsp = (fxkparms_t *)parmsp; 938 939 if (vaparmsp != NULL) 940 return (0); 941 942 if (fxkprmsp->fx_tqntm < 0) { 943 /* 944 * Quantum field set to special value (e.g. FX_TQINF) 945 */ 946 ((fxparms_t *)fxkprmsp)->fx_tqnsecs = fxkprmsp->fx_tqntm; 947 ((fxparms_t *)fxkprmsp)->fx_tqsecs = 0; 948 949 } else { 950 /* Convert quantum from ticks to seconds-nanoseconds */ 951 952 timestruc_t ts; 953 TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts); 954 ((fxparms_t *)fxkprmsp)->fx_tqsecs = ts.tv_sec; 955 ((fxparms_t *)fxkprmsp)->fx_tqnsecs = ts.tv_nsec; 956 } 957 958 return (0); 959 } 960 961 962 /* 963 * Copy all selected fixed-priority class parameters to the user. 964 * The parameters are specified by a key. 965 */ 966 static int 967 fx_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp) 968 { 969 fxkparms_t *fxkprmsp = (fxkparms_t *)prmsp; 970 timestruc_t ts; 971 uint_t cnt; 972 uint_t secs; 973 int nsecs; 974 int priflag, secflag, nsecflag, limflag; 975 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0]; 976 977 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 978 979 priflag = secflag = nsecflag = limflag = 0; 980 981 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT) 982 return (EINVAL); 983 984 if (fxkprmsp->fx_tqntm < 0) { 985 /* 986 * Quantum field set to special value (e.g. FX_TQINF). 987 */ 988 secs = 0; 989 nsecs = fxkprmsp->fx_tqntm; 990 } else { 991 /* 992 * Convert quantum from ticks to seconds-nanoseconds. 993 */ 994 TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts); 995 secs = ts.tv_sec; 996 nsecs = ts.tv_nsec; 997 } 998 999 1000 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) { 1001 1002 switch (vpp->pc_key) { 1003 case FX_KY_UPRILIM: 1004 if (limflag++) 1005 return (EINVAL); 1006 if (copyout(&fxkprmsp->fx_uprilim, 1007 (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t))) 1008 return (EFAULT); 1009 break; 1010 1011 case FX_KY_UPRI: 1012 if (priflag++) 1013 return (EINVAL); 1014 if (copyout(&fxkprmsp->fx_upri, 1015 (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t))) 1016 return (EFAULT); 1017 break; 1018 1019 case FX_KY_TQSECS: 1020 if (secflag++) 1021 return (EINVAL); 1022 if (copyout(&secs, 1023 (void *)(uintptr_t)vpp->pc_parm, sizeof (uint_t))) 1024 return (EFAULT); 1025 break; 1026 1027 case FX_KY_TQNSECS: 1028 if (nsecflag++) 1029 return (EINVAL); 1030 if (copyout(&nsecs, 1031 (void *)(uintptr_t)vpp->pc_parm, sizeof (int))) 1032 return (EFAULT); 1033 break; 1034 1035 default: 1036 return (EINVAL); 1037 } 1038 } 1039 1040 return (0); 1041 } 1042 1043 /* 1044 * Set the scheduling parameters of the thread pointed to by fxprocp 1045 * to those specified in the buffer pointed to by fxparmsp. 1046 */ 1047 /* ARGSUSED */ 1048 static int 1049 fx_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp) 1050 { 1051 char nice; 1052 pri_t reqfxuprilim; 1053 pri_t reqfxupri; 1054 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 1055 fxproc_t *fxpp; 1056 1057 1058 ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock)); 1059 1060 thread_lock(tx); 1061 fxpp = (fxproc_t *)tx->t_cldata; 1062 1063 if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) 1064 reqfxuprilim = fxpp->fx_uprilim; 1065 else 1066 reqfxuprilim = fxkparmsp->fx_uprilim; 1067 1068 /* 1069 * Basic permissions enforced by generic kernel code 1070 * for all classes require that a thread attempting 1071 * to change the scheduling parameters of a target 1072 * thread be privileged or have a real or effective 1073 * UID matching that of the target thread. We are not 1074 * called unless these basic permission checks have 1075 * already passed. The fixed priority class requires in 1076 * addition that the calling thread be privileged if it 1077 * is attempting to raise the pri above its current 1078 * value This may have been checked previously but if our 1079 * caller passed us a non-NULL credential pointer we assume 1080 * it hasn't and we check it here. 1081 */ 1082 1083 if ((reqpcredp != NULL) && 1084 (reqfxuprilim > fxpp->fx_uprilim || 1085 ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)) && 1086 secpolicy_setpriority(reqpcredp) != 0) { 1087 thread_unlock(tx); 1088 return (EPERM); 1089 } 1090 1091 FX_ADJUST_PRI(reqfxuprilim); 1092 1093 if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) 1094 reqfxupri = fxpp->fx_pri; 1095 else 1096 reqfxupri = fxkparmsp->fx_upri; 1097 1098 1099 /* 1100 * Make sure the user priority doesn't exceed the upri limit. 1101 */ 1102 if (reqfxupri > reqfxuprilim) 1103 reqfxupri = reqfxuprilim; 1104 1105 /* 1106 * Set fx_nice to the nice value corresponding to the user 1107 * priority we are setting. Note that setting the nice field 1108 * of the parameter struct won't affect upri or nice. 1109 */ 1110 1111 nice = NZERO - (reqfxupri * NZERO) / fx_maxupri; 1112 1113 if (nice > NZERO) 1114 nice = NZERO; 1115 1116 fxpp->fx_uprilim = reqfxuprilim; 1117 fxpp->fx_pri = reqfxupri; 1118 1119 if (fxkparmsp->fx_tqntm == FX_TQINF) 1120 fxpp->fx_pquantum = FX_TQINF; 1121 else if (fxkparmsp->fx_tqntm == FX_TQDEF) 1122 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1123 else if ((fxkparmsp->fx_cflags & FX_DOTQ) != 0) 1124 fxpp->fx_pquantum = fxkparmsp->fx_tqntm; 1125 1126 fxpp->fx_nice = nice; 1127 1128 fx_change_priority(tx, fxpp); 1129 thread_unlock(tx); 1130 return (0); 1131 } 1132 1133 1134 /* 1135 * Return the global scheduling priority that would be assigned 1136 * to a thread entering the fixed-priority class with the fx_upri. 1137 */ 1138 static pri_t 1139 fx_globpri(kthread_t *t) 1140 { 1141 fxproc_t *fxpp; 1142 1143 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 1144 1145 fxpp = (fxproc_t *)t->t_cldata; 1146 return (fx_dptbl[fxpp->fx_pri].fx_globpri); 1147 1148 } 1149 1150 /* 1151 * Arrange for thread to be placed in appropriate location 1152 * on dispatcher queue. 1153 * 1154 * This is called with the current thread in TS_ONPROC and locked. 1155 */ 1156 static void 1157 fx_preempt(kthread_t *t) 1158 { 1159 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1160 #ifdef KSLICE 1161 extern int kslice; 1162 #endif 1163 1164 ASSERT(t == curthread); 1165 ASSERT(THREAD_LOCK_HELD(curthread)); 1166 1167 /* 1168 * Check to see if we're doing "preemption control" here. If 1169 * we are, and if the user has requested that this thread not 1170 * be preempted, and if preemptions haven't been put off for 1171 * too long, let the preemption happen here but try to make 1172 * sure the thread is rescheduled as soon as possible. We do 1173 * this by putting it on the front of the highest priority run 1174 * queue in the FX class. If the preemption has been put off 1175 * for too long, clear the "nopreempt" bit and let the thread 1176 * be preempted. 1177 */ 1178 if (t->t_schedctl && schedctl_get_nopreempt(t)) { 1179 if (fxpp->fx_pquantum == FX_TQINF || 1180 fxpp->fx_timeleft > -SC_MAX_TICKS) { 1181 DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t); 1182 schedctl_set_yield(t, 1); 1183 setfrontdq(t); 1184 return; 1185 } else { 1186 schedctl_set_nopreempt(t, 0); 1187 DTRACE_SCHED1(schedctl__preempt, kthread_t *, t); 1188 TNF_PROBE_2(schedctl_preempt, "schedctl FX fx_preempt", 1189 /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid, 1190 tnf_lwpid, lwpid, t->t_tid); 1191 /* 1192 * Fall through and be preempted below. 1193 */ 1194 } 1195 } 1196 1197 if (FX_HAS_CB(fxpp)) { 1198 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1199 pri_t newpri = fxpp->fx_pri; 1200 FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, 1201 &new_quantum, &newpri); 1202 FX_ADJUST_QUANTUM(new_quantum); 1203 if ((int)new_quantum != fxpp->fx_pquantum) { 1204 fxpp->fx_pquantum = (int)new_quantum; 1205 fxpp->fx_timeleft = fxpp->fx_pquantum; 1206 } 1207 FX_ADJUST_PRI(newpri); 1208 fxpp->fx_pri = newpri; 1209 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1210 } 1211 1212 if ((fxpp->fx_flags & (FXBACKQ)) == FXBACKQ) { 1213 fxpp->fx_timeleft = fxpp->fx_pquantum; 1214 fxpp->fx_flags &= ~FXBACKQ; 1215 setbackdq(t); 1216 } else { 1217 #ifdef KSLICE 1218 if (kslice) 1219 setbackdq(t); 1220 else 1221 #endif 1222 setfrontdq(t); 1223 } 1224 } 1225 1226 static void 1227 fx_setrun(kthread_t *t) 1228 { 1229 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1230 1231 ASSERT(THREAD_LOCK_HELD(t)); /* t should be in transition */ 1232 fxpp->fx_flags &= ~FXBACKQ; 1233 1234 if (t->t_disp_time != lbolt) 1235 setbackdq(t); 1236 else 1237 setfrontdq(t); 1238 } 1239 1240 1241 /* 1242 * Prepare thread for sleep. We reset the thread priority so it will 1243 * run at the kernel priority level when it wakes up. 1244 */ 1245 static void 1246 fx_sleep(kthread_t *t) 1247 { 1248 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1249 1250 ASSERT(t == curthread); 1251 ASSERT(THREAD_LOCK_HELD(t)); 1252 1253 if (FX_HAS_CB(fxpp)) { 1254 FX_CB_SLEEP(FX_CALLB(fxpp), fxpp->fx_cookie); 1255 } 1256 t->t_stime = lbolt; /* time stamp for the swapper */ 1257 } 1258 1259 1260 /* 1261 * Return Values: 1262 * 1263 * -1 if the thread is loaded or is not eligible to be swapped in. 1264 * 1265 * FX and RT threads are designed so that they don't swapout; however, 1266 * it is possible that while the thread is swapped out and in another class, it 1267 * can be changed to FX or RT. Since these threads should be swapped in 1268 * as soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin 1269 * returns SHRT_MAX - 1, so that it gives deference to any swapped out 1270 * RT threads. 1271 */ 1272 /* ARGSUSED */ 1273 static pri_t 1274 fx_swapin(kthread_t *t, int flags) 1275 { 1276 pri_t tpri = -1; 1277 1278 ASSERT(THREAD_LOCK_HELD(t)); 1279 1280 if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) { 1281 tpri = (pri_t)SHRT_MAX - 1; 1282 } 1283 1284 return (tpri); 1285 } 1286 1287 /* 1288 * Return Values 1289 * -1 if the thread isn't loaded or is not eligible to be swapped out. 1290 */ 1291 /* ARGSUSED */ 1292 static pri_t 1293 fx_swapout(kthread_t *t, int flags) 1294 { 1295 ASSERT(THREAD_LOCK_HELD(t)); 1296 1297 return (-1); 1298 1299 } 1300 1301 /* ARGSUSED */ 1302 static void 1303 fx_stop(kthread_t *t, int why, int what) 1304 { 1305 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1306 1307 ASSERT(THREAD_LOCK_HELD(t)); 1308 1309 if (FX_HAS_CB(fxpp)) { 1310 FX_CB_STOP(FX_CALLB(fxpp), fxpp->fx_cookie); 1311 } 1312 } 1313 1314 /* 1315 * Check for time slice expiration. If time slice has expired 1316 * set runrun to cause preemption. 1317 */ 1318 static void 1319 fx_tick(kthread_t *t) 1320 { 1321 fxproc_t *fxpp; 1322 1323 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1324 1325 thread_lock(t); 1326 1327 fxpp = (fxproc_t *)(t->t_cldata); 1328 1329 if (FX_HAS_CB(fxpp)) { 1330 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1331 pri_t newpri = fxpp->fx_pri; 1332 FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie, 1333 &new_quantum, &newpri); 1334 FX_ADJUST_QUANTUM(new_quantum); 1335 if ((int)new_quantum != fxpp->fx_pquantum) { 1336 fxpp->fx_pquantum = (int)new_quantum; 1337 fxpp->fx_timeleft = fxpp->fx_pquantum; 1338 } 1339 FX_ADJUST_PRI(newpri); 1340 if (newpri != fxpp->fx_pri) { 1341 fxpp->fx_pri = newpri; 1342 fx_change_priority(t, fxpp); 1343 } 1344 } 1345 if ((fxpp->fx_pquantum != FX_TQINF) && 1346 (--fxpp->fx_timeleft <= 0)) { 1347 pri_t new_pri; 1348 1349 /* 1350 * If we're doing preemption control and trying to 1351 * avoid preempting this thread, just note that 1352 * the thread should yield soon and let it keep 1353 * running (unless it's been a while). 1354 */ 1355 if (t->t_schedctl && schedctl_get_nopreempt(t)) { 1356 if (fxpp->fx_timeleft > -SC_MAX_TICKS) { 1357 DTRACE_SCHED1(schedctl__nopreempt, 1358 kthread_t *, t); 1359 schedctl_set_yield(t, 1); 1360 thread_unlock_nopreempt(t); 1361 return; 1362 } 1363 TNF_PROBE_2(schedctl_failsafe, 1364 "schedctl FX fx_tick", /* CSTYLED */, 1365 tnf_pid, pid, ttoproc(t)->p_pid, 1366 tnf_lwpid, lwpid, t->t_tid); 1367 } 1368 new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 1369 ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); 1370 /* 1371 * When the priority of a thread is changed, 1372 * it may be necessary to adjust its position 1373 * on a sleep queue or dispatch queue. Even 1374 * when the priority is not changed, we need 1375 * to preserve round robin on dispatch queue. 1376 * The function thread_change_pri accomplishes 1377 * this. 1378 */ 1379 if (thread_change_pri(t, new_pri, 0)) { 1380 fxpp->fx_timeleft = fxpp->fx_pquantum; 1381 } else { 1382 fxpp->fx_flags |= FXBACKQ; 1383 cpu_surrender(t); 1384 } 1385 } else if (t->t_pri < t->t_disp_queue->disp_maxrunpri) { 1386 fxpp->fx_flags |= FXBACKQ; 1387 cpu_surrender(t); 1388 } 1389 1390 thread_unlock_nopreempt(t); /* clock thread can't be preempted */ 1391 } 1392 1393 1394 static void 1395 fx_trapret(kthread_t *t) 1396 { 1397 cpu_t *cp = CPU; 1398 1399 ASSERT(THREAD_LOCK_HELD(t)); 1400 ASSERT(t == curthread); 1401 ASSERT(cp->cpu_dispthread == t); 1402 ASSERT(t->t_state == TS_ONPROC); 1403 } 1404 1405 1406 /* 1407 * Processes waking up go to the back of their queue. 1408 */ 1409 static void 1410 fx_wakeup(kthread_t *t) 1411 { 1412 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1413 1414 ASSERT(THREAD_LOCK_HELD(t)); 1415 1416 t->t_stime = lbolt; /* time stamp for the swapper */ 1417 if (FX_HAS_CB(fxpp)) { 1418 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1419 pri_t newpri = fxpp->fx_pri; 1420 FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie, 1421 &new_quantum, &newpri); 1422 FX_ADJUST_QUANTUM(new_quantum); 1423 if ((int)new_quantum != fxpp->fx_pquantum) { 1424 fxpp->fx_pquantum = (int)new_quantum; 1425 fxpp->fx_timeleft = fxpp->fx_pquantum; 1426 } 1427 1428 FX_ADJUST_PRI(newpri); 1429 if (newpri != fxpp->fx_pri) { 1430 fxpp->fx_pri = newpri; 1431 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1432 } 1433 } 1434 1435 fxpp->fx_flags &= ~FXBACKQ; 1436 1437 if (t->t_disp_time != lbolt) 1438 setbackdq(t); 1439 else 1440 setfrontdq(t); 1441 } 1442 1443 1444 /* 1445 * When a thread yields, put it on the back of the run queue. 1446 */ 1447 static void 1448 fx_yield(kthread_t *t) 1449 { 1450 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1451 1452 ASSERT(t == curthread); 1453 ASSERT(THREAD_LOCK_HELD(t)); 1454 1455 if (FX_HAS_CB(fxpp)) { 1456 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1457 pri_t newpri = fxpp->fx_pri; 1458 FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, 1459 &new_quantum, &newpri); 1460 FX_ADJUST_QUANTUM(new_quantum); 1461 if ((int)new_quantum != fxpp->fx_pquantum) { 1462 fxpp->fx_pquantum = (int)new_quantum; 1463 fxpp->fx_timeleft = fxpp->fx_pquantum; 1464 } 1465 FX_ADJUST_PRI(newpri); 1466 fxpp->fx_pri = newpri; 1467 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1468 } 1469 1470 /* 1471 * Clear the preemption control "yield" bit since the user is 1472 * doing a yield. 1473 */ 1474 if (t->t_schedctl) 1475 schedctl_set_yield(t, 0); 1476 1477 if (fxpp->fx_timeleft <= 0) { 1478 /* 1479 * Time slice was artificially extended to avoid 1480 * preemption, so pretend we're preempting it now. 1481 */ 1482 DTRACE_SCHED1(schedctl__yield, int, -fxpp->fx_timeleft); 1483 fxpp->fx_timeleft = fxpp->fx_pquantum; 1484 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1485 ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri); 1486 } 1487 1488 fxpp->fx_flags &= ~FXBACKQ; 1489 setbackdq(t); 1490 } 1491 1492 1493 /* 1494 * Increment the nice value of the specified thread by incr and 1495 * return the new value in *retvalp. 1496 */ 1497 static int 1498 fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) 1499 { 1500 int newnice; 1501 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1502 fxkparms_t fxkparms; 1503 1504 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1505 1506 /* If there's no change to priority, just return current setting */ 1507 if (incr == 0) { 1508 if (retvalp) { 1509 *retvalp = fxpp->fx_nice - NZERO; 1510 } 1511 return (0); 1512 } 1513 1514 if ((incr < 0 || incr > 2 * NZERO) && 1515 secpolicy_setpriority(cr) != 0) 1516 return (EPERM); 1517 1518 /* 1519 * Specifying a nice increment greater than the upper limit of 1520 * 2 * NZERO - 1 will result in the thread's nice value being 1521 * set to the upper limit. We check for this before computing 1522 * the new value because otherwise we could get overflow 1523 * if a privileged user specified some ridiculous increment. 1524 */ 1525 if (incr > 2 * NZERO - 1) 1526 incr = 2 * NZERO - 1; 1527 1528 newnice = fxpp->fx_nice + incr; 1529 if (newnice > NZERO) 1530 newnice = NZERO; 1531 else if (newnice < 0) 1532 newnice = 0; 1533 1534 fxkparms.fx_uprilim = fxkparms.fx_upri = 1535 -((newnice - NZERO) * fx_maxupri) / NZERO; 1536 1537 fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI; 1538 1539 fxkparms.fx_tqntm = FX_TQDEF; 1540 1541 /* 1542 * Reset the uprilim and upri values of the thread. Adjust 1543 * time quantum accordingly. 1544 */ 1545 1546 (void) fx_parmsset(t, (void *)&fxkparms, (id_t)0, (cred_t *)NULL); 1547 1548 /* 1549 * Although fx_parmsset already reset fx_nice it may 1550 * not have been set to precisely the value calculated above 1551 * because fx_parmsset determines the nice value from the 1552 * user priority and we may have truncated during the integer 1553 * conversion from nice value to user priority and back. 1554 * We reset fx_nice to the value we calculated above. 1555 */ 1556 fxpp->fx_nice = (char)newnice; 1557 1558 if (retvalp) 1559 *retvalp = newnice - NZERO; 1560 1561 return (0); 1562 } 1563 1564 static void 1565 fx_change_priority(kthread_t *t, fxproc_t *fxpp) 1566 { 1567 pri_t new_pri; 1568 1569 ASSERT(THREAD_LOCK_HELD(t)); 1570 new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 1571 ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); 1572 if (t == curthread || t->t_state == TS_ONPROC) { 1573 /* curthread is always onproc */ 1574 cpu_t *cp = t->t_disp_queue->disp_cpu; 1575 THREAD_CHANGE_PRI(t, new_pri); 1576 if (t == cp->cpu_dispthread) 1577 cp->cpu_dispatch_pri = DISP_PRIO(t); 1578 if (DISP_MUST_SURRENDER(t)) { 1579 fxpp->fx_flags |= FXBACKQ; 1580 cpu_surrender(t); 1581 } else { 1582 fxpp->fx_timeleft = fxpp->fx_pquantum; 1583 } 1584 } else { 1585 /* 1586 * When the priority of a thread is changed, 1587 * it may be necessary to adjust its position 1588 * on a sleep queue or dispatch queue. 1589 * The function thread_change_pri accomplishes 1590 * this. 1591 */ 1592 if (thread_change_pri(t, new_pri, 0)) { 1593 /* 1594 * The thread was on a run queue. Reset 1595 * its CPU timeleft from the quantum 1596 * associated with the new priority. 1597 */ 1598 fxpp->fx_timeleft = fxpp->fx_pquantum; 1599 } else { 1600 fxpp->fx_flags |= FXBACKQ; 1601 } 1602 } 1603 } 1604 1605 static int 1606 fx_alloc(void **p, int flag) 1607 { 1608 void *bufp; 1609 1610 bufp = kmem_alloc(sizeof (fxproc_t), flag); 1611 if (bufp == NULL) { 1612 return (ENOMEM); 1613 } else { 1614 *p = bufp; 1615 return (0); 1616 } 1617 } 1618 1619 static void 1620 fx_free(void *bufp) 1621 { 1622 if (bufp) 1623 kmem_free(bufp, sizeof (fxproc_t)); 1624 } 1625 1626 /* 1627 * Release the callback list mutex after successful lookup 1628 */ 1629 void 1630 fx_list_release(fxproc_t *fxpp) 1631 { 1632 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); 1633 kmutex_t *lockp = &fx_cb_list_lock[index]; 1634 mutex_exit(lockp); 1635 } 1636 1637 fxproc_t * 1638 fx_list_lookup(kt_did_t ktid) 1639 { 1640 int index = FX_CB_LIST_HASH(ktid); 1641 kmutex_t *lockp = &fx_cb_list_lock[index]; 1642 fxproc_t *fxpp; 1643 1644 mutex_enter(lockp); 1645 1646 for (fxpp = fx_cb_plisthead[index].fx_cb_next; 1647 fxpp != &fx_cb_plisthead[index]; fxpp = fxpp->fx_cb_next) { 1648 if (fxpp->fx_tp->t_cid == fx_cid && fxpp->fx_ktid == ktid && 1649 fxpp->fx_callback != NULL) { 1650 /* 1651 * The caller is responsible for calling 1652 * fx_list_release to drop the lock upon 1653 * successful lookup 1654 */ 1655 return (fxpp); 1656 } 1657 } 1658 mutex_exit(lockp); 1659 return ((fxproc_t *)NULL); 1660 } 1661 1662 1663 /* 1664 * register a callback set of routines for current thread 1665 * thread should already be in FX class 1666 */ 1667 int 1668 fx_register_callbacks(fx_callbacks_t *fx_callback, fx_cookie_t cookie, 1669 pri_t pri, clock_t quantum) 1670 { 1671 1672 fxproc_t *fxpp; 1673 1674 if (fx_callback == NULL) 1675 return (EINVAL); 1676 1677 if (secpolicy_dispadm(CRED()) != 0) 1678 return (EPERM); 1679 1680 if (FX_CB_VERSION(fx_callback) != FX_CALLB_REV) 1681 return (EINVAL); 1682 1683 if (!FX_ISVALID(pri, quantum)) 1684 return (EINVAL); 1685 1686 thread_lock(curthread); /* get dispatcher lock on thread */ 1687 1688 if (curthread->t_cid != fx_cid) { 1689 thread_unlock(curthread); 1690 return (EINVAL); 1691 } 1692 1693 fxpp = (fxproc_t *)(curthread->t_cldata); 1694 ASSERT(fxpp != NULL); 1695 if (FX_HAS_CB(fxpp)) { 1696 thread_unlock(curthread); 1697 return (EINVAL); 1698 } 1699 1700 fxpp->fx_callback = fx_callback; 1701 fxpp->fx_cookie = cookie; 1702 1703 if (pri != FX_CB_NOCHANGE) { 1704 fxpp->fx_pri = pri; 1705 FX_ADJUST_PRI(fxpp->fx_pri); 1706 if (quantum == FX_TQDEF) { 1707 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1708 } else if (quantum == FX_TQINF) { 1709 fxpp->fx_pquantum = FX_TQINF; 1710 } else if (quantum != FX_NOCHANGE) { 1711 FX_ADJUST_QUANTUM(quantum); 1712 fxpp->fx_pquantum = quantum; 1713 } 1714 } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) { 1715 if (quantum == FX_TQINF) 1716 fxpp->fx_pquantum = FX_TQINF; 1717 else { 1718 FX_ADJUST_QUANTUM(quantum); 1719 fxpp->fx_pquantum = quantum; 1720 } 1721 } 1722 1723 fxpp->fx_ktid = ddi_get_kt_did(); 1724 1725 fx_change_priority(curthread, fxpp); 1726 1727 thread_unlock(curthread); 1728 1729 /* 1730 * Link new structure into fxproc list. 1731 */ 1732 FX_CB_LIST_INSERT(fxpp); 1733 return (0); 1734 } 1735 1736 /* unregister a callback set of routines for current thread */ 1737 int 1738 fx_unregister_callbacks() 1739 { 1740 fxproc_t *fxpp; 1741 1742 if ((fxpp = fx_list_lookup(ddi_get_kt_did())) == NULL) { 1743 /* 1744 * did not have a registered callback; 1745 */ 1746 return (EINVAL); 1747 } 1748 1749 thread_lock(fxpp->fx_tp); 1750 fxpp->fx_callback = NULL; 1751 fxpp->fx_cookie = NULL; 1752 thread_unlock(fxpp->fx_tp); 1753 fx_list_release(fxpp); 1754 1755 FX_CB_LIST_DELETE(fxpp); 1756 return (0); 1757 } 1758 1759 /* 1760 * modify priority and/or quantum value of a thread with callback 1761 */ 1762 int 1763 fx_modify_priority(kt_did_t ktid, clock_t quantum, pri_t pri) 1764 { 1765 fxproc_t *fxpp; 1766 1767 if (!FX_ISVALID(pri, quantum)) 1768 return (EINVAL); 1769 1770 if ((fxpp = fx_list_lookup(ktid)) == NULL) { 1771 /* 1772 * either thread had exited or did not have a registered 1773 * callback; 1774 */ 1775 return (ESRCH); 1776 } 1777 1778 thread_lock(fxpp->fx_tp); 1779 1780 if (pri != FX_CB_NOCHANGE) { 1781 fxpp->fx_pri = pri; 1782 FX_ADJUST_PRI(fxpp->fx_pri); 1783 if (quantum == FX_TQDEF) { 1784 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1785 } else if (quantum == FX_TQINF) { 1786 fxpp->fx_pquantum = FX_TQINF; 1787 } else if (quantum != FX_NOCHANGE) { 1788 FX_ADJUST_QUANTUM(quantum); 1789 fxpp->fx_pquantum = quantum; 1790 } 1791 } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) { 1792 if (quantum == FX_TQINF) { 1793 fxpp->fx_pquantum = FX_TQINF; 1794 } else { 1795 FX_ADJUST_QUANTUM(quantum); 1796 fxpp->fx_pquantum = quantum; 1797 } 1798 } 1799 1800 fx_change_priority(fxpp->fx_tp, fxpp); 1801 1802 thread_unlock(fxpp->fx_tp); 1803 fx_list_release(fxpp); 1804 return (0); 1805 } 1806 1807 1808 /* 1809 * return an iblock cookie for mutex initialization to be used in callbacks 1810 */ 1811 void * 1812 fx_get_mutex_cookie() 1813 { 1814 return ((void *)(uintptr_t)__ipltospl(DISP_LEVEL)); 1815 } 1816 1817 /* 1818 * return maximum relative priority 1819 */ 1820 pri_t 1821 fx_get_maxpri() 1822 { 1823 return (fx_maxumdpri); 1824 } 1825