1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/sysmacros.h> 33 #include <sys/signal.h> 34 #include <sys/user.h> 35 #include <sys/systm.h> 36 #include <sys/sysinfo.h> 37 #include <sys/var.h> 38 #include <sys/errno.h> 39 #include <sys/cmn_err.h> 40 #include <sys/debug.h> 41 #include <sys/inline.h> 42 #include <sys/disp.h> 43 #include <sys/class.h> 44 #include <sys/bitmap.h> 45 #include <sys/kmem.h> 46 #include <sys/cpuvar.h> 47 #include <sys/vtrace.h> 48 #include <sys/tnf.h> 49 #include <sys/cpupart.h> 50 #include <sys/lgrp.h> 51 #include <sys/pg.h> 52 #include <sys/cmt.h> 53 #include <sys/bitset.h> 54 #include <sys/schedctl.h> 55 #include <sys/atomic.h> 56 #include <sys/dtrace.h> 57 #include <sys/sdt.h> 58 #include <sys/archsystm.h> 59 60 #include <vm/as.h> 61 62 #define BOUND_CPU 0x1 63 #define BOUND_PARTITION 0x2 64 #define BOUND_INTR 0x4 65 66 /* Dispatch queue allocation structure and functions */ 67 struct disp_queue_info { 68 disp_t *dp; 69 dispq_t *olddispq; 70 dispq_t *newdispq; 71 ulong_t *olddqactmap; 72 ulong_t *newdqactmap; 73 int oldnglobpris; 74 }; 75 static void disp_dq_alloc(struct disp_queue_info *dptr, int numpris, 76 disp_t *dp); 77 static void disp_dq_assign(struct disp_queue_info *dptr, int numpris); 78 static void disp_dq_free(struct disp_queue_info *dptr); 79 80 /* platform-specific routine to call when processor is idle */ 81 static void generic_idle_cpu(); 82 void (*idle_cpu)() = generic_idle_cpu; 83 84 /* routines invoked when a CPU enters/exits the idle loop */ 85 static void idle_enter(); 86 static void idle_exit(); 87 88 /* platform-specific routine to call when thread is enqueued */ 89 static void generic_enq_thread(cpu_t *, int); 90 void (*disp_enq_thread)(cpu_t *, int) = generic_enq_thread; 91 92 pri_t kpreemptpri; /* priority where kernel preemption applies */ 93 pri_t upreemptpri = 0; /* priority where normal preemption applies */ 94 pri_t intr_pri; /* interrupt thread priority base level */ 95 96 #define KPQPRI -1 /* pri where cpu affinity is dropped for kpq */ 97 pri_t kpqpri = KPQPRI; /* can be set in /etc/system */ 98 disp_t cpu0_disp; /* boot CPU's dispatch queue */ 99 disp_lock_t swapped_lock; /* lock swapped threads and swap queue */ 100 int nswapped; /* total number of swapped threads */ 101 void disp_swapped_enq(kthread_t *tp); 102 static void disp_swapped_setrun(kthread_t *tp); 103 static void cpu_resched(cpu_t *cp, pri_t tpri); 104 105 /* 106 * If this is set, only interrupt threads 
 * will cause kernel preemptions.  This is done by changing the value of
 * kpreemptpri.  kpreemptpri will either be the max sysclass pri + 1 or the
 * min interrupt pri.
 */
int	only_intr_kpreempt;

extern void	set_idle_cpu(int cpun);
extern void	unset_idle_cpu(int cpun);
static void	setkpdq(kthread_t *tp, int borf);
#define	SETKP_BACK	0
#define	SETKP_FRONT	1
/*
 * Parameter that determines how recently a thread must have run
 * on the CPU to be considered loosely-bound to that CPU to reduce
 * cold cache effects.  The interval is in hertz.
 */
#define	RECHOOSE_INTERVAL 3
int	rechoose_interval = RECHOOSE_INTERVAL;
static cpu_t	*cpu_choose(kthread_t *, pri_t);

/*
 * Parameter that determines how long (in nanoseconds) a thread must
 * be sitting on a run queue before it can be stolen by another CPU
 * to reduce migrations.
 *
 * nosteal_nsec should be set by platform code via cmp_set_nosteal_interval()
 * to an appropriate value.  It is set to NOSTEAL_UNINITIALIZED here to
 * indicate that it is uninitialized.
 * Setting nosteal_nsec to 0 effectively disables the nosteal 'protection'.
 */
#define	NOSTEAL_UNINITIALIZED	(-1)
hrtime_t nosteal_nsec = NOSTEAL_UNINITIALIZED;
extern void cmp_set_nosteal_interval(void);

id_t	defaultcid;	/* system "default" class; see dispadmin(1M) */

disp_lock_t	transition_lock;	/* lock on transitioning threads */
disp_lock_t	stop_lock;		/* lock on stopped threads */

static void	cpu_dispqalloc(int numpris);

/*
 * This gets returned by disp_getwork/disp_getbest if we couldn't steal
 * a thread because it was sitting on its run queue for a very short
 * period of time.
 */
#define	T_DONTSTEAL	(kthread_t *)(-1) /* returned by disp_getwork/getbest */

static kthread_t	*disp_getwork(cpu_t *to);
static kthread_t	*disp_getbest(disp_t *from);
static kthread_t	*disp_ratify(kthread_t *tp, disp_t *kpq);

void	swtch_to(kthread_t *);

/*
 * dispatcher and scheduler initialization
 */

/*
 * disp_setup - Common code to calculate and allocate dispatcher
 *		variables and structures based on the maximum priority.
 */
static void
disp_setup(pri_t maxglobpri, pri_t oldnglobpris)
{
	pri_t	newnglobpris;

	ASSERT(MUTEX_HELD(&cpu_lock));

	newnglobpris = maxglobpri + 1 + LOCK_LEVEL;

	if (newnglobpris > oldnglobpris) {
		/*
		 * Allocate new kp queues for each CPU partition.
		 */
		cpupart_kpqalloc(newnglobpris);

		/*
		 * Allocate new dispatch queues for each CPU.
		 */
		cpu_dispqalloc(newnglobpris);

		/*
		 * compute new interrupt thread base priority
		 */
		intr_pri = maxglobpri;
		if (only_intr_kpreempt) {
			kpreemptpri = intr_pri + 1;
			if (kpqpri == KPQPRI)
				kpqpri = kpreemptpri;
		}
		v.v_nglobpris = newnglobpris;
	}
}

/*
 * dispinit - Called to initialize all loaded classes and the
 *	      dispatcher framework.
 */
void
dispinit(void)
{
	id_t	cid;
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	maxglobpri = -1;

	/*
	 * Initialize transition lock, which will always be set.
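	 * It is entered here and never exited, so a thread whose dispatcher
	 * lock has been switched to &transition_lock (see THREAD_TRANSITION())
	 * cannot be thread_lock()ed until it is given a new state and lock.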
217 */ 218 DISP_LOCK_INIT(&transition_lock); 219 disp_lock_enter_high(&transition_lock); 220 DISP_LOCK_INIT(&stop_lock); 221 222 mutex_enter(&cpu_lock); 223 CPU->cpu_disp->disp_maxrunpri = -1; 224 CPU->cpu_disp->disp_max_unbound_pri = -1; 225 226 /* 227 * Initialize the default CPU partition. 228 */ 229 cpupart_initialize_default(); 230 /* 231 * Call the class specific initialization functions for 232 * all pre-installed schedulers. 233 * 234 * We pass the size of a class specific parameter 235 * buffer to each of the initialization functions 236 * to try to catch problems with backward compatibility 237 * of class modules. 238 * 239 * For example a new class module running on an old system 240 * which didn't provide sufficiently large parameter buffers 241 * would be bad news. Class initialization modules can check for 242 * this and take action if they detect a problem. 243 */ 244 245 for (cid = 0; cid < nclass; cid++) { 246 sclass_t *sc; 247 248 sc = &sclass[cid]; 249 if (SCHED_INSTALLED(sc)) { 250 cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ, 251 &sc->cl_funcs); 252 if (cl_maxglobpri > maxglobpri) 253 maxglobpri = cl_maxglobpri; 254 } 255 } 256 kpreemptpri = (pri_t)v.v_maxsyspri + 1; 257 if (kpqpri == KPQPRI) 258 kpqpri = kpreemptpri; 259 260 ASSERT(maxglobpri >= 0); 261 disp_setup(maxglobpri, 0); 262 263 mutex_exit(&cpu_lock); 264 265 /* 266 * Platform specific sticky scheduler setup. 267 */ 268 if (nosteal_nsec == NOSTEAL_UNINITIALIZED) 269 cmp_set_nosteal_interval(); 270 271 /* 272 * Get the default class ID; this may be later modified via 273 * dispadmin(1M). This will load the class (normally TS) and that will 274 * call disp_add(), which is why we had to drop cpu_lock first. 275 */ 276 if (getcid(defaultclass, &defaultcid) != 0) { 277 cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'", 278 defaultclass); 279 } 280 } 281 282 /* 283 * disp_add - Called with class pointer to initialize the dispatcher 284 * for a newly loaded class. 285 */ 286 void 287 disp_add(sclass_t *clp) 288 { 289 pri_t maxglobpri; 290 pri_t cl_maxglobpri; 291 292 mutex_enter(&cpu_lock); 293 /* 294 * Initialize the scheduler class. 295 */ 296 maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1); 297 cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs); 298 if (cl_maxglobpri > maxglobpri) 299 maxglobpri = cl_maxglobpri; 300 301 /* 302 * Save old queue information. Since we're initializing a 303 * new scheduling class which has just been loaded, then 304 * the size of the dispq may have changed. We need to handle 305 * that here. 306 */ 307 disp_setup(maxglobpri, v.v_nglobpris); 308 309 mutex_exit(&cpu_lock); 310 } 311 312 313 /* 314 * For each CPU, allocate new dispatch queues 315 * with the stated number of priorities. 316 */ 317 static void 318 cpu_dispqalloc(int numpris) 319 { 320 cpu_t *cpup; 321 struct disp_queue_info *disp_mem; 322 int i, num; 323 324 ASSERT(MUTEX_HELD(&cpu_lock)); 325 326 disp_mem = kmem_zalloc(NCPU * 327 sizeof (struct disp_queue_info), KM_SLEEP); 328 329 /* 330 * This routine must allocate all of the memory before stopping 331 * the cpus because it must not sleep in kmem_alloc while the 332 * CPUs are stopped. Locks they hold will not be freed until they 333 * are restarted. 
334 */ 335 i = 0; 336 cpup = cpu_list; 337 do { 338 disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp); 339 i++; 340 cpup = cpup->cpu_next; 341 } while (cpup != cpu_list); 342 num = i; 343 344 pause_cpus(NULL); 345 for (i = 0; i < num; i++) 346 disp_dq_assign(&disp_mem[i], numpris); 347 start_cpus(); 348 349 /* 350 * I must free all of the memory after starting the cpus because 351 * I can not risk sleeping in kmem_free while the cpus are stopped. 352 */ 353 for (i = 0; i < num; i++) 354 disp_dq_free(&disp_mem[i]); 355 356 kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info)); 357 } 358 359 static void 360 disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp) 361 { 362 dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP); 363 dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) * 364 sizeof (long), KM_SLEEP); 365 dptr->dp = dp; 366 } 367 368 static void 369 disp_dq_assign(struct disp_queue_info *dptr, int numpris) 370 { 371 disp_t *dp; 372 373 dp = dptr->dp; 374 dptr->olddispq = dp->disp_q; 375 dptr->olddqactmap = dp->disp_qactmap; 376 dptr->oldnglobpris = dp->disp_npri; 377 378 ASSERT(dptr->oldnglobpris < numpris); 379 380 if (dptr->olddispq != NULL) { 381 /* 382 * Use kcopy because bcopy is platform-specific 383 * and could block while we might have paused the cpus. 384 */ 385 (void) kcopy(dptr->olddispq, dptr->newdispq, 386 dptr->oldnglobpris * sizeof (dispq_t)); 387 (void) kcopy(dptr->olddqactmap, dptr->newdqactmap, 388 ((dptr->oldnglobpris / BT_NBIPUL) + 1) * 389 sizeof (long)); 390 } 391 dp->disp_q = dptr->newdispq; 392 dp->disp_qactmap = dptr->newdqactmap; 393 dp->disp_q_limit = &dptr->newdispq[numpris]; 394 dp->disp_npri = numpris; 395 } 396 397 static void 398 disp_dq_free(struct disp_queue_info *dptr) 399 { 400 if (dptr->olddispq != NULL) 401 kmem_free(dptr->olddispq, 402 dptr->oldnglobpris * sizeof (dispq_t)); 403 if (dptr->olddqactmap != NULL) 404 kmem_free(dptr->olddqactmap, 405 ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long)); 406 } 407 408 /* 409 * For a newly created CPU, initialize the dispatch queue. 410 * This is called before the CPU is known through cpu[] or on any lists. 411 */ 412 void 413 disp_cpu_init(cpu_t *cp) 414 { 415 disp_t *dp; 416 dispq_t *newdispq; 417 ulong_t *newdqactmap; 418 419 ASSERT(MUTEX_HELD(&cpu_lock)); /* protect dispatcher queue sizes */ 420 421 if (cp == cpu0_disp.disp_cpu) 422 dp = &cpu0_disp; 423 else 424 dp = kmem_alloc(sizeof (disp_t), KM_SLEEP); 425 bzero(dp, sizeof (disp_t)); 426 cp->cpu_disp = dp; 427 dp->disp_cpu = cp; 428 dp->disp_maxrunpri = -1; 429 dp->disp_max_unbound_pri = -1; 430 DISP_LOCK_INIT(&cp->cpu_thread_lock); 431 /* 432 * Allocate memory for the dispatcher queue headers 433 * and the active queue bitmap. 434 */ 435 newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP); 436 newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) * 437 sizeof (long), KM_SLEEP); 438 dp->disp_q = newdispq; 439 dp->disp_qactmap = newdqactmap; 440 dp->disp_q_limit = &newdispq[v.v_nglobpris]; 441 dp->disp_npri = v.v_nglobpris; 442 } 443 444 void 445 disp_cpu_fini(cpu_t *cp) 446 { 447 ASSERT(MUTEX_HELD(&cpu_lock)); 448 449 disp_kp_free(cp->cpu_disp); 450 if (cp->cpu_disp != &cpu0_disp) 451 kmem_free(cp->cpu_disp, sizeof (disp_t)); 452 } 453 454 /* 455 * Allocate new, larger kpreempt dispatch queue to replace the old one. 
456 */ 457 void 458 disp_kp_alloc(disp_t *dq, pri_t npri) 459 { 460 struct disp_queue_info mem_info; 461 462 if (npri > dq->disp_npri) { 463 /* 464 * Allocate memory for the new array. 465 */ 466 disp_dq_alloc(&mem_info, npri, dq); 467 468 /* 469 * We need to copy the old structures to the new 470 * and free the old. 471 */ 472 disp_dq_assign(&mem_info, npri); 473 disp_dq_free(&mem_info); 474 } 475 } 476 477 /* 478 * Free dispatch queue. 479 * Used for the kpreempt queues for a removed CPU partition and 480 * for the per-CPU queues of deleted CPUs. 481 */ 482 void 483 disp_kp_free(disp_t *dq) 484 { 485 struct disp_queue_info mem_info; 486 487 mem_info.olddispq = dq->disp_q; 488 mem_info.olddqactmap = dq->disp_qactmap; 489 mem_info.oldnglobpris = dq->disp_npri; 490 disp_dq_free(&mem_info); 491 } 492 493 /* 494 * End dispatcher and scheduler initialization. 495 */ 496 497 /* 498 * See if there's anything to do other than remain idle. 499 * Return non-zero if there is. 500 * 501 * This function must be called with high spl, or with 502 * kernel preemption disabled to prevent the partition's 503 * active cpu list from changing while being traversed. 504 * 505 */ 506 int 507 disp_anywork(void) 508 { 509 cpu_t *cp = CPU; 510 cpu_t *ocp; 511 512 if (cp->cpu_disp->disp_nrunnable != 0) 513 return (1); 514 515 if (!(cp->cpu_flags & CPU_OFFLINE)) { 516 if (CP_MAXRUNPRI(cp->cpu_part) >= 0) 517 return (1); 518 519 /* 520 * Work can be taken from another CPU if: 521 * - There is unbound work on the run queue 522 * - That work isn't a thread undergoing a 523 * - context switch on an otherwise empty queue. 524 * - The CPU isn't running the idle loop. 525 */ 526 for (ocp = cp->cpu_next_part; ocp != cp; 527 ocp = ocp->cpu_next_part) { 528 ASSERT(CPU_ACTIVE(ocp)); 529 530 if (ocp->cpu_disp->disp_max_unbound_pri != -1 && 531 !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 532 ocp->cpu_disp->disp_nrunnable == 1) && 533 ocp->cpu_dispatch_pri != -1) 534 return (1); 535 } 536 } 537 return (0); 538 } 539 540 /* 541 * Called when CPU enters the idle loop 542 */ 543 static void 544 idle_enter() 545 { 546 cpu_t *cp = CPU; 547 548 new_cpu_mstate(CMS_IDLE, gethrtime_unscaled()); 549 CPU_STATS_ADDQ(cp, sys, idlethread, 1); 550 set_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 551 } 552 553 /* 554 * Called when CPU exits the idle loop 555 */ 556 static void 557 idle_exit() 558 { 559 cpu_t *cp = CPU; 560 561 new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled()); 562 unset_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 563 } 564 565 /* 566 * Idle loop. 567 */ 568 void 569 idle() 570 { 571 struct cpu *cp = CPU; /* pointer to this CPU */ 572 kthread_t *t; /* taken thread */ 573 574 idle_enter(); 575 576 /* 577 * Uniprocessor version of idle loop. 578 * Do this until notified that we're on an actual multiprocessor. 579 */ 580 while (ncpus == 1) { 581 if (cp->cpu_disp->disp_nrunnable == 0) { 582 (*idle_cpu)(); 583 continue; 584 } 585 idle_exit(); 586 swtch(); 587 588 idle_enter(); /* returned from swtch */ 589 } 590 591 /* 592 * Multiprocessor idle loop. 593 */ 594 for (;;) { 595 /* 596 * If CPU is completely quiesced by p_online(2), just wait 597 * here with minimal bus traffic until put online. 
598 */ 599 while (cp->cpu_flags & CPU_QUIESCED) 600 (*idle_cpu)(); 601 602 if (cp->cpu_disp->disp_nrunnable != 0) { 603 idle_exit(); 604 swtch(); 605 } else { 606 if (cp->cpu_flags & CPU_OFFLINE) 607 continue; 608 if ((t = disp_getwork(cp)) == NULL) { 609 if (cp->cpu_chosen_level != -1) { 610 disp_t *dp = cp->cpu_disp; 611 disp_t *kpq; 612 613 disp_lock_enter(&dp->disp_lock); 614 /* 615 * Set kpq under lock to prevent 616 * migration between partitions. 617 */ 618 kpq = &cp->cpu_part->cp_kp_queue; 619 if (kpq->disp_maxrunpri == -1) 620 cp->cpu_chosen_level = -1; 621 disp_lock_exit(&dp->disp_lock); 622 } 623 (*idle_cpu)(); 624 continue; 625 } 626 /* 627 * If there was a thread but we couldn't steal 628 * it, then keep trying. 629 */ 630 if (t == T_DONTSTEAL) 631 continue; 632 idle_exit(); 633 swtch_to(t); 634 } 635 idle_enter(); /* returned from swtch/swtch_to */ 636 } 637 } 638 639 640 /* 641 * Preempt the currently running thread in favor of the highest 642 * priority thread. The class of the current thread controls 643 * where it goes on the dispatcher queues. If panicking, turn 644 * preemption off. 645 */ 646 void 647 preempt() 648 { 649 kthread_t *t = curthread; 650 klwp_t *lwp = ttolwp(curthread); 651 652 if (panicstr) 653 return; 654 655 TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start"); 656 657 thread_lock(t); 658 659 if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) { 660 /* 661 * this thread has already been chosen to be run on 662 * another CPU. Clear kprunrun on this CPU since we're 663 * already headed for swtch(). 664 */ 665 CPU->cpu_kprunrun = 0; 666 thread_unlock_nopreempt(t); 667 TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end"); 668 } else { 669 if (lwp != NULL) 670 lwp->lwp_ru.nivcsw++; 671 CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1); 672 THREAD_TRANSITION(t); 673 CL_PREEMPT(t); 674 DTRACE_SCHED(preempt); 675 thread_unlock_nopreempt(t); 676 677 TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end"); 678 679 swtch(); /* clears CPU->cpu_runrun via disp() */ 680 } 681 } 682 683 extern kthread_t *thread_unpin(); 684 685 /* 686 * disp() - find the highest priority thread for this processor to run, and 687 * set it in TS_ONPROC state so that resume() can be called to run it. 688 */ 689 static kthread_t * 690 disp() 691 { 692 cpu_t *cpup; 693 disp_t *dp; 694 kthread_t *tp; 695 dispq_t *dq; 696 int maxrunword; 697 pri_t pri; 698 disp_t *kpq; 699 700 TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start"); 701 702 cpup = CPU; 703 /* 704 * Find the highest priority loaded, runnable thread. 705 */ 706 dp = cpup->cpu_disp; 707 708 reschedule: 709 /* 710 * If there is more important work on the global queue with a better 711 * priority than the maximum on this CPU, take it now. 712 */ 713 kpq = &cpup->cpu_part->cp_kp_queue; 714 while ((pri = kpq->disp_maxrunpri) >= 0 && 715 pri >= dp->disp_maxrunpri && 716 (cpup->cpu_flags & CPU_OFFLINE) == 0 && 717 (tp = disp_getbest(kpq)) != NULL) { 718 if (disp_ratify(tp, kpq) != NULL) { 719 TRACE_1(TR_FAC_DISP, TR_DISP_END, 720 "disp_end:tid %p", tp); 721 return (tp); 722 } 723 } 724 725 disp_lock_enter(&dp->disp_lock); 726 pri = dp->disp_maxrunpri; 727 728 /* 729 * If there is nothing to run, look at what's runnable on other queues. 730 * Choose the idle thread if the CPU is quiesced. 731 * Note that CPUs that have the CPU_OFFLINE flag set can still run 732 * interrupt threads, which will be the only threads on the CPU's own 733 * queue, but cannot run threads from other queues. 
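	 * An offline CPU therefore skips the disp_getwork() call below; when
	 * nothing is on its own queue it simply selects its idle thread.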
	 */
	if (pri == -1) {
		if (!(cpup->cpu_flags & CPU_OFFLINE)) {
			disp_lock_exit(&dp->disp_lock);
			if ((tp = disp_getwork(cpup)) == NULL ||
			    tp == T_DONTSTEAL) {
				tp = cpup->cpu_idle_thread;
				(void) splhigh();
				THREAD_ONPROC(tp, cpup);
				cpup->cpu_dispthread = tp;
				cpup->cpu_dispatch_pri = -1;
				cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
				cpup->cpu_chosen_level = -1;
			}
		} else {
			disp_lock_exit_high(&dp->disp_lock);
			tp = cpup->cpu_idle_thread;
			THREAD_ONPROC(tp, cpup);
			cpup->cpu_dispthread = tp;
			cpup->cpu_dispatch_pri = -1;
			cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
			cpup->cpu_chosen_level = -1;
		}
		TRACE_1(TR_FAC_DISP, TR_DISP_END,
		    "disp_end:tid %p", tp);
		return (tp);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	ASSERT(tp != NULL);
	ASSERT(tp->t_schedflag & TS_LOAD);	/* thread must be swapped in */

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it so remove it from queue.
	 */
	dp->disp_nrunnable--;
	dq->dq_sruncnt--;
	if ((dq->dq_first = tp->t_link) == NULL) {
		ulong_t	*dqactmap = dp->disp_qactmap;

		ASSERT(dq->dq_sruncnt == 0);
		dq->dq_last = NULL;

		/*
		 * The queue is empty, so the corresponding bit needs to be
		 * turned off in dqactmap.  If nrunnable != 0, we just took
		 * the last runnable thread off the highest queue, so
		 * recompute disp_maxrunpri.
		 */
		maxrunword = pri >> BT_ULSHIFT;
		dqactmap[maxrunword] &= ~BT_BIW(pri);

		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else {
			int ipri;

			ipri = bt_gethighbit(dqactmap, maxrunword);
			dp->disp_maxrunpri = ipri;
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
		}
	} else {
		tp->t_link = NULL;
	}

	/*
	 * Set TS_DONT_SWAP flag to prevent another processor from swapping
	 * out this thread before we have a chance to run it.
	 * While running, it is protected against swapping by t_lock.
	 */
	tp->t_schedflag |= TS_DONT_SWAP;
	cpup->cpu_dispthread = tp;		/* protected by spl only */
	cpup->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));
	thread_onproc(tp, cpup);		/* set t_state to TS_ONPROC */
	disp_lock_exit_high(&dp->disp_lock);	/* drop run queue lock */

	ASSERT(tp != NULL);
	TRACE_1(TR_FAC_DISP, TR_DISP_END,
	    "disp_end:tid %p", tp);

	if (disp_ratify(tp, kpq) == NULL)
		goto reschedule;

	return (tp);
}

/*
 * swtch()
 *	Find best runnable thread and run it.
 *	Called with the current thread already switched to a new state,
 *	on a sleep queue, run queue, stopped, and not zombied.
 *	May be called at any spl level less than or equal to LOCK_LEVEL.
 *	Always drops spl to the base level (spl0()).
 */
void
swtch()
{
	kthread_t	*t = curthread;
	kthread_t	*next;
	cpu_t		*cp;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	if (t->t_flag & T_INTR_THREAD)
		cpu_intr_swtch_enter(t);

	if (t->t_intr != NULL) {
		/*
		 * We are an interrupt thread.  Setup and return
		 * the interrupted thread to be resumed.
851 */ 852 (void) splhigh(); /* block other scheduler action */ 853 cp = CPU; /* now protected against migration */ 854 ASSERT(CPU_ON_INTR(cp) == 0); /* not called with PIL > 10 */ 855 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 856 CPU_STATS_ADDQ(cp, sys, intrblk, 1); 857 next = thread_unpin(); 858 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 859 resume_from_intr(next); 860 } else { 861 #ifdef DEBUG 862 if (t->t_state == TS_ONPROC && 863 t->t_disp_queue->disp_cpu == CPU && 864 t->t_preempt == 0) { 865 thread_lock(t); 866 ASSERT(t->t_state != TS_ONPROC || 867 t->t_disp_queue->disp_cpu != CPU || 868 t->t_preempt != 0); /* cannot migrate */ 869 thread_unlock_nopreempt(t); 870 } 871 #endif /* DEBUG */ 872 cp = CPU; 873 next = disp(); /* returns with spl high */ 874 ASSERT(CPU_ON_INTR(cp) == 0); /* not called with PIL > 10 */ 875 876 /* OK to steal anything left on run queue */ 877 cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; 878 879 if (next != t) { 880 if (t == cp->cpu_idle_thread) { 881 PG_NRUN_UPDATE(cp, 1); 882 } else if (next == cp->cpu_idle_thread) { 883 PG_NRUN_UPDATE(cp, -1); 884 } 885 886 /* 887 * If t was previously in the TS_ONPROC state, 888 * setfrontdq and setbackdq won't have set its t_waitrq. 889 * Since we now finally know that we're switching away 890 * from this thread, set its t_waitrq if it is on a run 891 * queue. 892 */ 893 if ((t->t_state == TS_RUN) && (t->t_waitrq == 0)) { 894 t->t_waitrq = gethrtime_unscaled(); 895 } 896 897 /* 898 * restore mstate of thread that we are switching to 899 */ 900 restore_mstate(next); 901 902 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 903 cp->cpu_last_swtch = t->t_disp_time = lbolt; 904 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 905 906 if (dtrace_vtime_active) 907 dtrace_vtime_switch(next); 908 909 resume(next); 910 /* 911 * The TR_RESUME_END and TR_SWTCH_END trace points 912 * appear at the end of resume(), because we may not 913 * return here 914 */ 915 } else { 916 if (t->t_flag & T_INTR_THREAD) 917 cpu_intr_swtch_exit(t); 918 919 DTRACE_SCHED(remain__cpu); 920 TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end"); 921 (void) spl0(); 922 } 923 } 924 } 925 926 /* 927 * swtch_from_zombie() 928 * Special case of swtch(), which allows checks for TS_ZOMB to be 929 * eliminated from normal resume. 930 * Find best runnable thread and run it. 931 * Called with the current thread zombied. 932 * Zombies cannot migrate, so CPU references are safe. 933 */ 934 void 935 swtch_from_zombie() 936 { 937 kthread_t *next; 938 cpu_t *cpu = CPU; 939 940 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 941 942 ASSERT(curthread->t_state == TS_ZOMB); 943 944 next = disp(); /* returns with spl high */ 945 ASSERT(CPU_ON_INTR(CPU) == 0); /* not called with PIL > 10 */ 946 CPU_STATS_ADDQ(CPU, sys, pswitch, 1); 947 ASSERT(next != curthread); 948 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 949 950 if (next == cpu->cpu_idle_thread) 951 PG_NRUN_UPDATE(cpu, -1); 952 953 restore_mstate(next); 954 955 if (dtrace_vtime_active) 956 dtrace_vtime_switch(next); 957 958 resume_from_zombie(next); 959 /* 960 * The TR_RESUME_END and TR_SWTCH_END trace points 961 * appear at the end of resume(), because we certainly will not 962 * return here 963 */ 964 } 965 966 #if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint)) 967 968 /* 969 * search_disp_queues() 970 * Search the given dispatch queues for thread tp. 971 * Return 1 if tp is found, otherwise return 0. 
972 */ 973 static int 974 search_disp_queues(disp_t *dp, kthread_t *tp) 975 { 976 dispq_t *dq; 977 dispq_t *eq; 978 979 disp_lock_enter_high(&dp->disp_lock); 980 981 for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) { 982 kthread_t *rp; 983 984 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 985 986 for (rp = dq->dq_first; rp; rp = rp->t_link) 987 if (tp == rp) { 988 disp_lock_exit_high(&dp->disp_lock); 989 return (1); 990 } 991 } 992 disp_lock_exit_high(&dp->disp_lock); 993 994 return (0); 995 } 996 997 /* 998 * thread_on_queue() 999 * Search all per-CPU dispatch queues and all partition-wide kpreempt 1000 * queues for thread tp. Return 1 if tp is found, otherwise return 0. 1001 */ 1002 static int 1003 thread_on_queue(kthread_t *tp) 1004 { 1005 cpu_t *cp; 1006 struct cpupart *part; 1007 1008 ASSERT(getpil() >= DISP_LEVEL); 1009 1010 /* 1011 * Search the per-CPU dispatch queues for tp. 1012 */ 1013 cp = CPU; 1014 do { 1015 if (search_disp_queues(cp->cpu_disp, tp)) 1016 return (1); 1017 } while ((cp = cp->cpu_next_onln) != CPU); 1018 1019 /* 1020 * Search the partition-wide kpreempt queues for tp. 1021 */ 1022 part = CPU->cpu_part; 1023 do { 1024 if (search_disp_queues(&part->cp_kp_queue, tp)) 1025 return (1); 1026 } while ((part = part->cp_next) != CPU->cpu_part); 1027 1028 return (0); 1029 } 1030 1031 #else 1032 1033 #define thread_on_queue(tp) 0 /* ASSERT must be !thread_on_queue */ 1034 1035 #endif /* DEBUG */ 1036 1037 /* 1038 * like swtch(), but switch to a specified thread taken from another CPU. 1039 * called with spl high.. 1040 */ 1041 void 1042 swtch_to(kthread_t *next) 1043 { 1044 cpu_t *cp = CPU; 1045 1046 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 1047 1048 /* 1049 * Update context switch statistics. 1050 */ 1051 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 1052 1053 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 1054 1055 if (curthread == cp->cpu_idle_thread) 1056 PG_NRUN_UPDATE(cp, 1); 1057 1058 /* OK to steal anything left on run queue */ 1059 cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; 1060 1061 /* record last execution time */ 1062 cp->cpu_last_swtch = curthread->t_disp_time = lbolt; 1063 1064 /* 1065 * If t was previously in the TS_ONPROC state, setfrontdq and setbackdq 1066 * won't have set its t_waitrq. Since we now finally know that we're 1067 * switching away from this thread, set its t_waitrq if it is on a run 1068 * queue. 
1069 */ 1070 if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) { 1071 curthread->t_waitrq = gethrtime_unscaled(); 1072 } 1073 1074 /* restore next thread to previously running microstate */ 1075 restore_mstate(next); 1076 1077 if (dtrace_vtime_active) 1078 dtrace_vtime_switch(next); 1079 1080 resume(next); 1081 /* 1082 * The TR_RESUME_END and TR_SWTCH_END trace points 1083 * appear at the end of resume(), because we may not 1084 * return here 1085 */ 1086 } 1087 1088 1089 1090 #define CPU_IDLING(pri) ((pri) == -1) 1091 1092 static void 1093 cpu_resched(cpu_t *cp, pri_t tpri) 1094 { 1095 int call_poke_cpu = 0; 1096 pri_t cpupri = cp->cpu_dispatch_pri; 1097 1098 if (!CPU_IDLING(cpupri) && (cpupri < tpri)) { 1099 TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED, 1100 "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri); 1101 if (tpri >= upreemptpri && cp->cpu_runrun == 0) { 1102 cp->cpu_runrun = 1; 1103 aston(cp->cpu_dispthread); 1104 if (tpri < kpreemptpri && cp != CPU) 1105 call_poke_cpu = 1; 1106 } 1107 if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) { 1108 cp->cpu_kprunrun = 1; 1109 if (cp != CPU) 1110 call_poke_cpu = 1; 1111 } 1112 } 1113 1114 /* 1115 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 1116 */ 1117 membar_enter(); 1118 1119 if (call_poke_cpu) 1120 poke_cpu(cp->cpu_id); 1121 } 1122 1123 /* 1124 * Perform multi-level CMT load balancing of running threads. 1125 * tp is the thread being enqueued 1126 * cp is the hint CPU (chosen by cpu_choose()). 1127 */ 1128 static cpu_t * 1129 cmt_balance(kthread_t *tp, cpu_t *cp) 1130 { 1131 int hint, i, cpu, nsiblings; 1132 int self = 0; 1133 group_t *cmt_pgs, *siblings; 1134 pg_cmt_t *pg, *pg_tmp, *tpg = NULL; 1135 int pg_nrun, tpg_nrun; 1136 int level = 0; 1137 cpu_t *newcp; 1138 1139 ASSERT(THREAD_LOCK_HELD(tp)); 1140 1141 cmt_pgs = &cp->cpu_pg->cmt_pgs; 1142 1143 if (GROUP_SIZE(cmt_pgs) == 0) 1144 return (cp); /* nothing to do */ 1145 1146 if (tp == curthread) 1147 self = 1; 1148 1149 /* 1150 * Balance across siblings in the CPUs CMT lineage 1151 */ 1152 do { 1153 pg = GROUP_ACCESS(cmt_pgs, level); 1154 1155 siblings = pg->cmt_siblings; 1156 nsiblings = GROUP_SIZE(siblings); /* self inclusive */ 1157 if (nsiblings == 1) 1158 continue; /* nobody to balance against */ 1159 1160 pg_nrun = pg->cmt_nrunning; 1161 if (self && 1162 bitset_in_set(&pg->cmt_cpus_actv_set, CPU->cpu_seqid)) 1163 pg_nrun--; /* Ignore curthread's effect */ 1164 1165 hint = pg->cmt_hint; 1166 /* 1167 * Check for validity of the hint 1168 * It should reference a valid sibling 1169 */ 1170 if (hint >= nsiblings) 1171 hint = pg->cmt_hint = 0; 1172 else 1173 pg->cmt_hint++; 1174 1175 /* 1176 * Find a balancing candidate from among our siblings 1177 * "hint" is a hint for where to start looking 1178 */ 1179 i = hint; 1180 do { 1181 ASSERT(i < nsiblings); 1182 pg_tmp = GROUP_ACCESS(siblings, i); 1183 1184 /* 1185 * The candidate must not be us, and must 1186 * have some CPU resources in the thread's 1187 * partition 1188 */ 1189 if (pg_tmp != pg && 1190 bitset_in_set(&tp->t_cpupart->cp_cmt_pgs, 1191 ((pg_t *)pg_tmp)->pg_id)) { 1192 tpg = pg_tmp; 1193 break; 1194 } 1195 1196 if (++i >= nsiblings) 1197 i = 0; 1198 } while (i != hint); 1199 1200 if (!tpg) 1201 continue; /* no candidates at this level */ 1202 1203 /* 1204 * Check if the balancing target is underloaded 1205 * Decide to balance if the target is running fewer 1206 * threads, or if it's running the same number of threads 1207 * with more online CPUs 1208 */ 1209 tpg_nrun = tpg->cmt_nrunning; 1210 if 
(pg_nrun > tpg_nrun || 1211 (pg_nrun == tpg_nrun && 1212 (GROUP_SIZE(&tpg->cmt_cpus_actv) > 1213 GROUP_SIZE(&pg->cmt_cpus_actv)))) { 1214 break; 1215 } 1216 tpg = NULL; 1217 } while (++level < GROUP_SIZE(cmt_pgs)); 1218 1219 1220 if (tpg) { 1221 /* 1222 * Select an idle CPU from the target PG 1223 */ 1224 for (cpu = 0; cpu < GROUP_SIZE(&tpg->cmt_cpus_actv); cpu++) { 1225 newcp = GROUP_ACCESS(&tpg->cmt_cpus_actv, cpu); 1226 if (newcp->cpu_part == tp->t_cpupart && 1227 newcp->cpu_dispatch_pri == -1) { 1228 cp = newcp; 1229 break; 1230 } 1231 } 1232 } 1233 1234 return (cp); 1235 } 1236 1237 /* 1238 * setbackdq() keeps runqs balanced such that the difference in length 1239 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF. 1240 * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths 1241 * must match. When per-thread TS_RUNQMATCH flag is set, setbackdq() will 1242 * try to keep runqs perfectly balanced regardless of the thread priority. 1243 */ 1244 #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */ 1245 #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */ 1246 #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt) 1247 1248 /* 1249 * Put the specified thread on the back of the dispatcher 1250 * queue corresponding to its current priority. 1251 * 1252 * Called with the thread in transition, onproc or stopped state 1253 * and locked (transition implies locked) and at high spl. 1254 * Returns with the thread in TS_RUN state and still locked. 1255 */ 1256 void 1257 setbackdq(kthread_t *tp) 1258 { 1259 dispq_t *dq; 1260 disp_t *dp; 1261 cpu_t *cp; 1262 pri_t tpri; 1263 int bound; 1264 1265 ASSERT(THREAD_LOCK_HELD(tp)); 1266 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1267 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1268 1269 /* 1270 * If thread is "swapped" or on the swap queue don't 1271 * queue it, but wake sched. 1272 */ 1273 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1274 disp_swapped_setrun(tp); 1275 return; 1276 } 1277 1278 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1279 bound = 1; 1280 else 1281 bound = 0; 1282 1283 tpri = DISP_PRIO(tp); 1284 if (ncpus == 1) 1285 cp = tp->t_cpu; 1286 else if (!bound) { 1287 if (tpri >= kpqpri) { 1288 setkpdq(tp, SETKP_BACK); 1289 return; 1290 } 1291 /* 1292 * Let cpu_choose suggest a CPU. 1293 */ 1294 cp = cpu_choose(tp, tpri); 1295 1296 if (tp->t_cpupart == cp->cpu_part) { 1297 int qlen; 1298 1299 /* 1300 * Perform any CMT load balancing 1301 */ 1302 cp = cmt_balance(tp, cp); 1303 1304 /* 1305 * Balance across the run queues 1306 */ 1307 qlen = RUNQ_LEN(cp, tpri); 1308 if (tpri >= RUNQ_MATCH_PRI && 1309 !(tp->t_schedflag & TS_RUNQMATCH)) 1310 qlen -= RUNQ_MAX_DIFF; 1311 if (qlen > 0) { 1312 cpu_t *newcp; 1313 1314 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) { 1315 newcp = cp->cpu_next_part; 1316 } else if ((newcp = cp->cpu_next_lpl) == cp) { 1317 newcp = cp->cpu_next_part; 1318 } 1319 1320 if (RUNQ_LEN(newcp, tpri) < qlen) { 1321 DTRACE_PROBE3(runq__balance, 1322 kthread_t *, tp, 1323 cpu_t *, cp, cpu_t *, newcp); 1324 cp = newcp; 1325 } 1326 } 1327 } else { 1328 /* 1329 * Migrate to a cpu in the new partition. 
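			 * disp_lowpri_cpu() picks a CPU in the new partition
			 * that is currently running at the lowest priority,
			 * using the thread's t_lpl as a locality hint.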
1330 */ 1331 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1332 tp->t_lpl, tp->t_pri, NULL); 1333 } 1334 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1335 } else { 1336 /* 1337 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1338 * a short time until weak binding that existed when the 1339 * strong binding was established has dropped) so we must 1340 * favour weak binding over strong. 1341 */ 1342 cp = tp->t_weakbound_cpu ? 1343 tp->t_weakbound_cpu : tp->t_bound_cpu; 1344 } 1345 /* 1346 * A thread that is ONPROC may be temporarily placed on the run queue 1347 * but then chosen to run again by disp. If the thread we're placing on 1348 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1349 * replacement process is actually scheduled in swtch(). In this 1350 * situation, curthread is the only thread that could be in the ONPROC 1351 * state. 1352 */ 1353 if ((tp != curthread) && (tp->t_waitrq == 0)) { 1354 hrtime_t curtime; 1355 1356 curtime = gethrtime_unscaled(); 1357 (void) cpu_update_pct(tp, curtime); 1358 tp->t_waitrq = curtime; 1359 } else { 1360 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1361 } 1362 1363 dp = cp->cpu_disp; 1364 disp_lock_enter_high(&dp->disp_lock); 1365 1366 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0); 1367 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p", 1368 tpri, cp, tp); 1369 1370 #ifndef NPROBE 1371 /* Kernel probe */ 1372 if (tnf_tracing_active) 1373 tnf_thread_queue(tp, cp, tpri); 1374 #endif /* NPROBE */ 1375 1376 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1377 1378 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1379 tp->t_disp_queue = dp; 1380 tp->t_link = NULL; 1381 1382 dq = &dp->disp_q[tpri]; 1383 dp->disp_nrunnable++; 1384 if (!bound) 1385 dp->disp_steal = 0; 1386 membar_enter(); 1387 1388 if (dq->dq_sruncnt++ != 0) { 1389 ASSERT(dq->dq_first != NULL); 1390 dq->dq_last->t_link = tp; 1391 dq->dq_last = tp; 1392 } else { 1393 ASSERT(dq->dq_first == NULL); 1394 ASSERT(dq->dq_last == NULL); 1395 dq->dq_first = dq->dq_last = tp; 1396 BT_SET(dp->disp_qactmap, tpri); 1397 if (tpri > dp->disp_maxrunpri) { 1398 dp->disp_maxrunpri = tpri; 1399 membar_enter(); 1400 cpu_resched(cp, tpri); 1401 } 1402 } 1403 1404 if (!bound && tpri > dp->disp_max_unbound_pri) { 1405 if (tp == curthread && dp->disp_max_unbound_pri == -1 && 1406 cp == CPU) { 1407 /* 1408 * If there are no other unbound threads on the 1409 * run queue, don't allow other CPUs to steal 1410 * this thread while we are in the middle of a 1411 * context switch. We may just switch to it 1412 * again right away. CPU_DISP_DONTSTEAL is cleared 1413 * in swtch and swtch_to. 1414 */ 1415 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1416 } 1417 dp->disp_max_unbound_pri = tpri; 1418 } 1419 (*disp_enq_thread)(cp, bound); 1420 } 1421 1422 /* 1423 * Put the specified thread on the front of the dispatcher 1424 * queue corresponding to its current priority. 1425 * 1426 * Called with the thread in transition, onproc or stopped state 1427 * and locked (transition implies locked) and at high spl. 1428 * Returns with the thread in TS_RUN state and still locked. 
1429 */ 1430 void 1431 setfrontdq(kthread_t *tp) 1432 { 1433 disp_t *dp; 1434 dispq_t *dq; 1435 cpu_t *cp; 1436 pri_t tpri; 1437 int bound; 1438 1439 ASSERT(THREAD_LOCK_HELD(tp)); 1440 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1441 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1442 1443 /* 1444 * If thread is "swapped" or on the swap queue don't 1445 * queue it, but wake sched. 1446 */ 1447 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1448 disp_swapped_setrun(tp); 1449 return; 1450 } 1451 1452 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1453 bound = 1; 1454 else 1455 bound = 0; 1456 1457 tpri = DISP_PRIO(tp); 1458 if (ncpus == 1) 1459 cp = tp->t_cpu; 1460 else if (!bound) { 1461 if (tpri >= kpqpri) { 1462 setkpdq(tp, SETKP_FRONT); 1463 return; 1464 } 1465 cp = tp->t_cpu; 1466 if (tp->t_cpupart == cp->cpu_part) { 1467 /* 1468 * If we are of higher or equal priority than 1469 * the highest priority runnable thread of 1470 * the current CPU, just pick this CPU. Otherwise 1471 * Let cpu_choose() select the CPU. If this cpu 1472 * is the target of an offline request then do not 1473 * pick it - a thread_nomigrate() on the in motion 1474 * cpu relies on this when it forces a preempt. 1475 */ 1476 if (tpri < cp->cpu_disp->disp_maxrunpri || 1477 cp == cpu_inmotion) 1478 cp = cpu_choose(tp, tpri); 1479 } else { 1480 /* 1481 * Migrate to a cpu in the new partition. 1482 */ 1483 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1484 tp->t_lpl, tp->t_pri, NULL); 1485 } 1486 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1487 } else { 1488 /* 1489 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1490 * a short time until weak binding that existed when the 1491 * strong binding was established has dropped) so we must 1492 * favour weak binding over strong. 1493 */ 1494 cp = tp->t_weakbound_cpu ? 1495 tp->t_weakbound_cpu : tp->t_bound_cpu; 1496 } 1497 1498 /* 1499 * A thread that is ONPROC may be temporarily placed on the run queue 1500 * but then chosen to run again by disp. If the thread we're placing on 1501 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1502 * replacement process is actually scheduled in swtch(). In this 1503 * situation, curthread is the only thread that could be in the ONPROC 1504 * state. 
1505 */ 1506 if ((tp != curthread) && (tp->t_waitrq == 0)) { 1507 hrtime_t curtime; 1508 1509 curtime = gethrtime_unscaled(); 1510 (void) cpu_update_pct(tp, curtime); 1511 tp->t_waitrq = curtime; 1512 } else { 1513 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1514 } 1515 1516 dp = cp->cpu_disp; 1517 disp_lock_enter_high(&dp->disp_lock); 1518 1519 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1520 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1); 1521 1522 #ifndef NPROBE 1523 /* Kernel probe */ 1524 if (tnf_tracing_active) 1525 tnf_thread_queue(tp, cp, tpri); 1526 #endif /* NPROBE */ 1527 1528 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1529 1530 THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */ 1531 tp->t_disp_queue = dp; 1532 1533 dq = &dp->disp_q[tpri]; 1534 dp->disp_nrunnable++; 1535 if (!bound) 1536 dp->disp_steal = 0; 1537 membar_enter(); 1538 1539 if (dq->dq_sruncnt++ != 0) { 1540 ASSERT(dq->dq_last != NULL); 1541 tp->t_link = dq->dq_first; 1542 dq->dq_first = tp; 1543 } else { 1544 ASSERT(dq->dq_last == NULL); 1545 ASSERT(dq->dq_first == NULL); 1546 tp->t_link = NULL; 1547 dq->dq_first = dq->dq_last = tp; 1548 BT_SET(dp->disp_qactmap, tpri); 1549 if (tpri > dp->disp_maxrunpri) { 1550 dp->disp_maxrunpri = tpri; 1551 membar_enter(); 1552 cpu_resched(cp, tpri); 1553 } 1554 } 1555 1556 if (!bound && tpri > dp->disp_max_unbound_pri) { 1557 if (tp == curthread && dp->disp_max_unbound_pri == -1 && 1558 cp == CPU) { 1559 /* 1560 * If there are no other unbound threads on the 1561 * run queue, don't allow other CPUs to steal 1562 * this thread while we are in the middle of a 1563 * context switch. We may just switch to it 1564 * again right away. CPU_DISP_DONTSTEAL is cleared 1565 * in swtch and swtch_to. 
1566 */ 1567 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1568 } 1569 dp->disp_max_unbound_pri = tpri; 1570 } 1571 (*disp_enq_thread)(cp, bound); 1572 } 1573 1574 /* 1575 * Put a high-priority unbound thread on the kp queue 1576 */ 1577 static void 1578 setkpdq(kthread_t *tp, int borf) 1579 { 1580 dispq_t *dq; 1581 disp_t *dp; 1582 cpu_t *cp; 1583 pri_t tpri; 1584 1585 tpri = DISP_PRIO(tp); 1586 1587 dp = &tp->t_cpupart->cp_kp_queue; 1588 disp_lock_enter_high(&dp->disp_lock); 1589 1590 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1591 1592 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1593 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf); 1594 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1595 tp->t_disp_queue = dp; 1596 dp->disp_nrunnable++; 1597 dq = &dp->disp_q[tpri]; 1598 1599 if (dq->dq_sruncnt++ != 0) { 1600 if (borf == SETKP_BACK) { 1601 ASSERT(dq->dq_first != NULL); 1602 tp->t_link = NULL; 1603 dq->dq_last->t_link = tp; 1604 dq->dq_last = tp; 1605 } else { 1606 ASSERT(dq->dq_last != NULL); 1607 tp->t_link = dq->dq_first; 1608 dq->dq_first = tp; 1609 } 1610 } else { 1611 if (borf == SETKP_BACK) { 1612 ASSERT(dq->dq_first == NULL); 1613 ASSERT(dq->dq_last == NULL); 1614 dq->dq_first = dq->dq_last = tp; 1615 } else { 1616 ASSERT(dq->dq_last == NULL); 1617 ASSERT(dq->dq_first == NULL); 1618 tp->t_link = NULL; 1619 dq->dq_first = dq->dq_last = tp; 1620 } 1621 BT_SET(dp->disp_qactmap, tpri); 1622 if (tpri > dp->disp_max_unbound_pri) 1623 dp->disp_max_unbound_pri = tpri; 1624 if (tpri > dp->disp_maxrunpri) { 1625 dp->disp_maxrunpri = tpri; 1626 membar_enter(); 1627 } 1628 } 1629 1630 cp = tp->t_cpu; 1631 if (tp->t_cpupart != cp->cpu_part) { 1632 /* migrate to a cpu in the new partition */ 1633 cp = tp->t_cpupart->cp_cpulist; 1634 } 1635 cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL); 1636 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 1637 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1638 1639 #ifndef NPROBE 1640 /* Kernel probe */ 1641 if (tnf_tracing_active) 1642 tnf_thread_queue(tp, cp, tpri); 1643 #endif /* NPROBE */ 1644 1645 if (cp->cpu_chosen_level < tpri) 1646 cp->cpu_chosen_level = tpri; 1647 cpu_resched(cp, tpri); 1648 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 1649 (*disp_enq_thread)(cp, 0); 1650 } 1651 1652 /* 1653 * Remove a thread from the dispatcher queue if it is on it. 1654 * It is not an error if it is not found but we return whether 1655 * or not it was found in case the caller wants to check. 1656 */ 1657 int 1658 dispdeq(kthread_t *tp) 1659 { 1660 disp_t *dp; 1661 dispq_t *dq; 1662 kthread_t *rp; 1663 kthread_t *trp; 1664 kthread_t **ptp; 1665 int tpri; 1666 1667 ASSERT(THREAD_LOCK_HELD(tp)); 1668 1669 if (tp->t_state != TS_RUN) 1670 return (0); 1671 1672 /* 1673 * The thread is "swapped" or is on the swap queue and 1674 * hence no longer on the run queue, so return true. 1675 */ 1676 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) 1677 return (1); 1678 1679 tpri = DISP_PRIO(tp); 1680 dp = tp->t_disp_queue; 1681 ASSERT(tpri < dp->disp_npri); 1682 dq = &dp->disp_q[tpri]; 1683 ptp = &dq->dq_first; 1684 rp = *ptp; 1685 trp = NULL; 1686 1687 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1688 1689 /* 1690 * Search for thread in queue. 1691 * Double links would simplify this at the expense of disp/setrun. 
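	 * The walk keeps the previous thread (trp) and the link pointer to
	 * patch (ptp), so the unlink below is still O(1) once tp is found.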
1692 */ 1693 while (rp != tp && rp != NULL) { 1694 trp = rp; 1695 ptp = &trp->t_link; 1696 rp = trp->t_link; 1697 } 1698 1699 if (rp == NULL) { 1700 panic("dispdeq: thread not on queue"); 1701 } 1702 1703 DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 1704 1705 /* 1706 * Found it so remove it from queue. 1707 */ 1708 if ((*ptp = rp->t_link) == NULL) 1709 dq->dq_last = trp; 1710 1711 dp->disp_nrunnable--; 1712 if (--dq->dq_sruncnt == 0) { 1713 dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri); 1714 if (dp->disp_nrunnable == 0) { 1715 dp->disp_max_unbound_pri = -1; 1716 dp->disp_maxrunpri = -1; 1717 } else if (tpri == dp->disp_maxrunpri) { 1718 int ipri; 1719 1720 ipri = bt_gethighbit(dp->disp_qactmap, 1721 dp->disp_maxrunpri >> BT_ULSHIFT); 1722 if (ipri < dp->disp_max_unbound_pri) 1723 dp->disp_max_unbound_pri = ipri; 1724 dp->disp_maxrunpri = ipri; 1725 } 1726 } 1727 tp->t_link = NULL; 1728 THREAD_TRANSITION(tp); /* put in intermediate state */ 1729 return (1); 1730 } 1731 1732 1733 /* 1734 * dq_sruninc and dq_srundec are public functions for 1735 * incrementing/decrementing the sruncnts when a thread on 1736 * a dispatcher queue is made schedulable/unschedulable by 1737 * resetting the TS_LOAD flag. 1738 * 1739 * The caller MUST have the thread lock and therefore the dispatcher 1740 * queue lock so that the operation which changes 1741 * the flag, the operation that checks the status of the thread to 1742 * determine if it's on a disp queue AND the call to this function 1743 * are one atomic operation with respect to interrupts. 1744 */ 1745 1746 /* 1747 * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread. 1748 */ 1749 void 1750 dq_sruninc(kthread_t *t) 1751 { 1752 ASSERT(t->t_state == TS_RUN); 1753 ASSERT(t->t_schedflag & TS_LOAD); 1754 1755 THREAD_TRANSITION(t); 1756 setfrontdq(t); 1757 } 1758 1759 /* 1760 * See comment on calling conventions above. 1761 * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread. 1762 */ 1763 void 1764 dq_srundec(kthread_t *t) 1765 { 1766 ASSERT(t->t_schedflag & TS_LOAD); 1767 1768 (void) dispdeq(t); 1769 disp_swapped_enq(t); 1770 } 1771 1772 /* 1773 * Change the dispatcher lock of thread to the "swapped_lock" 1774 * and return with thread lock still held. 1775 * 1776 * Called with thread_lock held, in transition state, and at high spl. 1777 */ 1778 void 1779 disp_swapped_enq(kthread_t *tp) 1780 { 1781 ASSERT(THREAD_LOCK_HELD(tp)); 1782 ASSERT(tp->t_schedflag & TS_LOAD); 1783 1784 switch (tp->t_state) { 1785 case TS_RUN: 1786 disp_lock_enter_high(&swapped_lock); 1787 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1788 break; 1789 case TS_ONPROC: 1790 disp_lock_enter_high(&swapped_lock); 1791 THREAD_TRANSITION(tp); 1792 wake_sched_sec = 1; /* tell clock to wake sched */ 1793 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1794 break; 1795 default: 1796 panic("disp_swapped: tp: %p bad t_state", (void *)tp); 1797 } 1798 } 1799 1800 /* 1801 * This routine is called by setbackdq/setfrontdq if the thread is 1802 * not loaded or loaded and on the swap queue. 1803 * 1804 * Thread state TS_SLEEP implies that a swapped thread 1805 * has been woken up and needs to be swapped in by the swapper. 1806 * 1807 * Thread state TS_RUN, it implies that the priority of a swapped 1808 * thread is being increased by scheduling class (e.g. ts_update). 
1809 */ 1810 static void 1811 disp_swapped_setrun(kthread_t *tp) 1812 { 1813 ASSERT(THREAD_LOCK_HELD(tp)); 1814 ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD); 1815 1816 switch (tp->t_state) { 1817 case TS_SLEEP: 1818 disp_lock_enter_high(&swapped_lock); 1819 /* 1820 * Wakeup sched immediately (i.e., next tick) if the 1821 * thread priority is above maxclsyspri. 1822 */ 1823 if (DISP_PRIO(tp) > maxclsyspri) 1824 wake_sched = 1; 1825 else 1826 wake_sched_sec = 1; 1827 THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */ 1828 break; 1829 case TS_RUN: /* called from ts_update */ 1830 break; 1831 default: 1832 panic("disp_swapped_setrun: tp: %p bad t_state", (void *)tp); 1833 } 1834 } 1835 1836 1837 /* 1838 * Make a thread give up its processor. Find the processor on 1839 * which this thread is executing, and have that processor 1840 * preempt. 1841 */ 1842 void 1843 cpu_surrender(kthread_t *tp) 1844 { 1845 cpu_t *cpup; 1846 int max_pri; 1847 int max_run_pri; 1848 klwp_t *lwp; 1849 1850 ASSERT(THREAD_LOCK_HELD(tp)); 1851 1852 if (tp->t_state != TS_ONPROC) 1853 return; 1854 cpup = tp->t_disp_queue->disp_cpu; /* CPU thread dispatched to */ 1855 max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */ 1856 max_run_pri = CP_MAXRUNPRI(cpup->cpu_part); 1857 if (max_pri < max_run_pri) 1858 max_pri = max_run_pri; 1859 1860 cpup->cpu_runrun = 1; 1861 if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) { 1862 cpup->cpu_kprunrun = 1; 1863 } 1864 1865 /* 1866 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 1867 */ 1868 membar_enter(); 1869 1870 DTRACE_SCHED1(surrender, kthread_t *, tp); 1871 1872 /* 1873 * Make the target thread take an excursion through trap() 1874 * to do preempt() (unless we're already in trap or post_syscall, 1875 * calling cpu_surrender via CL_TRAPRET). 1876 */ 1877 if (tp != curthread || (lwp = tp->t_lwp) == NULL || 1878 lwp->lwp_state != LWP_USER) { 1879 aston(tp); 1880 if (cpup != CPU) 1881 poke_cpu(cpup->cpu_id); 1882 } 1883 TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER, 1884 "cpu_surrender:tid %p cpu %p", tp, cpup); 1885 } 1886 1887 1888 /* 1889 * Commit to and ratify a scheduling decision 1890 */ 1891 /*ARGSUSED*/ 1892 static kthread_t * 1893 disp_ratify(kthread_t *tp, disp_t *kpq) 1894 { 1895 pri_t tpri, maxpri; 1896 pri_t maxkpri; 1897 cpu_t *cpup; 1898 1899 ASSERT(tp != NULL); 1900 /* 1901 * Commit to, then ratify scheduling decision 1902 */ 1903 cpup = CPU; 1904 if (cpup->cpu_runrun != 0) 1905 cpup->cpu_runrun = 0; 1906 if (cpup->cpu_kprunrun != 0) 1907 cpup->cpu_kprunrun = 0; 1908 if (cpup->cpu_chosen_level != -1) 1909 cpup->cpu_chosen_level = -1; 1910 membar_enter(); 1911 tpri = DISP_PRIO(tp); 1912 maxpri = cpup->cpu_disp->disp_maxrunpri; 1913 maxkpri = kpq->disp_maxrunpri; 1914 if (maxpri < maxkpri) 1915 maxpri = maxkpri; 1916 if (tpri < maxpri) { 1917 /* 1918 * should have done better 1919 * put this one back and indicate to try again 1920 */ 1921 cpup->cpu_dispthread = curthread; /* fixup dispthread */ 1922 cpup->cpu_dispatch_pri = DISP_PRIO(curthread); 1923 thread_lock_high(tp); 1924 THREAD_TRANSITION(tp); 1925 setfrontdq(tp); 1926 thread_unlock_nopreempt(tp); 1927 1928 tp = NULL; 1929 } 1930 return (tp); 1931 } 1932 1933 /* 1934 * See if there is any work on the dispatcher queue for other CPUs. 1935 * If there is, dequeue the best thread and return. 
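 * Returns the thread taken, NULL if nothing suitable was found, or
 * T_DONTSTEAL if the only candidates were enqueued too recently to be
 * stolen (see nosteal_nsec).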
 */
static kthread_t *
disp_getwork(cpu_t *cp)
{
	cpu_t		*ocp;		/* other CPU */
	cpu_t		*ocp_start;
	cpu_t		*tcp;		/* target local CPU */
	kthread_t	*tp;
	kthread_t	*retval = NULL;
	pri_t		maxpri;
	disp_t		*kpq;		/* kp queue for this partition */
	lpl_t		*lpl, *lpl_leaf;
	int		hint, leafidx;
	hrtime_t	stealtime;

	maxpri = -1;
	tcp = NULL;

	kpq = &cp->cpu_part->cp_kp_queue;
	while (kpq->disp_maxrunpri >= 0) {
		/*
		 * Try to take a thread from the kp_queue.
		 */
		tp = (disp_getbest(kpq));
		if (tp)
			return (disp_ratify(tp, kpq));
	}

	kpreempt_disable();		/* protect the cpu_active list */

	/*
	 * Try to find something to do on another CPU's run queue.
	 * Loop through all other CPUs looking for the one with the highest
	 * priority unbound thread.
	 *
	 * On NUMA machines, the partition's CPUs are consulted in order of
	 * distance from the current CPU. This way, the first available
	 * work found is also the closest, and will suffer the least
	 * from being migrated.
	 */
	lpl = lpl_leaf = cp->cpu_lpl;
	hint = leafidx = 0;

	/*
	 * This loop traverses the lpl hierarchy. Higher level lpls represent
	 * broader levels of locality
	 */
	do {
		/* This loop iterates over the lpl's leaves */
		do {
			if (lpl_leaf != cp->cpu_lpl)
				ocp = lpl_leaf->lpl_cpus;
			else
				ocp = cp->cpu_next_lpl;

			/* This loop iterates over the CPUs in the leaf */
			ocp_start = ocp;
			do {
				pri_t pri;

				ASSERT(CPU_ACTIVE(ocp));

				/*
				 * End our stroll around this lpl if:
				 *
				 * - Something became runnable on the local
				 *   queue...which also ends our stroll around
				 *   the partition.
				 *
				 * - We happen across another idle CPU.
				 *   Since it is patrolling the next portion
				 *   of the lpl's list (assuming it's not
				 *   halted), move to the next higher level
				 *   of locality.
				 */
				if (cp->cpu_disp->disp_nrunnable != 0) {
					kpreempt_enable();
					return (NULL);
				}
				if (ocp->cpu_dispatch_pri == -1) {
					if (ocp->cpu_disp_flags &
					    CPU_DISP_HALTED)
						continue;
					else
						break;
				}

				/*
				 * If there's only one thread and the CPU
				 * is in the middle of a context switch,
				 * or it's currently running the idle thread,
				 * don't steal it.
				 */
				if ((ocp->cpu_disp_flags &
				    CPU_DISP_DONTSTEAL) &&
				    ocp->cpu_disp->disp_nrunnable == 1)
					continue;

				pri = ocp->cpu_disp->disp_max_unbound_pri;
				if (pri > maxpri) {
					/*
					 * Don't steal threads that we attempted
					 * to steal recently until they're ready
					 * to be stolen again.
					 */
					stealtime = ocp->cpu_disp->disp_steal;
					if (stealtime == 0 ||
					    stealtime - gethrtime() <= 0) {
						maxpri = pri;
						tcp = ocp;
					} else {
						/*
						 * Don't update tcp, just set
						 * the retval to T_DONTSTEAL, so
						 * that if no acceptable CPUs
						 * are found the return value
						 * will be T_DONTSTEAL rather
						 * than NULL.
						 */
						retval = T_DONTSTEAL;
					}
				}
			} while ((ocp = ocp->cpu_next_lpl) != ocp_start);

			if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) {
				leafidx = 0;
				lpl_leaf = lpl->lpl_rset[leafidx];
			}
		} while (leafidx != hint);

		hint = leafidx = lpl->lpl_hint;
		if ((lpl = lpl->lpl_parent) != NULL)
			lpl_leaf = lpl->lpl_rset[hint];
	} while (!tcp && lpl);

	kpreempt_enable();

	/*
	 * If another queue looks good, and there is still nothing on
	 * the local queue, try to transfer one or more threads
	 * from it to our queue.
	 */
	if (tcp && cp->cpu_disp->disp_nrunnable == 0) {
		tp = disp_getbest(tcp->cpu_disp);
		if (tp == NULL || tp == T_DONTSTEAL)
			return (tp);
		return (disp_ratify(tp, kpq));
	}
	return (retval);
}

/*
 * disp_fix_unbound_pri()
 *	Determines the maximum priority of unbound threads on the queue.
 *	The priority is kept for the queue, but is only increased, never
 *	reduced unless some CPU is looking for something on that queue.
 *
 *	The priority argument is the known upper limit.
 *
 *	Perhaps this should be kept accurately, but that probably means
 *	separate bitmaps for bound and unbound threads.  Since only idled
 *	CPUs will have to do this recalculation, it seems better this way.
 */
static void
disp_fix_unbound_pri(disp_t *dp, pri_t pri)
{
	kthread_t	*tp;
	dispq_t		*dq;
	ulong_t		*dqactmap = dp->disp_qactmap;
	ulong_t		mapword;
	int		wx;

	ASSERT(DISP_LOCK_HELD(&dp->disp_lock));

	ASSERT(pri >= 0);			/* checked by caller */

	/*
	 * Start the search at the next lowest priority below the supplied
	 * priority.  This depends on the bitmap implementation.
	 */
	do {
		wx = pri >> BT_ULSHIFT;		/* index of word in map */

		/*
		 * Form mask for all lower priorities in the word.
		 */
		mapword = dqactmap[wx] & (BT_BIW(pri) - 1);

		/*
		 * Get next lower active priority.
		 */
		if (mapword != 0) {
			pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1;
		} else if (wx > 0) {
			pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */
			if (pri < 0)
				break;
		} else {
			pri = -1;
			break;
		}

		/*
		 * Search the queue for unbound, runnable threads.
		 */
		dq = &dp->disp_q[pri];
		tp = dq->dq_first;

		while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
			tp = tp->t_link;
		}

		/*
		 * If a thread was found, set the priority and return.
		 */
	} while (tp == NULL);

	/*
	 * pri holds the maximum unbound thread priority or -1.
	 */
	if (dp->disp_max_unbound_pri != pri)
		dp->disp_max_unbound_pri = pri;
}

/*
 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should
 *	check if the CPU to which it was previously bound should have
 *	its disp_max_unbound_pri increased.
 */
void
disp_adjust_unbound_pri(kthread_t *tp)
{
	disp_t *dp;
	pri_t tpri;

	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * Don't do anything if the thread is not bound, or
	 * currently not runnable or swapped out.

/*
 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should
 *	check if the CPU to which it was previously bound should have
 *	its disp_max_unbound_pri increased.
 */
void
disp_adjust_unbound_pri(kthread_t *tp)
{
	disp_t *dp;
	pri_t tpri;

	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * Don't do anything if the thread is not bound, or
	 * currently not runnable or swapped out.
	 */
	if (tp->t_bound_cpu == NULL ||
	    tp->t_state != TS_RUN ||
	    tp->t_schedflag & TS_ON_SWAPQ)
		return;

	tpri = DISP_PRIO(tp);
	dp = tp->t_bound_cpu->cpu_disp;
	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
	if (tpri > dp->disp_max_unbound_pri)
		dp->disp_max_unbound_pri = tpri;
}

/*
 * disp_getbest()
 *   De-queue the highest priority unbound runnable thread.
 *   Returns with the thread unlocked and onproc but at splhigh (like disp()).
 *   Returns NULL if nothing found.
 *   Returns T_DONTSTEAL if the thread was not stealable, so that the caller
 *   will try again later.
 *
 *   Passed a pointer to a dispatch queue not associated with this CPU, and
 *   its type.
 */
static kthread_t *
disp_getbest(disp_t *dp)
{
	kthread_t	*tp;
	dispq_t		*dq;
	pri_t		pri;
	cpu_t		*cp, *tcp;
	boolean_t	allbound;

	disp_lock_enter(&dp->disp_lock);

	/*
	 * If there is nothing to run, or the CPU is in the middle of a
	 * context switch of the only thread, return NULL.
	 */
	tcp = dp->disp_cpu;
	cp = CPU;
	pri = dp->disp_max_unbound_pri;
	if (pri == -1 ||
	    (tcp != NULL && (tcp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
	    tcp->cpu_disp->disp_nrunnable == 1)) {
		disp_lock_exit_nopreempt(&dp->disp_lock);
		return (NULL);
	}

	dq = &dp->disp_q[pri];

	/*
	 * Assume that all threads are bound on this queue, and change it
	 * later when we find out that it is not the case.
	 */
	allbound = B_TRUE;
	for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) {
		hrtime_t now, nosteal, rqtime;

		/*
		 * Skip over bound threads which could be here even
		 * though disp_max_unbound_pri indicated this level.
		 */
		if (tp->t_bound_cpu || tp->t_weakbound_cpu)
			continue;

		/*
		 * We've got some unbound threads on this queue, so turn
		 * the allbound flag off now.
		 */
		allbound = B_FALSE;

		/*
		 * The thread is a candidate for stealing from its run queue. We
		 * don't want to steal threads that became runnable just a
		 * moment ago. This improves CPU affinity for threads that get
		 * preempted for short periods of time and go back on the run
		 * queue.
		 *
		 * We want to let it stay on its run queue if it was only placed
		 * there recently and it was running on the same CPU before that
		 * to preserve its cache investment. For the thread to remain on
		 * its run queue, ALL of the following conditions must be
		 * satisfied:
		 *
		 * - the disp queue should not be the kernel preemption queue
		 * - delayed idle stealing should not be disabled
		 * - nosteal_nsec should be non-zero
		 * - it should run with user priority
		 * - it should be on the run queue of the CPU where it was
		 *   running before being placed on the run queue
		 * - it should be the only thread on the run queue (to prevent
		 *   extra scheduling latency for other threads)
		 * - it should sit on the run queue for less than per-chip
		 *   nosteal interval or global nosteal interval
		 * - in case of CPUs with shared cache it should sit in a run
		 *   queue of a CPU from a different chip
		 *
		 * The checks are arranged so that the ones that are faster are
		 * placed earlier; see the example following this comment.
		 */
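		/*
		 * As an illustration (the numbers are hypothetical): with a
		 * nosteal interval of 100 usec, a user-priority thread that
		 * was preempted 30 usec ago and sits alone on the run queue
		 * of the CPU it last ran on is skipped here.  disp_steal is
		 * set so that idle CPUs calling disp_getwork() see
		 * T_DONTSTEAL and will not take the thread until roughly
		 * 70 usec later, by which time it has either run again or
		 * is fair game to be stolen.
		 */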
		if (tcp == NULL ||
		    pri >= minclsyspri ||
		    tp->t_cpu != tcp)
			break;

		/*
		 * Steal immediately if, due to the CMT processor architecture,
		 * migration between cp and tcp would incur no performance
		 * penalty.
		 */
		if (pg_cmt_can_migrate(cp, tcp))
			break;

		nosteal = nosteal_nsec;
		if (nosteal == 0)
			break;

		/*
		 * Calculate time spent sitting on run queue
		 */
		now = gethrtime_unscaled();
		rqtime = now - tp->t_waitrq;
		scalehrtime(&rqtime);

		/*
		 * Steal immediately if the time spent on this run queue is
		 * more than the allowed nosteal delay.
		 *
		 * Negative rqtime check is needed here to avoid infinite
		 * stealing delays caused by unlikely but not impossible
		 * drifts between CPU times on different CPUs.
		 */
		if (rqtime > nosteal || rqtime < 0)
			break;

		DTRACE_PROBE4(nosteal, kthread_t *, tp,
		    cpu_t *, tcp, cpu_t *, cp, hrtime_t, rqtime);
		scalehrtime(&now);
		/*
		 * Calculate when this thread becomes stealable
		 */
		now += (nosteal - rqtime);

		/*
		 * Calculate time when some thread becomes stealable
		 */
		if (now < dp->disp_steal)
			dp->disp_steal = now;
	}

	/*
	 * If there were no unbound threads on this queue, find the queue
	 * where they are and then return later. The value of
	 * disp_max_unbound_pri is not always accurate because it isn't
	 * reduced until another idle CPU looks for work.
	 */
	if (allbound)
		disp_fix_unbound_pri(dp, pri);

	/*
	 * If we reached the end of the queue and found no unbound threads
	 * then return NULL so that other CPUs will be considered.  If there
	 * are unbound threads but they cannot yet be stolen, then
	 * return T_DONTSTEAL and try again later.
	 */
	if (tp == NULL) {
		disp_lock_exit_nopreempt(&dp->disp_lock);
		return (allbound ? NULL : T_DONTSTEAL);
	}

	/*
	 * Found a runnable, unbound thread, so remove it from the queue.
	 * dispdeq() requires that we have the thread locked, and we do,
	 * by virtue of holding the dispatch queue lock.  dispdeq() will
	 * put the thread in transition state, thereby dropping the dispq
	 * lock.
	 */

#ifdef DEBUG
	{
		int	thread_was_on_queue;

		thread_was_on_queue = dispdeq(tp);	/* drops disp_lock */
		ASSERT(thread_was_on_queue);
	}

#else /* DEBUG */
	(void) dispdeq(tp);			/* drops disp_lock */
#endif /* DEBUG */

	/*
	 * Reset the disp_queue steal time; we do not know what the smallest
	 * value across the queue is.
	 */
	dp->disp_steal = 0;

	tp->t_schedflag |= TS_DONT_SWAP;

	/*
	 * Set up the thread to run on the current CPU.
	 */
	tp->t_disp_queue = cp->cpu_disp;

	cp->cpu_dispthread = tp;		/* protected by spl only */
	cp->cpu_dispatch_pri = pri;

	/*
	 * There can be a memory synchronization race between disp_getbest()
	 * and disp_ratify() vs cpu_resched() where cpu_resched() is trying
	 * to preempt the current thread to run the enqueued thread while
	 * disp_getbest() and disp_ratify() are changing the current thread
	 * to the stolen thread. This may lead to a situation where
	 * cpu_resched() tries to preempt the wrong thread and the
	 * stolen thread continues to run on the CPU which has been tagged
	 * for preemption.
	 * Later the clock thread gets enqueued but doesn't get to run on the
	 * CPU, causing the system to hang.
	 *
	 * To avoid this, grabbing and dropping the disp_lock (which does
	 * a memory barrier) is needed to synchronize the execution of
	 * cpu_resched() with disp_getbest() and disp_ratify() and
	 * synchronize the memory read and written by cpu_resched(),
	 * disp_getbest(), and disp_ratify() with each other.
	 * (see CR#6482861 for more details).
	 */
	disp_lock_enter_high(&cp->cpu_disp->disp_lock);
	disp_lock_exit_high(&cp->cpu_disp->disp_lock);

	ASSERT(pri == DISP_PRIO(tp));

	DTRACE_PROBE3(steal, kthread_t *, tp, cpu_t *, tcp, cpu_t *, cp);

	thread_onproc(tp, cp);			/* set t_state to TS_ONPROC */

	/*
	 * Return with spl high so that swtch() won't need to raise it.
	 * The disp_lock was dropped by dispdeq().
	 */

	return (tp);
}

/*
 * disp_bound_common() - common routine for higher level functions
 *	that check for bound threads under certain conditions.
 *	If 'threadlistsafe' is set then there is no need to acquire
 *	pidlock to stop the thread list from changing (e.g., if
 *	disp_bound_* is called with CPUs paused).
 */
static int
disp_bound_common(cpu_t *cp, int threadlistsafe, int flag)
{
	int		found = 0;
	kthread_t	*tp;

	ASSERT(flag);

	if (!threadlistsafe)
		mutex_enter(&pidlock);
	tp = curthread;		/* faster than allthreads */
	do {
		if (tp->t_state != TS_FREE) {
			/*
			 * If an interrupt thread is busy, but the
			 * caller doesn't care (i.e., BOUND_INTR is off),
			 * then just ignore it and continue through.
			 */
			if ((tp->t_flag & T_INTR_THREAD) &&
			    !(flag & BOUND_INTR))
				continue;

			/*
			 * Skip the idle thread for the CPU
			 * we're about to set offline.
			 */
			if (tp == cp->cpu_idle_thread)
				continue;

			/*
			 * Skip the pause thread for the CPU
			 * we're about to set offline.
			 */
			if (tp == cp->cpu_pause_thread)
				continue;

			if ((flag & BOUND_CPU) &&
			    (tp->t_bound_cpu == cp ||
			    tp->t_bind_cpu == cp->cpu_id ||
			    tp->t_weakbound_cpu == cp)) {
				found = 1;
				break;
			}

			if ((flag & BOUND_PARTITION) &&
			    (tp->t_cpupart == cp->cpu_part)) {
				found = 1;
				break;
			}
		}
	} while ((tp = tp->t_next) != curthread && found == 0);
	if (!threadlistsafe)
		mutex_exit(&pidlock);
	return (found);
}
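
/*
 * The wrappers below select which kinds of binding the caller cares
 * about.  They are used, for example, on the CPU offline path to decide
 * whether a CPU can be taken out of service while threads remain bound
 * to it (or to its partition).
 */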

/*
 * disp_bound_threads - return nonzero if threads are bound to the processor.
 *	Called infrequently.  Keep this simple.
 *	Includes threads that are asleep or stopped but not onproc.
 */
int
disp_bound_threads(cpu_t *cp, int threadlistsafe)
{
	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU));
}

/*
 * disp_bound_anythreads - return nonzero if _any_ threads are bound
 *	to the given processor, including interrupt threads.
 */
int
disp_bound_anythreads(cpu_t *cp, int threadlistsafe)
{
	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR));
}

/*
 * disp_bound_partition - return nonzero if threads are bound to the same
 *	partition as the processor.
 *	Called infrequently.  Keep this simple.
 *	Includes threads that are asleep or stopped but not onproc.
 */
int
disp_bound_partition(cpu_t *cp, int threadlistsafe)
{
	return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION));
}

/*
 * disp_cpu_inactive - make a CPU inactive by moving all of its unbound
 * threads to other CPUs.
 */
void
disp_cpu_inactive(cpu_t *cp)
{
	kthread_t	*tp;
	disp_t		*dp = cp->cpu_disp;
	dispq_t		*dq;
	pri_t		pri;
	int		wasonq;

	disp_lock_enter(&dp->disp_lock);
	while ((pri = dp->disp_max_unbound_pri) != -1) {
		dq = &dp->disp_q[pri];
		tp = dq->dq_first;

		/*
		 * Skip over bound threads.
		 */
		while (tp != NULL && tp->t_bound_cpu != NULL) {
			tp = tp->t_link;
		}

		if (tp == NULL) {
			/* disp_max_unbound_pri must be inaccurate, so fix it */
			disp_fix_unbound_pri(dp, pri);
			continue;
		}

		wasonq = dispdeq(tp);		/* drops disp_lock */
		ASSERT(wasonq);
		ASSERT(tp->t_weakbound_cpu == NULL);

		setbackdq(tp);
		/*
		 * Called from cpu_offline:
		 *
		 * cp has already been removed from the list of active cpus
		 * and tp->t_cpu has been changed so there is no risk of
		 * tp ending up back on cp.
		 *
		 * Called from cpupart_move_cpu:
		 *
		 * The cpu has moved to a new cpupart.  Any threads that
		 * were on its dispatch queues before the move remain
		 * in the old partition and can't run in the new partition.
		 */
		ASSERT(tp->t_cpu != cp);
		thread_unlock(tp);

		disp_lock_enter(&dp->disp_lock);
	}
	disp_lock_exit(&dp->disp_lock);
}

/*
 * disp_lowpri_cpu - find CPU running the lowest priority thread.
 *	The hint passed in is used as a starting point so we don't favor
 *	CPU 0 or any other CPU.  The caller should pass in the most recently
 *	used CPU for the thread.
 *
 *	The lgroup and priority are used to determine the best CPU to run on
 *	in a NUMA machine.  The lgroup specifies which CPUs are closest while
 *	the thread priority will indicate whether the thread will actually run
 *	there.  To pick the best CPU, the CPUs inside and outside of the given
 *	lgroup which are running the lowest priority threads are found.  The
 *	remote CPU is chosen only if the thread will not run locally on a CPU
 *	within the lgroup, but will run on the remote CPU.  If the thread
 *	cannot immediately run on any CPU, the best local CPU will be chosen.
 *
 *	The lpl specified also identifies the cpu partition from which
 *	disp_lowpri_cpu should select a CPU.
 *
 *	curcpu is used to indicate that disp_lowpri_cpu is being called on
 *	behalf of the current thread (curthread is looking for a new CPU).
 *	In this case, cpu_dispatch_pri for this thread's CPU should be
 *	ignored.
 *
 *	If a CPU is the target of an offline request, then try to avoid it.
 *
 *	This function must be called at either high SPL, or with preemption
 *	disabled, so that the "hint" CPU cannot be removed from the online
 *	CPU list while we are traversing it.
 */
cpu_t *
disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
{
	cpu_t	*bestcpu;
	cpu_t	*besthomecpu;
	cpu_t	*cp, *cpstart;

	pri_t	bestpri;
	pri_t	cpupri;

	klgrpset_t	done;
	klgrpset_t	cur_set;

	lpl_t		*lpl_iter, *lpl_leaf;
	int		i;

	/*
	 * Scan for a CPU currently running the lowest priority thread.
	 * Cannot get cpu_lock here because it is adaptive.
	 * We do not require lock on CPU list.
	 */
	ASSERT(hint != NULL);
	ASSERT(lpl != NULL);
	ASSERT(lpl->lpl_ncpu > 0);

	/*
	 * First examine local CPUs.  Note that it's possible the hint CPU
	 * passed in is remote to the specified home lgroup.  If our priority
	 * isn't high enough for us to run immediately at home, then examine
	 * CPUs remote to our home lgroup.
	 * We would like to give preference to CPUs closest to "home".
	 * If we can't find a CPU where we'll run at a given level
	 * of locality, we expand our search to include the next level.
	 */
	bestcpu = besthomecpu = NULL;
	klgrpset_clear(done);
	/* start with lpl we were passed */

	lpl_iter = lpl;

	do {

		bestpri = SHRT_MAX;
		klgrpset_clear(cur_set);

		for (i = 0; i < lpl_iter->lpl_nrset; i++) {
			lpl_leaf = lpl_iter->lpl_rset[i];
			if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
				continue;

			klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);

			if (hint->cpu_lpl == lpl_leaf)
				cp = cpstart = hint;
			else
				cp = cpstart = lpl_leaf->lpl_cpus;

			do {
				if (cp == curcpu)
					cpupri = -1;
				else if (cp == cpu_inmotion)
					cpupri = SHRT_MAX;
				else
					cpupri = cp->cpu_dispatch_pri;
				if (cp->cpu_disp->disp_maxrunpri > cpupri)
					cpupri = cp->cpu_disp->disp_maxrunpri;
				if (cp->cpu_chosen_level > cpupri)
					cpupri = cp->cpu_chosen_level;
				if (cpupri < bestpri) {
					if (CPU_IDLING(cpupri)) {
						ASSERT((cp->cpu_flags &
						    CPU_QUIESCED) == 0);
						return (cp);
					}
					bestcpu = cp;
					bestpri = cpupri;
				}
			} while ((cp = cp->cpu_next_lpl) != cpstart);
		}

		if (bestcpu && (tpri > bestpri)) {
			ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
			return (bestcpu);
		}
		if (besthomecpu == NULL)
			besthomecpu = bestcpu;
		/*
		 * Add the lgrps we just considered to the "done" set
		 */
		klgrpset_or(done, cur_set);

	} while ((lpl_iter = lpl_iter->lpl_parent) != NULL);

	/*
	 * The specified priority isn't high enough to run immediately
	 * anywhere, so just return the best CPU from the home lgroup.
	 */
	ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
	return (besthomecpu);
}
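
/*
 * A sketch of the search above, with hypothetical numbers: for a thread
 * of priority 60 whose home lgroup is L, the leaves of L are scanned
 * first.  An idle CPU found during the scan is returned immediately.
 * Otherwise, once L has been scanned, the lowest-priority CPU found is
 * returned if it is running below 60; if not, it is remembered as
 * besthomecpu and the search widens to L's parent, skipping lgroups
 * already visited.  A remote CPU is chosen only if the thread could run
 * on it immediately; failing that everywhere, besthomecpu is returned
 * and the thread waits close to home.
 */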

/*
 * This routine provides the generic idle cpu function for all processors.
 * If a processor has some specific code to execute when idle (say, to stop
 * the pipeline and save power) then that routine should be defined in the
 * processor's specific code (module_xx.c) and the global variable idle_cpu
 * set to that function.
 */
static void
generic_idle_cpu(void)
{
}

/*ARGSUSED*/
static void
generic_enq_thread(cpu_t *cpu, int bound)
{
}

/*
 * Select a CPU for this thread to run on.  Choose t->t_cpu unless:
 *	- t->t_cpu is not in this thread's assigned lgrp
 *	- the time since the thread last came off t->t_cpu exceeds the
 *	  rechoose time for this cpu (ignore this if t is curthread in
 *	  which case it's on CPU and t->t_disp_time is inaccurate)
 *	- t->t_cpu is presently the target of an offline or partition move
 *	  request
 */
static cpu_t *
cpu_choose(kthread_t *t, pri_t tpri)
{
	ASSERT(tpri < kpqpri);

	if ((((lbolt - t->t_disp_time) > rechoose_interval) &&
	    t != curthread) || t->t_cpu == cpu_inmotion) {
		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri, NULL));
	}

	/*
	 * Take a trip through disp_lowpri_cpu() if the thread was
	 * running outside its home lgroup.
	 */
	if (!klgrpset_ismember(t->t_lpl->lpl_lgrp->lgrp_set[LGRP_RSRC_CPU],
	    t->t_cpu->cpu_lpl->lpl_lgrpid)) {
		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri,
		    (t == curthread) ? t->t_cpu : NULL));
	}
	return (t->t_cpu);
}
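
/*
 * For example (a sketch of the policy above): a thread that blocked
 * briefly and becomes runnable again within rechoose_interval ticks,
 * on a CPU inside its home lgroup that is not being offlined or moved,
 * keeps t->t_cpu and with it whatever cache state it left behind.  A
 * thread that has been off t->t_cpu longer than rechoose_interval,
 * whose last CPU is cpu_inmotion, or that last ran outside its home
 * lgroup is instead routed through disp_lowpri_cpu() to find the
 * lowest-priority CPU that can run it.
 */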