1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/sysmacros.h> 33 #include <sys/signal.h> 34 #include <sys/user.h> 35 #include <sys/systm.h> 36 #include <sys/sysinfo.h> 37 #include <sys/var.h> 38 #include <sys/errno.h> 39 #include <sys/cmn_err.h> 40 #include <sys/debug.h> 41 #include <sys/inline.h> 42 #include <sys/disp.h> 43 #include <sys/class.h> 44 #include <sys/bitmap.h> 45 #include <sys/kmem.h> 46 #include <sys/cpuvar.h> 47 #include <sys/vtrace.h> 48 #include <sys/tnf.h> 49 #include <sys/cpupart.h> 50 #include <sys/lgrp.h> 51 #include <sys/pg.h> 52 #include <sys/cmt.h> 53 #include <sys/bitset.h> 54 #include <sys/schedctl.h> 55 #include <sys/atomic.h> 56 #include <sys/dtrace.h> 57 #include <sys/sdt.h> 58 #include <sys/archsystm.h> 59 60 #include <vm/as.h> 61 62 #define BOUND_CPU 0x1 63 #define BOUND_PARTITION 0x2 64 #define BOUND_INTR 0x4 65 66 /* Dispatch queue allocation structure and functions */ 67 struct disp_queue_info { 68 disp_t *dp; 69 dispq_t *olddispq; 70 dispq_t *newdispq; 71 ulong_t *olddqactmap; 72 ulong_t *newdqactmap; 73 int oldnglobpris; 74 }; 75 static void disp_dq_alloc(struct disp_queue_info *dptr, int numpris, 76 disp_t *dp); 77 static void disp_dq_assign(struct disp_queue_info *dptr, int numpris); 78 static void disp_dq_free(struct disp_queue_info *dptr); 79 80 /* platform-specific routine to call when processor is idle */ 81 static void generic_idle_cpu(); 82 void (*idle_cpu)() = generic_idle_cpu; 83 84 /* routines invoked when a CPU enters/exits the idle loop */ 85 static void idle_enter(); 86 static void idle_exit(); 87 88 /* platform-specific routine to call when thread is enqueued */ 89 static void generic_enq_thread(cpu_t *, int); 90 void (*disp_enq_thread)(cpu_t *, int) = generic_enq_thread; 91 92 pri_t kpreemptpri; /* priority where kernel preemption applies */ 93 pri_t upreemptpri = 0; /* priority where normal preemption applies */ 94 pri_t intr_pri; /* interrupt thread priority base level */ 95 96 #define KPQPRI -1 /* pri where cpu affinity is dropped for kpq */ 97 pri_t kpqpri = KPQPRI; /* can be set in /etc/system */ 98 disp_t cpu0_disp; /* boot CPU's dispatch queue */ 99 disp_lock_t swapped_lock; /* lock swapped threads and swap queue */ 100 int nswapped; /* total number of swapped threads */ 101 void disp_swapped_enq(kthread_t *tp); 102 static void disp_swapped_setrun(kthread_t *tp); 103 static void cpu_resched(cpu_t *cp, pri_t tpri); 104 105 /* 106 * If this is set, only interrupt threads 
will cause kernel preemptions. 107 * This is done by changing the value of kpreemptpri. kpreemptpri 108 * will either be the max sysclass pri or the min interrupt pri. 109 */ 110 int only_intr_kpreempt; 111 112 extern void set_idle_cpu(int cpun); 113 extern void unset_idle_cpu(int cpun); 114 static void setkpdq(kthread_t *tp, int borf); 115 #define SETKP_BACK 0 116 #define SETKP_FRONT 1 117 /* 118 * Parameter that determines how recently a thread must have run 119 * on the CPU to be considered loosely-bound to that CPU to reduce 120 * cold cache effects. The interval is in hertz. 121 */ 122 #define RECHOOSE_INTERVAL 3 123 int rechoose_interval = RECHOOSE_INTERVAL; 124 125 /* 126 * Parameter that determines how long (in nanoseconds) a thread must 127 * be sitting on a run queue before it can be stolen by another CPU 128 * to reduce migrations. The interval is in nanoseconds. 129 * 130 * The nosteal_nsec should be set by platform code cmp_set_nosteal_interval() 131 * to an appropriate value. nosteal_nsec is set to NOSTEAL_UNINITIALIZED 132 * here indicating it is uninitiallized. 133 * Setting nosteal_nsec to 0 effectively disables the nosteal 'protection'. 134 * 135 */ 136 #define NOSTEAL_UNINITIALIZED (-1) 137 hrtime_t nosteal_nsec = NOSTEAL_UNINITIALIZED; 138 extern void cmp_set_nosteal_interval(void); 139 140 id_t defaultcid; /* system "default" class; see dispadmin(1M) */ 141 142 disp_lock_t transition_lock; /* lock on transitioning threads */ 143 disp_lock_t stop_lock; /* lock on stopped threads */ 144 145 static void cpu_dispqalloc(int numpris); 146 147 /* 148 * This gets returned by disp_getwork/disp_getbest if we couldn't steal 149 * a thread because it was sitting on its run queue for a very short 150 * period of time. 151 */ 152 #define T_DONTSTEAL (kthread_t *)(-1) /* returned by disp_getwork/getbest */ 153 154 static kthread_t *disp_getwork(cpu_t *to); 155 static kthread_t *disp_getbest(disp_t *from); 156 static kthread_t *disp_ratify(kthread_t *tp, disp_t *kpq); 157 158 void swtch_to(kthread_t *); 159 160 /* 161 * dispatcher and scheduler initialization 162 */ 163 164 /* 165 * disp_setup - Common code to calculate and allocate dispatcher 166 * variables and structures based on the maximum priority. 167 */ 168 static void 169 disp_setup(pri_t maxglobpri, pri_t oldnglobpris) 170 { 171 pri_t newnglobpris; 172 173 ASSERT(MUTEX_HELD(&cpu_lock)); 174 175 newnglobpris = maxglobpri + 1 + LOCK_LEVEL; 176 177 if (newnglobpris > oldnglobpris) { 178 /* 179 * Allocate new kp queues for each CPU partition. 180 */ 181 cpupart_kpqalloc(newnglobpris); 182 183 /* 184 * Allocate new dispatch queues for each CPU. 185 */ 186 cpu_dispqalloc(newnglobpris); 187 188 /* 189 * compute new interrupt thread base priority 190 */ 191 intr_pri = maxglobpri; 192 if (only_intr_kpreempt) { 193 kpreemptpri = intr_pri + 1; 194 if (kpqpri == KPQPRI) 195 kpqpri = kpreemptpri; 196 } 197 v.v_nglobpris = newnglobpris; 198 } 199 } 200 201 /* 202 * dispinit - Called to initialize all loaded classes and the 203 * dispatcher framework. 204 */ 205 void 206 dispinit(void) 207 { 208 id_t cid; 209 pri_t maxglobpri; 210 pri_t cl_maxglobpri; 211 212 maxglobpri = -1; 213 214 /* 215 * Initialize transition lock, which will always be set. 216 */ 217 DISP_LOCK_INIT(&transition_lock); 218 disp_lock_enter_high(&transition_lock); 219 DISP_LOCK_INIT(&stop_lock); 220 221 mutex_enter(&cpu_lock); 222 CPU->cpu_disp->disp_maxrunpri = -1; 223 CPU->cpu_disp->disp_max_unbound_pri = -1; 224 225 /* 226 * Initialize the default CPU partition. 
227 */ 228 cpupart_initialize_default(); 229 /* 230 * Call the class specific initialization functions for 231 * all pre-installed schedulers. 232 * 233 * We pass the size of a class specific parameter 234 * buffer to each of the initialization functions 235 * to try to catch problems with backward compatibility 236 * of class modules. 237 * 238 * For example a new class module running on an old system 239 * which didn't provide sufficiently large parameter buffers 240 * would be bad news. Class initialization modules can check for 241 * this and take action if they detect a problem. 242 */ 243 244 for (cid = 0; cid < nclass; cid++) { 245 sclass_t *sc; 246 247 sc = &sclass[cid]; 248 if (SCHED_INSTALLED(sc)) { 249 cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ, 250 &sc->cl_funcs); 251 if (cl_maxglobpri > maxglobpri) 252 maxglobpri = cl_maxglobpri; 253 } 254 } 255 256 /* 257 * Historically, kpreemptpri was set to v_maxsyspri + 1 -- which is 258 * to say, maxclsyspri + 1. However, over time, the system has used 259 * more and more asynchronous kernel threads, with an increasing number 260 * of these doing work on direct behalf of higher-level software (e.g., 261 * network processing). This has led to potential priority inversions: 262 * threads doing low-priority lengthy kernel work can effectively 263 * delay kernel-level processing of higher-priority data. To minimize 264 * such inversions, we set kpreemptpri to be v_maxsyspri; anything in 265 * the kernel that runs at maxclsyspri will therefore induce kernel 266 * preemption, and this priority should be used if/when an asynchronous 267 * thread (or, as is often the case, task queue) is performing a task 268 * on behalf of higher-level software (or any task that is otherwise 269 * latency-sensitve). 270 */ 271 kpreemptpri = (pri_t)v.v_maxsyspri; 272 if (kpqpri == KPQPRI) 273 kpqpri = kpreemptpri; 274 275 ASSERT(maxglobpri >= 0); 276 disp_setup(maxglobpri, 0); 277 278 mutex_exit(&cpu_lock); 279 280 /* 281 * Platform specific sticky scheduler setup. 282 */ 283 if (nosteal_nsec == NOSTEAL_UNINITIALIZED) 284 cmp_set_nosteal_interval(); 285 286 /* 287 * Get the default class ID; this may be later modified via 288 * dispadmin(1M). This will load the class (normally TS) and that will 289 * call disp_add(), which is why we had to drop cpu_lock first. 290 */ 291 if (getcid(defaultclass, &defaultcid) != 0) { 292 cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'", 293 defaultclass); 294 } 295 } 296 297 /* 298 * disp_add - Called with class pointer to initialize the dispatcher 299 * for a newly loaded class. 300 */ 301 void 302 disp_add(sclass_t *clp) 303 { 304 pri_t maxglobpri; 305 pri_t cl_maxglobpri; 306 307 mutex_enter(&cpu_lock); 308 /* 309 * Initialize the scheduler class. 310 */ 311 maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1); 312 cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs); 313 if (cl_maxglobpri > maxglobpri) 314 maxglobpri = cl_maxglobpri; 315 316 /* 317 * Save old queue information. Since we're initializing a 318 * new scheduling class which has just been loaded, then 319 * the size of the dispq may have changed. We need to handle 320 * that here. 321 */ 322 disp_setup(maxglobpri, v.v_nglobpris); 323 324 mutex_exit(&cpu_lock); 325 } 326 327 328 /* 329 * For each CPU, allocate new dispatch queues 330 * with the stated number of priorities. 
331 */ 332 static void 333 cpu_dispqalloc(int numpris) 334 { 335 cpu_t *cpup; 336 struct disp_queue_info *disp_mem; 337 int i, num; 338 339 ASSERT(MUTEX_HELD(&cpu_lock)); 340 341 disp_mem = kmem_zalloc(NCPU * 342 sizeof (struct disp_queue_info), KM_SLEEP); 343 344 /* 345 * This routine must allocate all of the memory before stopping 346 * the cpus because it must not sleep in kmem_alloc while the 347 * CPUs are stopped. Locks they hold will not be freed until they 348 * are restarted. 349 */ 350 i = 0; 351 cpup = cpu_list; 352 do { 353 disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp); 354 i++; 355 cpup = cpup->cpu_next; 356 } while (cpup != cpu_list); 357 num = i; 358 359 pause_cpus(NULL, NULL); 360 for (i = 0; i < num; i++) 361 disp_dq_assign(&disp_mem[i], numpris); 362 start_cpus(); 363 364 /* 365 * I must free all of the memory after starting the cpus because 366 * I can not risk sleeping in kmem_free while the cpus are stopped. 367 */ 368 for (i = 0; i < num; i++) 369 disp_dq_free(&disp_mem[i]); 370 371 kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info)); 372 } 373 374 static void 375 disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp) 376 { 377 dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP); 378 dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) * 379 sizeof (long), KM_SLEEP); 380 dptr->dp = dp; 381 } 382 383 static void 384 disp_dq_assign(struct disp_queue_info *dptr, int numpris) 385 { 386 disp_t *dp; 387 388 dp = dptr->dp; 389 dptr->olddispq = dp->disp_q; 390 dptr->olddqactmap = dp->disp_qactmap; 391 dptr->oldnglobpris = dp->disp_npri; 392 393 ASSERT(dptr->oldnglobpris < numpris); 394 395 if (dptr->olddispq != NULL) { 396 /* 397 * Use kcopy because bcopy is platform-specific 398 * and could block while we might have paused the cpus. 399 */ 400 (void) kcopy(dptr->olddispq, dptr->newdispq, 401 dptr->oldnglobpris * sizeof (dispq_t)); 402 (void) kcopy(dptr->olddqactmap, dptr->newdqactmap, 403 ((dptr->oldnglobpris / BT_NBIPUL) + 1) * 404 sizeof (long)); 405 } 406 dp->disp_q = dptr->newdispq; 407 dp->disp_qactmap = dptr->newdqactmap; 408 dp->disp_q_limit = &dptr->newdispq[numpris]; 409 dp->disp_npri = numpris; 410 } 411 412 static void 413 disp_dq_free(struct disp_queue_info *dptr) 414 { 415 if (dptr->olddispq != NULL) 416 kmem_free(dptr->olddispq, 417 dptr->oldnglobpris * sizeof (dispq_t)); 418 if (dptr->olddqactmap != NULL) 419 kmem_free(dptr->olddqactmap, 420 ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long)); 421 } 422 423 /* 424 * For a newly created CPU, initialize the dispatch queue. 425 * This is called before the CPU is known through cpu[] or on any lists. 426 */ 427 void 428 disp_cpu_init(cpu_t *cp) 429 { 430 disp_t *dp; 431 dispq_t *newdispq; 432 ulong_t *newdqactmap; 433 434 ASSERT(MUTEX_HELD(&cpu_lock)); /* protect dispatcher queue sizes */ 435 436 if (cp == cpu0_disp.disp_cpu) 437 dp = &cpu0_disp; 438 else 439 dp = kmem_alloc(sizeof (disp_t), KM_SLEEP); 440 bzero(dp, sizeof (disp_t)); 441 cp->cpu_disp = dp; 442 dp->disp_cpu = cp; 443 dp->disp_maxrunpri = -1; 444 dp->disp_max_unbound_pri = -1; 445 DISP_LOCK_INIT(&cp->cpu_thread_lock); 446 /* 447 * Allocate memory for the dispatcher queue headers 448 * and the active queue bitmap. 
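	 *
	 * Editorial note (illustrative sizing, assuming BT_NBIPUL is 64 on
	 * a 64-bit kernel): with v.v_nglobpris == 170, the queue array gets
	 * 170 dispq_t headers and the bitmap (170 / 64) + 1 == 3 ulong_t
	 * words, one bit per priority level.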
449 */ 450 newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP); 451 newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) * 452 sizeof (long), KM_SLEEP); 453 dp->disp_q = newdispq; 454 dp->disp_qactmap = newdqactmap; 455 dp->disp_q_limit = &newdispq[v.v_nglobpris]; 456 dp->disp_npri = v.v_nglobpris; 457 } 458 459 void 460 disp_cpu_fini(cpu_t *cp) 461 { 462 ASSERT(MUTEX_HELD(&cpu_lock)); 463 464 disp_kp_free(cp->cpu_disp); 465 if (cp->cpu_disp != &cpu0_disp) 466 kmem_free(cp->cpu_disp, sizeof (disp_t)); 467 } 468 469 /* 470 * Allocate new, larger kpreempt dispatch queue to replace the old one. 471 */ 472 void 473 disp_kp_alloc(disp_t *dq, pri_t npri) 474 { 475 struct disp_queue_info mem_info; 476 477 if (npri > dq->disp_npri) { 478 /* 479 * Allocate memory for the new array. 480 */ 481 disp_dq_alloc(&mem_info, npri, dq); 482 483 /* 484 * We need to copy the old structures to the new 485 * and free the old. 486 */ 487 disp_dq_assign(&mem_info, npri); 488 disp_dq_free(&mem_info); 489 } 490 } 491 492 /* 493 * Free dispatch queue. 494 * Used for the kpreempt queues for a removed CPU partition and 495 * for the per-CPU queues of deleted CPUs. 496 */ 497 void 498 disp_kp_free(disp_t *dq) 499 { 500 struct disp_queue_info mem_info; 501 502 mem_info.olddispq = dq->disp_q; 503 mem_info.olddqactmap = dq->disp_qactmap; 504 mem_info.oldnglobpris = dq->disp_npri; 505 disp_dq_free(&mem_info); 506 } 507 508 /* 509 * End dispatcher and scheduler initialization. 510 */ 511 512 /* 513 * See if there's anything to do other than remain idle. 514 * Return non-zero if there is. 515 * 516 * This function must be called with high spl, or with 517 * kernel preemption disabled to prevent the partition's 518 * active cpu list from changing while being traversed. 519 * 520 * This is essentially a simpler version of disp_getwork() 521 * to be called by CPUs preparing to "halt". 522 */ 523 int 524 disp_anywork(void) 525 { 526 cpu_t *cp = CPU; 527 cpu_t *ocp; 528 volatile int *local_nrunnable = &cp->cpu_disp->disp_nrunnable; 529 530 if (!(cp->cpu_flags & CPU_OFFLINE)) { 531 if (CP_MAXRUNPRI(cp->cpu_part) >= 0) 532 return (1); 533 534 for (ocp = cp->cpu_next_part; ocp != cp; 535 ocp = ocp->cpu_next_part) { 536 ASSERT(CPU_ACTIVE(ocp)); 537 538 /* 539 * Something has appeared on the local run queue. 540 */ 541 if (*local_nrunnable > 0) 542 return (1); 543 /* 544 * If we encounter another idle CPU that will 545 * soon be trolling around through disp_anywork() 546 * terminate our walk here and let this other CPU 547 * patrol the next part of the list. 548 */ 549 if (ocp->cpu_dispatch_pri == -1 && 550 (ocp->cpu_disp_flags & CPU_DISP_HALTED) == 0) 551 return (0); 552 /* 553 * Work can be taken from another CPU if: 554 * - There is unbound work on the run queue 555 * - That work isn't a thread undergoing a 556 * - context switch on an otherwise empty queue. 557 * - The CPU isn't running the idle loop. 
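			 *
			 * Editorial note mapping those conditions onto the
			 * check below: the first term requires unbound work
			 * (disp_max_unbound_pri != -1), the middle term skips
			 * a lone thread still mid-switch (CPU_DISP_DONTSTEAL
			 * with disp_nrunnable == 1), and the last term skips
			 * CPUs whose dispatch priority is -1, i.e. CPUs
			 * running their idle thread.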
558 */ 559 if (ocp->cpu_disp->disp_max_unbound_pri != -1 && 560 !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 561 ocp->cpu_disp->disp_nrunnable == 1) && 562 ocp->cpu_dispatch_pri != -1) 563 return (1); 564 } 565 } 566 return (0); 567 } 568 569 /* 570 * Called when CPU enters the idle loop 571 */ 572 static void 573 idle_enter() 574 { 575 cpu_t *cp = CPU; 576 577 new_cpu_mstate(CMS_IDLE, gethrtime_unscaled()); 578 CPU_STATS_ADDQ(cp, sys, idlethread, 1); 579 set_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 580 } 581 582 /* 583 * Called when CPU exits the idle loop 584 */ 585 static void 586 idle_exit() 587 { 588 cpu_t *cp = CPU; 589 590 new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled()); 591 unset_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 592 } 593 594 /* 595 * Idle loop. 596 */ 597 void 598 idle() 599 { 600 struct cpu *cp = CPU; /* pointer to this CPU */ 601 kthread_t *t; /* taken thread */ 602 603 idle_enter(); 604 605 /* 606 * Uniprocessor version of idle loop. 607 * Do this until notified that we're on an actual multiprocessor. 608 */ 609 while (ncpus == 1) { 610 if (cp->cpu_disp->disp_nrunnable == 0) { 611 (*idle_cpu)(); 612 continue; 613 } 614 idle_exit(); 615 swtch(); 616 617 idle_enter(); /* returned from swtch */ 618 } 619 620 /* 621 * Multiprocessor idle loop. 622 */ 623 for (;;) { 624 /* 625 * If CPU is completely quiesced by p_online(2), just wait 626 * here with minimal bus traffic until put online. 627 */ 628 while (cp->cpu_flags & CPU_QUIESCED) 629 (*idle_cpu)(); 630 631 if (cp->cpu_disp->disp_nrunnable != 0) { 632 idle_exit(); 633 swtch(); 634 } else { 635 if (cp->cpu_flags & CPU_OFFLINE) 636 continue; 637 if ((t = disp_getwork(cp)) == NULL) { 638 if (cp->cpu_chosen_level != -1) { 639 disp_t *dp = cp->cpu_disp; 640 disp_t *kpq; 641 642 disp_lock_enter(&dp->disp_lock); 643 /* 644 * Set kpq under lock to prevent 645 * migration between partitions. 646 */ 647 kpq = &cp->cpu_part->cp_kp_queue; 648 if (kpq->disp_maxrunpri == -1) 649 cp->cpu_chosen_level = -1; 650 disp_lock_exit(&dp->disp_lock); 651 } 652 (*idle_cpu)(); 653 continue; 654 } 655 /* 656 * If there was a thread but we couldn't steal 657 * it, then keep trying. 658 */ 659 if (t == T_DONTSTEAL) 660 continue; 661 idle_exit(); 662 swtch_to(t); 663 } 664 idle_enter(); /* returned from swtch/swtch_to */ 665 } 666 } 667 668 669 /* 670 * Preempt the currently running thread in favor of the highest 671 * priority thread. The class of the current thread controls 672 * where it goes on the dispatcher queues. If panicking, turn 673 * preemption off. 674 */ 675 void 676 preempt() 677 { 678 kthread_t *t = curthread; 679 klwp_t *lwp = ttolwp(curthread); 680 681 if (panicstr) 682 return; 683 684 TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start"); 685 686 thread_lock(t); 687 688 if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) { 689 /* 690 * this thread has already been chosen to be run on 691 * another CPU. Clear kprunrun on this CPU since we're 692 * already headed for swtch(). 
693 */ 694 CPU->cpu_kprunrun = 0; 695 thread_unlock_nopreempt(t); 696 TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end"); 697 } else { 698 if (lwp != NULL) 699 lwp->lwp_ru.nivcsw++; 700 CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1); 701 THREAD_TRANSITION(t); 702 CL_PREEMPT(t); 703 DTRACE_SCHED(preempt); 704 thread_unlock_nopreempt(t); 705 706 TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end"); 707 708 swtch(); /* clears CPU->cpu_runrun via disp() */ 709 } 710 } 711 712 extern kthread_t *thread_unpin(); 713 714 /* 715 * disp() - find the highest priority thread for this processor to run, and 716 * set it in TS_ONPROC state so that resume() can be called to run it. 717 */ 718 static kthread_t * 719 disp() 720 { 721 cpu_t *cpup; 722 disp_t *dp; 723 kthread_t *tp; 724 dispq_t *dq; 725 int maxrunword; 726 pri_t pri; 727 disp_t *kpq; 728 729 TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start"); 730 731 cpup = CPU; 732 /* 733 * Find the highest priority loaded, runnable thread. 734 */ 735 dp = cpup->cpu_disp; 736 737 reschedule: 738 /* 739 * If there is more important work on the global queue with a better 740 * priority than the maximum on this CPU, take it now. 741 */ 742 kpq = &cpup->cpu_part->cp_kp_queue; 743 while ((pri = kpq->disp_maxrunpri) >= 0 && 744 pri >= dp->disp_maxrunpri && 745 (cpup->cpu_flags & CPU_OFFLINE) == 0 && 746 (tp = disp_getbest(kpq)) != NULL) { 747 if (disp_ratify(tp, kpq) != NULL) { 748 TRACE_1(TR_FAC_DISP, TR_DISP_END, 749 "disp_end:tid %p", tp); 750 return (tp); 751 } 752 } 753 754 disp_lock_enter(&dp->disp_lock); 755 pri = dp->disp_maxrunpri; 756 757 /* 758 * If there is nothing to run, look at what's runnable on other queues. 759 * Choose the idle thread if the CPU is quiesced. 760 * Note that CPUs that have the CPU_OFFLINE flag set can still run 761 * interrupt threads, which will be the only threads on the CPU's own 762 * queue, but cannot run threads from other queues. 763 */ 764 if (pri == -1) { 765 if (!(cpup->cpu_flags & CPU_OFFLINE)) { 766 disp_lock_exit(&dp->disp_lock); 767 if ((tp = disp_getwork(cpup)) == NULL || 768 tp == T_DONTSTEAL) { 769 tp = cpup->cpu_idle_thread; 770 (void) splhigh(); 771 THREAD_ONPROC(tp, cpup); 772 cpup->cpu_dispthread = tp; 773 cpup->cpu_dispatch_pri = -1; 774 cpup->cpu_runrun = cpup->cpu_kprunrun = 0; 775 cpup->cpu_chosen_level = -1; 776 } 777 } else { 778 disp_lock_exit_high(&dp->disp_lock); 779 tp = cpup->cpu_idle_thread; 780 THREAD_ONPROC(tp, cpup); 781 cpup->cpu_dispthread = tp; 782 cpup->cpu_dispatch_pri = -1; 783 cpup->cpu_runrun = cpup->cpu_kprunrun = 0; 784 cpup->cpu_chosen_level = -1; 785 } 786 TRACE_1(TR_FAC_DISP, TR_DISP_END, 787 "disp_end:tid %p", tp); 788 return (tp); 789 } 790 791 dq = &dp->disp_q[pri]; 792 tp = dq->dq_first; 793 794 ASSERT(tp != NULL); 795 ASSERT(tp->t_schedflag & TS_LOAD); /* thread must be swapped in */ 796 797 DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 798 799 /* 800 * Found it so remove it from queue. 801 */ 802 dp->disp_nrunnable--; 803 dq->dq_sruncnt--; 804 if ((dq->dq_first = tp->t_link) == NULL) { 805 ulong_t *dqactmap = dp->disp_qactmap; 806 807 ASSERT(dq->dq_sruncnt == 0); 808 dq->dq_last = NULL; 809 810 /* 811 * The queue is empty, so the corresponding bit needs to be 812 * turned off in dqactmap. If nrunnable != 0 just took the 813 * last runnable thread off the 814 * highest queue, so recompute disp_maxrunpri. 
815 */ 816 maxrunword = pri >> BT_ULSHIFT; 817 dqactmap[maxrunword] &= ~BT_BIW(pri); 818 819 if (dp->disp_nrunnable == 0) { 820 dp->disp_max_unbound_pri = -1; 821 dp->disp_maxrunpri = -1; 822 } else { 823 int ipri; 824 825 ipri = bt_gethighbit(dqactmap, maxrunword); 826 dp->disp_maxrunpri = ipri; 827 if (ipri < dp->disp_max_unbound_pri) 828 dp->disp_max_unbound_pri = ipri; 829 } 830 } else { 831 tp->t_link = NULL; 832 } 833 834 /* 835 * Set TS_DONT_SWAP flag to prevent another processor from swapping 836 * out this thread before we have a chance to run it. 837 * While running, it is protected against swapping by t_lock. 838 */ 839 tp->t_schedflag |= TS_DONT_SWAP; 840 cpup->cpu_dispthread = tp; /* protected by spl only */ 841 cpup->cpu_dispatch_pri = pri; 842 ASSERT(pri == DISP_PRIO(tp)); 843 thread_onproc(tp, cpup); /* set t_state to TS_ONPROC */ 844 disp_lock_exit_high(&dp->disp_lock); /* drop run queue lock */ 845 846 ASSERT(tp != NULL); 847 TRACE_1(TR_FAC_DISP, TR_DISP_END, 848 "disp_end:tid %p", tp); 849 850 if (disp_ratify(tp, kpq) == NULL) 851 goto reschedule; 852 853 return (tp); 854 } 855 856 /* 857 * swtch() 858 * Find best runnable thread and run it. 859 * Called with the current thread already switched to a new state, 860 * on a sleep queue, run queue, stopped, and not zombied. 861 * May be called at any spl level less than or equal to LOCK_LEVEL. 862 * Always drops spl to the base level (spl0()). 863 */ 864 void 865 swtch() 866 { 867 kthread_t *t = curthread; 868 kthread_t *next; 869 cpu_t *cp; 870 871 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 872 873 if (t->t_flag & T_INTR_THREAD) 874 cpu_intr_swtch_enter(t); 875 876 if (t->t_intr != NULL) { 877 /* 878 * We are an interrupt thread. Setup and return 879 * the interrupted thread to be resumed. 880 */ 881 (void) splhigh(); /* block other scheduler action */ 882 cp = CPU; /* now protected against migration */ 883 ASSERT(CPU_ON_INTR(cp) == 0); /* not called with PIL > 10 */ 884 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 885 CPU_STATS_ADDQ(cp, sys, intrblk, 1); 886 next = thread_unpin(); 887 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 888 resume_from_intr(next); 889 } else { 890 #ifdef DEBUG 891 if (t->t_state == TS_ONPROC && 892 t->t_disp_queue->disp_cpu == CPU && 893 t->t_preempt == 0) { 894 thread_lock(t); 895 ASSERT(t->t_state != TS_ONPROC || 896 t->t_disp_queue->disp_cpu != CPU || 897 t->t_preempt != 0); /* cannot migrate */ 898 thread_unlock_nopreempt(t); 899 } 900 #endif /* DEBUG */ 901 cp = CPU; 902 next = disp(); /* returns with spl high */ 903 ASSERT(CPU_ON_INTR(cp) == 0); /* not called with PIL > 10 */ 904 905 /* OK to steal anything left on run queue */ 906 cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; 907 908 if (next != t) { 909 hrtime_t now; 910 911 now = gethrtime_unscaled(); 912 pg_ev_thread_swtch(cp, now, t, next); 913 914 /* 915 * If t was previously in the TS_ONPROC state, 916 * setfrontdq and setbackdq won't have set its t_waitrq. 917 * Since we now finally know that we're switching away 918 * from this thread, set its t_waitrq if it is on a run 919 * queue. 
920 */ 921 if ((t->t_state == TS_RUN) && (t->t_waitrq == 0)) { 922 t->t_waitrq = now; 923 } 924 925 /* 926 * restore mstate of thread that we are switching to 927 */ 928 restore_mstate(next); 929 930 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 931 cp->cpu_last_swtch = t->t_disp_time = ddi_get_lbolt(); 932 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 933 934 if (dtrace_vtime_active) 935 dtrace_vtime_switch(next); 936 937 resume(next); 938 /* 939 * The TR_RESUME_END and TR_SWTCH_END trace points 940 * appear at the end of resume(), because we may not 941 * return here 942 */ 943 } else { 944 if (t->t_flag & T_INTR_THREAD) 945 cpu_intr_swtch_exit(t); 946 /* 947 * Threads that enqueue themselves on a run queue defer 948 * setting t_waitrq. It is then either set in swtch() 949 * when the CPU is actually yielded, or not at all if it 950 * is remaining on the CPU. 951 * There is however a window between where the thread 952 * placed itself on a run queue, and where it selects 953 * itself in disp(), where a third party (eg. clock() 954 * doing tick processing) may have re-enqueued this 955 * thread, setting t_waitrq in the process. We detect 956 * this race by noticing that despite switching to 957 * ourself, our t_waitrq has been set, and should be 958 * cleared. 959 */ 960 if (t->t_waitrq != 0) 961 t->t_waitrq = 0; 962 963 pg_ev_thread_remain(cp, t); 964 965 DTRACE_SCHED(remain__cpu); 966 TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end"); 967 (void) spl0(); 968 } 969 } 970 } 971 972 /* 973 * swtch_from_zombie() 974 * Special case of swtch(), which allows checks for TS_ZOMB to be 975 * eliminated from normal resume. 976 * Find best runnable thread and run it. 977 * Called with the current thread zombied. 978 * Zombies cannot migrate, so CPU references are safe. 979 */ 980 void 981 swtch_from_zombie() 982 { 983 kthread_t *next; 984 cpu_t *cpu = CPU; 985 986 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 987 988 ASSERT(curthread->t_state == TS_ZOMB); 989 990 next = disp(); /* returns with spl high */ 991 ASSERT(CPU_ON_INTR(CPU) == 0); /* not called with PIL > 10 */ 992 CPU_STATS_ADDQ(CPU, sys, pswitch, 1); 993 ASSERT(next != curthread); 994 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 995 996 pg_ev_thread_swtch(cpu, gethrtime_unscaled(), curthread, next); 997 998 restore_mstate(next); 999 1000 if (dtrace_vtime_active) 1001 dtrace_vtime_switch(next); 1002 1003 resume_from_zombie(next); 1004 /* 1005 * The TR_RESUME_END and TR_SWTCH_END trace points 1006 * appear at the end of resume(), because we certainly will not 1007 * return here 1008 */ 1009 } 1010 1011 #if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint)) 1012 1013 /* 1014 * search_disp_queues() 1015 * Search the given dispatch queues for thread tp. 1016 * Return 1 if tp is found, otherwise return 0. 1017 */ 1018 static int 1019 search_disp_queues(disp_t *dp, kthread_t *tp) 1020 { 1021 dispq_t *dq; 1022 dispq_t *eq; 1023 1024 disp_lock_enter_high(&dp->disp_lock); 1025 1026 for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) { 1027 kthread_t *rp; 1028 1029 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1030 1031 for (rp = dq->dq_first; rp; rp = rp->t_link) 1032 if (tp == rp) { 1033 disp_lock_exit_high(&dp->disp_lock); 1034 return (1); 1035 } 1036 } 1037 disp_lock_exit_high(&dp->disp_lock); 1038 1039 return (0); 1040 } 1041 1042 /* 1043 * thread_on_queue() 1044 * Search all per-CPU dispatch queues and all partition-wide kpreempt 1045 * queues for thread tp. Return 1 if tp is found, otherwise return 0. 
1046 */ 1047 static int 1048 thread_on_queue(kthread_t *tp) 1049 { 1050 cpu_t *cp; 1051 struct cpupart *part; 1052 1053 ASSERT(getpil() >= DISP_LEVEL); 1054 1055 /* 1056 * Search the per-CPU dispatch queues for tp. 1057 */ 1058 cp = CPU; 1059 do { 1060 if (search_disp_queues(cp->cpu_disp, tp)) 1061 return (1); 1062 } while ((cp = cp->cpu_next_onln) != CPU); 1063 1064 /* 1065 * Search the partition-wide kpreempt queues for tp. 1066 */ 1067 part = CPU->cpu_part; 1068 do { 1069 if (search_disp_queues(&part->cp_kp_queue, tp)) 1070 return (1); 1071 } while ((part = part->cp_next) != CPU->cpu_part); 1072 1073 return (0); 1074 } 1075 1076 #else 1077 1078 #define thread_on_queue(tp) 0 /* ASSERT must be !thread_on_queue */ 1079 1080 #endif /* DEBUG */ 1081 1082 /* 1083 * like swtch(), but switch to a specified thread taken from another CPU. 1084 * called with spl high.. 1085 */ 1086 void 1087 swtch_to(kthread_t *next) 1088 { 1089 cpu_t *cp = CPU; 1090 hrtime_t now; 1091 1092 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 1093 1094 /* 1095 * Update context switch statistics. 1096 */ 1097 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 1098 1099 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 1100 1101 now = gethrtime_unscaled(); 1102 pg_ev_thread_swtch(cp, now, curthread, next); 1103 1104 /* OK to steal anything left on run queue */ 1105 cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; 1106 1107 /* record last execution time */ 1108 cp->cpu_last_swtch = curthread->t_disp_time = ddi_get_lbolt(); 1109 1110 /* 1111 * If t was previously in the TS_ONPROC state, setfrontdq and setbackdq 1112 * won't have set its t_waitrq. Since we now finally know that we're 1113 * switching away from this thread, set its t_waitrq if it is on a run 1114 * queue. 1115 */ 1116 if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) { 1117 curthread->t_waitrq = now; 1118 } 1119 1120 /* restore next thread to previously running microstate */ 1121 restore_mstate(next); 1122 1123 if (dtrace_vtime_active) 1124 dtrace_vtime_switch(next); 1125 1126 resume(next); 1127 /* 1128 * The TR_RESUME_END and TR_SWTCH_END trace points 1129 * appear at the end of resume(), because we may not 1130 * return here 1131 */ 1132 } 1133 1134 #define CPU_IDLING(pri) ((pri) == -1) 1135 1136 static void 1137 cpu_resched(cpu_t *cp, pri_t tpri) 1138 { 1139 int call_poke_cpu = 0; 1140 pri_t cpupri = cp->cpu_dispatch_pri; 1141 1142 if (!CPU_IDLING(cpupri) && (cpupri < tpri)) { 1143 TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED, 1144 "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri); 1145 if (tpri >= upreemptpri && cp->cpu_runrun == 0) { 1146 cp->cpu_runrun = 1; 1147 aston(cp->cpu_dispthread); 1148 if (tpri < kpreemptpri && cp != CPU) 1149 call_poke_cpu = 1; 1150 } 1151 if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) { 1152 cp->cpu_kprunrun = 1; 1153 if (cp != CPU) 1154 call_poke_cpu = 1; 1155 } 1156 } 1157 1158 /* 1159 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 1160 */ 1161 membar_enter(); 1162 1163 if (call_poke_cpu) 1164 poke_cpu(cp->cpu_id); 1165 } 1166 1167 /* 1168 * setbackdq() keeps runqs balanced such that the difference in length 1169 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF. 1170 * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths 1171 * must match. When per-thread TS_RUNQMATCH flag is set, setbackdq() will 1172 * try to keep runqs perfectly balanced regardless of the thread priority. 
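 *
 * Editorial example, using the constants defined below: a priority-59
 * thread (>= RUNQ_MATCH_PRI, TS_RUNQMATCH clear) headed for a CPU whose
 * queue holds 3 threads at that priority has its effective length cut
 * by RUNQ_MAX_DIFF to 1, so it only migrates to a neighbor whose queue
 * at that priority is empty; a priority-10 thread keeps the full length
 * of 3, so any neighbor with fewer than 3 queued threads at that
 * priority is preferred.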
 */
#define	RUNQ_MATCH_PRI	16	/* pri below which queue lengths must match */
#define	RUNQ_MAX_DIFF	2	/* maximum runq length difference */
#define	RUNQ_LEN(cp, pri)	((cp)->cpu_disp->disp_q[pri].dq_sruncnt)

/*
 * Macro that evaluates to true if it is likely that the thread has cache
 * warmth. This is based on the amount of time that has elapsed since the
 * thread last ran. If that amount of time is less than "rechoose_interval"
 * ticks, then we decide that the thread has enough cache warmth to warrant
 * some affinity for t->t_cpu.
 */
#define	THREAD_HAS_CACHE_WARMTH(thread)	\
	((thread == curthread) ||	\
	((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval))
/*
 * Put the specified thread on the back of the dispatcher
 * queue corresponding to its current priority.
 *
 * Called with the thread in transition, onproc or stopped state
 * and locked (transition implies locked) and at high spl.
 * Returns with the thread in TS_RUN state and still locked.
 */
void
setbackdq(kthread_t *tp)
{
	dispq_t		*dq;
	disp_t		*dp;
	cpu_t		*cp;
	pri_t		tpri;
	int		bound;
	boolean_t	self;

	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
	ASSERT(!thread_on_queue(tp));	/* make sure tp isn't on a runq */

	/*
	 * If thread is "swapped" or on the swap queue don't
	 * queue it, but wake sched.
	 */
	if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
		disp_swapped_setrun(tp);
		return;
	}

	self = (tp == curthread);

	if (tp->t_bound_cpu || tp->t_weakbound_cpu)
		bound = 1;
	else
		bound = 0;

	tpri = DISP_PRIO(tp);
	if (ncpus == 1)
		cp = tp->t_cpu;
	else if (!bound) {
		if (tpri >= kpqpri) {
			setkpdq(tp, SETKP_BACK);
			return;
		}

		/*
		 * We'll generally let this thread continue to run where
		 * it last ran...but will consider migration if:
		 * - The thread probably doesn't have much cache warmth.
		 * - The CPU where it last ran is the target of an offline
		 *   request.
		 * - The thread last ran outside its home lgroup.
		 */
		if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
		    (tp->t_cpu == cpu_inmotion)) {
			cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
		} else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
			cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
			    self ? tp->t_cpu : NULL);
		} else {
			cp = tp->t_cpu;
		}

		if (tp->t_cpupart == cp->cpu_part) {
			int	qlen;

			/*
			 * Perform any CMT load balancing
			 */
			cp = cmt_balance(tp, cp);

			/*
			 * Balance across the run queues
			 */
			qlen = RUNQ_LEN(cp, tpri);
			if (tpri >= RUNQ_MATCH_PRI &&
			    !(tp->t_schedflag & TS_RUNQMATCH))
				qlen -= RUNQ_MAX_DIFF;
			if (qlen > 0) {
				cpu_t *newcp;

				if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
					newcp = cp->cpu_next_part;
				} else if ((newcp = cp->cpu_next_lpl) == cp) {
					newcp = cp->cpu_next_part;
				}

				if (RUNQ_LEN(newcp, tpri) < qlen) {
					DTRACE_PROBE3(runq__balance,
					    kthread_t *, tp,
					    cpu_t *, cp, cpu_t *, newcp);
					cp = newcp;
				}
			}
		} else {
			/*
			 * Migrate to a cpu in the new partition.
1287 */ 1288 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1289 tp->t_lpl, tp->t_pri, NULL); 1290 } 1291 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1292 } else { 1293 /* 1294 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1295 * a short time until weak binding that existed when the 1296 * strong binding was established has dropped) so we must 1297 * favour weak binding over strong. 1298 */ 1299 cp = tp->t_weakbound_cpu ? 1300 tp->t_weakbound_cpu : tp->t_bound_cpu; 1301 } 1302 /* 1303 * A thread that is ONPROC may be temporarily placed on the run queue 1304 * but then chosen to run again by disp. If the thread we're placing on 1305 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1306 * replacement process is actually scheduled in swtch(). In this 1307 * situation, curthread is the only thread that could be in the ONPROC 1308 * state. 1309 */ 1310 if ((!self) && (tp->t_waitrq == 0)) { 1311 hrtime_t curtime; 1312 1313 curtime = gethrtime_unscaled(); 1314 (void) cpu_update_pct(tp, curtime); 1315 tp->t_waitrq = curtime; 1316 } else { 1317 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1318 } 1319 1320 dp = cp->cpu_disp; 1321 disp_lock_enter_high(&dp->disp_lock); 1322 1323 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0); 1324 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p", 1325 tpri, cp, tp); 1326 1327 #ifndef NPROBE 1328 /* Kernel probe */ 1329 if (tnf_tracing_active) 1330 tnf_thread_queue(tp, cp, tpri); 1331 #endif /* NPROBE */ 1332 1333 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1334 1335 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1336 tp->t_disp_queue = dp; 1337 tp->t_link = NULL; 1338 1339 dq = &dp->disp_q[tpri]; 1340 dp->disp_nrunnable++; 1341 if (!bound) 1342 dp->disp_steal = 0; 1343 membar_enter(); 1344 1345 if (dq->dq_sruncnt++ != 0) { 1346 ASSERT(dq->dq_first != NULL); 1347 dq->dq_last->t_link = tp; 1348 dq->dq_last = tp; 1349 } else { 1350 ASSERT(dq->dq_first == NULL); 1351 ASSERT(dq->dq_last == NULL); 1352 dq->dq_first = dq->dq_last = tp; 1353 BT_SET(dp->disp_qactmap, tpri); 1354 if (tpri > dp->disp_maxrunpri) { 1355 dp->disp_maxrunpri = tpri; 1356 membar_enter(); 1357 cpu_resched(cp, tpri); 1358 } 1359 } 1360 1361 if (!bound && tpri > dp->disp_max_unbound_pri) { 1362 if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) { 1363 /* 1364 * If there are no other unbound threads on the 1365 * run queue, don't allow other CPUs to steal 1366 * this thread while we are in the middle of a 1367 * context switch. We may just switch to it 1368 * again right away. CPU_DISP_DONTSTEAL is cleared 1369 * in swtch and swtch_to. 1370 */ 1371 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1372 } 1373 dp->disp_max_unbound_pri = tpri; 1374 } 1375 (*disp_enq_thread)(cp, bound); 1376 } 1377 1378 /* 1379 * Put the specified thread on the front of the dispatcher 1380 * queue corresponding to its current priority. 1381 * 1382 * Called with the thread in transition, onproc or stopped state 1383 * and locked (transition implies locked) and at high spl. 1384 * Returns with the thread in TS_RUN state and still locked. 1385 */ 1386 void 1387 setfrontdq(kthread_t *tp) 1388 { 1389 disp_t *dp; 1390 dispq_t *dq; 1391 cpu_t *cp; 1392 pri_t tpri; 1393 int bound; 1394 1395 ASSERT(THREAD_LOCK_HELD(tp)); 1396 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1397 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1398 1399 /* 1400 * If thread is "swapped" or on the swap queue don't 1401 * queue it, but wake sched. 
1402 */ 1403 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1404 disp_swapped_setrun(tp); 1405 return; 1406 } 1407 1408 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1409 bound = 1; 1410 else 1411 bound = 0; 1412 1413 tpri = DISP_PRIO(tp); 1414 if (ncpus == 1) 1415 cp = tp->t_cpu; 1416 else if (!bound) { 1417 if (tpri >= kpqpri) { 1418 setkpdq(tp, SETKP_FRONT); 1419 return; 1420 } 1421 cp = tp->t_cpu; 1422 if (tp->t_cpupart == cp->cpu_part) { 1423 /* 1424 * We'll generally let this thread continue to run 1425 * where it last ran, but will consider migration if: 1426 * - The thread last ran outside it's home lgroup. 1427 * - The CPU where it last ran is the target of an 1428 * offline request (a thread_nomigrate() on the in 1429 * motion CPU relies on this when forcing a preempt). 1430 * - The thread isn't the highest priority thread where 1431 * it last ran, and it is considered not likely to 1432 * have significant cache warmth. 1433 */ 1434 if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) || 1435 (cp == cpu_inmotion)) { 1436 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, 1437 (tp == curthread) ? cp : NULL); 1438 } else if ((tpri < cp->cpu_disp->disp_maxrunpri) && 1439 (!THREAD_HAS_CACHE_WARMTH(tp))) { 1440 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, 1441 NULL); 1442 } 1443 } else { 1444 /* 1445 * Migrate to a cpu in the new partition. 1446 */ 1447 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1448 tp->t_lpl, tp->t_pri, NULL); 1449 } 1450 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1451 } else { 1452 /* 1453 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1454 * a short time until weak binding that existed when the 1455 * strong binding was established has dropped) so we must 1456 * favour weak binding over strong. 1457 */ 1458 cp = tp->t_weakbound_cpu ? 1459 tp->t_weakbound_cpu : tp->t_bound_cpu; 1460 } 1461 1462 /* 1463 * A thread that is ONPROC may be temporarily placed on the run queue 1464 * but then chosen to run again by disp. If the thread we're placing on 1465 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1466 * replacement process is actually scheduled in swtch(). In this 1467 * situation, curthread is the only thread that could be in the ONPROC 1468 * state. 
1469 */ 1470 if ((tp != curthread) && (tp->t_waitrq == 0)) { 1471 hrtime_t curtime; 1472 1473 curtime = gethrtime_unscaled(); 1474 (void) cpu_update_pct(tp, curtime); 1475 tp->t_waitrq = curtime; 1476 } else { 1477 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1478 } 1479 1480 dp = cp->cpu_disp; 1481 disp_lock_enter_high(&dp->disp_lock); 1482 1483 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1484 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1); 1485 1486 #ifndef NPROBE 1487 /* Kernel probe */ 1488 if (tnf_tracing_active) 1489 tnf_thread_queue(tp, cp, tpri); 1490 #endif /* NPROBE */ 1491 1492 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1493 1494 THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */ 1495 tp->t_disp_queue = dp; 1496 1497 dq = &dp->disp_q[tpri]; 1498 dp->disp_nrunnable++; 1499 if (!bound) 1500 dp->disp_steal = 0; 1501 membar_enter(); 1502 1503 if (dq->dq_sruncnt++ != 0) { 1504 ASSERT(dq->dq_last != NULL); 1505 tp->t_link = dq->dq_first; 1506 dq->dq_first = tp; 1507 } else { 1508 ASSERT(dq->dq_last == NULL); 1509 ASSERT(dq->dq_first == NULL); 1510 tp->t_link = NULL; 1511 dq->dq_first = dq->dq_last = tp; 1512 BT_SET(dp->disp_qactmap, tpri); 1513 if (tpri > dp->disp_maxrunpri) { 1514 dp->disp_maxrunpri = tpri; 1515 membar_enter(); 1516 cpu_resched(cp, tpri); 1517 } 1518 } 1519 1520 if (!bound && tpri > dp->disp_max_unbound_pri) { 1521 if (tp == curthread && dp->disp_max_unbound_pri == -1 && 1522 cp == CPU) { 1523 /* 1524 * If there are no other unbound threads on the 1525 * run queue, don't allow other CPUs to steal 1526 * this thread while we are in the middle of a 1527 * context switch. We may just switch to it 1528 * again right away. CPU_DISP_DONTSTEAL is cleared 1529 * in swtch and swtch_to. 
1530 */ 1531 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1532 } 1533 dp->disp_max_unbound_pri = tpri; 1534 } 1535 (*disp_enq_thread)(cp, bound); 1536 } 1537 1538 /* 1539 * Put a high-priority unbound thread on the kp queue 1540 */ 1541 static void 1542 setkpdq(kthread_t *tp, int borf) 1543 { 1544 dispq_t *dq; 1545 disp_t *dp; 1546 cpu_t *cp; 1547 pri_t tpri; 1548 1549 tpri = DISP_PRIO(tp); 1550 1551 dp = &tp->t_cpupart->cp_kp_queue; 1552 disp_lock_enter_high(&dp->disp_lock); 1553 1554 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1555 1556 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1557 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf); 1558 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1559 tp->t_disp_queue = dp; 1560 dp->disp_nrunnable++; 1561 dq = &dp->disp_q[tpri]; 1562 1563 if (dq->dq_sruncnt++ != 0) { 1564 if (borf == SETKP_BACK) { 1565 ASSERT(dq->dq_first != NULL); 1566 tp->t_link = NULL; 1567 dq->dq_last->t_link = tp; 1568 dq->dq_last = tp; 1569 } else { 1570 ASSERT(dq->dq_last != NULL); 1571 tp->t_link = dq->dq_first; 1572 dq->dq_first = tp; 1573 } 1574 } else { 1575 if (borf == SETKP_BACK) { 1576 ASSERT(dq->dq_first == NULL); 1577 ASSERT(dq->dq_last == NULL); 1578 dq->dq_first = dq->dq_last = tp; 1579 } else { 1580 ASSERT(dq->dq_last == NULL); 1581 ASSERT(dq->dq_first == NULL); 1582 tp->t_link = NULL; 1583 dq->dq_first = dq->dq_last = tp; 1584 } 1585 BT_SET(dp->disp_qactmap, tpri); 1586 if (tpri > dp->disp_max_unbound_pri) 1587 dp->disp_max_unbound_pri = tpri; 1588 if (tpri > dp->disp_maxrunpri) { 1589 dp->disp_maxrunpri = tpri; 1590 membar_enter(); 1591 } 1592 } 1593 1594 cp = tp->t_cpu; 1595 if (tp->t_cpupart != cp->cpu_part) { 1596 /* migrate to a cpu in the new partition */ 1597 cp = tp->t_cpupart->cp_cpulist; 1598 } 1599 cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL); 1600 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 1601 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1602 1603 #ifndef NPROBE 1604 /* Kernel probe */ 1605 if (tnf_tracing_active) 1606 tnf_thread_queue(tp, cp, tpri); 1607 #endif /* NPROBE */ 1608 1609 if (cp->cpu_chosen_level < tpri) 1610 cp->cpu_chosen_level = tpri; 1611 cpu_resched(cp, tpri); 1612 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 1613 (*disp_enq_thread)(cp, 0); 1614 } 1615 1616 /* 1617 * Remove a thread from the dispatcher queue if it is on it. 1618 * It is not an error if it is not found but we return whether 1619 * or not it was found in case the caller wants to check. 1620 */ 1621 int 1622 dispdeq(kthread_t *tp) 1623 { 1624 disp_t *dp; 1625 dispq_t *dq; 1626 kthread_t *rp; 1627 kthread_t *trp; 1628 kthread_t **ptp; 1629 int tpri; 1630 1631 ASSERT(THREAD_LOCK_HELD(tp)); 1632 1633 if (tp->t_state != TS_RUN) 1634 return (0); 1635 1636 /* 1637 * The thread is "swapped" or is on the swap queue and 1638 * hence no longer on the run queue, so return true. 1639 */ 1640 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) 1641 return (1); 1642 1643 tpri = DISP_PRIO(tp); 1644 dp = tp->t_disp_queue; 1645 ASSERT(tpri < dp->disp_npri); 1646 dq = &dp->disp_q[tpri]; 1647 ptp = &dq->dq_first; 1648 rp = *ptp; 1649 trp = NULL; 1650 1651 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1652 1653 /* 1654 * Search for thread in queue. 1655 * Double links would simplify this at the expense of disp/setrun. 
1656 */ 1657 while (rp != tp && rp != NULL) { 1658 trp = rp; 1659 ptp = &trp->t_link; 1660 rp = trp->t_link; 1661 } 1662 1663 if (rp == NULL) { 1664 panic("dispdeq: thread not on queue"); 1665 } 1666 1667 DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 1668 1669 /* 1670 * Found it so remove it from queue. 1671 */ 1672 if ((*ptp = rp->t_link) == NULL) 1673 dq->dq_last = trp; 1674 1675 dp->disp_nrunnable--; 1676 if (--dq->dq_sruncnt == 0) { 1677 dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri); 1678 if (dp->disp_nrunnable == 0) { 1679 dp->disp_max_unbound_pri = -1; 1680 dp->disp_maxrunpri = -1; 1681 } else if (tpri == dp->disp_maxrunpri) { 1682 int ipri; 1683 1684 ipri = bt_gethighbit(dp->disp_qactmap, 1685 dp->disp_maxrunpri >> BT_ULSHIFT); 1686 if (ipri < dp->disp_max_unbound_pri) 1687 dp->disp_max_unbound_pri = ipri; 1688 dp->disp_maxrunpri = ipri; 1689 } 1690 } 1691 tp->t_link = NULL; 1692 THREAD_TRANSITION(tp); /* put in intermediate state */ 1693 return (1); 1694 } 1695 1696 1697 /* 1698 * dq_sruninc and dq_srundec are public functions for 1699 * incrementing/decrementing the sruncnts when a thread on 1700 * a dispatcher queue is made schedulable/unschedulable by 1701 * resetting the TS_LOAD flag. 1702 * 1703 * The caller MUST have the thread lock and therefore the dispatcher 1704 * queue lock so that the operation which changes 1705 * the flag, the operation that checks the status of the thread to 1706 * determine if it's on a disp queue AND the call to this function 1707 * are one atomic operation with respect to interrupts. 1708 */ 1709 1710 /* 1711 * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread. 1712 */ 1713 void 1714 dq_sruninc(kthread_t *t) 1715 { 1716 ASSERT(t->t_state == TS_RUN); 1717 ASSERT(t->t_schedflag & TS_LOAD); 1718 1719 THREAD_TRANSITION(t); 1720 setfrontdq(t); 1721 } 1722 1723 /* 1724 * See comment on calling conventions above. 1725 * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread. 1726 */ 1727 void 1728 dq_srundec(kthread_t *t) 1729 { 1730 ASSERT(t->t_schedflag & TS_LOAD); 1731 1732 (void) dispdeq(t); 1733 disp_swapped_enq(t); 1734 } 1735 1736 /* 1737 * Change the dispatcher lock of thread to the "swapped_lock" 1738 * and return with thread lock still held. 1739 * 1740 * Called with thread_lock held, in transition state, and at high spl. 1741 */ 1742 void 1743 disp_swapped_enq(kthread_t *tp) 1744 { 1745 ASSERT(THREAD_LOCK_HELD(tp)); 1746 ASSERT(tp->t_schedflag & TS_LOAD); 1747 1748 switch (tp->t_state) { 1749 case TS_RUN: 1750 disp_lock_enter_high(&swapped_lock); 1751 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1752 break; 1753 case TS_ONPROC: 1754 disp_lock_enter_high(&swapped_lock); 1755 THREAD_TRANSITION(tp); 1756 wake_sched_sec = 1; /* tell clock to wake sched */ 1757 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1758 break; 1759 default: 1760 panic("disp_swapped: tp: %p bad t_state", (void *)tp); 1761 } 1762 } 1763 1764 /* 1765 * This routine is called by setbackdq/setfrontdq if the thread is 1766 * not loaded or loaded and on the swap queue. 1767 * 1768 * Thread state TS_SLEEP implies that a swapped thread 1769 * has been woken up and needs to be swapped in by the swapper. 1770 * 1771 * Thread state TS_RUN, it implies that the priority of a swapped 1772 * thread is being increased by scheduling class (e.g. ts_update). 
1773 */ 1774 static void 1775 disp_swapped_setrun(kthread_t *tp) 1776 { 1777 ASSERT(THREAD_LOCK_HELD(tp)); 1778 ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD); 1779 1780 switch (tp->t_state) { 1781 case TS_SLEEP: 1782 disp_lock_enter_high(&swapped_lock); 1783 /* 1784 * Wakeup sched immediately (i.e., next tick) if the 1785 * thread priority is above maxclsyspri. 1786 */ 1787 if (DISP_PRIO(tp) > maxclsyspri) 1788 wake_sched = 1; 1789 else 1790 wake_sched_sec = 1; 1791 THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */ 1792 break; 1793 case TS_RUN: /* called from ts_update */ 1794 break; 1795 default: 1796 panic("disp_swapped_setrun: tp: %p bad t_state", (void *)tp); 1797 } 1798 } 1799 1800 /* 1801 * Make a thread give up its processor. Find the processor on 1802 * which this thread is executing, and have that processor 1803 * preempt. 1804 * 1805 * We allow System Duty Cycle (SDC) threads to be preempted even if 1806 * they are running at kernel priorities. To implement this, we always 1807 * set cpu_kprunrun; this ensures preempt() will be called. Since SDC 1808 * calls cpu_surrender() very often, we only preempt if there is anyone 1809 * competing with us. 1810 */ 1811 void 1812 cpu_surrender(kthread_t *tp) 1813 { 1814 cpu_t *cpup; 1815 int max_pri; 1816 int max_run_pri; 1817 klwp_t *lwp; 1818 1819 ASSERT(THREAD_LOCK_HELD(tp)); 1820 1821 if (tp->t_state != TS_ONPROC) 1822 return; 1823 cpup = tp->t_disp_queue->disp_cpu; /* CPU thread dispatched to */ 1824 max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */ 1825 max_run_pri = CP_MAXRUNPRI(cpup->cpu_part); 1826 if (max_pri < max_run_pri) 1827 max_pri = max_run_pri; 1828 1829 if (tp->t_cid == sysdccid) { 1830 uint_t t_pri = DISP_PRIO(tp); 1831 if (t_pri > max_pri) 1832 return; /* we are not competing w/ anyone */ 1833 cpup->cpu_runrun = cpup->cpu_kprunrun = 1; 1834 } else { 1835 cpup->cpu_runrun = 1; 1836 if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) { 1837 cpup->cpu_kprunrun = 1; 1838 } 1839 } 1840 1841 /* 1842 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 1843 */ 1844 membar_enter(); 1845 1846 DTRACE_SCHED1(surrender, kthread_t *, tp); 1847 1848 /* 1849 * Make the target thread take an excursion through trap() 1850 * to do preempt() (unless we're already in trap or post_syscall, 1851 * calling cpu_surrender via CL_TRAPRET). 
1852 */ 1853 if (tp != curthread || (lwp = tp->t_lwp) == NULL || 1854 lwp->lwp_state != LWP_USER) { 1855 aston(tp); 1856 if (cpup != CPU) 1857 poke_cpu(cpup->cpu_id); 1858 } 1859 TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER, 1860 "cpu_surrender:tid %p cpu %p", tp, cpup); 1861 } 1862 1863 /* 1864 * Commit to and ratify a scheduling decision 1865 */ 1866 /*ARGSUSED*/ 1867 static kthread_t * 1868 disp_ratify(kthread_t *tp, disp_t *kpq) 1869 { 1870 pri_t tpri, maxpri; 1871 pri_t maxkpri; 1872 cpu_t *cpup; 1873 1874 ASSERT(tp != NULL); 1875 /* 1876 * Commit to, then ratify scheduling decision 1877 */ 1878 cpup = CPU; 1879 if (cpup->cpu_runrun != 0) 1880 cpup->cpu_runrun = 0; 1881 if (cpup->cpu_kprunrun != 0) 1882 cpup->cpu_kprunrun = 0; 1883 if (cpup->cpu_chosen_level != -1) 1884 cpup->cpu_chosen_level = -1; 1885 membar_enter(); 1886 tpri = DISP_PRIO(tp); 1887 maxpri = cpup->cpu_disp->disp_maxrunpri; 1888 maxkpri = kpq->disp_maxrunpri; 1889 if (maxpri < maxkpri) 1890 maxpri = maxkpri; 1891 if (tpri < maxpri) { 1892 /* 1893 * should have done better 1894 * put this one back and indicate to try again 1895 */ 1896 cpup->cpu_dispthread = curthread; /* fixup dispthread */ 1897 cpup->cpu_dispatch_pri = DISP_PRIO(curthread); 1898 thread_lock_high(tp); 1899 THREAD_TRANSITION(tp); 1900 setfrontdq(tp); 1901 thread_unlock_nopreempt(tp); 1902 1903 tp = NULL; 1904 } 1905 return (tp); 1906 } 1907 1908 /* 1909 * See if there is any work on the dispatcher queue for other CPUs. 1910 * If there is, dequeue the best thread and return. 1911 */ 1912 static kthread_t * 1913 disp_getwork(cpu_t *cp) 1914 { 1915 cpu_t *ocp; /* other CPU */ 1916 cpu_t *ocp_start; 1917 cpu_t *tcp; /* target local CPU */ 1918 kthread_t *tp; 1919 kthread_t *retval = NULL; 1920 pri_t maxpri; 1921 disp_t *kpq; /* kp queue for this partition */ 1922 lpl_t *lpl, *lpl_leaf; 1923 int leafidx, startidx; 1924 hrtime_t stealtime; 1925 lgrp_id_t local_id; 1926 1927 maxpri = -1; 1928 tcp = NULL; 1929 1930 kpq = &cp->cpu_part->cp_kp_queue; 1931 while (kpq->disp_maxrunpri >= 0) { 1932 /* 1933 * Try to take a thread from the kp_queue. 1934 */ 1935 tp = (disp_getbest(kpq)); 1936 if (tp) 1937 return (disp_ratify(tp, kpq)); 1938 } 1939 1940 kpreempt_disable(); /* protect the cpu_active list */ 1941 1942 /* 1943 * Try to find something to do on another CPU's run queue. 1944 * Loop through all other CPUs looking for the one with the highest 1945 * priority unbound thread. 1946 * 1947 * On NUMA machines, the partition's CPUs are consulted in order of 1948 * distance from the current CPU. This way, the first available 1949 * work found is also the closest, and will suffer the least 1950 * from being migrated. 1951 */ 1952 lpl = lpl_leaf = cp->cpu_lpl; 1953 local_id = lpl_leaf->lpl_lgrpid; 1954 leafidx = startidx = 0; 1955 1956 /* 1957 * This loop traverses the lpl hierarchy. Higher level lpls represent 1958 * broader levels of locality 1959 */ 1960 do { 1961 /* This loop iterates over the lpl's leaves */ 1962 do { 1963 if (lpl_leaf != cp->cpu_lpl) 1964 ocp = lpl_leaf->lpl_cpus; 1965 else 1966 ocp = cp->cpu_next_lpl; 1967 1968 /* This loop iterates over the CPUs in the leaf */ 1969 ocp_start = ocp; 1970 do { 1971 pri_t pri; 1972 1973 ASSERT(CPU_ACTIVE(ocp)); 1974 1975 /* 1976 * End our stroll around this lpl if: 1977 * 1978 * - Something became runnable on the local 1979 * queue...which also ends our stroll around 1980 * the partition. 1981 * 1982 * - We happen across another idle CPU. 
1983 * Since it is patrolling the next portion 1984 * of the lpl's list (assuming it's not 1985 * halted, or busy servicing an interrupt), 1986 * move to the next higher level of locality. 1987 */ 1988 if (cp->cpu_disp->disp_nrunnable != 0) { 1989 kpreempt_enable(); 1990 return (NULL); 1991 } 1992 if (ocp->cpu_dispatch_pri == -1) { 1993 if (ocp->cpu_disp_flags & 1994 CPU_DISP_HALTED || 1995 ocp->cpu_intr_actv != 0) 1996 continue; 1997 else 1998 goto next_level; 1999 } 2000 2001 /* 2002 * If there's only one thread and the CPU 2003 * is in the middle of a context switch, 2004 * or it's currently running the idle thread, 2005 * don't steal it. 2006 */ 2007 if ((ocp->cpu_disp_flags & 2008 CPU_DISP_DONTSTEAL) && 2009 ocp->cpu_disp->disp_nrunnable == 1) 2010 continue; 2011 2012 pri = ocp->cpu_disp->disp_max_unbound_pri; 2013 if (pri > maxpri) { 2014 /* 2015 * Don't steal threads that we attempted 2016 * to steal recently until they're ready 2017 * to be stolen again. 2018 */ 2019 stealtime = ocp->cpu_disp->disp_steal; 2020 if (stealtime == 0 || 2021 stealtime - gethrtime() <= 0) { 2022 maxpri = pri; 2023 tcp = ocp; 2024 } else { 2025 /* 2026 * Don't update tcp, just set 2027 * the retval to T_DONTSTEAL, so 2028 * that if no acceptable CPUs 2029 * are found the return value 2030 * will be T_DONTSTEAL rather 2031 * then NULL. 2032 */ 2033 retval = T_DONTSTEAL; 2034 } 2035 } 2036 } while ((ocp = ocp->cpu_next_lpl) != ocp_start); 2037 2038 /* 2039 * Iterate to the next leaf lpl in the resource set 2040 * at this level of locality. If we hit the end of 2041 * the set, wrap back around to the beginning. 2042 * 2043 * Note: This iteration is NULL terminated for a reason 2044 * see lpl_topo_bootstrap() in lgrp.c for details. 2045 */ 2046 if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) { 2047 leafidx = 0; 2048 lpl_leaf = lpl->lpl_rset[leafidx]; 2049 } 2050 } while (leafidx != startidx); 2051 2052 next_level: 2053 /* 2054 * Expand the search to include farther away CPUs (next 2055 * locality level). The closer CPUs that have already been 2056 * checked will be checked again. In doing so, idle CPUs 2057 * will tend to be more aggresive about stealing from CPUs 2058 * that are closer (since the closer CPUs will be considered 2059 * more often). 2060 * Begin at this level with the CPUs local leaf lpl. 2061 */ 2062 if ((lpl = lpl->lpl_parent) != NULL) { 2063 leafidx = startidx = lpl->lpl_id2rset[local_id]; 2064 lpl_leaf = lpl->lpl_rset[leafidx]; 2065 } 2066 } while (!tcp && lpl); 2067 2068 kpreempt_enable(); 2069 2070 /* 2071 * If another queue looks good, and there is still nothing on 2072 * the local queue, try to transfer one or more threads 2073 * from it to our queue. 2074 */ 2075 if (tcp && cp->cpu_disp->disp_nrunnable == 0) { 2076 tp = disp_getbest(tcp->cpu_disp); 2077 if (tp == NULL || tp == T_DONTSTEAL) 2078 return (tp); 2079 return (disp_ratify(tp, kpq)); 2080 } 2081 return (retval); 2082 } 2083 2084 2085 /* 2086 * disp_fix_unbound_pri() 2087 * Determines the maximum priority of unbound threads on the queue. 2088 * The priority is kept for the queue, but is only increased, never 2089 * reduced unless some CPU is looking for something on that queue. 2090 * 2091 * The priority argument is the known upper limit. 2092 * 2093 * Perhaps this should be kept accurately, but that probably means 2094 * separate bitmaps for bound and unbound threads. Since only idled 2095 * CPUs will have to do this recalculation, it seems better this way. 
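 *
 * Editorial example (assuming 64-bit map words, so BT_ULSHIFT is 6):
 * for pri == 70, wx is 1 and the mask BT_BIW(70) - 1 selects the bits
 * for priorities 64..69 in that word; if none is active the search
 * drops to bt_gethighbit() over words 0..wx-1, and pri ends up as the
 * next lower active priority, or -1 when the queue is empty.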
2096 */ 2097 static void 2098 disp_fix_unbound_pri(disp_t *dp, pri_t pri) 2099 { 2100 kthread_t *tp; 2101 dispq_t *dq; 2102 ulong_t *dqactmap = dp->disp_qactmap; 2103 ulong_t mapword; 2104 int wx; 2105 2106 ASSERT(DISP_LOCK_HELD(&dp->disp_lock)); 2107 2108 ASSERT(pri >= 0); /* checked by caller */ 2109 2110 /* 2111 * Start the search at the next lowest priority below the supplied 2112 * priority. This depends on the bitmap implementation. 2113 */ 2114 do { 2115 wx = pri >> BT_ULSHIFT; /* index of word in map */ 2116 2117 /* 2118 * Form mask for all lower priorities in the word. 2119 */ 2120 mapword = dqactmap[wx] & (BT_BIW(pri) - 1); 2121 2122 /* 2123 * Get next lower active priority. 2124 */ 2125 if (mapword != 0) { 2126 pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1; 2127 } else if (wx > 0) { 2128 pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */ 2129 if (pri < 0) 2130 break; 2131 } else { 2132 pri = -1; 2133 break; 2134 } 2135 2136 /* 2137 * Search the queue for unbound, runnable threads. 2138 */ 2139 dq = &dp->disp_q[pri]; 2140 tp = dq->dq_first; 2141 2142 while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) { 2143 tp = tp->t_link; 2144 } 2145 2146 /* 2147 * If a thread was found, set the priority and return. 2148 */ 2149 } while (tp == NULL); 2150 2151 /* 2152 * pri holds the maximum unbound thread priority or -1. 2153 */ 2154 if (dp->disp_max_unbound_pri != pri) 2155 dp->disp_max_unbound_pri = pri; 2156 } 2157 2158 /* 2159 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should 2160 * check if the CPU to which it was previously bound should have 2161 * its disp_max_unbound_pri increased. 2162 */ 2163 void 2164 disp_adjust_unbound_pri(kthread_t *tp) 2165 { 2166 disp_t *dp; 2167 pri_t tpri; 2168 2169 ASSERT(THREAD_LOCK_HELD(tp)); 2170 2171 /* 2172 * Don't do anything if the thread is not bound, or 2173 * currently not runnable or swapped out. 2174 */ 2175 if (tp->t_bound_cpu == NULL || 2176 tp->t_state != TS_RUN || 2177 tp->t_schedflag & TS_ON_SWAPQ) 2178 return; 2179 2180 tpri = DISP_PRIO(tp); 2181 dp = tp->t_bound_cpu->cpu_disp; 2182 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 2183 if (tpri > dp->disp_max_unbound_pri) 2184 dp->disp_max_unbound_pri = tpri; 2185 } 2186 2187 /* 2188 * disp_getbest() 2189 * De-queue the highest priority unbound runnable thread. 2190 * Returns with the thread unlocked and onproc but at splhigh (like disp()). 2191 * Returns NULL if nothing found. 2192 * Returns T_DONTSTEAL if the thread was not stealable, 2193 * so that the caller will try again later. 2194 * 2195 * Passed a pointer to a dispatch queue not associated with this CPU, and 2196 * its type. 2197 */ 2198 static kthread_t * 2199 disp_getbest(disp_t *dp) 2200 { 2201 kthread_t *tp; 2202 dispq_t *dq; 2203 pri_t pri; 2204 cpu_t *cp, *tcp; 2205 boolean_t allbound; 2206 2207 disp_lock_enter(&dp->disp_lock); 2208 2209 /* 2210 * If there is nothing to run, or the CPU is in the middle of a 2211 * context switch of the only thread, return NULL. 2212 */ 2213 tcp = dp->disp_cpu; 2214 cp = CPU; 2215 pri = dp->disp_max_unbound_pri; 2216 if (pri == -1 || 2217 (tcp != NULL && (tcp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 2218 tcp->cpu_disp->disp_nrunnable == 1)) { 2219 disp_lock_exit_nopreempt(&dp->disp_lock); 2220 return (NULL); 2221 } 2222 2223 dq = &dp->disp_q[pri]; 2224 2225 2226 /* 2227 * Assume that all threads are bound on this queue, and change it 2228 * later when we find out that it is not the case. 
2229 */ 2230 allbound = B_TRUE; 2231 for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) { 2232 hrtime_t now, nosteal, rqtime; 2233 2234 /* 2235 * Skip over bound threads which could be here even 2236 * though disp_max_unbound_pri indicated this level. 2237 */ 2238 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 2239 continue; 2240 2241 /* 2242 * We've got some unbound threads on this queue, so turn 2243 * the allbound flag off now. 2244 */ 2245 allbound = B_FALSE; 2246 2247 /* 2248 * The thread is a candidate for stealing from its run queue. We 2249 * don't want to steal threads that became runnable just a 2250 * moment ago. This improves CPU affinity for threads that get 2251 * preempted for short periods of time and go back on the run 2252 * queue. 2253 * 2254 * We want to let it stay on its run queue if it was only placed 2255 * there recently and it was running on the same CPU before that 2256 * to preserve its cache investment. For the thread to remain on 2257 * its run queue, ALL of the following conditions must be 2258 * satisfied: 2259 * 2260 * - the disp queue should not be the kernel preemption queue 2261 * - delayed idle stealing should not be disabled 2262 * - nosteal_nsec should be non-zero 2263 * - it should run with user priority 2264 * - it should be on the run queue of the CPU where it was 2265 * running before being placed on the run queue 2266 * - it should be the only thread on the run queue (to prevent 2267 * extra scheduling latency for other threads) 2268 * - it should sit on the run queue for less than per-chip 2269 * nosteal interval or global nosteal interval 2270 * - in case of CPUs with shared cache it should sit in a run 2271 * queue of a CPU from a different chip 2272 * 2273 * The checks are arranged so that the ones that are faster are 2274 * placed earlier. 2275 */ 2276 if (tcp == NULL || 2277 pri >= minclsyspri || 2278 tp->t_cpu != tcp) 2279 break; 2280 2281 /* 2282 * Steal immediately if, due to the CMT processor architecture, 2283 * migration between cp and tcp would incur no performance 2284 * penalty. 2285 */ 2286 if (pg_cmt_can_migrate(cp, tcp)) 2287 break; 2288 2289 nosteal = nosteal_nsec; 2290 if (nosteal == 0) 2291 break; 2292 2293 /* 2294 * Calculate time spent sitting on run queue 2295 */ 2296 now = gethrtime_unscaled(); 2297 rqtime = now - tp->t_waitrq; 2298 scalehrtime(&rqtime); 2299 2300 /* 2301 * Steal immediately if the time spent on this run queue is more 2302 * than allowed nosteal delay. 2303 * 2304 * Negative rqtime check is needed here to avoid infinite 2305 * stealing delays caused by unlikely but not impossible 2306 * drifts between CPU times on different CPUs. 2307 */ 2308 if (rqtime > nosteal || rqtime < 0) 2309 break; 2310 2311 DTRACE_PROBE4(nosteal, kthread_t *, tp, 2312 cpu_t *, tcp, cpu_t *, cp, hrtime_t, rqtime); 2313 scalehrtime(&now); 2314 /* 2315 * Calculate when this thread becomes stealable 2316 */ 2317 now += (nosteal - rqtime); 2318 2319 /* 2320 * Calculate time when some thread becomes stealable 2321 */ 2322 if (now < dp->disp_steal) 2323 dp->disp_steal = now; 2324 } 2325 2326 /* 2327 * If there were no unbound threads on this queue, find the queue 2328 * where they are and then return later. The value of 2329 * disp_max_unbound_pri is not always accurate because it isn't 2330 * reduced until another idle CPU looks for work. 
2331 */ 2332 if (allbound) 2333 disp_fix_unbound_pri(dp, pri); 2334 2335 /* 2336 * If we reached the end of the queue and found no unbound threads, 2337 * then return NULL so that other CPUs will be considered. If there 2338 * are unbound threads but they cannot yet be stolen, then 2339 * return T_DONTSTEAL and try again later. 2340 */ 2341 if (tp == NULL) { 2342 disp_lock_exit_nopreempt(&dp->disp_lock); 2343 return (allbound ? NULL : T_DONTSTEAL); 2344 } 2345 2346 /* 2347 * Found a runnable, unbound thread, so remove it from queue. 2348 * dispdeq() requires that we have the thread locked, and we do, 2349 * by virtue of holding the dispatch queue lock. dispdeq() will 2350 * put the thread in transition state, thereby dropping the dispq 2351 * lock. 2352 */ 2353 2354 #ifdef DEBUG 2355 { 2356 int thread_was_on_queue; 2357 2358 thread_was_on_queue = dispdeq(tp); /* drops disp_lock */ 2359 ASSERT(thread_was_on_queue); 2360 } 2361 2362 #else /* DEBUG */ 2363 (void) dispdeq(tp); /* drops disp_lock */ 2364 #endif /* DEBUG */ 2365 2366 /* 2367 * Reset the disp_queue steal time - we do not know what the smallest 2368 * value across the queue is. 2369 */ 2370 dp->disp_steal = 0; 2371 2372 tp->t_schedflag |= TS_DONT_SWAP; 2373 2374 /* 2375 * Setup thread to run on the current CPU. 2376 */ 2377 tp->t_disp_queue = cp->cpu_disp; 2378 2379 cp->cpu_dispthread = tp; /* protected by spl only */ 2380 cp->cpu_dispatch_pri = pri; 2381 2382 /* 2383 * There can be a memory synchronization race between disp_getbest() 2384 * and disp_ratify() vs cpu_resched() where cpu_resched() is trying 2385 * to preempt the current thread to run the enqueued thread while 2386 * disp_getbest() and disp_ratify() are changing the current thread 2387 * to the stolen thread. This may lead to a situation where 2388 * cpu_resched() tries to preempt the wrong thread and the 2389 * stolen thread continues to run on the CPU which has been tagged 2390 * for preemption. 2391 * Later the clock thread gets enqueued but doesn't get to run on the 2392 * CPU causing the system to hang. 2393 * 2394 * To avoid this, grabbing and dropping the disp_lock (which does 2395 * a memory barrier) is needed to synchronize the execution of 2396 * cpu_resched() with disp_getbest() and disp_ratify() and 2397 * synchronize the memory read and written by cpu_resched(), 2398 * disp_getbest(), and disp_ratify() with each other. 2399 * (see CR#6482861 for more details). 2400 */ 2401 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 2402 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 2403 2404 ASSERT(pri == DISP_PRIO(tp)); 2405 2406 DTRACE_PROBE3(steal, kthread_t *, tp, cpu_t *, tcp, cpu_t *, cp); 2407 2408 thread_onproc(tp, cp); /* set t_state to TS_ONPROC */ 2409 2410 /* 2411 * Return with spl high so that swtch() won't need to raise it. 2412 * The disp_lock was dropped by dispdeq(). 2413 */ 2414 2415 return (tp); 2416 } 2417 2418 /* 2419 * disp_bound_common() - common routine for higher level functions 2420 * that check for bound threads under certain conditions. 2421 * If 'threadlistsafe' is set then there is no need to acquire 2422 * pidlock to stop the thread list from changing (e.g., if 2423 * disp_bound_* is called with cpus paused). 
2424 */ 2425 static int 2426 disp_bound_common(cpu_t *cp, int threadlistsafe, int flag) 2427 { 2428 int found = 0; 2429 kthread_t *tp; 2430 2431 ASSERT(flag); 2432 2433 if (!threadlistsafe) 2434 mutex_enter(&pidlock); 2435 tp = curthread; /* faster than allthreads */ 2436 do { 2437 if (tp->t_state != TS_FREE) { 2438 /* 2439 * If an interrupt thread is busy, but the 2440 * caller doesn't care (i.e. BOUND_INTR is off), 2441 * then just ignore it and continue through. 2442 */ 2443 if ((tp->t_flag & T_INTR_THREAD) && 2444 !(flag & BOUND_INTR)) 2445 continue; 2446 2447 /* 2448 * Skip the idle thread for the CPU 2449 * we're about to set offline. 2450 */ 2451 if (tp == cp->cpu_idle_thread) 2452 continue; 2453 2454 /* 2455 * Skip the pause thread for the CPU 2456 * we're about to set offline. 2457 */ 2458 if (tp == cp->cpu_pause_thread) 2459 continue; 2460 2461 if ((flag & BOUND_CPU) && 2462 (tp->t_bound_cpu == cp || 2463 tp->t_bind_cpu == cp->cpu_id || 2464 tp->t_weakbound_cpu == cp)) { 2465 found = 1; 2466 break; 2467 } 2468 2469 if ((flag & BOUND_PARTITION) && 2470 (tp->t_cpupart == cp->cpu_part)) { 2471 found = 1; 2472 break; 2473 } 2474 } 2475 } while ((tp = tp->t_next) != curthread && found == 0); 2476 if (!threadlistsafe) 2477 mutex_exit(&pidlock); 2478 return (found); 2479 } 2480 2481 /* 2482 * disp_bound_threads - return nonzero if threads are bound to the processor. 2483 * Called infrequently. Keep this simple. 2484 * Includes threads that are asleep or stopped but not onproc. 2485 */ 2486 int 2487 disp_bound_threads(cpu_t *cp, int threadlistsafe) 2488 { 2489 return (disp_bound_common(cp, threadlistsafe, BOUND_CPU)); 2490 } 2491 2492 /* 2493 * disp_bound_anythreads - return nonzero if _any_ threads are bound 2494 * to the given processor, including interrupt threads. 2495 */ 2496 int 2497 disp_bound_anythreads(cpu_t *cp, int threadlistsafe) 2498 { 2499 return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR)); 2500 } 2501 2502 /* 2503 * disp_bound_partition - return nonzero if threads are bound to the same 2504 * partition as the processor. 2505 * Called infrequently. Keep this simple. 2506 * Includes threads that are asleep or stopped but not onproc. 2507 */ 2508 int 2509 disp_bound_partition(cpu_t *cp, int threadlistsafe) 2510 { 2511 return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION)); 2512 } 2513 2514 /* 2515 * disp_cpu_inactive - make a CPU inactive by moving all of its unbound 2516 * threads to other CPUs. 2517 */ 2518 void 2519 disp_cpu_inactive(cpu_t *cp) 2520 { 2521 kthread_t *tp; 2522 disp_t *dp = cp->cpu_disp; 2523 dispq_t *dq; 2524 pri_t pri; 2525 int wasonq; 2526 2527 disp_lock_enter(&dp->disp_lock); 2528 while ((pri = dp->disp_max_unbound_pri) != -1) { 2529 dq = &dp->disp_q[pri]; 2530 tp = dq->dq_first; 2531 2532 /* 2533 * Skip over bound threads. 2534 */ 2535 while (tp != NULL && tp->t_bound_cpu != NULL) { 2536 tp = tp->t_link; 2537 } 2538 2539 if (tp == NULL) { 2540 /* disp_max_unbound_pri must be inaccurate, so fix it */ 2541 disp_fix_unbound_pri(dp, pri); 2542 continue; 2543 } 2544 2545 wasonq = dispdeq(tp); /* drops disp_lock */ 2546 ASSERT(wasonq); 2547 ASSERT(tp->t_weakbound_cpu == NULL); 2548 2549 setbackdq(tp); 2550 /* 2551 * Called from cpu_offline: 2552 * 2553 * cp has already been removed from the list of active cpus 2554 * and tp->t_cpu has been changed so there is no risk of 2555 * tp ending up back on cp. 2556 * 2557 * Called from cpupart_move_cpu: 2558 * 2559 * The cpu has moved to a new cpupart. 
Any threads that 2560 * were on its dispatch queues before the move remain 2561 * in the old partition and can't run in the new partition. 2562 */ 2563 ASSERT(tp->t_cpu != cp); 2564 thread_unlock(tp); 2565 2566 disp_lock_enter(&dp->disp_lock); 2567 } 2568 disp_lock_exit(&dp->disp_lock); 2569 } 2570 2571 /* 2572 * disp_lowpri_cpu - find CPU running the lowest priority thread. 2573 * The hint passed in is used as a starting point so we don't favor 2574 * CPU 0 or any other CPU. The caller should pass in the most recently 2575 * used CPU for the thread. 2576 * 2577 * The lgroup and priority are used to determine the best CPU to run on 2578 * in a NUMA machine. The lgroup specifies which CPUs are closest while 2579 * the thread priority will indicate whether the thread will actually run 2580 * there. To pick the best CPU, the CPUs inside and outside of the given 2581 * lgroup which are running the lowest priority threads are found. The 2582 * remote CPU is chosen only if the thread will not run locally on a CPU 2583 * within the lgroup, but will run on the remote CPU. If the thread 2584 * cannot immediately run on any CPU, the best local CPU will be chosen. 2585 * 2586 * The lpl specified also identifies the cpu partition from which 2587 * disp_lowpri_cpu should select a CPU. 2588 * 2589 * curcpu is used to indicate that disp_lowpri_cpu is being called on 2590 * behalf of the current thread. (curthread is looking for a new cpu) 2591 * In this case, cpu_dispatch_pri for this thread's cpu should be 2592 * ignored. 2593 * 2594 * If a cpu is the target of an offline request then try to avoid it. 2595 * 2596 * This function must be called at either high SPL, or with preemption 2597 * disabled, so that the "hint" CPU cannot be removed from the online 2598 * CPU list while we are traversing it. 2599 */ 2600 cpu_t * 2601 disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu) 2602 { 2603 cpu_t *bestcpu; 2604 cpu_t *besthomecpu; 2605 cpu_t *cp, *cpstart; 2606 2607 pri_t bestpri; 2608 pri_t cpupri; 2609 2610 klgrpset_t done; 2611 klgrpset_t cur_set; 2612 2613 lpl_t *lpl_iter, *lpl_leaf; 2614 int i; 2615 2616 /* 2617 * Scan for a CPU currently running the lowest priority thread. 2618 * Cannot get cpu_lock here because it is adaptive. 2619 * We do not require lock on CPU list. 2620 */ 2621 ASSERT(hint != NULL); 2622 ASSERT(lpl != NULL); 2623 ASSERT(lpl->lpl_ncpu > 0); 2624 2625 /* 2626 * First examine local CPUs. Note that it's possible the hint CPU 2627 * passed in is remote to the specified home lgroup. If our priority 2628 * isn't high enough for us to run immediately at home, 2629 * then examine CPUs remote to our home lgroup. 2630 * We would like to give preference to CPUs closest to "home". 2631 * If we can't find a CPU where we'll run at a given level 2632 * of locality, we expand our search to include the next level. 
2633 */ 2634 bestcpu = besthomecpu = NULL; 2635 klgrpset_clear(done); 2636 /* start with lpl we were passed */ 2637 2638 lpl_iter = lpl; 2639 2640 do { 2641 2642 bestpri = SHRT_MAX; 2643 klgrpset_clear(cur_set); 2644 2645 for (i = 0; i < lpl_iter->lpl_nrset; i++) { 2646 lpl_leaf = lpl_iter->lpl_rset[i]; 2647 if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid)) 2648 continue; 2649 2650 klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid); 2651 2652 if (hint->cpu_lpl == lpl_leaf) 2653 cp = cpstart = hint; 2654 else 2655 cp = cpstart = lpl_leaf->lpl_cpus; 2656 2657 do { 2658 if (cp == curcpu) 2659 cpupri = -1; 2660 else if (cp == cpu_inmotion) 2661 cpupri = SHRT_MAX; 2662 else 2663 cpupri = cp->cpu_dispatch_pri; 2664 if (cp->cpu_disp->disp_maxrunpri > cpupri) 2665 cpupri = cp->cpu_disp->disp_maxrunpri; 2666 if (cp->cpu_chosen_level > cpupri) 2667 cpupri = cp->cpu_chosen_level; 2668 if (cpupri < bestpri) { 2669 if (CPU_IDLING(cpupri)) { 2670 ASSERT((cp->cpu_flags & 2671 CPU_QUIESCED) == 0); 2672 return (cp); 2673 } 2674 bestcpu = cp; 2675 bestpri = cpupri; 2676 } 2677 } while ((cp = cp->cpu_next_lpl) != cpstart); 2678 } 2679 2680 if (bestcpu && (tpri > bestpri)) { 2681 ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0); 2682 return (bestcpu); 2683 } 2684 if (besthomecpu == NULL) 2685 besthomecpu = bestcpu; 2686 /* 2687 * Add the lgrps we just considered to the "done" set 2688 */ 2689 klgrpset_or(done, cur_set); 2690 2691 } while ((lpl_iter = lpl_iter->lpl_parent) != NULL); 2692 2693 /* 2694 * The specified priority isn't high enough to run immediately 2695 * anywhere, so just return the best CPU from the home lgroup. 2696 */ 2697 ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0); 2698 return (besthomecpu); 2699 } 2700 2701 /* 2702 * This routine provides the generic idle cpu function for all processors. 2703 * If a processor has some specific code to execute when idle (say, to stop 2704 * the pipeline and save power) then that routine should be defined in the 2705 * processor's specific code (module_xx.c) and the global variable idle_cpu 2706 * set to that function. 2707 */ 2708 static void 2709 generic_idle_cpu(void) 2710 { 2711 } 2712 2713 /*ARGSUSED*/ 2714 static void 2715 generic_enq_thread(cpu_t *cpu, int bound) 2716 { 2717 } 2718
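
/*
 * Editorial sketch (not part of the original file): the comment above
 * describes how platform code can replace the generic idle and enqueue
 * hooks by assigning to the idle_cpu and disp_enq_thread function
 * pointers.  The mach_* names below are hypothetical and are shown only
 * to illustrate the intended override pattern; a real platform would do
 * this from its own machine-specific module.
 */

/* Hypothetical platform-specific idle routine. */
static void
mach_idle_cpu(void)
{
	/* e.g., halt the pipeline until the next interrupt to save power */
}

/* Hypothetical platform-specific enqueue notification. */
static void
mach_enq_thread(cpu_t *cp, int bound)
{
	/* e.g., wake cp if it is halted so it notices the newly queued work */
}

/*
 * Hypothetical platform init hook: install the overrides in place of
 * generic_idle_cpu()/generic_enq_thread().
 */
void
mach_disp_hooks_init(void)
{
	idle_cpu = mach_idle_cpu;
	disp_enq_thread = mach_enq_thread;
}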