1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright 2019 Joyent, Inc. 28 */ 29 30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 31 /* All Rights Reserved */ 32 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/sysmacros.h> 37 #include <sys/signal.h> 38 #include <sys/user.h> 39 #include <sys/systm.h> 40 #include <sys/sysinfo.h> 41 #include <sys/var.h> 42 #include <sys/errno.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/inline.h> 46 #include <sys/disp.h> 47 #include <sys/class.h> 48 #include <sys/bitmap.h> 49 #include <sys/kmem.h> 50 #include <sys/cpuvar.h> 51 #include <sys/vtrace.h> 52 #include <sys/tnf.h> 53 #include <sys/cpupart.h> 54 #include <sys/lgrp.h> 55 #include <sys/pg.h> 56 #include <sys/cmt.h> 57 #include <sys/bitset.h> 58 #include <sys/schedctl.h> 59 #include <sys/atomic.h> 60 #include <sys/dtrace.h> 61 #include <sys/sdt.h> 62 #include <sys/archsystm.h> 63 #include <sys/smt.h> 64 65 #include <vm/as.h> 66 67 #define BOUND_CPU 0x1 68 #define BOUND_PARTITION 0x2 69 #define BOUND_INTR 0x4 70 71 /* Dispatch queue allocation structure and functions */ 72 struct disp_queue_info { 73 disp_t *dp; 74 dispq_t *olddispq; 75 dispq_t *newdispq; 76 ulong_t *olddqactmap; 77 ulong_t *newdqactmap; 78 int oldnglobpris; 79 }; 80 static void disp_dq_alloc(struct disp_queue_info *dptr, int numpris, 81 disp_t *dp); 82 static void disp_dq_assign(struct disp_queue_info *dptr, int numpris); 83 static void disp_dq_free(struct disp_queue_info *dptr); 84 85 /* platform-specific routine to call when processor is idle */ 86 static void generic_idle_cpu(); 87 void (*idle_cpu)() = generic_idle_cpu; 88 89 /* routines invoked when a CPU enters/exits the idle loop */ 90 static void idle_enter(); 91 static void idle_exit(); 92 93 /* platform-specific routine to call when thread is enqueued */ 94 static void generic_enq_thread(cpu_t *, int); 95 void (*disp_enq_thread)(cpu_t *, int) = generic_enq_thread; 96 97 pri_t kpreemptpri; /* priority where kernel preemption applies */ 98 pri_t upreemptpri = 0; /* priority where normal preemption applies */ 99 pri_t intr_pri; /* interrupt thread priority base level */ 100 101 #define KPQPRI -1 /* pri where cpu affinity is dropped for kpq */ 102 pri_t kpqpri = KPQPRI; /* can be set in /etc/system */ 103 disp_t cpu0_disp; /* boot CPU's dispatch queue */ 104 disp_lock_t swapped_lock; /* lock swapped threads and swap queue */ 105 int nswapped; /* total number of swapped threads */ 106 void disp_swapped_enq(kthread_t *tp); 107 static void disp_swapped_setrun(kthread_t *tp); 108 static void 
cpu_resched(cpu_t *cp, pri_t tpri); 109 110 /* 111 * If this is set, only interrupt threads will cause kernel preemptions. 112 * This is done by changing the value of kpreemptpri. kpreemptpri 113 * will either be the max sysclass pri or the min interrupt pri. 114 */ 115 int only_intr_kpreempt; 116 117 extern void set_idle_cpu(int cpun); 118 extern void unset_idle_cpu(int cpun); 119 static void setkpdq(kthread_t *tp, int borf); 120 #define SETKP_BACK 0 121 #define SETKP_FRONT 1 122 /* 123 * Parameter that determines how recently a thread must have run 124 * on the CPU to be considered loosely-bound to that CPU to reduce 125 * cold cache effects. The interval is in hertz. 126 */ 127 #define RECHOOSE_INTERVAL 3 128 int rechoose_interval = RECHOOSE_INTERVAL; 129 130 /* 131 * Parameter that determines how long (in nanoseconds) a thread must 132 * be sitting on a run queue before it can be stolen by another CPU 133 * to reduce migrations. The interval is in nanoseconds. 134 * 135 * The nosteal_nsec should be set by platform code cmp_set_nosteal_interval() 136 * to an appropriate value. nosteal_nsec is set to NOSTEAL_UNINITIALIZED 137 * here indicating it is uninitiallized. 138 * Setting nosteal_nsec to 0 effectively disables the nosteal 'protection'. 139 * 140 */ 141 #define NOSTEAL_UNINITIALIZED (-1) 142 hrtime_t nosteal_nsec = NOSTEAL_UNINITIALIZED; 143 extern void cmp_set_nosteal_interval(void); 144 145 id_t defaultcid; /* system "default" class; see dispadmin(1M) */ 146 147 disp_lock_t transition_lock; /* lock on transitioning threads */ 148 disp_lock_t stop_lock; /* lock on stopped threads */ 149 150 static void cpu_dispqalloc(int numpris); 151 152 /* 153 * This gets returned by disp_getwork/disp_getbest if we couldn't steal 154 * a thread because it was sitting on its run queue for a very short 155 * period of time. 156 */ 157 #define T_DONTSTEAL (kthread_t *)(-1) /* returned by disp_getwork/getbest */ 158 159 static kthread_t *disp_getwork(cpu_t *to); 160 static kthread_t *disp_getbest(disp_t *from); 161 static kthread_t *disp_ratify(kthread_t *tp, disp_t *kpq); 162 163 void swtch_to(kthread_t *); 164 165 /* 166 * dispatcher and scheduler initialization 167 */ 168 169 /* 170 * disp_setup - Common code to calculate and allocate dispatcher 171 * variables and structures based on the maximum priority. 172 */ 173 static void 174 disp_setup(pri_t maxglobpri, pri_t oldnglobpris) 175 { 176 pri_t newnglobpris; 177 178 ASSERT(MUTEX_HELD(&cpu_lock)); 179 180 newnglobpris = maxglobpri + 1 + LOCK_LEVEL; 181 182 if (newnglobpris > oldnglobpris) { 183 /* 184 * Allocate new kp queues for each CPU partition. 185 */ 186 cpupart_kpqalloc(newnglobpris); 187 188 /* 189 * Allocate new dispatch queues for each CPU. 190 */ 191 cpu_dispqalloc(newnglobpris); 192 193 /* 194 * compute new interrupt thread base priority 195 */ 196 intr_pri = maxglobpri; 197 if (only_intr_kpreempt) { 198 kpreemptpri = intr_pri + 1; 199 if (kpqpri == KPQPRI) 200 kpqpri = kpreemptpri; 201 } 202 v.v_nglobpris = newnglobpris; 203 } 204 } 205 206 /* 207 * dispinit - Called to initialize all loaded classes and the 208 * dispatcher framework. 209 */ 210 void 211 dispinit(void) 212 { 213 id_t cid; 214 pri_t maxglobpri; 215 pri_t cl_maxglobpri; 216 217 maxglobpri = -1; 218 219 /* 220 * Initialize transition lock, which will always be set. 
221 */ 222 DISP_LOCK_INIT(&transition_lock); 223 disp_lock_enter_high(&transition_lock); 224 DISP_LOCK_INIT(&stop_lock); 225 226 mutex_enter(&cpu_lock); 227 CPU->cpu_disp->disp_maxrunpri = -1; 228 CPU->cpu_disp->disp_max_unbound_pri = -1; 229 230 /* 231 * Initialize the default CPU partition. 232 */ 233 cpupart_initialize_default(); 234 /* 235 * Call the class specific initialization functions for 236 * all pre-installed schedulers. 237 * 238 * We pass the size of a class specific parameter 239 * buffer to each of the initialization functions 240 * to try to catch problems with backward compatibility 241 * of class modules. 242 * 243 * For example a new class module running on an old system 244 * which didn't provide sufficiently large parameter buffers 245 * would be bad news. Class initialization modules can check for 246 * this and take action if they detect a problem. 247 */ 248 249 for (cid = 0; cid < nclass; cid++) { 250 sclass_t *sc; 251 252 sc = &sclass[cid]; 253 if (SCHED_INSTALLED(sc)) { 254 cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ, 255 &sc->cl_funcs); 256 if (cl_maxglobpri > maxglobpri) 257 maxglobpri = cl_maxglobpri; 258 } 259 } 260 261 /* 262 * Historically, kpreemptpri was set to v_maxsyspri + 1 -- which is 263 * to say, maxclsyspri + 1. However, over time, the system has used 264 * more and more asynchronous kernel threads, with an increasing number 265 * of these doing work on direct behalf of higher-level software (e.g., 266 * network processing). This has led to potential priority inversions: 267 * threads doing low-priority lengthy kernel work can effectively 268 * delay kernel-level processing of higher-priority data. To minimize 269 * such inversions, we set kpreemptpri to be v_maxsyspri; anything in 270 * the kernel that runs at maxclsyspri will therefore induce kernel 271 * preemption, and this priority should be used if/when an asynchronous 272 * thread (or, as is often the case, task queue) is performing a task 273 * on behalf of higher-level software (or any task that is otherwise 274 * latency-sensitve). 275 */ 276 kpreemptpri = (pri_t)v.v_maxsyspri; 277 if (kpqpri == KPQPRI) 278 kpqpri = kpreemptpri; 279 280 ASSERT(maxglobpri >= 0); 281 disp_setup(maxglobpri, 0); 282 283 mutex_exit(&cpu_lock); 284 285 /* 286 * Platform specific sticky scheduler setup. 287 */ 288 if (nosteal_nsec == NOSTEAL_UNINITIALIZED) 289 cmp_set_nosteal_interval(); 290 291 /* 292 * Get the default class ID; this may be later modified via 293 * dispadmin(1M). This will load the class (normally TS) and that will 294 * call disp_add(), which is why we had to drop cpu_lock first. 295 */ 296 if (getcid(defaultclass, &defaultcid) != 0) { 297 cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'", 298 defaultclass); 299 } 300 } 301 302 /* 303 * disp_add - Called with class pointer to initialize the dispatcher 304 * for a newly loaded class. 305 */ 306 void 307 disp_add(sclass_t *clp) 308 { 309 pri_t maxglobpri; 310 pri_t cl_maxglobpri; 311 312 mutex_enter(&cpu_lock); 313 /* 314 * Initialize the scheduler class. 315 */ 316 maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1); 317 cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs); 318 if (cl_maxglobpri > maxglobpri) 319 maxglobpri = cl_maxglobpri; 320 321 /* 322 * Save old queue information. Since we're initializing a 323 * new scheduling class which has just been loaded, then 324 * the size of the dispq may have changed. We need to handle 325 * that here. 
326 */ 327 disp_setup(maxglobpri, v.v_nglobpris); 328 329 mutex_exit(&cpu_lock); 330 } 331 332 333 /* 334 * For each CPU, allocate new dispatch queues 335 * with the stated number of priorities. 336 */ 337 static void 338 cpu_dispqalloc(int numpris) 339 { 340 cpu_t *cpup; 341 struct disp_queue_info *disp_mem; 342 int i, num; 343 344 ASSERT(MUTEX_HELD(&cpu_lock)); 345 346 disp_mem = kmem_zalloc(NCPU * 347 sizeof (struct disp_queue_info), KM_SLEEP); 348 349 /* 350 * This routine must allocate all of the memory before stopping 351 * the cpus because it must not sleep in kmem_alloc while the 352 * CPUs are stopped. Locks they hold will not be freed until they 353 * are restarted. 354 */ 355 i = 0; 356 cpup = cpu_list; 357 do { 358 disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp); 359 i++; 360 cpup = cpup->cpu_next; 361 } while (cpup != cpu_list); 362 num = i; 363 364 pause_cpus(NULL, NULL); 365 for (i = 0; i < num; i++) 366 disp_dq_assign(&disp_mem[i], numpris); 367 start_cpus(); 368 369 /* 370 * I must free all of the memory after starting the cpus because 371 * I can not risk sleeping in kmem_free while the cpus are stopped. 372 */ 373 for (i = 0; i < num; i++) 374 disp_dq_free(&disp_mem[i]); 375 376 kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info)); 377 } 378 379 static void 380 disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp) 381 { 382 dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP); 383 dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) * 384 sizeof (long), KM_SLEEP); 385 dptr->dp = dp; 386 } 387 388 static void 389 disp_dq_assign(struct disp_queue_info *dptr, int numpris) 390 { 391 disp_t *dp; 392 393 dp = dptr->dp; 394 dptr->olddispq = dp->disp_q; 395 dptr->olddqactmap = dp->disp_qactmap; 396 dptr->oldnglobpris = dp->disp_npri; 397 398 ASSERT(dptr->oldnglobpris < numpris); 399 400 if (dptr->olddispq != NULL) { 401 /* 402 * Use kcopy because bcopy is platform-specific 403 * and could block while we might have paused the cpus. 404 */ 405 (void) kcopy(dptr->olddispq, dptr->newdispq, 406 dptr->oldnglobpris * sizeof (dispq_t)); 407 (void) kcopy(dptr->olddqactmap, dptr->newdqactmap, 408 ((dptr->oldnglobpris / BT_NBIPUL) + 1) * 409 sizeof (long)); 410 } 411 dp->disp_q = dptr->newdispq; 412 dp->disp_qactmap = dptr->newdqactmap; 413 dp->disp_q_limit = &dptr->newdispq[numpris]; 414 dp->disp_npri = numpris; 415 } 416 417 static void 418 disp_dq_free(struct disp_queue_info *dptr) 419 { 420 if (dptr->olddispq != NULL) 421 kmem_free(dptr->olddispq, 422 dptr->oldnglobpris * sizeof (dispq_t)); 423 if (dptr->olddqactmap != NULL) 424 kmem_free(dptr->olddqactmap, 425 ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long)); 426 } 427 428 /* 429 * For a newly created CPU, initialize the dispatch queue. 430 * This is called before the CPU is known through cpu[] or on any lists. 431 */ 432 void 433 disp_cpu_init(cpu_t *cp) 434 { 435 disp_t *dp; 436 dispq_t *newdispq; 437 ulong_t *newdqactmap; 438 439 ASSERT(MUTEX_HELD(&cpu_lock)); /* protect dispatcher queue sizes */ 440 441 if (cp == cpu0_disp.disp_cpu) 442 dp = &cpu0_disp; 443 else 444 dp = kmem_alloc(sizeof (disp_t), KM_SLEEP); 445 bzero(dp, sizeof (disp_t)); 446 cp->cpu_disp = dp; 447 dp->disp_cpu = cp; 448 dp->disp_maxrunpri = -1; 449 dp->disp_max_unbound_pri = -1; 450 DISP_LOCK_INIT(&cp->cpu_thread_lock); 451 /* 452 * Allocate memory for the dispatcher queue headers 453 * and the active queue bitmap. 
454 */ 455 newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP); 456 newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) * 457 sizeof (long), KM_SLEEP); 458 dp->disp_q = newdispq; 459 dp->disp_qactmap = newdqactmap; 460 dp->disp_q_limit = &newdispq[v.v_nglobpris]; 461 dp->disp_npri = v.v_nglobpris; 462 } 463 464 void 465 disp_cpu_fini(cpu_t *cp) 466 { 467 ASSERT(MUTEX_HELD(&cpu_lock)); 468 469 disp_kp_free(cp->cpu_disp); 470 if (cp->cpu_disp != &cpu0_disp) 471 kmem_free(cp->cpu_disp, sizeof (disp_t)); 472 } 473 474 /* 475 * Allocate new, larger kpreempt dispatch queue to replace the old one. 476 */ 477 void 478 disp_kp_alloc(disp_t *dq, pri_t npri) 479 { 480 struct disp_queue_info mem_info; 481 482 if (npri > dq->disp_npri) { 483 /* 484 * Allocate memory for the new array. 485 */ 486 disp_dq_alloc(&mem_info, npri, dq); 487 488 /* 489 * We need to copy the old structures to the new 490 * and free the old. 491 */ 492 disp_dq_assign(&mem_info, npri); 493 disp_dq_free(&mem_info); 494 } 495 } 496 497 /* 498 * Free dispatch queue. 499 * Used for the kpreempt queues for a removed CPU partition and 500 * for the per-CPU queues of deleted CPUs. 501 */ 502 void 503 disp_kp_free(disp_t *dq) 504 { 505 struct disp_queue_info mem_info; 506 507 mem_info.olddispq = dq->disp_q; 508 mem_info.olddqactmap = dq->disp_qactmap; 509 mem_info.oldnglobpris = dq->disp_npri; 510 disp_dq_free(&mem_info); 511 } 512 513 /* 514 * End dispatcher and scheduler initialization. 515 */ 516 517 /* 518 * See if there's anything to do other than remain idle. 519 * Return non-zero if there is. 520 * 521 * This function must be called with high spl, or with 522 * kernel preemption disabled to prevent the partition's 523 * active cpu list from changing while being traversed. 524 * 525 * This is essentially a simpler version of disp_getwork() 526 * to be called by CPUs preparing to "halt". 527 */ 528 int 529 disp_anywork(void) 530 { 531 cpu_t *cp = CPU; 532 cpu_t *ocp; 533 volatile int *local_nrunnable = &cp->cpu_disp->disp_nrunnable; 534 535 if (!(cp->cpu_flags & CPU_OFFLINE)) { 536 if (CP_MAXRUNPRI(cp->cpu_part) >= 0) 537 return (1); 538 539 for (ocp = cp->cpu_next_part; ocp != cp; 540 ocp = ocp->cpu_next_part) { 541 ASSERT(CPU_ACTIVE(ocp)); 542 543 /* 544 * Something has appeared on the local run queue. 545 */ 546 if (*local_nrunnable > 0) 547 return (1); 548 /* 549 * If we encounter another idle CPU that will 550 * soon be trolling around through disp_anywork() 551 * terminate our walk here and let this other CPU 552 * patrol the next part of the list. 553 */ 554 if (ocp->cpu_dispatch_pri == -1 && 555 (ocp->cpu_disp_flags & CPU_DISP_HALTED) == 0) 556 return (0); 557 /* 558 * Work can be taken from another CPU if: 559 * - There is unbound work on the run queue 560 * - That work isn't a thread undergoing a 561 * - context switch on an otherwise empty queue. 562 * - The CPU isn't running the idle loop. 
563 */ 564 if (ocp->cpu_disp->disp_max_unbound_pri != -1 && 565 !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 566 ocp->cpu_disp->disp_nrunnable == 1) && 567 ocp->cpu_dispatch_pri != -1) 568 return (1); 569 } 570 } 571 return (0); 572 } 573 574 /* 575 * Called when CPU enters the idle loop 576 */ 577 static void 578 idle_enter() 579 { 580 cpu_t *cp = CPU; 581 582 new_cpu_mstate(CMS_IDLE, gethrtime_unscaled()); 583 CPU_STATS_ADDQ(cp, sys, idlethread, 1); 584 set_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 585 } 586 587 /* 588 * Called when CPU exits the idle loop 589 */ 590 static void 591 idle_exit() 592 { 593 cpu_t *cp = CPU; 594 595 new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled()); 596 unset_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 597 } 598 599 /* 600 * Idle loop. 601 */ 602 void 603 idle() 604 { 605 struct cpu *cp = CPU; /* pointer to this CPU */ 606 kthread_t *t; /* taken thread */ 607 608 idle_enter(); 609 610 /* 611 * Uniprocessor version of idle loop. 612 * Do this until notified that we're on an actual multiprocessor. 613 */ 614 while (ncpus == 1) { 615 if (cp->cpu_disp->disp_nrunnable == 0) { 616 (*idle_cpu)(); 617 continue; 618 } 619 idle_exit(); 620 swtch(); 621 622 idle_enter(); /* returned from swtch */ 623 } 624 625 /* 626 * Multiprocessor idle loop. 627 */ 628 for (;;) { 629 /* 630 * If CPU is completely quiesced by p_online(2), just wait 631 * here with minimal bus traffic until put online. 632 */ 633 while (cp->cpu_flags & CPU_QUIESCED) 634 (*idle_cpu)(); 635 636 if (cp->cpu_disp->disp_nrunnable != 0) { 637 idle_exit(); 638 swtch(); 639 } else { 640 if (cp->cpu_flags & CPU_OFFLINE) 641 continue; 642 if ((t = disp_getwork(cp)) == NULL) { 643 if (cp->cpu_chosen_level != -1) { 644 disp_t *dp = cp->cpu_disp; 645 disp_t *kpq; 646 647 disp_lock_enter(&dp->disp_lock); 648 /* 649 * Set kpq under lock to prevent 650 * migration between partitions. 651 */ 652 kpq = &cp->cpu_part->cp_kp_queue; 653 if (kpq->disp_maxrunpri == -1) 654 cp->cpu_chosen_level = -1; 655 disp_lock_exit(&dp->disp_lock); 656 } 657 (*idle_cpu)(); 658 continue; 659 } 660 /* 661 * If there was a thread but we couldn't steal 662 * it, then keep trying. 663 */ 664 if (t == T_DONTSTEAL) 665 continue; 666 idle_exit(); 667 swtch_to(t); 668 } 669 idle_enter(); /* returned from swtch/swtch_to */ 670 } 671 } 672 673 674 /* 675 * Preempt the currently running thread in favor of the highest 676 * priority thread. The class of the current thread controls 677 * where it goes on the dispatcher queues. If panicking, turn 678 * preemption off. 679 */ 680 void 681 preempt() 682 { 683 kthread_t *t = curthread; 684 klwp_t *lwp = ttolwp(curthread); 685 686 if (panicstr) 687 return; 688 689 TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start"); 690 691 thread_lock(t); 692 693 if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) { 694 /* 695 * this thread has already been chosen to be run on 696 * another CPU. Clear kprunrun on this CPU since we're 697 * already headed for swtch(). 
698 */ 699 CPU->cpu_kprunrun = 0; 700 thread_unlock_nopreempt(t); 701 TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end"); 702 } else { 703 if (lwp != NULL) 704 lwp->lwp_ru.nivcsw++; 705 CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1); 706 THREAD_TRANSITION(t); 707 CL_PREEMPT(t); 708 DTRACE_SCHED(preempt); 709 thread_unlock_nopreempt(t); 710 711 TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end"); 712 713 swtch(); /* clears CPU->cpu_runrun via disp() */ 714 } 715 } 716 717 extern kthread_t *thread_unpin(); 718 719 /* 720 * disp() - find the highest priority thread for this processor to run, and 721 * set it in TS_ONPROC state so that resume() can be called to run it. 722 */ 723 static kthread_t * 724 disp() 725 { 726 cpu_t *cpup; 727 disp_t *dp; 728 kthread_t *tp; 729 dispq_t *dq; 730 int maxrunword; 731 pri_t pri; 732 disp_t *kpq; 733 734 TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start"); 735 736 cpup = CPU; 737 /* 738 * Find the highest priority loaded, runnable thread. 739 */ 740 dp = cpup->cpu_disp; 741 742 reschedule: 743 /* 744 * If there is more important work on the global queue with a better 745 * priority than the maximum on this CPU, take it now. 746 */ 747 kpq = &cpup->cpu_part->cp_kp_queue; 748 while ((pri = kpq->disp_maxrunpri) >= 0 && 749 pri >= dp->disp_maxrunpri && 750 (cpup->cpu_flags & CPU_OFFLINE) == 0 && 751 (tp = disp_getbest(kpq)) != NULL) { 752 if (disp_ratify(tp, kpq) != NULL) { 753 TRACE_1(TR_FAC_DISP, TR_DISP_END, 754 "disp_end:tid %p", tp); 755 return (tp); 756 } 757 } 758 759 disp_lock_enter(&dp->disp_lock); 760 pri = dp->disp_maxrunpri; 761 762 /* 763 * If there is nothing to run, look at what's runnable on other queues. 764 * Choose the idle thread if the CPU is quiesced. 765 * Note that CPUs that have the CPU_OFFLINE flag set can still run 766 * interrupt threads, which will be the only threads on the CPU's own 767 * queue, but cannot run threads from other queues. 768 */ 769 if (pri == -1) { 770 if (!(cpup->cpu_flags & CPU_OFFLINE)) { 771 disp_lock_exit(&dp->disp_lock); 772 if ((tp = disp_getwork(cpup)) == NULL || 773 tp == T_DONTSTEAL) { 774 tp = cpup->cpu_idle_thread; 775 (void) splhigh(); 776 THREAD_ONPROC(tp, cpup); 777 cpup->cpu_dispthread = tp; 778 cpup->cpu_dispatch_pri = -1; 779 cpup->cpu_runrun = cpup->cpu_kprunrun = 0; 780 cpup->cpu_chosen_level = -1; 781 } 782 } else { 783 disp_lock_exit_high(&dp->disp_lock); 784 tp = cpup->cpu_idle_thread; 785 THREAD_ONPROC(tp, cpup); 786 cpup->cpu_dispthread = tp; 787 cpup->cpu_dispatch_pri = -1; 788 cpup->cpu_runrun = cpup->cpu_kprunrun = 0; 789 cpup->cpu_chosen_level = -1; 790 } 791 TRACE_1(TR_FAC_DISP, TR_DISP_END, 792 "disp_end:tid %p", tp); 793 return (tp); 794 } 795 796 dq = &dp->disp_q[pri]; 797 tp = dq->dq_first; 798 799 ASSERT(tp != NULL); 800 ASSERT(tp->t_schedflag & TS_LOAD); /* thread must be swapped in */ 801 802 DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 803 804 /* 805 * Found it so remove it from queue. 806 */ 807 dp->disp_nrunnable--; 808 dq->dq_sruncnt--; 809 if ((dq->dq_first = tp->t_link) == NULL) { 810 ulong_t *dqactmap = dp->disp_qactmap; 811 812 ASSERT(dq->dq_sruncnt == 0); 813 dq->dq_last = NULL; 814 815 /* 816 * The queue is empty, so the corresponding bit needs to be 817 * turned off in dqactmap. If nrunnable != 0 just took the 818 * last runnable thread off the 819 * highest queue, so recompute disp_maxrunpri. 
820 */ 821 maxrunword = pri >> BT_ULSHIFT; 822 dqactmap[maxrunword] &= ~BT_BIW(pri); 823 824 if (dp->disp_nrunnable == 0) { 825 dp->disp_max_unbound_pri = -1; 826 dp->disp_maxrunpri = -1; 827 } else { 828 int ipri; 829 830 ipri = bt_gethighbit(dqactmap, maxrunword); 831 dp->disp_maxrunpri = ipri; 832 if (ipri < dp->disp_max_unbound_pri) 833 dp->disp_max_unbound_pri = ipri; 834 } 835 } else { 836 tp->t_link = NULL; 837 } 838 839 /* 840 * Set TS_DONT_SWAP flag to prevent another processor from swapping 841 * out this thread before we have a chance to run it. 842 * While running, it is protected against swapping by t_lock. 843 */ 844 tp->t_schedflag |= TS_DONT_SWAP; 845 cpup->cpu_dispthread = tp; /* protected by spl only */ 846 cpup->cpu_dispatch_pri = pri; 847 ASSERT(pri == DISP_PRIO(tp)); 848 thread_onproc(tp, cpup); /* set t_state to TS_ONPROC */ 849 disp_lock_exit_high(&dp->disp_lock); /* drop run queue lock */ 850 851 ASSERT(tp != NULL); 852 TRACE_1(TR_FAC_DISP, TR_DISP_END, 853 "disp_end:tid %p", tp); 854 855 if (disp_ratify(tp, kpq) == NULL) 856 goto reschedule; 857 858 return (tp); 859 } 860 861 /* 862 * swtch() 863 * Find best runnable thread and run it. 864 * Called with the current thread already switched to a new state, 865 * on a sleep queue, run queue, stopped, and not zombied. 866 * May be called at any spl level less than or equal to LOCK_LEVEL. 867 * Always drops spl to the base level (spl0()). 868 */ 869 void 870 swtch() 871 { 872 kthread_t *t = curthread; 873 kthread_t *next; 874 cpu_t *cp; 875 876 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 877 878 if (t->t_flag & T_INTR_THREAD) 879 cpu_intr_swtch_enter(t); 880 881 if (t->t_intr != NULL) { 882 /* 883 * We are an interrupt thread. Setup and return 884 * the interrupted thread to be resumed. 885 */ 886 (void) splhigh(); /* block other scheduler action */ 887 cp = CPU; /* now protected against migration */ 888 ASSERT(CPU_ON_INTR(cp) == 0); /* not called with PIL > 10 */ 889 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 890 CPU_STATS_ADDQ(cp, sys, intrblk, 1); 891 next = thread_unpin(); 892 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 893 resume_from_intr(next); 894 } else { 895 #ifdef DEBUG 896 if (t->t_state == TS_ONPROC && 897 t->t_disp_queue->disp_cpu == CPU && 898 t->t_preempt == 0) { 899 thread_lock(t); 900 ASSERT(t->t_state != TS_ONPROC || 901 t->t_disp_queue->disp_cpu != CPU || 902 t->t_preempt != 0); /* cannot migrate */ 903 thread_unlock_nopreempt(t); 904 } 905 #endif /* DEBUG */ 906 cp = CPU; 907 next = disp(); /* returns with spl high */ 908 ASSERT(CPU_ON_INTR(cp) == 0); /* not called with PIL > 10 */ 909 910 /* OK to steal anything left on run queue */ 911 cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; 912 913 if (next != t) { 914 hrtime_t now; 915 916 now = gethrtime_unscaled(); 917 pg_ev_thread_swtch(cp, now, t, next); 918 919 /* 920 * If t was previously in the TS_ONPROC state, 921 * setfrontdq and setbackdq won't have set its t_waitrq. 922 * Since we now finally know that we're switching away 923 * from this thread, set its t_waitrq if it is on a run 924 * queue. 
925 */ 926 if ((t->t_state == TS_RUN) && (t->t_waitrq == 0)) { 927 t->t_waitrq = now; 928 } 929 930 /* 931 * restore mstate of thread that we are switching to 932 */ 933 restore_mstate(next); 934 935 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 936 cp->cpu_last_swtch = t->t_disp_time = ddi_get_lbolt(); 937 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 938 939 if (dtrace_vtime_active) 940 dtrace_vtime_switch(next); 941 942 resume(next); 943 /* 944 * The TR_RESUME_END and TR_SWTCH_END trace points 945 * appear at the end of resume(), because we may not 946 * return here 947 */ 948 } else { 949 if (t->t_flag & T_INTR_THREAD) 950 cpu_intr_swtch_exit(t); 951 /* 952 * Threads that enqueue themselves on a run queue defer 953 * setting t_waitrq. It is then either set in swtch() 954 * when the CPU is actually yielded, or not at all if it 955 * is remaining on the CPU. 956 * There is however a window between where the thread 957 * placed itself on a run queue, and where it selects 958 * itself in disp(), where a third party (eg. clock() 959 * doing tick processing) may have re-enqueued this 960 * thread, setting t_waitrq in the process. We detect 961 * this race by noticing that despite switching to 962 * ourself, our t_waitrq has been set, and should be 963 * cleared. 964 */ 965 if (t->t_waitrq != 0) 966 t->t_waitrq = 0; 967 968 pg_ev_thread_remain(cp, t); 969 970 DTRACE_SCHED(remain__cpu); 971 TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end"); 972 (void) spl0(); 973 } 974 } 975 } 976 977 /* 978 * swtch_from_zombie() 979 * Special case of swtch(), which allows checks for TS_ZOMB to be 980 * eliminated from normal resume. 981 * Find best runnable thread and run it. 982 * Called with the current thread zombied. 983 * Zombies cannot migrate, so CPU references are safe. 984 */ 985 void 986 swtch_from_zombie() 987 { 988 kthread_t *next; 989 cpu_t *cpu = CPU; 990 991 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 992 993 ASSERT(curthread->t_state == TS_ZOMB); 994 995 next = disp(); /* returns with spl high */ 996 ASSERT(CPU_ON_INTR(CPU) == 0); /* not called with PIL > 10 */ 997 CPU_STATS_ADDQ(CPU, sys, pswitch, 1); 998 ASSERT(next != curthread); 999 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 1000 1001 pg_ev_thread_swtch(cpu, gethrtime_unscaled(), curthread, next); 1002 1003 restore_mstate(next); 1004 1005 if (dtrace_vtime_active) 1006 dtrace_vtime_switch(next); 1007 1008 resume_from_zombie(next); 1009 /* 1010 * The TR_RESUME_END and TR_SWTCH_END trace points 1011 * appear at the end of resume(), because we certainly will not 1012 * return here 1013 */ 1014 } 1015 1016 #if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint)) 1017 1018 /* 1019 * search_disp_queues() 1020 * Search the given dispatch queues for thread tp. 1021 * Return 1 if tp is found, otherwise return 0. 1022 */ 1023 static int 1024 search_disp_queues(disp_t *dp, kthread_t *tp) 1025 { 1026 dispq_t *dq; 1027 dispq_t *eq; 1028 1029 disp_lock_enter_high(&dp->disp_lock); 1030 1031 for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) { 1032 kthread_t *rp; 1033 1034 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1035 1036 for (rp = dq->dq_first; rp; rp = rp->t_link) 1037 if (tp == rp) { 1038 disp_lock_exit_high(&dp->disp_lock); 1039 return (1); 1040 } 1041 } 1042 disp_lock_exit_high(&dp->disp_lock); 1043 1044 return (0); 1045 } 1046 1047 /* 1048 * thread_on_queue() 1049 * Search all per-CPU dispatch queues and all partition-wide kpreempt 1050 * queues for thread tp. 
Return 1 if tp is found, otherwise return 0. 1051 */ 1052 static int 1053 thread_on_queue(kthread_t *tp) 1054 { 1055 cpu_t *cp; 1056 struct cpupart *part; 1057 1058 ASSERT(getpil() >= DISP_LEVEL); 1059 1060 /* 1061 * Search the per-CPU dispatch queues for tp. 1062 */ 1063 cp = CPU; 1064 do { 1065 if (search_disp_queues(cp->cpu_disp, tp)) 1066 return (1); 1067 } while ((cp = cp->cpu_next_onln) != CPU); 1068 1069 /* 1070 * Search the partition-wide kpreempt queues for tp. 1071 */ 1072 part = CPU->cpu_part; 1073 do { 1074 if (search_disp_queues(&part->cp_kp_queue, tp)) 1075 return (1); 1076 } while ((part = part->cp_next) != CPU->cpu_part); 1077 1078 return (0); 1079 } 1080 1081 #else 1082 1083 #define thread_on_queue(tp) 0 /* ASSERT must be !thread_on_queue */ 1084 1085 #endif /* DEBUG */ 1086 1087 /* 1088 * like swtch(), but switch to a specified thread taken from another CPU. 1089 * called with spl high.. 1090 */ 1091 void 1092 swtch_to(kthread_t *next) 1093 { 1094 cpu_t *cp = CPU; 1095 hrtime_t now; 1096 1097 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 1098 1099 /* 1100 * Update context switch statistics. 1101 */ 1102 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 1103 1104 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 1105 1106 now = gethrtime_unscaled(); 1107 pg_ev_thread_swtch(cp, now, curthread, next); 1108 1109 /* OK to steal anything left on run queue */ 1110 cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; 1111 1112 /* record last execution time */ 1113 cp->cpu_last_swtch = curthread->t_disp_time = ddi_get_lbolt(); 1114 1115 /* 1116 * If t was previously in the TS_ONPROC state, setfrontdq and setbackdq 1117 * won't have set its t_waitrq. Since we now finally know that we're 1118 * switching away from this thread, set its t_waitrq if it is on a run 1119 * queue. 1120 */ 1121 if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) { 1122 curthread->t_waitrq = now; 1123 } 1124 1125 /* restore next thread to previously running microstate */ 1126 restore_mstate(next); 1127 1128 if (dtrace_vtime_active) 1129 dtrace_vtime_switch(next); 1130 1131 resume(next); 1132 /* 1133 * The TR_RESUME_END and TR_SWTCH_END trace points 1134 * appear at the end of resume(), because we may not 1135 * return here 1136 */ 1137 } 1138 1139 static void 1140 cpu_resched(cpu_t *cp, pri_t tpri) 1141 { 1142 int call_poke_cpu = 0; 1143 pri_t cpupri = cp->cpu_dispatch_pri; 1144 1145 if (cpupri != CPU_IDLE_PRI && cpupri < tpri) { 1146 TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED, 1147 "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri); 1148 if (tpri >= upreemptpri && cp->cpu_runrun == 0) { 1149 cp->cpu_runrun = 1; 1150 aston(cp->cpu_dispthread); 1151 if (tpri < kpreemptpri && cp != CPU) 1152 call_poke_cpu = 1; 1153 } 1154 if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) { 1155 cp->cpu_kprunrun = 1; 1156 if (cp != CPU) 1157 call_poke_cpu = 1; 1158 } 1159 } 1160 1161 /* 1162 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 1163 */ 1164 membar_enter(); 1165 1166 if (call_poke_cpu) 1167 poke_cpu(cp->cpu_id); 1168 } 1169 1170 /* 1171 * setbackdq() keeps runqs balanced such that the difference in length 1172 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF. 1173 * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths 1174 * must match. When per-thread TS_RUNQMATCH flag is set, setbackdq() will 1175 * try to keep runqs perfectly balanced regardless of the thread priority. 
1176 */ 1177 #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */ 1178 #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */ 1179 #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt) 1180 1181 /* 1182 * Macro that evaluates to true if it is likely that the thread has cache 1183 * warmth. This is based on the amount of time that has elapsed since the 1184 * thread last ran. If that amount of time is less than "rechoose_interval" 1185 * ticks, then we decide that the thread has enough cache warmth to warrant 1186 * some affinity for t->t_cpu. 1187 */ 1188 #define THREAD_HAS_CACHE_WARMTH(thread) \ 1189 ((thread == curthread) || \ 1190 ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval)) 1191 /* 1192 * Put the specified thread on the back of the dispatcher 1193 * queue corresponding to its current priority. 1194 * 1195 * Called with the thread in transition, onproc or stopped state 1196 * and locked (transition implies locked) and at high spl. 1197 * Returns with the thread in TS_RUN state and still locked. 1198 */ 1199 void 1200 setbackdq(kthread_t *tp) 1201 { 1202 dispq_t *dq; 1203 disp_t *dp; 1204 cpu_t *cp; 1205 pri_t tpri; 1206 int bound; 1207 boolean_t self; 1208 1209 ASSERT(THREAD_LOCK_HELD(tp)); 1210 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1211 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1212 1213 /* 1214 * If thread is "swapped" or on the swap queue don't 1215 * queue it, but wake sched. 1216 */ 1217 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1218 disp_swapped_setrun(tp); 1219 return; 1220 } 1221 1222 self = (tp == curthread); 1223 1224 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1225 bound = 1; 1226 else 1227 bound = 0; 1228 1229 tpri = DISP_PRIO(tp); 1230 if (ncpus == 1) 1231 cp = tp->t_cpu; 1232 else if (!bound) { 1233 if (tpri >= kpqpri) { 1234 setkpdq(tp, SETKP_BACK); 1235 return; 1236 } 1237 1238 /* 1239 * We'll generally let this thread continue to run where 1240 * it last ran...but will consider migration if: 1241 * - The thread probably doesn't have much cache warmth. 1242 * - SMT exclusion would prefer us to run elsewhere 1243 * - The CPU where it last ran is the target of an offline 1244 * request. 1245 * - The thread last ran outside its home lgroup. 1246 */ 1247 if ((!THREAD_HAS_CACHE_WARMTH(tp)) || 1248 !smt_should_run(tp, tp->t_cpu) || 1249 (tp->t_cpu == cpu_inmotion) || 1250 !LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) { 1251 cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri); 1252 } else { 1253 cp = tp->t_cpu; 1254 } 1255 1256 if (tp->t_cpupart == cp->cpu_part) { 1257 int qlen; 1258 1259 /* 1260 * Perform any CMT load balancing 1261 */ 1262 cp = cmt_balance(tp, cp); 1263 1264 /* 1265 * Balance across the run queues 1266 */ 1267 qlen = RUNQ_LEN(cp, tpri); 1268 if (tpri >= RUNQ_MATCH_PRI && 1269 !(tp->t_schedflag & TS_RUNQMATCH)) 1270 qlen -= RUNQ_MAX_DIFF; 1271 if (qlen > 0) { 1272 cpu_t *newcp; 1273 1274 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) { 1275 newcp = cp->cpu_next_part; 1276 } else if ((newcp = cp->cpu_next_lpl) == cp) { 1277 newcp = cp->cpu_next_part; 1278 } 1279 1280 if (smt_should_run(tp, newcp) && 1281 RUNQ_LEN(newcp, tpri) < qlen) { 1282 DTRACE_PROBE3(runq__balance, 1283 kthread_t *, tp, 1284 cpu_t *, cp, cpu_t *, newcp); 1285 cp = newcp; 1286 } 1287 } 1288 } else { 1289 /* 1290 * Migrate to a cpu in the new partition. 
1291 */ 1292 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, tp, 1293 tp->t_pri); 1294 } 1295 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1296 } else { 1297 /* 1298 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1299 * a short time until weak binding that existed when the 1300 * strong binding was established has dropped) so we must 1301 * favour weak binding over strong. 1302 */ 1303 cp = tp->t_weakbound_cpu ? 1304 tp->t_weakbound_cpu : tp->t_bound_cpu; 1305 } 1306 /* 1307 * A thread that is ONPROC may be temporarily placed on the run queue 1308 * but then chosen to run again by disp. If the thread we're placing on 1309 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1310 * replacement process is actually scheduled in swtch(). In this 1311 * situation, curthread is the only thread that could be in the ONPROC 1312 * state. 1313 */ 1314 if ((!self) && (tp->t_waitrq == 0)) { 1315 hrtime_t curtime; 1316 1317 curtime = gethrtime_unscaled(); 1318 (void) cpu_update_pct(tp, curtime); 1319 tp->t_waitrq = curtime; 1320 } else { 1321 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1322 } 1323 1324 dp = cp->cpu_disp; 1325 disp_lock_enter_high(&dp->disp_lock); 1326 1327 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0); 1328 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p", 1329 tpri, cp, tp); 1330 1331 #ifndef NPROBE 1332 /* Kernel probe */ 1333 if (tnf_tracing_active) 1334 tnf_thread_queue(tp, cp, tpri); 1335 #endif /* NPROBE */ 1336 1337 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1338 1339 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1340 tp->t_disp_queue = dp; 1341 tp->t_link = NULL; 1342 1343 dq = &dp->disp_q[tpri]; 1344 dp->disp_nrunnable++; 1345 if (!bound) 1346 dp->disp_steal = 0; 1347 membar_enter(); 1348 1349 if (dq->dq_sruncnt++ != 0) { 1350 ASSERT(dq->dq_first != NULL); 1351 dq->dq_last->t_link = tp; 1352 dq->dq_last = tp; 1353 } else { 1354 ASSERT(dq->dq_first == NULL); 1355 ASSERT(dq->dq_last == NULL); 1356 dq->dq_first = dq->dq_last = tp; 1357 BT_SET(dp->disp_qactmap, tpri); 1358 if (tpri > dp->disp_maxrunpri) { 1359 dp->disp_maxrunpri = tpri; 1360 membar_enter(); 1361 cpu_resched(cp, tpri); 1362 } 1363 } 1364 1365 if (!bound && tpri > dp->disp_max_unbound_pri) { 1366 if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) { 1367 /* 1368 * If there are no other unbound threads on the 1369 * run queue, don't allow other CPUs to steal 1370 * this thread while we are in the middle of a 1371 * context switch. We may just switch to it 1372 * again right away. CPU_DISP_DONTSTEAL is cleared 1373 * in swtch and swtch_to. 1374 */ 1375 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1376 } 1377 dp->disp_max_unbound_pri = tpri; 1378 } 1379 (*disp_enq_thread)(cp, bound); 1380 } 1381 1382 /* 1383 * Put the specified thread on the front of the dispatcher 1384 * queue corresponding to its current priority. 1385 * 1386 * Called with the thread in transition, onproc or stopped state 1387 * and locked (transition implies locked) and at high spl. 1388 * Returns with the thread in TS_RUN state and still locked. 1389 */ 1390 void 1391 setfrontdq(kthread_t *tp) 1392 { 1393 disp_t *dp; 1394 dispq_t *dq; 1395 cpu_t *cp; 1396 pri_t tpri; 1397 int bound; 1398 1399 ASSERT(THREAD_LOCK_HELD(tp)); 1400 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1401 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1402 1403 /* 1404 * If thread is "swapped" or on the swap queue don't 1405 * queue it, but wake sched. 
1406 */ 1407 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1408 disp_swapped_setrun(tp); 1409 return; 1410 } 1411 1412 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1413 bound = 1; 1414 else 1415 bound = 0; 1416 1417 tpri = DISP_PRIO(tp); 1418 if (ncpus == 1) 1419 cp = tp->t_cpu; 1420 else if (!bound) { 1421 if (tpri >= kpqpri) { 1422 setkpdq(tp, SETKP_FRONT); 1423 return; 1424 } 1425 cp = tp->t_cpu; 1426 if (tp->t_cpupart == cp->cpu_part) { 1427 /* 1428 * We'll generally let this thread continue to run 1429 * where it last ran, but will consider migration if: 1430 * - The thread last ran outside its home lgroup. 1431 * - The CPU where it last ran is the target of an 1432 * offline request (a thread_nomigrate() on the in 1433 * motion CPU relies on this when forcing a preempt). 1434 * - The thread isn't the highest priority thread where 1435 * it last ran, and it is considered not likely to 1436 * have significant cache warmth. 1437 */ 1438 if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp) || 1439 cp == cpu_inmotion || 1440 (tpri < cp->cpu_disp->disp_maxrunpri && 1441 !THREAD_HAS_CACHE_WARMTH(tp))) { 1442 cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri); 1443 } 1444 } else { 1445 /* 1446 * Migrate to a cpu in the new partition. 1447 */ 1448 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1449 tp, tp->t_pri); 1450 } 1451 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1452 } else { 1453 /* 1454 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1455 * a short time until weak binding that existed when the 1456 * strong binding was established has dropped) so we must 1457 * favour weak binding over strong. 1458 */ 1459 cp = tp->t_weakbound_cpu ? 1460 tp->t_weakbound_cpu : tp->t_bound_cpu; 1461 } 1462 1463 /* 1464 * A thread that is ONPROC may be temporarily placed on the run queue 1465 * but then chosen to run again by disp. If the thread we're placing on 1466 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1467 * replacement process is actually scheduled in swtch(). In this 1468 * situation, curthread is the only thread that could be in the ONPROC 1469 * state. 
1470 */ 1471 if ((tp != curthread) && (tp->t_waitrq == 0)) { 1472 hrtime_t curtime; 1473 1474 curtime = gethrtime_unscaled(); 1475 (void) cpu_update_pct(tp, curtime); 1476 tp->t_waitrq = curtime; 1477 } else { 1478 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1479 } 1480 1481 dp = cp->cpu_disp; 1482 disp_lock_enter_high(&dp->disp_lock); 1483 1484 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1485 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1); 1486 1487 #ifndef NPROBE 1488 /* Kernel probe */ 1489 if (tnf_tracing_active) 1490 tnf_thread_queue(tp, cp, tpri); 1491 #endif /* NPROBE */ 1492 1493 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1494 1495 THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */ 1496 tp->t_disp_queue = dp; 1497 1498 dq = &dp->disp_q[tpri]; 1499 dp->disp_nrunnable++; 1500 if (!bound) 1501 dp->disp_steal = 0; 1502 membar_enter(); 1503 1504 if (dq->dq_sruncnt++ != 0) { 1505 ASSERT(dq->dq_last != NULL); 1506 tp->t_link = dq->dq_first; 1507 dq->dq_first = tp; 1508 } else { 1509 ASSERT(dq->dq_last == NULL); 1510 ASSERT(dq->dq_first == NULL); 1511 tp->t_link = NULL; 1512 dq->dq_first = dq->dq_last = tp; 1513 BT_SET(dp->disp_qactmap, tpri); 1514 if (tpri > dp->disp_maxrunpri) { 1515 dp->disp_maxrunpri = tpri; 1516 membar_enter(); 1517 cpu_resched(cp, tpri); 1518 } 1519 } 1520 1521 if (!bound && tpri > dp->disp_max_unbound_pri) { 1522 if (tp == curthread && dp->disp_max_unbound_pri == -1 && 1523 cp == CPU) { 1524 /* 1525 * If there are no other unbound threads on the 1526 * run queue, don't allow other CPUs to steal 1527 * this thread while we are in the middle of a 1528 * context switch. We may just switch to it 1529 * again right away. CPU_DISP_DONTSTEAL is cleared 1530 * in swtch and swtch_to. 
1531 */ 1532 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1533 } 1534 dp->disp_max_unbound_pri = tpri; 1535 } 1536 (*disp_enq_thread)(cp, bound); 1537 } 1538 1539 /* 1540 * Put a high-priority unbound thread on the kp queue 1541 */ 1542 static void 1543 setkpdq(kthread_t *tp, int borf) 1544 { 1545 dispq_t *dq; 1546 disp_t *dp; 1547 cpu_t *cp; 1548 pri_t tpri; 1549 1550 tpri = DISP_PRIO(tp); 1551 1552 dp = &tp->t_cpupart->cp_kp_queue; 1553 disp_lock_enter_high(&dp->disp_lock); 1554 1555 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1556 1557 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1558 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf); 1559 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1560 tp->t_disp_queue = dp; 1561 dp->disp_nrunnable++; 1562 dq = &dp->disp_q[tpri]; 1563 1564 if (dq->dq_sruncnt++ != 0) { 1565 if (borf == SETKP_BACK) { 1566 ASSERT(dq->dq_first != NULL); 1567 tp->t_link = NULL; 1568 dq->dq_last->t_link = tp; 1569 dq->dq_last = tp; 1570 } else { 1571 ASSERT(dq->dq_last != NULL); 1572 tp->t_link = dq->dq_first; 1573 dq->dq_first = tp; 1574 } 1575 } else { 1576 if (borf == SETKP_BACK) { 1577 ASSERT(dq->dq_first == NULL); 1578 ASSERT(dq->dq_last == NULL); 1579 dq->dq_first = dq->dq_last = tp; 1580 } else { 1581 ASSERT(dq->dq_last == NULL); 1582 ASSERT(dq->dq_first == NULL); 1583 tp->t_link = NULL; 1584 dq->dq_first = dq->dq_last = tp; 1585 } 1586 BT_SET(dp->disp_qactmap, tpri); 1587 if (tpri > dp->disp_max_unbound_pri) 1588 dp->disp_max_unbound_pri = tpri; 1589 if (tpri > dp->disp_maxrunpri) { 1590 dp->disp_maxrunpri = tpri; 1591 membar_enter(); 1592 } 1593 } 1594 1595 cp = tp->t_cpu; 1596 if (tp->t_cpupart != cp->cpu_part) { 1597 /* migrate to a cpu in the new partition */ 1598 cp = tp->t_cpupart->cp_cpulist; 1599 } 1600 cp = disp_lowpri_cpu(cp, tp, tp->t_pri); 1601 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 1602 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1603 1604 #ifndef NPROBE 1605 /* Kernel probe */ 1606 if (tnf_tracing_active) 1607 tnf_thread_queue(tp, cp, tpri); 1608 #endif /* NPROBE */ 1609 1610 if (cp->cpu_chosen_level < tpri) 1611 cp->cpu_chosen_level = tpri; 1612 cpu_resched(cp, tpri); 1613 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 1614 (*disp_enq_thread)(cp, 0); 1615 } 1616 1617 /* 1618 * Remove a thread from the dispatcher queue if it is on it. 1619 * It is not an error if it is not found but we return whether 1620 * or not it was found in case the caller wants to check. 1621 */ 1622 int 1623 dispdeq(kthread_t *tp) 1624 { 1625 disp_t *dp; 1626 dispq_t *dq; 1627 kthread_t *rp; 1628 kthread_t *trp; 1629 kthread_t **ptp; 1630 int tpri; 1631 1632 ASSERT(THREAD_LOCK_HELD(tp)); 1633 1634 if (tp->t_state != TS_RUN) 1635 return (0); 1636 1637 /* 1638 * The thread is "swapped" or is on the swap queue and 1639 * hence no longer on the run queue, so return true. 1640 */ 1641 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) 1642 return (1); 1643 1644 tpri = DISP_PRIO(tp); 1645 dp = tp->t_disp_queue; 1646 ASSERT(tpri < dp->disp_npri); 1647 dq = &dp->disp_q[tpri]; 1648 ptp = &dq->dq_first; 1649 rp = *ptp; 1650 trp = NULL; 1651 1652 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1653 1654 /* 1655 * Search for thread in queue. 1656 * Double links would simplify this at the expense of disp/setrun. 
1657 */ 1658 while (rp != tp && rp != NULL) { 1659 trp = rp; 1660 ptp = &trp->t_link; 1661 rp = trp->t_link; 1662 } 1663 1664 if (rp == NULL) { 1665 panic("dispdeq: thread not on queue"); 1666 } 1667 1668 DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 1669 1670 /* 1671 * Found it so remove it from queue. 1672 */ 1673 if ((*ptp = rp->t_link) == NULL) 1674 dq->dq_last = trp; 1675 1676 dp->disp_nrunnable--; 1677 if (--dq->dq_sruncnt == 0) { 1678 dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri); 1679 if (dp->disp_nrunnable == 0) { 1680 dp->disp_max_unbound_pri = -1; 1681 dp->disp_maxrunpri = -1; 1682 } else if (tpri == dp->disp_maxrunpri) { 1683 int ipri; 1684 1685 ipri = bt_gethighbit(dp->disp_qactmap, 1686 dp->disp_maxrunpri >> BT_ULSHIFT); 1687 if (ipri < dp->disp_max_unbound_pri) 1688 dp->disp_max_unbound_pri = ipri; 1689 dp->disp_maxrunpri = ipri; 1690 } 1691 } 1692 tp->t_link = NULL; 1693 THREAD_TRANSITION(tp); /* put in intermediate state */ 1694 return (1); 1695 } 1696 1697 1698 /* 1699 * dq_sruninc and dq_srundec are public functions for 1700 * incrementing/decrementing the sruncnts when a thread on 1701 * a dispatcher queue is made schedulable/unschedulable by 1702 * resetting the TS_LOAD flag. 1703 * 1704 * The caller MUST have the thread lock and therefore the dispatcher 1705 * queue lock so that the operation which changes 1706 * the flag, the operation that checks the status of the thread to 1707 * determine if it's on a disp queue AND the call to this function 1708 * are one atomic operation with respect to interrupts. 1709 */ 1710 1711 /* 1712 * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread. 1713 */ 1714 void 1715 dq_sruninc(kthread_t *t) 1716 { 1717 ASSERT(t->t_state == TS_RUN); 1718 ASSERT(t->t_schedflag & TS_LOAD); 1719 1720 THREAD_TRANSITION(t); 1721 setfrontdq(t); 1722 } 1723 1724 /* 1725 * See comment on calling conventions above. 1726 * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread. 1727 */ 1728 void 1729 dq_srundec(kthread_t *t) 1730 { 1731 ASSERT(t->t_schedflag & TS_LOAD); 1732 1733 (void) dispdeq(t); 1734 disp_swapped_enq(t); 1735 } 1736 1737 /* 1738 * Change the dispatcher lock of thread to the "swapped_lock" 1739 * and return with thread lock still held. 1740 * 1741 * Called with thread_lock held, in transition state, and at high spl. 1742 */ 1743 void 1744 disp_swapped_enq(kthread_t *tp) 1745 { 1746 ASSERT(THREAD_LOCK_HELD(tp)); 1747 ASSERT(tp->t_schedflag & TS_LOAD); 1748 1749 switch (tp->t_state) { 1750 case TS_RUN: 1751 disp_lock_enter_high(&swapped_lock); 1752 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1753 break; 1754 case TS_ONPROC: 1755 disp_lock_enter_high(&swapped_lock); 1756 THREAD_TRANSITION(tp); 1757 wake_sched_sec = 1; /* tell clock to wake sched */ 1758 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1759 break; 1760 default: 1761 panic("disp_swapped: tp: %p bad t_state", (void *)tp); 1762 } 1763 } 1764 1765 /* 1766 * This routine is called by setbackdq/setfrontdq if the thread is 1767 * not loaded or loaded and on the swap queue. 1768 * 1769 * Thread state TS_SLEEP implies that a swapped thread 1770 * has been woken up and needs to be swapped in by the swapper. 1771 * 1772 * Thread state TS_RUN, it implies that the priority of a swapped 1773 * thread is being increased by scheduling class (e.g. ts_update). 
1774 */ 1775 static void 1776 disp_swapped_setrun(kthread_t *tp) 1777 { 1778 ASSERT(THREAD_LOCK_HELD(tp)); 1779 ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD); 1780 1781 switch (tp->t_state) { 1782 case TS_SLEEP: 1783 disp_lock_enter_high(&swapped_lock); 1784 /* 1785 * Wakeup sched immediately (i.e., next tick) if the 1786 * thread priority is above maxclsyspri. 1787 */ 1788 if (DISP_PRIO(tp) > maxclsyspri) 1789 wake_sched = 1; 1790 else 1791 wake_sched_sec = 1; 1792 THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */ 1793 break; 1794 case TS_RUN: /* called from ts_update */ 1795 break; 1796 default: 1797 panic("disp_swapped_setrun: tp: %p bad t_state", (void *)tp); 1798 } 1799 } 1800 1801 /* 1802 * Make a thread give up its processor. Find the processor on 1803 * which this thread is executing, and have that processor 1804 * preempt. 1805 * 1806 * We allow System Duty Cycle (SDC) threads to be preempted even if 1807 * they are running at kernel priorities. To implement this, we always 1808 * set cpu_kprunrun; this ensures preempt() will be called. Since SDC 1809 * calls cpu_surrender() very often, we only preempt if there is anyone 1810 * competing with us. 1811 */ 1812 void 1813 cpu_surrender(kthread_t *tp) 1814 { 1815 cpu_t *cpup; 1816 int max_pri; 1817 int max_run_pri; 1818 klwp_t *lwp; 1819 1820 ASSERT(THREAD_LOCK_HELD(tp)); 1821 1822 if (tp->t_state != TS_ONPROC) 1823 return; 1824 cpup = tp->t_disp_queue->disp_cpu; /* CPU thread dispatched to */ 1825 max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */ 1826 max_run_pri = CP_MAXRUNPRI(cpup->cpu_part); 1827 if (max_pri < max_run_pri) 1828 max_pri = max_run_pri; 1829 1830 if (tp->t_cid == sysdccid) { 1831 uint_t t_pri = DISP_PRIO(tp); 1832 if (t_pri > max_pri) 1833 return; /* we are not competing w/ anyone */ 1834 cpup->cpu_runrun = cpup->cpu_kprunrun = 1; 1835 } else { 1836 cpup->cpu_runrun = 1; 1837 if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) { 1838 cpup->cpu_kprunrun = 1; 1839 } 1840 } 1841 1842 /* 1843 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 1844 */ 1845 membar_enter(); 1846 1847 DTRACE_SCHED1(surrender, kthread_t *, tp); 1848 1849 /* 1850 * Make the target thread take an excursion through trap() 1851 * to do preempt() (unless we're already in trap or post_syscall, 1852 * calling cpu_surrender via CL_TRAPRET). 
1853 */ 1854 if (tp != curthread || (lwp = tp->t_lwp) == NULL || 1855 lwp->lwp_state != LWP_USER) { 1856 aston(tp); 1857 if (cpup != CPU) 1858 poke_cpu(cpup->cpu_id); 1859 } 1860 TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER, 1861 "cpu_surrender:tid %p cpu %p", tp, cpup); 1862 } 1863 1864 /* 1865 * Commit to and ratify a scheduling decision 1866 */ 1867 /*ARGSUSED*/ 1868 static kthread_t * 1869 disp_ratify(kthread_t *tp, disp_t *kpq) 1870 { 1871 pri_t tpri, maxpri; 1872 pri_t maxkpri; 1873 cpu_t *cpup; 1874 1875 ASSERT(tp != NULL); 1876 /* 1877 * Commit to, then ratify scheduling decision 1878 */ 1879 cpup = CPU; 1880 if (cpup->cpu_runrun != 0) 1881 cpup->cpu_runrun = 0; 1882 if (cpup->cpu_kprunrun != 0) 1883 cpup->cpu_kprunrun = 0; 1884 if (cpup->cpu_chosen_level != -1) 1885 cpup->cpu_chosen_level = -1; 1886 membar_enter(); 1887 tpri = DISP_PRIO(tp); 1888 maxpri = cpup->cpu_disp->disp_maxrunpri; 1889 maxkpri = kpq->disp_maxrunpri; 1890 if (maxpri < maxkpri) 1891 maxpri = maxkpri; 1892 if (tpri < maxpri) { 1893 /* 1894 * should have done better 1895 * put this one back and indicate to try again 1896 */ 1897 cpup->cpu_dispthread = curthread; /* fixup dispthread */ 1898 cpup->cpu_dispatch_pri = DISP_PRIO(curthread); 1899 thread_lock_high(tp); 1900 THREAD_TRANSITION(tp); 1901 setfrontdq(tp); 1902 thread_unlock_nopreempt(tp); 1903 1904 tp = NULL; 1905 } 1906 return (tp); 1907 } 1908 1909 /* 1910 * See if there is any work on the dispatcher queue for other CPUs. 1911 * If there is, dequeue the best thread and return. 1912 */ 1913 static kthread_t * 1914 disp_getwork(cpu_t *cp) 1915 { 1916 cpu_t *ocp; /* other CPU */ 1917 cpu_t *ocp_start; 1918 cpu_t *tcp; /* target local CPU */ 1919 kthread_t *tp; 1920 kthread_t *retval = NULL; 1921 pri_t maxpri; 1922 disp_t *kpq; /* kp queue for this partition */ 1923 lpl_t *lpl, *lpl_leaf; 1924 int leafidx, startidx; 1925 hrtime_t stealtime; 1926 lgrp_id_t local_id; 1927 1928 maxpri = -1; 1929 tcp = NULL; 1930 1931 kpq = &cp->cpu_part->cp_kp_queue; 1932 while (kpq->disp_maxrunpri >= 0) { 1933 /* 1934 * Try to take a thread from the kp_queue. 1935 */ 1936 tp = (disp_getbest(kpq)); 1937 if (tp) 1938 return (disp_ratify(tp, kpq)); 1939 } 1940 1941 kpreempt_disable(); /* protect the cpu_active list */ 1942 1943 /* 1944 * Try to find something to do on another CPU's run queue. 1945 * Loop through all other CPUs looking for the one with the highest 1946 * priority unbound thread. 1947 * 1948 * On NUMA machines, the partition's CPUs are consulted in order of 1949 * distance from the current CPU. This way, the first available 1950 * work found is also the closest, and will suffer the least 1951 * from being migrated. 1952 */ 1953 lpl = lpl_leaf = cp->cpu_lpl; 1954 local_id = lpl_leaf->lpl_lgrpid; 1955 leafidx = startidx = 0; 1956 1957 /* 1958 * This loop traverses the lpl hierarchy. Higher level lpls represent 1959 * broader levels of locality 1960 */ 1961 do { 1962 /* This loop iterates over the lpl's leaves */ 1963 do { 1964 if (lpl_leaf != cp->cpu_lpl) 1965 ocp = lpl_leaf->lpl_cpus; 1966 else 1967 ocp = cp->cpu_next_lpl; 1968 1969 /* This loop iterates over the CPUs in the leaf */ 1970 ocp_start = ocp; 1971 do { 1972 pri_t pri; 1973 1974 ASSERT(CPU_ACTIVE(ocp)); 1975 1976 /* 1977 * End our stroll around this lpl if: 1978 * 1979 * - Something became runnable on the local 1980 * queue...which also ends our stroll around 1981 * the partition. 1982 * 1983 * - We happen across another idle CPU. 
1984 * Since it is patrolling the next portion 1985 * of the lpl's list (assuming it's not 1986 * halted, or busy servicing an interrupt), 1987 * move to the next higher level of locality. 1988 */ 1989 if (cp->cpu_disp->disp_nrunnable != 0) { 1990 kpreempt_enable(); 1991 return (NULL); 1992 } 1993 if (ocp->cpu_dispatch_pri == -1) { 1994 if (ocp->cpu_disp_flags & 1995 CPU_DISP_HALTED || 1996 ocp->cpu_intr_actv != 0) 1997 continue; 1998 else 1999 goto next_level; 2000 } 2001 2002 /* 2003 * If there's only one thread and the CPU 2004 * is in the middle of a context switch, 2005 * or it's currently running the idle thread, 2006 * don't steal it. 2007 */ 2008 if ((ocp->cpu_disp_flags & 2009 CPU_DISP_DONTSTEAL) && 2010 ocp->cpu_disp->disp_nrunnable == 1) 2011 continue; 2012 2013 pri = ocp->cpu_disp->disp_max_unbound_pri; 2014 if (pri > maxpri) { 2015 /* 2016 * Don't steal threads that we attempted 2017 * to steal recently until they're ready 2018 * to be stolen again. 2019 */ 2020 stealtime = ocp->cpu_disp->disp_steal; 2021 if (stealtime == 0 || 2022 stealtime - gethrtime() <= 0) { 2023 maxpri = pri; 2024 tcp = ocp; 2025 } else { 2026 /* 2027 * Don't update tcp, just set 2028 * the retval to T_DONTSTEAL, so 2029 * that if no acceptable CPUs 2030 * are found the return value 2031 * will be T_DONTSTEAL rather 2032 * then NULL. 2033 */ 2034 retval = T_DONTSTEAL; 2035 } 2036 } 2037 } while ((ocp = ocp->cpu_next_lpl) != ocp_start); 2038 2039 /* 2040 * Iterate to the next leaf lpl in the resource set 2041 * at this level of locality. If we hit the end of 2042 * the set, wrap back around to the beginning. 2043 * 2044 * Note: This iteration is NULL terminated for a reason 2045 * see lpl_topo_bootstrap() in lgrp.c for details. 2046 */ 2047 if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) { 2048 leafidx = 0; 2049 lpl_leaf = lpl->lpl_rset[leafidx]; 2050 } 2051 } while (leafidx != startidx); 2052 2053 next_level: 2054 /* 2055 * Expand the search to include farther away CPUs (next 2056 * locality level). The closer CPUs that have already been 2057 * checked will be checked again. In doing so, idle CPUs 2058 * will tend to be more aggresive about stealing from CPUs 2059 * that are closer (since the closer CPUs will be considered 2060 * more often). 2061 * Begin at this level with the CPUs local leaf lpl. 2062 */ 2063 if ((lpl = lpl->lpl_parent) != NULL) { 2064 leafidx = startidx = lpl->lpl_id2rset[local_id]; 2065 lpl_leaf = lpl->lpl_rset[leafidx]; 2066 } 2067 } while (!tcp && lpl); 2068 2069 kpreempt_enable(); 2070 2071 /* 2072 * If another queue looks good, and there is still nothing on 2073 * the local queue, try to transfer one or more threads 2074 * from it to our queue. 2075 */ 2076 if (tcp && cp->cpu_disp->disp_nrunnable == 0) { 2077 tp = disp_getbest(tcp->cpu_disp); 2078 if (tp == NULL || tp == T_DONTSTEAL) 2079 return (tp); 2080 return (disp_ratify(tp, kpq)); 2081 } 2082 return (retval); 2083 } 2084 2085 2086 /* 2087 * disp_fix_unbound_pri() 2088 * Determines the maximum priority of unbound threads on the queue. 2089 * The priority is kept for the queue, but is only increased, never 2090 * reduced unless some CPU is looking for something on that queue. 2091 * 2092 * The priority argument is the known upper limit. 2093 * 2094 * Perhaps this should be kept accurately, but that probably means 2095 * separate bitmaps for bound and unbound threads. Since only idled 2096 * CPUs will have to do this recalculation, it seems better this way. 
2097 */ 2098 static void 2099 disp_fix_unbound_pri(disp_t *dp, pri_t pri) 2100 { 2101 kthread_t *tp; 2102 dispq_t *dq; 2103 ulong_t *dqactmap = dp->disp_qactmap; 2104 ulong_t mapword; 2105 int wx; 2106 2107 ASSERT(DISP_LOCK_HELD(&dp->disp_lock)); 2108 2109 ASSERT(pri >= 0); /* checked by caller */ 2110 2111 /* 2112 * Start the search at the next lowest priority below the supplied 2113 * priority. This depends on the bitmap implementation. 2114 */ 2115 do { 2116 wx = pri >> BT_ULSHIFT; /* index of word in map */ 2117 2118 /* 2119 * Form mask for all lower priorities in the word. 2120 */ 2121 mapword = dqactmap[wx] & (BT_BIW(pri) - 1); 2122 2123 /* 2124 * Get next lower active priority. 2125 */ 2126 if (mapword != 0) { 2127 pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1; 2128 } else if (wx > 0) { 2129 pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */ 2130 if (pri < 0) 2131 break; 2132 } else { 2133 pri = -1; 2134 break; 2135 } 2136 2137 /* 2138 * Search the queue for unbound, runnable threads. 2139 */ 2140 dq = &dp->disp_q[pri]; 2141 tp = dq->dq_first; 2142 2143 while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) { 2144 tp = tp->t_link; 2145 } 2146 2147 /* 2148 * If a thread was found, set the priority and return. 2149 */ 2150 } while (tp == NULL); 2151 2152 /* 2153 * pri holds the maximum unbound thread priority or -1. 2154 */ 2155 if (dp->disp_max_unbound_pri != pri) 2156 dp->disp_max_unbound_pri = pri; 2157 } 2158 2159 /* 2160 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should 2161 * check if the CPU to which it was previously bound should have 2162 * its disp_max_unbound_pri increased. 2163 */ 2164 void 2165 disp_adjust_unbound_pri(kthread_t *tp) 2166 { 2167 disp_t *dp; 2168 pri_t tpri; 2169 2170 ASSERT(THREAD_LOCK_HELD(tp)); 2171 2172 /* 2173 * Don't do anything if the thread is not bound, or 2174 * currently not runnable or swapped out. 2175 */ 2176 if (tp->t_bound_cpu == NULL || 2177 tp->t_state != TS_RUN || 2178 tp->t_schedflag & TS_ON_SWAPQ) 2179 return; 2180 2181 tpri = DISP_PRIO(tp); 2182 dp = tp->t_bound_cpu->cpu_disp; 2183 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 2184 if (tpri > dp->disp_max_unbound_pri) 2185 dp->disp_max_unbound_pri = tpri; 2186 } 2187 2188 /* 2189 * disp_getbest() 2190 * De-queue the highest priority unbound runnable thread. 2191 * Returns with the thread unlocked and onproc but at splhigh (like disp()). 2192 * Returns NULL if nothing found. 2193 * Returns T_DONTSTEAL if the thread was not stealable, 2194 * so that the caller will try again later. 2195 * 2196 * Passed a pointer to a dispatch queue that is not associated with 2197 * this CPU. 2198 */ 2199 static kthread_t * 2200 disp_getbest(disp_t *dp) 2201 { 2202 kthread_t *tp; 2203 dispq_t *dq; 2204 pri_t pri; 2205 cpu_t *cp, *tcp; 2206 boolean_t allbound; 2207 2208 disp_lock_enter(&dp->disp_lock); 2209 2210 /* 2211 * If there is nothing to run, or the CPU is in the middle of a 2212 * context switch of the only thread, return NULL. 2213 */ 2214 tcp = dp->disp_cpu; 2215 cp = CPU; 2216 pri = dp->disp_max_unbound_pri; 2217 if (pri == -1 || 2218 (tcp != NULL && (tcp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 2219 tcp->cpu_disp->disp_nrunnable == 1)) { 2220 disp_lock_exit_nopreempt(&dp->disp_lock); 2221 return (NULL); 2222 } 2223 2224 dq = &dp->disp_q[pri]; 2225 2226 2227 /* 2228 * Assume that all threads are bound on this queue, and change it 2229 * later when we find out that it is not the case.
2230 */ 2231 allbound = B_TRUE; 2232 for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) { 2233 hrtime_t now, nosteal, rqtime; 2234 2235 /* 2236 * Skip over bound threads which could be here even 2237 * though disp_max_unbound_pri indicated this level. 2238 */ 2239 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 2240 continue; 2241 2242 /* 2243 * We've got some unbound threads on this queue, so turn 2244 * the allbound flag off now. 2245 */ 2246 allbound = B_FALSE; 2247 2248 /* 2249 * The thread is a candidate for stealing from its run queue. We 2250 * don't want to steal threads that became runnable just a 2251 * moment ago. This improves CPU affinity for threads that get 2252 * preempted for short periods of time and go back on the run 2253 * queue. 2254 * 2255 * We want to let it stay on its run queue if it was only placed 2256 * there recently and it was running on the same CPU before that 2257 * to preserve its cache investment. For the thread to remain on 2258 * its run queue, ALL of the following conditions must be 2259 * satisfied: 2260 * 2261 * - the disp queue should not be the kernel preemption queue 2262 * - delayed idle stealing should not be disabled 2263 * - nosteal_nsec should be non-zero 2264 * - it should run with user priority 2265 * - it should be on the run queue of the CPU where it was 2266 * running before being placed on the run queue 2267 * - it should be the only thread on the run queue (to prevent 2268 * extra scheduling latency for other threads) 2269 * - it should sit on the run queue for less than per-chip 2270 * nosteal interval or global nosteal interval 2271 * - in case of CPUs with shared cache it should sit in a run 2272 * queue of a CPU from a different chip 2273 * 2274 * The checks are arranged so that the ones that are faster are 2275 * placed earlier. 2276 */ 2277 if (tcp == NULL || 2278 pri >= minclsyspri || 2279 tp->t_cpu != tcp) 2280 break; 2281 2282 /* 2283 * Steal immediately if, due to the CMT processor architecture, 2284 * migration between cp and tcp would incur no performance 2285 * penalty. 2286 */ 2287 if (pg_cmt_can_migrate(cp, tcp)) 2288 break; 2289 2290 nosteal = nosteal_nsec; 2291 if (nosteal == 0) 2292 break; 2293 2294 /* 2295 * Calculate time spent sitting on run queue 2296 */ 2297 now = gethrtime_unscaled(); 2298 rqtime = now - tp->t_waitrq; 2299 scalehrtime(&rqtime); 2300 2301 /* 2302 * Steal immediately if the time spent on this run queue is more 2303 * than the allowed nosteal delay. 2304 * 2305 * Negative rqtime check is needed here to avoid infinite 2306 * stealing delays caused by unlikely but not impossible 2307 * drifts between CPU times on different CPUs. 2308 */ 2309 if (rqtime > nosteal || rqtime < 0) 2310 break; 2311 2312 DTRACE_PROBE4(nosteal, kthread_t *, tp, 2313 cpu_t *, tcp, cpu_t *, cp, hrtime_t, rqtime); 2314 scalehrtime(&now); 2315 /* 2316 * Calculate when this thread becomes stealable 2317 */ 2318 now += (nosteal - rqtime); 2319 2320 /* 2321 * Calculate time when some thread becomes stealable 2322 */ 2323 if (now < dp->disp_steal) 2324 dp->disp_steal = now; 2325 } 2326 2327 /* 2328 * If there were no unbound threads on this queue, find the queue 2329 * where they are and then return later. The value of 2330 * disp_max_unbound_pri is not always accurate because it isn't 2331 * reduced until another idle CPU looks for work.
2332 */ 2333 if (allbound) 2334 disp_fix_unbound_pri(dp, pri); 2335 2336 /* 2337 * If we reached the end of the queue and found no unbound threads 2338 * then return NULL so that other CPUs will be considered. If there 2339 * are unbound threads but they cannot yet be stolen, then 2340 * return T_DONTSTEAL and try again later. 2341 */ 2342 if (tp == NULL) { 2343 disp_lock_exit_nopreempt(&dp->disp_lock); 2344 return (allbound ? NULL : T_DONTSTEAL); 2345 } 2346 2347 /* 2348 * Found a runnable, unbound thread, so remove it from queue. 2349 * dispdeq() requires that we have the thread locked, and we do, 2350 * by virtue of holding the dispatch queue lock. dispdeq() will 2351 * put the thread in transition state, thereby dropping the dispq 2352 * lock. 2353 */ 2354 2355 #ifdef DEBUG 2356 { 2357 int thread_was_on_queue; 2358 2359 thread_was_on_queue = dispdeq(tp); /* drops disp_lock */ 2360 ASSERT(thread_was_on_queue); 2361 } 2362 2363 #else /* DEBUG */ 2364 (void) dispdeq(tp); /* drops disp_lock */ 2365 #endif /* DEBUG */ 2366 2367 /* 2368 * Reset the disp_queue steal time - we do not know what the smallest 2369 * value across the queue is. 2370 */ 2371 dp->disp_steal = 0; 2372 2373 tp->t_schedflag |= TS_DONT_SWAP; 2374 2375 /* 2376 * Set up the thread to run on the current CPU. 2377 */ 2378 tp->t_disp_queue = cp->cpu_disp; 2379 2380 cp->cpu_dispthread = tp; /* protected by spl only */ 2381 cp->cpu_dispatch_pri = pri; 2382 2383 /* 2384 * There can be a memory synchronization race between disp_getbest() 2385 * and disp_ratify() vs cpu_resched() where cpu_resched() is trying 2386 * to preempt the current thread to run the enqueued thread while 2387 * disp_getbest() and disp_ratify() are changing the current thread 2388 * to the stolen thread. This may lead to a situation where 2389 * cpu_resched() tries to preempt the wrong thread and the 2390 * stolen thread continues to run on the CPU which has been tagged 2391 * for preemption. 2392 * Later the clock thread gets enqueued but doesn't get to run on the 2393 * CPU causing the system to hang. 2394 * 2395 * To avoid this, grabbing and dropping the disp_lock (which does 2396 * a memory barrier) is needed to synchronize the execution of 2397 * cpu_resched() with disp_getbest() and disp_ratify() and 2398 * synchronize the memory read and written by cpu_resched(), 2399 * disp_getbest(), and disp_ratify() with each other. 2400 * (see CR#6482861 for more details). 2401 */ 2402 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 2403 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 2404 2405 ASSERT(pri == DISP_PRIO(tp)); 2406 2407 DTRACE_PROBE3(steal, kthread_t *, tp, cpu_t *, tcp, cpu_t *, cp); 2408 2409 thread_onproc(tp, cp); /* set t_state to TS_ONPROC */ 2410 2411 /* 2412 * Return with spl high so that swtch() won't need to raise it. 2413 * The disp_lock was dropped by dispdeq(). 2414 */ 2415 2416 return (tp); 2417 } 2418 2419 /* 2420 * disp_bound_common() - common routine for higher level functions 2421 * that check for bound threads under certain conditions. 2422 * If 'threadlistsafe' is set then there is no need to acquire 2423 * pidlock to stop the thread list from changing (eg, if 2424 * disp_bound_* is called with cpus paused).
2425 */ 2426 static int 2427 disp_bound_common(cpu_t *cp, int threadlistsafe, int flag) 2428 { 2429 int found = 0; 2430 kthread_t *tp; 2431 2432 ASSERT(flag); 2433 2434 if (!threadlistsafe) 2435 mutex_enter(&pidlock); 2436 tp = curthread; /* faster than allthreads */ 2437 do { 2438 if (tp->t_state != TS_FREE) { 2439 /* 2440 * If an interrupt thread is busy, but the 2441 * caller doesn't care (i.e. BOUND_INTR is off), 2442 * then just ignore it and continue through. 2443 */ 2444 if ((tp->t_flag & T_INTR_THREAD) && 2445 !(flag & BOUND_INTR)) 2446 continue; 2447 2448 /* 2449 * Skip the idle thread for the CPU 2450 * we're about to set offline. 2451 */ 2452 if (tp == cp->cpu_idle_thread) 2453 continue; 2454 2455 /* 2456 * Skip the pause thread for the CPU 2457 * we're about to set offline. 2458 */ 2459 if (tp == cp->cpu_pause_thread) 2460 continue; 2461 2462 if ((flag & BOUND_CPU) && 2463 (tp->t_bound_cpu == cp || 2464 tp->t_bind_cpu == cp->cpu_id || 2465 tp->t_weakbound_cpu == cp)) { 2466 found = 1; 2467 break; 2468 } 2469 2470 if ((flag & BOUND_PARTITION) && 2471 (tp->t_cpupart == cp->cpu_part)) { 2472 found = 1; 2473 break; 2474 } 2475 } 2476 } while ((tp = tp->t_next) != curthread && found == 0); 2477 if (!threadlistsafe) 2478 mutex_exit(&pidlock); 2479 return (found); 2480 } 2481 2482 /* 2483 * disp_bound_threads - return nonzero if threads are bound to the processor. 2484 * Called infrequently. Keep this simple. 2485 * Includes threads that are asleep or stopped but not onproc. 2486 */ 2487 int 2488 disp_bound_threads(cpu_t *cp, int threadlistsafe) 2489 { 2490 return (disp_bound_common(cp, threadlistsafe, BOUND_CPU)); 2491 } 2492 2493 /* 2494 * disp_bound_anythreads - return nonzero if _any_ threads are bound 2495 * to the given processor, including interrupt threads. 2496 */ 2497 int 2498 disp_bound_anythreads(cpu_t *cp, int threadlistsafe) 2499 { 2500 return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR)); 2501 } 2502 2503 /* 2504 * disp_bound_partition - return nonzero if threads are bound to the same 2505 * partition as the processor. 2506 * Called infrequently. Keep this simple. 2507 * Includes threads that are asleep or stopped but not onproc. 2508 */ 2509 int 2510 disp_bound_partition(cpu_t *cp, int threadlistsafe) 2511 { 2512 return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION)); 2513 } 2514 2515 /* 2516 * disp_cpu_inactive - make a CPU inactive by moving all of its unbound 2517 * threads to other CPUs. 2518 */ 2519 void 2520 disp_cpu_inactive(cpu_t *cp) 2521 { 2522 kthread_t *tp; 2523 disp_t *dp = cp->cpu_disp; 2524 dispq_t *dq; 2525 pri_t pri; 2526 int wasonq; 2527 2528 disp_lock_enter(&dp->disp_lock); 2529 while ((pri = dp->disp_max_unbound_pri) != -1) { 2530 dq = &dp->disp_q[pri]; 2531 tp = dq->dq_first; 2532 2533 /* 2534 * Skip over bound threads. 2535 */ 2536 while (tp != NULL && tp->t_bound_cpu != NULL) { 2537 tp = tp->t_link; 2538 } 2539 2540 if (tp == NULL) { 2541 /* disp_max_unbound_pri must be inaccurate, so fix it */ 2542 disp_fix_unbound_pri(dp, pri); 2543 continue; 2544 } 2545 2546 wasonq = dispdeq(tp); /* drops disp_lock */ 2547 ASSERT(wasonq); 2548 ASSERT(tp->t_weakbound_cpu == NULL); 2549 2550 setbackdq(tp); 2551 /* 2552 * Called from cpu_offline: 2553 * 2554 * cp has already been removed from the list of active cpus 2555 * and tp->t_cpu has been changed so there is no risk of 2556 * tp ending up back on cp. 2557 * 2558 * Called from cpupart_move_cpu: 2559 * 2560 * The cpu has moved to a new cpupart. 
Any threads that 2561 * were on its dispatch queues before the move remain 2562 * in the old partition and can't run in the new partition. 2563 */ 2564 ASSERT(tp->t_cpu != cp); 2565 thread_unlock(tp); 2566 2567 disp_lock_enter(&dp->disp_lock); 2568 } 2569 disp_lock_exit(&dp->disp_lock); 2570 } 2571 2572 /* 2573 * Return a score rating this CPU for running this thread: lower is better. 2574 * 2575 * If curthread is looking for a new CPU, then we ignore cpu_dispatch_pri for 2576 * curcpu (as that's our own priority). 2577 * 2578 * If a CPU is the target of an offline request, then try to avoid it. 2579 * 2580 * Otherwise we'll use double the effective dispatcher priority for the CPU. 2581 * 2582 * We do this so smt_adjust_cpu_score() can increment the score if needed, 2583 * without ending up overriding a dispatcher priority. 2584 */ 2585 static pri_t 2586 cpu_score(cpu_t *cp, kthread_t *tp) 2587 { 2588 pri_t score; 2589 2590 if (tp == curthread && cp == curthread->t_cpu) 2591 score = 2 * CPU_IDLE_PRI; 2592 else if (cp == cpu_inmotion) 2593 score = SHRT_MAX; 2594 else 2595 score = 2 * cp->cpu_dispatch_pri; 2596 2597 if (2 * cp->cpu_disp->disp_maxrunpri > score) 2598 score = 2 * cp->cpu_disp->disp_maxrunpri; 2599 if (2 * cp->cpu_chosen_level > score) 2600 score = 2 * cp->cpu_chosen_level; 2601 2602 return (smt_adjust_cpu_score(tp, cp, score)); 2603 } 2604 2605 /* 2606 * disp_lowpri_cpu - find a suitable CPU to run the given thread. 2607 * 2608 * We are looking for a CPU with an effective dispatch priority lower than the 2609 * thread's, so that the thread will run immediately rather than be enqueued. 2610 * For NUMA locality, we prefer "home" CPUs within the thread's ->t_lpl group. 2611 * If we don't find an available CPU there, we will expand our search to include 2612 * wider locality levels. (Note these groups are already divided by CPU 2613 * partition.) 2614 * 2615 * If the thread cannot immediately run on *any* CPU, we'll enqueue ourselves on 2616 * the best home CPU we found. 2617 * 2618 * The hint passed in is used as a starting point so we don't favor CPU 0 or any 2619 * other CPU. The caller should pass in the most recently used CPU for the 2620 * thread; it's of course possible that this CPU isn't in the home lgroup. 2621 * 2622 * This function must be called at either high SPL, or with preemption disabled, 2623 * so that the "hint" CPU cannot be removed from the online CPU list while we 2624 * are traversing it. 2625 */ 2626 cpu_t * 2627 disp_lowpri_cpu(cpu_t *hint, kthread_t *tp, pri_t tpri) 2628 { 2629 cpu_t *bestcpu; 2630 cpu_t *besthomecpu; 2631 cpu_t *cp, *cpstart; 2632 2633 klgrpset_t done; 2634 2635 lpl_t *lpl_iter, *lpl_leaf; 2636 2637 ASSERT(hint != NULL); 2638 ASSERT(tp->t_lpl->lpl_ncpu > 0); 2639 2640 bestcpu = besthomecpu = NULL; 2641 klgrpset_clear(done); 2642 2643 lpl_iter = tp->t_lpl; 2644 2645 do { 2646 pri_t best = SHRT_MAX; 2647 klgrpset_t cur_set; 2648 2649 klgrpset_clear(cur_set); 2650 2651 for (int i = 0; i < lpl_iter->lpl_nrset; i++) { 2652 lpl_leaf = lpl_iter->lpl_rset[i]; 2653 if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid)) 2654 continue; 2655 2656 klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid); 2657 2658 if (hint->cpu_lpl == lpl_leaf) 2659 cp = cpstart = hint; 2660 else 2661 cp = cpstart = lpl_leaf->lpl_cpus; 2662 2663 do { 2664 pri_t score = cpu_score(cp, tp); 2665 2666 if (score < best) { 2667 best = score; 2668 bestcpu = cp; 2669 2670 /* An idle CPU: we're done.
*/ 2671 if (score / 2 == CPU_IDLE_PRI) 2672 goto out; 2673 } 2674 } while ((cp = cp->cpu_next_lpl) != cpstart); 2675 } 2676 2677 if (bestcpu != NULL && tpri > (best / 2)) 2678 goto out; 2679 2680 if (besthomecpu == NULL) 2681 besthomecpu = bestcpu; 2682 2683 /* 2684 * Add the lgrps we just considered to the "done" set 2685 */ 2686 klgrpset_or(done, cur_set); 2687 2688 } while ((lpl_iter = lpl_iter->lpl_parent) != NULL); 2689 2690 /* 2691 * The specified priority isn't high enough to run immediately 2692 * anywhere, so just return the best CPU from the home lgroup. 2693 */ 2694 bestcpu = besthomecpu; 2695 2696 out: 2697 ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0); 2698 return (bestcpu); 2699 } 2700 2701 /* 2702 * This routine provides the generic idle cpu function for all processors. 2703 * If a processor has some specific code to execute when idle (say, to stop 2704 * the pipeline and save power) then that routine should be defined in the 2705 * processor-specific code (module_xx.c) and the global variable idle_cpu 2706 * set to that function. 2707 */ 2708 static void 2709 generic_idle_cpu(void) 2710 { 2711 } 2712 2713 /*ARGSUSED*/ 2714 static void 2715 generic_enq_thread(cpu_t *cpu, int bound) 2716 { 2717 } 2718 2719 cpu_t * 2720 disp_choose_best_cpu(void) 2721 { 2722 kthread_t *t = curthread; 2723 cpu_t *curcpu = CPU; 2724 2725 ASSERT(t->t_preempt > 0); 2726 ASSERT(t->t_state == TS_ONPROC); 2727 ASSERT(t->t_schedflag & TS_VCPU); 2728 2729 if (smt_should_run(t, curcpu)) 2730 return (curcpu); 2731 2732 return (disp_lowpri_cpu(curcpu, t, t->t_pri)); 2733 } 2734
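/*
 * EDITOR'S ILLUSTRATIVE SKETCH (not part of the original file or of any
 * build): the helper below restates, in isolation, the bitmap walk that
 * disp_fix_unbound_pri() performs to find the next lower priority with a
 * non-empty dispatch queue.  The DISP_EXAMPLES guard and the
 * disp_example_* name are hypothetical; only BT_ULSHIFT, BT_BIW(),
 * highbit(), and bt_gethighbit() come from the code above.
 */
#ifdef	DISP_EXAMPLES
static pri_t
disp_example_next_active_pri(ulong_t *dqactmap, pri_t pri)
{
	ulong_t	mapword;
	int	wx = pri >> BT_ULSHIFT;		/* word index in the map */

	/* mask off 'pri' and everything above it within this word */
	mapword = dqactmap[wx] & (BT_BIW(pri) - 1);
	if (mapword != 0)
		return ((wx << BT_ULSHIFT) + highbit(mapword) - 1);

	/* nothing lower in this word; scan the remaining words, if any */
	if (wx > 0)
		return (bt_gethighbit(dqactmap, wx - 1));	/* -1 if empty */

	return (-1);
}
#endif	/* DISP_EXAMPLES */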
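/*
 * EDITOR'S ILLUSTRATIVE SKETCH (not part of the original file or of any
 * build): a minimal restatement of the nosteal test applied by
 * disp_getbest() above.  A thread becomes stealable once it has sat on
 * its run queue for at least 'nosteal' nanoseconds; a nosteal value of
 * zero disables the protection entirely.  The DISP_EXAMPLES guard and
 * the disp_example_* name are hypothetical.
 */
#ifdef	DISP_EXAMPLES
static boolean_t
disp_example_stealable(kthread_t *tp, hrtime_t nosteal)
{
	hrtime_t rqtime;

	if (nosteal == 0)
		return (B_TRUE);		/* protection disabled */

	/* time this thread has spent sitting on its run queue */
	rqtime = gethrtime_unscaled() - tp->t_waitrq;
	scalehrtime(&rqtime);

	/* the rqtime < 0 case guards against clock drift between CPUs */
	return ((rqtime > nosteal || rqtime < 0) ? B_TRUE : B_FALSE);
}
#endif	/* DISP_EXAMPLES */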
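/*
 * EDITOR'S ILLUSTRATIVE SKETCH (not part of the original file or of any
 * build): why cpu_score() works in units of twice the dispatch priority.
 * Doubling leaves room for a one-unit SMT penalty between consecutive
 * priorities, so a penalized CPU at priority p still scores better than
 * any CPU at priority p + 1 (2p + 1 < 2p + 2).  The DISP_EXAMPLES guard,
 * the disp_example_* name, and the 'penalized' parameter are
 * hypothetical; the real adjustment is made by smt_adjust_cpu_score().
 */
#ifdef	DISP_EXAMPLES
static pri_t
disp_example_score(pri_t disp_pri, boolean_t penalized)
{
	return (2 * disp_pri + (penalized ? 1 : 0));
}
#endif	/* DISP_EXAMPLES */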