/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysinfo.h>
#include <sys/var.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/inline.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/bitmap.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/vtrace.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/pg.h>
#include <sys/cmt.h>
#include <sys/bitset.h>
#include <sys/schedctl.h>
#include <sys/atomic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
#include <sys/smt.h>

#include <vm/as.h>

#define	BOUND_CPU	0x1
#define	BOUND_PARTITION	0x2
#define	BOUND_INTR	0x4

/* Dispatch queue allocation structure and functions */
struct disp_queue_info {
	disp_t	*dp;
	dispq_t	*olddispq;
	dispq_t	*newdispq;
	ulong_t	*olddqactmap;
	ulong_t	*newdqactmap;
	int	oldnglobpris;
};
static void	disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
    disp_t *dp);
static void	disp_dq_assign(struct disp_queue_info *dptr, int numpris);
static void	disp_dq_free(struct disp_queue_info *dptr);

/* platform-specific routine to call when processor is idle */
static void	generic_idle_cpu();
void		(*idle_cpu)() = generic_idle_cpu;

/* routines invoked when a CPU enters/exits the idle loop */
static void	idle_enter();
static void	idle_exit();

/* platform-specific routine to call when thread is enqueued */
static void	generic_enq_thread(cpu_t *, int);
void		(*disp_enq_thread)(cpu_t *, int) = generic_enq_thread;

pri_t	kpreemptpri;		/* priority where kernel preemption applies */
pri_t	upreemptpri = 0;	/* priority where normal preemption applies */
pri_t	intr_pri;		/* interrupt thread priority base level */

#define	KPQPRI	-1		/* pri where cpu affinity is dropped for kpq */
pri_t	kpqpri = KPQPRI;	/* can be set in /etc/system */
disp_t	cpu0_disp;		/* boot CPU's dispatch queue */
disp_lock_t	swapped_lock;	/* lock swapped threads and swap queue */
int	nswapped;		/* total number of swapped threads */
void	disp_swapped_enq(kthread_t *tp);
static void	disp_swapped_setrun(kthread_t *tp);
static void	cpu_resched(cpu_t *cp, pri_t tpri);

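/*
 * A rough sketch (for orientation only; the local variable name is
 * illustrative) of how the disp_queue_info helpers declared above are used
 * to grow a set of dispatch queues -- see cpu_dispqalloc() and
 * disp_kp_alloc() below for the real callers:
 *
 *	struct disp_queue_info dqi;
 *
 *	disp_dq_alloc(&dqi, newnpri, dp);	(KM_SLEEP, CPUs still running)
 *	pause_cpus(NULL, NULL);			(cpu_dispqalloc() only)
 *	disp_dq_assign(&dqi, newnpri);		(copy old queues, swap pointers)
 *	start_cpus();
 *	disp_dq_free(&dqi);			(free the replaced arrays)
 *
 * The allocation and the free are deliberately kept outside the paused
 * window so that nothing sleeps in kmem while the other CPUs are stopped.
 */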
/*
 * If this is set, only interrupt threads will cause kernel preemptions.
 * This is done by changing the value of kpreemptpri.  kpreemptpri
 * will either be the max sysclass pri or the min interrupt pri.
 */
int	only_intr_kpreempt;

extern void	set_idle_cpu(int cpun);
extern void	unset_idle_cpu(int cpun);
static void	setkpdq(kthread_t *tp, int borf);
#define	SETKP_BACK	0
#define	SETKP_FRONT	1
/*
 * Parameter that determines how recently a thread must have run
 * on the CPU to be considered loosely-bound to that CPU to reduce
 * cold cache effects.  The interval is measured in clock ticks.
 */
#define	RECHOOSE_INTERVAL	3
int	rechoose_interval = RECHOOSE_INTERVAL;

/*
 * Parameter that determines how long (in nanoseconds) a thread must
 * be sitting on a run queue before it can be stolen by another CPU
 * to reduce migrations.
 *
 * nosteal_nsec should be set by platform code via
 * cmp_set_nosteal_interval() to an appropriate value.  It is initialized
 * to NOSTEAL_UNINITIALIZED here to indicate that it has not been set yet.
 * Setting nosteal_nsec to 0 effectively disables the nosteal 'protection'.
 */
#define	NOSTEAL_UNINITIALIZED	(-1)
hrtime_t nosteal_nsec = NOSTEAL_UNINITIALIZED;
extern void cmp_set_nosteal_interval(void);

id_t	defaultcid;	/* system "default" class; see dispadmin(8) */

disp_lock_t	transition_lock;	/* lock on transitioning threads */
disp_lock_t	stop_lock;		/* lock on stopped threads */

static void	cpu_dispqalloc(int numpris);

/*
 * This gets returned by disp_getwork/disp_getbest if we couldn't steal
 * a thread because it was sitting on its run queue for a very short
 * period of time.
 */
#define	T_DONTSTEAL	(kthread_t *)(-1) /* returned by disp_getwork/getbest */

static kthread_t	*disp_getwork(cpu_t *to);
static kthread_t	*disp_getbest(disp_t *from);
static kthread_t	*disp_ratify(kthread_t *tp, disp_t *kpq);

void	swtch_to(kthread_t *);

/*
 * dispatcher and scheduler initialization
 */

/*
 * disp_setup - Common code to calculate and allocate dispatcher
 *		variables and structures based on the maximum priority.
 */
static void
disp_setup(pri_t maxglobpri, pri_t oldnglobpris)
{
	pri_t	newnglobpris;

	ASSERT(MUTEX_HELD(&cpu_lock));

	newnglobpris = maxglobpri + 1 + LOCK_LEVEL;

	if (newnglobpris > oldnglobpris) {
		/*
		 * Allocate new kp queues for each CPU partition.
		 */
		cpupart_kpqalloc(newnglobpris);

		/*
		 * Allocate new dispatch queues for each CPU.
		 */
		cpu_dispqalloc(newnglobpris);

		/*
		 * compute new interrupt thread base priority
		 */
		intr_pri = maxglobpri;
		if (only_intr_kpreempt) {
			kpreemptpri = intr_pri + 1;
			if (kpqpri == KPQPRI)
				kpqpri = kpreemptpri;
		}
		v.v_nglobpris = newnglobpris;
	}
}

/*
 * dispinit - Called to initialize all loaded classes and the
 *	      dispatcher framework.
 */
void
dispinit(void)
{
	id_t	cid;
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	maxglobpri = -1;

	/*
	 * Initialize transition lock, which will always be set.
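	 * THREAD_TRANSITION() points a transitioning thread's t_lockp at
	 * transition_lock; since the lock is entered here and never exited,
	 * any thread_lock() caller that finds a thread in transition simply
	 * spins until the thread reaches its next state and t_lockp is
	 * retargeted (see thread_transition()).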
	 */
	DISP_LOCK_INIT(&transition_lock);
	disp_lock_enter_high(&transition_lock);
	DISP_LOCK_INIT(&stop_lock);

	mutex_enter(&cpu_lock);
	CPU->cpu_disp->disp_maxrunpri = -1;
	CPU->cpu_disp->disp_max_unbound_pri = -1;

	/*
	 * Initialize the default CPU partition.
	 */
	cpupart_initialize_default();
	/*
	 * Call the class specific initialization functions for
	 * all pre-installed schedulers.
	 *
	 * We pass the size of a class specific parameter
	 * buffer to each of the initialization functions
	 * to try to catch problems with backward compatibility
	 * of class modules.
	 *
	 * For example, a new class module running on an old system
	 * which didn't provide sufficiently large parameter buffers
	 * would be bad news.  Class initialization modules can check for
	 * this and take action if they detect a problem.
	 */

	for (cid = 0; cid < nclass; cid++) {
		sclass_t	*sc;

		sc = &sclass[cid];
		if (SCHED_INSTALLED(sc)) {
			cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ,
			    &sc->cl_funcs);
			if (cl_maxglobpri > maxglobpri)
				maxglobpri = cl_maxglobpri;
		}
	}

	/*
	 * Historically, kpreemptpri was set to v_maxsyspri + 1 -- which is
	 * to say, maxclsyspri + 1.  However, over time, the system has used
	 * more and more asynchronous kernel threads, with an increasing number
	 * of these doing work on direct behalf of higher-level software (e.g.,
	 * network processing).  This has led to potential priority inversions:
	 * threads doing low-priority lengthy kernel work can effectively
	 * delay kernel-level processing of higher-priority data.  To minimize
	 * such inversions, we set kpreemptpri to be v_maxsyspri; anything in
	 * the kernel that runs at maxclsyspri will therefore induce kernel
	 * preemption, and this priority should be used if/when an asynchronous
	 * thread (or, as is often the case, task queue) is performing a task
	 * on behalf of higher-level software (or any task that is otherwise
	 * latency-sensitive).
	 */
	kpreemptpri = (pri_t)v.v_maxsyspri;
	if (kpqpri == KPQPRI)
		kpqpri = kpreemptpri;

	ASSERT(maxglobpri >= 0);
	disp_setup(maxglobpri, 0);

	mutex_exit(&cpu_lock);

	/*
	 * Platform specific sticky scheduler setup.
	 */
	if (nosteal_nsec == NOSTEAL_UNINITIALIZED)
		cmp_set_nosteal_interval();

	/*
	 * Get the default class ID; this may be later modified via
	 * dispadmin(8).  This will load the class (normally TS) and that will
	 * call disp_add(), which is why we had to drop cpu_lock first.
	 */
	if (getcid(defaultclass, &defaultcid) != 0) {
		cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'",
		    defaultclass);
	}
}

/*
 * disp_add - Called with class pointer to initialize the dispatcher
 *	      for a newly loaded class.
 */
void
disp_add(sclass_t *clp)
{
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	mutex_enter(&cpu_lock);
	/*
	 * Initialize the scheduler class.
	 */
	maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1);
	cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs);
	if (cl_maxglobpri > maxglobpri)
		maxglobpri = cl_maxglobpri;

	/*
	 * Save old queue information.  Since we're initializing a
	 * new scheduling class which has just been loaded, the size
	 * of the dispq may have changed.  We need to handle
	 * that here.
325 */ 326 disp_setup(maxglobpri, v.v_nglobpris); 327 328 mutex_exit(&cpu_lock); 329 } 330 331 332 /* 333 * For each CPU, allocate new dispatch queues 334 * with the stated number of priorities. 335 */ 336 static void 337 cpu_dispqalloc(int numpris) 338 { 339 cpu_t *cpup; 340 struct disp_queue_info *disp_mem; 341 int i, num; 342 343 ASSERT(MUTEX_HELD(&cpu_lock)); 344 345 disp_mem = kmem_zalloc(NCPU * 346 sizeof (struct disp_queue_info), KM_SLEEP); 347 348 /* 349 * This routine must allocate all of the memory before stopping 350 * the cpus because it must not sleep in kmem_alloc while the 351 * CPUs are stopped. Locks they hold will not be freed until they 352 * are restarted. 353 */ 354 i = 0; 355 cpup = cpu_list; 356 do { 357 disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp); 358 i++; 359 cpup = cpup->cpu_next; 360 } while (cpup != cpu_list); 361 num = i; 362 363 pause_cpus(NULL, NULL); 364 for (i = 0; i < num; i++) 365 disp_dq_assign(&disp_mem[i], numpris); 366 start_cpus(); 367 368 /* 369 * I must free all of the memory after starting the cpus because 370 * I can not risk sleeping in kmem_free while the cpus are stopped. 371 */ 372 for (i = 0; i < num; i++) 373 disp_dq_free(&disp_mem[i]); 374 375 kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info)); 376 } 377 378 static void 379 disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp) 380 { 381 dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP); 382 dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) * 383 sizeof (long), KM_SLEEP); 384 dptr->dp = dp; 385 } 386 387 static void 388 disp_dq_assign(struct disp_queue_info *dptr, int numpris) 389 { 390 disp_t *dp; 391 392 dp = dptr->dp; 393 dptr->olddispq = dp->disp_q; 394 dptr->olddqactmap = dp->disp_qactmap; 395 dptr->oldnglobpris = dp->disp_npri; 396 397 ASSERT(dptr->oldnglobpris < numpris); 398 399 if (dptr->olddispq != NULL) { 400 /* 401 * Use kcopy because bcopy is platform-specific 402 * and could block while we might have paused the cpus. 403 */ 404 (void) kcopy(dptr->olddispq, dptr->newdispq, 405 dptr->oldnglobpris * sizeof (dispq_t)); 406 (void) kcopy(dptr->olddqactmap, dptr->newdqactmap, 407 ((dptr->oldnglobpris / BT_NBIPUL) + 1) * 408 sizeof (long)); 409 } 410 dp->disp_q = dptr->newdispq; 411 dp->disp_qactmap = dptr->newdqactmap; 412 dp->disp_q_limit = &dptr->newdispq[numpris]; 413 dp->disp_npri = numpris; 414 } 415 416 static void 417 disp_dq_free(struct disp_queue_info *dptr) 418 { 419 if (dptr->olddispq != NULL) 420 kmem_free(dptr->olddispq, 421 dptr->oldnglobpris * sizeof (dispq_t)); 422 if (dptr->olddqactmap != NULL) 423 kmem_free(dptr->olddqactmap, 424 ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long)); 425 } 426 427 /* 428 * For a newly created CPU, initialize the dispatch queue. 429 * This is called before the CPU is known through cpu[] or on any lists. 430 */ 431 void 432 disp_cpu_init(cpu_t *cp) 433 { 434 disp_t *dp; 435 dispq_t *newdispq; 436 ulong_t *newdqactmap; 437 438 ASSERT(MUTEX_HELD(&cpu_lock)); /* protect dispatcher queue sizes */ 439 440 if (cp == cpu0_disp.disp_cpu) 441 dp = &cpu0_disp; 442 else 443 dp = kmem_alloc(sizeof (disp_t), KM_SLEEP); 444 bzero(dp, sizeof (disp_t)); 445 cp->cpu_disp = dp; 446 dp->disp_cpu = cp; 447 dp->disp_maxrunpri = -1; 448 dp->disp_max_unbound_pri = -1; 449 DISP_LOCK_INIT(&cp->cpu_thread_lock); 450 /* 451 * Allocate memory for the dispatcher queue headers 452 * and the active queue bitmap. 
	 */
	newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP);
	newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dp->disp_q = newdispq;
	dp->disp_qactmap = newdqactmap;
	dp->disp_q_limit = &newdispq[v.v_nglobpris];
	dp->disp_npri = v.v_nglobpris;
}

void
disp_cpu_fini(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_kp_free(cp->cpu_disp);
	if (cp->cpu_disp != &cpu0_disp)
		kmem_free(cp->cpu_disp, sizeof (disp_t));
}

/*
 * Allocate new, larger kpreempt dispatch queue to replace the old one.
 */
void
disp_kp_alloc(disp_t *dq, pri_t npri)
{
	struct disp_queue_info	mem_info;

	if (npri > dq->disp_npri) {
		/*
		 * Allocate memory for the new array.
		 */
		disp_dq_alloc(&mem_info, npri, dq);

		/*
		 * We need to copy the old structures to the new
		 * and free the old.
		 */
		disp_dq_assign(&mem_info, npri);
		disp_dq_free(&mem_info);
	}
}

/*
 * Free dispatch queue.
 * Used for the kpreempt queues for a removed CPU partition and
 * for the per-CPU queues of deleted CPUs.
 */
void
disp_kp_free(disp_t *dq)
{
	struct disp_queue_info	mem_info;

	mem_info.olddispq = dq->disp_q;
	mem_info.olddqactmap = dq->disp_qactmap;
	mem_info.oldnglobpris = dq->disp_npri;
	disp_dq_free(&mem_info);
}

/*
 * End dispatcher and scheduler initialization.
 */

/*
 * See if there's anything to do other than remain idle.
 * Return non-zero if there is.
 *
 * This function must be called with high spl, or with
 * kernel preemption disabled to prevent the partition's
 * active cpu list from changing while being traversed.
 *
 * This is essentially a simpler version of disp_getwork()
 * to be called by CPUs preparing to "halt".
 */
int
disp_anywork(void)
{
	cpu_t		*cp = CPU;
	cpu_t		*ocp;
	volatile int	*local_nrunnable = &cp->cpu_disp->disp_nrunnable;

	if (!(cp->cpu_flags & CPU_OFFLINE)) {
		if (CP_MAXRUNPRI(cp->cpu_part) >= 0)
			return (1);

		for (ocp = cp->cpu_next_part; ocp != cp;
		    ocp = ocp->cpu_next_part) {
			ASSERT(CPU_ACTIVE(ocp));

			/*
			 * Something has appeared on the local run queue.
			 */
			if (*local_nrunnable > 0)
				return (1);
			/*
			 * If we encounter another idle CPU that will
			 * soon be trolling around through disp_anywork(),
			 * terminate our walk here and let this other CPU
			 * patrol the next part of the list.
			 */
			if (ocp->cpu_dispatch_pri == -1 &&
			    (ocp->cpu_disp_flags & CPU_DISP_HALTED) == 0)
				return (0);
			/*
			 * Work can be taken from another CPU if:
			 *	- There is unbound work on the run queue
			 *	- That work isn't a thread undergoing a
			 *	  context switch on an otherwise empty queue.
			 *	- The CPU isn't running the idle loop.
562 */ 563 if (ocp->cpu_disp->disp_max_unbound_pri != -1 && 564 !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 565 ocp->cpu_disp->disp_nrunnable == 1) && 566 ocp->cpu_dispatch_pri != -1) 567 return (1); 568 } 569 } 570 return (0); 571 } 572 573 /* 574 * Called when CPU enters the idle loop 575 */ 576 static void 577 idle_enter() 578 { 579 cpu_t *cp = CPU; 580 581 new_cpu_mstate(CMS_IDLE, gethrtime_unscaled()); 582 CPU_STATS_ADDQ(cp, sys, idlethread, 1); 583 set_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 584 } 585 586 /* 587 * Called when CPU exits the idle loop 588 */ 589 static void 590 idle_exit() 591 { 592 cpu_t *cp = CPU; 593 594 new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled()); 595 unset_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 596 } 597 598 /* 599 * Idle loop. 600 */ 601 void 602 idle() 603 { 604 struct cpu *cp = CPU; /* pointer to this CPU */ 605 kthread_t *t; /* taken thread */ 606 607 idle_enter(); 608 609 /* 610 * Uniprocessor version of idle loop. 611 * Do this until notified that we're on an actual multiprocessor. 612 */ 613 while (ncpus == 1) { 614 if (cp->cpu_disp->disp_nrunnable == 0) { 615 (*idle_cpu)(); 616 continue; 617 } 618 idle_exit(); 619 swtch(); 620 621 idle_enter(); /* returned from swtch */ 622 } 623 624 /* 625 * Multiprocessor idle loop. 626 */ 627 for (;;) { 628 /* 629 * If CPU is completely quiesced by p_online(2), just wait 630 * here with minimal bus traffic until put online. 631 */ 632 while (cp->cpu_flags & CPU_QUIESCED) 633 (*idle_cpu)(); 634 635 if (cp->cpu_disp->disp_nrunnable != 0) { 636 idle_exit(); 637 swtch(); 638 } else { 639 if (cp->cpu_flags & CPU_OFFLINE) 640 continue; 641 if ((t = disp_getwork(cp)) == NULL) { 642 if (cp->cpu_chosen_level != -1) { 643 disp_t *dp = cp->cpu_disp; 644 disp_t *kpq; 645 646 disp_lock_enter(&dp->disp_lock); 647 /* 648 * Set kpq under lock to prevent 649 * migration between partitions. 650 */ 651 kpq = &cp->cpu_part->cp_kp_queue; 652 if (kpq->disp_maxrunpri == -1) 653 cp->cpu_chosen_level = -1; 654 disp_lock_exit(&dp->disp_lock); 655 } 656 (*idle_cpu)(); 657 continue; 658 } 659 /* 660 * If there was a thread but we couldn't steal 661 * it, then keep trying. 662 */ 663 if (t == T_DONTSTEAL) 664 continue; 665 idle_exit(); 666 swtch_to(t); 667 } 668 idle_enter(); /* returned from swtch/swtch_to */ 669 } 670 } 671 672 673 /* 674 * Preempt the currently running thread in favor of the highest 675 * priority thread. The class of the current thread controls 676 * where it goes on the dispatcher queues. If panicking, turn 677 * preemption off. 678 */ 679 void 680 preempt() 681 { 682 kthread_t *t = curthread; 683 klwp_t *lwp = ttolwp(curthread); 684 685 if (panicstr) 686 return; 687 688 TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start"); 689 690 thread_lock(t); 691 692 if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) { 693 /* 694 * this thread has already been chosen to be run on 695 * another CPU. Clear kprunrun on this CPU since we're 696 * already headed for swtch(). 
697 */ 698 CPU->cpu_kprunrun = 0; 699 thread_unlock_nopreempt(t); 700 TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end"); 701 } else { 702 if (lwp != NULL) 703 lwp->lwp_ru.nivcsw++; 704 CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1); 705 THREAD_TRANSITION(t); 706 CL_PREEMPT(t); 707 DTRACE_SCHED(preempt); 708 thread_unlock_nopreempt(t); 709 710 TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end"); 711 712 swtch(); /* clears CPU->cpu_runrun via disp() */ 713 } 714 } 715 716 extern kthread_t *thread_unpin(); 717 718 /* 719 * disp() - find the highest priority thread for this processor to run, and 720 * set it in TS_ONPROC state so that resume() can be called to run it. 721 */ 722 static kthread_t * 723 disp() 724 { 725 cpu_t *cpup; 726 disp_t *dp; 727 kthread_t *tp; 728 dispq_t *dq; 729 int maxrunword; 730 pri_t pri; 731 disp_t *kpq; 732 733 TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start"); 734 735 cpup = CPU; 736 /* 737 * Find the highest priority loaded, runnable thread. 738 */ 739 dp = cpup->cpu_disp; 740 741 reschedule: 742 /* 743 * If there is more important work on the global queue with a better 744 * priority than the maximum on this CPU, take it now. 745 */ 746 kpq = &cpup->cpu_part->cp_kp_queue; 747 while ((pri = kpq->disp_maxrunpri) >= 0 && 748 pri >= dp->disp_maxrunpri && 749 (cpup->cpu_flags & CPU_OFFLINE) == 0 && 750 (tp = disp_getbest(kpq)) != NULL) { 751 if (disp_ratify(tp, kpq) != NULL) { 752 TRACE_1(TR_FAC_DISP, TR_DISP_END, 753 "disp_end:tid %p", tp); 754 return (tp); 755 } 756 } 757 758 disp_lock_enter(&dp->disp_lock); 759 pri = dp->disp_maxrunpri; 760 761 /* 762 * If there is nothing to run, look at what's runnable on other queues. 763 * Choose the idle thread if the CPU is quiesced. 764 * Note that CPUs that have the CPU_OFFLINE flag set can still run 765 * interrupt threads, which will be the only threads on the CPU's own 766 * queue, but cannot run threads from other queues. 767 */ 768 if (pri == -1) { 769 if (!(cpup->cpu_flags & CPU_OFFLINE)) { 770 disp_lock_exit(&dp->disp_lock); 771 if ((tp = disp_getwork(cpup)) == NULL || 772 tp == T_DONTSTEAL) { 773 tp = cpup->cpu_idle_thread; 774 (void) splhigh(); 775 THREAD_ONPROC(tp, cpup); 776 cpup->cpu_dispthread = tp; 777 cpup->cpu_dispatch_pri = -1; 778 cpup->cpu_runrun = cpup->cpu_kprunrun = 0; 779 cpup->cpu_chosen_level = -1; 780 } 781 } else { 782 disp_lock_exit_high(&dp->disp_lock); 783 tp = cpup->cpu_idle_thread; 784 THREAD_ONPROC(tp, cpup); 785 cpup->cpu_dispthread = tp; 786 cpup->cpu_dispatch_pri = -1; 787 cpup->cpu_runrun = cpup->cpu_kprunrun = 0; 788 cpup->cpu_chosen_level = -1; 789 } 790 TRACE_1(TR_FAC_DISP, TR_DISP_END, 791 "disp_end:tid %p", tp); 792 return (tp); 793 } 794 795 dq = &dp->disp_q[pri]; 796 tp = dq->dq_first; 797 798 ASSERT(tp != NULL); 799 ASSERT(tp->t_schedflag & TS_LOAD); /* thread must be swapped in */ 800 801 DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 802 803 /* 804 * Found it so remove it from queue. 805 */ 806 dp->disp_nrunnable--; 807 dq->dq_sruncnt--; 808 if ((dq->dq_first = tp->t_link) == NULL) { 809 ulong_t *dqactmap = dp->disp_qactmap; 810 811 ASSERT(dq->dq_sruncnt == 0); 812 dq->dq_last = NULL; 813 814 /* 815 * The queue is empty, so the corresponding bit needs to be 816 * turned off in dqactmap. If nrunnable != 0 just took the 817 * last runnable thread off the 818 * highest queue, so recompute disp_maxrunpri. 
819 */ 820 maxrunword = pri >> BT_ULSHIFT; 821 dqactmap[maxrunword] &= ~BT_BIW(pri); 822 823 if (dp->disp_nrunnable == 0) { 824 dp->disp_max_unbound_pri = -1; 825 dp->disp_maxrunpri = -1; 826 } else { 827 int ipri; 828 829 ipri = bt_gethighbit(dqactmap, maxrunword); 830 dp->disp_maxrunpri = ipri; 831 if (ipri < dp->disp_max_unbound_pri) 832 dp->disp_max_unbound_pri = ipri; 833 } 834 } else { 835 tp->t_link = NULL; 836 } 837 838 /* 839 * Set TS_DONT_SWAP flag to prevent another processor from swapping 840 * out this thread before we have a chance to run it. 841 * While running, it is protected against swapping by t_lock. 842 */ 843 tp->t_schedflag |= TS_DONT_SWAP; 844 cpup->cpu_dispthread = tp; /* protected by spl only */ 845 cpup->cpu_dispatch_pri = pri; 846 ASSERT(pri == DISP_PRIO(tp)); 847 thread_onproc(tp, cpup); /* set t_state to TS_ONPROC */ 848 disp_lock_exit_high(&dp->disp_lock); /* drop run queue lock */ 849 850 ASSERT(tp != NULL); 851 TRACE_1(TR_FAC_DISP, TR_DISP_END, 852 "disp_end:tid %p", tp); 853 854 if (disp_ratify(tp, kpq) == NULL) 855 goto reschedule; 856 857 return (tp); 858 } 859 860 /* 861 * swtch() 862 * Find best runnable thread and run it. 863 * Called with the current thread already switched to a new state, 864 * on a sleep queue, run queue, stopped, and not zombied. 865 * May be called at any spl level less than or equal to LOCK_LEVEL. 866 * Always drops spl to the base level (spl0()). 867 */ 868 void 869 swtch() 870 { 871 kthread_t *t = curthread; 872 kthread_t *next; 873 cpu_t *cp; 874 875 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 876 877 if (t->t_flag & T_INTR_THREAD) 878 cpu_intr_swtch_enter(t); 879 880 if (t->t_intr != NULL) { 881 /* 882 * We are an interrupt thread. Setup and return 883 * the interrupted thread to be resumed. 884 */ 885 (void) splhigh(); /* block other scheduler action */ 886 cp = CPU; /* now protected against migration */ 887 ASSERT(CPU_ON_INTR(cp) == 0); /* not called with PIL > 10 */ 888 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 889 CPU_STATS_ADDQ(cp, sys, intrblk, 1); 890 next = thread_unpin(); 891 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 892 resume_from_intr(next); 893 } else { 894 #ifdef DEBUG 895 if (t->t_state == TS_ONPROC && 896 t->t_disp_queue->disp_cpu == CPU && 897 t->t_preempt == 0) { 898 thread_lock(t); 899 ASSERT(t->t_state != TS_ONPROC || 900 t->t_disp_queue->disp_cpu != CPU || 901 t->t_preempt != 0); /* cannot migrate */ 902 thread_unlock_nopreempt(t); 903 } 904 #endif /* DEBUG */ 905 cp = CPU; 906 next = disp(); /* returns with spl high */ 907 ASSERT(CPU_ON_INTR(cp) == 0); /* not called with PIL > 10 */ 908 909 /* OK to steal anything left on run queue */ 910 cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; 911 912 if (next != t) { 913 hrtime_t now; 914 915 now = gethrtime_unscaled(); 916 pg_ev_thread_swtch(cp, now, t, next); 917 918 /* 919 * If t was previously in the TS_ONPROC state, 920 * setfrontdq and setbackdq won't have set its t_waitrq. 921 * Since we now finally know that we're switching away 922 * from this thread, set its t_waitrq if it is on a run 923 * queue. 
924 */ 925 if ((t->t_state == TS_RUN) && (t->t_waitrq == 0)) { 926 t->t_waitrq = now; 927 } 928 929 /* 930 * restore mstate of thread that we are switching to 931 */ 932 restore_mstate(next); 933 934 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 935 cp->cpu_last_swtch = t->t_disp_time = ddi_get_lbolt(); 936 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 937 938 if (dtrace_vtime_active) 939 dtrace_vtime_switch(next); 940 941 resume(next); 942 /* 943 * The TR_RESUME_END and TR_SWTCH_END trace points 944 * appear at the end of resume(), because we may not 945 * return here 946 */ 947 } else { 948 if (t->t_flag & T_INTR_THREAD) 949 cpu_intr_swtch_exit(t); 950 /* 951 * Threads that enqueue themselves on a run queue defer 952 * setting t_waitrq. It is then either set in swtch() 953 * when the CPU is actually yielded, or not at all if it 954 * is remaining on the CPU. 955 * There is however a window between where the thread 956 * placed itself on a run queue, and where it selects 957 * itself in disp(), where a third party (eg. clock() 958 * doing tick processing) may have re-enqueued this 959 * thread, setting t_waitrq in the process. We detect 960 * this race by noticing that despite switching to 961 * ourself, our t_waitrq has been set, and should be 962 * cleared. 963 */ 964 if (t->t_waitrq != 0) 965 t->t_waitrq = 0; 966 967 pg_ev_thread_remain(cp, t); 968 969 DTRACE_SCHED(remain__cpu); 970 TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end"); 971 (void) spl0(); 972 } 973 } 974 } 975 976 /* 977 * swtch_from_zombie() 978 * Special case of swtch(), which allows checks for TS_ZOMB to be 979 * eliminated from normal resume. 980 * Find best runnable thread and run it. 981 * Called with the current thread zombied. 982 * Zombies cannot migrate, so CPU references are safe. 983 */ 984 void 985 swtch_from_zombie() 986 { 987 kthread_t *next; 988 cpu_t *cpu = CPU; 989 990 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 991 992 ASSERT(curthread->t_state == TS_ZOMB); 993 994 next = disp(); /* returns with spl high */ 995 ASSERT(CPU_ON_INTR(CPU) == 0); /* not called with PIL > 10 */ 996 CPU_STATS_ADDQ(CPU, sys, pswitch, 1); 997 ASSERT(next != curthread); 998 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 999 1000 pg_ev_thread_swtch(cpu, gethrtime_unscaled(), curthread, next); 1001 1002 restore_mstate(next); 1003 1004 if (dtrace_vtime_active) 1005 dtrace_vtime_switch(next); 1006 1007 resume_from_zombie(next); 1008 /* 1009 * The TR_RESUME_END and TR_SWTCH_END trace points 1010 * appear at the end of resume(), because we certainly will not 1011 * return here 1012 */ 1013 } 1014 1015 #if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint)) 1016 1017 /* 1018 * search_disp_queues() 1019 * Search the given dispatch queues for thread tp. 1020 * Return 1 if tp is found, otherwise return 0. 1021 */ 1022 static int 1023 search_disp_queues(disp_t *dp, kthread_t *tp) 1024 { 1025 dispq_t *dq; 1026 dispq_t *eq; 1027 1028 disp_lock_enter_high(&dp->disp_lock); 1029 1030 for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) { 1031 kthread_t *rp; 1032 1033 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1034 1035 for (rp = dq->dq_first; rp; rp = rp->t_link) 1036 if (tp == rp) { 1037 disp_lock_exit_high(&dp->disp_lock); 1038 return (1); 1039 } 1040 } 1041 disp_lock_exit_high(&dp->disp_lock); 1042 1043 return (0); 1044 } 1045 1046 /* 1047 * thread_on_queue() 1048 * Search all per-CPU dispatch queues and all partition-wide kpreempt 1049 * queues for thread tp. 
Return 1 if tp is found, otherwise return 0. 1050 */ 1051 static int 1052 thread_on_queue(kthread_t *tp) 1053 { 1054 cpu_t *cp; 1055 struct cpupart *part; 1056 1057 ASSERT(getpil() >= DISP_LEVEL); 1058 1059 /* 1060 * Search the per-CPU dispatch queues for tp. 1061 */ 1062 cp = CPU; 1063 do { 1064 if (search_disp_queues(cp->cpu_disp, tp)) 1065 return (1); 1066 } while ((cp = cp->cpu_next_onln) != CPU); 1067 1068 /* 1069 * Search the partition-wide kpreempt queues for tp. 1070 */ 1071 part = CPU->cpu_part; 1072 do { 1073 if (search_disp_queues(&part->cp_kp_queue, tp)) 1074 return (1); 1075 } while ((part = part->cp_next) != CPU->cpu_part); 1076 1077 return (0); 1078 } 1079 1080 #else 1081 1082 #define thread_on_queue(tp) 0 /* ASSERT must be !thread_on_queue */ 1083 1084 #endif /* DEBUG */ 1085 1086 /* 1087 * like swtch(), but switch to a specified thread taken from another CPU. 1088 * called with spl high.. 1089 */ 1090 void 1091 swtch_to(kthread_t *next) 1092 { 1093 cpu_t *cp = CPU; 1094 hrtime_t now; 1095 1096 TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 1097 1098 /* 1099 * Update context switch statistics. 1100 */ 1101 CPU_STATS_ADDQ(cp, sys, pswitch, 1); 1102 1103 TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start"); 1104 1105 now = gethrtime_unscaled(); 1106 pg_ev_thread_swtch(cp, now, curthread, next); 1107 1108 /* OK to steal anything left on run queue */ 1109 cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL; 1110 1111 /* record last execution time */ 1112 cp->cpu_last_swtch = curthread->t_disp_time = ddi_get_lbolt(); 1113 1114 /* 1115 * If t was previously in the TS_ONPROC state, setfrontdq and setbackdq 1116 * won't have set its t_waitrq. Since we now finally know that we're 1117 * switching away from this thread, set its t_waitrq if it is on a run 1118 * queue. 1119 */ 1120 if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) { 1121 curthread->t_waitrq = now; 1122 } 1123 1124 /* restore next thread to previously running microstate */ 1125 restore_mstate(next); 1126 1127 if (dtrace_vtime_active) 1128 dtrace_vtime_switch(next); 1129 1130 resume(next); 1131 /* 1132 * The TR_RESUME_END and TR_SWTCH_END trace points 1133 * appear at the end of resume(), because we may not 1134 * return here 1135 */ 1136 } 1137 1138 static void 1139 cpu_resched(cpu_t *cp, pri_t tpri) 1140 { 1141 int call_poke_cpu = 0; 1142 pri_t cpupri = cp->cpu_dispatch_pri; 1143 1144 if (cpupri != CPU_IDLE_PRI && cpupri < tpri) { 1145 TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED, 1146 "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri); 1147 if (tpri >= upreemptpri && cp->cpu_runrun == 0) { 1148 cp->cpu_runrun = 1; 1149 aston(cp->cpu_dispthread); 1150 if (tpri < kpreemptpri && cp != CPU) 1151 call_poke_cpu = 1; 1152 } 1153 if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) { 1154 cp->cpu_kprunrun = 1; 1155 if (cp != CPU) 1156 call_poke_cpu = 1; 1157 } 1158 } 1159 1160 /* 1161 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 1162 */ 1163 membar_enter(); 1164 1165 if (call_poke_cpu) 1166 poke_cpu(cp->cpu_id); 1167 } 1168 1169 /* 1170 * setbackdq() keeps runqs balanced such that the difference in length 1171 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF. 1172 * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths 1173 * must match. When per-thread TS_RUNQMATCH flag is set, setbackdq() will 1174 * try to keep runqs perfectly balanced regardless of the thread priority. 
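 *
 * A worked example (an illustration using the default tunables below, not
 * part of the original comment): with RUNQ_MAX_DIFF == 2, a priority-60
 * thread whose chosen CPU already has three runnable threads at that
 * priority (qlen = 3 - 2 = 1) is only moved if the candidate CPU has none
 * queued at that priority (and SMT placement allows it); a priority-10
 * thread, being below RUNQ_MATCH_PRI, moves whenever the candidate queue at
 * that priority is strictly shorter.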
1175 */ 1176 #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */ 1177 #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */ 1178 #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt) 1179 1180 /* 1181 * Macro that evaluates to true if it is likely that the thread has cache 1182 * warmth. This is based on the amount of time that has elapsed since the 1183 * thread last ran. If that amount of time is less than "rechoose_interval" 1184 * ticks, then we decide that the thread has enough cache warmth to warrant 1185 * some affinity for t->t_cpu. 1186 */ 1187 #define THREAD_HAS_CACHE_WARMTH(thread) \ 1188 ((thread == curthread) || \ 1189 ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval)) 1190 /* 1191 * Put the specified thread on the back of the dispatcher 1192 * queue corresponding to its current priority. 1193 * 1194 * Called with the thread in transition, onproc or stopped state 1195 * and locked (transition implies locked) and at high spl. 1196 * Returns with the thread in TS_RUN state and still locked. 1197 */ 1198 void 1199 setbackdq(kthread_t *tp) 1200 { 1201 dispq_t *dq; 1202 disp_t *dp; 1203 cpu_t *cp; 1204 pri_t tpri; 1205 int bound; 1206 boolean_t self; 1207 1208 ASSERT(THREAD_LOCK_HELD(tp)); 1209 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1210 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1211 1212 /* 1213 * If thread is "swapped" or on the swap queue don't 1214 * queue it, but wake sched. 1215 */ 1216 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1217 disp_swapped_setrun(tp); 1218 return; 1219 } 1220 1221 self = (tp == curthread); 1222 1223 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1224 bound = 1; 1225 else 1226 bound = 0; 1227 1228 tpri = DISP_PRIO(tp); 1229 if (ncpus == 1) 1230 cp = tp->t_cpu; 1231 else if (!bound) { 1232 if (tpri >= kpqpri) { 1233 setkpdq(tp, SETKP_BACK); 1234 return; 1235 } 1236 1237 /* 1238 * We'll generally let this thread continue to run where 1239 * it last ran...but will consider migration if: 1240 * - The thread probably doesn't have much cache warmth. 1241 * - SMT exclusion would prefer us to run elsewhere 1242 * - The CPU where it last ran is the target of an offline 1243 * request. 1244 * - The thread last ran outside its home lgroup. 1245 */ 1246 if ((!THREAD_HAS_CACHE_WARMTH(tp)) || 1247 !smt_should_run(tp, tp->t_cpu) || 1248 (tp->t_cpu == cpu_inmotion) || 1249 !LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) { 1250 cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri); 1251 } else { 1252 cp = tp->t_cpu; 1253 } 1254 1255 if (tp->t_cpupart == cp->cpu_part) { 1256 int qlen; 1257 1258 /* 1259 * Perform any CMT load balancing 1260 */ 1261 cp = cmt_balance(tp, cp); 1262 1263 /* 1264 * Balance across the run queues 1265 */ 1266 qlen = RUNQ_LEN(cp, tpri); 1267 if (tpri >= RUNQ_MATCH_PRI && 1268 !(tp->t_schedflag & TS_RUNQMATCH)) 1269 qlen -= RUNQ_MAX_DIFF; 1270 if (qlen > 0) { 1271 cpu_t *newcp; 1272 1273 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) { 1274 newcp = cp->cpu_next_part; 1275 } else if ((newcp = cp->cpu_next_lpl) == cp) { 1276 newcp = cp->cpu_next_part; 1277 } 1278 1279 if (smt_should_run(tp, newcp) && 1280 RUNQ_LEN(newcp, tpri) < qlen) { 1281 DTRACE_PROBE3(runq__balance, 1282 kthread_t *, tp, 1283 cpu_t *, cp, cpu_t *, newcp); 1284 cp = newcp; 1285 } 1286 } 1287 } else { 1288 /* 1289 * Migrate to a cpu in the new partition. 
1290 */ 1291 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, tp, 1292 tp->t_pri); 1293 } 1294 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1295 } else { 1296 /* 1297 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1298 * a short time until weak binding that existed when the 1299 * strong binding was established has dropped) so we must 1300 * favour weak binding over strong. 1301 */ 1302 cp = tp->t_weakbound_cpu ? 1303 tp->t_weakbound_cpu : tp->t_bound_cpu; 1304 } 1305 /* 1306 * A thread that is ONPROC may be temporarily placed on the run queue 1307 * but then chosen to run again by disp. If the thread we're placing on 1308 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1309 * replacement process is actually scheduled in swtch(). In this 1310 * situation, curthread is the only thread that could be in the ONPROC 1311 * state. 1312 */ 1313 if ((!self) && (tp->t_waitrq == 0)) { 1314 hrtime_t curtime; 1315 1316 curtime = gethrtime_unscaled(); 1317 (void) cpu_update_pct(tp, curtime); 1318 tp->t_waitrq = curtime; 1319 } else { 1320 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1321 } 1322 1323 dp = cp->cpu_disp; 1324 disp_lock_enter_high(&dp->disp_lock); 1325 1326 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0); 1327 TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p", 1328 tpri, cp, tp); 1329 1330 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1331 1332 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1333 tp->t_disp_queue = dp; 1334 tp->t_link = NULL; 1335 1336 dq = &dp->disp_q[tpri]; 1337 dp->disp_nrunnable++; 1338 if (!bound) 1339 dp->disp_steal = 0; 1340 membar_enter(); 1341 1342 if (dq->dq_sruncnt++ != 0) { 1343 ASSERT(dq->dq_first != NULL); 1344 dq->dq_last->t_link = tp; 1345 dq->dq_last = tp; 1346 } else { 1347 ASSERT(dq->dq_first == NULL); 1348 ASSERT(dq->dq_last == NULL); 1349 dq->dq_first = dq->dq_last = tp; 1350 BT_SET(dp->disp_qactmap, tpri); 1351 if (tpri > dp->disp_maxrunpri) { 1352 dp->disp_maxrunpri = tpri; 1353 membar_enter(); 1354 cpu_resched(cp, tpri); 1355 } 1356 } 1357 1358 if (!bound && tpri > dp->disp_max_unbound_pri) { 1359 if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) { 1360 /* 1361 * If there are no other unbound threads on the 1362 * run queue, don't allow other CPUs to steal 1363 * this thread while we are in the middle of a 1364 * context switch. We may just switch to it 1365 * again right away. CPU_DISP_DONTSTEAL is cleared 1366 * in swtch and swtch_to. 1367 */ 1368 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1369 } 1370 dp->disp_max_unbound_pri = tpri; 1371 } 1372 (*disp_enq_thread)(cp, bound); 1373 } 1374 1375 /* 1376 * Put the specified thread on the front of the dispatcher 1377 * queue corresponding to its current priority. 1378 * 1379 * Called with the thread in transition, onproc or stopped state 1380 * and locked (transition implies locked) and at high spl. 1381 * Returns with the thread in TS_RUN state and still locked. 1382 */ 1383 void 1384 setfrontdq(kthread_t *tp) 1385 { 1386 disp_t *dp; 1387 dispq_t *dq; 1388 cpu_t *cp; 1389 pri_t tpri; 1390 int bound; 1391 1392 ASSERT(THREAD_LOCK_HELD(tp)); 1393 ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1394 ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1395 1396 /* 1397 * If thread is "swapped" or on the swap queue don't 1398 * queue it, but wake sched. 
1399 */ 1400 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1401 disp_swapped_setrun(tp); 1402 return; 1403 } 1404 1405 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 1406 bound = 1; 1407 else 1408 bound = 0; 1409 1410 tpri = DISP_PRIO(tp); 1411 if (ncpus == 1) 1412 cp = tp->t_cpu; 1413 else if (!bound) { 1414 if (tpri >= kpqpri) { 1415 setkpdq(tp, SETKP_FRONT); 1416 return; 1417 } 1418 cp = tp->t_cpu; 1419 if (tp->t_cpupart == cp->cpu_part) { 1420 /* 1421 * We'll generally let this thread continue to run 1422 * where it last ran, but will consider migration if: 1423 * - The thread last ran outside its home lgroup. 1424 * - The CPU where it last ran is the target of an 1425 * offline request (a thread_nomigrate() on the in 1426 * motion CPU relies on this when forcing a preempt). 1427 * - The thread isn't the highest priority thread where 1428 * it last ran, and it is considered not likely to 1429 * have significant cache warmth. 1430 */ 1431 if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp) || 1432 cp == cpu_inmotion || 1433 (tpri < cp->cpu_disp->disp_maxrunpri && 1434 !THREAD_HAS_CACHE_WARMTH(tp))) { 1435 cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri); 1436 } 1437 } else { 1438 /* 1439 * Migrate to a cpu in the new partition. 1440 */ 1441 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1442 tp, tp->t_pri); 1443 } 1444 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1445 } else { 1446 /* 1447 * It is possible that t_weakbound_cpu != t_bound_cpu (for 1448 * a short time until weak binding that existed when the 1449 * strong binding was established has dropped) so we must 1450 * favour weak binding over strong. 1451 */ 1452 cp = tp->t_weakbound_cpu ? 1453 tp->t_weakbound_cpu : tp->t_bound_cpu; 1454 } 1455 1456 /* 1457 * A thread that is ONPROC may be temporarily placed on the run queue 1458 * but then chosen to run again by disp. If the thread we're placing on 1459 * the queue is in TS_ONPROC state, don't set its t_waitrq until a 1460 * replacement process is actually scheduled in swtch(). In this 1461 * situation, curthread is the only thread that could be in the ONPROC 1462 * state. 
1463 */ 1464 if ((tp != curthread) && (tp->t_waitrq == 0)) { 1465 hrtime_t curtime; 1466 1467 curtime = gethrtime_unscaled(); 1468 (void) cpu_update_pct(tp, curtime); 1469 tp->t_waitrq = curtime; 1470 } else { 1471 (void) cpu_update_pct(tp, gethrtime_unscaled()); 1472 } 1473 1474 dp = cp->cpu_disp; 1475 disp_lock_enter_high(&dp->disp_lock); 1476 1477 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1478 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1); 1479 1480 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1481 1482 THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */ 1483 tp->t_disp_queue = dp; 1484 1485 dq = &dp->disp_q[tpri]; 1486 dp->disp_nrunnable++; 1487 if (!bound) 1488 dp->disp_steal = 0; 1489 membar_enter(); 1490 1491 if (dq->dq_sruncnt++ != 0) { 1492 ASSERT(dq->dq_last != NULL); 1493 tp->t_link = dq->dq_first; 1494 dq->dq_first = tp; 1495 } else { 1496 ASSERT(dq->dq_last == NULL); 1497 ASSERT(dq->dq_first == NULL); 1498 tp->t_link = NULL; 1499 dq->dq_first = dq->dq_last = tp; 1500 BT_SET(dp->disp_qactmap, tpri); 1501 if (tpri > dp->disp_maxrunpri) { 1502 dp->disp_maxrunpri = tpri; 1503 membar_enter(); 1504 cpu_resched(cp, tpri); 1505 } 1506 } 1507 1508 if (!bound && tpri > dp->disp_max_unbound_pri) { 1509 if (tp == curthread && dp->disp_max_unbound_pri == -1 && 1510 cp == CPU) { 1511 /* 1512 * If there are no other unbound threads on the 1513 * run queue, don't allow other CPUs to steal 1514 * this thread while we are in the middle of a 1515 * context switch. We may just switch to it 1516 * again right away. CPU_DISP_DONTSTEAL is cleared 1517 * in swtch and swtch_to. 1518 */ 1519 cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1520 } 1521 dp->disp_max_unbound_pri = tpri; 1522 } 1523 (*disp_enq_thread)(cp, bound); 1524 } 1525 1526 /* 1527 * Put a high-priority unbound thread on the kp queue 1528 */ 1529 static void 1530 setkpdq(kthread_t *tp, int borf) 1531 { 1532 dispq_t *dq; 1533 disp_t *dp; 1534 cpu_t *cp; 1535 pri_t tpri; 1536 1537 tpri = DISP_PRIO(tp); 1538 1539 dp = &tp->t_cpupart->cp_kp_queue; 1540 disp_lock_enter_high(&dp->disp_lock); 1541 1542 TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1543 1544 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1545 DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf); 1546 THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1547 tp->t_disp_queue = dp; 1548 dp->disp_nrunnable++; 1549 dq = &dp->disp_q[tpri]; 1550 1551 if (dq->dq_sruncnt++ != 0) { 1552 if (borf == SETKP_BACK) { 1553 ASSERT(dq->dq_first != NULL); 1554 tp->t_link = NULL; 1555 dq->dq_last->t_link = tp; 1556 dq->dq_last = tp; 1557 } else { 1558 ASSERT(dq->dq_last != NULL); 1559 tp->t_link = dq->dq_first; 1560 dq->dq_first = tp; 1561 } 1562 } else { 1563 if (borf == SETKP_BACK) { 1564 ASSERT(dq->dq_first == NULL); 1565 ASSERT(dq->dq_last == NULL); 1566 dq->dq_first = dq->dq_last = tp; 1567 } else { 1568 ASSERT(dq->dq_last == NULL); 1569 ASSERT(dq->dq_first == NULL); 1570 tp->t_link = NULL; 1571 dq->dq_first = dq->dq_last = tp; 1572 } 1573 BT_SET(dp->disp_qactmap, tpri); 1574 if (tpri > dp->disp_max_unbound_pri) 1575 dp->disp_max_unbound_pri = tpri; 1576 if (tpri > dp->disp_maxrunpri) { 1577 dp->disp_maxrunpri = tpri; 1578 membar_enter(); 1579 } 1580 } 1581 1582 cp = tp->t_cpu; 1583 if (tp->t_cpupart != cp->cpu_part) { 1584 /* migrate to a cpu in the new partition */ 1585 cp = tp->t_cpupart->cp_cpulist; 1586 } 1587 cp = disp_lowpri_cpu(cp, tp, tp->t_pri); 1588 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 
1589 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1590 1591 if (cp->cpu_chosen_level < tpri) 1592 cp->cpu_chosen_level = tpri; 1593 cpu_resched(cp, tpri); 1594 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 1595 (*disp_enq_thread)(cp, 0); 1596 } 1597 1598 /* 1599 * Remove a thread from the dispatcher queue if it is on it. 1600 * It is not an error if it is not found but we return whether 1601 * or not it was found in case the caller wants to check. 1602 */ 1603 int 1604 dispdeq(kthread_t *tp) 1605 { 1606 disp_t *dp; 1607 dispq_t *dq; 1608 kthread_t *rp; 1609 kthread_t *trp; 1610 kthread_t **ptp; 1611 int tpri; 1612 1613 ASSERT(THREAD_LOCK_HELD(tp)); 1614 1615 if (tp->t_state != TS_RUN) 1616 return (0); 1617 1618 /* 1619 * The thread is "swapped" or is on the swap queue and 1620 * hence no longer on the run queue, so return true. 1621 */ 1622 if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) 1623 return (1); 1624 1625 tpri = DISP_PRIO(tp); 1626 dp = tp->t_disp_queue; 1627 ASSERT(tpri < dp->disp_npri); 1628 dq = &dp->disp_q[tpri]; 1629 ptp = &dq->dq_first; 1630 rp = *ptp; 1631 trp = NULL; 1632 1633 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1634 1635 /* 1636 * Search for thread in queue. 1637 * Double links would simplify this at the expense of disp/setrun. 1638 */ 1639 while (rp != tp && rp != NULL) { 1640 trp = rp; 1641 ptp = &trp->t_link; 1642 rp = trp->t_link; 1643 } 1644 1645 if (rp == NULL) { 1646 panic("dispdeq: thread not on queue"); 1647 } 1648 1649 DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 1650 1651 /* 1652 * Found it so remove it from queue. 1653 */ 1654 if ((*ptp = rp->t_link) == NULL) 1655 dq->dq_last = trp; 1656 1657 dp->disp_nrunnable--; 1658 if (--dq->dq_sruncnt == 0) { 1659 dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri); 1660 if (dp->disp_nrunnable == 0) { 1661 dp->disp_max_unbound_pri = -1; 1662 dp->disp_maxrunpri = -1; 1663 } else if (tpri == dp->disp_maxrunpri) { 1664 int ipri; 1665 1666 ipri = bt_gethighbit(dp->disp_qactmap, 1667 dp->disp_maxrunpri >> BT_ULSHIFT); 1668 if (ipri < dp->disp_max_unbound_pri) 1669 dp->disp_max_unbound_pri = ipri; 1670 dp->disp_maxrunpri = ipri; 1671 } 1672 } 1673 tp->t_link = NULL; 1674 THREAD_TRANSITION(tp); /* put in intermediate state */ 1675 return (1); 1676 } 1677 1678 1679 /* 1680 * dq_sruninc and dq_srundec are public functions for 1681 * incrementing/decrementing the sruncnts when a thread on 1682 * a dispatcher queue is made schedulable/unschedulable by 1683 * resetting the TS_LOAD flag. 1684 * 1685 * The caller MUST have the thread lock and therefore the dispatcher 1686 * queue lock so that the operation which changes 1687 * the flag, the operation that checks the status of the thread to 1688 * determine if it's on a disp queue AND the call to this function 1689 * are one atomic operation with respect to interrupts. 1690 */ 1691 1692 /* 1693 * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread. 1694 */ 1695 void 1696 dq_sruninc(kthread_t *t) 1697 { 1698 ASSERT(t->t_state == TS_RUN); 1699 ASSERT(t->t_schedflag & TS_LOAD); 1700 1701 THREAD_TRANSITION(t); 1702 setfrontdq(t); 1703 } 1704 1705 /* 1706 * See comment on calling conventions above. 1707 * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread. 
1708 */ 1709 void 1710 dq_srundec(kthread_t *t) 1711 { 1712 ASSERT(t->t_schedflag & TS_LOAD); 1713 1714 (void) dispdeq(t); 1715 disp_swapped_enq(t); 1716 } 1717 1718 /* 1719 * Change the dispatcher lock of thread to the "swapped_lock" 1720 * and return with thread lock still held. 1721 * 1722 * Called with thread_lock held, in transition state, and at high spl. 1723 */ 1724 void 1725 disp_swapped_enq(kthread_t *tp) 1726 { 1727 ASSERT(THREAD_LOCK_HELD(tp)); 1728 ASSERT(tp->t_schedflag & TS_LOAD); 1729 1730 switch (tp->t_state) { 1731 case TS_RUN: 1732 disp_lock_enter_high(&swapped_lock); 1733 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1734 break; 1735 case TS_ONPROC: 1736 disp_lock_enter_high(&swapped_lock); 1737 THREAD_TRANSITION(tp); 1738 wake_sched_sec = 1; /* tell clock to wake sched */ 1739 THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1740 break; 1741 default: 1742 panic("disp_swapped: tp: %p bad t_state", (void *)tp); 1743 } 1744 } 1745 1746 /* 1747 * This routine is called by setbackdq/setfrontdq if the thread is 1748 * not loaded or loaded and on the swap queue. 1749 * 1750 * Thread state TS_SLEEP implies that a swapped thread 1751 * has been woken up and needs to be swapped in by the swapper. 1752 * 1753 * Thread state TS_RUN, it implies that the priority of a swapped 1754 * thread is being increased by scheduling class (e.g. ts_update). 1755 */ 1756 static void 1757 disp_swapped_setrun(kthread_t *tp) 1758 { 1759 ASSERT(THREAD_LOCK_HELD(tp)); 1760 ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD); 1761 1762 switch (tp->t_state) { 1763 case TS_SLEEP: 1764 disp_lock_enter_high(&swapped_lock); 1765 /* 1766 * Wakeup sched immediately (i.e., next tick) if the 1767 * thread priority is above maxclsyspri. 1768 */ 1769 if (DISP_PRIO(tp) > maxclsyspri) 1770 wake_sched = 1; 1771 else 1772 wake_sched_sec = 1; 1773 THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */ 1774 break; 1775 case TS_RUN: /* called from ts_update */ 1776 break; 1777 default: 1778 panic("disp_swapped_setrun: tp: %p bad t_state", (void *)tp); 1779 } 1780 } 1781 1782 /* 1783 * Make a thread give up its processor. Find the processor on 1784 * which this thread is executing, and have that processor 1785 * preempt. 1786 * 1787 * We allow System Duty Cycle (SDC) threads to be preempted even if 1788 * they are running at kernel priorities. To implement this, we always 1789 * set cpu_kprunrun; this ensures preempt() will be called. Since SDC 1790 * calls cpu_surrender() very often, we only preempt if there is anyone 1791 * competing with us. 1792 */ 1793 void 1794 cpu_surrender(kthread_t *tp) 1795 { 1796 cpu_t *cpup; 1797 int max_pri; 1798 int max_run_pri; 1799 klwp_t *lwp; 1800 1801 ASSERT(THREAD_LOCK_HELD(tp)); 1802 1803 if (tp->t_state != TS_ONPROC) 1804 return; 1805 cpup = tp->t_disp_queue->disp_cpu; /* CPU thread dispatched to */ 1806 max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */ 1807 max_run_pri = CP_MAXRUNPRI(cpup->cpu_part); 1808 if (max_pri < max_run_pri) 1809 max_pri = max_run_pri; 1810 1811 if (tp->t_cid == sysdccid) { 1812 uint_t t_pri = DISP_PRIO(tp); 1813 if (t_pri > max_pri) 1814 return; /* we are not competing w/ anyone */ 1815 cpup->cpu_runrun = cpup->cpu_kprunrun = 1; 1816 } else { 1817 cpup->cpu_runrun = 1; 1818 if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) { 1819 cpup->cpu_kprunrun = 1; 1820 } 1821 } 1822 1823 /* 1824 * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 
1825 */ 1826 membar_enter(); 1827 1828 DTRACE_SCHED1(surrender, kthread_t *, tp); 1829 1830 /* 1831 * Make the target thread take an excursion through trap() 1832 * to do preempt() (unless we're already in trap or post_syscall, 1833 * calling cpu_surrender via CL_TRAPRET). 1834 */ 1835 if (tp != curthread || (lwp = tp->t_lwp) == NULL || 1836 lwp->lwp_state != LWP_USER) { 1837 aston(tp); 1838 if (cpup != CPU) 1839 poke_cpu(cpup->cpu_id); 1840 } 1841 TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER, 1842 "cpu_surrender:tid %p cpu %p", tp, cpup); 1843 } 1844 1845 /* 1846 * Commit to and ratify a scheduling decision 1847 */ 1848 /*ARGSUSED*/ 1849 static kthread_t * 1850 disp_ratify(kthread_t *tp, disp_t *kpq) 1851 { 1852 pri_t tpri, maxpri; 1853 pri_t maxkpri; 1854 cpu_t *cpup; 1855 1856 ASSERT(tp != NULL); 1857 /* 1858 * Commit to, then ratify scheduling decision 1859 */ 1860 cpup = CPU; 1861 if (cpup->cpu_runrun != 0) 1862 cpup->cpu_runrun = 0; 1863 if (cpup->cpu_kprunrun != 0) 1864 cpup->cpu_kprunrun = 0; 1865 if (cpup->cpu_chosen_level != -1) 1866 cpup->cpu_chosen_level = -1; 1867 membar_enter(); 1868 tpri = DISP_PRIO(tp); 1869 maxpri = cpup->cpu_disp->disp_maxrunpri; 1870 maxkpri = kpq->disp_maxrunpri; 1871 if (maxpri < maxkpri) 1872 maxpri = maxkpri; 1873 if (tpri < maxpri) { 1874 /* 1875 * should have done better 1876 * put this one back and indicate to try again 1877 */ 1878 cpup->cpu_dispthread = curthread; /* fixup dispthread */ 1879 cpup->cpu_dispatch_pri = DISP_PRIO(curthread); 1880 thread_lock_high(tp); 1881 THREAD_TRANSITION(tp); 1882 setfrontdq(tp); 1883 thread_unlock_nopreempt(tp); 1884 1885 tp = NULL; 1886 } 1887 return (tp); 1888 } 1889 1890 /* 1891 * See if there is any work on the dispatcher queue for other CPUs. 1892 * If there is, dequeue the best thread and return. 1893 */ 1894 static kthread_t * 1895 disp_getwork(cpu_t *cp) 1896 { 1897 cpu_t *ocp; /* other CPU */ 1898 cpu_t *ocp_start; 1899 cpu_t *tcp; /* target local CPU */ 1900 kthread_t *tp; 1901 kthread_t *retval = NULL; 1902 pri_t maxpri; 1903 disp_t *kpq; /* kp queue for this partition */ 1904 lpl_t *lpl, *lpl_leaf; 1905 int leafidx, startidx; 1906 hrtime_t stealtime; 1907 lgrp_id_t local_id; 1908 1909 maxpri = -1; 1910 tcp = NULL; 1911 1912 kpq = &cp->cpu_part->cp_kp_queue; 1913 while (kpq->disp_maxrunpri >= 0) { 1914 /* 1915 * Try to take a thread from the kp_queue. 1916 */ 1917 tp = (disp_getbest(kpq)); 1918 if (tp) 1919 return (disp_ratify(tp, kpq)); 1920 } 1921 1922 kpreempt_disable(); /* protect the cpu_active list */ 1923 1924 /* 1925 * Try to find something to do on another CPU's run queue. 1926 * Loop through all other CPUs looking for the one with the highest 1927 * priority unbound thread. 1928 * 1929 * On NUMA machines, the partition's CPUs are consulted in order of 1930 * distance from the current CPU. This way, the first available 1931 * work found is also the closest, and will suffer the least 1932 * from being migrated. 1933 */ 1934 lpl = lpl_leaf = cp->cpu_lpl; 1935 local_id = lpl_leaf->lpl_lgrpid; 1936 leafidx = startidx = 0; 1937 1938 /* 1939 * This loop traverses the lpl hierarchy. 
Higher level lpls represent
	 * broader levels of locality
	 */
	do {
		/* This loop iterates over the lpl's leaves */
		do {
			if (lpl_leaf != cp->cpu_lpl)
				ocp = lpl_leaf->lpl_cpus;
			else
				ocp = cp->cpu_next_lpl;

			/* This loop iterates over the CPUs in the leaf */
			ocp_start = ocp;
			do {
				pri_t pri;

				ASSERT(CPU_ACTIVE(ocp));

				/*
				 * End our stroll around this lpl if:
				 *
				 * - Something became runnable on the local
				 *   queue...which also ends our stroll around
				 *   the partition.
				 *
				 * - We happen across another idle CPU.
				 *   Since it is patrolling the next portion
				 *   of the lpl's list (assuming it's not
				 *   halted, or busy servicing an interrupt),
				 *   move to the next higher level of locality.
				 */
				if (cp->cpu_disp->disp_nrunnable != 0) {
					kpreempt_enable();
					return (NULL);
				}
				if (ocp->cpu_dispatch_pri == -1) {
					if (ocp->cpu_disp_flags &
					    CPU_DISP_HALTED ||
					    ocp->cpu_intr_actv != 0)
						continue;
					else
						goto next_level;
				}

				/*
				 * If there's only one thread and the CPU
				 * is in the middle of a context switch,
				 * or it's currently running the idle thread,
				 * don't steal it.
				 */
				if ((ocp->cpu_disp_flags &
				    CPU_DISP_DONTSTEAL) &&
				    ocp->cpu_disp->disp_nrunnable == 1)
					continue;

				pri = ocp->cpu_disp->disp_max_unbound_pri;
				if (pri > maxpri) {
					/*
					 * Don't steal threads that we attempted
					 * to steal recently until they're ready
					 * to be stolen again.
					 */
					stealtime = ocp->cpu_disp->disp_steal;
					if (stealtime == 0 ||
					    stealtime - gethrtime() <= 0) {
						maxpri = pri;
						tcp = ocp;
					} else {
						/*
						 * Don't update tcp, just set
						 * the retval to T_DONTSTEAL, so
						 * that if no acceptable CPUs
						 * are found the return value
						 * will be T_DONTSTEAL rather
						 * than NULL.
						 */
						retval = T_DONTSTEAL;
					}
				}
			} while ((ocp = ocp->cpu_next_lpl) != ocp_start);

			/*
			 * Iterate to the next leaf lpl in the resource set
			 * at this level of locality.  If we hit the end of
			 * the set, wrap back around to the beginning.
			 *
			 * Note: This iteration is NULL terminated for a
			 * reason; see lpl_topo_bootstrap() in lgrp.c for
			 * details.
			 */
			if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) {
				leafidx = 0;
				lpl_leaf = lpl->lpl_rset[leafidx];
			}
		} while (leafidx != startidx);

next_level:
		/*
		 * Expand the search to include farther away CPUs (next
		 * locality level).  The closer CPUs that have already been
		 * checked will be checked again.  In doing so, idle CPUs
		 * will tend to be more aggressive about stealing from CPUs
		 * that are closer (since the closer CPUs will be considered
		 * more often).
		 * Begin at this level with the CPU's local leaf lpl.
		 */
		if ((lpl = lpl->lpl_parent) != NULL) {
			leafidx = startidx = lpl->lpl_id2rset[local_id];
			lpl_leaf = lpl->lpl_rset[leafidx];
		}
	} while (!tcp && lpl);

	kpreempt_enable();

	/*
	 * If another queue looks good, and there is still nothing on
	 * the local queue, try to transfer one or more threads
	 * from it to our queue.
2056 */ 2057 if (tcp && cp->cpu_disp->disp_nrunnable == 0) { 2058 tp = disp_getbest(tcp->cpu_disp); 2059 if (tp == NULL || tp == T_DONTSTEAL) 2060 return (tp); 2061 return (disp_ratify(tp, kpq)); 2062 } 2063 return (retval); 2064 } 2065 2066 2067 /* 2068 * disp_fix_unbound_pri() 2069 * Determines the maximum priority of unbound threads on the queue. 2070 * The priority is kept for the queue, but is only increased, never 2071 * reduced unless some CPU is looking for something on that queue. 2072 * 2073 * The priority argument is the known upper limit. 2074 * 2075 * Perhaps this should be kept accurately, but that probably means 2076 * separate bitmaps for bound and unbound threads. Since only idled 2077 * CPUs will have to do this recalculation, it seems better this way. 2078 */ 2079 static void 2080 disp_fix_unbound_pri(disp_t *dp, pri_t pri) 2081 { 2082 kthread_t *tp; 2083 dispq_t *dq; 2084 ulong_t *dqactmap = dp->disp_qactmap; 2085 ulong_t mapword; 2086 int wx; 2087 2088 ASSERT(DISP_LOCK_HELD(&dp->disp_lock)); 2089 2090 ASSERT(pri >= 0); /* checked by caller */ 2091 2092 /* 2093 * Start the search at the next lowest priority below the supplied 2094 * priority. This depends on the bitmap implementation. 2095 */ 2096 do { 2097 wx = pri >> BT_ULSHIFT; /* index of word in map */ 2098 2099 /* 2100 * Form mask for all lower priorities in the word. 2101 */ 2102 mapword = dqactmap[wx] & (BT_BIW(pri) - 1); 2103 2104 /* 2105 * Get next lower active priority. 2106 */ 2107 if (mapword != 0) { 2108 pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1; 2109 } else if (wx > 0) { 2110 pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */ 2111 if (pri < 0) 2112 break; 2113 } else { 2114 pri = -1; 2115 break; 2116 } 2117 2118 /* 2119 * Search the queue for unbound, runnable threads. 2120 */ 2121 dq = &dp->disp_q[pri]; 2122 tp = dq->dq_first; 2123 2124 while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) { 2125 tp = tp->t_link; 2126 } 2127 2128 /* 2129 * If a thread was found, set the priority and return. 2130 */ 2131 } while (tp == NULL); 2132 2133 /* 2134 * pri holds the maximum unbound thread priority or -1. 2135 */ 2136 if (dp->disp_max_unbound_pri != pri) 2137 dp->disp_max_unbound_pri = pri; 2138 } 2139 2140 /* 2141 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should 2142 * check if the CPU to which it was previously bound should have 2143 * its disp_max_unbound_pri increased. 2144 */ 2145 void 2146 disp_adjust_unbound_pri(kthread_t *tp) 2147 { 2148 disp_t *dp; 2149 pri_t tpri; 2150 2151 ASSERT(THREAD_LOCK_HELD(tp)); 2152 2153 /* 2154 * Don't do anything if the thread is not bound, or 2155 * currently not runnable or swapped out. 2156 */ 2157 if (tp->t_bound_cpu == NULL || 2158 tp->t_state != TS_RUN || 2159 tp->t_schedflag & TS_ON_SWAPQ) 2160 return; 2161 2162 tpri = DISP_PRIO(tp); 2163 dp = tp->t_bound_cpu->cpu_disp; 2164 ASSERT(tpri >= 0 && tpri < dp->disp_npri); 2165 if (tpri > dp->disp_max_unbound_pri) 2166 dp->disp_max_unbound_pri = tpri; 2167 } 2168 2169 /* 2170 * disp_getbest() 2171 * De-queue the highest priority unbound runnable thread. 2172 * Returns with the thread unlocked and onproc but at splhigh (like disp()). 2173 * Returns NULL if nothing found. 2174 * Returns T_DONTSTEAL if the thread was not stealable, 2175 * so that the caller will try again later. 2176 * 2177 * Passed a pointer to a dispatch queue not associated with 2178 * this CPU.
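 *
 * The queue may be another CPU's dispatch queue or a CPU partition's
 * kernel preemption (kp) queue; disp_getwork() calls this for both.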
2179 */ 2180 static kthread_t * 2181 disp_getbest(disp_t *dp) 2182 { 2183 kthread_t *tp; 2184 dispq_t *dq; 2185 pri_t pri; 2186 cpu_t *cp, *tcp; 2187 boolean_t allbound; 2188 2189 disp_lock_enter(&dp->disp_lock); 2190 2191 /* 2192 * If there is nothing to run, or the CPU is in the middle of a 2193 * context switch of the only thread, return NULL. 2194 */ 2195 tcp = dp->disp_cpu; 2196 cp = CPU; 2197 pri = dp->disp_max_unbound_pri; 2198 if (pri == -1 || 2199 (tcp != NULL && (tcp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 2200 tcp->cpu_disp->disp_nrunnable == 1)) { 2201 disp_lock_exit_nopreempt(&dp->disp_lock); 2202 return (NULL); 2203 } 2204 2205 dq = &dp->disp_q[pri]; 2206 2207 2208 /* 2209 * Assume that all threads are bound on this queue, and change it 2210 * later when we find out that it is not the case. 2211 */ 2212 allbound = B_TRUE; 2213 for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) { 2214 hrtime_t now, nosteal, rqtime; 2215 2216 /* 2217 * Skip over bound threads which could be here even 2218 * though disp_max_unbound_pri indicated this level. 2219 */ 2220 if (tp->t_bound_cpu || tp->t_weakbound_cpu) 2221 continue; 2222 2223 /* 2224 * We've got some unbound threads on this queue, so turn 2225 * the allbound flag off now. 2226 */ 2227 allbound = B_FALSE; 2228 2229 /* 2230 * The thread is a candidate for stealing from its run queue. We 2231 * don't want to steal threads that became runnable just a 2232 * moment ago. This improves CPU affinity for threads that get 2233 * preempted for short periods of time and go back on the run 2234 * queue. 2235 * 2236 * We want to let it stay on its run queue if it was only placed 2237 * there recently and it was running on the same CPU before that 2238 * to preserve its cache investment. For the thread to remain on 2239 * its run queue, ALL of the following conditions must be 2240 * satisfied: 2241 * 2242 * - the disp queue should not be the kernel preemption queue 2243 * - delayed idle stealing should not be disabled 2244 * - nosteal_nsec should be non-zero 2245 * - it should run with user priority 2246 * - it should be on the run queue of the CPU where it was 2247 * running before being placed on the run queue 2248 * - it should be the only thread on the run queue (to prevent 2249 * extra scheduling latency for other threads) 2250 * - it should sit on the run queue for less than per-chip 2251 * nosteal interval or global nosteal interval 2252 * - in case of CPUs with shared cache it should sit in a run 2253 * queue of a CPU from a different chip 2254 * 2255 * The checks are arranged so that the ones that are faster are 2256 * placed earlier. 2257 */ 2258 if (tcp == NULL || 2259 pri >= minclsyspri || 2260 tp->t_cpu != tcp) 2261 break; 2262 2263 /* 2264 * Steal immediately if, due to CMT processor architecture, 2265 * migration between cp and tcp would incur no performance 2266 * penalty. 2267 */ 2268 if (pg_cmt_can_migrate(cp, tcp)) 2269 break; 2270 2271 nosteal = nosteal_nsec; 2272 if (nosteal == 0) 2273 break; 2274 2275 /* 2276 * Calculate time spent sitting on run queue 2277 */ 2278 now = gethrtime_unscaled(); 2279 rqtime = now - tp->t_waitrq; 2280 scalehrtime(&rqtime); 2281 2282 /* 2283 * Steal immediately if the time spent on this run queue is more 2284 * than the allowed nosteal delay. 2285 * 2286 * Negative rqtime check is needed here to avoid infinite 2287 * stealing delays caused by unlikely but not impossible 2288 * drifts between CPU times on different CPUs.
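 *
 * For example, with a 100us nosteal interval, a thread that has waited
 * only 30us on this queue is left alone; the code below then works out
 * the absolute time at which it would become eligible for stealing.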
2289 */ 2290 if (rqtime > nosteal || rqtime < 0) 2291 break; 2292 2293 DTRACE_PROBE4(nosteal, kthread_t *, tp, 2294 cpu_t *, tcp, cpu_t *, cp, hrtime_t, rqtime); 2295 scalehrtime(&now); 2296 /* 2297 * Calculate when this thread becomes stealable 2298 */ 2299 now += (nosteal - rqtime); 2300 2301 /* 2302 * Calculate time when some thread becomes stealable 2303 */ 2304 if (now < dp->disp_steal) 2305 dp->disp_steal = now; 2306 } 2307 2308 /* 2309 * If there were no unbound threads on this queue, find the queue 2310 * where they are and then return later. The value of 2311 * disp_max_unbound_pri is not always accurate because it isn't 2312 * reduced until another idle CPU looks for work. 2313 */ 2314 if (allbound) 2315 disp_fix_unbound_pri(dp, pri); 2316 2317 /* 2318 * If we reached the end of the queue and found no unbound threads 2319 * then return NULL so that other CPUs will be considered. If there 2320 * are unbound threads but they cannot yet be stolen, then 2321 * return T_DONTSTEAL and try again later. 2322 */ 2323 if (tp == NULL) { 2324 disp_lock_exit_nopreempt(&dp->disp_lock); 2325 return (allbound ? NULL : T_DONTSTEAL); 2326 } 2327 2328 /* 2329 * Found a runnable, unbound thread, so remove it from queue. 2330 * dispdeq() requires that we have the thread locked, and we do, 2331 * by virtue of holding the dispatch queue lock. dispdeq() will 2332 * put the thread in transition state, thereby dropping the dispq 2333 * lock. 2334 */ 2335 2336 #ifdef DEBUG 2337 { 2338 int thread_was_on_queue; 2339 2340 thread_was_on_queue = dispdeq(tp); /* drops disp_lock */ 2341 ASSERT(thread_was_on_queue); 2342 } 2343 2344 #else /* DEBUG */ 2345 (void) dispdeq(tp); /* drops disp_lock */ 2346 #endif /* DEBUG */ 2347 2348 /* 2349 * Reset the disp_queue steal time - we do not know what the smallest 2350 * value across the queue is. 2351 */ 2352 dp->disp_steal = 0; 2353 2354 tp->t_schedflag |= TS_DONT_SWAP; 2355 2356 /* 2357 * Set up the thread to run on the current CPU. 2358 */ 2359 tp->t_disp_queue = cp->cpu_disp; 2360 2361 cp->cpu_dispthread = tp; /* protected by spl only */ 2362 cp->cpu_dispatch_pri = pri; 2363 2364 /* 2365 * There can be a memory synchronization race between disp_getbest() 2366 * and disp_ratify() vs cpu_resched() where cpu_resched() is trying 2367 * to preempt the current thread to run the enqueued thread while 2368 * disp_getbest() and disp_ratify() are changing the current thread 2369 * to the stolen thread. This may lead to a situation where 2370 * cpu_resched() tries to preempt the wrong thread and the 2371 * stolen thread continues to run on the CPU which has been tagged 2372 * for preemption. 2373 * Later the clock thread gets enqueued but doesn't get to run on the 2374 * CPU, causing the system to hang. 2375 * 2376 * To avoid this, grabbing and dropping the disp_lock (which does 2377 * a memory barrier) is needed to synchronize the execution of 2378 * cpu_resched() with disp_getbest() and disp_ratify() and 2379 * synchronize the memory read and written by cpu_resched(), 2380 * disp_getbest(), and disp_ratify() with each other. 2381 * (see CR#6482861 for more details). 2382 */ 2383 disp_lock_enter_high(&cp->cpu_disp->disp_lock); 2384 disp_lock_exit_high(&cp->cpu_disp->disp_lock); 2385 2386 ASSERT(pri == DISP_PRIO(tp)); 2387 2388 DTRACE_PROBE3(steal, kthread_t *, tp, cpu_t *, tcp, cpu_t *, cp); 2389 2390 thread_onproc(tp, cp); /* set t_state to TS_ONPROC */ 2391 2392 /* 2393 * Return with spl high so that swtch() won't need to raise it. 2394 * The disp_lock was dropped by dispdeq().
2395 */ 2396 2397 return (tp); 2398 } 2399 2400 /* 2401 * disp_bound_common() - common routine for higher level functions 2402 * that check for bound threads under certain conditions. 2403 * If 'threadlistsafe' is set then there is no need to acquire 2404 * pidlock to stop the thread list from changing (eg, if 2405 * disp_bound_* is called with cpus paused). 2406 */ 2407 static int 2408 disp_bound_common(cpu_t *cp, int threadlistsafe, int flag) 2409 { 2410 int found = 0; 2411 kthread_t *tp; 2412 2413 ASSERT(flag); 2414 2415 if (!threadlistsafe) 2416 mutex_enter(&pidlock); 2417 tp = curthread; /* faster than allthreads */ 2418 do { 2419 if (tp->t_state != TS_FREE) { 2420 /* 2421 * If an interrupt thread is busy, but the 2422 * caller doesn't care (i.e. BOUND_INTR is off), 2423 * then just ignore it and continue through. 2424 */ 2425 if ((tp->t_flag & T_INTR_THREAD) && 2426 !(flag & BOUND_INTR)) 2427 continue; 2428 2429 /* 2430 * Skip the idle thread for the CPU 2431 * we're about to set offline. 2432 */ 2433 if (tp == cp->cpu_idle_thread) 2434 continue; 2435 2436 /* 2437 * Skip the pause thread for the CPU 2438 * we're about to set offline. 2439 */ 2440 if (tp == cp->cpu_pause_thread) 2441 continue; 2442 2443 if ((flag & BOUND_CPU) && 2444 (tp->t_bound_cpu == cp || 2445 tp->t_bind_cpu == cp->cpu_id || 2446 tp->t_weakbound_cpu == cp)) { 2447 found = 1; 2448 break; 2449 } 2450 2451 if ((flag & BOUND_PARTITION) && 2452 (tp->t_cpupart == cp->cpu_part)) { 2453 found = 1; 2454 break; 2455 } 2456 } 2457 } while ((tp = tp->t_next) != curthread && found == 0); 2458 if (!threadlistsafe) 2459 mutex_exit(&pidlock); 2460 return (found); 2461 } 2462 2463 /* 2464 * disp_bound_threads - return nonzero if threads are bound to the processor. 2465 * Called infrequently. Keep this simple. 2466 * Includes threads that are asleep or stopped but not onproc. 2467 */ 2468 int 2469 disp_bound_threads(cpu_t *cp, int threadlistsafe) 2470 { 2471 return (disp_bound_common(cp, threadlistsafe, BOUND_CPU)); 2472 } 2473 2474 /* 2475 * disp_bound_anythreads - return nonzero if _any_ threads are bound 2476 * to the given processor, including interrupt threads. 2477 */ 2478 int 2479 disp_bound_anythreads(cpu_t *cp, int threadlistsafe) 2480 { 2481 return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR)); 2482 } 2483 2484 /* 2485 * disp_bound_partition - return nonzero if threads are bound to the same 2486 * partition as the processor. 2487 * Called infrequently. Keep this simple. 2488 * Includes threads that are asleep or stopped but not onproc. 2489 */ 2490 int 2491 disp_bound_partition(cpu_t *cp, int threadlistsafe) 2492 { 2493 return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION)); 2494 } 2495 2496 /* 2497 * disp_cpu_inactive - make a CPU inactive by moving all of its unbound 2498 * threads to other CPUs. 2499 */ 2500 void 2501 disp_cpu_inactive(cpu_t *cp) 2502 { 2503 kthread_t *tp; 2504 disp_t *dp = cp->cpu_disp; 2505 dispq_t *dq; 2506 pri_t pri; 2507 int wasonq; 2508 2509 disp_lock_enter(&dp->disp_lock); 2510 while ((pri = dp->disp_max_unbound_pri) != -1) { 2511 dq = &dp->disp_q[pri]; 2512 tp = dq->dq_first; 2513 2514 /* 2515 * Skip over bound threads. 
2516 */ 2517 while (tp != NULL && tp->t_bound_cpu != NULL) { 2518 tp = tp->t_link; 2519 } 2520 2521 if (tp == NULL) { 2522 /* disp_max_unbound_pri must be inaccurate, so fix it */ 2523 disp_fix_unbound_pri(dp, pri); 2524 continue; 2525 } 2526 2527 wasonq = dispdeq(tp); /* drops disp_lock */ 2528 ASSERT(wasonq); 2529 ASSERT(tp->t_weakbound_cpu == NULL); 2530 2531 setbackdq(tp); 2532 /* 2533 * Called from cpu_offline: 2534 * 2535 * cp has already been removed from the list of active cpus 2536 * and tp->t_cpu has been changed so there is no risk of 2537 * tp ending up back on cp. 2538 * 2539 * Called from cpupart_move_cpu: 2540 * 2541 * The cpu has moved to a new cpupart. Any threads that 2542 * were on its dispatch queues before the move remain 2543 * in the old partition and can't run in the new partition. 2544 */ 2545 ASSERT(tp->t_cpu != cp); 2546 thread_unlock(tp); 2547 2548 disp_lock_enter(&dp->disp_lock); 2549 } 2550 disp_lock_exit(&dp->disp_lock); 2551 } 2552 2553 /* 2554 * Return a score rating this CPU for running this thread: lower is better. 2555 * 2556 * If curthread is looking for a new CPU, then we ignore cpu_dispatch_pri for 2557 * curcpu (as that's our own priority). 2558 * 2559 * If a cpu is the target of an offline request, then try to avoid it. 2560 * 2561 * Otherwise we'll use double the effective dispatcher priority for the CPU. 2562 * 2563 * We do this so smt_adjust_cpu_score() can increment the score if needed, 2564 * without ending up overriding a dispatcher priority. 2565 */ 2566 static pri_t 2567 cpu_score(cpu_t *cp, kthread_t *tp) 2568 { 2569 pri_t score; 2570 2571 if (tp == curthread && cp == curthread->t_cpu) 2572 score = 2 * CPU_IDLE_PRI; 2573 else if (cp == cpu_inmotion) 2574 score = SHRT_MAX; 2575 else 2576 score = 2 * cp->cpu_dispatch_pri; 2577 2578 if (2 * cp->cpu_disp->disp_maxrunpri > score) 2579 score = 2 * cp->cpu_disp->disp_maxrunpri; 2580 if (2 * cp->cpu_chosen_level > score) 2581 score = 2 * cp->cpu_chosen_level; 2582 2583 return (smt_adjust_cpu_score(tp, cp, score)); 2584 } 2585 2586 /* 2587 * disp_lowpri_cpu - find a suitable CPU to run the given thread. 2588 * 2589 * We are looking for a CPU with an effective dispatch priority lower than the 2590 * thread's, so that the thread will run immediately rather than be enqueued. 2591 * For NUMA locality, we prefer "home" CPUs within the thread's ->t_lpl group. 2592 * If we don't find an available CPU there, we will expand our search to include 2593 * wider locality levels. (Note these groups are already divided by CPU 2594 * partition.) 2595 * 2596 * If the thread cannot immediately run on *any* CPU, we'll enqueue ourselves on 2597 * the best home CPU we found. 2598 * 2599 * The hint passed in is used as a starting point so we don't favor CPU 0 or any 2600 * other CPU. The caller should pass in the most recently used CPU for the 2601 * thread; it's of course possible that this CPU isn't in the home lgroup. 2602 * 2603 * This function must be called at either high SPL, or with preemption disabled, 2604 * so that the "hint" CPU cannot be removed from the online CPU list while we 2605 * are traversing it.
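 *
 * The per-CPU rating itself comes from cpu_score() above; a simplified,
 * standalone sketch of that doubled-score comparison is appended after
 * the last function in this listing.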
2606 */ 2607 cpu_t * 2608 disp_lowpri_cpu(cpu_t *hint, kthread_t *tp, pri_t tpri) 2609 { 2610 cpu_t *bestcpu; 2611 cpu_t *besthomecpu; 2612 cpu_t *cp, *cpstart; 2613 2614 klgrpset_t done; 2615 2616 lpl_t *lpl_iter, *lpl_leaf; 2617 2618 ASSERT(hint != NULL); 2619 ASSERT(tp->t_lpl->lpl_ncpu > 0); 2620 2621 bestcpu = besthomecpu = NULL; 2622 klgrpset_clear(done); 2623 2624 lpl_iter = tp->t_lpl; 2625 2626 do { 2627 pri_t best = SHRT_MAX; 2628 klgrpset_t cur_set; 2629 2630 klgrpset_clear(cur_set); 2631 2632 for (int i = 0; i < lpl_iter->lpl_nrset; i++) { 2633 lpl_leaf = lpl_iter->lpl_rset[i]; 2634 if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid)) 2635 continue; 2636 2637 klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid); 2638 2639 if (hint->cpu_lpl == lpl_leaf) 2640 cp = cpstart = hint; 2641 else 2642 cp = cpstart = lpl_leaf->lpl_cpus; 2643 2644 do { 2645 pri_t score = cpu_score(cp, tp); 2646 2647 if (score < best) { 2648 best = score; 2649 bestcpu = cp; 2650 2651 /* An idle CPU: we're done. */ 2652 if (score / 2 == CPU_IDLE_PRI) 2653 goto out; 2654 } 2655 } while ((cp = cp->cpu_next_lpl) != cpstart); 2656 } 2657 2658 if (bestcpu != NULL && tpri > (best / 2)) 2659 goto out; 2660 2661 if (besthomecpu == NULL) 2662 besthomecpu = bestcpu; 2663 2664 /* 2665 * Add the lgrps we just considered to the "done" set 2666 */ 2667 klgrpset_or(done, cur_set); 2668 2669 } while ((lpl_iter = lpl_iter->lpl_parent) != NULL); 2670 2671 /* 2672 * The specified priority isn't high enough to run immediately 2673 * anywhere, so just return the best CPU from the home lgroup. 2674 */ 2675 bestcpu = besthomecpu; 2676 2677 out: 2678 ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0); 2679 return (bestcpu); 2680 } 2681 2682 /* 2683 * This routine provides the generic idle cpu function for all processors. 2684 * If a processor has some specific code to execute when idle (say, to stop 2685 * the pipeline and save power), then that routine should be defined in the 2686 * processor-specific code (module_xx.c) and the global variable idle_cpu 2687 * set to that function. 2688 */ 2689 static void 2690 generic_idle_cpu(void) 2691 { 2692 } 2693 2694 /*ARGSUSED*/ 2695 static void 2696 generic_enq_thread(cpu_t *cpu, int bound) 2697 { 2698 } 2699 2700 cpu_t * 2701 disp_choose_best_cpu(void) 2702 { 2703 kthread_t *t = curthread; 2704 cpu_t *curcpu = CPU; 2705 2706 ASSERT(t->t_preempt > 0); 2707 ASSERT(t->t_state == TS_ONPROC); 2708 ASSERT(t->t_schedflag & TS_VCPU); 2709 2710 if (smt_should_run(t, curcpu)) 2711 return (curcpu); 2712 2713 return (disp_lowpri_cpu(curcpu, t, t->t_pri)); 2714 } 2715
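/*
 * The following is a simplified, standalone sketch (user-level C, not part
 * of this file or of the kernel build) of the doubled-score idea documented
 * at cpu_score() and disp_lowpri_cpu() above: each candidate CPU is rated
 * with twice its effective dispatch priority so that an SMT penalty of one
 * can break ties between otherwise-equal CPUs without ever outweighing a
 * full priority level. All names below (fake_cpu_t, IDLE_PRI,
 * fake_cpu_score, fake_choose_cpu) are hypothetical stand-ins, not kernel
 * interfaces.
 */
#include <limits.h>
#include <stdio.h>

#define	IDLE_PRI	(-1)	/* stand-in for CPU_IDLE_PRI */

typedef struct fake_cpu {
	int	dispatch_pri;	/* pri of running thread, or IDLE_PRI */
	int	maxrunpri;	/* highest pri queued on this CPU, or -1 */
	int	smt_penalty;	/* 0 or 1: hardware-sharing penalty */
} fake_cpu_t;

/* Lower is better; doubling leaves the low bit free for the SMT penalty. */
static int
fake_cpu_score(const fake_cpu_t *c)
{
	int score = 2 * c->dispatch_pri;

	if (2 * c->maxrunpri > score)
		score = 2 * c->maxrunpri;
	return (score + c->smt_penalty);
}

/* Pick the CPU where a thread of priority tpri would run soonest. */
static int
fake_choose_cpu(const fake_cpu_t *cpus, int ncpus, int tpri)
{
	int best = INT_MAX, bestidx = 0;

	for (int i = 0; i < ncpus; i++) {
		int score = fake_cpu_score(&cpus[i]);

		if (score < best) {
			best = score;
			bestidx = i;
			/* A penalty-free idle CPU: stop searching. */
			if (score / 2 == IDLE_PRI)
				break;
		}
	}
	/* The thread preempts only if its pri beats the best effective pri. */
	printf("best CPU %d (score %d), runs immediately: %s\n",
	    bestidx, best, tpri > best / 2 ? "yes" : "no");
	return (bestidx);
}

int
main(void)
{
	fake_cpu_t cpus[3] = {
		{ .dispatch_pri = 59, .maxrunpri = -1, .smt_penalty = 0 },
		{ .dispatch_pri = 10, .maxrunpri = -1, .smt_penalty = 1 },
		{ .dispatch_pri = 10, .maxrunpri = -1, .smt_penalty = 0 },
	};

	/* A pri-30 thread lands on CPU 2: same pri as CPU 1, but no penalty. */
	(void) fake_choose_cpu(cpus, 3, 30);
	return (0);
}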