/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The System Duty Cycle (SDC) scheduling class
 * --------------------------------------------
 *
 * Background
 *
 * Kernel threads in Solaris have traditionally not been large consumers
 * of CPU time.  They typically wake up, perform a small amount of
 * work, then go back to sleep waiting for either a timeout or another
 * signal.  On the assumption that the small amount of work that they do
 * is important for the behavior of the whole system, these threads are
 * treated kindly by the dispatcher and the SYS scheduling class: they run
 * without preemption from anything other than real-time and interrupt
 * threads; when preempted, they are put at the front of the queue, so they
 * generally do not migrate between CPUs; and they are allowed to stay
 * running until they voluntarily give up the CPU.
 *
 * As Solaris has evolved, new workloads have emerged which require the
 * kernel to perform significant amounts of CPU-intensive work.  One
 * example of such a workload is ZFS's transaction group sync processing.
 * Each sync operation generates a large batch of I/Os, and each I/O
 * may need to be compressed and/or checksummed before it is written to
 * storage.  The taskq threads which perform the compression and checksums
 * will run nonstop as long as they have work to do; a large sync operation
 * on a compression-heavy dataset can keep them busy for seconds on end.
 * This causes human-time-scale dispatch latency bubbles for any other
 * threads which have the misfortune to share a CPU with the taskq threads.
 *
 * The SDC scheduling class is a solution to this problem.
 *
 *
 * Overview
 *
 * SDC is centered around the concept of a thread's duty cycle (DC):
 *
 *			      ONPROC time
 *	Duty Cycle =	----------------------
 *			ONPROC + Runnable time
 *
 * This is the ratio of the time the thread spent running on a CPU to the
 * time it spent running or trying to run.  It is unaffected by any time
 * the thread spent sleeping, stopped, etc.
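 *
 * As an illustrative example (the numbers are invented here, not taken
 * from the code): over a 100ms window, a thread that was ONPROC for 30ms
 * and Runnable (waiting for a CPU) for 70ms has a duty cycle of
 * 30 / (30 + 70) = 30%.  Any time it also spent sleeping during that
 * window appears in neither term.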
 *
 * A thread joining the SDC class specifies a "target" DC that it wants
 * to run at.  To implement this policy, the routine sysdc_update() scans
 * the list of active SDC threads every few ticks and uses each thread's
 * microstate data to compute the actual duty cycle that the thread has
 * experienced recently.  If the thread is under its target DC, its
 * priority is increased to the maximum available (sysdc_maxpri, which is
 * 99 by default).  If the thread is over its target DC, its priority is
 * reduced to the minimum available (sysdc_minpri, 0 by default).  This
 * is a fairly primitive approach, in that it doesn't use any of the
 * intermediate priorities, but it's not completely inappropriate.  Even
 * though threads in the SDC class might take a while to do their job, they
 * are by some definition important if they're running inside the kernel,
 * so it is reasonable that they should get to run at priority 99.
 *
 * If a thread is running when sysdc_update() calculates its actual duty
 * cycle, and there are other threads of equal or greater priority on its
 * CPU's dispatch queue, sysdc_update() preempts that thread.  The thread
 * acknowledges the preemption by calling sysdc_preempt(), which calls
 * setbackdq(), which gives other threads with the same priority a chance
 * to run.  This creates a de facto time quantum for threads in the SDC
 * scheduling class.
 *
 * An SDC thread which is assigned priority 0 can continue to run if
 * nothing else needs to use the CPU that it's running on.  Similarly, an
 * SDC thread at priority 99 might not get to run as much as it wants to
 * if there are other priority-99 or higher threads on its CPU.  These
 * situations would cause the thread to get ahead of or behind its target
 * DC; the longer the situations lasted, the further ahead or behind the
 * thread would get.  Rather than condemning a thread to a lifetime of
 * paying for its youthful indiscretions, SDC keeps "base" values for
 * ONPROC and Runnable times in each thread's sysdc data, and updates these
 * values periodically.  The duty cycle is then computed using the elapsed
 * amount of ONPROC and Runnable times since those base times.
 *
 * Since sysdc_update() scans SDC threads fairly frequently, it tries to
 * keep the list of "active" threads small by pruning out threads which
 * have been asleep for a while.  They are not pruned immediately upon
 * going to sleep, since some threads may bounce back and forth between
 * sleeping and being runnable.
 *
 *
 * Interfaces
 *
 *	void sysdc_thread_enter(t, dc, flags)
 *
 *		Moves a kernel thread from the SYS scheduling class to the
 *		SDC class.  t must have an associated LWP (created by calling
 *		lwp_kernel_create()).  The thread will have a target DC of dc.
 *		Flags should be either 0 or SYSDC_THREAD_BATCH.  If
 *		SYSDC_THREAD_BATCH is specified, the thread will run with a
 *		slightly lower priority (see "Batch threads", below).
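 *
 *		A rough, hypothetical sketch of a caller (the SSYS process,
 *		worker function, its argument, and the 75% target are
 *		invented for illustration; the lwp_kernel_create() argument
 *		list is from memory and should be checked against its actual
 *		prototype):
 *
 *			kthread_t *t;
 *
 *			t = lwp_kernel_create(my_sys_proc, my_worker_func,
 *			    my_arg, TS_RUN, minclsyspri);
 *			sysdc_thread_enter(t, 75, 0);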
 *
 *
 * Complications
 *
 *	- Run queue balancing
 *
 *		The Solaris dispatcher is biased towards letting a thread run
 *		on the same CPU which it last ran on, if no more than 3 ticks
 *		(i.e. rechoose_interval) have passed since the thread last ran.
 *		This helps to preserve cache warmth.  On the other hand, it
 *		also tries to keep the per-CPU run queues fairly balanced; if
 *		the CPU chosen for a runnable thread has a run queue which is
 *		three or more threads longer than a neighboring CPU's queue,
 *		the runnable thread is dispatched onto the neighboring CPU
 *		instead.
 *
 *		These policies work well for some workloads, but not for many
 *		SDC threads.  The taskq client of SDC, for example, has many
 *		discrete units of work to do.  The work units are largely
 *		independent, so cache warmth is not an important consideration.
 *		It is important that the threads fan out quickly to different
 *		CPUs, since the amount of work these threads have to do (a few
 *		seconds worth at a time) doesn't leave much time to correct
 *		thread placement errors (i.e. two SDC threads being dispatched
 *		to the same CPU).
 *
 *		To fix this, SDC uses the TS_RUNQMATCH flag introduced for
 *		FSS.  This tells the dispatcher to keep neighboring run queues'
 *		lengths more evenly matched, which allows SDC threads to
 *		migrate more easily.
 *
 *	- LWPs and system processes
 *
 *		SDC can only be used for kernel threads.  Since SDC uses
 *		microstate accounting data to compute each thread's actual
 *		duty cycle, all threads entering the SDC class must have
 *		associated LWPs (which store the microstate data).  This means
 *		that the threads have to be associated with an SSYS process,
 *		i.e. one created by newproc().  If the microstate accounting
 *		information is ever moved into the kthread_t, this restriction
 *		could be lifted.
 *
 *	- Dealing with oversubscription
 *
 *		Since SDC duty cycles are per-thread, it is possible that the
 *		aggregate requested duty cycle of all SDC threads in a
 *		processor set could be greater than the total CPU time
 *		available in that set.  The FSS scheduling class has an
 *		analogous situation, which it deals with by reducing each
 *		thread's allotted CPU time proportionally.  Since SDC doesn't
 *		need to be as precise as FSS, it uses a simpler solution to
 *		the oversubscription problem.
 *
 *		sysdc_update() accumulates the amount of time that max-priority
 *		SDC threads have spent on-CPU in each processor set, and uses
 *		that sum to create an implied duty cycle for that processor
 *		set:
 *
 *				    accumulated CPU time
 *		pset DC =	-----------------------------------
 *				(# CPUs) * time since last update
 *
 *		If this implied duty cycle is above a maximum pset duty cycle
 *		(90% by default), sysdc_update() sets the priority of all SDC
 *		threads in that processor set to sysdc_minpri for a "break"
 *		period.  After the break period, it waits for a "nobreak"
 *		period before trying to enforce the pset duty cycle limit
 *		again.
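 *
 *		For example (an illustrative calculation with the default
 *		tunables, not taken from the code): in a 4-CPU pset with a
 *		20ms update interval, each interval provides 4 * 20ms = 80ms
 *		of CPU time.  If max-priority SDC threads accumulated 76ms of
 *		ONPROC time during the interval, the pset DC is
 *		76 / 80 = 95%, which exceeds the default 90% limit, so all
 *		SDC threads in that pset are held at sysdc_minpri for the
 *		break period.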
 *
 *	- Processor sets
 *
 *		As the above implies, SDC is processor set aware, but it does
 *		not currently allow threads to change processor sets while in
 *		the SDC class.  Instead, those threads must join the desired
 *		processor set before entering SDC. [1]
 *
 *	- Batch threads
 *
 *		A thread joining the SDC class can specify the
 *		SYSDC_THREAD_BATCH flag.  This flag causes the maximum
 *		priority for that thread to be reduced (by default, the
 *		maximum is reduced by 1).  This allows longer-running,
 *		batch-oriented SDC threads to be interrupted by more
 *		immediate, higher-priority work.
 *
 *	- t_kpri_req
 *
 *		The TS and FSS scheduling classes pay attention to t_kpri_req,
 *		which provides a simple form of priority inheritance for
 *		synchronization primitives (such as rwlocks held as READER)
 *		which cannot be traced to a unique thread.  The SDC class does
 *		not honor t_kpri_req, for a few reasons:
 *
 *		1.  t_kpri_req is notoriously inaccurate.  A measure of its
 *		    inaccuracy is that it needs to be cleared every time a
 *		    thread returns to user mode, because it is frequently
 *		    non-zero at that point.  This can happen because
 *		    "ownership" of synchronization primitives that use
 *		    t_kpri_req can be silently handed off, leaving no
 *		    opportunity to will the t_kpri_req inheritance.
 *
 *		2.  Unlike in TS and FSS, threads in SDC *will* eventually run
 *		    at kernel priority.  This means that even if an SDC thread
 *		    is holding a synchronization primitive and running at low
 *		    priority, its priority will eventually be raised above 60,
 *		    allowing it to drive on and release the resource.
 *
 *		3.  The first consumer of SDC uses the taskq subsystem, which
 *		    holds a reader lock for the duration of the task's
 *		    execution.  This would mean that SDC threads would never
 *		    drop below kernel priority in practice, which defeats one
 *		    of the purposes of SDC.
 *
 *	- Why not FSS?
 *
 *		It might seem that the existing FSS scheduling class could
 *		solve the problems that SDC is attempting to solve.  FSS's
 *		more precise solution to the oversubscription problem would
 *		hardly cause trouble, as long as it performed well.  SDC is
 *		implemented as a separate scheduling class for two main
 *		reasons: the initial consumer of SDC does not map well onto
 *		the "project" abstraction that is central to FSS, and FSS
 *		does not expect to run at kernel priorities.
 *
 *
 * Tunables
 *
 *	- sysdc_batch_niceness:  The amount below sysdc_maxpri that
 *	  SYSDC_THREAD_BATCH threads should use as their per-thread
 *	  maximum priority.
 *
 *	- sysdc_update_interval_msec:  Number of milliseconds between
 *	  consecutive thread priority updates.
 *
 *	- sysdc_reset_interval_msec:  Number of milliseconds between
 *	  consecutive resets of a thread's base ONPROC and Runnable
 *	  times.
 *
 *	- sysdc_prune_interval_msec:  Number of milliseconds of sleeping
 *	  before a thread is pruned from the active list.
 *
 *	- sysdc_max_pset_DC:  Allowable percentage of a processor set's
 *	  CPU time which SDC can give to its high-priority threads.
 *
 *	- sysdc_break_msec:  Number of milliseconds of "break" taken when
 *	  sysdc_max_pset_DC is exceeded.
 *
 *
 * Future work (in SDC and related subsystems)
 *
 *	- Per-thread rechoose interval (0 for SDC)
 *
 *	  Allow each thread to specify its own rechoose interval.  SDC
 *	  threads would specify an interval of zero, which would rechoose
 *	  the CPU with the lowest priority once per update.
 *
 *	- Allow threads to change processor sets after joining the SDC class
 *
 *	- Thread groups and per-group DC
 *
 *	  It might be nice to be able to specify a duty cycle which applies
 *	  to a group of threads in aggregate.
 *
 *	- Per-group DC callback to allow dynamic DC tuning
 *
 *	  Currently, DCs are assigned when the thread joins SDC.  Some
 *	  workloads could benefit from being able to tune their DC using
 *	  subsystem-specific knowledge about the workload.
 *
 *	- Finer-grained priority updates
 *
 *	- More nuanced management of oversubscription
 *
 *	- Moving other CPU-intensive threads into SDC
 *
 *	- Move msacct data into kthread_t
 *
 *	  This would allow kernel threads without LWPs to join SDC.
 *
 *
 * Footnotes
 *
 *	[1] The details of doing so are left as an exercise for the reader.
 */

#include <sys/types.h>
#include <sys/sysdc.h>
#include <sys/sysdc_impl.h>

#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/debug.h>
#include <sys/disp.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/schedctl.h>
#include <sys/sdt.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/var.h>

/*
 * Tunables - loaded into the internal state at module load time
 */
uint_t		sysdc_update_interval_msec = 20;
uint_t		sysdc_reset_interval_msec = 400;
uint_t		sysdc_prune_interval_msec = 100;
uint_t		sysdc_max_pset_DC = 90;
uint_t		sysdc_break_msec = 80;
pri_t		sysdc_batch_niceness = 1;

/*
 * Internal state - constants set up by sysdc_initparam()
 */
static clock_t	sysdc_update_ticks;	/* ticks between updates */
static uint_t	sysdc_prune_updates;	/* updates asleep before pruning */
static uint_t	sysdc_reset_updates;	/* # of updates before reset */
static uint_t	sysdc_break_updates;	/* updates to break */
static uint_t	sysdc_nobreak_updates;	/* updates to not check */
static uint_t	sysdc_minDC;		/* minimum allowed DC */
static uint_t	sysdc_maxDC;		/* maximum allowed DC */
static pri_t	sysdc_minpri;		/* minimum allowed priority */
static pri_t	sysdc_maxpri;		/* maximum allowed priority */

/*
 * Internal state
 */
static kmutex_t	sysdc_pset_lock;	/* lock protecting pset data */
static list_t	sysdc_psets;		/* list of psets with SDC threads */
static uint_t	sysdc_param_init;	/* sysdc_initparam() has been called */
static uint_t	sysdc_update_timeout_started; /* update timeout is active */
static hrtime_t	sysdc_last_update;	/* time of last sysdc_update() */
static sysdc_t	sysdc_dummy;		/* used to terminate active lists */

/*
 * Internal state - active hash table
 */
#define	SYSDC_NLISTS	8
#define	SYSDC_HASH(sdc)	(((uintptr_t)(sdc) >> 6) & (SYSDC_NLISTS - 1))
static sysdc_list_t	sysdc_active[SYSDC_NLISTS];
#define	SYSDC_LIST(sdc)	(&sysdc_active[SYSDC_HASH(sdc)])

#ifdef DEBUG
static struct {
	uint64_t	sysdc_update_times_asleep;
	uint64_t	sysdc_update_times_base_ran_backwards;
	uint64_t	sysdc_update_times_already_done;
	uint64_t	sysdc_update_times_cur_ran_backwards;
	uint64_t	sysdc_compute_pri_breaking;
	uint64_t	sysdc_activate_enter;
	uint64_t	sysdc_update_enter;
	uint64_t	sysdc_update_exited;
	uint64_t	sysdc_update_not_sdc;
	uint64_t	sysdc_update_idle;
	uint64_t	sysdc_update_take_break;
	uint64_t	sysdc_update_no_psets;
	uint64_t	sysdc_tick_not_sdc;
	uint64_t	sysdc_tick_quantum_expired;
	uint64_t	sysdc_thread_enter_enter;
} sysdc_stats;

#define	SYSDC_INC_STAT(x)	(sysdc_stats.x++)
#else
#define	SYSDC_INC_STAT(x)	((void)0)
#endif

/* macros are UPPER CASE */
#define	HOWMANY(a, b)	howmany((a), (b))
#define	MSECTOTICKS(a)	HOWMANY((a) * 1000, usec_per_tick)

static void
sysdc_initparam(void)
{
	uint_t sysdc_break_ticks;

	/* update / prune intervals */
	sysdc_update_ticks = MSECTOTICKS(sysdc_update_interval_msec);

	sysdc_prune_updates = HOWMANY(sysdc_prune_interval_msec,
	    sysdc_update_interval_msec);
	sysdc_reset_updates = HOWMANY(sysdc_reset_interval_msec,
	    sysdc_update_interval_msec);
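
	/*
	 * For illustration (not enforced here): with the default tunables,
	 * and assuming the common 100Hz clock (usec_per_tick == 10000) and
	 * SYSDC_DC_MAX == 100, updates happen every 2 ticks (20ms), a
	 * sleeping thread becomes prunable after about 5 updates (100ms),
	 * and the base times are reset every 20 updates (400ms).  The break
	 * parameters computed below then work out to a 4-update (80ms)
	 * break followed by a 36-update (720ms) nobreak period, i.e.
	 * 36 / (4 + 36) = 90%.
	 */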

	/* We must get at least a little time on CPU. */
	sysdc_minDC = 1;
	sysdc_maxDC = SYSDC_DC_MAX;
	sysdc_minpri = 0;
	sysdc_maxpri = maxclsyspri;

	/* break parameters */
	if (sysdc_max_pset_DC > SYSDC_DC_MAX) {
		sysdc_max_pset_DC = SYSDC_DC_MAX;
	}
	sysdc_break_ticks = MSECTOTICKS(sysdc_break_msec);
	sysdc_break_updates = HOWMANY(sysdc_break_ticks, sysdc_update_ticks);

	/*
	 * We want:
	 *
	 *	sysdc_max_pset_DC = (nobreak / (break + nobreak))
	 *
	 *	==>	nobreak = sysdc_max_pset_DC * (break + nobreak)
	 *
	 *			sysdc_max_pset_DC * break
	 *	==>	nobreak = -------------------------
	 *			   1 - sysdc_max_pset_DC
	 */
	sysdc_nobreak_updates =
	    HOWMANY((uint64_t)sysdc_break_updates * sysdc_max_pset_DC,
	    (SYSDC_DC_MAX - sysdc_max_pset_DC));

	sysdc_param_init = 1;
}

#undef	HOWMANY
#undef	MSECTOTICKS

#define	SDC_UPDATE_INITIAL	0x1	/* for the initial update */
#define	SDC_UPDATE_TIMEOUT	0x2	/* from sysdc_update() */
#define	SDC_UPDATE_TICK		0x4	/* from sysdc_tick(), on expiry */

/*
 * Updates the recorded times in the sdc, and returns the elapsed ONPROC
 * and Runnable times since the last reset.
 *
 * newO is the thread's actual ONPROC time; it's used during sysdc_update()
 * to track processor set usage.
 */
static void
sysdc_update_times(sysdc_t *sdc, uint_t flags,
    hrtime_t *O, hrtime_t *R, hrtime_t *newO)
{
	kthread_t *const t = sdc->sdc_thread;
	const uint_t	initial = (flags & SDC_UPDATE_INITIAL);
	const uint_t	update = (flags & SDC_UPDATE_TIMEOUT);
	const clock_t	now = ddi_get_lbolt();
	uint_t		do_reset;

	ASSERT(THREAD_LOCK_HELD(t));

	*O = *R = 0;

	/* If we've been sleeping, we know we haven't had any ONPROC time. */
	if (sdc->sdc_sleep_updates != 0 &&
	    sdc->sdc_sleep_updates != sdc->sdc_nupdates) {
		*newO = sdc->sdc_last_base_O;
		SYSDC_INC_STAT(sysdc_update_times_asleep);
		return;
	}

	/*
	 * If this is our first update, or we've hit the reset point,
	 * we need to reset our base_{O,R}.  Once we've updated them, we
	 * report O and R for the entire prior interval.
	 */
	do_reset = initial;
	if (update) {
		++sdc->sdc_nupdates;
		if ((sdc->sdc_nupdates % sysdc_reset_updates) == 0)
			do_reset = 1;
	}
	if (do_reset) {
		hrtime_t baseO, baseR;
		if (initial) {
			/*
			 * Start off our cycle count somewhere in the middle,
			 * to keep the resets from all happening at once.
			 *
			 * 4999 is a handy prime much larger than
			 * sysdc_reset_updates, so that we don't run into
			 * trouble if the resolution is a multiple of
			 * sysdc_reset_updates.
			 */
			sdc->sdc_nupdates = (uint_t)((gethrtime() % 4999) %
			    sysdc_reset_updates);
			baseO = baseR = 0;
		} else {
			baseO = sdc->sdc_base_O;
			baseR = sdc->sdc_base_R;
		}

		mstate_systhread_times(t, &sdc->sdc_base_O, &sdc->sdc_base_R);
		*newO = sdc->sdc_base_O;

		sdc->sdc_reset = now;
		sdc->sdc_pri_check = -1;	/* force mismatch below */

		/*
		 * See below for rationale.
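		 * (Briefly: microstate times are not read under a
		 * consistent set of locks, so the freshly sampled base
		 * times can transiently appear to have run backwards
		 * relative to our saved copies.)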
		 */
		if (baseO > sdc->sdc_base_O || baseR > sdc->sdc_base_R) {
			SYSDC_INC_STAT(sysdc_update_times_base_ran_backwards);
			baseO = sdc->sdc_base_O;
			baseR = sdc->sdc_base_R;
		}

		/* compute based on the entire interval */
		*O = (sdc->sdc_base_O - baseO);
		*R = (sdc->sdc_base_R - baseR);
		return;
	}

	/*
	 * If we're called from sysdc_update(), we *must* return a value
	 * for newO, so we always call mstate_systhread_times().
	 *
	 * Otherwise, if we've already done a pri check this tick,
	 * we can skip it.
	 */
	if (!update && sdc->sdc_pri_check == now) {
		SYSDC_INC_STAT(sysdc_update_times_already_done);
		return;
	}

	/* Get the current times from the thread */
	sdc->sdc_pri_check = now;
	mstate_systhread_times(t, &sdc->sdc_cur_O, &sdc->sdc_cur_R);
	*newO = sdc->sdc_cur_O;

	/*
	 * The updating of microstate accounting is not done under a
	 * consistent set of locks, particularly the t_waitrq field.  This
	 * can lead to narrow windows in which we account for time in the
	 * wrong bucket, which on the next read will be accounted for
	 * correctly.
	 *
	 * If our sdc_base_* fields were affected by one of these blips, we
	 * throw away the old data, and pretend this tick didn't happen.
	 */
	if (sdc->sdc_cur_O < sdc->sdc_base_O ||
	    sdc->sdc_cur_R < sdc->sdc_base_R) {

		sdc->sdc_base_O = sdc->sdc_cur_O;
		sdc->sdc_base_R = sdc->sdc_cur_R;

		SYSDC_INC_STAT(sysdc_update_times_cur_ran_backwards);
		return;
	}

	*O = sdc->sdc_cur_O - sdc->sdc_base_O;
	*R = sdc->sdc_cur_R - sdc->sdc_base_R;
}

/*
 * sysdc_compute_pri()
 *
 *	Recomputes the priority of the thread, leaving the result in
 *	sdc->sdc_epri.  Returns 1 if a priority update should occur
 *	(which will also trigger a cpu_surrender()), otherwise
 *	returns 0.
 */
static uint_t
sysdc_compute_pri(sysdc_t *sdc, uint_t flags)
{
	kthread_t *const t = sdc->sdc_thread;
	const uint_t	update = (flags & SDC_UPDATE_TIMEOUT);
	const uint_t	tick = (flags & SDC_UPDATE_TICK);

	hrtime_t	O, R;
	hrtime_t	newO = -1;

	ASSERT(THREAD_LOCK_HELD(t));

	sysdc_update_times(sdc, flags, &O, &R, &newO);
	ASSERT(!update || newO != -1);

	/* If we have new data, recompute our priority. */
	if ((O + R) != 0) {
		sdc->sdc_cur_DC = (O * SYSDC_DC_MAX) / (O + R);

		/* Adjust our priority to move our DC closer to the target. */
		if (sdc->sdc_cur_DC < sdc->sdc_target_DC)
			sdc->sdc_pri = sdc->sdc_maxpri;
		else
			sdc->sdc_pri = sdc->sdc_minpri;
	}

	/*
	 * If our per-pset duty cycle goes over the max, we will take a break.
	 * This forces all sysdc threads in the pset to minimum priority, in
	 * order to let everyone else have a chance at the CPU.
	 */
	if (sdc->sdc_pset->sdp_need_break) {
		SYSDC_INC_STAT(sysdc_compute_pri_breaking);
		sdc->sdc_epri = sdc->sdc_minpri;
	} else {
		sdc->sdc_epri = sdc->sdc_pri;
	}

	DTRACE_PROBE4(sysdc__compute__pri,
	    kthread_t *, t, pri_t, sdc->sdc_epri, uint_t, sdc->sdc_cur_DC,
	    uint_t, sdc->sdc_target_DC);

	/*
	 * For sysdc_update(), we compute the ONPROC time for high-priority
	 * threads, which is used to calculate the per-pset duty cycle.  We
	 * will always tell our callers to update the thread's priority,
	 * since we want to force a cpu_surrender().
	 *
	 * We reset sdc_update_ticks so that sysdc_tick() will only update
	 * the thread's priority if our timeout is delayed by a tick or
	 * more.
	 */
	if (update) {
		/* SDC threads are not allowed to change cpupart bindings. */
		ASSERT(t->t_cpupart == sdc->sdc_pset->sdp_cpupart);

		/* If we were at MAXPRI, account for our onproc time. */
		if (t->t_pri == sdc->sdc_maxpri &&
		    sdc->sdc_last_base_O != 0 &&
		    sdc->sdc_last_base_O < newO) {
			sdc->sdc_last_O = newO - sdc->sdc_last_base_O;
			sdc->sdc_pset->sdp_onproc_time +=
			    (uint64_t)sdc->sdc_last_O;
			sdc->sdc_pset->sdp_onproc_threads++;
		} else {
			sdc->sdc_last_O = 0;
		}
		sdc->sdc_last_base_O = newO;

		sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks + 1;
		return (1);
	}

	/*
	 * Like sysdc_update(), sysdc_tick() always wants to update the
	 * thread's priority, so that the CPU is surrendered if necessary.
	 * We reset sdc_update_ticks so that if the timeout continues to be
	 * delayed, we'll update at the regular interval.
	 */
	if (tick) {
		ASSERT(sdc->sdc_ticks == sdc->sdc_update_ticks);
		sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks;
		return (1);
	}

	/*
	 * Otherwise, only tell our callers to update the priority if it has
	 * changed.
	 */
	return (sdc->sdc_epri != t->t_pri);
}

static void
sysdc_update_pri(sysdc_t *sdc, uint_t flags)
{
	kthread_t *t = sdc->sdc_thread;

	ASSERT(THREAD_LOCK_HELD(t));

	if (sysdc_compute_pri(sdc, flags)) {
		if (!thread_change_pri(t, sdc->sdc_epri, 0)) {
			cpu_surrender(t);
		}
	}
}

/*
 * Add a thread onto the active list.  It will only be removed by
 * sysdc_update().
 */
static void
sysdc_activate(sysdc_t *sdc)
{
	sysdc_t	*volatile *headp = &SYSDC_LIST(sdc)->sdl_list;
	sysdc_t	*head;
	kthread_t *t = sdc->sdc_thread;

	SYSDC_INC_STAT(sysdc_activate_enter);

	ASSERT(sdc->sdc_next == NULL);
	ASSERT(THREAD_LOCK_HELD(t));

	do {
		head = *headp;
		sdc->sdc_next = head;
	} while (atomic_cas_ptr(headp, head, sdc) != head);
}

/*
 * sysdc_update() has two jobs:
 *
 *	1. It updates the priorities of all active SDC threads on the system.
 *	2. It measures pset CPU usage and enforces sysdc_max_pset_DC.
 */
static void
sysdc_update(void *arg)
{
	int		idx;
	sysdc_t		*freelist = NULL;
	sysdc_pset_t	*cur;
	hrtime_t	now, diff;
	uint_t		redeploy = 1;

	SYSDC_INC_STAT(sysdc_update_enter);

	ASSERT(sysdc_update_timeout_started);

	/*
	 * If this is our first time through, diff will be gigantic, and
	 * no breaks will be necessary.
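	 * (sysdc_last_update starts out as zero, so the pset duty cycles
	 * computed for this first interval come out negligibly small.)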
	 */
	now = gethrtime();
	diff = now - sysdc_last_update;
	sysdc_last_update = now;

	mutex_enter(&sysdc_pset_lock);
	for (cur = list_head(&sysdc_psets); cur != NULL;
	    cur = list_next(&sysdc_psets, cur)) {
		boolean_t breaking = (cur->sdp_should_break != 0);

		if (cur->sdp_need_break != breaking) {
			DTRACE_PROBE2(sdc__pset__break, sysdc_pset_t *, cur,
			    boolean_t, breaking);
		}
		cur->sdp_onproc_time = 0;
		cur->sdp_onproc_threads = 0;
		cur->sdp_need_break = breaking;
	}
	mutex_exit(&sysdc_pset_lock);

	for (idx = 0; idx < SYSDC_NLISTS; idx++) {
		sysdc_list_t		*sdl = &sysdc_active[idx];
		sysdc_t *volatile	*headp = &sdl->sdl_list;
		sysdc_t			*head, *tail;
		sysdc_t			**prevptr;

		if (*headp == &sysdc_dummy)
			continue;

		/* Prevent any threads from exiting while we're poking them. */
		mutex_enter(&sdl->sdl_lock);

		/*
		 * Each sdl_list contains a singly-linked list of active
		 * threads.  Threads which become active while we are
		 * processing the list will be added to sdl_list.  Since we
		 * don't want that to interfere with our own processing, we
		 * swap in an empty list.  Any newly active threads will
		 * go on to this empty list.  When finished, we'll put any
		 * such threads at the end of the processed list.
		 */
		head = atomic_swap_ptr(headp, &sysdc_dummy);
		prevptr = &head;
		while (*prevptr != &sysdc_dummy) {
			sysdc_t		*const	sdc = *prevptr;
			kthread_t	*const	t = sdc->sdc_thread;

			/*
			 * If the thread has exited, move its sysdc_t onto
			 * freelist, to be freed later.
			 */
			if (t == NULL) {
				*prevptr = sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_exited);
				sdc->sdc_next = freelist;
				freelist = sdc;
				continue;
			}

			thread_lock(t);
			if (t->t_cid != sysdccid) {
				thread_unlock(t);
				prevptr = &sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_not_sdc);
				continue;
			}
			ASSERT(t->t_cldata == sdc);

			/*
			 * If the thread has been sleeping for longer
			 * than sysdc_prune_interval, make it inactive by
			 * removing it from the list.
			 */
			if (!(t->t_state & (TS_RUN | TS_ONPROC)) &&
			    sdc->sdc_sleep_updates != 0 &&
			    (sdc->sdc_sleep_updates - sdc->sdc_nupdates) >
			    sysdc_prune_updates) {
				*prevptr = sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_idle);
				sdc->sdc_next = NULL;
				thread_unlock(t);
				continue;
			}
			sysdc_update_pri(sdc, SDC_UPDATE_TIMEOUT);
			thread_unlock(t);

			prevptr = &sdc->sdc_next;
		}

		/*
		 * Add our list to the bucket, putting any new entries
		 * added while we were working at the tail of the list.
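		 * (If still more threads are activated between the read
		 * and the cas below, the cas fails and we retry against
		 * the new head.)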
		 */
		do {
			tail = *headp;
			*prevptr = tail;
		} while (atomic_cas_ptr(headp, tail, head) != tail);

		mutex_exit(&sdl->sdl_lock);
	}

	mutex_enter(&sysdc_pset_lock);
	for (cur = list_head(&sysdc_psets); cur != NULL;
	    cur = list_next(&sysdc_psets, cur)) {

		cur->sdp_vtime_last_interval =
		    diff * cur->sdp_cpupart->cp_ncpus;
		cur->sdp_DC_last_interval =
		    (cur->sdp_onproc_time * SYSDC_DC_MAX) /
		    cur->sdp_vtime_last_interval;

		if (cur->sdp_should_break > 0) {
			cur->sdp_should_break--;	/* breaking */
			continue;
		}
		if (cur->sdp_dont_break > 0) {
			cur->sdp_dont_break--;	/* waiting before checking */
			continue;
		}
		if (cur->sdp_DC_last_interval > sysdc_max_pset_DC) {
			cur->sdp_should_break = sysdc_break_updates;
			cur->sdp_dont_break = sysdc_nobreak_updates;
			SYSDC_INC_STAT(sysdc_update_take_break);
		}
	}

	/*
	 * If there are no sysdc_psets, there can be no threads, so
	 * we can stop doing our timeout.  Since we're holding the
	 * sysdc_pset_lock, no new sysdc_psets can come in, which will
	 * prevent anyone from racing with this and dropping our timeout
	 * on the floor.
	 */
	if (list_is_empty(&sysdc_psets)) {
		SYSDC_INC_STAT(sysdc_update_no_psets);
		ASSERT(sysdc_update_timeout_started);
		sysdc_update_timeout_started = 0;

		redeploy = 0;
	}
	mutex_exit(&sysdc_pset_lock);

	while (freelist != NULL) {
		sysdc_t *cur = freelist;
		freelist = cur->sdc_next;
		kmem_free(cur, sizeof (*cur));
	}

	if (redeploy) {
		(void) timeout(sysdc_update, arg, sysdc_update_ticks);
	}
}

static void
sysdc_preempt(kthread_t *t)
{
	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	setbackdq(t);		/* give others a chance to run */
}

static void
sysdc_tick(kthread_t *t)
{
	sysdc_t *sdc;

	thread_lock(t);
	if (t->t_cid != sysdccid) {
		SYSDC_INC_STAT(sysdc_tick_not_sdc);
		thread_unlock(t);
		return;
	}
	sdc = t->t_cldata;
	if (t->t_state == TS_ONPROC &&
	    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
		cpu_surrender(t);
	}

	if (t->t_state == TS_ONPROC || t->t_state == TS_RUN) {
		ASSERT(sdc->sdc_sleep_updates == 0);
	}

	ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks);
	sdc->sdc_ticks++;
	if (sdc->sdc_ticks == sdc->sdc_update_ticks) {
		SYSDC_INC_STAT(sysdc_tick_quantum_expired);
		sysdc_update_pri(sdc, SDC_UPDATE_TICK);
		ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks);
	}
	thread_unlock(t);
}

static void
sysdc_setrun(kthread_t *t)
{
	sysdc_t *sdc = t->t_cldata;

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */

	sdc->sdc_sleep_updates = 0;

	if (sdc->sdc_next == NULL) {
		/*
		 * Since we're in transition, we don't want to use the
		 * full thread_update_pri().
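		 * (The thread is not sitting on a run queue yet; the
		 * setbackdq() below will queue it using the new priority.)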
		 */
		if (sysdc_compute_pri(sdc, 0)) {
			THREAD_CHANGE_PRI(t, sdc->sdc_epri);
		}
		sysdc_activate(sdc);

		ASSERT(sdc->sdc_next != NULL);
	}

	setbackdq(t);
}

static void
sysdc_wakeup(kthread_t *t)
{
	sysdc_setrun(t);
}

static void
sysdc_sleep(kthread_t *t)
{
	sysdc_t *sdc = t->t_cldata;

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */

	sdc->sdc_sleep_updates = sdc->sdc_nupdates;
}

/*ARGSUSED*/
static int
sysdc_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
    void *bufp)
{
	cpupart_t *const cpupart = t->t_cpupart;
	sysdc_t *sdc = bufp;
	sysdc_params_t *sdpp = parmsp;
	sysdc_pset_t *newpset = sdc->sdc_pset;
	sysdc_pset_t *pset;
	int start_timeout;

	if (t->t_cid != syscid)
		return (EPERM);

	ASSERT(ttolwp(t) != NULL);
	ASSERT(sdpp != NULL);
	ASSERT(newpset != NULL);
	ASSERT(sysdc_param_init);

	ASSERT(sdpp->sdp_minpri >= sysdc_minpri);
	ASSERT(sdpp->sdp_maxpri <= sysdc_maxpri);
	ASSERT(sdpp->sdp_DC >= sysdc_minDC);
	ASSERT(sdpp->sdp_DC <= sysdc_maxDC);

	sdc->sdc_thread = t;
	sdc->sdc_pri = sdpp->sdp_maxpri;	/* start off maximally */
	sdc->sdc_minpri = sdpp->sdp_minpri;
	sdc->sdc_maxpri = sdpp->sdp_maxpri;
	sdc->sdc_target_DC = sdpp->sdp_DC;
	sdc->sdc_ticks = 0;
	sdc->sdc_update_ticks = sysdc_update_ticks + 1;

	/* Assign ourselves to the appropriate pset. */
	sdc->sdc_pset = NULL;
	mutex_enter(&sysdc_pset_lock);
	for (pset = list_head(&sysdc_psets); pset != NULL;
	    pset = list_next(&sysdc_psets, pset)) {
		if (pset->sdp_cpupart == cpupart) {
			break;
		}
	}
	if (pset == NULL) {
		pset = newpset;
		newpset = NULL;
		pset->sdp_cpupart = cpupart;
		list_insert_tail(&sysdc_psets, pset);
	}
	pset->sdp_nthreads++;
	ASSERT(pset->sdp_nthreads > 0);

	sdc->sdc_pset = pset;

	start_timeout = (sysdc_update_timeout_started == 0);
	sysdc_update_timeout_started = 1;
	mutex_exit(&sysdc_pset_lock);

	if (newpset != NULL)
		kmem_free(newpset, sizeof (*newpset));

	/* Update t's scheduling class and priority. */
	thread_lock(t);
	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
	t->t_cid = cid;
	t->t_cldata = sdc;
	t->t_schedflag |= TS_RUNQMATCH;

	sysdc_update_pri(sdc, SDC_UPDATE_INITIAL);
	thread_unlock(t);

	/* Kick off the thread timeout if we're the first one in. */
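	/*
	 * (start_timeout was latched while sysdc_pset_lock was held, so
	 * exactly one entering thread starts the timeout; from then on
	 * sysdc_update() re-arms itself until the last pset empties.)
	 */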
	if (start_timeout) {
		(void) timeout(sysdc_update, NULL, sysdc_update_ticks);
	}

	return (0);
}

static void
sysdc_leave(sysdc_t *sdc)
{
	sysdc_pset_t *sdp = sdc->sdc_pset;
	sysdc_list_t *sdl = SYSDC_LIST(sdc);
	uint_t freedc;

	mutex_enter(&sdl->sdl_lock);	/* block sysdc_update() */
	sdc->sdc_thread = NULL;
	freedc = (sdc->sdc_next == NULL);
	mutex_exit(&sdl->sdl_lock);

	mutex_enter(&sysdc_pset_lock);
	sdp = sdc->sdc_pset;
	ASSERT(sdp != NULL);
	ASSERT(sdp->sdp_nthreads > 0);
	--sdp->sdp_nthreads;
	if (sdp->sdp_nthreads == 0) {
		list_remove(&sysdc_psets, sdp);
	} else {
		sdp = NULL;
	}
	mutex_exit(&sysdc_pset_lock);

	if (freedc)
		kmem_free(sdc, sizeof (*sdc));
	if (sdp != NULL)
		kmem_free(sdp, sizeof (*sdp));
}

static void
sysdc_exitclass(void *buf)
{
	sysdc_leave((sysdc_t *)buf);
}

/*ARGSUSED*/
static int
sysdc_canexit(kthread_t *t, cred_t *reqpcredp)
{
	/* Threads cannot exit SDC once joined, except in a body bag. */
	return (EPERM);
}

static void
sysdc_exit(kthread_t *t)
{
	sysdc_t *sdc;

	/* We're exiting, so we just rejoin the SYS class. */
	thread_lock(t);
	ASSERT(t->t_cid == sysdccid);
	sdc = t->t_cldata;
	t->t_cid = syscid;
	t->t_cldata = NULL;
	t->t_clfuncs = &(sclass[syscid].cl_funcs->thread);
	(void) thread_change_pri(t, maxclsyspri, 0);
	t->t_schedflag &= ~TS_RUNQMATCH;
	thread_unlock_nopreempt(t);

	/* Unlink the sdc from everything. */
	sysdc_leave(sdc);
}

/*ARGSUSED*/
static int
sysdc_fork(kthread_t *t, kthread_t *ct, void *bufp)
{
	/*
	 * Threads cannot be created with SDC as their class; they must
	 * be created as SYS and then added with sysdc_thread_enter().
	 * Because of this restriction, sysdc_fork() should never be called.
	 */
	panic("sysdc cannot be forked");

	return (ENOSYS);
}

/*ARGSUSED*/
static void
sysdc_forkret(kthread_t *t, kthread_t *ct)
{
	/* SDC threads are part of system processes, which never fork. */
	panic("sysdc cannot be forked");
}

static pri_t
sysdc_globpri(kthread_t *t)
{
	return (t->t_epri);
}

/*ARGSUSED*/
static pri_t
sysdc_no_swap(kthread_t *t, int flags)
{
	/* SDC threads cannot be swapped. */
	return (-1);
}

/*
 * Get maximum and minimum priorities enjoyed by SDC threads.
 */
static int
sysdc_getclpri(pcpri_t *pcprip)
{
	pcprip->pc_clpmax = sysdc_maxpri;
	pcprip->pc_clpmin = sysdc_minpri;
	return (0);
}

/*ARGSUSED*/
static int
sysdc_getclinfo(void *arg)
{
	return (0);		/* no class-specific info */
}

/*ARGSUSED*/
static int
sysdc_alloc(void **p, int flag)
{
	sysdc_t *new;

	*p = NULL;
	if ((new = kmem_zalloc(sizeof (*new), flag)) == NULL) {
		return (ENOMEM);
	}
	if ((new->sdc_pset = kmem_zalloc(sizeof (*new->sdc_pset), flag)) ==
	    NULL) {
		kmem_free(new, sizeof (*new));
		return (ENOMEM);
	}
	*p = new;
	return (0);
}

static void
sysdc_free(void *p)
{
	sysdc_t *sdc = p;

	if (sdc != NULL) {
		/*
		 * We must have failed CL_ENTERCLASS(), so our pset should be
		 * there and unused.
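		 * (sdp_cpupart is only filled in by sysdc_enterclass()
		 * when it consumes the preallocated pset.)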
		 */
		ASSERT(sdc->sdc_pset != NULL);
		ASSERT(sdc->sdc_pset->sdp_cpupart == NULL);
		kmem_free(sdc->sdc_pset, sizeof (*sdc->sdc_pset));
		kmem_free(sdc, sizeof (*sdc));
	}
}

static int sysdc_enosys();	/* Boy, ANSI-C's K&R compatibility is weird. */
static int sysdc_einval();
static void sysdc_nullsys();

static struct classfuncs sysdc_classfuncs = {
	/* messages to class manager */
	{
		sysdc_enosys,	/* admin */
		sysdc_getclinfo,
		sysdc_enosys,	/* parmsin */
		sysdc_enosys,	/* parmsout */
		sysdc_enosys,	/* vaparmsin */
		sysdc_enosys,	/* vaparmsout */
		sysdc_getclpri,
		sysdc_alloc,
		sysdc_free,
	},
	/* operations on threads */
	{
		sysdc_enterclass,
		sysdc_exitclass,
		sysdc_canexit,
		sysdc_fork,
		sysdc_forkret,
		sysdc_nullsys,	/* parmsget */
		sysdc_enosys,	/* parmsset */
		sysdc_nullsys,	/* stop */
		sysdc_exit,
		sysdc_nullsys,	/* active */
		sysdc_nullsys,	/* inactive */
		sysdc_no_swap,	/* swapin */
		sysdc_no_swap,	/* swapout */
		sysdc_nullsys,	/* trapret */
		sysdc_preempt,
		sysdc_setrun,
		sysdc_sleep,
		sysdc_tick,
		sysdc_wakeup,
		sysdc_einval,	/* donice */
		sysdc_globpri,
		sysdc_nullsys,	/* set_process_group */
		sysdc_nullsys,	/* yield */
		sysdc_einval,	/* doprio */
	}
};

static int
sysdc_enosys()
{
	return (ENOSYS);
}

static int
sysdc_einval()
{
	return (EINVAL);
}

static void
sysdc_nullsys()
{
}

/*ARGSUSED*/
static pri_t
sysdc_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
{
	int idx;

	list_create(&sysdc_psets, sizeof (sysdc_pset_t),
	    offsetof(sysdc_pset_t, sdp_node));

	for (idx = 0; idx < SYSDC_NLISTS; idx++) {
		sysdc_active[idx].sdl_list = &sysdc_dummy;
	}

	sysdc_initparam();

	sysdccid = cid;
	*clfuncspp = &sysdc_classfuncs;

	return ((pri_t)v.v_maxsyspri);
}

static struct sclass csw = {
	"SDC",
	sysdc_init,
	0
};

static struct modlsched modlsched = {
	&mod_schedops, "system duty cycle scheduling class", &csw
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlsched, NULL
};

int
_init()
{
	return (mod_install(&modlinkage));
}

int
_fini()
{
	return (EBUSY);		/* can't unload for now */
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/* --- consolidation-private interfaces --- */
void
sysdc_thread_enter(kthread_t *t, uint_t dc, uint_t flags)
{
	void		*buf = NULL;
	sysdc_params_t	sdp;

	SYSDC_INC_STAT(sysdc_thread_enter_enter);

	ASSERT(sysdc_param_init);
	ASSERT(sysdccid >= 0);

	ASSERT((flags & ~SYSDC_THREAD_BATCH) == 0);

	sdp.sdp_minpri = sysdc_minpri;
	sdp.sdp_maxpri = sysdc_maxpri;
	sdp.sdp_DC = MAX(MIN(dc, sysdc_maxDC), sysdc_minDC);

	if (flags & SYSDC_THREAD_BATCH)
		sdp.sdp_maxpri -= sysdc_batch_niceness;

	VERIFY3U(CL_ALLOC(&buf, sysdccid, KM_SLEEP), ==, 0);

	ASSERT(t->t_lwp != NULL);
	ASSERT(t->t_cid == syscid);
	ASSERT(t->t_cldata == NULL);
	VERIFY3U(CL_CANEXIT(t, NULL), ==, 0);
	VERIFY3U(CL_ENTERCLASS(t, sysdccid, &sdp, kcred, buf), ==, 0);
	CL_EXITCLASS(syscid, NULL);
NULL); 1328 } 1329