/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 */

/*
 * The System Duty Cycle (SDC) scheduling class
 * --------------------------------------------
 *
 * Background
 *
 * Kernel threads in Solaris have traditionally not been large consumers
 * of CPU time. They typically wake up, perform a small amount of
 * work, then go back to sleep waiting for either a timeout or another
 * signal. On the assumption that the small amount of work that they do
 * is important for the behavior of the whole system, these threads are
 * treated kindly by the dispatcher and the SYS scheduling class: they run
 * without preemption from anything other than real-time and interrupt
 * threads; when preempted, they are put at the front of the queue, so they
 * generally do not migrate between CPUs; and they are allowed to stay
 * running until they voluntarily give up the CPU.
 *
 * As Solaris has evolved, new workloads have emerged which require the
 * kernel to perform significant amounts of CPU-intensive work. One
 * example of such a workload is ZFS's transaction group sync processing.
 * Each sync operation generates a large batch of I/Os, and each I/O
 * may need to be compressed and/or checksummed before it is written to
 * storage. The taskq threads which perform the compression and checksums
 * will run nonstop as long as they have work to do; a large sync operation
 * on a compression-heavy dataset can keep them busy for seconds on end.
 * This causes human-time-scale dispatch latency bubbles for any other
 * threads which have the misfortune to share a CPU with the taskq threads.
 *
 * The SDC scheduling class is a solution to this problem.
 *
 *
 * Overview
 *
 * SDC is centered around the concept of a thread's duty cycle (DC):
 *
 *	                   ONPROC time
 *	Duty Cycle = ----------------------
 *	             ONPROC + Runnable time
 *
 * This is the ratio of the time that the thread spent running on a CPU
 * divided by the time it spent running or trying to run. It is unaffected
 * by any time the thread spent sleeping, stopped, etc.
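 *
 * For example, a thread which accumulates 20ms of ONPROC time and 60ms
 * of Runnable time over a measurement interval has a duty cycle of
 * 20 / (20 + 60) = 25%; any time it spent sleeping during that interval
 * changes neither the numerator nor the denominator.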
 *
 * A thread joining the SDC class specifies a "target" DC that it wants
 * to run at. To implement this policy, the routine sysdc_update() scans
 * the list of active SDC threads every few ticks and uses each thread's
 * microstate data to compute the actual duty cycle that the thread
 * has experienced recently. If the thread is under its target DC, its
 * priority is increased to the maximum available (sysdc_maxpri, which is
 * 99 by default). If the thread is over its target DC, its priority is
 * reduced to the minimum available (sysdc_minpri, 0 by default). This
 * is a fairly primitive approach, in that it doesn't use any of the
 * intermediate priorities, but it's not completely inappropriate. Even
 * though threads in the SDC class might take a while to do their job, they
 * are by some definition important if they're running inside the kernel,
 * so it is reasonable that they should get to run at priority 99.
 *
 * If a thread is running when sysdc_update() calculates its actual duty
 * cycle, and there are other threads of equal or greater priority on its
 * CPU's dispatch queue, sysdc_update() preempts that thread. The thread
 * acknowledges the preemption by calling sysdc_preempt(), which calls
 * setbackdq(), which gives other threads with the same priority a chance
 * to run. This creates a de facto time quantum for threads in the SDC
 * scheduling class.
 *
 * An SDC thread which is assigned priority 0 can continue to run if
 * nothing else needs to use the CPU that it's running on. Similarly, an
 * SDC thread at priority 99 might not get to run as much as it wants to
 * if there are other priority-99 or higher threads on its CPU. These
 * situations would cause the thread to get ahead of or behind its target
 * DC; the longer the situations lasted, the further ahead or behind the
 * thread would get. Rather than condemning a thread to a lifetime of
 * paying for its youthful indiscretions, SDC keeps "base" values for
 * ONPROC and Runnable times in each thread's sysdc data, and updates these
 * values periodically. The duty cycle is then computed using the elapsed
 * amount of ONPROC and Runnable times since those base times.
 *
 * Since sysdc_update() scans SDC threads fairly frequently, it tries to
 * keep the list of "active" threads small by pruning out threads which
 * have been asleep for a brief time. They are not pruned immediately upon
 * going to sleep, since some threads may bounce back and forth between
 * sleeping and being runnable.
 *
 *
 * Interfaces
 *
 *	void sysdc_thread_enter(t, dc, flags)
 *
 *		Moves a kernel thread from the SYS scheduling class to the
 *		SDC class. t must have an associated LWP (created by calling
 *		lwp_kernel_create()). The thread will have a target DC of dc.
 *		Flags should be either 0 or SYSDC_THREAD_BATCH. If
 *		SYSDC_THREAD_BATCH is specified, the thread is expected to be
 *		doing large amounts of processing.
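 *
 *		As a sketch of typical use (my_worker, my_arg, and syspp are
 *		hypothetical; the lwp_kernel_create() arguments depend on
 *		the caller):
 *
 *			kthread_t *t = lwp_kernel_create(syspp, my_worker,
 *			    my_arg, TS_RUN, minclsyspri);
 *			sysdc_thread_enter(t, 80, 0);
 *
 *		would run my_worker() with a target duty cycle of 80%.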
 *
 *
 * Complications
 *
 * - Run queue balancing
 *
 *	The Solaris dispatcher is biased towards letting a thread run
 *	on the same CPU which it last ran on, if no more than 3 ticks
 *	(i.e. rechoose_interval) have passed since the thread last ran.
 *	This helps to preserve cache warmth. On the other hand, it also
 *	tries to keep the per-CPU run queues fairly balanced; if the CPU
 *	chosen for a runnable thread has a run queue which is three or
 *	more threads longer than a neighboring CPU's queue, the runnable
 *	thread is dispatched onto the neighboring CPU instead.
 *
 *	These policies work well for some workloads, but not for many SDC
 *	threads. The taskq client of SDC, for example, has many discrete
 *	units of work to do. The work units are largely independent, so
 *	cache warmth is not an important consideration. It is important
 *	that the threads fan out quickly to different CPUs, since the
 *	amount of work these threads have to do (a few seconds' worth at a
 *	time) doesn't leave much time to correct thread placement errors
 *	(i.e. two SDC threads being dispatched to the same CPU).
 *
 *	To fix this, SDC uses the TS_RUNQMATCH flag introduced for FSS.
 *	This tells the dispatcher to keep neighboring run queues' lengths
 *	more evenly matched, which allows SDC threads to migrate more
 *	easily.
 *
 * - LWPs and system processes
 *
 *	SDC can only be used for kernel threads. Since SDC uses microstate
 *	accounting data to compute each thread's actual duty cycle, all
 *	threads entering the SDC class must have associated LWPs (which
 *	store the microstate data). This means that the threads have to
 *	be associated with an SSYS process, i.e. one created by newproc().
 *	If the microstate accounting information is ever moved into the
 *	kthread_t, this restriction could be lifted.
 *
 * - Dealing with oversubscription
 *
 *	Since SDC duty cycles are per-thread, it is possible that the
 *	aggregate requested duty cycle of all SDC threads in a processor
 *	set could be greater than the total CPU time available in that set.
 *	The FSS scheduling class has an analogous situation, which it deals
 *	with by reducing each thread's allotted CPU time proportionally.
 *	Since SDC doesn't need to be as precise as FSS, it uses a simpler
 *	solution to the oversubscription problem.
 *
 *	sysdc_update() accumulates the amount of time that max-priority SDC
 *	threads have spent on-CPU in each processor set, and uses that sum
 *	to create an implied duty cycle for that processor set:
 *
 *	                 accumulated CPU time
 *	pset DC = -----------------------------------
 *	          (# CPUs) * time since last update
 *
 *	If this implied duty cycle is above a maximum pset duty cycle (90%
 *	by default), sysdc_update() sets the priority of all SDC threads
 *	in that processor set to sysdc_minpri for a "break" period. After
 *	the break period, it waits for a "nobreak" period before trying to
 *	enforce the pset duty cycle limit again.
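 *
 *	For example, with the default settings on a 4-CPU processor set,
 *	if the max-priority SDC threads are found to have consumed more
 *	than 90% of 4 CPUs' worth of time since the last update, every
 *	SDC thread in that pset is held at sysdc_minpri for the break
 *	period before the limit is checked again.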
 *
 * - Processor sets
 *
 *	As the above implies, SDC is processor set aware, but it does not
 *	currently allow threads to change processor sets while in the SDC
 *	class. Instead, those threads must join the desired processor set
 *	before entering SDC. [1]
 *
 * - Batch threads
 *
 *	A thread joining the SDC class can specify the SYSDC_THREAD_BATCH
 *	flag. This flag currently has no effect, but marks threads which
 *	do bulk processing.
 *
 * - Why not FSS?
 *
 *	It might seem that the existing FSS scheduling class could solve
 *	the problems that SDC is attempting to solve. FSS's more precise
 *	solution to the oversubscription problem would hardly cause
 *	trouble, as long as it performed well. SDC is implemented as
 *	a separate scheduling class for two main reasons: the initial
 *	consumer of SDC does not map well onto the "project" abstraction
 *	that is central to FSS, and FSS does not expect to run at kernel
 *	priorities.
 *
 *
 * Tunables
 *
 * - sysdc_update_interval_msec: Number of milliseconds between
 *	consecutive thread priority updates.
 *
 * - sysdc_reset_interval_msec: Number of milliseconds between
 *	consecutive resets of a thread's base ONPROC and Runnable
 *	times.
 *
 * - sysdc_prune_interval_msec: Number of milliseconds of sleeping
 *	before a thread is pruned from the active list.
 *
 * - sysdc_max_pset_DC: Allowable percentage of a processor set's
 *	CPU time which SDC can give to its high-priority threads.
 *
 * - sysdc_break_msec: Number of milliseconds of "break" taken when
 *	sysdc_max_pset_DC is exceeded.
 *
 *
 * Future work (in SDC and related subsystems)
 *
 * - Per-thread rechoose interval (0 for SDC)
 *
 *	Allow each thread to specify its own rechoose interval. SDC
 *	threads would specify an interval of zero, which would rechoose
 *	the CPU with the lowest priority once per update.
 *
 * - Allow threads to change processor sets after joining the SDC class
 *
 * - Thread groups and per-group DC
 *
 *	It might be nice to be able to specify a duty cycle which applies
 *	to a group of threads in aggregate.
 *
 * - Per-group DC callback to allow dynamic DC tuning
 *
 *	Currently, DCs are assigned when the thread joins SDC. Some
 *	workloads could benefit from being able to tune their DC using
 *	subsystem-specific knowledge about the workload.
 *
 * - Finer-grained priority updates
 *
 * - More nuanced management of oversubscription
 *
 * - Moving other CPU-intensive threads into SDC
 *
 * - Move msacct data into kthread_t
 *
 *	This would allow kernel threads without LWPs to join SDC.
 *
 *
 * Footnotes
 *
 *	[1] The details of doing so are left as an exercise for the reader.
 */

#include <sys/types.h>
#include <sys/sysdc.h>
#include <sys/sysdc_impl.h>

#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/debug.h>
#include <sys/disp.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/schedctl.h>
#include <sys/sdt.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/var.h>

/*
 * Tunables - loaded into the internal state at module load time
 */
uint_t		sysdc_update_interval_msec = 20;
uint_t		sysdc_reset_interval_msec = 400;
uint_t		sysdc_prune_interval_msec = 100;
uint_t		sysdc_max_pset_DC = 90;
uint_t		sysdc_break_msec = 80;

/*
 * Internal state - constants set up by sysdc_initparam()
 */
static clock_t	sysdc_update_ticks;	/* ticks between updates */
static uint_t	sysdc_prune_updates;	/* updates asleep before pruning */
static uint_t	sysdc_reset_updates;	/* # of updates before reset */
static uint_t	sysdc_break_updates;	/* updates to break */
static uint_t	sysdc_nobreak_updates;	/* updates to not check */
static uint_t	sysdc_minDC;		/* minimum allowed DC */
static uint_t	sysdc_maxDC;		/* maximum allowed DC */
static pri_t	sysdc_minpri;		/* minimum allowed priority */
static pri_t	sysdc_maxpri;		/* maximum allowed priority */

/*
 * Internal state
 */
static kmutex_t	sysdc_pset_lock;	/* lock protecting pset data */
static list_t	sysdc_psets;		/* list of psets with SDC threads */
static uint_t	sysdc_param_init;	/* sysdc_initparam() has been called */
static uint_t	sysdc_update_timeout_started; /* update timeout is active */
static hrtime_t	sysdc_last_update;	/* time of last sysdc_update() */
static sysdc_t	sysdc_dummy;		/* used to terminate active lists */

/*
 * Internal state - active hash table
 */
#define	SYSDC_NLISTS	8
#define	SYSDC_HASH(sdc)	(((uintptr_t)(sdc) >> 6) & (SYSDC_NLISTS - 1))
static sysdc_list_t	sysdc_active[SYSDC_NLISTS];
#define	SYSDC_LIST(sdc)		(&sysdc_active[SYSDC_HASH(sdc)])

#ifdef DEBUG
static struct {
	uint64_t	sysdc_update_times_asleep;
	uint64_t	sysdc_update_times_base_ran_backwards;
	uint64_t	sysdc_update_times_already_done;
	uint64_t	sysdc_update_times_cur_ran_backwards;
	uint64_t	sysdc_compute_pri_breaking;
	uint64_t	sysdc_activate_enter;
	uint64_t	sysdc_update_enter;
	uint64_t	sysdc_update_exited;
	uint64_t	sysdc_update_not_sdc;
	uint64_t	sysdc_update_idle;
	uint64_t	sysdc_update_take_break;
	uint64_t	sysdc_update_no_psets;
	uint64_t	sysdc_tick_not_sdc;
	uint64_t	sysdc_tick_quantum_expired;
	uint64_t	sysdc_thread_enter_enter;
} sysdc_stats;

#define	SYSDC_INC_STAT(x)	(sysdc_stats.x++)
#else
#define	SYSDC_INC_STAT(x)	((void)0)
#endif

/* macros are UPPER CASE */
#define	HOWMANY(a, b)	howmany((a), (b))
#define	MSECTOTICKS(a)	HOWMANY((a) * 1000, usec_per_tick)

static void
sysdc_initparam(void)
{
	uint_t sysdc_break_ticks;

	/* update / prune intervals */
	sysdc_update_ticks = MSECTOTICKS(sysdc_update_interval_msec);

	sysdc_prune_updates = HOWMANY(sysdc_prune_interval_msec,
	    sysdc_update_interval_msec);
	sysdc_reset_updates = HOWMANY(sysdc_reset_interval_msec,
	    sysdc_update_interval_msec);

	/* We must get at least a little time on CPU. */
	sysdc_minDC = 1;
	sysdc_maxDC = SYSDC_DC_MAX;
	sysdc_minpri = 0;
	sysdc_maxpri = maxclsyspri - 1;

	/* break parameters */
	if (sysdc_max_pset_DC > SYSDC_DC_MAX) {
		sysdc_max_pset_DC = SYSDC_DC_MAX;
	}
	sysdc_break_ticks = MSECTOTICKS(sysdc_break_msec);
	sysdc_break_updates = HOWMANY(sysdc_break_ticks, sysdc_update_ticks);

	/*
	 * We want:
	 *
	 *	sysdc_max_pset_DC = (nobreak / (break + nobreak))
	 *
	 *   ==> nobreak = sysdc_max_pset_DC * (break + nobreak)
	 *
	 *	             sysdc_max_pset_DC * break
	 *   ==> nobreak = -------------------------
	 *	              1 - sysdc_max_pset_DC
	 */
	sysdc_nobreak_updates =
	    HOWMANY((uint64_t)sysdc_break_updates * sysdc_max_pset_DC,
	    (SYSDC_DC_MAX - sysdc_max_pset_DC));

	sysdc_param_init = 1;
}

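/*
 * A worked sketch of the above, assuming the default tunables, a 100Hz
 * clock (usec_per_tick == 10000), and SYSDC_DC_MAX == 100:
 *
 *	sysdc_update_ticks    = MSECTOTICKS(20)             = 2 ticks
 *	sysdc_prune_updates   = HOWMANY(100, 20)            = 5 updates
 *	sysdc_reset_updates   = HOWMANY(400, 20)            = 20 updates
 *	sysdc_break_updates   = HOWMANY(MSECTOTICKS(80), 2) = 4 updates
 *	sysdc_nobreak_updates = HOWMANY(4 * 90, 100 - 90)   = 36 updates
 *
 * That is, an oversubscribed pset takes a 4-update (80ms) break out of
 * every 40 updates, so it is "broken" at most 10% of the time, matching
 * the default sysdc_max_pset_DC of 90%.
 */
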
#undef	HOWMANY
#undef	MSECTOTICKS

#define	SDC_UPDATE_INITIAL	0x1	/* for the initial update */
#define	SDC_UPDATE_TIMEOUT	0x2	/* from sysdc_update() */
#define	SDC_UPDATE_TICK		0x4	/* from sysdc_tick(), on expiry */

/*
 * Updates the recorded times in the sdc, and returns the elapsed ONPROC
 * and Runnable times since the last reset.
 *
 * newO is the thread's actual ONPROC time; it's used during sysdc_update()
 * to track processor set usage.
 */
static void
sysdc_update_times(sysdc_t *sdc, uint_t flags,
    hrtime_t *O, hrtime_t *R, hrtime_t *newO)
{
	kthread_t *const t = sdc->sdc_thread;
	const uint_t	initial = (flags & SDC_UPDATE_INITIAL);
	const uint_t	update = (flags & SDC_UPDATE_TIMEOUT);
	const clock_t	now = ddi_get_lbolt();
	uint_t		do_reset;

	ASSERT(THREAD_LOCK_HELD(t));

	*O = *R = 0;

	/* If we've been sleeping, we know we haven't had any ONPROC time. */
	if (sdc->sdc_sleep_updates != 0 &&
	    sdc->sdc_sleep_updates != sdc->sdc_nupdates) {
		*newO = sdc->sdc_last_base_O;
		SYSDC_INC_STAT(sysdc_update_times_asleep);
		return;
	}

	/*
	 * If this is our first update, or we've hit the reset point,
	 * we need to reset our base_{O,R}. Once we've updated them, we
	 * report O and R for the entire prior interval.
	 */
	do_reset = initial;
	if (update) {
		++sdc->sdc_nupdates;
		if ((sdc->sdc_nupdates % sysdc_reset_updates) == 0)
			do_reset = 1;
	}
	if (do_reset) {
		hrtime_t baseO, baseR;
		if (initial) {
			/*
			 * Start off our cycle count somewhere in the middle,
			 * to keep the resets from all happening at once.
			 *
			 * 4999 is a handy prime much larger than
			 * sysdc_reset_updates, so that we don't run into
			 * trouble if the resolution is a multiple of
			 * sysdc_reset_updates.
			 */
			sdc->sdc_nupdates = (uint_t)((gethrtime() % 4999) %
			    sysdc_reset_updates);
			baseO = baseR = 0;
		} else {
			baseO = sdc->sdc_base_O;
			baseR = sdc->sdc_base_R;
		}

		mstate_systhread_times(t, &sdc->sdc_base_O, &sdc->sdc_base_R);
		*newO = sdc->sdc_base_O;

		sdc->sdc_reset = now;
		sdc->sdc_pri_check = -1; /* force mismatch below */

		/*
		 * See below for rationale.
		 */
		if (baseO > sdc->sdc_base_O || baseR > sdc->sdc_base_R) {
			SYSDC_INC_STAT(sysdc_update_times_base_ran_backwards);
			baseO = sdc->sdc_base_O;
			baseR = sdc->sdc_base_R;
		}

		/* compute based on the entire interval */
		*O = (sdc->sdc_base_O - baseO);
		*R = (sdc->sdc_base_R - baseR);
		return;
	}

	/*
	 * If we're called from sysdc_update(), we *must* return a value
	 * for newO, so we always call mstate_systhread_times().
	 *
	 * Otherwise, if we've already done a pri check this tick,
	 * we can skip it.
	 */
	if (!update && sdc->sdc_pri_check == now) {
		SYSDC_INC_STAT(sysdc_update_times_already_done);
		return;
	}

	/* Get the current times from the thread */
	sdc->sdc_pri_check = now;
	mstate_systhread_times(t, &sdc->sdc_cur_O, &sdc->sdc_cur_R);
	*newO = sdc->sdc_cur_O;

	/*
	 * The updating of microstate accounting is not done under a
	 * consistent set of locks, particularly the t_waitrq field. This
	 * can lead to narrow windows in which we account for time in the
	 * wrong bucket, which on the next read will be accounted for
	 * correctly.
	 *
	 * If our sdc_base_* fields were affected by one of these blips, we
	 * throw away the old data, and pretend this tick didn't happen.
	 */
	if (sdc->sdc_cur_O < sdc->sdc_base_O ||
	    sdc->sdc_cur_R < sdc->sdc_base_R) {

		sdc->sdc_base_O = sdc->sdc_cur_O;
		sdc->sdc_base_R = sdc->sdc_cur_R;

		SYSDC_INC_STAT(sysdc_update_times_cur_ran_backwards);
		return;
	}

	*O = sdc->sdc_cur_O - sdc->sdc_base_O;
	*R = sdc->sdc_cur_R - sdc->sdc_base_R;
}

/*
 * sysdc_compute_pri()
 *
 *	Recomputes the priority of the thread, leaving the result in
 *	sdc->sdc_epri. Returns 1 if a priority update should occur
 *	(which will also trigger a cpu_surrender()), otherwise
 *	returns 0.
 */
static uint_t
sysdc_compute_pri(sysdc_t *sdc, uint_t flags)
{
	kthread_t *const t = sdc->sdc_thread;
	const uint_t	update = (flags & SDC_UPDATE_TIMEOUT);
	const uint_t	tick = (flags & SDC_UPDATE_TICK);

	hrtime_t	O, R;
	hrtime_t	newO = -1;

	ASSERT(THREAD_LOCK_HELD(t));

	sysdc_update_times(sdc, flags, &O, &R, &newO);
	ASSERT(!update || newO != -1);

	/* If we have new data, recompute our priority. */
	if ((O + R) != 0) {
		sdc->sdc_cur_DC = (O * SYSDC_DC_MAX) / (O + R);

		/* Adjust our priority to move our DC closer to the target. */
		if (sdc->sdc_cur_DC < sdc->sdc_target_DC)
			sdc->sdc_pri = sdc->sdc_maxpri;
		else
			sdc->sdc_pri = sdc->sdc_minpri;
	}

	/*
	 * If our per-pset duty cycle goes over the max, we will take a break.
	 * This forces all sysdc threads in the pset to minimum priority, in
	 * order to let everyone else have a chance at the CPU.
	 */
	if (sdc->sdc_pset->sdp_need_break) {
		SYSDC_INC_STAT(sysdc_compute_pri_breaking);
		sdc->sdc_epri = sdc->sdc_minpri;
	} else {
		sdc->sdc_epri = sdc->sdc_pri;
	}

	DTRACE_PROBE4(sysdc__compute__pri,
	    kthread_t *, t, pri_t, sdc->sdc_epri, uint_t, sdc->sdc_cur_DC,
	    uint_t, sdc->sdc_target_DC);

	/*
	 * For sysdc_update(), we compute the ONPROC time for high-priority
	 * threads, which is used to calculate the per-pset duty cycle. We
	 * will always tell our callers to update the thread's priority,
	 * since we want to force a cpu_surrender().
	 *
	 * We reset sdc_update_ticks so that sysdc_tick() will only update
	 * the thread's priority if our timeout is delayed by a tick or
	 * more.
	 */
	if (update) {
		/* SDC threads are not allowed to change cpupart bindings. */
		ASSERT(t->t_cpupart == sdc->sdc_pset->sdp_cpupart);

		/* If we were at MAXPRI, account for our onproc time. */
		if (t->t_pri == sdc->sdc_maxpri &&
		    sdc->sdc_last_base_O != 0 &&
		    sdc->sdc_last_base_O < newO) {
			sdc->sdc_last_O = newO - sdc->sdc_last_base_O;
			sdc->sdc_pset->sdp_onproc_time +=
			    (uint64_t)sdc->sdc_last_O;
			sdc->sdc_pset->sdp_onproc_threads++;
		} else {
			sdc->sdc_last_O = 0;
		}
		sdc->sdc_last_base_O = newO;

		sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks + 1;
		return (1);
	}

	/*
	 * Like sysdc_update(), sysdc_tick() always wants to update the
	 * thread's priority, so that the CPU is surrendered if necessary.
	 * We reset sdc_update_ticks so that if the timeout continues to be
	 * delayed, we'll update at the regular interval.
	 */
	if (tick) {
		ASSERT(sdc->sdc_ticks == sdc->sdc_update_ticks);
		sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks;
		return (1);
	}

	/*
	 * Otherwise, only tell our callers to update the priority if it has
	 * changed.
	 */
	return (sdc->sdc_epri != t->t_pri);
}

static void
sysdc_update_pri(sysdc_t *sdc, uint_t flags)
{
	kthread_t *t = sdc->sdc_thread;

	ASSERT(THREAD_LOCK_HELD(t));

	if (sysdc_compute_pri(sdc, flags)) {
		if (!thread_change_pri(t, sdc->sdc_epri, 0)) {
			cpu_surrender(t);
		}
	}
}

/*
 * Add a thread onto the active list. It will only be removed by
 * sysdc_update().
 */
static void
sysdc_activate(sysdc_t *sdc)
{
	sysdc_t	*volatile *headp = &SYSDC_LIST(sdc)->sdl_list;
	sysdc_t	*head;
	kthread_t *t = sdc->sdc_thread;

	SYSDC_INC_STAT(sysdc_activate_enter);

	ASSERT(sdc->sdc_next == NULL);
	ASSERT(THREAD_LOCK_HELD(t));

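	/*
	 * Lock-free insert at the head of this bucket's singly-linked
	 * active list: retry the compare-and-swap until no other thread
	 * has changed the list head out from under us.
	 */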
	do {
		head = *headp;
		sdc->sdc_next = head;
	} while (atomic_cas_ptr(headp, head, sdc) != head);
}

/*
 * sysdc_update() has two jobs:
 *
 *	1. It updates the priorities of all active SDC threads on the system.
 *	2. It measures pset CPU usage and enforces sysdc_max_pset_DC.
 */
static void
sysdc_update(void *arg)
{
	int		idx;
	sysdc_t		*freelist = NULL;
	sysdc_pset_t	*cur;
	hrtime_t	now, diff;
	uint_t		redeploy = 1;

	SYSDC_INC_STAT(sysdc_update_enter);

	ASSERT(sysdc_update_timeout_started);

	/*
	 * If this is our first time through, diff will be gigantic, and
	 * no breaks will be necessary.
	 */
	now = gethrtime();
	diff = now - sysdc_last_update;
	sysdc_last_update = now;

	mutex_enter(&sysdc_pset_lock);
	for (cur = list_head(&sysdc_psets); cur != NULL;
	    cur = list_next(&sysdc_psets, cur)) {
		boolean_t breaking = (cur->sdp_should_break != 0);

		if (cur->sdp_need_break != breaking) {
			DTRACE_PROBE2(sdc__pset__break, sysdc_pset_t *, cur,
			    boolean_t, breaking);
		}
		cur->sdp_onproc_time = 0;
		cur->sdp_onproc_threads = 0;
		cur->sdp_need_break = breaking;
	}
	mutex_exit(&sysdc_pset_lock);

	for (idx = 0; idx < SYSDC_NLISTS; idx++) {
		sysdc_list_t		*sdl = &sysdc_active[idx];
		sysdc_t	*volatile	*headp = &sdl->sdl_list;
		sysdc_t			*head, *tail;
		sysdc_t			**prevptr;

		if (*headp == &sysdc_dummy)
			continue;

		/* Prevent any threads from exiting while we're poking them. */
		mutex_enter(&sdl->sdl_lock);

		/*
		 * Each sdl_list contains a singly-linked list of active
		 * threads. Threads which become active while we are
		 * processing the list will be added to sdl_list. Since we
		 * don't want that to interfere with our own processing, we
		 * swap in an empty list. Any newly active threads will
		 * go on to this empty list. When finished, we'll put any
		 * such threads at the end of the processed list.
		 */
		head = atomic_swap_ptr(headp, &sysdc_dummy);
		prevptr = &head;
		while (*prevptr != &sysdc_dummy) {
			sysdc_t		*const	sdc = *prevptr;
			kthread_t	*const	t = sdc->sdc_thread;

			/*
			 * If the thread has exited, move its sysdc_t onto
			 * freelist, to be freed later.
			 */
			if (t == NULL) {
				*prevptr = sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_exited);
				sdc->sdc_next = freelist;
				freelist = sdc;
				continue;
			}

			thread_lock(t);
			if (t->t_cid != sysdccid) {
				thread_unlock(t);
				prevptr = &sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_not_sdc);
				continue;
			}
			ASSERT(t->t_cldata == sdc);

			/*
			 * If the thread has been sleeping for longer
			 * than sysdc_prune_interval, make it inactive by
			 * removing it from the list.
			 */
			if (!(t->t_state & (TS_RUN | TS_ONPROC)) &&
			    sdc->sdc_sleep_updates != 0 &&
			    (sdc->sdc_sleep_updates - sdc->sdc_nupdates) >
			    sysdc_prune_updates) {
				*prevptr = sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_idle);
				sdc->sdc_next = NULL;
				thread_unlock(t);
				continue;
			}
			sysdc_update_pri(sdc, SDC_UPDATE_TIMEOUT);
			thread_unlock(t);

			prevptr = &sdc->sdc_next;
		}

		/*
		 * Add our list to the bucket, putting any new entries
		 * added while we were working at the tail of the list.
		 */
		do {
			tail = *headp;
			*prevptr = tail;
		} while (atomic_cas_ptr(headp, tail, head) != tail);

		mutex_exit(&sdl->sdl_lock);
	}

	mutex_enter(&sysdc_pset_lock);
	for (cur = list_head(&sysdc_psets); cur != NULL;
	    cur = list_next(&sysdc_psets, cur)) {

		cur->sdp_vtime_last_interval =
		    diff * cur->sdp_cpupart->cp_ncpus;
		cur->sdp_DC_last_interval =
		    (cur->sdp_onproc_time * SYSDC_DC_MAX) /
		    cur->sdp_vtime_last_interval;

		if (cur->sdp_should_break > 0) {
			cur->sdp_should_break--;	/* breaking */
			continue;
		}
		if (cur->sdp_dont_break > 0) {
			cur->sdp_dont_break--;	/* waiting before checking */
			continue;
		}
		if (cur->sdp_DC_last_interval > sysdc_max_pset_DC) {
			cur->sdp_should_break = sysdc_break_updates;
			cur->sdp_dont_break = sysdc_nobreak_updates;
			SYSDC_INC_STAT(sysdc_update_take_break);
		}
	}

	/*
	 * If there are no sysdc_psets, there can be no threads, so
	 * we can stop doing our timeout. Since we're holding the
	 * sysdc_pset_lock, no new sysdc_psets can come in, which will
	 * prevent anyone from racing with this and dropping our timeout
	 * on the floor.
	 */
	if (list_is_empty(&sysdc_psets)) {
		SYSDC_INC_STAT(sysdc_update_no_psets);
		ASSERT(sysdc_update_timeout_started);
		sysdc_update_timeout_started = 0;

		redeploy = 0;
	}
	mutex_exit(&sysdc_pset_lock);

	while (freelist != NULL) {
		sysdc_t *cur = freelist;
		freelist = cur->sdc_next;
		kmem_free(cur, sizeof (*cur));
	}

	if (redeploy) {
		(void) timeout(sysdc_update, arg, sysdc_update_ticks);
	}
}

static void
sysdc_preempt(kthread_t *t)
{
	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	setbackdq(t);	/* give others a chance to run */
}

static void
sysdc_tick(kthread_t *t)
{
	sysdc_t	*sdc;

	thread_lock(t);
	if (t->t_cid != sysdccid) {
		SYSDC_INC_STAT(sysdc_tick_not_sdc);
		thread_unlock(t);
		return;
	}
	sdc = t->t_cldata;
	if (t->t_state == TS_ONPROC &&
	    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
		cpu_surrender(t);
	}

	if (t->t_state == TS_ONPROC || t->t_state == TS_RUN) {
		ASSERT(sdc->sdc_sleep_updates == 0);
	}

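	/*
	 * sysdc_update_pri() always pushes sdc_update_ticks at least one
	 * tick into the future, so we cannot already be at our expiration
	 * point when we get here.
	 */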
	ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks);
	sdc->sdc_ticks++;
	if (sdc->sdc_ticks == sdc->sdc_update_ticks) {
		SYSDC_INC_STAT(sysdc_tick_quantum_expired);
		sysdc_update_pri(sdc, SDC_UPDATE_TICK);
		ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks);
	}
	thread_unlock(t);
}

static void
sysdc_setrun(kthread_t *t)
{
	sysdc_t	*sdc = t->t_cldata;

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */

	sdc->sdc_sleep_updates = 0;

	if (sdc->sdc_next == NULL) {
		/*
		 * Since we're in transition, we don't want to use the
		 * full thread_update_pri().
		 */
		if (sysdc_compute_pri(sdc, 0)) {
			THREAD_CHANGE_PRI(t, sdc->sdc_epri);
		}
		sysdc_activate(sdc);

		ASSERT(sdc->sdc_next != NULL);
	}

	setbackdq(t);
}

static void
sysdc_wakeup(kthread_t *t)
{
	sysdc_setrun(t);
}

static void
sysdc_sleep(kthread_t *t)
{
	sysdc_t	*sdc = t->t_cldata;

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */

	sdc->sdc_sleep_updates = sdc->sdc_nupdates;
}

/*ARGSUSED*/
static int
sysdc_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
    void *bufp)
{
	cpupart_t *const cpupart = t->t_cpupart;
	sysdc_t		*sdc = bufp;
	sysdc_params_t	*sdpp = parmsp;
	sysdc_pset_t	*newpset = sdc->sdc_pset;
	sysdc_pset_t	*pset;
	int		start_timeout;

	if (t->t_cid != syscid)
		return (EPERM);

	ASSERT(ttolwp(t) != NULL);
	ASSERT(sdpp != NULL);
	ASSERT(newpset != NULL);
	ASSERT(sysdc_param_init);

	ASSERT(sdpp->sdp_minpri >= sysdc_minpri);
	ASSERT(sdpp->sdp_maxpri <= sysdc_maxpri);
	ASSERT(sdpp->sdp_DC >= sysdc_minDC);
	ASSERT(sdpp->sdp_DC <= sysdc_maxDC);

	sdc->sdc_thread = t;
	sdc->sdc_pri = sdpp->sdp_maxpri;	/* start off maximally */
	sdc->sdc_minpri = sdpp->sdp_minpri;
	sdc->sdc_maxpri = sdpp->sdp_maxpri;
	sdc->sdc_target_DC = sdpp->sdp_DC;
	sdc->sdc_ticks = 0;
	sdc->sdc_update_ticks = sysdc_update_ticks + 1;

	/* Assign ourselves to the appropriate pset. */
	sdc->sdc_pset = NULL;
	mutex_enter(&sysdc_pset_lock);
	for (pset = list_head(&sysdc_psets); pset != NULL;
	    pset = list_next(&sysdc_psets, pset)) {
		if (pset->sdp_cpupart == cpupart) {
			break;
		}
	}
	if (pset == NULL) {
		pset = newpset;
		newpset = NULL;
		pset->sdp_cpupart = cpupart;
		list_insert_tail(&sysdc_psets, pset);
	}
	pset->sdp_nthreads++;
	ASSERT(pset->sdp_nthreads > 0);

	sdc->sdc_pset = pset;

	start_timeout = (sysdc_update_timeout_started == 0);
	sysdc_update_timeout_started = 1;
	mutex_exit(&sysdc_pset_lock);

	if (newpset != NULL)
		kmem_free(newpset, sizeof (*newpset));

	/* Update t's scheduling class and priority. */
	thread_lock(t);
	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
	t->t_cid = cid;
	t->t_cldata = sdc;
	t->t_schedflag |= TS_RUNQMATCH;

	sysdc_update_pri(sdc, SDC_UPDATE_INITIAL);
	thread_unlock(t);

	/* Kick off the thread timeout if we're the first one in. */
	if (start_timeout) {
		(void) timeout(sysdc_update, NULL, sysdc_update_ticks);
	}

	return (0);
}

static void
sysdc_leave(sysdc_t *sdc)
{
	sysdc_pset_t	*sdp = sdc->sdc_pset;
	sysdc_list_t	*sdl = SYSDC_LIST(sdc);
	uint_t		freedc;

	mutex_enter(&sdl->sdl_lock);	/* block sysdc_update() */
	sdc->sdc_thread = NULL;
	freedc = (sdc->sdc_next == NULL);
	mutex_exit(&sdl->sdl_lock);

	mutex_enter(&sysdc_pset_lock);
	ASSERT(sdp != NULL);
	ASSERT(sdp->sdp_nthreads > 0);
	--sdp->sdp_nthreads;
	if (sdp->sdp_nthreads == 0) {
		list_remove(&sysdc_psets, sdp);
	} else {
		sdp = NULL;
	}
	mutex_exit(&sysdc_pset_lock);

	if (freedc)
		kmem_free(sdc, sizeof (*sdc));
	if (sdp != NULL)
		kmem_free(sdp, sizeof (*sdp));
}

static void
sysdc_exitclass(void *buf)
{
	sysdc_leave((sysdc_t *)buf);
}

/*ARGSUSED*/
static int
sysdc_canexit(kthread_t *t, cred_t *reqpcredp)
{
	/* Threads cannot exit SDC once joined, except in a body bag. */
	return (EPERM);
}

static void
sysdc_exit(kthread_t *t)
{
	sysdc_t	*sdc;

	/* We're exiting, so we just rejoin the SYS class. */
	thread_lock(t);
	ASSERT(t->t_cid == sysdccid);
	sdc = t->t_cldata;
	t->t_cid = syscid;
	t->t_cldata = NULL;
	t->t_clfuncs = &(sclass[syscid].cl_funcs->thread);
	(void) thread_change_pri(t, maxclsyspri, 0);
	t->t_schedflag &= ~TS_RUNQMATCH;
	thread_unlock_nopreempt(t);

	/* Unlink the sdc from everything. */
	sysdc_leave(sdc);
}

/*ARGSUSED*/
static int
sysdc_fork(kthread_t *t, kthread_t *ct, void *bufp)
{
	/*
	 * Threads cannot be created with SDC as their class; they must
	 * be created as SYS and then added with sysdc_thread_enter().
	 * Because of this restriction, sysdc_fork() should never be called.
	 */
	panic("sysdc cannot be forked");

	return (ENOSYS);
}

/*ARGSUSED*/
static void
sysdc_forkret(kthread_t *t, kthread_t *ct)
{
	/* SDC threads are part of system processes, which never fork. */
	panic("sysdc cannot be forked");
}

static pri_t
sysdc_globpri(kthread_t *t)
{
	return (t->t_epri);
}

/*ARGSUSED*/
static pri_t
sysdc_no_swap(kthread_t *t, int flags)
{
	/* SDC threads cannot be swapped. */
	return (-1);
}

/*
 * Get maximum and minimum priorities enjoyed by SDC threads.
 */
static int
sysdc_getclpri(pcpri_t *pcprip)
{
	pcprip->pc_clpmax = sysdc_maxpri;
	pcprip->pc_clpmin = sysdc_minpri;
	return (0);
}

/*ARGSUSED*/
static int
sysdc_getclinfo(void *arg)
{
	return (0);	/* no class-specific info */
}

/*ARGSUSED*/
static int
sysdc_alloc(void **p, int flag)
{
	sysdc_t *new;

	*p = NULL;
	if ((new = kmem_zalloc(sizeof (*new), flag)) == NULL) {
		return (ENOMEM);
	}
	if ((new->sdc_pset = kmem_zalloc(sizeof (*new->sdc_pset), flag)) ==
	    NULL) {
		kmem_free(new, sizeof (*new));
		return (ENOMEM);
	}
	*p = new;
	return (0);
}

static void
sysdc_free(void *p)
{
	sysdc_t *sdc = p;

	if (sdc != NULL) {
		/*
		 * We must have failed CL_ENTERCLASS(), so our pset should be
		 * there and unused.
		 */
		ASSERT(sdc->sdc_pset != NULL);
		ASSERT(sdc->sdc_pset->sdp_cpupart == NULL);
		kmem_free(sdc->sdc_pset, sizeof (*sdc->sdc_pset));
		kmem_free(sdc, sizeof (*sdc));
	}
}

static int sysdc_enosys();	/* Boy, ANSI-C's K&R compatibility is weird. */
static int sysdc_einval();
static void sysdc_nullsys();

static struct classfuncs sysdc_classfuncs = {
	/* messages to class manager */
	{
		sysdc_enosys,	/* admin */
		sysdc_getclinfo,
		sysdc_enosys,	/* parmsin */
		sysdc_enosys,	/* parmsout */
		sysdc_enosys,	/* vaparmsin */
		sysdc_enosys,	/* vaparmsout */
		sysdc_getclpri,
		sysdc_alloc,
		sysdc_free,
	},
	/* operations on threads */
	{
		sysdc_enterclass,
		sysdc_exitclass,
		sysdc_canexit,
		sysdc_fork,
		sysdc_forkret,
		sysdc_nullsys,	/* parmsget */
		sysdc_enosys,	/* parmsset */
		sysdc_nullsys,	/* stop */
		sysdc_exit,
		sysdc_nullsys,	/* active */
		sysdc_nullsys,	/* inactive */
		sysdc_no_swap,	/* swapin */
		sysdc_no_swap,	/* swapout */
		sysdc_nullsys,	/* trapret */
		sysdc_preempt,
		sysdc_setrun,
		sysdc_sleep,
		sysdc_tick,
		sysdc_wakeup,
		sysdc_einval,	/* donice */
		sysdc_globpri,
		sysdc_nullsys,	/* set_process_group */
		sysdc_nullsys,	/* yield */
		sysdc_einval,	/* doprio */
	}
};

static int
sysdc_enosys()
{
	return (ENOSYS);
}

static int
sysdc_einval()
{
	return (EINVAL);
}

static void
sysdc_nullsys()
{
}

/*ARGSUSED*/
static pri_t
sysdc_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
{
	int idx;

	list_create(&sysdc_psets, sizeof (sysdc_pset_t),
	    offsetof(sysdc_pset_t, sdp_node));

	for (idx = 0; idx < SYSDC_NLISTS; idx++) {
		sysdc_active[idx].sdl_list = &sysdc_dummy;
	}

	sysdc_initparam();

	sysdccid = cid;
	*clfuncspp = &sysdc_classfuncs;

	return ((pri_t)v.v_maxsyspri);
}

static struct sclass csw = {
	"SDC",
	sysdc_init,
	0
};

static struct modlsched modlsched = {
	&mod_schedops, "system duty cycle scheduling class", &csw
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlsched, NULL
};

int
_init()
{
	return (mod_install(&modlinkage));
}

int
_fini()
{
	return (EBUSY);	/* can't unload for now */
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/* --- consolidation-private interfaces --- */
void
sysdc_thread_enter(kthread_t *t, uint_t dc, uint_t flags)
{
	void		*buf = NULL;
	sysdc_params_t	sdp;

	SYSDC_INC_STAT(sysdc_thread_enter_enter);

	ASSERT(sysdc_param_init);
	ASSERT(sysdccid >= 0);

	ASSERT((flags & ~SYSDC_THREAD_BATCH) == 0);

	sdp.sdp_minpri = sysdc_minpri;
	sdp.sdp_maxpri = sysdc_maxpri;
	sdp.sdp_DC = MAX(MIN(dc, sysdc_maxDC), sysdc_minDC);

	VERIFY0(CL_ALLOC(&buf, sysdccid, KM_SLEEP));

	ASSERT(t->t_lwp != NULL);
	ASSERT(t->t_cid == syscid);
	ASSERT(t->t_cldata == NULL);
	VERIFY0(CL_CANEXIT(t, NULL));
	VERIFY0(CL_ENTERCLASS(t, sysdccid, &sdp, kcred, buf));
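	/*
	 * Notify our former (SYS) class that we have left it; SYS threads
	 * carry no class-specific data, so there is nothing to pass along.
	 */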
	CL_EXITCLASS(syscid, NULL);
}