xref: /illumos-gate/usr/src/uts/common/os/clock_tick.c (revision 3d393ee6c37fa10ac512ed6d36109ad616dc7c1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/thread.h>
30 #include <sys/proc.h>
31 #include <sys/task.h>
32 #include <sys/cmn_err.h>
33 #include <sys/class.h>
34 #include <sys/sdt.h>
35 #include <sys/atomic.h>
36 #include <sys/cpu.h>
37 #include <sys/clock_tick.h>
38 #include <sys/sysmacros.h>
39 #include <vm/rm.h>
40 
41 /*
42  * This file contains the implementation of clock tick accounting for threads.
43  * Every tick, user threads running on various CPUs are located and charged
44  * with a tick to account for their use of CPU time.
45  *
46  * Every tick, the clock() handler calls clock_tick_schedule() to perform tick
47  * accounting for all the threads in the system. Tick accounting is done in
48  * two phases:
49  *
50  * Tick scheduling	Done in clock_tick_schedule(). In this phase, cross
51  *			calls are scheduled to multiple CPUs to perform
52  *			multi-threaded tick accounting. The CPUs are chosen
53  *			on a rotational basis so as to distribute the tick
54  *			accounting load evenly across all CPUs.
55  *
56  * Tick execution	Done in clock_tick_execute(). In this phase, tick
57  *			accounting is actually performed by softint handlers
58  *			on multiple CPUs.
59  *
60  * This implementation gives us a multi-threaded tick processing facility that
61  * is suitable for configurations with a large number of CPUs. On smaller
62  * configurations it may be desirable to let the processing be single-threaded
63  * and just allow clock() to do it as it has been done traditionally. To
64  * facilitate this, a variable, clock_tick_threshold, is defined. Platforms
65  * that desire multi-threading should set this variable to something
66  * appropriate. A recommended value may be found in clock_tick.h. At boot time,
67  * if the number of CPUs is greater than clock_tick_threshold, multi-threading
68  * kicks in. Note that this is a decision made at boot time. If more CPUs
69  * are dynamically added later on to exceed the threshold, no attempt is made
70  * to switch to multi-threaded. Similarly, if CPUs are removed dynamically
71  * no attempt is made to switch to single-threaded. This is to keep the
72  * implementation simple. Also note that the threshold can be changed for a
73  * specific customer configuration via /etc/system.
74  *
75  * The boot time decision is reflected in clock_tick_single_threaded.
76  */
77 
78 /*
79  * clock_tick_threshold
80  *	If the number of CPUs at boot time exceeds this threshold,
81  *	multi-threaded tick accounting kicks in.
82  *
83  * clock_tick_ncpus
84  *	The number of CPUs in a set. Each set is scheduled for tick execution
85  *	on a separate processor.
86  *
87  * clock_tick_single_threaded
88  *	Indicates whether or not tick accounting is single threaded.
89  *
90  * clock_tick_total_cpus
91  *	Total number of online CPUs.
92  *
93  * clock_tick_cpus
94  *	Array of online CPU pointers.
95  *
96  * clock_tick_cpu
97  *	Per-CPU, cache-aligned data structures to facilitate multi-threading.
98  *
99  * clock_tick_active
100  *	Counter that indicates the number of active tick processing softints
101  *	in the system.
102  *
103  * clock_tick_pending
104  *	Number of pending ticks that need to be accounted by the softint
105  *	handlers.
106  *
107  * clock_tick_lock
108  *	Mutex to synchronize between clock_tick_schedule() and
109  *	CPU online/offline.
110  *
111  * clock_cpu_id
112  *	CPU id of the clock() CPU. Used to detect when the clock CPU
113  *	is offlined.
114  *
115  * clock_tick_online_cpuset
116  *	CPU set of all online processors that can be X-called.
117  *
118  * clock_tick_proc_max
119  *	Each process is allowed to accumulate a few ticks before checking
120  *	for the task CPU time resource limit. We lower the number of calls
121  *	to rctl_test() to make tick accounting more scalable. The tradeoff
122  *	is that the limit may not get enforced in a timely manner. This is
123  *	typically not a problem.
124  *
125  * clock_tick_set
126  *	Per-set structures. Each structure contains the range of CPUs
127  *	to be processed for the set.
128  *
129  * clock_tick_nsets
130  *	Number of sets.
131  *
132  * clock_tick_scan
133  *	Where to begin the scan for single-threaded mode. In multi-threaded,
134  *	the clock_tick_set itself contains a field for this.
135  */
/* Left at 0, clock_tick_init_post() replaces it with max_ncpus. */
int			clock_tick_threshold;
/* Left at 0, clock_tick_init_pre() replaces it with CLOCK_TICK_NCPUS. */
int			clock_tick_ncpus;
/* Set by clock_tick_init_pre(); cleared by clock_tick_init_post(). */
int			clock_tick_single_threaded;
int			clock_tick_total_cpus;
cpu_t			*clock_tick_cpus[NCPU];
clock_tick_cpu_t	*clock_tick_cpu[NCPU];
ulong_t			clock_tick_active;
int			clock_tick_pending;
kmutex_t		clock_tick_lock;
processorid_t		clock_cpu_id;
cpuset_t		clock_tick_online_cpuset;
/* Left at 0 in multi-threaded mode, defaults to CLOCK_TICK_PROC_MAX. */
clock_t			clock_tick_proc_max;
clock_tick_set_t	*clock_tick_set;
int			clock_tick_nsets;
int			clock_tick_scan;

/* Softint handler registered for each clock_tick_cpu_t. */
static uint_t	clock_tick_execute(caddr_t, caddr_t);
/* Tick accounting over a CPU index range; shared by both modes. */
static void	clock_tick_execute_common(int, int, int, clock_t, int);

#define	CLOCK_TICK_ALIGN	64	/* cache alignment */
156 
157 /*
158  * Clock tick initialization is done in two phases:
159  *
160  * 1. Before clock_init() is called, clock_tick_init_pre() is called to set
161  *    up single-threading so the clock() can begin to do its job.
162  *
163  * 2. After the slave CPUs are initialized at boot time, we know the number
164  *    of CPUs. clock_tick_init_post() is called to set up multi-threading if
165  *    required.
166  */
167 void
168 clock_tick_init_pre(void)
169 {
170 	clock_tick_cpu_t	*ctp;
171 	int			i, n;
172 	clock_tick_set_t	*csp;
173 	uintptr_t		buf;
174 	size_t			size;
175 
176 	clock_tick_single_threaded = 1;
177 
178 	size = P2ROUNDUP(sizeof (clock_tick_cpu_t), CLOCK_TICK_ALIGN);
179 	buf = (uintptr_t)kmem_zalloc(size * NCPU + CLOCK_TICK_ALIGN, KM_SLEEP);
180 	buf = P2ROUNDUP(buf, CLOCK_TICK_ALIGN);
181 
182 	/*
183 	 * Perform initialization in case multi-threading is chosen later.
184 	 */
185 	for (i = 0; i < NCPU; i++, buf += size) {
186 		ctp = (clock_tick_cpu_t *)buf;
187 		clock_tick_cpu[i] = ctp;
188 		mutex_init(&ctp->ct_lock, NULL, MUTEX_DEFAULT, NULL);
189 		if (&create_softint != NULL) {
190 			ctp->ct_intr = create_softint(LOCK_LEVEL,
191 			    clock_tick_execute, (caddr_t)ctp);
192 		}
193 		ctp->ct_pending = 0;
194 	}
195 
196 	mutex_init(&clock_tick_lock, NULL, MUTEX_DEFAULT, NULL);
197 
198 	/*
199 	 * Compute clock_tick_ncpus here. We need it to compute the
200 	 * maximum number of tick sets we need to support.
201 	 */
202 	ASSERT(clock_tick_ncpus >= 0);
203 	if (clock_tick_ncpus == 0)
204 		clock_tick_ncpus = CLOCK_TICK_NCPUS;
205 	if (clock_tick_ncpus > max_ncpus)
206 		clock_tick_ncpus = max_ncpus;
207 
208 	/*
209 	 * Allocate and initialize the tick sets.
210 	 */
211 	n = (max_ncpus + clock_tick_ncpus - 1)/clock_tick_ncpus;
212 	clock_tick_set = kmem_zalloc(sizeof (clock_tick_set_t) * n, KM_SLEEP);
213 	for (i = 0; i < n; i++) {
214 		csp = &clock_tick_set[i];
215 		csp->ct_start = i * clock_tick_ncpus;
216 		csp->ct_scan = csp->ct_start;
217 		csp->ct_end = csp->ct_start;
218 	}
219 }
220 
221 void
222 clock_tick_init_post(void)
223 {
224 	/*
225 	 * If a platform does not provide create_softint() and invoke_softint(),
226 	 * then we assume single threaded.
227 	 */
228 	if (&invoke_softint == NULL)
229 		clock_tick_threshold = 0;
230 
231 	ASSERT(clock_tick_threshold >= 0);
232 
233 	if (clock_tick_threshold == 0)
234 		clock_tick_threshold = max_ncpus;
235 
236 	/*
237 	 * If a platform does not specify a threshold or if the number of CPUs
238 	 * at boot time does not exceed the threshold, tick accounting remains
239 	 * single-threaded.
240 	 */
241 	if (ncpus <= clock_tick_threshold) {
242 		clock_tick_ncpus = max_ncpus;
243 		clock_tick_proc_max = 1;
244 		return;
245 	}
246 
247 	/*
248 	 * OK. Multi-thread tick processing. If a platform has not specified
249 	 * the CPU set size for multi-threading, then use the default value.
250 	 * This value has been arrived through measurements on large
251 	 * configuration systems.
252 	 */
253 	clock_tick_single_threaded = 0;
254 	if (clock_tick_proc_max == 0) {
255 		clock_tick_proc_max = CLOCK_TICK_PROC_MAX;
256 		if (hires_tick)
257 			clock_tick_proc_max *= 10;
258 	}
259 }
260 
261 static void
262 clock_tick_schedule_one(clock_tick_set_t *csp, int pending, processorid_t cid)
263 {
264 	clock_tick_cpu_t	*ctp;
265 
266 	ASSERT(&invoke_softint != NULL);
267 	/*
268 	 * Schedule tick accounting for a set of CPUs.
269 	 */
270 	ctp = clock_tick_cpu[cid];
271 	mutex_enter(&ctp->ct_lock);
272 	ctp->ct_lbolt = lbolt;
273 	ctp->ct_pending += pending;
274 	ctp->ct_start = csp->ct_start;
275 	ctp->ct_end = csp->ct_end;
276 	ctp->ct_scan = csp->ct_scan;
277 	mutex_exit(&ctp->ct_lock);
278 
279 	invoke_softint(cid, ctp->ct_intr);
280 	/*
281 	 * Return without waiting for the softint to finish.
282 	 */
283 }
284 
285 static void
286 clock_tick_process(cpu_t *cp, clock_t mylbolt, int pending)
287 {
288 	kthread_t	*t;
289 	kmutex_t	*plockp;
290 	int		notick, intr;
291 	klwp_id_t	lwp;
292 
293 	/*
294 	 * The locking here is rather tricky. thread_free_prevent()
295 	 * prevents the thread returned from being freed while we
296 	 * are looking at it. We can then check if the thread
297 	 * is exiting and get the appropriate p_lock if it
298 	 * is not.  We have to be careful, though, because
299 	 * the _process_ can still be freed while we've
300 	 * prevented thread free.  To avoid touching the
301 	 * proc structure we put a pointer to the p_lock in the
302 	 * thread structure.  The p_lock is persistent so we
303 	 * can acquire it even if the process is gone.  At that
304 	 * point we can check (again) if the thread is exiting
305 	 * and either drop the lock or do the tick processing.
306 	 */
307 	t = cp->cpu_thread;	/* Current running thread */
308 	if (CPU == cp) {
309 		/*
310 		 * 't' will be the tick processing thread on this
311 		 * CPU.  Use the pinned thread (if any) on this CPU
312 		 * as the target of the clock tick.
313 		 */
314 		if (t->t_intr != NULL)
315 			t = t->t_intr;
316 	}
317 
318 	/*
319 	 * We use thread_free_prevent to keep the currently running
320 	 * thread from being freed or recycled while we're
321 	 * looking at it.
322 	 */
323 	thread_free_prevent(t);
324 	/*
325 	 * We cannot hold the cpu_lock to prevent the
326 	 * cpu_active from changing in the clock interrupt.
327 	 * As long as we don't block (or don't get pre-empted)
328 	 * the cpu_list will not change (all threads are paused
329 	 * before list modification).
330 	 */
331 	if (CLOCK_TICK_CPU_OFFLINE(cp)) {
332 		thread_free_allow(t);
333 		return;
334 	}
335 
336 	/*
337 	 * Make sure the thread is still on the CPU.
338 	 */
339 	if ((t != cp->cpu_thread) &&
340 	    ((cp != CPU) || (t != cp->cpu_thread->t_intr))) {
341 		/*
342 		 * We could not locate the thread. Skip this CPU. Race
343 		 * conditions while performing these checks are benign.
344 		 * These checks are not perfect and they don't need
345 		 * to be.
346 		 */
347 		thread_free_allow(t);
348 		return;
349 	}
350 
351 	intr = t->t_flag & T_INTR_THREAD;
352 	lwp = ttolwp(t);
353 	if (lwp == NULL || (t->t_proc_flag & TP_LWPEXIT) || intr) {
354 		/*
355 		 * Thread is exiting (or uninteresting) so don't
356 		 * do tick processing.
357 		 */
358 		thread_free_allow(t);
359 		return;
360 	}
361 
362 	/*
363 	 * OK, try to grab the process lock.  See
364 	 * comments above for why we're not using
365 	 * ttoproc(t)->p_lockp here.
366 	 */
367 	plockp = t->t_plockp;
368 	mutex_enter(plockp);
369 	/* See above comment. */
370 	if (CLOCK_TICK_CPU_OFFLINE(cp)) {
371 		mutex_exit(plockp);
372 		thread_free_allow(t);
373 		return;
374 	}
375 
376 	/*
377 	 * The thread may have exited between when we
378 	 * checked above, and when we got the p_lock.
379 	 */
380 	if (t->t_proc_flag & TP_LWPEXIT) {
381 		mutex_exit(plockp);
382 		thread_free_allow(t);
383 		return;
384 	}
385 
386 	/*
387 	 * Either we have the p_lock for the thread's process,
388 	 * or we don't care about the thread structure any more.
389 	 * Either way we can allow thread free.
390 	 */
391 	thread_free_allow(t);
392 
393 	/*
394 	 * If we haven't done tick processing for this
395 	 * lwp, then do it now. Since we don't hold the
396 	 * lwp down on a CPU it can migrate and show up
397 	 * more than once, hence the lbolt check. mylbolt
398 	 * is copied at the time of tick scheduling to prevent
399 	 * lbolt mismatches.
400 	 *
401 	 * Also, make sure that it's okay to perform the
402 	 * tick processing before calling clock_tick.
403 	 * Setting notick to a TRUE value (ie. not 0)
404 	 * results in tick processing not being performed for
405 	 * that thread.
406 	 */
407 	notick = ((cp->cpu_flags & CPU_QUIESCED) || CPU_ON_INTR(cp) ||
408 	    (cp->cpu_dispthread == cp->cpu_idle_thread));
409 
410 	if ((!notick) && (t->t_lbolt < mylbolt)) {
411 		t->t_lbolt = mylbolt;
412 		clock_tick(t, pending);
413 	}
414 
415 	mutex_exit(plockp);
416 }
417 
418 void
419 clock_tick_schedule(int one_sec)
420 {
421 	ulong_t			active;
422 	int			i, end;
423 	clock_tick_set_t	*csp;
424 	cpu_t			*cp;
425 
426 	if (clock_cpu_id != CPU->cpu_id)
427 		clock_cpu_id = CPU->cpu_id;
428 
429 	if (clock_tick_single_threaded) {
430 		/*
431 		 * Each tick cycle, start the scan from a different
432 		 * CPU for the sake of fairness.
433 		 */
434 		end = clock_tick_total_cpus;
435 		clock_tick_scan++;
436 		if (clock_tick_scan >= end)
437 			clock_tick_scan = 0;
438 
439 		clock_tick_execute_common(0, clock_tick_scan, end, lbolt, 1);
440 
441 		return;
442 	}
443 
444 	/*
445 	 * If the previous invocation of handlers is not yet finished, then
446 	 * simply increment a pending count and return. Eventually when they
447 	 * finish, the pending count is passed down to the next set of
448 	 * handlers to process. This way, ticks that have already elapsed
449 	 * in the past are handled as quickly as possible to minimize the
450 	 * chances of threads getting away before their pending ticks are
451 	 * accounted. The other benefit is that if the pending count is
452 	 * more than one, it can be handled by a single invocation of
453 	 * clock_tick(). This is a good optimization for large configuration
454 	 * busy systems where tick accounting can get backed up for various
455 	 * reasons.
456 	 */
457 	clock_tick_pending++;
458 
459 	active = clock_tick_active;
460 	active = atomic_cas_ulong(&clock_tick_active, active, active);
461 	if (active)
462 		return;
463 
464 	/*
465 	 * We want to handle the clock CPU here. If we
466 	 * scheduled the accounting for the clock CPU to another
467 	 * processor, that processor will find only the clock() thread
468 	 * running and not account for any user thread below it. Also,
469 	 * we want to handle this before we block on anything and allow
470 	 * the pinned thread below the current thread to escape.
471 	 */
472 	clock_tick_process(CPU, lbolt, clock_tick_pending);
473 
474 	mutex_enter(&clock_tick_lock);
475 
476 	/*
477 	 * Schedule each set on a separate processor.
478 	 */
479 	cp = clock_cpu_list;
480 	for (i = 0; i < clock_tick_nsets; i++) {
481 		csp = &clock_tick_set[i];
482 
483 		/*
484 		 * Pick the next online CPU in list for scheduling tick
485 		 * accounting. The clock_tick_lock is held by the caller.
486 		 * So, CPU online/offline cannot muck with this while
487 		 * we are picking our CPU to X-call.
488 		 */
489 		if (cp == CPU)
490 			cp = cp->cpu_next_onln;
491 
492 		/*
493 		 * Each tick cycle, start the scan from a different
494 		 * CPU for the sake of fairness.
495 		 */
496 		csp->ct_scan++;
497 		if (csp->ct_scan >= csp->ct_end)
498 			csp->ct_scan = csp->ct_start;
499 
500 		clock_tick_schedule_one(csp, clock_tick_pending, cp->cpu_id);
501 
502 		cp = cp->cpu_next_onln;
503 	}
504 
505 	if (one_sec) {
506 		/*
507 		 * Move the CPU pointer around every second. This is so
508 		 * all the CPUs can be X-called in a round-robin fashion
509 		 * to evenly distribute the X-calls. We don't do this
510 		 * at a faster rate than this because we don't want
511 		 * to affect cache performance negatively.
512 		 */
513 		clock_cpu_list = clock_cpu_list->cpu_next_onln;
514 	}
515 
516 	mutex_exit(&clock_tick_lock);
517 
518 	clock_tick_pending = 0;
519 }
520 
521 static void
522 clock_tick_execute_common(int start, int scan, int end, clock_t mylbolt,
523 	int pending)
524 {
525 	cpu_t		*cp;
526 	int		i;
527 
528 	ASSERT((start <= scan) && (scan <= end));
529 
530 	/*
531 	 * Handle the thread on current CPU first. This is to prevent a
532 	 * pinned thread from escaping if we ever block on something.
533 	 * Note that in the single-threaded mode, this handles the clock
534 	 * CPU.
535 	 */
536 	clock_tick_process(CPU, mylbolt, pending);
537 
538 	/*
539 	 * Perform tick accounting for the threads running on
540 	 * the scheduled CPUs.
541 	 */
542 	for (i = scan; i < end; i++) {
543 		cp = clock_tick_cpus[i];
544 		if ((cp == NULL) || (cp == CPU) || (cp->cpu_id == clock_cpu_id))
545 			continue;
546 		clock_tick_process(cp, mylbolt, pending);
547 	}
548 
549 	for (i = start; i < scan; i++) {
550 		cp = clock_tick_cpus[i];
551 		if ((cp == NULL) || (cp == CPU) || (cp->cpu_id == clock_cpu_id))
552 			continue;
553 		clock_tick_process(cp, mylbolt, pending);
554 	}
555 }
556 
557 /*ARGSUSED*/
558 static uint_t
559 clock_tick_execute(caddr_t arg1, caddr_t arg2)
560 {
561 	clock_tick_cpu_t	*ctp;
562 	int			start, scan, end, pending;
563 	clock_t			mylbolt;
564 
565 	/*
566 	 * We could have raced with cpu offline. We don't want to
567 	 * process anything on an offlined CPU. If we got blocked
568 	 * on anything, we may not get scheduled when we wakeup
569 	 * later on.
570 	 */
571 	if (!CLOCK_TICK_XCALL_SAFE(CPU))
572 		return (1);
573 
574 	atomic_inc_ulong(&clock_tick_active);
575 
576 	ctp = (clock_tick_cpu_t *)arg1;
577 	mutex_enter(&ctp->ct_lock);
578 	pending = ctp->ct_pending;
579 	if (pending == 0) {
580 		/*
581 		 * If a CPU is busy at LOCK_LEVEL, then an invocation
582 		 * of this softint may be queued for some time. In that case,
583 		 * clock_tick_active will not be incremented.
584 		 * clock_tick_schedule() will then assume that the previous
585 		 * invocation is done and post a new softint. The first one
586 		 * that gets in will reset the pending count so the
587 		 * second one is a noop.
588 		 */
589 		mutex_exit(&ctp->ct_lock);
590 		goto out;
591 	}
592 	ctp->ct_pending = 0;
593 	start = ctp->ct_start;
594 	end = ctp->ct_end;
595 	scan = ctp->ct_scan;
596 	mylbolt = ctp->ct_lbolt;
597 	mutex_exit(&ctp->ct_lock);
598 
599 	clock_tick_execute_common(start, scan, end, mylbolt, pending);
600 
601 out:
602 	/*
603 	 * Signal completion to the clock handler.
604 	 */
605 	atomic_dec_ulong(&clock_tick_active);
606 
607 	return (1);
608 }
609 
610 /*ARGSUSED*/
611 static int
612 clock_tick_cpu_setup(cpu_setup_t what, int cid, void *arg)
613 {
614 	cpu_t			*cp, *ncp;
615 	int			i, set;
616 	clock_tick_set_t	*csp;
617 
618 	/*
619 	 * This function performs some computations at CPU offline/online
620 	 * time. The computed values are used during tick scheduling and
621 	 * execution phases. This avoids having to compute things on
622 	 * an every tick basis. The other benefit is that we perform the
623 	 * computations only for onlined CPUs (not offlined ones). As a
624 	 * result, no tick processing is attempted for offlined CPUs.
625 	 *
626 	 * Also, cpu_offline() calls this function before checking for
627 	 * active interrupt threads. This allows us to avoid posting
628 	 * cross calls to CPUs that are being offlined.
629 	 */
630 
631 	cp = cpu[cid];
632 
633 	mutex_enter(&clock_tick_lock);
634 
635 	switch (what) {
636 	case CPU_ON:
637 		clock_tick_cpus[clock_tick_total_cpus] = cp;
638 		set = clock_tick_total_cpus / clock_tick_ncpus;
639 		csp = &clock_tick_set[set];
640 		csp->ct_end++;
641 		clock_tick_total_cpus++;
642 		clock_tick_nsets =
643 		    (clock_tick_total_cpus + clock_tick_ncpus - 1) /
644 		    clock_tick_ncpus;
645 		CPUSET_ADD(clock_tick_online_cpuset, cp->cpu_id);
646 		membar_sync();
647 		break;
648 
649 	case CPU_OFF:
650 		if (&sync_softint != NULL)
651 			sync_softint(clock_tick_online_cpuset);
652 		CPUSET_DEL(clock_tick_online_cpuset, cp->cpu_id);
653 		clock_tick_total_cpus--;
654 		clock_tick_cpus[clock_tick_total_cpus] = NULL;
655 		clock_tick_nsets =
656 		    (clock_tick_total_cpus + clock_tick_ncpus - 1) /
657 		    clock_tick_ncpus;
658 		set = clock_tick_total_cpus / clock_tick_ncpus;
659 		csp = &clock_tick_set[set];
660 		csp->ct_end--;
661 
662 		i = 0;
663 		ncp = cpu_active;
664 		do {
665 			if (cp == ncp)
666 				continue;
667 			clock_tick_cpus[i] = ncp;
668 			i++;
669 		} while ((ncp = ncp->cpu_next_onln) != cpu_active);
670 		ASSERT(i == clock_tick_total_cpus);
671 		membar_sync();
672 		break;
673 
674 	default:
675 		break;
676 	}
677 
678 	mutex_exit(&clock_tick_lock);
679 
680 	return (0);
681 }
682 
683 
684 void
685 clock_tick_mp_init(void)
686 {
687 	cpu_t	*cp;
688 
689 	mutex_enter(&cpu_lock);
690 
691 	cp = cpu_active;
692 	do {
693 		(void) clock_tick_cpu_setup(CPU_ON, cp->cpu_id, NULL);
694 	} while ((cp = cp->cpu_next_onln) != cpu_active);
695 
696 	register_cpu_setup_func(clock_tick_cpu_setup, NULL);
697 
698 	mutex_exit(&cpu_lock);
699 }
700