xref: /illumos-gate/usr/src/uts/common/os/clock_tick.c (revision 3fe455549728ac525df3be56130ad8e075d645d7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/thread.h>
28 #include <sys/proc.h>
29 #include <sys/task.h>
30 #include <sys/cmn_err.h>
31 #include <sys/class.h>
32 #include <sys/sdt.h>
33 #include <sys/atomic.h>
34 #include <sys/cpu.h>
35 #include <sys/clock_tick.h>
36 #include <sys/clock_impl.h>
37 #include <sys/sysmacros.h>
38 #include <vm/rm.h>
39 
40 /*
41  * This file contains the implementation of clock tick accounting for threads.
42  * Every tick, user threads running on various CPUs are located and charged
43  * with a tick to account for their use of CPU time.
44  *
45  * Every tick, the clock() handler calls clock_tick_schedule() to perform tick
46  * accounting for all the threads in the system. Tick accounting is done in
47  * two phases:
48  *
49  * Tick scheduling	Done in clock_tick_schedule(). In this phase, cross
50  *			calls are scheduled to multiple CPUs to perform
51  *			multi-threaded tick accounting. The CPUs are chosen
52  *			on a rotational basis so as to distribute the tick
53  *			accounting load evenly across all CPUs.
54  *
55  * Tick execution	Done in clock_tick_execute(). In this phase, tick
56  *			accounting is actually performed by softint handlers
57  *			on multiple CPUs.
58  *
59  * This implementation gives us a multi-threaded tick processing facility that
60  * is suitable for configurations with a large number of CPUs. On smaller
61  * configurations it may be desirable to let the processing be single-threaded
62  * and just allow clock() to do it as it has been done traditionally. To
63  * facilitate this, a variable, clock_tick_threshold, is defined. Platforms
64  * that desire multi-threading should set this variable to something
65  * appropriate. A recommended value may be found in clock_tick.h. At boot time,
66  * if the number of CPUs is greater than clock_tick_threshold, multi-threading
67  * kicks in. Note that this is a decision made at boot time. If more CPUs
68  * are dynamically added later on to exceed the threshold, no attempt is made
69  * to switch to multi-threaded. Similarly, if CPUs are removed dynamically
70  * no attempt is made to switch to single-threaded. This is to keep the
71  * implementation simple. Also note that the threshold can be changed for a
72  * specific customer configuration via /etc/system.
73  *
74  * The boot time decision is reflected in clock_tick_single_threaded.
75  */
76 
77 /*
78  * clock_tick_threshold
79  *	If the number of CPUs at boot time exceeds this threshold,
80  *	multi-threaded tick accounting kicks in.
81  *
82  * clock_tick_ncpus
83  *	The number of CPUs in a set. Each set is scheduled for tick execution
84  *	on a separate processor.
85  *
86  * clock_tick_single_threaded
87  *	Indicates whether or not tick accounting is single threaded.
88  *
89  * clock_tick_total_cpus
90  *	Total number of online CPUs.
91  *
92  * clock_tick_cpus
93  *	Array of online CPU pointers.
94  *
95  * clock_tick_cpu
96  *	Per-CPU, cache-aligned data structures to facilitate multi-threading.
97  *
98  * clock_tick_active
99  *	Counter that indicates the number of active tick processing softints
100  *	in the system.
101  *
102  * clock_tick_pending
103  *	Number of pending ticks that need to be accounted by the softint
104  *	handlers.
105  *
106  * clock_tick_lock
107  *	Mutex to synchronize between clock_tick_schedule() and
108  *	CPU online/offline.
109  *
110  * clock_cpu_id
111  *	CPU id of the clock() CPU. Used to detect when the clock CPU
112  *	is offlined.
113  *
114  * clock_tick_online_cpuset
115  *	CPU set of all online processors that can be X-called.
116  *
117  * clock_tick_proc_max
118  *	Each process is allowed to accumulate a few ticks before checking
119  *	for the task CPU time resource limit. We lower the number of calls
120  *	to rctl_test() to make tick accounting more scalable. The tradeoff
121  *	is that the limit may not get enforced in a timely manner. This is
122  *	typically not a problem.
123  *
124  * clock_tick_set
125  *	Per-set structures. Each structure contains the range of CPUs
126  *	to be processed for the set.
127  *
128  * clock_tick_nsets;
129  *	Number of sets.
130  *
131  * clock_tick_scan
132  *	Where to begin the scan for single-threaded mode. In multi-threaded,
133  *	the clock_tick_set itself contains a field for this.
134  */
135 int			clock_tick_threshold;
136 int			clock_tick_ncpus;
137 int			clock_tick_single_threaded;
138 int			clock_tick_total_cpus;
139 cpu_t			*clock_tick_cpus[NCPU];
140 clock_tick_cpu_t	*clock_tick_cpu[NCPU];
141 ulong_t			clock_tick_active;
142 int			clock_tick_pending;
143 kmutex_t		clock_tick_lock;
144 processorid_t		clock_cpu_id;
145 cpuset_t		clock_tick_online_cpuset;
146 clock_t			clock_tick_proc_max;
147 clock_tick_set_t	*clock_tick_set;
148 int			clock_tick_nsets;
149 int			clock_tick_scan;
150 ulong_t			clock_tick_intr;
151 
152 static uint_t	clock_tick_execute(caddr_t, caddr_t);
153 static void	clock_tick_execute_common(int, int, int, clock_t, int);
154 
155 /*
156  * Clock tick initialization is done in two phases:
157  *
158  * 1. Before clock_init() is called, clock_tick_init_pre() is called to set
159  *    up single-threading so the clock() can begin to do its job.
160  *
161  * 2. After the slave CPUs are initialized at boot time, we know the number
162  *    of CPUs. clock_tick_init_post() is called to set up multi-threading if
163  *    required.
164  */
165 void
166 clock_tick_init_pre(void)
167 {
168 	clock_tick_cpu_t	*ctp;
169 	int			i, n;
170 	clock_tick_set_t	*csp;
171 	uintptr_t		abuf, buf;
172 	size_t			size;
173 
174 	clock_tick_single_threaded = 1;
175 
176 	/*
177 	 * We will not free this memory, but to avoid the false sharing,
178 	 * align to cache line size.
179 	 */
180 	size = P2ROUNDUP(sizeof (clock_tick_cpu_t), _CACHE_LINE_SIZE);
181 	abuf = (uintptr_t)kmem_zalloc(size * NCPU + _CACHE_LINE_SIZE, KM_SLEEP);
182 	buf = P2ROUNDUP(abuf, _CACHE_LINE_SIZE);
183 
184 	/*
185 	 * Perform initialization in case multi-threading is chosen later.
186 	 */
187 	if (&create_softint != NULL) {
188 		clock_tick_intr = create_softint(LOCK_LEVEL,
189 		    clock_tick_execute, (caddr_t)NULL);
190 	}
191 	for (i = 0; i < NCPU; i++, buf += size) {
192 		ctp = (clock_tick_cpu_t *)buf;
193 		clock_tick_cpu[i] = ctp;
194 		mutex_init(&ctp->ct_lock, NULL, MUTEX_DEFAULT, NULL);
195 		if (&create_softint != NULL) {
196 			ctp->ct_intr = clock_tick_intr;
197 		}
198 		ctp->ct_pending = 0;
199 	}
200 
201 	mutex_init(&clock_tick_lock, NULL, MUTEX_DEFAULT, NULL);
202 
203 	/*
204 	 * Compute clock_tick_ncpus here. We need it to compute the
205 	 * maximum number of tick sets we need to support.
206 	 */
207 	ASSERT(clock_tick_ncpus >= 0);
208 	if (clock_tick_ncpus == 0)
209 		clock_tick_ncpus = CLOCK_TICK_NCPUS;
210 	if (clock_tick_ncpus > max_ncpus)
211 		clock_tick_ncpus = max_ncpus;
212 
213 	/*
214 	 * Allocate and initialize the tick sets.
215 	 */
216 	n = (max_ncpus + clock_tick_ncpus - 1)/clock_tick_ncpus;
217 	clock_tick_set = kmem_zalloc(sizeof (clock_tick_set_t) * n, KM_SLEEP);
218 	for (i = 0; i < n; i++) {
219 		csp = &clock_tick_set[i];
220 		csp->ct_start = i * clock_tick_ncpus;
221 		csp->ct_scan = csp->ct_start;
222 		csp->ct_end = csp->ct_start;
223 	}
224 }
225 
226 void
227 clock_tick_init_post(void)
228 {
229 	/*
230 	 * If a platform does not provide create_softint() and invoke_softint(),
231 	 * then we assume single threaded.
232 	 */
233 	if (&invoke_softint == NULL)
234 		clock_tick_threshold = 0;
235 
236 	ASSERT(clock_tick_threshold >= 0);
237 
238 	if (clock_tick_threshold == 0)
239 		clock_tick_threshold = max_ncpus;
240 
241 	/*
242 	 * If a platform does not specify a threshold or if the number of CPUs
243 	 * at boot time does not exceed the threshold, tick accounting remains
244 	 * single-threaded.
245 	 */
246 	if (ncpus <= clock_tick_threshold) {
247 		clock_tick_ncpus = max_ncpus;
248 		clock_tick_proc_max = 1;
249 		return;
250 	}
251 
252 	/*
253 	 * OK. Multi-thread tick processing. If a platform has not specified
254 	 * the CPU set size for multi-threading, then use the default value.
255 	 * This value has been arrived through measurements on large
256 	 * configuration systems.
257 	 */
258 	clock_tick_single_threaded = 0;
259 	if (clock_tick_proc_max == 0) {
260 		clock_tick_proc_max = CLOCK_TICK_PROC_MAX;
261 		if (hires_tick)
262 			clock_tick_proc_max *= 10;
263 	}
264 }
265 
266 static void
267 clock_tick_schedule_one(clock_tick_set_t *csp, int pending, processorid_t cid)
268 {
269 	clock_tick_cpu_t	*ctp;
270 
271 	ASSERT(&invoke_softint != NULL);
272 
273 	atomic_inc_ulong(&clock_tick_active);
274 
275 	/*
276 	 * Schedule tick accounting for a set of CPUs.
277 	 */
278 	ctp = clock_tick_cpu[cid];
279 	mutex_enter(&ctp->ct_lock);
280 	ctp->ct_lbolt = LBOLT_NO_ACCOUNT;
281 	ctp->ct_pending += pending;
282 	ctp->ct_start = csp->ct_start;
283 	ctp->ct_end = csp->ct_end;
284 	ctp->ct_scan = csp->ct_scan;
285 	mutex_exit(&ctp->ct_lock);
286 
287 	invoke_softint(cid, ctp->ct_intr);
288 	/*
289 	 * Return without waiting for the softint to finish.
290 	 */
291 }
292 
293 static void
294 clock_tick_process(cpu_t *cp, clock_t mylbolt, int pending)
295 {
296 	kthread_t	*t;
297 	kmutex_t	*plockp;
298 	int		notick, intr;
299 	klwp_id_t	lwp;
300 
301 	/*
302 	 * The locking here is rather tricky. thread_free_prevent()
303 	 * prevents the thread returned from being freed while we
304 	 * are looking at it. We can then check if the thread
305 	 * is exiting and get the appropriate p_lock if it
306 	 * is not.  We have to be careful, though, because
307 	 * the _process_ can still be freed while we've
308 	 * prevented thread free.  To avoid touching the
309 	 * proc structure we put a pointer to the p_lock in the
310 	 * thread structure.  The p_lock is persistent so we
311 	 * can acquire it even if the process is gone.  At that
312 	 * point we can check (again) if the thread is exiting
313 	 * and either drop the lock or do the tick processing.
314 	 */
315 	t = cp->cpu_thread;	/* Current running thread */
316 	if (CPU == cp) {
317 		/*
318 		 * 't' will be the tick processing thread on this
319 		 * CPU.  Use the pinned thread (if any) on this CPU
320 		 * as the target of the clock tick.
321 		 */
322 		if (t->t_intr != NULL)
323 			t = t->t_intr;
324 	}
325 
326 	/*
327 	 * We use thread_free_prevent to keep the currently running
328 	 * thread from being freed or recycled while we're
329 	 * looking at it.
330 	 */
331 	thread_free_prevent(t);
332 	/*
333 	 * We cannot hold the cpu_lock to prevent the
334 	 * cpu_active from changing in the clock interrupt.
335 	 * As long as we don't block (or don't get pre-empted)
336 	 * the cpu_list will not change (all threads are paused
337 	 * before list modification).
338 	 */
339 	if (CLOCK_TICK_CPU_OFFLINE(cp)) {
340 		thread_free_allow(t);
341 		return;
342 	}
343 
344 	/*
345 	 * Make sure the thread is still on the CPU.
346 	 */
347 	if ((t != cp->cpu_thread) &&
348 	    ((cp != CPU) || (t != cp->cpu_thread->t_intr))) {
349 		/*
350 		 * We could not locate the thread. Skip this CPU. Race
351 		 * conditions while performing these checks are benign.
352 		 * These checks are not perfect and they don't need
353 		 * to be.
354 		 */
355 		thread_free_allow(t);
356 		return;
357 	}
358 
359 	intr = t->t_flag & T_INTR_THREAD;
360 	lwp = ttolwp(t);
361 	if (lwp == NULL || (t->t_proc_flag & TP_LWPEXIT) || intr) {
362 		/*
363 		 * Thread is exiting (or uninteresting) so don't
364 		 * do tick processing.
365 		 */
366 		thread_free_allow(t);
367 		return;
368 	}
369 
370 	/*
371 	 * OK, try to grab the process lock.  See
372 	 * comments above for why we're not using
373 	 * ttoproc(t)->p_lockp here.
374 	 */
375 	plockp = t->t_plockp;
376 	mutex_enter(plockp);
377 	/* See above comment. */
378 	if (CLOCK_TICK_CPU_OFFLINE(cp)) {
379 		mutex_exit(plockp);
380 		thread_free_allow(t);
381 		return;
382 	}
383 
384 	/*
385 	 * The thread may have exited between when we
386 	 * checked above, and when we got the p_lock.
387 	 */
388 	if (t->t_proc_flag & TP_LWPEXIT) {
389 		mutex_exit(plockp);
390 		thread_free_allow(t);
391 		return;
392 	}
393 
394 	/*
395 	 * Either we have the p_lock for the thread's process,
396 	 * or we don't care about the thread structure any more.
397 	 * Either way we can allow thread free.
398 	 */
399 	thread_free_allow(t);
400 
401 	/*
402 	 * If we haven't done tick processing for this
403 	 * lwp, then do it now. Since we don't hold the
404 	 * lwp down on a CPU it can migrate and show up
405 	 * more than once, hence the lbolt check. mylbolt
406 	 * is copied at the time of tick scheduling to prevent
407 	 * lbolt mismatches.
408 	 *
409 	 * Also, make sure that it's okay to perform the
410 	 * tick processing before calling clock_tick.
411 	 * Setting notick to a TRUE value (ie. not 0)
412 	 * results in tick processing not being performed for
413 	 * that thread.
414 	 */
415 	notick = ((cp->cpu_flags & CPU_QUIESCED) || CPU_ON_INTR(cp) ||
416 	    (cp->cpu_dispthread == cp->cpu_idle_thread));
417 
418 	if ((!notick) && (t->t_lbolt < mylbolt)) {
419 		t->t_lbolt = mylbolt;
420 		clock_tick(t, pending);
421 	}
422 
423 	mutex_exit(plockp);
424 }
425 
426 void
427 clock_tick_schedule(int one_sec)
428 {
429 	ulong_t			active;
430 	int			i, end;
431 	clock_tick_set_t	*csp;
432 	cpu_t			*cp;
433 
434 	if (clock_cpu_id != CPU->cpu_id)
435 		clock_cpu_id = CPU->cpu_id;
436 
437 	if (clock_tick_single_threaded) {
438 		/*
439 		 * Each tick cycle, start the scan from a different
440 		 * CPU for the sake of fairness.
441 		 */
442 		end = clock_tick_total_cpus;
443 		clock_tick_scan++;
444 		if (clock_tick_scan >= end)
445 			clock_tick_scan = 0;
446 
447 		clock_tick_execute_common(0, clock_tick_scan, end,
448 		    LBOLT_NO_ACCOUNT, 1);
449 
450 		return;
451 	}
452 
453 	/*
454 	 * If the previous invocation of handlers is not yet finished, then
455 	 * simply increment a pending count and return. Eventually when they
456 	 * finish, the pending count is passed down to the next set of
457 	 * handlers to process. This way, ticks that have already elapsed
458 	 * in the past are handled as quickly as possible to minimize the
459 	 * chances of threads getting away before their pending ticks are
460 	 * accounted. The other benefit is that if the pending count is
461 	 * more than one, it can be handled by a single invocation of
462 	 * clock_tick(). This is a good optimization for large configuration
463 	 * busy systems where tick accounting can get backed up for various
464 	 * reasons.
465 	 */
466 	clock_tick_pending++;
467 
468 	active = clock_tick_active;
469 	active = atomic_cas_ulong(&clock_tick_active, active, active);
470 	if (active)
471 		return;
472 
473 	/*
474 	 * We want to handle the clock CPU here. If we
475 	 * scheduled the accounting for the clock CPU to another
476 	 * processor, that processor will find only the clock() thread
477 	 * running and not account for any user thread below it. Also,
478 	 * we want to handle this before we block on anything and allow
479 	 * the pinned thread below the current thread to escape.
480 	 */
481 	clock_tick_process(CPU, LBOLT_NO_ACCOUNT, clock_tick_pending);
482 
483 	mutex_enter(&clock_tick_lock);
484 
485 	/*
486 	 * Schedule each set on a separate processor.
487 	 */
488 	cp = clock_cpu_list;
489 	for (i = 0; i < clock_tick_nsets; i++) {
490 		csp = &clock_tick_set[i];
491 
492 		/*
493 		 * Pick the next online CPU in list for scheduling tick
494 		 * accounting. The clock_tick_lock is held by the caller.
495 		 * So, CPU online/offline cannot muck with this while
496 		 * we are picking our CPU to X-call.
497 		 */
498 		if (cp == CPU)
499 			cp = cp->cpu_next_onln;
500 
501 		/*
502 		 * Each tick cycle, start the scan from a different
503 		 * CPU for the sake of fairness.
504 		 */
505 		csp->ct_scan++;
506 		if (csp->ct_scan >= csp->ct_end)
507 			csp->ct_scan = csp->ct_start;
508 
509 		clock_tick_schedule_one(csp, clock_tick_pending, cp->cpu_id);
510 
511 		cp = cp->cpu_next_onln;
512 	}
513 
514 	if (one_sec) {
515 		/*
516 		 * Move the CPU pointer around every second. This is so
517 		 * all the CPUs can be X-called in a round-robin fashion
518 		 * to evenly distribute the X-calls. We don't do this
519 		 * at a faster rate than this because we don't want
520 		 * to affect cache performance negatively.
521 		 */
522 		clock_cpu_list = clock_cpu_list->cpu_next_onln;
523 	}
524 
525 	mutex_exit(&clock_tick_lock);
526 
527 	clock_tick_pending = 0;
528 }
529 
530 static void
531 clock_tick_execute_common(int start, int scan, int end, clock_t mylbolt,
532     int pending)
533 {
534 	cpu_t		*cp;
535 	int		i;
536 
537 	ASSERT((start <= scan) && (scan <= end));
538 
539 	/*
540 	 * Handle the thread on current CPU first. This is to prevent a
541 	 * pinned thread from escaping if we ever block on something.
542 	 * Note that in the single-threaded mode, this handles the clock
543 	 * CPU.
544 	 */
545 	clock_tick_process(CPU, mylbolt, pending);
546 
547 	/*
548 	 * Perform tick accounting for the threads running on
549 	 * the scheduled CPUs.
550 	 */
551 	for (i = scan; i < end; i++) {
552 		cp = clock_tick_cpus[i];
553 		if ((cp == NULL) || (cp == CPU) || (cp->cpu_id == clock_cpu_id))
554 			continue;
555 		clock_tick_process(cp, mylbolt, pending);
556 	}
557 
558 	for (i = start; i < scan; i++) {
559 		cp = clock_tick_cpus[i];
560 		if ((cp == NULL) || (cp == CPU) || (cp->cpu_id == clock_cpu_id))
561 			continue;
562 		clock_tick_process(cp, mylbolt, pending);
563 	}
564 }
565 
566 /*ARGSUSED*/
567 static uint_t
568 clock_tick_execute(caddr_t arg1, caddr_t arg2)
569 {
570 	clock_tick_cpu_t	*ctp;
571 	int			start, scan, end, pending;
572 	clock_t			mylbolt;
573 
574 	/*
575 	 * We could have raced with cpu offline. We don't want to
576 	 * process anything on an offlined CPU. If we got blocked
577 	 * on anything, we may not get scheduled when we wakeup
578 	 * later on.
579 	 */
580 	if (!CLOCK_TICK_XCALL_SAFE(CPU))
581 		goto out;
582 
583 	ctp = clock_tick_cpu[CPU->cpu_id];
584 
585 	mutex_enter(&ctp->ct_lock);
586 	pending = ctp->ct_pending;
587 	if (pending == 0) {
588 		/*
589 		 * If a CPU is busy at LOCK_LEVEL, then an invocation
590 		 * of this softint may be queued for some time. In that case,
591 		 * clock_tick_active will not be incremented.
592 		 * clock_tick_schedule() will then assume that the previous
593 		 * invocation is done and post a new softint. The first one
594 		 * that gets in will reset the pending count so the
595 		 * second one is a noop.
596 		 */
597 		mutex_exit(&ctp->ct_lock);
598 		goto out;
599 	}
600 	ctp->ct_pending = 0;
601 	start = ctp->ct_start;
602 	end = ctp->ct_end;
603 	scan = ctp->ct_scan;
604 	mylbolt = ctp->ct_lbolt;
605 	mutex_exit(&ctp->ct_lock);
606 
607 	clock_tick_execute_common(start, scan, end, mylbolt, pending);
608 
609 out:
610 	/*
611 	 * Signal completion to the clock handler.
612 	 */
613 	atomic_dec_ulong(&clock_tick_active);
614 
615 	return (1);
616 }
617 
618 /*ARGSUSED*/
619 static int
620 clock_tick_cpu_setup(cpu_setup_t what, int cid, void *arg)
621 {
622 	cpu_t			*cp, *ncp;
623 	int			i, set;
624 	clock_tick_set_t	*csp;
625 
626 	/*
627 	 * This function performs some computations at CPU offline/online
628 	 * time. The computed values are used during tick scheduling and
629 	 * execution phases. This avoids having to compute things on
630 	 * an every tick basis. The other benefit is that we perform the
631 	 * computations only for onlined CPUs (not offlined ones). As a
632 	 * result, no tick processing is attempted for offlined CPUs.
633 	 *
634 	 * Also, cpu_offline() calls this function before checking for
635 	 * active interrupt threads. This allows us to avoid posting
636 	 * cross calls to CPUs that are being offlined.
637 	 */
638 
639 	cp = cpu[cid];
640 
641 	mutex_enter(&clock_tick_lock);
642 
643 	switch (what) {
644 	case CPU_ON:
645 		clock_tick_cpus[clock_tick_total_cpus] = cp;
646 		set = clock_tick_total_cpus / clock_tick_ncpus;
647 		csp = &clock_tick_set[set];
648 		csp->ct_end++;
649 		clock_tick_total_cpus++;
650 		clock_tick_nsets =
651 		    (clock_tick_total_cpus + clock_tick_ncpus - 1) /
652 		    clock_tick_ncpus;
653 		CPUSET_ADD(clock_tick_online_cpuset, cp->cpu_id);
654 		membar_sync();
655 		break;
656 
657 	case CPU_OFF:
658 		if (&sync_softint != NULL)
659 			sync_softint(clock_tick_online_cpuset);
660 		CPUSET_DEL(clock_tick_online_cpuset, cp->cpu_id);
661 		clock_tick_total_cpus--;
662 		clock_tick_cpus[clock_tick_total_cpus] = NULL;
663 		clock_tick_nsets =
664 		    (clock_tick_total_cpus + clock_tick_ncpus - 1) /
665 		    clock_tick_ncpus;
666 		set = clock_tick_total_cpus / clock_tick_ncpus;
667 		csp = &clock_tick_set[set];
668 		csp->ct_end--;
669 
670 		i = 0;
671 		ncp = cpu_active;
672 		do {
673 			if (cp == ncp)
674 				continue;
675 			clock_tick_cpus[i] = ncp;
676 			i++;
677 		} while ((ncp = ncp->cpu_next_onln) != cpu_active);
678 		ASSERT(i == clock_tick_total_cpus);
679 		membar_sync();
680 		break;
681 
682 	default:
683 		break;
684 	}
685 
686 	mutex_exit(&clock_tick_lock);
687 
688 	return (0);
689 }
690 
691 
692 void
693 clock_tick_mp_init(void)
694 {
695 	cpu_t	*cp;
696 
697 	mutex_enter(&cpu_lock);
698 
699 	cp = cpu_active;
700 	do {
701 		(void) clock_tick_cpu_setup(CPU_ON, cp->cpu_id, NULL);
702 	} while ((cp = cp->cpu_next_onln) != cpu_active);
703 
704 	register_cpu_setup_func(clock_tick_cpu_setup, NULL);
705 
706 	mutex_exit(&cpu_lock);
707 }
708