xref: /titanic_52/usr/src/uts/sun4v/os/suspend.c (revision 12d61dab3304980e691068219eaaab6398744a2e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/mutex.h>
27 #include <sys/cpuvar.h>
28 #include <sys/cyclic.h>
29 #include <sys/disp.h>
30 #include <sys/ddi.h>
31 #include <sys/wdt.h>
32 #include <sys/callb.h>
33 #include <sys/cmn_err.h>
34 #include <sys/hypervisor_api.h>
35 #include <sys/membar.h>
36 #include <sys/x_call.h>
37 #include <sys/promif.h>
38 #include <sys/systm.h>
39 #include <sys/mach_descrip.h>
40 #include <sys/cpu_module.h>
41 #include <sys/pg.h>
42 #include <sys/lgrp.h>
43 #include <sys/sysmacros.h>
44 #include <sys/sunddi.h>
45 #include <sys/cpupart.h>
46 #include <sys/hsvc.h>
47 #include <sys/mpo.h>
48 #include <vm/hat_sfmmu.h>
49 #include <sys/time.h>
50 #include <sys/clock.h>
51 
52 /*
53  * Sun4v OS Suspend
54  *
55  * Provides a means to suspend a sun4v guest domain by pausing CPUs and then
56  * calling into the HV to initiate a suspension. Suspension is sequenced
57  * externally by calling suspend_pre, suspend_start, and suspend_post.
58  * suspend_pre and suspend_post are meant to perform any special operations
59  * that should be done before or after a suspend/resume operation. e.g.,
60  * callbacks to cluster software to disable heartbeat monitoring before the
61  * system is suspended. suspend_start prepares kernel services to be suspended
62  * and then suspends the domain by calling hv_guest_suspend.
63  *
64  * Special Handling for %tick and %stick Registers
65  *
66  * After a suspend/resume operation, the %tick and %stick registers may have
67  * jumped forwards or backwards. The delta is assumed to be consistent across
68  * all CPUs, within the negligible level of %tick and %stick variation
69  * acceptable on a cold boot. In order to maintain increasing %tick and %stick
70  * counter values without exposing large positive or negative jumps to kernel
71  * or user code, a %tick and %stick offset is used. Kernel reads of these
72  * counters return the sum of the hardware register counter and offset
73  * variable. After a suspend/resume operation, user reads of %tick or %stick
74  * are emulated. Suspend code enables emulation by setting the
75  * %{tick,stick}.NPT fields which trigger a privileged instruction access
76  * trap whenever the registers are read from user mode. If emulation has been
77  * enabled, the trap handler emulates the instruction. Emulation is only
78  * enabled during a successful suspend/resume operation. When emulation is
79  * enabled, CPUs that are DR'd into the system will have their
80  * %{tick,stick}.NPT bits set to 1 as well.
81  */
82 
/*
 * Symbols defined outside this file.
 *
 * gettick/gettick_counter are used to sample %stick/%tick immediately
 * before the suspend and again after the resume. gettick_npt/getstick_npt
 * are only used in assertions that check the state of the NPT fields.
 * mach_descrip_update is called after a resume to download the latest MD.
 * cpu_ready_set is the CPU set handed to xt_sync after the NPT cross call.
 * native_tick_offset/native_stick_offset are the global offsets this file
 * recomputes after a successful resume, and sys_tick_freq is the frequency
 * used to convert the %stick slack from microseconds into counter units.
 */
extern u_longlong_t gettick(void);	/* returns %stick */
extern uint64_t gettick_counter(void);	/* returns %tick */
extern uint64_t gettick_npt(void);
extern uint64_t getstick_npt(void);
extern int mach_descrip_update(void);
extern cpuset_t cpu_ready_set;
extern uint64_t native_tick_offset;
extern uint64_t native_stick_offset;
extern uint64_t sys_tick_freq;
92 
/*
 * Global Sun Cluster pre/post callbacks.
 *
 * All three pointers start out NULL and are only invoked by the wrappers
 * in this file when non-NULL (presumably they are filled in by the
 * cluster software when it is loaded -- the assignment is not in this
 * file). The pre and post callbacks return zero on success; a non-zero
 * return is passed to cl_suspend_error_decode to obtain a printable
 * reason. If no decoded string is available, the SC_*_FAIL_STR_FMT
 * formats are used to build a generic message containing the error code.
 * SC_FAIL_STR_MAX is an upper bound applied to the length of the decoded
 * string that is copied into the caller's buffer.
 */
const char *(*cl_suspend_error_decode)(int);
int (*cl_suspend_pre_callback)(void);
int (*cl_suspend_post_callback)(void);
#define	SC_PRE_FAIL_STR_FMT	"Sun Cluster pre-suspend failure: %d"
#define	SC_POST_FAIL_STR_FMT	"Sun Cluster post-suspend failure: %d"
#define	SC_FAIL_STR_MAX		256
102 
103 /*
104  * The minimum major and minor version of the HSVC_GROUP_CORE API group
105  * required in order to use OS suspend.
106  */
107 #define	SUSPEND_CORE_MAJOR	1
108 #define	SUSPEND_CORE_MINOR	2
109 
110 /*
111  * By default, sun4v OS suspend is supported if the required HV version
112  * is present. suspend_disabled should be set on platforms that do not
113  * allow OS suspend regardless of whether or not the HV supports it.
114  * It can also be set in /etc/system.
115  */
116 static int suspend_disabled = 0;
117 
118 /*
119  * Controls whether or not user-land tick and stick register emulation
120  * will be enabled following a successful suspend operation.
121  */
122 static int enable_user_tick_stick_emulation = 1;
123 
124 /*
125  * Indicates whether or not tick and stick emulation is currently active.
126  * After a successful suspend operation, if emulation is enabled, this
127  * variable is set to B_TRUE. Global scope to allow emulation code to
128  * check if emulation is active.
129  */
130 boolean_t tick_stick_emulation_active = B_FALSE;
131 
132 /*
133  * When non-zero, after a successful suspend and resume, cpunodes, CPU HW
134  * sharing data structures, and processor groups will be updated using
135  * information from the updated MD.
136  */
137 static int suspend_update_cpu_mappings = 1;
138 
139 /*
140  * The maximum number of microseconds by which the %tick or %stick register
141  * can vary between any two CPUs in the system. To calculate the
142  * native_stick_offset and native_tick_offset, we measure the change in these
143  * registers on one CPU over a suspend/resume. Other CPUs may experience
144  * slightly larger or smaller changes. %tick and %stick should be synchronized
145  * between CPUs, but there may be some variation. So we add an additional value
146  * derived from this variable to ensure that these registers always increase
147  * over a suspend/resume operation, assuming all %tick and %stick registers
148  * are synchronized (within a certain limit) across CPUs in the system. The
149  * delta between %sticks on different CPUs should be a small number of cycles,
150  * not perceptible to readers of %stick that migrate between CPUs. We set this
151  * value to 1 millisecond which means that over a suspend/resume operation,
152  * all CPU's %tick and %stick will advance forwards as long as, across all
153  * CPUs, the %tick and %stick are synchronized to within 1 ms. This applies to
154  * CPUs before the suspend and CPUs after the resume. 1 ms is conservative,
155  * but small enough to not trigger TOD faults.
156  */
157 static uint64_t suspend_tick_stick_max_delta = 1000; /* microseconds */
158 
/*
 * DBG and DBG_PROM() macro.
 *
 * In DEBUG builds each macro expands to "if (suspend_debug_flag) <func>",
 * so the parenthesized argument list that follows the macro name at the
 * call site becomes the call to prom_printf (DBG_PROM) or suspend_debug
 * (DBG). In non-DEBUG builds both macros expand to nothing.
 *
 * NOTE: because the DEBUG expansion is a bare "if" statement, do not use
 * either macro as the unbraced body of an if that has an else clause; the
 * else would bind to the hidden if.
 */
#ifdef	DEBUG

/* Set non-zero to turn on the debug output produced by DBG/DBG_PROM. */
static int suspend_debug_flag = 0;

/* Debug output via prom_printf. */
#define	DBG_PROM		\
if (suspend_debug_flag)		\
	prom_printf

/* Debug output via suspend_debug, which logs through cmn_err. */
#define	DBG			\
if (suspend_debug_flag)		\
	suspend_debug
173 
174 static void
175 suspend_debug(const char *fmt, ...)
176 {
177 	char	buf[512];
178 	va_list	ap;
179 
180 	va_start(ap, fmt);
181 	(void) vsprintf(buf, fmt, ap);
182 	va_end(ap);
183 
184 	cmn_err(CE_NOTE, "%s", buf);
185 }
186 
187 #else /* DEBUG */
188 
189 #define	DBG_PROM
190 #define	DBG
191 
192 #endif /* DEBUG */
193 
194 /*
195  * Return true if the HV supports OS suspend and if suspend has not been
196  * disabled on this platform.
197  */
198 boolean_t
199 suspend_supported(void)
200 {
201 	uint64_t major, minor;
202 
203 	if (suspend_disabled)
204 		return (B_FALSE);
205 
206 	if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0)
207 		return (B_FALSE);
208 
209 	return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) ||
210 	    (major > SUSPEND_CORE_MAJOR));
211 }
212 
213 /*
214  * Given a source tick, stick, and tod value, set the tick and stick offsets
215  * such that the (current physical register value) + offset == (source value)
216  * and in addition account for some variation between the %tick/%stick on
217  * different CPUs. We account for this variation by adding in double the value
218  * of suspend_tick_stick_max_delta. The following is an explanation of why
 * suspend_tick_stick_max_delta must be multiplied by two and added to
220  * native_stick_offset.
221  *
222  * Consider a guest instance that is yet to be suspended with CPUs p0 and p1
223  * with physical "source" %stick values s0 and s1 respectively. When the guest
224  * is first resumed, the physical "target" %stick values are t0 and t1
225  * respectively. The virtual %stick values after the resume are v0 and v1
226  * respectively. Let x be the maximum difference between any two CPU's %stick
227  * register at a given point in time and let the %stick values be assigned
228  * such that
229  *
230  *     s1 = s0 + x and
231  *     t1 = t0 - x
232  *
233  * Let us assume that p0 is driving the suspend and resume. Then, we will
234  * calculate the stick offset f and the virtual %stick on p0 after the
235  * resume as follows.
236  *
237  *      f = s0 - t0 and
238  *     v0 = t0 + f
239  *
240  * We calculate the virtual %stick v1 on p1 after the resume as
241  *
242  *     v1 = t1 + f
243  *
244  * Substitution yields
245  *
246  *     v1 = t1 + (s0 - t0)
247  *     v1 = (t0 - x) + (s0 - t0)
248  *     v1 = -x + s0
249  *     v1 = s0 - x
250  *     v1 = (s1 - x) - x
251  *     v1 = s1 - 2x
252  *
253  * Therefore, in this scenario, without accounting for %stick variation in
254  * the calculation of the native_stick_offset f, the virtual %stick on p1
255  * is less than the value of the %stick on p1 before the suspend which is
256  * unacceptable. By adding 2x to v1, we guarantee it will be equal to s1
257  * which means the %stick on p1 after the resume will always be greater
258  * than or equal to the %stick on p1 before the suspend. Since v1 = t1 + f
259  * at any point in time, we can accomplish this by adding 2x to f. This
260  * guarantees any processes bound to CPU P0 or P1 will not see a %stick
261  * decrease across a suspend/resume. Hence, in the code below, we multiply
262  * suspend_tick_stick_max_delta by two in the calculation for
263  * native_stick_offset, native_tick_offset, and target_hrtime.
264  */
265 static void
266 set_tick_offsets(uint64_t source_tick, uint64_t source_stick, timestruc_t *tsp)
267 {
268 	uint64_t target_tick;
269 	uint64_t target_stick;
270 	hrtime_t source_hrtime;
271 	hrtime_t target_hrtime;
272 
273 	/*
274 	 * Temporarily set the offsets to zero so that the following reads
275 	 * of the registers will yield physical unadjusted counter values.
276 	 */
277 	native_tick_offset = 0;
278 	native_stick_offset = 0;
279 
280 	target_tick = gettick_counter();	/* returns %tick */
281 	target_stick = gettick();		/* returns %stick */
282 
283 	/*
284 	 * Calculate the new offsets. In addition to the delta observed on
285 	 * this CPU, add an additional value. Multiply the %tick/%stick
286 	 * frequency by suspend_tick_stick_max_delta (us). Then, multiply by 2
287 	 * to account for a delta between CPUs before the suspend and a
288 	 * delta between CPUs after the resume.
289 	 */
290 	native_tick_offset = (source_tick - target_tick) +
291 	    (CPU->cpu_curr_clock * suspend_tick_stick_max_delta * 2 / MICROSEC);
292 	native_stick_offset = (source_stick - target_stick) +
293 	    (sys_tick_freq * suspend_tick_stick_max_delta * 2 / MICROSEC);
294 
295 	/*
296 	 * We've effectively increased %stick and %tick by twice the value
297 	 * of suspend_tick_stick_max_delta to account for variation across
298 	 * CPUs. Now adjust the preserved TOD by the same amount.
299 	 */
300 	source_hrtime = ts2hrt(tsp);
301 	target_hrtime = source_hrtime +
302 	    (suspend_tick_stick_max_delta * 2 * (NANOSEC/MICROSEC));
303 	hrt2ts(target_hrtime, tsp);
304 }
305 
/*
 * Turn on the NPT field of both %stick and %tick on the CPU executing
 * this function. The results of the hypervisor calls are ignored.
 */
static void
enable_tick_stick_npt(void)
{
	const uint64_t npt_on = 1;

	(void) hv_stick_set_npt(npt_on);
	(void) hv_tick_set_npt(npt_on);
}
315 
316 /*
317  * Synchronize a CPU's {tick,stick}.NPT fields with the current state
318  * of the system. This is used when a CPU is DR'd into the system.
319  */
320 void
321 suspend_sync_tick_stick_npt(void)
322 {
323 	if (tick_stick_emulation_active) {
324 		DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id);
325 		(void) hv_stick_set_npt(1);
326 		(void) hv_tick_set_npt(1);
327 	} else {
328 		ASSERT(gettick_npt() == 0);
329 		ASSERT(getstick_npt() == 0);
330 	}
331 }
332 
333 /*
334  * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW
335  * sharing data structures, and processor groups.
336  */
337 static void
338 update_cpu_mappings(void)
339 {
340 	md_t		*mdp;
341 	processorid_t	id;
342 	cpu_t		*cp;
343 	cpu_pg_t	*pgps[NCPU];
344 
345 	if ((mdp = md_get_handle()) == NULL) {
346 		DBG("suspend: md_get_handle failed");
347 		return;
348 	}
349 
350 	DBG("suspend: updating CPU mappings");
351 
352 	mutex_enter(&cpu_lock);
353 
354 	setup_chip_mappings(mdp);
355 	setup_exec_unit_mappings(mdp);
356 	for (id = 0; id < NCPU; id++) {
357 		if ((cp = cpu_get(id)) == NULL)
358 			continue;
359 		cpu_map_exec_units(cp);
360 	}
361 
362 	/*
363 	 * Re-calculate processor groups.
364 	 *
365 	 * First tear down all PG information before adding any new PG
366 	 * information derived from the MD we just downloaded. We must
367 	 * call pg_cpu_inactive and pg_cpu_active with CPUs paused and
368 	 * we want to minimize the number of times pause_cpus is called.
369 	 * Inactivating all CPUs would leave PGs without any active CPUs,
370 	 * so while CPUs are paused, call pg_cpu_inactive and swap in the
371 	 * bootstrap PG structure saving the original PG structure to be
372 	 * fini'd afterwards. This prevents the dispatcher from encountering
373 	 * PGs in which all CPUs are inactive.
374 	 */
375 	pause_cpus(NULL);
376 	for (id = 0; id < NCPU; id++) {
377 		if ((cp = cpu_get(id)) == NULL)
378 			continue;
379 		pg_cpu_inactive(cp);
380 		pgps[id] = cp->cpu_pg;
381 		pg_cpu_bootstrap(cp);
382 	}
383 	start_cpus();
384 
385 	/*
386 	 * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are
387 	 * not paused. Use two separate loops here so that we do not
388 	 * initialize PG data for CPUs until all the old PG data structures
389 	 * are torn down.
390 	 */
391 	for (id = 0; id < NCPU; id++) {
392 		if ((cp = cpu_get(id)) == NULL)
393 			continue;
394 		pg_cpu_fini(cp, pgps[id]);
395 		mpo_cpu_remove(id);
396 	}
397 
398 	/*
399 	 * Initialize PG data for each CPU, but leave the bootstrapped
400 	 * PG structure in place to avoid running with any PGs containing
401 	 * nothing but inactive CPUs.
402 	 */
403 	for (id = 0; id < NCPU; id++) {
404 		if ((cp = cpu_get(id)) == NULL)
405 			continue;
406 		mpo_cpu_add(mdp, id);
407 		pgps[id] = pg_cpu_init(cp, B_TRUE);
408 	}
409 
410 	/*
411 	 * Now that PG data has been initialized for all CPUs in the
412 	 * system, replace the bootstrapped PG structure with the
413 	 * initialized PG structure and call pg_cpu_active for each CPU.
414 	 */
415 	pause_cpus(NULL);
416 	for (id = 0; id < NCPU; id++) {
417 		if ((cp = cpu_get(id)) == NULL)
418 			continue;
419 		cp->cpu_pg = pgps[id];
420 		pg_cpu_active(cp);
421 	}
422 	start_cpus();
423 
424 	mutex_exit(&cpu_lock);
425 
426 	(void) md_fini_handle(mdp);
427 }
428 
429 /*
430  * Wrapper for the Sun Cluster error decoding function.
431  */
432 static int
433 cluster_error_decode(int error, char *error_reason, size_t max_reason_len)
434 {
435 	const char	*decoded;
436 	size_t		decoded_len;
437 
438 	ASSERT(error_reason != NULL);
439 	ASSERT(max_reason_len > 0);
440 
441 	max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX);
442 
443 	if (cl_suspend_error_decode == NULL)
444 		return (-1);
445 
446 	if ((decoded = (*cl_suspend_error_decode)(error)) == NULL)
447 		return (-1);
448 
449 	/* Get number of non-NULL bytes */
450 	if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0)
451 		return (-1);
452 
453 	bcopy(decoded, error_reason, decoded_len);
454 
455 	/*
456 	 * The error string returned from cl_suspend_error_decode
457 	 * should be NULL-terminated, but set the terminator here
458 	 * because we only copied non-NULL bytes. If the decoded
459 	 * string was not NULL-terminated, this guarantees that
460 	 * error_reason will be.
461 	 */
462 	error_reason[decoded_len] = '\0';
463 
464 	return (0);
465 }
466 
467 /*
468  * Wrapper for the Sun Cluster pre-suspend callback.
469  */
470 static int
471 cluster_pre_wrapper(char *error_reason, size_t max_reason_len)
472 {
473 	int rv = 0;
474 
475 	if (cl_suspend_pre_callback != NULL) {
476 		rv = (*cl_suspend_pre_callback)();
477 		DBG("suspend: cl_suspend_pre_callback returned %d", rv);
478 		if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
479 			if (cluster_error_decode(rv, error_reason,
480 			    max_reason_len)) {
481 				(void) snprintf(error_reason, max_reason_len,
482 				    SC_PRE_FAIL_STR_FMT, rv);
483 			}
484 		}
485 	}
486 
487 	return (rv);
488 }
489 
490 /*
491  * Wrapper for the Sun Cluster post-suspend callback.
492  */
493 static int
494 cluster_post_wrapper(char *error_reason, size_t max_reason_len)
495 {
496 	int rv = 0;
497 
498 	if (cl_suspend_post_callback != NULL) {
499 		rv = (*cl_suspend_post_callback)();
500 		DBG("suspend: cl_suspend_post_callback returned %d", rv);
501 		if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
502 			if (cluster_error_decode(rv, error_reason,
503 			    max_reason_len)) {
504 				(void) snprintf(error_reason,
505 				    max_reason_len, SC_POST_FAIL_STR_FMT, rv);
506 			}
507 		}
508 	}
509 
510 	return (rv);
511 }
512 
513 /*
514  * Execute pre-suspend callbacks preparing the system for a suspend operation.
515  * Returns zero on success, non-zero on failure. Sets the recovered argument
516  * to indicate whether or not callbacks could be undone in the event of a
517  * failure--if callbacks were successfully undone, *recovered is set to B_TRUE,
518  * otherwise *recovered is set to B_FALSE. Must be called successfully before
519  * suspend_start can be called. Callers should first call suspend_support to
520  * determine if OS suspend is supported.
521  */
522 int
523 suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered)
524 {
525 	int rv;
526 
527 	ASSERT(recovered != NULL);
528 
529 	/*
530 	 * Return an error if suspend_pre is erreoneously called
531 	 * when OS suspend is not supported.
532 	 */
533 	ASSERT(suspend_supported());
534 	if (!suspend_supported()) {
535 		DBG("suspend: suspend_pre called without suspend support");
536 		*recovered = B_TRUE;
537 		return (ENOTSUP);
538 	}
539 	DBG("suspend: %s", __func__);
540 
541 	rv = cluster_pre_wrapper(error_reason, max_reason_len);
542 
543 	/*
544 	 * At present, only one pre-suspend operation exists.
545 	 * If it fails, no recovery needs to be done.
546 	 */
547 	if (rv != 0 && recovered != NULL)
548 		*recovered = B_TRUE;
549 
550 	return (rv);
551 }
552 
/*
 * Run the post-suspend callbacks and return their result: zero on success,
 * non-zero on failure. This must be called once suspend_start has been
 * called, whether suspend_start succeeded or failed. On failure,
 * error_reason may be filled in with a description of the problem.
 */
int
suspend_post(char *error_reason, size_t max_reason_len)
{
	int post_rv;

	ASSERT(suspend_supported());
	DBG("suspend: %s", __func__);

	post_rv = cluster_post_wrapper(error_reason, max_reason_len);

	return (post_rv);
}
565 
566 /*
567  * Suspends the OS by pausing CPUs and calling into the HV to initiate
568  * the suspend. When the HV routine hv_guest_suspend returns, the system
569  * will be resumed. Must be called after a successful call to suspend_pre.
570  * suspend_post must be called after suspend_start, whether or not
571  * suspend_start returns an error.
572  */
573 /*ARGSUSED*/
574 int
575 suspend_start(char *error_reason, size_t max_reason_len)
576 {
577 	uint64_t	source_tick;
578 	uint64_t	source_stick;
579 	uint64_t	rv;
580 	timestruc_t	source_tod;
581 	int		spl;
582 
583 	ASSERT(suspend_supported());
584 	DBG("suspend: %s", __func__);
585 
586 	sfmmu_ctxdoms_lock();
587 
588 	mutex_enter(&cpu_lock);
589 
590 	/* Suspend the watchdog */
591 	watchdog_suspend();
592 
593 	/* Record the TOD */
594 	mutex_enter(&tod_lock);
595 	source_tod = tod_get();
596 	mutex_exit(&tod_lock);
597 
598 	/* Pause all other CPUs */
599 	pause_cpus(NULL);
600 	DBG_PROM("suspend: CPUs paused\n");
601 
602 	/* Suspend cyclics */
603 	cyclic_suspend();
604 	DBG_PROM("suspend: cyclics suspended\n");
605 
606 	/* Disable interrupts */
607 	spl = spl8();
608 	DBG_PROM("suspend: spl8()\n");
609 
610 	source_tick = gettick_counter();
611 	source_stick = gettick();
612 	DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick);
613 	DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick);
614 
615 	/*
616 	 * Call into the HV to initiate the suspend. hv_guest_suspend()
617 	 * returns after the guest has been resumed or if the suspend
618 	 * operation failed or was cancelled. After a successful suspend,
619 	 * the %tick and %stick registers may have changed by an amount
620 	 * that is not proportional to the amount of time that has passed.
621 	 * They may have jumped forwards or backwards. Some variation is
622 	 * allowed and accounted for using suspend_tick_stick_max_delta,
623 	 * but otherwise this jump must be uniform across all CPUs and we
624 	 * operate under the assumption that it is (maintaining two global
625 	 * offset variables--one for %tick and one for %stick.)
626 	 */
627 	DBG_PROM("suspend: suspending... \n");
628 	rv = hv_guest_suspend();
629 	if (rv != 0) {
630 		splx(spl);
631 		cyclic_resume();
632 		start_cpus();
633 		watchdog_resume();
634 		mutex_exit(&cpu_lock);
635 		sfmmu_ctxdoms_unlock();
636 		DBG("suspend: failed, rv: %ld\n", rv);
637 		return (rv);
638 	}
639 
640 	/* Update the global tick and stick offsets and the preserved TOD */
641 	set_tick_offsets(source_tick, source_stick, &source_tod);
642 
643 	/* Ensure new offsets are globally visible before resuming CPUs */
644 	membar_sync();
645 
646 	/* Enable interrupts */
647 	splx(spl);
648 
649 	/* Set the {%tick,%stick}.NPT bits on all CPUs */
650 	if (enable_user_tick_stick_emulation) {
651 		xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL);
652 		xt_sync(cpu_ready_set);
653 		ASSERT(gettick_npt() != 0);
654 		ASSERT(getstick_npt() != 0);
655 	}
656 
657 	/* If emulation is enabled, but not currently active, enable it */
658 	if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) {
659 		tick_stick_emulation_active = B_TRUE;
660 	}
661 
662 	sfmmu_ctxdoms_remove();
663 
664 	/* Resume cyclics, unpause CPUs */
665 	cyclic_resume();
666 	start_cpus();
667 
668 	/* Set the TOD */
669 	mutex_enter(&tod_lock);
670 	tod_set(source_tod);
671 	mutex_exit(&tod_lock);
672 
673 	/* Re-enable the watchdog */
674 	watchdog_resume();
675 
676 	mutex_exit(&cpu_lock);
677 
678 	/* Download the latest MD */
679 	if ((rv = mach_descrip_update()) != 0)
680 		cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld",
681 		    rv);
682 
683 	sfmmu_ctxdoms_update();
684 	sfmmu_ctxdoms_unlock();
685 
686 	/* Get new MD, update CPU mappings/relationships */
687 	if (suspend_update_cpu_mappings)
688 		update_cpu_mappings();
689 
690 	DBG("suspend: target tick: 0x%lx", gettick_counter());
691 	DBG("suspend: target stick: 0x%llx", gettick());
692 	DBG("suspend: user %%tick/%%stick emulation is %d",
693 	    tick_stick_emulation_active);
694 	DBG("suspend: finished");
695 
696 	return (0);
697 }
698