xref: /titanic_44/usr/src/uts/sun4v/os/suspend.c (revision 023e71de9e5670cebc23dd51162833661d3d2d3b)
1*023e71deSHaik Aftandilian /*
2*023e71deSHaik Aftandilian  * CDDL HEADER START
3*023e71deSHaik Aftandilian  *
4*023e71deSHaik Aftandilian  * The contents of this file are subject to the terms of the
5*023e71deSHaik Aftandilian  * Common Development and Distribution License (the "License").
6*023e71deSHaik Aftandilian  * You may not use this file except in compliance with the License.
7*023e71deSHaik Aftandilian  *
8*023e71deSHaik Aftandilian  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*023e71deSHaik Aftandilian  * or http://www.opensolaris.org/os/licensing.
10*023e71deSHaik Aftandilian  * See the License for the specific language governing permissions
11*023e71deSHaik Aftandilian  * and limitations under the License.
12*023e71deSHaik Aftandilian  *
13*023e71deSHaik Aftandilian  * When distributing Covered Code, include this CDDL HEADER in each
14*023e71deSHaik Aftandilian  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*023e71deSHaik Aftandilian  * If applicable, add the following below this CDDL HEADER, with the
16*023e71deSHaik Aftandilian  * fields enclosed by brackets "[]" replaced with your own identifying
17*023e71deSHaik Aftandilian  * information: Portions Copyright [yyyy] [name of copyright owner]
18*023e71deSHaik Aftandilian  *
19*023e71deSHaik Aftandilian  * CDDL HEADER END
20*023e71deSHaik Aftandilian  */
21*023e71deSHaik Aftandilian /*
22*023e71deSHaik Aftandilian  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23*023e71deSHaik Aftandilian  * Use is subject to license terms.
24*023e71deSHaik Aftandilian  */
25*023e71deSHaik Aftandilian 
26*023e71deSHaik Aftandilian #include <sys/mutex.h>
27*023e71deSHaik Aftandilian #include <sys/cpuvar.h>
28*023e71deSHaik Aftandilian #include <sys/cyclic.h>
29*023e71deSHaik Aftandilian #include <sys/disp.h>
30*023e71deSHaik Aftandilian #include <sys/ddi.h>
31*023e71deSHaik Aftandilian #include <sys/wdt.h>
32*023e71deSHaik Aftandilian #include <sys/callb.h>
33*023e71deSHaik Aftandilian #include <sys/cmn_err.h>
34*023e71deSHaik Aftandilian #include <sys/hypervisor_api.h>
35*023e71deSHaik Aftandilian #include <sys/membar.h>
36*023e71deSHaik Aftandilian #include <sys/x_call.h>
37*023e71deSHaik Aftandilian #include <sys/promif.h>
38*023e71deSHaik Aftandilian #include <sys/systm.h>
39*023e71deSHaik Aftandilian #include <sys/mach_descrip.h>
40*023e71deSHaik Aftandilian #include <sys/cpu_module.h>
41*023e71deSHaik Aftandilian #include <sys/pg.h>
42*023e71deSHaik Aftandilian #include <sys/lgrp.h>
43*023e71deSHaik Aftandilian #include <sys/sysmacros.h>
44*023e71deSHaik Aftandilian #include <sys/sunddi.h>
45*023e71deSHaik Aftandilian #include <sys/cpupart.h>
46*023e71deSHaik Aftandilian #include <sys/hsvc.h>
47*023e71deSHaik Aftandilian 
48*023e71deSHaik Aftandilian /*
49*023e71deSHaik Aftandilian  * Sun4v OS Suspend
50*023e71deSHaik Aftandilian  *
51*023e71deSHaik Aftandilian  * Provides a means to suspend a sun4v guest domain by pausing CPUs and then
52*023e71deSHaik Aftandilian  * calling into the HV to initiate a suspension. Suspension is sequenced
53*023e71deSHaik Aftandilian  * externally by calling suspend_pre, suspend_start, and suspend_post.
54*023e71deSHaik Aftandilian  * suspend_pre and suspend_post are meant to perform any special operations
55*023e71deSHaik Aftandilian  * that should be done before or after a suspend/resume operation. e.g.,
56*023e71deSHaik Aftandilian  * callbacks to cluster software to disable heartbeat monitoring before the
57*023e71deSHaik Aftandilian  * system is suspended. suspend_start prepares kernel services to be suspended
58*023e71deSHaik Aftandilian  * and then suspends the domain by calling hv_guest_suspend.
59*023e71deSHaik Aftandilian  *
60*023e71deSHaik Aftandilian  * Special Handling for %tick and %stick Registers
61*023e71deSHaik Aftandilian  *
62*023e71deSHaik Aftandilian  * After a suspend/resume operation, the %tick and %stick registers may have
63*023e71deSHaik Aftandilian  * jumped forwards or backwards. The delta is assumed to be consistent across
64*023e71deSHaik Aftandilian  * all CPUs, within the negligible level of %tick and %stick variation
65*023e71deSHaik Aftandilian  * acceptable on a cold boot. In order to maintain increasing %tick and %stick
66*023e71deSHaik Aftandilian  * counter values without exposing large positive or negative jumps to kernel
67*023e71deSHaik Aftandilian  * or user code, a %tick and %stick offset is used. Kernel reads of these
68*023e71deSHaik Aftandilian  * counters return the sum of the hardware register counter and offset
69*023e71deSHaik Aftandilian  * variable. After a suspend/resume operation, user reads of %tick or %stick
70*023e71deSHaik Aftandilian  * are emulated. Suspend code enables emulation by setting the
71*023e71deSHaik Aftandilian  * %{tick,stick}.NPT fields which trigger a privileged instruction access
72*023e71deSHaik Aftandilian  * trap whenever the registers are read from user mode. If emulation has been
73*023e71deSHaik Aftandilian  * enabled, the trap handler emulates the instruction. Emulation is only
74*023e71deSHaik Aftandilian  * enabled during a successful suspend/resume operation. When emulation is
75*023e71deSHaik Aftandilian  * enabled, CPUs that are DR'd into the system will have their
76*023e71deSHaik Aftandilian  * %{tick,stick}.NPT bits set to 1 as well.
77*023e71deSHaik Aftandilian  */
78*023e71deSHaik Aftandilian 
/*
 * Routines and data defined outside this file that the suspend code uses.
 *
 * gettick() and gettick_counter() are used to sample %stick and %tick
 * before and after the hypervisor suspend call. gettick_npt() and
 * getstick_npt() are only used in ASSERTs here, where a zero return is
 * treated as "NPT bit clear" and non-zero as "NPT bit set".
 */
extern u_longlong_t gettick(void);	/* returns %stick */
extern uint64_t gettick_counter(void);	/* returns %tick */
extern uint64_t gettick_npt(void);
extern uint64_t getstick_npt(void);

/* Refreshes the machine description; returns zero on success. */
extern int mach_descrip_update(void);

/* CPU set passed to xt_sync() after the NPT cross call in suspend_start. */
extern cpuset_t cpu_ready_set;

/* Global %tick and %stick offsets, rewritten by set_tick_offsets(). */
extern uint64_t native_tick_offset;
extern uint64_t native_stick_offset;
87*023e71deSHaik Aftandilian 
/*
 * Global Sun Cluster pre/post callbacks.
 *
 * All three hooks are NULL unless something outside this file installs
 * them (presumably the cluster software -- confirm against the installer).
 * Each hook is checked for NULL before it is called.
 *
 * cl_suspend_error_decode	maps a callback error code to a string; a
 *				NULL or empty result is treated as "no
 *				description available".
 * cl_suspend_pre_callback	invoked from suspend_pre; zero means success.
 * cl_suspend_post_callback	invoked from suspend_post; zero means success.
 *
 * The SC_*_FAIL_STR_FMT strings are the fallback messages used when the
 * error code cannot be decoded. SC_FAIL_STR_MAX caps the number of bytes
 * (including the terminator) copied from a decoded error string.
 */
const char *(*cl_suspend_error_decode)(int);
int (*cl_suspend_pre_callback)(void);
int (*cl_suspend_post_callback)(void);
#define	SC_PRE_FAIL_STR_FMT	"Sun Cluster pre-suspend failure: %d"
#define	SC_POST_FAIL_STR_FMT	"Sun Cluster post-suspend failure: %d"
#define	SC_FAIL_STR_MAX		256
97*023e71deSHaik Aftandilian 
/*
 * The minimum major and minor version of the HSVC_GROUP_CORE API group
 * required in order to use OS suspend. suspend_supported() accepts the
 * same major with an equal or newer minor, or any newer major.
 */
#define	SUSPEND_CORE_MAJOR	1
#define	SUSPEND_CORE_MINOR	2

/*
 * By default, sun4v OS suspend is supported if the required HV version
 * is present. suspend_disabled should be set on platforms that do not
 * allow OS suspend regardless of whether or not the HV supports it.
 * It can also be set in /etc/system. When non-zero, suspend_supported()
 * returns B_FALSE without querying the HV version.
 */
static int suspend_disabled = 0;

/*
 * Controls whether or not user-land tick and stick register emulation
 * will be enabled following a successful suspend operation. When zero,
 * suspend_start leaves the NPT bits and tick_stick_emulation_active
 * untouched.
 */
static int enable_user_tick_stick_emulation = 1;

/*
 * Indicates whether or not tick and stick emulation is currently active.
 * After a successful suspend operation, if emulation is enabled, this
 * variable is set to B_TRUE. Global scope to allow emulation code to
 * check if emulation is active. Nothing in this file sets it back to
 * B_FALSE once it has been set.
 */
boolean_t tick_stick_emulation_active = B_FALSE;

/*
 * Controls whether or not MD information is refreshed after a
 * successful suspend and resume. When non-zero, after a successful
 * suspend and resume, the MD will be downloaded, cpunodes updated,
 * and processor grouping information recalculated.
 */
static int suspend_update_cpu_mappings = 1;
134*023e71deSHaik Aftandilian 
/*
 * DBG() and DBG_PROM() debug-print macros.
 *
 * On DEBUG kernels each macro expands to "if (suspend_debug_flag) <func>",
 * so the caller's parenthesized argument list completes the call:
 * DBG() logs through suspend_debug()/cmn_err() and DBG_PROM() prints
 * through prom_printf(). Because the expansion is a bare "if", never use
 * either macro as the unbraced body of an if that has an else clause.
 *
 * On non-DEBUG kernels the macros expand to nothing, leaving the argument
 * list behind as a comma expression that is evaluated and discarded.
 */
#ifdef	DEBUG

/* Set non-zero (e.g. with a debugger) to turn on suspend debug output. */
static int suspend_debug_flag = 0;

#define	DBG_PROM		\
if (suspend_debug_flag)		\
	prom_printf

#define	DBG			\
if (suspend_debug_flag)		\
	suspend_debug

/*
 * Format a printf-style message and emit it as a CE_NOTE.
 *
 * The message is formatted into a fixed-size stack buffer; vsnprintf
 * bounds the write so an over-long message is truncated rather than
 * overrunning the buffer. The result is passed to cmn_err() through "%s"
 * so that nothing in the formatted text is reinterpreted as a format.
 */
static void
suspend_debug(const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	cmn_err(CE_NOTE, "%s", buf);
}

#else /* DEBUG */

#define	DBG_PROM
#define	DBG

#endif /* DEBUG */
169*023e71deSHaik Aftandilian 
170*023e71deSHaik Aftandilian /*
171*023e71deSHaik Aftandilian  * Return true if the HV supports OS suspend and if suspend has not been
172*023e71deSHaik Aftandilian  * disabled on this platform.
173*023e71deSHaik Aftandilian  */
174*023e71deSHaik Aftandilian boolean_t
175*023e71deSHaik Aftandilian suspend_supported(void)
176*023e71deSHaik Aftandilian {
177*023e71deSHaik Aftandilian 	uint64_t major, minor;
178*023e71deSHaik Aftandilian 
179*023e71deSHaik Aftandilian 	if (suspend_disabled)
180*023e71deSHaik Aftandilian 		return (B_FALSE);
181*023e71deSHaik Aftandilian 
182*023e71deSHaik Aftandilian 	if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0)
183*023e71deSHaik Aftandilian 		return (B_FALSE);
184*023e71deSHaik Aftandilian 
185*023e71deSHaik Aftandilian 	return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) ||
186*023e71deSHaik Aftandilian 	    (major > SUSPEND_CORE_MAJOR));
187*023e71deSHaik Aftandilian }
188*023e71deSHaik Aftandilian 
189*023e71deSHaik Aftandilian /*
190*023e71deSHaik Aftandilian  * Given a source tick and stick value, set the tick and stick offsets such
191*023e71deSHaik Aftandilian  * that the (current physical register value + offset == source value).
192*023e71deSHaik Aftandilian  */
193*023e71deSHaik Aftandilian static void
194*023e71deSHaik Aftandilian set_tick_offsets(uint64_t source_tick, uint64_t source_stick)
195*023e71deSHaik Aftandilian {
196*023e71deSHaik Aftandilian 	uint64_t target_tick;
197*023e71deSHaik Aftandilian 	uint64_t target_stick;
198*023e71deSHaik Aftandilian 
199*023e71deSHaik Aftandilian 	native_tick_offset = 0;
200*023e71deSHaik Aftandilian 	native_stick_offset = 0;
201*023e71deSHaik Aftandilian 
202*023e71deSHaik Aftandilian 	target_tick = gettick_counter();	/* returns %tick */
203*023e71deSHaik Aftandilian 	target_stick = gettick();		/* returns %stick */
204*023e71deSHaik Aftandilian 
205*023e71deSHaik Aftandilian 	native_tick_offset = source_tick - target_tick;
206*023e71deSHaik Aftandilian 	native_stick_offset = source_stick - target_stick;
207*023e71deSHaik Aftandilian }
208*023e71deSHaik Aftandilian 
/*
 * Set the {tick,stick}.NPT field to 1 on this CPU.
 *
 * Takes no arguments so that it can be used as a cross-call target:
 * suspend_start passes it to xc_all() to run it on every CPU.
 */
static void
enable_tick_stick_npt(void)
{
	hv_stick_set_npt(1);
	hv_tick_set_npt(1);
}
218*023e71deSHaik Aftandilian 
219*023e71deSHaik Aftandilian /*
220*023e71deSHaik Aftandilian  * Synchronize a CPU's {tick,stick}.NPT fields with the current state
221*023e71deSHaik Aftandilian  * of the system. This is used when a CPU is DR'd into the system.
222*023e71deSHaik Aftandilian  */
223*023e71deSHaik Aftandilian void
224*023e71deSHaik Aftandilian suspend_sync_tick_stick_npt(void)
225*023e71deSHaik Aftandilian {
226*023e71deSHaik Aftandilian 	if (tick_stick_emulation_active) {
227*023e71deSHaik Aftandilian 		DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id);
228*023e71deSHaik Aftandilian 		hv_stick_set_npt(1);
229*023e71deSHaik Aftandilian 		hv_tick_set_npt(1);
230*023e71deSHaik Aftandilian 	} else {
231*023e71deSHaik Aftandilian 		ASSERT(gettick_npt() == 0);
232*023e71deSHaik Aftandilian 		ASSERT(getstick_npt() == 0);
233*023e71deSHaik Aftandilian 	}
234*023e71deSHaik Aftandilian }
235*023e71deSHaik Aftandilian 
236*023e71deSHaik Aftandilian /*
237*023e71deSHaik Aftandilian  * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW
238*023e71deSHaik Aftandilian  * sharing data structures, and processor groups.
239*023e71deSHaik Aftandilian  */
240*023e71deSHaik Aftandilian static void
241*023e71deSHaik Aftandilian update_cpu_mappings(void)
242*023e71deSHaik Aftandilian {
243*023e71deSHaik Aftandilian 	md_t		*mdp;
244*023e71deSHaik Aftandilian 	processorid_t	id;
245*023e71deSHaik Aftandilian 	cpu_t		*cp;
246*023e71deSHaik Aftandilian 	int		rv;
247*023e71deSHaik Aftandilian 	cpu_pg_t	*pgps[NCPU];
248*023e71deSHaik Aftandilian 
249*023e71deSHaik Aftandilian 	/* Download the latest MD */
250*023e71deSHaik Aftandilian 	if ((rv = mach_descrip_update()) != 0) {
251*023e71deSHaik Aftandilian 		DBG("suspend: mach_descrip_update error: %d", rv);
252*023e71deSHaik Aftandilian 		return;
253*023e71deSHaik Aftandilian 	}
254*023e71deSHaik Aftandilian 
255*023e71deSHaik Aftandilian 	if ((mdp = md_get_handle()) == NULL) {
256*023e71deSHaik Aftandilian 		DBG("suspend: md_get_handle failed");
257*023e71deSHaik Aftandilian 		return;
258*023e71deSHaik Aftandilian 	}
259*023e71deSHaik Aftandilian 
260*023e71deSHaik Aftandilian 	DBG("suspend: updating CPU mappings");
261*023e71deSHaik Aftandilian 
262*023e71deSHaik Aftandilian 	mutex_enter(&cpu_lock);
263*023e71deSHaik Aftandilian 
264*023e71deSHaik Aftandilian 	setup_chip_mappings(mdp);
265*023e71deSHaik Aftandilian 	setup_exec_unit_mappings(mdp);
266*023e71deSHaik Aftandilian 	for (id = 0; id < NCPU; id++) {
267*023e71deSHaik Aftandilian 		if ((cp = cpu_get(id)) == NULL)
268*023e71deSHaik Aftandilian 			continue;
269*023e71deSHaik Aftandilian 		cpu_map_exec_units(cp);
270*023e71deSHaik Aftandilian 	}
271*023e71deSHaik Aftandilian 
272*023e71deSHaik Aftandilian 	/*
273*023e71deSHaik Aftandilian 	 * Re-calculate processor groups.
274*023e71deSHaik Aftandilian 	 *
275*023e71deSHaik Aftandilian 	 * First tear down all PG information before adding any new PG
276*023e71deSHaik Aftandilian 	 * information derived from the MD we just downloaded. We must
277*023e71deSHaik Aftandilian 	 * call pg_cpu_inactive and pg_cpu_active with CPUs paused and
278*023e71deSHaik Aftandilian 	 * we want to minimize the number of times pause_cpus is called.
279*023e71deSHaik Aftandilian 	 * Inactivating all CPUs would leave PGs without any active CPUs,
280*023e71deSHaik Aftandilian 	 * so while CPUs are paused, call pg_cpu_inactive and swap in the
281*023e71deSHaik Aftandilian 	 * bootstrap PG structure saving the original PG structure to be
282*023e71deSHaik Aftandilian 	 * fini'd afterwards. This prevents the dispatcher from encountering
283*023e71deSHaik Aftandilian 	 * PGs in which all CPUs are inactive.
284*023e71deSHaik Aftandilian 	 */
285*023e71deSHaik Aftandilian 	pause_cpus(NULL);
286*023e71deSHaik Aftandilian 	for (id = 0; id < NCPU; id++) {
287*023e71deSHaik Aftandilian 		if ((cp = cpu_get(id)) == NULL)
288*023e71deSHaik Aftandilian 			continue;
289*023e71deSHaik Aftandilian 		pg_cpu_inactive(cp);
290*023e71deSHaik Aftandilian 		pgps[id] = cp->cpu_pg;
291*023e71deSHaik Aftandilian 		pg_cpu_bootstrap(cp);
292*023e71deSHaik Aftandilian 	}
293*023e71deSHaik Aftandilian 	start_cpus();
294*023e71deSHaik Aftandilian 
295*023e71deSHaik Aftandilian 	/*
296*023e71deSHaik Aftandilian 	 * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are
297*023e71deSHaik Aftandilian 	 * not paused. Use two separate loops here so that we do not
298*023e71deSHaik Aftandilian 	 * initialize PG data for CPUs until all the old PG data structures
299*023e71deSHaik Aftandilian 	 * are torn down.
300*023e71deSHaik Aftandilian 	 */
301*023e71deSHaik Aftandilian 	for (id = 0; id < NCPU; id++) {
302*023e71deSHaik Aftandilian 		if ((cp = cpu_get(id)) == NULL)
303*023e71deSHaik Aftandilian 			continue;
304*023e71deSHaik Aftandilian 		pg_cpu_fini(cp, pgps[id]);
305*023e71deSHaik Aftandilian 	}
306*023e71deSHaik Aftandilian 
307*023e71deSHaik Aftandilian 	/*
308*023e71deSHaik Aftandilian 	 * Initialize PG data for each CPU, but leave the bootstrapped
309*023e71deSHaik Aftandilian 	 * PG structure in place to avoid running with any PGs containing
310*023e71deSHaik Aftandilian 	 * nothing but inactive CPUs.
311*023e71deSHaik Aftandilian 	 */
312*023e71deSHaik Aftandilian 	for (id = 0; id < NCPU; id++) {
313*023e71deSHaik Aftandilian 		if ((cp = cpu_get(id)) == NULL)
314*023e71deSHaik Aftandilian 			continue;
315*023e71deSHaik Aftandilian 		pgps[id] = pg_cpu_init(cp, B_TRUE);
316*023e71deSHaik Aftandilian 	}
317*023e71deSHaik Aftandilian 
318*023e71deSHaik Aftandilian 	/*
319*023e71deSHaik Aftandilian 	 * Now that PG data has been initialized for all CPUs in the
320*023e71deSHaik Aftandilian 	 * system, replace the bootstrapped PG structure with the
321*023e71deSHaik Aftandilian 	 * initialized PG structure and call pg_cpu_active for each CPU.
322*023e71deSHaik Aftandilian 	 */
323*023e71deSHaik Aftandilian 	pause_cpus(NULL);
324*023e71deSHaik Aftandilian 	for (id = 0; id < NCPU; id++) {
325*023e71deSHaik Aftandilian 		if ((cp = cpu_get(id)) == NULL)
326*023e71deSHaik Aftandilian 			continue;
327*023e71deSHaik Aftandilian 		cp->cpu_pg = pgps[id];
328*023e71deSHaik Aftandilian 		pg_cpu_active(cp);
329*023e71deSHaik Aftandilian 	}
330*023e71deSHaik Aftandilian 	start_cpus();
331*023e71deSHaik Aftandilian 
332*023e71deSHaik Aftandilian 	mutex_exit(&cpu_lock);
333*023e71deSHaik Aftandilian 
334*023e71deSHaik Aftandilian 	(void) md_fini_handle(mdp);
335*023e71deSHaik Aftandilian }
336*023e71deSHaik Aftandilian 
337*023e71deSHaik Aftandilian /*
338*023e71deSHaik Aftandilian  * Wrapper for the Sun Cluster error decoding function.
339*023e71deSHaik Aftandilian  */
340*023e71deSHaik Aftandilian static int
341*023e71deSHaik Aftandilian cluster_error_decode(int error, char *error_reason, size_t max_reason_len)
342*023e71deSHaik Aftandilian {
343*023e71deSHaik Aftandilian 	const char	*decoded;
344*023e71deSHaik Aftandilian 	size_t		decoded_len;
345*023e71deSHaik Aftandilian 
346*023e71deSHaik Aftandilian 	ASSERT(error_reason != NULL);
347*023e71deSHaik Aftandilian 	ASSERT(max_reason_len > 0);
348*023e71deSHaik Aftandilian 
349*023e71deSHaik Aftandilian 	max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX);
350*023e71deSHaik Aftandilian 
351*023e71deSHaik Aftandilian 	if (cl_suspend_error_decode == NULL)
352*023e71deSHaik Aftandilian 		return (-1);
353*023e71deSHaik Aftandilian 
354*023e71deSHaik Aftandilian 	if ((decoded = (*cl_suspend_error_decode)(error)) == NULL)
355*023e71deSHaik Aftandilian 		return (-1);
356*023e71deSHaik Aftandilian 
357*023e71deSHaik Aftandilian 	/* Get number of non-NULL bytes */
358*023e71deSHaik Aftandilian 	if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0)
359*023e71deSHaik Aftandilian 		return (-1);
360*023e71deSHaik Aftandilian 
361*023e71deSHaik Aftandilian 	bcopy(decoded, error_reason, decoded_len);
362*023e71deSHaik Aftandilian 
363*023e71deSHaik Aftandilian 	/*
364*023e71deSHaik Aftandilian 	 * The error string returned from cl_suspend_error_decode
365*023e71deSHaik Aftandilian 	 * should be NULL-terminated, but set the terminator here
366*023e71deSHaik Aftandilian 	 * because we only copied non-NULL bytes. If the decoded
367*023e71deSHaik Aftandilian 	 * string was not NULL-terminated, this guarantees that
368*023e71deSHaik Aftandilian 	 * error_reason will be.
369*023e71deSHaik Aftandilian 	 */
370*023e71deSHaik Aftandilian 	error_reason[decoded_len] = '\0';
371*023e71deSHaik Aftandilian 
372*023e71deSHaik Aftandilian 	return (0);
373*023e71deSHaik Aftandilian }
374*023e71deSHaik Aftandilian 
375*023e71deSHaik Aftandilian /*
376*023e71deSHaik Aftandilian  * Wrapper for the Sun Cluster pre-suspend callback.
377*023e71deSHaik Aftandilian  */
378*023e71deSHaik Aftandilian static int
379*023e71deSHaik Aftandilian cluster_pre_wrapper(char *error_reason, size_t max_reason_len)
380*023e71deSHaik Aftandilian {
381*023e71deSHaik Aftandilian 	int rv = 0;
382*023e71deSHaik Aftandilian 
383*023e71deSHaik Aftandilian 	if (cl_suspend_pre_callback != NULL) {
384*023e71deSHaik Aftandilian 		rv = (*cl_suspend_pre_callback)();
385*023e71deSHaik Aftandilian 		DBG("suspend: cl_suspend_pre_callback returned %d", rv);
386*023e71deSHaik Aftandilian 		if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
387*023e71deSHaik Aftandilian 			if (cluster_error_decode(rv, error_reason,
388*023e71deSHaik Aftandilian 			    max_reason_len)) {
389*023e71deSHaik Aftandilian 				(void) snprintf(error_reason, max_reason_len,
390*023e71deSHaik Aftandilian 				    SC_PRE_FAIL_STR_FMT, rv);
391*023e71deSHaik Aftandilian 			}
392*023e71deSHaik Aftandilian 		}
393*023e71deSHaik Aftandilian 	}
394*023e71deSHaik Aftandilian 
395*023e71deSHaik Aftandilian 	return (rv);
396*023e71deSHaik Aftandilian }
397*023e71deSHaik Aftandilian 
398*023e71deSHaik Aftandilian /*
399*023e71deSHaik Aftandilian  * Wrapper for the Sun Cluster post-suspend callback.
400*023e71deSHaik Aftandilian  */
401*023e71deSHaik Aftandilian static int
402*023e71deSHaik Aftandilian cluster_post_wrapper(char *error_reason, size_t max_reason_len)
403*023e71deSHaik Aftandilian {
404*023e71deSHaik Aftandilian 	int rv = 0;
405*023e71deSHaik Aftandilian 
406*023e71deSHaik Aftandilian 	if (cl_suspend_post_callback != NULL) {
407*023e71deSHaik Aftandilian 		rv = (*cl_suspend_post_callback)();
408*023e71deSHaik Aftandilian 		DBG("suspend: cl_suspend_post_callback returned %d", rv);
409*023e71deSHaik Aftandilian 		if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
410*023e71deSHaik Aftandilian 			if (cluster_error_decode(rv, error_reason,
411*023e71deSHaik Aftandilian 			    max_reason_len)) {
412*023e71deSHaik Aftandilian 				(void) snprintf(error_reason,
413*023e71deSHaik Aftandilian 				    max_reason_len, SC_POST_FAIL_STR_FMT, rv);
414*023e71deSHaik Aftandilian 			}
415*023e71deSHaik Aftandilian 		}
416*023e71deSHaik Aftandilian 	}
417*023e71deSHaik Aftandilian 
418*023e71deSHaik Aftandilian 	return (rv);
419*023e71deSHaik Aftandilian }
420*023e71deSHaik Aftandilian 
421*023e71deSHaik Aftandilian /*
422*023e71deSHaik Aftandilian  * Execute pre-suspend callbacks preparing the system for a suspend operation.
423*023e71deSHaik Aftandilian  * Returns zero on success, non-zero on failure. Sets the recovered argument
424*023e71deSHaik Aftandilian  * to indicate whether or not callbacks could be undone in the event of a
425*023e71deSHaik Aftandilian  * failure--if callbacks were successfully undone, *recovered is set to B_TRUE,
426*023e71deSHaik Aftandilian  * otherwise *recovered is set to B_FALSE. Must be called successfully before
427*023e71deSHaik Aftandilian  * suspend_start can be called. Callers should first call suspend_support to
428*023e71deSHaik Aftandilian  * determine if OS suspend is supported.
429*023e71deSHaik Aftandilian  */
430*023e71deSHaik Aftandilian int
431*023e71deSHaik Aftandilian suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered)
432*023e71deSHaik Aftandilian {
433*023e71deSHaik Aftandilian 	int rv;
434*023e71deSHaik Aftandilian 
435*023e71deSHaik Aftandilian 	ASSERT(recovered != NULL);
436*023e71deSHaik Aftandilian 
437*023e71deSHaik Aftandilian 	/*
438*023e71deSHaik Aftandilian 	 * Return an error if suspend_pre is erreoneously called
439*023e71deSHaik Aftandilian 	 * when OS suspend is not supported.
440*023e71deSHaik Aftandilian 	 */
441*023e71deSHaik Aftandilian 	ASSERT(suspend_supported());
442*023e71deSHaik Aftandilian 	if (!suspend_supported()) {
443*023e71deSHaik Aftandilian 		DBG("suspend: suspend_pre called without suspend support");
444*023e71deSHaik Aftandilian 		*recovered = B_TRUE;
445*023e71deSHaik Aftandilian 		return (ENOTSUP);
446*023e71deSHaik Aftandilian 	}
447*023e71deSHaik Aftandilian 	DBG("suspend: %s", __func__);
448*023e71deSHaik Aftandilian 
449*023e71deSHaik Aftandilian 	rv = cluster_pre_wrapper(error_reason, max_reason_len);
450*023e71deSHaik Aftandilian 
451*023e71deSHaik Aftandilian 	/*
452*023e71deSHaik Aftandilian 	 * At present, only one pre-suspend operation exists.
453*023e71deSHaik Aftandilian 	 * If it fails, no recovery needs to be done.
454*023e71deSHaik Aftandilian 	 */
455*023e71deSHaik Aftandilian 	if (rv != 0 && recovered != NULL)
456*023e71deSHaik Aftandilian 		*recovered = B_TRUE;
457*023e71deSHaik Aftandilian 
458*023e71deSHaik Aftandilian 	return (rv);
459*023e71deSHaik Aftandilian }
460*023e71deSHaik Aftandilian 
/*
 * Run the post-suspend callbacks. Must be called after suspend_start,
 * whether or not suspend_start succeeded.
 *
 * error_reason		optional buffer that receives a failure description
 * max_reason_len	size of error_reason in bytes
 *
 * Returns zero on success, non-zero on failure.
 */
int
suspend_post(char *error_reason, size_t max_reason_len)
{
	int	status;

	ASSERT(suspend_supported());
	DBG("suspend: %s", __func__);

	status = cluster_post_wrapper(error_reason, max_reason_len);
	return (status);
}
473*023e71deSHaik Aftandilian 
/*
 * Suspends the OS by pausing CPUs and calling into the HV to initiate
 * the suspend. When the HV routine hv_guest_suspend returns, the system
 * will be resumed. Must be called after a successful call to suspend_pre.
 * suspend_post must be called after suspend_start, whether or not
 * suspend_start returns an error.
 *
 * error_reason and max_reason_len are accepted for symmetry with
 * suspend_pre and suspend_post but are not used by this function.
 *
 * Returns zero after a successful suspend/resume cycle, otherwise the
 * non-zero status returned by hv_guest_suspend.
 */
/*ARGSUSED*/
int
suspend_start(char *error_reason, size_t max_reason_len)
{
	uint64_t	source_tick;
	uint64_t	source_stick;
	uint64_t	rv;
	timestruc_t	source_tod;
	int		spl;

	ASSERT(suspend_supported());
	DBG("suspend: %s", __func__);

	/* cpu_lock is held across the whole pause/suspend/resume sequence */
	mutex_enter(&cpu_lock);

	/* Suspend the watchdog */
	watchdog_suspend();

	/* Record the TOD so it can be written back after the resume */
	mutex_enter(&tod_lock);
	source_tod = tod_get();
	mutex_exit(&tod_lock);

	/* Pause all other CPUs */
	pause_cpus(NULL);
	DBG_PROM("suspend: CPUs paused\n");

	/* Suspend cyclics and disable interrupts */
	cyclic_suspend();
	DBG_PROM("suspend: cyclics suspended\n");
	spl = spl8();

	/*
	 * Sample the counters just before suspending; these are the values
	 * the offsets are later computed against.
	 */
	source_tick = gettick_counter();
	source_stick = gettick();
	DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick);
	DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick);

	/*
	 * Call into the HV to initiate the suspend.
	 * hv_guest_suspend() returns after the guest has been
	 * resumed or if the suspend operation failed or was
	 * cancelled. After a successful suspend, the %tick and
	 * %stick registers may have changed by an amount that is
	 * not proportional to the amount of time that has passed.
	 * They may have jumped forwards or backwards. This jump
	 * must be uniform across all CPUs and we operate under
	 * the assumption that it is (maintaining two global offset
	 * variables--one for %tick and one for %stick.)
	 */
	DBG_PROM("suspend: suspending... \n");
	rv = hv_guest_suspend();
	if (rv != 0) {
		/*
		 * The suspend did not happen: undo the preparation steps
		 * and drop cpu_lock. The tick/stick offsets, the NPT bits,
		 * and the TOD are left untouched on this path.
		 */
		splx(spl);
		cyclic_resume();
		start_cpus();
		watchdog_resume();
		mutex_exit(&cpu_lock);
		DBG("suspend: failed, rv: %ld\n", rv);
		/* NOTE(review): the uint64_t HV status is narrowed to int */
		return (rv);
	}

	/* Update the global tick and stick offsets */
	set_tick_offsets(source_tick, source_stick);

	/* Ensure new offsets are globally visible before resuming CPUs */
	membar_sync();

	/* Enable interrupts */
	splx(spl);

	/* Set the {%tick,%stick}.NPT bits on all CPUs */
	if (enable_user_tick_stick_emulation) {
		xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL);
		xt_sync(cpu_ready_set);
		/* DEBUG-only check that the bits took effect on this CPU */
		ASSERT(gettick_npt() != 0);
		ASSERT(getstick_npt() != 0);
	}

	/* If emulation is enabled, but not currently active, enable it */
	if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) {
		tick_stick_emulation_active = B_TRUE;
	}

	/* Resume cyclics, unpause CPUs */
	cyclic_resume();
	start_cpus();

	/* Set the TOD back to the value recorded before the suspend */
	mutex_enter(&tod_lock);
	tod_set(source_tod);
	mutex_exit(&tod_lock);

	/* Re-enable the watchdog */
	watchdog_resume();

	/*
	 * cpu_lock must be dropped before update_cpu_mappings(), which
	 * acquires it itself.
	 */
	mutex_exit(&cpu_lock);

	/* Get new MD, update CPU mappings/relationships */
	if (suspend_update_cpu_mappings)
		update_cpu_mappings();

	DBG("suspend: target tick: 0x%lx", gettick_counter());
	DBG("suspend: target stick: 0x%llx", gettick());
	DBG("suspend: user %%tick/%%stick emulation is %d",
	    tick_stick_emulation_active);
	DBG("suspend: finished");

	return (0);
}
590