1*023e71deSHaik Aftandilian /* 2*023e71deSHaik Aftandilian * CDDL HEADER START 3*023e71deSHaik Aftandilian * 4*023e71deSHaik Aftandilian * The contents of this file are subject to the terms of the 5*023e71deSHaik Aftandilian * Common Development and Distribution License (the "License"). 6*023e71deSHaik Aftandilian * You may not use this file except in compliance with the License. 7*023e71deSHaik Aftandilian * 8*023e71deSHaik Aftandilian * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*023e71deSHaik Aftandilian * or http://www.opensolaris.org/os/licensing. 10*023e71deSHaik Aftandilian * See the License for the specific language governing permissions 11*023e71deSHaik Aftandilian * and limitations under the License. 12*023e71deSHaik Aftandilian * 13*023e71deSHaik Aftandilian * When distributing Covered Code, include this CDDL HEADER in each 14*023e71deSHaik Aftandilian * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*023e71deSHaik Aftandilian * If applicable, add the following below this CDDL HEADER, with the 16*023e71deSHaik Aftandilian * fields enclosed by brackets "[]" replaced with your own identifying 17*023e71deSHaik Aftandilian * information: Portions Copyright [yyyy] [name of copyright owner] 18*023e71deSHaik Aftandilian * 19*023e71deSHaik Aftandilian * CDDL HEADER END 20*023e71deSHaik Aftandilian */ 21*023e71deSHaik Aftandilian /* 22*023e71deSHaik Aftandilian * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23*023e71deSHaik Aftandilian * Use is subject to license terms. 24*023e71deSHaik Aftandilian */ 25*023e71deSHaik Aftandilian 26*023e71deSHaik Aftandilian #include <sys/mutex.h> 27*023e71deSHaik Aftandilian #include <sys/cpuvar.h> 28*023e71deSHaik Aftandilian #include <sys/cyclic.h> 29*023e71deSHaik Aftandilian #include <sys/disp.h> 30*023e71deSHaik Aftandilian #include <sys/ddi.h> 31*023e71deSHaik Aftandilian #include <sys/wdt.h> 32*023e71deSHaik Aftandilian #include <sys/callb.h> 33*023e71deSHaik Aftandilian #include <sys/cmn_err.h> 34*023e71deSHaik Aftandilian #include <sys/hypervisor_api.h> 35*023e71deSHaik Aftandilian #include <sys/membar.h> 36*023e71deSHaik Aftandilian #include <sys/x_call.h> 37*023e71deSHaik Aftandilian #include <sys/promif.h> 38*023e71deSHaik Aftandilian #include <sys/systm.h> 39*023e71deSHaik Aftandilian #include <sys/mach_descrip.h> 40*023e71deSHaik Aftandilian #include <sys/cpu_module.h> 41*023e71deSHaik Aftandilian #include <sys/pg.h> 42*023e71deSHaik Aftandilian #include <sys/lgrp.h> 43*023e71deSHaik Aftandilian #include <sys/sysmacros.h> 44*023e71deSHaik Aftandilian #include <sys/sunddi.h> 45*023e71deSHaik Aftandilian #include <sys/cpupart.h> 46*023e71deSHaik Aftandilian #include <sys/hsvc.h> 47*023e71deSHaik Aftandilian 48*023e71deSHaik Aftandilian /* 49*023e71deSHaik Aftandilian * Sun4v OS Suspend 50*023e71deSHaik Aftandilian * 51*023e71deSHaik Aftandilian * Provides a means to suspend a sun4v guest domain by pausing CPUs and then 52*023e71deSHaik Aftandilian * calling into the HV to initiate a suspension. Suspension is sequenced 53*023e71deSHaik Aftandilian * externally by calling suspend_pre, suspend_start, and suspend_post. 54*023e71deSHaik Aftandilian * suspend_pre and suspend_post are meant to perform any special operations 55*023e71deSHaik Aftandilian * that should be done before or after a suspend/resume operation. e.g., 56*023e71deSHaik Aftandilian * callbacks to cluster software to disable heartbeat monitoring before the 57*023e71deSHaik Aftandilian * system is suspended. suspend_start prepares kernel services to be suspended 58*023e71deSHaik Aftandilian * and then suspends the domain by calling hv_guest_suspend. 59*023e71deSHaik Aftandilian * 60*023e71deSHaik Aftandilian * Special Handling for %tick and %stick Registers 61*023e71deSHaik Aftandilian * 62*023e71deSHaik Aftandilian * After a suspend/resume operation, the %tick and %stick registers may have 63*023e71deSHaik Aftandilian * jumped forwards or backwards. The delta is assumed to be consistent across 64*023e71deSHaik Aftandilian * all CPUs, within the negligible level of %tick and %stick variation 65*023e71deSHaik Aftandilian * acceptable on a cold boot. In order to maintain increasing %tick and %stick 66*023e71deSHaik Aftandilian * counter values without exposing large positive or negative jumps to kernel 67*023e71deSHaik Aftandilian * or user code, a %tick and %stick offset is used. Kernel reads of these 68*023e71deSHaik Aftandilian * counters return the sum of the hardware register counter and offset 69*023e71deSHaik Aftandilian * variable. After a suspend/resume operation, user reads of %tick or %stick 70*023e71deSHaik Aftandilian * are emulated. Suspend code enables emulation by setting the 71*023e71deSHaik Aftandilian * %{tick,stick}.NPT fields which trigger a privileged instruction access 72*023e71deSHaik Aftandilian * trap whenever the registers are read from user mode. If emulation has been 73*023e71deSHaik Aftandilian * enabled, the trap handler emulates the instruction. Emulation is only 74*023e71deSHaik Aftandilian * enabled during a successful suspend/resume operation. When emulation is 75*023e71deSHaik Aftandilian * enabled, CPUs that are DR'd into the system will have their 76*023e71deSHaik Aftandilian * %{tick,stick}.NPT bits set to 1 as well. 77*023e71deSHaik Aftandilian */ 78*023e71deSHaik Aftandilian 79*023e71deSHaik Aftandilian extern u_longlong_t gettick(void); /* returns %stick */ 80*023e71deSHaik Aftandilian extern uint64_t gettick_counter(void); /* returns %tick */ 81*023e71deSHaik Aftandilian extern uint64_t gettick_npt(void); 82*023e71deSHaik Aftandilian extern uint64_t getstick_npt(void); 83*023e71deSHaik Aftandilian extern int mach_descrip_update(void); 84*023e71deSHaik Aftandilian extern cpuset_t cpu_ready_set; 85*023e71deSHaik Aftandilian extern uint64_t native_tick_offset; 86*023e71deSHaik Aftandilian extern uint64_t native_stick_offset; 87*023e71deSHaik Aftandilian 88*023e71deSHaik Aftandilian /* 89*023e71deSHaik Aftandilian * Global Sun Cluster pre/post callbacks. 90*023e71deSHaik Aftandilian */ 91*023e71deSHaik Aftandilian const char *(*cl_suspend_error_decode)(int); 92*023e71deSHaik Aftandilian int (*cl_suspend_pre_callback)(void); 93*023e71deSHaik Aftandilian int (*cl_suspend_post_callback)(void); 94*023e71deSHaik Aftandilian #define SC_PRE_FAIL_STR_FMT "Sun Cluster pre-suspend failure: %d" 95*023e71deSHaik Aftandilian #define SC_POST_FAIL_STR_FMT "Sun Cluster post-suspend failure: %d" 96*023e71deSHaik Aftandilian #define SC_FAIL_STR_MAX 256 97*023e71deSHaik Aftandilian 98*023e71deSHaik Aftandilian /* 99*023e71deSHaik Aftandilian * The minimum major and minor version of the HSVC_GROUP_CORE API group 100*023e71deSHaik Aftandilian * required in order to use OS suspend. 101*023e71deSHaik Aftandilian */ 102*023e71deSHaik Aftandilian #define SUSPEND_CORE_MAJOR 1 103*023e71deSHaik Aftandilian #define SUSPEND_CORE_MINOR 2 104*023e71deSHaik Aftandilian 105*023e71deSHaik Aftandilian /* 106*023e71deSHaik Aftandilian * By default, sun4v OS suspend is supported if the required HV version 107*023e71deSHaik Aftandilian * is present. suspend_disabled should be set on platforms that do not 108*023e71deSHaik Aftandilian * allow OS suspend regardless of whether or not the HV supports it. 109*023e71deSHaik Aftandilian * It can also be set in /etc/system. 110*023e71deSHaik Aftandilian */ 111*023e71deSHaik Aftandilian static int suspend_disabled = 0; 112*023e71deSHaik Aftandilian 113*023e71deSHaik Aftandilian /* 114*023e71deSHaik Aftandilian * Controls whether or not user-land tick and stick register emulation 115*023e71deSHaik Aftandilian * will be enabled following a successful suspend operation. 116*023e71deSHaik Aftandilian */ 117*023e71deSHaik Aftandilian static int enable_user_tick_stick_emulation = 1; 118*023e71deSHaik Aftandilian 119*023e71deSHaik Aftandilian /* 120*023e71deSHaik Aftandilian * Indicates whether or not tick and stick emulation is currently active. 121*023e71deSHaik Aftandilian * After a successful suspend operation, if emulation is enabled, this 122*023e71deSHaik Aftandilian * variable is set to B_TRUE. Global scope to allow emulation code to 123*023e71deSHaik Aftandilian * check if emulation is active. 124*023e71deSHaik Aftandilian */ 125*023e71deSHaik Aftandilian boolean_t tick_stick_emulation_active = B_FALSE; 126*023e71deSHaik Aftandilian 127*023e71deSHaik Aftandilian /* 128*023e71deSHaik Aftandilian * Controls whether or not MD information is refreshed after a 129*023e71deSHaik Aftandilian * successful suspend and resume. When non-zero, after a successful 130*023e71deSHaik Aftandilian * suspend and resume, the MD will be downloaded, cpunodes updated, 131*023e71deSHaik Aftandilian * and processor grouping information recalculated. 132*023e71deSHaik Aftandilian */ 133*023e71deSHaik Aftandilian static int suspend_update_cpu_mappings = 1; 134*023e71deSHaik Aftandilian 135*023e71deSHaik Aftandilian /* 136*023e71deSHaik Aftandilian * DBG and DBG_PROM() macro. 137*023e71deSHaik Aftandilian */ 138*023e71deSHaik Aftandilian #ifdef DEBUG 139*023e71deSHaik Aftandilian 140*023e71deSHaik Aftandilian static int suspend_debug_flag = 0; 141*023e71deSHaik Aftandilian 142*023e71deSHaik Aftandilian #define DBG_PROM \ 143*023e71deSHaik Aftandilian if (suspend_debug_flag) \ 144*023e71deSHaik Aftandilian prom_printf 145*023e71deSHaik Aftandilian 146*023e71deSHaik Aftandilian #define DBG \ 147*023e71deSHaik Aftandilian if (suspend_debug_flag) \ 148*023e71deSHaik Aftandilian suspend_debug 149*023e71deSHaik Aftandilian 150*023e71deSHaik Aftandilian static void 151*023e71deSHaik Aftandilian suspend_debug(const char *fmt, ...) 152*023e71deSHaik Aftandilian { 153*023e71deSHaik Aftandilian char buf[512]; 154*023e71deSHaik Aftandilian va_list ap; 155*023e71deSHaik Aftandilian 156*023e71deSHaik Aftandilian va_start(ap, fmt); 157*023e71deSHaik Aftandilian (void) vsprintf(buf, fmt, ap); 158*023e71deSHaik Aftandilian va_end(ap); 159*023e71deSHaik Aftandilian 160*023e71deSHaik Aftandilian cmn_err(CE_NOTE, "%s", buf); 161*023e71deSHaik Aftandilian } 162*023e71deSHaik Aftandilian 163*023e71deSHaik Aftandilian #else /* DEBUG */ 164*023e71deSHaik Aftandilian 165*023e71deSHaik Aftandilian #define DBG_PROM 166*023e71deSHaik Aftandilian #define DBG 167*023e71deSHaik Aftandilian 168*023e71deSHaik Aftandilian #endif /* DEBUG */ 169*023e71deSHaik Aftandilian 170*023e71deSHaik Aftandilian /* 171*023e71deSHaik Aftandilian * Return true if the HV supports OS suspend and if suspend has not been 172*023e71deSHaik Aftandilian * disabled on this platform. 173*023e71deSHaik Aftandilian */ 174*023e71deSHaik Aftandilian boolean_t 175*023e71deSHaik Aftandilian suspend_supported(void) 176*023e71deSHaik Aftandilian { 177*023e71deSHaik Aftandilian uint64_t major, minor; 178*023e71deSHaik Aftandilian 179*023e71deSHaik Aftandilian if (suspend_disabled) 180*023e71deSHaik Aftandilian return (B_FALSE); 181*023e71deSHaik Aftandilian 182*023e71deSHaik Aftandilian if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0) 183*023e71deSHaik Aftandilian return (B_FALSE); 184*023e71deSHaik Aftandilian 185*023e71deSHaik Aftandilian return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) || 186*023e71deSHaik Aftandilian (major > SUSPEND_CORE_MAJOR)); 187*023e71deSHaik Aftandilian } 188*023e71deSHaik Aftandilian 189*023e71deSHaik Aftandilian /* 190*023e71deSHaik Aftandilian * Given a source tick and stick value, set the tick and stick offsets such 191*023e71deSHaik Aftandilian * that the (current physical register value + offset == source value). 192*023e71deSHaik Aftandilian */ 193*023e71deSHaik Aftandilian static void 194*023e71deSHaik Aftandilian set_tick_offsets(uint64_t source_tick, uint64_t source_stick) 195*023e71deSHaik Aftandilian { 196*023e71deSHaik Aftandilian uint64_t target_tick; 197*023e71deSHaik Aftandilian uint64_t target_stick; 198*023e71deSHaik Aftandilian 199*023e71deSHaik Aftandilian native_tick_offset = 0; 200*023e71deSHaik Aftandilian native_stick_offset = 0; 201*023e71deSHaik Aftandilian 202*023e71deSHaik Aftandilian target_tick = gettick_counter(); /* returns %tick */ 203*023e71deSHaik Aftandilian target_stick = gettick(); /* returns %stick */ 204*023e71deSHaik Aftandilian 205*023e71deSHaik Aftandilian native_tick_offset = source_tick - target_tick; 206*023e71deSHaik Aftandilian native_stick_offset = source_stick - target_stick; 207*023e71deSHaik Aftandilian } 208*023e71deSHaik Aftandilian 209*023e71deSHaik Aftandilian /* 210*023e71deSHaik Aftandilian * Set the {tick,stick}.NPT field to 1 on this CPU. 211*023e71deSHaik Aftandilian */ 212*023e71deSHaik Aftandilian static void 213*023e71deSHaik Aftandilian enable_tick_stick_npt(void) 214*023e71deSHaik Aftandilian { 215*023e71deSHaik Aftandilian hv_stick_set_npt(1); 216*023e71deSHaik Aftandilian hv_tick_set_npt(1); 217*023e71deSHaik Aftandilian } 218*023e71deSHaik Aftandilian 219*023e71deSHaik Aftandilian /* 220*023e71deSHaik Aftandilian * Synchronize a CPU's {tick,stick}.NPT fields with the current state 221*023e71deSHaik Aftandilian * of the system. This is used when a CPU is DR'd into the system. 222*023e71deSHaik Aftandilian */ 223*023e71deSHaik Aftandilian void 224*023e71deSHaik Aftandilian suspend_sync_tick_stick_npt(void) 225*023e71deSHaik Aftandilian { 226*023e71deSHaik Aftandilian if (tick_stick_emulation_active) { 227*023e71deSHaik Aftandilian DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id); 228*023e71deSHaik Aftandilian hv_stick_set_npt(1); 229*023e71deSHaik Aftandilian hv_tick_set_npt(1); 230*023e71deSHaik Aftandilian } else { 231*023e71deSHaik Aftandilian ASSERT(gettick_npt() == 0); 232*023e71deSHaik Aftandilian ASSERT(getstick_npt() == 0); 233*023e71deSHaik Aftandilian } 234*023e71deSHaik Aftandilian } 235*023e71deSHaik Aftandilian 236*023e71deSHaik Aftandilian /* 237*023e71deSHaik Aftandilian * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW 238*023e71deSHaik Aftandilian * sharing data structures, and processor groups. 239*023e71deSHaik Aftandilian */ 240*023e71deSHaik Aftandilian static void 241*023e71deSHaik Aftandilian update_cpu_mappings(void) 242*023e71deSHaik Aftandilian { 243*023e71deSHaik Aftandilian md_t *mdp; 244*023e71deSHaik Aftandilian processorid_t id; 245*023e71deSHaik Aftandilian cpu_t *cp; 246*023e71deSHaik Aftandilian int rv; 247*023e71deSHaik Aftandilian cpu_pg_t *pgps[NCPU]; 248*023e71deSHaik Aftandilian 249*023e71deSHaik Aftandilian /* Download the latest MD */ 250*023e71deSHaik Aftandilian if ((rv = mach_descrip_update()) != 0) { 251*023e71deSHaik Aftandilian DBG("suspend: mach_descrip_update error: %d", rv); 252*023e71deSHaik Aftandilian return; 253*023e71deSHaik Aftandilian } 254*023e71deSHaik Aftandilian 255*023e71deSHaik Aftandilian if ((mdp = md_get_handle()) == NULL) { 256*023e71deSHaik Aftandilian DBG("suspend: md_get_handle failed"); 257*023e71deSHaik Aftandilian return; 258*023e71deSHaik Aftandilian } 259*023e71deSHaik Aftandilian 260*023e71deSHaik Aftandilian DBG("suspend: updating CPU mappings"); 261*023e71deSHaik Aftandilian 262*023e71deSHaik Aftandilian mutex_enter(&cpu_lock); 263*023e71deSHaik Aftandilian 264*023e71deSHaik Aftandilian setup_chip_mappings(mdp); 265*023e71deSHaik Aftandilian setup_exec_unit_mappings(mdp); 266*023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 267*023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 268*023e71deSHaik Aftandilian continue; 269*023e71deSHaik Aftandilian cpu_map_exec_units(cp); 270*023e71deSHaik Aftandilian } 271*023e71deSHaik Aftandilian 272*023e71deSHaik Aftandilian /* 273*023e71deSHaik Aftandilian * Re-calculate processor groups. 274*023e71deSHaik Aftandilian * 275*023e71deSHaik Aftandilian * First tear down all PG information before adding any new PG 276*023e71deSHaik Aftandilian * information derived from the MD we just downloaded. We must 277*023e71deSHaik Aftandilian * call pg_cpu_inactive and pg_cpu_active with CPUs paused and 278*023e71deSHaik Aftandilian * we want to minimize the number of times pause_cpus is called. 279*023e71deSHaik Aftandilian * Inactivating all CPUs would leave PGs without any active CPUs, 280*023e71deSHaik Aftandilian * so while CPUs are paused, call pg_cpu_inactive and swap in the 281*023e71deSHaik Aftandilian * bootstrap PG structure saving the original PG structure to be 282*023e71deSHaik Aftandilian * fini'd afterwards. This prevents the dispatcher from encountering 283*023e71deSHaik Aftandilian * PGs in which all CPUs are inactive. 284*023e71deSHaik Aftandilian */ 285*023e71deSHaik Aftandilian pause_cpus(NULL); 286*023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 287*023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 288*023e71deSHaik Aftandilian continue; 289*023e71deSHaik Aftandilian pg_cpu_inactive(cp); 290*023e71deSHaik Aftandilian pgps[id] = cp->cpu_pg; 291*023e71deSHaik Aftandilian pg_cpu_bootstrap(cp); 292*023e71deSHaik Aftandilian } 293*023e71deSHaik Aftandilian start_cpus(); 294*023e71deSHaik Aftandilian 295*023e71deSHaik Aftandilian /* 296*023e71deSHaik Aftandilian * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are 297*023e71deSHaik Aftandilian * not paused. Use two separate loops here so that we do not 298*023e71deSHaik Aftandilian * initialize PG data for CPUs until all the old PG data structures 299*023e71deSHaik Aftandilian * are torn down. 300*023e71deSHaik Aftandilian */ 301*023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 302*023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 303*023e71deSHaik Aftandilian continue; 304*023e71deSHaik Aftandilian pg_cpu_fini(cp, pgps[id]); 305*023e71deSHaik Aftandilian } 306*023e71deSHaik Aftandilian 307*023e71deSHaik Aftandilian /* 308*023e71deSHaik Aftandilian * Initialize PG data for each CPU, but leave the bootstrapped 309*023e71deSHaik Aftandilian * PG structure in place to avoid running with any PGs containing 310*023e71deSHaik Aftandilian * nothing but inactive CPUs. 311*023e71deSHaik Aftandilian */ 312*023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 313*023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 314*023e71deSHaik Aftandilian continue; 315*023e71deSHaik Aftandilian pgps[id] = pg_cpu_init(cp, B_TRUE); 316*023e71deSHaik Aftandilian } 317*023e71deSHaik Aftandilian 318*023e71deSHaik Aftandilian /* 319*023e71deSHaik Aftandilian * Now that PG data has been initialized for all CPUs in the 320*023e71deSHaik Aftandilian * system, replace the bootstrapped PG structure with the 321*023e71deSHaik Aftandilian * initialized PG structure and call pg_cpu_active for each CPU. 322*023e71deSHaik Aftandilian */ 323*023e71deSHaik Aftandilian pause_cpus(NULL); 324*023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 325*023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 326*023e71deSHaik Aftandilian continue; 327*023e71deSHaik Aftandilian cp->cpu_pg = pgps[id]; 328*023e71deSHaik Aftandilian pg_cpu_active(cp); 329*023e71deSHaik Aftandilian } 330*023e71deSHaik Aftandilian start_cpus(); 331*023e71deSHaik Aftandilian 332*023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 333*023e71deSHaik Aftandilian 334*023e71deSHaik Aftandilian (void) md_fini_handle(mdp); 335*023e71deSHaik Aftandilian } 336*023e71deSHaik Aftandilian 337*023e71deSHaik Aftandilian /* 338*023e71deSHaik Aftandilian * Wrapper for the Sun Cluster error decoding function. 339*023e71deSHaik Aftandilian */ 340*023e71deSHaik Aftandilian static int 341*023e71deSHaik Aftandilian cluster_error_decode(int error, char *error_reason, size_t max_reason_len) 342*023e71deSHaik Aftandilian { 343*023e71deSHaik Aftandilian const char *decoded; 344*023e71deSHaik Aftandilian size_t decoded_len; 345*023e71deSHaik Aftandilian 346*023e71deSHaik Aftandilian ASSERT(error_reason != NULL); 347*023e71deSHaik Aftandilian ASSERT(max_reason_len > 0); 348*023e71deSHaik Aftandilian 349*023e71deSHaik Aftandilian max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX); 350*023e71deSHaik Aftandilian 351*023e71deSHaik Aftandilian if (cl_suspend_error_decode == NULL) 352*023e71deSHaik Aftandilian return (-1); 353*023e71deSHaik Aftandilian 354*023e71deSHaik Aftandilian if ((decoded = (*cl_suspend_error_decode)(error)) == NULL) 355*023e71deSHaik Aftandilian return (-1); 356*023e71deSHaik Aftandilian 357*023e71deSHaik Aftandilian /* Get number of non-NULL bytes */ 358*023e71deSHaik Aftandilian if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0) 359*023e71deSHaik Aftandilian return (-1); 360*023e71deSHaik Aftandilian 361*023e71deSHaik Aftandilian bcopy(decoded, error_reason, decoded_len); 362*023e71deSHaik Aftandilian 363*023e71deSHaik Aftandilian /* 364*023e71deSHaik Aftandilian * The error string returned from cl_suspend_error_decode 365*023e71deSHaik Aftandilian * should be NULL-terminated, but set the terminator here 366*023e71deSHaik Aftandilian * because we only copied non-NULL bytes. If the decoded 367*023e71deSHaik Aftandilian * string was not NULL-terminated, this guarantees that 368*023e71deSHaik Aftandilian * error_reason will be. 369*023e71deSHaik Aftandilian */ 370*023e71deSHaik Aftandilian error_reason[decoded_len] = '\0'; 371*023e71deSHaik Aftandilian 372*023e71deSHaik Aftandilian return (0); 373*023e71deSHaik Aftandilian } 374*023e71deSHaik Aftandilian 375*023e71deSHaik Aftandilian /* 376*023e71deSHaik Aftandilian * Wrapper for the Sun Cluster pre-suspend callback. 377*023e71deSHaik Aftandilian */ 378*023e71deSHaik Aftandilian static int 379*023e71deSHaik Aftandilian cluster_pre_wrapper(char *error_reason, size_t max_reason_len) 380*023e71deSHaik Aftandilian { 381*023e71deSHaik Aftandilian int rv = 0; 382*023e71deSHaik Aftandilian 383*023e71deSHaik Aftandilian if (cl_suspend_pre_callback != NULL) { 384*023e71deSHaik Aftandilian rv = (*cl_suspend_pre_callback)(); 385*023e71deSHaik Aftandilian DBG("suspend: cl_suspend_pre_callback returned %d", rv); 386*023e71deSHaik Aftandilian if (rv != 0 && error_reason != NULL && max_reason_len > 0) { 387*023e71deSHaik Aftandilian if (cluster_error_decode(rv, error_reason, 388*023e71deSHaik Aftandilian max_reason_len)) { 389*023e71deSHaik Aftandilian (void) snprintf(error_reason, max_reason_len, 390*023e71deSHaik Aftandilian SC_PRE_FAIL_STR_FMT, rv); 391*023e71deSHaik Aftandilian } 392*023e71deSHaik Aftandilian } 393*023e71deSHaik Aftandilian } 394*023e71deSHaik Aftandilian 395*023e71deSHaik Aftandilian return (rv); 396*023e71deSHaik Aftandilian } 397*023e71deSHaik Aftandilian 398*023e71deSHaik Aftandilian /* 399*023e71deSHaik Aftandilian * Wrapper for the Sun Cluster post-suspend callback. 400*023e71deSHaik Aftandilian */ 401*023e71deSHaik Aftandilian static int 402*023e71deSHaik Aftandilian cluster_post_wrapper(char *error_reason, size_t max_reason_len) 403*023e71deSHaik Aftandilian { 404*023e71deSHaik Aftandilian int rv = 0; 405*023e71deSHaik Aftandilian 406*023e71deSHaik Aftandilian if (cl_suspend_post_callback != NULL) { 407*023e71deSHaik Aftandilian rv = (*cl_suspend_post_callback)(); 408*023e71deSHaik Aftandilian DBG("suspend: cl_suspend_post_callback returned %d", rv); 409*023e71deSHaik Aftandilian if (rv != 0 && error_reason != NULL && max_reason_len > 0) { 410*023e71deSHaik Aftandilian if (cluster_error_decode(rv, error_reason, 411*023e71deSHaik Aftandilian max_reason_len)) { 412*023e71deSHaik Aftandilian (void) snprintf(error_reason, 413*023e71deSHaik Aftandilian max_reason_len, SC_POST_FAIL_STR_FMT, rv); 414*023e71deSHaik Aftandilian } 415*023e71deSHaik Aftandilian } 416*023e71deSHaik Aftandilian } 417*023e71deSHaik Aftandilian 418*023e71deSHaik Aftandilian return (rv); 419*023e71deSHaik Aftandilian } 420*023e71deSHaik Aftandilian 421*023e71deSHaik Aftandilian /* 422*023e71deSHaik Aftandilian * Execute pre-suspend callbacks preparing the system for a suspend operation. 423*023e71deSHaik Aftandilian * Returns zero on success, non-zero on failure. Sets the recovered argument 424*023e71deSHaik Aftandilian * to indicate whether or not callbacks could be undone in the event of a 425*023e71deSHaik Aftandilian * failure--if callbacks were successfully undone, *recovered is set to B_TRUE, 426*023e71deSHaik Aftandilian * otherwise *recovered is set to B_FALSE. Must be called successfully before 427*023e71deSHaik Aftandilian * suspend_start can be called. Callers should first call suspend_support to 428*023e71deSHaik Aftandilian * determine if OS suspend is supported. 429*023e71deSHaik Aftandilian */ 430*023e71deSHaik Aftandilian int 431*023e71deSHaik Aftandilian suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered) 432*023e71deSHaik Aftandilian { 433*023e71deSHaik Aftandilian int rv; 434*023e71deSHaik Aftandilian 435*023e71deSHaik Aftandilian ASSERT(recovered != NULL); 436*023e71deSHaik Aftandilian 437*023e71deSHaik Aftandilian /* 438*023e71deSHaik Aftandilian * Return an error if suspend_pre is erreoneously called 439*023e71deSHaik Aftandilian * when OS suspend is not supported. 440*023e71deSHaik Aftandilian */ 441*023e71deSHaik Aftandilian ASSERT(suspend_supported()); 442*023e71deSHaik Aftandilian if (!suspend_supported()) { 443*023e71deSHaik Aftandilian DBG("suspend: suspend_pre called without suspend support"); 444*023e71deSHaik Aftandilian *recovered = B_TRUE; 445*023e71deSHaik Aftandilian return (ENOTSUP); 446*023e71deSHaik Aftandilian } 447*023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 448*023e71deSHaik Aftandilian 449*023e71deSHaik Aftandilian rv = cluster_pre_wrapper(error_reason, max_reason_len); 450*023e71deSHaik Aftandilian 451*023e71deSHaik Aftandilian /* 452*023e71deSHaik Aftandilian * At present, only one pre-suspend operation exists. 453*023e71deSHaik Aftandilian * If it fails, no recovery needs to be done. 454*023e71deSHaik Aftandilian */ 455*023e71deSHaik Aftandilian if (rv != 0 && recovered != NULL) 456*023e71deSHaik Aftandilian *recovered = B_TRUE; 457*023e71deSHaik Aftandilian 458*023e71deSHaik Aftandilian return (rv); 459*023e71deSHaik Aftandilian } 460*023e71deSHaik Aftandilian 461*023e71deSHaik Aftandilian /* 462*023e71deSHaik Aftandilian * Execute post-suspend callbacks. Returns zero on success, non-zero on 463*023e71deSHaik Aftandilian * failure. Must be called after suspend_start is called, regardless of 464*023e71deSHaik Aftandilian * whether or not suspend_start is successful. 465*023e71deSHaik Aftandilian */ 466*023e71deSHaik Aftandilian int 467*023e71deSHaik Aftandilian suspend_post(char *error_reason, size_t max_reason_len) 468*023e71deSHaik Aftandilian { 469*023e71deSHaik Aftandilian ASSERT(suspend_supported()); 470*023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 471*023e71deSHaik Aftandilian return (cluster_post_wrapper(error_reason, max_reason_len)); 472*023e71deSHaik Aftandilian } 473*023e71deSHaik Aftandilian 474*023e71deSHaik Aftandilian /* 475*023e71deSHaik Aftandilian * Suspends the OS by pausing CPUs and calling into the HV to initiate 476*023e71deSHaik Aftandilian * the suspend. When the HV routine hv_guest_suspend returns, the system 477*023e71deSHaik Aftandilian * will be resumed. Must be called after a successful call to suspend_pre. 478*023e71deSHaik Aftandilian * suspend_post must be called after suspend_start, whether or not 479*023e71deSHaik Aftandilian * suspend_start returns an error. 480*023e71deSHaik Aftandilian */ 481*023e71deSHaik Aftandilian /*ARGSUSED*/ 482*023e71deSHaik Aftandilian int 483*023e71deSHaik Aftandilian suspend_start(char *error_reason, size_t max_reason_len) 484*023e71deSHaik Aftandilian { 485*023e71deSHaik Aftandilian uint64_t source_tick; 486*023e71deSHaik Aftandilian uint64_t source_stick; 487*023e71deSHaik Aftandilian uint64_t rv; 488*023e71deSHaik Aftandilian timestruc_t source_tod; 489*023e71deSHaik Aftandilian int spl; 490*023e71deSHaik Aftandilian 491*023e71deSHaik Aftandilian ASSERT(suspend_supported()); 492*023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 493*023e71deSHaik Aftandilian 494*023e71deSHaik Aftandilian mutex_enter(&cpu_lock); 495*023e71deSHaik Aftandilian 496*023e71deSHaik Aftandilian /* Suspend the watchdog */ 497*023e71deSHaik Aftandilian watchdog_suspend(); 498*023e71deSHaik Aftandilian 499*023e71deSHaik Aftandilian /* Record the TOD */ 500*023e71deSHaik Aftandilian mutex_enter(&tod_lock); 501*023e71deSHaik Aftandilian source_tod = tod_get(); 502*023e71deSHaik Aftandilian mutex_exit(&tod_lock); 503*023e71deSHaik Aftandilian 504*023e71deSHaik Aftandilian /* Pause all other CPUs */ 505*023e71deSHaik Aftandilian pause_cpus(NULL); 506*023e71deSHaik Aftandilian DBG_PROM("suspend: CPUs paused\n"); 507*023e71deSHaik Aftandilian 508*023e71deSHaik Aftandilian /* Suspend cyclics and disable interrupts */ 509*023e71deSHaik Aftandilian cyclic_suspend(); 510*023e71deSHaik Aftandilian DBG_PROM("suspend: cyclics suspended\n"); 511*023e71deSHaik Aftandilian spl = spl8(); 512*023e71deSHaik Aftandilian 513*023e71deSHaik Aftandilian source_tick = gettick_counter(); 514*023e71deSHaik Aftandilian source_stick = gettick(); 515*023e71deSHaik Aftandilian DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick); 516*023e71deSHaik Aftandilian DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick); 517*023e71deSHaik Aftandilian 518*023e71deSHaik Aftandilian /* 519*023e71deSHaik Aftandilian * Call into the HV to initiate the suspend. 520*023e71deSHaik Aftandilian * hv_guest_suspend() returns after the guest has been 521*023e71deSHaik Aftandilian * resumed or if the suspend operation failed or was 522*023e71deSHaik Aftandilian * cancelled. After a successful suspend, the %tick and 523*023e71deSHaik Aftandilian * %stick registers may have changed by an amount that is 524*023e71deSHaik Aftandilian * not proportional to the amount of time that has passed. 525*023e71deSHaik Aftandilian * They may have jumped forwards or backwards. This jump 526*023e71deSHaik Aftandilian * must be uniform across all CPUs and we operate under 527*023e71deSHaik Aftandilian * the assumption that it is (maintaining two global offset 528*023e71deSHaik Aftandilian * variables--one for %tick and one for %stick.) 529*023e71deSHaik Aftandilian */ 530*023e71deSHaik Aftandilian DBG_PROM("suspend: suspending... \n"); 531*023e71deSHaik Aftandilian rv = hv_guest_suspend(); 532*023e71deSHaik Aftandilian if (rv != 0) { 533*023e71deSHaik Aftandilian splx(spl); 534*023e71deSHaik Aftandilian cyclic_resume(); 535*023e71deSHaik Aftandilian start_cpus(); 536*023e71deSHaik Aftandilian watchdog_resume(); 537*023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 538*023e71deSHaik Aftandilian DBG("suspend: failed, rv: %ld\n", rv); 539*023e71deSHaik Aftandilian return (rv); 540*023e71deSHaik Aftandilian } 541*023e71deSHaik Aftandilian 542*023e71deSHaik Aftandilian /* Update the global tick and stick offsets */ 543*023e71deSHaik Aftandilian set_tick_offsets(source_tick, source_stick); 544*023e71deSHaik Aftandilian 545*023e71deSHaik Aftandilian /* Ensure new offsets are globally visible before resuming CPUs */ 546*023e71deSHaik Aftandilian membar_sync(); 547*023e71deSHaik Aftandilian 548*023e71deSHaik Aftandilian /* Enable interrupts */ 549*023e71deSHaik Aftandilian splx(spl); 550*023e71deSHaik Aftandilian 551*023e71deSHaik Aftandilian /* Set the {%tick,%stick}.NPT bits on all CPUs */ 552*023e71deSHaik Aftandilian if (enable_user_tick_stick_emulation) { 553*023e71deSHaik Aftandilian xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL); 554*023e71deSHaik Aftandilian xt_sync(cpu_ready_set); 555*023e71deSHaik Aftandilian ASSERT(gettick_npt() != 0); 556*023e71deSHaik Aftandilian ASSERT(getstick_npt() != 0); 557*023e71deSHaik Aftandilian } 558*023e71deSHaik Aftandilian 559*023e71deSHaik Aftandilian /* If emulation is enabled, but not currently active, enable it */ 560*023e71deSHaik Aftandilian if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) { 561*023e71deSHaik Aftandilian tick_stick_emulation_active = B_TRUE; 562*023e71deSHaik Aftandilian } 563*023e71deSHaik Aftandilian 564*023e71deSHaik Aftandilian /* Resume cyclics, unpause CPUs */ 565*023e71deSHaik Aftandilian cyclic_resume(); 566*023e71deSHaik Aftandilian start_cpus(); 567*023e71deSHaik Aftandilian 568*023e71deSHaik Aftandilian /* Set the TOD */ 569*023e71deSHaik Aftandilian mutex_enter(&tod_lock); 570*023e71deSHaik Aftandilian tod_set(source_tod); 571*023e71deSHaik Aftandilian mutex_exit(&tod_lock); 572*023e71deSHaik Aftandilian 573*023e71deSHaik Aftandilian /* Re-enable the watchdog */ 574*023e71deSHaik Aftandilian watchdog_resume(); 575*023e71deSHaik Aftandilian 576*023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 577*023e71deSHaik Aftandilian 578*023e71deSHaik Aftandilian /* Get new MD, update CPU mappings/relationships */ 579*023e71deSHaik Aftandilian if (suspend_update_cpu_mappings) 580*023e71deSHaik Aftandilian update_cpu_mappings(); 581*023e71deSHaik Aftandilian 582*023e71deSHaik Aftandilian DBG("suspend: target tick: 0x%lx", gettick_counter()); 583*023e71deSHaik Aftandilian DBG("suspend: target stick: 0x%llx", gettick()); 584*023e71deSHaik Aftandilian DBG("suspend: user %%tick/%%stick emulation is %d", 585*023e71deSHaik Aftandilian tick_stick_emulation_active); 586*023e71deSHaik Aftandilian DBG("suspend: finished"); 587*023e71deSHaik Aftandilian 588*023e71deSHaik Aftandilian return (0); 589*023e71deSHaik Aftandilian } 590