1023e71deSHaik Aftandilian /* 2023e71deSHaik Aftandilian * CDDL HEADER START 3023e71deSHaik Aftandilian * 4023e71deSHaik Aftandilian * The contents of this file are subject to the terms of the 5023e71deSHaik Aftandilian * Common Development and Distribution License (the "License"). 6023e71deSHaik Aftandilian * You may not use this file except in compliance with the License. 7023e71deSHaik Aftandilian * 8023e71deSHaik Aftandilian * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9023e71deSHaik Aftandilian * or http://www.opensolaris.org/os/licensing. 10023e71deSHaik Aftandilian * See the License for the specific language governing permissions 11023e71deSHaik Aftandilian * and limitations under the License. 12023e71deSHaik Aftandilian * 13023e71deSHaik Aftandilian * When distributing Covered Code, include this CDDL HEADER in each 14023e71deSHaik Aftandilian * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15023e71deSHaik Aftandilian * If applicable, add the following below this CDDL HEADER, with the 16023e71deSHaik Aftandilian * fields enclosed by brackets "[]" replaced with your own identifying 17023e71deSHaik Aftandilian * information: Portions Copyright [yyyy] [name of copyright owner] 18023e71deSHaik Aftandilian * 19023e71deSHaik Aftandilian * CDDL HEADER END 20023e71deSHaik Aftandilian */ 21023e71deSHaik Aftandilian /* 22*d2365b01SPavel Tatashin * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23023e71deSHaik Aftandilian * Use is subject to license terms. 24023e71deSHaik Aftandilian */ 25023e71deSHaik Aftandilian 26023e71deSHaik Aftandilian #include <sys/mutex.h> 27023e71deSHaik Aftandilian #include <sys/cpuvar.h> 28023e71deSHaik Aftandilian #include <sys/cyclic.h> 29023e71deSHaik Aftandilian #include <sys/disp.h> 30023e71deSHaik Aftandilian #include <sys/ddi.h> 31023e71deSHaik Aftandilian #include <sys/wdt.h> 32023e71deSHaik Aftandilian #include <sys/callb.h> 33023e71deSHaik Aftandilian #include <sys/cmn_err.h> 34023e71deSHaik Aftandilian #include <sys/hypervisor_api.h> 35023e71deSHaik Aftandilian #include <sys/membar.h> 36023e71deSHaik Aftandilian #include <sys/x_call.h> 37023e71deSHaik Aftandilian #include <sys/promif.h> 38023e71deSHaik Aftandilian #include <sys/systm.h> 39023e71deSHaik Aftandilian #include <sys/mach_descrip.h> 40023e71deSHaik Aftandilian #include <sys/cpu_module.h> 41023e71deSHaik Aftandilian #include <sys/pg.h> 42023e71deSHaik Aftandilian #include <sys/lgrp.h> 43023e71deSHaik Aftandilian #include <sys/sysmacros.h> 44023e71deSHaik Aftandilian #include <sys/sunddi.h> 45023e71deSHaik Aftandilian #include <sys/cpupart.h> 46023e71deSHaik Aftandilian #include <sys/hsvc.h> 47*d2365b01SPavel Tatashin #include <vm/hat_sfmmu.h> 48023e71deSHaik Aftandilian 49023e71deSHaik Aftandilian /* 50023e71deSHaik Aftandilian * Sun4v OS Suspend 51023e71deSHaik Aftandilian * 52023e71deSHaik Aftandilian * Provides a means to suspend a sun4v guest domain by pausing CPUs and then 53023e71deSHaik Aftandilian * calling into the HV to initiate a suspension. Suspension is sequenced 54023e71deSHaik Aftandilian * externally by calling suspend_pre, suspend_start, and suspend_post. 55023e71deSHaik Aftandilian * suspend_pre and suspend_post are meant to perform any special operations 56023e71deSHaik Aftandilian * that should be done before or after a suspend/resume operation. e.g., 57023e71deSHaik Aftandilian * callbacks to cluster software to disable heartbeat monitoring before the 58023e71deSHaik Aftandilian * system is suspended. suspend_start prepares kernel services to be suspended 59023e71deSHaik Aftandilian * and then suspends the domain by calling hv_guest_suspend. 60023e71deSHaik Aftandilian * 61023e71deSHaik Aftandilian * Special Handling for %tick and %stick Registers 62023e71deSHaik Aftandilian * 63023e71deSHaik Aftandilian * After a suspend/resume operation, the %tick and %stick registers may have 64023e71deSHaik Aftandilian * jumped forwards or backwards. The delta is assumed to be consistent across 65023e71deSHaik Aftandilian * all CPUs, within the negligible level of %tick and %stick variation 66023e71deSHaik Aftandilian * acceptable on a cold boot. In order to maintain increasing %tick and %stick 67023e71deSHaik Aftandilian * counter values without exposing large positive or negative jumps to kernel 68023e71deSHaik Aftandilian * or user code, a %tick and %stick offset is used. Kernel reads of these 69023e71deSHaik Aftandilian * counters return the sum of the hardware register counter and offset 70023e71deSHaik Aftandilian * variable. After a suspend/resume operation, user reads of %tick or %stick 71023e71deSHaik Aftandilian * are emulated. Suspend code enables emulation by setting the 72023e71deSHaik Aftandilian * %{tick,stick}.NPT fields which trigger a privileged instruction access 73023e71deSHaik Aftandilian * trap whenever the registers are read from user mode. If emulation has been 74023e71deSHaik Aftandilian * enabled, the trap handler emulates the instruction. Emulation is only 75023e71deSHaik Aftandilian * enabled during a successful suspend/resume operation. When emulation is 76023e71deSHaik Aftandilian * enabled, CPUs that are DR'd into the system will have their 77023e71deSHaik Aftandilian * %{tick,stick}.NPT bits set to 1 as well. 78023e71deSHaik Aftandilian */ 79023e71deSHaik Aftandilian 80023e71deSHaik Aftandilian extern u_longlong_t gettick(void); /* returns %stick */ 81023e71deSHaik Aftandilian extern uint64_t gettick_counter(void); /* returns %tick */ 82023e71deSHaik Aftandilian extern uint64_t gettick_npt(void); 83023e71deSHaik Aftandilian extern uint64_t getstick_npt(void); 84023e71deSHaik Aftandilian extern int mach_descrip_update(void); 85023e71deSHaik Aftandilian extern cpuset_t cpu_ready_set; 86023e71deSHaik Aftandilian extern uint64_t native_tick_offset; 87023e71deSHaik Aftandilian extern uint64_t native_stick_offset; 88023e71deSHaik Aftandilian 89023e71deSHaik Aftandilian /* 90023e71deSHaik Aftandilian * Global Sun Cluster pre/post callbacks. 91023e71deSHaik Aftandilian */ 92023e71deSHaik Aftandilian const char *(*cl_suspend_error_decode)(int); 93023e71deSHaik Aftandilian int (*cl_suspend_pre_callback)(void); 94023e71deSHaik Aftandilian int (*cl_suspend_post_callback)(void); 95023e71deSHaik Aftandilian #define SC_PRE_FAIL_STR_FMT "Sun Cluster pre-suspend failure: %d" 96023e71deSHaik Aftandilian #define SC_POST_FAIL_STR_FMT "Sun Cluster post-suspend failure: %d" 97023e71deSHaik Aftandilian #define SC_FAIL_STR_MAX 256 98023e71deSHaik Aftandilian 99023e71deSHaik Aftandilian /* 100023e71deSHaik Aftandilian * The minimum major and minor version of the HSVC_GROUP_CORE API group 101023e71deSHaik Aftandilian * required in order to use OS suspend. 102023e71deSHaik Aftandilian */ 103023e71deSHaik Aftandilian #define SUSPEND_CORE_MAJOR 1 104023e71deSHaik Aftandilian #define SUSPEND_CORE_MINOR 2 105023e71deSHaik Aftandilian 106023e71deSHaik Aftandilian /* 107023e71deSHaik Aftandilian * By default, sun4v OS suspend is supported if the required HV version 108023e71deSHaik Aftandilian * is present. suspend_disabled should be set on platforms that do not 109023e71deSHaik Aftandilian * allow OS suspend regardless of whether or not the HV supports it. 110023e71deSHaik Aftandilian * It can also be set in /etc/system. 111023e71deSHaik Aftandilian */ 112023e71deSHaik Aftandilian static int suspend_disabled = 0; 113023e71deSHaik Aftandilian 114023e71deSHaik Aftandilian /* 115023e71deSHaik Aftandilian * Controls whether or not user-land tick and stick register emulation 116023e71deSHaik Aftandilian * will be enabled following a successful suspend operation. 117023e71deSHaik Aftandilian */ 118023e71deSHaik Aftandilian static int enable_user_tick_stick_emulation = 1; 119023e71deSHaik Aftandilian 120023e71deSHaik Aftandilian /* 121023e71deSHaik Aftandilian * Indicates whether or not tick and stick emulation is currently active. 122023e71deSHaik Aftandilian * After a successful suspend operation, if emulation is enabled, this 123023e71deSHaik Aftandilian * variable is set to B_TRUE. Global scope to allow emulation code to 124023e71deSHaik Aftandilian * check if emulation is active. 125023e71deSHaik Aftandilian */ 126023e71deSHaik Aftandilian boolean_t tick_stick_emulation_active = B_FALSE; 127023e71deSHaik Aftandilian 128023e71deSHaik Aftandilian /* 129*d2365b01SPavel Tatashin * When non-zero, after a successful suspend and resume, cpunodes, CPU HW 130*d2365b01SPavel Tatashin * sharing data structures, and processor groups will be updated using 131*d2365b01SPavel Tatashin * information from the updated MD. 132023e71deSHaik Aftandilian */ 133023e71deSHaik Aftandilian static int suspend_update_cpu_mappings = 1; 134023e71deSHaik Aftandilian 135023e71deSHaik Aftandilian /* 136023e71deSHaik Aftandilian * DBG and DBG_PROM() macro. 137023e71deSHaik Aftandilian */ 138023e71deSHaik Aftandilian #ifdef DEBUG 139023e71deSHaik Aftandilian 140023e71deSHaik Aftandilian static int suspend_debug_flag = 0; 141023e71deSHaik Aftandilian 142023e71deSHaik Aftandilian #define DBG_PROM \ 143023e71deSHaik Aftandilian if (suspend_debug_flag) \ 144023e71deSHaik Aftandilian prom_printf 145023e71deSHaik Aftandilian 146023e71deSHaik Aftandilian #define DBG \ 147023e71deSHaik Aftandilian if (suspend_debug_flag) \ 148023e71deSHaik Aftandilian suspend_debug 149023e71deSHaik Aftandilian 150023e71deSHaik Aftandilian static void 151023e71deSHaik Aftandilian suspend_debug(const char *fmt, ...) 152023e71deSHaik Aftandilian { 153023e71deSHaik Aftandilian char buf[512]; 154023e71deSHaik Aftandilian va_list ap; 155023e71deSHaik Aftandilian 156023e71deSHaik Aftandilian va_start(ap, fmt); 157023e71deSHaik Aftandilian (void) vsprintf(buf, fmt, ap); 158023e71deSHaik Aftandilian va_end(ap); 159023e71deSHaik Aftandilian 160023e71deSHaik Aftandilian cmn_err(CE_NOTE, "%s", buf); 161023e71deSHaik Aftandilian } 162023e71deSHaik Aftandilian 163023e71deSHaik Aftandilian #else /* DEBUG */ 164023e71deSHaik Aftandilian 165023e71deSHaik Aftandilian #define DBG_PROM 166023e71deSHaik Aftandilian #define DBG 167023e71deSHaik Aftandilian 168023e71deSHaik Aftandilian #endif /* DEBUG */ 169023e71deSHaik Aftandilian 170023e71deSHaik Aftandilian /* 171023e71deSHaik Aftandilian * Return true if the HV supports OS suspend and if suspend has not been 172023e71deSHaik Aftandilian * disabled on this platform. 173023e71deSHaik Aftandilian */ 174023e71deSHaik Aftandilian boolean_t 175023e71deSHaik Aftandilian suspend_supported(void) 176023e71deSHaik Aftandilian { 177023e71deSHaik Aftandilian uint64_t major, minor; 178023e71deSHaik Aftandilian 179023e71deSHaik Aftandilian if (suspend_disabled) 180023e71deSHaik Aftandilian return (B_FALSE); 181023e71deSHaik Aftandilian 182023e71deSHaik Aftandilian if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0) 183023e71deSHaik Aftandilian return (B_FALSE); 184023e71deSHaik Aftandilian 185023e71deSHaik Aftandilian return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) || 186023e71deSHaik Aftandilian (major > SUSPEND_CORE_MAJOR)); 187023e71deSHaik Aftandilian } 188023e71deSHaik Aftandilian 189023e71deSHaik Aftandilian /* 190023e71deSHaik Aftandilian * Given a source tick and stick value, set the tick and stick offsets such 191023e71deSHaik Aftandilian * that the (current physical register value + offset == source value). 192023e71deSHaik Aftandilian */ 193023e71deSHaik Aftandilian static void 194023e71deSHaik Aftandilian set_tick_offsets(uint64_t source_tick, uint64_t source_stick) 195023e71deSHaik Aftandilian { 196023e71deSHaik Aftandilian uint64_t target_tick; 197023e71deSHaik Aftandilian uint64_t target_stick; 198023e71deSHaik Aftandilian 199023e71deSHaik Aftandilian native_tick_offset = 0; 200023e71deSHaik Aftandilian native_stick_offset = 0; 201023e71deSHaik Aftandilian 202023e71deSHaik Aftandilian target_tick = gettick_counter(); /* returns %tick */ 203023e71deSHaik Aftandilian target_stick = gettick(); /* returns %stick */ 204023e71deSHaik Aftandilian 205023e71deSHaik Aftandilian native_tick_offset = source_tick - target_tick; 206023e71deSHaik Aftandilian native_stick_offset = source_stick - target_stick; 207023e71deSHaik Aftandilian } 208023e71deSHaik Aftandilian 209023e71deSHaik Aftandilian /* 210023e71deSHaik Aftandilian * Set the {tick,stick}.NPT field to 1 on this CPU. 211023e71deSHaik Aftandilian */ 212023e71deSHaik Aftandilian static void 213023e71deSHaik Aftandilian enable_tick_stick_npt(void) 214023e71deSHaik Aftandilian { 215c1374a13SSurya Prakki (void) hv_stick_set_npt(1); 216c1374a13SSurya Prakki (void) hv_tick_set_npt(1); 217023e71deSHaik Aftandilian } 218023e71deSHaik Aftandilian 219023e71deSHaik Aftandilian /* 220023e71deSHaik Aftandilian * Synchronize a CPU's {tick,stick}.NPT fields with the current state 221023e71deSHaik Aftandilian * of the system. This is used when a CPU is DR'd into the system. 222023e71deSHaik Aftandilian */ 223023e71deSHaik Aftandilian void 224023e71deSHaik Aftandilian suspend_sync_tick_stick_npt(void) 225023e71deSHaik Aftandilian { 226023e71deSHaik Aftandilian if (tick_stick_emulation_active) { 227023e71deSHaik Aftandilian DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id); 228c1374a13SSurya Prakki (void) hv_stick_set_npt(1); 229c1374a13SSurya Prakki (void) hv_tick_set_npt(1); 230023e71deSHaik Aftandilian } else { 231023e71deSHaik Aftandilian ASSERT(gettick_npt() == 0); 232023e71deSHaik Aftandilian ASSERT(getstick_npt() == 0); 233023e71deSHaik Aftandilian } 234023e71deSHaik Aftandilian } 235023e71deSHaik Aftandilian 236023e71deSHaik Aftandilian /* 237023e71deSHaik Aftandilian * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW 238023e71deSHaik Aftandilian * sharing data structures, and processor groups. 239023e71deSHaik Aftandilian */ 240023e71deSHaik Aftandilian static void 241023e71deSHaik Aftandilian update_cpu_mappings(void) 242023e71deSHaik Aftandilian { 243023e71deSHaik Aftandilian md_t *mdp; 244023e71deSHaik Aftandilian processorid_t id; 245023e71deSHaik Aftandilian cpu_t *cp; 246023e71deSHaik Aftandilian cpu_pg_t *pgps[NCPU]; 247023e71deSHaik Aftandilian 248023e71deSHaik Aftandilian if ((mdp = md_get_handle()) == NULL) { 249023e71deSHaik Aftandilian DBG("suspend: md_get_handle failed"); 250023e71deSHaik Aftandilian return; 251023e71deSHaik Aftandilian } 252023e71deSHaik Aftandilian 253023e71deSHaik Aftandilian DBG("suspend: updating CPU mappings"); 254023e71deSHaik Aftandilian 255023e71deSHaik Aftandilian mutex_enter(&cpu_lock); 256023e71deSHaik Aftandilian 257023e71deSHaik Aftandilian setup_chip_mappings(mdp); 258023e71deSHaik Aftandilian setup_exec_unit_mappings(mdp); 259023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 260023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 261023e71deSHaik Aftandilian continue; 262023e71deSHaik Aftandilian cpu_map_exec_units(cp); 263023e71deSHaik Aftandilian } 264023e71deSHaik Aftandilian 265023e71deSHaik Aftandilian /* 266023e71deSHaik Aftandilian * Re-calculate processor groups. 267023e71deSHaik Aftandilian * 268023e71deSHaik Aftandilian * First tear down all PG information before adding any new PG 269023e71deSHaik Aftandilian * information derived from the MD we just downloaded. We must 270023e71deSHaik Aftandilian * call pg_cpu_inactive and pg_cpu_active with CPUs paused and 271023e71deSHaik Aftandilian * we want to minimize the number of times pause_cpus is called. 272023e71deSHaik Aftandilian * Inactivating all CPUs would leave PGs without any active CPUs, 273023e71deSHaik Aftandilian * so while CPUs are paused, call pg_cpu_inactive and swap in the 274023e71deSHaik Aftandilian * bootstrap PG structure saving the original PG structure to be 275023e71deSHaik Aftandilian * fini'd afterwards. This prevents the dispatcher from encountering 276023e71deSHaik Aftandilian * PGs in which all CPUs are inactive. 277023e71deSHaik Aftandilian */ 278023e71deSHaik Aftandilian pause_cpus(NULL); 279023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 280023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 281023e71deSHaik Aftandilian continue; 282023e71deSHaik Aftandilian pg_cpu_inactive(cp); 283023e71deSHaik Aftandilian pgps[id] = cp->cpu_pg; 284023e71deSHaik Aftandilian pg_cpu_bootstrap(cp); 285023e71deSHaik Aftandilian } 286023e71deSHaik Aftandilian start_cpus(); 287023e71deSHaik Aftandilian 288023e71deSHaik Aftandilian /* 289023e71deSHaik Aftandilian * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are 290023e71deSHaik Aftandilian * not paused. Use two separate loops here so that we do not 291023e71deSHaik Aftandilian * initialize PG data for CPUs until all the old PG data structures 292023e71deSHaik Aftandilian * are torn down. 293023e71deSHaik Aftandilian */ 294023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 295023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 296023e71deSHaik Aftandilian continue; 297023e71deSHaik Aftandilian pg_cpu_fini(cp, pgps[id]); 298023e71deSHaik Aftandilian } 299023e71deSHaik Aftandilian 300023e71deSHaik Aftandilian /* 301023e71deSHaik Aftandilian * Initialize PG data for each CPU, but leave the bootstrapped 302023e71deSHaik Aftandilian * PG structure in place to avoid running with any PGs containing 303023e71deSHaik Aftandilian * nothing but inactive CPUs. 304023e71deSHaik Aftandilian */ 305023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 306023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 307023e71deSHaik Aftandilian continue; 308023e71deSHaik Aftandilian pgps[id] = pg_cpu_init(cp, B_TRUE); 309023e71deSHaik Aftandilian } 310023e71deSHaik Aftandilian 311023e71deSHaik Aftandilian /* 312023e71deSHaik Aftandilian * Now that PG data has been initialized for all CPUs in the 313023e71deSHaik Aftandilian * system, replace the bootstrapped PG structure with the 314023e71deSHaik Aftandilian * initialized PG structure and call pg_cpu_active for each CPU. 315023e71deSHaik Aftandilian */ 316023e71deSHaik Aftandilian pause_cpus(NULL); 317023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 318023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 319023e71deSHaik Aftandilian continue; 320023e71deSHaik Aftandilian cp->cpu_pg = pgps[id]; 321023e71deSHaik Aftandilian pg_cpu_active(cp); 322023e71deSHaik Aftandilian } 323023e71deSHaik Aftandilian start_cpus(); 324023e71deSHaik Aftandilian 325023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 326023e71deSHaik Aftandilian 327023e71deSHaik Aftandilian (void) md_fini_handle(mdp); 328023e71deSHaik Aftandilian } 329023e71deSHaik Aftandilian 330023e71deSHaik Aftandilian /* 331023e71deSHaik Aftandilian * Wrapper for the Sun Cluster error decoding function. 332023e71deSHaik Aftandilian */ 333023e71deSHaik Aftandilian static int 334023e71deSHaik Aftandilian cluster_error_decode(int error, char *error_reason, size_t max_reason_len) 335023e71deSHaik Aftandilian { 336023e71deSHaik Aftandilian const char *decoded; 337023e71deSHaik Aftandilian size_t decoded_len; 338023e71deSHaik Aftandilian 339023e71deSHaik Aftandilian ASSERT(error_reason != NULL); 340023e71deSHaik Aftandilian ASSERT(max_reason_len > 0); 341023e71deSHaik Aftandilian 342023e71deSHaik Aftandilian max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX); 343023e71deSHaik Aftandilian 344023e71deSHaik Aftandilian if (cl_suspend_error_decode == NULL) 345023e71deSHaik Aftandilian return (-1); 346023e71deSHaik Aftandilian 347023e71deSHaik Aftandilian if ((decoded = (*cl_suspend_error_decode)(error)) == NULL) 348023e71deSHaik Aftandilian return (-1); 349023e71deSHaik Aftandilian 350023e71deSHaik Aftandilian /* Get number of non-NULL bytes */ 351023e71deSHaik Aftandilian if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0) 352023e71deSHaik Aftandilian return (-1); 353023e71deSHaik Aftandilian 354023e71deSHaik Aftandilian bcopy(decoded, error_reason, decoded_len); 355023e71deSHaik Aftandilian 356023e71deSHaik Aftandilian /* 357023e71deSHaik Aftandilian * The error string returned from cl_suspend_error_decode 358023e71deSHaik Aftandilian * should be NULL-terminated, but set the terminator here 359023e71deSHaik Aftandilian * because we only copied non-NULL bytes. If the decoded 360023e71deSHaik Aftandilian * string was not NULL-terminated, this guarantees that 361023e71deSHaik Aftandilian * error_reason will be. 362023e71deSHaik Aftandilian */ 363023e71deSHaik Aftandilian error_reason[decoded_len] = '\0'; 364023e71deSHaik Aftandilian 365023e71deSHaik Aftandilian return (0); 366023e71deSHaik Aftandilian } 367023e71deSHaik Aftandilian 368023e71deSHaik Aftandilian /* 369023e71deSHaik Aftandilian * Wrapper for the Sun Cluster pre-suspend callback. 370023e71deSHaik Aftandilian */ 371023e71deSHaik Aftandilian static int 372023e71deSHaik Aftandilian cluster_pre_wrapper(char *error_reason, size_t max_reason_len) 373023e71deSHaik Aftandilian { 374023e71deSHaik Aftandilian int rv = 0; 375023e71deSHaik Aftandilian 376023e71deSHaik Aftandilian if (cl_suspend_pre_callback != NULL) { 377023e71deSHaik Aftandilian rv = (*cl_suspend_pre_callback)(); 378023e71deSHaik Aftandilian DBG("suspend: cl_suspend_pre_callback returned %d", rv); 379023e71deSHaik Aftandilian if (rv != 0 && error_reason != NULL && max_reason_len > 0) { 380023e71deSHaik Aftandilian if (cluster_error_decode(rv, error_reason, 381023e71deSHaik Aftandilian max_reason_len)) { 382023e71deSHaik Aftandilian (void) snprintf(error_reason, max_reason_len, 383023e71deSHaik Aftandilian SC_PRE_FAIL_STR_FMT, rv); 384023e71deSHaik Aftandilian } 385023e71deSHaik Aftandilian } 386023e71deSHaik Aftandilian } 387023e71deSHaik Aftandilian 388023e71deSHaik Aftandilian return (rv); 389023e71deSHaik Aftandilian } 390023e71deSHaik Aftandilian 391023e71deSHaik Aftandilian /* 392023e71deSHaik Aftandilian * Wrapper for the Sun Cluster post-suspend callback. 393023e71deSHaik Aftandilian */ 394023e71deSHaik Aftandilian static int 395023e71deSHaik Aftandilian cluster_post_wrapper(char *error_reason, size_t max_reason_len) 396023e71deSHaik Aftandilian { 397023e71deSHaik Aftandilian int rv = 0; 398023e71deSHaik Aftandilian 399023e71deSHaik Aftandilian if (cl_suspend_post_callback != NULL) { 400023e71deSHaik Aftandilian rv = (*cl_suspend_post_callback)(); 401023e71deSHaik Aftandilian DBG("suspend: cl_suspend_post_callback returned %d", rv); 402023e71deSHaik Aftandilian if (rv != 0 && error_reason != NULL && max_reason_len > 0) { 403023e71deSHaik Aftandilian if (cluster_error_decode(rv, error_reason, 404023e71deSHaik Aftandilian max_reason_len)) { 405023e71deSHaik Aftandilian (void) snprintf(error_reason, 406023e71deSHaik Aftandilian max_reason_len, SC_POST_FAIL_STR_FMT, rv); 407023e71deSHaik Aftandilian } 408023e71deSHaik Aftandilian } 409023e71deSHaik Aftandilian } 410023e71deSHaik Aftandilian 411023e71deSHaik Aftandilian return (rv); 412023e71deSHaik Aftandilian } 413023e71deSHaik Aftandilian 414023e71deSHaik Aftandilian /* 415023e71deSHaik Aftandilian * Execute pre-suspend callbacks preparing the system for a suspend operation. 416023e71deSHaik Aftandilian * Returns zero on success, non-zero on failure. Sets the recovered argument 417023e71deSHaik Aftandilian * to indicate whether or not callbacks could be undone in the event of a 418023e71deSHaik Aftandilian * failure--if callbacks were successfully undone, *recovered is set to B_TRUE, 419023e71deSHaik Aftandilian * otherwise *recovered is set to B_FALSE. Must be called successfully before 420023e71deSHaik Aftandilian * suspend_start can be called. Callers should first call suspend_support to 421023e71deSHaik Aftandilian * determine if OS suspend is supported. 422023e71deSHaik Aftandilian */ 423023e71deSHaik Aftandilian int 424023e71deSHaik Aftandilian suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered) 425023e71deSHaik Aftandilian { 426023e71deSHaik Aftandilian int rv; 427023e71deSHaik Aftandilian 428023e71deSHaik Aftandilian ASSERT(recovered != NULL); 429023e71deSHaik Aftandilian 430023e71deSHaik Aftandilian /* 431023e71deSHaik Aftandilian * Return an error if suspend_pre is erreoneously called 432023e71deSHaik Aftandilian * when OS suspend is not supported. 433023e71deSHaik Aftandilian */ 434023e71deSHaik Aftandilian ASSERT(suspend_supported()); 435023e71deSHaik Aftandilian if (!suspend_supported()) { 436023e71deSHaik Aftandilian DBG("suspend: suspend_pre called without suspend support"); 437023e71deSHaik Aftandilian *recovered = B_TRUE; 438023e71deSHaik Aftandilian return (ENOTSUP); 439023e71deSHaik Aftandilian } 440023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 441023e71deSHaik Aftandilian 442023e71deSHaik Aftandilian rv = cluster_pre_wrapper(error_reason, max_reason_len); 443023e71deSHaik Aftandilian 444023e71deSHaik Aftandilian /* 445023e71deSHaik Aftandilian * At present, only one pre-suspend operation exists. 446023e71deSHaik Aftandilian * If it fails, no recovery needs to be done. 447023e71deSHaik Aftandilian */ 448023e71deSHaik Aftandilian if (rv != 0 && recovered != NULL) 449023e71deSHaik Aftandilian *recovered = B_TRUE; 450023e71deSHaik Aftandilian 451023e71deSHaik Aftandilian return (rv); 452023e71deSHaik Aftandilian } 453023e71deSHaik Aftandilian 454023e71deSHaik Aftandilian /* 455023e71deSHaik Aftandilian * Execute post-suspend callbacks. Returns zero on success, non-zero on 456023e71deSHaik Aftandilian * failure. Must be called after suspend_start is called, regardless of 457023e71deSHaik Aftandilian * whether or not suspend_start is successful. 458023e71deSHaik Aftandilian */ 459023e71deSHaik Aftandilian int 460023e71deSHaik Aftandilian suspend_post(char *error_reason, size_t max_reason_len) 461023e71deSHaik Aftandilian { 462023e71deSHaik Aftandilian ASSERT(suspend_supported()); 463023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 464023e71deSHaik Aftandilian return (cluster_post_wrapper(error_reason, max_reason_len)); 465023e71deSHaik Aftandilian } 466023e71deSHaik Aftandilian 467023e71deSHaik Aftandilian /* 468023e71deSHaik Aftandilian * Suspends the OS by pausing CPUs and calling into the HV to initiate 469023e71deSHaik Aftandilian * the suspend. When the HV routine hv_guest_suspend returns, the system 470023e71deSHaik Aftandilian * will be resumed. Must be called after a successful call to suspend_pre. 471023e71deSHaik Aftandilian * suspend_post must be called after suspend_start, whether or not 472023e71deSHaik Aftandilian * suspend_start returns an error. 473023e71deSHaik Aftandilian */ 474023e71deSHaik Aftandilian /*ARGSUSED*/ 475023e71deSHaik Aftandilian int 476023e71deSHaik Aftandilian suspend_start(char *error_reason, size_t max_reason_len) 477023e71deSHaik Aftandilian { 478023e71deSHaik Aftandilian uint64_t source_tick; 479023e71deSHaik Aftandilian uint64_t source_stick; 480023e71deSHaik Aftandilian uint64_t rv; 481023e71deSHaik Aftandilian timestruc_t source_tod; 482023e71deSHaik Aftandilian int spl; 483023e71deSHaik Aftandilian 484023e71deSHaik Aftandilian ASSERT(suspend_supported()); 485023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 486023e71deSHaik Aftandilian 487*d2365b01SPavel Tatashin sfmmu_ctxdoms_lock(); 488*d2365b01SPavel Tatashin 489023e71deSHaik Aftandilian mutex_enter(&cpu_lock); 490023e71deSHaik Aftandilian 491023e71deSHaik Aftandilian /* Suspend the watchdog */ 492023e71deSHaik Aftandilian watchdog_suspend(); 493023e71deSHaik Aftandilian 494023e71deSHaik Aftandilian /* Record the TOD */ 495023e71deSHaik Aftandilian mutex_enter(&tod_lock); 496023e71deSHaik Aftandilian source_tod = tod_get(); 497023e71deSHaik Aftandilian mutex_exit(&tod_lock); 498023e71deSHaik Aftandilian 499023e71deSHaik Aftandilian /* Pause all other CPUs */ 500023e71deSHaik Aftandilian pause_cpus(NULL); 501023e71deSHaik Aftandilian DBG_PROM("suspend: CPUs paused\n"); 502023e71deSHaik Aftandilian 503023e71deSHaik Aftandilian /* Suspend cyclics and disable interrupts */ 504023e71deSHaik Aftandilian cyclic_suspend(); 505023e71deSHaik Aftandilian DBG_PROM("suspend: cyclics suspended\n"); 506023e71deSHaik Aftandilian spl = spl8(); 507023e71deSHaik Aftandilian 508023e71deSHaik Aftandilian source_tick = gettick_counter(); 509023e71deSHaik Aftandilian source_stick = gettick(); 510023e71deSHaik Aftandilian DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick); 511023e71deSHaik Aftandilian DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick); 512023e71deSHaik Aftandilian 513023e71deSHaik Aftandilian /* 514023e71deSHaik Aftandilian * Call into the HV to initiate the suspend. 515023e71deSHaik Aftandilian * hv_guest_suspend() returns after the guest has been 516023e71deSHaik Aftandilian * resumed or if the suspend operation failed or was 517023e71deSHaik Aftandilian * cancelled. After a successful suspend, the %tick and 518023e71deSHaik Aftandilian * %stick registers may have changed by an amount that is 519023e71deSHaik Aftandilian * not proportional to the amount of time that has passed. 520023e71deSHaik Aftandilian * They may have jumped forwards or backwards. This jump 521023e71deSHaik Aftandilian * must be uniform across all CPUs and we operate under 522023e71deSHaik Aftandilian * the assumption that it is (maintaining two global offset 523023e71deSHaik Aftandilian * variables--one for %tick and one for %stick.) 524023e71deSHaik Aftandilian */ 525023e71deSHaik Aftandilian DBG_PROM("suspend: suspending... \n"); 526023e71deSHaik Aftandilian rv = hv_guest_suspend(); 527023e71deSHaik Aftandilian if (rv != 0) { 528023e71deSHaik Aftandilian splx(spl); 529023e71deSHaik Aftandilian cyclic_resume(); 530023e71deSHaik Aftandilian start_cpus(); 531023e71deSHaik Aftandilian watchdog_resume(); 532023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 533*d2365b01SPavel Tatashin sfmmu_ctxdoms_unlock(); 534023e71deSHaik Aftandilian DBG("suspend: failed, rv: %ld\n", rv); 535023e71deSHaik Aftandilian return (rv); 536023e71deSHaik Aftandilian } 537023e71deSHaik Aftandilian 538023e71deSHaik Aftandilian /* Update the global tick and stick offsets */ 539023e71deSHaik Aftandilian set_tick_offsets(source_tick, source_stick); 540023e71deSHaik Aftandilian 541023e71deSHaik Aftandilian /* Ensure new offsets are globally visible before resuming CPUs */ 542023e71deSHaik Aftandilian membar_sync(); 543023e71deSHaik Aftandilian 544023e71deSHaik Aftandilian /* Enable interrupts */ 545023e71deSHaik Aftandilian splx(spl); 546023e71deSHaik Aftandilian 547023e71deSHaik Aftandilian /* Set the {%tick,%stick}.NPT bits on all CPUs */ 548023e71deSHaik Aftandilian if (enable_user_tick_stick_emulation) { 549023e71deSHaik Aftandilian xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL); 550023e71deSHaik Aftandilian xt_sync(cpu_ready_set); 551023e71deSHaik Aftandilian ASSERT(gettick_npt() != 0); 552023e71deSHaik Aftandilian ASSERT(getstick_npt() != 0); 553023e71deSHaik Aftandilian } 554023e71deSHaik Aftandilian 555023e71deSHaik Aftandilian /* If emulation is enabled, but not currently active, enable it */ 556023e71deSHaik Aftandilian if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) { 557023e71deSHaik Aftandilian tick_stick_emulation_active = B_TRUE; 558023e71deSHaik Aftandilian } 559023e71deSHaik Aftandilian 560*d2365b01SPavel Tatashin sfmmu_ctxdoms_remove(); 561*d2365b01SPavel Tatashin 562023e71deSHaik Aftandilian /* Resume cyclics, unpause CPUs */ 563023e71deSHaik Aftandilian cyclic_resume(); 564023e71deSHaik Aftandilian start_cpus(); 565023e71deSHaik Aftandilian 566023e71deSHaik Aftandilian /* Set the TOD */ 567023e71deSHaik Aftandilian mutex_enter(&tod_lock); 568023e71deSHaik Aftandilian tod_set(source_tod); 569023e71deSHaik Aftandilian mutex_exit(&tod_lock); 570023e71deSHaik Aftandilian 571023e71deSHaik Aftandilian /* Re-enable the watchdog */ 572023e71deSHaik Aftandilian watchdog_resume(); 573023e71deSHaik Aftandilian 574023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 575023e71deSHaik Aftandilian 576*d2365b01SPavel Tatashin /* Download the latest MD */ 577*d2365b01SPavel Tatashin if ((rv = mach_descrip_update()) != 0) 578*d2365b01SPavel Tatashin cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld", 579*d2365b01SPavel Tatashin rv); 580*d2365b01SPavel Tatashin 581*d2365b01SPavel Tatashin sfmmu_ctxdoms_update(); 582*d2365b01SPavel Tatashin sfmmu_ctxdoms_unlock(); 583*d2365b01SPavel Tatashin 584023e71deSHaik Aftandilian /* Get new MD, update CPU mappings/relationships */ 585023e71deSHaik Aftandilian if (suspend_update_cpu_mappings) 586023e71deSHaik Aftandilian update_cpu_mappings(); 587023e71deSHaik Aftandilian 588023e71deSHaik Aftandilian DBG("suspend: target tick: 0x%lx", gettick_counter()); 589023e71deSHaik Aftandilian DBG("suspend: target stick: 0x%llx", gettick()); 590023e71deSHaik Aftandilian DBG("suspend: user %%tick/%%stick emulation is %d", 591023e71deSHaik Aftandilian tick_stick_emulation_active); 592023e71deSHaik Aftandilian DBG("suspend: finished"); 593023e71deSHaik Aftandilian 594023e71deSHaik Aftandilian return (0); 595023e71deSHaik Aftandilian } 596