/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/mutex.h>
#include <sys/cpuvar.h>
#include <sys/cyclic.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/wdt.h>
#include <sys/callb.h>
#include <sys/cmn_err.h>
#include <sys/hypervisor_api.h>
#include <sys/membar.h>
#include <sys/x_call.h>
#include <sys/promif.h>
#include <sys/systm.h>
#include <sys/mach_descrip.h>
#include <sys/cpu_module.h>
#include <sys/pg.h>
#include <sys/lgrp.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/cpupart.h>
#include <sys/hsvc.h>
#include <sys/mpo.h>
#include <vm/hat_sfmmu.h>

/*
 * Sun4v OS Suspend
 *
 * Provides a means to suspend a sun4v guest domain by pausing CPUs and then
 * calling into the HV to initiate a suspension. Suspension is sequenced
 * externally by calling suspend_pre, suspend_start, and suspend_post.
 * suspend_pre and suspend_post are meant to perform any special operations
 * that should be done before or after a suspend/resume operation. e.g.,
 * callbacks to cluster software to disable heartbeat monitoring before the
 * system is suspended. suspend_start prepares kernel services to be suspended
 * and then suspends the domain by calling hv_guest_suspend.
 *
 * Special Handling for %tick and %stick Registers
 *
 * After a suspend/resume operation, the %tick and %stick registers may have
 * jumped forwards or backwards. The delta is assumed to be consistent across
 * all CPUs, within the negligible level of %tick and %stick variation
 * acceptable on a cold boot. In order to maintain increasing %tick and %stick
 * counter values without exposing large positive or negative jumps to kernel
 * or user code, a %tick and %stick offset is used. Kernel reads of these
 * counters return the sum of the hardware register counter and offset
 * variable. After a suspend/resume operation, user reads of %tick or %stick
 * are emulated. Suspend code enables emulation by setting the
 * %{tick,stick}.NPT fields which trigger a privileged instruction access
 * trap whenever the registers are read from user mode. If emulation has been
 * enabled, the trap handler emulates the instruction. Emulation is only
 * enabled during a successful suspend/resume operation. When emulation is
 * enabled, CPUs that are DR'd into the system will have their
 * %{tick,stick}.NPT bits set to 1 as well.
79023e71deSHaik Aftandilian */ 80023e71deSHaik Aftandilian 81023e71deSHaik Aftandilian extern u_longlong_t gettick(void); /* returns %stick */ 82023e71deSHaik Aftandilian extern uint64_t gettick_counter(void); /* returns %tick */ 83023e71deSHaik Aftandilian extern uint64_t gettick_npt(void); 84023e71deSHaik Aftandilian extern uint64_t getstick_npt(void); 85023e71deSHaik Aftandilian extern int mach_descrip_update(void); 86023e71deSHaik Aftandilian extern cpuset_t cpu_ready_set; 87023e71deSHaik Aftandilian extern uint64_t native_tick_offset; 88023e71deSHaik Aftandilian extern uint64_t native_stick_offset; 89023e71deSHaik Aftandilian 90023e71deSHaik Aftandilian /* 91023e71deSHaik Aftandilian * Global Sun Cluster pre/post callbacks. 92023e71deSHaik Aftandilian */ 93023e71deSHaik Aftandilian const char *(*cl_suspend_error_decode)(int); 94023e71deSHaik Aftandilian int (*cl_suspend_pre_callback)(void); 95023e71deSHaik Aftandilian int (*cl_suspend_post_callback)(void); 96023e71deSHaik Aftandilian #define SC_PRE_FAIL_STR_FMT "Sun Cluster pre-suspend failure: %d" 97023e71deSHaik Aftandilian #define SC_POST_FAIL_STR_FMT "Sun Cluster post-suspend failure: %d" 98023e71deSHaik Aftandilian #define SC_FAIL_STR_MAX 256 99023e71deSHaik Aftandilian 100023e71deSHaik Aftandilian /* 101023e71deSHaik Aftandilian * The minimum major and minor version of the HSVC_GROUP_CORE API group 102023e71deSHaik Aftandilian * required in order to use OS suspend. 103023e71deSHaik Aftandilian */ 104023e71deSHaik Aftandilian #define SUSPEND_CORE_MAJOR 1 105023e71deSHaik Aftandilian #define SUSPEND_CORE_MINOR 2 106023e71deSHaik Aftandilian 107023e71deSHaik Aftandilian /* 108023e71deSHaik Aftandilian * By default, sun4v OS suspend is supported if the required HV version 109023e71deSHaik Aftandilian * is present. suspend_disabled should be set on platforms that do not 110023e71deSHaik Aftandilian * allow OS suspend regardless of whether or not the HV supports it. 
111023e71deSHaik Aftandilian * It can also be set in /etc/system. 112023e71deSHaik Aftandilian */ 113023e71deSHaik Aftandilian static int suspend_disabled = 0; 114023e71deSHaik Aftandilian 115023e71deSHaik Aftandilian /* 116023e71deSHaik Aftandilian * Controls whether or not user-land tick and stick register emulation 117023e71deSHaik Aftandilian * will be enabled following a successful suspend operation. 118023e71deSHaik Aftandilian */ 119023e71deSHaik Aftandilian static int enable_user_tick_stick_emulation = 1; 120023e71deSHaik Aftandilian 121023e71deSHaik Aftandilian /* 122023e71deSHaik Aftandilian * Indicates whether or not tick and stick emulation is currently active. 123023e71deSHaik Aftandilian * After a successful suspend operation, if emulation is enabled, this 124023e71deSHaik Aftandilian * variable is set to B_TRUE. Global scope to allow emulation code to 125023e71deSHaik Aftandilian * check if emulation is active. 126023e71deSHaik Aftandilian */ 127023e71deSHaik Aftandilian boolean_t tick_stick_emulation_active = B_FALSE; 128023e71deSHaik Aftandilian 129023e71deSHaik Aftandilian /* 130d2365b01SPavel Tatashin * When non-zero, after a successful suspend and resume, cpunodes, CPU HW 131d2365b01SPavel Tatashin * sharing data structures, and processor groups will be updated using 132d2365b01SPavel Tatashin * information from the updated MD. 133023e71deSHaik Aftandilian */ 134023e71deSHaik Aftandilian static int suspend_update_cpu_mappings = 1; 135023e71deSHaik Aftandilian 136023e71deSHaik Aftandilian /* 137023e71deSHaik Aftandilian * DBG and DBG_PROM() macro. 
138023e71deSHaik Aftandilian */ 139023e71deSHaik Aftandilian #ifdef DEBUG 140023e71deSHaik Aftandilian 141023e71deSHaik Aftandilian static int suspend_debug_flag = 0; 142023e71deSHaik Aftandilian 143023e71deSHaik Aftandilian #define DBG_PROM \ 144023e71deSHaik Aftandilian if (suspend_debug_flag) \ 145023e71deSHaik Aftandilian prom_printf 146023e71deSHaik Aftandilian 147023e71deSHaik Aftandilian #define DBG \ 148023e71deSHaik Aftandilian if (suspend_debug_flag) \ 149023e71deSHaik Aftandilian suspend_debug 150023e71deSHaik Aftandilian 151023e71deSHaik Aftandilian static void 152023e71deSHaik Aftandilian suspend_debug(const char *fmt, ...) 153023e71deSHaik Aftandilian { 154023e71deSHaik Aftandilian char buf[512]; 155023e71deSHaik Aftandilian va_list ap; 156023e71deSHaik Aftandilian 157023e71deSHaik Aftandilian va_start(ap, fmt); 158023e71deSHaik Aftandilian (void) vsprintf(buf, fmt, ap); 159023e71deSHaik Aftandilian va_end(ap); 160023e71deSHaik Aftandilian 161023e71deSHaik Aftandilian cmn_err(CE_NOTE, "%s", buf); 162023e71deSHaik Aftandilian } 163023e71deSHaik Aftandilian 164023e71deSHaik Aftandilian #else /* DEBUG */ 165023e71deSHaik Aftandilian 166023e71deSHaik Aftandilian #define DBG_PROM 167023e71deSHaik Aftandilian #define DBG 168023e71deSHaik Aftandilian 169023e71deSHaik Aftandilian #endif /* DEBUG */ 170023e71deSHaik Aftandilian 171023e71deSHaik Aftandilian /* 172023e71deSHaik Aftandilian * Return true if the HV supports OS suspend and if suspend has not been 173023e71deSHaik Aftandilian * disabled on this platform. 
174023e71deSHaik Aftandilian */ 175023e71deSHaik Aftandilian boolean_t 176023e71deSHaik Aftandilian suspend_supported(void) 177023e71deSHaik Aftandilian { 178023e71deSHaik Aftandilian uint64_t major, minor; 179023e71deSHaik Aftandilian 180023e71deSHaik Aftandilian if (suspend_disabled) 181023e71deSHaik Aftandilian return (B_FALSE); 182023e71deSHaik Aftandilian 183023e71deSHaik Aftandilian if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0) 184023e71deSHaik Aftandilian return (B_FALSE); 185023e71deSHaik Aftandilian 186023e71deSHaik Aftandilian return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) || 187023e71deSHaik Aftandilian (major > SUSPEND_CORE_MAJOR)); 188023e71deSHaik Aftandilian } 189023e71deSHaik Aftandilian 190023e71deSHaik Aftandilian /* 191023e71deSHaik Aftandilian * Given a source tick and stick value, set the tick and stick offsets such 192023e71deSHaik Aftandilian * that the (current physical register value + offset == source value). 193023e71deSHaik Aftandilian */ 194023e71deSHaik Aftandilian static void 195023e71deSHaik Aftandilian set_tick_offsets(uint64_t source_tick, uint64_t source_stick) 196023e71deSHaik Aftandilian { 197023e71deSHaik Aftandilian uint64_t target_tick; 198023e71deSHaik Aftandilian uint64_t target_stick; 199023e71deSHaik Aftandilian 200023e71deSHaik Aftandilian native_tick_offset = 0; 201023e71deSHaik Aftandilian native_stick_offset = 0; 202023e71deSHaik Aftandilian 203023e71deSHaik Aftandilian target_tick = gettick_counter(); /* returns %tick */ 204023e71deSHaik Aftandilian target_stick = gettick(); /* returns %stick */ 205023e71deSHaik Aftandilian 206023e71deSHaik Aftandilian native_tick_offset = source_tick - target_tick; 207023e71deSHaik Aftandilian native_stick_offset = source_stick - target_stick; 208023e71deSHaik Aftandilian } 209023e71deSHaik Aftandilian 210023e71deSHaik Aftandilian /* 211023e71deSHaik Aftandilian * Set the {tick,stick}.NPT field to 1 on this CPU. 
212023e71deSHaik Aftandilian */ 213023e71deSHaik Aftandilian static void 214023e71deSHaik Aftandilian enable_tick_stick_npt(void) 215023e71deSHaik Aftandilian { 216c1374a13SSurya Prakki (void) hv_stick_set_npt(1); 217c1374a13SSurya Prakki (void) hv_tick_set_npt(1); 218023e71deSHaik Aftandilian } 219023e71deSHaik Aftandilian 220023e71deSHaik Aftandilian /* 221023e71deSHaik Aftandilian * Synchronize a CPU's {tick,stick}.NPT fields with the current state 222023e71deSHaik Aftandilian * of the system. This is used when a CPU is DR'd into the system. 223023e71deSHaik Aftandilian */ 224023e71deSHaik Aftandilian void 225023e71deSHaik Aftandilian suspend_sync_tick_stick_npt(void) 226023e71deSHaik Aftandilian { 227023e71deSHaik Aftandilian if (tick_stick_emulation_active) { 228023e71deSHaik Aftandilian DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id); 229c1374a13SSurya Prakki (void) hv_stick_set_npt(1); 230c1374a13SSurya Prakki (void) hv_tick_set_npt(1); 231023e71deSHaik Aftandilian } else { 232023e71deSHaik Aftandilian ASSERT(gettick_npt() == 0); 233023e71deSHaik Aftandilian ASSERT(getstick_npt() == 0); 234023e71deSHaik Aftandilian } 235023e71deSHaik Aftandilian } 236023e71deSHaik Aftandilian 237023e71deSHaik Aftandilian /* 238023e71deSHaik Aftandilian * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW 239023e71deSHaik Aftandilian * sharing data structures, and processor groups. 
240023e71deSHaik Aftandilian */ 241023e71deSHaik Aftandilian static void 242023e71deSHaik Aftandilian update_cpu_mappings(void) 243023e71deSHaik Aftandilian { 244023e71deSHaik Aftandilian md_t *mdp; 245023e71deSHaik Aftandilian processorid_t id; 246023e71deSHaik Aftandilian cpu_t *cp; 247023e71deSHaik Aftandilian cpu_pg_t *pgps[NCPU]; 248023e71deSHaik Aftandilian 249023e71deSHaik Aftandilian if ((mdp = md_get_handle()) == NULL) { 250023e71deSHaik Aftandilian DBG("suspend: md_get_handle failed"); 251023e71deSHaik Aftandilian return; 252023e71deSHaik Aftandilian } 253023e71deSHaik Aftandilian 254023e71deSHaik Aftandilian DBG("suspend: updating CPU mappings"); 255023e71deSHaik Aftandilian 256023e71deSHaik Aftandilian mutex_enter(&cpu_lock); 257023e71deSHaik Aftandilian 258023e71deSHaik Aftandilian setup_chip_mappings(mdp); 259023e71deSHaik Aftandilian setup_exec_unit_mappings(mdp); 260023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 261023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 262023e71deSHaik Aftandilian continue; 263023e71deSHaik Aftandilian cpu_map_exec_units(cp); 264023e71deSHaik Aftandilian } 265023e71deSHaik Aftandilian 266023e71deSHaik Aftandilian /* 267023e71deSHaik Aftandilian * Re-calculate processor groups. 268023e71deSHaik Aftandilian * 269023e71deSHaik Aftandilian * First tear down all PG information before adding any new PG 270023e71deSHaik Aftandilian * information derived from the MD we just downloaded. We must 271023e71deSHaik Aftandilian * call pg_cpu_inactive and pg_cpu_active with CPUs paused and 272023e71deSHaik Aftandilian * we want to minimize the number of times pause_cpus is called. 273023e71deSHaik Aftandilian * Inactivating all CPUs would leave PGs without any active CPUs, 274023e71deSHaik Aftandilian * so while CPUs are paused, call pg_cpu_inactive and swap in the 275023e71deSHaik Aftandilian * bootstrap PG structure saving the original PG structure to be 276023e71deSHaik Aftandilian * fini'd afterwards. 
This prevents the dispatcher from encountering 277023e71deSHaik Aftandilian * PGs in which all CPUs are inactive. 278023e71deSHaik Aftandilian */ 279023e71deSHaik Aftandilian pause_cpus(NULL); 280023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 281023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 282023e71deSHaik Aftandilian continue; 283023e71deSHaik Aftandilian pg_cpu_inactive(cp); 284023e71deSHaik Aftandilian pgps[id] = cp->cpu_pg; 285023e71deSHaik Aftandilian pg_cpu_bootstrap(cp); 286023e71deSHaik Aftandilian } 287023e71deSHaik Aftandilian start_cpus(); 288023e71deSHaik Aftandilian 289023e71deSHaik Aftandilian /* 290023e71deSHaik Aftandilian * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are 291023e71deSHaik Aftandilian * not paused. Use two separate loops here so that we do not 292023e71deSHaik Aftandilian * initialize PG data for CPUs until all the old PG data structures 293023e71deSHaik Aftandilian * are torn down. 294023e71deSHaik Aftandilian */ 295023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 296023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 297023e71deSHaik Aftandilian continue; 298023e71deSHaik Aftandilian pg_cpu_fini(cp, pgps[id]); 299*183ef8a1SHaik Aftandilian mpo_cpu_remove(id); 300023e71deSHaik Aftandilian } 301023e71deSHaik Aftandilian 302023e71deSHaik Aftandilian /* 303023e71deSHaik Aftandilian * Initialize PG data for each CPU, but leave the bootstrapped 304023e71deSHaik Aftandilian * PG structure in place to avoid running with any PGs containing 305023e71deSHaik Aftandilian * nothing but inactive CPUs. 
306023e71deSHaik Aftandilian */ 307023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 308023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 309023e71deSHaik Aftandilian continue; 310*183ef8a1SHaik Aftandilian mpo_cpu_add(mdp, id); 311023e71deSHaik Aftandilian pgps[id] = pg_cpu_init(cp, B_TRUE); 312023e71deSHaik Aftandilian } 313023e71deSHaik Aftandilian 314023e71deSHaik Aftandilian /* 315023e71deSHaik Aftandilian * Now that PG data has been initialized for all CPUs in the 316023e71deSHaik Aftandilian * system, replace the bootstrapped PG structure with the 317023e71deSHaik Aftandilian * initialized PG structure and call pg_cpu_active for each CPU. 318023e71deSHaik Aftandilian */ 319023e71deSHaik Aftandilian pause_cpus(NULL); 320023e71deSHaik Aftandilian for (id = 0; id < NCPU; id++) { 321023e71deSHaik Aftandilian if ((cp = cpu_get(id)) == NULL) 322023e71deSHaik Aftandilian continue; 323023e71deSHaik Aftandilian cp->cpu_pg = pgps[id]; 324023e71deSHaik Aftandilian pg_cpu_active(cp); 325023e71deSHaik Aftandilian } 326023e71deSHaik Aftandilian start_cpus(); 327023e71deSHaik Aftandilian 328023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 329023e71deSHaik Aftandilian 330023e71deSHaik Aftandilian (void) md_fini_handle(mdp); 331023e71deSHaik Aftandilian } 332023e71deSHaik Aftandilian 333023e71deSHaik Aftandilian /* 334023e71deSHaik Aftandilian * Wrapper for the Sun Cluster error decoding function. 
335023e71deSHaik Aftandilian */ 336023e71deSHaik Aftandilian static int 337023e71deSHaik Aftandilian cluster_error_decode(int error, char *error_reason, size_t max_reason_len) 338023e71deSHaik Aftandilian { 339023e71deSHaik Aftandilian const char *decoded; 340023e71deSHaik Aftandilian size_t decoded_len; 341023e71deSHaik Aftandilian 342023e71deSHaik Aftandilian ASSERT(error_reason != NULL); 343023e71deSHaik Aftandilian ASSERT(max_reason_len > 0); 344023e71deSHaik Aftandilian 345023e71deSHaik Aftandilian max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX); 346023e71deSHaik Aftandilian 347023e71deSHaik Aftandilian if (cl_suspend_error_decode == NULL) 348023e71deSHaik Aftandilian return (-1); 349023e71deSHaik Aftandilian 350023e71deSHaik Aftandilian if ((decoded = (*cl_suspend_error_decode)(error)) == NULL) 351023e71deSHaik Aftandilian return (-1); 352023e71deSHaik Aftandilian 353023e71deSHaik Aftandilian /* Get number of non-NULL bytes */ 354023e71deSHaik Aftandilian if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0) 355023e71deSHaik Aftandilian return (-1); 356023e71deSHaik Aftandilian 357023e71deSHaik Aftandilian bcopy(decoded, error_reason, decoded_len); 358023e71deSHaik Aftandilian 359023e71deSHaik Aftandilian /* 360023e71deSHaik Aftandilian * The error string returned from cl_suspend_error_decode 361023e71deSHaik Aftandilian * should be NULL-terminated, but set the terminator here 362023e71deSHaik Aftandilian * because we only copied non-NULL bytes. If the decoded 363023e71deSHaik Aftandilian * string was not NULL-terminated, this guarantees that 364023e71deSHaik Aftandilian * error_reason will be. 365023e71deSHaik Aftandilian */ 366023e71deSHaik Aftandilian error_reason[decoded_len] = '\0'; 367023e71deSHaik Aftandilian 368023e71deSHaik Aftandilian return (0); 369023e71deSHaik Aftandilian } 370023e71deSHaik Aftandilian 371023e71deSHaik Aftandilian /* 372023e71deSHaik Aftandilian * Wrapper for the Sun Cluster pre-suspend callback. 
373023e71deSHaik Aftandilian */ 374023e71deSHaik Aftandilian static int 375023e71deSHaik Aftandilian cluster_pre_wrapper(char *error_reason, size_t max_reason_len) 376023e71deSHaik Aftandilian { 377023e71deSHaik Aftandilian int rv = 0; 378023e71deSHaik Aftandilian 379023e71deSHaik Aftandilian if (cl_suspend_pre_callback != NULL) { 380023e71deSHaik Aftandilian rv = (*cl_suspend_pre_callback)(); 381023e71deSHaik Aftandilian DBG("suspend: cl_suspend_pre_callback returned %d", rv); 382023e71deSHaik Aftandilian if (rv != 0 && error_reason != NULL && max_reason_len > 0) { 383023e71deSHaik Aftandilian if (cluster_error_decode(rv, error_reason, 384023e71deSHaik Aftandilian max_reason_len)) { 385023e71deSHaik Aftandilian (void) snprintf(error_reason, max_reason_len, 386023e71deSHaik Aftandilian SC_PRE_FAIL_STR_FMT, rv); 387023e71deSHaik Aftandilian } 388023e71deSHaik Aftandilian } 389023e71deSHaik Aftandilian } 390023e71deSHaik Aftandilian 391023e71deSHaik Aftandilian return (rv); 392023e71deSHaik Aftandilian } 393023e71deSHaik Aftandilian 394023e71deSHaik Aftandilian /* 395023e71deSHaik Aftandilian * Wrapper for the Sun Cluster post-suspend callback. 
396023e71deSHaik Aftandilian */ 397023e71deSHaik Aftandilian static int 398023e71deSHaik Aftandilian cluster_post_wrapper(char *error_reason, size_t max_reason_len) 399023e71deSHaik Aftandilian { 400023e71deSHaik Aftandilian int rv = 0; 401023e71deSHaik Aftandilian 402023e71deSHaik Aftandilian if (cl_suspend_post_callback != NULL) { 403023e71deSHaik Aftandilian rv = (*cl_suspend_post_callback)(); 404023e71deSHaik Aftandilian DBG("suspend: cl_suspend_post_callback returned %d", rv); 405023e71deSHaik Aftandilian if (rv != 0 && error_reason != NULL && max_reason_len > 0) { 406023e71deSHaik Aftandilian if (cluster_error_decode(rv, error_reason, 407023e71deSHaik Aftandilian max_reason_len)) { 408023e71deSHaik Aftandilian (void) snprintf(error_reason, 409023e71deSHaik Aftandilian max_reason_len, SC_POST_FAIL_STR_FMT, rv); 410023e71deSHaik Aftandilian } 411023e71deSHaik Aftandilian } 412023e71deSHaik Aftandilian } 413023e71deSHaik Aftandilian 414023e71deSHaik Aftandilian return (rv); 415023e71deSHaik Aftandilian } 416023e71deSHaik Aftandilian 417023e71deSHaik Aftandilian /* 418023e71deSHaik Aftandilian * Execute pre-suspend callbacks preparing the system for a suspend operation. 419023e71deSHaik Aftandilian * Returns zero on success, non-zero on failure. Sets the recovered argument 420023e71deSHaik Aftandilian * to indicate whether or not callbacks could be undone in the event of a 421023e71deSHaik Aftandilian * failure--if callbacks were successfully undone, *recovered is set to B_TRUE, 422023e71deSHaik Aftandilian * otherwise *recovered is set to B_FALSE. Must be called successfully before 423023e71deSHaik Aftandilian * suspend_start can be called. Callers should first call suspend_support to 424023e71deSHaik Aftandilian * determine if OS suspend is supported. 
425023e71deSHaik Aftandilian */ 426023e71deSHaik Aftandilian int 427023e71deSHaik Aftandilian suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered) 428023e71deSHaik Aftandilian { 429023e71deSHaik Aftandilian int rv; 430023e71deSHaik Aftandilian 431023e71deSHaik Aftandilian ASSERT(recovered != NULL); 432023e71deSHaik Aftandilian 433023e71deSHaik Aftandilian /* 434023e71deSHaik Aftandilian * Return an error if suspend_pre is erreoneously called 435023e71deSHaik Aftandilian * when OS suspend is not supported. 436023e71deSHaik Aftandilian */ 437023e71deSHaik Aftandilian ASSERT(suspend_supported()); 438023e71deSHaik Aftandilian if (!suspend_supported()) { 439023e71deSHaik Aftandilian DBG("suspend: suspend_pre called without suspend support"); 440023e71deSHaik Aftandilian *recovered = B_TRUE; 441023e71deSHaik Aftandilian return (ENOTSUP); 442023e71deSHaik Aftandilian } 443023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 444023e71deSHaik Aftandilian 445023e71deSHaik Aftandilian rv = cluster_pre_wrapper(error_reason, max_reason_len); 446023e71deSHaik Aftandilian 447023e71deSHaik Aftandilian /* 448023e71deSHaik Aftandilian * At present, only one pre-suspend operation exists. 449023e71deSHaik Aftandilian * If it fails, no recovery needs to be done. 450023e71deSHaik Aftandilian */ 451023e71deSHaik Aftandilian if (rv != 0 && recovered != NULL) 452023e71deSHaik Aftandilian *recovered = B_TRUE; 453023e71deSHaik Aftandilian 454023e71deSHaik Aftandilian return (rv); 455023e71deSHaik Aftandilian } 456023e71deSHaik Aftandilian 457023e71deSHaik Aftandilian /* 458023e71deSHaik Aftandilian * Execute post-suspend callbacks. Returns zero on success, non-zero on 459023e71deSHaik Aftandilian * failure. Must be called after suspend_start is called, regardless of 460023e71deSHaik Aftandilian * whether or not suspend_start is successful. 
461023e71deSHaik Aftandilian */ 462023e71deSHaik Aftandilian int 463023e71deSHaik Aftandilian suspend_post(char *error_reason, size_t max_reason_len) 464023e71deSHaik Aftandilian { 465023e71deSHaik Aftandilian ASSERT(suspend_supported()); 466023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 467023e71deSHaik Aftandilian return (cluster_post_wrapper(error_reason, max_reason_len)); 468023e71deSHaik Aftandilian } 469023e71deSHaik Aftandilian 470023e71deSHaik Aftandilian /* 471023e71deSHaik Aftandilian * Suspends the OS by pausing CPUs and calling into the HV to initiate 472023e71deSHaik Aftandilian * the suspend. When the HV routine hv_guest_suspend returns, the system 473023e71deSHaik Aftandilian * will be resumed. Must be called after a successful call to suspend_pre. 474023e71deSHaik Aftandilian * suspend_post must be called after suspend_start, whether or not 475023e71deSHaik Aftandilian * suspend_start returns an error. 476023e71deSHaik Aftandilian */ 477023e71deSHaik Aftandilian /*ARGSUSED*/ 478023e71deSHaik Aftandilian int 479023e71deSHaik Aftandilian suspend_start(char *error_reason, size_t max_reason_len) 480023e71deSHaik Aftandilian { 481023e71deSHaik Aftandilian uint64_t source_tick; 482023e71deSHaik Aftandilian uint64_t source_stick; 483023e71deSHaik Aftandilian uint64_t rv; 484023e71deSHaik Aftandilian timestruc_t source_tod; 485023e71deSHaik Aftandilian int spl; 486023e71deSHaik Aftandilian 487023e71deSHaik Aftandilian ASSERT(suspend_supported()); 488023e71deSHaik Aftandilian DBG("suspend: %s", __func__); 489023e71deSHaik Aftandilian 490d2365b01SPavel Tatashin sfmmu_ctxdoms_lock(); 491d2365b01SPavel Tatashin 492023e71deSHaik Aftandilian mutex_enter(&cpu_lock); 493023e71deSHaik Aftandilian 494023e71deSHaik Aftandilian /* Suspend the watchdog */ 495023e71deSHaik Aftandilian watchdog_suspend(); 496023e71deSHaik Aftandilian 497023e71deSHaik Aftandilian /* Record the TOD */ 498023e71deSHaik Aftandilian mutex_enter(&tod_lock); 499023e71deSHaik Aftandilian 
source_tod = tod_get(); 500023e71deSHaik Aftandilian mutex_exit(&tod_lock); 501023e71deSHaik Aftandilian 502023e71deSHaik Aftandilian /* Pause all other CPUs */ 503023e71deSHaik Aftandilian pause_cpus(NULL); 504023e71deSHaik Aftandilian DBG_PROM("suspend: CPUs paused\n"); 505023e71deSHaik Aftandilian 506023e71deSHaik Aftandilian /* Suspend cyclics and disable interrupts */ 507023e71deSHaik Aftandilian cyclic_suspend(); 508023e71deSHaik Aftandilian DBG_PROM("suspend: cyclics suspended\n"); 509023e71deSHaik Aftandilian spl = spl8(); 510023e71deSHaik Aftandilian 511023e71deSHaik Aftandilian source_tick = gettick_counter(); 512023e71deSHaik Aftandilian source_stick = gettick(); 513023e71deSHaik Aftandilian DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick); 514023e71deSHaik Aftandilian DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick); 515023e71deSHaik Aftandilian 516023e71deSHaik Aftandilian /* 517023e71deSHaik Aftandilian * Call into the HV to initiate the suspend. 518023e71deSHaik Aftandilian * hv_guest_suspend() returns after the guest has been 519023e71deSHaik Aftandilian * resumed or if the suspend operation failed or was 520023e71deSHaik Aftandilian * cancelled. After a successful suspend, the %tick and 521023e71deSHaik Aftandilian * %stick registers may have changed by an amount that is 522023e71deSHaik Aftandilian * not proportional to the amount of time that has passed. 523023e71deSHaik Aftandilian * They may have jumped forwards or backwards. This jump 524023e71deSHaik Aftandilian * must be uniform across all CPUs and we operate under 525023e71deSHaik Aftandilian * the assumption that it is (maintaining two global offset 526023e71deSHaik Aftandilian * variables--one for %tick and one for %stick.) 527023e71deSHaik Aftandilian */ 528023e71deSHaik Aftandilian DBG_PROM("suspend: suspending... 
\n"); 529023e71deSHaik Aftandilian rv = hv_guest_suspend(); 530023e71deSHaik Aftandilian if (rv != 0) { 531023e71deSHaik Aftandilian splx(spl); 532023e71deSHaik Aftandilian cyclic_resume(); 533023e71deSHaik Aftandilian start_cpus(); 534023e71deSHaik Aftandilian watchdog_resume(); 535023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 536d2365b01SPavel Tatashin sfmmu_ctxdoms_unlock(); 537023e71deSHaik Aftandilian DBG("suspend: failed, rv: %ld\n", rv); 538023e71deSHaik Aftandilian return (rv); 539023e71deSHaik Aftandilian } 540023e71deSHaik Aftandilian 541023e71deSHaik Aftandilian /* Update the global tick and stick offsets */ 542023e71deSHaik Aftandilian set_tick_offsets(source_tick, source_stick); 543023e71deSHaik Aftandilian 544023e71deSHaik Aftandilian /* Ensure new offsets are globally visible before resuming CPUs */ 545023e71deSHaik Aftandilian membar_sync(); 546023e71deSHaik Aftandilian 547023e71deSHaik Aftandilian /* Enable interrupts */ 548023e71deSHaik Aftandilian splx(spl); 549023e71deSHaik Aftandilian 550023e71deSHaik Aftandilian /* Set the {%tick,%stick}.NPT bits on all CPUs */ 551023e71deSHaik Aftandilian if (enable_user_tick_stick_emulation) { 552023e71deSHaik Aftandilian xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL); 553023e71deSHaik Aftandilian xt_sync(cpu_ready_set); 554023e71deSHaik Aftandilian ASSERT(gettick_npt() != 0); 555023e71deSHaik Aftandilian ASSERT(getstick_npt() != 0); 556023e71deSHaik Aftandilian } 557023e71deSHaik Aftandilian 558023e71deSHaik Aftandilian /* If emulation is enabled, but not currently active, enable it */ 559023e71deSHaik Aftandilian if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) { 560023e71deSHaik Aftandilian tick_stick_emulation_active = B_TRUE; 561023e71deSHaik Aftandilian } 562023e71deSHaik Aftandilian 563d2365b01SPavel Tatashin sfmmu_ctxdoms_remove(); 564d2365b01SPavel Tatashin 565023e71deSHaik Aftandilian /* Resume cyclics, unpause CPUs */ 566023e71deSHaik Aftandilian cyclic_resume(); 
567023e71deSHaik Aftandilian start_cpus(); 568023e71deSHaik Aftandilian 569023e71deSHaik Aftandilian /* Set the TOD */ 570023e71deSHaik Aftandilian mutex_enter(&tod_lock); 571023e71deSHaik Aftandilian tod_set(source_tod); 572023e71deSHaik Aftandilian mutex_exit(&tod_lock); 573023e71deSHaik Aftandilian 574023e71deSHaik Aftandilian /* Re-enable the watchdog */ 575023e71deSHaik Aftandilian watchdog_resume(); 576023e71deSHaik Aftandilian 577023e71deSHaik Aftandilian mutex_exit(&cpu_lock); 578023e71deSHaik Aftandilian 579d2365b01SPavel Tatashin /* Download the latest MD */ 580d2365b01SPavel Tatashin if ((rv = mach_descrip_update()) != 0) 581d2365b01SPavel Tatashin cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld", 582d2365b01SPavel Tatashin rv); 583d2365b01SPavel Tatashin 584d2365b01SPavel Tatashin sfmmu_ctxdoms_update(); 585d2365b01SPavel Tatashin sfmmu_ctxdoms_unlock(); 586d2365b01SPavel Tatashin 587023e71deSHaik Aftandilian /* Get new MD, update CPU mappings/relationships */ 588023e71deSHaik Aftandilian if (suspend_update_cpu_mappings) 589023e71deSHaik Aftandilian update_cpu_mappings(); 590023e71deSHaik Aftandilian 591023e71deSHaik Aftandilian DBG("suspend: target tick: 0x%lx", gettick_counter()); 592023e71deSHaik Aftandilian DBG("suspend: target stick: 0x%llx", gettick()); 593023e71deSHaik Aftandilian DBG("suspend: user %%tick/%%stick emulation is %d", 594023e71deSHaik Aftandilian tick_stick_emulation_active); 595023e71deSHaik Aftandilian DBG("suspend: finished"); 596023e71deSHaik Aftandilian 597023e71deSHaik Aftandilian return (0); 598023e71deSHaik Aftandilian } 599