17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 550a83466Sjwahlig * Common Development and Distribution License (the "License"). 650a83466Sjwahlig * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22ddbc368aSRick Mesta * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* 277c478bd9Sstevel@tonic-gate * NFS Version 4 state recovery code. 
287c478bd9Sstevel@tonic-gate */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #include <nfs/nfs4_clnt.h> 317c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h> 327c478bd9Sstevel@tonic-gate #include <nfs/rnode4.h> 337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 347c478bd9Sstevel@tonic-gate #include <sys/cred.h> 357c478bd9Sstevel@tonic-gate #include <sys/systm.h> 367c478bd9Sstevel@tonic-gate #include <sys/flock.h> 377c478bd9Sstevel@tonic-gate #include <sys/dnlc.h> 387c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 397c478bd9Sstevel@tonic-gate #include <sys/disp.h> 407c478bd9Sstevel@tonic-gate #include <sys/list.h> 417c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 422f172c55SRobert Thurlow #include <sys/mount.h> 432f172c55SRobert Thurlow #include <sys/door.h> 442f172c55SRobert Thurlow #include <nfs/nfssys.h> 452f172c55SRobert Thurlow #include <nfs/nfsid_map.h> 462f172c55SRobert Thurlow #include <nfs/nfs4_idmap_impl.h> 477c478bd9Sstevel@tonic-gate 487c478bd9Sstevel@tonic-gate extern r4hashq_t *rtable4; 497c478bd9Sstevel@tonic-gate 507c478bd9Sstevel@tonic-gate /* 517c478bd9Sstevel@tonic-gate * Information that describes what needs to be done for recovery. It is 527c478bd9Sstevel@tonic-gate * passed to a client recovery thread as well as passed to various recovery 537c478bd9Sstevel@tonic-gate * routines. rc_mi, rc_vp1, and rc_vp2 refer to the filesystem and 547c478bd9Sstevel@tonic-gate * vnode(s) affected by recovery. rc_vp1 and rc_vp2 are references (use 557c478bd9Sstevel@tonic-gate * VN_HOLD) or NULL. rc_lost_rqst contains information about the lost 567c478bd9Sstevel@tonic-gate * lock or open/close request, and it holds reference counts for the 577c478bd9Sstevel@tonic-gate * various objects (vnode, etc.). The recovery thread also uses flags set 587c478bd9Sstevel@tonic-gate * in the mntinfo4_t or vnode_t to tell it what to do. 
rc_error is used 597c478bd9Sstevel@tonic-gate * to save the error that originally triggered the recovery event -- will 607c478bd9Sstevel@tonic-gate * later be used to set mi_error if recovery doesn't work. rc_bseqid_rqst 617c478bd9Sstevel@tonic-gate * contains information about the request that got NFS4ERR_BAD_SEQID, and 627c478bd9Sstevel@tonic-gate * it holds reference count for the various objects (vnode, open owner, 637c478bd9Sstevel@tonic-gate * open stream, lock owner). 647c478bd9Sstevel@tonic-gate */ 657c478bd9Sstevel@tonic-gate 667c478bd9Sstevel@tonic-gate typedef struct { 677c478bd9Sstevel@tonic-gate mntinfo4_t *rc_mi; 687c478bd9Sstevel@tonic-gate vnode_t *rc_vp1; 697c478bd9Sstevel@tonic-gate vnode_t *rc_vp2; 707c478bd9Sstevel@tonic-gate nfs4_recov_t rc_action; 717c478bd9Sstevel@tonic-gate stateid4 rc_stateid; 727c478bd9Sstevel@tonic-gate bool_t rc_srv_reboot; /* server has rebooted */ 737c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *rc_lost_rqst; 747c478bd9Sstevel@tonic-gate nfs4_error_t rc_orig_errors; /* original errors causing recovery */ 757c478bd9Sstevel@tonic-gate int rc_error; 767c478bd9Sstevel@tonic-gate nfs4_bseqid_entry_t *rc_bseqid_rqst; 772f172c55SRobert Thurlow vnode_t *rc_moved_vp; 782f172c55SRobert Thurlow char *rc_moved_nm; 797c478bd9Sstevel@tonic-gate } recov_info_t; 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate /* 827c478bd9Sstevel@tonic-gate * How long to wait before trying again if there is an error doing 837c478bd9Sstevel@tonic-gate * recovery, in seconds. 847c478bd9Sstevel@tonic-gate */ 857c478bd9Sstevel@tonic-gate 867c478bd9Sstevel@tonic-gate static int recov_err_delay = 1; 877c478bd9Sstevel@tonic-gate 887c478bd9Sstevel@tonic-gate /* 897c478bd9Sstevel@tonic-gate * How long to wait when processing NFS4ERR_GRACE or NFS4ERR_DELAY 907c478bd9Sstevel@tonic-gate * errors. Expressed in seconds. 
Default is defined as 917c478bd9Sstevel@tonic-gate * NFS4ERR_DELAY_TIME and this variable is initialized in nfs4_subr_init() 927c478bd9Sstevel@tonic-gate */ 937c478bd9Sstevel@tonic-gate time_t nfs4err_delay_time = 0; 947c478bd9Sstevel@tonic-gate 957c478bd9Sstevel@tonic-gate /* 967c478bd9Sstevel@tonic-gate * Tuneable to limit how many time "exempt" ops go OTW 977c478bd9Sstevel@tonic-gate * after a recovery error. Exempt op hints are OH_CLOSE, 987c478bd9Sstevel@tonic-gate * OH_LOCKU, OH_DELEGRETURN. These previously always went 997c478bd9Sstevel@tonic-gate * OTW even after rnode was "dead" due to recovery errors. 1007c478bd9Sstevel@tonic-gate * 1017c478bd9Sstevel@tonic-gate * The tuneable below limits the number of times a start_fop 1027c478bd9Sstevel@tonic-gate * invocation will retry the exempt hints. After the limit 1037c478bd9Sstevel@tonic-gate * is reached, nfs4_start_fop will return an error just like 1047c478bd9Sstevel@tonic-gate * it would for non-exempt op hints. 1057c478bd9Sstevel@tonic-gate */ 1067c478bd9Sstevel@tonic-gate int nfs4_max_recov_error_retry = 3; 1077c478bd9Sstevel@tonic-gate 1087c478bd9Sstevel@tonic-gate /* 1097c478bd9Sstevel@tonic-gate * Number of seconds the recovery thread should pause before retry when the 1107c478bd9Sstevel@tonic-gate * filesystem has been forcibly unmounted. 1117c478bd9Sstevel@tonic-gate */ 1127c478bd9Sstevel@tonic-gate 1137c478bd9Sstevel@tonic-gate int nfs4_unmount_delay = 1; 1147c478bd9Sstevel@tonic-gate 1157c478bd9Sstevel@tonic-gate #ifdef DEBUG 1167c478bd9Sstevel@tonic-gate 1177c478bd9Sstevel@tonic-gate /* 1187c478bd9Sstevel@tonic-gate * How long to wait (in seconds) between recovery operations on a given 1197c478bd9Sstevel@tonic-gate * file. Normally zero, but could be set longer for testing purposes. 
1207c478bd9Sstevel@tonic-gate */ 1217c478bd9Sstevel@tonic-gate static int nfs4_recovdelay = 0; 1227c478bd9Sstevel@tonic-gate 1237c478bd9Sstevel@tonic-gate /* 1247c478bd9Sstevel@tonic-gate * Switch that controls whether to go into the debugger when recovery 1257c478bd9Sstevel@tonic-gate * fails. 1267c478bd9Sstevel@tonic-gate */ 1277c478bd9Sstevel@tonic-gate static int nfs4_fail_recov_stop = 0; 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate /* 1307c478bd9Sstevel@tonic-gate * Tuneables to debug client namespace interaction with server 1317c478bd9Sstevel@tonic-gate * mount points: 1327c478bd9Sstevel@tonic-gate * 1337c478bd9Sstevel@tonic-gate * nfs4_srvmnt_fail_cnt: 1347c478bd9Sstevel@tonic-gate * number of times EACCES returned because client 1357c478bd9Sstevel@tonic-gate * attempted to cross server mountpoint 1367c478bd9Sstevel@tonic-gate * 1377c478bd9Sstevel@tonic-gate * nfs4_srvmnt_debug: 1387c478bd9Sstevel@tonic-gate * trigger console printf whenever client attempts 1397c478bd9Sstevel@tonic-gate * to cross server mountpoint 1407c478bd9Sstevel@tonic-gate */ 1417c478bd9Sstevel@tonic-gate int nfs4_srvmnt_fail_cnt = 0; 1427c478bd9Sstevel@tonic-gate int nfs4_srvmnt_debug = 0; 1437c478bd9Sstevel@tonic-gate #endif 1447c478bd9Sstevel@tonic-gate 1452f172c55SRobert Thurlow extern zone_key_t nfs4clnt_zone_key; 1462f172c55SRobert Thurlow 1477c478bd9Sstevel@tonic-gate /* forward references, in alphabetic order */ 1487c478bd9Sstevel@tonic-gate static void close_after_open_resend(vnode_t *, cred_t *, uint32_t, 1497c478bd9Sstevel@tonic-gate nfs4_error_t *); 1507c478bd9Sstevel@tonic-gate static void errs_to_action(recov_info_t *, 1517c478bd9Sstevel@tonic-gate nfs4_server_t *, mntinfo4_t *, stateid4 *, nfs4_lost_rqst_t *, int, 1527c478bd9Sstevel@tonic-gate nfs_opnum4, nfs4_bseqid_entry_t *); 1537c478bd9Sstevel@tonic-gate static void flush_reinstate(nfs4_lost_rqst_t *); 1547c478bd9Sstevel@tonic-gate static void free_milist(mntinfo4_t **, int); 
1557c478bd9Sstevel@tonic-gate static mntinfo4_t **make_milist(nfs4_server_t *, int *); 1567c478bd9Sstevel@tonic-gate static int nfs4_check_recov_err(vnode_t *, nfs4_op_hint_t, 1577c478bd9Sstevel@tonic-gate nfs4_recov_state_t *, int, char *); 1587c478bd9Sstevel@tonic-gate static char *nfs4_getsrvnames(mntinfo4_t *, size_t *); 1597c478bd9Sstevel@tonic-gate static void nfs4_recov_fh_fail(vnode_t *, int, nfsstat4); 1607c478bd9Sstevel@tonic-gate static void nfs4_recov_thread(recov_info_t *); 1617c478bd9Sstevel@tonic-gate static void nfs4_remove_lost_rqsts(mntinfo4_t *, nfs4_server_t *); 1627c478bd9Sstevel@tonic-gate static void nfs4_resend_lost_rqsts(recov_info_t *, nfs4_server_t *); 1637c478bd9Sstevel@tonic-gate static cred_t *pid_to_cr(pid_t); 1647c478bd9Sstevel@tonic-gate static void reclaim_one_lock(vnode_t *, flock64_t *, nfs4_error_t *, int *); 1657c478bd9Sstevel@tonic-gate static void recov_bad_seqid(recov_info_t *); 1667c478bd9Sstevel@tonic-gate static void recov_badstate(recov_info_t *, vnode_t *, nfsstat4); 1677c478bd9Sstevel@tonic-gate static void recov_clientid(recov_info_t *, nfs4_server_t *); 1687c478bd9Sstevel@tonic-gate static void recov_done(mntinfo4_t *, recov_info_t *); 1697c478bd9Sstevel@tonic-gate static void recov_filehandle(nfs4_recov_t, mntinfo4_t *, vnode_t *); 1707c478bd9Sstevel@tonic-gate static void recov_newserver(recov_info_t *, nfs4_server_t **, bool_t *); 1717c478bd9Sstevel@tonic-gate static void recov_openfiles(recov_info_t *, nfs4_server_t *); 1727c478bd9Sstevel@tonic-gate static void recov_stale(mntinfo4_t *, vnode_t *); 1737c478bd9Sstevel@tonic-gate static void nfs4_free_lost_rqst(nfs4_lost_rqst_t *, nfs4_server_t *); 1747c478bd9Sstevel@tonic-gate static void recov_throttle(recov_info_t *, vnode_t *); 175ed076bbfSMarcel Telka static void relock_skip_pid(vnode_t *, locklist_t *, pid_t); 1767c478bd9Sstevel@tonic-gate static void resend_lock(nfs4_lost_rqst_t *, nfs4_error_t *); 1777c478bd9Sstevel@tonic-gate static void 
resend_one_op(nfs4_lost_rqst_t *, nfs4_error_t *, mntinfo4_t *, 1787c478bd9Sstevel@tonic-gate nfs4_server_t *); 1797c478bd9Sstevel@tonic-gate static void save_bseqid_rqst(nfs4_bseqid_entry_t *, recov_info_t *); 1807c478bd9Sstevel@tonic-gate static void start_recovery(recov_info_t *, mntinfo4_t *, vnode_t *, vnode_t *, 1812f172c55SRobert Thurlow nfs4_server_t *, vnode_t *, char *); 1827c478bd9Sstevel@tonic-gate static void start_recovery_action(nfs4_recov_t, bool_t, mntinfo4_t *, vnode_t *, 1837c478bd9Sstevel@tonic-gate vnode_t *); 1847c478bd9Sstevel@tonic-gate static int wait_for_recovery(mntinfo4_t *, nfs4_op_hint_t); 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate /* 1877c478bd9Sstevel@tonic-gate * Return non-zero if the given errno, status, and rpc status codes 1887c478bd9Sstevel@tonic-gate * in the nfs4_error_t indicate that client recovery is needed. 1897c478bd9Sstevel@tonic-gate * "stateful" indicates whether the call that got the error establishes or 1907c478bd9Sstevel@tonic-gate * removes state on the server (open, close, lock, unlock, delegreturn). 1917c478bd9Sstevel@tonic-gate */ 1927c478bd9Sstevel@tonic-gate 1937c478bd9Sstevel@tonic-gate int 1947c478bd9Sstevel@tonic-gate nfs4_needs_recovery(nfs4_error_t *ep, bool_t stateful, vfs_t *vfsp) 1957c478bd9Sstevel@tonic-gate { 1967c478bd9Sstevel@tonic-gate int recov = 0; 1977c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 1987c478bd9Sstevel@tonic-gate 1997c478bd9Sstevel@tonic-gate /* 2007c478bd9Sstevel@tonic-gate * Try failover if the error values justify it and if 2017c478bd9Sstevel@tonic-gate * it's a failover mount. Don't try if the mount is in 2027c478bd9Sstevel@tonic-gate * progress, failures are handled explicitly by nfs4rootvp. 
2037c478bd9Sstevel@tonic-gate */ 2047c478bd9Sstevel@tonic-gate if (nfs4_try_failover(ep)) { 2057c478bd9Sstevel@tonic-gate mi = VFTOMI4(vfsp); 2067c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 2077c478bd9Sstevel@tonic-gate recov = FAILOVER_MOUNT4(mi) && !(mi->mi_flags & MI4_MOUNTING); 2087c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 2097c478bd9Sstevel@tonic-gate if (recov) 2107c478bd9Sstevel@tonic-gate return (recov); 2117c478bd9Sstevel@tonic-gate } 2127c478bd9Sstevel@tonic-gate 2137c478bd9Sstevel@tonic-gate if (ep->error == EINTR || NFS4_FRC_UNMT_ERR(ep->error, vfsp)) { 2147c478bd9Sstevel@tonic-gate /* 2157c478bd9Sstevel@tonic-gate * The server may have gotten the request, so for stateful 2167c478bd9Sstevel@tonic-gate * ops we need to resynchronize and possibly back out the 2177c478bd9Sstevel@tonic-gate * op. 2187c478bd9Sstevel@tonic-gate */ 2197c478bd9Sstevel@tonic-gate return (stateful); 2207c478bd9Sstevel@tonic-gate } 2217c478bd9Sstevel@tonic-gate if (ep->error != 0) 2227c478bd9Sstevel@tonic-gate return (0); 2237c478bd9Sstevel@tonic-gate 2247c478bd9Sstevel@tonic-gate /* stat values are listed alphabetically */ 2257c478bd9Sstevel@tonic-gate /* 2267c478bd9Sstevel@tonic-gate * There are two lists here: the errors for which we have code, and 2277c478bd9Sstevel@tonic-gate * the errors for which we plan to have code before FCS. For the 2287c478bd9Sstevel@tonic-gate * second list, print a warning message but don't attempt recovery. 
2297c478bd9Sstevel@tonic-gate */ 2307c478bd9Sstevel@tonic-gate switch (ep->stat) { 2317c478bd9Sstevel@tonic-gate case NFS4ERR_BADHANDLE: 2327c478bd9Sstevel@tonic-gate case NFS4ERR_BAD_SEQID: 2337c478bd9Sstevel@tonic-gate case NFS4ERR_BAD_STATEID: 2347c478bd9Sstevel@tonic-gate case NFS4ERR_DELAY: 2357c478bd9Sstevel@tonic-gate case NFS4ERR_EXPIRED: 2367c478bd9Sstevel@tonic-gate case NFS4ERR_FHEXPIRED: 2377c478bd9Sstevel@tonic-gate case NFS4ERR_GRACE: 2387c478bd9Sstevel@tonic-gate case NFS4ERR_OLD_STATEID: 2397c478bd9Sstevel@tonic-gate case NFS4ERR_RESOURCE: 2407c478bd9Sstevel@tonic-gate case NFS4ERR_STALE_CLIENTID: 2417c478bd9Sstevel@tonic-gate case NFS4ERR_STALE_STATEID: 2427c478bd9Sstevel@tonic-gate case NFS4ERR_WRONGSEC: 2437c478bd9Sstevel@tonic-gate case NFS4ERR_STALE: 2447c478bd9Sstevel@tonic-gate recov = 1; 2457c478bd9Sstevel@tonic-gate break; 2467c478bd9Sstevel@tonic-gate #ifdef DEBUG 2477c478bd9Sstevel@tonic-gate case NFS4ERR_LEASE_MOVED: 2487c478bd9Sstevel@tonic-gate case NFS4ERR_MOVED: 2497c478bd9Sstevel@tonic-gate zcmn_err(VFTOMI4(vfsp)->mi_zone->zone_id, 2507c478bd9Sstevel@tonic-gate CE_WARN, "!Can't yet recover from NFS status %d", 2517c478bd9Sstevel@tonic-gate ep->stat); 2527c478bd9Sstevel@tonic-gate break; 2537c478bd9Sstevel@tonic-gate #endif 2547c478bd9Sstevel@tonic-gate } 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate return (recov); 2577c478bd9Sstevel@tonic-gate } 2587c478bd9Sstevel@tonic-gate 2597c478bd9Sstevel@tonic-gate /* 2607c478bd9Sstevel@tonic-gate * Some operations such as DELEGRETURN want to avoid invoking 2617c478bd9Sstevel@tonic-gate * recovery actions that will only mark the file dead. If 2627c478bd9Sstevel@tonic-gate * better handlers are invoked for any of these errors, this 2637c478bd9Sstevel@tonic-gate * routine should be modified. 
2647c478bd9Sstevel@tonic-gate */ 2657c478bd9Sstevel@tonic-gate int 2667c478bd9Sstevel@tonic-gate nfs4_recov_marks_dead(nfsstat4 status) 2677c478bd9Sstevel@tonic-gate { 2687c478bd9Sstevel@tonic-gate if (status == NFS4ERR_BAD_SEQID || 2697c478bd9Sstevel@tonic-gate status == NFS4ERR_EXPIRED || 2707c478bd9Sstevel@tonic-gate status == NFS4ERR_BAD_STATEID || 2717c478bd9Sstevel@tonic-gate status == NFS4ERR_OLD_STATEID) 2727c478bd9Sstevel@tonic-gate return (1); 2737c478bd9Sstevel@tonic-gate return (0); 2747c478bd9Sstevel@tonic-gate } 2757c478bd9Sstevel@tonic-gate 2767c478bd9Sstevel@tonic-gate /* 2777c478bd9Sstevel@tonic-gate * Transfer the state recovery information in recovp to mi's resend queue, 2787c478bd9Sstevel@tonic-gate * and mark mi as having a lost state request. 2797c478bd9Sstevel@tonic-gate */ 2807c478bd9Sstevel@tonic-gate static void 2817c478bd9Sstevel@tonic-gate nfs4_enqueue_lost_rqst(recov_info_t *recovp, mntinfo4_t *mi) 2827c478bd9Sstevel@tonic-gate { 2837c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *lrp = recovp->rc_lost_rqst; 2847c478bd9Sstevel@tonic-gate 2857c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 2867c478bd9Sstevel@tonic-gate nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 2877c478bd9Sstevel@tonic-gate 2887c478bd9Sstevel@tonic-gate ASSERT(lrp != NULL && lrp->lr_op != 0); 2897c478bd9Sstevel@tonic-gate 2907c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2917c478bd9Sstevel@tonic-gate "nfs4_enqueue_lost_rqst %p, op %d", 2927c478bd9Sstevel@tonic-gate (void *)lrp, lrp->lr_op)); 2937c478bd9Sstevel@tonic-gate 2947c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 2957c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_LOST_STATE; 2967c478bd9Sstevel@tonic-gate if (lrp->lr_putfirst) 2977c478bd9Sstevel@tonic-gate list_insert_head(&mi->mi_lost_state, lrp); 2987c478bd9Sstevel@tonic-gate else 2997c478bd9Sstevel@tonic-gate list_insert_tail(&mi->mi_lost_state, lrp); 3007c478bd9Sstevel@tonic-gate 
recovp->rc_lost_rqst = NULL; 3017c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 3027c478bd9Sstevel@tonic-gate 3037c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_LOST_STATE, mi, NULL, lrp->lr_op, lrp->lr_vp, 3047c478bd9Sstevel@tonic-gate lrp->lr_dvp, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 3057c478bd9Sstevel@tonic-gate } 3067c478bd9Sstevel@tonic-gate 3077c478bd9Sstevel@tonic-gate /* 3087c478bd9Sstevel@tonic-gate * Transfer the bad seqid recovery information in recovp to mi's 3097c478bd9Sstevel@tonic-gate * bad seqid queue, and mark mi as having a bad seqid request. 3107c478bd9Sstevel@tonic-gate */ 3117c478bd9Sstevel@tonic-gate void 3127c478bd9Sstevel@tonic-gate enqueue_bseqid_rqst(recov_info_t *recovp, mntinfo4_t *mi) 3137c478bd9Sstevel@tonic-gate { 3147c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 3157c478bd9Sstevel@tonic-gate nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 3167c478bd9Sstevel@tonic-gate ASSERT(recovp->rc_bseqid_rqst != NULL); 3177c478bd9Sstevel@tonic-gate 3187c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 3197c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_BAD_SEQID; 3207c478bd9Sstevel@tonic-gate list_insert_tail(&mi->mi_bseqid_list, recovp->rc_bseqid_rqst); 3217c478bd9Sstevel@tonic-gate recovp->rc_bseqid_rqst = NULL; 3227c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 3237c478bd9Sstevel@tonic-gate } 3247c478bd9Sstevel@tonic-gate 3257c478bd9Sstevel@tonic-gate /* 3267c478bd9Sstevel@tonic-gate * Initiate recovery. 3277c478bd9Sstevel@tonic-gate * 3287c478bd9Sstevel@tonic-gate * The nfs4_error_t contains the return codes that triggered a recovery 3297c478bd9Sstevel@tonic-gate * attempt. mi, vp1, and vp2 refer to the filesystem and files that were 3307c478bd9Sstevel@tonic-gate * being operated on. vp1 and vp2 may be NULL. 3317c478bd9Sstevel@tonic-gate * 3327c478bd9Sstevel@tonic-gate * Multiple calls are okay. 
If recovery is already underway, the call 3337c478bd9Sstevel@tonic-gate * updates the information about what state needs recovery but does not 3347c478bd9Sstevel@tonic-gate * start a new thread. The caller should hold mi->mi_recovlock as a reader 3357c478bd9Sstevel@tonic-gate * for proper synchronization with any recovery thread. 3367c478bd9Sstevel@tonic-gate * 3377c478bd9Sstevel@tonic-gate * This will return TRUE if recovery was aborted, and FALSE otherwise. 3387c478bd9Sstevel@tonic-gate */ 3397c478bd9Sstevel@tonic-gate bool_t 3407c478bd9Sstevel@tonic-gate nfs4_start_recovery(nfs4_error_t *ep, mntinfo4_t *mi, vnode_t *vp1, 3417c478bd9Sstevel@tonic-gate vnode_t *vp2, stateid4 *sid, nfs4_lost_rqst_t *lost_rqstp, nfs_opnum4 op, 3422f172c55SRobert Thurlow nfs4_bseqid_entry_t *bsep, vnode_t *moved_vp, char *moved_nm) 3437c478bd9Sstevel@tonic-gate { 3447c478bd9Sstevel@tonic-gate recov_info_t *recovp; 3457c478bd9Sstevel@tonic-gate nfs4_server_t *sp; 3467c478bd9Sstevel@tonic-gate bool_t abort = FALSE; 3477c478bd9Sstevel@tonic-gate bool_t gone = FALSE; 3487c478bd9Sstevel@tonic-gate 349108322fbScarlsonj ASSERT(nfs_zone() == mi->mi_zone); 3507c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 3517c478bd9Sstevel@tonic-gate /* 3527c478bd9Sstevel@tonic-gate * If there is lost state, we need to kick off recovery even if the 3537c478bd9Sstevel@tonic-gate * filesystem has been unmounted or the zone is shutting down. 
3547c478bd9Sstevel@tonic-gate */ 3557c478bd9Sstevel@tonic-gate gone = FS_OR_ZONE_GONE4(mi->mi_vfsp); 3567c478bd9Sstevel@tonic-gate if (gone) { 3577c478bd9Sstevel@tonic-gate ASSERT(ep->error != EINTR || lost_rqstp != NULL); 3587c478bd9Sstevel@tonic-gate if (ep->error == EIO && lost_rqstp == NULL) { 3597c478bd9Sstevel@tonic-gate /* failed due to forced unmount, no new lost state */ 3607c478bd9Sstevel@tonic-gate abort = TRUE; 3617c478bd9Sstevel@tonic-gate } 3627c478bd9Sstevel@tonic-gate if ((ep->error == 0 || ep->error == ETIMEDOUT) && 3637c478bd9Sstevel@tonic-gate !(mi->mi_recovflags & MI4R_LOST_STATE)) { 3647c478bd9Sstevel@tonic-gate /* some other failure, no existing lost state */ 3657c478bd9Sstevel@tonic-gate abort = TRUE; 3667c478bd9Sstevel@tonic-gate } 3677c478bd9Sstevel@tonic-gate if (abort) { 3687c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 3697c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 3707c478bd9Sstevel@tonic-gate "nfs4_start_recovery: fs unmounted")); 3717c478bd9Sstevel@tonic-gate return (TRUE); 3727c478bd9Sstevel@tonic-gate } 3737c478bd9Sstevel@tonic-gate } 3747c478bd9Sstevel@tonic-gate mi->mi_in_recovery++; 3757c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 3767c478bd9Sstevel@tonic-gate 3777c478bd9Sstevel@tonic-gate recovp = kmem_alloc(sizeof (recov_info_t), KM_SLEEP); 3787c478bd9Sstevel@tonic-gate recovp->rc_orig_errors = *ep; 3797c478bd9Sstevel@tonic-gate sp = find_nfs4_server(mi); 380b9238976Sth199096 errs_to_action(recovp, sp, mi, sid, lost_rqstp, gone, op, bsep); 3817c478bd9Sstevel@tonic-gate if (sp != NULL) 3827c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 3832f172c55SRobert Thurlow start_recovery(recovp, mi, vp1, vp2, sp, moved_vp, moved_nm); 3847c478bd9Sstevel@tonic-gate if (sp != NULL) 3857c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); 3867c478bd9Sstevel@tonic-gate return (FALSE); 3877c478bd9Sstevel@tonic-gate } 3887c478bd9Sstevel@tonic-gate 3897c478bd9Sstevel@tonic-gate /* 
3907c478bd9Sstevel@tonic-gate * Internal version of nfs4_start_recovery. The difference is that the 3917c478bd9Sstevel@tonic-gate * caller specifies the recovery action, rather than the errors leading to 3927c478bd9Sstevel@tonic-gate * recovery. 3937c478bd9Sstevel@tonic-gate */ 3947c478bd9Sstevel@tonic-gate static void 3957c478bd9Sstevel@tonic-gate start_recovery_action(nfs4_recov_t what, bool_t reboot, mntinfo4_t *mi, 3967c478bd9Sstevel@tonic-gate vnode_t *vp1, vnode_t *vp2) 3977c478bd9Sstevel@tonic-gate { 3987c478bd9Sstevel@tonic-gate recov_info_t *recovp; 3997c478bd9Sstevel@tonic-gate 400108322fbScarlsonj ASSERT(nfs_zone() == mi->mi_zone); 4017c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4027c478bd9Sstevel@tonic-gate mi->mi_in_recovery++; 4037c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4047c478bd9Sstevel@tonic-gate 4057c478bd9Sstevel@tonic-gate recovp = kmem_zalloc(sizeof (recov_info_t), KM_SLEEP); 4067c478bd9Sstevel@tonic-gate recovp->rc_action = what; 4077c478bd9Sstevel@tonic-gate recovp->rc_srv_reboot = reboot; 4087c478bd9Sstevel@tonic-gate recovp->rc_error = EIO; 4092f172c55SRobert Thurlow start_recovery(recovp, mi, vp1, vp2, NULL, NULL, NULL); 4107c478bd9Sstevel@tonic-gate } 4117c478bd9Sstevel@tonic-gate 4127c478bd9Sstevel@tonic-gate static void 4137c478bd9Sstevel@tonic-gate start_recovery(recov_info_t *recovp, mntinfo4_t *mi, 4142f172c55SRobert Thurlow vnode_t *vp1, vnode_t *vp2, nfs4_server_t *sp, 4152f172c55SRobert Thurlow vnode_t *moved_vp, char *moved_nm) 4167c478bd9Sstevel@tonic-gate { 4177c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 4187c478bd9Sstevel@tonic-gate "start_recovery: mi %p, what %s", (void*)mi, 4197c478bd9Sstevel@tonic-gate nfs4_recov_action_to_str(recovp->rc_action))); 4207c478bd9Sstevel@tonic-gate 4217c478bd9Sstevel@tonic-gate /* 4227c478bd9Sstevel@tonic-gate * Bump the reference on the vfs so that we can pass it to the 4237c478bd9Sstevel@tonic-gate * recovery thread. 
4247c478bd9Sstevel@tonic-gate */ 4257c478bd9Sstevel@tonic-gate VFS_HOLD(mi->mi_vfsp); 42650a83466Sjwahlig MI4_HOLD(mi); 4277c478bd9Sstevel@tonic-gate again: 4287c478bd9Sstevel@tonic-gate switch (recovp->rc_action) { 4297c478bd9Sstevel@tonic-gate case NR_FAILOVER: 4307c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 4317c478bd9Sstevel@tonic-gate nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 4327c478bd9Sstevel@tonic-gate if (mi->mi_servers->sv_next == NULL) 4337c478bd9Sstevel@tonic-gate goto out_no_thread; 4347c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4357c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_NEED_NEW_SERVER; 4367c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4377c478bd9Sstevel@tonic-gate 4387c478bd9Sstevel@tonic-gate if (recovp->rc_lost_rqst != NULL) 4397c478bd9Sstevel@tonic-gate nfs4_enqueue_lost_rqst(recovp, mi); 4407c478bd9Sstevel@tonic-gate break; 4417c478bd9Sstevel@tonic-gate 4427c478bd9Sstevel@tonic-gate case NR_CLIENTID: 4437c478bd9Sstevel@tonic-gate /* 4447c478bd9Sstevel@tonic-gate * If the filesystem has been unmounted, punt. 4457c478bd9Sstevel@tonic-gate */ 4467c478bd9Sstevel@tonic-gate if (sp == NULL) 4477c478bd9Sstevel@tonic-gate goto out_no_thread; 4487c478bd9Sstevel@tonic-gate 4497c478bd9Sstevel@tonic-gate /* 4507c478bd9Sstevel@tonic-gate * If nobody else is working on the clientid, mark the 4517c478bd9Sstevel@tonic-gate * clientid as being no longer set. Then mark the specific 4527c478bd9Sstevel@tonic-gate * filesystem being worked on. 
4537c478bd9Sstevel@tonic-gate */ 4547c478bd9Sstevel@tonic-gate if (!nfs4_server_in_recovery(sp)) { 4557c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 4567c478bd9Sstevel@tonic-gate sp->s_flags &= ~N4S_CLIENTID_SET; 4577c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 4587c478bd9Sstevel@tonic-gate } 4597c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 4607c478bd9Sstevel@tonic-gate nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 4617c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4627c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_NEED_CLIENTID; 4637c478bd9Sstevel@tonic-gate if (recovp->rc_srv_reboot) 4647c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_SRV_REBOOT; 4657c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4667c478bd9Sstevel@tonic-gate break; 4677c478bd9Sstevel@tonic-gate 4687c478bd9Sstevel@tonic-gate case NR_OPENFILES: 4697c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 4707c478bd9Sstevel@tonic-gate nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 4717c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4727c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_REOPEN_FILES; 4737c478bd9Sstevel@tonic-gate if (recovp->rc_srv_reboot) 4747c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_SRV_REBOOT; 4757c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4767c478bd9Sstevel@tonic-gate break; 4777c478bd9Sstevel@tonic-gate 4787c478bd9Sstevel@tonic-gate case NR_WRONGSEC: 4797c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 4807c478bd9Sstevel@tonic-gate nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 4817c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4827c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_NEED_SECINFO; 4837c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4847c478bd9Sstevel@tonic-gate break; 4857c478bd9Sstevel@tonic-gate 4867c478bd9Sstevel@tonic-gate case NR_EXPIRED: 4877c478bd9Sstevel@tonic-gate if (vp1 != NULL) 
4887c478bd9Sstevel@tonic-gate recov_badstate(recovp, vp1, NFS4ERR_EXPIRED); 4897c478bd9Sstevel@tonic-gate if (vp2 != NULL) 4907c478bd9Sstevel@tonic-gate recov_badstate(recovp, vp2, NFS4ERR_EXPIRED); 4917c478bd9Sstevel@tonic-gate goto out_no_thread; /* no further recovery possible */ 4927c478bd9Sstevel@tonic-gate 4937c478bd9Sstevel@tonic-gate case NR_BAD_STATEID: 4947c478bd9Sstevel@tonic-gate if (vp1 != NULL) 4957c478bd9Sstevel@tonic-gate recov_badstate(recovp, vp1, NFS4ERR_BAD_STATEID); 4967c478bd9Sstevel@tonic-gate if (vp2 != NULL) 4977c478bd9Sstevel@tonic-gate recov_badstate(recovp, vp2, NFS4ERR_BAD_STATEID); 4987c478bd9Sstevel@tonic-gate goto out_no_thread; /* no further recovery possible */ 4997c478bd9Sstevel@tonic-gate 5007c478bd9Sstevel@tonic-gate case NR_FHEXPIRED: 5017c478bd9Sstevel@tonic-gate case NR_BADHANDLE: 5027c478bd9Sstevel@tonic-gate if (vp1 != NULL) 5037c478bd9Sstevel@tonic-gate recov_throttle(recovp, vp1); 5047c478bd9Sstevel@tonic-gate if (vp2 != NULL) 5057c478bd9Sstevel@tonic-gate recov_throttle(recovp, vp2); 5067c478bd9Sstevel@tonic-gate /* 5077c478bd9Sstevel@tonic-gate * Recover the filehandle now, rather than using a 5087c478bd9Sstevel@tonic-gate * separate thread. We can do this because filehandle 5097c478bd9Sstevel@tonic-gate * recovery is independent of any other state, and because 5107c478bd9Sstevel@tonic-gate * we know that we are not competing with the recovery 5117c478bd9Sstevel@tonic-gate * thread at this time. recov_filehandle will deal with 5127c478bd9Sstevel@tonic-gate * threads that are competing to recover this filehandle. 
5137c478bd9Sstevel@tonic-gate */ 5147c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 5157c478bd9Sstevel@tonic-gate nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 5167c478bd9Sstevel@tonic-gate if (vp1 != NULL) 5177c478bd9Sstevel@tonic-gate recov_filehandle(recovp->rc_action, mi, vp1); 5187c478bd9Sstevel@tonic-gate if (vp2 != NULL) 5197c478bd9Sstevel@tonic-gate recov_filehandle(recovp->rc_action, mi, vp2); 5207c478bd9Sstevel@tonic-gate goto out_no_thread; /* no further recovery needed */ 5217c478bd9Sstevel@tonic-gate 5227c478bd9Sstevel@tonic-gate case NR_STALE: 5237c478bd9Sstevel@tonic-gate /* 5247c478bd9Sstevel@tonic-gate * NFS4ERR_STALE handling 5257c478bd9Sstevel@tonic-gate * recov_stale() could set MI4R_NEED_NEW_SERVER to 5267c478bd9Sstevel@tonic-gate * indicate that we can and should failover. 5277c478bd9Sstevel@tonic-gate */ 5287c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 5297c478bd9Sstevel@tonic-gate nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 5307c478bd9Sstevel@tonic-gate 5317c478bd9Sstevel@tonic-gate if (vp1 != NULL) 5327c478bd9Sstevel@tonic-gate recov_stale(mi, vp1); 5337c478bd9Sstevel@tonic-gate if (vp2 != NULL) 5347c478bd9Sstevel@tonic-gate recov_stale(mi, vp2); 5357c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 5367c478bd9Sstevel@tonic-gate if ((mi->mi_recovflags & MI4R_NEED_NEW_SERVER) == 0) { 5377c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 5387c478bd9Sstevel@tonic-gate goto out_no_thread; 5397c478bd9Sstevel@tonic-gate } 5407c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 5417c478bd9Sstevel@tonic-gate recovp->rc_action = NR_FAILOVER; 5427c478bd9Sstevel@tonic-gate goto again; 5437c478bd9Sstevel@tonic-gate 5447c478bd9Sstevel@tonic-gate case NR_BAD_SEQID: 5457c478bd9Sstevel@tonic-gate if (recovp->rc_bseqid_rqst) { 5467c478bd9Sstevel@tonic-gate enqueue_bseqid_rqst(recovp, mi); 5477c478bd9Sstevel@tonic-gate break; 5487c478bd9Sstevel@tonic-gate } 
5497c478bd9Sstevel@tonic-gate 5507c478bd9Sstevel@tonic-gate if (vp1 != NULL) 5517c478bd9Sstevel@tonic-gate recov_badstate(recovp, vp1, NFS4ERR_BAD_SEQID); 5527c478bd9Sstevel@tonic-gate if (vp2 != NULL) 5537c478bd9Sstevel@tonic-gate recov_badstate(recovp, vp2, NFS4ERR_BAD_SEQID); 5547c478bd9Sstevel@tonic-gate goto out_no_thread; /* no further recovery possible */ 5557c478bd9Sstevel@tonic-gate 5567c478bd9Sstevel@tonic-gate case NR_OLDSTATEID: 5577c478bd9Sstevel@tonic-gate if (vp1 != NULL) 5587c478bd9Sstevel@tonic-gate recov_badstate(recovp, vp1, NFS4ERR_OLD_STATEID); 5597c478bd9Sstevel@tonic-gate if (vp2 != NULL) 5607c478bd9Sstevel@tonic-gate recov_badstate(recovp, vp2, NFS4ERR_OLD_STATEID); 5617c478bd9Sstevel@tonic-gate goto out_no_thread; /* no further recovery possible */ 5627c478bd9Sstevel@tonic-gate 5637c478bd9Sstevel@tonic-gate case NR_GRACE: 5647c478bd9Sstevel@tonic-gate nfs4_set_grace_wait(mi); 5657c478bd9Sstevel@tonic-gate goto out_no_thread; /* no further action required for GRACE */ 5667c478bd9Sstevel@tonic-gate 5677c478bd9Sstevel@tonic-gate case NR_DELAY: 5687c478bd9Sstevel@tonic-gate if (vp1) 5697c478bd9Sstevel@tonic-gate nfs4_set_delay_wait(vp1); 5707c478bd9Sstevel@tonic-gate goto out_no_thread; /* no further action required for DELAY */ 5717c478bd9Sstevel@tonic-gate 5727c478bd9Sstevel@tonic-gate case NR_LOST_STATE_RQST: 5737c478bd9Sstevel@tonic-gate case NR_LOST_LOCK: 5747c478bd9Sstevel@tonic-gate nfs4_enqueue_lost_rqst(recovp, mi); 5757c478bd9Sstevel@tonic-gate break; 5767c478bd9Sstevel@tonic-gate default: 5777c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_UNEXPECTED_ACTION, mi, NULL, 5787c478bd9Sstevel@tonic-gate recovp->rc_action, NULL, NULL, 0, NULL, 0, TAG_NONE, 5797c478bd9Sstevel@tonic-gate TAG_NONE, 0, 0); 5807c478bd9Sstevel@tonic-gate goto out_no_thread; 5817c478bd9Sstevel@tonic-gate } 5827c478bd9Sstevel@tonic-gate 5837c478bd9Sstevel@tonic-gate /* 5847c478bd9Sstevel@tonic-gate * If either file recently went through the same recovery, wait 
5857c478bd9Sstevel@tonic-gate * awhile. This is in case there is some sort of bug; we might not 5867c478bd9Sstevel@tonic-gate * be able to recover properly, but at least we won't bombard the 5877c478bd9Sstevel@tonic-gate * server with calls, and we won't tie up the client. 5887c478bd9Sstevel@tonic-gate */ 5897c478bd9Sstevel@tonic-gate if (vp1 != NULL) 5907c478bd9Sstevel@tonic-gate recov_throttle(recovp, vp1); 5917c478bd9Sstevel@tonic-gate if (vp2 != NULL) 5927c478bd9Sstevel@tonic-gate recov_throttle(recovp, vp2); 5937c478bd9Sstevel@tonic-gate 5947c478bd9Sstevel@tonic-gate /* 5957c478bd9Sstevel@tonic-gate * If there's already a recovery thread, don't start another one. 5967c478bd9Sstevel@tonic-gate */ 5977c478bd9Sstevel@tonic-gate 5987c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 5997c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI4_RECOV_ACTIV) { 6007c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 6017c478bd9Sstevel@tonic-gate goto out_no_thread; 6027c478bd9Sstevel@tonic-gate } 6037c478bd9Sstevel@tonic-gate mi->mi_flags |= MI4_RECOV_ACTIV; 6047c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 6057c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 6067c478bd9Sstevel@tonic-gate "start_recovery: starting new thread for mi %p", (void*)mi)); 6077c478bd9Sstevel@tonic-gate 6087c478bd9Sstevel@tonic-gate recovp->rc_mi = mi; 6097c478bd9Sstevel@tonic-gate recovp->rc_vp1 = vp1; 6107c478bd9Sstevel@tonic-gate if (vp1 != NULL) { 6117c478bd9Sstevel@tonic-gate ASSERT(VTOMI4(vp1) == mi); 6127c478bd9Sstevel@tonic-gate VN_HOLD(recovp->rc_vp1); 6137c478bd9Sstevel@tonic-gate } 6147c478bd9Sstevel@tonic-gate recovp->rc_vp2 = vp2; 6157c478bd9Sstevel@tonic-gate if (vp2 != NULL) { 6167c478bd9Sstevel@tonic-gate ASSERT(VTOMI4(vp2) == mi); 6177c478bd9Sstevel@tonic-gate VN_HOLD(recovp->rc_vp2); 6187c478bd9Sstevel@tonic-gate } 6192f172c55SRobert Thurlow recovp->rc_moved_vp = moved_vp; 6202f172c55SRobert Thurlow recovp->rc_moved_nm = moved_nm; 
6217c478bd9Sstevel@tonic-gate 6227c478bd9Sstevel@tonic-gate (void) zthread_create(NULL, 0, nfs4_recov_thread, recovp, 0, 6237c478bd9Sstevel@tonic-gate minclsyspri); 6247c478bd9Sstevel@tonic-gate return; 6257c478bd9Sstevel@tonic-gate 6267c478bd9Sstevel@tonic-gate /* not reached by thread creating call */ 6277c478bd9Sstevel@tonic-gate out_no_thread: 6287c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 6297c478bd9Sstevel@tonic-gate mi->mi_in_recovery--; 630e749d04dSjwahlig if (mi->mi_in_recovery == 0) 6317c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_cv_in_recov); 6327c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 6337c478bd9Sstevel@tonic-gate 6347c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 63550a83466Sjwahlig MI4_RELE(mi); 6367c478bd9Sstevel@tonic-gate /* 6377c478bd9Sstevel@tonic-gate * Free up resources that were allocated for us. 6387c478bd9Sstevel@tonic-gate */ 6397c478bd9Sstevel@tonic-gate kmem_free(recovp, sizeof (recov_info_t)); 6407c478bd9Sstevel@tonic-gate } 6417c478bd9Sstevel@tonic-gate 6427c478bd9Sstevel@tonic-gate static int 6437c478bd9Sstevel@tonic-gate nfs4_check_recov_err(vnode_t *vp, nfs4_op_hint_t op, 6447c478bd9Sstevel@tonic-gate nfs4_recov_state_t *rsp, int retry_err_cnt, char *str) 6457c478bd9Sstevel@tonic-gate { 6467c478bd9Sstevel@tonic-gate rnode4_t *rp; 6477c478bd9Sstevel@tonic-gate int error = 0; 6487c478bd9Sstevel@tonic-gate int exempt; 6497c478bd9Sstevel@tonic-gate 6507c478bd9Sstevel@tonic-gate if (vp == NULL) 6517c478bd9Sstevel@tonic-gate return (0); 6527c478bd9Sstevel@tonic-gate 6537c478bd9Sstevel@tonic-gate exempt = (op == OH_CLOSE || op == OH_LOCKU || op == OH_DELEGRETURN); 6547c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 6557c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 6567c478bd9Sstevel@tonic-gate 6577c478bd9Sstevel@tonic-gate /* 6587c478bd9Sstevel@tonic-gate * If there was a recovery error, then allow op hints "exempt" from 6597c478bd9Sstevel@tonic-gate * recov errors to retry (currently 3 times). 
Either r_error or 6607c478bd9Sstevel@tonic-gate * EIO is returned for non-exempt op hints. 6617c478bd9Sstevel@tonic-gate */ 6627c478bd9Sstevel@tonic-gate if (rp->r_flags & R4RECOVERR) { 6637c478bd9Sstevel@tonic-gate if (exempt && rsp->rs_num_retry_despite_err <= 6647c478bd9Sstevel@tonic-gate nfs4_max_recov_error_retry) { 6657c478bd9Sstevel@tonic-gate 6667c478bd9Sstevel@tonic-gate /* 6677c478bd9Sstevel@tonic-gate * Check to make sure that we haven't already inc'd 6687c478bd9Sstevel@tonic-gate * rs_num_retry_despite_err for current nfs4_start_fop 6697c478bd9Sstevel@tonic-gate * instance. We don't want to double inc (if we were 6707c478bd9Sstevel@tonic-gate * called with vp2, then the vp1 call could have 6717c478bd9Sstevel@tonic-gate * already incremented. 6727c478bd9Sstevel@tonic-gate */ 6737c478bd9Sstevel@tonic-gate if (retry_err_cnt == rsp->rs_num_retry_despite_err) 6747c478bd9Sstevel@tonic-gate rsp->rs_num_retry_despite_err++; 6757c478bd9Sstevel@tonic-gate 6767c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 6777c478bd9Sstevel@tonic-gate "nfs4_start_fop: %s %p DEAD, cnt=%d", str, 6787c478bd9Sstevel@tonic-gate (void *)vp, rsp->rs_num_retry_despite_err)); 6797c478bd9Sstevel@tonic-gate } else { 6807c478bd9Sstevel@tonic-gate error = (rp->r_error ? rp->r_error : EIO); 6817c478bd9Sstevel@tonic-gate /* 6827c478bd9Sstevel@tonic-gate * An ESTALE error on a non-regular file is not 6837c478bd9Sstevel@tonic-gate * "sticky". Return the ESTALE error once, but 6847c478bd9Sstevel@tonic-gate * clear the condition to allow future operations 6857c478bd9Sstevel@tonic-gate * to go OTW. This will allow the client to 6867c478bd9Sstevel@tonic-gate * recover if the server has merely unshared then 6877c478bd9Sstevel@tonic-gate * re-shared the file system. For regular files, 6887c478bd9Sstevel@tonic-gate * the unshare has destroyed the open state at the 6897c478bd9Sstevel@tonic-gate * server and we aren't willing to do a reopen (yet). 
6907c478bd9Sstevel@tonic-gate */ 6917c478bd9Sstevel@tonic-gate if (error == ESTALE && vp->v_type != VREG) { 6927c478bd9Sstevel@tonic-gate rp->r_flags &= 6937c478bd9Sstevel@tonic-gate ~(R4RECOVERR|R4RECOVERRP|R4STALE); 6947c478bd9Sstevel@tonic-gate rp->r_error = 0; 6957c478bd9Sstevel@tonic-gate error = ESTALE; 6967c478bd9Sstevel@tonic-gate } 6977c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 6987c478bd9Sstevel@tonic-gate "nfs4_start_fop: %s %p DEAD, cnt=%d error=%d", 6997c478bd9Sstevel@tonic-gate str, (void *)vp, 7007c478bd9Sstevel@tonic-gate rsp->rs_num_retry_despite_err, error)); 7017c478bd9Sstevel@tonic-gate } 7027c478bd9Sstevel@tonic-gate } 703b9238976Sth199096 7047c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 7057c478bd9Sstevel@tonic-gate return (error); 7067c478bd9Sstevel@tonic-gate } 7077c478bd9Sstevel@tonic-gate 7087c478bd9Sstevel@tonic-gate /* 7097c478bd9Sstevel@tonic-gate * Initial setup code that every operation should call if it might invoke 7107c478bd9Sstevel@tonic-gate * client recovery. Can block waiting for recovery to finish on a 7117c478bd9Sstevel@tonic-gate * filesystem. Either vnode ptr can be NULL. 7127c478bd9Sstevel@tonic-gate * 7137c478bd9Sstevel@tonic-gate * Returns 0 if there are no outstanding errors. Can return an 7147c478bd9Sstevel@tonic-gate * errno value under various circumstances (e.g., failed recovery, or 7157c478bd9Sstevel@tonic-gate * interrupted while waiting for recovery to finish). 7167c478bd9Sstevel@tonic-gate * 7177c478bd9Sstevel@tonic-gate * There must be a corresponding call to nfs4_end_op() to free up any locks 7187c478bd9Sstevel@tonic-gate * or resources allocated by this call (assuming this call succeeded), 7197c478bd9Sstevel@tonic-gate * using the same rsp that's passed in here. 
7207c478bd9Sstevel@tonic-gate * 7217c478bd9Sstevel@tonic-gate * The open and lock seqid synchronization must be stopped before calling this 7227c478bd9Sstevel@tonic-gate * function, as it could lead to deadlock when trying to reopen a file or 7237c478bd9Sstevel@tonic-gate * reclaim a lock. The synchronization is obtained with calls to: 7247c478bd9Sstevel@tonic-gate * nfs4_start_open_seqid_sync() 7257c478bd9Sstevel@tonic-gate * nfs4_start_lock_seqid_sync() 7267c478bd9Sstevel@tonic-gate * 7277c478bd9Sstevel@tonic-gate * *startrecovp is set TRUE if the caller should not bother with the 7287c478bd9Sstevel@tonic-gate * over-the-wire call, and just initiate recovery for the given request. 7297c478bd9Sstevel@tonic-gate * This is typically used for state-releasing ops if the filesystem has 7307c478bd9Sstevel@tonic-gate * been forcibly unmounted. startrecovp may be NULL for 7317c478bd9Sstevel@tonic-gate * non-state-releasing ops. 7327c478bd9Sstevel@tonic-gate */ 7337c478bd9Sstevel@tonic-gate 7347c478bd9Sstevel@tonic-gate int 7357c478bd9Sstevel@tonic-gate nfs4_start_fop(mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op, 7367c478bd9Sstevel@tonic-gate nfs4_recov_state_t *rsp, bool_t *startrecovp) 7377c478bd9Sstevel@tonic-gate { 7387c478bd9Sstevel@tonic-gate int error = 0, rerr_cnt; 7397c478bd9Sstevel@tonic-gate nfs4_server_t *sp = NULL; 7407c478bd9Sstevel@tonic-gate nfs4_server_t *tsp; 7417c478bd9Sstevel@tonic-gate nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 7423b895386SPavel Filipensky uint_t droplock_cnt; 7437c478bd9Sstevel@tonic-gate #ifdef DEBUG 7447c478bd9Sstevel@tonic-gate void *fop_caller; 7457c478bd9Sstevel@tonic-gate #endif 7467c478bd9Sstevel@tonic-gate 7477c478bd9Sstevel@tonic-gate ASSERT(vp1 == NULL || vp1->v_vfsp == mi->mi_vfsp); 7487c478bd9Sstevel@tonic-gate ASSERT(vp2 == NULL || vp2->v_vfsp == mi->mi_vfsp); 7497c478bd9Sstevel@tonic-gate 7507c478bd9Sstevel@tonic-gate #ifdef DEBUG 7517c478bd9Sstevel@tonic-gate if ((fop_caller = tsd_get(nfs4_tsd_key)) 
!= NULL) { 7527c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "Missing nfs4_end_fop: last caller %p", 7537c478bd9Sstevel@tonic-gate fop_caller); 7547c478bd9Sstevel@tonic-gate } 7557c478bd9Sstevel@tonic-gate (void) tsd_set(nfs4_tsd_key, caller()); 7567c478bd9Sstevel@tonic-gate #endif 7577c478bd9Sstevel@tonic-gate 7587c478bd9Sstevel@tonic-gate rsp->rs_sp = NULL; 7597c478bd9Sstevel@tonic-gate rsp->rs_flags &= ~NFS4_RS_RENAME_HELD; 7607c478bd9Sstevel@tonic-gate rerr_cnt = rsp->rs_num_retry_despite_err; 7617c478bd9Sstevel@tonic-gate 7627c478bd9Sstevel@tonic-gate /* 7637c478bd9Sstevel@tonic-gate * Process the items that may delay() based on server response 7647c478bd9Sstevel@tonic-gate */ 7657c478bd9Sstevel@tonic-gate error = nfs4_wait_for_grace(mi, rsp); 7667c478bd9Sstevel@tonic-gate if (error) 7677c478bd9Sstevel@tonic-gate goto out; 7687c478bd9Sstevel@tonic-gate 7697c478bd9Sstevel@tonic-gate if (vp1 != NULL) { 7707c478bd9Sstevel@tonic-gate error = nfs4_wait_for_delay(vp1, rsp); 7717c478bd9Sstevel@tonic-gate if (error) 7727c478bd9Sstevel@tonic-gate goto out; 7737c478bd9Sstevel@tonic-gate } 7747c478bd9Sstevel@tonic-gate 7757c478bd9Sstevel@tonic-gate /* Wait for a delegation recall to complete. */ 7767c478bd9Sstevel@tonic-gate 7777c478bd9Sstevel@tonic-gate error = wait_for_recall(vp1, vp2, op, rsp); 7787c478bd9Sstevel@tonic-gate if (error) 7797c478bd9Sstevel@tonic-gate goto out; 7807c478bd9Sstevel@tonic-gate 7817c478bd9Sstevel@tonic-gate /* 7827c478bd9Sstevel@tonic-gate * Wait for any current recovery actions to finish. Note that a 7837c478bd9Sstevel@tonic-gate * recovery thread can still start up after wait_for_recovery() 7847c478bd9Sstevel@tonic-gate * finishes. We don't block out recovery operations until we 7857c478bd9Sstevel@tonic-gate * acquire s_recovlock and mi_recovlock. 
7867c478bd9Sstevel@tonic-gate */ 7877c478bd9Sstevel@tonic-gate error = wait_for_recovery(mi, op); 7887c478bd9Sstevel@tonic-gate if (error) 7897c478bd9Sstevel@tonic-gate goto out; 7907c478bd9Sstevel@tonic-gate 7917c478bd9Sstevel@tonic-gate /* 7927c478bd9Sstevel@tonic-gate * Check to see if the rnode is already marked with a 7937c478bd9Sstevel@tonic-gate * recovery error. If so, return it immediately. But 7947c478bd9Sstevel@tonic-gate * always pass CLOSE, LOCKU, and DELEGRETURN so we can 7957c478bd9Sstevel@tonic-gate * clean up state on the server. 7967c478bd9Sstevel@tonic-gate */ 7977c478bd9Sstevel@tonic-gate 7987c478bd9Sstevel@tonic-gate if (vp1 != NULL) { 7997c478bd9Sstevel@tonic-gate if (error = nfs4_check_recov_err(vp1, op, rsp, rerr_cnt, "vp1")) 8007c478bd9Sstevel@tonic-gate goto out; 8017c478bd9Sstevel@tonic-gate nfs4_check_remap(mi, vp1, NFS4_REMAP_CKATTRS, &e); 8027c478bd9Sstevel@tonic-gate } 8037c478bd9Sstevel@tonic-gate 8047c478bd9Sstevel@tonic-gate if (vp2 != NULL) { 8057c478bd9Sstevel@tonic-gate if (error = nfs4_check_recov_err(vp2, op, rsp, rerr_cnt, "vp2")) 8067c478bd9Sstevel@tonic-gate goto out; 8077c478bd9Sstevel@tonic-gate nfs4_check_remap(mi, vp2, NFS4_REMAP_CKATTRS, &e); 8087c478bd9Sstevel@tonic-gate } 8097c478bd9Sstevel@tonic-gate 8107c478bd9Sstevel@tonic-gate /* 8117c478bd9Sstevel@tonic-gate * The lock order calls for us to acquire s_recovlock before 8127c478bd9Sstevel@tonic-gate * mi_recovlock, but we have to hold mi_recovlock to look up sp (to 8137c478bd9Sstevel@tonic-gate * prevent races with the failover/migration code). So acquire 8147c478bd9Sstevel@tonic-gate * mi_recovlock, look up sp, drop mi_recovlock, acquire 8157c478bd9Sstevel@tonic-gate * s_recovlock and mi_recovlock, then verify that sp is still the 8167c478bd9Sstevel@tonic-gate * right object. XXX Can we find a simpler way to deal with this? 
8177c478bd9Sstevel@tonic-gate */ 8187c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 8197c478bd9Sstevel@tonic-gate mi->mi_flags & MI4_INT)) { 8207c478bd9Sstevel@tonic-gate error = EINTR; 8217c478bd9Sstevel@tonic-gate goto out; 8227c478bd9Sstevel@tonic-gate } 8237c478bd9Sstevel@tonic-gate get_sp: 8247c478bd9Sstevel@tonic-gate sp = find_nfs4_server(mi); 8257c478bd9Sstevel@tonic-gate if (sp != NULL) { 8267c478bd9Sstevel@tonic-gate sp->s_otw_call_count++; 8277c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 8283b895386SPavel Filipensky droplock_cnt = mi->mi_srvset_cnt; 8297c478bd9Sstevel@tonic-gate } 8307c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 8317c478bd9Sstevel@tonic-gate 8327c478bd9Sstevel@tonic-gate if (sp != NULL) { 8337c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&sp->s_recovlock, RW_READER, 8347c478bd9Sstevel@tonic-gate mi->mi_flags & MI4_INT)) { 8357c478bd9Sstevel@tonic-gate error = EINTR; 8367c478bd9Sstevel@tonic-gate goto out; 8377c478bd9Sstevel@tonic-gate } 8387c478bd9Sstevel@tonic-gate } 8397c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 8407c478bd9Sstevel@tonic-gate mi->mi_flags & MI4_INT)) { 8417c478bd9Sstevel@tonic-gate if (sp != NULL) 8427c478bd9Sstevel@tonic-gate nfs_rw_exit(&sp->s_recovlock); 8437c478bd9Sstevel@tonic-gate error = EINTR; 8447c478bd9Sstevel@tonic-gate goto out; 8457c478bd9Sstevel@tonic-gate } 8467c478bd9Sstevel@tonic-gate /* 8477c478bd9Sstevel@tonic-gate * If the mntinfo4_t hasn't changed nfs4_sever_ts then 8487c478bd9Sstevel@tonic-gate * there's no point in double checking to make sure it 8497c478bd9Sstevel@tonic-gate * has switched. 
8507c478bd9Sstevel@tonic-gate */ 8513b895386SPavel Filipensky if (sp == NULL || droplock_cnt != mi->mi_srvset_cnt) { 8527c478bd9Sstevel@tonic-gate tsp = find_nfs4_server(mi); 8537c478bd9Sstevel@tonic-gate if (tsp != sp) { 8547c478bd9Sstevel@tonic-gate /* try again */ 8557c478bd9Sstevel@tonic-gate if (tsp != NULL) { 8567c478bd9Sstevel@tonic-gate mutex_exit(&tsp->s_lock); 8577c478bd9Sstevel@tonic-gate nfs4_server_rele(tsp); 8587c478bd9Sstevel@tonic-gate tsp = NULL; 8597c478bd9Sstevel@tonic-gate } 8607c478bd9Sstevel@tonic-gate if (sp != NULL) { 8617c478bd9Sstevel@tonic-gate nfs_rw_exit(&sp->s_recovlock); 8627c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 8637c478bd9Sstevel@tonic-gate sp->s_otw_call_count--; 8647c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 8657c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); 8667c478bd9Sstevel@tonic-gate sp = NULL; 8677c478bd9Sstevel@tonic-gate } 8687c478bd9Sstevel@tonic-gate goto get_sp; 8697c478bd9Sstevel@tonic-gate } else { 8707c478bd9Sstevel@tonic-gate if (tsp != NULL) { 8717c478bd9Sstevel@tonic-gate mutex_exit(&tsp->s_lock); 8727c478bd9Sstevel@tonic-gate nfs4_server_rele(tsp); 8737c478bd9Sstevel@tonic-gate tsp = NULL; 8747c478bd9Sstevel@tonic-gate } 8757c478bd9Sstevel@tonic-gate } 8767c478bd9Sstevel@tonic-gate } 8777c478bd9Sstevel@tonic-gate 8787c478bd9Sstevel@tonic-gate if (sp != NULL) { 8797c478bd9Sstevel@tonic-gate rsp->rs_sp = sp; 8807c478bd9Sstevel@tonic-gate } 8817c478bd9Sstevel@tonic-gate 8827c478bd9Sstevel@tonic-gate /* 8837c478bd9Sstevel@tonic-gate * If the fileystem uses volatile filehandles, obtain a lock so 8847c478bd9Sstevel@tonic-gate * that we synchronize with renames. Exception: mount operations 8857c478bd9Sstevel@tonic-gate * can change mi_fh_expire_type, which could be a problem, since 8867c478bd9Sstevel@tonic-gate * the end_op code needs to be consistent with the start_op code 8877c478bd9Sstevel@tonic-gate * about mi_rename_lock. 
Since mounts don't compete with renames, 8887c478bd9Sstevel@tonic-gate * it's simpler to just not acquire the rename lock for mounts. 8897c478bd9Sstevel@tonic-gate */ 8907c478bd9Sstevel@tonic-gate if (NFS4_VOLATILE_FH(mi) && op != OH_MOUNT) { 8917c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&mi->mi_rename_lock, 8927c478bd9Sstevel@tonic-gate op == OH_VFH_RENAME ? RW_WRITER : RW_READER, 8937c478bd9Sstevel@tonic-gate mi->mi_flags & MI4_INT)) { 8947c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 8957c478bd9Sstevel@tonic-gate if (sp != NULL) 8967c478bd9Sstevel@tonic-gate nfs_rw_exit(&sp->s_recovlock); 8977c478bd9Sstevel@tonic-gate error = EINTR; 8987c478bd9Sstevel@tonic-gate goto out; 8997c478bd9Sstevel@tonic-gate } 9007c478bd9Sstevel@tonic-gate rsp->rs_flags |= NFS4_RS_RENAME_HELD; 9017c478bd9Sstevel@tonic-gate } 9027c478bd9Sstevel@tonic-gate 9037c478bd9Sstevel@tonic-gate if (OH_IS_STATE_RELE(op)) { 9047c478bd9Sstevel@tonic-gate /* 9057c478bd9Sstevel@tonic-gate * For forced unmount, letting the request proceed will 9067c478bd9Sstevel@tonic-gate * almost always delay response to the user, so hand it off 9077c478bd9Sstevel@tonic-gate * to the recovery thread. For exiting lwp's, we don't 9087c478bd9Sstevel@tonic-gate * have a good way to tell if the request will hang. We 9097c478bd9Sstevel@tonic-gate * generally want processes to handle their own requests so 9107c478bd9Sstevel@tonic-gate * that they can be done in parallel, but if there is 9117c478bd9Sstevel@tonic-gate * already a recovery thread, hand the request off to it. 9127c478bd9Sstevel@tonic-gate * This will improve user response at no cost to overall 9137c478bd9Sstevel@tonic-gate * system throughput. For zone shutdown, we'd prefer 9147c478bd9Sstevel@tonic-gate * the recovery thread to handle this as well. 
9157c478bd9Sstevel@tonic-gate */ 9167c478bd9Sstevel@tonic-gate ASSERT(startrecovp != NULL); 9177c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 9187c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE4(mi->mi_vfsp)) 9197c478bd9Sstevel@tonic-gate *startrecovp = TRUE; 9207c478bd9Sstevel@tonic-gate else if ((curthread->t_proc_flag & TP_LWPEXIT) && 9217c478bd9Sstevel@tonic-gate (mi->mi_flags & MI4_RECOV_ACTIV)) 9227c478bd9Sstevel@tonic-gate *startrecovp = TRUE; 9237c478bd9Sstevel@tonic-gate else 9247c478bd9Sstevel@tonic-gate *startrecovp = FALSE; 9257c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 9267c478bd9Sstevel@tonic-gate } else 9277c478bd9Sstevel@tonic-gate if (startrecovp != NULL) 9287c478bd9Sstevel@tonic-gate *startrecovp = FALSE; 9297c478bd9Sstevel@tonic-gate 9307c478bd9Sstevel@tonic-gate ASSERT(error == 0); 9317c478bd9Sstevel@tonic-gate return (error); 9327c478bd9Sstevel@tonic-gate 9337c478bd9Sstevel@tonic-gate out: 9347c478bd9Sstevel@tonic-gate ASSERT(error != 0); 9357c478bd9Sstevel@tonic-gate if (sp != NULL) { 9367c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 9377c478bd9Sstevel@tonic-gate sp->s_otw_call_count--; 9387c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 9397c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); 9407c478bd9Sstevel@tonic-gate rsp->rs_sp = NULL; 9417c478bd9Sstevel@tonic-gate } 9427c478bd9Sstevel@tonic-gate nfs4_end_op_recall(vp1, vp2, rsp); 9437c478bd9Sstevel@tonic-gate 9447c478bd9Sstevel@tonic-gate #ifdef DEBUG 9457c478bd9Sstevel@tonic-gate (void) tsd_set(nfs4_tsd_key, NULL); 9467c478bd9Sstevel@tonic-gate #endif 9477c478bd9Sstevel@tonic-gate return (error); 9487c478bd9Sstevel@tonic-gate } 9497c478bd9Sstevel@tonic-gate 9507c478bd9Sstevel@tonic-gate /* 9517c478bd9Sstevel@tonic-gate * It is up to the caller to determine if rsp->rs_sp being NULL 9527c478bd9Sstevel@tonic-gate * is detrimental or not. 
9537c478bd9Sstevel@tonic-gate */ 9547c478bd9Sstevel@tonic-gate int 9557c478bd9Sstevel@tonic-gate nfs4_start_op(mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, 9567c478bd9Sstevel@tonic-gate nfs4_recov_state_t *rsp) 9577c478bd9Sstevel@tonic-gate { 9587c478bd9Sstevel@tonic-gate ASSERT(rsp->rs_num_retry_despite_err == 0); 9597c478bd9Sstevel@tonic-gate rsp->rs_num_retry_despite_err = 0; 9607c478bd9Sstevel@tonic-gate return (nfs4_start_fop(mi, vp1, vp2, OH_OTHER, rsp, NULL)); 9617c478bd9Sstevel@tonic-gate } 9627c478bd9Sstevel@tonic-gate 9637c478bd9Sstevel@tonic-gate /* 9647c478bd9Sstevel@tonic-gate * Release any resources acquired by nfs4_start_op(). 9657c478bd9Sstevel@tonic-gate * 'sp' should be the nfs4_server pointer returned by nfs4_start_op(). 9667c478bd9Sstevel@tonic-gate * 9677c478bd9Sstevel@tonic-gate * The operation hint is used to avoid a deadlock by bypassing delegation 9687c478bd9Sstevel@tonic-gate * return logic for writes, which are done while returning a delegation. 9697c478bd9Sstevel@tonic-gate */ 9707c478bd9Sstevel@tonic-gate 9717c478bd9Sstevel@tonic-gate void 9727c478bd9Sstevel@tonic-gate nfs4_end_fop(mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op, 9737c478bd9Sstevel@tonic-gate nfs4_recov_state_t *rsp, bool_t needs_recov) 9747c478bd9Sstevel@tonic-gate { 9757c478bd9Sstevel@tonic-gate nfs4_server_t *sp = rsp->rs_sp; 9767c478bd9Sstevel@tonic-gate rnode4_t *rp = NULL; 9777c478bd9Sstevel@tonic-gate 9787c478bd9Sstevel@tonic-gate #ifdef lint 9797c478bd9Sstevel@tonic-gate /* 9807c478bd9Sstevel@tonic-gate * The op hint isn't used any more, but might be in 9817c478bd9Sstevel@tonic-gate * the future. 
9827c478bd9Sstevel@tonic-gate */ 9837c478bd9Sstevel@tonic-gate op = op; 9847c478bd9Sstevel@tonic-gate #endif 9857c478bd9Sstevel@tonic-gate 9867c478bd9Sstevel@tonic-gate #ifdef DEBUG 9877c478bd9Sstevel@tonic-gate ASSERT(tsd_get(nfs4_tsd_key) != NULL); 9887c478bd9Sstevel@tonic-gate (void) tsd_set(nfs4_tsd_key, NULL); 9897c478bd9Sstevel@tonic-gate #endif 9907c478bd9Sstevel@tonic-gate 9917c478bd9Sstevel@tonic-gate nfs4_end_op_recall(vp1, vp2, rsp); 9927c478bd9Sstevel@tonic-gate 9937c478bd9Sstevel@tonic-gate if (rsp->rs_flags & NFS4_RS_RENAME_HELD) 9947c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_rename_lock); 9957c478bd9Sstevel@tonic-gate 9967c478bd9Sstevel@tonic-gate if (!needs_recov) { 9977c478bd9Sstevel@tonic-gate if (rsp->rs_flags & NFS4_RS_DELAY_MSG) { 9987c478bd9Sstevel@tonic-gate /* may need to clear the delay interval */ 9997c478bd9Sstevel@tonic-gate if (vp1 != NULL) { 10007c478bd9Sstevel@tonic-gate rp = VTOR4(vp1); 10017c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 10027c478bd9Sstevel@tonic-gate rp->r_delay_interval = 0; 10037c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 10047c478bd9Sstevel@tonic-gate } 10057c478bd9Sstevel@tonic-gate } 10067c478bd9Sstevel@tonic-gate rsp->rs_flags &= ~(NFS4_RS_GRACE_MSG|NFS4_RS_DELAY_MSG); 10077c478bd9Sstevel@tonic-gate } 10087c478bd9Sstevel@tonic-gate 10097c478bd9Sstevel@tonic-gate /* 10107c478bd9Sstevel@tonic-gate * If the corresponding nfs4_start_op() found a sp, 10117c478bd9Sstevel@tonic-gate * then there must still be a sp. 
10127c478bd9Sstevel@tonic-gate */ 10137c478bd9Sstevel@tonic-gate if (sp != NULL) { 10147c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 10157c478bd9Sstevel@tonic-gate nfs_rw_exit(&sp->s_recovlock); 10167c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 10177c478bd9Sstevel@tonic-gate sp->s_otw_call_count--; 10187c478bd9Sstevel@tonic-gate cv_broadcast(&sp->s_cv_otw_count); 10197c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 10207c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); 10217c478bd9Sstevel@tonic-gate } else { 10227c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 10237c478bd9Sstevel@tonic-gate } 10247c478bd9Sstevel@tonic-gate } 10257c478bd9Sstevel@tonic-gate 10267c478bd9Sstevel@tonic-gate void 10277c478bd9Sstevel@tonic-gate nfs4_end_op(mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, 10287c478bd9Sstevel@tonic-gate nfs4_recov_state_t *rsp, bool_t needrecov) 10297c478bd9Sstevel@tonic-gate { 10307c478bd9Sstevel@tonic-gate nfs4_end_fop(mi, vp1, vp2, OH_OTHER, rsp, needrecov); 10317c478bd9Sstevel@tonic-gate } 10327c478bd9Sstevel@tonic-gate 10337c478bd9Sstevel@tonic-gate /* 10347c478bd9Sstevel@tonic-gate * If the filesystem is going through client recovery, block until 10357c478bd9Sstevel@tonic-gate * finished. 10367c478bd9Sstevel@tonic-gate * Exceptions: 10377c478bd9Sstevel@tonic-gate * - state-releasing ops (CLOSE, LOCKU, DELEGRETURN) are allowed to proceed 10387c478bd9Sstevel@tonic-gate * if the filesystem has been forcibly unmounted or the lwp is exiting. 
10397c478bd9Sstevel@tonic-gate * 10407c478bd9Sstevel@tonic-gate * Return value: 10417c478bd9Sstevel@tonic-gate * - 0 if no errors 10427c478bd9Sstevel@tonic-gate * - EINTR if the call was interrupted 10437c478bd9Sstevel@tonic-gate * - EIO if the filesystem has been forcibly unmounted (non-state-releasing 10447c478bd9Sstevel@tonic-gate * op) 10457c478bd9Sstevel@tonic-gate * - the errno value from the recovery thread, if recovery failed 10467c478bd9Sstevel@tonic-gate */ 10477c478bd9Sstevel@tonic-gate 10487c478bd9Sstevel@tonic-gate static int 10497c478bd9Sstevel@tonic-gate wait_for_recovery(mntinfo4_t *mi, nfs4_op_hint_t op_hint) 10507c478bd9Sstevel@tonic-gate { 10517c478bd9Sstevel@tonic-gate int error = 0; 10527c478bd9Sstevel@tonic-gate 10537c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 10547c478bd9Sstevel@tonic-gate 10557c478bd9Sstevel@tonic-gate while (mi->mi_recovflags != 0) { 10567c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 10577c478bd9Sstevel@tonic-gate 1058ffa198efSvv149972 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) || 1059ffa198efSvv149972 (mi->mi_flags & MI4_RECOV_FAIL)) 10607c478bd9Sstevel@tonic-gate break; 10617c478bd9Sstevel@tonic-gate if (OH_IS_STATE_RELE(op_hint) && 10627c478bd9Sstevel@tonic-gate (curthread->t_proc_flag & TP_LWPEXIT)) 10637c478bd9Sstevel@tonic-gate break; 10647c478bd9Sstevel@tonic-gate 10657c478bd9Sstevel@tonic-gate if (lwp != NULL) 10667c478bd9Sstevel@tonic-gate lwp->lwp_nostop++; 10677c478bd9Sstevel@tonic-gate /* XXX - use different cv? 
*/ 10687c478bd9Sstevel@tonic-gate if (cv_wait_sig(&mi->mi_failover_cv, &mi->mi_lock) == 0) { 10697c478bd9Sstevel@tonic-gate error = EINTR; 10707c478bd9Sstevel@tonic-gate if (lwp != NULL) 10717c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 10727c478bd9Sstevel@tonic-gate break; 10737c478bd9Sstevel@tonic-gate } 10747c478bd9Sstevel@tonic-gate if (lwp != NULL) 10757c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 10767c478bd9Sstevel@tonic-gate } 10777c478bd9Sstevel@tonic-gate 1078ffa198efSvv149972 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) && 10797c478bd9Sstevel@tonic-gate !OH_IS_STATE_RELE(op_hint)) { 10807c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 10817c478bd9Sstevel@tonic-gate "wait_for_recovery: forced unmount")); 10827c478bd9Sstevel@tonic-gate error = EIO; 1083ffa198efSvv149972 } else if (mi->mi_flags & MI4_RECOV_FAIL) { 1084ffa198efSvv149972 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1085ffa198efSvv149972 "wait_for_recovery: fail since RECOV FAIL")); 1086ffa198efSvv149972 error = mi->mi_error; 10877c478bd9Sstevel@tonic-gate } 10887c478bd9Sstevel@tonic-gate 10897c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10907c478bd9Sstevel@tonic-gate 10917c478bd9Sstevel@tonic-gate return (error); 10927c478bd9Sstevel@tonic-gate } 10937c478bd9Sstevel@tonic-gate 10947c478bd9Sstevel@tonic-gate /* 10957c478bd9Sstevel@tonic-gate * If the client received NFS4ERR_GRACE for this particular mount, 10967c478bd9Sstevel@tonic-gate * the client blocks here until it is time to try again. 
10977c478bd9Sstevel@tonic-gate * 10987c478bd9Sstevel@tonic-gate * Return value: 10997c478bd9Sstevel@tonic-gate * - 0 if wait was successful 11007c478bd9Sstevel@tonic-gate * - EINTR if the call was interrupted 11017c478bd9Sstevel@tonic-gate */ 11027c478bd9Sstevel@tonic-gate 11037c478bd9Sstevel@tonic-gate int 11047c478bd9Sstevel@tonic-gate nfs4_wait_for_grace(mntinfo4_t *mi, nfs4_recov_state_t *rsp) 11057c478bd9Sstevel@tonic-gate { 11067c478bd9Sstevel@tonic-gate int error = 0; 11077c478bd9Sstevel@tonic-gate time_t curtime, time_to_wait; 11087c478bd9Sstevel@tonic-gate 11097c478bd9Sstevel@tonic-gate /* do a unprotected check to reduce mi_lock contention */ 11107c478bd9Sstevel@tonic-gate if (mi->mi_grace_wait != 0) { 11117c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 11127c478bd9Sstevel@tonic-gate 11137c478bd9Sstevel@tonic-gate if (mi->mi_grace_wait != 0) { 11147c478bd9Sstevel@tonic-gate if (!(rsp->rs_flags & NFS4_RS_GRACE_MSG)) 11157c478bd9Sstevel@tonic-gate rsp->rs_flags |= NFS4_RS_GRACE_MSG; 11167c478bd9Sstevel@tonic-gate 11177c478bd9Sstevel@tonic-gate curtime = gethrestime_sec(); 11187c478bd9Sstevel@tonic-gate 11197c478bd9Sstevel@tonic-gate if (curtime < mi->mi_grace_wait) { 11207c478bd9Sstevel@tonic-gate 11217c478bd9Sstevel@tonic-gate time_to_wait = mi->mi_grace_wait - curtime; 11227c478bd9Sstevel@tonic-gate 11237c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 11247c478bd9Sstevel@tonic-gate 11251988a130Sdm120769 delay(SEC_TO_TICK(time_to_wait)); 11267c478bd9Sstevel@tonic-gate 11277c478bd9Sstevel@tonic-gate curtime = gethrestime_sec(); 11287c478bd9Sstevel@tonic-gate 11297c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 11307c478bd9Sstevel@tonic-gate 11317c478bd9Sstevel@tonic-gate if (curtime >= mi->mi_grace_wait) 11327c478bd9Sstevel@tonic-gate mi->mi_grace_wait = 0; 11337c478bd9Sstevel@tonic-gate } else { 11347c478bd9Sstevel@tonic-gate mi->mi_grace_wait = 0; 11357c478bd9Sstevel@tonic-gate } 11367c478bd9Sstevel@tonic-gate } 
11377c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 11387c478bd9Sstevel@tonic-gate } 11397c478bd9Sstevel@tonic-gate 11407c478bd9Sstevel@tonic-gate return (error); 11417c478bd9Sstevel@tonic-gate } 11427c478bd9Sstevel@tonic-gate 11437c478bd9Sstevel@tonic-gate /* 11447c478bd9Sstevel@tonic-gate * If the client received NFS4ERR_DELAY for an operation on a vnode, 11457c478bd9Sstevel@tonic-gate * the client blocks here until it is time to try again. 11467c478bd9Sstevel@tonic-gate * 11477c478bd9Sstevel@tonic-gate * Return value: 11487c478bd9Sstevel@tonic-gate * - 0 if wait was successful 11497c478bd9Sstevel@tonic-gate * - EINTR if the call was interrupted 11507c478bd9Sstevel@tonic-gate */ 11517c478bd9Sstevel@tonic-gate 11527c478bd9Sstevel@tonic-gate int 11537c478bd9Sstevel@tonic-gate nfs4_wait_for_delay(vnode_t *vp, nfs4_recov_state_t *rsp) 11547c478bd9Sstevel@tonic-gate { 11557c478bd9Sstevel@tonic-gate int error = 0; 11567c478bd9Sstevel@tonic-gate time_t curtime, time_to_wait; 11577c478bd9Sstevel@tonic-gate rnode4_t *rp; 11587c478bd9Sstevel@tonic-gate 11597c478bd9Sstevel@tonic-gate ASSERT(vp != NULL); 11607c478bd9Sstevel@tonic-gate 11617c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 11627c478bd9Sstevel@tonic-gate 11637c478bd9Sstevel@tonic-gate /* do a unprotected check to reduce r_statelock contention */ 11647c478bd9Sstevel@tonic-gate if (rp->r_delay_wait != 0) { 11657c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 11667c478bd9Sstevel@tonic-gate 11677c478bd9Sstevel@tonic-gate if (rp->r_delay_wait != 0) { 11687c478bd9Sstevel@tonic-gate 11697c478bd9Sstevel@tonic-gate if (!(rsp->rs_flags & NFS4_RS_DELAY_MSG)) { 11707c478bd9Sstevel@tonic-gate rsp->rs_flags |= NFS4_RS_DELAY_MSG; 11717c478bd9Sstevel@tonic-gate nfs4_mi_kstat_inc_delay(VTOMI4(vp)); 11727c478bd9Sstevel@tonic-gate } 11737c478bd9Sstevel@tonic-gate 11747c478bd9Sstevel@tonic-gate curtime = gethrestime_sec(); 11757c478bd9Sstevel@tonic-gate 11767c478bd9Sstevel@tonic-gate if (curtime < rp->r_delay_wait) { 
11777c478bd9Sstevel@tonic-gate 11787c478bd9Sstevel@tonic-gate time_to_wait = rp->r_delay_wait - curtime; 11797c478bd9Sstevel@tonic-gate 11807c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 11817c478bd9Sstevel@tonic-gate 11821988a130Sdm120769 delay(SEC_TO_TICK(time_to_wait)); 11837c478bd9Sstevel@tonic-gate 11847c478bd9Sstevel@tonic-gate curtime = gethrestime_sec(); 11857c478bd9Sstevel@tonic-gate 11867c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 11877c478bd9Sstevel@tonic-gate 11887c478bd9Sstevel@tonic-gate if (curtime >= rp->r_delay_wait) 11897c478bd9Sstevel@tonic-gate rp->r_delay_wait = 0; 11907c478bd9Sstevel@tonic-gate } else { 11917c478bd9Sstevel@tonic-gate rp->r_delay_wait = 0; 11927c478bd9Sstevel@tonic-gate } 11937c478bd9Sstevel@tonic-gate } 11947c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 11957c478bd9Sstevel@tonic-gate } 11967c478bd9Sstevel@tonic-gate 11977c478bd9Sstevel@tonic-gate return (error); 11987c478bd9Sstevel@tonic-gate } 11997c478bd9Sstevel@tonic-gate 12007c478bd9Sstevel@tonic-gate /* 12017c478bd9Sstevel@tonic-gate * The recovery thread. 
12027c478bd9Sstevel@tonic-gate */ 12037c478bd9Sstevel@tonic-gate 12047c478bd9Sstevel@tonic-gate static void 12057c478bd9Sstevel@tonic-gate nfs4_recov_thread(recov_info_t *recovp) 12067c478bd9Sstevel@tonic-gate { 12077c478bd9Sstevel@tonic-gate mntinfo4_t *mi = recovp->rc_mi; 12087c478bd9Sstevel@tonic-gate nfs4_server_t *sp; 12097c478bd9Sstevel@tonic-gate int done = 0, error = 0; 12107c478bd9Sstevel@tonic-gate bool_t recov_fail = FALSE; 12117c478bd9Sstevel@tonic-gate callb_cpr_t cpr_info; 12127c478bd9Sstevel@tonic-gate kmutex_t cpr_lock; 12137c478bd9Sstevel@tonic-gate 12147c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_START, mi, NULL, mi->mi_recovflags, 12157c478bd9Sstevel@tonic-gate recovp->rc_vp1, recovp->rc_vp2, 0, NULL, 0, TAG_NONE, TAG_NONE, 12167c478bd9Sstevel@tonic-gate 0, 0); 12177c478bd9Sstevel@tonic-gate 12187c478bd9Sstevel@tonic-gate mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 12197c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recov"); 12207c478bd9Sstevel@tonic-gate 12217c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 12227c478bd9Sstevel@tonic-gate mi->mi_recovthread = curthread; 12237c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 12247c478bd9Sstevel@tonic-gate 12257c478bd9Sstevel@tonic-gate /* 12267c478bd9Sstevel@tonic-gate * We don't really need protection here against failover or 12277c478bd9Sstevel@tonic-gate * migration, since the current thread is the one that would make 12287c478bd9Sstevel@tonic-gate * any changes, but hold mi_recovlock anyway for completeness (and 12297c478bd9Sstevel@tonic-gate * to satisfy any ASSERTs). 
12307c478bd9Sstevel@tonic-gate */ 12317c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 12327c478bd9Sstevel@tonic-gate sp = find_nfs4_server(mi); 12337c478bd9Sstevel@tonic-gate if (sp != NULL) 12347c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 12357c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 12367c478bd9Sstevel@tonic-gate 12377c478bd9Sstevel@tonic-gate /* 12387c478bd9Sstevel@tonic-gate * Do any necessary recovery, based on the information in recovp 12397c478bd9Sstevel@tonic-gate * and any recovery flags. 12407c478bd9Sstevel@tonic-gate */ 12417c478bd9Sstevel@tonic-gate 12427c478bd9Sstevel@tonic-gate do { 12437c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 12447c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE4(mi->mi_vfsp)) { 12457c478bd9Sstevel@tonic-gate bool_t activesrv; 12467c478bd9Sstevel@tonic-gate 12477c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug && 12487c478bd9Sstevel@tonic-gate mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED, (CE_NOTE, 12497c478bd9Sstevel@tonic-gate "nfs4_recov_thread: file system has been " 12507c478bd9Sstevel@tonic-gate "unmounted")); 12517c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug && 12527c478bd9Sstevel@tonic-gate zone_status_get(curproc->p_zone) >= 12537c478bd9Sstevel@tonic-gate ZONE_IS_SHUTTING_DOWN, (CE_NOTE, 12547c478bd9Sstevel@tonic-gate "nfs4_recov_thread: zone shutting down")); 12557c478bd9Sstevel@tonic-gate /* 12567c478bd9Sstevel@tonic-gate * If the server has lost its state for us and 12577c478bd9Sstevel@tonic-gate * the filesystem is unmounted, then the filesystem 12587c478bd9Sstevel@tonic-gate * can be tossed, even if there are lost lock or 12597c478bd9Sstevel@tonic-gate * lost state calls in the recovery queue. 
12607c478bd9Sstevel@tonic-gate */ 12617c478bd9Sstevel@tonic-gate if (mi->mi_recovflags & 12627c478bd9Sstevel@tonic-gate (MI4R_NEED_CLIENTID | MI4R_REOPEN_FILES)) { 12637c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 12647c478bd9Sstevel@tonic-gate "nfs4_recov_thread: bailing out")); 12657c478bd9Sstevel@tonic-gate mi->mi_flags |= MI4_RECOV_FAIL; 12667c478bd9Sstevel@tonic-gate mi->mi_error = recovp->rc_error; 12677c478bd9Sstevel@tonic-gate recov_fail = TRUE; 12687c478bd9Sstevel@tonic-gate } 12697c478bd9Sstevel@tonic-gate /* 12707c478bd9Sstevel@tonic-gate * We don't know if the server has any state for 12717c478bd9Sstevel@tonic-gate * us, and the filesystem has been unmounted. If 12727c478bd9Sstevel@tonic-gate * there are "lost state" recovery items, keep 12737c478bd9Sstevel@tonic-gate * trying to process them until there are no more 12747c478bd9Sstevel@tonic-gate * mounted filesystems for the server. Otherwise, 12757c478bd9Sstevel@tonic-gate * bail out. The reason we don't mark the 12767c478bd9Sstevel@tonic-gate * filesystem as failing recovery is in case we 12777c478bd9Sstevel@tonic-gate * have to do "lost state" recovery later (e.g., a 12787c478bd9Sstevel@tonic-gate * user process exits). 
12797c478bd9Sstevel@tonic-gate */ 12807c478bd9Sstevel@tonic-gate if (!(mi->mi_recovflags & MI4R_LOST_STATE)) { 1281e749d04dSjwahlig done = 1; 12827c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 12837c478bd9Sstevel@tonic-gate break; 12847c478bd9Sstevel@tonic-gate } 12857c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 12867c478bd9Sstevel@tonic-gate 12877c478bd9Sstevel@tonic-gate if (sp == NULL) 12887c478bd9Sstevel@tonic-gate activesrv = FALSE; 12897c478bd9Sstevel@tonic-gate else { 12907c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 12917c478bd9Sstevel@tonic-gate activesrv = nfs4_fs_active(sp); 12927c478bd9Sstevel@tonic-gate } 12937c478bd9Sstevel@tonic-gate if (!activesrv) { 12947c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 12957c478bd9Sstevel@tonic-gate "no active fs for server %p", 12967c478bd9Sstevel@tonic-gate (void *)sp)); 12977c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 12987c478bd9Sstevel@tonic-gate mi->mi_flags |= MI4_RECOV_FAIL; 12997c478bd9Sstevel@tonic-gate mi->mi_error = recovp->rc_error; 13007c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13017c478bd9Sstevel@tonic-gate recov_fail = TRUE; 13027c478bd9Sstevel@tonic-gate if (sp != NULL) { 13037c478bd9Sstevel@tonic-gate /* 13047c478bd9Sstevel@tonic-gate * Mark the server instance as 13057c478bd9Sstevel@tonic-gate * dead, so that nobody will attach 13067c478bd9Sstevel@tonic-gate * a new filesystem. 
13077c478bd9Sstevel@tonic-gate */ 13087c478bd9Sstevel@tonic-gate nfs4_mark_srv_dead(sp); 13097c478bd9Sstevel@tonic-gate } 13107c478bd9Sstevel@tonic-gate } 13117c478bd9Sstevel@tonic-gate if (sp != NULL) 13127c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 13137c478bd9Sstevel@tonic-gate } else { 13147c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13157c478bd9Sstevel@tonic-gate } 13167c478bd9Sstevel@tonic-gate 13177c478bd9Sstevel@tonic-gate /* 13187c478bd9Sstevel@tonic-gate * Check if we need to select a new server for a 13197c478bd9Sstevel@tonic-gate * failover. Choosing a new server will force at 13207c478bd9Sstevel@tonic-gate * least a check of the clientid. 13217c478bd9Sstevel@tonic-gate */ 13227c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 13237c478bd9Sstevel@tonic-gate if (!recov_fail && 13247c478bd9Sstevel@tonic-gate (mi->mi_recovflags & MI4R_NEED_NEW_SERVER)) { 13257c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13267c478bd9Sstevel@tonic-gate recov_newserver(recovp, &sp, &recov_fail); 13277c478bd9Sstevel@tonic-gate } else 13287c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13297c478bd9Sstevel@tonic-gate 13307c478bd9Sstevel@tonic-gate /* 13317c478bd9Sstevel@tonic-gate * Check if we need to recover the clientid. This 13327c478bd9Sstevel@tonic-gate * must be done before file and lock recovery, and it 13337c478bd9Sstevel@tonic-gate * potentially affects the recovery threads for other 13347c478bd9Sstevel@tonic-gate * filesystems, so it gets special treatment. 
13357c478bd9Sstevel@tonic-gate */ 13367c478bd9Sstevel@tonic-gate if (sp != NULL && recov_fail == FALSE) { 13377c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 13387c478bd9Sstevel@tonic-gate if (!(sp->s_flags & N4S_CLIENTID_SET)) { 13397c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 13407c478bd9Sstevel@tonic-gate recov_clientid(recovp, sp); 13417c478bd9Sstevel@tonic-gate } else { 13427c478bd9Sstevel@tonic-gate /* 13437c478bd9Sstevel@tonic-gate * Unset this flag in case another recovery 13447c478bd9Sstevel@tonic-gate * thread successfully recovered the clientid 13457c478bd9Sstevel@tonic-gate * for us already. 13467c478bd9Sstevel@tonic-gate */ 13477c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 13487c478bd9Sstevel@tonic-gate mi->mi_recovflags &= ~MI4R_NEED_CLIENTID; 13497c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13507c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 13517c478bd9Sstevel@tonic-gate } 13527c478bd9Sstevel@tonic-gate } 13537c478bd9Sstevel@tonic-gate 13547c478bd9Sstevel@tonic-gate /* 13557c478bd9Sstevel@tonic-gate * Check if we need to get the security information. 13567c478bd9Sstevel@tonic-gate */ 13577c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 13587c478bd9Sstevel@tonic-gate if ((mi->mi_recovflags & MI4R_NEED_SECINFO) && 13597c478bd9Sstevel@tonic-gate !(mi->mi_flags & MI4_RECOV_FAIL)) { 13607c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13617c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_recovlock, 13627c478bd9Sstevel@tonic-gate RW_WRITER, 0); 13637c478bd9Sstevel@tonic-gate error = nfs4_secinfo_recov(recovp->rc_mi, 13647c478bd9Sstevel@tonic-gate recovp->rc_vp1, recovp->rc_vp2); 13657c478bd9Sstevel@tonic-gate /* 13667c478bd9Sstevel@tonic-gate * If error, nothing more can be done, stop 13677c478bd9Sstevel@tonic-gate * the recovery. 
13687c478bd9Sstevel@tonic-gate */ 13697c478bd9Sstevel@tonic-gate if (error) { 13707c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 13717c478bd9Sstevel@tonic-gate mi->mi_flags |= MI4_RECOV_FAIL; 13727c478bd9Sstevel@tonic-gate mi->mi_error = recovp->rc_error; 13737c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13747c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_WRONGSEC, mi, NULL, 13757c478bd9Sstevel@tonic-gate error, recovp->rc_vp1, recovp->rc_vp2, 13767c478bd9Sstevel@tonic-gate 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 13777c478bd9Sstevel@tonic-gate } 13787c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 13797c478bd9Sstevel@tonic-gate } else 13807c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13817c478bd9Sstevel@tonic-gate 13827c478bd9Sstevel@tonic-gate /* 13837c478bd9Sstevel@tonic-gate * Check if there's a bad seqid to recover. 13847c478bd9Sstevel@tonic-gate */ 13857c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 13867c478bd9Sstevel@tonic-gate if ((mi->mi_recovflags & MI4R_BAD_SEQID) && 13877c478bd9Sstevel@tonic-gate !(mi->mi_flags & MI4_RECOV_FAIL)) { 13887c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13897c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_recovlock, 13907c478bd9Sstevel@tonic-gate RW_WRITER, 0); 13917c478bd9Sstevel@tonic-gate recov_bad_seqid(recovp); 13927c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 13937c478bd9Sstevel@tonic-gate } else 13947c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13957c478bd9Sstevel@tonic-gate 13967c478bd9Sstevel@tonic-gate /* 13977c478bd9Sstevel@tonic-gate * Next check for recovery that affects the entire 13987c478bd9Sstevel@tonic-gate * filesystem. 
13997c478bd9Sstevel@tonic-gate */ 14007c478bd9Sstevel@tonic-gate if (sp != NULL) { 14017c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 14027c478bd9Sstevel@tonic-gate if ((mi->mi_recovflags & MI4R_REOPEN_FILES) && 14037c478bd9Sstevel@tonic-gate !(mi->mi_flags & MI4_RECOV_FAIL)) { 14047c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14057c478bd9Sstevel@tonic-gate recov_openfiles(recovp, sp); 14067c478bd9Sstevel@tonic-gate } else 14077c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14087c478bd9Sstevel@tonic-gate } 14097c478bd9Sstevel@tonic-gate 14107c478bd9Sstevel@tonic-gate /* 14117c478bd9Sstevel@tonic-gate * Send any queued state recovery requests. 14127c478bd9Sstevel@tonic-gate */ 14137c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 14147c478bd9Sstevel@tonic-gate if (sp != NULL && 14157c478bd9Sstevel@tonic-gate (mi->mi_recovflags & MI4R_LOST_STATE) && 14167c478bd9Sstevel@tonic-gate !(mi->mi_flags & MI4_RECOV_FAIL)) { 14177c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14187c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_recovlock, 14197c478bd9Sstevel@tonic-gate RW_WRITER, 0); 14207c478bd9Sstevel@tonic-gate nfs4_resend_lost_rqsts(recovp, sp); 14217c478bd9Sstevel@tonic-gate if (list_head(&mi->mi_lost_state) == NULL) { 14227c478bd9Sstevel@tonic-gate /* done */ 14237c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 14247c478bd9Sstevel@tonic-gate mi->mi_recovflags &= ~MI4R_LOST_STATE; 14257c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14267c478bd9Sstevel@tonic-gate } 14277c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 14287c478bd9Sstevel@tonic-gate } else { 14297c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14307c478bd9Sstevel@tonic-gate } 14317c478bd9Sstevel@tonic-gate 14327c478bd9Sstevel@tonic-gate /* 14337c478bd9Sstevel@tonic-gate * See if there is anything more to do. If not, announce 14347c478bd9Sstevel@tonic-gate * that we are done and exit. 
14357c478bd9Sstevel@tonic-gate * 14367c478bd9Sstevel@tonic-gate * Need mi_recovlock to keep 'sp' valid. Must grab 14377c478bd9Sstevel@tonic-gate * mi_recovlock before mi_lock to preserve lock ordering. 14387c478bd9Sstevel@tonic-gate */ 14397c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 14407c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 14417c478bd9Sstevel@tonic-gate if ((mi->mi_recovflags & ~MI4R_SRV_REBOOT) == 0 || 14427c478bd9Sstevel@tonic-gate (mi->mi_flags & MI4_RECOV_FAIL)) { 14437c478bd9Sstevel@tonic-gate list_t local_lost_state; 14447c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *lrp; 14457c478bd9Sstevel@tonic-gate 14467c478bd9Sstevel@tonic-gate /* 14477c478bd9Sstevel@tonic-gate * We need to remove the lost requests before we 14487c478bd9Sstevel@tonic-gate * unmark the mi as no longer doing recovery to 14497c478bd9Sstevel@tonic-gate * avoid a race with a new thread putting new lost 14507c478bd9Sstevel@tonic-gate * requests on the same mi (and the going away 14517c478bd9Sstevel@tonic-gate * thread would remove the new lost requests). 14527c478bd9Sstevel@tonic-gate * 14537c478bd9Sstevel@tonic-gate * Move the lost requests to a local list since 14547c478bd9Sstevel@tonic-gate * nfs4_remove_lost_rqst() drops mi_lock, and 14557c478bd9Sstevel@tonic-gate * dropping the mi_lock would make our check to 14567c478bd9Sstevel@tonic-gate * see if recovery is done no longer valid. 
14577c478bd9Sstevel@tonic-gate */ 14587c478bd9Sstevel@tonic-gate list_create(&local_lost_state, 14597c478bd9Sstevel@tonic-gate sizeof (nfs4_lost_rqst_t), 14607c478bd9Sstevel@tonic-gate offsetof(nfs4_lost_rqst_t, lr_node)); 14617c478bd9Sstevel@tonic-gate list_move_tail(&local_lost_state, &mi->mi_lost_state); 14627c478bd9Sstevel@tonic-gate 14637c478bd9Sstevel@tonic-gate done = 1; 14647c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14657c478bd9Sstevel@tonic-gate /* 14667c478bd9Sstevel@tonic-gate * Now officially free the "moved" 14677c478bd9Sstevel@tonic-gate * lost requests. 14687c478bd9Sstevel@tonic-gate */ 14697c478bd9Sstevel@tonic-gate while ((lrp = list_head(&local_lost_state)) != NULL) { 14707c478bd9Sstevel@tonic-gate list_remove(&local_lost_state, lrp); 14717c478bd9Sstevel@tonic-gate nfs4_free_lost_rqst(lrp, sp); 14727c478bd9Sstevel@tonic-gate } 14737c478bd9Sstevel@tonic-gate list_destroy(&local_lost_state); 14747c478bd9Sstevel@tonic-gate } else 14757c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14767c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 14777c478bd9Sstevel@tonic-gate 14787c478bd9Sstevel@tonic-gate /* 14797c478bd9Sstevel@tonic-gate * If the filesystem has been forcibly unmounted, there is 14807c478bd9Sstevel@tonic-gate * probably no point in retrying immediately. Furthermore, 14817c478bd9Sstevel@tonic-gate * there might be user processes waiting for a chance to 14827c478bd9Sstevel@tonic-gate * queue up "lost state" requests, so that they can exit. 14837c478bd9Sstevel@tonic-gate * So pause here for a moment. Same logic for zone shutdown. 
14847c478bd9Sstevel@tonic-gate */ 14857c478bd9Sstevel@tonic-gate if (!done && FS_OR_ZONE_GONE4(mi->mi_vfsp)) { 14867c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 14877c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_failover_cv); 14887c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14897c478bd9Sstevel@tonic-gate delay(SEC_TO_TICK(nfs4_unmount_delay)); 14907c478bd9Sstevel@tonic-gate } 14917c478bd9Sstevel@tonic-gate 14927c478bd9Sstevel@tonic-gate } while (!done); 14937c478bd9Sstevel@tonic-gate 14947c478bd9Sstevel@tonic-gate if (sp != NULL) 14957c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); 14967c478bd9Sstevel@tonic-gate 14977c478bd9Sstevel@tonic-gate /* 14987c478bd9Sstevel@tonic-gate * Return all recalled delegations 14997c478bd9Sstevel@tonic-gate */ 15007c478bd9Sstevel@tonic-gate nfs4_dlistclean(); 15017c478bd9Sstevel@tonic-gate 1502e749d04dSjwahlig mutex_enter(&mi->mi_lock); 1503e749d04dSjwahlig recov_done(mi, recovp); 1504e749d04dSjwahlig mutex_exit(&mi->mi_lock); 1505e749d04dSjwahlig 15067c478bd9Sstevel@tonic-gate /* 15077c478bd9Sstevel@tonic-gate * Free up resources that were allocated for us. 
15087c478bd9Sstevel@tonic-gate */ 15097c478bd9Sstevel@tonic-gate if (recovp->rc_vp1 != NULL) 15107c478bd9Sstevel@tonic-gate VN_RELE(recovp->rc_vp1); 15117c478bd9Sstevel@tonic-gate if (recovp->rc_vp2 != NULL) 15127c478bd9Sstevel@tonic-gate VN_RELE(recovp->rc_vp2); 1513d7d95b9aSjwahlig 1514e749d04dSjwahlig /* now we are done using the mi struct, signal the waiters */ 1515e749d04dSjwahlig mutex_enter(&mi->mi_lock); 1516e749d04dSjwahlig mi->mi_in_recovery--; 1517e749d04dSjwahlig if (mi->mi_in_recovery == 0) 1518e749d04dSjwahlig cv_broadcast(&mi->mi_cv_in_recov); 1519e749d04dSjwahlig mutex_exit(&mi->mi_lock); 1520e749d04dSjwahlig 152150a83466Sjwahlig VFS_RELE(mi->mi_vfsp); 152250a83466Sjwahlig MI4_RELE(mi); 15237c478bd9Sstevel@tonic-gate kmem_free(recovp, sizeof (recov_info_t)); 15247c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock); 15257c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cpr_info); 15267c478bd9Sstevel@tonic-gate mutex_destroy(&cpr_lock); 15277c478bd9Sstevel@tonic-gate zthread_exit(); 15287c478bd9Sstevel@tonic-gate } 15297c478bd9Sstevel@tonic-gate 15307c478bd9Sstevel@tonic-gate /* 15317c478bd9Sstevel@tonic-gate * Log the end of recovery and notify any waiting threads. 
15327c478bd9Sstevel@tonic-gate */ 15337c478bd9Sstevel@tonic-gate 15347c478bd9Sstevel@tonic-gate static void 15357c478bd9Sstevel@tonic-gate recov_done(mntinfo4_t *mi, recov_info_t *recovp) 15367c478bd9Sstevel@tonic-gate { 15377c478bd9Sstevel@tonic-gate 15387c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&mi->mi_lock)); 15397c478bd9Sstevel@tonic-gate 15407c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_END, mi, NULL, 0, recovp->rc_vp1, 15417c478bd9Sstevel@tonic-gate recovp->rc_vp2, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 15427c478bd9Sstevel@tonic-gate mi->mi_recovthread = NULL; 15437c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI4_RECOV_ACTIV; 15447c478bd9Sstevel@tonic-gate mi->mi_recovflags &= ~MI4R_SRV_REBOOT; 15457c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_failover_cv); 15467c478bd9Sstevel@tonic-gate } 15477c478bd9Sstevel@tonic-gate 15487c478bd9Sstevel@tonic-gate /* 15497c478bd9Sstevel@tonic-gate * State-specific recovery routines, by state. 15507c478bd9Sstevel@tonic-gate */ 15517c478bd9Sstevel@tonic-gate 15527c478bd9Sstevel@tonic-gate /* 15537c478bd9Sstevel@tonic-gate * Failover. 15547c478bd9Sstevel@tonic-gate * 15557c478bd9Sstevel@tonic-gate * Replaces *spp with a reference to the new server, which must 15567c478bd9Sstevel@tonic-gate * eventually be freed. 
15577c478bd9Sstevel@tonic-gate */ 15587c478bd9Sstevel@tonic-gate 15597c478bd9Sstevel@tonic-gate static void 15607c478bd9Sstevel@tonic-gate recov_newserver(recov_info_t *recovp, nfs4_server_t **spp, bool_t *recov_fail) 15617c478bd9Sstevel@tonic-gate { 15627c478bd9Sstevel@tonic-gate mntinfo4_t *mi = recovp->rc_mi; 15637c478bd9Sstevel@tonic-gate servinfo4_t *svp = NULL; 15647c478bd9Sstevel@tonic-gate nfs4_server_t *osp = *spp; 15657c478bd9Sstevel@tonic-gate CLIENT *cl; 15667c478bd9Sstevel@tonic-gate enum clnt_stat status; 15677c478bd9Sstevel@tonic-gate struct timeval tv; 15687c478bd9Sstevel@tonic-gate int error; 15697c478bd9Sstevel@tonic-gate int oncethru = 0; 15707c478bd9Sstevel@tonic-gate rnode4_t *rp; 15717c478bd9Sstevel@tonic-gate int index; 15727c478bd9Sstevel@tonic-gate nfs_fh4 fh; 15737c478bd9Sstevel@tonic-gate char *snames; 15747c478bd9Sstevel@tonic-gate size_t len; 15757c478bd9Sstevel@tonic-gate 15767c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0); 15777c478bd9Sstevel@tonic-gate 15787c478bd9Sstevel@tonic-gate tv.tv_sec = 2; 15797c478bd9Sstevel@tonic-gate tv.tv_usec = 0; 15807c478bd9Sstevel@tonic-gate 15817c478bd9Sstevel@tonic-gate #ifdef lint 15827c478bd9Sstevel@tonic-gate /* 15837c478bd9Sstevel@tonic-gate * Lint can't follow the logic, so thinks that snames and len 15847c478bd9Sstevel@tonic-gate * can be used before being set. They can't, but lint can't 15857c478bd9Sstevel@tonic-gate * figure it out. To address the lint warning, initialize 15867c478bd9Sstevel@tonic-gate * snames and len for lint. 15877c478bd9Sstevel@tonic-gate */ 15887c478bd9Sstevel@tonic-gate snames = NULL; 15897c478bd9Sstevel@tonic-gate len = 0; 15907c478bd9Sstevel@tonic-gate #endif 15917c478bd9Sstevel@tonic-gate 15927c478bd9Sstevel@tonic-gate /* 15937c478bd9Sstevel@tonic-gate * Ping the null NFS procedure of every server in 15947c478bd9Sstevel@tonic-gate * the list until one responds. 
We always start 15957c478bd9Sstevel@tonic-gate * at the head of the list and always skip the one 15967c478bd9Sstevel@tonic-gate * that is current, since it's caused us a problem. 15977c478bd9Sstevel@tonic-gate */ 15987c478bd9Sstevel@tonic-gate while (svp == NULL) { 15997c478bd9Sstevel@tonic-gate for (svp = mi->mi_servers; svp; svp = svp->sv_next) { 16007c478bd9Sstevel@tonic-gate 16017c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 16027c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE4(mi->mi_vfsp)) { 16037c478bd9Sstevel@tonic-gate mi->mi_flags |= MI4_RECOV_FAIL; 16047c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16057c478bd9Sstevel@tonic-gate (void) nfs_rw_exit(&mi->mi_recovlock); 16067c478bd9Sstevel@tonic-gate *recov_fail = TRUE; 16077c478bd9Sstevel@tonic-gate if (oncethru) 16087c478bd9Sstevel@tonic-gate kmem_free(snames, len); 16097c478bd9Sstevel@tonic-gate return; 16107c478bd9Sstevel@tonic-gate } 16117c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16127c478bd9Sstevel@tonic-gate 16137c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 16147c478bd9Sstevel@tonic-gate if (svp->sv_flags & SV4_NOTINUSE) { 16157c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 16167c478bd9Sstevel@tonic-gate continue; 16177c478bd9Sstevel@tonic-gate } 16187c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 16197c478bd9Sstevel@tonic-gate 16207c478bd9Sstevel@tonic-gate if (!oncethru && svp == mi->mi_curr_serv) 16217c478bd9Sstevel@tonic-gate continue; 16227c478bd9Sstevel@tonic-gate 16237c478bd9Sstevel@tonic-gate error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, 16247c478bd9Sstevel@tonic-gate NFS_PROGRAM, NFS_V4, 0, 1, CRED(), &cl); 16257c478bd9Sstevel@tonic-gate if (error) 16267c478bd9Sstevel@tonic-gate continue; 16277c478bd9Sstevel@tonic-gate 16287c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI4_INT)) 16297c478bd9Sstevel@tonic-gate cl->cl_nosignal = TRUE; 16307c478bd9Sstevel@tonic-gate status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, 
16317c478bd9Sstevel@tonic-gate xdr_void, NULL, tv); 16327c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI4_INT)) 16337c478bd9Sstevel@tonic-gate cl->cl_nosignal = FALSE; 16347c478bd9Sstevel@tonic-gate AUTH_DESTROY(cl->cl_auth); 16357c478bd9Sstevel@tonic-gate CLNT_DESTROY(cl); 16367c478bd9Sstevel@tonic-gate if (status == RPC_SUCCESS) { 16377c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_FAILOVER, mi, 16387c478bd9Sstevel@tonic-gate svp == mi->mi_curr_serv ? NULL : 16397c478bd9Sstevel@tonic-gate svp->sv_hostname, 0, NULL, NULL, 0, 16407c478bd9Sstevel@tonic-gate NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 16417c478bd9Sstevel@tonic-gate break; 16427c478bd9Sstevel@tonic-gate } 16437c478bd9Sstevel@tonic-gate } 16447c478bd9Sstevel@tonic-gate 16457c478bd9Sstevel@tonic-gate if (svp == NULL) { 16467c478bd9Sstevel@tonic-gate if (!oncethru) { 16477c478bd9Sstevel@tonic-gate snames = nfs4_getsrvnames(mi, &len); 16487c478bd9Sstevel@tonic-gate nfs4_queue_fact(RF_SRVS_NOT_RESPOND, mi, 16497c478bd9Sstevel@tonic-gate 0, 0, 0, FALSE, snames, 0, NULL); 16507c478bd9Sstevel@tonic-gate oncethru = 1; 16517c478bd9Sstevel@tonic-gate } 16527c478bd9Sstevel@tonic-gate delay(hz); 16537c478bd9Sstevel@tonic-gate } 16547c478bd9Sstevel@tonic-gate } 16557c478bd9Sstevel@tonic-gate 16567c478bd9Sstevel@tonic-gate if (oncethru) { 16577c478bd9Sstevel@tonic-gate nfs4_queue_fact(RF_SRVS_OK, mi, 0, 0, 0, FALSE, snames, 16587c478bd9Sstevel@tonic-gate 0, NULL); 16597c478bd9Sstevel@tonic-gate kmem_free(snames, len); 16607c478bd9Sstevel@tonic-gate } 16617c478bd9Sstevel@tonic-gate 16627c478bd9Sstevel@tonic-gate #if DEBUG 16637c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 16647c478bd9Sstevel@tonic-gate ASSERT((svp->sv_flags & SV4_NOTINUSE) == 0); 16657c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 16667c478bd9Sstevel@tonic-gate #endif 16677c478bd9Sstevel@tonic-gate 16687c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 16697c478bd9Sstevel@tonic-gate mi->mi_recovflags &= 
~MI4R_NEED_NEW_SERVER; 16707c478bd9Sstevel@tonic-gate if (svp != mi->mi_curr_serv) { 16717c478bd9Sstevel@tonic-gate servinfo4_t *osvp = mi->mi_curr_serv; 16727c478bd9Sstevel@tonic-gate 16737c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16747c478bd9Sstevel@tonic-gate 16757c478bd9Sstevel@tonic-gate /* 16767c478bd9Sstevel@tonic-gate * Update server-dependent fields in the root vnode. 16777c478bd9Sstevel@tonic-gate */ 16787c478bd9Sstevel@tonic-gate index = rtable4hash(mi->mi_rootfh); 16797c478bd9Sstevel@tonic-gate rw_enter(&rtable4[index].r_lock, RW_WRITER); 16807c478bd9Sstevel@tonic-gate 16817c478bd9Sstevel@tonic-gate rp = r4find(&rtable4[index], mi->mi_rootfh, mi->mi_vfsp); 16827c478bd9Sstevel@tonic-gate if (rp != NULL) { 16837c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 16847c478bd9Sstevel@tonic-gate "recov_newserver: remapping %s", rnode4info(rp))); 16857c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 16867c478bd9Sstevel@tonic-gate rp->r_server = svp; 16877c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE4_LOCKED(rp); 16887c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 16897c478bd9Sstevel@tonic-gate (void) nfs4_free_data_reclaim(rp); 16907c478bd9Sstevel@tonic-gate nfs4_purge_rddir_cache(RTOV4(rp)); 16917c478bd9Sstevel@tonic-gate rw_exit(&rtable4[index].r_lock); 16927c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 16937c478bd9Sstevel@tonic-gate "recov_newserver: done with %s", 16947c478bd9Sstevel@tonic-gate rnode4info(rp))); 16957c478bd9Sstevel@tonic-gate VN_RELE(RTOV4(rp)); 16967c478bd9Sstevel@tonic-gate } else 16977c478bd9Sstevel@tonic-gate rw_exit(&rtable4[index].r_lock); 16987c478bd9Sstevel@tonic-gate (void) dnlc_purge_vfsp(mi->mi_vfsp, 0); 16997c478bd9Sstevel@tonic-gate 17007c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 17017c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_REOPEN_FILES | MI4R_REMAP_FILES; 17027c478bd9Sstevel@tonic-gate if (recovp->rc_srv_reboot) 
17037c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_SRV_REBOOT; 17047c478bd9Sstevel@tonic-gate mi->mi_curr_serv = svp; 17057c478bd9Sstevel@tonic-gate mi->mi_failover++; 17067c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI4_BADOWNER_DEBUG; 17077c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 17087c478bd9Sstevel@tonic-gate 17097c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 17107c478bd9Sstevel@tonic-gate fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 17117c478bd9Sstevel@tonic-gate fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 17127c478bd9Sstevel@tonic-gate sfh4_update(mi->mi_rootfh, &fh); 17137c478bd9Sstevel@tonic-gate fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 17147c478bd9Sstevel@tonic-gate fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 17157c478bd9Sstevel@tonic-gate sfh4_update(mi->mi_srvparentfh, &fh); 17167c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 17177c478bd9Sstevel@tonic-gate 17187c478bd9Sstevel@tonic-gate *spp = nfs4_move_mi(mi, osvp, svp); 17197c478bd9Sstevel@tonic-gate if (osp != NULL) 17207c478bd9Sstevel@tonic-gate nfs4_server_rele(osp); 17217c478bd9Sstevel@tonic-gate } else 17227c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 17237c478bd9Sstevel@tonic-gate (void) nfs_rw_exit(&mi->mi_recovlock); 17247c478bd9Sstevel@tonic-gate } 17257c478bd9Sstevel@tonic-gate 17267c478bd9Sstevel@tonic-gate /* 17277c478bd9Sstevel@tonic-gate * Clientid. 
17287c478bd9Sstevel@tonic-gate */ 17297c478bd9Sstevel@tonic-gate 17307c478bd9Sstevel@tonic-gate static void 17317c478bd9Sstevel@tonic-gate recov_clientid(recov_info_t *recovp, nfs4_server_t *sp) 17327c478bd9Sstevel@tonic-gate { 17337c478bd9Sstevel@tonic-gate mntinfo4_t *mi = recovp->rc_mi; 17347c478bd9Sstevel@tonic-gate int error = 0; 17357c478bd9Sstevel@tonic-gate int still_stale; 17367c478bd9Sstevel@tonic-gate int need_new_s; 17377c478bd9Sstevel@tonic-gate 17387c478bd9Sstevel@tonic-gate ASSERT(sp != NULL); 17397c478bd9Sstevel@tonic-gate 17407c478bd9Sstevel@tonic-gate /* 17417c478bd9Sstevel@tonic-gate * Acquire the recovery lock and then verify that the clientid 17427c478bd9Sstevel@tonic-gate * still needs to be recovered. (Note that s_recovlock is supposed 17437c478bd9Sstevel@tonic-gate * to be acquired before s_lock.) Since the thread holds the 17447c478bd9Sstevel@tonic-gate * recovery lock, no other thread will recover the clientid. 17457c478bd9Sstevel@tonic-gate */ 17467c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&sp->s_recovlock, RW_WRITER, 0); 17477c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0); 17487c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 17497c478bd9Sstevel@tonic-gate still_stale = ((sp->s_flags & N4S_CLIENTID_SET) == 0); 17507c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 17517c478bd9Sstevel@tonic-gate 17527c478bd9Sstevel@tonic-gate if (still_stale) { 17537c478bd9Sstevel@tonic-gate nfs4_error_t n4e; 17547c478bd9Sstevel@tonic-gate 17557c478bd9Sstevel@tonic-gate nfs4_error_zinit(&n4e); 17567c478bd9Sstevel@tonic-gate nfs4setclientid(mi, kcred, TRUE, &n4e); 17577c478bd9Sstevel@tonic-gate error = n4e.error; 17587c478bd9Sstevel@tonic-gate if (error != 0) { 17597c478bd9Sstevel@tonic-gate 17607c478bd9Sstevel@tonic-gate /* 17617c478bd9Sstevel@tonic-gate * nfs4setclientid may have set MI4R_NEED_NEW_SERVER, 17627c478bd9Sstevel@tonic-gate * if so, just return and let recov_thread drive 
17637c478bd9Sstevel@tonic-gate * failover. 17647c478bd9Sstevel@tonic-gate */ 17657c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 17667c478bd9Sstevel@tonic-gate need_new_s = mi->mi_recovflags & MI4R_NEED_NEW_SERVER; 17677c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 17687c478bd9Sstevel@tonic-gate 17697c478bd9Sstevel@tonic-gate if (need_new_s) { 17707c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 17717c478bd9Sstevel@tonic-gate nfs_rw_exit(&sp->s_recovlock); 17727c478bd9Sstevel@tonic-gate return; 17737c478bd9Sstevel@tonic-gate } 17747c478bd9Sstevel@tonic-gate 17757c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_CLIENTID, mi, NULL, n4e.error, NULL, 17767c478bd9Sstevel@tonic-gate NULL, n4e.stat, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 17777c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 17787c478bd9Sstevel@tonic-gate mi->mi_flags |= MI4_RECOV_FAIL; 17797c478bd9Sstevel@tonic-gate mi->mi_error = recovp->rc_error; 17807c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 17817c478bd9Sstevel@tonic-gate /* don't destroy the nfs4_server, let umount do it */ 17827c478bd9Sstevel@tonic-gate } 17837c478bd9Sstevel@tonic-gate } 17847c478bd9Sstevel@tonic-gate 17857c478bd9Sstevel@tonic-gate if (error == 0) { 17867c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 17877c478bd9Sstevel@tonic-gate mi->mi_recovflags &= ~MI4R_NEED_CLIENTID; 17887c478bd9Sstevel@tonic-gate /* 17897c478bd9Sstevel@tonic-gate * If still_stale isn't true, then another thread already 17907c478bd9Sstevel@tonic-gate * recovered the clientid. And that thread that set the 17917c478bd9Sstevel@tonic-gate * clientid will have initiated reopening files on all the 17927c478bd9Sstevel@tonic-gate * filesystems for the server, so we should not initiate 17937c478bd9Sstevel@tonic-gate * reopening for this filesystem here. 
17947c478bd9Sstevel@tonic-gate */ 17957c478bd9Sstevel@tonic-gate if (still_stale) { 17967c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_REOPEN_FILES; 17977c478bd9Sstevel@tonic-gate if (recovp->rc_srv_reboot) 17987c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_SRV_REBOOT; 17997c478bd9Sstevel@tonic-gate } 18007c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 18017c478bd9Sstevel@tonic-gate } 18027c478bd9Sstevel@tonic-gate 18037c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_recovlock); 18047c478bd9Sstevel@tonic-gate 18057c478bd9Sstevel@tonic-gate if (error != 0) { 18067c478bd9Sstevel@tonic-gate nfs_rw_exit(&sp->s_recovlock); 18077c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 18087c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI4_RECOV_FAIL) == 0) 18097c478bd9Sstevel@tonic-gate delay(SEC_TO_TICK(recov_err_delay)); 18107c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 18117c478bd9Sstevel@tonic-gate } else { 18127c478bd9Sstevel@tonic-gate mntinfo4_t **milist; 18137c478bd9Sstevel@tonic-gate mntinfo4_t *tmi; 18147c478bd9Sstevel@tonic-gate int nummi, i; 18157c478bd9Sstevel@tonic-gate 18167c478bd9Sstevel@tonic-gate /* 18177c478bd9Sstevel@tonic-gate * Initiate recovery of open files for other filesystems. 18187c478bd9Sstevel@tonic-gate * We create an array of filesystems, rather than just 18197c478bd9Sstevel@tonic-gate * walking the filesystem list, to avoid deadlock issues 18207c478bd9Sstevel@tonic-gate * with s_lock and mi_recovlock. 
18217c478bd9Sstevel@tonic-gate */ 18227c478bd9Sstevel@tonic-gate milist = make_milist(sp, &nummi); 18237c478bd9Sstevel@tonic-gate for (i = 0; i < nummi; i++) { 18247c478bd9Sstevel@tonic-gate tmi = milist[i]; 18257c478bd9Sstevel@tonic-gate if (tmi != mi) { 18267c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&tmi->mi_recovlock, 18277c478bd9Sstevel@tonic-gate RW_READER, 0); 18287c478bd9Sstevel@tonic-gate start_recovery_action(NR_OPENFILES, TRUE, tmi, 18297c478bd9Sstevel@tonic-gate NULL, NULL); 18307c478bd9Sstevel@tonic-gate nfs_rw_exit(&tmi->mi_recovlock); 18317c478bd9Sstevel@tonic-gate } 18327c478bd9Sstevel@tonic-gate } 18337c478bd9Sstevel@tonic-gate free_milist(milist, nummi); 18347c478bd9Sstevel@tonic-gate 18357c478bd9Sstevel@tonic-gate nfs_rw_exit(&sp->s_recovlock); 18367c478bd9Sstevel@tonic-gate } 18377c478bd9Sstevel@tonic-gate } 18387c478bd9Sstevel@tonic-gate 18397c478bd9Sstevel@tonic-gate /* 18407c478bd9Sstevel@tonic-gate * Return an array of filesystems associated with the given server. The 18417c478bd9Sstevel@tonic-gate * caller should call free_milist() to free the references and memory. 
18427c478bd9Sstevel@tonic-gate */ 18437c478bd9Sstevel@tonic-gate 18447c478bd9Sstevel@tonic-gate static mntinfo4_t ** 18457c478bd9Sstevel@tonic-gate make_milist(nfs4_server_t *sp, int *nummip) 18467c478bd9Sstevel@tonic-gate { 18477c478bd9Sstevel@tonic-gate int nummi, i; 18487c478bd9Sstevel@tonic-gate mntinfo4_t **milist; 18497c478bd9Sstevel@tonic-gate mntinfo4_t *tmi; 18507c478bd9Sstevel@tonic-gate 18517c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 18527c478bd9Sstevel@tonic-gate nummi = 0; 18537c478bd9Sstevel@tonic-gate for (tmi = sp->mntinfo4_list; tmi != NULL; tmi = tmi->mi_clientid_next) 18547c478bd9Sstevel@tonic-gate nummi++; 18557c478bd9Sstevel@tonic-gate 185638e20649Sjwahlig milist = kmem_alloc(nummi * sizeof (mntinfo4_t *), KM_SLEEP); 18577c478bd9Sstevel@tonic-gate 18587c478bd9Sstevel@tonic-gate for (i = 0, tmi = sp->mntinfo4_list; tmi != NULL; i++, 18597c478bd9Sstevel@tonic-gate tmi = tmi->mi_clientid_next) { 18607c478bd9Sstevel@tonic-gate milist[i] = tmi; 18617c478bd9Sstevel@tonic-gate VFS_HOLD(tmi->mi_vfsp); 18627c478bd9Sstevel@tonic-gate } 18637c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 18647c478bd9Sstevel@tonic-gate 18657c478bd9Sstevel@tonic-gate *nummip = nummi; 18667c478bd9Sstevel@tonic-gate return (milist); 18677c478bd9Sstevel@tonic-gate } 18687c478bd9Sstevel@tonic-gate 18697c478bd9Sstevel@tonic-gate /* 18707c478bd9Sstevel@tonic-gate * Free the filesystem list created by make_milist(). 
18717c478bd9Sstevel@tonic-gate */ 18727c478bd9Sstevel@tonic-gate 18737c478bd9Sstevel@tonic-gate static void 18747c478bd9Sstevel@tonic-gate free_milist(mntinfo4_t **milist, int nummi) 18757c478bd9Sstevel@tonic-gate { 18767c478bd9Sstevel@tonic-gate mntinfo4_t *tmi; 18777c478bd9Sstevel@tonic-gate int i; 18787c478bd9Sstevel@tonic-gate 18797c478bd9Sstevel@tonic-gate for (i = 0; i < nummi; i++) { 18807c478bd9Sstevel@tonic-gate tmi = milist[i]; 18817c478bd9Sstevel@tonic-gate VFS_RELE(tmi->mi_vfsp); 18827c478bd9Sstevel@tonic-gate } 18837c478bd9Sstevel@tonic-gate kmem_free(milist, nummi * sizeof (mntinfo4_t *)); 18847c478bd9Sstevel@tonic-gate } 18857c478bd9Sstevel@tonic-gate 18867c478bd9Sstevel@tonic-gate /* 18877c478bd9Sstevel@tonic-gate * Filehandle 18887c478bd9Sstevel@tonic-gate */ 18897c478bd9Sstevel@tonic-gate 18907c478bd9Sstevel@tonic-gate /* 18917c478bd9Sstevel@tonic-gate * Lookup the filehandle for the given vnode and update the rnode if it has 18927c478bd9Sstevel@tonic-gate * changed. 18937c478bd9Sstevel@tonic-gate * 18947c478bd9Sstevel@tonic-gate * Errors: 18957c478bd9Sstevel@tonic-gate * - if the filehandle could not be updated because of an error that 18967c478bd9Sstevel@tonic-gate * requires further recovery, initiate that recovery and return. 18977c478bd9Sstevel@tonic-gate * - if the filehandle could not be updated because of a signal, pretend we 18987c478bd9Sstevel@tonic-gate * succeeded and let someone else deal with it. 18997c478bd9Sstevel@tonic-gate * - if the filehandle could not be updated and the filesystem has been 19007c478bd9Sstevel@tonic-gate * forcibly unmounted, pretend we succeeded, and let the caller deal with 19017c478bd9Sstevel@tonic-gate * the forced unmount (to retry or not to retry, that is the question). 19027c478bd9Sstevel@tonic-gate * - if the filehandle could not be updated because of some other error, 19037c478bd9Sstevel@tonic-gate * mark the rnode bad and return. 
19047c478bd9Sstevel@tonic-gate */ 19057c478bd9Sstevel@tonic-gate static void 19067c478bd9Sstevel@tonic-gate recov_filehandle(nfs4_recov_t action, mntinfo4_t *mi, vnode_t *vp) 19077c478bd9Sstevel@tonic-gate { 19087c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 19097c478bd9Sstevel@tonic-gate nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 19107c478bd9Sstevel@tonic-gate bool_t needrecov; 19117c478bd9Sstevel@tonic-gate 19127c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 19137c478bd9Sstevel@tonic-gate 19147c478bd9Sstevel@tonic-gate if (rp->r_flags & R4RECOVERR) { 19157c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 19167c478bd9Sstevel@tonic-gate return; 19177c478bd9Sstevel@tonic-gate } 19187c478bd9Sstevel@tonic-gate 19197c478bd9Sstevel@tonic-gate /* 19207c478bd9Sstevel@tonic-gate * If someone else is updating the filehandle, wait for them to 19217c478bd9Sstevel@tonic-gate * finish and then let our caller retry. 19227c478bd9Sstevel@tonic-gate */ 19237c478bd9Sstevel@tonic-gate if (rp->r_flags & R4RECEXPFH) { 19247c478bd9Sstevel@tonic-gate while (rp->r_flags & R4RECEXPFH) { 19257c478bd9Sstevel@tonic-gate cv_wait(&rp->r_cv, &rp->r_statelock); 19267c478bd9Sstevel@tonic-gate } 19277c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 19287c478bd9Sstevel@tonic-gate return; 19297c478bd9Sstevel@tonic-gate } 19307c478bd9Sstevel@tonic-gate rp->r_flags |= R4RECEXPFH; 19317c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 19327c478bd9Sstevel@tonic-gate 19337c478bd9Sstevel@tonic-gate if (action == NR_BADHANDLE) { 19347c478bd9Sstevel@tonic-gate /* shouldn't happen */ 19357c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_BADHANDLE, mi, NULL, 0, 19367c478bd9Sstevel@tonic-gate vp, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 19377c478bd9Sstevel@tonic-gate } 19387c478bd9Sstevel@tonic-gate 19397c478bd9Sstevel@tonic-gate nfs4_remap_file(mi, vp, 0, &e); 19407c478bd9Sstevel@tonic-gate needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 
19417c478bd9Sstevel@tonic-gate 19427c478bd9Sstevel@tonic-gate /* 1943ddbc368aSRick Mesta * If we get BADHANDLE, FHEXPIRED or STALE in their handler, 1944ddbc368aSRick Mesta * something is broken. Don't try to recover, just mark the 1945ddbc368aSRick Mesta * file dead. 19467c478bd9Sstevel@tonic-gate */ 1947ddbc368aSRick Mesta DTRACE_PROBE2(recov__filehandle, nfs4_error_t, &e, vnode_t, vp); 19487c478bd9Sstevel@tonic-gate if (needrecov) { 1949ddbc368aSRick Mesta if (e.error == 0) { 1950ddbc368aSRick Mesta switch (e.stat) { 1951ddbc368aSRick Mesta case NFS4ERR_BADHANDLE: 1952ddbc368aSRick Mesta case NFS4ERR_FHEXPIRED: 1953ddbc368aSRick Mesta case NFS4ERR_STALE: 1954ddbc368aSRick Mesta goto norec; /* Unrecoverable errors */ 1955ddbc368aSRick Mesta default: 1956ddbc368aSRick Mesta break; 1957ddbc368aSRick Mesta } 1958ddbc368aSRick Mesta } 1959ddbc368aSRick Mesta (void) nfs4_start_recovery(&e, mi, vp, NULL, 1960ddbc368aSRick Mesta NULL, NULL, OP_LOOKUP, NULL, NULL, NULL); 1961ddbc368aSRick Mesta 19627c478bd9Sstevel@tonic-gate } else if (e.error != EINTR && 19637c478bd9Sstevel@tonic-gate !NFS4_FRC_UNMT_ERR(e.error, mi->mi_vfsp) && 19647c478bd9Sstevel@tonic-gate (e.error != 0 || e.stat != NFS4_OK)) { 19657c478bd9Sstevel@tonic-gate nfs4_recov_fh_fail(vp, e.error, e.stat); 19667c478bd9Sstevel@tonic-gate /* 19677c478bd9Sstevel@tonic-gate * Don't set r_error to ESTALE. Higher-level code (e.g., 19687c478bd9Sstevel@tonic-gate * cstatat_getvp()) retries on ESTALE, which would cause 19697c478bd9Sstevel@tonic-gate * an infinite loop. 
19707c478bd9Sstevel@tonic-gate */ 19717c478bd9Sstevel@tonic-gate } 1972ddbc368aSRick Mesta norec: 19737c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 19747c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4RECEXPFH; 19757c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 19767c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 19777c478bd9Sstevel@tonic-gate } 19787c478bd9Sstevel@tonic-gate 19797c478bd9Sstevel@tonic-gate /* 19807c478bd9Sstevel@tonic-gate * Stale Filehandle 19817c478bd9Sstevel@tonic-gate */ 19827c478bd9Sstevel@tonic-gate 19837c478bd9Sstevel@tonic-gate /* 19847c478bd9Sstevel@tonic-gate * A stale filehandle can happen when an individual file has 19857c478bd9Sstevel@tonic-gate * been removed, or when an entire filesystem has been taken 19867c478bd9Sstevel@tonic-gate * offline. To distinguish these cases, we do this: 19877c478bd9Sstevel@tonic-gate * - if a GETATTR with the current filehandle is okay, we do 19887c478bd9Sstevel@tonic-gate * nothing (this can happen with two-filehandle ops) 19897c478bd9Sstevel@tonic-gate * - if the GETATTR fails, but a GETATTR of the root filehandle 19907c478bd9Sstevel@tonic-gate * succeeds, mark the rnode with R4STALE, which will stop use 19917c478bd9Sstevel@tonic-gate * - if the GETATTR fails, and a GETATTR of the root filehandle 19927c478bd9Sstevel@tonic-gate * also fails, we consider the problem filesystem-wide, so: 19937c478bd9Sstevel@tonic-gate * - if we can failover, we should 19947c478bd9Sstevel@tonic-gate * - if we can't failover, we should mark both the original 19957c478bd9Sstevel@tonic-gate * vnode and the root bad 19967c478bd9Sstevel@tonic-gate */ 19977c478bd9Sstevel@tonic-gate static void 19987c478bd9Sstevel@tonic-gate recov_stale(mntinfo4_t *mi, vnode_t *vp) 19997c478bd9Sstevel@tonic-gate { 20007c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 20017c478bd9Sstevel@tonic-gate vnode_t *rootvp = NULL; 20027c478bd9Sstevel@tonic-gate nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 
20037c478bd9Sstevel@tonic-gate nfs4_ga_res_t gar; 20047c478bd9Sstevel@tonic-gate char *fail_msg = "failed to recover from NFS4ERR_STALE"; 20057c478bd9Sstevel@tonic-gate bool_t needrecov; 20067c478bd9Sstevel@tonic-gate 20077c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 20087c478bd9Sstevel@tonic-gate 20097c478bd9Sstevel@tonic-gate if (rp->r_flags & R4RECOVERR) { 20107c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 20117c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 20127c478bd9Sstevel@tonic-gate "recov_stale: already marked dead, rp %s", 20137c478bd9Sstevel@tonic-gate rnode4info(rp))); 20147c478bd9Sstevel@tonic-gate return; 20157c478bd9Sstevel@tonic-gate } 20167c478bd9Sstevel@tonic-gate 20177c478bd9Sstevel@tonic-gate if (rp->r_flags & R4STALE) { 20187c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 20197c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 20207c478bd9Sstevel@tonic-gate "recov_stale: already marked stale, rp %s", 20217c478bd9Sstevel@tonic-gate rnode4info(rp))); 20227c478bd9Sstevel@tonic-gate return; 20237c478bd9Sstevel@tonic-gate } 20247c478bd9Sstevel@tonic-gate 20257c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 20267c478bd9Sstevel@tonic-gate 20277c478bd9Sstevel@tonic-gate /* Try a GETATTR on this vnode */ 20287c478bd9Sstevel@tonic-gate nfs4_getattr_otw_norecovery(vp, &gar, &e, CRED(), 0); 20297c478bd9Sstevel@tonic-gate 20307c478bd9Sstevel@tonic-gate /* 20317c478bd9Sstevel@tonic-gate * Handle non-STALE recoverable errors 20327c478bd9Sstevel@tonic-gate */ 20337c478bd9Sstevel@tonic-gate needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp); 2034ddbc368aSRick Mesta if (needrecov) { 2035ddbc368aSRick Mesta if (e.error == 0) { 2036ddbc368aSRick Mesta switch (e.stat) { 2037ddbc368aSRick Mesta case NFS4ERR_STALE: 2038ddbc368aSRick Mesta case NFS4ERR_BADHANDLE: 2039ddbc368aSRick Mesta goto norec; /* Unrecoverable */ 2040ddbc368aSRick Mesta default: 2041ddbc368aSRick Mesta break; 
2042ddbc368aSRick Mesta } 2043ddbc368aSRick Mesta } 2044ddbc368aSRick Mesta (void) nfs4_start_recovery(&e, mi, vp, NULL, 2045ddbc368aSRick Mesta NULL, NULL, OP_GETATTR, NULL, NULL, NULL); 20467c478bd9Sstevel@tonic-gate goto out; 20477c478bd9Sstevel@tonic-gate } 2048ddbc368aSRick Mesta norec: 20497c478bd9Sstevel@tonic-gate /* Are things OK for this vnode? */ 20507c478bd9Sstevel@tonic-gate if (!e.error && e.stat == NFS4_OK) { 20517c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 20527c478bd9Sstevel@tonic-gate "recov_stale: file appears fine, rp %s", 20537c478bd9Sstevel@tonic-gate rnode4info(rp))); 20547c478bd9Sstevel@tonic-gate goto out; 20557c478bd9Sstevel@tonic-gate } 20567c478bd9Sstevel@tonic-gate 20577c478bd9Sstevel@tonic-gate /* Did we get an unrelated non-recoverable error? */ 20587c478bd9Sstevel@tonic-gate if (e.error || e.stat != NFS4ERR_STALE) { 20597c478bd9Sstevel@tonic-gate nfs4_fail_recov(vp, fail_msg, e.error, e.stat); 20607c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 20617c478bd9Sstevel@tonic-gate "recov_stale: unrelated fatal error, rp %s", 20627c478bd9Sstevel@tonic-gate rnode4info(rp))); 20637c478bd9Sstevel@tonic-gate goto out; 20647c478bd9Sstevel@tonic-gate } 20657c478bd9Sstevel@tonic-gate 20667c478bd9Sstevel@tonic-gate /* 20677c478bd9Sstevel@tonic-gate * If we don't appear to be dealing with the root node, find it. 
20687c478bd9Sstevel@tonic-gate */ 20697c478bd9Sstevel@tonic-gate if ((vp->v_flag & VROOT) == 0) { 20707c478bd9Sstevel@tonic-gate nfs4_error_zinit(&e); 20717c478bd9Sstevel@tonic-gate e.error = VFS_ROOT(vp->v_vfsp, &rootvp); 20727c478bd9Sstevel@tonic-gate if (e.error) { 20737c478bd9Sstevel@tonic-gate nfs4_fail_recov(vp, fail_msg, 0, NFS4ERR_STALE); 20747c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 20757c478bd9Sstevel@tonic-gate "recov_stale: can't find root node for rp %s", 20767c478bd9Sstevel@tonic-gate rnode4info(rp))); 20777c478bd9Sstevel@tonic-gate goto out; 20787c478bd9Sstevel@tonic-gate } 20797c478bd9Sstevel@tonic-gate } 20807c478bd9Sstevel@tonic-gate 20817c478bd9Sstevel@tonic-gate /* Try a GETATTR on the root vnode */ 20827c478bd9Sstevel@tonic-gate if (rootvp != NULL) { 20837c478bd9Sstevel@tonic-gate nfs4_error_zinit(&e); 20847c478bd9Sstevel@tonic-gate nfs4_getattr_otw_norecovery(rootvp, &gar, &e, CRED(), 0); 20857c478bd9Sstevel@tonic-gate 20867c478bd9Sstevel@tonic-gate needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp); 20877c478bd9Sstevel@tonic-gate if (needrecov) { 2088ddbc368aSRick Mesta if (e.error == 0) { 2089ddbc368aSRick Mesta switch (e.stat) { 2090ddbc368aSRick Mesta case NFS4ERR_STALE: 2091ddbc368aSRick Mesta case NFS4ERR_BADHANDLE: 2092ddbc368aSRick Mesta goto unrec; /* Unrecoverable */ 2093ddbc368aSRick Mesta default: 2094ddbc368aSRick Mesta break; 20957c478bd9Sstevel@tonic-gate } 20967c478bd9Sstevel@tonic-gate } 2097ddbc368aSRick Mesta (void) nfs4_start_recovery(&e, mi, rootvp, NULL, 2098ddbc368aSRick Mesta NULL, NULL, OP_GETATTR, NULL, NULL, NULL); 2099ddbc368aSRick Mesta } 2100ddbc368aSRick Mesta unrec: 21017c478bd9Sstevel@tonic-gate /* 21027c478bd9Sstevel@tonic-gate * Check to see if a failover attempt is warranted 21037c478bd9Sstevel@tonic-gate * NB: nfs4_try_failover doesn't check for STALE 21047c478bd9Sstevel@tonic-gate * because recov_stale gets a shot first. 
Now that 21057c478bd9Sstevel@tonic-gate * recov_stale has failed, go ahead and try failover. 21067c478bd9Sstevel@tonic-gate * 21077c478bd9Sstevel@tonic-gate * If the getattr on the root filehandle was successful, 21087c478bd9Sstevel@tonic-gate * then mark recovery as failed for 'vp' and exit. 21097c478bd9Sstevel@tonic-gate */ 21107c478bd9Sstevel@tonic-gate if (nfs4_try_failover(&e) == 0 && e.stat != NFS4ERR_STALE) { 21117c478bd9Sstevel@tonic-gate /* 21127c478bd9Sstevel@tonic-gate * pass the original error to fail_recov, not 21137c478bd9Sstevel@tonic-gate * the one from trying the root vnode. 21147c478bd9Sstevel@tonic-gate */ 21157c478bd9Sstevel@tonic-gate nfs4_fail_recov(vp, fail_msg, 0, NFS4ERR_STALE); 21167c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 21177c478bd9Sstevel@tonic-gate "recov_stale: root node OK, marking " 21187c478bd9Sstevel@tonic-gate "dead rp %s", rnode4info(rp))); 21197c478bd9Sstevel@tonic-gate goto out; 21207c478bd9Sstevel@tonic-gate } 21217c478bd9Sstevel@tonic-gate } 21227c478bd9Sstevel@tonic-gate 21237c478bd9Sstevel@tonic-gate /* 21247c478bd9Sstevel@tonic-gate * Here, we know that both the original file and the 21257c478bd9Sstevel@tonic-gate * root filehandle (which may be the same) are stale. 21267c478bd9Sstevel@tonic-gate * We want to fail over if we can, and if we can't, we 21277c478bd9Sstevel@tonic-gate * want to mark everything in sight bad. 
21287c478bd9Sstevel@tonic-gate */ 21297c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT4(mi)) { 21307c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 21317c478bd9Sstevel@tonic-gate mi->mi_recovflags |= MI4R_NEED_NEW_SERVER; 21327c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 21337c478bd9Sstevel@tonic-gate "recov_stale: failing over due to rp %s", 21347c478bd9Sstevel@tonic-gate rnode4info(rp))); 21357c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 21367c478bd9Sstevel@tonic-gate } else { 21377c478bd9Sstevel@tonic-gate rnode4_t *rootrp; 21387c478bd9Sstevel@tonic-gate servinfo4_t *svp; 21397c478bd9Sstevel@tonic-gate 21407c478bd9Sstevel@tonic-gate /* 21417c478bd9Sstevel@tonic-gate * Can't fail over, so mark things dead. 21427c478bd9Sstevel@tonic-gate * 21437c478bd9Sstevel@tonic-gate * If rootvp is set, we know we have a distinct 21447c478bd9Sstevel@tonic-gate * non-root vnode which can be marked dead in 21457c478bd9Sstevel@tonic-gate * the usual way. 21467c478bd9Sstevel@tonic-gate * 21477c478bd9Sstevel@tonic-gate * Then we want to mark the root vnode dead. 21487c478bd9Sstevel@tonic-gate * Note that if rootvp wasn't set, our vp is 21497c478bd9Sstevel@tonic-gate * actually the root vnode. 
21507c478bd9Sstevel@tonic-gate */ 21517c478bd9Sstevel@tonic-gate if (rootvp != NULL) { 21527c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 21537c478bd9Sstevel@tonic-gate "recov_stale: can't fail over, marking dead rp %s", 21547c478bd9Sstevel@tonic-gate rnode4info(rp))); 21557c478bd9Sstevel@tonic-gate nfs4_fail_recov(vp, fail_msg, 0, NFS4ERR_STALE); 21567c478bd9Sstevel@tonic-gate } else { 21577c478bd9Sstevel@tonic-gate rootvp = vp; 21587c478bd9Sstevel@tonic-gate VN_HOLD(rootvp); 21597c478bd9Sstevel@tonic-gate } 21607c478bd9Sstevel@tonic-gate 21617c478bd9Sstevel@tonic-gate /* 21627c478bd9Sstevel@tonic-gate * Mark root dead, but quietly - since 21637c478bd9Sstevel@tonic-gate * the root rnode is frequently recreated, 21647c478bd9Sstevel@tonic-gate * we can encounter this at every access. 21657c478bd9Sstevel@tonic-gate * Also mark recovery as failed on this VFS. 21667c478bd9Sstevel@tonic-gate */ 21677c478bd9Sstevel@tonic-gate rootrp = VTOR4(rootvp); 21687c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_CONT, 21697c478bd9Sstevel@tonic-gate "recov_stale: marking dead root rp %s", 21707c478bd9Sstevel@tonic-gate rnode4info(rootrp))); 21717c478bd9Sstevel@tonic-gate mutex_enter(&rootrp->r_statelock); 21727c478bd9Sstevel@tonic-gate rootrp->r_flags |= (R4RECOVERR | R4STALE); 21737c478bd9Sstevel@tonic-gate rootrp->r_error = ESTALE; 21747c478bd9Sstevel@tonic-gate mutex_exit(&rootrp->r_statelock); 21757c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 21767c478bd9Sstevel@tonic-gate mi->mi_error = ESTALE; 21777c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 21787c478bd9Sstevel@tonic-gate 21797c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 21807c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 21817c478bd9Sstevel@tonic-gate svp->sv_flags |= SV4_ROOT_STALE; 21827c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 21837c478bd9Sstevel@tonic-gate } 21847c478bd9Sstevel@tonic-gate 
21857c478bd9Sstevel@tonic-gate out: 21867c478bd9Sstevel@tonic-gate if (rootvp) 21877c478bd9Sstevel@tonic-gate VN_RELE(rootvp); 21887c478bd9Sstevel@tonic-gate } 21897c478bd9Sstevel@tonic-gate 21907c478bd9Sstevel@tonic-gate /* 21917c478bd9Sstevel@tonic-gate * Locks. 21927c478bd9Sstevel@tonic-gate */ 21937c478bd9Sstevel@tonic-gate 21947c478bd9Sstevel@tonic-gate /* 21957c478bd9Sstevel@tonic-gate * Reclaim all the active (acquired) locks for the given file. 21967c478bd9Sstevel@tonic-gate * If a process lost a lock, the process is sent a SIGLOST. This is not 21977c478bd9Sstevel@tonic-gate * considered an error. 21987c478bd9Sstevel@tonic-gate * 21997c478bd9Sstevel@tonic-gate * Return values: 22007c478bd9Sstevel@tonic-gate * Errors and status are returned via the nfs4_error_t parameter 22017c478bd9Sstevel@tonic-gate * If an error indicates that recovery is needed, the caller is responsible 22027c478bd9Sstevel@tonic-gate * for dealing with it. 22037c478bd9Sstevel@tonic-gate */ 22047c478bd9Sstevel@tonic-gate 22057c478bd9Sstevel@tonic-gate static void 22067c478bd9Sstevel@tonic-gate relock_file(vnode_t *vp, mntinfo4_t *mi, nfs4_error_t *ep, 22077c478bd9Sstevel@tonic-gate fattr4_change pre_change) 22087c478bd9Sstevel@tonic-gate { 22097c478bd9Sstevel@tonic-gate locklist_t *locks, *llp; 22107c478bd9Sstevel@tonic-gate rnode4_t *rp; 22117c478bd9Sstevel@tonic-gate 22127c478bd9Sstevel@tonic-gate ASSERT(ep != NULL); 22137c478bd9Sstevel@tonic-gate nfs4_error_zinit(ep); 22147c478bd9Sstevel@tonic-gate 22157c478bd9Sstevel@tonic-gate if (VTOMI4(vp)->mi_flags & MI4_LLOCK) 22167c478bd9Sstevel@tonic-gate return; 22177c478bd9Sstevel@tonic-gate 22187c478bd9Sstevel@tonic-gate nfs4_flush_lock_owners(VTOR4(vp)); 22197c478bd9Sstevel@tonic-gate 22207c478bd9Sstevel@tonic-gate /* 22217c478bd9Sstevel@tonic-gate * If we get an error that requires recovery actions, just bail out 22227c478bd9Sstevel@tonic-gate * and let the top-level recovery code handle it. 
22237c478bd9Sstevel@tonic-gate * 22247c478bd9Sstevel@tonic-gate * If we get some other error, kill the process that owned the lock 22257c478bd9Sstevel@tonic-gate * and mark its remaining locks (if any) as belonging to NOPID, so 22267c478bd9Sstevel@tonic-gate * that we don't make any more reclaim requests for that process. 22277c478bd9Sstevel@tonic-gate */ 22287c478bd9Sstevel@tonic-gate 22297c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 22307c478bd9Sstevel@tonic-gate locks = flk_active_locks_for_vp(vp); 22317c478bd9Sstevel@tonic-gate for (llp = locks; llp != NULL; llp = llp->ll_next) { 22327c478bd9Sstevel@tonic-gate int did_reclaim = 1; 22337c478bd9Sstevel@tonic-gate 22347c478bd9Sstevel@tonic-gate ASSERT(llp->ll_vp == vp); 22357c478bd9Sstevel@tonic-gate if (llp->ll_flock.l_pid == NOPID) 22367c478bd9Sstevel@tonic-gate continue; 22377c478bd9Sstevel@tonic-gate reclaim_one_lock(vp, &llp->ll_flock, ep, &did_reclaim); 22387c478bd9Sstevel@tonic-gate /* 22397c478bd9Sstevel@tonic-gate * If we need to restart recovery, stop processing the 22407c478bd9Sstevel@tonic-gate * list. Some errors would be recoverable under other 22417c478bd9Sstevel@tonic-gate * circumstances, but if they happen here we just give up 22427c478bd9Sstevel@tonic-gate * on the lock. 22437c478bd9Sstevel@tonic-gate */ 22447c478bd9Sstevel@tonic-gate if (nfs4_needs_recovery(ep, TRUE, vp->v_vfsp)) { 22457c478bd9Sstevel@tonic-gate if (ep->error != 0) 22467c478bd9Sstevel@tonic-gate break; 22477c478bd9Sstevel@tonic-gate if (!nfs4_recov_marks_dead(ep->stat)) 22487c478bd9Sstevel@tonic-gate break; 22497c478bd9Sstevel@tonic-gate } 22507c478bd9Sstevel@tonic-gate /* 22517c478bd9Sstevel@tonic-gate * In case the server isn't offering us a grace period, or 22527c478bd9Sstevel@tonic-gate * if we missed it, we might have opened & locked from scratch, 22537c478bd9Sstevel@tonic-gate * rather than reopened/reclaimed. 
22547c478bd9Sstevel@tonic-gate * We need to ensure that the object hadn't been otherwise 22557c478bd9Sstevel@tonic-gate * changed during this time, by comparing the changeinfo. 22567c478bd9Sstevel@tonic-gate * We get passed the changeinfo from before the reopen by our 22577c478bd9Sstevel@tonic-gate * caller, in pre_change. 22587c478bd9Sstevel@tonic-gate * The changeinfo from after the reopen is in rp->r_change, 22597c478bd9Sstevel@tonic-gate * courtesy of the GETATTR in the reopen. 22607c478bd9Sstevel@tonic-gate * If they're different, then the file has changed, and we 22617c478bd9Sstevel@tonic-gate * have to SIGLOST the app. 22627c478bd9Sstevel@tonic-gate */ 22637c478bd9Sstevel@tonic-gate if (ep->error == 0 && ep->stat == NFS4_OK && !did_reclaim) { 22647c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 22657c478bd9Sstevel@tonic-gate if (pre_change != rp->r_change) 22667c478bd9Sstevel@tonic-gate ep->stat = NFS4ERR_NO_GRACE; 22677c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 22687c478bd9Sstevel@tonic-gate } 22697c478bd9Sstevel@tonic-gate if (ep->error != 0 || ep->stat != NFS4_OK) { 22707c478bd9Sstevel@tonic-gate if (ep->error != 0) 22717c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_FAIL_RELOCK, mi, 22727c478bd9Sstevel@tonic-gate NULL, ep->error, vp, NULL, 0, NULL, 22737c478bd9Sstevel@tonic-gate llp->ll_flock.l_pid, TAG_NONE, TAG_NONE, 22747c478bd9Sstevel@tonic-gate 0, 0); 22757c478bd9Sstevel@tonic-gate else 22767c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_FAIL_RELOCK, mi, 22777c478bd9Sstevel@tonic-gate NULL, 0, vp, NULL, ep->stat, NULL, 22787c478bd9Sstevel@tonic-gate llp->ll_flock.l_pid, TAG_NONE, TAG_NONE, 22797c478bd9Sstevel@tonic-gate 0, 0); 22807c478bd9Sstevel@tonic-gate nfs4_send_siglost(llp->ll_flock.l_pid, mi, vp, TRUE, 22817c478bd9Sstevel@tonic-gate ep->error, ep->stat); 2282ed076bbfSMarcel Telka relock_skip_pid(vp, llp, llp->ll_flock.l_pid); 22837c478bd9Sstevel@tonic-gate 22847c478bd9Sstevel@tonic-gate /* Reinitialize the nfs4_error and 
continue */ 22857c478bd9Sstevel@tonic-gate nfs4_error_zinit(ep); 22867c478bd9Sstevel@tonic-gate } 22877c478bd9Sstevel@tonic-gate } 22887c478bd9Sstevel@tonic-gate 22897c478bd9Sstevel@tonic-gate if (locks != NULL) 22907c478bd9Sstevel@tonic-gate flk_free_locklist(locks); 22917c478bd9Sstevel@tonic-gate } 22927c478bd9Sstevel@tonic-gate 22937c478bd9Sstevel@tonic-gate /* 22947c478bd9Sstevel@tonic-gate * Reclaim the given lock. 22957c478bd9Sstevel@tonic-gate * 22967c478bd9Sstevel@tonic-gate * Errors are returned via the nfs4_error_t parameter. 22977c478bd9Sstevel@tonic-gate */ 22987c478bd9Sstevel@tonic-gate static void 22997c478bd9Sstevel@tonic-gate reclaim_one_lock(vnode_t *vp, flock64_t *flk, nfs4_error_t *ep, 23007c478bd9Sstevel@tonic-gate int *did_reclaimp) 23017c478bd9Sstevel@tonic-gate { 23027c478bd9Sstevel@tonic-gate cred_t *cr; 23037c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 23047c478bd9Sstevel@tonic-gate 23057c478bd9Sstevel@tonic-gate cr = pid_to_cr(flk->l_pid); 23067c478bd9Sstevel@tonic-gate if (cr == NULL) { 2307ed076bbfSMarcel Telka nfs4_error_init(ep, ESRCH); 23087c478bd9Sstevel@tonic-gate return; 23097c478bd9Sstevel@tonic-gate } 23107c478bd9Sstevel@tonic-gate 23117c478bd9Sstevel@tonic-gate do { 23127c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 23137c478bd9Sstevel@tonic-gate if (rp->r_flags & R4RECOVERR) { 23147c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2315ed076bbfSMarcel Telka nfs4_error_init(ep, ESTALE); 23167c478bd9Sstevel@tonic-gate break; 23177c478bd9Sstevel@tonic-gate } 23187c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 23197c478bd9Sstevel@tonic-gate 2320*81d2f16dSMarcel Telka nfs4frlock(NFS4_LCK_CTYPE_RECLAIM, vp, F_SETLK, flk, cr, ep, 2321*81d2f16dSMarcel Telka NULL, did_reclaimp); 23227c478bd9Sstevel@tonic-gate if (ep->error == 0 && ep->stat == NFS4ERR_FHEXPIRED) 23237c478bd9Sstevel@tonic-gate start_recovery_action(NR_FHEXPIRED, TRUE, VTOMI4(vp), 23247c478bd9Sstevel@tonic-gate vp, NULL); 
23257c478bd9Sstevel@tonic-gate } while (ep->error == 0 && ep->stat == NFS4ERR_FHEXPIRED); 23267c478bd9Sstevel@tonic-gate 23277c478bd9Sstevel@tonic-gate crfree(cr); 23287c478bd9Sstevel@tonic-gate } 23297c478bd9Sstevel@tonic-gate 23307c478bd9Sstevel@tonic-gate /* 23317c478bd9Sstevel@tonic-gate * Open files. 23327c478bd9Sstevel@tonic-gate */ 23337c478bd9Sstevel@tonic-gate 23347c478bd9Sstevel@tonic-gate /* 23357c478bd9Sstevel@tonic-gate * Verifies if the nfsstat4 is a valid error for marking this vnode dead. 23367c478bd9Sstevel@tonic-gate * Returns 1 if the error is valid; 0 otherwise. 23377c478bd9Sstevel@tonic-gate */ 23387c478bd9Sstevel@tonic-gate static int 23397c478bd9Sstevel@tonic-gate nfs4_valid_recov_err_for_vp(vnode_t *vp, nfsstat4 stat) 23407c478bd9Sstevel@tonic-gate { 23417c478bd9Sstevel@tonic-gate /* 23427c478bd9Sstevel@tonic-gate * We should not be marking non-regular files as dead, 23437c478bd9Sstevel@tonic-gate * except in very rare cases (eg: BADHANDLE or NFS4ERR_BADNAME). 23447c478bd9Sstevel@tonic-gate */ 23457c478bd9Sstevel@tonic-gate if (vp->v_type != VREG && stat != NFS4ERR_BADHANDLE && 23467c478bd9Sstevel@tonic-gate stat != NFS4ERR_BADNAME) 23477c478bd9Sstevel@tonic-gate return (0); 23487c478bd9Sstevel@tonic-gate 23497c478bd9Sstevel@tonic-gate return (1); 23507c478bd9Sstevel@tonic-gate } 23517c478bd9Sstevel@tonic-gate 23527c478bd9Sstevel@tonic-gate /* 23537c478bd9Sstevel@tonic-gate * Failed attempting to recover a filehandle. If 'stat' is valid for 'vp', 23547c478bd9Sstevel@tonic-gate * then mark the object dead. Since we've had to do a lookup for 23557c478bd9Sstevel@tonic-gate * filehandle recovery, we will mark the object dead if we got NOENT. 
23567c478bd9Sstevel@tonic-gate */ 23577c478bd9Sstevel@tonic-gate static void 23587c478bd9Sstevel@tonic-gate nfs4_recov_fh_fail(vnode_t *vp, int error, nfsstat4 stat) 23597c478bd9Sstevel@tonic-gate { 23607c478bd9Sstevel@tonic-gate ASSERT(vp != NULL); 23617c478bd9Sstevel@tonic-gate 23627c478bd9Sstevel@tonic-gate if ((error == 0) && (stat != NFS4ERR_NOENT) && 23637c478bd9Sstevel@tonic-gate (!nfs4_valid_recov_err_for_vp(vp, stat))) 23647c478bd9Sstevel@tonic-gate return; 23657c478bd9Sstevel@tonic-gate 23667c478bd9Sstevel@tonic-gate nfs4_fail_recov(vp, "can't recover filehandle", error, stat); 23677c478bd9Sstevel@tonic-gate } 23687c478bd9Sstevel@tonic-gate 23697c478bd9Sstevel@tonic-gate /* 23707c478bd9Sstevel@tonic-gate * Recovery from a "shouldn't happen" error. In the long term, we'd like 23717c478bd9Sstevel@tonic-gate * to mark only the data structure(s) that provided the bad value as being 23727c478bd9Sstevel@tonic-gate * bad. But for now we'll just mark the entire file. 23737c478bd9Sstevel@tonic-gate */ 23747c478bd9Sstevel@tonic-gate 23757c478bd9Sstevel@tonic-gate static void 23767c478bd9Sstevel@tonic-gate recov_badstate(recov_info_t *recovp, vnode_t *vp, nfsstat4 stat) 23777c478bd9Sstevel@tonic-gate { 23787c478bd9Sstevel@tonic-gate ASSERT(vp != NULL); 23797c478bd9Sstevel@tonic-gate recov_throttle(recovp, vp); 23807c478bd9Sstevel@tonic-gate 23817c478bd9Sstevel@tonic-gate if (!nfs4_valid_recov_err_for_vp(vp, stat)) 23827c478bd9Sstevel@tonic-gate return; 23837c478bd9Sstevel@tonic-gate 23847c478bd9Sstevel@tonic-gate nfs4_fail_recov(vp, "", 0, stat); 23857c478bd9Sstevel@tonic-gate } 23867c478bd9Sstevel@tonic-gate 23877c478bd9Sstevel@tonic-gate /* 23887c478bd9Sstevel@tonic-gate * Free up the information saved for a lost state request. 
23897c478bd9Sstevel@tonic-gate */ 23907c478bd9Sstevel@tonic-gate static void 23917c478bd9Sstevel@tonic-gate nfs4_free_lost_rqst(nfs4_lost_rqst_t *lrp, nfs4_server_t *sp) 23927c478bd9Sstevel@tonic-gate { 23937c478bd9Sstevel@tonic-gate component4 *filep; 23947c478bd9Sstevel@tonic-gate nfs4_open_stream_t *osp; 23957c478bd9Sstevel@tonic-gate int have_sync_lock; 23967c478bd9Sstevel@tonic-gate 23977c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_lost_rqst_debug, 23987c478bd9Sstevel@tonic-gate (CE_NOTE, "nfs4_free_lost_rqst:")); 23997c478bd9Sstevel@tonic-gate 24007c478bd9Sstevel@tonic-gate switch (lrp->lr_op) { 24017c478bd9Sstevel@tonic-gate case OP_OPEN: 24027c478bd9Sstevel@tonic-gate filep = &lrp->lr_ofile; 24037c478bd9Sstevel@tonic-gate if (filep->utf8string_val) { 24047c478bd9Sstevel@tonic-gate kmem_free(filep->utf8string_val, filep->utf8string_len); 24057c478bd9Sstevel@tonic-gate filep->utf8string_val = NULL; 24067c478bd9Sstevel@tonic-gate } 24077c478bd9Sstevel@tonic-gate break; 24087c478bd9Sstevel@tonic-gate case OP_DELEGRETURN: 24097c478bd9Sstevel@tonic-gate nfs4delegreturn_cleanup(VTOR4(lrp->lr_vp), sp); 24107c478bd9Sstevel@tonic-gate break; 24117c478bd9Sstevel@tonic-gate case OP_CLOSE: 24127c478bd9Sstevel@tonic-gate osp = lrp->lr_osp; 24137c478bd9Sstevel@tonic-gate ASSERT(osp != NULL); 24147c478bd9Sstevel@tonic-gate mutex_enter(&osp->os_sync_lock); 24157c478bd9Sstevel@tonic-gate have_sync_lock = 1; 24167c478bd9Sstevel@tonic-gate if (osp->os_pending_close) { 24177c478bd9Sstevel@tonic-gate /* clean up the open file state. 
*/ 24187c478bd9Sstevel@tonic-gate osp->os_pending_close = 0; 24197c478bd9Sstevel@tonic-gate nfs4close_notw(lrp->lr_vp, osp, &have_sync_lock); 24207c478bd9Sstevel@tonic-gate } 24217c478bd9Sstevel@tonic-gate if (have_sync_lock) 24227c478bd9Sstevel@tonic-gate mutex_exit(&osp->os_sync_lock); 24237c478bd9Sstevel@tonic-gate break; 24247c478bd9Sstevel@tonic-gate } 24257c478bd9Sstevel@tonic-gate 24267c478bd9Sstevel@tonic-gate lrp->lr_op = 0; 24277c478bd9Sstevel@tonic-gate if (lrp->lr_oop != NULL) { 24287c478bd9Sstevel@tonic-gate open_owner_rele(lrp->lr_oop); 24297c478bd9Sstevel@tonic-gate lrp->lr_oop = NULL; 24307c478bd9Sstevel@tonic-gate } 24317c478bd9Sstevel@tonic-gate if (lrp->lr_osp != NULL) { 24327c478bd9Sstevel@tonic-gate open_stream_rele(lrp->lr_osp, VTOR4(lrp->lr_vp)); 24337c478bd9Sstevel@tonic-gate lrp->lr_osp = NULL; 24347c478bd9Sstevel@tonic-gate } 24357c478bd9Sstevel@tonic-gate if (lrp->lr_lop != NULL) { 24367c478bd9Sstevel@tonic-gate lock_owner_rele(lrp->lr_lop); 24377c478bd9Sstevel@tonic-gate lrp->lr_lop = NULL; 24387c478bd9Sstevel@tonic-gate } 24397c478bd9Sstevel@tonic-gate if (lrp->lr_flk != NULL) { 24407c478bd9Sstevel@tonic-gate kmem_free(lrp->lr_flk, sizeof (flock64_t)); 24417c478bd9Sstevel@tonic-gate lrp->lr_flk = NULL; 24427c478bd9Sstevel@tonic-gate } 24437c478bd9Sstevel@tonic-gate if (lrp->lr_vp != NULL) { 24447c478bd9Sstevel@tonic-gate VN_RELE(lrp->lr_vp); 24457c478bd9Sstevel@tonic-gate lrp->lr_vp = NULL; 24467c478bd9Sstevel@tonic-gate } 24477c478bd9Sstevel@tonic-gate if (lrp->lr_dvp != NULL) { 24487c478bd9Sstevel@tonic-gate VN_RELE(lrp->lr_dvp); 24497c478bd9Sstevel@tonic-gate lrp->lr_dvp = NULL; 24507c478bd9Sstevel@tonic-gate } 24517c478bd9Sstevel@tonic-gate if (lrp->lr_cr != NULL) { 24527c478bd9Sstevel@tonic-gate crfree(lrp->lr_cr); 24537c478bd9Sstevel@tonic-gate lrp->lr_cr = NULL; 24547c478bd9Sstevel@tonic-gate } 24557c478bd9Sstevel@tonic-gate 24567c478bd9Sstevel@tonic-gate kmem_free(lrp, sizeof (nfs4_lost_rqst_t)); 24577c478bd9Sstevel@tonic-gate } 
24587c478bd9Sstevel@tonic-gate 24597c478bd9Sstevel@tonic-gate /* 24607c478bd9Sstevel@tonic-gate * Remove any lost state requests and free them. 24617c478bd9Sstevel@tonic-gate */ 24627c478bd9Sstevel@tonic-gate static void 24637c478bd9Sstevel@tonic-gate nfs4_remove_lost_rqsts(mntinfo4_t *mi, nfs4_server_t *sp) 24647c478bd9Sstevel@tonic-gate { 24657c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *lrp; 24667c478bd9Sstevel@tonic-gate 24677c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 24687c478bd9Sstevel@tonic-gate while ((lrp = list_head(&mi->mi_lost_state)) != NULL) { 24697c478bd9Sstevel@tonic-gate list_remove(&mi->mi_lost_state, lrp); 24707c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 24717c478bd9Sstevel@tonic-gate nfs4_free_lost_rqst(lrp, sp); 24727c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 24737c478bd9Sstevel@tonic-gate } 24747c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 24757c478bd9Sstevel@tonic-gate } 24767c478bd9Sstevel@tonic-gate 24777c478bd9Sstevel@tonic-gate /* 24787c478bd9Sstevel@tonic-gate * Reopen all the files for the given filesystem and reclaim any locks. 
*/
/*
 * Outline of recov_openfiles(), as implemented below:
 *  - wait once on sp->wait_cb_null, but only when neither N4S_CB_PINGED
 *    nor N4S_CB_WAITER is set in sp->s_flags;
 *  - take sp->s_recovlock as reader, then mi->mi_recovlock as writer
 *    (both are dropped, in the reverse order, before returning);
 *  - for volatile-filehandle mounts, remap the root first;
 *  - if no error so far: snapshot the open files with r4mkopenlist(),
 *    discard the queued lost requests, and for each file optionally remap
 *    it, call nfs4_reopen() for each of its re_osp[] entries, and reclaim
 *    its locks through relock_file();
 *  - clear MI4R_REOPEN_FILES and MI4R_REMAP_FILES only when 'e' is clean.
 * An error left in 'e' after processing a file ends the per-file loop.
 * Nothing is returned to the caller.
 */

static void
recov_openfiles(recov_info_t *recovp, nfs4_server_t *sp)
{
	mntinfo4_t *mi = recovp->rc_mi;
	nfs4_opinst_t *reopenlist = NULL, *rep;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	open_claim_type4 claim;
	int remap;
	char *fail_msg = "No such file or directory on replica";
	rnode4_t *rp;
	fattr4_change pre_change;

	ASSERT(sp != NULL);

	/*
	 * This check is to allow a 10ms pause before we reopen files
	 * it should allow the server time to have received the CB_NULL
	 * reply and update its internal structures such that (if
	 * applicable) we are granted a delegation on reopened files.
	 */
	mutex_enter(&sp->s_lock);
	if ((sp->s_flags & (N4S_CB_PINGED | N4S_CB_WAITER)) == 0) {
		/* Setting N4S_CB_WAITER makes this wait happen only once. */
		sp->s_flags |= N4S_CB_WAITER;
		(void) cv_reltimedwait(&sp->wait_cb_null, &sp->s_lock,
		    drv_usectohz(N4S_CB_PAUSE_TIME), TR_CLOCK_TICK);
	}
	mutex_exit(&sp->s_lock);

	/* Server-wide lock as reader first, then this mount's as writer. */
	(void) nfs_rw_enter_sig(&sp->s_recovlock, RW_READER, 0);
	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0);

	if (NFS4_VOLATILE_FH(mi)) {
		nfs4_remap_root(mi, &e, 0);
		if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) {
			(void) nfs4_start_recovery(&e, mi, NULL,
			    NULL, NULL, NULL, OP_LOOKUP, NULL, NULL, NULL);
		}
	}

	/*
	 * Use CLAIM_PREVIOUS when a server reboot has been flagged, either
	 * in the recovery arguments or on the mount; CLAIM_NULL otherwise.
	 */
	mutex_enter(&mi->mi_lock);
	if (recovp->rc_srv_reboot || (mi->mi_recovflags & MI4R_SRV_REBOOT))
		claim = CLAIM_PREVIOUS;
	else
		claim = CLAIM_NULL;
	mutex_exit(&mi->mi_lock);

	/* Skipped entirely if remapping the root left an error in 'e'. */
	if (e.error == 0 && e.stat == NFS4_OK) {
		/*
		 * Get a snapshot of open files in the filesystem.  Note
		 * that new opens will stall until the server's grace
		 * period is done.
		 */
		reopenlist = r4mkopenlist(mi);

		mutex_enter(&mi->mi_lock);
		remap = mi->mi_recovflags & MI4R_REMAP_FILES;
		mutex_exit(&mi->mi_lock);
		/*
		 * Since we are re-establishing state on the
		 * server, its ok to blow away the saved lost
		 * requests since we don't need to reissue it.
		 */
		nfs4_remove_lost_rqsts(mi, sp);

		for (rep = reopenlist; rep; rep = rep->re_next) {

			if (remap) {
				nfs4_remap_file(mi, rep->re_vp,
				    NFS4_REMAP_CKATTRS, &e);
			}
			DTRACE_PROBE2(recov__openfiles, nfs4_error_t, &e,
			    vnode_t, rep->re_vp);
			if (e.error == ENOENT || e.stat == NFS4ERR_NOENT) {
				/*
				 * The current server does not have the file
				 * that is to be remapped.  This is most
				 * likely due to an improperly maintained
				 * replica.   The files that are missing from
				 * the server will be marked dead and logged
				 * in order to make sys admins aware of the
				 * problem.
				 */
				nfs4_fail_recov(rep->re_vp,
				    fail_msg, e.error, e.stat);
				/*
				 * We've already handled the error so clear it.
				 */
				nfs4_error_zinit(&e);
				continue;
			} else if (e.error == 0 && e.stat == NFS4_OK) {
				int j;

				/*
				 * Remember the change attribute as it was
				 * before reopening; it is handed to
				 * relock_file() below.
				 */
				rp = VTOR4(rep->re_vp);
				mutex_enter(&rp->r_statelock);
				pre_change = rp->r_change;
				mutex_exit(&rp->r_statelock);

				/* Stop at the first reopen that fails. */
				for (j = 0; j < rep->re_numosp; j++) {
					nfs4_reopen(rep->re_vp, rep->re_osp[j],
					    &e, claim, FALSE, TRUE);
					if (e.error != 0 || e.stat != NFS4_OK)
						break;
				}
				if (nfs4_needs_recovery(&e, TRUE,
				    mi->mi_vfsp)) {
					(void) nfs4_start_recovery(&e, mi,
					    rep->re_vp, NULL, NULL, NULL,
					    OP_OPEN, NULL, NULL, NULL);
					break;
				}
			}
#ifdef DEBUG
			/*
			 * Debug-only pause between files; nfs4_recovdelay is
			 * presumably in seconds (it is scaled by 1000 before
			 * MSEC_TO_TICK) -- TODO confirm.
			 */
			if (nfs4_recovdelay > 0)
				delay(MSEC_TO_TICK(nfs4_recovdelay * 1000));
#endif
			/*
			 * Only reached with a clean 'e' after the reopen
			 * branch above, so pre_change has been set here.
			 */
			if (e.error == 0 && e.stat == NFS4_OK) {
				relock_file(rep->re_vp, mi, &e, pre_change);

				if (nfs4_needs_recovery(&e, TRUE, mi->mi_vfsp))
					(void) nfs4_start_recovery(&e, mi,
					    rep->re_vp, NULL, NULL, NULL,
					    OP_LOCK, NULL, NULL, NULL);
			}

			/* Any error still in 'e' ends the per-file loop. */
			if (e.error != 0 || e.stat != NFS4_OK)
				break;
		}

		/*
		 * Check to see if we need to remap files passed in
		 * via the recovery arguments; this will have been
		 * done for open files.  A failure here is not fatal.
		 */
		if (remap) {
			nfs4_error_t ignore;
			nfs4_check_remap(mi, recovp->rc_vp1, NFS4_REMAP_CKATTRS,
			    &ignore);
			nfs4_check_remap(mi, recovp->rc_vp2, NFS4_REMAP_CKATTRS,
			    &ignore);
		}
	}

	/* The recovery flags stay set unless everything above succeeded. */
	if (e.error == 0 && e.stat == NFS4_OK) {
		mutex_enter(&mi->mi_lock);
		mi->mi_recovflags &= ~(MI4R_REOPEN_FILES | MI4R_REMAP_FILES);
		mutex_exit(&mi->mi_lock);
	}

	nfs_rw_exit(&mi->mi_recovlock);
	nfs_rw_exit(&sp->s_recovlock);

	if (reopenlist != NULL)
		r4releopenlist(reopenlist);
}

/*
 * Resend the queued state recovery requests in "rqsts".
*/
/*
 * nfs4_resend_lost_rqsts() walks mi->mi_lost_state from the head.  Each
 * request is resent with resend_one_op(); on a result that recovery should
 * handle, the function returns early and leaves that request at the head
 * of the list.  Otherwise the request is unlinked and freed, and the new
 * head is processed next.
 *
 * The caller must hold mi->mi_recovlock as writer (asserted below).
 */

static void
nfs4_resend_lost_rqsts(recov_info_t *recovp, nfs4_server_t *sp)
{
	nfs4_lost_rqst_t *lrp, *tlrp;
	mntinfo4_t *mi = recovp->rc_mi;
	nfs4_error_t n4e;
#ifdef NOTYET
	uint32_t deny_bits = 0;
#endif

	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "nfs4_resend_lost_rqsts"));

	ASSERT(mi != NULL);
	ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	/* mi_lock is held only while looking at or changing the list. */
	mutex_enter(&mi->mi_lock);
	lrp = list_head(&mi->mi_lost_state);
	mutex_exit(&mi->mi_lock);
	while (lrp != NULL) {
		/* Start each resend with a clean error structure. */
		nfs4_error_zinit(&n4e);
		resend_one_op(lrp, &n4e, mi, sp);
		NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
		    "nfs4_resend_lost_rqsts: resend request: for vp %p got "
		    "error %d stat %d", (void *)lrp->lr_vp, n4e.error,
		    n4e.stat));

		/*
		 * If we get a recovery error that we can actually
		 * recover from (such as ETIMEDOUT, FHEXPIRED), we
		 * return and let the recovery thread redrive the call.
		 * Don't requeue unless the zone is still healthy.
		 */
		if (zone_status_get(curproc->p_zone) < ZONE_IS_SHUTTING_DOWN &&
		    nfs4_needs_recovery(&n4e, TRUE, mi->mi_vfsp) &&
		    (nfs4_try_failover(&n4e) ||
		    NFS4_FRC_UNMT_ERR(n4e.error, mi->mi_vfsp) ||
		    (n4e.error == 0 && n4e.stat != NFS4ERR_BADHANDLE &&
		    !nfs4_recov_marks_dead(n4e.stat)))) {
			/*
			 * For DELAY, GRACE and RESOURCE (and likewise for a
			 * forced-unmount error), we want to delay a bit
			 * instead of pounding the server into submission.
			 * We have to do this manually; the normal
			 * processing for these errors only works for
			 * non-recovery requests.
			 */
			if ((n4e.error == 0 && n4e.stat == NFS4ERR_DELAY) ||
			    (n4e.error == 0 && n4e.stat == NFS4ERR_GRACE) ||
			    (n4e.error == 0 && n4e.stat == NFS4ERR_RESOURCE) ||
			    NFS4_FRC_UNMT_ERR(n4e.error, mi->mi_vfsp)) {
				delay(SEC_TO_TICK(nfs4err_delay_time));
			} else {
				(void) nfs4_start_recovery(&n4e,
				    mi, lrp->lr_dvp, lrp->lr_vp, NULL, NULL,
				    lrp->lr_op, NULL, NULL, NULL);
			}
			/* lrp was not unlinked; it stays queued. */
			return;
		}

		/*
		 * Done with this request: unlink it and pick up the new
		 * head under mi_lock, then free it with the lock dropped.
		 */
		mutex_enter(&mi->mi_lock);
		list_remove(&mi->mi_lost_state, lrp);
		tlrp = lrp;
		lrp = list_head(&mi->mi_lost_state);
		mutex_exit(&mi->mi_lock);
		nfs4_free_lost_rqst(tlrp, sp);
	}
}

/*
 * Resend the given op, and issue any necessary undo call.
 * Errors are returned via the nfs4_error_t parameter.
 *
 * The op to resend is selected by lrp->lr_op.  Only a successfully resent
 * OPEN is followed by an "undo" (a CLOSE sent by
 * close_after_open_resend()), in which case *ep reflects the result of
 * that CLOSE.  An unexpected op sets EINVAL in *ep (and panics on DEBUG
 * kernels).
 */

static void
resend_one_op(nfs4_lost_rqst_t *lrp, nfs4_error_t *ep,
    mntinfo4_t *mi, nfs4_server_t *sp)
{
	vnode_t *vp;
	nfs4_open_stream_t *osp;
	cred_t *cr;
	uint32_t acc_bits;

	vp = lrp->lr_vp;
	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "resend_one_op: "
	    "have a lost open/close request for vp %p", (void *)vp));

	switch (lrp->lr_op) {
	case OP_OPEN:
		/*
		 * vp is passed by address, so the callee may replace it;
		 * see the check at "done" below.
		 */
		nfs4_resend_open_otw(&vp, lrp, ep);
		break;
	case OP_OPEN_DOWNGRADE:
		ASSERT(lrp->lr_oop != NULL);
		ep->error = nfs4_start_open_seqid_sync(lrp->lr_oop, mi);
		ASSERT(!ep->error);	/* recov thread always succeeds */
		ASSERT(lrp->lr_osp != NULL);
		/* The downgrade is issued with os_sync_lock held. */
		mutex_enter(&lrp->lr_osp->os_sync_lock);
		nfs4_open_downgrade(lrp->lr_dg_acc, lrp->lr_dg_deny,
		    lrp->lr_oop, lrp->lr_osp, vp, lrp->lr_cr, lrp,
		    ep, NULL, NULL);
		mutex_exit(&lrp->lr_osp->os_sync_lock);
		nfs4_end_open_seqid_sync(lrp->lr_oop);
		break;
	case OP_CLOSE:
		osp = lrp->lr_osp;
		cr = lrp->lr_cr;
		acc_bits = 0;
		/*
		 * Build the access bits from the open stream's current
		 * share-access state, read under os_sync_lock.
		 */
		mutex_enter(&osp->os_sync_lock);
		if (osp->os_share_acc_read)
			acc_bits |= OPEN4_SHARE_ACCESS_READ;
		if (osp->os_share_acc_write)
			acc_bits |= OPEN4_SHARE_ACCESS_WRITE;
		mutex_exit(&osp->os_sync_lock);
		nfs4close_one(vp, osp, cr, acc_bits, lrp, ep,
		    CLOSE_RESEND, 0, 0, 0);
		break;
	case OP_LOCK:
	case OP_LOCKU:
		/* Lock ops skip the undo logic after the switch. */
		resend_lock(lrp, ep);
		goto done;
	case OP_DELEGRETURN:
		nfs4_resend_delegreturn(lrp, ep, sp);
		goto done;
	default:
#ifdef DEBUG
		cmn_err(CE_PANIC, "resend_one_op: unexpected op: %d",
		    lrp->lr_op);
#endif
		nfs4_queue_event(RE_LOST_STATE_BAD_OP, mi, NULL,
		    lrp->lr_op, lrp->lr_vp, lrp->lr_dvp, NFS4_OK, NULL, 0,
		    TAG_NONE, TAG_NONE, 0, 0);
		nfs4_error_init(ep, EINVAL);
		/* vp is still lrp->lr_vp here, so there is nothing to drop. */
		return;
	}

	/*
	 * No need to retry nor send an "undo" CLOSE in the
	 * event the server rebooted.
	 */
	if (ep->error == 0 && (ep->stat == NFS4ERR_STALE_CLIENTID ||
	    ep->stat == NFS4ERR_STALE_STATEID || ep->stat == NFS4ERR_EXPIRED))
		goto done;

	/*
	 * If we resent a CLOSE or OPEN_DOWNGRADE, there's nothing
	 * to undo.  Undoing locking operations was handled by
	 * resend_lock().
	 */
	if (lrp->lr_op == OP_OPEN_DOWNGRADE || lrp->lr_op == OP_CLOSE)
		goto done;

	/*
	 * If we get any other error for OPEN, then don't attempt
	 * to undo the resend of the open (since it was never
	 * successful!).
	 */
	ASSERT(lrp->lr_op == OP_OPEN);
	if (ep->error || ep->stat != NFS4_OK)
		goto done;

	/*
	 * Now let's undo our OPEN.
	 */
	nfs4_error_zinit(ep);
	close_after_open_resend(vp, lrp->lr_cr, lrp->lr_oacc, ep);
	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "resend_one_op: "
	    "nfs4close_one: for vp %p got error %d stat %d",
	    (void *)vp, ep->error, ep->stat));

done:
	/*
	 * If vp no longer matches the request's vnode (only the OPEN path
	 * passes &vp to a callee), drop it; presumably the callee returned
	 * it held -- TODO confirm against nfs4_resend_open_otw().
	 */
	if (vp != lrp->lr_vp)
		VN_RELE(vp);
}

/*
 * Close a file that was opened via a resent OPEN.
 * Most errors are passed back to the caller (via the nfs4_error_t
 * parameter), except for FHEXPIRED, which is retried.
 *
 * It might be conceptually cleaner to push the CLOSE request onto the
 * front of the resend queue, rather than sending it here.  That would
 * match the way we undo lost lock requests.  On the other
 * hand, we've already got something that works, and there's no reason to
 * change it at this time.
28227c478bd9Sstevel@tonic-gate */ 28237c478bd9Sstevel@tonic-gate 28247c478bd9Sstevel@tonic-gate static void 28257c478bd9Sstevel@tonic-gate close_after_open_resend(vnode_t *vp, cred_t *cr, uint32_t acc_bits, 28267c478bd9Sstevel@tonic-gate nfs4_error_t *ep) 28277c478bd9Sstevel@tonic-gate { 28287c478bd9Sstevel@tonic-gate 28297c478bd9Sstevel@tonic-gate for (;;) { 28307c478bd9Sstevel@tonic-gate nfs4close_one(vp, NULL, cr, acc_bits, NULL, ep, 28317c478bd9Sstevel@tonic-gate CLOSE_AFTER_RESEND, 0, 0, 0); 28327c478bd9Sstevel@tonic-gate if (ep->error == 0 && ep->stat == NFS4_OK) 28337c478bd9Sstevel@tonic-gate break; /* success; done */ 28347c478bd9Sstevel@tonic-gate if (ep->error != 0 || ep->stat != NFS4ERR_FHEXPIRED) 28357c478bd9Sstevel@tonic-gate break; 28367c478bd9Sstevel@tonic-gate /* else retry FHEXPIRED */ 28377c478bd9Sstevel@tonic-gate } 28387c478bd9Sstevel@tonic-gate 28397c478bd9Sstevel@tonic-gate } 28407c478bd9Sstevel@tonic-gate 28417c478bd9Sstevel@tonic-gate /* 28427c478bd9Sstevel@tonic-gate * Resend the given lost lock request. Return an errno value. If zero, 28437c478bd9Sstevel@tonic-gate * *statp is set to the NFS status code for the call. 28447c478bd9Sstevel@tonic-gate * 28457c478bd9Sstevel@tonic-gate * Issue a SIGLOST and mark the rnode dead if we get a non-recovery error or 28467c478bd9Sstevel@tonic-gate * a recovery error that we don't actually recover from yet (eg: BAD_SEQID). 28477c478bd9Sstevel@tonic-gate * Let the recovery thread redrive the call if we get a recovery error that 28487c478bd9Sstevel@tonic-gate * we can actually recover from. 
*/
/*
 * resend_lock() parameters:
 *	lrp	the lost request; lr_ctype must be NFS4_LCK_CTYPE_REINSTATE
 *		or NFS4_LCK_CTYPE_RESEND (asserted below)
 *	ep	filled in by nfs4frlock() and left for the caller to inspect
 *
 * All paths end at the "done" label, where SIGLOST is sent only if
 * send_siglost was set on the way.
 */
static void
resend_lock(nfs4_lost_rqst_t *lrp, nfs4_error_t *ep)
{
	bool_t send_siglost = FALSE;
	vnode_t *vp = lrp->lr_vp;

	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "resend_lock:"));
	ASSERT(lrp->lr_ctype == NFS4_LCK_CTYPE_REINSTATE ||
	    lrp->lr_ctype == NFS4_LCK_CTYPE_RESEND);

	nfs4frlock(lrp->lr_ctype, vp, F_SETLK, lrp->lr_flk, lrp->lr_cr, ep,
	    lrp, NULL);

	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "resend_lock: "
	    "nfs4frlock for vp %p returned error %d, stat %d",
	    (void *)vp, ep->error, ep->stat));

	/* Success: nothing more to do. */
	if (ep->error == 0 && ep->stat == 0)
		goto done;
	/* A DENIED reply to a plain resend is not treated as a failure. */
	if (ep->error == 0 && ep->stat == NFS4ERR_DENIED &&
	    lrp->lr_ctype == NFS4_LCK_CTYPE_RESEND)
		goto done;

	/*
	 * If we failed with a non-recovery error, send SIGLOST and
	 * mark the file dead.
	 */
	if (!nfs4_needs_recovery(ep, TRUE, vp->v_vfsp))
		send_siglost = TRUE;
	else {
		/*
		 * Done with recovering LOST LOCK in the event the
		 * server rebooted or we've lost the lease.
		 */
		if (ep->error == 0 && (ep->stat == NFS4ERR_STALE_CLIENTID ||
		    ep->stat == NFS4ERR_STALE_STATEID ||
		    ep->stat == NFS4ERR_EXPIRED)) {
			goto done;
		}

		/*
		 * BAD_STATEID on an unlock indicates that the server has
		 * forgotten about the lock anyway, so act like the call
		 * was successful.
		 */
		if (ep->error == 0 && ep->stat == NFS4ERR_BAD_STATEID &&
		    lrp->lr_op == OP_LOCKU)
			goto done;

		/*
		 * If we got a recovery error that we don't actually
		 * recover from, send SIGLOST.  If the filesystem was
		 * forcibly unmounted, we skip the SIGLOST because (a) it's
		 * unnecessary noise, and (b) there could be a new process
		 * with the same pid as the one that had generated the lost
		 * state request.
		 */
		if (ep->error == 0 && (ep->stat == NFS4ERR_BADHANDLE ||
		    nfs4_recov_marks_dead(ep->stat))) {
			if (!(vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
				send_siglost = TRUE;
			goto done;
		}

		/*
		 * If the filesystem was forcibly unmounted, we
		 * still need to synchronize with the server and
		 * release state.  Try again later.
		 */
		if (NFS4_FRC_UNMT_ERR(ep->error, vp->v_vfsp))
			goto done;

		/*
		 * If we get a recovery error that we can actually
		 * recover from (such as ETIMEDOUT, FHEXPIRED),
		 * return and let the recovery thread redrive the call.
		 *
		 * For the three errors below, we want to delay a bit
		 * instead of pounding the server into submission.
		 */
		if ((ep->error == 0 && ep->stat == NFS4ERR_DELAY) ||
		    (ep->error == 0 && ep->stat == NFS4ERR_GRACE) ||
		    (ep->error == 0 && ep->stat == NFS4ERR_RESOURCE))
			delay(SEC_TO_TICK(recov_err_delay));
		goto done;
	}

done:
	if (send_siglost) {
		cred_t *sv_cred;

		/*
		 * Must be root or the actual thread being issued the
		 * SIGLOST for this to work, so just become root.
		 */
		sv_cred = curthread->t_cred;
		curthread->t_cred = kcred;
		nfs4_send_siglost(lrp->lr_flk->l_pid, VTOMI4(vp), vp, FALSE,
		    ep->error, ep->stat);
		/* Put the thread's original credential back. */
		curthread->t_cred = sv_cred;

		/*
		 * Flush any additional reinstantiation requests for
		 * this operation.  Sending multiple SIGLOSTs to the user
		 * process is unlikely to help and may cause trouble.
		 */
		if (lrp->lr_ctype == NFS4_LCK_CTYPE_REINSTATE)
			flush_reinstate(lrp);
	}
}

/*
 * Remove any lock reinstantiation requests that correspond to the given
 * lost request.
We only remove items that follow lrp in the queue, 29647c478bd9Sstevel@tonic-gate * assuming that lrp will be removed by the generic lost state code. 29657c478bd9Sstevel@tonic-gate */ 29667c478bd9Sstevel@tonic-gate 29677c478bd9Sstevel@tonic-gate static void 29687c478bd9Sstevel@tonic-gate flush_reinstate(nfs4_lost_rqst_t *lrp) 29697c478bd9Sstevel@tonic-gate { 29707c478bd9Sstevel@tonic-gate vnode_t *vp; 29717c478bd9Sstevel@tonic-gate pid_t pid; 29727c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 29737c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *nlrp; 29747c478bd9Sstevel@tonic-gate 29757c478bd9Sstevel@tonic-gate vp = lrp->lr_vp; 29767c478bd9Sstevel@tonic-gate mi = VTOMI4(vp); 29777c478bd9Sstevel@tonic-gate pid = lrp->lr_flk->l_pid; 29787c478bd9Sstevel@tonic-gate 29797c478bd9Sstevel@tonic-gate /* 29807c478bd9Sstevel@tonic-gate * If there are any more reinstantation requests to get rid of, 29817c478bd9Sstevel@tonic-gate * they should all be clustered at the front of the lost state 29827c478bd9Sstevel@tonic-gate * queue. 
29837c478bd9Sstevel@tonic-gate */ 29847c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 29857c478bd9Sstevel@tonic-gate for (lrp = list_next(&mi->mi_lost_state, lrp); lrp != NULL; 29867c478bd9Sstevel@tonic-gate lrp = nlrp) { 29877c478bd9Sstevel@tonic-gate nlrp = list_next(&mi->mi_lost_state, lrp); 29887c478bd9Sstevel@tonic-gate if (lrp->lr_op != OP_LOCK && lrp->lr_op != OP_LOCKU) 29897c478bd9Sstevel@tonic-gate break; 29907c478bd9Sstevel@tonic-gate if (lrp->lr_ctype != NFS4_LCK_CTYPE_REINSTATE) 29917c478bd9Sstevel@tonic-gate break; 29927c478bd9Sstevel@tonic-gate ASSERT(lrp->lr_vp == vp); 29937c478bd9Sstevel@tonic-gate ASSERT(lrp->lr_flk->l_pid == pid); 29947c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 29957c478bd9Sstevel@tonic-gate "remove reinstantiation %p", (void *)lrp)); 29967c478bd9Sstevel@tonic-gate list_remove(&mi->mi_lost_state, lrp); 29977c478bd9Sstevel@tonic-gate nfs4_free_lost_rqst(lrp, NULL); 29987c478bd9Sstevel@tonic-gate } 29997c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 30007c478bd9Sstevel@tonic-gate } 30017c478bd9Sstevel@tonic-gate 30027c478bd9Sstevel@tonic-gate /* 30037c478bd9Sstevel@tonic-gate * End of state-specific recovery routines. 30047c478bd9Sstevel@tonic-gate */ 30057c478bd9Sstevel@tonic-gate 30067c478bd9Sstevel@tonic-gate /* 30077c478bd9Sstevel@tonic-gate * Allocate a lost request struct, initialize it from lost_rqstp (including 30087c478bd9Sstevel@tonic-gate * bumping the reference counts for the referenced vnode, etc.), and hang 30097c478bd9Sstevel@tonic-gate * it off of recovp. 
30107c478bd9Sstevel@tonic-gate */ 30117c478bd9Sstevel@tonic-gate 30127c478bd9Sstevel@tonic-gate static void 30137c478bd9Sstevel@tonic-gate nfs4_save_lost_rqst(nfs4_lost_rqst_t *lost_rqstp, recov_info_t *recovp, 30147c478bd9Sstevel@tonic-gate nfs4_recov_t *action, mntinfo4_t *mi) 30157c478bd9Sstevel@tonic-gate { 30167c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *destp; 30177c478bd9Sstevel@tonic-gate 30187c478bd9Sstevel@tonic-gate ASSERT(recovp->rc_lost_rqst == NULL); 30197c478bd9Sstevel@tonic-gate 30207c478bd9Sstevel@tonic-gate destp = kmem_alloc(sizeof (nfs4_lost_rqst_t), KM_SLEEP); 30217c478bd9Sstevel@tonic-gate recovp->rc_lost_rqst = destp; 30227c478bd9Sstevel@tonic-gate 30237c478bd9Sstevel@tonic-gate if (lost_rqstp->lr_op == OP_LOCK || 30247c478bd9Sstevel@tonic-gate lost_rqstp->lr_op == OP_LOCKU) { 30257c478bd9Sstevel@tonic-gate ASSERT(lost_rqstp->lr_lop); 30267c478bd9Sstevel@tonic-gate *action = NR_LOST_LOCK; 30277c478bd9Sstevel@tonic-gate destp->lr_ctype = lost_rqstp->lr_ctype; 30287c478bd9Sstevel@tonic-gate destp->lr_locktype = lost_rqstp->lr_locktype; 30297c478bd9Sstevel@tonic-gate } else if (lost_rqstp->lr_op == OP_OPEN) { 30307c478bd9Sstevel@tonic-gate component4 *srcfp, *destfp; 30317c478bd9Sstevel@tonic-gate 30327c478bd9Sstevel@tonic-gate destp->lr_oacc = lost_rqstp->lr_oacc; 30337c478bd9Sstevel@tonic-gate destp->lr_odeny = lost_rqstp->lr_odeny; 30347c478bd9Sstevel@tonic-gate destp->lr_oclaim = lost_rqstp->lr_oclaim; 30357c478bd9Sstevel@tonic-gate if (lost_rqstp->lr_oclaim == CLAIM_DELEGATE_CUR) 30367c478bd9Sstevel@tonic-gate destp->lr_ostateid = lost_rqstp->lr_ostateid; 30377c478bd9Sstevel@tonic-gate 30387c478bd9Sstevel@tonic-gate srcfp = &lost_rqstp->lr_ofile; 30397c478bd9Sstevel@tonic-gate destfp = &destp->lr_ofile; 30407c478bd9Sstevel@tonic-gate /* 30417c478bd9Sstevel@tonic-gate * Consume caller's utf8string 30427c478bd9Sstevel@tonic-gate */ 30437c478bd9Sstevel@tonic-gate destfp->utf8string_len = srcfp->utf8string_len; 30447c478bd9Sstevel@tonic-gate 
destfp->utf8string_val = srcfp->utf8string_val; 30457c478bd9Sstevel@tonic-gate srcfp->utf8string_len = 0; 30467c478bd9Sstevel@tonic-gate srcfp->utf8string_val = NULL; /* make sure not reused */ 30477c478bd9Sstevel@tonic-gate 30487c478bd9Sstevel@tonic-gate *action = NR_LOST_STATE_RQST; 30497c478bd9Sstevel@tonic-gate } else if (lost_rqstp->lr_op == OP_OPEN_DOWNGRADE) { 30507c478bd9Sstevel@tonic-gate destp->lr_dg_acc = lost_rqstp->lr_dg_acc; 30517c478bd9Sstevel@tonic-gate destp->lr_dg_deny = lost_rqstp->lr_dg_deny; 30527c478bd9Sstevel@tonic-gate 30537c478bd9Sstevel@tonic-gate *action = NR_LOST_STATE_RQST; 30547c478bd9Sstevel@tonic-gate } else if (lost_rqstp->lr_op == OP_CLOSE) { 30557c478bd9Sstevel@tonic-gate ASSERT(lost_rqstp->lr_oop); 30567c478bd9Sstevel@tonic-gate *action = NR_LOST_STATE_RQST; 30577c478bd9Sstevel@tonic-gate } else if (lost_rqstp->lr_op == OP_DELEGRETURN) { 30587c478bd9Sstevel@tonic-gate *action = NR_LOST_STATE_RQST; 30597c478bd9Sstevel@tonic-gate } else { 30607c478bd9Sstevel@tonic-gate #ifdef DEBUG 30617c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "nfs4_save_lost_rqst: bad op %d", 30627c478bd9Sstevel@tonic-gate lost_rqstp->lr_op); 30637c478bd9Sstevel@tonic-gate #endif 30647c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_LOST_STATE_BAD_OP, mi, NULL, 30657c478bd9Sstevel@tonic-gate lost_rqstp->lr_op, lost_rqstp->lr_vp, lost_rqstp->lr_dvp, 30667c478bd9Sstevel@tonic-gate NFS4_OK, NULL, curproc->p_pid, TAG_NONE, TAG_NONE, 0, 0); 30677c478bd9Sstevel@tonic-gate *action = NR_UNUSED; 30687c478bd9Sstevel@tonic-gate recovp->rc_lost_rqst = NULL; 30697c478bd9Sstevel@tonic-gate kmem_free(destp, sizeof (nfs4_lost_rqst_t)); 30707c478bd9Sstevel@tonic-gate return; 30717c478bd9Sstevel@tonic-gate } 30727c478bd9Sstevel@tonic-gate 30737c478bd9Sstevel@tonic-gate destp->lr_op = lost_rqstp->lr_op; 30747c478bd9Sstevel@tonic-gate destp->lr_vp = lost_rqstp->lr_vp; 30757c478bd9Sstevel@tonic-gate if (destp->lr_vp) 30767c478bd9Sstevel@tonic-gate VN_HOLD(destp->lr_vp); 
30777c478bd9Sstevel@tonic-gate destp->lr_dvp = lost_rqstp->lr_dvp; 30787c478bd9Sstevel@tonic-gate if (destp->lr_dvp) 30797c478bd9Sstevel@tonic-gate VN_HOLD(destp->lr_dvp); 30807c478bd9Sstevel@tonic-gate destp->lr_oop = lost_rqstp->lr_oop; 30817c478bd9Sstevel@tonic-gate if (destp->lr_oop) 30827c478bd9Sstevel@tonic-gate open_owner_hold(destp->lr_oop); 30837c478bd9Sstevel@tonic-gate destp->lr_osp = lost_rqstp->lr_osp; 30847c478bd9Sstevel@tonic-gate if (destp->lr_osp) 30857c478bd9Sstevel@tonic-gate open_stream_hold(destp->lr_osp); 30867c478bd9Sstevel@tonic-gate destp->lr_lop = lost_rqstp->lr_lop; 30877c478bd9Sstevel@tonic-gate if (destp->lr_lop) 30887c478bd9Sstevel@tonic-gate lock_owner_hold(destp->lr_lop); 30897c478bd9Sstevel@tonic-gate destp->lr_cr = lost_rqstp->lr_cr; 30907c478bd9Sstevel@tonic-gate if (destp->lr_cr) 30917c478bd9Sstevel@tonic-gate crhold(destp->lr_cr); 30927c478bd9Sstevel@tonic-gate if (lost_rqstp->lr_flk == NULL) 30937c478bd9Sstevel@tonic-gate destp->lr_flk = NULL; 30947c478bd9Sstevel@tonic-gate else { 30957c478bd9Sstevel@tonic-gate destp->lr_flk = kmem_alloc(sizeof (flock64_t), KM_SLEEP); 30967c478bd9Sstevel@tonic-gate *destp->lr_flk = *lost_rqstp->lr_flk; 30977c478bd9Sstevel@tonic-gate } 30987c478bd9Sstevel@tonic-gate destp->lr_putfirst = lost_rqstp->lr_putfirst; 30997c478bd9Sstevel@tonic-gate } 31007c478bd9Sstevel@tonic-gate 31017c478bd9Sstevel@tonic-gate /* 31027c478bd9Sstevel@tonic-gate * Map the given return values (errno and nfs4 status code) to a recovery 31037c478bd9Sstevel@tonic-gate * action and fill in the following fields of recovp: rc_action, 31047c478bd9Sstevel@tonic-gate * rc_srv_reboot, rc_stateid, rc_lost_rqst. 
31057c478bd9Sstevel@tonic-gate */ 31067c478bd9Sstevel@tonic-gate 31077c478bd9Sstevel@tonic-gate void 31087c478bd9Sstevel@tonic-gate errs_to_action(recov_info_t *recovp, 31097c478bd9Sstevel@tonic-gate nfs4_server_t *sp, mntinfo4_t *mi, stateid4 *sidp, 31107c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *lost_rqstp, int unmounted, nfs_opnum4 op, 31117c478bd9Sstevel@tonic-gate nfs4_bseqid_entry_t *bsep) 31127c478bd9Sstevel@tonic-gate { 31137c478bd9Sstevel@tonic-gate nfs4_recov_t action = NR_UNUSED; 31147c478bd9Sstevel@tonic-gate bool_t reboot = FALSE; 31157c478bd9Sstevel@tonic-gate int try_f; 31167c478bd9Sstevel@tonic-gate int error = recovp->rc_orig_errors.error; 31177c478bd9Sstevel@tonic-gate nfsstat4 stat = recovp->rc_orig_errors.stat; 31187c478bd9Sstevel@tonic-gate 31197c478bd9Sstevel@tonic-gate bzero(&recovp->rc_stateid, sizeof (stateid4)); 31207c478bd9Sstevel@tonic-gate recovp->rc_lost_rqst = NULL; 31217c478bd9Sstevel@tonic-gate recovp->rc_bseqid_rqst = NULL; 31227c478bd9Sstevel@tonic-gate 31237c478bd9Sstevel@tonic-gate try_f = nfs4_try_failover(&recovp->rc_orig_errors) && 31247c478bd9Sstevel@tonic-gate FAILOVER_MOUNT4(mi); 31257c478bd9Sstevel@tonic-gate 31267c478bd9Sstevel@tonic-gate /* 31277c478bd9Sstevel@tonic-gate * We start recovery for EINTR only in the lost lock 31287c478bd9Sstevel@tonic-gate * or lost open/close case. 31297c478bd9Sstevel@tonic-gate */ 31307c478bd9Sstevel@tonic-gate 31317c478bd9Sstevel@tonic-gate if (try_f || error == EINTR || (error == EIO && unmounted)) { 31327c478bd9Sstevel@tonic-gate recovp->rc_error = (error != 0 ? 
error : geterrno4(stat)); 31337c478bd9Sstevel@tonic-gate if (lost_rqstp) { 31347c478bd9Sstevel@tonic-gate ASSERT(lost_rqstp->lr_op != 0); 31357c478bd9Sstevel@tonic-gate nfs4_save_lost_rqst(lost_rqstp, recovp, &action, mi); 31367c478bd9Sstevel@tonic-gate } 31377c478bd9Sstevel@tonic-gate if (try_f) 31387c478bd9Sstevel@tonic-gate action = NR_FAILOVER; 31397c478bd9Sstevel@tonic-gate } else if (error != 0) { 31407c478bd9Sstevel@tonic-gate recovp->rc_error = error; 31417c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_UNEXPECTED_ERRNO, mi, NULL, error, NULL, 31427c478bd9Sstevel@tonic-gate NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 31437c478bd9Sstevel@tonic-gate action = NR_CLIENTID; 31447c478bd9Sstevel@tonic-gate } else { 31457c478bd9Sstevel@tonic-gate recovp->rc_error = geterrno4(stat); 31467c478bd9Sstevel@tonic-gate switch (stat) { 31477c478bd9Sstevel@tonic-gate #ifdef notyet 31487c478bd9Sstevel@tonic-gate case NFS4ERR_LEASE_MOVED: 31497c478bd9Sstevel@tonic-gate action = xxx; 31507c478bd9Sstevel@tonic-gate break; 31517c478bd9Sstevel@tonic-gate #endif 31522f172c55SRobert Thurlow case NFS4ERR_MOVED: 31532f172c55SRobert Thurlow action = NR_MOVED; 31542f172c55SRobert Thurlow break; 31557c478bd9Sstevel@tonic-gate case NFS4ERR_BADHANDLE: 31567c478bd9Sstevel@tonic-gate action = NR_BADHANDLE; 31577c478bd9Sstevel@tonic-gate break; 31587c478bd9Sstevel@tonic-gate case NFS4ERR_BAD_SEQID: 31597c478bd9Sstevel@tonic-gate if (bsep) 31607c478bd9Sstevel@tonic-gate save_bseqid_rqst(bsep, recovp); 31617c478bd9Sstevel@tonic-gate action = NR_BAD_SEQID; 31627c478bd9Sstevel@tonic-gate break; 31637c478bd9Sstevel@tonic-gate case NFS4ERR_OLD_STATEID: 31647c478bd9Sstevel@tonic-gate action = NR_OLDSTATEID; 31657c478bd9Sstevel@tonic-gate break; 31667c478bd9Sstevel@tonic-gate case NFS4ERR_WRONGSEC: 31677c478bd9Sstevel@tonic-gate action = NR_WRONGSEC; 31687c478bd9Sstevel@tonic-gate break; 31697c478bd9Sstevel@tonic-gate case NFS4ERR_FHEXPIRED: 31707c478bd9Sstevel@tonic-gate action = NR_FHEXPIRED; 
31717c478bd9Sstevel@tonic-gate break; 31727c478bd9Sstevel@tonic-gate case NFS4ERR_BAD_STATEID: 31737c478bd9Sstevel@tonic-gate if (sp == NULL || (sp != NULL && inlease(sp))) { 31747c478bd9Sstevel@tonic-gate 31757c478bd9Sstevel@tonic-gate action = NR_BAD_STATEID; 31767c478bd9Sstevel@tonic-gate if (sidp) 31777c478bd9Sstevel@tonic-gate recovp->rc_stateid = *sidp; 31787c478bd9Sstevel@tonic-gate } else 31797c478bd9Sstevel@tonic-gate action = NR_CLIENTID; 31807c478bd9Sstevel@tonic-gate break; 31817c478bd9Sstevel@tonic-gate case NFS4ERR_EXPIRED: 31827c478bd9Sstevel@tonic-gate /* 31837c478bd9Sstevel@tonic-gate * The client's lease has expired, either due 31847c478bd9Sstevel@tonic-gate * to a network partition or perhaps a client 31857c478bd9Sstevel@tonic-gate * error. In either case, try an NR_CLIENTID 31867c478bd9Sstevel@tonic-gate * style recovery. reboot remains false, since 31877c478bd9Sstevel@tonic-gate * there is no evidence the server has rebooted. 31887c478bd9Sstevel@tonic-gate * This will cause CLAIM_NULL opens and lock 31897c478bd9Sstevel@tonic-gate * requests without the reclaim bit. 31907c478bd9Sstevel@tonic-gate */ 31917c478bd9Sstevel@tonic-gate action = NR_CLIENTID; 31927c478bd9Sstevel@tonic-gate 31937c478bd9Sstevel@tonic-gate DTRACE_PROBE4(nfs4__expired, 31947c478bd9Sstevel@tonic-gate nfs4_server_t *, sp, 31957c478bd9Sstevel@tonic-gate mntinfo4_t *, mi, 31967c478bd9Sstevel@tonic-gate stateid4 *, sidp, int, op); 31977c478bd9Sstevel@tonic-gate 31987c478bd9Sstevel@tonic-gate break; 31997c478bd9Sstevel@tonic-gate case NFS4ERR_STALE_CLIENTID: 32007c478bd9Sstevel@tonic-gate case NFS4ERR_STALE_STATEID: 32017c478bd9Sstevel@tonic-gate action = NR_CLIENTID; 32027c478bd9Sstevel@tonic-gate reboot = TRUE; 32037c478bd9Sstevel@tonic-gate break; 32047c478bd9Sstevel@tonic-gate case NFS4ERR_RESOURCE: 32057c478bd9Sstevel@tonic-gate /* 32067c478bd9Sstevel@tonic-gate * If this had been a FAILOVER mount, then 32077c478bd9Sstevel@tonic-gate * we'd have tried failover. 
Since it's not, 32087c478bd9Sstevel@tonic-gate * just delay a while and retry. 32097c478bd9Sstevel@tonic-gate */ 32107c478bd9Sstevel@tonic-gate action = NR_DELAY; 32117c478bd9Sstevel@tonic-gate break; 32127c478bd9Sstevel@tonic-gate case NFS4ERR_GRACE: 32137c478bd9Sstevel@tonic-gate action = NR_GRACE; 32147c478bd9Sstevel@tonic-gate break; 32157c478bd9Sstevel@tonic-gate case NFS4ERR_DELAY: 32167c478bd9Sstevel@tonic-gate action = NR_DELAY; 32177c478bd9Sstevel@tonic-gate break; 32187c478bd9Sstevel@tonic-gate case NFS4ERR_STALE: 32197c478bd9Sstevel@tonic-gate action = NR_STALE; 32207c478bd9Sstevel@tonic-gate break; 32217c478bd9Sstevel@tonic-gate default: 32227c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_UNEXPECTED_STATUS, mi, NULL, 0, 32237c478bd9Sstevel@tonic-gate NULL, NULL, stat, NULL, 0, TAG_NONE, TAG_NONE, 32247c478bd9Sstevel@tonic-gate 0, 0); 32257c478bd9Sstevel@tonic-gate action = NR_CLIENTID; 32267c478bd9Sstevel@tonic-gate break; 32277c478bd9Sstevel@tonic-gate } 32287c478bd9Sstevel@tonic-gate } 32297c478bd9Sstevel@tonic-gate 32307c478bd9Sstevel@tonic-gate /* make sure action got set */ 32317c478bd9Sstevel@tonic-gate ASSERT(action != NR_UNUSED); 32327c478bd9Sstevel@tonic-gate recovp->rc_srv_reboot = reboot; 32337c478bd9Sstevel@tonic-gate recovp->rc_action = action; 32347c478bd9Sstevel@tonic-gate nfs4_queue_fact(RF_ERR, mi, stat, action, op, reboot, NULL, error, 32357c478bd9Sstevel@tonic-gate NULL); 32367c478bd9Sstevel@tonic-gate } 32377c478bd9Sstevel@tonic-gate 32387c478bd9Sstevel@tonic-gate /* 32397c478bd9Sstevel@tonic-gate * Return the (held) credential for the process with the given pid. 32407c478bd9Sstevel@tonic-gate * May return NULL (e.g., process not found). 
32417c478bd9Sstevel@tonic-gate */ 32427c478bd9Sstevel@tonic-gate 32437c478bd9Sstevel@tonic-gate static cred_t * 32447c478bd9Sstevel@tonic-gate pid_to_cr(pid_t pid) 32457c478bd9Sstevel@tonic-gate { 32467c478bd9Sstevel@tonic-gate proc_t *p; 32477c478bd9Sstevel@tonic-gate cred_t *cr; 32487c478bd9Sstevel@tonic-gate 32497c478bd9Sstevel@tonic-gate mutex_enter(&pidlock); 32507c478bd9Sstevel@tonic-gate if ((p = prfind(pid)) == NULL) { 32517c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 32527c478bd9Sstevel@tonic-gate return (NULL); 32537c478bd9Sstevel@tonic-gate } 32547c478bd9Sstevel@tonic-gate 32557c478bd9Sstevel@tonic-gate mutex_enter(&p->p_crlock); 32567c478bd9Sstevel@tonic-gate crhold(cr = p->p_cred); 32577c478bd9Sstevel@tonic-gate mutex_exit(&p->p_crlock); 32587c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 32597c478bd9Sstevel@tonic-gate 32607c478bd9Sstevel@tonic-gate return (cr); 32617c478bd9Sstevel@tonic-gate } 32627c478bd9Sstevel@tonic-gate 32637c478bd9Sstevel@tonic-gate /* 32647c478bd9Sstevel@tonic-gate * Send SIGLOST to the given process and queue the event. 32657c478bd9Sstevel@tonic-gate * 32667c478bd9Sstevel@tonic-gate * The 'dump' boolean tells us whether this action should dump the 32677c478bd9Sstevel@tonic-gate * in-kernel queue of recovery messages or not. 32687c478bd9Sstevel@tonic-gate */ 32697c478bd9Sstevel@tonic-gate 32707c478bd9Sstevel@tonic-gate void 32717c478bd9Sstevel@tonic-gate nfs4_send_siglost(pid_t pid, mntinfo4_t *mi, vnode_t *vp, bool_t dump, 32727c478bd9Sstevel@tonic-gate int error, nfsstat4 stat) 32737c478bd9Sstevel@tonic-gate { 32747c478bd9Sstevel@tonic-gate proc_t *p; 32757c478bd9Sstevel@tonic-gate 32767c478bd9Sstevel@tonic-gate mutex_enter(&pidlock); 32777c478bd9Sstevel@tonic-gate p = prfind(pid); 32787c478bd9Sstevel@tonic-gate if (p) 32797c478bd9Sstevel@tonic-gate psignal(p, SIGLOST); 32807c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 32817c478bd9Sstevel@tonic-gate nfs4_queue_event(dump ? 
RE_SIGLOST : RE_SIGLOST_NO_DUMP, mi, 32827c478bd9Sstevel@tonic-gate NULL, error, vp, NULL, stat, NULL, pid, TAG_NONE, TAG_NONE, 0, 0); 32837c478bd9Sstevel@tonic-gate } 32847c478bd9Sstevel@tonic-gate 32857c478bd9Sstevel@tonic-gate /* 3286ed076bbfSMarcel Telka * Scan the lock list for entries that match the given pid. Unregister those 3287ed076bbfSMarcel Telka * locks that do and change their pid to NOPID. 32887c478bd9Sstevel@tonic-gate */ 32897c478bd9Sstevel@tonic-gate 32907c478bd9Sstevel@tonic-gate static void 3291ed076bbfSMarcel Telka relock_skip_pid(vnode_t *vp, locklist_t *llp, pid_t pid) 32927c478bd9Sstevel@tonic-gate { 32937c478bd9Sstevel@tonic-gate for (; llp != NULL; llp = llp->ll_next) { 3294ed076bbfSMarcel Telka if (llp->ll_flock.l_pid == pid) { 3295ed076bbfSMarcel Telka int r; 3296ed076bbfSMarcel Telka 3297ed076bbfSMarcel Telka /* 3298ed076bbfSMarcel Telka * Unregister the lost lock. 3299ed076bbfSMarcel Telka */ 3300ed076bbfSMarcel Telka llp->ll_flock.l_type = F_UNLCK; 3301ed076bbfSMarcel Telka r = reclock(vp, &llp->ll_flock, SETFLCK, FREAD | FWRITE, 3302ed076bbfSMarcel Telka 0, NULL); 3303ed076bbfSMarcel Telka /* The unlock cannot fail */ 3304ed076bbfSMarcel Telka ASSERT(r == 0); 3305ed076bbfSMarcel Telka 33067c478bd9Sstevel@tonic-gate llp->ll_flock.l_pid = NOPID; 33077c478bd9Sstevel@tonic-gate } 33087c478bd9Sstevel@tonic-gate } 3309ed076bbfSMarcel Telka } 33107c478bd9Sstevel@tonic-gate 33117c478bd9Sstevel@tonic-gate /* 33127c478bd9Sstevel@tonic-gate * Mark a file as having failed recovery, after making a last-ditch effort 33137c478bd9Sstevel@tonic-gate * to return any delegation. 33147c478bd9Sstevel@tonic-gate * 33157c478bd9Sstevel@tonic-gate * Sets r_error to EIO or ESTALE for the given vnode. 
33167c478bd9Sstevel@tonic-gate */ 33177c478bd9Sstevel@tonic-gate void 33187c478bd9Sstevel@tonic-gate nfs4_fail_recov(vnode_t *vp, char *why, int error, nfsstat4 stat) 33197c478bd9Sstevel@tonic-gate { 33207c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 33217c478bd9Sstevel@tonic-gate 33227c478bd9Sstevel@tonic-gate #ifdef DEBUG 33237c478bd9Sstevel@tonic-gate if (nfs4_fail_recov_stop) 33247c478bd9Sstevel@tonic-gate debug_enter("nfs4_fail_recov"); 33257c478bd9Sstevel@tonic-gate #endif 33267c478bd9Sstevel@tonic-gate 33277c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 33287c478bd9Sstevel@tonic-gate if (rp->r_flags & (R4RECOVERR|R4RECOVERRP)) { 33297c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 33307c478bd9Sstevel@tonic-gate return; 33317c478bd9Sstevel@tonic-gate } 33327c478bd9Sstevel@tonic-gate 33337c478bd9Sstevel@tonic-gate /* 33347c478bd9Sstevel@tonic-gate * Set R4RECOVERRP to indicate that a recovery error is in 33357c478bd9Sstevel@tonic-gate * progress. This will shut down reads and writes at the top 33367c478bd9Sstevel@tonic-gate * half. Don't set R4RECOVERR until after we've returned the 33377c478bd9Sstevel@tonic-gate * delegation, otherwise it will fail. 33387c478bd9Sstevel@tonic-gate */ 33397c478bd9Sstevel@tonic-gate 33407c478bd9Sstevel@tonic-gate rp->r_flags |= R4RECOVERRP; 33417c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 33427c478bd9Sstevel@tonic-gate 33437c478bd9Sstevel@tonic-gate nfs4delegabandon(rp); 33447c478bd9Sstevel@tonic-gate 33457c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 33467c478bd9Sstevel@tonic-gate rp->r_flags |= (R4RECOVERR | R4STALE); 33477c478bd9Sstevel@tonic-gate rp->r_error = (error == 0 && stat == NFS4ERR_STALE) ? 
ESTALE : EIO; 33487c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE4_LOCKED(rp); 33497c478bd9Sstevel@tonic-gate if (!(vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) 33507c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_DEAD_FILE, VTOMI4(vp), NULL, error, 33517c478bd9Sstevel@tonic-gate vp, NULL, stat, why, 0, TAG_NONE, TAG_NONE, 0, 0); 33527c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 33537c478bd9Sstevel@tonic-gate 33547c478bd9Sstevel@tonic-gate dnlc_purge_vp(vp); 33557c478bd9Sstevel@tonic-gate } 33567c478bd9Sstevel@tonic-gate 33577c478bd9Sstevel@tonic-gate /* 33587c478bd9Sstevel@tonic-gate * recov_throttle: if the file had the same recovery action within the 33597c478bd9Sstevel@tonic-gate * throttle interval, wait for the throttle interval to finish before 33607c478bd9Sstevel@tonic-gate * proceeding. 33617c478bd9Sstevel@tonic-gate * 33627c478bd9Sstevel@tonic-gate * Side effects: updates the rnode with the current recovery information. 33637c478bd9Sstevel@tonic-gate */ 33647c478bd9Sstevel@tonic-gate 33657c478bd9Sstevel@tonic-gate static void 33667c478bd9Sstevel@tonic-gate recov_throttle(recov_info_t *recovp, vnode_t *vp) 33677c478bd9Sstevel@tonic-gate { 33687c478bd9Sstevel@tonic-gate time_t curtime, time_to_wait; 33697c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 33707c478bd9Sstevel@tonic-gate 33717c478bd9Sstevel@tonic-gate curtime = gethrestime_sec(); 33727c478bd9Sstevel@tonic-gate 33737c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 33747c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 33757c478bd9Sstevel@tonic-gate "recov_throttle: now: (%d, %ld), last: (%d, %ld)", 33767c478bd9Sstevel@tonic-gate recovp->rc_action, curtime, 33777c478bd9Sstevel@tonic-gate rp->r_recov_act, rp->r_last_recov)); 33787c478bd9Sstevel@tonic-gate if (recovp->rc_action == rp->r_recov_act && 33797c478bd9Sstevel@tonic-gate rp->r_last_recov + recov_err_delay > curtime) { 33807c478bd9Sstevel@tonic-gate time_to_wait = rp->r_last_recov + recov_err_delay - curtime; 
33817c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 33827c478bd9Sstevel@tonic-gate delay(SEC_TO_TICK(time_to_wait)); 33837c478bd9Sstevel@tonic-gate curtime = gethrestime_sec(); 33847c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 33857c478bd9Sstevel@tonic-gate } 33867c478bd9Sstevel@tonic-gate 33877c478bd9Sstevel@tonic-gate rp->r_last_recov = curtime; 33887c478bd9Sstevel@tonic-gate rp->r_recov_act = recovp->rc_action; 33897c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 33907c478bd9Sstevel@tonic-gate } 33917c478bd9Sstevel@tonic-gate 33927c478bd9Sstevel@tonic-gate /* 33937c478bd9Sstevel@tonic-gate * React to NFS4ERR_GRACE by setting the time we'll permit 33947c478bd9Sstevel@tonic-gate * the next call to this filesystem. 33957c478bd9Sstevel@tonic-gate */ 33967c478bd9Sstevel@tonic-gate void 33977c478bd9Sstevel@tonic-gate nfs4_set_grace_wait(mntinfo4_t *mi) 33987c478bd9Sstevel@tonic-gate { 33997c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 34007c478bd9Sstevel@tonic-gate /* Mark the time for the future */ 34017c478bd9Sstevel@tonic-gate mi->mi_grace_wait = gethrestime_sec() + nfs4err_delay_time; 34027c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 34037c478bd9Sstevel@tonic-gate } 34047c478bd9Sstevel@tonic-gate 34057c478bd9Sstevel@tonic-gate /* 34067c478bd9Sstevel@tonic-gate * React to MFS4ERR_DELAY by setting the time we'll permit 34077c478bd9Sstevel@tonic-gate * the next call to this vnode. 34087c478bd9Sstevel@tonic-gate */ 34097c478bd9Sstevel@tonic-gate void 34107c478bd9Sstevel@tonic-gate nfs4_set_delay_wait(vnode_t *vp) 34117c478bd9Sstevel@tonic-gate { 34127c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 34137c478bd9Sstevel@tonic-gate 34147c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 34157c478bd9Sstevel@tonic-gate /* 34167c478bd9Sstevel@tonic-gate * Calculate amount we should delay, initial 34177c478bd9Sstevel@tonic-gate * delay will be short and then we will back off. 
34187c478bd9Sstevel@tonic-gate */ 34197c478bd9Sstevel@tonic-gate if (rp->r_delay_interval == 0) 34207c478bd9Sstevel@tonic-gate rp->r_delay_interval = NFS4_INITIAL_DELAY_INTERVAL; 34217c478bd9Sstevel@tonic-gate else 34227c478bd9Sstevel@tonic-gate /* calculate next interval value */ 34237c478bd9Sstevel@tonic-gate rp->r_delay_interval = 34247c478bd9Sstevel@tonic-gate MIN(NFS4_MAX_DELAY_INTERVAL, (rp->r_delay_interval << 1)); 34257c478bd9Sstevel@tonic-gate rp->r_delay_wait = gethrestime_sec() + rp->r_delay_interval; 34267c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 34277c478bd9Sstevel@tonic-gate } 34287c478bd9Sstevel@tonic-gate 34297c478bd9Sstevel@tonic-gate /* 34307c478bd9Sstevel@tonic-gate * The caller is responsible for freeing the returned string. 34317c478bd9Sstevel@tonic-gate */ 34327c478bd9Sstevel@tonic-gate static char * 34337c478bd9Sstevel@tonic-gate nfs4_getsrvnames(mntinfo4_t *mi, size_t *len) 34347c478bd9Sstevel@tonic-gate { 34357c478bd9Sstevel@tonic-gate servinfo4_t *svp; 34367c478bd9Sstevel@tonic-gate char *srvnames; 34377c478bd9Sstevel@tonic-gate char *namep; 34387c478bd9Sstevel@tonic-gate size_t length; 34397c478bd9Sstevel@tonic-gate 34407c478bd9Sstevel@tonic-gate /* 34417c478bd9Sstevel@tonic-gate * Calculate the length of the string required to hold all 34427c478bd9Sstevel@tonic-gate * of the server names plus either a comma or a null 34437c478bd9Sstevel@tonic-gate * character following each individual one. 
34447c478bd9Sstevel@tonic-gate */ 34457c478bd9Sstevel@tonic-gate length = 0; 34467c478bd9Sstevel@tonic-gate for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 34477c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 34487c478bd9Sstevel@tonic-gate if (svp->sv_flags & SV4_NOTINUSE) { 34497c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 34507c478bd9Sstevel@tonic-gate continue; 34517c478bd9Sstevel@tonic-gate } 34527c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 34537c478bd9Sstevel@tonic-gate length += svp->sv_hostnamelen; 34547c478bd9Sstevel@tonic-gate } 34557c478bd9Sstevel@tonic-gate 34567c478bd9Sstevel@tonic-gate srvnames = kmem_alloc(length, KM_SLEEP); 34577c478bd9Sstevel@tonic-gate 34587c478bd9Sstevel@tonic-gate namep = srvnames; 34597c478bd9Sstevel@tonic-gate for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) { 34607c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 34617c478bd9Sstevel@tonic-gate if (svp->sv_flags & SV4_NOTINUSE) { 34627c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 34637c478bd9Sstevel@tonic-gate continue; 34647c478bd9Sstevel@tonic-gate } 34657c478bd9Sstevel@tonic-gate nfs_rw_exit(&svp->sv_lock); 34667c478bd9Sstevel@tonic-gate (void) strcpy(namep, svp->sv_hostname); 34677c478bd9Sstevel@tonic-gate namep += svp->sv_hostnamelen - 1; 34687c478bd9Sstevel@tonic-gate *namep++ = ','; 34697c478bd9Sstevel@tonic-gate } 34707c478bd9Sstevel@tonic-gate *--namep = '\0'; 34717c478bd9Sstevel@tonic-gate 34727c478bd9Sstevel@tonic-gate *len = length; 34737c478bd9Sstevel@tonic-gate 34747c478bd9Sstevel@tonic-gate return (srvnames); 34757c478bd9Sstevel@tonic-gate } 34767c478bd9Sstevel@tonic-gate 34777c478bd9Sstevel@tonic-gate static void 34787c478bd9Sstevel@tonic-gate save_bseqid_rqst(nfs4_bseqid_entry_t *bsep, recov_info_t *recovp) 34797c478bd9Sstevel@tonic-gate { 34807c478bd9Sstevel@tonic-gate nfs4_bseqid_entry_t *destp; 34817c478bd9Sstevel@tonic-gate 
34827c478bd9Sstevel@tonic-gate destp = kmem_alloc(sizeof (nfs4_bseqid_entry_t), KM_SLEEP); 34837c478bd9Sstevel@tonic-gate recovp->rc_bseqid_rqst = destp; 34847c478bd9Sstevel@tonic-gate 34857c478bd9Sstevel@tonic-gate if (bsep->bs_oop) 34867c478bd9Sstevel@tonic-gate open_owner_hold(bsep->bs_oop); 34877c478bd9Sstevel@tonic-gate destp->bs_oop = bsep->bs_oop; 34887c478bd9Sstevel@tonic-gate if (bsep->bs_lop) 34897c478bd9Sstevel@tonic-gate lock_owner_hold(bsep->bs_lop); 34907c478bd9Sstevel@tonic-gate destp->bs_lop = bsep->bs_lop; 34917c478bd9Sstevel@tonic-gate if (bsep->bs_vp) 34927c478bd9Sstevel@tonic-gate VN_HOLD(bsep->bs_vp); 34937c478bd9Sstevel@tonic-gate destp->bs_vp = bsep->bs_vp; 34947c478bd9Sstevel@tonic-gate destp->bs_pid = bsep->bs_pid; 34957c478bd9Sstevel@tonic-gate destp->bs_tag = bsep->bs_tag; 34967c478bd9Sstevel@tonic-gate destp->bs_seqid = bsep->bs_seqid; 34977c478bd9Sstevel@tonic-gate } 34987c478bd9Sstevel@tonic-gate 34997c478bd9Sstevel@tonic-gate static void 35007c478bd9Sstevel@tonic-gate free_bseqid_rqst(nfs4_bseqid_entry_t *bsep) 35017c478bd9Sstevel@tonic-gate { 35027c478bd9Sstevel@tonic-gate if (bsep->bs_oop) 35037c478bd9Sstevel@tonic-gate open_owner_rele(bsep->bs_oop); 35047c478bd9Sstevel@tonic-gate if (bsep->bs_lop) 35057c478bd9Sstevel@tonic-gate lock_owner_rele(bsep->bs_lop); 35067c478bd9Sstevel@tonic-gate if (bsep->bs_vp) 35077c478bd9Sstevel@tonic-gate VN_RELE(bsep->bs_vp); 35087c478bd9Sstevel@tonic-gate kmem_free(bsep, sizeof (nfs4_bseqid_entry_t)); 35097c478bd9Sstevel@tonic-gate } 35107c478bd9Sstevel@tonic-gate 35117c478bd9Sstevel@tonic-gate /* 35127c478bd9Sstevel@tonic-gate * We don't actually fully recover from NFS4ERR_BAD_SEQID. We 35137c478bd9Sstevel@tonic-gate * simply mark the open owner and open stream (if provided) as "bad". 35147c478bd9Sstevel@tonic-gate * Then future uses of these data structures will be limited to basically 35157c478bd9Sstevel@tonic-gate * just cleaning up the internal client state (no going OTW). 
35167c478bd9Sstevel@tonic-gate * 35177c478bd9Sstevel@tonic-gate * The result of this is to return errors back to the app/usr when 35187c478bd9Sstevel@tonic-gate * we receive NFS4ERR_BAD_SEQID, but also allow future/new calls to 35197c478bd9Sstevel@tonic-gate * succeed so progress can be made. 35207c478bd9Sstevel@tonic-gate */ 35217c478bd9Sstevel@tonic-gate void 35227c478bd9Sstevel@tonic-gate recov_bad_seqid(recov_info_t *recovp) 35237c478bd9Sstevel@tonic-gate { 35247c478bd9Sstevel@tonic-gate mntinfo4_t *mi = recovp->rc_mi; 35257c478bd9Sstevel@tonic-gate nfs4_open_owner_t *bad_oop; 35267c478bd9Sstevel@tonic-gate nfs4_lock_owner_t *bad_lop; 35277c478bd9Sstevel@tonic-gate vnode_t *vp; 35287c478bd9Sstevel@tonic-gate rnode4_t *rp = NULL; 35297c478bd9Sstevel@tonic-gate pid_t pid; 35307c478bd9Sstevel@tonic-gate nfs4_bseqid_entry_t *bsep, *tbsep; 35317c478bd9Sstevel@tonic-gate int error; 35327c478bd9Sstevel@tonic-gate 35337c478bd9Sstevel@tonic-gate ASSERT(mi != NULL); 35347c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 35357c478bd9Sstevel@tonic-gate 35367c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 35377c478bd9Sstevel@tonic-gate bsep = list_head(&mi->mi_bseqid_list); 35387c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 35397c478bd9Sstevel@tonic-gate 35407c478bd9Sstevel@tonic-gate /* 35417c478bd9Sstevel@tonic-gate * Handle all the bad seqid entries on mi's list. 
35427c478bd9Sstevel@tonic-gate */ 35437c478bd9Sstevel@tonic-gate while (bsep != NULL) { 35447c478bd9Sstevel@tonic-gate bad_oop = bsep->bs_oop; 35457c478bd9Sstevel@tonic-gate bad_lop = bsep->bs_lop; 35467c478bd9Sstevel@tonic-gate vp = bsep->bs_vp; 35477c478bd9Sstevel@tonic-gate pid = bsep->bs_pid; 35487c478bd9Sstevel@tonic-gate 35497c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 35507c478bd9Sstevel@tonic-gate "recov_bad_seqid: mark oop %p lop %p as bad for " 35517c478bd9Sstevel@tonic-gate "vp %p tag %s pid %d: last good seqid %d for tag %s", 35527c478bd9Sstevel@tonic-gate (void *)bad_oop, (void *)bad_lop, (void *)vp, 35537c478bd9Sstevel@tonic-gate nfs4_ctags[bsep->bs_tag].ct_str, pid, 35547c478bd9Sstevel@tonic-gate bad_oop ? bad_oop->oo_last_good_seqid : 0, 35557c478bd9Sstevel@tonic-gate bad_oop ? nfs4_ctags[bad_oop->oo_last_good_op].ct_str : 35567c478bd9Sstevel@tonic-gate nfs4_ctags[TAG_NONE].ct_str)); 35577c478bd9Sstevel@tonic-gate 35587c478bd9Sstevel@tonic-gate nfs4_queue_event(RE_BAD_SEQID, mi, NULL, 35597c478bd9Sstevel@tonic-gate 0, vp, NULL, NFS4ERR_BAD_SEQID, NULL, pid, bsep->bs_tag, 35607c478bd9Sstevel@tonic-gate bad_oop ? bad_oop->oo_last_good_op : TAG_NONE, 35617c478bd9Sstevel@tonic-gate bsep->bs_seqid, bad_oop ? 
bad_oop->oo_last_good_seqid : 0); 35627c478bd9Sstevel@tonic-gate 35637c478bd9Sstevel@tonic-gate if (bad_oop) { 35647c478bd9Sstevel@tonic-gate /* essentially reset the open owner */ 35657c478bd9Sstevel@tonic-gate error = nfs4_start_open_seqid_sync(bad_oop, mi); 35667c478bd9Sstevel@tonic-gate ASSERT(!error); /* recov thread always succeeds */ 35677c478bd9Sstevel@tonic-gate bad_oop->oo_name = nfs4_get_new_oo_name(); 35687c478bd9Sstevel@tonic-gate bad_oop->oo_seqid = 0; 35697c478bd9Sstevel@tonic-gate nfs4_end_open_seqid_sync(bad_oop); 35707c478bd9Sstevel@tonic-gate } 35717c478bd9Sstevel@tonic-gate 35727c478bd9Sstevel@tonic-gate if (bad_lop) { 35737c478bd9Sstevel@tonic-gate mutex_enter(&bad_lop->lo_lock); 35747c478bd9Sstevel@tonic-gate bad_lop->lo_flags |= NFS4_BAD_SEQID_LOCK; 35757c478bd9Sstevel@tonic-gate mutex_exit(&bad_lop->lo_lock); 35767c478bd9Sstevel@tonic-gate 35777c478bd9Sstevel@tonic-gate ASSERT(vp != NULL); 35787c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 35797c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 35807c478bd9Sstevel@tonic-gate rp->r_flags |= R4LODANGLERS; 35817c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 35827c478bd9Sstevel@tonic-gate 35837c478bd9Sstevel@tonic-gate nfs4_send_siglost(pid, mi, vp, TRUE, 35847c478bd9Sstevel@tonic-gate 0, NFS4ERR_BAD_SEQID); 35857c478bd9Sstevel@tonic-gate } 35867c478bd9Sstevel@tonic-gate 35877c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 35887c478bd9Sstevel@tonic-gate list_remove(&mi->mi_bseqid_list, bsep); 35897c478bd9Sstevel@tonic-gate tbsep = bsep; 35907c478bd9Sstevel@tonic-gate bsep = list_head(&mi->mi_bseqid_list); 35917c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 35927c478bd9Sstevel@tonic-gate free_bseqid_rqst(tbsep); 35937c478bd9Sstevel@tonic-gate } 35947c478bd9Sstevel@tonic-gate 35957c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 35967c478bd9Sstevel@tonic-gate mi->mi_recovflags &= ~MI4R_BAD_SEQID; 35977c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 
35987c478bd9Sstevel@tonic-gate } 3599