xref: /freebsd/sys/fs/nfs/nfs_commonkrpc.c (revision a6e527f893df2cbbd941839a93e50ae39ac0db55)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1991, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 
36 #include <sys/cdefs.h>
37 /*
38  * Socket operations for use by nfs
39  */
40 
41 #include "opt_kgssapi.h"
42 #include "opt_nfs.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/limits.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/mount.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/signalvar.h>
55 #include <sys/syscallsubr.h>
56 #include <sys/sysctl.h>
57 #include <sys/syslog.h>
58 #include <sys/vnode.h>
59 
60 #include <rpc/rpc.h>
61 #include <rpc/krpc.h>
62 
63 #include <kgssapi/krb5/kcrypto.h>
64 
65 #include <fs/nfs/nfsport.h>
66 
67 #ifdef KDTRACE_HOOKS
68 #include <sys/dtrace_bsd.h>
69 
70 dtrace_nfsclient_nfs23_start_probe_func_t
71 		dtrace_nfscl_nfs234_start_probe;
72 
73 dtrace_nfsclient_nfs23_done_probe_func_t
74 		dtrace_nfscl_nfs234_done_probe;
75 
76 /*
77  * Registered probes by RPC type.
78  */
79 uint32_t	nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
80 uint32_t	nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];
81 
82 uint32_t	nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
83 uint32_t	nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];
84 
85 uint32_t	nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];
86 uint32_t	nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
87 #endif
88 
89 NFSSTATESPINLOCK;
90 NFSREQSPINLOCK;
91 NFSDLOCKMUTEX;
92 NFSCLSTATEMUTEX;
93 extern struct nfsstatsv1 nfsstatsv1;
94 extern struct nfsreqhead nfsd_reqq;
95 extern int nfscl_ticks;
96 extern void (*ncl_call_invalcaches)(struct vnode *);
97 extern int nfs_numnfscbd;
98 extern int nfscl_debuglevel;
99 extern int nfsrv_lease;
100 
101 SVCPOOL		*nfscbd_pool;
102 int		nfs_bufpackets = 4;
103 static int	nfsrv_gsscallbackson = 0;
104 static int	nfs_reconnects;
105 static int	nfs3_jukebox_delay = 10;
106 static int	nfs_skip_wcc_data_onerr = 1;
107 static int	nfs_dsretries = 2;
108 static struct timespec	nfs_trylater_max = {
109 	.tv_sec		= NFS_TRYLATERDEL,
110 	.tv_nsec	= 0,
111 };
112 
113 SYSCTL_DECL(_vfs_nfs);
114 
115 SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
116     "Buffer reservation size 2 < x < 64");
117 SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
118     "Number of times the nfs client has had to reconnect");
119 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
120     "Number of seconds to delay a retry after receiving EJUKEBOX");
121 SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
122     "Disable weak cache consistency checking when server returns an error");
123 SYSCTL_INT(_vfs_nfs, OID_AUTO, dsretries, CTLFLAG_RW, &nfs_dsretries, 0,
124     "Number of retries for a DS RPC before failure");
125 
/*
 * File-local helpers.  nfs_down() and nfs_up() are invoked from
 * nfs_feedback() to report a server that stops or resumes responding.
 */
static void	nfs_down(struct nfsmount *, struct thread *,
		    const char *, int, int);
static void	nfs_up(struct nfsmount *, struct thread *,
		    const char *, int, int);
static int	nfs_msg(struct thread *, const char *,
		    const char *, int);
131 
132 struct nfs_cached_auth {
133 	int		ca_refs; /* refcount, including 1 from the cache */
134 	uid_t		ca_uid;	 /* uid that corresponds to this auth */
135 	AUTH		*ca_auth; /* RPC auth handle */
136 };
137 
138 static int nfsv2_procid[NFS_V3NPROCS] = {
139 	NFSV2PROC_NULL,
140 	NFSV2PROC_GETATTR,
141 	NFSV2PROC_SETATTR,
142 	NFSV2PROC_LOOKUP,
143 	NFSV2PROC_NOOP,
144 	NFSV2PROC_READLINK,
145 	NFSV2PROC_READ,
146 	NFSV2PROC_WRITE,
147 	NFSV2PROC_CREATE,
148 	NFSV2PROC_MKDIR,
149 	NFSV2PROC_SYMLINK,
150 	NFSV2PROC_CREATE,
151 	NFSV2PROC_REMOVE,
152 	NFSV2PROC_RMDIR,
153 	NFSV2PROC_RENAME,
154 	NFSV2PROC_LINK,
155 	NFSV2PROC_READDIR,
156 	NFSV2PROC_NOOP,
157 	NFSV2PROC_STATFS,
158 	NFSV2PROC_NOOP,
159 	NFSV2PROC_NOOP,
160 	NFSV2PROC_NOOP,
161 };
162 
163 /*
164  * This static array indicates that a NFSv4 RPC should use
165  * RPCSEC_GSS, if the mount indicates that via sec=krb5[ip].
166  * System RPCs that do not use file handles will be false
167  * in this array so that they will use AUTH_SYS when the
168  * "syskrb5" mount option is specified, along with
169  * "sec=krb5[ip]".
170  */
171 static bool nfscl_use_gss[NFSV42_NPROCS] = {
172 	true,
173 	true,
174 	true,
175 	true,
176 	true,
177 	true,
178 	true,
179 	true,
180 	true,
181 	true,
182 	true,
183 	true,
184 	true,
185 	true,
186 	true,
187 	true,
188 	true,
189 	true,
190 	true,
191 	true,
192 	true,
193 	true,
194 	true,
195 	false,		/* SetClientID */
196 	false,		/* SetClientIDConfirm */
197 	true,
198 	true,
199 	true,
200 	true,
201 	true,
202 	true,
203 	true,
204 	false,		/* Renew */
205 	true,
206 	false,		/* ReleaseLockOwn */
207 	true,
208 	true,
209 	true,
210 	true,
211 	true,
212 	true,
213 	false,		/* ExchangeID */
214 	false,		/* CreateSession */
215 	false,		/* DestroySession */
216 	false,		/* DestroyClientID */
217 	false,		/* FreeStateID */
218 	true,
219 	true,
220 	true,
221 	true,
222 	false,		/* ReclaimComplete */
223 	true,
224 	true,
225 	true,
226 	true,
227 	true,
228 	true,
229 	true,
230 	true,
231 	true,
232 	true,
233 	true,
234 	true,
235 	true,
236 	true,
237 	false,		/* BindConnectionToSession */
238 	true,
239 	true,
240 	true,
241 	true,
242 	true,
243 };
244 
245 /*
246  * Initialize sockets and congestion for a new NFS connection.
247  * We do not free the sockaddr if error.
248  * Which arguments are set to NULL indicate what kind of call it is.
249  * cred == NULL --> a call to connect to a pNFS DS
250  * nmp == NULL --> indicates an upcall to userland or a NFSv4.0 callback
251  */
252 int
newnfs_connect(struct nfsmount * nmp,struct nfssockreq * nrp,struct ucred * cred,NFSPROC_T * p,int callback_retry_mult,bool dotls,struct __rpc_client ** clipp)253 newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
254     struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls,
255     struct __rpc_client **clipp)
256 {
257 	int rcvreserve, sndreserve;
258 	int pktscale, pktscalesav;
259 	struct sockaddr *saddr;
260 	struct ucred *origcred;
261 	CLIENT *client;
262 	struct netconfig *nconf;
263 	struct socket *so;
264 	int one = 1, retries, error = 0;
265 	struct thread *td = curthread;
266 	SVCXPRT *xprt;
267 	struct timeval timo;
268 	uint64_t tval;
269 
270 	/*
271 	 * We need to establish the socket using the credentials of
272 	 * the mountpoint.  Some parts of this process (such as
273 	 * sobind() and soconnect()) will use the curent thread's
274 	 * credential instead of the socket credential.  To work
275 	 * around this, temporarily change the current thread's
276 	 * credential to that of the mountpoint.
277 	 *
278 	 * XXX: It would be better to explicitly pass the correct
279 	 * credential to sobind() and soconnect().
280 	 */
281 	origcred = td->td_ucred;
282 
283 	/*
284 	 * Use the credential in nr_cred, if not NULL.
285 	 */
286 	if (nrp->nr_cred != NULL)
287 		td->td_ucred = nrp->nr_cred;
288 	else
289 		td->td_ucred = cred;
290 	saddr = nrp->nr_nam;
291 
292 	if (saddr->sa_family == AF_INET)
293 		if (nrp->nr_sotype == SOCK_DGRAM)
294 			nconf = getnetconfigent("udp");
295 		else
296 			nconf = getnetconfigent("tcp");
297 	else
298 		if (nrp->nr_sotype == SOCK_DGRAM)
299 			nconf = getnetconfigent("udp6");
300 		else
301 			nconf = getnetconfigent("tcp6");
302 
303 	pktscale = nfs_bufpackets;
304 	if (pktscale < 2)
305 		pktscale = 2;
306 	if (pktscale > 64)
307 		pktscale = 64;
308 	pktscalesav = pktscale;
309 	/*
310 	 * soreserve() can fail if sb_max is too small, so shrink pktscale
311 	 * and try again if there is an error.
312 	 * Print a log message suggesting increasing sb_max.
313 	 * Creating a socket and doing this is necessary since, if the
314 	 * reservation sizes are too large and will make soreserve() fail,
315 	 * the connection will work until a large send is attempted and
316 	 * then it will loop in the krpc code.
317 	 */
318 	so = NULL;
319 	saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *);
320 	error = socreate(saddr->sa_family, &so, nrp->nr_sotype,
321 	    nrp->nr_soproto, td->td_ucred, td);
322 	if (error != 0)
323 		goto out;
324 	do {
325 	    if (error != 0 && pktscale > 2) {
326 		if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
327 		    pktscale == pktscalesav) {
328 		    /*
329 		     * Suggest vfs.nfs.bufpackets * maximum RPC message,
330 		     * adjusted for the sb_max->sb_max_adj conversion of
331 		     * MCLBYTES / (MSIZE + MCLBYTES) as the minimum setting
332 		     * for kern.ipc.maxsockbuf.
333 		     */
334 		    tval = (NFS_MAXBSIZE + NFS_MAXXDR) * nfs_bufpackets;
335 		    tval *= MSIZE + MCLBYTES;
336 		    tval += MCLBYTES - 1; /* Round up divide by MCLBYTES. */
337 		    tval /= MCLBYTES;
338 		    printf("Consider increasing kern.ipc.maxsockbuf to a "
339 			"minimum of %ju to support %ubyte NFS I/O\n",
340 			(uintmax_t)tval, NFS_MAXBSIZE);
341 		}
342 		pktscale--;
343 	    }
344 	    if (nrp->nr_sotype == SOCK_DGRAM) {
345 		if (nmp != NULL) {
346 			sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
347 			    pktscale;
348 			rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
349 			    pktscale;
350 		} else {
351 			sndreserve = rcvreserve = 1024 * pktscale;
352 		}
353 	    } else {
354 		if (nrp->nr_sotype != SOCK_STREAM)
355 			panic("nfscon sotype");
356 		if (nmp != NULL) {
357 			sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR) *
358 			    pktscale;
359 			rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR) *
360 			    pktscale;
361 		} else {
362 			sndreserve = rcvreserve = 1024 * pktscale;
363 		}
364 	    }
365 	    error = soreserve(so, sndreserve, rcvreserve);
366 	    if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
367 		pktscale <= 2)
368 		printf("Must increase kern.ipc.maxsockbuf or reduce"
369 		    " rsize, wsize\n");
370 	} while (error != 0 && pktscale > 2);
371 	soclose(so);
372 	if (error != 0)
373 		goto out;
374 
375 	client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog,
376 	    nrp->nr_vers, sndreserve, rcvreserve);
377 	CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
378 	if (nmp != NULL) {
379 		if ((nmp->nm_flag & NFSMNT_INT))
380 			CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
381 		if ((nmp->nm_flag & NFSMNT_RESVPORT))
382 			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
383 		if (NFSHASTLS(nmp)) {
384 			CLNT_CONTROL(client, CLSET_TLS, &one);
385 			if (nmp->nm_tlscertname != NULL)
386 				CLNT_CONTROL(client, CLSET_TLSCERTNAME,
387 				    nmp->nm_tlscertname);
388 		}
389 		if (NFSHASSOFT(nmp)) {
390 			if (nmp->nm_sotype == SOCK_DGRAM)
391 				/*
392 				 * For UDP, the large timeout for a reconnect
393 				 * will be set to "nm_retry * nm_timeo / 2", so
394 				 * we only want to do 2 reconnect timeout
395 				 * retries.
396 				 */
397 				retries = 2;
398 			else
399 				retries = nmp->nm_retry;
400 		} else
401 			retries = INT_MAX;
402 		if (NFSHASNFSV4N(nmp)) {
403 			if (cred != NULL) {
404 				if (NFSHASSOFT(nmp)) {
405 					/*
406 					 * This should be a DS mount.
407 					 * Use CLSET_TIMEOUT to set the timeout
408 					 * for connections to DSs instead of
409 					 * specifying a timeout on each RPC.
410 					 * This is done so that SO_SNDTIMEO
411 					 * is set on the TCP socket as well
412 					 * as specifying a time limit when
413 					 * waiting for an RPC reply.  Useful
414 					 * if the send queue for the TCP
415 					 * connection has become constipated,
416 					 * due to a failed DS.
417 					 * The choice of lease_duration / 4 is
418 					 * fairly arbitrary, but seems to work
419 					 * ok, with a lower bound of 10sec.
420 					 */
421 					timo.tv_sec = nfsrv_lease / 4;
422 					if (timo.tv_sec < 10)
423 						timo.tv_sec = 10;
424 					timo.tv_usec = 0;
425 					CLNT_CONTROL(client, CLSET_TIMEOUT,
426 					    &timo);
427 				}
428 				/*
429 				 * Make sure the nfscbd_pool doesn't get
430 				 * destroyed while doing this.
431 				 */
432 				NFSD_LOCK();
433 				if (nfs_numnfscbd > 0) {
434 					nfs_numnfscbd++;
435 					NFSD_UNLOCK();
436 					xprt = svc_vc_create_backchannel(
437 					    nfscbd_pool);
438 					CLNT_CONTROL(client, CLSET_BACKCHANNEL,
439 					    xprt);
440 					NFSD_LOCK();
441 					nfs_numnfscbd--;
442 					if (nfs_numnfscbd == 0)
443 						wakeup(&nfs_numnfscbd);
444 				}
445 				NFSD_UNLOCK();
446 			} else {
447 				/*
448 				 * cred == NULL for a DS connect.
449 				 * For connects to a DS, set a retry limit
450 				 * so that failed DSs will be detected.
451 				 * This is ok for NFSv4.1, since a DS does
452 				 * not maintain open/lock state and is the
453 				 * only case where using a "soft" mount is
454 				 * recommended for NFSv4.
455 				 * For mounts from the MDS to DS, this is done
456 				 * via mount options, but that is not the case
457 				 * here.  The retry limit here can be adjusted
458 				 * via the sysctl vfs.nfs.dsretries.
459 				 * See the comment above w.r.t. timeout.
460 				 */
461 				timo.tv_sec = nfsrv_lease / 4;
462 				if (timo.tv_sec < 10)
463 					timo.tv_sec = 10;
464 				timo.tv_usec = 0;
465 				CLNT_CONTROL(client, CLSET_TIMEOUT, &timo);
466 				retries = nfs_dsretries;
467 			}
468 		}
469 	} else {
470 		/*
471 		 * Three cases:
472 		 * - Null RPC callback to client
473 		 * - Non-Null RPC callback to client, wait a little longer
474 		 * - upcalls to nfsuserd and gssd (clp == NULL)
475 		 */
476 		if (callback_retry_mult == 0) {
477 			retries = NFSV4_UPCALLRETRY;
478 			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
479 		} else {
480 			retries = NFSV4_CALLBACKRETRY * callback_retry_mult;
481 		}
482 		if (dotls)
483 			CLNT_CONTROL(client, CLSET_TLS, &one);
484 	}
485 	CLNT_CONTROL(client, CLSET_RETRIES, &retries);
486 
487 	if (nmp != NULL) {
488 		/*
489 		 * For UDP, there are 2 timeouts:
490 		 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
491 		 *   that does a retransmit of an RPC request using the same
492 		 *   socket and xid. This is what you normally want to do,
493 		 *   since NFS servers depend on "same xid" for their
494 		 *   Duplicate Request Cache.
495 		 * - timeout specified in CLNT_CALL_MBUF(), which specifies when
496 		 *   retransmits on the same socket should fail and a fresh
497 		 *   socket created. Each of these timeouts counts as one
498 		 *   CLSET_RETRIES as set above.
499 		 * Set the initial retransmit timeout for UDP. This timeout
500 		 * doesn't exist for TCP and the following call just fails,
501 		 * which is ok.
502 		 */
503 		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
504 		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
505 		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
506 	}
507 
508 	/*
509 	 * *clipp is &nrp->nr_client or &nm_aconn[nmp->nm_nextaconn].
510 	 * The latter case is for additional connections specified by the
511 	 * "nconnect" mount option.  nr_mtx etc is used for these additional
512 	 * connections, as well as nr_client in the nfssockreq
513 	 * structure for the mount.
514 	 */
515 	mtx_lock(&nrp->nr_mtx);
516 	if (*clipp != NULL) {
517 		mtx_unlock(&nrp->nr_mtx);
518 		/*
519 		 * Someone else already connected.
520 		 */
521 		CLNT_RELEASE(client);
522 	} else {
523 		*clipp = client;
524 		/*
525 		 * Protocols that do not require connections may be optionally
526 		 * left unconnected for servers that reply from a port other
527 		 * than NFS_PORT.
528 		 */
529 		if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
530 			mtx_unlock(&nrp->nr_mtx);
531 			CLNT_CONTROL(client, CLSET_CONNECT, &one);
532 		} else
533 			mtx_unlock(&nrp->nr_mtx);
534 	}
535 
536 out:
537 	/* Restore current thread's credentials. */
538 	td->td_ucred = origcred;
539 
540 	NFSEXITCODE(error);
541 	return (error);
542 }
543 
544 /*
545  * NFS disconnect. Clean up and unlink.
546  */
547 void
newnfs_disconnect(struct nfsmount * nmp,struct nfssockreq * nrp)548 newnfs_disconnect(struct nfsmount *nmp, struct nfssockreq *nrp)
549 {
550 	CLIENT *client, *aconn[NFS_MAXNCONN - 1];
551 	int i;
552 
553 	mtx_lock(&nrp->nr_mtx);
554 	if (nrp->nr_client != NULL) {
555 		client = nrp->nr_client;
556 		nrp->nr_client = NULL;
557 		if (nmp != NULL && nmp->nm_aconnect > 0) {
558 			for (i = 0; i < nmp->nm_aconnect; i++) {
559 				aconn[i] = nmp->nm_aconn[i];
560 				nmp->nm_aconn[i] = NULL;
561 			}
562 		}
563 		mtx_unlock(&nrp->nr_mtx);
564 		CURVNET_SET_QUIET(CRED_TO_VNET(nrp->nr_cred));
565 		rpc_gss_secpurge_call(client);
566 		CURVNET_RESTORE();
567 		CLNT_CLOSE(client);
568 		CLNT_RELEASE(client);
569 		if (nmp != NULL && nmp->nm_aconnect > 0) {
570 			for (i = 0; i < nmp->nm_aconnect; i++) {
571 				if (aconn[i] != NULL) {
572 					rpc_gss_secpurge_call(aconn[i]);
573 					CLNT_CLOSE(aconn[i]);
574 					CLNT_RELEASE(aconn[i]);
575 				}
576 			}
577 		}
578 	} else {
579 		mtx_unlock(&nrp->nr_mtx);
580 	}
581 }
582 
583 static AUTH *
nfs_getauth(struct nfssockreq * nrp,int secflavour,char * clnt_principal,char * srv_principal,gss_OID mech_oid,struct ucred * cred)584 nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal,
585     char *srv_principal, gss_OID mech_oid, struct ucred *cred)
586 {
587 	rpc_gss_service_t svc;
588 	AUTH *auth;
589 
590 	switch (secflavour) {
591 	case RPCSEC_GSS_KRB5:
592 	case RPCSEC_GSS_KRB5I:
593 	case RPCSEC_GSS_KRB5P:
594 		if (!mech_oid) {
595 			if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid))
596 				return (NULL);
597 		}
598 		if (secflavour == RPCSEC_GSS_KRB5)
599 			svc = rpc_gss_svc_none;
600 		else if (secflavour == RPCSEC_GSS_KRB5I)
601 			svc = rpc_gss_svc_integrity;
602 		else
603 			svc = rpc_gss_svc_privacy;
604 
605 		if (clnt_principal == NULL) {
606 			NFSCL_DEBUG(1, "nfs_getauth: clnt princ=NULL, "
607 			    "srv princ=%s\n", srv_principal);
608 			auth = rpc_gss_secfind_call(nrp->nr_client, cred,
609 			    srv_principal, mech_oid, svc);
610 		} else {
611 			NFSCL_DEBUG(1, "nfs_getauth: clnt princ=%s "
612 			    "srv princ=%s\n", clnt_principal, srv_principal);
613 			auth = rpc_gss_seccreate_call(nrp->nr_client, cred,
614 			    clnt_principal, srv_principal, "kerberosv5",
615 			    svc, NULL, NULL, NULL);
616 			return (auth);
617 		}
618 		if (auth != NULL)
619 			return (auth);
620 		/* fallthrough */
621 	case AUTH_SYS:
622 	default:
623 		return (authunix_create(cred));
624 	}
625 }
626 
627 /*
628  * Callback from the RPC code to generate up/down notifications.
629  */
630 
/*
 * Per-request state handed to nfs_feedback() through the RPC layer.
 */
struct nfs_feedback_arg {
	/* mount the request belongs to */
	struct nfsmount *nf_mount;
	/* last tprintf; compared against the current NFSD_MONOSEC */
	int		nf_lastmsg;
	/* set to TRUE once a "not responding" report has been made */
	int		nf_tprintfmsg;
	/* thread passed on to nfs_down() and nfs_up() */
	struct thread	*nf_td;
};
637 
638 static void
nfs_feedback(int type,int proc,void * arg)639 nfs_feedback(int type, int proc, void *arg)
640 {
641 	struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
642 	struct nfsmount *nmp = nf->nf_mount;
643 	time_t now;
644 
645 	switch (type) {
646 	case FEEDBACK_REXMIT2:
647 	case FEEDBACK_RECONNECT:
648 		now = NFSD_MONOSEC;
649 		if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) {
650 			nfs_down(nmp, nf->nf_td,
651 			    "not responding", 0, NFSSTA_TIMEO);
652 			nf->nf_tprintfmsg = TRUE;
653 			nf->nf_lastmsg = now;
654 		}
655 		break;
656 
657 	case FEEDBACK_OK:
658 		nfs_up(nf->nf_mount, nf->nf_td,
659 		    "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
660 		break;
661 	}
662 }
663 
664 /*
665  * newnfs_request - goes something like this
666  *	- does the rpc by calling the krpc layer
667  *	- break down rpc header and return with nfs reply
668  * nb: always frees up nd_mreq mbuf list
669  */
670 int
newnfs_request(struct nfsrv_descript * nd,struct nfsmount * nmp,struct nfsclient * clp,struct nfssockreq * nrp,vnode_t vp,struct thread * td,struct ucred * cred,u_int32_t prog,u_int32_t vers,u_char * retsum,int toplevel,u_int64_t * xidp,struct nfsclsession * dssep)671 newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
672     struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp,
673     struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
674     u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
675 {
676 	uint32_t retseq, retval, retval0, slotseq, *tl;
677 	int i = 0, j = 0, opcnt, set_sigset = 0, slot;
678 	int error = 0, usegssname = 0, secflavour = AUTH_SYS;
679 	int freeslot, maxslot, reterr, slotpos, timeo;
680 	u_int16_t procnum;
681 	u_int nextconn;
682 	struct nfs_feedback_arg nf;
683 	struct timeval timo;
684 	AUTH *auth;
685 	struct rpc_callextra ext;
686 	enum clnt_stat stat;
687 	struct nfsreq *rep = NULL;
688 	char *srv_principal = NULL, *clnt_principal = NULL;
689 	sigset_t oldset;
690 	struct ucred *authcred, *savcred;
691 	struct nfsclsession *sep;
692 	uint8_t sessionid[NFSX_V4SESSIONID];
693 	bool nextconn_set;
694 	struct timespec trylater_delay, ts, waituntil;
695 
696 	/* Initially 1msec. */
697 	trylater_delay.tv_sec = 0;
698 	trylater_delay.tv_nsec = 1000000;
699 	sep = dssep;
700 	if (xidp != NULL)
701 		*xidp = 0;
702 	/* Reject requests while attempting a forced unmount. */
703 	if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) {
704 		m_freem(nd->nd_mreq);
705 		return (ESTALE);
706 	}
707 
708 	/*
709 	 * Set authcred, which is used to acquire RPC credentials to
710 	 * the cred argument, by default. The crhold() should not be
711 	 * necessary, but will ensure that some future code change
712 	 * doesn't result in the credential being free'd prematurely.
713 	 */
714 	authcred = crhold(cred);
715 
716 	/* For client side interruptible mounts, mask off the signals. */
717 	if (nmp != NULL && td != NULL && NFSHASINT(nmp)) {
718 		newnfs_set_sigmask(td, &oldset);
719 		set_sigset = 1;
720 	}
721 
722 	/*
723 	 * If not already connected call newnfs_connect now.
724 	 */
725 	if (nrp->nr_client == NULL)
726 		newnfs_connect(nmp, nrp, cred, td, 0, false, &nrp->nr_client);
727 
728 	/*
729 	 * If the "nconnect" mount option was specified and this RPC is
730 	 * one that can have a large RPC message and is being done through
731 	 * the NFS/MDS server, use an additional connection. (When the RPC is
732 	 * being done through the server/MDS, nrp == &nmp->nm_sockreq.)
733 	 * The "nconnect" mount option normally has minimal effect when the
734 	 * "pnfs" mount option is specified, since only Readdir RPCs are
735 	 * normally done through the NFS/MDS server.
736 	 */
737 	nextconn_set = false;
738 	if (nmp != NULL && nmp->nm_aconnect > 0 && nrp == &nmp->nm_sockreq &&
739 	    (nd->nd_procnum == NFSPROC_READ ||
740 	     nd->nd_procnum == NFSPROC_READDIR ||
741 	     nd->nd_procnum == NFSPROC_READDIRPLUS ||
742 	     nd->nd_procnum == NFSPROC_WRITE)) {
743 		nextconn = atomic_fetchadd_int(&nmp->nm_nextaconn, 1);
744 		nextconn %= nmp->nm_aconnect;
745 		nextconn_set = true;
746 		if (nmp->nm_aconn[nextconn] == NULL)
747 			newnfs_connect(nmp, nrp, cred, td, 0, false,
748 			    &nmp->nm_aconn[nextconn]);
749 	}
750 
751 	/*
752 	 * For a client side mount, nmp is != NULL and clp == NULL. For
753 	 * server calls (callbacks or upcalls), nmp == NULL.
754 	 */
755 	if (clp != NULL) {
756 		NFSLOCKSTATE();
757 		if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) {
758 			secflavour = RPCSEC_GSS_KRB5;
759 			if (nd->nd_procnum != NFSPROC_NULL) {
760 				if (clp->lc_flags & LCL_GSSINTEGRITY)
761 					secflavour = RPCSEC_GSS_KRB5I;
762 				else if (clp->lc_flags & LCL_GSSPRIVACY)
763 					secflavour = RPCSEC_GSS_KRB5P;
764 			}
765 		}
766 		NFSUNLOCKSTATE();
767 	} else if (nmp != NULL && NFSHASKERB(nmp) &&
768 	     nd->nd_procnum != NFSPROC_NULL && (!NFSHASSYSKRB5(nmp) ||
769 	     nfscl_use_gss[nd->nd_procnum])) {
770 		if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0)
771 			nd->nd_flag |= ND_USEGSSNAME;
772 		if ((nd->nd_flag & ND_USEGSSNAME) != 0) {
773 			/*
774 			 * If there is a client side host based credential,
775 			 * use that, otherwise use the system uid, if set.
776 			 * The system uid is in the nmp->nm_sockreq.nr_cred
777 			 * credentials.
778 			 */
779 			if (nmp->nm_krbnamelen > 0) {
780 				usegssname = 1;
781 				clnt_principal = nmp->nm_krbname;
782 			} else if (nmp->nm_uid != (uid_t)-1) {
783 				KASSERT(nmp->nm_sockreq.nr_cred != NULL,
784 				    ("newnfs_request: NULL nr_cred"));
785 				crfree(authcred);
786 				authcred = crhold(nmp->nm_sockreq.nr_cred);
787 			}
788 		} else if (nmp->nm_krbnamelen == 0 &&
789 		    nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) {
790 			/*
791 			 * If there is no host based principal name and
792 			 * the system uid is set and this is root, use the
793 			 * system uid, since root won't have user
794 			 * credentials in a credentials cache file.
795 			 * The system uid is in the nmp->nm_sockreq.nr_cred
796 			 * credentials.
797 			 */
798 			KASSERT(nmp->nm_sockreq.nr_cred != NULL,
799 			    ("newnfs_request: NULL nr_cred"));
800 			crfree(authcred);
801 			authcred = crhold(nmp->nm_sockreq.nr_cred);
802 		}
803 		if (NFSHASINTEGRITY(nmp))
804 			secflavour = RPCSEC_GSS_KRB5I;
805 		else if (NFSHASPRIVACY(nmp))
806 			secflavour = RPCSEC_GSS_KRB5P;
807 		else
808 			secflavour = RPCSEC_GSS_KRB5;
809 		if (nrp->nr_srvprinc[0] == '\0')
810 			srv_principal = NFSMNT_SRVKRBNAME(nmp);
811 		else
812 			srv_principal = nrp->nr_srvprinc;
813 	} else if (nmp != NULL && (!NFSHASKERB(nmp) || NFSHASSYSKRB5(nmp)) &&
814 	    nd->nd_procnum != NFSPROC_NULL &&
815 	    (nd->nd_flag & ND_USEGSSNAME) != 0) {
816 		/*
817 		 * Use the uid that did the mount when the RPC is doing
818 		 * NFSv4 system operations, as indicated by the
819 		 * ND_USEGSSNAME flag, for the AUTH_SYS case.
820 		 * The credentials in nm_sockreq.nr_cred were used for the
821 		 * mount.
822 		 */
823 		KASSERT(nmp->nm_sockreq.nr_cred != NULL,
824 		    ("newnfs_request: NULL nr_cred"));
825 		crfree(authcred);
826 		authcred = crhold(nmp->nm_sockreq.nr_cred);
827 	}
828 
829 	if (nmp != NULL) {
830 		bzero(&nf, sizeof(struct nfs_feedback_arg));
831 		nf.nf_mount = nmp;
832 		nf.nf_td = td;
833 		nf.nf_lastmsg = NFSD_MONOSEC -
834 		    ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay));
835 	}
836 
837 	/*
838 	 * For Kerberos, the upcall needs to be done to the gssd daemon
839 	 * running in the correct vnet.
840 	 */
841 	CURVNET_SET_QUIET(CRED_TO_VNET(authcred));
842 	if (nd->nd_procnum == NFSPROC_NULL)
843 		auth = authnone_create();
844 	else if (usegssname) {
845 		/*
846 		 * For this case, the authenticator is held in the
847 		 * nfssockreq structure, so don't release the reference count
848 		 * held on it. --> Don't AUTH_DESTROY() it in this function.
849 		 */
850 		if (nrp->nr_auth == NULL)
851 			nrp->nr_auth = nfs_getauth(nrp, secflavour,
852 			    clnt_principal, srv_principal, NULL, authcred);
853 		else
854 			rpc_gss_refresh_auth_call(nrp->nr_auth);
855 		auth = nrp->nr_auth;
856 	} else
857 		auth = nfs_getauth(nrp, secflavour, NULL,
858 		    srv_principal, NULL, authcred);
859 	CURVNET_RESTORE();
860 	if (auth == NULL) {
861 		crfree(authcred);
862 		m_freem(nd->nd_mreq);
863 		if (set_sigset)
864 			newnfs_restore_sigmask(td, &oldset);
865 		return (EACCES);
866 	}
867 	bzero(&ext, sizeof(ext));
868 	ext.rc_auth = auth;
869 	if (nmp != NULL) {
870 		ext.rc_feedback = nfs_feedback;
871 		ext.rc_feedback_arg = &nf;
872 	}
873 
874 	procnum = nd->nd_procnum;
875 	if ((nd->nd_flag & ND_NFSV4) &&
876 	    nd->nd_procnum != NFSPROC_NULL &&
877 	    nd->nd_procnum != NFSV4PROC_CBCOMPOUND)
878 		procnum = NFSV4PROC_COMPOUND;
879 
880 	if (nmp != NULL) {
881 		NFSINCRGLOBAL(nfsstatsv1.rpcrequests);
882 
883 		/* Map the procnum to the old NFSv2 one, as required. */
884 		if ((nd->nd_flag & ND_NFSV2) != 0) {
885 			if (nd->nd_procnum < NFS_V3NPROCS)
886 				procnum = nfsv2_procid[nd->nd_procnum];
887 			else
888 				procnum = NFSV2PROC_NOOP;
889 		}
890 
891 		/*
892 		 * Now only used for the R_DONTRECOVER case, but until that is
893 		 * supported within the krpc code, I need to keep a queue of
894 		 * outstanding RPCs for nfsv4 client requests.
895 		 */
896 		if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND)
897 			rep = malloc(sizeof(struct nfsreq),
898 			    M_NFSDREQ, M_WAITOK);
899 #ifdef KDTRACE_HOOKS
900 		if (dtrace_nfscl_nfs234_start_probe != NULL) {
901 			uint32_t probe_id;
902 			int probe_procnum;
903 
904 			if (nd->nd_flag & ND_NFSV4) {
905 				probe_id =
906 				    nfscl_nfs4_start_probes[nd->nd_procnum];
907 				probe_procnum = nd->nd_procnum;
908 			} else if (nd->nd_flag & ND_NFSV3) {
909 				probe_id = nfscl_nfs3_start_probes[procnum];
910 				probe_procnum = procnum;
911 			} else {
912 				probe_id =
913 				    nfscl_nfs2_start_probes[nd->nd_procnum];
914 				probe_procnum = procnum;
915 			}
916 			if (probe_id != 0)
917 				(dtrace_nfscl_nfs234_start_probe)
918 				    (probe_id, vp, nd->nd_mreq, cred,
919 				     probe_procnum);
920 		}
921 #endif
922 	}
923 	freeslot = -1;		/* Set to slot that needs to be free'd */
924 tryagain:
925 	slot = -1;		/* Slot that needs a sequence# increment. */
926 	/*
927 	 * This timeout specifies when a new socket should be created,
928 	 * along with new xid values. For UDP, this should be done
929 	 * infrequently, since retransmits of RPC requests should normally
930 	 * use the same xid.
931 	 */
932 	if (nmp == NULL) {
933 		if (clp == NULL) {
934 			timo.tv_sec = NFSV4_UPCALLTIMEO;
935 			timo.tv_usec = 0;
936 		} else {
937 			timo.tv_sec = NFSV4_CALLBACKTIMEO / 1000;
938 			timo.tv_usec = NFSV4_CALLBACKTIMEO * 1000;
939 		}
940 	} else {
941 		if (nrp->nr_sotype != SOCK_DGRAM) {
942 			timo.tv_usec = 0;
943 			if ((nmp->nm_flag & NFSMNT_NFSV4))
944 				timo.tv_sec = INT_MAX;
945 			else
946 				timo.tv_sec = NFS_TCPTIMEO;
947 		} else {
948 			if (NFSHASSOFT(nmp)) {
949 				/*
950 				 * CLSET_RETRIES is set to 2, so this should be
951 				 * half of the total timeout required.
952 				 */
953 				timeo = nmp->nm_retry * nmp->nm_timeo / 2;
954 				if (timeo < 1)
955 					timeo = 1;
956 				timo.tv_sec = timeo / NFS_HZ;
957 				timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
958 				    NFS_HZ;
959 			} else {
960 				/* For UDP hard mounts, use a large value. */
961 				timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
962 				timo.tv_usec = 0;
963 			}
964 		}
965 
966 		if (rep != NULL) {
967 			rep->r_flags = 0;
968 			rep->r_nmp = nmp;
969 			/*
970 			 * Chain request into list of outstanding requests.
971 			 */
972 			NFSLOCKREQ();
973 			TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain);
974 			NFSUNLOCKREQ();
975 		}
976 	}
977 
978 	/*
979 	 * In case CLNT_CALL_MBUF()/clnt_bck_call() does an AUTH_REFRESH(),
980 	 * the thread's credentials need to be set to authcred, so that the
981 	 * correct vnet will be set.
982 	 */
983 	savcred = curthread->td_ucred;
984 	curthread->td_ucred = authcred;
985 	nd->nd_mrep = NULL;
986 	if (clp != NULL && sep != NULL)
987 		stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
988 		    nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
989 	else if (nextconn_set)
990 		/*
991 		 * When there are multiple TCP connections, send the
992 		 * RPCs with large messages on the alternate TCP
993 		 * connection(s) in a round robin fashion.
994 		 * The small RPC messages are sent on the default
995 		 * TCP connection because they do not require much
996 		 * network bandwidth and separating them from the
997 		 * large RPC messages avoids them getting "log jammed"
998 		 * behind several large RPC messages.
999 		 */
1000 		stat = CLNT_CALL_MBUF(nmp->nm_aconn[nextconn],
1001 		    &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo);
1002 	else
1003 		stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
1004 		    nd->nd_mreq, &nd->nd_mrep, timo);
1005 	NFSCL_DEBUG(2, "clnt call=%d\n", stat);
1006 	curthread->td_ucred = savcred;
1007 
1008 	if (rep != NULL) {
1009 		/*
1010 		 * RPC done, unlink the request.
1011 		 */
1012 		NFSLOCKREQ();
1013 		TAILQ_REMOVE(&nfsd_reqq, rep, r_chain);
1014 		NFSUNLOCKREQ();
1015 	}
1016 
1017 	/*
1018 	 * If there was a successful reply and a tprintf msg.
1019 	 * tprintf a response.
1020 	 */
1021 	if (stat == RPC_SUCCESS) {
1022 		error = 0;
1023 	} else if (stat == RPC_TIMEDOUT) {
1024 		NFSINCRGLOBAL(nfsstatsv1.rpctimeouts);
1025 		error = ETIMEDOUT;
1026 	} else if (stat == RPC_VERSMISMATCH) {
1027 		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1028 		error = EOPNOTSUPP;
1029 	} else if (stat == RPC_PROGVERSMISMATCH) {
1030 		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1031 		error = EPROTONOSUPPORT;
1032 	} else if (stat == RPC_CANTSEND || stat == RPC_CANTRECV ||
1033 	     stat == RPC_SYSTEMERROR || stat == RPC_INTR) {
1034 		/* Check for a session slot that needs to be free'd. */
1035 		if ((nd->nd_flag & (ND_NFSV41 | ND_HASSLOTID)) ==
1036 		    (ND_NFSV41 | ND_HASSLOTID) && nmp != NULL &&
1037 		    nd->nd_procnum != NFSPROC_NULL) {
1038 			/*
1039 			 * This should only occur when either the MDS or
1040 			 * a client has an RPC against a DS fail.
1041 			 * This happens because these cases use "soft"
1042 			 * connections that can time out and fail.
1043 			 * The slot used for this RPC is now in a
1044 			 * non-deterministic state, but if the slot isn't
1045 			 * free'd, threads can get stuck waiting for a slot.
1046 			 */
1047 			if (sep == NULL)
1048 				sep = nfsmnt_mdssession(nmp);
1049 			/*
1050 			 * Bump the sequence# out of range, so that reuse of
1051 			 * this slot will result in an NFSERR_SEQMISORDERED
1052 			 * error and not a bogus cached RPC reply.
1053 			 */
1054 			mtx_lock(&sep->nfsess_mtx);
1055 			sep->nfsess_slotseq[nd->nd_slotid] += 10;
1056 			sep->nfsess_badslots |= (0x1ULL << nd->nd_slotid);
1057 			mtx_unlock(&sep->nfsess_mtx);
1058 			/* And free the slot. */
1059 			nfsv4_freeslot(sep, nd->nd_slotid, true);
1060 		}
1061 		if (stat == RPC_INTR)
1062 			error = EINTR;
1063 		else {
1064 			NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1065 			error = ENXIO;
1066 		}
1067 	} else if (stat == RPC_AUTHERROR) {
1068 		/* Check for a session slot that needs to be free'd. */
1069 		if ((nd->nd_flag & (ND_NFSV41 | ND_HASSLOTID)) ==
1070 		    (ND_NFSV41 | ND_HASSLOTID) && nmp != NULL &&
1071 		    nd->nd_procnum != NFSPROC_NULL) {
1072 			/*
1073 			 * This can occur when a Kerberos/RPCSEC_GSS session
1074 			 * expires, due to TGT expiration.
1075 			 * Free the slot, resetting the slot's sequence#.
1076 			 */
1077 			if (sep == NULL)
1078 				sep = nfsmnt_mdssession(nmp);
1079 			nfsv4_freeslot(sep, nd->nd_slotid, true);
1080 		}
1081 		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1082 		error = EACCES;
1083 	} else {
1084 		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1085 		error = EACCES;
1086 	}
1087 	if (error) {
1088 		crfree(authcred);
1089 		m_freem(nd->nd_mreq);
1090 		if (usegssname == 0)
1091 			AUTH_DESTROY(auth);
1092 		if (rep != NULL)
1093 			free(rep, M_NFSDREQ);
1094 		if (set_sigset)
1095 			newnfs_restore_sigmask(td, &oldset);
1096 		return (error);
1097 	}
1098 
1099 	KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n"));
1100 
1101 	/*
1102 	 * Search for any mbufs that are not a multiple of 4 bytes long
1103 	 * or with m_data not longword aligned.
1104 	 * These could cause pointer alignment problems, so copy them to
1105 	 * well aligned mbufs.
1106 	 */
1107 	newnfs_realign(&nd->nd_mrep, M_WAITOK);
1108 	nd->nd_md = nd->nd_mrep;
1109 	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
1110 	nd->nd_repstat = 0;
1111 	if (nd->nd_procnum != NFSPROC_NULL &&
1112 	    nd->nd_procnum != NFSV4PROC_CBNULL) {
1113 		/* If sep == NULL, set it to the default in nmp. */
1114 		if (sep == NULL && nmp != NULL)
1115 			sep = nfsmnt_mdssession(nmp);
1116 		/*
1117 		 * and now the actual NFS xdr.
1118 		 */
1119 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1120 		nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl);
1121 		if (nd->nd_repstat >= 10000)
1122 			NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum,
1123 			    (int)nd->nd_repstat);
1124 
1125 		/*
1126 		 * Get rid of the tag, return count and SEQUENCE result for
1127 		 * NFSv4.
1128 		 */
1129 		if ((nd->nd_flag & ND_NFSV4) != 0 && nd->nd_repstat !=
1130 		    NFSERR_MINORVERMISMATCH) {
1131 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1132 			i = fxdr_unsigned(int, *tl);
1133 			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
1134 			if (error)
1135 				goto nfsmout;
1136 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1137 			opcnt = fxdr_unsigned(int, *tl++);
1138 			i = fxdr_unsigned(int, *tl++);
1139 			j = fxdr_unsigned(int, *tl);
1140 			if (j >= 10000)
1141 				NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j);
1142 			/*
1143 			 * If the first op is Sequence, free up the slot.
1144 			 */
1145 			if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) ||
1146 			   (clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0)) {
1147 				NFSCL_DEBUG(1, "failed seq=%d\n", j);
1148 				if (sep != NULL && i == NFSV4OP_SEQUENCE &&
1149 				    j == NFSERR_SEQMISORDERED) {
1150 					mtx_lock(&sep->nfsess_mtx);
1151 					sep->nfsess_badslots |=
1152 					    (0x1ULL << nd->nd_slotid);
1153 					mtx_unlock(&sep->nfsess_mtx);
1154 				}
1155 			}
1156 			if (((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) ||
1157 			    (clp != NULL && i == NFSV4OP_CBSEQUENCE &&
1158 			    j == 0)) && sep != NULL) {
1159 				if (i == NFSV4OP_SEQUENCE)
1160 					NFSM_DISSECT(tl, uint32_t *,
1161 					    NFSX_V4SESSIONID +
1162 					    5 * NFSX_UNSIGNED);
1163 				else
1164 					NFSM_DISSECT(tl, uint32_t *,
1165 					    NFSX_V4SESSIONID +
1166 					    4 * NFSX_UNSIGNED);
1167 				mtx_lock(&sep->nfsess_mtx);
1168 				if (bcmp(tl, sep->nfsess_sessionid,
1169 				    NFSX_V4SESSIONID) == 0) {
1170 					tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
1171 					retseq = fxdr_unsigned(uint32_t, *tl++);
1172 					slot = fxdr_unsigned(int, *tl++);
1173 					if ((nd->nd_flag & ND_HASSLOTID) != 0) {
1174 						if (slot >= NFSV4_SLOTS ||
1175 						    (i == NFSV4OP_CBSEQUENCE &&
1176 						     slot >= NFSV4_CBSLOTS)) {
1177 							printf("newnfs_request:"
1178 							    " Bogus slot\n");
1179 							slot = nd->nd_slotid;
1180 						} else if (slot !=
1181 						    nd->nd_slotid) {
1182 						    printf("newnfs_request:"
1183 							" Wrong session "
1184 							"srvslot=%d "
1185 							"slot=%d\n", slot,
1186 							nd->nd_slotid);
1187 						    if (i == NFSV4OP_SEQUENCE) {
1188 							/*
1189 							 * Mark both slots as
1190 							 * bad, because we do
1191 							 * not know if the
1192 							 * server has advanced
1193 							 * the sequence# for
1194 							 * either of them.
1195 							 */
1196 							sep->nfsess_badslots |=
1197 							    (0x1ULL << slot);
1198 							sep->nfsess_badslots |=
1199 							    (0x1ULL <<
1200 							     nd->nd_slotid);
1201 						    }
1202 						    slot = nd->nd_slotid;
1203 						}
1204 						freeslot = slot;
1205 					} else if (slot != 0) {
1206 						printf("newnfs_request: Bad "
1207 						    "session slot=%d\n", slot);
1208 						slot = 0;
1209 					}
1210 					if (retseq != sep->nfsess_slotseq[slot])
1211 						printf("retseq diff 0x%x\n",
1212 						    retseq);
1213 					retval0 = fxdr_unsigned(uint32_t,*tl++);
1214 					retval = fxdr_unsigned(uint32_t, *tl);
1215 					if ((retval + 1) < sep->nfsess_foreslots
1216 					    ) {
1217 						sep->nfsess_foreslots = (retval
1218 						    + 1);
1219 						nfs_resetslots(sep);
1220 					} else if ((retval + 1) >
1221 					    sep->nfsess_foreslots) {
1222 						if (retval0 > retval)
1223 							printf("Sess:highest > "
1224 							    "target_highest\n");
1225 						sep->nfsess_foreslots =
1226 						    (retval < NFSV4_SLOTS) ?
1227 						    (retval + 1) : NFSV4_SLOTS;
1228 					}
1229 				}
1230 				mtx_unlock(&sep->nfsess_mtx);
1231 
1232 				/* Grab the op and status for the next one. */
1233 				if (opcnt > 1) {
1234 					NFSM_DISSECT(tl, uint32_t *,
1235 					    2 * NFSX_UNSIGNED);
1236 					i = fxdr_unsigned(int, *tl++);
1237 					j = fxdr_unsigned(int, *tl);
1238 				}
1239 			}
1240 		}
1241 		if (nd->nd_repstat != 0) {
1242 			if (nd->nd_repstat == NFSERR_BADSESSION &&
1243 			    nmp != NULL && dssep == NULL &&
1244 			    (nd->nd_flag & ND_NFSV41) != 0) {
1245 				/*
1246 				 * If this is a client side MDS RPC, mark
1247 				 * the MDS session defunct and initiate
1248 				 * recovery, as required.
1249 				 * The nfsess_defunct field is protected by
1250 				 * the NFSLOCKMNT()/nm_mtx lock and not the
1251 				 * nfsess_mtx lock to simplify its handling,
1252 				 * for the MDS session. This lock is also
1253 				 * sufficient for nfsess_sessionid, since it
1254 				 * never changes in the structure.
1255 				 */
1256 				NFSCL_DEBUG(1, "Got badsession\n");
1257 				NFSLOCKCLSTATE();
1258 				NFSLOCKMNT(nmp);
1259 				if (TAILQ_EMPTY(&nmp->nm_sess)) {
1260 					NFSUNLOCKMNT(nmp);
1261 					NFSUNLOCKCLSTATE();
1262 					printf("If server has not rebooted, "
1263 					    "check NFS clients for unique "
1264 					    "/etc/hostid's\n");
1265 					goto out;
1266 				}
1267 				sep = NFSMNT_MDSSESSION(nmp);
1268 				if (bcmp(sep->nfsess_sessionid, nd->nd_sequence,
1269 				    NFSX_V4SESSIONID) == 0) {
1270 					printf("Initiate recovery. If server "
1271 					    "has not rebooted, "
1272 					    "check NFS clients for unique "
1273 					    "/etc/hostid's\n");
1274 					/* Initiate recovery. */
1275 					sep->nfsess_defunct = 1;
1276 					NFSCL_DEBUG(1, "Marked defunct\n");
1277 					if (nmp->nm_clp != NULL) {
1278 						nmp->nm_clp->nfsc_flags |=
1279 						    NFSCLFLAGS_RECOVER;
1280 						wakeup(nmp->nm_clp);
1281 					}
1282 				}
1283 				NFSUNLOCKCLSTATE();
1284 				/*
1285 				 * Sleep for up to 1sec waiting for a new
1286 				 * session.
1287 				 */
1288 				mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO,
1289 				    "nfsbadsess", hz);
1290 				/*
1291 				 * Get the session again, in case a new one
1292 				 * has been created during the sleep.
1293 				 */
1294 				sep = NFSMNT_MDSSESSION(nmp);
1295 				NFSUNLOCKMNT(nmp);
1296 				if ((nd->nd_flag & ND_LOOPBADSESS) != 0) {
1297 					reterr = nfsv4_sequencelookup(nmp, sep,
1298 					    &slotpos, &maxslot, &slotseq,
1299 					    sessionid, true);
1300 					if (reterr == 0) {
1301 						/* Fill in new session info. */
1302 						NFSCL_DEBUG(1,
1303 						  "Filling in new sequence\n");
1304 						tl = nd->nd_sequence;
1305 						bcopy(sessionid, tl,
1306 						    NFSX_V4SESSIONID);
1307 						tl += NFSX_V4SESSIONID /
1308 						    NFSX_UNSIGNED;
1309 						*tl++ = txdr_unsigned(slotseq);
1310 						*tl++ = txdr_unsigned(slotpos);
1311 						*tl = txdr_unsigned(maxslot);
1312 						nd->nd_slotid = slotpos;
1313 						nd->nd_flag |= ND_HASSLOTID;
1314 					}
1315 					if (reterr == NFSERR_BADSESSION ||
1316 					    reterr == 0) {
1317 						NFSCL_DEBUG(1,
1318 						    "Badsession looping\n");
1319 						m_freem(nd->nd_mrep);
1320 						nd->nd_mrep = NULL;
1321 						goto tryagain;
1322 					}
1323 					nd->nd_repstat = reterr;
1324 					NFSCL_DEBUG(1, "Got err=%d\n", reterr);
1325 				}
1326 			}
1327 			/*
1328 			 * When clp != NULL, it is a callback and all
1329 			 * callback operations can be retried for NFSERR_DELAY.
1330 			 */
1331 			if (((nd->nd_repstat == NFSERR_DELAY ||
1332 			      nd->nd_repstat == NFSERR_GRACE) &&
1333 			     (nd->nd_flag & ND_NFSV4) && (clp != NULL ||
1334 			     (nd->nd_procnum != NFSPROC_DELEGRETURN &&
1335 			     nd->nd_procnum != NFSPROC_SETATTR &&
1336 			     nd->nd_procnum != NFSPROC_READ &&
1337 			     nd->nd_procnum != NFSPROC_READDS &&
1338 			     nd->nd_procnum != NFSPROC_WRITE &&
1339 			     nd->nd_procnum != NFSPROC_WRITEDS &&
1340 			     nd->nd_procnum != NFSPROC_OPEN &&
1341 			     nd->nd_procnum != NFSPROC_OPENLAYGET &&
1342 			     nd->nd_procnum != NFSPROC_CREATE &&
1343 			     nd->nd_procnum != NFSPROC_CREATELAYGET &&
1344 			     nd->nd_procnum != NFSPROC_OPENCONFIRM &&
1345 			     nd->nd_procnum != NFSPROC_OPENDOWNGRADE &&
1346 			     nd->nd_procnum != NFSPROC_CLOSE &&
1347 			     nd->nd_procnum != NFSPROC_LOCK &&
1348 			     nd->nd_procnum != NFSPROC_LOCKU))) ||
1349 			    (nd->nd_repstat == NFSERR_DELAY &&
1350 			     (nd->nd_flag & ND_NFSV4) == 0) ||
1351 			    nd->nd_repstat == NFSERR_RESOURCE ||
1352 			    nd->nd_repstat == NFSERR_RETRYUNCACHEDREP) {
1353 				/* Clip at NFS_TRYLATERDEL. */
1354 				if (timespeccmp(&trylater_delay,
1355 				    &nfs_trylater_max, >))
1356 					trylater_delay = nfs_trylater_max;
1357 				getnanouptime(&waituntil);
1358 				timespecadd(&waituntil, &trylater_delay,
1359 				    &waituntil);
1360 				do {
1361 					nfs_catnap(PZERO, 0, "nfstry");
1362 					getnanouptime(&ts);
1363 				} while (timespeccmp(&ts, &waituntil, <));
1364 				timespecadd(&trylater_delay, &trylater_delay,
1365 				    &trylater_delay);	/* Double each time. */
1366 				if (slot != -1) {
1367 					mtx_lock(&sep->nfsess_mtx);
1368 					sep->nfsess_slotseq[slot]++;
1369 					*nd->nd_slotseq = txdr_unsigned(
1370 					    sep->nfsess_slotseq[slot]);
1371 					mtx_unlock(&sep->nfsess_mtx);
1372 				}
1373 				m_freem(nd->nd_mrep);
1374 				nd->nd_mrep = NULL;
1375 				goto tryagain;
1376 			}
1377 
1378 			/*
1379 			 * If the File Handle was stale, invalidate the
1380 			 * lookup cache, just in case.
1381 			 * (vp != NULL implies a client side call)
1382 			 */
1383 			if (nd->nd_repstat == ESTALE && vp != NULL) {
1384 				cache_purge(vp);
1385 				if (ncl_call_invalcaches != NULL)
1386 					(*ncl_call_invalcaches)(vp);
1387 			}
1388 		}
1389 		if ((nd->nd_flag & ND_NFSV4) != 0) {
1390 			/* Free the slot, as required. */
1391 			if (freeslot != -1)
1392 				nfsv4_freeslot(sep, freeslot, false);
1393 			/*
1394 			 * If this op is Putfh, throw its results away.
1395 			 */
1396 			if (j >= 10000)
1397 				NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j);
1398 			if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) {
1399 				NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED);
1400 				i = fxdr_unsigned(int, *tl++);
1401 				j = fxdr_unsigned(int, *tl);
1402 				if (j >= 10000)
1403 					NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i,
1404 					    j);
1405 				/*
1406 				 * All Compounds that do an Op that must
1407 				 * be in sequence consist of NFSV4OP_PUTFH
1408 				 * followed by one of these. As such, we
1409 				 * can determine if the seqid# should be
1410 				 * incremented, here.
1411 				 */
1412 				if ((i == NFSV4OP_OPEN ||
1413 				     i == NFSV4OP_OPENCONFIRM ||
1414 				     i == NFSV4OP_OPENDOWNGRADE ||
1415 				     i == NFSV4OP_CLOSE ||
1416 				     i == NFSV4OP_LOCK ||
1417 				     i == NFSV4OP_LOCKU) &&
1418 				    (j == 0 ||
1419 				     (j != NFSERR_STALECLIENTID &&
1420 				      j != NFSERR_STALESTATEID &&
1421 				      j != NFSERR_BADSTATEID &&
1422 				      j != NFSERR_BADSEQID &&
1423 				      j != NFSERR_BADXDR &&
1424 				      j != NFSERR_RESOURCE &&
1425 				      j != NFSERR_NOFILEHANDLE)))
1426 					nd->nd_flag |= ND_INCRSEQID;
1427 			}
1428 			/*
1429 			 * If this op's status is non-zero, mark
1430 			 * that there is no more data to process.
1431 			 * The exception is Setattr, which always has xdr
1432 			 * when it has failed.
1433 			 */
1434 			if (j != 0 && i != NFSV4OP_SETATTR)
1435 				nd->nd_flag |= ND_NOMOREDATA;
1436 
1437 			/*
1438 			 * If R_DONTRECOVER is set, replace the stale error
1439 			 * reply, so that recovery isn't initiated.
1440 			 */
1441 			if ((nd->nd_repstat == NFSERR_STALECLIENTID ||
1442 			     nd->nd_repstat == NFSERR_BADSESSION ||
1443 			     nd->nd_repstat == NFSERR_STALESTATEID) &&
1444 			    rep != NULL && (rep->r_flags & R_DONTRECOVER))
1445 				nd->nd_repstat = NFSERR_STALEDONTRECOVER;
1446 		}
1447 	}
1448 out:
1449 	crfree(authcred);
1450 
1451 #ifdef KDTRACE_HOOKS
1452 	if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) {
1453 		uint32_t probe_id;
1454 		int probe_procnum;
1455 
1456 		if (nd->nd_flag & ND_NFSV4) {
1457 			probe_id = nfscl_nfs4_done_probes[nd->nd_procnum];
1458 			probe_procnum = nd->nd_procnum;
1459 		} else if (nd->nd_flag & ND_NFSV3) {
1460 			probe_id = nfscl_nfs3_done_probes[procnum];
1461 			probe_procnum = procnum;
1462 		} else {
1463 			probe_id = nfscl_nfs2_done_probes[nd->nd_procnum];
1464 			probe_procnum = procnum;
1465 		}
1466 		if (probe_id != 0)
1467 			(dtrace_nfscl_nfs234_done_probe)(probe_id, vp,
1468 			    nd->nd_mreq, cred, probe_procnum, 0);
1469 	}
1470 #endif
1471 
1472 	m_freem(nd->nd_mreq);
1473 	if (usegssname == 0)
1474 		AUTH_DESTROY(auth);
1475 	if (rep != NULL)
1476 		free(rep, M_NFSDREQ);
1477 	if (set_sigset)
1478 		newnfs_restore_sigmask(td, &oldset);
1479 	return (0);
1480 nfsmout:
1481 	crfree(authcred);
1482 	m_freem(nd->nd_mrep);
1483 	m_freem(nd->nd_mreq);
1484 	if (usegssname == 0)
1485 		AUTH_DESTROY(auth);
1486 	if (rep != NULL)
1487 		free(rep, M_NFSDREQ);
1488 	if (set_sigset)
1489 		newnfs_restore_sigmask(td, &oldset);
1490 	return (error);
1491 }
1492 
1493 /*
1494  * Reset slots above nfsess_foreslots that are not busy.
1495  */
1496 void
nfs_resetslots(struct nfsclsession * sep)1497 nfs_resetslots(struct nfsclsession *sep)
1498 {
1499 	int i;
1500 	uint64_t bitval;
1501 
1502 	mtx_assert(&sep->nfsess_mtx, MA_OWNED);
1503 	bitval = (1 << sep->nfsess_foreslots);
1504 	for (i = sep->nfsess_foreslots; i < NFSV4_SLOTS; i++) {
1505 		if ((sep->nfsess_slots & bitval) == 0 &&
1506 		    (sep->nfsess_badslots & bitval) == 0)
1507 			sep->nfsess_slotseq[i] = 0;
1508 		bitval <<= 1;
1509 	}
1510 }
1511 
1512 /*
1513  * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
1514  * wait for all requests to complete. This is used by forced unmounts
1515  * to terminate any outstanding RPCs.
1516  */
1517 int
newnfs_nmcancelreqs(struct nfsmount * nmp)1518 newnfs_nmcancelreqs(struct nfsmount *nmp)
1519 {
1520 	struct nfsclds *dsp;
1521 	struct __rpc_client *cl;
1522 	int i;
1523 
1524 	if (nmp->nm_sockreq.nr_client != NULL)
1525 		CLNT_CLOSE(nmp->nm_sockreq.nr_client);
1526 	for (i = 0; i < nmp->nm_aconnect; i++)
1527 		if (nmp->nm_aconn[i] != NULL)
1528 			CLNT_CLOSE(nmp->nm_aconn[i]);
1529 lookformore:
1530 	NFSLOCKMNT(nmp);
1531 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
1532 		NFSLOCKDS(dsp);
1533 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1534 		    (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 &&
1535 		    dsp->nfsclds_sockp != NULL &&
1536 		    dsp->nfsclds_sockp->nr_client != NULL) {
1537 			dsp->nfsclds_flags |= NFSCLDS_CLOSED;
1538 			cl = dsp->nfsclds_sockp->nr_client;
1539 			NFSUNLOCKDS(dsp);
1540 			NFSUNLOCKMNT(nmp);
1541 			CLNT_CLOSE(cl);
1542 			goto lookformore;
1543 		}
1544 		NFSUNLOCKDS(dsp);
1545 	}
1546 	NFSUNLOCKMNT(nmp);
1547 	return (0);
1548 }
1549 
/*
 * Table of the signals that can interrupt an NFS operation in an intr
 * mount.  Add a signal here to let it interrupt NFS as well.
 * SIGSTOP and SIGKILL cannot be masked.
 */
int newnfs_sig_set[] = {
	SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGQUIT,
};
1561 
1562 /*
1563  * Check to see if one of the signals in our subset is pending on
1564  * the process (in an intr mount).
1565  */
1566 static int
nfs_sig_pending(sigset_t set)1567 nfs_sig_pending(sigset_t set)
1568 {
1569 	int i;
1570 
1571 	for (i = 0 ; i < nitems(newnfs_sig_set); i++)
1572 		if (SIGISMEMBER(set, newnfs_sig_set[i]))
1573 			return (1);
1574 	return (0);
1575 }
1576 
1577 /*
1578  * The set/restore sigmask functions are used to (temporarily) overwrite
1579  * the thread td_sigmask during an RPC call (for example). These are also
1580  * used in other places in the NFS client that might tsleep().
1581  */
1582 void
newnfs_set_sigmask(struct thread * td,sigset_t * oldset)1583 newnfs_set_sigmask(struct thread *td, sigset_t *oldset)
1584 {
1585 	sigset_t newset;
1586 	int i;
1587 	struct proc *p;
1588 
1589 	SIGFILLSET(newset);
1590 	if (td == NULL)
1591 		td = curthread; /* XXX */
1592 	p = td->td_proc;
1593 	/* Remove the NFS set of signals from newset */
1594 	PROC_LOCK(p);
1595 	mtx_lock(&p->p_sigacts->ps_mtx);
1596 	for (i = 0 ; i < nitems(newnfs_sig_set); i++) {
1597 		/*
1598 		 * But make sure we leave the ones already masked
1599 		 * by the process, ie. remove the signal from the
1600 		 * temporary signalmask only if it wasn't already
1601 		 * in p_sigmask.
1602 		 */
1603 		if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) &&
1604 		    !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i]))
1605 			SIGDELSET(newset, newnfs_sig_set[i]);
1606 	}
1607 	mtx_unlock(&p->p_sigacts->ps_mtx);
1608 	kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
1609 	    SIGPROCMASK_PROC_LOCKED);
1610 	PROC_UNLOCK(p);
1611 }
1612 
1613 void
newnfs_restore_sigmask(struct thread * td,sigset_t * set)1614 newnfs_restore_sigmask(struct thread *td, sigset_t *set)
1615 {
1616 	if (td == NULL)
1617 		td = curthread; /* XXX */
1618 	kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
1619 }
1620 
1621 /*
1622  * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
1623  * old one after msleep() returns.
1624  */
1625 int
newnfs_msleep(struct thread * td,void * ident,struct mtx * mtx,int priority,char * wmesg,int timo)1626 newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)
1627 {
1628 	sigset_t oldset;
1629 	int error;
1630 
1631 	if ((priority & PCATCH) == 0)
1632 		return msleep(ident, mtx, priority, wmesg, timo);
1633 	if (td == NULL)
1634 		td = curthread; /* XXX */
1635 	newnfs_set_sigmask(td, &oldset);
1636 	error = msleep(ident, mtx, priority, wmesg, timo);
1637 	newnfs_restore_sigmask(td, &oldset);
1638 	return (error);
1639 }
1640 
1641 /*
1642  * Test for a termination condition pending on the process.
1643  * This is used for NFSMNT_INT mounts.
1644  */
1645 int
newnfs_sigintr(struct nfsmount * nmp,struct thread * td)1646 newnfs_sigintr(struct nfsmount *nmp, struct thread *td)
1647 {
1648 	struct proc *p;
1649 	sigset_t tmpset;
1650 
1651 	/* Terminate all requests while attempting a forced unmount. */
1652 	if (NFSCL_FORCEDISM(nmp->nm_mountp))
1653 		return (EIO);
1654 	if (!(nmp->nm_flag & NFSMNT_INT))
1655 		return (0);
1656 	if (td == NULL)
1657 		return (0);
1658 	p = td->td_proc;
1659 	PROC_LOCK(p);
1660 	tmpset = p->p_siglist;
1661 	SIGSETOR(tmpset, td->td_siglist);
1662 	SIGSETNAND(tmpset, td->td_sigmask);
1663 	mtx_lock(&p->p_sigacts->ps_mtx);
1664 	SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1665 	mtx_unlock(&p->p_sigacts->ps_mtx);
1666 	if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
1667 	    && nfs_sig_pending(tmpset)) {
1668 		PROC_UNLOCK(p);
1669 		return (EINTR);
1670 	}
1671 	PROC_UNLOCK(p);
1672 	return (0);
1673 }
1674 
1675 static int
nfs_msg(struct thread * td,const char * server,const char * msg,int error)1676 nfs_msg(struct thread *td, const char *server, const char *msg, int error)
1677 {
1678 	struct proc *p;
1679 
1680 	p = td ? td->td_proc : NULL;
1681 	if (error) {
1682 		tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n",
1683 		    server, msg, error);
1684 	} else {
1685 		tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
1686 	}
1687 	return (0);
1688 }
1689 
1690 static void
nfs_down(struct nfsmount * nmp,struct thread * td,const char * msg,int error,int flags)1691 nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
1692     int error, int flags)
1693 {
1694 	if (nmp == NULL)
1695 		return;
1696 	mtx_lock(&nmp->nm_mtx);
1697 	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
1698 		nmp->nm_state |= NFSSTA_TIMEO;
1699 		mtx_unlock(&nmp->nm_mtx);
1700 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1701 		    VQ_NOTRESP, 0);
1702 	} else
1703 		mtx_unlock(&nmp->nm_mtx);
1704 	mtx_lock(&nmp->nm_mtx);
1705 	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
1706 		nmp->nm_state |= NFSSTA_LOCKTIMEO;
1707 		mtx_unlock(&nmp->nm_mtx);
1708 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1709 		    VQ_NOTRESPLOCK, 0);
1710 	} else
1711 		mtx_unlock(&nmp->nm_mtx);
1712 	nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
1713 }
1714 
1715 static void
nfs_up(struct nfsmount * nmp,struct thread * td,const char * msg,int flags,int tprintfmsg)1716 nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
1717     int flags, int tprintfmsg)
1718 {
1719 	if (nmp == NULL)
1720 		return;
1721 	if (tprintfmsg) {
1722 		nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
1723 	}
1724 
1725 	mtx_lock(&nmp->nm_mtx);
1726 	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
1727 		nmp->nm_state &= ~NFSSTA_TIMEO;
1728 		mtx_unlock(&nmp->nm_mtx);
1729 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1730 		    VQ_NOTRESP, 1);
1731 	} else
1732 		mtx_unlock(&nmp->nm_mtx);
1733 
1734 	mtx_lock(&nmp->nm_mtx);
1735 	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
1736 		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
1737 		mtx_unlock(&nmp->nm_mtx);
1738 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1739 		    VQ_NOTRESPLOCK, 1);
1740 	} else
1741 		mtx_unlock(&nmp->nm_mtx);
1742 }
1743