xref: /freebsd/sys/fs/nfs/nfs_commonkrpc.c (revision 608da65de9552d5678c1000776ed69da04a45983)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1991, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 
36 #include <sys/cdefs.h>
37 /*
38  * Socket operations for use by nfs
39  */
40 
41 #include "opt_kgssapi.h"
42 #include "opt_nfs.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/limits.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/mount.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/signalvar.h>
55 #include <sys/syscallsubr.h>
56 #include <sys/sysctl.h>
57 #include <sys/syslog.h>
58 #include <sys/vnode.h>
59 
60 #include <rpc/rpc.h>
61 #include <rpc/krpc.h>
62 
63 #include <kgssapi/krb5/kcrypto.h>
64 
65 #include <fs/nfs/nfsport.h>
66 
67 #ifdef KDTRACE_HOOKS
68 #include <sys/dtrace_bsd.h>
69 
70 dtrace_nfsclient_nfs23_start_probe_func_t
71 		dtrace_nfscl_nfs234_start_probe;
72 
73 dtrace_nfsclient_nfs23_done_probe_func_t
74 		dtrace_nfscl_nfs234_done_probe;
75 
76 /*
77  * Registered probes by RPC type.
78  */
79 uint32_t	nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
80 uint32_t	nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];
81 
82 uint32_t	nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
83 uint32_t	nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];
84 
85 uint32_t	nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];
86 uint32_t	nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
87 #endif
88 
89 NFSSTATESPINLOCK;
90 NFSREQSPINLOCK;
91 NFSDLOCKMUTEX;
92 NFSCLSTATEMUTEX;
93 extern struct nfsstatsv1 nfsstatsv1;
94 extern struct nfsreqhead nfsd_reqq;
95 extern int nfscl_ticks;
96 extern void (*ncl_call_invalcaches)(struct vnode *);
97 extern int nfs_numnfscbd;
98 extern int nfscl_debuglevel;
99 extern int nfsrv_lease;
100 
101 SVCPOOL		*nfscbd_pool;
102 int		nfs_bufpackets = 4;
103 static int	nfsrv_gsscallbackson = 0;
104 static int	nfs_reconnects;
105 static int	nfs3_jukebox_delay = 10;
106 static int	nfs_skip_wcc_data_onerr = 1;
107 static int	nfs_dsretries = 2;
108 static struct timespec	nfs_trylater_max = {
109 	.tv_sec		= NFS_TRYLATERDEL,
110 	.tv_nsec	= 0,
111 };
112 
113 SYSCTL_DECL(_vfs_nfs);
114 
115 SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
116     "Buffer reservation size 2 < x < 64");
117 SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
118     "Number of times the nfs client has had to reconnect");
119 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
120     "Number of seconds to delay a retry after receiving EJUKEBOX");
121 SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
122     "Disable weak cache consistency checking when server returns an error");
123 SYSCTL_INT(_vfs_nfs, OID_AUTO, dsretries, CTLFLAG_RW, &nfs_dsretries, 0,
124     "Number of retries for a DS RPC before failure");
125 
126 static void	nfs_down(struct nfsmount *, struct thread *, const char *,
127     int, int);
128 static void	nfs_up(struct nfsmount *, struct thread *, const char *,
129     int, int);
130 static int	nfs_msg(struct thread *, const char *, const char *, int);
131 
132 struct nfs_cached_auth {
133 	int		ca_refs; /* refcount, including 1 from the cache */
134 	uid_t		ca_uid;	 /* uid that corresponds to this auth */
135 	AUTH		*ca_auth; /* RPC auth handle */
136 };
137 
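/*
 * Map the common (NFSv3) procedure numbers to their NFSv2 equivalents
 * for mounts done with the NFSv2 protocol.  Procedures with no NFSv2
 * counterpart (such as Access and Readdirplus) fall back to
 * NFSV2PROC_NOOP.
 */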
138 static int nfsv2_procid[NFS_V3NPROCS] = {
139 	NFSV2PROC_NULL,
140 	NFSV2PROC_GETATTR,
141 	NFSV2PROC_SETATTR,
142 	NFSV2PROC_LOOKUP,
143 	NFSV2PROC_NOOP,
144 	NFSV2PROC_READLINK,
145 	NFSV2PROC_READ,
146 	NFSV2PROC_WRITE,
147 	NFSV2PROC_CREATE,
148 	NFSV2PROC_MKDIR,
149 	NFSV2PROC_SYMLINK,
150 	NFSV2PROC_CREATE,
151 	NFSV2PROC_REMOVE,
152 	NFSV2PROC_RMDIR,
153 	NFSV2PROC_RENAME,
154 	NFSV2PROC_LINK,
155 	NFSV2PROC_READDIR,
156 	NFSV2PROC_NOOP,
157 	NFSV2PROC_STATFS,
158 	NFSV2PROC_NOOP,
159 	NFSV2PROC_NOOP,
160 	NFSV2PROC_NOOP,
161 };
162 
163 /*
164  * This static array indicates that an NFSv4 RPC should use
165  * RPCSEC_GSS, if the mount indicates that via sec=krb5[ip].
166  * System RPCs that do not use file handles will be false
167  * in this array so that they will use AUTH_SYS when the
168  * "syskrb5" mount option is specified, along with
169  * "sec=krb5[ip]".
170  */
171 static bool nfscl_use_gss[NFSV42_NPROCS] = {
172 	true,
173 	true,
174 	true,
175 	true,
176 	true,
177 	true,
178 	true,
179 	true,
180 	true,
181 	true,
182 	true,
183 	true,
184 	true,
185 	true,
186 	true,
187 	true,
188 	true,
189 	true,
190 	true,
191 	true,
192 	true,
193 	true,
194 	true,
195 	false,		/* SetClientID */
196 	false,		/* SetClientIDConfirm */
197 	true,
198 	true,
199 	true,
200 	true,
201 	true,
202 	true,
203 	true,
204 	false,		/* Renew */
205 	true,
206 	false,		/* ReleaseLockOwn */
207 	true,
208 	true,
209 	true,
210 	true,
211 	true,
212 	true,
213 	false,		/* ExchangeID */
214 	false,		/* CreateSession */
215 	false,		/* DestroySession */
216 	false,		/* DestroyClientID */
217 	false,		/* FreeStateID */
218 	true,
219 	true,
220 	true,
221 	true,
222 	false,		/* ReclaimComplete */
223 	true,
224 	true,
225 	true,
226 	true,
227 	true,
228 	true,
229 	true,
230 	true,
231 	true,
232 	true,
233 	true,
234 	true,
235 	true,
236 	true,
237 	false,		/* BindConnectionToSession */
238 	true,
239 	true,
240 	true,
241 	true,
242 };
243 
244 /*
245  * Initialize sockets and congestion for a new NFS connection.
246  * We do not free the sockaddr on error.
247  * Which arguments are set to NULL indicates what kind of call it is.
248  * cred == NULL --> a call to connect to a pNFS DS
249  * nmp == NULL --> an upcall to userland or an NFSv4.0 callback
250  */
251 int
252 newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
253     struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls,
254     struct __rpc_client **clipp)
255 {
256 	int rcvreserve, sndreserve;
257 	int pktscale, pktscalesav;
258 	struct sockaddr *saddr;
259 	struct ucred *origcred;
260 	CLIENT *client;
261 	struct netconfig *nconf;
262 	struct socket *so;
263 	int one = 1, retries, error = 0;
264 	struct thread *td = curthread;
265 	SVCXPRT *xprt;
266 	struct timeval timo;
267 	uint64_t tval;
268 
269 	/*
270 	 * We need to establish the socket using the credentials of
271 	 * the mountpoint.  Some parts of this process (such as
272  * sobind() and soconnect()) will use the current thread's
273 	 * credential instead of the socket credential.  To work
274 	 * around this, temporarily change the current thread's
275 	 * credential to that of the mountpoint.
276 	 *
277 	 * XXX: It would be better to explicitly pass the correct
278 	 * credential to sobind() and soconnect().
279 	 */
280 	origcred = td->td_ucred;
281 
282 	/*
283 	 * Use the credential in nr_cred, if not NULL.
284 	 */
285 	if (nrp->nr_cred != NULL)
286 		td->td_ucred = nrp->nr_cred;
287 	else
288 		td->td_ucred = cred;
289 	saddr = nrp->nr_nam;
290 
291 	if (saddr->sa_family == AF_INET)
292 		if (nrp->nr_sotype == SOCK_DGRAM)
293 			nconf = getnetconfigent("udp");
294 		else
295 			nconf = getnetconfigent("tcp");
296 	else
297 		if (nrp->nr_sotype == SOCK_DGRAM)
298 			nconf = getnetconfigent("udp6");
299 		else
300 			nconf = getnetconfigent("tcp6");
301 
302 	pktscale = nfs_bufpackets;
303 	if (pktscale < 2)
304 		pktscale = 2;
305 	if (pktscale > 64)
306 		pktscale = 64;
307 	pktscalesav = pktscale;
308 	/*
309 	 * soreserve() can fail if sb_max is too small, so shrink pktscale
310 	 * and try again if there is an error.
311 	 * Print a log message suggesting increasing sb_max.
312  * Creating a socket and doing the soreserve() here is necessary
313  * because, if the reservation sizes are too large for soreserve() to
314  * succeed, the connection would otherwise work until a large send is
315  * attempted and then loop in the krpc code.
316 	 */
317 	so = NULL;
318 	saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *);
319 	error = socreate(saddr->sa_family, &so, nrp->nr_sotype,
320 	    nrp->nr_soproto, td->td_ucred, td);
321 	if (error != 0)
322 		goto out;
323 	do {
324 	    if (error != 0 && pktscale > 2) {
325 		if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
326 		    pktscale == pktscalesav) {
327 		    /*
328 		     * Suggest vfs.nfs.bufpackets * maximum RPC message,
329 		     * adjusted for the sb_max->sb_max_adj conversion of
330 		     * MCLBYTES / (MSIZE + MCLBYTES) as the minimum setting
331 		     * for kern.ipc.maxsockbuf.
332 		     */
333 		    tval = (NFS_MAXBSIZE + NFS_MAXXDR) * nfs_bufpackets;
334 		    tval *= MSIZE + MCLBYTES;
335 		    tval += MCLBYTES - 1; /* Round up divide by MCLBYTES. */
336 		    tval /= MCLBYTES;
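		    /*
		     * Illustrative (hypothetical) numbers: with a 64KiB
		     * NFS_MAXBSIZE, 1KiB NFS_MAXXDR, nfs_bufpackets of 4,
		     * 256 byte mbufs and 2KiB clusters, the 260KiB
		     * reservation scaled by (MSIZE + MCLBYTES) / MCLBYTES
		     * would suggest a kern.ipc.maxsockbuf of roughly 293KiB.
		     */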
337 		    printf("Consider increasing kern.ipc.maxsockbuf to a "
338 			"minimum of %ju to support %u-byte NFS I/O\n",
339 			(uintmax_t)tval, NFS_MAXBSIZE);
340 		}
341 		pktscale--;
342 	    }
343 	    if (nrp->nr_sotype == SOCK_DGRAM) {
344 		if (nmp != NULL) {
345 			sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
346 			    pktscale;
347 			rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
348 			    pktscale;
349 		} else {
350 			sndreserve = rcvreserve = 1024 * pktscale;
351 		}
352 	    } else {
353 		if (nrp->nr_sotype != SOCK_STREAM)
354 			panic("nfscon sotype");
355 		if (nmp != NULL) {
356 			sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR) *
357 			    pktscale;
358 			rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR) *
359 			    pktscale;
360 		} else {
361 			sndreserve = rcvreserve = 1024 * pktscale;
362 		}
363 	    }
364 	    error = soreserve(so, sndreserve, rcvreserve);
365 	    if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
366 		pktscale <= 2)
367 		printf("Must increase kern.ipc.maxsockbuf or reduce"
368 		    " rsize, wsize\n");
369 	} while (error != 0 && pktscale > 2);
370 	soclose(so);
371 	if (error != 0)
372 		goto out;
373 
374 	client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog,
375 	    nrp->nr_vers, sndreserve, rcvreserve);
376 	CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
377 	if (nmp != NULL) {
378 		if ((nmp->nm_flag & NFSMNT_INT))
379 			CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
380 		if ((nmp->nm_flag & NFSMNT_RESVPORT))
381 			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
382 		if (NFSHASTLS(nmp)) {
383 			CLNT_CONTROL(client, CLSET_TLS, &one);
384 			if (nmp->nm_tlscertname != NULL)
385 				CLNT_CONTROL(client, CLSET_TLSCERTNAME,
386 				    nmp->nm_tlscertname);
387 		}
388 		if (NFSHASSOFT(nmp)) {
389 			if (nmp->nm_sotype == SOCK_DGRAM)
390 				/*
391 				 * For UDP, the large timeout for a reconnect
392 				 * will be set to "nm_retry * nm_timeo / 2", so
393 				 * we only want to do 2 reconnect timeout
394 				 * retries.
395 				 */
396 				retries = 2;
397 			else
398 				retries = nmp->nm_retry;
399 		} else
400 			retries = INT_MAX;
401 		if (NFSHASNFSV4N(nmp)) {
402 			if (cred != NULL) {
403 				if (NFSHASSOFT(nmp)) {
404 					/*
405 					 * This should be a DS mount.
406 					 * Use CLSET_TIMEOUT to set the timeout
407 					 * for connections to DSs instead of
408 					 * specifying a timeout on each RPC.
409 					 * This is done so that SO_SNDTIMEO
410 					 * is set on the TCP socket as well
411 					 * as specifying a time limit when
412 					 * waiting for an RPC reply.  Useful
413 					 * if the send queue for the TCP
414 					 * connection has become constipated,
415 					 * due to a failed DS.
416 					 * The choice of lease_duration / 4 is
417 					 * fairly arbitrary, but seems to work
418 					 * ok, with a lower bound of 10sec.
419 					 */
420 					timo.tv_sec = nfsrv_lease / 4;
421 					if (timo.tv_sec < 10)
422 						timo.tv_sec = 10;
423 					timo.tv_usec = 0;
424 					CLNT_CONTROL(client, CLSET_TIMEOUT,
425 					    &timo);
426 				}
427 				/*
428 				 * Make sure the nfscbd_pool doesn't get
429 				 * destroyed while doing this.
430 				 */
431 				NFSD_LOCK();
432 				if (nfs_numnfscbd > 0) {
433 					nfs_numnfscbd++;
434 					NFSD_UNLOCK();
435 					xprt = svc_vc_create_backchannel(
436 					    nfscbd_pool);
437 					CLNT_CONTROL(client, CLSET_BACKCHANNEL,
438 					    xprt);
439 					NFSD_LOCK();
440 					nfs_numnfscbd--;
441 					if (nfs_numnfscbd == 0)
442 						wakeup(&nfs_numnfscbd);
443 				}
444 				NFSD_UNLOCK();
445 			} else {
446 				/*
447 				 * cred == NULL for a DS connect.
448 				 * For connects to a DS, set a retry limit
449 				 * so that failed DSs will be detected.
450 				 * This is ok for NFSv4.1, since a DS does
451 				 * not maintain open/lock state and is the
452 				 * only case where using a "soft" mount is
453 				 * recommended for NFSv4.
454 				 * For mounts from the MDS to DS, this is done
455 				 * via mount options, but that is not the case
456 				 * here.  The retry limit here can be adjusted
457 				 * via the sysctl vfs.nfs.dsretries.
458 				 * See the comment above w.r.t. timeout.
459 				 */
460 				timo.tv_sec = nfsrv_lease / 4;
461 				if (timo.tv_sec < 10)
462 					timo.tv_sec = 10;
463 				timo.tv_usec = 0;
464 				CLNT_CONTROL(client, CLSET_TIMEOUT, &timo);
465 				retries = nfs_dsretries;
466 			}
467 		}
468 	} else {
469 		/*
470 		 * Three cases:
471 		 * - Null RPC callback to client
472 		 * - Non-Null RPC callback to client, wait a little longer
473 		 * - upcalls to nfsuserd and gssd (clp == NULL)
474 		 */
475 		if (callback_retry_mult == 0) {
476 			retries = NFSV4_UPCALLRETRY;
477 			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
478 		} else {
479 			retries = NFSV4_CALLBACKRETRY * callback_retry_mult;
480 		}
481 		if (dotls)
482 			CLNT_CONTROL(client, CLSET_TLS, &one);
483 	}
484 	CLNT_CONTROL(client, CLSET_RETRIES, &retries);
485 
486 	if (nmp != NULL) {
487 		/*
488 		 * For UDP, there are 2 timeouts:
489 		 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
490 		 *   that does a retransmit of an RPC request using the same
491 		 *   socket and xid. This is what you normally want to do,
492 		 *   since NFS servers depend on "same xid" for their
493 		 *   Duplicate Request Cache.
494 		 * - timeout specified in CLNT_CALL_MBUF(), which specifies when
495 		 *   retransmits on the same socket should fail and a fresh
496 		 *   socket be created. Each of these timeouts counts as one
497 		 *   CLSET_RETRIES as set above.
498 		 * Set the initial retransmit timeout for UDP. This timeout
499 		 * doesn't exist for TCP and the following call just fails,
500 		 * which is ok.
501 		 */
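		/* nm_timeo is expressed in NFS_HZ ticks; convert to a timeval. */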
502 		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
503 		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
504 		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
505 	}
506 
507 	/*
508 	 * *clipp is &nrp->nr_client or &nm_aconn[nmp->nm_nextaconn].
509 	 * The latter case is for additional connections specified by the
510 	 * "nconnect" mount option.  nr_mtx etc. are used for these additional
511 	 * connections, as well as for nr_client in the nfssockreq
512 	 * structure for the mount.
513 	 */
514 	mtx_lock(&nrp->nr_mtx);
515 	if (*clipp != NULL) {
516 		mtx_unlock(&nrp->nr_mtx);
517 		/*
518 		 * Someone else already connected.
519 		 */
520 		CLNT_RELEASE(client);
521 	} else {
522 		*clipp = client;
523 		/*
524 		 * Protocols that do not require connections may be optionally
525 		 * left unconnected for servers that reply from a port other
526 		 * than NFS_PORT.
527 		 */
528 		if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
529 			mtx_unlock(&nrp->nr_mtx);
530 			CLNT_CONTROL(client, CLSET_CONNECT, &one);
531 		} else
532 			mtx_unlock(&nrp->nr_mtx);
533 	}
534 
535 out:
536 	/* Restore current thread's credentials. */
537 	td->td_ucred = origcred;
538 
539 	NFSEXITCODE(error);
540 	return (error);
541 }
542 
543 /*
544  * NFS disconnect. Clean up and unlink.
545  */
546 void
547 newnfs_disconnect(struct nfsmount *nmp, struct nfssockreq *nrp)
548 {
549 	CLIENT *client, *aconn[NFS_MAXNCONN - 1];
550 	int i;
551 
552 	mtx_lock(&nrp->nr_mtx);
553 	if (nrp->nr_client != NULL) {
554 		client = nrp->nr_client;
555 		nrp->nr_client = NULL;
556 		if (nmp != NULL && nmp->nm_aconnect > 0) {
557 			for (i = 0; i < nmp->nm_aconnect; i++) {
558 				aconn[i] = nmp->nm_aconn[i];
559 				nmp->nm_aconn[i] = NULL;
560 			}
561 		}
562 		mtx_unlock(&nrp->nr_mtx);
563 		rpc_gss_secpurge_call(client);
564 		CLNT_CLOSE(client);
565 		CLNT_RELEASE(client);
566 		if (nmp != NULL && nmp->nm_aconnect > 0) {
567 			for (i = 0; i < nmp->nm_aconnect; i++) {
568 				if (aconn[i] != NULL) {
569 					rpc_gss_secpurge_call(aconn[i]);
570 					CLNT_CLOSE(aconn[i]);
571 					CLNT_RELEASE(aconn[i]);
572 				}
573 			}
574 		}
575 	} else {
576 		mtx_unlock(&nrp->nr_mtx);
577 	}
578 }
579 
580 static AUTH *
581 nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal,
582     char *srv_principal, gss_OID mech_oid, struct ucred *cred)
583 {
584 	rpc_gss_service_t svc;
585 	AUTH *auth;
586 
587 	switch (secflavour) {
588 	case RPCSEC_GSS_KRB5:
589 	case RPCSEC_GSS_KRB5I:
590 	case RPCSEC_GSS_KRB5P:
591 		if (!mech_oid) {
592 			if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid))
593 				return (NULL);
594 		}
595 		if (secflavour == RPCSEC_GSS_KRB5)
596 			svc = rpc_gss_svc_none;
597 		else if (secflavour == RPCSEC_GSS_KRB5I)
598 			svc = rpc_gss_svc_integrity;
599 		else
600 			svc = rpc_gss_svc_privacy;
601 
602 		if (clnt_principal == NULL)
603 			auth = rpc_gss_secfind_call(nrp->nr_client, cred,
604 			    srv_principal, mech_oid, svc);
605 		else {
606 			auth = rpc_gss_seccreate_call(nrp->nr_client, cred,
607 			    clnt_principal, srv_principal, "kerberosv5",
608 			    svc, NULL, NULL, NULL);
609 			return (auth);
610 		}
611 		if (auth != NULL)
612 			return (auth);
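		/*
		 * No RPCSEC_GSS credential could be found for this user,
		 * so fall through and use AUTH_SYS instead.
		 */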
613 		/* fallthrough */
614 	case AUTH_SYS:
615 	default:
616 		return (authunix_create(cred));
617 	}
618 }
619 
620 /*
621  * Callback from the RPC code to generate up/down notifications.
622  */
623 
624 struct nfs_feedback_arg {
625 	struct nfsmount *nf_mount;
626 	int		nf_lastmsg;	/* last tprintf */
627 	int		nf_tprintfmsg;
628 	struct thread	*nf_td;
629 };
630 
631 static void
632 nfs_feedback(int type, int proc, void *arg)
633 {
634 	struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
635 	struct nfsmount *nmp = nf->nf_mount;
636 	time_t now;
637 
638 	switch (type) {
639 	case FEEDBACK_REXMIT2:
640 	case FEEDBACK_RECONNECT:
641 		now = NFSD_MONOSEC;
642 		if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) {
643 			nfs_down(nmp, nf->nf_td,
644 			    "not responding", 0, NFSSTA_TIMEO);
645 			nf->nf_tprintfmsg = TRUE;
646 			nf->nf_lastmsg = now;
647 		}
648 		break;
649 
650 	case FEEDBACK_OK:
651 		nfs_up(nf->nf_mount, nf->nf_td,
652 		    "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
653 		break;
654 	}
655 }
656 
657 /*
658  * newnfs_request - goes something like this
659  *	- does the rpc by calling the krpc layer
660  *	- break down rpc header and return with nfs reply
661  * nb: always frees up nd_mreq mbuf list
662  */
663 int
664 newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
665     struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp,
666     struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
667     u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
668 {
669 	uint32_t retseq, retval, slotseq, *tl;
670 	int i = 0, j = 0, opcnt, set_sigset = 0, slot;
671 	int error = 0, usegssname = 0, secflavour = AUTH_SYS;
672 	int freeslot, maxslot, reterr, slotpos, timeo;
673 	u_int16_t procnum;
674 	u_int nextconn;
675 	struct nfs_feedback_arg nf;
676 	struct timeval timo;
677 	AUTH *auth;
678 	struct rpc_callextra ext;
679 	enum clnt_stat stat;
680 	struct nfsreq *rep = NULL;
681 	char *srv_principal = NULL, *clnt_principal = NULL;
682 	sigset_t oldset;
683 	struct ucred *authcred;
684 	struct nfsclsession *sep;
685 	uint8_t sessionid[NFSX_V4SESSIONID];
686 	bool nextconn_set;
687 	struct timespec trylater_delay, ts, waituntil;
688 
689 	/* Initially 1msec. */
690 	trylater_delay.tv_sec = 0;
691 	trylater_delay.tv_nsec = 1000000;
692 	sep = dssep;
693 	if (xidp != NULL)
694 		*xidp = 0;
695 	/* Reject requests while attempting a forced unmount. */
696 	if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) {
697 		m_freem(nd->nd_mreq);
698 		return (ESTALE);
699 	}
700 
701 	/*
702 	 * Set authcred, which is used to acquire RPC credentials, to
703 	 * the cred argument by default. The crhold() should not be
704 	 * necessary, but will ensure that some future code change
705 	 * doesn't result in the credential being free'd prematurely.
706 	 */
707 	authcred = crhold(cred);
708 
709 	/* For client side interruptible mounts, mask off the signals. */
710 	if (nmp != NULL && td != NULL && NFSHASINT(nmp)) {
711 		newnfs_set_sigmask(td, &oldset);
712 		set_sigset = 1;
713 	}
714 
715 	/*
716 	 * If not already connected, call newnfs_connect() now.
717 	 */
718 	if (nrp->nr_client == NULL)
719 		newnfs_connect(nmp, nrp, cred, td, 0, false, &nrp->nr_client);
720 
721 	/*
722 	 * If the "nconnect" mount option was specified and this RPC is
723 	 * one that can have a large RPC message and is being done through
724 	 * the NFS/MDS server, use an additional connection. (When the RPC is
725 	 * being done through the server/MDS, nrp == &nmp->nm_sockreq.)
726 	 * The "nconnect" mount option normally has minimal effect when the
727 	 * "pnfs" mount option is specified, since only Readdir RPCs are
728 	 * normally done through the NFS/MDS server.
729 	 */
730 	nextconn_set = false;
731 	if (nmp != NULL && nmp->nm_aconnect > 0 && nrp == &nmp->nm_sockreq &&
732 	    (nd->nd_procnum == NFSPROC_READ ||
733 	     nd->nd_procnum == NFSPROC_READDIR ||
734 	     nd->nd_procnum == NFSPROC_READDIRPLUS ||
735 	     nd->nd_procnum == NFSPROC_WRITE)) {
736 		nextconn = atomic_fetchadd_int(&nmp->nm_nextaconn, 1);
737 		nextconn %= nmp->nm_aconnect;
738 		nextconn_set = true;
739 		if (nmp->nm_aconn[nextconn] == NULL)
740 			newnfs_connect(nmp, nrp, cred, td, 0, false,
741 			    &nmp->nm_aconn[nextconn]);
742 	}
743 
744 	/*
745 	 * For a client side mount, nmp != NULL and clp == NULL. For
746 	 * server calls (callbacks or upcalls), nmp == NULL.
747 	 */
748 	if (clp != NULL) {
749 		NFSLOCKSTATE();
750 		if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) {
751 			secflavour = RPCSEC_GSS_KRB5;
752 			if (nd->nd_procnum != NFSPROC_NULL) {
753 				if (clp->lc_flags & LCL_GSSINTEGRITY)
754 					secflavour = RPCSEC_GSS_KRB5I;
755 				else if (clp->lc_flags & LCL_GSSPRIVACY)
756 					secflavour = RPCSEC_GSS_KRB5P;
757 			}
758 		}
759 		NFSUNLOCKSTATE();
760 	} else if (nmp != NULL && NFSHASKERB(nmp) &&
761 	     nd->nd_procnum != NFSPROC_NULL && (!NFSHASSYSKRB5(nmp) ||
762 	     nfscl_use_gss[nd->nd_procnum])) {
763 		if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0)
764 			nd->nd_flag |= ND_USEGSSNAME;
765 		if ((nd->nd_flag & ND_USEGSSNAME) != 0) {
766 			/*
767 			 * If there is a client side host based credential,
768 			 * use that, otherwise use the system uid, if set.
769 			 * The system uid is in the nmp->nm_sockreq.nr_cred
770 			 * credentials.
771 			 */
772 			if (nmp->nm_krbnamelen > 0) {
773 				usegssname = 1;
774 				clnt_principal = nmp->nm_krbname;
775 			} else if (nmp->nm_uid != (uid_t)-1) {
776 				KASSERT(nmp->nm_sockreq.nr_cred != NULL,
777 				    ("newnfs_request: NULL nr_cred"));
778 				crfree(authcred);
779 				authcred = crhold(nmp->nm_sockreq.nr_cred);
780 			}
781 		} else if (nmp->nm_krbnamelen == 0 &&
782 		    nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) {
783 			/*
784 			 * If there is no host based principal name and
785 			 * the system uid is set and this is root, use the
786 			 * system uid, since root won't have user
787 			 * credentials in a credentials cache file.
788 			 * The system uid is in the nmp->nm_sockreq.nr_cred
789 			 * credentials.
790 			 */
791 			KASSERT(nmp->nm_sockreq.nr_cred != NULL,
792 			    ("newnfs_request: NULL nr_cred"));
793 			crfree(authcred);
794 			authcred = crhold(nmp->nm_sockreq.nr_cred);
795 		}
796 		if (NFSHASINTEGRITY(nmp))
797 			secflavour = RPCSEC_GSS_KRB5I;
798 		else if (NFSHASPRIVACY(nmp))
799 			secflavour = RPCSEC_GSS_KRB5P;
800 		else
801 			secflavour = RPCSEC_GSS_KRB5;
802 		srv_principal = NFSMNT_SRVKRBNAME(nmp);
803 	} else if (nmp != NULL && (!NFSHASKERB(nmp) || NFSHASSYSKRB5(nmp)) &&
804 	    nd->nd_procnum != NFSPROC_NULL &&
805 	    (nd->nd_flag & ND_USEGSSNAME) != 0) {
806 		/*
807 		 * Use the uid that did the mount when the RPC is doing
808 		 * NFSv4 system operations, as indicated by the
809 		 * ND_USEGSSNAME flag, for the AUTH_SYS case.
810 		 * The credentials in nm_sockreq.nr_cred were used for the
811 		 * mount.
812 		 */
813 		KASSERT(nmp->nm_sockreq.nr_cred != NULL,
814 		    ("newnfs_request: NULL nr_cred"));
815 		crfree(authcred);
816 		authcred = crhold(nmp->nm_sockreq.nr_cred);
817 	}
818 
819 	if (nmp != NULL) {
820 		bzero(&nf, sizeof(struct nfs_feedback_arg));
821 		nf.nf_mount = nmp;
822 		nf.nf_td = td;
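		/*
		 * Back-date nf_lastmsg so that the first "not responding"
		 * tprintf appears after nm_tprintf_initial_delay rather
		 * than the longer nm_tprintf_delay.
		 */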
823 		nf.nf_lastmsg = NFSD_MONOSEC -
824 		    ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay));
825 	}
826 
827 	if (nd->nd_procnum == NFSPROC_NULL)
828 		auth = authnone_create();
829 	else if (usegssname) {
830 		/*
831 		 * For this case, the authenticator is held in the
832 		 * nfssockreq structure, so don't release the reference count
833 		 * held on it. --> Don't AUTH_DESTROY() it in this function.
834 		 */
835 		if (nrp->nr_auth == NULL)
836 			nrp->nr_auth = nfs_getauth(nrp, secflavour,
837 			    clnt_principal, srv_principal, NULL, authcred);
838 		else
839 			rpc_gss_refresh_auth_call(nrp->nr_auth);
840 		auth = nrp->nr_auth;
841 	} else
842 		auth = nfs_getauth(nrp, secflavour, NULL,
843 		    srv_principal, NULL, authcred);
844 	crfree(authcred);
845 	if (auth == NULL) {
846 		m_freem(nd->nd_mreq);
847 		if (set_sigset)
848 			newnfs_restore_sigmask(td, &oldset);
849 		return (EACCES);
850 	}
851 	bzero(&ext, sizeof(ext));
852 	ext.rc_auth = auth;
853 	if (nmp != NULL) {
854 		ext.rc_feedback = nfs_feedback;
855 		ext.rc_feedback_arg = &nf;
856 	}
857 
858 	procnum = nd->nd_procnum;
859 	if ((nd->nd_flag & ND_NFSV4) &&
860 	    nd->nd_procnum != NFSPROC_NULL &&
861 	    nd->nd_procnum != NFSV4PROC_CBCOMPOUND)
862 		procnum = NFSV4PROC_COMPOUND;
863 
864 	if (nmp != NULL) {
865 		NFSINCRGLOBAL(nfsstatsv1.rpcrequests);
866 
867 		/* Map the procnum to the old NFSv2 one, as required. */
868 		if ((nd->nd_flag & ND_NFSV2) != 0) {
869 			if (nd->nd_procnum < NFS_V3NPROCS)
870 				procnum = nfsv2_procid[nd->nd_procnum];
871 			else
872 				procnum = NFSV2PROC_NOOP;
873 		}
874 
875 		/*
876 		 * Now only used for the R_DONTRECOVER case, but until that is
877 		 * supported within the krpc code, I need to keep a queue of
878 		 * outstanding RPCs for nfsv4 client requests.
879 		 */
880 		if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND)
881 			rep = malloc(sizeof(struct nfsreq),
882 			    M_NFSDREQ, M_WAITOK);
883 #ifdef KDTRACE_HOOKS
884 		if (dtrace_nfscl_nfs234_start_probe != NULL) {
885 			uint32_t probe_id;
886 			int probe_procnum;
887 
888 			if (nd->nd_flag & ND_NFSV4) {
889 				probe_id =
890 				    nfscl_nfs4_start_probes[nd->nd_procnum];
891 				probe_procnum = nd->nd_procnum;
892 			} else if (nd->nd_flag & ND_NFSV3) {
893 				probe_id = nfscl_nfs3_start_probes[procnum];
894 				probe_procnum = procnum;
895 			} else {
896 				probe_id =
897 				    nfscl_nfs2_start_probes[nd->nd_procnum];
898 				probe_procnum = procnum;
899 			}
900 			if (probe_id != 0)
901 				(dtrace_nfscl_nfs234_start_probe)
902 				    (probe_id, vp, nd->nd_mreq, cred,
903 				     probe_procnum);
904 		}
905 #endif
906 	}
907 	freeslot = -1;		/* Set to slot that needs to be free'd */
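	/*
	 * tryagain is re-entered to re-send the RPC after waiting out an
	 * NFSERR_BADSESSION recovery or a retriable reply such as
	 * NFSERR_DELAY/NFSERR_GRACE; for NFSv4.1 the Sequence slot
	 * information is refreshed first, when required.
	 */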
908 tryagain:
909 	slot = -1;		/* Slot that needs a sequence# increment. */
910 	/*
911 	 * This timeout specifies when a new socket should be created,
912 	 * along with new xid values. For UDP, this should be done
913 	 * infrequently, since retransmits of RPC requests should normally
914 	 * use the same xid.
915 	 */
916 	if (nmp == NULL) {
917 		if (clp == NULL) {
918 			timo.tv_sec = NFSV4_UPCALLTIMEO;
919 			timo.tv_usec = 0;
920 		} else {
921 			timo.tv_sec = NFSV4_CALLBACKTIMEO / 1000;
922 			timo.tv_usec = NFSV4_CALLBACKTIMEO * 1000;
923 		}
924 	} else {
925 		if (nrp->nr_sotype != SOCK_DGRAM) {
926 			timo.tv_usec = 0;
927 			if ((nmp->nm_flag & NFSMNT_NFSV4))
928 				timo.tv_sec = INT_MAX;
929 			else
930 				timo.tv_sec = NFS_TCPTIMEO;
931 		} else {
932 			if (NFSHASSOFT(nmp)) {
933 				/*
934 				 * CLSET_RETRIES is set to 2, so this should be
935 				 * half of the total timeout required.
936 				 */
937 				timeo = nmp->nm_retry * nmp->nm_timeo / 2;
938 				if (timeo < 1)
939 					timeo = 1;
940 				timo.tv_sec = timeo / NFS_HZ;
941 				timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
942 				    NFS_HZ;
943 			} else {
944 				/* For UDP hard mounts, use a large value. */
945 				timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
946 				timo.tv_usec = 0;
947 			}
948 		}
949 
950 		if (rep != NULL) {
951 			rep->r_flags = 0;
952 			rep->r_nmp = nmp;
953 			/*
954 			 * Chain request into list of outstanding requests.
955 			 */
956 			NFSLOCKREQ();
957 			TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain);
958 			NFSUNLOCKREQ();
959 		}
960 	}
961 
962 	nd->nd_mrep = NULL;
963 	if (clp != NULL && sep != NULL)
964 		stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
965 		    nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
966 	else if (nextconn_set)
967 		/*
968 		 * When there are multiple TCP connections, send the
969 		 * RPCs with large messages on the alternate TCP
970 		 * connection(s) in a round robin fashion.
971 		 * The small RPC messages are sent on the default
972 		 * TCP connection because they do not require much
973 		 * network bandwidth and separating them from the
974 		 * large RPC messages avoids them getting "log jammed"
975 		 * behind several large RPC messages.
976 		 */
977 		stat = CLNT_CALL_MBUF(nmp->nm_aconn[nextconn],
978 		    &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo);
979 	else
980 		stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
981 		    nd->nd_mreq, &nd->nd_mrep, timo);
982 	NFSCL_DEBUG(2, "clnt call=%d\n", stat);
983 
984 	if (rep != NULL) {
985 		/*
986 		 * RPC done, unlink the request.
987 		 */
988 		NFSLOCKREQ();
989 		TAILQ_REMOVE(&nfsd_reqq, rep, r_chain);
990 		NFSUNLOCKREQ();
991 	}
992 
993 	/*
994 	 * If there was a successful reply and a tprintf msg,
995 	 * tprintf a response.
996 	 */
997 	if (stat == RPC_SUCCESS) {
998 		error = 0;
999 	} else if (stat == RPC_TIMEDOUT) {
1000 		NFSINCRGLOBAL(nfsstatsv1.rpctimeouts);
1001 		error = ETIMEDOUT;
1002 	} else if (stat == RPC_VERSMISMATCH) {
1003 		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1004 		error = EOPNOTSUPP;
1005 	} else if (stat == RPC_PROGVERSMISMATCH) {
1006 		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1007 		error = EPROTONOSUPPORT;
1008 	} else if (stat == RPC_CANTSEND || stat == RPC_CANTRECV ||
1009 	     stat == RPC_SYSTEMERROR || stat == RPC_INTR) {
1010 		/* Check for a session slot that needs to be free'd. */
1011 		if ((nd->nd_flag & (ND_NFSV41 | ND_HASSLOTID)) ==
1012 		    (ND_NFSV41 | ND_HASSLOTID) && nmp != NULL &&
1013 		    nd->nd_procnum != NFSPROC_NULL) {
1014 			/*
1015 			 * This should only occur when either the MDS or
1016 			 * a client has an RPC against a DS fail.
1017 			 * This happens because these cases use "soft"
1018 			 * connections that can time out and fail.
1019 			 * The slot used for this RPC is now in a
1020 			 * non-deterministic state, but if the slot isn't
1021 			 * free'd, threads can get stuck waiting for a slot.
1022 			 */
1023 			if (sep == NULL)
1024 				sep = nfsmnt_mdssession(nmp);
1025 			/*
1026 			 * Bump the sequence# out of range, so that reuse of
1027 			 * this slot will result in an NFSERR_SEQMISORDERED
1028 			 * error and not a bogus cached RPC reply.
1029 			 */
1030 			mtx_lock(&sep->nfsess_mtx);
1031 			sep->nfsess_slotseq[nd->nd_slotid] += 10;
1032 			sep->nfsess_badslots |= (0x1ULL << nd->nd_slotid);
1033 			mtx_unlock(&sep->nfsess_mtx);
1034 			/* And free the slot. */
1035 			nfsv4_freeslot(sep, nd->nd_slotid, false);
1036 		}
1037 		if (stat == RPC_INTR)
1038 			error = EINTR;
1039 		else {
1040 			NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1041 			error = ENXIO;
1042 		}
1043 	} else {
1044 		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1045 		error = EACCES;
1046 	}
1047 	if (error) {
1048 		m_freem(nd->nd_mreq);
1049 		if (usegssname == 0)
1050 			AUTH_DESTROY(auth);
1051 		if (rep != NULL)
1052 			free(rep, M_NFSDREQ);
1053 		if (set_sigset)
1054 			newnfs_restore_sigmask(td, &oldset);
1055 		return (error);
1056 	}
1057 
1058 	KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n"));
1059 
1060 	/*
1061 	 * Search for any mbufs that are not a multiple of 4 bytes long
1062 	 * or with m_data not longword aligned.
1063 	 * These could cause pointer alignment problems, so copy them to
1064 	 * well aligned mbufs.
1065 	 */
1066 	newnfs_realign(&nd->nd_mrep, M_WAITOK);
1067 	nd->nd_md = nd->nd_mrep;
1068 	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
1069 	nd->nd_repstat = 0;
1070 	if (nd->nd_procnum != NFSPROC_NULL &&
1071 	    nd->nd_procnum != NFSV4PROC_CBNULL) {
1072 		/* If sep == NULL, set it to the default in nmp. */
1073 		if (sep == NULL && nmp != NULL)
1074 			sep = nfsmnt_mdssession(nmp);
1075 		/*
1076 		 * and now the actual NFS xdr.
1077 		 */
1078 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1079 		nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl);
1080 		if (nd->nd_repstat >= 10000)
1081 			NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum,
1082 			    (int)nd->nd_repstat);
1083 
1084 		/*
1085 		 * Get rid of the tag, return count and SEQUENCE result for
1086 		 * NFSv4.
1087 		 */
1088 		if ((nd->nd_flag & ND_NFSV4) != 0 && nd->nd_repstat !=
1089 		    NFSERR_MINORVERMISMATCH) {
1090 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1091 			i = fxdr_unsigned(int, *tl);
1092 			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
1093 			if (error)
1094 				goto nfsmout;
1095 			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1096 			opcnt = fxdr_unsigned(int, *tl++);
1097 			i = fxdr_unsigned(int, *tl++);
1098 			j = fxdr_unsigned(int, *tl);
1099 			if (j >= 10000)
1100 				NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j);
1101 			/*
1102 			 * If the first op is Sequence, handle its failure.
1103 			 */
1104 			if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) ||
1105 			   (clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0)) {
1106 				NFSCL_DEBUG(1, "failed seq=%d\n", j);
1107 				if (sep != NULL && i == NFSV4OP_SEQUENCE &&
1108 				    j == NFSERR_SEQMISORDERED) {
1109 					mtx_lock(&sep->nfsess_mtx);
1110 					sep->nfsess_badslots |=
1111 					    (0x1ULL << nd->nd_slotid);
1112 					mtx_unlock(&sep->nfsess_mtx);
1113 				}
1114 			}
1115 			if (((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) ||
1116 			    (clp != NULL && i == NFSV4OP_CBSEQUENCE &&
1117 			    j == 0)) && sep != NULL) {
1118 				if (i == NFSV4OP_SEQUENCE)
1119 					NFSM_DISSECT(tl, uint32_t *,
1120 					    NFSX_V4SESSIONID +
1121 					    5 * NFSX_UNSIGNED);
1122 				else
1123 					NFSM_DISSECT(tl, uint32_t *,
1124 					    NFSX_V4SESSIONID +
1125 					    4 * NFSX_UNSIGNED);
1126 				mtx_lock(&sep->nfsess_mtx);
1127 				if (bcmp(tl, sep->nfsess_sessionid,
1128 				    NFSX_V4SESSIONID) == 0) {
1129 					tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
1130 					retseq = fxdr_unsigned(uint32_t, *tl++);
1131 					slot = fxdr_unsigned(int, *tl++);
1132 					if ((nd->nd_flag & ND_HASSLOTID) != 0) {
1133 						if (slot >= NFSV4_SLOTS ||
1134 						    (i == NFSV4OP_CBSEQUENCE &&
1135 						     slot >= NFSV4_CBSLOTS)) {
1136 							printf("newnfs_request:"
1137 							    " Bogus slot\n");
1138 							slot = nd->nd_slotid;
1139 						} else if (slot !=
1140 						    nd->nd_slotid) {
1141 						    printf("newnfs_request:"
1142 							" Wrong session "
1143 							"srvslot=%d "
1144 							"slot=%d\n", slot,
1145 							nd->nd_slotid);
1146 						    if (i == NFSV4OP_SEQUENCE) {
1147 							/*
1148 							 * Mark both slots as
1149 							 * bad, because we do
1150 							 * not know if the
1151 							 * server has advanced
1152 							 * the sequence# for
1153 							 * either of them.
1154 							 */
1155 							sep->nfsess_badslots |=
1156 							    (0x1ULL << slot);
1157 							sep->nfsess_badslots |=
1158 							    (0x1ULL <<
1159 							     nd->nd_slotid);
1160 						    }
1161 						    slot = nd->nd_slotid;
1162 						}
1163 						freeslot = slot;
1164 					} else if (slot != 0) {
1165 						printf("newnfs_request: Bad "
1166 						    "session slot=%d\n", slot);
1167 						slot = 0;
1168 					}
1169 					if (retseq != sep->nfsess_slotseq[slot])
1170 						printf("retseq diff 0x%x\n",
1171 						    retseq);
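					/*
					 * retval holds the target highest
					 * slotid from the reply; track it in
					 * nfsess_foreslots, but never grow
					 * past 64 slots.
					 */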
1172 					retval = fxdr_unsigned(uint32_t, *++tl);
1173 					if ((retval + 1) <
1174 					    sep->nfsess_foreslots)
1175 						sep->nfsess_foreslots =
1176 						    (retval + 1);
1177 					else if ((retval + 1) >
1178 					    sep->nfsess_foreslots)
1179 						sep->nfsess_foreslots = (retval
1180 						    < 64) ? (retval + 1) : 64;
1181 				}
1182 				mtx_unlock(&sep->nfsess_mtx);
1183 
1184 				/* Grab the op and status for the next one. */
1185 				if (opcnt > 1) {
1186 					NFSM_DISSECT(tl, uint32_t *,
1187 					    2 * NFSX_UNSIGNED);
1188 					i = fxdr_unsigned(int, *tl++);
1189 					j = fxdr_unsigned(int, *tl);
1190 				}
1191 			}
1192 		}
1193 		if (nd->nd_repstat != 0) {
1194 			if (nd->nd_repstat == NFSERR_BADSESSION &&
1195 			    nmp != NULL && dssep == NULL &&
1196 			    (nd->nd_flag & ND_NFSV41) != 0) {
1197 				/*
1198 				 * If this is a client side MDS RPC, mark
1199 				 * the MDS session defunct and initiate
1200 				 * recovery, as required.
1201 				 * The nfsess_defunct field is protected by
1202 				 * the NFSLOCKMNT()/nm_mtx lock and not the
1203 				 * nfsess_mtx lock to simplify its handling,
1204 				 * for the MDS session. This lock is also
1205 				 * sufficient for nfsess_sessionid, since it
1206 				 * never changes in the structure.
1207 				 */
1208 				NFSCL_DEBUG(1, "Got badsession\n");
1209 				NFSLOCKCLSTATE();
1210 				NFSLOCKMNT(nmp);
1211 				if (TAILQ_EMPTY(&nmp->nm_sess)) {
1212 					NFSUNLOCKMNT(nmp);
1213 					NFSUNLOCKCLSTATE();
1214 					printf("If server has not rebooted, "
1215 					    "check NFS clients for unique "
1216 					    "/etc/hostid's\n");
1217 					goto out;
1218 				}
1219 				sep = NFSMNT_MDSSESSION(nmp);
1220 				if (bcmp(sep->nfsess_sessionid, nd->nd_sequence,
1221 				    NFSX_V4SESSIONID) == 0) {
1222 					printf("Initiate recovery. If server "
1223 					    "has not rebooted, "
1224 					    "check NFS clients for unique "
1225 					    "/etc/hostid's\n");
1226 					/* Initiate recovery. */
1227 					sep->nfsess_defunct = 1;
1228 					NFSCL_DEBUG(1, "Marked defunct\n");
1229 					if (nmp->nm_clp != NULL) {
1230 						nmp->nm_clp->nfsc_flags |=
1231 						    NFSCLFLAGS_RECOVER;
1232 						wakeup(nmp->nm_clp);
1233 					}
1234 				}
1235 				NFSUNLOCKCLSTATE();
1236 				/*
1237 				 * Sleep for up to 1sec waiting for a new
1238 				 * session.
1239 				 */
1240 				mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO,
1241 				    "nfsbadsess", hz);
1242 				/*
1243 				 * Get the session again, in case a new one
1244 				 * has been created during the sleep.
1245 				 */
1246 				sep = NFSMNT_MDSSESSION(nmp);
1247 				NFSUNLOCKMNT(nmp);
1248 				if ((nd->nd_flag & ND_LOOPBADSESS) != 0) {
1249 					reterr = nfsv4_sequencelookup(nmp, sep,
1250 					    &slotpos, &maxslot, &slotseq,
1251 					    sessionid, true);
1252 					if (reterr == 0) {
1253 						/* Fill in new session info. */
1254 						NFSCL_DEBUG(1,
1255 						  "Filling in new sequence\n");
1256 						tl = nd->nd_sequence;
1257 						bcopy(sessionid, tl,
1258 						    NFSX_V4SESSIONID);
1259 						tl += NFSX_V4SESSIONID /
1260 						    NFSX_UNSIGNED;
1261 						*tl++ = txdr_unsigned(slotseq);
1262 						*tl++ = txdr_unsigned(slotpos);
1263 						*tl = txdr_unsigned(maxslot);
1264 						nd->nd_slotid = slotpos;
1265 						nd->nd_flag |= ND_HASSLOTID;
1266 					}
1267 					if (reterr == NFSERR_BADSESSION ||
1268 					    reterr == 0) {
1269 						NFSCL_DEBUG(1,
1270 						    "Badsession looping\n");
1271 						m_freem(nd->nd_mrep);
1272 						nd->nd_mrep = NULL;
1273 						goto tryagain;
1274 					}
1275 					nd->nd_repstat = reterr;
1276 					NFSCL_DEBUG(1, "Got err=%d\n", reterr);
1277 				}
1278 			}
1279 			/*
1280 			 * When clp != NULL, it is a callback and all
1281 			 * callback operations can be retried for NFSERR_DELAY.
1282 			 */
1283 			if (((nd->nd_repstat == NFSERR_DELAY ||
1284 			      nd->nd_repstat == NFSERR_GRACE) &&
1285 			     (nd->nd_flag & ND_NFSV4) && (clp != NULL ||
1286 			     (nd->nd_procnum != NFSPROC_DELEGRETURN &&
1287 			     nd->nd_procnum != NFSPROC_SETATTR &&
1288 			     nd->nd_procnum != NFSPROC_READ &&
1289 			     nd->nd_procnum != NFSPROC_READDS &&
1290 			     nd->nd_procnum != NFSPROC_WRITE &&
1291 			     nd->nd_procnum != NFSPROC_WRITEDS &&
1292 			     nd->nd_procnum != NFSPROC_OPEN &&
1293 			     nd->nd_procnum != NFSPROC_OPENLAYGET &&
1294 			     nd->nd_procnum != NFSPROC_CREATE &&
1295 			     nd->nd_procnum != NFSPROC_CREATELAYGET &&
1296 			     nd->nd_procnum != NFSPROC_OPENCONFIRM &&
1297 			     nd->nd_procnum != NFSPROC_OPENDOWNGRADE &&
1298 			     nd->nd_procnum != NFSPROC_CLOSE &&
1299 			     nd->nd_procnum != NFSPROC_LOCK &&
1300 			     nd->nd_procnum != NFSPROC_LOCKU))) ||
1301 			    (nd->nd_repstat == NFSERR_DELAY &&
1302 			     (nd->nd_flag & ND_NFSV4) == 0) ||
1303 			    nd->nd_repstat == NFSERR_RESOURCE ||
1304 			    nd->nd_repstat == NFSERR_RETRYUNCACHEDREP) {
1305 				/* Clip at NFS_TRYLATERDEL. */
1306 				if (timespeccmp(&trylater_delay,
1307 				    &nfs_trylater_max, >))
1308 					trylater_delay = nfs_trylater_max;
1309 				getnanouptime(&waituntil);
1310 				timespecadd(&waituntil, &trylater_delay,
1311 				    &waituntil);
1312 				do {
1313 					nfs_catnap(PZERO, 0, "nfstry");
1314 					getnanouptime(&ts);
1315 				} while (timespeccmp(&ts, &waituntil, <));
1316 				timespecadd(&trylater_delay, &trylater_delay,
1317 				    &trylater_delay);	/* Double each time. */
1318 				if (slot != -1) {
1319 					mtx_lock(&sep->nfsess_mtx);
1320 					sep->nfsess_slotseq[slot]++;
1321 					*nd->nd_slotseq = txdr_unsigned(
1322 					    sep->nfsess_slotseq[slot]);
1323 					mtx_unlock(&sep->nfsess_mtx);
1324 				}
1325 				m_freem(nd->nd_mrep);
1326 				nd->nd_mrep = NULL;
1327 				goto tryagain;
1328 			}
1329 
1330 			/*
1331 			 * If the File Handle was stale, invalidate the
1332 			 * lookup cache, just in case.
1333 			 * (vp != NULL implies a client side call)
1334 			 */
1335 			if (nd->nd_repstat == ESTALE && vp != NULL) {
1336 				cache_purge(vp);
1337 				if (ncl_call_invalcaches != NULL)
1338 					(*ncl_call_invalcaches)(vp);
1339 			}
1340 		}
1341 		if ((nd->nd_flag & ND_NFSV4) != 0) {
1342 			/* Free the slot, as required. */
1343 			if (freeslot != -1)
1344 				nfsv4_freeslot(sep, freeslot, false);
1345 			/*
1346 			 * If this op is Putfh, throw its results away.
1347 			 */
1348 			if (j >= 10000)
1349 				NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j);
1350 			if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) {
1351 				NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED);
1352 				i = fxdr_unsigned(int, *tl++);
1353 				j = fxdr_unsigned(int, *tl);
1354 				if (j >= 10000)
1355 					NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i,
1356 					    j);
1357 				/*
1358 				 * All Compounds that do an Op that must
1359 				 * be in sequence consist of NFSV4OP_PUTFH
1360 				 * followed by one of these. As such, we
1361 				 * can determine if the seqid# should be
1362 				 * incremented, here.
1363 				 */
1364 				if ((i == NFSV4OP_OPEN ||
1365 				     i == NFSV4OP_OPENCONFIRM ||
1366 				     i == NFSV4OP_OPENDOWNGRADE ||
1367 				     i == NFSV4OP_CLOSE ||
1368 				     i == NFSV4OP_LOCK ||
1369 				     i == NFSV4OP_LOCKU) &&
1370 				    (j == 0 ||
1371 				     (j != NFSERR_STALECLIENTID &&
1372 				      j != NFSERR_STALESTATEID &&
1373 				      j != NFSERR_BADSTATEID &&
1374 				      j != NFSERR_BADSEQID &&
1375 				      j != NFSERR_BADXDR &&
1376 				      j != NFSERR_RESOURCE &&
1377 				      j != NFSERR_NOFILEHANDLE)))
1378 					nd->nd_flag |= ND_INCRSEQID;
1379 			}
1380 			/*
1381 			 * If this op's status is non-zero, mark
1382 			 * that there is no more data to process.
1383 			 * The exception is Setattr, which always has xdr
1384 			 * when it has failed.
1385 			 */
1386 			if (j != 0 && i != NFSV4OP_SETATTR)
1387 				nd->nd_flag |= ND_NOMOREDATA;
1388 
1389 			/*
1390 			 * If R_DONTRECOVER is set, replace the stale error
1391 			 * reply, so that recovery isn't initiated.
1392 			 */
1393 			if ((nd->nd_repstat == NFSERR_STALECLIENTID ||
1394 			     nd->nd_repstat == NFSERR_BADSESSION ||
1395 			     nd->nd_repstat == NFSERR_STALESTATEID) &&
1396 			    rep != NULL && (rep->r_flags & R_DONTRECOVER))
1397 				nd->nd_repstat = NFSERR_STALEDONTRECOVER;
1398 		}
1399 	}
1400 out:
1401 
1402 #ifdef KDTRACE_HOOKS
1403 	if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) {
1404 		uint32_t probe_id;
1405 		int probe_procnum;
1406 
1407 		if (nd->nd_flag & ND_NFSV4) {
1408 			probe_id = nfscl_nfs4_done_probes[nd->nd_procnum];
1409 			probe_procnum = nd->nd_procnum;
1410 		} else if (nd->nd_flag & ND_NFSV3) {
1411 			probe_id = nfscl_nfs3_done_probes[procnum];
1412 			probe_procnum = procnum;
1413 		} else {
1414 			probe_id = nfscl_nfs2_done_probes[nd->nd_procnum];
1415 			probe_procnum = procnum;
1416 		}
1417 		if (probe_id != 0)
1418 			(dtrace_nfscl_nfs234_done_probe)(probe_id, vp,
1419 			    nd->nd_mreq, cred, probe_procnum, 0);
1420 	}
1421 #endif
1422 
1423 	m_freem(nd->nd_mreq);
1424 	if (usegssname == 0)
1425 		AUTH_DESTROY(auth);
1426 	if (rep != NULL)
1427 		free(rep, M_NFSDREQ);
1428 	if (set_sigset)
1429 		newnfs_restore_sigmask(td, &oldset);
1430 	return (0);
1431 nfsmout:
1432 	m_freem(nd->nd_mrep);
1433 	m_freem(nd->nd_mreq);
1434 	if (usegssname == 0)
1435 		AUTH_DESTROY(auth);
1436 	if (rep != NULL)
1437 		free(rep, M_NFSDREQ);
1438 	if (set_sigset)
1439 		newnfs_restore_sigmask(td, &oldset);
1440 	return (error);
1441 }
1442 
1443 /*
1444  * Close the RPC client handles for an NFS mount, including any extra
1445  * "nconnect" and DS connections. This is used by forced unmounts
1446  * to terminate any outstanding RPCs.
1447  */
1448 int
1449 newnfs_nmcancelreqs(struct nfsmount *nmp)
1450 {
1451 	struct nfsclds *dsp;
1452 	struct __rpc_client *cl;
1453 	int i;
1454 
1455 	if (nmp->nm_sockreq.nr_client != NULL)
1456 		CLNT_CLOSE(nmp->nm_sockreq.nr_client);
1457 	for (i = 0; i < nmp->nm_aconnect; i++)
1458 		if (nmp->nm_aconn[i] != NULL)
1459 			CLNT_CLOSE(nmp->nm_aconn[i]);
1460 lookformore:
1461 	NFSLOCKMNT(nmp);
1462 	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
1463 		NFSLOCKDS(dsp);
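		/*
		 * Skip the head of nm_sess (the MDS session, whose
		 * connection is nm_sockreq's and was closed above) and
		 * close any DS connections that are still open.
		 */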
1464 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1465 		    (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 &&
1466 		    dsp->nfsclds_sockp != NULL &&
1467 		    dsp->nfsclds_sockp->nr_client != NULL) {
1468 			dsp->nfsclds_flags |= NFSCLDS_CLOSED;
1469 			cl = dsp->nfsclds_sockp->nr_client;
1470 			NFSUNLOCKDS(dsp);
1471 			NFSUNLOCKMNT(nmp);
1472 			CLNT_CLOSE(cl);
1473 			goto lookformore;
1474 		}
1475 		NFSUNLOCKDS(dsp);
1476 	}
1477 	NFSUNLOCKMNT(nmp);
1478 	return (0);
1479 }
1480 
1481 /*
1482  * Any signal that can interrupt an NFS operation in an intr mount
1483  * should be added to this set. SIGSTOP and SIGKILL cannot be masked.
1484  */
1485 int newnfs_sig_set[] = {
1486 	SIGINT,
1487 	SIGTERM,
1488 	SIGHUP,
1489 	SIGKILL,
1490 	SIGQUIT
1491 };
1492 
1493 /*
1494  * Check to see if one of the signals in our subset is pending on
1495  * the process (in an intr mount).
1496  */
1497 static int
1498 nfs_sig_pending(sigset_t set)
1499 {
1500 	int i;
1501 
1502 	for (i = 0 ; i < nitems(newnfs_sig_set); i++)
1503 		if (SIGISMEMBER(set, newnfs_sig_set[i]))
1504 			return (1);
1505 	return (0);
1506 }
1507 
1508 /*
1509  * The set/restore sigmask functions are used to (temporarily) overwrite
1510  * the thread td_sigmask during an RPC call (for example). These are also
1511  * used in other places in the NFS client that might tsleep().
1512  */
1513 void
1514 newnfs_set_sigmask(struct thread *td, sigset_t *oldset)
1515 {
1516 	sigset_t newset;
1517 	int i;
1518 	struct proc *p;
1519 
1520 	SIGFILLSET(newset);
1521 	if (td == NULL)
1522 		td = curthread; /* XXX */
1523 	p = td->td_proc;
1524 	/* Remove the NFS set of signals from newset */
1525 	PROC_LOCK(p);
1526 	mtx_lock(&p->p_sigacts->ps_mtx);
1527 	for (i = 0 ; i < nitems(newnfs_sig_set); i++) {
1528 		/*
1529 		 * But make sure we leave the ones already masked by the
1530 		 * thread or ignored by the process, i.e., remove a signal from
1531 		 * the temporary signal mask only if it wasn't already in
1532 		 * td_sigmask or ps_sigignore.
1533 		 */
1534 		if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) &&
1535 		    !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i]))
1536 			SIGDELSET(newset, newnfs_sig_set[i]);
1537 	}
1538 	mtx_unlock(&p->p_sigacts->ps_mtx);
1539 	kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
1540 	    SIGPROCMASK_PROC_LOCKED);
1541 	PROC_UNLOCK(p);
1542 }
1543 
1544 void
1545 newnfs_restore_sigmask(struct thread *td, sigset_t *set)
1546 {
1547 	if (td == NULL)
1548 		td = curthread; /* XXX */
1549 	kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
1550 }
1551 
1552 /*
1553  * NFS wrapper to msleep() that sets a new signal mask and restores the
1554  * old one after msleep() returns.
1555  */
1556 int
1557 newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)
1558 {
1559 	sigset_t oldset;
1560 	int error;
1561 
1562 	if ((priority & PCATCH) == 0)
1563 		return msleep(ident, mtx, priority, wmesg, timo);
1564 	if (td == NULL)
1565 		td = curthread; /* XXX */
1566 	newnfs_set_sigmask(td, &oldset);
1567 	error = msleep(ident, mtx, priority, wmesg, timo);
1568 	newnfs_restore_sigmask(td, &oldset);
1569 	return (error);
1570 }
1571 
1572 /*
1573  * Test for a termination condition pending on the process.
1574  * This is used for NFSMNT_INT mounts.
1575  */
1576 int
1577 newnfs_sigintr(struct nfsmount *nmp, struct thread *td)
1578 {
1579 	struct proc *p;
1580 	sigset_t tmpset;
1581 
1582 	/* Terminate all requests while attempting a forced unmount. */
1583 	if (NFSCL_FORCEDISM(nmp->nm_mountp))
1584 		return (EIO);
1585 	if (!(nmp->nm_flag & NFSMNT_INT))
1586 		return (0);
1587 	if (td == NULL)
1588 		return (0);
1589 	p = td->td_proc;
1590 	PROC_LOCK(p);
1591 	tmpset = p->p_siglist;
1592 	SIGSETOR(tmpset, td->td_siglist);
1593 	SIGSETNAND(tmpset, td->td_sigmask);
1594 	mtx_lock(&p->p_sigacts->ps_mtx);
1595 	SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1596 	mtx_unlock(&p->p_sigacts->ps_mtx);
1597 	if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
1598 	    && nfs_sig_pending(tmpset)) {
1599 		PROC_UNLOCK(p);
1600 		return (EINTR);
1601 	}
1602 	PROC_UNLOCK(p);
1603 	return (0);
1604 }
1605 
1606 static int
1607 nfs_msg(struct thread *td, const char *server, const char *msg, int error)
1608 {
1609 	struct proc *p;
1610 
1611 	p = td ? td->td_proc : NULL;
1612 	if (error) {
1613 		tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n",
1614 		    server, msg, error);
1615 	} else {
1616 		tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
1617 	}
1618 	return (0);
1619 }
1620 
1621 static void
1622 nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
1623     int error, int flags)
1624 {
1625 	if (nmp == NULL)
1626 		return;
1627 	mtx_lock(&nmp->nm_mtx);
1628 	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
1629 		nmp->nm_state |= NFSSTA_TIMEO;
1630 		mtx_unlock(&nmp->nm_mtx);
1631 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1632 		    VQ_NOTRESP, 0);
1633 	} else
1634 		mtx_unlock(&nmp->nm_mtx);
1635 	mtx_lock(&nmp->nm_mtx);
1636 	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
1637 		nmp->nm_state |= NFSSTA_LOCKTIMEO;
1638 		mtx_unlock(&nmp->nm_mtx);
1639 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1640 		    VQ_NOTRESPLOCK, 0);
1641 	} else
1642 		mtx_unlock(&nmp->nm_mtx);
1643 	nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
1644 }
1645 
1646 static void
1647 nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
1648     int flags, int tprintfmsg)
1649 {
1650 	if (nmp == NULL)
1651 		return;
1652 	if (tprintfmsg) {
1653 		nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
1654 	}
1655 
1656 	mtx_lock(&nmp->nm_mtx);
1657 	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
1658 		nmp->nm_state &= ~NFSSTA_TIMEO;
1659 		mtx_unlock(&nmp->nm_mtx);
1660 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1661 		    VQ_NOTRESP, 1);
1662 	} else
1663 		mtx_unlock(&nmp->nm_mtx);
1664 
1665 	mtx_lock(&nmp->nm_mtx);
1666 	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
1667 		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
1668 		mtx_unlock(&nmp->nm_mtx);
1669 		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1670 		    VQ_NOTRESPLOCK, 1);
1671 	} else
1672 		mtx_unlock(&nmp->nm_mtx);
1673 }
1674