/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Socket operations for use by nfs
 */

#include "opt_kgssapi.h"
#include "opt_nfs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/vnode.h>

#include <rpc/rpc.h>
#include <rpc/krpc.h>

#include <kgssapi/krb5/kcrypto.h>

#include <fs/nfs/nfsport.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

dtrace_nfsclient_nfs23_start_probe_func_t
		dtrace_nfscl_nfs234_start_probe;

dtrace_nfsclient_nfs23_done_probe_func_t
		dtrace_nfscl_nfs234_done_probe;

/*
 * Registered probes by RPC type.
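 * Each entry holds the DTrace probe id for the corresponding NFS
 * procedure number; an id of zero means no probe is enabled for that
 * procedure (see the probe_id != 0 checks in newnfs_request() below).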
 */
uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];

uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];

uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];
uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
#endif

NFSSTATESPINLOCK;
NFSREQSPINLOCK;
NFSDLOCKMUTEX;
NFSCLSTATEMUTEX;
extern struct nfsstatsv1 nfsstatsv1;
extern struct nfsreqhead nfsd_reqq;
extern int nfscl_ticks;
extern void (*ncl_call_invalcaches)(struct vnode *);
extern int nfs_numnfscbd;
extern int nfscl_debuglevel;
extern int nfsrv_lease;

SVCPOOL *nfscbd_pool;
int nfs_bufpackets = 4;
static int nfsrv_gsscallbackson = 0;
static int nfs_reconnects;
static int nfs3_jukebox_delay = 10;
static int nfs_skip_wcc_data_onerr = 1;
static int nfs_dsretries = 2;
static struct timespec nfs_trylater_max = {
	.tv_sec		= NFS_TRYLATERDEL,
	.tv_nsec	= 0,
};

SYSCTL_DECL(_vfs_nfs);

SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
    "Buffer reservation size 2 < x < 64");
SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
    "Number of times the nfs client has had to reconnect");
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
    "Number of seconds to delay a retry after receiving EJUKEBOX");
SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
    "Disable weak cache consistency checking when server returns an error");
SYSCTL_INT(_vfs_nfs, OID_AUTO, dsretries, CTLFLAG_RW, &nfs_dsretries, 0,
    "Number of retries for a DS RPC before failure");

static void	nfs_down(struct nfsmount *, struct thread *, const char *,
    int, int);
static void	nfs_up(struct nfsmount *, struct thread *, const char *,
    int, int);
static int	nfs_msg(struct thread *, const char *, const char *, int);

struct nfs_cached_auth {
	int		ca_refs;	/* refcount, including 1 from the cache */
	uid_t		ca_uid;		/* uid that corresponds to this auth */
	AUTH		*ca_auth;	/* RPC auth handle */
};

static int nfsv2_procid[NFS_V3NPROCS] = {
	NFSV2PROC_NULL,
	NFSV2PROC_GETATTR,
	NFSV2PROC_SETATTR,
	NFSV2PROC_LOOKUP,
	NFSV2PROC_NOOP,
	NFSV2PROC_READLINK,
	NFSV2PROC_READ,
	NFSV2PROC_WRITE,
	NFSV2PROC_CREATE,
	NFSV2PROC_MKDIR,
	NFSV2PROC_SYMLINK,
	NFSV2PROC_CREATE,
	NFSV2PROC_REMOVE,
	NFSV2PROC_RMDIR,
	NFSV2PROC_RENAME,
	NFSV2PROC_LINK,
	NFSV2PROC_READDIR,
	NFSV2PROC_NOOP,
	NFSV2PROC_STATFS,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
	NFSV2PROC_NOOP,
};

/*
 * This static array indicates that a NFSv4 RPC should use
 * RPCSEC_GSS, if the mount indicates that via sec=krb5[ip].
 * System RPCs that do not use file handles will be false
 * in this array so that they will use AUTH_SYS when the
 * "syskrb5" mount option is specified, along with
 * "sec=krb5[ip]".
 */
static bool nfscl_use_gss[NFSV42_NPROCS] = {
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	false,		/* SetClientID */
	false,		/* SetClientIDConfirm */
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	false,		/* Renew */
	true,
	false,		/* ReleaseLockOwn */
	true,
	true,
	true,
	true,
	true,
	true,
	false,		/* ExchangeID */
	false,		/* CreateSession */
	false,		/* DestroySession */
	false,		/* DestroyClientID */
	false,		/* FreeStateID */
	true,
	true,
	true,
	true,
	false,		/* ReclaimComplete */
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	true,
	false,		/* BindConnectionToSession */
	true,
	true,
	true,
	true,
};

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 * Which arguments are set to NULL indicate what kind of call it is.
 * cred == NULL --> a call to connect to a pNFS DS
 * nmp == NULL --> indicates an upcall to userland or a NFSv4.0 callback
 */
int
newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
    struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls,
    struct __rpc_client **clipp)
{
	int rcvreserve, sndreserve;
	int pktscale, pktscalesav;
	struct sockaddr *saddr;
	struct ucred *origcred;
	CLIENT *client;
	struct netconfig *nconf;
	struct socket *so;
	int one = 1, retries, error = 0;
	struct thread *td = curthread;
	SVCXPRT *xprt;
	struct timeval timo;
	uint64_t tval;

	/*
	 * We need to establish the socket using the credentials of
	 * the mountpoint.  Some parts of this process (such as
	 * sobind() and soconnect()) will use the current thread's
	 * credential instead of the socket credential.  To work
	 * around this, temporarily change the current thread's
	 * credential to that of the mountpoint.
	 *
	 * XXX: It would be better to explicitly pass the correct
	 * credential to sobind() and soconnect().
	 */
	origcred = td->td_ucred;

	/*
	 * Use the credential in nr_cred, if not NULL.
	 */
	if (nrp->nr_cred != NULL)
		td->td_ucred = nrp->nr_cred;
	else
		td->td_ucred = cred;
	saddr = nrp->nr_nam;

	if (saddr->sa_family == AF_INET)
		if (nrp->nr_sotype == SOCK_DGRAM)
			nconf = getnetconfigent("udp");
		else
			nconf = getnetconfigent("tcp");
	else
		if (nrp->nr_sotype == SOCK_DGRAM)
			nconf = getnetconfigent("udp6");
		else
			nconf = getnetconfigent("tcp6");

	pktscale = nfs_bufpackets;
	if (pktscale < 2)
		pktscale = 2;
	if (pktscale > 64)
		pktscale = 64;
	pktscalesav = pktscale;
	/*
	 * soreserve() can fail if sb_max is too small, so shrink pktscale
	 * and try again if there is an error.
	 * Print a log message suggesting increasing sb_max.
	 * Creating a socket and doing this is necessary since, if the
	 * reservation sizes are too large and will make soreserve() fail,
	 * the connection will work until a large send is attempted and
	 * then it will loop in the krpc code.
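	 * As a rough worked example (assuming the common values MSIZE == 256
	 * and MCLBYTES == 2048): sb_max_adj == sb_max * MCLBYTES /
	 * (MSIZE + MCLBYTES), so the minimum kern.ipc.maxsockbuf suggested
	 * below works out to roughly 2304 / 2048 ~= 1.125 times
	 * nfs_bufpackets * (NFS_MAXBSIZE + NFS_MAXXDR).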
	 */
	so = NULL;
	saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &so, nrp->nr_sotype,
	    nrp->nr_soproto, td->td_ucred, td);
	if (error != 0)
		goto out;
	do {
		if (error != 0 && pktscale > 2) {
			if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
			    pktscale == pktscalesav) {
				/*
				 * Suggest vfs.nfs.bufpackets * maximum RPC message,
				 * adjusted for the sb_max->sb_max_adj conversion of
				 * MCLBYTES / (MSIZE + MCLBYTES) as the minimum setting
				 * for kern.ipc.maxsockbuf.
				 */
				tval = (NFS_MAXBSIZE + NFS_MAXXDR) * nfs_bufpackets;
				tval *= MSIZE + MCLBYTES;
				tval += MCLBYTES - 1; /* Round up divide by MCLBYTES. */
				tval /= MCLBYTES;
				printf("Consider increasing kern.ipc.maxsockbuf to a "
				    "minimum of %ju to support %u byte NFS I/O\n",
				    (uintmax_t)tval, NFS_MAXBSIZE);
			}
			pktscale--;
		}
		if (nrp->nr_sotype == SOCK_DGRAM) {
			if (nmp != NULL) {
				sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
				    pktscale;
				rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
				    pktscale;
			} else {
				sndreserve = rcvreserve = 1024 * pktscale;
			}
		} else {
			if (nrp->nr_sotype != SOCK_STREAM)
				panic("nfscon sotype");
			if (nmp != NULL) {
				sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR) *
				    pktscale;
				rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR) *
				    pktscale;
			} else {
				sndreserve = rcvreserve = 1024 * pktscale;
			}
		}
		error = soreserve(so, sndreserve, rcvreserve);
		if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
		    pktscale <= 2)
			printf("Must increase kern.ipc.maxsockbuf or reduce"
			    " rsize, wsize\n");
	} while (error != 0 && pktscale > 2);
	soclose(so);
	if (error != 0)
		goto out;

	client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog,
	    nrp->nr_vers, sndreserve, rcvreserve);
	CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
	if (nmp != NULL) {
		if ((nmp->nm_flag & NFSMNT_INT))
			CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
		if ((nmp->nm_flag & NFSMNT_RESVPORT))
			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
		if (NFSHASTLS(nmp)) {
			CLNT_CONTROL(client, CLSET_TLS, &one);
			if (nmp->nm_tlscertname != NULL)
				CLNT_CONTROL(client, CLSET_TLSCERTNAME,
				    nmp->nm_tlscertname);
		}
		if (NFSHASSOFT(nmp)) {
			if (nmp->nm_sotype == SOCK_DGRAM)
				/*
				 * For UDP, the large timeout for a reconnect
				 * will be set to "nm_retry * nm_timeo / 2", so
				 * we only want to do 2 reconnect timeout
				 * retries.
				 */
				retries = 2;
			else
				retries = nmp->nm_retry;
		} else
			retries = INT_MAX;
		if (NFSHASNFSV4N(nmp)) {
			if (cred != NULL) {
				if (NFSHASSOFT(nmp)) {
					/*
					 * This should be a DS mount.
					 * Use CLSET_TIMEOUT to set the timeout
					 * for connections to DSs instead of
					 * specifying a timeout on each RPC.
					 * This is done so that SO_SNDTIMEO
					 * is set on the TCP socket as well
					 * as specifying a time limit when
					 * waiting for an RPC reply.  Useful
					 * if the send queue for the TCP
					 * connection has become constipated,
					 * due to a failed DS.
					 * The choice of lease_duration / 4 is
					 * fairly arbitrary, but seems to work
					 * ok, with a lower bound of 10sec.
					 */
					timo.tv_sec = nfsrv_lease / 4;
					if (timo.tv_sec < 10)
						timo.tv_sec = 10;
					timo.tv_usec = 0;
					CLNT_CONTROL(client, CLSET_TIMEOUT,
					    &timo);
				}
				/*
				 * Make sure the nfscbd_pool doesn't get
				 * destroyed while doing this.
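				 * The nfs_numnfscbd count is bumped while
				 * the backchannel transport is created and
				 * any thread waiting for the count to drain
				 * is woken up once it drops back to zero.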
				 */
				NFSD_LOCK();
				if (nfs_numnfscbd > 0) {
					nfs_numnfscbd++;
					NFSD_UNLOCK();
					xprt = svc_vc_create_backchannel(
					    nfscbd_pool);
					CLNT_CONTROL(client, CLSET_BACKCHANNEL,
					    xprt);
					NFSD_LOCK();
					nfs_numnfscbd--;
					if (nfs_numnfscbd == 0)
						wakeup(&nfs_numnfscbd);
				}
				NFSD_UNLOCK();
			} else {
				/*
				 * cred == NULL for a DS connect.
				 * For connects to a DS, set a retry limit
				 * so that failed DSs will be detected.
				 * This is ok for NFSv4.1, since a DS does
				 * not maintain open/lock state and is the
				 * only case where using a "soft" mount is
				 * recommended for NFSv4.
				 * For mounts from the MDS to DS, this is done
				 * via mount options, but that is not the case
				 * here.  The retry limit here can be adjusted
				 * via the sysctl vfs.nfs.dsretries.
				 * See the comment above w.r.t. timeout.
				 */
				timo.tv_sec = nfsrv_lease / 4;
				if (timo.tv_sec < 10)
					timo.tv_sec = 10;
				timo.tv_usec = 0;
				CLNT_CONTROL(client, CLSET_TIMEOUT, &timo);
				retries = nfs_dsretries;
			}
		}
	} else {
		/*
		 * Three cases:
		 * - Null RPC callback to client
		 * - Non-Null RPC callback to client, wait a little longer
		 * - upcalls to nfsuserd and gssd (clp == NULL)
		 */
		if (callback_retry_mult == 0) {
			retries = NFSV4_UPCALLRETRY;
			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
		} else {
			retries = NFSV4_CALLBACKRETRY * callback_retry_mult;
		}
		if (dotls)
			CLNT_CONTROL(client, CLSET_TLS, &one);
	}
	CLNT_CONTROL(client, CLSET_RETRIES, &retries);

	if (nmp != NULL) {
		/*
		 * For UDP, there are 2 timeouts:
		 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
		 *   that does a retransmit of an RPC request using the same
		 *   socket and xid.  This is what you normally want to do,
		 *   since NFS servers depend on "same xid" for their
		 *   Duplicate Request Cache.
		 * - timeout specified in CLNT_CALL_MBUF(), which specifies when
		 *   retransmits on the same socket should fail and a fresh
		 *   socket created.  Each of these timeouts counts as one
		 *   CLSET_RETRIES as set above.
		 * Set the initial retransmit timeout for UDP.  This timeout
		 * doesn't exist for TCP and the following call just fails,
		 * which is ok.
		 */
		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
	}

	/*
	 * *clipp is &nrp->nr_client or &nm_aconn[nmp->nm_nextaconn].
	 * The latter case is for additional connections specified by the
	 * "nconnect" mount option.  nr_mtx etc is used for these additional
	 * connections, as well as nr_client in the nfssockreq
	 * structure for the mount.
	 */
	mtx_lock(&nrp->nr_mtx);
	if (*clipp != NULL) {
		mtx_unlock(&nrp->nr_mtx);
		/*
		 * Someone else already connected.
		 */
		CLNT_RELEASE(client);
	} else {
		*clipp = client;
		/*
		 * Protocols that do not require connections may be optionally
		 * left unconnected for servers that reply from a port other
		 * than NFS_PORT.
		 */
		if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
			mtx_unlock(&nrp->nr_mtx);
			CLNT_CONTROL(client, CLSET_CONNECT, &one);
		} else
			mtx_unlock(&nrp->nr_mtx);
	}

out:
	/* Restore current thread's credentials. */
	td->td_ucred = origcred;

	NFSEXITCODE(error);
	return (error);
}

/*
 * NFS disconnect.  Clean up and unlink.
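 * The krpc client handle(s) are unhooked from the nfssockreq/nfsmount
 * while nr_mtx is held and are then closed and released with the
 * mutex dropped.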
 */
void
newnfs_disconnect(struct nfsmount *nmp, struct nfssockreq *nrp)
{
	CLIENT *client, *aconn[NFS_MAXNCONN - 1];
	int i;

	mtx_lock(&nrp->nr_mtx);
	if (nrp->nr_client != NULL) {
		client = nrp->nr_client;
		nrp->nr_client = NULL;
		if (nmp != NULL && nmp->nm_aconnect > 0) {
			for (i = 0; i < nmp->nm_aconnect; i++) {
				aconn[i] = nmp->nm_aconn[i];
				nmp->nm_aconn[i] = NULL;
			}
		}
		mtx_unlock(&nrp->nr_mtx);
		rpc_gss_secpurge_call(client);
		CLNT_CLOSE(client);
		CLNT_RELEASE(client);
		if (nmp != NULL && nmp->nm_aconnect > 0) {
			for (i = 0; i < nmp->nm_aconnect; i++) {
				if (aconn[i] != NULL) {
					rpc_gss_secpurge_call(aconn[i]);
					CLNT_CLOSE(aconn[i]);
					CLNT_RELEASE(aconn[i]);
				}
			}
		}
	} else {
		mtx_unlock(&nrp->nr_mtx);
	}
}

static AUTH *
nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal,
    char *srv_principal, gss_OID mech_oid, struct ucred *cred)
{
	rpc_gss_service_t svc;
	AUTH *auth;

	switch (secflavour) {
	case RPCSEC_GSS_KRB5:
	case RPCSEC_GSS_KRB5I:
	case RPCSEC_GSS_KRB5P:
		if (!mech_oid) {
			if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid))
				return (NULL);
		}
		if (secflavour == RPCSEC_GSS_KRB5)
			svc = rpc_gss_svc_none;
		else if (secflavour == RPCSEC_GSS_KRB5I)
			svc = rpc_gss_svc_integrity;
		else
			svc = rpc_gss_svc_privacy;

		if (clnt_principal == NULL)
			auth = rpc_gss_secfind_call(nrp->nr_client, cred,
			    srv_principal, mech_oid, svc);
		else {
			auth = rpc_gss_seccreate_call(nrp->nr_client, cred,
			    clnt_principal, srv_principal, "kerberosv5",
			    svc, NULL, NULL, NULL);
			return (auth);
		}
		if (auth != NULL)
			return (auth);
		/* fallthrough */
	case AUTH_SYS:
	default:
		return (authunix_create(cred));
	}
}

/*
 * Callback from the RPC code to generate up/down notifications.
 */

struct nfs_feedback_arg {
	struct nfsmount *nf_mount;
	int		nf_lastmsg;	/* last tprintf */
	int		nf_tprintfmsg;
	struct thread	*nf_td;
};

static void
nfs_feedback(int type, int proc, void *arg)
{
	struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
	struct nfsmount *nmp = nf->nf_mount;
	time_t now;

	switch (type) {
	case FEEDBACK_REXMIT2:
	case FEEDBACK_RECONNECT:
		now = NFSD_MONOSEC;
		if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) {
			nfs_down(nmp, nf->nf_td,
			    "not responding", 0, NFSSTA_TIMEO);
			nf->nf_tprintfmsg = TRUE;
			nf->nf_lastmsg = now;
		}
		break;

	case FEEDBACK_OK:
		nfs_up(nf->nf_mount, nf->nf_td,
		    "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
		break;
	}
}

/*
 * newnfs_request - goes something like this
 *	- does the rpc by calling the krpc layer
 *	- break down rpc header and return with nfs reply
 * nb: always frees up nd_mreq mbuf list
 */
int
newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
    struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp,
    struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
    u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
{
	uint32_t retseq, retval, slotseq, *tl;
	int i = 0, j = 0, opcnt, set_sigset = 0, slot;
	int error = 0, usegssname = 0, secflavour = AUTH_SYS;
	int freeslot, maxslot, reterr, slotpos, timeo;
	u_int16_t procnum;
	u_int nextconn;
	struct nfs_feedback_arg nf;
	struct timeval timo;
	AUTH *auth;
	struct rpc_callextra ext;
	enum clnt_stat stat;
	struct nfsreq *rep = NULL;
	char *srv_principal = NULL, *clnt_principal = NULL;
	sigset_t oldset;
	struct ucred *authcred;
	struct nfsclsession *sep;
	uint8_t sessionid[NFSX_V4SESSIONID];
	bool nextconn_set;
	struct timespec trylater_delay, ts, waituntil;

	/* Initially 1msec. */
	trylater_delay.tv_sec = 0;
	trylater_delay.tv_nsec = 1000000;
	sep = dssep;
	if (xidp != NULL)
		*xidp = 0;
	/* Reject requests while attempting a forced unmount. */
	if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) {
		m_freem(nd->nd_mreq);
		return (ESTALE);
	}

	/*
	 * Set authcred, which is used to acquire RPC credentials to
	 * the cred argument, by default.  The crhold() should not be
	 * necessary, but will ensure that some future code change
	 * doesn't result in the credential being free'd prematurely.
	 */
	authcred = crhold(cred);

	/* For client side interruptible mounts, mask off the signals. */
	if (nmp != NULL && td != NULL && NFSHASINT(nmp)) {
		newnfs_set_sigmask(td, &oldset);
		set_sigset = 1;
	}

	/*
	 * If not already connected call newnfs_connect now.
	 */
	if (nrp->nr_client == NULL)
		newnfs_connect(nmp, nrp, cred, td, 0, false, &nrp->nr_client);

	/*
	 * If the "nconnect" mount option was specified and this RPC is
	 * one that can have a large RPC message and is being done through
	 * the NFS/MDS server, use an additional connection.  (When the RPC is
	 * being done through the server/MDS, nrp == &nmp->nm_sockreq.)
	 * The "nconnect" mount option normally has minimal effect when the
	 * "pnfs" mount option is specified, since only Readdir RPCs are
	 * normally done through the NFS/MDS server.
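	 * The additional connections are used round robin, via the atomic
	 * advance of nm_nextaconn below, so that large I/O RPC messages are
	 * spread across them while all other RPCs stay on the default
	 * nr_client connection.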
	 */
	nextconn_set = false;
	if (nmp != NULL && nmp->nm_aconnect > 0 && nrp == &nmp->nm_sockreq &&
	    (nd->nd_procnum == NFSPROC_READ ||
	     nd->nd_procnum == NFSPROC_READDIR ||
	     nd->nd_procnum == NFSPROC_READDIRPLUS ||
	     nd->nd_procnum == NFSPROC_WRITE)) {
		nextconn = atomic_fetchadd_int(&nmp->nm_nextaconn, 1);
		nextconn %= nmp->nm_aconnect;
		nextconn_set = true;
		if (nmp->nm_aconn[nextconn] == NULL)
			newnfs_connect(nmp, nrp, cred, td, 0, false,
			    &nmp->nm_aconn[nextconn]);
	}

	/*
	 * For a client side mount, nmp is != NULL and clp == NULL.  For
	 * server calls (callbacks or upcalls), nmp == NULL.
	 */
	if (clp != NULL) {
		NFSLOCKSTATE();
		if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) {
			secflavour = RPCSEC_GSS_KRB5;
			if (nd->nd_procnum != NFSPROC_NULL) {
				if (clp->lc_flags & LCL_GSSINTEGRITY)
					secflavour = RPCSEC_GSS_KRB5I;
				else if (clp->lc_flags & LCL_GSSPRIVACY)
					secflavour = RPCSEC_GSS_KRB5P;
			}
		}
		NFSUNLOCKSTATE();
	} else if (nmp != NULL && NFSHASKERB(nmp) &&
	    nd->nd_procnum != NFSPROC_NULL && (!NFSHASSYSKRB5(nmp) ||
	    nfscl_use_gss[nd->nd_procnum])) {
		if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0)
			nd->nd_flag |= ND_USEGSSNAME;
		if ((nd->nd_flag & ND_USEGSSNAME) != 0) {
			/*
			 * If there is a client side host based credential,
			 * use that, otherwise use the system uid, if set.
			 * The system uid is in the nmp->nm_sockreq.nr_cred
			 * credentials.
			 */
			if (nmp->nm_krbnamelen > 0) {
				usegssname = 1;
				clnt_principal = nmp->nm_krbname;
			} else if (nmp->nm_uid != (uid_t)-1) {
				KASSERT(nmp->nm_sockreq.nr_cred != NULL,
				    ("newnfs_request: NULL nr_cred"));
				crfree(authcred);
				authcred = crhold(nmp->nm_sockreq.nr_cred);
			}
		} else if (nmp->nm_krbnamelen == 0 &&
		    nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) {
			/*
			 * If there is no host based principal name and
			 * the system uid is set and this is root, use the
			 * system uid, since root won't have user
			 * credentials in a credentials cache file.
			 * The system uid is in the nmp->nm_sockreq.nr_cred
			 * credentials.
			 */
			KASSERT(nmp->nm_sockreq.nr_cred != NULL,
			    ("newnfs_request: NULL nr_cred"));
			crfree(authcred);
			authcred = crhold(nmp->nm_sockreq.nr_cred);
		}
		if (NFSHASINTEGRITY(nmp))
			secflavour = RPCSEC_GSS_KRB5I;
		else if (NFSHASPRIVACY(nmp))
			secflavour = RPCSEC_GSS_KRB5P;
		else
			secflavour = RPCSEC_GSS_KRB5;
		srv_principal = NFSMNT_SRVKRBNAME(nmp);
	} else if (nmp != NULL && (!NFSHASKERB(nmp) || NFSHASSYSKRB5(nmp)) &&
	    nd->nd_procnum != NFSPROC_NULL &&
	    (nd->nd_flag & ND_USEGSSNAME) != 0) {
		/*
		 * Use the uid that did the mount when the RPC is doing
		 * NFSv4 system operations, as indicated by the
		 * ND_USEGSSNAME flag, for the AUTH_SYS case.
		 * The credentials in nm_sockreq.nr_cred were used for the
		 * mount.
		 */
		KASSERT(nmp->nm_sockreq.nr_cred != NULL,
		    ("newnfs_request: NULL nr_cred"));
		crfree(authcred);
		authcred = crhold(nmp->nm_sockreq.nr_cred);
	}

	if (nmp != NULL) {
		bzero(&nf, sizeof(struct nfs_feedback_arg));
		nf.nf_mount = nmp;
		nf.nf_td = td;
		nf.nf_lastmsg = NFSD_MONOSEC -
		    ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay));
	}

	if (nd->nd_procnum == NFSPROC_NULL)
		auth = authnone_create();
	else if (usegssname) {
		/*
		 * For this case, the authenticator is held in the
		 * nfssockreq structure, so don't release the reference count
		 * held on it. --> Don't AUTH_DESTROY() it in this function.
		 */
		if (nrp->nr_auth == NULL)
			nrp->nr_auth = nfs_getauth(nrp, secflavour,
			    clnt_principal, srv_principal, NULL, authcred);
		else
			rpc_gss_refresh_auth_call(nrp->nr_auth);
		auth = nrp->nr_auth;
	} else
		auth = nfs_getauth(nrp, secflavour, NULL,
		    srv_principal, NULL, authcred);
	crfree(authcred);
	if (auth == NULL) {
		m_freem(nd->nd_mreq);
		if (set_sigset)
			newnfs_restore_sigmask(td, &oldset);
		return (EACCES);
	}
	bzero(&ext, sizeof(ext));
	ext.rc_auth = auth;
	if (nmp != NULL) {
		ext.rc_feedback = nfs_feedback;
		ext.rc_feedback_arg = &nf;
	}

	procnum = nd->nd_procnum;
	if ((nd->nd_flag & ND_NFSV4) &&
	    nd->nd_procnum != NFSPROC_NULL &&
	    nd->nd_procnum != NFSV4PROC_CBCOMPOUND)
		procnum = NFSV4PROC_COMPOUND;

	if (nmp != NULL) {
		NFSINCRGLOBAL(nfsstatsv1.rpcrequests);

		/* Map the procnum to the old NFSv2 one, as required. */
		if ((nd->nd_flag & ND_NFSV2) != 0) {
			if (nd->nd_procnum < NFS_V3NPROCS)
				procnum = nfsv2_procid[nd->nd_procnum];
			else
				procnum = NFSV2PROC_NOOP;
		}

		/*
		 * Now only used for the R_DONTRECOVER case, but until that is
		 * supported within the krpc code, I need to keep a queue of
		 * outstanding RPCs for nfsv4 client requests.
		 */
		if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND)
			rep = malloc(sizeof(struct nfsreq),
			    M_NFSDREQ, M_WAITOK);
#ifdef KDTRACE_HOOKS
		if (dtrace_nfscl_nfs234_start_probe != NULL) {
			uint32_t probe_id;
			int probe_procnum;

			if (nd->nd_flag & ND_NFSV4) {
				probe_id =
				    nfscl_nfs4_start_probes[nd->nd_procnum];
				probe_procnum = nd->nd_procnum;
			} else if (nd->nd_flag & ND_NFSV3) {
				probe_id = nfscl_nfs3_start_probes[procnum];
				probe_procnum = procnum;
			} else {
				probe_id =
				    nfscl_nfs2_start_probes[nd->nd_procnum];
				probe_procnum = procnum;
			}
			if (probe_id != 0)
				(dtrace_nfscl_nfs234_start_probe)
				    (probe_id, vp, nd->nd_mreq, cred,
				    probe_procnum);
		}
#endif
	}
	freeslot = -1;		/* Set to slot that needs to be free'd */
tryagain:
	slot = -1;		/* Slot that needs a sequence# increment. */
	/*
	 * This timeout specifies when a new socket should be created,
	 * along with new xid values.  For UDP, this should be done
	 * infrequently, since retransmits of RPC requests should normally
	 * use the same xid.
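	 * (The shorter per-retransmit timer that reuses the same socket and
	 * xid is the CLSET_RETRY_TIMEOUT value set in newnfs_connect().)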
	 */
	if (nmp == NULL) {
		if (clp == NULL) {
			timo.tv_sec = NFSV4_UPCALLTIMEO;
			timo.tv_usec = 0;
		} else {
			timo.tv_sec = NFSV4_CALLBACKTIMEO / 1000;
			timo.tv_usec = NFSV4_CALLBACKTIMEO * 1000;
		}
	} else {
		if (nrp->nr_sotype != SOCK_DGRAM) {
			timo.tv_usec = 0;
			if ((nmp->nm_flag & NFSMNT_NFSV4))
				timo.tv_sec = INT_MAX;
			else
				timo.tv_sec = NFS_TCPTIMEO;
		} else {
			if (NFSHASSOFT(nmp)) {
				/*
				 * CLSET_RETRIES is set to 2, so this should be
				 * half of the total timeout required.
				 */
				timeo = nmp->nm_retry * nmp->nm_timeo / 2;
				if (timeo < 1)
					timeo = 1;
				timo.tv_sec = timeo / NFS_HZ;
				timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
				    NFS_HZ;
			} else {
				/* For UDP hard mounts, use a large value. */
				timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
				timo.tv_usec = 0;
			}
		}

		if (rep != NULL) {
			rep->r_flags = 0;
			rep->r_nmp = nmp;
			/*
			 * Chain request into list of outstanding requests.
			 */
			NFSLOCKREQ();
			TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain);
			NFSUNLOCKREQ();
		}
	}

	nd->nd_mrep = NULL;
	if (clp != NULL && sep != NULL)
		stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
		    nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
	else if (nextconn_set)
		/*
		 * When there are multiple TCP connections, send the
		 * RPCs with large messages on the alternate TCP
		 * connection(s) in a round robin fashion.
		 * The small RPC messages are sent on the default
		 * TCP connection because they do not require much
		 * network bandwidth and separating them from the
		 * large RPC messages avoids them getting "log jammed"
		 * behind several large RPC messages.
		 */
		stat = CLNT_CALL_MBUF(nmp->nm_aconn[nextconn],
		    &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo);
	else
		stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
		    nd->nd_mreq, &nd->nd_mrep, timo);
	NFSCL_DEBUG(2, "clnt call=%d\n", stat);

	if (rep != NULL) {
		/*
		 * RPC done, unlink the request.
		 */
		NFSLOCKREQ();
		TAILQ_REMOVE(&nfsd_reqq, rep, r_chain);
		NFSUNLOCKREQ();
	}

	/*
	 * If there was a successful reply and a tprintf msg,
	 * tprintf a response.
	 */
	if (stat == RPC_SUCCESS) {
		error = 0;
	} else if (stat == RPC_TIMEDOUT) {
		NFSINCRGLOBAL(nfsstatsv1.rpctimeouts);
		error = ETIMEDOUT;
	} else if (stat == RPC_VERSMISMATCH) {
		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
		error = EOPNOTSUPP;
	} else if (stat == RPC_PROGVERSMISMATCH) {
		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
		error = EPROTONOSUPPORT;
	} else if (stat == RPC_CANTSEND || stat == RPC_CANTRECV ||
	    stat == RPC_SYSTEMERROR || stat == RPC_INTR) {
		/* Check for a session slot that needs to be free'd. */
		if ((nd->nd_flag & (ND_NFSV41 | ND_HASSLOTID)) ==
		    (ND_NFSV41 | ND_HASSLOTID) && nmp != NULL &&
		    nd->nd_procnum != NFSPROC_NULL) {
			/*
			 * This should only occur when either the MDS or
			 * a client has an RPC against a DS fail.
			 * This happens because these cases use "soft"
			 * connections that can time out and fail.
			 * The slot used for this RPC is now in a
			 * non-deterministic state, but if the slot isn't
			 * free'd, threads can get stuck waiting for a slot.
			 */
			if (sep == NULL)
				sep = nfsmnt_mdssession(nmp);
			/*
			 * Bump the sequence# out of range, so that reuse of
			 * this slot will result in an NFSERR_SEQMISORDERED
			 * error and not a bogus cached RPC reply.
			 */
			mtx_lock(&sep->nfsess_mtx);
			sep->nfsess_slotseq[nd->nd_slotid] += 10;
			sep->nfsess_badslots |= (0x1ULL << nd->nd_slotid);
			mtx_unlock(&sep->nfsess_mtx);
			/* And free the slot. */
			nfsv4_freeslot(sep, nd->nd_slotid, false);
		}
		if (stat == RPC_INTR)
			error = EINTR;
		else {
			NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
			error = ENXIO;
		}
	} else {
		NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
		error = EACCES;
	}
	if (error) {
		m_freem(nd->nd_mreq);
		if (usegssname == 0)
			AUTH_DESTROY(auth);
		if (rep != NULL)
			free(rep, M_NFSDREQ);
		if (set_sigset)
			newnfs_restore_sigmask(td, &oldset);
		return (error);
	}

	KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n"));

	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	newnfs_realign(&nd->nd_mrep, M_WAITOK);
	nd->nd_md = nd->nd_mrep;
	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
	nd->nd_repstat = 0;
	if (nd->nd_procnum != NFSPROC_NULL &&
	    nd->nd_procnum != NFSV4PROC_CBNULL) {
		/* If sep == NULL, set it to the default in nmp. */
		if (sep == NULL && nmp != NULL)
			sep = nfsmnt_mdssession(nmp);
		/*
		 * and now the actual NFS xdr.
		 */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl);
		if (nd->nd_repstat >= 10000)
			NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum,
			    (int)nd->nd_repstat);

		/*
		 * Get rid of the tag, return count and SEQUENCE result for
		 * NFSv4.
		 */
		if ((nd->nd_flag & ND_NFSV4) != 0 && nd->nd_repstat !=
		    NFSERR_MINORVERMISMATCH) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			i = fxdr_unsigned(int, *tl);
			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
			if (error)
				goto nfsmout;
			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
			opcnt = fxdr_unsigned(int, *tl++);
			i = fxdr_unsigned(int, *tl++);
			j = fxdr_unsigned(int, *tl);
			if (j >= 10000)
				NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j);
			/*
			 * If the first op is Sequence, free up the slot.
			 */
			if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) ||
			    (clp != NULL && i == NFSV4OP_CBSEQUENCE &&
			     j != 0)) {
				NFSCL_DEBUG(1, "failed seq=%d\n", j);
				if (sep != NULL && i == NFSV4OP_SEQUENCE &&
				    j == NFSERR_SEQMISORDERED) {
					mtx_lock(&sep->nfsess_mtx);
					sep->nfsess_badslots |=
					    (0x1ULL << nd->nd_slotid);
					mtx_unlock(&sep->nfsess_mtx);
				}
			}
			if (((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) ||
			    (clp != NULL && i == NFSV4OP_CBSEQUENCE &&
			     j == 0)) && sep != NULL) {
				if (i == NFSV4OP_SEQUENCE)
					NFSM_DISSECT(tl, uint32_t *,
					    NFSX_V4SESSIONID +
					    5 * NFSX_UNSIGNED);
				else
					NFSM_DISSECT(tl, uint32_t *,
					    NFSX_V4SESSIONID +
					    4 * NFSX_UNSIGNED);
				mtx_lock(&sep->nfsess_mtx);
				if (bcmp(tl, sep->nfsess_sessionid,
				    NFSX_V4SESSIONID) == 0) {
					tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
					retseq = fxdr_unsigned(uint32_t, *tl++);
					slot = fxdr_unsigned(int, *tl++);
					if ((nd->nd_flag & ND_HASSLOTID) != 0) {
						if (slot >= NFSV4_SLOTS ||
						    (i == NFSV4OP_CBSEQUENCE &&
						     slot >= NFSV4_CBSLOTS)) {
							printf("newnfs_request:"
							    " Bogus slot\n");
							slot = nd->nd_slotid;
						} else if (slot !=
						    nd->nd_slotid) {
							printf("newnfs_request:"
							    " Wrong session "
							    "srvslot=%d "
							    "slot=%d\n", slot,
							    nd->nd_slotid);
							if (i == NFSV4OP_SEQUENCE) {
								/*
								 * Mark both slots as
								 * bad, because we do
								 * not know if the
								 * server has advanced
								 * the sequence# for
								 * either of them.
								 */
								sep->nfsess_badslots |=
								    (0x1ULL << slot);
								sep->nfsess_badslots |=
								    (0x1ULL <<
								     nd->nd_slotid);
							}
							slot = nd->nd_slotid;
						}
						freeslot = slot;
					} else if (slot != 0) {
						printf("newnfs_request: Bad "
						    "session slot=%d\n", slot);
						slot = 0;
					}
					if (retseq != sep->nfsess_slotseq[slot])
						printf("retseq diff 0x%x\n",
						    retseq);
					retval = fxdr_unsigned(uint32_t, *++tl);
					if ((retval + 1) < sep->nfsess_foreslots)
						sep->nfsess_foreslots =
						    (retval + 1);
					else if ((retval + 1) >
					    sep->nfsess_foreslots)
						sep->nfsess_foreslots =
						    (retval < 64) ?
						    (retval + 1) : 64;
				}
				mtx_unlock(&sep->nfsess_mtx);

				/* Grab the op and status for the next one. */
				if (opcnt > 1) {
					NFSM_DISSECT(tl, uint32_t *,
					    2 * NFSX_UNSIGNED);
					i = fxdr_unsigned(int, *tl++);
					j = fxdr_unsigned(int, *tl);
				}
			}
		}
		if (nd->nd_repstat != 0) {
			if (nd->nd_repstat == NFSERR_BADSESSION &&
			    nmp != NULL && dssep == NULL &&
			    (nd->nd_flag & ND_NFSV41) != 0) {
				/*
				 * If this is a client side MDS RPC, mark
				 * the MDS session defunct and initiate
				 * recovery, as required.
				 * The nfsess_defunct field is protected by
				 * the NFSLOCKMNT()/nm_mtx lock and not the
				 * nfsess_mtx lock to simplify its handling,
				 * for the MDS session.  This lock is also
				 * sufficient for nfsess_sessionid, since it
				 * never changes in the structure.
				 */
				NFSCL_DEBUG(1, "Got badsession\n");
				NFSLOCKCLSTATE();
				NFSLOCKMNT(nmp);
				sep = NFSMNT_MDSSESSION(nmp);
				if (bcmp(sep->nfsess_sessionid, nd->nd_sequence,
				    NFSX_V4SESSIONID) == 0) {
					printf("Initiate recovery. If server "
					    "has not rebooted, "
					    "check NFS clients for unique "
					    "/etc/hostid's\n");
					/* Initiate recovery. */
					sep->nfsess_defunct = 1;
					NFSCL_DEBUG(1, "Marked defunct\n");
					if (nmp->nm_clp != NULL) {
						nmp->nm_clp->nfsc_flags |=
						    NFSCLFLAGS_RECOVER;
						wakeup(nmp->nm_clp);
					}
				}
				NFSUNLOCKCLSTATE();
				/*
				 * Sleep for up to 1sec waiting for a new
				 * session.
				 */
				mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO,
				    "nfsbadsess", hz);
				/*
				 * Get the session again, in case a new one
				 * has been created during the sleep.
				 */
				sep = NFSMNT_MDSSESSION(nmp);
				NFSUNLOCKMNT(nmp);
				if ((nd->nd_flag & ND_LOOPBADSESS) != 0) {
					reterr = nfsv4_sequencelookup(nmp, sep,
					    &slotpos, &maxslot, &slotseq,
					    sessionid, true);
					if (reterr == 0) {
						/* Fill in new session info. */
						NFSCL_DEBUG(1,
						    "Filling in new sequence\n");
						tl = nd->nd_sequence;
						bcopy(sessionid, tl,
						    NFSX_V4SESSIONID);
						tl += NFSX_V4SESSIONID /
						    NFSX_UNSIGNED;
						*tl++ = txdr_unsigned(slotseq);
						*tl++ = txdr_unsigned(slotpos);
						*tl = txdr_unsigned(maxslot);
						nd->nd_slotid = slotpos;
						nd->nd_flag |= ND_HASSLOTID;
					}
					if (reterr == NFSERR_BADSESSION ||
					    reterr == 0) {
						NFSCL_DEBUG(1,
						    "Badsession looping\n");
						m_freem(nd->nd_mrep);
						nd->nd_mrep = NULL;
						goto tryagain;
					}
					nd->nd_repstat = reterr;
					NFSCL_DEBUG(1, "Got err=%d\n", reterr);
				}
			}
			/*
			 * When clp != NULL, it is a callback and all
			 * callback operations can be retried for NFSERR_DELAY.
			 */
			if (((nd->nd_repstat == NFSERR_DELAY ||
			      nd->nd_repstat == NFSERR_GRACE) &&
			     (nd->nd_flag & ND_NFSV4) && (clp != NULL ||
			      (nd->nd_procnum != NFSPROC_DELEGRETURN &&
			       nd->nd_procnum != NFSPROC_SETATTR &&
			       nd->nd_procnum != NFSPROC_READ &&
			       nd->nd_procnum != NFSPROC_READDS &&
			       nd->nd_procnum != NFSPROC_WRITE &&
			       nd->nd_procnum != NFSPROC_WRITEDS &&
			       nd->nd_procnum != NFSPROC_OPEN &&
			       nd->nd_procnum != NFSPROC_OPENLAYGET &&
			       nd->nd_procnum != NFSPROC_CREATE &&
			       nd->nd_procnum != NFSPROC_CREATELAYGET &&
			       nd->nd_procnum != NFSPROC_OPENCONFIRM &&
			       nd->nd_procnum != NFSPROC_OPENDOWNGRADE &&
			       nd->nd_procnum != NFSPROC_CLOSE &&
			       nd->nd_procnum != NFSPROC_LOCK &&
			       nd->nd_procnum != NFSPROC_LOCKU))) ||
			    (nd->nd_repstat == NFSERR_DELAY &&
			     (nd->nd_flag & ND_NFSV4) == 0) ||
			    nd->nd_repstat == NFSERR_RESOURCE) {
				/* Clip at NFS_TRYLATERDEL. */
				if (timespeccmp(&trylater_delay,
				    &nfs_trylater_max, >))
					trylater_delay = nfs_trylater_max;
				getnanouptime(&waituntil);
				timespecadd(&waituntil, &trylater_delay,
				    &waituntil);
				do {
					nfs_catnap(PZERO, 0, "nfstry");
					getnanouptime(&ts);
				} while (timespeccmp(&ts, &waituntil, <));
				timespecadd(&trylater_delay, &trylater_delay,
				    &trylater_delay);	/* Double each time. */
				if (slot != -1) {
					mtx_lock(&sep->nfsess_mtx);
					sep->nfsess_slotseq[slot]++;
					*nd->nd_slotseq = txdr_unsigned(
					    sep->nfsess_slotseq[slot]);
					mtx_unlock(&sep->nfsess_mtx);
				}
				m_freem(nd->nd_mrep);
				nd->nd_mrep = NULL;
				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 * (vp != NULL implies a client side call)
			 */
			if (nd->nd_repstat == ESTALE && vp != NULL) {
				cache_purge(vp);
				if (ncl_call_invalcaches != NULL)
					(*ncl_call_invalcaches)(vp);
			}
		}
		if ((nd->nd_flag & ND_NFSV4) != 0) {
			/* Free the slot, as required. */
			if (freeslot != -1)
				nfsv4_freeslot(sep, freeslot, false);
			/*
			 * If this op is Putfh, throw its results away.
			 */
			if (j >= 10000)
				NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j);
			if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) {
				NFSM_DISSECT(tl, u_int32_t *,
				    2 * NFSX_UNSIGNED);
				i = fxdr_unsigned(int, *tl++);
				j = fxdr_unsigned(int, *tl);
				if (j >= 10000)
					NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i,
					    j);
				/*
				 * All Compounds that do an Op that must
				 * be in sequence consist of NFSV4OP_PUTFH
				 * followed by one of these.  As such, we
				 * can determine if the seqid# should be
				 * incremented, here.
				 */
				if ((i == NFSV4OP_OPEN ||
				     i == NFSV4OP_OPENCONFIRM ||
				     i == NFSV4OP_OPENDOWNGRADE ||
				     i == NFSV4OP_CLOSE ||
				     i == NFSV4OP_LOCK ||
				     i == NFSV4OP_LOCKU) &&
				    (j == 0 ||
				     (j != NFSERR_STALECLIENTID &&
				      j != NFSERR_STALESTATEID &&
				      j != NFSERR_BADSTATEID &&
				      j != NFSERR_BADSEQID &&
				      j != NFSERR_BADXDR &&
				      j != NFSERR_RESOURCE &&
				      j != NFSERR_NOFILEHANDLE)))
					nd->nd_flag |= ND_INCRSEQID;
			}
			/*
			 * If this op's status is non-zero, mark
			 * that there is no more data to process.
			 * The exception is Setattr, which always has xdr
			 * when it has failed.
			 */
			if (j != 0 && i != NFSV4OP_SETATTR)
				nd->nd_flag |= ND_NOMOREDATA;

			/*
			 * If R_DONTRECOVER is set, replace the stale error
			 * reply, so that recovery isn't initiated.
			 */
			if ((nd->nd_repstat == NFSERR_STALECLIENTID ||
			     nd->nd_repstat == NFSERR_BADSESSION ||
			     nd->nd_repstat == NFSERR_STALESTATEID) &&
			    rep != NULL && (rep->r_flags & R_DONTRECOVER))
				nd->nd_repstat = NFSERR_STALEDONTRECOVER;
		}
	}

#ifdef KDTRACE_HOOKS
	if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) {
		uint32_t probe_id;
		int probe_procnum;

		if (nd->nd_flag & ND_NFSV4) {
			probe_id = nfscl_nfs4_done_probes[nd->nd_procnum];
			probe_procnum = nd->nd_procnum;
		} else if (nd->nd_flag & ND_NFSV3) {
			probe_id = nfscl_nfs3_done_probes[procnum];
			probe_procnum = procnum;
		} else {
			probe_id = nfscl_nfs2_done_probes[nd->nd_procnum];
			probe_procnum = procnum;
		}
		if (probe_id != 0)
			(dtrace_nfscl_nfs234_done_probe)(probe_id, vp,
			    nd->nd_mreq, cred, probe_procnum, 0);
	}
#endif

	m_freem(nd->nd_mreq);
	if (usegssname == 0)
		AUTH_DESTROY(auth);
	if (rep != NULL)
		free(rep, M_NFSDREQ);
	if (set_sigset)
		newnfs_restore_sigmask(td, &oldset);
	return (0);
nfsmout:
	m_freem(nd->nd_mrep);
	m_freem(nd->nd_mreq);
	if (usegssname == 0)
		AUTH_DESTROY(auth);
	if (rep != NULL)
		free(rep, M_NFSDREQ);
	if (set_sigset)
		newnfs_restore_sigmask(td, &oldset);
	return (error);
}

/*
 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
 * wait for all requests to complete.  This is used by forced unmounts
 * to terminate any outstanding RPCs.
 */
int
newnfs_nmcancelreqs(struct nfsmount *nmp)
{
	struct nfsclds *dsp;
	struct __rpc_client *cl;
	int i;

	if (nmp->nm_sockreq.nr_client != NULL)
		CLNT_CLOSE(nmp->nm_sockreq.nr_client);
	for (i = 0; i < nmp->nm_aconnect; i++)
		if (nmp->nm_aconn[i] != NULL)
			CLNT_CLOSE(nmp->nm_aconn[i]);
lookformore:
	NFSLOCKMNT(nmp);
	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
		NFSLOCKDS(dsp);
		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
		    (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 &&
		    dsp->nfsclds_sockp != NULL &&
		    dsp->nfsclds_sockp->nr_client != NULL) {
			dsp->nfsclds_flags |= NFSCLDS_CLOSED;
			cl = dsp->nfsclds_sockp->nr_client;
			NFSUNLOCKDS(dsp);
			NFSUNLOCKMNT(nmp);
			CLNT_CLOSE(cl);
			goto lookformore;
		}
		NFSUNLOCKDS(dsp);
	}
	NFSUNLOCKMNT(nmp);
	return (0);
}

/*
 * Any signal that can interrupt an NFS operation in an intr mount
 * should be added to this set.  SIGSTOP and SIGKILL cannot be masked.
 */
int newnfs_sig_set[] = {
	SIGINT,
	SIGTERM,
	SIGHUP,
	SIGKILL,
	SIGQUIT
};

/*
 * Check to see if one of the signals in our subset is pending on
 * the process (in an intr mount).
 */
static int
nfs_sig_pending(sigset_t set)
{
	int i;

	for (i = 0 ; i < nitems(newnfs_sig_set); i++)
		if (SIGISMEMBER(set, newnfs_sig_set[i]))
			return (1);
	return (0);
}

/*
 * The set/restore sigmask functions are used to (temporarily) overwrite
 * the thread td_sigmask during an RPC call (for example).  These are also
 * used in other places in the NFS client that might tsleep().
 */
void
newnfs_set_sigmask(struct thread *td, sigset_t *oldset)
{
	sigset_t newset;
	int i;
	struct proc *p;

	SIGFILLSET(newset);
	if (td == NULL)
		td = curthread;		/* XXX */
	p = td->td_proc;
	/* Remove the NFS set of signals from newset */
	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	for (i = 0 ; i < nitems(newnfs_sig_set); i++) {
		/*
		 * But make sure we leave the ones already masked
		 * by the process, ie. remove the signal from the
		 * temporary signalmask only if it wasn't already
		 * in p_sigmask.
		 */
		if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) &&
		    !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i]))
			SIGDELSET(newset, newnfs_sig_set[i]);
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
	    SIGPROCMASK_PROC_LOCKED);
	PROC_UNLOCK(p);
}

void
newnfs_restore_sigmask(struct thread *td, sigset_t *set)
{
	if (td == NULL)
		td = curthread;		/* XXX */
	kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
}

/*
 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
 * old one after msleep() returns.
 */
int
newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority,
    char *wmesg, int timo)
{
	sigset_t oldset;
	int error;

	if ((priority & PCATCH) == 0)
		return msleep(ident, mtx, priority, wmesg, timo);
	if (td == NULL)
		td = curthread;		/* XXX */
	newnfs_set_sigmask(td, &oldset);
	error = msleep(ident, mtx, priority, wmesg, timo);
	newnfs_restore_sigmask(td, &oldset);
	return (error);
}

/*
 * Test for a termination condition pending on the process.
 * This is used for NFSMNT_INT mounts.
 */
int
newnfs_sigintr(struct nfsmount *nmp, struct thread *td)
{
	struct proc *p;
	sigset_t tmpset;

	/* Terminate all requests while attempting a forced unmount. */
	if (NFSCL_FORCEDISM(nmp->nm_mountp))
		return (EIO);
	if (!(nmp->nm_flag & NFSMNT_INT))
		return (0);
	if (td == NULL)
		return (0);
	p = td->td_proc;
	PROC_LOCK(p);
	tmpset = p->p_siglist;
	SIGSETOR(tmpset, td->td_siglist);
	SIGSETNAND(tmpset, td->td_sigmask);
	mtx_lock(&p->p_sigacts->ps_mtx);
	SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
	    && nfs_sig_pending(tmpset)) {
		PROC_UNLOCK(p);
		return (EINTR);
	}
	PROC_UNLOCK(p);
	return (0);
}

static int
nfs_msg(struct thread *td, const char *server, const char *msg, int error)
{
	struct proc *p;

	p = td ? td->td_proc : NULL;
	if (error) {
		tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n",
		    server, msg, error);
	} else {
		tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
	}
	return (0);
}

static void
nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
    int error, int flags)
{
	if (nmp == NULL)
		return;
	mtx_lock(&nmp->nm_mtx);
	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
		nmp->nm_state |= NFSSTA_TIMEO;
		mtx_unlock(&nmp->nm_mtx);
		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
		    VQ_NOTRESP, 0);
	} else
		mtx_unlock(&nmp->nm_mtx);
	mtx_lock(&nmp->nm_mtx);
	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
		nmp->nm_state |= NFSSTA_LOCKTIMEO;
		mtx_unlock(&nmp->nm_mtx);
		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
		    VQ_NOTRESPLOCK, 0);
	} else
		mtx_unlock(&nmp->nm_mtx);
	nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
}

static void
nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
    int flags, int tprintfmsg)
{
	if (nmp == NULL)
		return;
	if (tprintfmsg) {
		nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
	}

	mtx_lock(&nmp->nm_mtx);
	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
		nmp->nm_state &= ~NFSSTA_TIMEO;
		mtx_unlock(&nmp->nm_mtx);
		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
		    VQ_NOTRESP, 1);
	} else
		mtx_unlock(&nmp->nm_mtx);

	mtx_lock(&nmp->nm_mtx);
	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
		mtx_unlock(&nmp->nm_mtx);
		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
		    VQ_NOTRESPLOCK, 1);
	} else
		mtx_unlock(&nmp->nm_mtx);
}