1 /*- 2 * Copyright (c) 1989, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 /* 38 * Socket operations for use by nfs 39 */ 40 41 #include "opt_inet6.h" 42 #include "opt_kdtrace.h" 43 #include "opt_kgssapi.h" 44 #include "opt_nfs.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/limits.h> 50 #include <sys/lock.h> 51 #include <sys/malloc.h> 52 #include <sys/mbuf.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/proc.h> 56 #include <sys/signalvar.h> 57 #include <sys/syscallsubr.h> 58 #include <sys/sysctl.h> 59 #include <sys/syslog.h> 60 #include <sys/vnode.h> 61 62 #include <rpc/rpc.h> 63 64 #include <kgssapi/krb5/kcrypto.h> 65 66 #include <fs/nfs/nfsport.h> 67 68 #ifdef KDTRACE_HOOKS 69 #include <sys/dtrace_bsd.h> 70 71 dtrace_nfsclient_nfs23_start_probe_func_t 72 dtrace_nfscl_nfs234_start_probe; 73 74 dtrace_nfsclient_nfs23_done_probe_func_t 75 dtrace_nfscl_nfs234_done_probe; 76 77 /* 78 * Registered probes by RPC type. 79 */ 80 uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; 81 uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; 82 83 uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; 84 uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; 85 86 uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; 87 uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; 88 #endif 89 90 NFSSTATESPINLOCK; 91 NFSREQSPINLOCK; 92 extern struct nfsstats newnfsstats; 93 extern struct nfsreqhead nfsd_reqq; 94 extern int nfscl_ticks; 95 extern void (*ncl_call_invalcaches)(struct vnode *); 96 97 static int nfsrv_gsscallbackson = 0; 98 static int nfs_bufpackets = 4; 99 static int nfs_reconnects; 100 static int nfs3_jukebox_delay = 10; 101 static int nfs_skip_wcc_data_onerr = 1; 102 static int nfs_keytab_enctype = ETYPE_DES_CBC_CRC; 103 104 SYSCTL_DECL(_vfs_nfs); 105 106 SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, 107 "Buffer reservation size 2 < x < 64"); 108 SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, 109 "Number of times the nfs client has had to reconnect"); 110 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, 111 "Number of seconds to delay a retry after receiving EJUKEBOX"); 112 SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, 113 "Disable weak cache consistency checking when server returns an error"); 114 SYSCTL_INT(_vfs_nfs, OID_AUTO, keytab_enctype, CTLFLAG_RW, &nfs_keytab_enctype, 0, 115 "Encryption type for the keytab entry used by nfs"); 116 117 static void nfs_down(struct nfsmount *, struct thread *, const char *, 118 int, int); 119 static void nfs_up(struct nfsmount *, struct thread *, const char *, 120 int, int); 121 static int nfs_msg(struct thread *, const char *, const char *, int); 122 123 struct nfs_cached_auth { 124 int ca_refs; /* refcount, including 1 from the cache */ 125 uid_t ca_uid; /* uid that corresponds to this auth */ 126 AUTH *ca_auth; /* RPC auth handle */ 127 }; 128 129 static int nfsv2_procid[NFS_V3NPROCS] = { 130 NFSV2PROC_NULL, 131 NFSV2PROC_GETATTR, 132 NFSV2PROC_SETATTR, 133 NFSV2PROC_LOOKUP, 134 NFSV2PROC_NOOP, 135 NFSV2PROC_READLINK, 136 NFSV2PROC_READ, 137 NFSV2PROC_WRITE, 138 NFSV2PROC_CREATE, 139 NFSV2PROC_MKDIR, 140 NFSV2PROC_SYMLINK, 141 NFSV2PROC_CREATE, 142 NFSV2PROC_REMOVE, 143 NFSV2PROC_RMDIR, 144 NFSV2PROC_RENAME, 145 NFSV2PROC_LINK, 146 NFSV2PROC_READDIR, 147 NFSV2PROC_NOOP, 148 NFSV2PROC_STATFS, 149 NFSV2PROC_NOOP, 150 NFSV2PROC_NOOP, 151 NFSV2PROC_NOOP, 152 }; 153 154 /* 155 * Initialize sockets and congestion for a new NFS connection. 156 * We do not free the sockaddr if error. 157 */ 158 int 159 newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, 160 struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) 161 { 162 int rcvreserve, sndreserve; 163 int pktscale; 164 struct sockaddr *saddr; 165 struct ucred *origcred; 166 CLIENT *client; 167 struct netconfig *nconf; 168 struct socket *so; 169 int one = 1, retries, error = 0; 170 struct thread *td = curthread; 171 struct timeval timo; 172 173 /* 174 * We need to establish the socket using the credentials of 175 * the mountpoint. Some parts of this process (such as 176 * sobind() and soconnect()) will use the curent thread's 177 * credential instead of the socket credential. To work 178 * around this, temporarily change the current thread's 179 * credential to that of the mountpoint. 180 * 181 * XXX: It would be better to explicitly pass the correct 182 * credential to sobind() and soconnect(). 183 */ 184 origcred = td->td_ucred; 185 186 /* 187 * Use the credential in nr_cred, if not NULL. 188 */ 189 if (nrp->nr_cred != NULL) 190 td->td_ucred = nrp->nr_cred; 191 else 192 td->td_ucred = cred; 193 saddr = nrp->nr_nam; 194 195 if (saddr->sa_family == AF_INET) 196 if (nrp->nr_sotype == SOCK_DGRAM) 197 nconf = getnetconfigent("udp"); 198 else 199 nconf = getnetconfigent("tcp"); 200 else 201 if (nrp->nr_sotype == SOCK_DGRAM) 202 nconf = getnetconfigent("udp6"); 203 else 204 nconf = getnetconfigent("tcp6"); 205 206 pktscale = nfs_bufpackets; 207 if (pktscale < 2) 208 pktscale = 2; 209 if (pktscale > 64) 210 pktscale = 64; 211 /* 212 * soreserve() can fail if sb_max is too small, so shrink pktscale 213 * and try again if there is an error. 214 * Print a log message suggesting increasing sb_max. 215 * Creating a socket and doing this is necessary since, if the 216 * reservation sizes are too large and will make soreserve() fail, 217 * the connection will work until a large send is attempted and 218 * then it will loop in the krpc code. 219 */ 220 so = NULL; 221 saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *); 222 error = socreate(saddr->sa_family, &so, nrp->nr_sotype, 223 nrp->nr_soproto, td->td_ucred, td); 224 if (error) { 225 td->td_ucred = origcred; 226 goto out; 227 } 228 do { 229 if (error != 0 && pktscale > 2) 230 pktscale--; 231 if (nrp->nr_sotype == SOCK_DGRAM) { 232 if (nmp != NULL) { 233 sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * 234 pktscale; 235 rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * 236 pktscale; 237 } else { 238 sndreserve = rcvreserve = 1024 * pktscale; 239 } 240 } else { 241 if (nrp->nr_sotype != SOCK_STREAM) 242 panic("nfscon sotype"); 243 if (nmp != NULL) { 244 sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + 245 sizeof (u_int32_t)) * pktscale; 246 rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + 247 sizeof (u_int32_t)) * pktscale; 248 } else { 249 sndreserve = rcvreserve = 1024 * pktscale; 250 } 251 } 252 error = soreserve(so, sndreserve, rcvreserve); 253 } while (error != 0 && pktscale > 2); 254 soclose(so); 255 if (error) { 256 td->td_ucred = origcred; 257 goto out; 258 } 259 260 client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog, 261 nrp->nr_vers, sndreserve, rcvreserve); 262 CLNT_CONTROL(client, CLSET_WAITCHAN, "newnfsreq"); 263 if (nmp != NULL) { 264 if ((nmp->nm_flag & NFSMNT_INT)) 265 CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 266 if ((nmp->nm_flag & NFSMNT_RESVPORT)) 267 CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 268 if (NFSHASSOFT(nmp)) { 269 if (nmp->nm_sotype == SOCK_DGRAM) 270 /* 271 * For UDP, the large timeout for a reconnect 272 * will be set to "nm_retry * nm_timeo / 2", so 273 * we only want to do 2 reconnect timeout 274 * retries. 275 */ 276 retries = 2; 277 else 278 retries = nmp->nm_retry; 279 } else 280 retries = INT_MAX; 281 } else { 282 /* 283 * Three cases: 284 * - Null RPC callback to client 285 * - Non-Null RPC callback to client, wait a little longer 286 * - upcalls to nfsuserd and gssd (clp == NULL) 287 */ 288 if (callback_retry_mult == 0) { 289 retries = NFSV4_UPCALLRETRY; 290 CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 291 } else { 292 retries = NFSV4_CALLBACKRETRY * callback_retry_mult; 293 } 294 } 295 CLNT_CONTROL(client, CLSET_RETRIES, &retries); 296 297 if (nmp != NULL) { 298 /* 299 * For UDP, there are 2 timeouts: 300 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer 301 * that does a retransmit of an RPC request using the same 302 * socket and xid. This is what you normally want to do, 303 * since NFS servers depend on "same xid" for their 304 * Duplicate Request Cache. 305 * - timeout specified in CLNT_CALL_MBUF(), which specifies when 306 * retransmits on the same socket should fail and a fresh 307 * socket created. Each of these timeouts counts as one 308 * CLSET_RETRIES as set above. 309 * Set the initial retransmit timeout for UDP. This timeout 310 * doesn't exist for TCP and the following call just fails, 311 * which is ok. 312 */ 313 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 314 timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 315 CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); 316 } 317 318 mtx_lock(&nrp->nr_mtx); 319 if (nrp->nr_client != NULL) { 320 /* 321 * Someone else already connected. 322 */ 323 CLNT_RELEASE(client); 324 } else { 325 nrp->nr_client = client; 326 } 327 328 /* 329 * Protocols that do not require connections may be optionally left 330 * unconnected for servers that reply from a port other than NFS_PORT. 331 */ 332 if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { 333 mtx_unlock(&nrp->nr_mtx); 334 CLNT_CONTROL(client, CLSET_CONNECT, &one); 335 } else { 336 mtx_unlock(&nrp->nr_mtx); 337 } 338 339 /* Restore current thread's credentials. */ 340 td->td_ucred = origcred; 341 342 out: 343 NFSEXITCODE(error); 344 return (error); 345 } 346 347 /* 348 * NFS disconnect. Clean up and unlink. 349 */ 350 void 351 newnfs_disconnect(struct nfssockreq *nrp) 352 { 353 CLIENT *client; 354 355 mtx_lock(&nrp->nr_mtx); 356 if (nrp->nr_client != NULL) { 357 client = nrp->nr_client; 358 nrp->nr_client = NULL; 359 mtx_unlock(&nrp->nr_mtx); 360 rpc_gss_secpurge_call(client); 361 CLNT_CLOSE(client); 362 CLNT_RELEASE(client); 363 } else { 364 mtx_unlock(&nrp->nr_mtx); 365 } 366 } 367 368 static AUTH * 369 nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, 370 char *srv_principal, gss_OID mech_oid, struct ucred *cred) 371 { 372 rpc_gss_service_t svc; 373 AUTH *auth; 374 #ifdef notyet 375 rpc_gss_options_req_t req_options; 376 #endif 377 378 switch (secflavour) { 379 case RPCSEC_GSS_KRB5: 380 case RPCSEC_GSS_KRB5I: 381 case RPCSEC_GSS_KRB5P: 382 if (!mech_oid) { 383 if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) 384 return (NULL); 385 } 386 if (secflavour == RPCSEC_GSS_KRB5) 387 svc = rpc_gss_svc_none; 388 else if (secflavour == RPCSEC_GSS_KRB5I) 389 svc = rpc_gss_svc_integrity; 390 else 391 svc = rpc_gss_svc_privacy; 392 #ifdef notyet 393 req_options.req_flags = GSS_C_MUTUAL_FLAG; 394 req_options.time_req = 0; 395 req_options.my_cred = GSS_C_NO_CREDENTIAL; 396 req_options.input_channel_bindings = NULL; 397 req_options.enc_type = nfs_keytab_enctype; 398 399 auth = rpc_gss_secfind_call(nrp->nr_client, cred, 400 clnt_principal, srv_principal, mech_oid, svc, 401 &req_options); 402 #else 403 /* 404 * Until changes to the rpcsec_gss code are committed, 405 * there is no support for host based initiator 406 * principals. As such, that case cannot yet be handled. 407 */ 408 if (clnt_principal == NULL) 409 auth = rpc_gss_secfind_call(nrp->nr_client, cred, 410 srv_principal, mech_oid, svc); 411 else 412 auth = NULL; 413 #endif 414 if (auth != NULL) 415 return (auth); 416 /* fallthrough */ 417 case AUTH_SYS: 418 default: 419 return (authunix_create(cred)); 420 421 } 422 } 423 424 /* 425 * Callback from the RPC code to generate up/down notifications. 426 */ 427 428 struct nfs_feedback_arg { 429 struct nfsmount *nf_mount; 430 int nf_lastmsg; /* last tprintf */ 431 int nf_tprintfmsg; 432 struct thread *nf_td; 433 }; 434 435 static void 436 nfs_feedback(int type, int proc, void *arg) 437 { 438 struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 439 struct nfsmount *nmp = nf->nf_mount; 440 struct timeval now; 441 442 getmicrouptime(&now); 443 444 switch (type) { 445 case FEEDBACK_REXMIT2: 446 case FEEDBACK_RECONNECT: 447 if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { 448 nfs_down(nmp, nf->nf_td, 449 "not responding", 0, NFSSTA_TIMEO); 450 nf->nf_tprintfmsg = TRUE; 451 nf->nf_lastmsg = now.tv_sec; 452 } 453 break; 454 455 case FEEDBACK_OK: 456 nfs_up(nf->nf_mount, nf->nf_td, 457 "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 458 break; 459 } 460 } 461 462 /* 463 * newnfs_request - goes something like this 464 * - does the rpc by calling the krpc layer 465 * - break down rpc header and return with nfs reply 466 * nb: always frees up nd_mreq mbuf list 467 */ 468 int 469 newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, 470 struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, 471 struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, 472 u_char *retsum, int toplevel, u_int64_t *xidp) 473 { 474 u_int32_t *tl; 475 time_t waituntil; 476 int i, j, set_uid = 0, set_sigset = 0, timeo; 477 int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS; 478 u_int16_t procnum; 479 u_int trylater_delay = 1; 480 struct nfs_feedback_arg nf; 481 struct timeval timo, now; 482 AUTH *auth; 483 struct rpc_callextra ext; 484 enum clnt_stat stat; 485 struct nfsreq *rep = NULL; 486 char *srv_principal = NULL; 487 uid_t saved_uid = (uid_t)-1; 488 sigset_t oldset; 489 490 if (xidp != NULL) 491 *xidp = 0; 492 /* Reject requests while attempting a forced unmount. */ 493 if (nmp != NULL && (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)) { 494 m_freem(nd->nd_mreq); 495 return (ESTALE); 496 } 497 498 /* For client side interruptible mounts, mask off the signals. */ 499 if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { 500 newnfs_set_sigmask(td, &oldset); 501 set_sigset = 1; 502 } 503 504 /* 505 * XXX if not already connected call nfs_connect now. Longer 506 * term, change nfs_mount to call nfs_connect unconditionally 507 * and let clnt_reconnect_create handle reconnects. 508 */ 509 if (nrp->nr_client == NULL) 510 newnfs_connect(nmp, nrp, cred, td, 0); 511 512 /* 513 * For a client side mount, nmp is != NULL and clp == NULL. For 514 * server calls (callbacks or upcalls), nmp == NULL. 515 */ 516 if (clp != NULL) { 517 NFSLOCKSTATE(); 518 if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { 519 secflavour = RPCSEC_GSS_KRB5; 520 if (nd->nd_procnum != NFSPROC_NULL) { 521 if (clp->lc_flags & LCL_GSSINTEGRITY) 522 secflavour = RPCSEC_GSS_KRB5I; 523 else if (clp->lc_flags & LCL_GSSPRIVACY) 524 secflavour = RPCSEC_GSS_KRB5P; 525 } 526 } 527 NFSUNLOCKSTATE(); 528 } else if (nmp != NULL && NFSHASKERB(nmp) && 529 nd->nd_procnum != NFSPROC_NULL) { 530 if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0) 531 nd->nd_flag |= ND_USEGSSNAME; 532 if ((nd->nd_flag & ND_USEGSSNAME) != 0) { 533 /* 534 * If there is a client side host based credential, 535 * use that, otherwise use the system uid, if set. 536 */ 537 if (nmp->nm_krbnamelen > 0) { 538 usegssname = 1; 539 } else if (nmp->nm_uid != (uid_t)-1) { 540 saved_uid = cred->cr_uid; 541 cred->cr_uid = nmp->nm_uid; 542 set_uid = 1; 543 } 544 } else if (nmp->nm_krbnamelen == 0 && 545 nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) { 546 /* 547 * If there is no host based principal name and 548 * the system uid is set and this is root, use the 549 * system uid, since root won't have user 550 * credentials in a credentials cache file. 551 */ 552 saved_uid = cred->cr_uid; 553 cred->cr_uid = nmp->nm_uid; 554 set_uid = 1; 555 } 556 if (NFSHASINTEGRITY(nmp)) 557 secflavour = RPCSEC_GSS_KRB5I; 558 else if (NFSHASPRIVACY(nmp)) 559 secflavour = RPCSEC_GSS_KRB5P; 560 else 561 secflavour = RPCSEC_GSS_KRB5; 562 srv_principal = NFSMNT_SRVKRBNAME(nmp); 563 } else if (nmp != NULL && !NFSHASKERB(nmp) && 564 nd->nd_procnum != NFSPROC_NULL && 565 (nd->nd_flag & ND_USEGSSNAME) != 0) { 566 /* 567 * Use the uid that did the mount when the RPC is doing 568 * NFSv4 system operations, as indicated by the 569 * ND_USEGSSNAME flag, for the AUTH_SYS case. 570 */ 571 saved_uid = cred->cr_uid; 572 if (nmp->nm_uid != (uid_t)-1) 573 cred->cr_uid = nmp->nm_uid; 574 else 575 cred->cr_uid = 0; 576 set_uid = 1; 577 } 578 579 if (nmp != NULL) { 580 bzero(&nf, sizeof(struct nfs_feedback_arg)); 581 nf.nf_mount = nmp; 582 nf.nf_td = td; 583 getmicrouptime(&now); 584 nf.nf_lastmsg = now.tv_sec - 585 ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); 586 } 587 588 if (nd->nd_procnum == NFSPROC_NULL) 589 auth = authnone_create(); 590 else if (usegssname) 591 auth = nfs_getauth(nrp, secflavour, nmp->nm_krbname, 592 srv_principal, NULL, cred); 593 else 594 auth = nfs_getauth(nrp, secflavour, NULL, 595 srv_principal, NULL, cred); 596 if (set_uid) 597 cred->cr_uid = saved_uid; 598 if (auth == NULL) { 599 m_freem(nd->nd_mreq); 600 if (set_sigset) 601 newnfs_restore_sigmask(td, &oldset); 602 return (EACCES); 603 } 604 bzero(&ext, sizeof(ext)); 605 ext.rc_auth = auth; 606 if (nmp != NULL) { 607 ext.rc_feedback = nfs_feedback; 608 ext.rc_feedback_arg = &nf; 609 } 610 611 procnum = nd->nd_procnum; 612 if ((nd->nd_flag & ND_NFSV4) && 613 nd->nd_procnum != NFSPROC_NULL && 614 nd->nd_procnum != NFSV4PROC_CBCOMPOUND) 615 procnum = NFSV4PROC_COMPOUND; 616 617 if (nmp != NULL) { 618 NFSINCRGLOBAL(newnfsstats.rpcrequests); 619 620 /* Map the procnum to the old NFSv2 one, as required. */ 621 if ((nd->nd_flag & ND_NFSV2) != 0) { 622 if (nd->nd_procnum < NFS_V3NPROCS) 623 procnum = nfsv2_procid[nd->nd_procnum]; 624 else 625 procnum = NFSV2PROC_NOOP; 626 } 627 628 /* 629 * Now only used for the R_DONTRECOVER case, but until that is 630 * supported within the krpc code, I need to keep a queue of 631 * outstanding RPCs for nfsv4 client requests. 632 */ 633 if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) 634 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), 635 M_NFSDREQ, M_WAITOK); 636 #ifdef KDTRACE_HOOKS 637 if (dtrace_nfscl_nfs234_start_probe != NULL) { 638 uint32_t probe_id; 639 int probe_procnum; 640 641 if (nd->nd_flag & ND_NFSV4) { 642 probe_id = 643 nfscl_nfs4_start_probes[nd->nd_procnum]; 644 probe_procnum = nd->nd_procnum; 645 } else if (nd->nd_flag & ND_NFSV3) { 646 probe_id = nfscl_nfs3_start_probes[procnum]; 647 probe_procnum = procnum; 648 } else { 649 probe_id = 650 nfscl_nfs2_start_probes[nd->nd_procnum]; 651 probe_procnum = procnum; 652 } 653 if (probe_id != 0) 654 (dtrace_nfscl_nfs234_start_probe) 655 (probe_id, vp, nd->nd_mreq, cred, 656 probe_procnum); 657 } 658 #endif 659 } 660 trycnt = 0; 661 tryagain: 662 /* 663 * This timeout specifies when a new socket should be created, 664 * along with new xid values. For UDP, this should be done 665 * infrequently, since retransmits of RPC requests should normally 666 * use the same xid. 667 */ 668 if (nmp == NULL) { 669 timo.tv_usec = 0; 670 if (clp == NULL) 671 timo.tv_sec = NFSV4_UPCALLTIMEO; 672 else 673 timo.tv_sec = NFSV4_CALLBACKTIMEO; 674 } else { 675 if (nrp->nr_sotype != SOCK_DGRAM) { 676 timo.tv_usec = 0; 677 if ((nmp->nm_flag & NFSMNT_NFSV4)) 678 timo.tv_sec = INT_MAX; 679 else 680 timo.tv_sec = NFS_TCPTIMEO; 681 } else { 682 if (NFSHASSOFT(nmp)) { 683 /* 684 * CLSET_RETRIES is set to 2, so this should be 685 * half of the total timeout required. 686 */ 687 timeo = nmp->nm_retry * nmp->nm_timeo / 2; 688 if (timeo < 1) 689 timeo = 1; 690 timo.tv_sec = timeo / NFS_HZ; 691 timo.tv_usec = (timeo % NFS_HZ) * 1000000 / 692 NFS_HZ; 693 } else { 694 /* For UDP hard mounts, use a large value. */ 695 timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; 696 timo.tv_usec = 0; 697 } 698 } 699 700 if (rep != NULL) { 701 rep->r_flags = 0; 702 rep->r_nmp = nmp; 703 /* 704 * Chain request into list of outstanding requests. 705 */ 706 NFSLOCKREQ(); 707 TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain); 708 NFSUNLOCKREQ(); 709 } 710 } 711 712 nd->nd_mrep = NULL; 713 stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, 714 &nd->nd_mrep, timo); 715 716 if (rep != NULL) { 717 /* 718 * RPC done, unlink the request. 719 */ 720 NFSLOCKREQ(); 721 TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); 722 NFSUNLOCKREQ(); 723 } 724 725 /* 726 * If there was a successful reply and a tprintf msg. 727 * tprintf a response. 728 */ 729 if (stat == RPC_SUCCESS) { 730 error = 0; 731 } else if (stat == RPC_TIMEDOUT) { 732 error = ETIMEDOUT; 733 } else if (stat == RPC_VERSMISMATCH) { 734 error = EOPNOTSUPP; 735 } else if (stat == RPC_PROGVERSMISMATCH) { 736 error = EPROTONOSUPPORT; 737 } else { 738 error = EACCES; 739 } 740 if (error) { 741 m_freem(nd->nd_mreq); 742 AUTH_DESTROY(auth); 743 if (rep != NULL) 744 FREE((caddr_t)rep, M_NFSDREQ); 745 if (set_sigset) 746 newnfs_restore_sigmask(td, &oldset); 747 return (error); 748 } 749 750 KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 751 752 /* 753 * Search for any mbufs that are not a multiple of 4 bytes long 754 * or with m_data not longword aligned. 755 * These could cause pointer alignment problems, so copy them to 756 * well aligned mbufs. 757 */ 758 newnfs_realign(&nd->nd_mrep); 759 nd->nd_md = nd->nd_mrep; 760 nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); 761 nd->nd_repstat = 0; 762 if (nd->nd_procnum != NFSPROC_NULL) { 763 /* 764 * and now the actual NFS xdr. 765 */ 766 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 767 nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); 768 if (nd->nd_repstat != 0) { 769 if (((nd->nd_repstat == NFSERR_DELAY || 770 nd->nd_repstat == NFSERR_GRACE) && 771 (nd->nd_flag & ND_NFSV4) && 772 nd->nd_procnum != NFSPROC_DELEGRETURN && 773 nd->nd_procnum != NFSPROC_SETATTR && 774 nd->nd_procnum != NFSPROC_READ && 775 nd->nd_procnum != NFSPROC_WRITE && 776 nd->nd_procnum != NFSPROC_OPEN && 777 nd->nd_procnum != NFSPROC_CREATE && 778 nd->nd_procnum != NFSPROC_OPENCONFIRM && 779 nd->nd_procnum != NFSPROC_OPENDOWNGRADE && 780 nd->nd_procnum != NFSPROC_CLOSE && 781 nd->nd_procnum != NFSPROC_LOCK && 782 nd->nd_procnum != NFSPROC_LOCKU) || 783 (nd->nd_repstat == NFSERR_DELAY && 784 (nd->nd_flag & ND_NFSV4) == 0) || 785 nd->nd_repstat == NFSERR_RESOURCE) { 786 if (trylater_delay > NFS_TRYLATERDEL) 787 trylater_delay = NFS_TRYLATERDEL; 788 waituntil = NFSD_MONOSEC + trylater_delay; 789 while (NFSD_MONOSEC < waituntil) 790 (void) nfs_catnap(PZERO, 0, "nfstry"); 791 trylater_delay *= 2; 792 m_freem(nd->nd_mrep); 793 nd->nd_mrep = NULL; 794 goto tryagain; 795 } 796 797 /* 798 * If the File Handle was stale, invalidate the 799 * lookup cache, just in case. 800 * (vp != NULL implies a client side call) 801 */ 802 if (nd->nd_repstat == ESTALE && vp != NULL) { 803 cache_purge(vp); 804 if (ncl_call_invalcaches != NULL) 805 (*ncl_call_invalcaches)(vp); 806 } 807 } 808 809 /* 810 * Get rid of the tag, return count, and PUTFH result for V4. 811 */ 812 if (nd->nd_flag & ND_NFSV4) { 813 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 814 i = fxdr_unsigned(int, *tl); 815 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 816 if (error) 817 goto nfsmout; 818 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 819 i = fxdr_unsigned(int, *++tl); 820 821 /* 822 * If the first op's status is non-zero, mark that 823 * there is no more data to process. 824 */ 825 if (*++tl) 826 nd->nd_flag |= ND_NOMOREDATA; 827 828 /* 829 * If the first op is Putfh, throw its results away 830 * and toss the op# and status for the first op. 831 */ 832 if (nmp != NULL && i == NFSV4OP_PUTFH && *tl == 0) { 833 NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); 834 i = fxdr_unsigned(int, *tl++); 835 j = fxdr_unsigned(int, *tl); 836 /* 837 * All Compounds that do an Op that must 838 * be in sequence consist of NFSV4OP_PUTFH 839 * followed by one of these. As such, we 840 * can determine if the seqid# should be 841 * incremented, here. 842 */ 843 if ((i == NFSV4OP_OPEN || 844 i == NFSV4OP_OPENCONFIRM || 845 i == NFSV4OP_OPENDOWNGRADE || 846 i == NFSV4OP_CLOSE || 847 i == NFSV4OP_LOCK || 848 i == NFSV4OP_LOCKU) && 849 (j == 0 || 850 (j != NFSERR_STALECLIENTID && 851 j != NFSERR_STALESTATEID && 852 j != NFSERR_BADSTATEID && 853 j != NFSERR_BADSEQID && 854 j != NFSERR_BADXDR && 855 j != NFSERR_RESOURCE && 856 j != NFSERR_NOFILEHANDLE))) 857 nd->nd_flag |= ND_INCRSEQID; 858 /* 859 * If the first op's status is non-zero, mark 860 * that there is no more data to process. 861 */ 862 if (j) 863 nd->nd_flag |= ND_NOMOREDATA; 864 } 865 866 /* 867 * If R_DONTRECOVER is set, replace the stale error 868 * reply, so that recovery isn't initiated. 869 */ 870 if ((nd->nd_repstat == NFSERR_STALECLIENTID || 871 nd->nd_repstat == NFSERR_STALESTATEID) && 872 rep != NULL && (rep->r_flags & R_DONTRECOVER)) 873 nd->nd_repstat = NFSERR_STALEDONTRECOVER; 874 } 875 } 876 877 #ifdef KDTRACE_HOOKS 878 if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { 879 uint32_t probe_id; 880 int probe_procnum; 881 882 if (nd->nd_flag & ND_NFSV4) { 883 probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; 884 probe_procnum = nd->nd_procnum; 885 } else if (nd->nd_flag & ND_NFSV3) { 886 probe_id = nfscl_nfs3_done_probes[procnum]; 887 probe_procnum = procnum; 888 } else { 889 probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; 890 probe_procnum = procnum; 891 } 892 if (probe_id != 0) 893 (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, 894 nd->nd_mreq, cred, probe_procnum, 0); 895 } 896 #endif 897 898 m_freem(nd->nd_mreq); 899 AUTH_DESTROY(auth); 900 if (rep != NULL) 901 FREE((caddr_t)rep, M_NFSDREQ); 902 if (set_sigset) 903 newnfs_restore_sigmask(td, &oldset); 904 return (0); 905 nfsmout: 906 mbuf_freem(nd->nd_mrep); 907 mbuf_freem(nd->nd_mreq); 908 AUTH_DESTROY(auth); 909 if (rep != NULL) 910 FREE((caddr_t)rep, M_NFSDREQ); 911 if (set_sigset) 912 newnfs_restore_sigmask(td, &oldset); 913 return (error); 914 } 915 916 /* 917 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 918 * wait for all requests to complete. This is used by forced unmounts 919 * to terminate any outstanding RPCs. 920 */ 921 int 922 newnfs_nmcancelreqs(struct nfsmount *nmp) 923 { 924 925 if (nmp->nm_sockreq.nr_client != NULL) 926 CLNT_CLOSE(nmp->nm_sockreq.nr_client); 927 return (0); 928 } 929 930 /* 931 * Any signal that can interrupt an NFS operation in an intr mount 932 * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 933 */ 934 int newnfs_sig_set[] = { 935 SIGINT, 936 SIGTERM, 937 SIGHUP, 938 SIGKILL, 939 SIGSTOP, 940 SIGQUIT 941 }; 942 943 /* 944 * Check to see if one of the signals in our subset is pending on 945 * the process (in an intr mount). 946 */ 947 static int 948 nfs_sig_pending(sigset_t set) 949 { 950 int i; 951 952 for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) 953 if (SIGISMEMBER(set, newnfs_sig_set[i])) 954 return (1); 955 return (0); 956 } 957 958 /* 959 * The set/restore sigmask functions are used to (temporarily) overwrite 960 * the process p_sigmask during an RPC call (for example). These are also 961 * used in other places in the NFS client that might tsleep(). 962 */ 963 void 964 newnfs_set_sigmask(struct thread *td, sigset_t *oldset) 965 { 966 sigset_t newset; 967 int i; 968 struct proc *p; 969 970 SIGFILLSET(newset); 971 if (td == NULL) 972 td = curthread; /* XXX */ 973 p = td->td_proc; 974 /* Remove the NFS set of signals from newset */ 975 PROC_LOCK(p); 976 mtx_lock(&p->p_sigacts->ps_mtx); 977 for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) { 978 /* 979 * But make sure we leave the ones already masked 980 * by the process, ie. remove the signal from the 981 * temporary signalmask only if it wasn't already 982 * in p_sigmask. 983 */ 984 if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && 985 !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) 986 SIGDELSET(newset, newnfs_sig_set[i]); 987 } 988 mtx_unlock(&p->p_sigacts->ps_mtx); 989 PROC_UNLOCK(p); 990 kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); 991 } 992 993 void 994 newnfs_restore_sigmask(struct thread *td, sigset_t *set) 995 { 996 if (td == NULL) 997 td = curthread; /* XXX */ 998 kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 999 } 1000 1001 /* 1002 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 1003 * old one after msleep() returns. 1004 */ 1005 int 1006 newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) 1007 { 1008 sigset_t oldset; 1009 int error; 1010 struct proc *p; 1011 1012 if ((priority & PCATCH) == 0) 1013 return msleep(ident, mtx, priority, wmesg, timo); 1014 if (td == NULL) 1015 td = curthread; /* XXX */ 1016 newnfs_set_sigmask(td, &oldset); 1017 error = msleep(ident, mtx, priority, wmesg, timo); 1018 newnfs_restore_sigmask(td, &oldset); 1019 p = td->td_proc; 1020 return (error); 1021 } 1022 1023 /* 1024 * Test for a termination condition pending on the process. 1025 * This is used for NFSMNT_INT mounts. 1026 */ 1027 int 1028 newnfs_sigintr(struct nfsmount *nmp, struct thread *td) 1029 { 1030 struct proc *p; 1031 sigset_t tmpset; 1032 1033 /* Terminate all requests while attempting a forced unmount. */ 1034 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 1035 return (EIO); 1036 if (!(nmp->nm_flag & NFSMNT_INT)) 1037 return (0); 1038 if (td == NULL) 1039 return (0); 1040 p = td->td_proc; 1041 PROC_LOCK(p); 1042 tmpset = p->p_siglist; 1043 SIGSETOR(tmpset, td->td_siglist); 1044 SIGSETNAND(tmpset, td->td_sigmask); 1045 mtx_lock(&p->p_sigacts->ps_mtx); 1046 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 1047 mtx_unlock(&p->p_sigacts->ps_mtx); 1048 if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 1049 && nfs_sig_pending(tmpset)) { 1050 PROC_UNLOCK(p); 1051 return (EINTR); 1052 } 1053 PROC_UNLOCK(p); 1054 return (0); 1055 } 1056 1057 static int 1058 nfs_msg(struct thread *td, const char *server, const char *msg, int error) 1059 { 1060 struct proc *p; 1061 1062 p = td ? td->td_proc : NULL; 1063 if (error) { 1064 tprintf(p, LOG_INFO, "newnfs server %s: %s, error %d\n", 1065 server, msg, error); 1066 } else { 1067 tprintf(p, LOG_INFO, "newnfs server %s: %s\n", server, msg); 1068 } 1069 return (0); 1070 } 1071 1072 static void 1073 nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 1074 int error, int flags) 1075 { 1076 if (nmp == NULL) 1077 return; 1078 mtx_lock(&nmp->nm_mtx); 1079 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 1080 nmp->nm_state |= NFSSTA_TIMEO; 1081 mtx_unlock(&nmp->nm_mtx); 1082 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1083 VQ_NOTRESP, 0); 1084 } else 1085 mtx_unlock(&nmp->nm_mtx); 1086 mtx_lock(&nmp->nm_mtx); 1087 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 1088 nmp->nm_state |= NFSSTA_LOCKTIMEO; 1089 mtx_unlock(&nmp->nm_mtx); 1090 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1091 VQ_NOTRESPLOCK, 0); 1092 } else 1093 mtx_unlock(&nmp->nm_mtx); 1094 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 1095 } 1096 1097 static void 1098 nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 1099 int flags, int tprintfmsg) 1100 { 1101 if (nmp == NULL) 1102 return; 1103 if (tprintfmsg) { 1104 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 1105 } 1106 1107 mtx_lock(&nmp->nm_mtx); 1108 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 1109 nmp->nm_state &= ~NFSSTA_TIMEO; 1110 mtx_unlock(&nmp->nm_mtx); 1111 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1112 VQ_NOTRESP, 1); 1113 } else 1114 mtx_unlock(&nmp->nm_mtx); 1115 1116 mtx_lock(&nmp->nm_mtx); 1117 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 1118 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 1119 mtx_unlock(&nmp->nm_mtx); 1120 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1121 VQ_NOTRESPLOCK, 1); 1122 } else 1123 mtx_unlock(&nmp->nm_mtx); 1124 } 1125 1126