1 /*- 2 * Copyright (c) 1989, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 /* 38 * Socket operations for use by nfs 39 */ 40 41 #include "opt_kdtrace.h" 42 #include "opt_kgssapi.h" 43 #include "opt_nfs.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/limits.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/mbuf.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/signalvar.h> 56 #include <sys/syscallsubr.h> 57 #include <sys/sysctl.h> 58 #include <sys/syslog.h> 59 #include <sys/vnode.h> 60 61 #include <rpc/rpc.h> 62 63 #include <kgssapi/krb5/kcrypto.h> 64 65 #include <fs/nfs/nfsport.h> 66 67 #ifdef KDTRACE_HOOKS 68 #include <sys/dtrace_bsd.h> 69 70 dtrace_nfsclient_nfs23_start_probe_func_t 71 dtrace_nfscl_nfs234_start_probe; 72 73 dtrace_nfsclient_nfs23_done_probe_func_t 74 dtrace_nfscl_nfs234_done_probe; 75 76 /* 77 * Registered probes by RPC type. 78 */ 79 uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; 80 uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; 81 82 uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; 83 uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; 84 85 uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; 86 uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; 87 #endif 88 89 NFSSTATESPINLOCK; 90 NFSREQSPINLOCK; 91 extern struct nfsstats newnfsstats; 92 extern struct nfsreqhead nfsd_reqq; 93 extern int nfscl_ticks; 94 extern void (*ncl_call_invalcaches)(struct vnode *); 95 96 static int nfsrv_gsscallbackson = 0; 97 static int nfs_bufpackets = 4; 98 static int nfs_reconnects; 99 static int nfs3_jukebox_delay = 10; 100 static int nfs_skip_wcc_data_onerr = 1; 101 static int nfs_keytab_enctype = ETYPE_DES_CBC_CRC; 102 103 SYSCTL_DECL(_vfs_nfs); 104 105 SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, 106 "Buffer reservation size 2 < x < 64"); 107 SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, 108 "Number of times the nfs client has had to reconnect"); 109 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, 110 "Number of seconds to delay a retry after receiving EJUKEBOX"); 111 SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, 112 "Disable weak cache consistency checking when server returns an error"); 113 SYSCTL_INT(_vfs_nfs, OID_AUTO, keytab_enctype, CTLFLAG_RW, &nfs_keytab_enctype, 0, 114 "Encryption type for the keytab entry used by nfs"); 115 116 static void nfs_down(struct nfsmount *, struct thread *, const char *, 117 int, int); 118 static void nfs_up(struct nfsmount *, struct thread *, const char *, 119 int, int); 120 static int nfs_msg(struct thread *, const char *, const char *, int); 121 122 struct nfs_cached_auth { 123 int ca_refs; /* refcount, including 1 from the cache */ 124 uid_t ca_uid; /* uid that corresponds to this auth */ 125 AUTH *ca_auth; /* RPC auth handle */ 126 }; 127 128 static int nfsv2_procid[NFS_V3NPROCS] = { 129 NFSV2PROC_NULL, 130 NFSV2PROC_GETATTR, 131 NFSV2PROC_SETATTR, 132 NFSV2PROC_LOOKUP, 133 NFSV2PROC_NOOP, 134 NFSV2PROC_READLINK, 135 NFSV2PROC_READ, 136 NFSV2PROC_WRITE, 137 NFSV2PROC_CREATE, 138 NFSV2PROC_MKDIR, 139 NFSV2PROC_SYMLINK, 140 NFSV2PROC_CREATE, 141 NFSV2PROC_REMOVE, 142 NFSV2PROC_RMDIR, 143 NFSV2PROC_RENAME, 144 NFSV2PROC_LINK, 145 NFSV2PROC_READDIR, 146 NFSV2PROC_NOOP, 147 NFSV2PROC_STATFS, 148 NFSV2PROC_NOOP, 149 NFSV2PROC_NOOP, 150 NFSV2PROC_NOOP, 151 }; 152 153 /* 154 * Initialize sockets and congestion for a new NFS connection. 155 * We do not free the sockaddr if error. 156 */ 157 int 158 newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, 159 struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) 160 { 161 int rcvreserve, sndreserve; 162 int pktscale; 163 struct sockaddr *saddr; 164 struct ucred *origcred; 165 CLIENT *client; 166 struct netconfig *nconf; 167 struct socket *so; 168 int one = 1, retries, error = 0; 169 struct thread *td = curthread; 170 struct timeval timo; 171 172 /* 173 * We need to establish the socket using the credentials of 174 * the mountpoint. Some parts of this process (such as 175 * sobind() and soconnect()) will use the curent thread's 176 * credential instead of the socket credential. To work 177 * around this, temporarily change the current thread's 178 * credential to that of the mountpoint. 179 * 180 * XXX: It would be better to explicitly pass the correct 181 * credential to sobind() and soconnect(). 182 */ 183 origcred = td->td_ucred; 184 185 /* 186 * Use the credential in nr_cred, if not NULL. 187 */ 188 if (nrp->nr_cred != NULL) 189 td->td_ucred = nrp->nr_cred; 190 else 191 td->td_ucred = cred; 192 saddr = nrp->nr_nam; 193 194 if (saddr->sa_family == AF_INET) 195 if (nrp->nr_sotype == SOCK_DGRAM) 196 nconf = getnetconfigent("udp"); 197 else 198 nconf = getnetconfigent("tcp"); 199 else 200 if (nrp->nr_sotype == SOCK_DGRAM) 201 nconf = getnetconfigent("udp6"); 202 else 203 nconf = getnetconfigent("tcp6"); 204 205 pktscale = nfs_bufpackets; 206 if (pktscale < 2) 207 pktscale = 2; 208 if (pktscale > 64) 209 pktscale = 64; 210 /* 211 * soreserve() can fail if sb_max is too small, so shrink pktscale 212 * and try again if there is an error. 213 * Print a log message suggesting increasing sb_max. 214 * Creating a socket and doing this is necessary since, if the 215 * reservation sizes are too large and will make soreserve() fail, 216 * the connection will work until a large send is attempted and 217 * then it will loop in the krpc code. 218 */ 219 so = NULL; 220 saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *); 221 error = socreate(saddr->sa_family, &so, nrp->nr_sotype, 222 nrp->nr_soproto, td->td_ucred, td); 223 if (error) { 224 td->td_ucred = origcred; 225 goto out; 226 } 227 do { 228 if (error != 0 && pktscale > 2) 229 pktscale--; 230 if (nrp->nr_sotype == SOCK_DGRAM) { 231 if (nmp != NULL) { 232 sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * 233 pktscale; 234 rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * 235 pktscale; 236 } else { 237 sndreserve = rcvreserve = 1024 * pktscale; 238 } 239 } else { 240 if (nrp->nr_sotype != SOCK_STREAM) 241 panic("nfscon sotype"); 242 if (nmp != NULL) { 243 sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + 244 sizeof (u_int32_t)) * pktscale; 245 rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + 246 sizeof (u_int32_t)) * pktscale; 247 } else { 248 sndreserve = rcvreserve = 1024 * pktscale; 249 } 250 } 251 error = soreserve(so, sndreserve, rcvreserve); 252 } while (error != 0 && pktscale > 2); 253 soclose(so); 254 if (error) { 255 td->td_ucred = origcred; 256 goto out; 257 } 258 259 client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog, 260 nrp->nr_vers, sndreserve, rcvreserve); 261 CLNT_CONTROL(client, CLSET_WAITCHAN, "newnfsreq"); 262 if (nmp != NULL) { 263 if ((nmp->nm_flag & NFSMNT_INT)) 264 CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 265 if ((nmp->nm_flag & NFSMNT_RESVPORT)) 266 CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 267 if (NFSHASSOFT(nmp)) { 268 if (nmp->nm_sotype == SOCK_DGRAM) 269 /* 270 * For UDP, the large timeout for a reconnect 271 * will be set to "nm_retry * nm_timeo / 2", so 272 * we only want to do 2 reconnect timeout 273 * retries. 274 */ 275 retries = 2; 276 else 277 retries = nmp->nm_retry; 278 } else 279 retries = INT_MAX; 280 } else { 281 /* 282 * Three cases: 283 * - Null RPC callback to client 284 * - Non-Null RPC callback to client, wait a little longer 285 * - upcalls to nfsuserd and gssd (clp == NULL) 286 */ 287 if (callback_retry_mult == 0) { 288 retries = NFSV4_UPCALLRETRY; 289 CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 290 } else { 291 retries = NFSV4_CALLBACKRETRY * callback_retry_mult; 292 } 293 } 294 CLNT_CONTROL(client, CLSET_RETRIES, &retries); 295 296 if (nmp != NULL) { 297 /* 298 * For UDP, there are 2 timeouts: 299 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer 300 * that does a retransmit of an RPC request using the same 301 * socket and xid. This is what you normally want to do, 302 * since NFS servers depend on "same xid" for their 303 * Duplicate Request Cache. 304 * - timeout specified in CLNT_CALL_MBUF(), which specifies when 305 * retransmits on the same socket should fail and a fresh 306 * socket created. Each of these timeouts counts as one 307 * CLSET_RETRIES as set above. 308 * Set the initial retransmit timeout for UDP. This timeout 309 * doesn't exist for TCP and the following call just fails, 310 * which is ok. 311 */ 312 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 313 timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; 314 CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); 315 } 316 317 mtx_lock(&nrp->nr_mtx); 318 if (nrp->nr_client != NULL) { 319 /* 320 * Someone else already connected. 321 */ 322 CLNT_RELEASE(client); 323 } else { 324 nrp->nr_client = client; 325 } 326 327 /* 328 * Protocols that do not require connections may be optionally left 329 * unconnected for servers that reply from a port other than NFS_PORT. 330 */ 331 if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { 332 mtx_unlock(&nrp->nr_mtx); 333 CLNT_CONTROL(client, CLSET_CONNECT, &one); 334 } else { 335 mtx_unlock(&nrp->nr_mtx); 336 } 337 338 /* Restore current thread's credentials. */ 339 td->td_ucred = origcred; 340 341 out: 342 NFSEXITCODE(error); 343 return (error); 344 } 345 346 /* 347 * NFS disconnect. Clean up and unlink. 348 */ 349 void 350 newnfs_disconnect(struct nfssockreq *nrp) 351 { 352 CLIENT *client; 353 354 mtx_lock(&nrp->nr_mtx); 355 if (nrp->nr_client != NULL) { 356 client = nrp->nr_client; 357 nrp->nr_client = NULL; 358 mtx_unlock(&nrp->nr_mtx); 359 rpc_gss_secpurge_call(client); 360 CLNT_CLOSE(client); 361 CLNT_RELEASE(client); 362 } else { 363 mtx_unlock(&nrp->nr_mtx); 364 } 365 } 366 367 static AUTH * 368 nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, 369 char *srv_principal, gss_OID mech_oid, struct ucred *cred) 370 { 371 rpc_gss_service_t svc; 372 AUTH *auth; 373 #ifdef notyet 374 rpc_gss_options_req_t req_options; 375 #endif 376 377 switch (secflavour) { 378 case RPCSEC_GSS_KRB5: 379 case RPCSEC_GSS_KRB5I: 380 case RPCSEC_GSS_KRB5P: 381 if (!mech_oid) { 382 if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) 383 return (NULL); 384 } 385 if (secflavour == RPCSEC_GSS_KRB5) 386 svc = rpc_gss_svc_none; 387 else if (secflavour == RPCSEC_GSS_KRB5I) 388 svc = rpc_gss_svc_integrity; 389 else 390 svc = rpc_gss_svc_privacy; 391 #ifdef notyet 392 req_options.req_flags = GSS_C_MUTUAL_FLAG; 393 req_options.time_req = 0; 394 req_options.my_cred = GSS_C_NO_CREDENTIAL; 395 req_options.input_channel_bindings = NULL; 396 req_options.enc_type = nfs_keytab_enctype; 397 398 auth = rpc_gss_secfind_call(nrp->nr_client, cred, 399 clnt_principal, srv_principal, mech_oid, svc, 400 &req_options); 401 #else 402 /* 403 * Until changes to the rpcsec_gss code are committed, 404 * there is no support for host based initiator 405 * principals. As such, that case cannot yet be handled. 406 */ 407 if (clnt_principal == NULL) 408 auth = rpc_gss_secfind_call(nrp->nr_client, cred, 409 srv_principal, mech_oid, svc); 410 else 411 auth = NULL; 412 #endif 413 if (auth != NULL) 414 return (auth); 415 /* fallthrough */ 416 case AUTH_SYS: 417 default: 418 return (authunix_create(cred)); 419 420 } 421 } 422 423 /* 424 * Callback from the RPC code to generate up/down notifications. 425 */ 426 427 struct nfs_feedback_arg { 428 struct nfsmount *nf_mount; 429 int nf_lastmsg; /* last tprintf */ 430 int nf_tprintfmsg; 431 struct thread *nf_td; 432 }; 433 434 static void 435 nfs_feedback(int type, int proc, void *arg) 436 { 437 struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 438 struct nfsmount *nmp = nf->nf_mount; 439 struct timeval now; 440 441 getmicrouptime(&now); 442 443 switch (type) { 444 case FEEDBACK_REXMIT2: 445 case FEEDBACK_RECONNECT: 446 if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { 447 nfs_down(nmp, nf->nf_td, 448 "not responding", 0, NFSSTA_TIMEO); 449 nf->nf_tprintfmsg = TRUE; 450 nf->nf_lastmsg = now.tv_sec; 451 } 452 break; 453 454 case FEEDBACK_OK: 455 nfs_up(nf->nf_mount, nf->nf_td, 456 "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 457 break; 458 } 459 } 460 461 /* 462 * newnfs_request - goes something like this 463 * - does the rpc by calling the krpc layer 464 * - break down rpc header and return with nfs reply 465 * nb: always frees up nd_mreq mbuf list 466 */ 467 int 468 newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, 469 struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, 470 struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, 471 u_char *retsum, int toplevel, u_int64_t *xidp) 472 { 473 u_int32_t *tl; 474 time_t waituntil; 475 int i, j, set_sigset = 0, timeo; 476 int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS; 477 u_int16_t procnum; 478 u_int trylater_delay = 1; 479 struct nfs_feedback_arg nf; 480 struct timeval timo, now; 481 AUTH *auth; 482 struct rpc_callextra ext; 483 enum clnt_stat stat; 484 struct nfsreq *rep = NULL; 485 char *srv_principal = NULL; 486 sigset_t oldset; 487 struct ucred *authcred; 488 489 if (xidp != NULL) 490 *xidp = 0; 491 /* Reject requests while attempting a forced unmount. */ 492 if (nmp != NULL && (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)) { 493 m_freem(nd->nd_mreq); 494 return (ESTALE); 495 } 496 497 /* 498 * Set authcred, which is used to acquire RPC credentials to 499 * the cred argument, by default. The crhold() should not be 500 * necessary, but will ensure that some future code change 501 * doesn't result in the credential being free'd prematurely. 502 */ 503 authcred = crhold(cred); 504 505 /* For client side interruptible mounts, mask off the signals. */ 506 if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { 507 newnfs_set_sigmask(td, &oldset); 508 set_sigset = 1; 509 } 510 511 /* 512 * XXX if not already connected call nfs_connect now. Longer 513 * term, change nfs_mount to call nfs_connect unconditionally 514 * and let clnt_reconnect_create handle reconnects. 515 */ 516 if (nrp->nr_client == NULL) 517 newnfs_connect(nmp, nrp, cred, td, 0); 518 519 /* 520 * For a client side mount, nmp is != NULL and clp == NULL. For 521 * server calls (callbacks or upcalls), nmp == NULL. 522 */ 523 if (clp != NULL) { 524 NFSLOCKSTATE(); 525 if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { 526 secflavour = RPCSEC_GSS_KRB5; 527 if (nd->nd_procnum != NFSPROC_NULL) { 528 if (clp->lc_flags & LCL_GSSINTEGRITY) 529 secflavour = RPCSEC_GSS_KRB5I; 530 else if (clp->lc_flags & LCL_GSSPRIVACY) 531 secflavour = RPCSEC_GSS_KRB5P; 532 } 533 } 534 NFSUNLOCKSTATE(); 535 } else if (nmp != NULL && NFSHASKERB(nmp) && 536 nd->nd_procnum != NFSPROC_NULL) { 537 if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0) 538 nd->nd_flag |= ND_USEGSSNAME; 539 if ((nd->nd_flag & ND_USEGSSNAME) != 0) { 540 /* 541 * If there is a client side host based credential, 542 * use that, otherwise use the system uid, if set. 543 * The system uid is in the nmp->nm_sockreq.nr_cred 544 * credentials. 545 */ 546 if (nmp->nm_krbnamelen > 0) { 547 usegssname = 1; 548 } else if (nmp->nm_uid != (uid_t)-1) { 549 KASSERT(nmp->nm_sockreq.nr_cred != NULL, 550 ("newnfs_request: NULL nr_cred")); 551 crfree(authcred); 552 authcred = crhold(nmp->nm_sockreq.nr_cred); 553 } 554 } else if (nmp->nm_krbnamelen == 0 && 555 nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) { 556 /* 557 * If there is no host based principal name and 558 * the system uid is set and this is root, use the 559 * system uid, since root won't have user 560 * credentials in a credentials cache file. 561 * The system uid is in the nmp->nm_sockreq.nr_cred 562 * credentials. 563 */ 564 KASSERT(nmp->nm_sockreq.nr_cred != NULL, 565 ("newnfs_request: NULL nr_cred")); 566 crfree(authcred); 567 authcred = crhold(nmp->nm_sockreq.nr_cred); 568 } 569 if (NFSHASINTEGRITY(nmp)) 570 secflavour = RPCSEC_GSS_KRB5I; 571 else if (NFSHASPRIVACY(nmp)) 572 secflavour = RPCSEC_GSS_KRB5P; 573 else 574 secflavour = RPCSEC_GSS_KRB5; 575 srv_principal = NFSMNT_SRVKRBNAME(nmp); 576 } else if (nmp != NULL && !NFSHASKERB(nmp) && 577 nd->nd_procnum != NFSPROC_NULL && 578 (nd->nd_flag & ND_USEGSSNAME) != 0) { 579 /* 580 * Use the uid that did the mount when the RPC is doing 581 * NFSv4 system operations, as indicated by the 582 * ND_USEGSSNAME flag, for the AUTH_SYS case. 583 * The credentials in nm_sockreq.nr_cred were used for the 584 * mount. 585 */ 586 KASSERT(nmp->nm_sockreq.nr_cred != NULL, 587 ("newnfs_request: NULL nr_cred")); 588 crfree(authcred); 589 authcred = crhold(nmp->nm_sockreq.nr_cred); 590 } 591 592 if (nmp != NULL) { 593 bzero(&nf, sizeof(struct nfs_feedback_arg)); 594 nf.nf_mount = nmp; 595 nf.nf_td = td; 596 getmicrouptime(&now); 597 nf.nf_lastmsg = now.tv_sec - 598 ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); 599 } 600 601 if (nd->nd_procnum == NFSPROC_NULL) 602 auth = authnone_create(); 603 else if (usegssname) 604 auth = nfs_getauth(nrp, secflavour, nmp->nm_krbname, 605 srv_principal, NULL, authcred); 606 else 607 auth = nfs_getauth(nrp, secflavour, NULL, 608 srv_principal, NULL, authcred); 609 crfree(authcred); 610 if (auth == NULL) { 611 m_freem(nd->nd_mreq); 612 if (set_sigset) 613 newnfs_restore_sigmask(td, &oldset); 614 return (EACCES); 615 } 616 bzero(&ext, sizeof(ext)); 617 ext.rc_auth = auth; 618 if (nmp != NULL) { 619 ext.rc_feedback = nfs_feedback; 620 ext.rc_feedback_arg = &nf; 621 } 622 623 procnum = nd->nd_procnum; 624 if ((nd->nd_flag & ND_NFSV4) && 625 nd->nd_procnum != NFSPROC_NULL && 626 nd->nd_procnum != NFSV4PROC_CBCOMPOUND) 627 procnum = NFSV4PROC_COMPOUND; 628 629 if (nmp != NULL) { 630 NFSINCRGLOBAL(newnfsstats.rpcrequests); 631 632 /* Map the procnum to the old NFSv2 one, as required. */ 633 if ((nd->nd_flag & ND_NFSV2) != 0) { 634 if (nd->nd_procnum < NFS_V3NPROCS) 635 procnum = nfsv2_procid[nd->nd_procnum]; 636 else 637 procnum = NFSV2PROC_NOOP; 638 } 639 640 /* 641 * Now only used for the R_DONTRECOVER case, but until that is 642 * supported within the krpc code, I need to keep a queue of 643 * outstanding RPCs for nfsv4 client requests. 644 */ 645 if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) 646 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), 647 M_NFSDREQ, M_WAITOK); 648 #ifdef KDTRACE_HOOKS 649 if (dtrace_nfscl_nfs234_start_probe != NULL) { 650 uint32_t probe_id; 651 int probe_procnum; 652 653 if (nd->nd_flag & ND_NFSV4) { 654 probe_id = 655 nfscl_nfs4_start_probes[nd->nd_procnum]; 656 probe_procnum = nd->nd_procnum; 657 } else if (nd->nd_flag & ND_NFSV3) { 658 probe_id = nfscl_nfs3_start_probes[procnum]; 659 probe_procnum = procnum; 660 } else { 661 probe_id = 662 nfscl_nfs2_start_probes[nd->nd_procnum]; 663 probe_procnum = procnum; 664 } 665 if (probe_id != 0) 666 (dtrace_nfscl_nfs234_start_probe) 667 (probe_id, vp, nd->nd_mreq, cred, 668 probe_procnum); 669 } 670 #endif 671 } 672 trycnt = 0; 673 tryagain: 674 /* 675 * This timeout specifies when a new socket should be created, 676 * along with new xid values. For UDP, this should be done 677 * infrequently, since retransmits of RPC requests should normally 678 * use the same xid. 679 */ 680 if (nmp == NULL) { 681 timo.tv_usec = 0; 682 if (clp == NULL) 683 timo.tv_sec = NFSV4_UPCALLTIMEO; 684 else 685 timo.tv_sec = NFSV4_CALLBACKTIMEO; 686 } else { 687 if (nrp->nr_sotype != SOCK_DGRAM) { 688 timo.tv_usec = 0; 689 if ((nmp->nm_flag & NFSMNT_NFSV4)) 690 timo.tv_sec = INT_MAX; 691 else 692 timo.tv_sec = NFS_TCPTIMEO; 693 } else { 694 if (NFSHASSOFT(nmp)) { 695 /* 696 * CLSET_RETRIES is set to 2, so this should be 697 * half of the total timeout required. 698 */ 699 timeo = nmp->nm_retry * nmp->nm_timeo / 2; 700 if (timeo < 1) 701 timeo = 1; 702 timo.tv_sec = timeo / NFS_HZ; 703 timo.tv_usec = (timeo % NFS_HZ) * 1000000 / 704 NFS_HZ; 705 } else { 706 /* For UDP hard mounts, use a large value. */ 707 timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; 708 timo.tv_usec = 0; 709 } 710 } 711 712 if (rep != NULL) { 713 rep->r_flags = 0; 714 rep->r_nmp = nmp; 715 /* 716 * Chain request into list of outstanding requests. 717 */ 718 NFSLOCKREQ(); 719 TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain); 720 NFSUNLOCKREQ(); 721 } 722 } 723 724 nd->nd_mrep = NULL; 725 stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, 726 &nd->nd_mrep, timo); 727 728 if (rep != NULL) { 729 /* 730 * RPC done, unlink the request. 731 */ 732 NFSLOCKREQ(); 733 TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); 734 NFSUNLOCKREQ(); 735 } 736 737 /* 738 * If there was a successful reply and a tprintf msg. 739 * tprintf a response. 740 */ 741 if (stat == RPC_SUCCESS) { 742 error = 0; 743 } else if (stat == RPC_TIMEDOUT) { 744 error = ETIMEDOUT; 745 } else if (stat == RPC_VERSMISMATCH) { 746 error = EOPNOTSUPP; 747 } else if (stat == RPC_PROGVERSMISMATCH) { 748 error = EPROTONOSUPPORT; 749 } else { 750 error = EACCES; 751 } 752 if (error) { 753 m_freem(nd->nd_mreq); 754 AUTH_DESTROY(auth); 755 if (rep != NULL) 756 FREE((caddr_t)rep, M_NFSDREQ); 757 if (set_sigset) 758 newnfs_restore_sigmask(td, &oldset); 759 return (error); 760 } 761 762 KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 763 764 /* 765 * Search for any mbufs that are not a multiple of 4 bytes long 766 * or with m_data not longword aligned. 767 * These could cause pointer alignment problems, so copy them to 768 * well aligned mbufs. 769 */ 770 newnfs_realign(&nd->nd_mrep); 771 nd->nd_md = nd->nd_mrep; 772 nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); 773 nd->nd_repstat = 0; 774 if (nd->nd_procnum != NFSPROC_NULL) { 775 /* 776 * and now the actual NFS xdr. 777 */ 778 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 779 nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); 780 if (nd->nd_repstat != 0) { 781 if (((nd->nd_repstat == NFSERR_DELAY || 782 nd->nd_repstat == NFSERR_GRACE) && 783 (nd->nd_flag & ND_NFSV4) && 784 nd->nd_procnum != NFSPROC_DELEGRETURN && 785 nd->nd_procnum != NFSPROC_SETATTR && 786 nd->nd_procnum != NFSPROC_READ && 787 nd->nd_procnum != NFSPROC_WRITE && 788 nd->nd_procnum != NFSPROC_OPEN && 789 nd->nd_procnum != NFSPROC_CREATE && 790 nd->nd_procnum != NFSPROC_OPENCONFIRM && 791 nd->nd_procnum != NFSPROC_OPENDOWNGRADE && 792 nd->nd_procnum != NFSPROC_CLOSE && 793 nd->nd_procnum != NFSPROC_LOCK && 794 nd->nd_procnum != NFSPROC_LOCKU) || 795 (nd->nd_repstat == NFSERR_DELAY && 796 (nd->nd_flag & ND_NFSV4) == 0) || 797 nd->nd_repstat == NFSERR_RESOURCE) { 798 if (trylater_delay > NFS_TRYLATERDEL) 799 trylater_delay = NFS_TRYLATERDEL; 800 waituntil = NFSD_MONOSEC + trylater_delay; 801 while (NFSD_MONOSEC < waituntil) 802 (void) nfs_catnap(PZERO, 0, "nfstry"); 803 trylater_delay *= 2; 804 m_freem(nd->nd_mrep); 805 nd->nd_mrep = NULL; 806 goto tryagain; 807 } 808 809 /* 810 * If the File Handle was stale, invalidate the 811 * lookup cache, just in case. 812 * (vp != NULL implies a client side call) 813 */ 814 if (nd->nd_repstat == ESTALE && vp != NULL) { 815 cache_purge(vp); 816 if (ncl_call_invalcaches != NULL) 817 (*ncl_call_invalcaches)(vp); 818 } 819 } 820 821 /* 822 * Get rid of the tag, return count, and PUTFH result for V4. 823 */ 824 if (nd->nd_flag & ND_NFSV4) { 825 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 826 i = fxdr_unsigned(int, *tl); 827 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 828 if (error) 829 goto nfsmout; 830 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 831 i = fxdr_unsigned(int, *++tl); 832 833 /* 834 * If the first op's status is non-zero, mark that 835 * there is no more data to process. 836 */ 837 if (*++tl) 838 nd->nd_flag |= ND_NOMOREDATA; 839 840 /* 841 * If the first op is Putfh, throw its results away 842 * and toss the op# and status for the first op. 843 */ 844 if (nmp != NULL && i == NFSV4OP_PUTFH && *tl == 0) { 845 NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); 846 i = fxdr_unsigned(int, *tl++); 847 j = fxdr_unsigned(int, *tl); 848 /* 849 * All Compounds that do an Op that must 850 * be in sequence consist of NFSV4OP_PUTFH 851 * followed by one of these. As such, we 852 * can determine if the seqid# should be 853 * incremented, here. 854 */ 855 if ((i == NFSV4OP_OPEN || 856 i == NFSV4OP_OPENCONFIRM || 857 i == NFSV4OP_OPENDOWNGRADE || 858 i == NFSV4OP_CLOSE || 859 i == NFSV4OP_LOCK || 860 i == NFSV4OP_LOCKU) && 861 (j == 0 || 862 (j != NFSERR_STALECLIENTID && 863 j != NFSERR_STALESTATEID && 864 j != NFSERR_BADSTATEID && 865 j != NFSERR_BADSEQID && 866 j != NFSERR_BADXDR && 867 j != NFSERR_RESOURCE && 868 j != NFSERR_NOFILEHANDLE))) 869 nd->nd_flag |= ND_INCRSEQID; 870 /* 871 * If the first op's status is non-zero, mark 872 * that there is no more data to process. 873 */ 874 if (j) 875 nd->nd_flag |= ND_NOMOREDATA; 876 } 877 878 /* 879 * If R_DONTRECOVER is set, replace the stale error 880 * reply, so that recovery isn't initiated. 881 */ 882 if ((nd->nd_repstat == NFSERR_STALECLIENTID || 883 nd->nd_repstat == NFSERR_STALESTATEID) && 884 rep != NULL && (rep->r_flags & R_DONTRECOVER)) 885 nd->nd_repstat = NFSERR_STALEDONTRECOVER; 886 } 887 } 888 889 #ifdef KDTRACE_HOOKS 890 if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { 891 uint32_t probe_id; 892 int probe_procnum; 893 894 if (nd->nd_flag & ND_NFSV4) { 895 probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; 896 probe_procnum = nd->nd_procnum; 897 } else if (nd->nd_flag & ND_NFSV3) { 898 probe_id = nfscl_nfs3_done_probes[procnum]; 899 probe_procnum = procnum; 900 } else { 901 probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; 902 probe_procnum = procnum; 903 } 904 if (probe_id != 0) 905 (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, 906 nd->nd_mreq, cred, probe_procnum, 0); 907 } 908 #endif 909 910 m_freem(nd->nd_mreq); 911 AUTH_DESTROY(auth); 912 if (rep != NULL) 913 FREE((caddr_t)rep, M_NFSDREQ); 914 if (set_sigset) 915 newnfs_restore_sigmask(td, &oldset); 916 return (0); 917 nfsmout: 918 mbuf_freem(nd->nd_mrep); 919 mbuf_freem(nd->nd_mreq); 920 AUTH_DESTROY(auth); 921 if (rep != NULL) 922 FREE((caddr_t)rep, M_NFSDREQ); 923 if (set_sigset) 924 newnfs_restore_sigmask(td, &oldset); 925 return (error); 926 } 927 928 /* 929 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 930 * wait for all requests to complete. This is used by forced unmounts 931 * to terminate any outstanding RPCs. 932 */ 933 int 934 newnfs_nmcancelreqs(struct nfsmount *nmp) 935 { 936 937 if (nmp->nm_sockreq.nr_client != NULL) 938 CLNT_CLOSE(nmp->nm_sockreq.nr_client); 939 return (0); 940 } 941 942 /* 943 * Any signal that can interrupt an NFS operation in an intr mount 944 * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 945 */ 946 int newnfs_sig_set[] = { 947 SIGINT, 948 SIGTERM, 949 SIGHUP, 950 SIGKILL, 951 SIGSTOP, 952 SIGQUIT 953 }; 954 955 /* 956 * Check to see if one of the signals in our subset is pending on 957 * the process (in an intr mount). 958 */ 959 static int 960 nfs_sig_pending(sigset_t set) 961 { 962 int i; 963 964 for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) 965 if (SIGISMEMBER(set, newnfs_sig_set[i])) 966 return (1); 967 return (0); 968 } 969 970 /* 971 * The set/restore sigmask functions are used to (temporarily) overwrite 972 * the process p_sigmask during an RPC call (for example). These are also 973 * used in other places in the NFS client that might tsleep(). 974 */ 975 void 976 newnfs_set_sigmask(struct thread *td, sigset_t *oldset) 977 { 978 sigset_t newset; 979 int i; 980 struct proc *p; 981 982 SIGFILLSET(newset); 983 if (td == NULL) 984 td = curthread; /* XXX */ 985 p = td->td_proc; 986 /* Remove the NFS set of signals from newset */ 987 PROC_LOCK(p); 988 mtx_lock(&p->p_sigacts->ps_mtx); 989 for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) { 990 /* 991 * But make sure we leave the ones already masked 992 * by the process, ie. remove the signal from the 993 * temporary signalmask only if it wasn't already 994 * in p_sigmask. 995 */ 996 if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && 997 !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) 998 SIGDELSET(newset, newnfs_sig_set[i]); 999 } 1000 mtx_unlock(&p->p_sigacts->ps_mtx); 1001 PROC_UNLOCK(p); 1002 kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); 1003 } 1004 1005 void 1006 newnfs_restore_sigmask(struct thread *td, sigset_t *set) 1007 { 1008 if (td == NULL) 1009 td = curthread; /* XXX */ 1010 kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 1011 } 1012 1013 /* 1014 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 1015 * old one after msleep() returns. 1016 */ 1017 int 1018 newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) 1019 { 1020 sigset_t oldset; 1021 int error; 1022 struct proc *p; 1023 1024 if ((priority & PCATCH) == 0) 1025 return msleep(ident, mtx, priority, wmesg, timo); 1026 if (td == NULL) 1027 td = curthread; /* XXX */ 1028 newnfs_set_sigmask(td, &oldset); 1029 error = msleep(ident, mtx, priority, wmesg, timo); 1030 newnfs_restore_sigmask(td, &oldset); 1031 p = td->td_proc; 1032 return (error); 1033 } 1034 1035 /* 1036 * Test for a termination condition pending on the process. 1037 * This is used for NFSMNT_INT mounts. 1038 */ 1039 int 1040 newnfs_sigintr(struct nfsmount *nmp, struct thread *td) 1041 { 1042 struct proc *p; 1043 sigset_t tmpset; 1044 1045 /* Terminate all requests while attempting a forced unmount. */ 1046 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 1047 return (EIO); 1048 if (!(nmp->nm_flag & NFSMNT_INT)) 1049 return (0); 1050 if (td == NULL) 1051 return (0); 1052 p = td->td_proc; 1053 PROC_LOCK(p); 1054 tmpset = p->p_siglist; 1055 SIGSETOR(tmpset, td->td_siglist); 1056 SIGSETNAND(tmpset, td->td_sigmask); 1057 mtx_lock(&p->p_sigacts->ps_mtx); 1058 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 1059 mtx_unlock(&p->p_sigacts->ps_mtx); 1060 if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 1061 && nfs_sig_pending(tmpset)) { 1062 PROC_UNLOCK(p); 1063 return (EINTR); 1064 } 1065 PROC_UNLOCK(p); 1066 return (0); 1067 } 1068 1069 static int 1070 nfs_msg(struct thread *td, const char *server, const char *msg, int error) 1071 { 1072 struct proc *p; 1073 1074 p = td ? td->td_proc : NULL; 1075 if (error) { 1076 tprintf(p, LOG_INFO, "newnfs server %s: %s, error %d\n", 1077 server, msg, error); 1078 } else { 1079 tprintf(p, LOG_INFO, "newnfs server %s: %s\n", server, msg); 1080 } 1081 return (0); 1082 } 1083 1084 static void 1085 nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 1086 int error, int flags) 1087 { 1088 if (nmp == NULL) 1089 return; 1090 mtx_lock(&nmp->nm_mtx); 1091 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 1092 nmp->nm_state |= NFSSTA_TIMEO; 1093 mtx_unlock(&nmp->nm_mtx); 1094 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1095 VQ_NOTRESP, 0); 1096 } else 1097 mtx_unlock(&nmp->nm_mtx); 1098 mtx_lock(&nmp->nm_mtx); 1099 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 1100 nmp->nm_state |= NFSSTA_LOCKTIMEO; 1101 mtx_unlock(&nmp->nm_mtx); 1102 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1103 VQ_NOTRESPLOCK, 0); 1104 } else 1105 mtx_unlock(&nmp->nm_mtx); 1106 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 1107 } 1108 1109 static void 1110 nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 1111 int flags, int tprintfmsg) 1112 { 1113 if (nmp == NULL) 1114 return; 1115 if (tprintfmsg) { 1116 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 1117 } 1118 1119 mtx_lock(&nmp->nm_mtx); 1120 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 1121 nmp->nm_state &= ~NFSSTA_TIMEO; 1122 mtx_unlock(&nmp->nm_mtx); 1123 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1124 VQ_NOTRESP, 1); 1125 } else 1126 mtx_unlock(&nmp->nm_mtx); 1127 1128 mtx_lock(&nmp->nm_mtx); 1129 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 1130 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 1131 mtx_unlock(&nmp->nm_mtx); 1132 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1133 VQ_NOTRESPLOCK, 1); 1134 } else 1135 mtx_unlock(&nmp->nm_mtx); 1136 } 1137 1138