1 /*- 2 * Copyright (c) 1989, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 /* 38 * Socket operations for use by nfs 39 */ 40 41 #include "opt_inet6.h" 42 #include "opt_kdtrace.h" 43 #include "opt_kgssapi.h" 44 #include "opt_nfs.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/limits.h> 50 #include <sys/lock.h> 51 #include <sys/malloc.h> 52 #include <sys/mbuf.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/proc.h> 56 #include <sys/signalvar.h> 57 #include <sys/syscallsubr.h> 58 #include <sys/sysctl.h> 59 #include <sys/syslog.h> 60 #include <sys/vnode.h> 61 62 #include <rpc/rpc.h> 63 64 #include <kgssapi/krb5/kcrypto.h> 65 66 #include <fs/nfs/nfsport.h> 67 68 #ifdef KDTRACE_HOOKS 69 #include <sys/dtrace_bsd.h> 70 71 dtrace_nfsclient_nfs23_start_probe_func_t 72 dtrace_nfscl_nfs234_start_probe; 73 74 dtrace_nfsclient_nfs23_done_probe_func_t 75 dtrace_nfscl_nfs234_done_probe; 76 77 /* 78 * Registered probes by RPC type. 79 */ 80 uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; 81 uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; 82 83 uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; 84 uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; 85 86 uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; 87 uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; 88 #endif 89 90 NFSSTATESPINLOCK; 91 NFSREQSPINLOCK; 92 extern struct nfsstats newnfsstats; 93 extern struct nfsreqhead nfsd_reqq; 94 extern int nfscl_ticks; 95 extern void (*ncl_call_invalcaches)(struct vnode *); 96 97 static int nfsrv_gsscallbackson = 0; 98 static int nfs_bufpackets = 4; 99 static int nfs_reconnects; 100 static int nfs3_jukebox_delay = 10; 101 static int nfs_skip_wcc_data_onerr = 1; 102 static int nfs_keytab_enctype = ETYPE_DES_CBC_CRC; 103 104 SYSCTL_DECL(_vfs_nfs); 105 106 SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, 107 "Buffer reservation size 2 < x < 64"); 108 SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, 109 "Number of times the nfs client has had to reconnect"); 110 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, 111 "Number of seconds to delay a retry after receiving EJUKEBOX"); 112 SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, 113 "Disable weak cache consistency checking when server returns an error"); 114 SYSCTL_INT(_vfs_nfs, OID_AUTO, keytab_enctype, CTLFLAG_RW, &nfs_keytab_enctype, 0, 115 "Encryption type for the keytab entry used by nfs"); 116 117 static void nfs_down(struct nfsmount *, struct thread *, const char *, 118 int, int); 119 static void nfs_up(struct nfsmount *, struct thread *, const char *, 120 int, int); 121 static int nfs_msg(struct thread *, const char *, const char *, int); 122 123 struct nfs_cached_auth { 124 int ca_refs; /* refcount, including 1 from the cache */ 125 uid_t ca_uid; /* uid that corresponds to this auth */ 126 AUTH *ca_auth; /* RPC auth handle */ 127 }; 128 129 static int nfsv2_procid[NFS_V3NPROCS] = { 130 NFSV2PROC_NULL, 131 NFSV2PROC_GETATTR, 132 NFSV2PROC_SETATTR, 133 NFSV2PROC_LOOKUP, 134 NFSV2PROC_NOOP, 135 NFSV2PROC_READLINK, 136 NFSV2PROC_READ, 137 NFSV2PROC_WRITE, 138 NFSV2PROC_CREATE, 139 NFSV2PROC_MKDIR, 140 NFSV2PROC_SYMLINK, 141 NFSV2PROC_CREATE, 142 NFSV2PROC_REMOVE, 143 NFSV2PROC_RMDIR, 144 NFSV2PROC_RENAME, 145 NFSV2PROC_LINK, 146 NFSV2PROC_READDIR, 147 NFSV2PROC_NOOP, 148 NFSV2PROC_STATFS, 149 NFSV2PROC_NOOP, 150 NFSV2PROC_NOOP, 151 NFSV2PROC_NOOP, 152 }; 153 154 /* 155 * Initialize sockets and congestion for a new NFS connection. 156 * We do not free the sockaddr if error. 157 */ 158 int 159 newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, 160 struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) 161 { 162 int rcvreserve, sndreserve; 163 int pktscale; 164 struct sockaddr *saddr; 165 struct ucred *origcred; 166 CLIENT *client; 167 struct netconfig *nconf; 168 struct socket *so; 169 int one = 1, retries, error = 0; 170 struct thread *td = curthread; 171 172 /* 173 * We need to establish the socket using the credentials of 174 * the mountpoint. Some parts of this process (such as 175 * sobind() and soconnect()) will use the curent thread's 176 * credential instead of the socket credential. To work 177 * around this, temporarily change the current thread's 178 * credential to that of the mountpoint. 179 * 180 * XXX: It would be better to explicitly pass the correct 181 * credential to sobind() and soconnect(). 182 */ 183 origcred = td->td_ucred; 184 185 /* 186 * Use the credential in nr_cred, if not NULL. 187 */ 188 if (nrp->nr_cred != NULL) 189 td->td_ucred = nrp->nr_cred; 190 else 191 td->td_ucred = cred; 192 saddr = nrp->nr_nam; 193 194 if (saddr->sa_family == AF_INET) 195 if (nrp->nr_sotype == SOCK_DGRAM) 196 nconf = getnetconfigent("udp"); 197 else 198 nconf = getnetconfigent("tcp"); 199 else 200 if (nrp->nr_sotype == SOCK_DGRAM) 201 nconf = getnetconfigent("udp6"); 202 else 203 nconf = getnetconfigent("tcp6"); 204 205 pktscale = nfs_bufpackets; 206 if (pktscale < 2) 207 pktscale = 2; 208 if (pktscale > 64) 209 pktscale = 64; 210 /* 211 * soreserve() can fail if sb_max is too small, so shrink pktscale 212 * and try again if there is an error. 213 * Print a log message suggesting increasing sb_max. 214 * Creating a socket and doing this is necessary since, if the 215 * reservation sizes are too large and will make soreserve() fail, 216 * the connection will work until a large send is attempted and 217 * then it will loop in the krpc code. 218 */ 219 so = NULL; 220 saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *); 221 error = socreate(saddr->sa_family, &so, nrp->nr_sotype, 222 nrp->nr_soproto, td->td_ucred, td); 223 if (error) { 224 td->td_ucred = origcred; 225 goto out; 226 } 227 do { 228 if (error != 0 && pktscale > 2) 229 pktscale--; 230 if (nrp->nr_sotype == SOCK_DGRAM) { 231 if (nmp != NULL) { 232 sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * 233 pktscale; 234 rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * 235 pktscale; 236 } else { 237 sndreserve = rcvreserve = 1024 * pktscale; 238 } 239 } else { 240 if (nrp->nr_sotype != SOCK_STREAM) 241 panic("nfscon sotype"); 242 if (nmp != NULL) { 243 sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + 244 sizeof (u_int32_t)) * pktscale; 245 rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + 246 sizeof (u_int32_t)) * pktscale; 247 } else { 248 sndreserve = rcvreserve = 1024 * pktscale; 249 } 250 } 251 error = soreserve(so, sndreserve, rcvreserve); 252 } while (error != 0 && pktscale > 2); 253 soclose(so); 254 if (error) { 255 td->td_ucred = origcred; 256 goto out; 257 } 258 259 client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog, 260 nrp->nr_vers, sndreserve, rcvreserve); 261 CLNT_CONTROL(client, CLSET_WAITCHAN, "newnfsreq"); 262 if (nmp != NULL) { 263 if ((nmp->nm_flag & NFSMNT_INT)) 264 CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); 265 if ((nmp->nm_flag & NFSMNT_RESVPORT)) 266 CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 267 if (NFSHASSOFT(nmp)) 268 retries = nmp->nm_retry; 269 else 270 retries = INT_MAX; 271 } else { 272 /* 273 * Three cases: 274 * - Null RPC callback to client 275 * - Non-Null RPC callback to client, wait a little longer 276 * - upcalls to nfsuserd and gssd (clp == NULL) 277 */ 278 if (callback_retry_mult == 0) { 279 retries = NFSV4_UPCALLRETRY; 280 CLNT_CONTROL(client, CLSET_PRIVPORT, &one); 281 } else { 282 retries = NFSV4_CALLBACKRETRY * callback_retry_mult; 283 } 284 } 285 CLNT_CONTROL(client, CLSET_RETRIES, &retries); 286 287 mtx_lock(&nrp->nr_mtx); 288 if (nrp->nr_client != NULL) { 289 /* 290 * Someone else already connected. 291 */ 292 CLNT_RELEASE(client); 293 } else { 294 nrp->nr_client = client; 295 } 296 297 /* 298 * Protocols that do not require connections may be optionally left 299 * unconnected for servers that reply from a port other than NFS_PORT. 300 */ 301 if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { 302 mtx_unlock(&nrp->nr_mtx); 303 CLNT_CONTROL(client, CLSET_CONNECT, &one); 304 } else { 305 mtx_unlock(&nrp->nr_mtx); 306 } 307 308 /* Restore current thread's credentials. */ 309 td->td_ucred = origcred; 310 311 out: 312 NFSEXITCODE(error); 313 return (error); 314 } 315 316 /* 317 * NFS disconnect. Clean up and unlink. 318 */ 319 void 320 newnfs_disconnect(struct nfssockreq *nrp) 321 { 322 CLIENT *client; 323 324 mtx_lock(&nrp->nr_mtx); 325 if (nrp->nr_client != NULL) { 326 client = nrp->nr_client; 327 nrp->nr_client = NULL; 328 mtx_unlock(&nrp->nr_mtx); 329 rpc_gss_secpurge_call(client); 330 CLNT_CLOSE(client); 331 CLNT_RELEASE(client); 332 } else { 333 mtx_unlock(&nrp->nr_mtx); 334 } 335 } 336 337 static AUTH * 338 nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, 339 char *srv_principal, gss_OID mech_oid, struct ucred *cred) 340 { 341 rpc_gss_service_t svc; 342 AUTH *auth; 343 #ifdef notyet 344 rpc_gss_options_req_t req_options; 345 #endif 346 347 switch (secflavour) { 348 case RPCSEC_GSS_KRB5: 349 case RPCSEC_GSS_KRB5I: 350 case RPCSEC_GSS_KRB5P: 351 if (!mech_oid) { 352 if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) 353 return (NULL); 354 } 355 if (secflavour == RPCSEC_GSS_KRB5) 356 svc = rpc_gss_svc_none; 357 else if (secflavour == RPCSEC_GSS_KRB5I) 358 svc = rpc_gss_svc_integrity; 359 else 360 svc = rpc_gss_svc_privacy; 361 #ifdef notyet 362 req_options.req_flags = GSS_C_MUTUAL_FLAG; 363 req_options.time_req = 0; 364 req_options.my_cred = GSS_C_NO_CREDENTIAL; 365 req_options.input_channel_bindings = NULL; 366 req_options.enc_type = nfs_keytab_enctype; 367 368 auth = rpc_gss_secfind_call(nrp->nr_client, cred, 369 clnt_principal, srv_principal, mech_oid, svc, 370 &req_options); 371 #else 372 /* 373 * Until changes to the rpcsec_gss code are committed, 374 * there is no support for host based initiator 375 * principals. As such, that case cannot yet be handled. 376 */ 377 if (clnt_principal == NULL) 378 auth = rpc_gss_secfind_call(nrp->nr_client, cred, 379 srv_principal, mech_oid, svc); 380 else 381 auth = NULL; 382 #endif 383 if (auth != NULL) 384 return (auth); 385 /* fallthrough */ 386 case AUTH_SYS: 387 default: 388 return (authunix_create(cred)); 389 390 } 391 } 392 393 /* 394 * Callback from the RPC code to generate up/down notifications. 395 */ 396 397 struct nfs_feedback_arg { 398 struct nfsmount *nf_mount; 399 int nf_lastmsg; /* last tprintf */ 400 int nf_tprintfmsg; 401 struct thread *nf_td; 402 }; 403 404 static void 405 nfs_feedback(int type, int proc, void *arg) 406 { 407 struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; 408 struct nfsmount *nmp = nf->nf_mount; 409 struct timeval now; 410 411 getmicrouptime(&now); 412 413 switch (type) { 414 case FEEDBACK_REXMIT2: 415 case FEEDBACK_RECONNECT: 416 if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { 417 nfs_down(nmp, nf->nf_td, 418 "not responding", 0, NFSSTA_TIMEO); 419 nf->nf_tprintfmsg = TRUE; 420 nf->nf_lastmsg = now.tv_sec; 421 } 422 break; 423 424 case FEEDBACK_OK: 425 nfs_up(nf->nf_mount, nf->nf_td, 426 "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); 427 break; 428 } 429 } 430 431 /* 432 * newnfs_request - goes something like this 433 * - does the rpc by calling the krpc layer 434 * - break down rpc header and return with nfs reply 435 * nb: always frees up nd_mreq mbuf list 436 */ 437 int 438 newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, 439 struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, 440 struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, 441 u_char *retsum, int toplevel, u_int64_t *xidp) 442 { 443 u_int32_t *tl; 444 time_t waituntil; 445 int i, j, set_uid = 0, set_sigset = 0; 446 int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS; 447 u_int16_t procnum; 448 u_int trylater_delay = 1; 449 struct nfs_feedback_arg nf; 450 struct timeval timo, now; 451 AUTH *auth; 452 struct rpc_callextra ext; 453 enum clnt_stat stat; 454 struct nfsreq *rep = NULL; 455 char *srv_principal = NULL; 456 uid_t saved_uid = (uid_t)-1; 457 sigset_t oldset; 458 459 if (xidp != NULL) 460 *xidp = 0; 461 /* Reject requests while attempting a forced unmount. */ 462 if (nmp != NULL && (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)) { 463 m_freem(nd->nd_mreq); 464 return (ESTALE); 465 } 466 467 /* For client side interruptible mounts, mask off the signals. */ 468 if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { 469 newnfs_set_sigmask(td, &oldset); 470 set_sigset = 1; 471 } 472 473 /* 474 * XXX if not already connected call nfs_connect now. Longer 475 * term, change nfs_mount to call nfs_connect unconditionally 476 * and let clnt_reconnect_create handle reconnects. 477 */ 478 if (nrp->nr_client == NULL) 479 newnfs_connect(nmp, nrp, cred, td, 0); 480 481 /* 482 * For a client side mount, nmp is != NULL and clp == NULL. For 483 * server calls (callbacks or upcalls), nmp == NULL. 484 */ 485 if (clp != NULL) { 486 NFSLOCKSTATE(); 487 if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { 488 secflavour = RPCSEC_GSS_KRB5; 489 if (nd->nd_procnum != NFSPROC_NULL) { 490 if (clp->lc_flags & LCL_GSSINTEGRITY) 491 secflavour = RPCSEC_GSS_KRB5I; 492 else if (clp->lc_flags & LCL_GSSPRIVACY) 493 secflavour = RPCSEC_GSS_KRB5P; 494 } 495 } 496 NFSUNLOCKSTATE(); 497 } else if (nmp != NULL && NFSHASKERB(nmp) && 498 nd->nd_procnum != NFSPROC_NULL) { 499 if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0) 500 nd->nd_flag |= ND_USEGSSNAME; 501 if ((nd->nd_flag & ND_USEGSSNAME) != 0) { 502 /* 503 * If there is a client side host based credential, 504 * use that, otherwise use the system uid, if set. 505 */ 506 if (nmp->nm_krbnamelen > 0) { 507 usegssname = 1; 508 } else if (nmp->nm_uid != (uid_t)-1) { 509 saved_uid = cred->cr_uid; 510 cred->cr_uid = nmp->nm_uid; 511 set_uid = 1; 512 } 513 } else if (nmp->nm_krbnamelen == 0 && 514 nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) { 515 /* 516 * If there is no host based principal name and 517 * the system uid is set and this is root, use the 518 * system uid, since root won't have user 519 * credentials in a credentials cache file. 520 */ 521 saved_uid = cred->cr_uid; 522 cred->cr_uid = nmp->nm_uid; 523 set_uid = 1; 524 } 525 if (NFSHASINTEGRITY(nmp)) 526 secflavour = RPCSEC_GSS_KRB5I; 527 else if (NFSHASPRIVACY(nmp)) 528 secflavour = RPCSEC_GSS_KRB5P; 529 else 530 secflavour = RPCSEC_GSS_KRB5; 531 srv_principal = NFSMNT_SRVKRBNAME(nmp); 532 } else if (nmp != NULL && !NFSHASKERB(nmp) && 533 nd->nd_procnum != NFSPROC_NULL && 534 (nd->nd_flag & ND_USEGSSNAME) != 0) { 535 /* 536 * Use the uid that did the mount when the RPC is doing 537 * NFSv4 system operations, as indicated by the 538 * ND_USEGSSNAME flag, for the AUTH_SYS case. 539 */ 540 saved_uid = cred->cr_uid; 541 if (nmp->nm_uid != (uid_t)-1) 542 cred->cr_uid = nmp->nm_uid; 543 else 544 cred->cr_uid = 0; 545 set_uid = 1; 546 } 547 548 if (nmp != NULL) { 549 bzero(&nf, sizeof(struct nfs_feedback_arg)); 550 nf.nf_mount = nmp; 551 nf.nf_td = td; 552 getmicrouptime(&now); 553 nf.nf_lastmsg = now.tv_sec - 554 ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); 555 } 556 557 if (nd->nd_procnum == NFSPROC_NULL) 558 auth = authnone_create(); 559 else if (usegssname) 560 auth = nfs_getauth(nrp, secflavour, nmp->nm_krbname, 561 srv_principal, NULL, cred); 562 else 563 auth = nfs_getauth(nrp, secflavour, NULL, 564 srv_principal, NULL, cred); 565 if (set_uid) 566 cred->cr_uid = saved_uid; 567 if (auth == NULL) { 568 m_freem(nd->nd_mreq); 569 if (set_sigset) 570 newnfs_restore_sigmask(td, &oldset); 571 return (EACCES); 572 } 573 bzero(&ext, sizeof(ext)); 574 ext.rc_auth = auth; 575 if (nmp != NULL) { 576 ext.rc_feedback = nfs_feedback; 577 ext.rc_feedback_arg = &nf; 578 } 579 580 procnum = nd->nd_procnum; 581 if ((nd->nd_flag & ND_NFSV4) && 582 nd->nd_procnum != NFSPROC_NULL && 583 nd->nd_procnum != NFSV4PROC_CBCOMPOUND) 584 procnum = NFSV4PROC_COMPOUND; 585 586 if (nmp != NULL) { 587 NFSINCRGLOBAL(newnfsstats.rpcrequests); 588 589 /* Map the procnum to the old NFSv2 one, as required. */ 590 if ((nd->nd_flag & ND_NFSV2) != 0) { 591 if (nd->nd_procnum < NFS_V3NPROCS) 592 procnum = nfsv2_procid[nd->nd_procnum]; 593 else 594 procnum = NFSV2PROC_NOOP; 595 } 596 597 /* 598 * Now only used for the R_DONTRECOVER case, but until that is 599 * supported within the krpc code, I need to keep a queue of 600 * outstanding RPCs for nfsv4 client requests. 601 */ 602 if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) 603 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), 604 M_NFSDREQ, M_WAITOK); 605 #ifdef KDTRACE_HOOKS 606 if (dtrace_nfscl_nfs234_start_probe != NULL) { 607 uint32_t probe_id; 608 int probe_procnum; 609 610 if (nd->nd_flag & ND_NFSV4) { 611 probe_id = 612 nfscl_nfs4_start_probes[nd->nd_procnum]; 613 probe_procnum = nd->nd_procnum; 614 } else if (nd->nd_flag & ND_NFSV3) { 615 probe_id = nfscl_nfs3_start_probes[procnum]; 616 probe_procnum = procnum; 617 } else { 618 probe_id = 619 nfscl_nfs2_start_probes[nd->nd_procnum]; 620 probe_procnum = procnum; 621 } 622 if (probe_id != 0) 623 (dtrace_nfscl_nfs234_start_probe) 624 (probe_id, vp, nd->nd_mreq, cred, 625 probe_procnum); 626 } 627 #endif 628 } 629 trycnt = 0; 630 tryagain: 631 if (nmp == NULL) { 632 timo.tv_usec = 0; 633 if (clp == NULL) 634 timo.tv_sec = NFSV4_UPCALLTIMEO; 635 else 636 timo.tv_sec = NFSV4_CALLBACKTIMEO; 637 } else { 638 if (nrp->nr_sotype != SOCK_DGRAM) { 639 timo.tv_usec = 0; 640 if ((nmp->nm_flag & NFSMNT_NFSV4)) 641 timo.tv_sec = INT_MAX; 642 else 643 timo.tv_sec = NFS_TCPTIMEO; 644 } else { 645 timo.tv_sec = nmp->nm_timeo / NFS_HZ; 646 timo.tv_usec = (nmp->nm_timeo * 1000000) / NFS_HZ; 647 } 648 649 if (rep != NULL) { 650 rep->r_flags = 0; 651 rep->r_nmp = nmp; 652 /* 653 * Chain request into list of outstanding requests. 654 */ 655 NFSLOCKREQ(); 656 TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain); 657 NFSUNLOCKREQ(); 658 } 659 } 660 661 nd->nd_mrep = NULL; 662 stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, 663 &nd->nd_mrep, timo); 664 665 if (rep != NULL) { 666 /* 667 * RPC done, unlink the request. 668 */ 669 NFSLOCKREQ(); 670 TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); 671 NFSUNLOCKREQ(); 672 } 673 674 /* 675 * If there was a successful reply and a tprintf msg. 676 * tprintf a response. 677 */ 678 if (stat == RPC_SUCCESS) { 679 error = 0; 680 } else if (stat == RPC_TIMEDOUT) { 681 error = ETIMEDOUT; 682 } else if (stat == RPC_VERSMISMATCH) { 683 error = EOPNOTSUPP; 684 } else if (stat == RPC_PROGVERSMISMATCH) { 685 error = EPROTONOSUPPORT; 686 } else { 687 error = EACCES; 688 } 689 if (error) { 690 m_freem(nd->nd_mreq); 691 AUTH_DESTROY(auth); 692 if (rep != NULL) 693 FREE((caddr_t)rep, M_NFSDREQ); 694 if (set_sigset) 695 newnfs_restore_sigmask(td, &oldset); 696 return (error); 697 } 698 699 KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n")); 700 701 /* 702 * Search for any mbufs that are not a multiple of 4 bytes long 703 * or with m_data not longword aligned. 704 * These could cause pointer alignment problems, so copy them to 705 * well aligned mbufs. 706 */ 707 newnfs_realign(&nd->nd_mrep); 708 nd->nd_md = nd->nd_mrep; 709 nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); 710 nd->nd_repstat = 0; 711 if (nd->nd_procnum != NFSPROC_NULL) { 712 /* 713 * and now the actual NFS xdr. 714 */ 715 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 716 nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); 717 if (nd->nd_repstat != 0) { 718 if (((nd->nd_repstat == NFSERR_DELAY || 719 nd->nd_repstat == NFSERR_GRACE) && 720 (nd->nd_flag & ND_NFSV4) && 721 nd->nd_procnum != NFSPROC_DELEGRETURN && 722 nd->nd_procnum != NFSPROC_SETATTR && 723 nd->nd_procnum != NFSPROC_READ && 724 nd->nd_procnum != NFSPROC_WRITE && 725 nd->nd_procnum != NFSPROC_OPEN && 726 nd->nd_procnum != NFSPROC_CREATE && 727 nd->nd_procnum != NFSPROC_OPENCONFIRM && 728 nd->nd_procnum != NFSPROC_OPENDOWNGRADE && 729 nd->nd_procnum != NFSPROC_CLOSE && 730 nd->nd_procnum != NFSPROC_LOCK && 731 nd->nd_procnum != NFSPROC_LOCKU) || 732 (nd->nd_repstat == NFSERR_DELAY && 733 (nd->nd_flag & ND_NFSV4) == 0) || 734 nd->nd_repstat == NFSERR_RESOURCE) { 735 if (trylater_delay > NFS_TRYLATERDEL) 736 trylater_delay = NFS_TRYLATERDEL; 737 waituntil = NFSD_MONOSEC + trylater_delay; 738 while (NFSD_MONOSEC < waituntil) 739 (void) nfs_catnap(PZERO, 0, "nfstry"); 740 trylater_delay *= 2; 741 m_freem(nd->nd_mrep); 742 nd->nd_mrep = NULL; 743 goto tryagain; 744 } 745 746 /* 747 * If the File Handle was stale, invalidate the 748 * lookup cache, just in case. 749 * (vp != NULL implies a client side call) 750 */ 751 if (nd->nd_repstat == ESTALE && vp != NULL) { 752 cache_purge(vp); 753 if (ncl_call_invalcaches != NULL) 754 (*ncl_call_invalcaches)(vp); 755 } 756 } 757 758 /* 759 * Get rid of the tag, return count, and PUTFH result for V4. 760 */ 761 if (nd->nd_flag & ND_NFSV4) { 762 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); 763 i = fxdr_unsigned(int, *tl); 764 error = nfsm_advance(nd, NFSM_RNDUP(i), -1); 765 if (error) 766 goto nfsmout; 767 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 768 i = fxdr_unsigned(int, *++tl); 769 770 /* 771 * If the first op's status is non-zero, mark that 772 * there is no more data to process. 773 */ 774 if (*++tl) 775 nd->nd_flag |= ND_NOMOREDATA; 776 777 /* 778 * If the first op is Putfh, throw its results away 779 * and toss the op# and status for the first op. 780 */ 781 if (nmp != NULL && i == NFSV4OP_PUTFH && *tl == 0) { 782 NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); 783 i = fxdr_unsigned(int, *tl++); 784 j = fxdr_unsigned(int, *tl); 785 /* 786 * All Compounds that do an Op that must 787 * be in sequence consist of NFSV4OP_PUTFH 788 * followed by one of these. As such, we 789 * can determine if the seqid# should be 790 * incremented, here. 791 */ 792 if ((i == NFSV4OP_OPEN || 793 i == NFSV4OP_OPENCONFIRM || 794 i == NFSV4OP_OPENDOWNGRADE || 795 i == NFSV4OP_CLOSE || 796 i == NFSV4OP_LOCK || 797 i == NFSV4OP_LOCKU) && 798 (j == 0 || 799 (j != NFSERR_STALECLIENTID && 800 j != NFSERR_STALESTATEID && 801 j != NFSERR_BADSTATEID && 802 j != NFSERR_BADSEQID && 803 j != NFSERR_BADXDR && 804 j != NFSERR_RESOURCE && 805 j != NFSERR_NOFILEHANDLE))) 806 nd->nd_flag |= ND_INCRSEQID; 807 /* 808 * If the first op's status is non-zero, mark 809 * that there is no more data to process. 810 */ 811 if (j) 812 nd->nd_flag |= ND_NOMOREDATA; 813 } 814 815 /* 816 * If R_DONTRECOVER is set, replace the stale error 817 * reply, so that recovery isn't initiated. 818 */ 819 if ((nd->nd_repstat == NFSERR_STALECLIENTID || 820 nd->nd_repstat == NFSERR_STALESTATEID) && 821 rep != NULL && (rep->r_flags & R_DONTRECOVER)) 822 nd->nd_repstat = NFSERR_STALEDONTRECOVER; 823 } 824 } 825 826 #ifdef KDTRACE_HOOKS 827 if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { 828 uint32_t probe_id; 829 int probe_procnum; 830 831 if (nd->nd_flag & ND_NFSV4) { 832 probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; 833 probe_procnum = nd->nd_procnum; 834 } else if (nd->nd_flag & ND_NFSV3) { 835 probe_id = nfscl_nfs3_done_probes[procnum]; 836 probe_procnum = procnum; 837 } else { 838 probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; 839 probe_procnum = procnum; 840 } 841 if (probe_id != 0) 842 (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, 843 nd->nd_mreq, cred, probe_procnum, 0); 844 } 845 #endif 846 847 m_freem(nd->nd_mreq); 848 AUTH_DESTROY(auth); 849 if (rep != NULL) 850 FREE((caddr_t)rep, M_NFSDREQ); 851 if (set_sigset) 852 newnfs_restore_sigmask(td, &oldset); 853 return (0); 854 nfsmout: 855 mbuf_freem(nd->nd_mrep); 856 mbuf_freem(nd->nd_mreq); 857 AUTH_DESTROY(auth); 858 if (rep != NULL) 859 FREE((caddr_t)rep, M_NFSDREQ); 860 if (set_sigset) 861 newnfs_restore_sigmask(td, &oldset); 862 return (error); 863 } 864 865 /* 866 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and 867 * wait for all requests to complete. This is used by forced unmounts 868 * to terminate any outstanding RPCs. 869 */ 870 int 871 newnfs_nmcancelreqs(struct nfsmount *nmp) 872 { 873 874 if (nmp->nm_sockreq.nr_client != NULL) 875 CLNT_CLOSE(nmp->nm_sockreq.nr_client); 876 return (0); 877 } 878 879 /* 880 * Any signal that can interrupt an NFS operation in an intr mount 881 * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 882 */ 883 int newnfs_sig_set[] = { 884 SIGINT, 885 SIGTERM, 886 SIGHUP, 887 SIGKILL, 888 SIGSTOP, 889 SIGQUIT 890 }; 891 892 /* 893 * Check to see if one of the signals in our subset is pending on 894 * the process (in an intr mount). 895 */ 896 static int 897 nfs_sig_pending(sigset_t set) 898 { 899 int i; 900 901 for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) 902 if (SIGISMEMBER(set, newnfs_sig_set[i])) 903 return (1); 904 return (0); 905 } 906 907 /* 908 * The set/restore sigmask functions are used to (temporarily) overwrite 909 * the process p_sigmask during an RPC call (for example). These are also 910 * used in other places in the NFS client that might tsleep(). 911 */ 912 void 913 newnfs_set_sigmask(struct thread *td, sigset_t *oldset) 914 { 915 sigset_t newset; 916 int i; 917 struct proc *p; 918 919 SIGFILLSET(newset); 920 if (td == NULL) 921 td = curthread; /* XXX */ 922 p = td->td_proc; 923 /* Remove the NFS set of signals from newset */ 924 PROC_LOCK(p); 925 mtx_lock(&p->p_sigacts->ps_mtx); 926 for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) { 927 /* 928 * But make sure we leave the ones already masked 929 * by the process, ie. remove the signal from the 930 * temporary signalmask only if it wasn't already 931 * in p_sigmask. 932 */ 933 if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && 934 !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) 935 SIGDELSET(newset, newnfs_sig_set[i]); 936 } 937 mtx_unlock(&p->p_sigacts->ps_mtx); 938 PROC_UNLOCK(p); 939 kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); 940 } 941 942 void 943 newnfs_restore_sigmask(struct thread *td, sigset_t *set) 944 { 945 if (td == NULL) 946 td = curthread; /* XXX */ 947 kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); 948 } 949 950 /* 951 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the 952 * old one after msleep() returns. 953 */ 954 int 955 newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) 956 { 957 sigset_t oldset; 958 int error; 959 struct proc *p; 960 961 if ((priority & PCATCH) == 0) 962 return msleep(ident, mtx, priority, wmesg, timo); 963 if (td == NULL) 964 td = curthread; /* XXX */ 965 newnfs_set_sigmask(td, &oldset); 966 error = msleep(ident, mtx, priority, wmesg, timo); 967 newnfs_restore_sigmask(td, &oldset); 968 p = td->td_proc; 969 return (error); 970 } 971 972 /* 973 * Test for a termination condition pending on the process. 974 * This is used for NFSMNT_INT mounts. 975 */ 976 int 977 newnfs_sigintr(struct nfsmount *nmp, struct thread *td) 978 { 979 struct proc *p; 980 sigset_t tmpset; 981 982 /* Terminate all requests while attempting a forced unmount. */ 983 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) 984 return (EIO); 985 if (!(nmp->nm_flag & NFSMNT_INT)) 986 return (0); 987 if (td == NULL) 988 return (0); 989 p = td->td_proc; 990 PROC_LOCK(p); 991 tmpset = p->p_siglist; 992 SIGSETOR(tmpset, td->td_siglist); 993 SIGSETNAND(tmpset, td->td_sigmask); 994 mtx_lock(&p->p_sigacts->ps_mtx); 995 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); 996 mtx_unlock(&p->p_sigacts->ps_mtx); 997 if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) 998 && nfs_sig_pending(tmpset)) { 999 PROC_UNLOCK(p); 1000 return (EINTR); 1001 } 1002 PROC_UNLOCK(p); 1003 return (0); 1004 } 1005 1006 static int 1007 nfs_msg(struct thread *td, const char *server, const char *msg, int error) 1008 { 1009 struct proc *p; 1010 1011 p = td ? td->td_proc : NULL; 1012 if (error) { 1013 tprintf(p, LOG_INFO, "newnfs server %s: %s, error %d\n", 1014 server, msg, error); 1015 } else { 1016 tprintf(p, LOG_INFO, "newnfs server %s: %s\n", server, msg); 1017 } 1018 return (0); 1019 } 1020 1021 static void 1022 nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, 1023 int error, int flags) 1024 { 1025 if (nmp == NULL) 1026 return; 1027 mtx_lock(&nmp->nm_mtx); 1028 if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { 1029 nmp->nm_state |= NFSSTA_TIMEO; 1030 mtx_unlock(&nmp->nm_mtx); 1031 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1032 VQ_NOTRESP, 0); 1033 } else 1034 mtx_unlock(&nmp->nm_mtx); 1035 mtx_lock(&nmp->nm_mtx); 1036 if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 1037 nmp->nm_state |= NFSSTA_LOCKTIMEO; 1038 mtx_unlock(&nmp->nm_mtx); 1039 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1040 VQ_NOTRESPLOCK, 0); 1041 } else 1042 mtx_unlock(&nmp->nm_mtx); 1043 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 1044 } 1045 1046 static void 1047 nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, 1048 int flags, int tprintfmsg) 1049 { 1050 if (nmp == NULL) 1051 return; 1052 if (tprintfmsg) { 1053 nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 1054 } 1055 1056 mtx_lock(&nmp->nm_mtx); 1057 if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { 1058 nmp->nm_state &= ~NFSSTA_TIMEO; 1059 mtx_unlock(&nmp->nm_mtx); 1060 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1061 VQ_NOTRESP, 1); 1062 } else 1063 mtx_unlock(&nmp->nm_mtx); 1064 1065 mtx_lock(&nmp->nm_mtx); 1066 if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { 1067 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 1068 mtx_unlock(&nmp->nm_mtx); 1069 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 1070 VQ_NOTRESPLOCK, 1); 1071 } else 1072 mtx_unlock(&nmp->nm_mtx); 1073 } 1074 1075