/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/capsicum.h>

/*
 * generally, I don't like #includes inside .h files, but it seems to
 * be the easiest way to handle the port.
 */
#include <sys/fail.h>
#include <sys/hash.h>
#include <sys/sysctl.h>
#include <fs/nfs/nfsport.h>
#include <netinet/in_fib.h>
#include <netinet/if_ether.h>
#include <netinet6/ip6_var.h>
#include <net/if_types.h>
#include <net/route/nhop.h>

#include <fs/nfsclient/nfs_kdtrace.h>

#ifdef KDTRACE_HOOKS
dtrace_nfsclient_attrcache_flush_probe_func_t
		dtrace_nfscl_attrcache_flush_done_probe;
uint32_t	nfscl_attrcache_flush_done_id;

dtrace_nfsclient_attrcache_get_hit_probe_func_t
		dtrace_nfscl_attrcache_get_hit_probe;
uint32_t	nfscl_attrcache_get_hit_id;

dtrace_nfsclient_attrcache_get_miss_probe_func_t
		dtrace_nfscl_attrcache_get_miss_probe;
uint32_t	nfscl_attrcache_get_miss_id;

dtrace_nfsclient_attrcache_load_probe_func_t
		dtrace_nfscl_attrcache_load_done_probe;
uint32_t	nfscl_attrcache_load_done_id;
#endif /* !KDTRACE_HOOKS */

extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
extern struct vop_vector newnfs_vnodeops;
extern struct vop_vector newnfs_fifoops;
extern uma_zone_t newnfsnode_zone;
extern struct buf_ops buf_ops_newnfs;
extern uma_zone_t ncl_pbuf_zone;
extern short nfsv4_cbport;
extern int nfscl_enablecallb;
extern int nfs_numnfscbd;
extern int nfscl_inited;
struct mtx ncl_iod_mutex;
NFSDLOCKMUTEX;
extern struct mtx nfsrv_dslock_mtx;

extern void (*ncl_call_invalcaches)(struct vnode *);

SYSCTL_DECL(_vfs_nfs);
static int ncl_fileid_maxwarnings = 10;
SYSCTL_INT(_vfs_nfs, OID_AUTO, fileid_maxwarnings, CTLFLAG_RWTUN,
    &ncl_fileid_maxwarnings, 0,
    "Limit fileid corruption warnings; 0 is off; -1 is unlimited");
static volatile int ncl_fileid_nwarnings;

static void nfscl_warn_fileid(struct nfsmount *, struct nfsvattr *,
    struct nfsvattr *);

/*
 * Comparison function for vfs_hash functions.
 */
int
newnfs_vncmpf(struct vnode *vp, void *arg)
{
	struct nfsfh *nfhp = (struct nfsfh *)arg;
	struct nfsnode *np = VTONFS(vp);

	if (np->n_fhp->nfh_len != nfhp->nfh_len ||
	    NFSBCMP(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len))
		return (1);
	return (0);
}

/*
 * Look up a vnode/nfsnode by file handle.
 * Callers must check for mount points!!
 * In all cases, a pointer to a
 * nfsnode structure is returned.
 * This variant takes a "struct nfsfh *" as second argument and uses
 * that structure up, either by hanging it off the nfsnode or FREEing it.
 */
int
nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
    struct componentname *cnp, struct thread *td, struct nfsnode **npp,
    int lkflags)
{
	struct nfsnode *np, *dnp;
	struct vnode *vp, *nvp;
	struct nfsv4node *newd, *oldd;
	int error;
	u_int hash;
	struct nfsmount *nmp;

	nmp = VFSTONFS(mntp);
	dnp = VTONFS(dvp);
	*npp = NULL;

	/*
	 * If this is the mount point fh and NFSMNTP_FAKEROOTFH is set,
	 * replace it with the fake fh.
	 */
	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
	    nmp->nm_fhsize > 0 && nmp->nm_fhsize == nfhp->nfh_len &&
	    !NFSBCMP(nmp->nm_fh, nfhp->nfh_fh, nmp->nm_fhsize)) {
		free(nfhp, M_NFSFH);
		nfhp = malloc(sizeof(struct nfsfh) + NFSX_FHMAX + 1,
		    M_NFSFH, M_WAITOK | M_ZERO);
		nfhp->nfh_len = NFSX_FHMAX + 1;
	}

	hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT);

	error = vfs_hash_get(mntp, hash, lkflags,
	    td, &nvp, newnfs_vncmpf, nfhp);
	if (error == 0 && nvp != NULL) {
		/*
		 * I believe there is a slight chance that vgonel() could
		 * get called on this vnode between when NFSVOPLOCK() drops
		 * the VI_LOCK() and vget() acquires it again, so that it
		 * hasn't yet had v_usecount incremented. If this were to
		 * happen, the VIRF_DOOMED flag would be set, so check for
		 * that here. Since we now have the v_usecount incremented,
		 * we should be ok until we vrele() it, if the VIRF_DOOMED
		 * flag isn't set now.
		 */
		VI_LOCK(nvp);
		if (VN_IS_DOOMED(nvp)) {
			VI_UNLOCK(nvp);
			vrele(nvp);
			error = ENOENT;
		} else {
			VI_UNLOCK(nvp);
		}
	}
	if (error) {
		free(nfhp, M_NFSFH);
		return (error);
	}
	if (nvp != NULL) {
		np = VTONFS(nvp);
		/*
		 * For NFSv4, check to see if it is the same name and
		 * replace the name, if it is different.
		 */
		oldd = newd = NULL;
		if ((nmp->nm_flag & NFSMNT_NFSV4) && np->n_v4 != NULL &&
		    nvp->v_type == VREG &&
		    (np->n_v4->n4_namelen != cnp->cn_namelen ||
		     NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
		     cnp->cn_namelen) ||
		     dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
		     NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
		     dnp->n_fhp->nfh_len))) {
			newd = malloc(sizeof(struct nfsv4node) +
			    dnp->n_fhp->nfh_len + cnp->cn_namelen - 1,
			    M_NFSV4NODE, M_WAITOK);
			NFSLOCKNODE(np);
			if (newd != NULL && np->n_v4 != NULL &&
			    nvp->v_type == VREG &&
			    (np->n_v4->n4_namelen != cnp->cn_namelen ||
			     NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
			     cnp->cn_namelen) ||
			     dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
			     NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
			     dnp->n_fhp->nfh_len))) {
				oldd = np->n_v4;
				np->n_v4 = newd;
				newd = NULL;
				np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
				np->n_v4->n4_namelen = cnp->cn_namelen;
				NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
				    dnp->n_fhp->nfh_len);
				NFSBCOPY(cnp->cn_nameptr,
				    NFS4NODENAME(np->n_v4), cnp->cn_namelen);
			}
			NFSUNLOCKNODE(np);
		}
		if (newd != NULL)
			free(newd, M_NFSV4NODE);
		if (oldd != NULL)
			free(oldd, M_NFSV4NODE);
		*npp = np;
		free(nfhp, M_NFSFH);
		return (0);
	}
	np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO);

	error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp);
	if (error) {
		uma_zfree(newnfsnode_zone, np);
		free(nfhp, M_NFSFH);
		return (error);
	}
	vp = nvp;
	KASSERT(vp->v_bufobj.bo_bsize != 0, ("nfscl_nget: bo_bsize == 0"));
	vp->v_bufobj.bo_ops = &buf_ops_newnfs;
	vp->v_data = np;
	np->n_vnode = vp;
	/*
	 * Initialize the mutex even if the vnode is going to be a loser.
	 * This simplifies the logic in reclaim, which can then unconditionally
	 * destroy the mutex (in the case of the loser, or if hash_insert
	 * happened to return an error, no special casing is needed).
	 */
	mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK);
	lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE |
	    LK_CANRECURSE);

	/*
	 * Are we getting the root? If so, make sure the vnode flags
	 * are correct
	 */
	if (nfhp->nfh_len == NFSX_FHMAX + 1 ||
	    (nfhp->nfh_len == nmp->nm_fhsize &&
	     !bcmp(nfhp->nfh_fh, nmp->nm_fh, nfhp->nfh_len))) {
		if (vp->v_type == VNON)
			vp->v_type = VDIR;
		vp->v_vflag |= VV_ROOT;
	}

	vp->v_vflag |= VV_VMSIZEVNLOCK;

	np->n_fhp = nfhp;
	/*
	 * For NFSv4, we have to attach the directory file handle and
	 * file name, so that Open Ops can be done later.
	 */
	if (nmp->nm_flag & NFSMNT_NFSV4) {
		np->n_v4 = malloc(sizeof (struct nfsv4node)
		    + dnp->n_fhp->nfh_len + cnp->cn_namelen - 1, M_NFSV4NODE,
		    M_WAITOK);
		np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
		np->n_v4->n4_namelen = cnp->cn_namelen;
		NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
		    dnp->n_fhp->nfh_len);
		NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4),
		    cnp->cn_namelen);
	} else {
		np->n_v4 = NULL;
	}

	/*
	 * NFS supports recursive and shared locking.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL);
	VN_LOCK_AREC(vp);
	VN_LOCK_ASHARE(vp);
	error = insmntque(vp, mntp);
	if (error != 0) {
		*npp = NULL;
		mtx_destroy(&np->n_mtx);
		lockdestroy(&np->n_excl);
		free(nfhp, M_NFSFH);
		if (np->n_v4 != NULL)
			free(np->n_v4, M_NFSV4NODE);
		uma_zfree(newnfsnode_zone, np);
		return (error);
	}
	vn_set_state(vp, VSTATE_CONSTRUCTED);
	error = vfs_hash_insert(vp, hash, lkflags,
	    td, &nvp, newnfs_vncmpf, nfhp);
	if (error)
		return (error);
	if (nvp != NULL) {
		*npp = VTONFS(nvp);
		/* vfs_hash_insert() vput()'s the losing vnode */
		return (0);
	}
	*npp = np;

	return (0);
}

/*
 * Another variant of nfs_nget(). This one is only used by reopen. It
 * takes almost the same args as nfs_nget(), but only succeeds if an entry
 * exists in the cache. (Since files should already be "open" with a
 * vnode ref cnt on the node when reopen calls this, it should always
 * succeed.)
 * Also, don't get a vnode lock, since it may already be locked by some
 * other process that is handling it. This is ok, since all other threads
 * on the client are blocked by the nfsc_lock being exclusively held by the
 * caller of this function.
 */
int
nfscl_ngetreopen(struct mount *mntp, u_int8_t *fhp, int fhsize,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *nvp;
	u_int hash;
	struct nfsfh *nfhp;
	int error;

	*npp = NULL;
	/* For forced dismounts, just return error. */
	if (NFSCL_FORCEDISM(mntp))
		return (EINTR);
	nfhp = malloc(sizeof (struct nfsfh) + fhsize,
	    M_NFSFH, M_WAITOK);
	bcopy(fhp, &nfhp->nfh_fh[0], fhsize);
	nfhp->nfh_len = fhsize;

	hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT);

	/*
	 * First, try to get the vnode locked, but don't block for the lock.
	 */
	error = vfs_hash_get(mntp, hash, (LK_EXCLUSIVE | LK_NOWAIT), td, &nvp,
	    newnfs_vncmpf, nfhp);
	if (error == 0 && nvp != NULL) {
		NFSVOPUNLOCK(nvp);
	} else if (error == EBUSY) {
		/*
		 * It is safe so long as a vflush() with
		 * FORCECLOSE has not been done. Since the Renew thread is
		 * stopped and the MNTK_UNMOUNTF flag is set before doing
		 * a vflush() with FORCECLOSE, we should be ok here.
		 */
		if (NFSCL_FORCEDISM(mntp))
			error = EINTR;
		else {
			vfs_hash_ref(mntp, hash, td, &nvp,
			    newnfs_vncmpf, nfhp);
			if (nvp == NULL) {
				error = ENOENT;
			} else if (VN_IS_DOOMED(nvp)) {
				error = ENOENT;
				vrele(nvp);
			} else {
				error = 0;
			}
		}
	}
	free(nfhp, M_NFSFH);
	if (error)
		return (error);
	if (nvp != NULL) {
		*npp = VTONFS(nvp);
		return (0);
	}
	return (EINVAL);
}

static void
nfscl_warn_fileid(struct nfsmount *nmp, struct nfsvattr *oldnap,
    struct nfsvattr *newnap)
{
	int off;

	if (ncl_fileid_maxwarnings >= 0 &&
	    ncl_fileid_nwarnings >= ncl_fileid_maxwarnings)
		return;
	off = 0;
	if (ncl_fileid_maxwarnings >= 0) {
		if (++ncl_fileid_nwarnings >= ncl_fileid_maxwarnings)
			off = 1;
	}

	printf("newnfs: server '%s' error: fileid changed. "
	    "fsid %jx:%jx: expected fileid %#jx, got %#jx. "
	    "(BROKEN NFS SERVER OR MIDDLEWARE)\n",
	    nmp->nm_com.nmcom_hostname,
	    (uintmax_t)nmp->nm_fsid[0],
	    (uintmax_t)nmp->nm_fsid[1],
	    (uintmax_t)oldnap->na_fileid,
	    (uintmax_t)newnap->na_fileid);

	if (off)
		printf("newnfs: Logged %d times about fileid corruption; "
		    "going quiet to avoid spamming logs excessively. (Limit "
		    "is: %d).\n", ncl_fileid_nwarnings,
		    ncl_fileid_maxwarnings);
}

void
ncl_copy_vattr(struct vattr *dst, struct vattr *src)
{
	dst->va_type = src->va_type;
	dst->va_mode = src->va_mode;
	dst->va_nlink = src->va_nlink;
	dst->va_uid = src->va_uid;
	dst->va_gid = src->va_gid;
	dst->va_fsid = src->va_fsid;
	dst->va_fileid = src->va_fileid;
	dst->va_size = src->va_size;
	dst->va_blocksize = src->va_blocksize;
	dst->va_atime = src->va_atime;
	dst->va_mtime = src->va_mtime;
	dst->va_ctime = src->va_ctime;
	dst->va_birthtime = src->va_birthtime;
	dst->va_gen = src->va_gen;
	dst->va_flags = src->va_flags;
	dst->va_rdev = src->va_rdev;
	dst->va_bytes = src->va_bytes;
	dst->va_filerev = src->va_filerev;
}

/*
 * Load the attribute cache (that lives in the nfsnode entry) with
 * the attributes of the second argument and
 * Iff vaper not NULL
 *    copy the attributes to *vaper
 * Similar to nfs_loadattrcache(), except the attributes are passed in
 * instead of being parsed out of the mbuf list.
 */
int
nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper,
    int writeattr, int dontshrink)
{
	struct vnode *vp = *vpp;
	struct vattr *vap, *nvap = &nap->na_vattr, *vaper = nvaper;
	struct nfsnode *np;
	struct nfsmount *nmp;
	struct timespec mtime_save;
	int error, force_fid_err;
	dev_t topfsid;

	error = 0;

	/*
	 * If v_type == VNON it is a new node, so fill in the v_type,
	 * n_mtime fields. Check to see if it represents a special
	 * device, and if so, check for a possible alias. Once the
	 * correct vnode has been obtained, fill in the rest of the
	 * information.
	 */
	np = VTONFS(vp);
	NFSLOCKNODE(np);
	if (vp->v_type != nvap->va_type) {
		vp->v_type = nvap->va_type;
		if (vp->v_type == VFIFO)
			vp->v_op = &newnfs_fifoops;
		np->n_mtime = nvap->va_mtime;
	}
	nmp = VFSTONFS(vp->v_mount);
	vap = &np->n_vattr.na_vattr;
	mtime_save = vap->va_mtime;
	if (writeattr) {
		np->n_vattr.na_filerev = nap->na_filerev;
		np->n_vattr.na_size = nap->na_size;
		np->n_vattr.na_mtime = nap->na_mtime;
		np->n_vattr.na_ctime = nap->na_ctime;
		np->n_vattr.na_btime = nap->na_btime;
		np->n_vattr.na_fsid = nap->na_fsid;
		np->n_vattr.na_mode = nap->na_mode;
	} else {
		force_fid_err = 0;
		KFAIL_POINT_ERROR(DEBUG_FP, nfscl_force_fileid_warning,
		    force_fid_err);
		/*
		 * BROKEN NFS SERVER OR MIDDLEWARE
		 *
		 * Certain NFS servers (certain old proprietary filers ca.
		 * 2006) or broken middleboxes (e.g. WAN accelerator products)
		 * will respond to GETATTR requests with results for a
		 * different fileid.
		 *
		 * The WAN accelerator we've observed not only serves stale
		 * cache results for a given file, it also occasionally serves
		 * results for wholly different files. This causes surprising
		 * problems; for example the cached size attribute of a file
		 * may truncate down and then back up, resulting in zero
		 * regions in file contents read by applications. We observed
		 * this reliably with Clang and .c files during parallel build.
		 * A pcap revealed packet fragmentation and GETATTR RPC
		 * responses with wholly wrong fileids.
		 *
		 * For the case where the file handle is a fake one
		 * generated via the "syskrb5" mount option and
		 * the old fileid is 2, ignore the test, since this might
		 * be replacing the fake attributes with correct ones.
		 */
		if ((np->n_vattr.na_fileid != 0 &&
		     np->n_vattr.na_fileid != nap->na_fileid &&
		     (np->n_vattr.na_fileid != 2 || !NFSHASSYSKRB5(nmp) ||
		      np->n_fhp->nfh_len != NFSX_FHMAX + 1)) ||
		    force_fid_err) {
			nfscl_warn_fileid(nmp, &np->n_vattr, nap);
			error = EIDRM;
			goto out;
		}
		NFSBCOPY((caddr_t)nap, (caddr_t)&np->n_vattr,
		    sizeof (struct nfsvattr));
	}

	/*
	 * For NFSv4, the server's export may be a tree of file systems
	 * where a fileno is a unique value within each file system.
	 * na_filesid[0,1] uniquely identify the server file system
	 * and nm_fsid[0,1] is the value for the root file system mounted.
	 * As such, the value of va_fsid generated by vn_fsid() represents
	 * the root file system on the server and a different value for
	 * va_fsid is needed for the other server file systems. This
	 * va_fsid is ideally unique for all of the server file systems,
	 * so a 64bit hash on na_filesid[0,1] is calculated.
	 * Although highly unlikely that the fnv_64_hash() will be
	 * the same as the root, test for this case and recalculate the hash.
	 */
	vn_fsid(vp, vap);
	if (NFSHASNFSV4(nmp) && NFSHASHASSETFSID(nmp) &&
	    (nmp->nm_fsid[0] != np->n_vattr.na_filesid[0] ||
	     nmp->nm_fsid[1] != np->n_vattr.na_filesid[1])) {
		topfsid = vap->va_fsid;
		vap->va_fsid = FNV1_64_INIT;
		do {
			vap->va_fsid = fnv_64_buf(np->n_vattr.na_filesid,
			    sizeof(np->n_vattr.na_filesid), vap->va_fsid);
		} while (vap->va_fsid == topfsid);
	}

	np->n_attrstamp = time_second;
	if (vap->va_size != np->n_size) {
		if (vap->va_type == VREG) {
			if (dontshrink && vap->va_size < np->n_size) {
				/*
				 * We've been told not to shrink the file;
				 * zero np->n_attrstamp to indicate that
				 * the attributes are stale.
				 */
				vap->va_size = np->n_size;
				np->n_attrstamp = 0;
				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
			} else if (np->n_flag & NMODIFIED) {
				/*
				 * We've modified the file: Use the larger
				 * of our size, and the server's size.
				 */
				if (vap->va_size < np->n_size) {
					vap->va_size = np->n_size;
				} else {
					np->n_size = vap->va_size;
					np->n_flag |= NSIZECHANGED;
				}
			} else {
				np->n_size = vap->va_size;
				np->n_flag |= NSIZECHANGED;
			}
		} else {
			np->n_size = vap->va_size;
		}
	}
	/*
	 * The following checks are added to prevent a race between (say)
	 * a READDIR+ and a WRITE.
	 * READDIR+, WRITE requests sent out.
	 * READDIR+ resp, WRITE resp received on client.
	 * However, the WRITE resp was handled before the READDIR+ resp
	 * causing the post op attrs from the write to be loaded first
	 * and the attrs from the READDIR+ to be loaded later. If this
	 * happens, we have stale attrs loaded into the attrcache.
	 * We detect this by checking for the mtime moving backwards.
	 * We invalidate the attrcache when this happens.
	 */
	if (timespeccmp(&mtime_save, &vap->va_mtime, >)) {
		/* Size changed or mtime went backwards */
		np->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	}
	if (vaper != NULL) {
		ncl_copy_vattr(vaper, vap);
		if (np->n_flag & NCHG) {
			if (np->n_flag & NACC)
				vaper->va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vaper->va_mtime = np->n_mtim;
		}
	}

out:
#ifdef KDTRACE_HOOKS
	if (np->n_attrstamp != 0)
		KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error);
#endif
	(void)ncl_pager_setsize(vp, NULL);
	return (error);
}

/*
 * Call vnode_pager_setsize() if the size of the node changed, as
 * recorded in nfsnode vs. v_object, or delay the call if notifying
 * the pager is not possible at the moment.
 *
 * If nsizep is non-NULL, the call is delayed and the new node size is
 * provided. Caller should itself call vnode_pager_setsize() if
 * function returned true. If nsizep is NULL, function tries to call
 * vnode_pager_setsize() itself if needed and possible, and the nfs
 * node is unlocked unconditionally, the return value is not useful.
 */
bool
ncl_pager_setsize(struct vnode *vp, u_quad_t *nsizep)
{
	struct nfsnode *np;
	vm_object_t object;
	struct vattr *vap;
	u_quad_t nsize;
	bool setnsize;

	np = VTONFS(vp);
	NFSASSERTNODE(np);

	vap = &np->n_vattr.na_vattr;
	nsize = vap->va_size;
	object = vp->v_object;
	setnsize = false;

	if (object != NULL && nsize != object->un_pager.vnp.vnp_size) {
		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
		    (curthread->td_pflags2 & TDP2_SBPAGES) == 0)
			setnsize = true;
		else
			np->n_flag |= NVNSETSZSKIP;
	}
	if (nsizep == NULL) {
		NFSUNLOCKNODE(np);
		if (setnsize)
			vnode_pager_setsize(vp, nsize);
		setnsize = false;
	} else {
		*nsizep = nsize;
	}
	return (setnsize);
}

/*
 * Fill in the client id name. For these bytes:
 * 1 - they must be unique
 * 2 - they should be persistent across client reboots
 * 1 is more critical than 2
 * Use the mount point's unique id plus either the uuid or, if that
 * isn't set, random junk.
 */
void
nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen)
{
	int uuidlen;

	/*
	 * First, put in the 64bit mount point identifier.
	 */
	if (idlen >= sizeof (u_int64_t)) {
		NFSBCOPY((caddr_t)&clval, cp, sizeof (u_int64_t));
		cp += sizeof (u_int64_t);
		idlen -= sizeof (u_int64_t);
	}

	/*
	 * If uuid is non-zero length, use it.
	 */
	uuidlen = strlen(uuid);
	if (uuidlen > 0 && idlen >= uuidlen) {
		NFSBCOPY(uuid, cp, uuidlen);
		cp += uuidlen;
		idlen -= uuidlen;
	}

	/*
	 * This only normally happens if the uuid isn't set.
	 */
	while (idlen > 0) {
		*cp++ = (u_int8_t)(arc4random() % 256);
		idlen--;
	}
}

/*
 * Fill in a lock owner name. For now, pid + the process's creation time.
 */
void
nfscl_filllockowner(void *id, u_int8_t *cp, int flags)
{
	union {
		u_int32_t	lval;
		u_int8_t	cval[4];
	} tl;
	struct proc *p;

	if (id == NULL) {
		/* Return the single open_owner of all 0 bytes. */
		bzero(cp, NFSV4CL_LOCKNAMELEN);
		return;
	}
	if ((flags & F_POSIX) != 0) {
		p = (struct proc *)id;
		tl.lval = p->p_pid;
		*cp++ = tl.cval[0];
		*cp++ = tl.cval[1];
		*cp++ = tl.cval[2];
		*cp++ = tl.cval[3];
		tl.lval = p->p_stats->p_start.tv_sec;
		*cp++ = tl.cval[0];
		*cp++ = tl.cval[1];
		*cp++ = tl.cval[2];
		*cp++ = tl.cval[3];
		tl.lval = p->p_stats->p_start.tv_usec;
		*cp++ = tl.cval[0];
		*cp++ = tl.cval[1];
		*cp++ = tl.cval[2];
		*cp = tl.cval[3];
	} else if ((flags & F_FLOCK) != 0) {
		bcopy(&id, cp, sizeof(id));
		bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id));
	} else {
		printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n");
		bzero(cp, NFSV4CL_LOCKNAMELEN);
	}
}

/*
 * Find the parent process for the thread passed in as an argument.
 * If none exists, return NULL, otherwise return a thread for the parent.
 * (Can be any of the threads, since it is only used for td->td_proc.)
 */
NFSPROC_T *
nfscl_getparent(struct thread *td)
{
	struct proc *p;
	struct thread *ptd;

	if (td == NULL)
		return (NULL);
	p = td->td_proc;
	if (p->p_pid == 0)
		return (NULL);
	p = p->p_pptr;
	if (p == NULL)
		return (NULL);
	ptd = TAILQ_FIRST(&p->p_threads);
	return (ptd);
}

/*
 * Start up the renew kernel thread.
 */
static void
start_nfscl(void *arg)
{
	struct nfsclclient *clp;
	struct thread *td;

	clp = (struct nfsclclient *)arg;
	td = TAILQ_FIRST(&clp->nfsc_renewthread->p_threads);
	nfscl_renewthread(clp, td);
	kproc_exit(0);
}

void
nfscl_start_renewthread(struct nfsclclient *clp)
{

	kproc_create(start_nfscl, (void *)clp, &clp->nfsc_renewthread, 0, 0,
	    "nfscl");
}

/*
 * Handle wcc_data.
 * For NFSv4, it assumes that nfsv4_wccattr() was used to set up the getattr
 * as the first Op after PutFH.
 * (For NFSv4, the postop attributes are after the Op, so they can't be
 *  parsed here. A separate call to nfscl_postop_attr() is required.)
 */
int
nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp,
    struct nfsvattr *nap, int *flagp, int *wccflagp, uint64_t *repsizep)
{
	u_int32_t *tl;
	struct nfsnode *np = VTONFS(vp);
	struct nfsvattr nfsva;
	int error = 0;

	if (wccflagp != NULL)
		*wccflagp = 0;
	if (nd->nd_flag & ND_NFSV3) {
		*flagp = 0;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl == newnfs_true) {
			NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
			if (wccflagp != NULL) {
				NFSLOCKNODE(np);
				*wccflagp = (np->n_mtime.tv_sec ==
				    fxdr_unsigned(u_int32_t, *(tl + 2)) &&
				    np->n_mtime.tv_nsec ==
				    fxdr_unsigned(u_int32_t, *(tl + 3)));
				NFSUNLOCKNODE(np);
			}
		}
		error = nfscl_postop_attr(nd, nap, flagp);
		if (wccflagp != NULL && *flagp == 0)
			*wccflagp = 0;
	} else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR))
	    == (ND_NFSV4 | ND_V4WCCATTR)) {
		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
		    NULL, NULL, NULL, NULL, NULL);
		if (error)
			return (error);
		/*
		 * Get rid of Op# and status for next op.
		 */
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		if (*++tl)
			nd->nd_flag |= ND_NOMOREDATA;
		if (repsizep != NULL)
			*repsizep = nfsva.na_size;
		if (wccflagp != NULL &&
		    nfsva.na_vattr.va_mtime.tv_sec != 0) {
			NFSLOCKNODE(np);
			*wccflagp = (np->n_mtime.tv_sec ==
			    nfsva.na_vattr.va_mtime.tv_sec &&
			    np->n_mtime.tv_nsec ==
			    nfsva.na_vattr.va_mtime.tv_nsec);
			NFSUNLOCKNODE(np);
		}
	}
nfsmout:
	return (error);
}

/*
 * Get postop attributes.
 */
int
nfscl_postop_attr(struct nfsrv_descript *nd, struct nfsvattr *nap, int *retp)
{
	u_int32_t *tl;
	int error = 0;

	*retp = 0;
	if (nd->nd_flag & ND_NOMOREDATA)
		return (error);
	if (nd->nd_flag & ND_NFSV3) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		*retp = fxdr_unsigned(int, *tl);
	} else if (nd->nd_flag & ND_NFSV4) {
		/*
		 * For NFSv4, the postop attr are at the end, so no point
		 * in looking if nd_repstat != 0.
		 */
		if (!nd->nd_repstat) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				/* should never happen since nd_repstat != 0 */
				nd->nd_flag |= ND_NOMOREDATA;
			else
				*retp = 1;
		}
	} else if (!nd->nd_repstat) {
		/* For NFSv2, the attributes are here iff nd_repstat == 0 */
		*retp = 1;
	}
	if (*retp) {
		error = nfsm_loadattr(nd, nap);
		if (error)
			*retp = 0;
	}
nfsmout:
	return (error);
}

/*
 * nfscl_request() - mostly a wrapper for newnfs_request().
 */
int
nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p,
    struct ucred *cred)
{
	int ret, vers;
	struct nfsmount *nmp;

	nmp = VFSTONFS(vp->v_mount);
	if (nd->nd_flag & ND_NFSV4)
		vers = NFS_VER4;
	else if (nd->nd_flag & ND_NFSV3)
		vers = NFS_VER3;
	else
		vers = NFS_VER2;
	ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, vers, NULL, 1, NULL, NULL);
	return (ret);
}

/*
 * fill in this bsden's variant of statfs using nfsstatfs.
 */
void
nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs)
{
	struct statfs *sbp = (struct statfs *)statfs;

	if (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) {
		sbp->f_bsize = NFS_FABLKSIZE;
		sbp->f_blocks = sfp->sf_tbytes / NFS_FABLKSIZE;
		sbp->f_bfree = sfp->sf_fbytes / NFS_FABLKSIZE;
		/*
		 * Although sf_abytes is uint64_t and f_bavail is int64_t,
		 * the value after dividing by NFS_FABLKSIZE is small
		 * enough that it will fit in 63bits, so it is ok to
		 * assign it to f_bavail without fear that it will become
		 * negative.
		 */
		sbp->f_bavail = sfp->sf_abytes / NFS_FABLKSIZE;
		sbp->f_files = sfp->sf_tfiles;
		/* Since f_ffree is int64_t, clip it to 63bits. */
		if (sfp->sf_ffiles > INT64_MAX)
			sbp->f_ffree = INT64_MAX;
		else
			sbp->f_ffree = sfp->sf_ffiles;
	} else if ((nmp->nm_flag & NFSMNT_NFSV4) == 0) {
		/*
		 * The type casts to (int32_t) ensure that this code is
		 * compatible with the old NFS client, in that it will
		 * propagate bit31 to the high order bits. This may or may
		 * not be correct for NFSv2, but since it is a legacy
		 * environment, I'd rather retain backwards compatibility.
		 */
		sbp->f_bsize = (int32_t)sfp->sf_bsize;
		sbp->f_blocks = (int32_t)sfp->sf_blocks;
		sbp->f_bfree = (int32_t)sfp->sf_bfree;
		sbp->f_bavail = (int32_t)sfp->sf_bavail;
		sbp->f_files = 0;
		sbp->f_ffree = 0;
	}
}

/*
 * Use the fsinfo stuff to update the mount point.
 */
void
nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp)
{

	if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) &&
	    fsp->fs_wtpref >= NFS_FABLKSIZE)
		nmp->nm_wsize = (fsp->fs_wtpref + NFS_FABLKSIZE - 1) &
		    ~(NFS_FABLKSIZE - 1);
	if (fsp->fs_wtmax < nmp->nm_wsize && fsp->fs_wtmax > 0) {
		nmp->nm_wsize = fsp->fs_wtmax & ~(NFS_FABLKSIZE - 1);
		if (nmp->nm_wsize == 0)
			nmp->nm_wsize = fsp->fs_wtmax;
	}
	if (nmp->nm_wsize < NFS_FABLKSIZE)
		nmp->nm_wsize = NFS_FABLKSIZE;
	if ((nmp->nm_rsize == 0 || fsp->fs_rtpref < nmp->nm_rsize) &&
	    fsp->fs_rtpref >= NFS_FABLKSIZE)
		nmp->nm_rsize = (fsp->fs_rtpref + NFS_FABLKSIZE - 1) &
		    ~(NFS_FABLKSIZE - 1);
	if (fsp->fs_rtmax < nmp->nm_rsize && fsp->fs_rtmax > 0) {
		nmp->nm_rsize = fsp->fs_rtmax & ~(NFS_FABLKSIZE - 1);
		if (nmp->nm_rsize == 0)
			nmp->nm_rsize = fsp->fs_rtmax;
	}
	if (nmp->nm_rsize < NFS_FABLKSIZE)
		nmp->nm_rsize = NFS_FABLKSIZE;
	if ((nmp->nm_readdirsize == 0 || fsp->fs_dtpref < nmp->nm_readdirsize)
	    && fsp->fs_dtpref >= NFS_DIRBLKSIZ)
		nmp->nm_readdirsize = (fsp->fs_dtpref + NFS_DIRBLKSIZ - 1) &
		    ~(NFS_DIRBLKSIZ - 1);
	if (fsp->fs_rtmax < nmp->nm_readdirsize && fsp->fs_rtmax > 0) {
		nmp->nm_readdirsize = fsp->fs_rtmax & ~(NFS_DIRBLKSIZ - 1);
		if (nmp->nm_readdirsize == 0)
			nmp->nm_readdirsize = fsp->fs_rtmax;
	}
	if (nmp->nm_readdirsize < NFS_DIRBLKSIZ)
		nmp->nm_readdirsize = NFS_DIRBLKSIZ;
	if (fsp->fs_maxfilesize > 0 &&
	    fsp->fs_maxfilesize < nmp->nm_maxfilesize)
		nmp->nm_maxfilesize = fsp->fs_maxfilesize;
	nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp);
	nmp->nm_state |= NFSSTA_GOTFSINFO;
}

/*
 * Look up the source address that should be used to communicate with
 * @nmp and store it in @paddr.
 *
 * Returns a pointer to the address on success, or NULL on failure.
 */
u_int8_t *
nfscl_getmyip(struct nfsmount *nmp, struct in6_addr *paddr, int *isinet6p)
{
#if defined(INET6) || defined(INET)
	int fibnum;

	fibnum = curthread->td_proc->p_fibnum;
#endif
#ifdef INET
	if (nmp->nm_nam->sa_family == AF_INET) {
		struct epoch_tracker et;
		struct nhop_object *nh;
		struct sockaddr_in *sin;
		struct in_addr addr = {};

		sin = (struct sockaddr_in *)nmp->nm_nam;
		NET_EPOCH_ENTER(et);
		CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
		nh = fib4_lookup(fibnum, sin->sin_addr, 0, NHR_NONE, 0);
		if (nh != NULL) {
			addr = IA_SIN(ifatoia(nh->nh_ifa))->sin_addr;
			if (IN_LOOPBACK(ntohl(addr.s_addr))) {
				/* Ignore loopback addresses */
				nh = NULL;
			}
		}
		CURVNET_RESTORE();
		NET_EPOCH_EXIT(et);

		if (nh == NULL)
			return (NULL);
		*isinet6p = 0;
		*((struct in_addr *)paddr) = addr;

		return (u_int8_t *)paddr;
	}
#endif
#ifdef INET6
	if (nmp->nm_nam->sa_family == AF_INET6) {
		struct epoch_tracker et;
		struct sockaddr_in6 *sin6;
		int error;

		sin6 = (struct sockaddr_in6 *)nmp->nm_nam;

		NET_EPOCH_ENTER(et);
		CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
		error = in6_selectsrc_addr(fibnum, &sin6->sin6_addr,
		    sin6->sin6_scope_id, NULL, paddr, NULL);
		CURVNET_RESTORE();
		NET_EPOCH_EXIT(et);
		if (error != 0)
			return (NULL);

		if (IN6_IS_ADDR_LOOPBACK(paddr))
			return (NULL);

		/* Scope is embedded in the address. */
		*isinet6p = 1;

		return (u_int8_t *)paddr;
	}
#endif
	return (NULL);
}

/*
 * Copy NFS uid, gids from the cred structure.
 */
void
newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr)
{
	int i;

	KASSERT(cr->cr_ngroups >= 0,
	    ("newnfs_copyincred: negative cr_ngroups"));
	nfscr->nfsc_uid = cr->cr_uid;
	nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1);
	for (i = 0; i < nfscr->nfsc_ngroups; i++)
		nfscr->nfsc_groups[i] = cr->cr_groups[i];
}

/*
 * Do any client specific initialization.
 */
void
nfscl_init(void)
{
	static int inited = 0;

	if (inited)
		return;
	inited = 1;
	nfscl_inited = 1;
	ncl_pbuf_zone = pbuf_zsecond_create("nfspbuf", nswbuf / 2);
}

/*
 * Check each of the attributes to be set, to ensure they aren't already
 * the correct value. Disable setting ones already correct.
 */
int
nfscl_checksattr(struct vattr *vap, struct nfsvattr *nvap)
{

	if (vap->va_mode != (mode_t)VNOVAL) {
		if (vap->va_mode == nvap->na_mode)
			vap->va_mode = (mode_t)VNOVAL;
	}
	if (vap->va_uid != (uid_t)VNOVAL) {
		if (vap->va_uid == nvap->na_uid)
			vap->va_uid = (uid_t)VNOVAL;
	}
	if (vap->va_gid != (gid_t)VNOVAL) {
		if (vap->va_gid == nvap->na_gid)
			vap->va_gid = (gid_t)VNOVAL;
	}
	if (vap->va_size != VNOVAL) {
		if (vap->va_size == nvap->na_size)
			vap->va_size = VNOVAL;
	}

	/*
	 * We are normally called with only a partially initialized
	 * VAP. Since the NFSv3 spec says that the server may use the
	 * file attributes to store the verifier, the spec requires
	 * us to do a SETATTR RPC. FreeBSD servers store the verifier
	 * in atime, but we can't really assume that all servers will,
	 * so we ensure that our SETATTR sets both atime and mtime.
	 * Set the VA_UTIMES_NULL flag for this case, so that
	 * the server's time will be used. This is needed to
	 * work around a bug in some Solaris servers, where
	 * setting the time TOCLIENT causes the Setattr RPC
	 * to return NFS_OK, but not set va_mode.
	 */
	if (vap->va_mtime.tv_sec == VNOVAL) {
		vfs_timestamp(&vap->va_mtime);
		vap->va_vaflags |= VA_UTIMES_NULL;
	}
	if (vap->va_atime.tv_sec == VNOVAL)
		vap->va_atime = vap->va_mtime;
	return (1);
}

/*
 * Map nfsv4 errors to errno.h errors.
 * The uid and gid arguments are only used for NFSERR_BADOWNER and that
 * error should only be returned for the Open, Create and Setattr Ops.
 * As such, most calls can just pass in 0 for those arguments.
 */
int
nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid)
{
	struct proc *p;

	if (error < 10000 || error >= NFSERR_STALEWRITEVERF)
		return (error);
	if (td != NULL)
		p = td->td_proc;
	else
		p = NULL;
	switch (error) {
	case NFSERR_BADOWNER:
		tprintf(p, LOG_INFO,
		    "No name and/or group mapping for uid,gid:(%d,%d)\n",
		    uid, gid);
		return (EPERM);
	case NFSERR_BADNAME:
	case NFSERR_BADCHAR:
		printf("nfsv4 char/name not handled by server\n");
		return (ENOENT);
	case NFSERR_STALECLIENTID:
	case NFSERR_STALESTATEID:
	case NFSERR_EXPIRED:
	case NFSERR_BADSTATEID:
	case NFSERR_BADSESSION:
		printf("nfsv4 recover err returned %d\n", error);
		return (EIO);
	case NFSERR_BADHANDLE:
	case NFSERR_SERVERFAULT:
	case NFSERR_BADTYPE:
	case NFSERR_FHEXPIRED:
	case NFSERR_RESOURCE:
	case NFSERR_MOVED:
	case NFSERR_MINORVERMISMATCH:
	case NFSERR_OLDSTATEID:
	case NFSERR_BADSEQID:
	case NFSERR_LEASEMOVED:
	case NFSERR_RECLAIMBAD:
	case NFSERR_BADXDR:
	case NFSERR_OPILLEGAL:
		printf("nfsv4 client/server protocol prob err=%d\n",
		    error);
		return (EIO);
	case NFSERR_NOFILEHANDLE:
		printf("nfsv4 no file handle: usually means the file "
		    "system is not exported on the NFSv4 server\n");
		return (EIO);
	case NFSERR_WRONGSEC:
		tprintf(p, LOG_INFO, "NFSv4 error WrongSec: You probably need a"
		    " Kerberos TGT\n");
		return (EIO);
	default:
		tprintf(p, LOG_INFO, "nfsv4 err=%d\n", error);
		return (EIO);
	}
}

/*
 * Check to see if the process for this owner exists. Return 1 if it doesn't
 * and 0 otherwise.
 */
int
nfscl_procdoesntexist(u_int8_t *own)
{
	union {
		u_int32_t	lval;
		u_int8_t	cval[4];
	} tl;
	struct proc *p;
	pid_t pid;
	int i, ret = 0;

	/* For the single open_owner of all 0 bytes, just return 0. */
	for (i = 0; i < NFSV4CL_LOCKNAMELEN; i++)
		if (own[i] != 0)
			break;
	if (i == NFSV4CL_LOCKNAMELEN)
		return (0);

	tl.cval[0] = *own++;
	tl.cval[1] = *own++;
	tl.cval[2] = *own++;
	tl.cval[3] = *own++;
	pid = tl.lval;
	p = pfind_any_locked(pid);
	if (p == NULL)
		return (1);
	if (p->p_stats == NULL) {
		PROC_UNLOCK(p);
		return (0);
	}
	tl.cval[0] = *own++;
	tl.cval[1] = *own++;
	tl.cval[2] = *own++;
	tl.cval[3] = *own++;
	if (tl.lval != p->p_stats->p_start.tv_sec) {
		ret = 1;
	} else {
		tl.cval[0] = *own++;
		tl.cval[1] = *own++;
		tl.cval[2] = *own++;
		tl.cval[3] = *own;
		if (tl.lval != p->p_stats->p_start.tv_usec)
			ret = 1;
	}
	PROC_UNLOCK(p);
	return (ret);
}

/*
 * - nfs pseudo system call for the client
 */
/*
 * MPSAFE
 */
static int
nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap)
{
	struct file *fp;
	struct nfscbd_args nfscbdarg;
	struct nfsd_nfscbd_args nfscbdarg2;
	struct nameidata nd;
	struct nfscl_dumpmntopts dumpmntopts;
	cap_rights_t rights;
	char *buf;
	int error;
	struct mount *mp;
	struct nfsmount *nmp;

	NFSD_CURVNET_SET(NFSD_TD_TO_VNET(td));
	if (uap->flag & NFSSVC_CBADDSOCK) {
		error = copyin(uap->argp, (caddr_t)&nfscbdarg,
		    sizeof(nfscbdarg));
		if (error)
			goto out;
		/*
		 * Since we don't know what rights might be required,
		 * pretend that we need them all. It is better to be too
		 * careful than too reckless.
		 */
		error = fget(td, nfscbdarg.sock,
		    cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
		if (error)
			goto out;
		if (fp->f_type != DTYPE_SOCKET) {
			fdrop(fp, td);
			error = EPERM;
			goto out;
		}
		error = nfscbd_addsock(fp);
		fdrop(fp, td);
		if (!error && nfscl_enablecallb == 0) {
			nfsv4_cbport = nfscbdarg.port;
			nfscl_enablecallb = 1;
		}
	} else if (uap->flag & NFSSVC_NFSCBD) {
		if (uap->argp == NULL) {
			error = EINVAL;
			goto out;
		}
		error = copyin(uap->argp, (caddr_t)&nfscbdarg2,
		    sizeof(nfscbdarg2));
		if (error)
			goto out;
		error = nfscbd_nfsd(td, &nfscbdarg2);
	} else if (uap->flag & NFSSVC_DUMPMNTOPTS) {
		error = copyin(uap->argp, &dumpmntopts, sizeof(dumpmntopts));
		if (error == 0 && (dumpmntopts.ndmnt_blen < 256 ||
		    dumpmntopts.ndmnt_blen > 1024))
			error = EINVAL;
		if (error == 0)
			error = nfsrv_lookupfilename(&nd,
			    dumpmntopts.ndmnt_fname, td);
		if (error == 0 && strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name,
		    "nfs") != 0) {
			vput(nd.ni_vp);
			error = EINVAL;
		}
		if (error == 0) {
			buf = malloc(dumpmntopts.ndmnt_blen, M_TEMP, M_WAITOK |
			    M_ZERO);
			nfscl_retopts(VFSTONFS(nd.ni_vp->v_mount), buf,
			    dumpmntopts.ndmnt_blen);
			vput(nd.ni_vp);
			error = copyout(buf, dumpmntopts.ndmnt_buf,
			    dumpmntopts.ndmnt_blen);
			free(buf, M_TEMP);
		}
	} else if (uap->flag & NFSSVC_FORCEDISM) {
		buf = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
		error = copyinstr(uap->argp, buf, MNAMELEN + 1, NULL);
		if (error == 0) {
			nmp = NULL;
			mtx_lock(&mountlist_mtx);
			TAILQ_FOREACH(mp, &mountlist, mnt_list) {
				if (strcmp(mp->mnt_stat.f_mntonname, buf) ==
				    0 && strcmp(mp->mnt_stat.f_fstypename,
				    "nfs") == 0 && mp->mnt_data != NULL) {
					nmp = VFSTONFS(mp);
					NFSDDSLOCK();
					if (nfsv4_findmirror(nmp) != NULL) {
						NFSDDSUNLOCK();
						error = ENXIO;
						nmp = NULL;
						break;
					}
					mtx_lock(&nmp->nm_mtx);
					if ((nmp->nm_privflag &
					    NFSMNTP_FORCEDISM) == 0) {
						nmp->nm_privflag |=
						    (NFSMNTP_FORCEDISM |
						     NFSMNTP_CANCELRPCS);
						mtx_unlock(&nmp->nm_mtx);
					} else {
						mtx_unlock(&nmp->nm_mtx);
						nmp = NULL;
					}
					NFSDDSUNLOCK();
					break;
				}
			}
			mtx_unlock(&mountlist_mtx);

			if (nmp != NULL) {
				/*
				 * Call newnfs_nmcancelreqs() to cause
				 * any RPCs in progress on the mount point to
				 * fail.
				 * This will cause any process waiting for an
				 * RPC to complete while holding a vnode lock
				 * on the mounted-on vnode (such as "df" or
				 * a non-forced "umount") to fail.
				 * This will unlock the mounted-on vnode so
				 * a forced dismount can succeed.
				 * Then clear NFSMNTP_CANCELRPCS and wakeup(),
				 * so that nfs_unmount() can complete.
				 */
				newnfs_nmcancelreqs(nmp);
				mtx_lock(&nmp->nm_mtx);
				nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
				wakeup(nmp);
				mtx_unlock(&nmp->nm_mtx);
			} else if (error == 0)
				error = EINVAL;
		}
		free(buf, M_TEMP);
	} else {
		error = EINVAL;
	}
out:
	NFSD_CURVNET_RESTORE();
	return (error);
}

extern int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *);

/*
 * Called once to initialize data structures...
 */
static int
nfscl_modevent(module_t mod, int type, void *data)
{
	int error = 0;
	static int loaded = 0;

	switch (type) {
	case MOD_LOAD:
		if (loaded)
			return (0);
		newnfs_portinit();
		mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF);
		nfscl_init();
		NFSD_LOCK();
		nfsrvd_cbinit(0);
		NFSD_UNLOCK();
		ncl_call_invalcaches = ncl_invalcaches;
		nfsd_call_nfscl = nfssvc_nfscl;
		loaded = 1;
		break;

	case MOD_UNLOAD:
		if (nfs_numnfscbd != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * XXX: Unloading of nfscl module is unsupported.
		 */
#if 0
		ncl_call_invalcaches = NULL;
		nfsd_call_nfscl = NULL;
		uma_zdestroy(ncl_pbuf_zone);
		/* and get rid of the mutexes */
		mtx_destroy(&ncl_iod_mutex);
		loaded = 0;
		break;
#else
		/* FALLTHROUGH */
#endif
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

static moduledata_t nfscl_mod = {
	"nfscl",
	nfscl_modevent,
	NULL,
};

/*
 * This is the main module declaration for the NFS client. The
 * nfscl_modevent() function is needed to ensure that the module
 * cannot be unloaded, among other things.
 * There is also a module declaration in sys/fs/nfsclient/nfs_clvfsops.c
 * for the name "nfs" within the VFS_SET() macro that defines the "nfs"
 * file system type.
 */
DECLARE_MODULE(nfscl, nfscl_mod, SI_SUB_VFS, SI_ORDER_FIRST);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfscl, 1);
MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfscl, krpc, 1, 1, 1);
MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfscl, xdr, 1, 1, 1);