1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 /* 41 * vnode op calls for Sun NFS version 2, 3 and 4 42 */ 43 44 #include "opt_inet.h" 45 46 #include <sys/param.h> 47 #include <sys/kernel.h> 48 #include <sys/systm.h> 49 #include <sys/resourcevar.h> 50 #include <sys/proc.h> 51 #include <sys/mount.h> 52 #include <sys/bio.h> 53 #include <sys/buf.h> 54 #include <sys/extattr.h> 55 #include <sys/filio.h> 56 #include <sys/jail.h> 57 #include <sys/malloc.h> 58 #include <sys/mbuf.h> 59 #include <sys/namei.h> 60 #include <sys/socket.h> 61 #include <sys/vnode.h> 62 #include <sys/dirent.h> 63 #include <sys/fcntl.h> 64 #include <sys/lockf.h> 65 #include <sys/stat.h> 66 #include <sys/sysctl.h> 67 #include <sys/signalvar.h> 68 69 #include <vm/vm.h> 70 #include <vm/vm_extern.h> 71 #include <vm/vm_object.h> 72 73 #include <fs/nfs/nfsport.h> 74 #include <fs/nfsclient/nfsnode.h> 75 #include <fs/nfsclient/nfsmount.h> 76 #include <fs/nfsclient/nfs.h> 77 #include <fs/nfsclient/nfs_kdtrace.h> 78 79 #include <net/if.h> 80 #include <netinet/in.h> 81 #include <netinet/in_var.h> 82 83 #include <nfs/nfs_lock.h> 84 85 #ifdef KDTRACE_HOOKS 86 #include <sys/dtrace_bsd.h> 87 88 dtrace_nfsclient_accesscache_flush_probe_func_t 89 dtrace_nfscl_accesscache_flush_done_probe; 90 uint32_t nfscl_accesscache_flush_done_id; 91 92 dtrace_nfsclient_accesscache_get_probe_func_t 93 dtrace_nfscl_accesscache_get_hit_probe, 94 dtrace_nfscl_accesscache_get_miss_probe; 95 uint32_t nfscl_accesscache_get_hit_id; 96 uint32_t nfscl_accesscache_get_miss_id; 97 98 dtrace_nfsclient_accesscache_load_probe_func_t 99 dtrace_nfscl_accesscache_load_done_probe; 100 uint32_t nfscl_accesscache_load_done_id; 101 #endif /* !KDTRACE_HOOKS */ 102 103 /* Defs */ 104 #define TRUE 1 105 #define FALSE 0 106 107 extern struct nfsstatsv1 nfsstatsv1; 108 extern int nfsrv_useacl; 109 extern int nfscl_debuglevel; 110 MALLOC_DECLARE(M_NEWNFSREQ); 111 112 static vop_read_t 
nfsfifo_read; 113 static vop_write_t nfsfifo_write; 114 static vop_close_t nfsfifo_close; 115 static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, 116 struct thread *); 117 static vop_lookup_t nfs_lookup; 118 static vop_create_t nfs_create; 119 static vop_mknod_t nfs_mknod; 120 static vop_open_t nfs_open; 121 static vop_pathconf_t nfs_pathconf; 122 static vop_close_t nfs_close; 123 static vop_access_t nfs_access; 124 static vop_getattr_t nfs_getattr; 125 static vop_setattr_t nfs_setattr; 126 static vop_read_t nfs_read; 127 static vop_fsync_t nfs_fsync; 128 static vop_remove_t nfs_remove; 129 static vop_link_t nfs_link; 130 static vop_rename_t nfs_rename; 131 static vop_mkdir_t nfs_mkdir; 132 static vop_rmdir_t nfs_rmdir; 133 static vop_symlink_t nfs_symlink; 134 static vop_readdir_t nfs_readdir; 135 static vop_strategy_t nfs_strategy; 136 static int nfs_lookitup(struct vnode *, char *, int, 137 struct ucred *, struct thread *, struct nfsnode **); 138 static int nfs_sillyrename(struct vnode *, struct vnode *, 139 struct componentname *); 140 static vop_access_t nfsspec_access; 141 static vop_readlink_t nfs_readlink; 142 static vop_print_t nfs_print; 143 static vop_advlock_t nfs_advlock; 144 static vop_advlockasync_t nfs_advlockasync; 145 static vop_getacl_t nfs_getacl; 146 static vop_setacl_t nfs_setacl; 147 static vop_advise_t nfs_advise; 148 static vop_allocate_t nfs_allocate; 149 static vop_copy_file_range_t nfs_copy_file_range; 150 static vop_ioctl_t nfs_ioctl; 151 static vop_getextattr_t nfs_getextattr; 152 static vop_setextattr_t nfs_setextattr; 153 static vop_listextattr_t nfs_listextattr; 154 static vop_deleteextattr_t nfs_deleteextattr; 155 static vop_lock1_t nfs_lock; 156 157 /* 158 * Global vfs data structures for nfs 159 */ 160 161 static struct vop_vector newnfs_vnodeops_nosig = { 162 .vop_default = &default_vnodeops, 163 .vop_access = nfs_access, 164 .vop_advlock = nfs_advlock, 165 .vop_advlockasync = nfs_advlockasync, 166 
.vop_close = nfs_close, 167 .vop_create = nfs_create, 168 .vop_fsync = nfs_fsync, 169 .vop_getattr = nfs_getattr, 170 .vop_getpages = ncl_getpages, 171 .vop_putpages = ncl_putpages, 172 .vop_inactive = ncl_inactive, 173 .vop_link = nfs_link, 174 .vop_lock1 = nfs_lock, 175 .vop_lookup = nfs_lookup, 176 .vop_mkdir = nfs_mkdir, 177 .vop_mknod = nfs_mknod, 178 .vop_open = nfs_open, 179 .vop_pathconf = nfs_pathconf, 180 .vop_print = nfs_print, 181 .vop_read = nfs_read, 182 .vop_readdir = nfs_readdir, 183 .vop_readlink = nfs_readlink, 184 .vop_reclaim = ncl_reclaim, 185 .vop_remove = nfs_remove, 186 .vop_rename = nfs_rename, 187 .vop_rmdir = nfs_rmdir, 188 .vop_setattr = nfs_setattr, 189 .vop_strategy = nfs_strategy, 190 .vop_symlink = nfs_symlink, 191 .vop_write = ncl_write, 192 .vop_getacl = nfs_getacl, 193 .vop_setacl = nfs_setacl, 194 .vop_advise = nfs_advise, 195 .vop_allocate = nfs_allocate, 196 .vop_copy_file_range = nfs_copy_file_range, 197 .vop_ioctl = nfs_ioctl, 198 .vop_getextattr = nfs_getextattr, 199 .vop_setextattr = nfs_setextattr, 200 .vop_listextattr = nfs_listextattr, 201 .vop_deleteextattr = nfs_deleteextattr, 202 }; 203 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops_nosig); 204 205 static int 206 nfs_vnodeops_bypass(struct vop_generic_args *a) 207 { 208 209 return (vop_sigdefer(&newnfs_vnodeops_nosig, a)); 210 } 211 212 struct vop_vector newnfs_vnodeops = { 213 .vop_default = &default_vnodeops, 214 .vop_bypass = nfs_vnodeops_bypass, 215 }; 216 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops); 217 218 static struct vop_vector newnfs_fifoops_nosig = { 219 .vop_default = &fifo_specops, 220 .vop_access = nfsspec_access, 221 .vop_close = nfsfifo_close, 222 .vop_fsync = nfs_fsync, 223 .vop_getattr = nfs_getattr, 224 .vop_inactive = ncl_inactive, 225 .vop_pathconf = nfs_pathconf, 226 .vop_print = nfs_print, 227 .vop_read = nfsfifo_read, 228 .vop_reclaim = ncl_reclaim, 229 .vop_setattr = nfs_setattr, 230 .vop_write = nfsfifo_write, 231 }; 232 
VFS_VOP_VECTOR_REGISTER(newnfs_fifoops_nosig); 233 234 static int 235 nfs_fifoops_bypass(struct vop_generic_args *a) 236 { 237 238 return (vop_sigdefer(&newnfs_fifoops_nosig, a)); 239 } 240 241 struct vop_vector newnfs_fifoops = { 242 .vop_default = &default_vnodeops, 243 .vop_bypass = nfs_fifoops_bypass, 244 }; 245 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops); 246 247 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 248 struct componentname *cnp, struct vattr *vap); 249 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 250 int namelen, struct ucred *cred, struct thread *td); 251 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, 252 char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, 253 char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); 254 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, 255 struct componentname *scnp, struct sillyrename *sp); 256 257 /* 258 * Global variables 259 */ 260 SYSCTL_DECL(_vfs_nfs); 261 262 static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 263 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 264 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); 265 266 static int nfs_prime_access_cache = 0; 267 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, 268 &nfs_prime_access_cache, 0, 269 "Prime NFS ACCESS cache when fetching attributes"); 270 271 static int newnfs_commit_on_close = 0; 272 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, 273 &newnfs_commit_on_close, 0, "write+commit on close, else only write"); 274 275 static int nfs_clean_pages_on_close = 1; 276 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 277 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 278 279 int newnfs_directio_enable = 0; 280 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, 281 &newnfs_directio_enable, 0, "Enable NFS directio"); 282 283 int nfs_keep_dirty_on_error; 284 
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, 285 &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); 286 287 /* 288 * This sysctl allows other processes to mmap a file that has been opened 289 * O_DIRECT by a process. In general, having processes mmap the file while 290 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 291 * this by default to prevent DoS attacks - to prevent a malicious user from 292 * opening up files O_DIRECT preventing other users from mmap'ing these 293 * files. "Protected" environments where stricter consistency guarantees are 294 * required can disable this knob. The process that opened the file O_DIRECT 295 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 296 * meaningful. 297 */ 298 int newnfs_directio_allow_mmap = 1; 299 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, 300 &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 301 302 #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ 303 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ 304 | NFSACCESS_DELETE | NFSACCESS_LOOKUP) 305 306 /* 307 * SMP Locking Note : 308 * The list of locks after the description of the lock is the ordering 309 * of other locks acquired with the lock held. 310 * np->n_mtx : Protects the fields in the nfsnode. 311 VM Object Lock 312 VI_MTX (acquired indirectly) 313 * nmp->nm_mtx : Protects the fields in the nfsmount. 314 rep->r_mtx 315 * ncl_iod_mutex : Global lock, protects shared nfsiod state. 316 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 317 nmp->nm_mtx 318 rep->r_mtx 319 * rep->r_mtx : Protects the fields in an nfsreq. 
320 */ 321 322 static int 323 nfs_lock(struct vop_lock1_args *ap) 324 { 325 struct vnode *vp; 326 struct nfsnode *np; 327 u_quad_t nsize; 328 int error, lktype; 329 bool onfault; 330 331 vp = ap->a_vp; 332 lktype = ap->a_flags & LK_TYPE_MASK; 333 error = VOP_LOCK1_APV(&default_vnodeops, ap); 334 if (error != 0 || vp->v_op != &newnfs_vnodeops) 335 return (error); 336 np = VTONFS(vp); 337 if (np == NULL) 338 return (0); 339 NFSLOCKNODE(np); 340 if ((np->n_flag & NVNSETSZSKIP) == 0 || (lktype != LK_SHARED && 341 lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE && 342 lktype != LK_TRYUPGRADE)) { 343 NFSUNLOCKNODE(np); 344 return (0); 345 } 346 onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT && 347 (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE && 348 (lktype == LK_SHARED || lktype == LK_EXCLUSIVE); 349 if (onfault && vp->v_vnlock->lk_recurse == 0) { 350 /* 351 * Force retry in vm_fault(), to make the lock request 352 * sleepable, which allows us to piggy-back the 353 * sleepable call to vnode_pager_setsize(). 
354 */ 355 NFSUNLOCKNODE(np); 356 VOP_UNLOCK(vp); 357 return (EBUSY); 358 } 359 if ((ap->a_flags & LK_NOWAIT) != 0 || 360 (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) { 361 NFSUNLOCKNODE(np); 362 return (0); 363 } 364 if (lktype == LK_SHARED) { 365 NFSUNLOCKNODE(np); 366 VOP_UNLOCK(vp); 367 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 368 ap->a_flags |= LK_EXCLUSIVE; 369 error = VOP_LOCK1_APV(&default_vnodeops, ap); 370 if (error != 0 || vp->v_op != &newnfs_vnodeops) 371 return (error); 372 if (vp->v_data == NULL) 373 goto downgrade; 374 MPASS(vp->v_data == np); 375 NFSLOCKNODE(np); 376 if ((np->n_flag & NVNSETSZSKIP) == 0) { 377 NFSUNLOCKNODE(np); 378 goto downgrade; 379 } 380 } 381 np->n_flag &= ~NVNSETSZSKIP; 382 nsize = np->n_size; 383 NFSUNLOCKNODE(np); 384 vnode_pager_setsize(vp, nsize); 385 downgrade: 386 if (lktype == LK_SHARED) { 387 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 388 ap->a_flags |= LK_DOWNGRADE; 389 (void)VOP_LOCK1_APV(&default_vnodeops, ap); 390 } 391 return (0); 392 } 393 394 static int 395 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 396 struct ucred *cred, u_int32_t *retmode) 397 { 398 int error = 0, attrflag, i, lrupos; 399 u_int32_t rmode; 400 struct nfsnode *np = VTONFS(vp); 401 struct nfsvattr nfsva; 402 403 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 404 &rmode, NULL); 405 if (attrflag) 406 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 407 if (!error) { 408 lrupos = 0; 409 NFSLOCKNODE(np); 410 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 411 if (np->n_accesscache[i].uid == cred->cr_uid) { 412 np->n_accesscache[i].mode = rmode; 413 np->n_accesscache[i].stamp = time_second; 414 break; 415 } 416 if (i > 0 && np->n_accesscache[i].stamp < 417 np->n_accesscache[lrupos].stamp) 418 lrupos = i; 419 } 420 if (i == NFS_ACCESSCACHESIZE) { 421 np->n_accesscache[lrupos].uid = cred->cr_uid; 422 np->n_accesscache[lrupos].mode = rmode; 423 np->n_accesscache[lrupos].stamp = 
time_second; 424 } 425 NFSUNLOCKNODE(np); 426 if (retmode != NULL) 427 *retmode = rmode; 428 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 429 } else if (NFS_ISV4(vp)) { 430 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 431 } 432 #ifdef KDTRACE_HOOKS 433 if (error != 0) 434 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 435 error); 436 #endif 437 return (error); 438 } 439 440 /* 441 * nfs access vnode op. 442 * For nfs version 2, just return ok. File accesses may fail later. 443 * For nfs version 3, use the access rpc to check accessibility. If file modes 444 * are changed on the server, accesses might still fail later. 445 */ 446 static int 447 nfs_access(struct vop_access_args *ap) 448 { 449 struct vnode *vp = ap->a_vp; 450 int error = 0, i, gotahit; 451 u_int32_t mode, wmode, rmode; 452 int v34 = NFS_ISV34(vp); 453 struct nfsnode *np = VTONFS(vp); 454 455 /* 456 * Disallow write attempts on filesystems mounted read-only; 457 * unless the file is a socket, fifo, or a block or character 458 * device resident on the filesystem. 459 */ 460 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 461 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 462 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 463 switch (vp->v_type) { 464 case VREG: 465 case VDIR: 466 case VLNK: 467 return (EROFS); 468 default: 469 break; 470 } 471 } 472 /* 473 * For nfs v3 or v4, check to see if we have done this recently, and if 474 * so return our cached result instead of making an ACCESS call. 475 * If not, do an access rpc, otherwise you are stuck emulating 476 * ufs_access() locally using the vattr. This may not be correct, 477 * since the server may apply other access criteria such as 478 * client uid-->server uid mapping that we do not know about. 
479 */ 480 if (v34) { 481 if (ap->a_accmode & VREAD) 482 mode = NFSACCESS_READ; 483 else 484 mode = 0; 485 if (vp->v_type != VDIR) { 486 if (ap->a_accmode & VWRITE) 487 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 488 if (ap->a_accmode & VAPPEND) 489 mode |= NFSACCESS_EXTEND; 490 if (ap->a_accmode & VEXEC) 491 mode |= NFSACCESS_EXECUTE; 492 if (ap->a_accmode & VDELETE) 493 mode |= NFSACCESS_DELETE; 494 } else { 495 if (ap->a_accmode & VWRITE) 496 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 497 if (ap->a_accmode & VAPPEND) 498 mode |= NFSACCESS_EXTEND; 499 if (ap->a_accmode & VEXEC) 500 mode |= NFSACCESS_LOOKUP; 501 if (ap->a_accmode & VDELETE) 502 mode |= NFSACCESS_DELETE; 503 if (ap->a_accmode & VDELETE_CHILD) 504 mode |= NFSACCESS_MODIFY; 505 } 506 /* XXX safety belt, only make blanket request if caching */ 507 if (nfsaccess_cache_timeout > 0) { 508 wmode = NFSACCESS_READ | NFSACCESS_MODIFY | 509 NFSACCESS_EXTEND | NFSACCESS_EXECUTE | 510 NFSACCESS_DELETE | NFSACCESS_LOOKUP; 511 } else { 512 wmode = mode; 513 } 514 515 /* 516 * Does our cached result allow us to give a definite yes to 517 * this request? 518 */ 519 gotahit = 0; 520 NFSLOCKNODE(np); 521 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 522 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 523 if (time_second < (np->n_accesscache[i].stamp 524 + nfsaccess_cache_timeout) && 525 (np->n_accesscache[i].mode & mode) == mode) { 526 NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); 527 gotahit = 1; 528 } 529 break; 530 } 531 } 532 NFSUNLOCKNODE(np); 533 #ifdef KDTRACE_HOOKS 534 if (gotahit != 0) 535 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 536 ap->a_cred->cr_uid, mode); 537 else 538 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 539 ap->a_cred->cr_uid, mode); 540 #endif 541 if (gotahit == 0) { 542 /* 543 * Either a no, or a don't know. Go to the wire. 
544 */ 545 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 546 error = nfs34_access_otw(vp, wmode, ap->a_td, 547 ap->a_cred, &rmode); 548 if (!error && 549 (rmode & mode) != mode) 550 error = EACCES; 551 } 552 return (error); 553 } else { 554 if ((error = nfsspec_access(ap)) != 0) { 555 return (error); 556 } 557 /* 558 * Attempt to prevent a mapped root from accessing a file 559 * which it shouldn't. We try to read a byte from the file 560 * if the user is root and the file is not zero length. 561 * After calling nfsspec_access, we should have the correct 562 * file size cached. 563 */ 564 NFSLOCKNODE(np); 565 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 566 && VTONFS(vp)->n_size > 0) { 567 struct iovec aiov; 568 struct uio auio; 569 char buf[1]; 570 571 NFSUNLOCKNODE(np); 572 aiov.iov_base = buf; 573 aiov.iov_len = 1; 574 auio.uio_iov = &aiov; 575 auio.uio_iovcnt = 1; 576 auio.uio_offset = 0; 577 auio.uio_resid = 1; 578 auio.uio_segflg = UIO_SYSSPACE; 579 auio.uio_rw = UIO_READ; 580 auio.uio_td = ap->a_td; 581 582 if (vp->v_type == VREG) 583 error = ncl_readrpc(vp, &auio, ap->a_cred); 584 else if (vp->v_type == VDIR) { 585 char* bp; 586 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 587 aiov.iov_base = bp; 588 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 589 error = ncl_readdirrpc(vp, &auio, ap->a_cred, 590 ap->a_td); 591 free(bp, M_TEMP); 592 } else if (vp->v_type == VLNK) 593 error = ncl_readlinkrpc(vp, &auio, ap->a_cred); 594 else 595 error = EACCES; 596 } else 597 NFSUNLOCKNODE(np); 598 return (error); 599 } 600 } 601 602 /* 603 * nfs open vnode op 604 * Check to see if the type is ok 605 * and that deletion is not in progress. 606 * For paged in text files, you will need to flush the page cache 607 * if consistency is lost. 
608 */ 609 /* ARGSUSED */ 610 static int 611 nfs_open(struct vop_open_args *ap) 612 { 613 struct vnode *vp = ap->a_vp; 614 struct nfsnode *np = VTONFS(vp); 615 struct vattr vattr; 616 int error; 617 int fmode = ap->a_mode; 618 struct ucred *cred; 619 vm_object_t obj; 620 621 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 622 return (EOPNOTSUPP); 623 624 /* 625 * For NFSv4, we need to do the Open Op before cache validation, 626 * so that we conform to RFC3530 Sec. 9.3.1. 627 */ 628 if (NFS_ISV4(vp)) { 629 error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); 630 if (error) { 631 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 632 (gid_t)0); 633 return (error); 634 } 635 } 636 637 /* 638 * Now, if this Open will be doing reading, re-validate/flush the 639 * cache, so that Close/Open coherency is maintained. 640 */ 641 NFSLOCKNODE(np); 642 if (np->n_flag & NMODIFIED) { 643 NFSUNLOCKNODE(np); 644 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 645 if (error == EINTR || error == EIO) { 646 if (NFS_ISV4(vp)) 647 (void) nfsrpc_close(vp, 0, ap->a_td); 648 return (error); 649 } 650 NFSLOCKNODE(np); 651 np->n_attrstamp = 0; 652 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 653 if (vp->v_type == VDIR) 654 np->n_direofoffset = 0; 655 NFSUNLOCKNODE(np); 656 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 657 if (error) { 658 if (NFS_ISV4(vp)) 659 (void) nfsrpc_close(vp, 0, ap->a_td); 660 return (error); 661 } 662 NFSLOCKNODE(np); 663 np->n_mtime = vattr.va_mtime; 664 if (NFS_ISV4(vp)) 665 np->n_change = vattr.va_filerev; 666 } else { 667 NFSUNLOCKNODE(np); 668 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 669 if (error) { 670 if (NFS_ISV4(vp)) 671 (void) nfsrpc_close(vp, 0, ap->a_td); 672 return (error); 673 } 674 NFSLOCKNODE(np); 675 if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || 676 NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 677 if (vp->v_type == VDIR) 678 np->n_direofoffset = 0; 679 NFSUNLOCKNODE(np); 680 error = ncl_vinvalbuf(vp, V_SAVE, 
ap->a_td, 1); 681 if (error == EINTR || error == EIO) { 682 if (NFS_ISV4(vp)) 683 (void) nfsrpc_close(vp, 0, ap->a_td); 684 return (error); 685 } 686 NFSLOCKNODE(np); 687 np->n_mtime = vattr.va_mtime; 688 if (NFS_ISV4(vp)) 689 np->n_change = vattr.va_filerev; 690 } 691 } 692 693 /* 694 * If the object has >= 1 O_DIRECT active opens, we disable caching. 695 */ 696 if (newnfs_directio_enable && (fmode & O_DIRECT) && 697 (vp->v_type == VREG)) { 698 if (np->n_directio_opens == 0) { 699 NFSUNLOCKNODE(np); 700 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 701 if (error) { 702 if (NFS_ISV4(vp)) 703 (void) nfsrpc_close(vp, 0, ap->a_td); 704 return (error); 705 } 706 NFSLOCKNODE(np); 707 np->n_flag |= NNONCACHE; 708 } 709 np->n_directio_opens++; 710 } 711 712 /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ 713 if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) 714 np->n_flag |= NWRITEOPENED; 715 716 /* 717 * If this is an open for writing, capture a reference to the 718 * credentials, so they can be used by ncl_putpages(). Using 719 * these write credentials is preferable to the credentials of 720 * whatever thread happens to be doing the VOP_PUTPAGES() since 721 * the write RPCs are less likely to fail with EACCES. 722 */ 723 if ((fmode & FWRITE) != 0) { 724 cred = np->n_writecred; 725 np->n_writecred = crhold(ap->a_cred); 726 } else 727 cred = NULL; 728 NFSUNLOCKNODE(np); 729 730 if (cred != NULL) 731 crfree(cred); 732 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 733 734 /* 735 * If the text file has been mmap'd, flush any dirty pages to the 736 * buffer cache and then... 737 * Make sure all writes are pushed to the NFS server. If this is not 738 * done, the modify time of the file can change while the text 739 * file is being executed. This will cause the process that is 740 * executing the text file to be terminated. 
741 */ 742 if (vp->v_writecount <= -1) { 743 if ((obj = vp->v_object) != NULL && 744 vm_object_mightbedirty(obj)) { 745 VM_OBJECT_WLOCK(obj); 746 vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); 747 VM_OBJECT_WUNLOCK(obj); 748 } 749 750 /* Now, flush the buffer cache. */ 751 ncl_flush(vp, MNT_WAIT, curthread, 0, 0); 752 753 /* And, finally, make sure that n_mtime is up to date. */ 754 np = VTONFS(vp); 755 NFSLOCKNODE(np); 756 np->n_mtime = np->n_vattr.na_mtime; 757 NFSUNLOCKNODE(np); 758 } 759 return (0); 760 } 761 762 /* 763 * nfs close vnode op 764 * What an NFS client should do upon close after writing is a debatable issue. 765 * Most NFS clients push delayed writes to the server upon close, basically for 766 * two reasons: 767 * 1 - So that any write errors may be reported back to the client process 768 * doing the close system call. By far the two most likely errors are 769 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 770 * 2 - To put a worst case upper bound on cache inconsistency between 771 * multiple clients for the file. 772 * There is also a consistency problem for Version 2 of the protocol w.r.t. 773 * not being able to tell if other clients are writing a file concurrently, 774 * since there is no way of knowing if the changed modify time in the reply 775 * is only due to the write for this client. 776 * (NFS Version 3 provides weak cache consistency data in the reply that 777 * should be sufficient to detect and handle this case.) 778 * 779 * The current code does the following: 780 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 781 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 782 * or commit them (this satisfies 1 and 2 except for the 783 * case where the server crashes after this close but 784 * before the commit RPC, which is felt to be "good 785 * enough". 
Changing the last argument to ncl_flush() to 786 * a 1 would force a commit operation, if it is felt a 787 * commit is necessary now. 788 * for NFS Version 4 - flush the dirty buffers and commit them, if 789 * nfscl_mustflush() says this is necessary. 790 * It is necessary if there is no write delegation held, 791 * in order to satisfy open/close coherency. 792 * If the file isn't cached on local stable storage, 793 * it may be necessary in order to detect "out of space" 794 * errors from the server, if the write delegation 795 * issued by the server doesn't allow the file to grow. 796 */ 797 /* ARGSUSED */ 798 static int 799 nfs_close(struct vop_close_args *ap) 800 { 801 struct vnode *vp = ap->a_vp; 802 struct nfsnode *np = VTONFS(vp); 803 struct nfsvattr nfsva; 804 struct ucred *cred; 805 int error = 0, ret, localcred = 0; 806 int fmode = ap->a_fflag; 807 808 if (NFSCL_FORCEDISM(vp->v_mount)) 809 return (0); 810 /* 811 * During shutdown, a_cred isn't valid, so just use root. 812 */ 813 if (ap->a_cred == NOCRED) { 814 cred = newnfs_getcred(); 815 localcred = 1; 816 } else { 817 cred = ap->a_cred; 818 } 819 if (vp->v_type == VREG) { 820 /* 821 * Examine and clean dirty pages, regardless of NMODIFIED. 822 * This closes a major hole in close-to-open consistency. 823 * We want to push out all dirty pages (and buffers) on 824 * close, regardless of whether they were dirtied by 825 * mmap'ed writes or via write(). 826 */ 827 if (nfs_clean_pages_on_close && vp->v_object) { 828 VM_OBJECT_WLOCK(vp->v_object); 829 vm_object_page_clean(vp->v_object, 0, 0, 0); 830 VM_OBJECT_WUNLOCK(vp->v_object); 831 } 832 NFSLOCKNODE(np); 833 if (np->n_flag & NMODIFIED) { 834 NFSUNLOCKNODE(np); 835 if (NFS_ISV3(vp)) { 836 /* 837 * Under NFSv3 we have dirty buffers to dispose of. We 838 * must flush them to the NFS server. We have the option 839 * of waiting all the way through the commit rpc or just 840 * waiting for the initial write. 
The default is to only 841 * wait through the initial write so the data is in the 842 * server's cache, which is roughly similar to the state 843 * a standard disk subsystem leaves the file in on close(). 844 * 845 * We cannot clear the NMODIFIED bit in np->n_flag due to 846 * potential races with other processes, and certainly 847 * cannot clear it if we don't commit. 848 * These races occur when there is no longer the old 849 * traditional vnode locking implemented for Vnode Ops. 850 */ 851 int cm = newnfs_commit_on_close ? 1 : 0; 852 error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); 853 /* np->n_flag &= ~NMODIFIED; */ 854 } else if (NFS_ISV4(vp)) { 855 if (nfscl_mustflush(vp) != 0) { 856 int cm = newnfs_commit_on_close ? 1 : 0; 857 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 858 cm, 0); 859 /* 860 * as above w.r.t races when clearing 861 * NMODIFIED. 862 * np->n_flag &= ~NMODIFIED; 863 */ 864 } 865 } else { 866 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 867 } 868 NFSLOCKNODE(np); 869 } 870 /* 871 * Invalidate the attribute cache in all cases. 872 * An open is going to fetch fresh attrs any way, other procs 873 * on this node that have file open will be forced to do an 874 * otw attr fetch, but this is safe. 875 * --> A user found that their RPC count dropped by 20% when 876 * this was commented out and I can't see any requirement 877 * for it, so I've disabled it when negative lookups are 878 * enabled. (What does this have to do with negative lookup 879 * caching? Well nothing, except it was reported by the 880 * same user that needed negative lookup caching and I wanted 881 * there to be a way to disable it to see if it 882 * is the cause of some caching/coherency issue that might 883 * crop up.) 
884 */ 885 if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { 886 np->n_attrstamp = 0; 887 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 888 } 889 if (np->n_flag & NWRITEERR) { 890 np->n_flag &= ~NWRITEERR; 891 error = np->n_error; 892 } 893 NFSUNLOCKNODE(np); 894 } 895 896 if (NFS_ISV4(vp)) { 897 /* 898 * Get attributes so "change" is up to date. 899 */ 900 if (error == 0 && nfscl_mustflush(vp) != 0 && 901 vp->v_type == VREG && 902 (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) { 903 ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva, 904 NULL); 905 if (!ret) { 906 np->n_change = nfsva.na_filerev; 907 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 908 NULL, 0, 0); 909 } 910 } 911 912 /* 913 * and do the close. 914 */ 915 ret = nfsrpc_close(vp, 0, ap->a_td); 916 if (!error && ret) 917 error = ret; 918 if (error) 919 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 920 (gid_t)0); 921 } 922 if (newnfs_directio_enable) 923 KASSERT((np->n_directio_asyncwr == 0), 924 ("nfs_close: dirty unflushed (%d) directio buffers\n", 925 np->n_directio_asyncwr)); 926 if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 927 NFSLOCKNODE(np); 928 KASSERT((np->n_directio_opens > 0), 929 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 930 np->n_directio_opens--; 931 if (np->n_directio_opens == 0) 932 np->n_flag &= ~NNONCACHE; 933 NFSUNLOCKNODE(np); 934 } 935 if (localcred) 936 NFSFREECRED(cred); 937 return (error); 938 } 939 940 /* 941 * nfs getattr call from vfs. 942 */ 943 static int 944 nfs_getattr(struct vop_getattr_args *ap) 945 { 946 struct vnode *vp = ap->a_vp; 947 struct thread *td = curthread; /* XXX */ 948 struct nfsnode *np = VTONFS(vp); 949 int error = 0; 950 struct nfsvattr nfsva; 951 struct vattr *vap = ap->a_vap; 952 struct vattr vattr; 953 954 /* 955 * Update local times for special files. 956 */ 957 NFSLOCKNODE(np); 958 if (np->n_flag & (NACC | NUPD)) 959 np->n_flag |= NCHG; 960 NFSUNLOCKNODE(np); 961 /* 962 * First look in the cache. 
963 */ 964 if (ncl_getattrcache(vp, &vattr) == 0) { 965 ncl_copy_vattr(vap, &vattr); 966 967 /* 968 * Get the local modify time for the case of a write 969 * delegation. 970 */ 971 nfscl_deleggetmodtime(vp, &vap->va_mtime); 972 return (0); 973 } 974 975 if (NFS_ISV34(vp) && nfs_prime_access_cache && 976 nfsaccess_cache_timeout > 0) { 977 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 978 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); 979 if (ncl_getattrcache(vp, ap->a_vap) == 0) { 980 nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); 981 return (0); 982 } 983 } 984 error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL); 985 if (!error) 986 error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0); 987 if (!error) { 988 /* 989 * Get the local modify time for the case of a write 990 * delegation. 991 */ 992 nfscl_deleggetmodtime(vp, &vap->va_mtime); 993 } else if (NFS_ISV4(vp)) { 994 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 995 } 996 return (error); 997 } 998 999 /* 1000 * nfs setattr call. 1001 */ 1002 static int 1003 nfs_setattr(struct vop_setattr_args *ap) 1004 { 1005 struct vnode *vp = ap->a_vp; 1006 struct nfsnode *np = VTONFS(vp); 1007 struct thread *td = curthread; /* XXX */ 1008 struct vattr *vap = ap->a_vap; 1009 int error = 0; 1010 u_quad_t tsize; 1011 1012 #ifndef nolint 1013 tsize = (u_quad_t)0; 1014 #endif 1015 1016 /* 1017 * Setting of flags and marking of atimes are not supported. 1018 */ 1019 if (vap->va_flags != VNOVAL) 1020 return (EOPNOTSUPP); 1021 1022 /* 1023 * Disallow write attempts if the filesystem is mounted read-only. 
1024 */ 1025 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 1026 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 1027 vap->va_mtime.tv_sec != VNOVAL || 1028 vap->va_birthtime.tv_sec != VNOVAL || 1029 vap->va_mode != (mode_t)VNOVAL) && 1030 (vp->v_mount->mnt_flag & MNT_RDONLY)) 1031 return (EROFS); 1032 if (vap->va_size != VNOVAL) { 1033 switch (vp->v_type) { 1034 case VDIR: 1035 return (EISDIR); 1036 case VCHR: 1037 case VBLK: 1038 case VSOCK: 1039 case VFIFO: 1040 if (vap->va_mtime.tv_sec == VNOVAL && 1041 vap->va_atime.tv_sec == VNOVAL && 1042 vap->va_birthtime.tv_sec == VNOVAL && 1043 vap->va_mode == (mode_t)VNOVAL && 1044 vap->va_uid == (uid_t)VNOVAL && 1045 vap->va_gid == (gid_t)VNOVAL) 1046 return (0); 1047 vap->va_size = VNOVAL; 1048 break; 1049 default: 1050 /* 1051 * Disallow write attempts if the filesystem is 1052 * mounted read-only. 1053 */ 1054 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1055 return (EROFS); 1056 /* 1057 * We run vnode_pager_setsize() early (why?), 1058 * we must set np->n_size now to avoid vinvalbuf 1059 * V_SAVE races that might setsize a lower 1060 * value. 1061 */ 1062 NFSLOCKNODE(np); 1063 tsize = np->n_size; 1064 NFSUNLOCKNODE(np); 1065 error = ncl_meta_setsize(vp, td, vap->va_size); 1066 NFSLOCKNODE(np); 1067 if (np->n_flag & NMODIFIED) { 1068 tsize = np->n_size; 1069 NFSUNLOCKNODE(np); 1070 error = ncl_vinvalbuf(vp, vap->va_size == 0 ? 1071 0 : V_SAVE, td, 1); 1072 if (error != 0) { 1073 vnode_pager_setsize(vp, tsize); 1074 return (error); 1075 } 1076 /* 1077 * Call nfscl_delegmodtime() to set the modify time 1078 * locally, as required. 1079 */ 1080 nfscl_delegmodtime(vp); 1081 } else 1082 NFSUNLOCKNODE(np); 1083 /* 1084 * np->n_size has already been set to vap->va_size 1085 * in ncl_meta_setsize(). We must set it again since 1086 * nfs_loadattrcache() could be called through 1087 * ncl_meta_setsize() and could modify np->n_size. 
1088 */ 1089 NFSLOCKNODE(np); 1090 np->n_vattr.na_size = np->n_size = vap->va_size; 1091 NFSUNLOCKNODE(np); 1092 } 1093 } else { 1094 NFSLOCKNODE(np); 1095 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 1096 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 1097 NFSUNLOCKNODE(np); 1098 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 1099 if (error == EINTR || error == EIO) 1100 return (error); 1101 } else 1102 NFSUNLOCKNODE(np); 1103 } 1104 error = nfs_setattrrpc(vp, vap, ap->a_cred, td); 1105 if (error && vap->va_size != VNOVAL) { 1106 NFSLOCKNODE(np); 1107 np->n_size = np->n_vattr.na_size = tsize; 1108 vnode_pager_setsize(vp, tsize); 1109 NFSUNLOCKNODE(np); 1110 } 1111 return (error); 1112 } 1113 1114 /* 1115 * Do an nfs setattr rpc. 1116 */ 1117 static int 1118 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, 1119 struct thread *td) 1120 { 1121 struct nfsnode *np = VTONFS(vp); 1122 int error, ret, attrflag, i; 1123 struct nfsvattr nfsva; 1124 1125 if (NFS_ISV34(vp)) { 1126 NFSLOCKNODE(np); 1127 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 1128 np->n_accesscache[i].stamp = 0; 1129 np->n_flag |= NDELEGMOD; 1130 NFSUNLOCKNODE(np); 1131 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 1132 } 1133 error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, 1134 NULL); 1135 if (attrflag) { 1136 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1137 if (ret && !error) 1138 error = ret; 1139 } 1140 if (error && NFS_ISV4(vp)) 1141 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); 1142 return (error); 1143 } 1144 1145 /* 1146 * nfs lookup call, one step at a time... 
1147 * First look in cache 1148 * If not found, unlock the directory nfsnode and do the rpc 1149 */ 1150 static int 1151 nfs_lookup(struct vop_lookup_args *ap) 1152 { 1153 struct componentname *cnp = ap->a_cnp; 1154 struct vnode *dvp = ap->a_dvp; 1155 struct vnode **vpp = ap->a_vpp; 1156 struct mount *mp = dvp->v_mount; 1157 int flags = cnp->cn_flags; 1158 struct vnode *newvp; 1159 struct nfsmount *nmp; 1160 struct nfsnode *np, *newnp; 1161 int error = 0, attrflag, dattrflag, ltype, ncticks; 1162 struct thread *td = cnp->cn_thread; 1163 struct nfsfh *nfhp; 1164 struct nfsvattr dnfsva, nfsva; 1165 struct vattr vattr; 1166 struct timespec nctime; 1167 1168 *vpp = NULLVP; 1169 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 1170 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 1171 return (EROFS); 1172 if (dvp->v_type != VDIR) 1173 return (ENOTDIR); 1174 nmp = VFSTONFS(mp); 1175 np = VTONFS(dvp); 1176 1177 /* For NFSv4, wait until any remove is done. */ 1178 NFSLOCKNODE(np); 1179 while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { 1180 np->n_flag |= NREMOVEWANT; 1181 (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); 1182 } 1183 NFSUNLOCKNODE(np); 1184 1185 error = vn_dir_check_exec(dvp, cnp); 1186 if (error != 0) 1187 return (error); 1188 error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); 1189 if (error > 0 && error != ENOENT) 1190 return (error); 1191 if (error == -1) { 1192 /* 1193 * Lookups of "." are special and always return the 1194 * current directory. cache_lookup() already handles 1195 * associated locking bookkeeping, etc. 1196 */ 1197 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { 1198 /* XXX: Is this really correct? */ 1199 if (cnp->cn_nameiop != LOOKUP && 1200 (flags & ISLASTCN)) 1201 cnp->cn_flags |= SAVENAME; 1202 return (0); 1203 } 1204 1205 /* 1206 * We only accept a positive hit in the cache if the 1207 * change time of the file matches our cached copy. 
1208 * Otherwise, we discard the cache entry and fallback 1209 * to doing a lookup RPC. We also only trust cache 1210 * entries for less than nm_nametimeo seconds. 1211 * 1212 * To better handle stale file handles and attributes, 1213 * clear the attribute cache of this node if it is a 1214 * leaf component, part of an open() call, and not 1215 * locally modified before fetching the attributes. 1216 * This should allow stale file handles to be detected 1217 * here where we can fall back to a LOOKUP RPC to 1218 * recover rather than having nfs_open() detect the 1219 * stale file handle and failing open(2) with ESTALE. 1220 */ 1221 newvp = *vpp; 1222 newnp = VTONFS(newvp); 1223 if (!(nmp->nm_flag & NFSMNT_NOCTO) && 1224 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1225 !(newnp->n_flag & NMODIFIED)) { 1226 NFSLOCKNODE(newnp); 1227 newnp->n_attrstamp = 0; 1228 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1229 NFSUNLOCKNODE(newnp); 1230 } 1231 if (nfscl_nodeleg(newvp, 0) == 0 || 1232 ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && 1233 VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && 1234 timespeccmp(&vattr.va_ctime, &nctime, ==))) { 1235 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1236 if (cnp->cn_nameiop != LOOKUP && 1237 (flags & ISLASTCN)) 1238 cnp->cn_flags |= SAVENAME; 1239 return (0); 1240 } 1241 cache_purge(newvp); 1242 if (dvp != newvp) 1243 vput(newvp); 1244 else 1245 vrele(newvp); 1246 *vpp = NULLVP; 1247 } else if (error == ENOENT) { 1248 if (VN_IS_DOOMED(dvp)) 1249 return (ENOENT); 1250 /* 1251 * We only accept a negative hit in the cache if the 1252 * modification time of the parent directory matches 1253 * the cached copy in the name cache entry. 1254 * Otherwise, we discard all of the negative cache 1255 * entries for this directory. We also only trust 1256 * negative cache entries for up to nm_negnametimeo 1257 * seconds. 
1258 */ 1259 if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && 1260 VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 1261 timespeccmp(&vattr.va_mtime, &nctime, ==)) { 1262 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1263 return (ENOENT); 1264 } 1265 cache_purge_negative(dvp); 1266 } 1267 1268 newvp = NULLVP; 1269 NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); 1270 error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1271 cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1272 NULL); 1273 if (dattrflag) 1274 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1275 if (error) { 1276 if (newvp != NULLVP) { 1277 vput(newvp); 1278 *vpp = NULLVP; 1279 } 1280 1281 if (error != ENOENT) { 1282 if (NFS_ISV4(dvp)) 1283 error = nfscl_maperr(td, error, (uid_t)0, 1284 (gid_t)0); 1285 return (error); 1286 } 1287 1288 /* The requested file was not found. */ 1289 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 1290 (flags & ISLASTCN)) { 1291 /* 1292 * XXX: UFS does a full VOP_ACCESS(dvp, 1293 * VWRITE) here instead of just checking 1294 * MNT_RDONLY. 1295 */ 1296 if (mp->mnt_flag & MNT_RDONLY) 1297 return (EROFS); 1298 cnp->cn_flags |= SAVENAME; 1299 return (EJUSTRETURN); 1300 } 1301 1302 if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) { 1303 /* 1304 * Cache the modification time of the parent 1305 * directory from the post-op attributes in 1306 * the name cache entry. The negative cache 1307 * entry will be ignored once the directory 1308 * has changed. Don't bother adding the entry 1309 * if the directory has already changed. 1310 */ 1311 NFSLOCKNODE(np); 1312 if (timespeccmp(&np->n_vattr.na_mtime, 1313 &dnfsva.na_mtime, ==)) { 1314 NFSUNLOCKNODE(np); 1315 cache_enter_time(dvp, NULL, cnp, 1316 &dnfsva.na_mtime, NULL); 1317 } else 1318 NFSUNLOCKNODE(np); 1319 } 1320 return (ENOENT); 1321 } 1322 1323 /* 1324 * Handle RENAME case... 
1325 */ 1326 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1327 if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1328 free(nfhp, M_NFSFH); 1329 return (EISDIR); 1330 } 1331 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1332 LK_EXCLUSIVE); 1333 if (error) 1334 return (error); 1335 newvp = NFSTOV(np); 1336 if (attrflag) 1337 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1338 0, 1); 1339 *vpp = newvp; 1340 cnp->cn_flags |= SAVENAME; 1341 return (0); 1342 } 1343 1344 if (flags & ISDOTDOT) { 1345 ltype = NFSVOPISLOCKED(dvp); 1346 error = vfs_busy(mp, MBF_NOWAIT); 1347 if (error != 0) { 1348 vfs_ref(mp); 1349 NFSVOPUNLOCK(dvp); 1350 error = vfs_busy(mp, 0); 1351 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1352 vfs_rel(mp); 1353 if (error == 0 && VN_IS_DOOMED(dvp)) { 1354 vfs_unbusy(mp); 1355 error = ENOENT; 1356 } 1357 if (error != 0) 1358 return (error); 1359 } 1360 NFSVOPUNLOCK(dvp); 1361 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1362 cnp->cn_lkflags); 1363 if (error == 0) 1364 newvp = NFSTOV(np); 1365 vfs_unbusy(mp); 1366 if (newvp != dvp) 1367 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1368 if (VN_IS_DOOMED(dvp)) { 1369 if (error == 0) { 1370 if (newvp == dvp) 1371 vrele(newvp); 1372 else 1373 vput(newvp); 1374 } 1375 error = ENOENT; 1376 } 1377 if (error != 0) 1378 return (error); 1379 if (attrflag) 1380 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1381 0, 1); 1382 } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1383 free(nfhp, M_NFSFH); 1384 VREF(dvp); 1385 newvp = dvp; 1386 if (attrflag) 1387 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1388 0, 1); 1389 } else { 1390 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1391 cnp->cn_lkflags); 1392 if (error) 1393 return (error); 1394 newvp = NFSTOV(np); 1395 if (attrflag) 1396 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1397 0, 1); 1398 else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1399 !(np->n_flag & NMODIFIED)) { 1400 /* 1401 * 
Flush the attribute cache when opening a 1402 * leaf node to ensure that fresh attributes 1403 * are fetched in nfs_open() since we did not 1404 * fetch attributes from the LOOKUP reply. 1405 */ 1406 NFSLOCKNODE(np); 1407 np->n_attrstamp = 0; 1408 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1409 NFSUNLOCKNODE(np); 1410 } 1411 } 1412 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) 1413 cnp->cn_flags |= SAVENAME; 1414 if ((cnp->cn_flags & MAKEENTRY) && dvp != newvp && 1415 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && 1416 attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) 1417 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 1418 newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); 1419 *vpp = newvp; 1420 return (0); 1421 } 1422 1423 /* 1424 * nfs read call. 1425 * Just call ncl_bioread() to do the work. 1426 */ 1427 static int 1428 nfs_read(struct vop_read_args *ap) 1429 { 1430 struct vnode *vp = ap->a_vp; 1431 1432 switch (vp->v_type) { 1433 case VREG: 1434 return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); 1435 case VDIR: 1436 return (EISDIR); 1437 default: 1438 return (EOPNOTSUPP); 1439 } 1440 } 1441 1442 /* 1443 * nfs readlink call 1444 */ 1445 static int 1446 nfs_readlink(struct vop_readlink_args *ap) 1447 { 1448 struct vnode *vp = ap->a_vp; 1449 1450 if (vp->v_type != VLNK) 1451 return (EINVAL); 1452 return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); 1453 } 1454 1455 /* 1456 * Do a readlink rpc. 1457 * Called by ncl_doio() from below the buffer cache. 
1458 */ 1459 int 1460 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1461 { 1462 int error, ret, attrflag; 1463 struct nfsvattr nfsva; 1464 1465 error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, 1466 &attrflag, NULL); 1467 if (attrflag) { 1468 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1469 if (ret && !error) 1470 error = ret; 1471 } 1472 if (error && NFS_ISV4(vp)) 1473 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1474 return (error); 1475 } 1476 1477 /* 1478 * nfs read rpc call 1479 * Ditto above 1480 */ 1481 int 1482 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1483 { 1484 int error, ret, attrflag; 1485 struct nfsvattr nfsva; 1486 struct nfsmount *nmp; 1487 1488 nmp = VFSTONFS(vp->v_mount); 1489 error = EIO; 1490 attrflag = 0; 1491 if (NFSHASPNFS(nmp)) 1492 error = nfscl_doiods(vp, uiop, NULL, NULL, 1493 NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td); 1494 NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); 1495 if (error != 0) 1496 error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, 1497 &attrflag, NULL); 1498 if (attrflag) { 1499 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1500 if (ret && !error) 1501 error = ret; 1502 } 1503 if (error && NFS_ISV4(vp)) 1504 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1505 return (error); 1506 } 1507 1508 /* 1509 * nfs write call 1510 */ 1511 int 1512 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1513 int *iomode, int *must_commit, int called_from_strategy) 1514 { 1515 struct nfsvattr nfsva; 1516 int error, attrflag, ret; 1517 struct nfsmount *nmp; 1518 1519 nmp = VFSTONFS(vp->v_mount); 1520 error = EIO; 1521 attrflag = 0; 1522 if (NFSHASPNFS(nmp)) 1523 error = nfscl_doiods(vp, uiop, iomode, must_commit, 1524 NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td); 1525 NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); 1526 if (error != 0) 1527 error = nfsrpc_write(vp, uiop, iomode, 
must_commit, cred, 1528 uiop->uio_td, &nfsva, &attrflag, NULL, 1529 called_from_strategy); 1530 if (attrflag) { 1531 if (VTONFS(vp)->n_flag & ND_NFSV4) 1532 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, 1533 1); 1534 else 1535 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1536 1); 1537 if (ret && !error) 1538 error = ret; 1539 } 1540 if (DOINGASYNC(vp)) 1541 *iomode = NFSWRITE_FILESYNC; 1542 if (error && NFS_ISV4(vp)) 1543 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1544 return (error); 1545 } 1546 1547 /* 1548 * nfs mknod rpc 1549 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1550 * mode set to specify the file type and the size field for rdev. 1551 */ 1552 static int 1553 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1554 struct vattr *vap) 1555 { 1556 struct nfsvattr nfsva, dnfsva; 1557 struct vnode *newvp = NULL; 1558 struct nfsnode *np = NULL, *dnp; 1559 struct nfsfh *nfhp; 1560 struct vattr vattr; 1561 int error = 0, attrflag, dattrflag; 1562 u_int32_t rdev; 1563 1564 if (vap->va_type == VCHR || vap->va_type == VBLK) 1565 rdev = vap->va_rdev; 1566 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1567 rdev = 0xffffffff; 1568 else 1569 return (EOPNOTSUPP); 1570 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1571 return (error); 1572 error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, 1573 rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva, 1574 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); 1575 if (!error) { 1576 if (!nfhp) 1577 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1578 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, 1579 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1580 NULL); 1581 if (nfhp) 1582 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1583 cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); 1584 } 1585 if (dattrflag) 1586 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1587 if (!error) { 1588 newvp = 
NFSTOV(np); 1589 if (attrflag != 0) { 1590 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1591 0, 1); 1592 if (error != 0) 1593 vput(newvp); 1594 } 1595 } 1596 if (!error) { 1597 *vpp = newvp; 1598 } else if (NFS_ISV4(dvp)) { 1599 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, 1600 vap->va_gid); 1601 } 1602 dnp = VTONFS(dvp); 1603 NFSLOCKNODE(dnp); 1604 dnp->n_flag |= NMODIFIED; 1605 if (!dattrflag) { 1606 dnp->n_attrstamp = 0; 1607 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1608 } 1609 NFSUNLOCKNODE(dnp); 1610 return (error); 1611 } 1612 1613 /* 1614 * nfs mknod vop 1615 * just call nfs_mknodrpc() to do the work. 1616 */ 1617 /* ARGSUSED */ 1618 static int 1619 nfs_mknod(struct vop_mknod_args *ap) 1620 { 1621 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1622 } 1623 1624 static struct mtx nfs_cverf_mtx; 1625 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", 1626 MTX_DEF); 1627 1628 static nfsquad_t 1629 nfs_get_cverf(void) 1630 { 1631 static nfsquad_t cverf; 1632 nfsquad_t ret; 1633 static int cverf_initialized = 0; 1634 1635 mtx_lock(&nfs_cverf_mtx); 1636 if (cverf_initialized == 0) { 1637 cverf.lval[0] = arc4random(); 1638 cverf.lval[1] = arc4random(); 1639 cverf_initialized = 1; 1640 } else 1641 cverf.qval++; 1642 ret = cverf; 1643 mtx_unlock(&nfs_cverf_mtx); 1644 1645 return (ret); 1646 } 1647 1648 /* 1649 * nfs file create call 1650 */ 1651 static int 1652 nfs_create(struct vop_create_args *ap) 1653 { 1654 struct vnode *dvp = ap->a_dvp; 1655 struct vattr *vap = ap->a_vap; 1656 struct componentname *cnp = ap->a_cnp; 1657 struct nfsnode *np = NULL, *dnp; 1658 struct vnode *newvp = NULL; 1659 struct nfsmount *nmp; 1660 struct nfsvattr dnfsva, nfsva; 1661 struct nfsfh *nfhp; 1662 nfsquad_t cverf; 1663 int error = 0, attrflag, dattrflag, fmode = 0; 1664 struct vattr vattr; 1665 1666 /* 1667 * Oops, not for me.. 
1668 */ 1669 if (vap->va_type == VSOCK) 1670 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1671 1672 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1673 return (error); 1674 if (vap->va_vaflags & VA_EXCLUSIVE) 1675 fmode |= O_EXCL; 1676 dnp = VTONFS(dvp); 1677 nmp = VFSTONFS(dvp->v_mount); 1678 again: 1679 /* For NFSv4, wait until any remove is done. */ 1680 NFSLOCKNODE(dnp); 1681 while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { 1682 dnp->n_flag |= NREMOVEWANT; 1683 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); 1684 } 1685 NFSUNLOCKNODE(dnp); 1686 1687 cverf = nfs_get_cverf(); 1688 error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1689 vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, 1690 &nfhp, &attrflag, &dattrflag, NULL); 1691 if (!error) { 1692 if (nfhp == NULL) 1693 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1694 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, 1695 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1696 NULL); 1697 if (nfhp != NULL) 1698 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1699 cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); 1700 } 1701 if (dattrflag) 1702 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1703 if (!error) { 1704 newvp = NFSTOV(np); 1705 if (attrflag == 0) 1706 error = nfsrpc_getattr(newvp, cnp->cn_cred, 1707 cnp->cn_thread, &nfsva, NULL); 1708 if (error == 0) 1709 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1710 0, 1); 1711 } 1712 if (error) { 1713 if (newvp != NULL) { 1714 vput(newvp); 1715 newvp = NULL; 1716 } 1717 if (NFS_ISV34(dvp) && (fmode & O_EXCL) && 1718 error == NFSERR_NOTSUPP) { 1719 fmode &= ~O_EXCL; 1720 goto again; 1721 } 1722 } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { 1723 if (nfscl_checksattr(vap, &nfsva)) { 1724 error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, 1725 cnp->cn_thread, &nfsva, &attrflag, NULL); 1726 if (error && (vap->va_uid != (uid_t)VNOVAL || 1727 vap->va_gid != (gid_t)VNOVAL)) { 1728 /* try 
again without setting uid/gid */ 1729 vap->va_uid = (uid_t)VNOVAL; 1730 vap->va_gid = (uid_t)VNOVAL; 1731 error = nfsrpc_setattr(newvp, vap, NULL, 1732 cnp->cn_cred, cnp->cn_thread, &nfsva, 1733 &attrflag, NULL); 1734 } 1735 if (attrflag) 1736 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1737 NULL, 0, 1); 1738 if (error != 0) 1739 vput(newvp); 1740 } 1741 } 1742 if (!error) { 1743 if ((cnp->cn_flags & MAKEENTRY) && attrflag) { 1744 if (dvp != newvp) 1745 cache_enter_time(dvp, newvp, cnp, 1746 &nfsva.na_ctime, NULL); 1747 else 1748 printf("nfs_create: bogus NFS server returned " 1749 "the directory as the new file object\n"); 1750 } 1751 *ap->a_vpp = newvp; 1752 } else if (NFS_ISV4(dvp)) { 1753 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, 1754 vap->va_gid); 1755 } 1756 NFSLOCKNODE(dnp); 1757 dnp->n_flag |= NMODIFIED; 1758 if (!dattrflag) { 1759 dnp->n_attrstamp = 0; 1760 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1761 } 1762 NFSUNLOCKNODE(dnp); 1763 return (error); 1764 } 1765 1766 /* 1767 * nfs file remove call 1768 * To try and make nfs semantics closer to ufs semantics, a file that has 1769 * other processes using the vnode is renamed instead of removed and then 1770 * removed later on the last close. 
1771 * - If v_usecount > 1 1772 * If a rename is not already in the works 1773 * call nfs_sillyrename() to set it up 1774 * else 1775 * do the remove rpc 1776 */ 1777 static int 1778 nfs_remove(struct vop_remove_args *ap) 1779 { 1780 struct vnode *vp = ap->a_vp; 1781 struct vnode *dvp = ap->a_dvp; 1782 struct componentname *cnp = ap->a_cnp; 1783 struct nfsnode *np = VTONFS(vp); 1784 int error = 0; 1785 struct vattr vattr; 1786 1787 KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name")); 1788 KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); 1789 if (vp->v_type == VDIR) 1790 error = EPERM; 1791 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1792 VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && 1793 vattr.va_nlink > 1)) { 1794 /* 1795 * Purge the name cache so that the chance of a lookup for 1796 * the name succeeding while the remove is in progress is 1797 * minimized. Without node locking it can still happen, such 1798 * that an I/O op returns ESTALE, but since you get this if 1799 * another host removes the file.. 1800 */ 1801 cache_purge(vp); 1802 /* 1803 * throw away biocache buffers, mainly to avoid 1804 * unnecessary delayed writes later. 1805 */ 1806 error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1); 1807 if (error != EINTR && error != EIO) 1808 /* Do the rpc */ 1809 error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, 1810 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); 1811 /* 1812 * Kludge City: If the first reply to the remove rpc is lost.. 1813 * the reply to the retransmitted request will be ENOENT 1814 * since the file was in fact removed 1815 * Therefore, we cheat and return success. 
1816 */ 1817 if (error == ENOENT) 1818 error = 0; 1819 } else if (!np->n_sillyrename) 1820 error = nfs_sillyrename(dvp, vp, cnp); 1821 NFSLOCKNODE(np); 1822 np->n_attrstamp = 0; 1823 NFSUNLOCKNODE(np); 1824 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1825 return (error); 1826 } 1827 1828 /* 1829 * nfs file remove rpc called from nfs_inactive 1830 */ 1831 int 1832 ncl_removeit(struct sillyrename *sp, struct vnode *vp) 1833 { 1834 /* 1835 * Make sure that the directory vnode is still valid. 1836 * XXX we should lock sp->s_dvp here. 1837 */ 1838 if (sp->s_dvp->v_type == VBAD) 1839 return (0); 1840 return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, 1841 sp->s_cred, NULL)); 1842 } 1843 1844 /* 1845 * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). 1846 */ 1847 static int 1848 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 1849 int namelen, struct ucred *cred, struct thread *td) 1850 { 1851 struct nfsvattr dnfsva; 1852 struct nfsnode *dnp = VTONFS(dvp); 1853 int error = 0, dattrflag; 1854 1855 NFSLOCKNODE(dnp); 1856 dnp->n_flag |= NREMOVEINPROG; 1857 NFSUNLOCKNODE(dnp); 1858 error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, 1859 &dattrflag, NULL); 1860 NFSLOCKNODE(dnp); 1861 if ((dnp->n_flag & NREMOVEWANT)) { 1862 dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); 1863 NFSUNLOCKNODE(dnp); 1864 wakeup((caddr_t)dnp); 1865 } else { 1866 dnp->n_flag &= ~NREMOVEINPROG; 1867 NFSUNLOCKNODE(dnp); 1868 } 1869 if (dattrflag) 1870 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1871 NFSLOCKNODE(dnp); 1872 dnp->n_flag |= NMODIFIED; 1873 if (!dattrflag) { 1874 dnp->n_attrstamp = 0; 1875 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1876 } 1877 NFSUNLOCKNODE(dnp); 1878 if (error && NFS_ISV4(dvp)) 1879 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1880 return (error); 1881 } 1882 1883 /* 1884 * nfs file rename call 1885 */ 1886 static int 1887 nfs_rename(struct vop_rename_args *ap) 1888 { 1889 struct vnode *fvp = ap->a_fvp; 
1890 struct vnode *tvp = ap->a_tvp; 1891 struct vnode *fdvp = ap->a_fdvp; 1892 struct vnode *tdvp = ap->a_tdvp; 1893 struct componentname *tcnp = ap->a_tcnp; 1894 struct componentname *fcnp = ap->a_fcnp; 1895 struct nfsnode *fnp = VTONFS(ap->a_fvp); 1896 struct nfsnode *tdnp = VTONFS(ap->a_tdvp); 1897 struct nfsv4node *newv4 = NULL; 1898 int error; 1899 1900 KASSERT((tcnp->cn_flags & HASBUF) != 0 && 1901 (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name")); 1902 /* Check for cross-device rename */ 1903 if ((fvp->v_mount != tdvp->v_mount) || 1904 (tvp && (fvp->v_mount != tvp->v_mount))) { 1905 error = EXDEV; 1906 goto out; 1907 } 1908 1909 if (fvp == tvp) { 1910 printf("nfs_rename: fvp == tvp (can't happen)\n"); 1911 error = 0; 1912 goto out; 1913 } 1914 if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) 1915 goto out; 1916 1917 /* 1918 * We have to flush B_DELWRI data prior to renaming 1919 * the file. If we don't, the delayed-write buffers 1920 * can be flushed out later after the file has gone stale 1921 * under NFSV3. NFSV2 does not have this problem because 1922 * ( as far as I can tell ) it flushes dirty buffers more 1923 * often. 1924 * 1925 * Skip the rename operation if the fsync fails, this can happen 1926 * due to the server's volume being full, when we pushed out data 1927 * that was written back to our cache earlier. Not checking for 1928 * this condition can result in potential (silent) data loss. 1929 */ 1930 error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); 1931 NFSVOPUNLOCK(fvp); 1932 if (!error && tvp) 1933 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); 1934 if (error) 1935 goto out; 1936 1937 /* 1938 * If the tvp exists and is in use, sillyrename it before doing the 1939 * rename of the new file over it. 1940 * XXX Can't sillyrename a directory. 
1941 */ 1942 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && 1943 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { 1944 vput(tvp); 1945 tvp = NULL; 1946 } 1947 1948 error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, 1949 tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, 1950 tcnp->cn_thread); 1951 1952 if (error == 0 && NFS_ISV4(tdvp)) { 1953 /* 1954 * For NFSv4, check to see if it is the same name and 1955 * replace the name, if it is different. 1956 */ 1957 newv4 = malloc( 1958 sizeof (struct nfsv4node) + 1959 tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, 1960 M_NFSV4NODE, M_WAITOK); 1961 NFSLOCKNODE(tdnp); 1962 NFSLOCKNODE(fnp); 1963 if (fnp->n_v4 != NULL && fvp->v_type == VREG && 1964 (fnp->n_v4->n4_namelen != tcnp->cn_namelen || 1965 NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), 1966 tcnp->cn_namelen) || 1967 tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || 1968 NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 1969 tdnp->n_fhp->nfh_len))) { 1970 #ifdef notdef 1971 { char nnn[100]; int nnnl; 1972 nnnl = (tcnp->cn_namelen < 100) ? 
tcnp->cn_namelen : 99; 1973 bcopy(tcnp->cn_nameptr, nnn, nnnl); 1974 nnn[nnnl] = '\0'; 1975 printf("ren replace=%s\n",nnn); 1976 } 1977 #endif 1978 free(fnp->n_v4, M_NFSV4NODE); 1979 fnp->n_v4 = newv4; 1980 newv4 = NULL; 1981 fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; 1982 fnp->n_v4->n4_namelen = tcnp->cn_namelen; 1983 NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 1984 tdnp->n_fhp->nfh_len); 1985 NFSBCOPY(tcnp->cn_nameptr, 1986 NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); 1987 } 1988 NFSUNLOCKNODE(tdnp); 1989 NFSUNLOCKNODE(fnp); 1990 if (newv4 != NULL) 1991 free(newv4, M_NFSV4NODE); 1992 } 1993 1994 if (fvp->v_type == VDIR) { 1995 if (tvp != NULL && tvp->v_type == VDIR) 1996 cache_purge(tdvp); 1997 cache_purge(fdvp); 1998 } 1999 2000 out: 2001 if (tdvp == tvp) 2002 vrele(tdvp); 2003 else 2004 vput(tdvp); 2005 if (tvp) 2006 vput(tvp); 2007 vrele(fdvp); 2008 vrele(fvp); 2009 /* 2010 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 2011 */ 2012 if (error == ENOENT) 2013 error = 0; 2014 return (error); 2015 } 2016 2017 /* 2018 * nfs file rename rpc called from nfs_remove() above 2019 */ 2020 static int 2021 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, 2022 struct sillyrename *sp) 2023 { 2024 2025 return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, 2026 sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, 2027 scnp->cn_thread)); 2028 } 2029 2030 /* 2031 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
2032 */ 2033 static int 2034 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, 2035 int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, 2036 int tnamelen, struct ucred *cred, struct thread *td) 2037 { 2038 struct nfsvattr fnfsva, tnfsva; 2039 struct nfsnode *fdnp = VTONFS(fdvp); 2040 struct nfsnode *tdnp = VTONFS(tdvp); 2041 int error = 0, fattrflag, tattrflag; 2042 2043 error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, 2044 tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, 2045 &tattrflag, NULL, NULL); 2046 NFSLOCKNODE(fdnp); 2047 fdnp->n_flag |= NMODIFIED; 2048 if (fattrflag != 0) { 2049 NFSUNLOCKNODE(fdnp); 2050 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); 2051 } else { 2052 fdnp->n_attrstamp = 0; 2053 NFSUNLOCKNODE(fdnp); 2054 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 2055 } 2056 NFSLOCKNODE(tdnp); 2057 tdnp->n_flag |= NMODIFIED; 2058 if (tattrflag != 0) { 2059 NFSUNLOCKNODE(tdnp); 2060 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); 2061 } else { 2062 tdnp->n_attrstamp = 0; 2063 NFSUNLOCKNODE(tdnp); 2064 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2065 } 2066 if (error && NFS_ISV4(fdvp)) 2067 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2068 return (error); 2069 } 2070 2071 /* 2072 * nfs hard link create call 2073 */ 2074 static int 2075 nfs_link(struct vop_link_args *ap) 2076 { 2077 struct vnode *vp = ap->a_vp; 2078 struct vnode *tdvp = ap->a_tdvp; 2079 struct componentname *cnp = ap->a_cnp; 2080 struct nfsnode *np, *tdnp; 2081 struct nfsvattr nfsva, dnfsva; 2082 int error = 0, attrflag, dattrflag; 2083 2084 /* 2085 * Push all writes to the server, so that the attribute cache 2086 * doesn't get "out of sync" with the server. 2087 * XXX There should be a better way! 
2088 */ 2089 VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); 2090 2091 error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, 2092 cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag, 2093 &dattrflag, NULL); 2094 tdnp = VTONFS(tdvp); 2095 NFSLOCKNODE(tdnp); 2096 tdnp->n_flag |= NMODIFIED; 2097 if (dattrflag != 0) { 2098 NFSUNLOCKNODE(tdnp); 2099 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); 2100 } else { 2101 tdnp->n_attrstamp = 0; 2102 NFSUNLOCKNODE(tdnp); 2103 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2104 } 2105 if (attrflag) 2106 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2107 else { 2108 np = VTONFS(vp); 2109 NFSLOCKNODE(np); 2110 np->n_attrstamp = 0; 2111 NFSUNLOCKNODE(np); 2112 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 2113 } 2114 /* 2115 * If negative lookup caching is enabled, I might as well 2116 * add an entry for this node. Not necessary for correctness, 2117 * but if negative caching is enabled, then the system 2118 * must care about lookup caching hit rate, so... 
2119 */ 2120 if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && 2121 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2122 if (tdvp != vp) 2123 cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); 2124 else 2125 printf("nfs_link: bogus NFS server returned " 2126 "the directory as the new link\n"); 2127 } 2128 if (error && NFS_ISV4(vp)) 2129 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, 2130 (gid_t)0); 2131 return (error); 2132 } 2133 2134 /* 2135 * nfs symbolic link create call 2136 */ 2137 static int 2138 nfs_symlink(struct vop_symlink_args *ap) 2139 { 2140 struct vnode *dvp = ap->a_dvp; 2141 struct vattr *vap = ap->a_vap; 2142 struct componentname *cnp = ap->a_cnp; 2143 struct nfsvattr nfsva, dnfsva; 2144 struct nfsfh *nfhp; 2145 struct nfsnode *np = NULL, *dnp; 2146 struct vnode *newvp = NULL; 2147 int error = 0, attrflag, dattrflag, ret; 2148 2149 vap->va_type = VLNK; 2150 error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2151 ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, 2152 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); /* A file handle came back: get the node for it. A failure here is reported only if the RPC itself succeeded. */ 2153 if (nfhp) { 2154 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, 2155 &np, NULL, LK_EXCLUSIVE); 2156 if (!ret) 2157 newvp = NFSTOV(np); 2158 else if (!error) 2159 error = ret; 2160 } 2161 if (newvp != NULL) { 2162 if (attrflag) 2163 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 2164 0, 1); 2165 } else if (!error) { 2166 /* 2167 * If we do not have an error and we could not extract the 2168 * newvp from the response due to the request being NFSv2, we 2169 * have to do a lookup in order to obtain a newvp to return. 
2170 */ 2171 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2172 cnp->cn_cred, cnp->cn_thread, &np); 2173 if (!error) 2174 newvp = NFSTOV(np); 2175 } /* On error release any vnode obtained above; otherwise hand it back through a_vpp. */ 2176 if (error) { 2177 if (newvp) 2178 vput(newvp); 2179 if (NFS_ISV4(dvp)) 2180 error = nfscl_maperr(cnp->cn_thread, error, 2181 vap->va_uid, vap->va_gid); 2182 } else { 2183 *ap->a_vpp = newvp; 2184 } 2185 /* Parent directory: flag it modified, then reload its attributes from the reply or invalidate the cached copy. */ 2186 dnp = VTONFS(dvp); 2187 NFSLOCKNODE(dnp); 2188 dnp->n_flag |= NMODIFIED; 2189 if (dattrflag != 0) { 2190 NFSUNLOCKNODE(dnp); 2191 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2192 } else { 2193 dnp->n_attrstamp = 0; 2194 NFSUNLOCKNODE(dnp); 2195 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2196 } 2197 /* 2198 * If negative lookup caching is enabled, I might as well 2199 * add an entry for this node. Not necessary for correctness, 2200 * but if negative caching is enabled, then the system 2201 * must care about lookup caching hit rate, so... 2202 */ 2203 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2204 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2205 if (dvp != newvp) 2206 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 2207 NULL); 2208 else 2209 printf("nfs_symlink: bogus NFS server returned " 2210 "the directory as the new file object\n"); 2211 } 2212 return (error); 2213 } 2214 2215 /* 2216 * nfs make dir call 2217 */ 2218 static int 2219 nfs_mkdir(struct vop_mkdir_args *ap) 2220 { 2221 struct vnode *dvp = ap->a_dvp; 2222 struct vattr *vap = ap->a_vap; 2223 struct componentname *cnp = ap->a_cnp; 2224 struct nfsnode *np = NULL, *dnp; 2225 struct vnode *newvp = NULL; 2226 struct vattr vattr; 2227 struct nfsfh *nfhp; 2228 struct nfsvattr nfsva, dnfsva; 2229 int error = 0, attrflag, dattrflag, ret; 2230 /* Fails early if the parent's attributes cannot be fetched; vattr itself is not read again in this function. */ 2231 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 2232 return (error); 2233 vap->va_type = VDIR; 2234 error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2235 vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, 2236 &attrflag, 
&dattrflag, NULL); /* Parent directory: flag it modified, then reload its attributes from the reply or invalidate the cached copy. */ 2237 dnp = VTONFS(dvp); 2238 NFSLOCKNODE(dnp); 2239 dnp->n_flag |= NMODIFIED; 2240 if (dattrflag != 0) { 2241 NFSUNLOCKNODE(dnp); 2242 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2243 } else { 2244 dnp->n_attrstamp = 0; 2245 NFSUNLOCKNODE(dnp); 2246 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2247 } /* A file handle came back: get the node for it and load its attributes if the reply carried them. */ 2248 if (nfhp) { 2249 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, 2250 &np, NULL, LK_EXCLUSIVE); 2251 if (!ret) { 2252 newvp = NFSTOV(np); 2253 if (attrflag) 2254 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 2255 NULL, 0, 1); 2256 } else if (!error) 2257 error = ret; 2258 } /* No vnode was obtained from the reply: look the new name up and insist that it is a directory. */ 2259 if (!error && newvp == NULL) { 2260 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2261 cnp->cn_cred, cnp->cn_thread, &np); 2262 if (!error) { 2263 newvp = NFSTOV(np); 2264 if (newvp->v_type != VDIR) 2265 error = EEXIST; 2266 } 2267 } 2268 if (error) { 2269 if (newvp) 2270 vput(newvp); 2271 if (NFS_ISV4(dvp)) 2272 error = nfscl_maperr(cnp->cn_thread, error, 2273 vap->va_uid, vap->va_gid); 2274 } else { 2275 /* 2276 * If negative lookup caching is enabled, I might as well 2277 * add an entry for this node. Not necessary for correctness, 2278 * but if negative caching is enabled, then the system 2279 * must care about lookup caching hit rate, so... 
2280 */ 2281 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2282 (cnp->cn_flags & MAKEENTRY) && 2283 attrflag != 0 && dattrflag != 0) { 2284 if (dvp != newvp) 2285 cache_enter_time(dvp, newvp, cnp, 2286 &nfsva.na_ctime, &dnfsva.na_ctime); 2287 else 2288 printf("nfs_mkdir: bogus NFS server returned " 2289 "the directory that the directory was " 2290 "created in as the new file object\n"); 2291 } 2292 *ap->a_vpp = newvp; 2293 } 2294 return (error); 2295 } 2296 2297 /* 2298 * nfs remove directory call 2299 */ 2300 static int 2301 nfs_rmdir(struct vop_rmdir_args *ap) 2302 { 2303 struct vnode *vp = ap->a_vp; 2304 struct vnode *dvp = ap->a_dvp; 2305 struct componentname *cnp = ap->a_cnp; 2306 struct nfsnode *dnp; 2307 struct nfsvattr dnfsva; 2308 int error, dattrflag; 2309 /* The directory and its parent being the same vnode is rejected before any RPC is sent. */ 2310 if (dvp == vp) 2311 return (EINVAL); 2312 error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2313 cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL); /* Parent directory: flag it modified, then reload its attributes from the reply or invalidate the cached copy. */ 2314 dnp = VTONFS(dvp); 2315 NFSLOCKNODE(dnp); 2316 dnp->n_flag |= NMODIFIED; 2317 if (dattrflag != 0) { 2318 NFSUNLOCKNODE(dnp); 2319 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2320 } else { 2321 dnp->n_attrstamp = 0; 2322 NFSUNLOCKNODE(dnp); 2323 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2324 } 2325 /* Both vnodes are purged regardless of the RPC result. */ 2326 cache_purge(dvp); 2327 cache_purge(vp); 2328 if (error && NFS_ISV4(dvp)) 2329 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, 2330 (gid_t)0); 2331 /* 2332 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
2333 */ 2334 if (error == ENOENT) 2335 error = 0; 2336 return (error); 2337 } 2338 2339 /* 2340 * nfs readdir call 2341 * 2341 * Returns EPERM for a vnode that is not a directory. Answers end-of-directory from the cached EOF offset when the directory is unmodified and its change attribute (NFSv4) or mtime still matches. Otherwise reads a whole number of DIRBLKSIZ blocks through ncl_bioread(); a request smaller than DIRBLKSIZ gets EINVAL. 2341 */ 2342 static int 2343 nfs_readdir(struct vop_readdir_args *ap) 2344 { 2345 struct vnode *vp = ap->a_vp; 2346 struct nfsnode *np = VTONFS(vp); 2347 struct uio *uio = ap->a_uio; 2348 ssize_t tresid, left; 2349 int error = 0; 2350 struct vattr vattr; 2351 2352 if (ap->a_eofflag != NULL) 2353 *ap->a_eofflag = 0; 2354 if (vp->v_type != VDIR) 2355 return(EPERM); 2356 2357 /* 2358 * First, check for hit on the EOF offset cache 2359 */ 2360 NFSLOCKNODE(np); 2361 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2362 (np->n_flag & NMODIFIED) == 0) { /* The node lock is dropped around VOP_GETATTR() and retaken for the comparison. */ 2363 NFSUNLOCKNODE(np); 2364 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2365 NFSLOCKNODE(np); 2366 if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || 2367 !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2368 NFSUNLOCKNODE(np); 2369 NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); 2370 if (ap->a_eofflag != NULL) 2371 *ap->a_eofflag = 1; 2372 return (0); 2373 } else 2374 NFSUNLOCKNODE(np); 2375 } 2376 } else 2377 NFSUNLOCKNODE(np); 2378 2379 /* 2380 * NFS always guarantees that directory entries don't straddle 2381 * DIRBLKSIZ boundaries. As such, we need to limit the size 2382 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial 2383 * directory entry. 2384 */ 2385 left = uio->uio_resid % DIRBLKSIZ; 2386 if (left == uio->uio_resid) 2387 return (EINVAL); 2388 uio->uio_resid -= left; 2389 2390 /* 2391 * Call ncl_bioread() to do the real work. 2392 */ 2393 tresid = uio->uio_resid; 2394 error = ncl_bioread(vp, uio, 0, ap->a_cred); 2395 /* Nothing was transferred and no error occurred: report end of directory. */ 2396 if (!error && uio->uio_resid == tresid) { 2397 NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); 2398 if (ap->a_eofflag != NULL) 2399 *ap->a_eofflag = 1; 2400 } 2401 2402 /* Add the partial DIRBLKSIZ (left) back in. */ 2403 uio->uio_resid += left; 2404 return (error); 2405 } 2406 2407 /* 2408 * Readdir rpc call. 
2409 * Called from below the buffer cache by ncl_doio(). 2410 */ 2411 int 2412 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2413 struct thread *td) 2414 { 2415 struct nfsvattr nfsva; 2416 nfsuint64 *cookiep, cookie; 2417 struct nfsnode *dnp = VTONFS(vp); 2418 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2419 int error = 0, eof, attrflag; 2420 /* The uio must describe a single iovec whose offset and length are both multiples of DIRBLKSIZ. */ 2421 KASSERT(uiop->uio_iovcnt == 1 && 2422 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2423 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2424 ("nfs readdirrpc bad uio")); 2425 2426 /* 2427 * If there is no cookie, assume directory was stale. 2428 */ /* NOTE(review): NFSUNLOCKNODE() directly after ncl_dircookie_lock() suggests the latter returns with the node lock held - confirm against its definition. */ 2429 ncl_dircookie_lock(dnp); 2430 NFSUNLOCKNODE(dnp); 2431 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2432 if (cookiep) { 2433 cookie = *cookiep; 2434 ncl_dircookie_unlock(dnp); 2435 } else { 2436 ncl_dircookie_unlock(dnp); 2437 return (NFSERR_BAD_COOKIE); 2438 } 2439 2440 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2441 (void)ncl_fsinfo(nmp, vp, cred, td); 2442 2443 error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, 2444 &attrflag, &eof, NULL); 2445 if (attrflag) 2446 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2447 2448 if (!error) { 2449 /* 2450 * We are now either at the end of the directory or have filled 2451 * the block. 2452 */ 2453 if (eof) { 2454 NFSLOCKNODE(dnp); 2455 dnp->n_direofoffset = uiop->uio_offset; 2456 NFSUNLOCKNODE(dnp); 2457 } else { 2458 if (uiop->uio_resid > 0) 2459 printf("EEK! readdirrpc resid > 0\n"); /* Store the cookie returned by the RPC under the offset now reached; a later call starting at that offset fetches it with ncl_getcookie(). */ 2460 ncl_dircookie_lock(dnp); 2461 NFSUNLOCKNODE(dnp); 2462 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2463 *cookiep = cookie; 2464 ncl_dircookie_unlock(dnp); 2465 } 2466 } else if (NFS_ISV4(vp)) { 2467 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2468 } 2469 return (error); 2470 } 2471 2472 /* 2473 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). 
2474 */ 2475 int 2476 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2477 struct thread *td) 2478 { 2479 struct nfsvattr nfsva; 2480 nfsuint64 *cookiep, cookie; 2481 struct nfsnode *dnp = VTONFS(vp); 2482 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2483 int error = 0, attrflag, eof; 2484 /* The uio must describe a single iovec whose offset and length are both multiples of DIRBLKSIZ. */ 2485 KASSERT(uiop->uio_iovcnt == 1 && 2486 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2487 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2488 ("nfs readdirplusrpc bad uio")); 2489 2490 /* 2491 * If there is no cookie, assume directory was stale. 2492 */ /* NOTE(review): NFSUNLOCKNODE() directly after ncl_dircookie_lock() suggests the latter returns with the node lock held - confirm against its definition. */ 2493 ncl_dircookie_lock(dnp); 2494 NFSUNLOCKNODE(dnp); 2495 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2496 if (cookiep) { 2497 cookie = *cookiep; 2498 ncl_dircookie_unlock(dnp); 2499 } else { 2500 ncl_dircookie_unlock(dnp); 2501 return (NFSERR_BAD_COOKIE); 2502 } 2503 2504 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2505 (void)ncl_fsinfo(nmp, vp, cred, td); 2506 error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, 2507 &attrflag, &eof, NULL); 2508 if (attrflag) 2509 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2510 2511 if (!error) { 2512 /* 2513 * We are now either at end of the directory or have filled 2514 * the block. 2515 */ 2516 if (eof) { 2517 NFSLOCKNODE(dnp); 2518 dnp->n_direofoffset = uiop->uio_offset; 2519 NFSUNLOCKNODE(dnp); 2520 } else { 2521 if (uiop->uio_resid > 0) 2522 printf("EEK! readdirplusrpc resid > 0\n"); /* Store the cookie returned by the RPC under the offset now reached; a later call starting at that offset fetches it with ncl_getcookie(). */ 2523 ncl_dircookie_lock(dnp); 2524 NFSUNLOCKNODE(dnp); 2525 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2526 *cookiep = cookie; 2527 ncl_dircookie_unlock(dnp); 2528 } 2529 } else if (NFS_ISV4(vp)) { 2530 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2531 } 2532 return (error); 2533 } 2534 2535 /* 2536 * Silly rename. 
To make the NFS filesystem that is stateless look a little 2537 * more like the "ufs" a remove of an active vnode is translated to a rename 2538 * to a funny looking filename that is removed by nfs_inactive on the 2539 * nfsnode. There is the potential for another process on a different client 2540 * to create the same funny name between the nfs_lookitup() fails and the 2541 * nfs_rename() completes, but... 2542 */ 2543 static int 2544 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2545 { 2546 struct sillyrename *sp; 2547 struct nfsnode *np; 2548 int error; 2549 short pid; 2550 unsigned int lticks; 2551 2552 cache_purge(dvp); 2553 np = VTONFS(vp); 2554 KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); /* sp takes a reference on both the credential and the directory vnode; the "bad" path below drops both and frees sp. */ 2555 sp = malloc(sizeof (struct sillyrename), 2556 M_NEWNFSREQ, M_WAITOK); 2557 sp->s_cred = crhold(cnp->cn_cred); 2558 sp->s_dvp = dvp; 2559 VREF(dvp); 2560 2561 /* 2562 * Fudge together a funny name. 2563 * Changing the format of the funny name to accommodate more 2564 * sillynames per directory. 2565 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2566 * CPU ticks since boot. 2567 */ 2568 pid = cnp->cn_thread->td_proc->p_pid; 2569 lticks = (unsigned int)ticks; /* Keep bumping lticks until the lookup fails, i.e. until the candidate name is not found in dvp. */ 2570 for ( ; ; ) { 2571 sp->s_namlen = sprintf(sp->s_name, 2572 ".nfs.%08x.%04x4.4", lticks, 2573 pid); 2574 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2575 cnp->cn_thread, NULL)) 2576 break; 2577 lticks++; 2578 } 2579 error = nfs_renameit(dvp, vp, cnp, sp); 2580 if (error) 2581 goto bad; /* NOTE(review): the result of this lookup is stored in error but never checked; the function returns 0 regardless. */ 2582 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2583 cnp->cn_thread, &np); 2584 np->n_sillyrename = sp; 2585 return (0); 2586 bad: 2587 vrele(sp->s_dvp); 2588 crfree(sp->s_cred); 2589 free(sp, M_NEWNFSREQ); 2590 return (error); 2591 } 2592 2593 /* 2594 * Look up a file name and optionally either update the file handle or 2595 * allocate an nfsnode, depending on the value of npp. 
2596 * npp == NULL --> just do the lookup 2597 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2598 * handled too 2599 * *npp != NULL --> update the file handle in the vnode 2600 */ 2601 static int 2602 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, 2603 struct thread *td, struct nfsnode **npp) 2604 { 2605 struct vnode *newvp = NULL, *vp; 2606 struct nfsnode *np, *dnp = VTONFS(dvp); 2607 struct nfsfh *nfhp, *onfhp; 2608 struct nfsvattr nfsva, dnfsva; 2609 struct componentname cn; 2610 int error = 0, attrflag, dattrflag; 2611 u_int hash; 2612 2613 error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, 2614 &nfhp, &attrflag, &dattrflag, NULL); 2615 if (dattrflag) 2616 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); /* The caller wants a node back and the lookup succeeded. First case: an existing node was passed in, so it is updated in place. */ 2617 if (npp && !error) { 2618 if (*npp != NULL) { 2619 np = *npp; 2620 vp = NFSTOV(np); 2621 /* 2622 * For NFSv4, check to see if it is the same name and 2623 * replace the name, if it is different. 2624 */ 2625 if (np->n_v4 != NULL && nfsva.na_type == VREG && 2626 (np->n_v4->n4_namelen != len || 2627 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || 2628 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || 2629 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2630 dnp->n_fhp->nfh_len))) { 2631 #ifdef notdef 2632 { char nnn[100]; int nnnl; 2633 nnnl = (len < 100) ? len : 99; 2634 bcopy(name, nnn, nnnl); 2635 nnn[nnnl] = '\0'; 2636 printf("replace=%s\n",nnn); 2637 } 2638 #endif /* Replace n_v4 with a fresh record holding the directory's file handle followed by the new name. */ 2639 free(np->n_v4, M_NFSV4NODE); 2640 np->n_v4 = malloc( 2641 sizeof (struct nfsv4node) + 2642 dnp->n_fhp->nfh_len + len - 1, 2643 M_NFSV4NODE, M_WAITOK); 2644 np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; 2645 np->n_v4->n4_namelen = len; 2646 NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2647 dnp->n_fhp->nfh_len); 2648 NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); 2649 } 2650 hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, 2651 FNV1_32_INIT); 2652 onfhp = np->n_fhp; 2653 /* 2654 * Rehash node for new file handle. 
2655 */ 2656 vfs_hash_rehash(vp, hash); 2657 np->n_fhp = nfhp; 2658 if (onfhp != NULL) 2659 free(onfhp, M_NFSFH); 2660 newvp = NFSTOV(np); /* Second case: the handle returned is the directory's own, so return dvp with an extra reference. */ 2661 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { 2662 free(nfhp, M_NFSFH); 2663 VREF(dvp); 2664 newvp = dvp; /* Third case: get a node for the new file handle. */ 2665 } else { 2666 cn.cn_nameptr = name; 2667 cn.cn_namelen = len; 2668 error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, 2669 &np, NULL, LK_EXCLUSIVE); 2670 if (error) 2671 return (error); 2672 newvp = NFSTOV(np); 2673 } /* A newly obtained vnode whose attributes were not in the reply is released and reported as ENOENT. */ 2674 if (!attrflag && *npp == NULL) { 2675 if (newvp == dvp) 2676 vrele(newvp); 2677 else 2678 vput(newvp); 2679 return (ENOENT); 2680 } 2681 if (attrflag) 2682 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 2683 0, 1); 2684 } 2685 if (npp && *npp == NULL) { 2686 if (error) { 2687 if (newvp) { 2688 if (newvp == dvp) 2689 vrele(newvp); 2690 else 2691 vput(newvp); 2692 } 2693 } else 2694 *npp = np; 2695 } 2696 if (error && NFS_ISV4(dvp)) 2697 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2698 return (error); 2699 } 2700 2701 /* 2702 * Nfs Version 3 and 4 commit rpc 2703 */ 2704 int 2705 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2706 struct thread *td) 2707 { 2708 struct nfsvattr nfsva; 2709 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2710 struct nfsnode *np; 2711 struct uio uio; 2712 int error, attrflag; 2713 2714 np = VTONFS(vp); /* error starts out as EIO so that the regular commit below also runs when the pNFS branch is not taken. */ 2715 error = EIO; 2716 attrflag = 0; 2717 if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) { 2718 uio.uio_offset = offset; 2719 uio.uio_resid = cnt; 2720 error = nfscl_doiods(vp, &uio, NULL, NULL, 2721 NFSV4OPEN_ACCESSWRITE, 1, cred, td); 2722 if (error != 0) { 2723 NFSLOCKNODE(np); 2724 np->n_flag &= ~NDSCOMMIT; 2725 NFSUNLOCKNODE(np); 2726 } 2727 } /* Without NFSSTA_HASWRITEVERF set on the mount, no commit RPC is sent and 0 is returned. */ 2728 if (error != 0) { 2729 mtx_lock(&nmp->nm_mtx); 2730 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2731 mtx_unlock(&nmp->nm_mtx); 2732 return (0); 2733 } 2734 mtx_unlock(&nmp->nm_mtx); 2735 error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, 2736 
&attrflag, NULL); 2737 } 2738 if (attrflag != 0) 2739 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 2740 0, 1); 2741 if (error != 0 && NFS_ISV4(vp)) 2742 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2743 return (error); 2744 } 2745 2746 /* 2747 * Strategy routine. 2748 * For async requests when nfsiod(s) are running, queue the request by 2749 * calling ncl_asyncio(), otherwise just call ncl_doio() to do the 2750 * request. 2751 */ 2752 static int 2753 nfs_strategy(struct vop_strategy_args *ap) 2754 { 2755 struct buf *bp; 2756 struct vnode *vp; 2757 struct ucred *cr; 2758 2759 bp = ap->a_bp; 2760 vp = ap->a_vp; 2761 KASSERT(bp->b_vp == vp, ("missing b_getvp")); 2762 KASSERT(!(bp->b_flags & B_DONE), 2763 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2764 /* For a regular file whose b_blkno still equals b_lblkno, rescale it from bo_bsize blocks to DEV_BSIZE units. */ 2765 if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno) 2766 bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize / 2767 DEV_BSIZE); /* Use the credential that matches the I/O direction. */ 2768 if (bp->b_iocmd == BIO_READ) 2769 cr = bp->b_rcred; 2770 else 2771 cr = bp->b_wcred; 2772 2773 /* 2774 * If the op is asynchronous and an i/o daemon is waiting 2775 * queue the request, wake it up and wait for completion 2776 * otherwise just do it ourselves. 2777 */ /* ncl_doio() runs for a synchronous buffer, or when ncl_asyncio() returns non-zero; its result is discarded and 0 is always returned. */ 2778 if ((bp->b_flags & B_ASYNC) == 0 || 2779 ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread)) 2780 (void) ncl_doio(vp, bp, cr, curthread, 1); 2781 return (0); 2782 } 2783 2784 /* 2785 * fsync vnode op. Just call ncl_flush() with commit == 1. 2786 */ 2787 /* ARGSUSED */ 2788 static int 2789 nfs_fsync(struct vop_fsync_args *ap) 2790 { 2791 2792 if (ap->a_vp->v_type != VREG) { 2793 /* 2794 * For NFS, metadata is changed synchronously on the server, 2795 * so there is nothing to flush. Also, ncl_flush() clears 2796 * the NMODIFIED flag and that shouldn't be done here for 2797 * directories. 2798 */ 2799 return (0); 2800 } 2801 return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0)); 2802 } 2803 2804 /* 2805 * Flush all the blocks associated with a vnode. 
2806 * Walk through the buffer pool and push any dirty pages 2807 * associated with the vnode. 2808 * If the called_from_renewthread argument is TRUE, it has been called 2809 * from the NFSv4 renew thread and, as such, cannot block indefinitely 2810 * waiting for a buffer write to complete. 2811 */ 2812 int 2813 ncl_flush(struct vnode *vp, int waitfor, struct thread *td, 2814 int commit, int called_from_renewthread) 2815 { 2816 struct nfsnode *np = VTONFS(vp); 2817 struct buf *bp; 2818 int i; 2819 struct buf *nbp; 2820 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2821 int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; 2822 int passone = 1, trycnt = 0; 2823 u_quad_t off, endoff, toff; 2824 struct ucred* wcred = NULL; 2825 struct buf **bvec = NULL; 2826 struct bufobj *bo; 2827 #ifndef NFS_COMMITBVECSIZ 2828 #define NFS_COMMITBVECSIZ 20 2829 #endif 2830 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; 2831 u_int bvecsize = 0, bveccount; 2832 /* Sleeps are bounded by a timeout of hz when called from the renew thread, and made interruptible (PCATCH) on NFSMNT_INT mounts. */ 2833 if (called_from_renewthread != 0) 2834 slptimeo = hz; 2835 if (nmp->nm_flag & NFSMNT_INT) 2836 slpflag = PCATCH; /* Without commit there is no first (commit) pass. */ 2837 if (!commit) 2838 passone = 0; 2839 bo = &vp->v_bufobj; 2840 /* 2841 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the 2842 * server, but has not been committed to stable storage on the server 2843 * yet. On the first pass, the byte range is worked out and the commit 2844 * rpc is done. On the second pass, ncl_writebp() is called to do the 2845 * job. 2846 */ 2847 again: /* off starts at the largest u_quad_t value and endoff at 0, so the first collected buffer sets both bounds. */ 2848 off = (u_quad_t)-1; 2849 endoff = 0; 2850 bvecpos = 0; 2851 if (NFS_ISV34(vp) && commit) { 2852 if (bvec != NULL && bvec != bvec_on_stack) 2853 free(bvec, M_TEMP); 2854 /* 2855 * Count up how many buffers waiting for a commit. 
2856 */ 2857 bveccount = 0; 2858 BO_LOCK(bo); 2859 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2860 if (!BUF_ISLOCKED(bp) && 2861 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 2862 == (B_DELWRI | B_NEEDCOMMIT)) 2863 bveccount++; 2864 } 2865 /* 2866 * Allocate space to remember the list of bufs to commit. It is 2867 * important to use M_NOWAIT here to avoid a race with nfs_write. 2868 * If we can't get memory (for whatever reason), we will end up 2869 * committing the buffers one-by-one in the loop below. 2870 */ 2871 if (bveccount > NFS_COMMITBVECSIZ) { 2872 /* 2873 * Release the vnode interlock to avoid a lock 2874 * order reversal. 2875 */ 2876 BO_UNLOCK(bo); 2877 bvec = (struct buf **) 2878 malloc(bveccount * sizeof(struct buf *), 2879 M_TEMP, M_NOWAIT); 2880 BO_LOCK(bo); /* On allocation failure fall back to the fixed-size on-stack vector. */ 2881 if (bvec == NULL) { 2882 bvec = bvec_on_stack; 2883 bvecsize = NFS_COMMITBVECSIZ; 2884 } else 2885 bvecsize = bveccount; 2886 } else { 2887 bvec = bvec_on_stack; 2888 bvecsize = NFS_COMMITBVECSIZ; 2889 } /* Second scan: lock each buffer that is both B_DELWRI and B_NEEDCOMMIT and collect at most bvecsize of them in bvec. */ 2890 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2891 if (bvecpos >= bvecsize) 2892 break; 2893 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 2894 nbp = TAILQ_NEXT(bp, b_bobufs); 2895 continue; 2896 } 2897 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 2898 (B_DELWRI | B_NEEDCOMMIT)) { 2899 BUF_UNLOCK(bp); 2900 nbp = TAILQ_NEXT(bp, b_bobufs); 2901 continue; 2902 } 2903 BO_UNLOCK(bo); 2904 bremfree(bp); 2905 /* 2906 * Work out if all buffers are using the same cred 2907 * so we can deal with them all with one commit. 2908 * 2909 * NOTE: we are not clearing B_DONE here, so we have 2910 * to do it later on in this routine if we intend to 2911 * initiate I/O on the bp. 2912 * 2913 * Note: to avoid loopback deadlocks, we do not 2914 * assign b_runningbufspace. 
2915 */ /* wcred ends up as the single credential shared by all collected buffers, or NOCRED once two of them differ. */ 2916 if (wcred == NULL) 2917 wcred = bp->b_wcred; 2918 else if (wcred != bp->b_wcred) 2919 wcred = NOCRED; 2920 vfs_busy_pages(bp, 1); 2921 2922 BO_LOCK(bo); 2923 /* 2924 * bp is protected by being locked, but nbp is not 2925 * and vfs_busy_pages() may sleep. We have to 2926 * recalculate nbp. 2927 */ 2928 nbp = TAILQ_NEXT(bp, b_bobufs); 2929 2930 /* 2931 * A list of these buffers is kept so that the 2932 * second loop knows which buffers have actually 2933 * been committed. This is necessary, since there 2934 * may be a race between the commit rpc and new 2935 * uncommitted writes on the file. 2936 */ 2937 bvec[bvecpos++] = bp; /* Widen [off, endoff) to cover this buffer's dirty byte range. */ 2938 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 2939 bp->b_dirtyoff; 2940 if (toff < off) 2941 off = toff; 2942 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 2943 if (toff > endoff) 2944 endoff = toff; 2945 } 2946 BO_UNLOCK(bo); 2947 } 2948 if (bvecpos > 0) { 2949 /* 2950 * Commit data on the server, as required. 2951 * If all bufs are using the same wcred, then use that with 2952 * one call for all of them, otherwise commit each one 2953 * separately. 2954 */ 2955 if (wcred != NOCRED) 2956 retv = ncl_commit(vp, off, (int)(endoff - off), 2957 wcred, td); 2958 else { 2959 retv = 0; 2960 for (i = 0; i < bvecpos; i++) { /* These locals shadow the function-level off for the per-buffer commit; the loop stops at the first failure. */ 2961 off_t off, size; 2962 bp = bvec[i]; 2963 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 2964 bp->b_dirtyoff; 2965 size = (u_quad_t)(bp->b_dirtyend 2966 - bp->b_dirtyoff); 2967 retv = ncl_commit(vp, off, (int)size, 2968 bp->b_wcred, td); 2969 if (retv) break; 2970 } 2971 } 2972 2973 if (retv == NFSERR_STALEWRITEVERF) 2974 ncl_clearcommit(vp->v_mount); 2975 2976 /* 2977 * Now, either mark the blocks I/O done or mark the 2978 * blocks dirty, depending on whether the commit 2979 * succeeded. 
2980 */ 2981 for (i = 0; i < bvecpos; i++) { 2982 bp = bvec[i]; 2983 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 2984 if (retv) { 2985 /* 2986 * Error, leave B_DELWRI intact 2987 */ 2988 vfs_unbusy_pages(bp); 2989 brelse(bp); 2990 } else { 2991 /* 2992 * Success, remove B_DELWRI ( bundirty() ). 2993 * 2994 * b_dirtyoff/b_dirtyend seem to be NFS 2995 * specific. We should probably move that 2996 * into bundirty(). XXX 2997 */ 2998 bufobj_wref(bo); 2999 bp->b_flags |= B_ASYNC; 3000 bundirty(bp); 3001 bp->b_flags &= ~B_DONE; 3002 bp->b_ioflags &= ~BIO_ERROR; 3003 bp->b_dirtyoff = bp->b_dirtyend = 0; 3004 bufdone(bp); 3005 } 3006 } 3007 } 3008 3009 /* 3010 * Start/do any write(s) that are required. 3011 */ 3012 loop: 3013 BO_LOCK(bo); 3014 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3015 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { /* Busy buffers are skipped unless this is a waiting flush past the first pass. */ 3016 if (waitfor != MNT_WAIT || passone) 3017 continue; 3018 /* Sleep for the buffer lock (LK_INTERLOCK hands over the bufobj lock), then rescan the dirty list from the top whether or not the lock was obtained. */ 3019 error = BUF_TIMELOCK(bp, 3020 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 3021 BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo); 3022 if (error == 0) { 3023 BUF_UNLOCK(bp); 3024 goto loop; 3025 } 3026 if (error == ENOLCK) { 3027 error = 0; 3028 goto loop; 3029 } 3030 if (called_from_renewthread != 0) { 3031 /* 3032 * Return EIO so the flush will be retried 3033 * later. 
3034 */ 3035 error = EIO; 3036 goto done; 3037 } 3038 if (newnfs_sigintr(nmp, td)) { 3039 error = EINTR; 3040 goto done; 3041 } /* The interruptible sleep failed but newnfs_sigintr() reported nothing: retry uninterruptibly with a 2 * hz timeout. */ 3042 if (slpflag == PCATCH) { 3043 slpflag = 0; 3044 slptimeo = 2 * hz; 3045 } 3046 goto loop; 3047 } 3048 if ((bp->b_flags & B_DELWRI) == 0) 3049 panic("nfs_fsync: not dirty"); /* Buffers still awaiting a commit are left alone on the first pass and whenever commit is off. */ 3050 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { 3051 BUF_UNLOCK(bp); 3052 continue; 3053 } /* Start an asynchronous write of this buffer, then rescan from the top because the bufobj lock was dropped. */ 3054 BO_UNLOCK(bo); 3055 bremfree(bp); 3056 bp->b_flags |= B_ASYNC; 3057 bwrite(bp); 3058 if (newnfs_sigintr(nmp, td)) { 3059 error = EINTR; 3060 goto done; 3061 } 3062 goto loop; 3063 } /* End of the first pass: go back and run the commit scan again. */ 3064 if (passone) { 3065 passone = 0; 3066 BO_UNLOCK(bo); 3067 goto again; 3068 } 3069 if (waitfor == MNT_WAIT) { /* Wait until no writes remain outstanding on the bufobj. */ 3070 while (bo->bo_numoutput) { 3071 error = bufobj_wwait(bo, slpflag, slptimeo); 3072 if (error) { 3073 BO_UNLOCK(bo); 3074 if (called_from_renewthread != 0) { 3075 /* 3076 * Return EIO so that the flush will be 3077 * retried later. 3078 */ 3079 error = EIO; 3080 goto done; 3081 } 3082 error = newnfs_sigintr(nmp, td); 3083 if (error) 3084 goto done; 3085 if (slpflag == PCATCH) { 3086 slpflag = 0; 3087 slptimeo = 2 * hz; 3088 } 3089 BO_LOCK(bo); 3090 } 3091 } 3092 if (bo->bo_dirty.bv_cnt != 0 && commit) { 3093 BO_UNLOCK(bo); 3094 goto loop; 3095 } 3096 /* 3097 * Wait for all the async IO requests to drain 3098 */ 3099 BO_UNLOCK(bo); 3100 NFSLOCKNODE(np); 3101 while (np->n_directio_asyncwr > 0) { 3102 np->n_flag |= NFSYNCWAIT; 3103 error = newnfs_msleep(td, &np->n_directio_asyncwr, 3104 &np->n_mtx, slpflag | (PRIBIO + 1), 3105 "nfsfsync", 0); 3106 if (error) { 3107 if (newnfs_sigintr(nmp, td)) { 3108 NFSUNLOCKNODE(np); 3109 error = EINTR; 3110 goto done; 3111 } 3112 } 3113 } 3114 NFSUNLOCKNODE(np); 3115 } else 3116 BO_UNLOCK(bo); 3117 if (NFSHASPNFS(nmp)) { 3118 nfscl_layoutcommit(vp, td); 3119 /* 3120 * Invalidate the attribute cache, since writes to a DS 3121 * won't update the size attribute. 
3122 */ 3123 NFSLOCKNODE(np); 3124 np->n_attrstamp = 0; 3125 } else 3126 NFSLOCKNODE(np); /* Report and clear a write error recorded on the node. */ 3127 if (np->n_flag & NWRITEERR) { 3128 error = np->n_error; 3129 np->n_flag &= ~NWRITEERR; 3130 } /* NMODIFIED is cleared only after a commit flush that left nothing dirty, nothing in flight and no direct I/O writes pending. */ 3131 if (commit && bo->bo_dirty.bv_cnt == 0 && 3132 bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) 3133 np->n_flag &= ~NMODIFIED; 3134 NFSUNLOCKNODE(np); 3135 done: 3136 if (bvec != NULL && bvec != bvec_on_stack) 3137 free(bvec, M_TEMP); /* A successful waiting commit flush that still leaves dirty or in-flight data is retried up to five times before failing with EIO (renew thread) or EBUSY. */ 3138 if (error == 0 && commit != 0 && waitfor == MNT_WAIT && 3139 (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 || 3140 np->n_directio_asyncwr != 0)) { 3141 if (trycnt++ < 5) { 3142 /* try, try again... */ 3143 passone = 1; 3144 wcred = NULL; 3145 bvec = NULL; 3146 bvecsize = 0; 3147 goto again; 3148 } 3149 vn_printf(vp, "ncl_flush failed"); 3150 error = called_from_renewthread != 0 ? EIO : EBUSY; 3151 } 3152 return (error); 3153 } 3154 3155 /* 3156 * NFS advisory byte-level locks. 3157 */ 3158 static int 3159 nfs_advlock(struct vop_advlock_args *ap) 3160 { 3161 struct vnode *vp = ap->a_vp; 3162 struct ucred *cred; 3163 struct nfsnode *np = VTONFS(ap->a_vp); 3164 struct proc *p = (struct proc *)ap->a_id; 3165 struct thread *td = curthread; /* XXX */ 3166 struct vattr va; 3167 int ret, error; 3168 u_quad_t size; 3169 struct nfsmount *nmp; 3170 /* Any failure to lock the vnode is reported as EBADF. */ 3171 error = NFSVOPLOCK(vp, LK_SHARED); 3172 if (error != 0) 3173 return (EBADF); 3174 if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { 3175 if (vp->v_type != VREG) { 3176 error = EINVAL; 3177 goto out; 3178 } /* POSIX locks use the credential of the process passed in a_id; other locks use the current thread's. */ 3179 if ((ap->a_flags & F_POSIX) != 0) 3180 cred = p->p_ucred; 3181 else 3182 cred = td->td_ucred; 3183 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 3184 if (VN_IS_DOOMED(vp)) { 3185 error = EBADF; 3186 goto out; 3187 } 3188 3189 /* 3190 * If this is unlocking a write locked region, flush and 3191 * commit them before unlocking. This is required by 3192 * RFC3530 Sec. 9.3.2. 
3193 */ 3194 if (ap->a_op == F_UNLCK && 3195 nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, 3196 ap->a_flags)) 3197 (void) ncl_flush(vp, MNT_WAIT, td, 1, 0); 3198 3199 /* 3200 * Mark NFS node as might have acquired a lock. 3201 * This is separate from NHASBEENLOCKED, because it must 3202 * be done before the nfsrpc_advlock() call, which might 3203 * add a nfscllock structure to the client state. 3204 * It is used to check for the case where a nfscllock 3205 * state structure cannot exist for the file. 3206 * Only done for "oneopenown" NFSv4.1/4.2 mounts. 3207 */ 3208 nmp = VFSTONFS(vp->v_mount); 3209 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) { 3210 NFSLOCKNODE(np); 3211 np->n_flag |= NMIGHTBELOCKED; 3212 NFSUNLOCKNODE(np); 3213 } 3214 3215 /* 3216 * Loop around doing the lock op, while a blocking lock 3217 * must wait for the lock op to succeed. 3218 */ 3219 do { 3220 ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, 3221 ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); 3222 if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3223 ap->a_op == F_SETLK) { /* The vnode is unlocked across the nap. An interrupted nap returns EINTR directly, without going through "out", because the vnode lock is not held at that point. */ 3224 NFSVOPUNLOCK(vp); 3225 error = nfs_catnap(PZERO | PCATCH, ret, 3226 "ncladvl"); 3227 if (error) 3228 return (EINTR); 3229 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 3230 if (VN_IS_DOOMED(vp)) { 3231 error = EBADF; 3232 goto out; 3233 } 3234 } 3235 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3236 ap->a_op == F_SETLK); /* Translate the RPC result: denied becomes EAGAIN; EINVAL, EBADF and EINTR pass through; anything else non-zero becomes EACCES. */ 3237 if (ret == NFSERR_DENIED) { 3238 error = EAGAIN; 3239 goto out; 3240 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { 3241 error = ret; 3242 goto out; 3243 } else if (ret != 0) { 3244 error = EACCES; 3245 goto out; 3246 } 3247 3248 /* 3249 * Now, if we just got a lock, invalidate data in the buffer 3250 * cache, as required, so that the coherency conforms with 3251 * RFC3530 Sec. 9.3.2. 
3252 */ 3253 if (ap->a_op == F_SETLK) { 3254 if ((np->n_flag & NMODIFIED) == 0) { 3255 np->n_attrstamp = 0; 3256 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3257 ret = VOP_GETATTR(vp, &va, cred); 3258 } 3259 if ((np->n_flag & NMODIFIED) || ret || 3260 np->n_change != va.va_filerev) { 3261 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); 3262 np->n_attrstamp = 0; 3263 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3264 ret = VOP_GETATTR(vp, &va, cred); 3265 if (!ret) { 3266 np->n_mtime = va.va_mtime; 3267 np->n_change = va.va_filerev; 3268 } 3269 } 3270 /* Mark that a file lock has been acquired. */ 3271 NFSLOCKNODE(np); 3272 np->n_flag |= NHASBEENLOCKED; 3273 NFSUNLOCKNODE(np); 3274 } 3275 } else if (!NFS_ISV4(vp)) { 3276 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3277 size = VTONFS(vp)->n_size; 3278 NFSVOPUNLOCK(vp); 3279 error = lf_advlock(ap, &(vp->v_lockf), size); 3280 } else { 3281 if (nfs_advlock_p != NULL) 3282 error = nfs_advlock_p(ap); 3283 else { 3284 NFSVOPUNLOCK(vp); 3285 error = ENOLCK; 3286 } 3287 } 3288 if (error == 0 && ap->a_op == F_SETLK) { 3289 error = NFSVOPLOCK(vp, LK_SHARED); 3290 if (error == 0) { 3291 /* Mark that a file lock has been acquired. */ 3292 NFSLOCKNODE(np); 3293 np->n_flag |= NHASBEENLOCKED; 3294 NFSUNLOCKNODE(np); 3295 NFSVOPUNLOCK(vp); 3296 } 3297 } 3298 return (error); 3299 } else 3300 error = EOPNOTSUPP; 3301 out: 3302 NFSVOPUNLOCK(vp); 3303 return (error); 3304 } 3305 3306 /* 3307 * NFS advisory byte-level locks. 
3308 */ 3309 static int 3310 nfs_advlockasync(struct vop_advlockasync_args *ap) 3311 { 3312 struct vnode *vp = ap->a_vp; 3313 u_quad_t size; 3314 int error; 3315 3316 if (NFS_ISV4(vp)) 3317 return (EOPNOTSUPP); 3318 error = NFSVOPLOCK(vp, LK_SHARED); 3319 if (error) 3320 return (error); 3321 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3322 size = VTONFS(vp)->n_size; 3323 NFSVOPUNLOCK(vp); 3324 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3325 } else { 3326 NFSVOPUNLOCK(vp); 3327 error = EOPNOTSUPP; 3328 } 3329 return (error); 3330 } 3331 3332 /* 3333 * Print out the contents of an nfsnode. 3334 */ 3335 static int 3336 nfs_print(struct vop_print_args *ap) 3337 { 3338 struct vnode *vp = ap->a_vp; 3339 struct nfsnode *np = VTONFS(vp); 3340 3341 printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, 3342 (uintmax_t)np->n_vattr.na_fsid); 3343 if (vp->v_type == VFIFO) 3344 fifo_printinfo(vp); 3345 printf("\n"); 3346 return (0); 3347 } 3348 3349 /* 3350 * This is the "real" nfs::bwrite(struct buf*). 3351 * We set B_CACHE if this is a VMIO buffer. 3352 */ 3353 int 3354 ncl_writebp(struct buf *bp, int force __unused, struct thread *td) 3355 { 3356 int oldflags, rtval; 3357 3358 if (bp->b_flags & B_INVAL) { 3359 brelse(bp); 3360 return (0); 3361 } 3362 3363 oldflags = bp->b_flags; 3364 bp->b_flags |= B_CACHE; 3365 3366 /* 3367 * Undirty the bp. We will redirty it later if the I/O fails. 3368 */ 3369 bundirty(bp); 3370 bp->b_flags &= ~B_DONE; 3371 bp->b_ioflags &= ~BIO_ERROR; 3372 bp->b_iocmd = BIO_WRITE; 3373 3374 bufobj_wref(bp->b_bufobj); 3375 curthread->td_ru.ru_oublock++; 3376 3377 /* 3378 * Note: to avoid loopback deadlocks, we do not 3379 * assign b_runningbufspace. 
3380 */ 3381 vfs_busy_pages(bp, 1); 3382 3383 BUF_KERNPROC(bp); 3384 bp->b_iooffset = dbtob(bp->b_blkno); 3385 bstrategy(bp); 3386 3387 if ((oldflags & B_ASYNC) != 0) 3388 return (0); 3389 3390 rtval = bufwait(bp); 3391 if (oldflags & B_DELWRI) 3392 reassignbuf(bp); 3393 brelse(bp); 3394 return (rtval); 3395 } 3396 3397 /* 3398 * nfs special file access vnode op. 3399 * Essentially just get vattr and then imitate iaccess() since the device is 3400 * local to the client. 3401 */ 3402 static int 3403 nfsspec_access(struct vop_access_args *ap) 3404 { 3405 struct vattr *vap; 3406 struct ucred *cred = ap->a_cred; 3407 struct vnode *vp = ap->a_vp; 3408 accmode_t accmode = ap->a_accmode; 3409 struct vattr vattr; 3410 int error; 3411 3412 /* 3413 * Disallow write attempts on filesystems mounted read-only; 3414 * unless the file is a socket, fifo, or a block or character 3415 * device resident on the filesystem. 3416 */ 3417 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3418 switch (vp->v_type) { 3419 case VREG: 3420 case VDIR: 3421 case VLNK: 3422 return (EROFS); 3423 default: 3424 break; 3425 } 3426 } 3427 vap = &vattr; 3428 error = VOP_GETATTR(vp, vap, cred); 3429 if (error) 3430 goto out; 3431 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3432 accmode, cred); 3433 out: 3434 return error; 3435 } 3436 3437 /* 3438 * Read wrapper for fifos. 3439 */ 3440 static int 3441 nfsfifo_read(struct vop_read_args *ap) 3442 { 3443 struct nfsnode *np = VTONFS(ap->a_vp); 3444 int error; 3445 3446 /* 3447 * Set access flag. 3448 */ 3449 NFSLOCKNODE(np); 3450 np->n_flag |= NACC; 3451 vfs_timestamp(&np->n_atim); 3452 NFSUNLOCKNODE(np); 3453 error = fifo_specops.vop_read(ap); 3454 return error; 3455 } 3456 3457 /* 3458 * Write wrapper for fifos. 3459 */ 3460 static int 3461 nfsfifo_write(struct vop_write_args *ap) 3462 { 3463 struct nfsnode *np = VTONFS(ap->a_vp); 3464 3465 /* 3466 * Set update flag. 
3467 */ 3468 NFSLOCKNODE(np); 3469 np->n_flag |= NUPD; 3470 vfs_timestamp(&np->n_mtim); 3471 NFSUNLOCKNODE(np); 3472 return(fifo_specops.vop_write(ap)); 3473 } 3474 3475 /* 3476 * Close wrapper for fifos. 3477 * 3478 * Update the times on the nfsnode then do fifo close. 3479 */ 3480 static int 3481 nfsfifo_close(struct vop_close_args *ap) 3482 { 3483 struct vnode *vp = ap->a_vp; 3484 struct nfsnode *np = VTONFS(vp); 3485 struct vattr vattr; 3486 struct timespec ts; 3487 3488 NFSLOCKNODE(np); 3489 if (np->n_flag & (NACC | NUPD)) { 3490 vfs_timestamp(&ts); 3491 if (np->n_flag & NACC) 3492 np->n_atim = ts; 3493 if (np->n_flag & NUPD) 3494 np->n_mtim = ts; 3495 np->n_flag |= NCHG; 3496 if (vrefcnt(vp) == 1 && 3497 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3498 VATTR_NULL(&vattr); 3499 if (np->n_flag & NACC) 3500 vattr.va_atime = np->n_atim; 3501 if (np->n_flag & NUPD) 3502 vattr.va_mtime = np->n_mtim; 3503 NFSUNLOCKNODE(np); 3504 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3505 goto out; 3506 } 3507 } 3508 NFSUNLOCKNODE(np); 3509 out: 3510 return (fifo_specops.vop_close(ap)); 3511 } 3512 3513 /* 3514 * Just call ncl_writebp() with the force argument set to 1. 3515 * 3516 * NOTE: B_DONE may or may not be set in a_bp on call. 
3517 */ 3518 static int 3519 nfs_bwrite(struct buf *bp) 3520 { 3521 3522 return (ncl_writebp(bp, 1, curthread)); 3523 } 3524 3525 struct buf_ops buf_ops_newnfs = { 3526 .bop_name = "buf_ops_nfs", 3527 .bop_write = nfs_bwrite, 3528 .bop_strategy = bufstrategy, 3529 .bop_sync = bufsync, 3530 .bop_bdflush = bufbdflush, 3531 }; 3532 3533 static int 3534 nfs_getacl(struct vop_getacl_args *ap) 3535 { 3536 int error; 3537 3538 if (ap->a_type != ACL_TYPE_NFS4) 3539 return (EOPNOTSUPP); 3540 error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3541 NULL); 3542 if (error > NFSERR_STALE) { 3543 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3544 error = EPERM; 3545 } 3546 return (error); 3547 } 3548 3549 static int 3550 nfs_setacl(struct vop_setacl_args *ap) 3551 { 3552 int error; 3553 3554 if (ap->a_type != ACL_TYPE_NFS4) 3555 return (EOPNOTSUPP); 3556 error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3557 NULL); 3558 if (error > NFSERR_STALE) { 3559 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3560 error = EPERM; 3561 } 3562 return (error); 3563 } 3564 3565 /* 3566 * VOP_ADVISE for NFS. 3567 * Just return 0 for any errors, since it is just a hint. 3568 */ 3569 static int 3570 nfs_advise(struct vop_advise_args *ap) 3571 { 3572 struct thread *td = curthread; 3573 struct nfsmount *nmp; 3574 uint64_t len; 3575 int error; 3576 3577 /* 3578 * First do vop_stdadvise() to handle the buffer cache. 
3579 */ 3580 error = vop_stdadvise(ap); 3581 if (error != 0) 3582 return (error); 3583 if (ap->a_start < 0 || ap->a_end < 0) 3584 return (0); 3585 if (ap->a_end == OFF_MAX) 3586 len = 0; 3587 else if (ap->a_end < ap->a_start) 3588 return (0); 3589 else 3590 len = ap->a_end - ap->a_start + 1; 3591 nmp = VFSTONFS(ap->a_vp->v_mount); 3592 mtx_lock(&nmp->nm_mtx); 3593 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3594 (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) == 3595 0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) { 3596 mtx_unlock(&nmp->nm_mtx); 3597 return (0); 3598 } 3599 mtx_unlock(&nmp->nm_mtx); 3600 error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice, 3601 td->td_ucred, td); 3602 if (error == NFSERR_NOTSUPP) { 3603 mtx_lock(&nmp->nm_mtx); 3604 nmp->nm_privflag |= NFSMNTP_NOADVISE; 3605 mtx_unlock(&nmp->nm_mtx); 3606 } 3607 return (0); 3608 } 3609 3610 /* 3611 * nfs allocate call 3612 */ 3613 static int 3614 nfs_allocate(struct vop_allocate_args *ap) 3615 { 3616 struct vnode *vp = ap->a_vp; 3617 struct thread *td = curthread; 3618 struct nfsvattr nfsva; 3619 struct nfsmount *nmp; 3620 int attrflag, error, ret; 3621 3622 attrflag = 0; 3623 nmp = VFSTONFS(vp->v_mount); 3624 mtx_lock(&nmp->nm_mtx); 3625 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3626 (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) { 3627 mtx_unlock(&nmp->nm_mtx); 3628 /* 3629 * Flush first to ensure that the allocate adds to the 3630 * file's allocation on the server. 
3631 */ 3632 error = ncl_flush(vp, MNT_WAIT, td, 1, 0); 3633 if (error == 0) 3634 error = nfsrpc_allocate(vp, *ap->a_offset, *ap->a_len, 3635 &nfsva, &attrflag, td->td_ucred, td, NULL); 3636 if (error == 0) { 3637 *ap->a_offset += *ap->a_len; 3638 *ap->a_len = 0; 3639 } else if (error == NFSERR_NOTSUPP) { 3640 mtx_lock(&nmp->nm_mtx); 3641 nmp->nm_privflag |= NFSMNTP_NOALLOCATE; 3642 mtx_unlock(&nmp->nm_mtx); 3643 } 3644 } else { 3645 mtx_unlock(&nmp->nm_mtx); 3646 error = EIO; 3647 } 3648 /* 3649 * If the NFS server cannot perform the Allocate operation, just call 3650 * vop_stdallocate() to perform it. 3651 */ 3652 if (error != 0) 3653 error = vop_stdallocate(ap); 3654 if (attrflag != 0) { 3655 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 3656 if (error == 0 && ret != 0) 3657 error = ret; 3658 } 3659 if (error != 0) 3660 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3661 return (error); 3662 } 3663 3664 /* 3665 * nfs copy_file_range call 3666 */ 3667 static int 3668 nfs_copy_file_range(struct vop_copy_file_range_args *ap) 3669 { 3670 struct vnode *invp = ap->a_invp; 3671 struct vnode *outvp = ap->a_outvp; 3672 struct mount *mp; 3673 struct nfsvattr innfsva, outnfsva; 3674 struct vattr *vap; 3675 struct uio io; 3676 struct nfsmount *nmp; 3677 size_t len, len2; 3678 int error, inattrflag, outattrflag, ret, ret2; 3679 off_t inoff, outoff; 3680 bool consecutive, must_commit, tryoutcred; 3681 3682 ret = ret2 = 0; 3683 nmp = VFSTONFS(invp->v_mount); 3684 mtx_lock(&nmp->nm_mtx); 3685 /* NFSv4.2 Copy is not permitted for infile == outfile. 
*/ 3686 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3687 (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0 || invp == outvp) { 3688 mtx_unlock(&nmp->nm_mtx); 3689 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 3690 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 3691 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 3692 return (error); 3693 } 3694 mtx_unlock(&nmp->nm_mtx); 3695 3696 /* Lock both vnodes, avoiding risk of deadlock. */ 3697 do { 3698 mp = NULL; 3699 error = vn_start_write(outvp, &mp, V_WAIT); 3700 if (error == 0) { 3701 error = vn_lock(outvp, LK_EXCLUSIVE); 3702 if (error == 0) { 3703 error = vn_lock(invp, LK_SHARED | LK_NOWAIT); 3704 if (error == 0) 3705 break; 3706 VOP_UNLOCK(outvp); 3707 if (mp != NULL) 3708 vn_finished_write(mp); 3709 mp = NULL; 3710 error = vn_lock(invp, LK_SHARED); 3711 if (error == 0) 3712 VOP_UNLOCK(invp); 3713 } 3714 } 3715 if (mp != NULL) 3716 vn_finished_write(mp); 3717 } while (error == 0); 3718 if (error != 0) 3719 return (error); 3720 3721 /* 3722 * Do the vn_rlimit_fsize() check. Should this be above the VOP layer? 3723 */ 3724 io.uio_offset = *ap->a_outoffp; 3725 io.uio_resid = *ap->a_lenp; 3726 error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); 3727 3728 /* 3729 * Flush the input file so that the data is up to date before 3730 * the copy. Flush writes for the output file so that they 3731 * do not overwrite the data copied to the output file by the Copy. 3732 * Set the commit argument for both flushes so that the data is on 3733 * stable storage before the Copy RPC. This is done in case the 3734 * server reboots during the Copy and needs to be redone. 3735 */ 3736 if (error == 0) 3737 error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0); 3738 if (error == 0) 3739 error = ncl_flush(outvp, MNT_WAIT, curthread, 1, 0); 3740 3741 /* Do the actual NFSv4.2 RPC. 
*/ 3742 len = *ap->a_lenp; 3743 mtx_lock(&nmp->nm_mtx); 3744 if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0) 3745 consecutive = true; 3746 else 3747 consecutive = false; 3748 mtx_unlock(&nmp->nm_mtx); 3749 inoff = *ap->a_inoffp; 3750 outoff = *ap->a_outoffp; 3751 tryoutcred = true; 3752 must_commit = false; 3753 if (error == 0) { 3754 vap = &VTONFS(invp)->n_vattr.na_vattr; 3755 error = VOP_GETATTR(invp, vap, ap->a_incred); 3756 if (error == 0) { 3757 /* 3758 * Clip "len" at va_size so that RFC compliant servers 3759 * will not reply NFSERR_INVAL. 3760 * Setting "len == 0" for the RPC would be preferred, 3761 * but some Linux servers do not support that. 3762 */ 3763 if (inoff >= vap->va_size) 3764 *ap->a_lenp = len = 0; 3765 else if (inoff + len > vap->va_size) 3766 *ap->a_lenp = len = vap->va_size - inoff; 3767 } else 3768 error = 0; 3769 } 3770 3771 /* 3772 * len will be set to 0 upon a successful Copy RPC. 3773 * As such, this only loops when the Copy RPC needs to be retried. 3774 */ 3775 while (len > 0 && error == 0) { 3776 inattrflag = outattrflag = 0; 3777 len2 = len; 3778 if (tryoutcred) 3779 error = nfsrpc_copy_file_range(invp, ap->a_inoffp, 3780 outvp, ap->a_outoffp, &len2, ap->a_flags, 3781 &inattrflag, &innfsva, &outattrflag, &outnfsva, 3782 ap->a_outcred, consecutive, &must_commit); 3783 else 3784 error = nfsrpc_copy_file_range(invp, ap->a_inoffp, 3785 outvp, ap->a_outoffp, &len2, ap->a_flags, 3786 &inattrflag, &innfsva, &outattrflag, &outnfsva, 3787 ap->a_incred, consecutive, &must_commit); 3788 if (inattrflag != 0) 3789 ret = nfscl_loadattrcache(&invp, &innfsva, NULL, NULL, 3790 0, 1); 3791 if (outattrflag != 0) 3792 ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL, 3793 NULL, 1, 1); 3794 if (error == 0) { 3795 if (consecutive == false) { 3796 if (len2 == len) { 3797 mtx_lock(&nmp->nm_mtx); 3798 nmp->nm_privflag |= 3799 NFSMNTP_NOCONSECUTIVE; 3800 mtx_unlock(&nmp->nm_mtx); 3801 } else 3802 error = NFSERR_OFFLOADNOREQS; 3803 } 3804 *ap->a_lenp = 
len2; 3805 len = 0; 3806 if (len2 > 0 && must_commit && error == 0) 3807 error = ncl_commit(outvp, outoff, *ap->a_lenp, 3808 ap->a_outcred, curthread); 3809 if (error == 0 && ret != 0) 3810 error = ret; 3811 if (error == 0 && ret2 != 0) 3812 error = ret2; 3813 } else if (error == NFSERR_OFFLOADNOREQS && consecutive) { 3814 /* 3815 * Try consecutive == false, which is ok only if all 3816 * bytes are copied. 3817 * If only some bytes were copied when consecutive 3818 * is false, there is no way to know which bytes 3819 * still need to be written. 3820 */ 3821 consecutive = false; 3822 error = 0; 3823 } else if (error == NFSERR_ACCES && tryoutcred) { 3824 /* Try again with incred. */ 3825 tryoutcred = false; 3826 error = 0; 3827 } 3828 if (error == NFSERR_STALEWRITEVERF) { 3829 /* 3830 * Server rebooted, so do it all again. 3831 */ 3832 *ap->a_inoffp = inoff; 3833 *ap->a_outoffp = outoff; 3834 len = *ap->a_lenp; 3835 must_commit = false; 3836 error = 0; 3837 } 3838 } 3839 VOP_UNLOCK(invp); 3840 VOP_UNLOCK(outvp); 3841 if (mp != NULL) 3842 vn_finished_write(mp); 3843 if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS || 3844 error == NFSERR_ACCES) { 3845 /* 3846 * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can 3847 * use a_incred for the read and a_outcred for the write, so 3848 * try this for NFSERR_ACCES failures for the Copy. 3849 * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can 3850 * never succeed, so disable it. 3851 */ 3852 if (error != NFSERR_ACCES) { 3853 /* Can never do Copy on this mount. 
*/ 3854 mtx_lock(&nmp->nm_mtx); 3855 nmp->nm_privflag |= NFSMNTP_NOCOPY; 3856 mtx_unlock(&nmp->nm_mtx); 3857 } 3858 *ap->a_inoffp = inoff; 3859 *ap->a_outoffp = outoff; 3860 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 3861 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 3862 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 3863 } else if (error != 0) 3864 *ap->a_lenp = 0; 3865 3866 if (error != 0) 3867 error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0); 3868 return (error); 3869 } 3870 3871 /* 3872 * nfs ioctl call 3873 */ 3874 static int 3875 nfs_ioctl(struct vop_ioctl_args *ap) 3876 { 3877 struct vnode *vp = ap->a_vp; 3878 struct nfsvattr nfsva; 3879 struct nfsmount *nmp; 3880 int attrflag, content, error, ret; 3881 bool eof = false; /* shut up compiler. */ 3882 3883 if (vp->v_type != VREG) 3884 return (ENOTTY); 3885 nmp = VFSTONFS(vp->v_mount); 3886 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) { 3887 error = vop_stdioctl(ap); 3888 return (error); 3889 } 3890 3891 /* Do the actual NFSv4.2 RPC. */ 3892 switch (ap->a_command) { 3893 case FIOSEEKDATA: 3894 content = NFSV4CONTENT_DATA; 3895 break; 3896 case FIOSEEKHOLE: 3897 content = NFSV4CONTENT_HOLE; 3898 break; 3899 default: 3900 return (ENOTTY); 3901 } 3902 3903 error = vn_lock(vp, LK_SHARED); 3904 if (error != 0) 3905 return (EBADF); 3906 attrflag = 0; 3907 if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size) 3908 error = ENXIO; 3909 else { 3910 /* 3911 * Flush all writes, so that the server is up to date. 3912 * Although a Commit is not required, the commit argument 3913 * is set so that, for a pNFS File/Flexible File Layout 3914 * server, the LayoutCommit will be done to ensure the file 3915 * size is up to date on the Metadata Server. 3916 */ 3917 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0); 3918 if (error == 0) 3919 error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof, 3920 content, ap->a_cred, &nfsva, &attrflag); 3921 /* If at eof for FIOSEEKDATA, return ENXIO. 
*/ 3922 if (eof && error == 0 && content == NFSV4CONTENT_DATA) 3923 error = ENXIO; 3924 } 3925 if (attrflag != 0) { 3926 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 3927 if (error == 0 && ret != 0) 3928 error = ret; 3929 } 3930 NFSVOPUNLOCK(vp); 3931 3932 if (error != 0) 3933 error = ENXIO; 3934 return (error); 3935 } 3936 3937 /* 3938 * nfs getextattr call 3939 */ 3940 static int 3941 nfs_getextattr(struct vop_getextattr_args *ap) 3942 { 3943 struct vnode *vp = ap->a_vp; 3944 struct nfsmount *nmp; 3945 struct ucred *cred; 3946 struct thread *td = ap->a_td; 3947 struct nfsvattr nfsva; 3948 ssize_t len; 3949 int attrflag, error, ret; 3950 3951 nmp = VFSTONFS(vp->v_mount); 3952 mtx_lock(&nmp->nm_mtx); 3953 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3954 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 3955 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 3956 mtx_unlock(&nmp->nm_mtx); 3957 return (EOPNOTSUPP); 3958 } 3959 mtx_unlock(&nmp->nm_mtx); 3960 3961 cred = ap->a_cred; 3962 if (cred == NULL) 3963 cred = td->td_ucred; 3964 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 3965 attrflag = 0; 3966 error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva, 3967 &attrflag, cred, td); 3968 if (attrflag != 0) { 3969 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 3970 if (error == 0 && ret != 0) 3971 error = ret; 3972 } 3973 if (error == 0 && ap->a_size != NULL) 3974 *ap->a_size = len; 3975 3976 switch (error) { 3977 case NFSERR_NOTSUPP: 3978 case NFSERR_OPILLEGAL: 3979 mtx_lock(&nmp->nm_mtx); 3980 nmp->nm_privflag |= NFSMNTP_NOXATTR; 3981 mtx_unlock(&nmp->nm_mtx); 3982 error = EOPNOTSUPP; 3983 break; 3984 case NFSERR_NOXATTR: 3985 case NFSERR_XATTR2BIG: 3986 error = ENOATTR; 3987 break; 3988 default: 3989 error = nfscl_maperr(td, error, 0, 0); 3990 break; 3991 } 3992 return (error); 3993 } 3994 3995 /* 3996 * nfs setextattr call 3997 */ 3998 static int 3999 nfs_setextattr(struct vop_setextattr_args *ap) 4000 { 4001 struct vnode *vp = ap->a_vp; 4002 struct nfsmount *nmp; 4003 struct ucred *cred; 4004 struct thread *td = ap->a_td; 4005 struct nfsvattr nfsva; 4006 int attrflag, error, ret; 4007 4008 nmp = VFSTONFS(vp->v_mount); 4009 mtx_lock(&nmp->nm_mtx); 4010 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4011 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4012 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4013 mtx_unlock(&nmp->nm_mtx); 4014 return (EOPNOTSUPP); 4015 } 4016 mtx_unlock(&nmp->nm_mtx); 4017 4018 if (ap->a_uio->uio_resid < 0) 4019 return (EINVAL); 4020 cred = ap->a_cred; 4021 if (cred == NULL) 4022 cred = td->td_ucred; 4023 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4024 attrflag = 0; 4025 error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva, 4026 &attrflag, cred, td); 4027 if (attrflag != 0) { 4028 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4029 if (error == 0 && ret != 0) 4030 error = ret; 4031 } 4032 4033 switch (error) { 4034 case NFSERR_NOTSUPP: 4035 case NFSERR_OPILLEGAL: 4036 mtx_lock(&nmp->nm_mtx); 4037 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4038 mtx_unlock(&nmp->nm_mtx); 4039 error = EOPNOTSUPP; 4040 break; 4041 case NFSERR_NOXATTR: 4042 case NFSERR_XATTR2BIG: 4043 error = ENOATTR; 4044 break; 4045 default: 4046 error = nfscl_maperr(td, error, 0, 0); 4047 break; 4048 } 4049 return (error); 4050 } 4051 4052 /* 4053 * nfs listextattr call 4054 */ 4055 static int 4056 nfs_listextattr(struct vop_listextattr_args *ap) 4057 { 4058 struct vnode *vp = ap->a_vp; 4059 struct nfsmount *nmp; 4060 struct ucred *cred; 4061 struct thread *td = ap->a_td; 4062 struct nfsvattr nfsva; 4063 size_t len, len2; 4064 uint64_t cookie; 4065 int attrflag, error, ret; 4066 bool eof; 4067 4068 nmp = VFSTONFS(vp->v_mount); 4069 mtx_lock(&nmp->nm_mtx); 4070 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4071 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4072 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4073 mtx_unlock(&nmp->nm_mtx); 4074 return (EOPNOTSUPP); 4075 } 4076 mtx_unlock(&nmp->nm_mtx); 4077 4078 cred = ap->a_cred; 4079 if (cred == NULL) 4080 cred = td->td_ucred; 4081 4082 /* Loop around doing List Extended Attribute RPCs. 
*/ 4083 eof = false; 4084 cookie = 0; 4085 len2 = 0; 4086 error = 0; 4087 while (!eof && error == 0) { 4088 len = nmp->nm_rsize; 4089 attrflag = 0; 4090 error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof, 4091 &nfsva, &attrflag, cred, td); 4092 if (attrflag != 0) { 4093 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 4094 1); 4095 if (error == 0 && ret != 0) 4096 error = ret; 4097 } 4098 if (error == 0) { 4099 len2 += len; 4100 if (len2 > SSIZE_MAX) 4101 error = ENOATTR; 4102 } 4103 } 4104 if (error == 0 && ap->a_size != NULL) 4105 *ap->a_size = len2; 4106 4107 switch (error) { 4108 case NFSERR_NOTSUPP: 4109 case NFSERR_OPILLEGAL: 4110 mtx_lock(&nmp->nm_mtx); 4111 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4112 mtx_unlock(&nmp->nm_mtx); 4113 error = EOPNOTSUPP; 4114 break; 4115 case NFSERR_NOXATTR: 4116 case NFSERR_XATTR2BIG: 4117 error = ENOATTR; 4118 break; 4119 default: 4120 error = nfscl_maperr(td, error, 0, 0); 4121 break; 4122 } 4123 return (error); 4124 } 4125 4126 /* 4127 * nfs setextattr call 4128 */ 4129 static int 4130 nfs_deleteextattr(struct vop_deleteextattr_args *ap) 4131 { 4132 struct vnode *vp = ap->a_vp; 4133 struct nfsmount *nmp; 4134 struct nfsvattr nfsva; 4135 int attrflag, error, ret; 4136 4137 nmp = VFSTONFS(vp->v_mount); 4138 mtx_lock(&nmp->nm_mtx); 4139 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4140 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4141 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4142 mtx_unlock(&nmp->nm_mtx); 4143 return (EOPNOTSUPP); 4144 } 4145 mtx_unlock(&nmp->nm_mtx); 4146 4147 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4148 attrflag = 0; 4149 error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred, 4150 ap->a_td); 4151 if (attrflag != 0) { 4152 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4153 if (error == 0 && ret != 0) 4154 error = ret; 4155 } 4156 4157 switch (error) { 4158 case NFSERR_NOTSUPP: 4159 case NFSERR_OPILLEGAL: 4160 mtx_lock(&nmp->nm_mtx); 4161 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4162 mtx_unlock(&nmp->nm_mtx); 4163 error = EOPNOTSUPP; 4164 break; 4165 case NFSERR_NOXATTR: 4166 case NFSERR_XATTR2BIG: 4167 error = ENOATTR; 4168 break; 4169 default: 4170 error = nfscl_maperr(ap->a_td, error, 0, 0); 4171 break; 4172 } 4173 return (error); 4174 } 4175 4176 /* 4177 * Return POSIX pathconf information applicable to nfs filesystems. 4178 */ 4179 static int 4180 nfs_pathconf(struct vop_pathconf_args *ap) 4181 { 4182 struct nfsv3_pathconf pc; 4183 struct nfsvattr nfsva; 4184 struct vnode *vp = ap->a_vp; 4185 struct nfsmount *nmp; 4186 struct thread *td = curthread; 4187 off_t off; 4188 bool eof; 4189 int attrflag, error; 4190 4191 if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || 4192 ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || 4193 ap->a_name == _PC_NO_TRUNC)) || 4194 (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) { 4195 /* 4196 * Since only the above 4 a_names are returned by the NFSv3 4197 * Pathconf RPC, there is no point in doing it for others. 4198 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can 4199 * be used for _PC_NFS4_ACL as well. 4200 */ 4201 error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, 4202 &attrflag, NULL); 4203 if (attrflag != 0) 4204 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 4205 1); 4206 if (error != 0) 4207 return (error); 4208 } else { 4209 /* 4210 * For NFSv2 (or NFSv3 when not one of the above 4 a_names), 4211 * just fake them. 
4212 */ 4213 pc.pc_linkmax = NFS_LINK_MAX; 4214 pc.pc_namemax = NFS_MAXNAMLEN; 4215 pc.pc_notrunc = 1; 4216 pc.pc_chownrestricted = 1; 4217 pc.pc_caseinsensitive = 0; 4218 pc.pc_casepreserving = 1; 4219 error = 0; 4220 } 4221 switch (ap->a_name) { 4222 case _PC_LINK_MAX: 4223 #ifdef _LP64 4224 *ap->a_retval = pc.pc_linkmax; 4225 #else 4226 *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); 4227 #endif 4228 break; 4229 case _PC_NAME_MAX: 4230 *ap->a_retval = pc.pc_namemax; 4231 break; 4232 case _PC_PIPE_BUF: 4233 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 4234 *ap->a_retval = PIPE_BUF; 4235 else 4236 error = EINVAL; 4237 break; 4238 case _PC_CHOWN_RESTRICTED: 4239 *ap->a_retval = pc.pc_chownrestricted; 4240 break; 4241 case _PC_NO_TRUNC: 4242 *ap->a_retval = pc.pc_notrunc; 4243 break; 4244 case _PC_ACL_NFS4: 4245 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && 4246 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) 4247 *ap->a_retval = 1; 4248 else 4249 *ap->a_retval = 0; 4250 break; 4251 case _PC_ACL_PATH_MAX: 4252 if (NFS_ISV4(vp)) 4253 *ap->a_retval = ACL_MAX_ENTRIES; 4254 else 4255 *ap->a_retval = 3; 4256 break; 4257 case _PC_PRIO_IO: 4258 *ap->a_retval = 0; 4259 break; 4260 case _PC_SYNC_IO: 4261 *ap->a_retval = 0; 4262 break; 4263 case _PC_ALLOC_SIZE_MIN: 4264 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; 4265 break; 4266 case _PC_FILESIZEBITS: 4267 if (NFS_ISV34(vp)) 4268 *ap->a_retval = 64; 4269 else 4270 *ap->a_retval = 32; 4271 break; 4272 case _PC_REC_INCR_XFER_SIZE: 4273 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4274 break; 4275 case _PC_REC_MAX_XFER_SIZE: 4276 *ap->a_retval = -1; /* means ``unlimited'' */ 4277 break; 4278 case _PC_REC_MIN_XFER_SIZE: 4279 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4280 break; 4281 case _PC_REC_XFER_ALIGN: 4282 *ap->a_retval = PAGE_SIZE; 4283 break; 4284 case _PC_SYMLINK_MAX: 4285 *ap->a_retval = NFS_MAXPATHLEN; 4286 break; 4287 case _PC_MIN_HOLE_SIZE: 4288 /* Only some NFSv4.2 servers 
support Seek for Holes. */ 4289 *ap->a_retval = 0; 4290 nmp = VFSTONFS(vp->v_mount); 4291 if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) { 4292 /* 4293 * NFSv4.2 doesn't have an attribute for hole size, 4294 * so all we can do is see if the Seek operation is 4295 * supported and then use f_iosize as a "best guess". 4296 */ 4297 mtx_lock(&nmp->nm_mtx); 4298 if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) { 4299 mtx_unlock(&nmp->nm_mtx); 4300 off = 0; 4301 attrflag = 0; 4302 error = nfsrpc_seek(vp, &off, &eof, 4303 NFSV4CONTENT_HOLE, td->td_ucred, &nfsva, 4304 &attrflag); 4305 if (attrflag != 0) 4306 nfscl_loadattrcache(&vp, &nfsva, 4307 NULL, NULL, 0, 1); 4308 mtx_lock(&nmp->nm_mtx); 4309 if (error == NFSERR_NOTSUPP) 4310 nmp->nm_privflag |= NFSMNTP_SEEKTESTED; 4311 else 4312 nmp->nm_privflag |= NFSMNTP_SEEKTESTED | 4313 NFSMNTP_SEEK; 4314 error = 0; 4315 } 4316 if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0) 4317 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4318 mtx_unlock(&nmp->nm_mtx); 4319 } 4320 break; 4321 4322 default: 4323 error = vop_stdpathconf(ap); 4324 break; 4325 } 4326 return (error); 4327 } 4328