1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
*
 *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * vnode op calls for Sun NFS version 2, 3 and 4
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/extattr.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/namei.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/signalvar.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>

#include <fs/nfs/nfsport.h>
#include <fs/nfsclient/nfsnode.h>
#include <fs/nfsclient/nfsmount.h>
#include <fs/nfsclient/nfs.h>
#include <fs/nfsclient/nfs_kdtrace.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

#include <nfs/nfs_lock.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * DTrace probe function pointers and probe ids for the access cache
 * flush, get (hit/miss) and load events.  They are only defined when
 * the kernel is built with KDTRACE_HOOKS.
 */
dtrace_nfsclient_accesscache_flush_probe_func_t
		dtrace_nfscl_accesscache_flush_done_probe;
uint32_t	nfscl_accesscache_flush_done_id;

dtrace_nfsclient_accesscache_get_probe_func_t
		dtrace_nfscl_accesscache_get_hit_probe,
		dtrace_nfscl_accesscache_get_miss_probe;
uint32_t	nfscl_accesscache_get_hit_id;
uint32_t	nfscl_accesscache_get_miss_id;

dtrace_nfsclient_accesscache_load_probe_func_t
		dtrace_nfscl_accesscache_load_done_probe;
uint32_t	nfscl_accesscache_load_done_id;
#endif /* KDTRACE_HOOKS */

/* Defs */
#define	TRUE	1
#define	FALSE	0

/* Objects defined in other files of the NFS code. */
extern struct nfsstatsv1 nfsstatsv1;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
MALLOC_DECLARE(M_NEWNFSREQ);

/*
 * Forward declarations of the vnode operations and helpers that are
 * referenced by the operation vectors below.
 */
static vop_read_t	nfsfifo_read;
static vop_write_t	nfsfifo_write;
static vop_close_t	nfsfifo_close;
static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
		    struct thread *);
static vop_lookup_t	nfs_lookup;
static vop_create_t	nfs_create;
static vop_mknod_t	nfs_mknod;
static vop_open_t	nfs_open;
static vop_pathconf_t	nfs_pathconf;
static vop_close_t	nfs_close;
static vop_access_t	nfs_access;
static vop_getattr_t	nfs_getattr;
static vop_setattr_t	nfs_setattr;
static vop_read_t	nfs_read;
static vop_fsync_t	nfs_fsync;
static vop_remove_t	nfs_remove;
static vop_link_t	nfs_link;
static vop_rename_t	nfs_rename;
static vop_mkdir_t	nfs_mkdir;
static vop_rmdir_t	nfs_rmdir;
static vop_symlink_t	nfs_symlink;
static vop_readdir_t	nfs_readdir;
static vop_strategy_t	nfs_strategy;
static	int	nfs_lookitup(struct vnode *, char *, int,
		    struct ucred *, struct thread *, struct nfsnode **);
static	int	nfs_sillyrename(struct vnode *, struct vnode *,
		    struct componentname *);
static vop_access_t	nfsspec_access;
static vop_readlink_t	nfs_readlink;
static vop_print_t	nfs_print;
static vop_advlock_t	nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;
static vop_getacl_t nfs_getacl;
static vop_setacl_t nfs_setacl;
static vop_advise_t nfs_advise;
static vop_allocate_t nfs_allocate;
static vop_deallocate_t	nfs_deallocate;
static vop_copy_file_range_t nfs_copy_file_range;
static vop_ioctl_t nfs_ioctl;
static vop_getextattr_t nfs_getextattr;
static vop_setextattr_t nfs_setextattr;
static vop_listextattr_t nfs_listextattr;
static vop_deleteextattr_t nfs_deleteextattr;
static vop_lock1_t	nfs_lock;

/*
 * Global vfs data structures for nfs
 */

/*
 * The real NFS vnode operation table.  It is not installed on vnodes
 * directly; newnfs_vnodeops below routes every operation to it through
 * nfs_vnodeops_bypass().
 */
static struct vop_vector newnfs_vnodeops_nosig = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_advlockasync =	nfs_advlockasync,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		ncl_getpages,
	.vop_putpages =		ncl_putpages,
	.vop_inactive =		ncl_inactive,
	.vop_link =		nfs_link,
	.vop_lock1 =		nfs_lock,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_pathconf =		nfs_pathconf,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		ncl_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		ncl_write,
	.vop_getacl =		nfs_getacl,
	.vop_setacl =		nfs_setacl,
	.vop_advise =		nfs_advise,
	.vop_allocate =		nfs_allocate,
	.vop_deallocate =	nfs_deallocate,
	.vop_copy_file_range =	nfs_copy_file_range,
	.vop_ioctl =		nfs_ioctl,
	.vop_getextattr =	nfs_getextattr,
	.vop_setextattr =	nfs_setextattr,
	.vop_listextattr =	nfs_listextattr,
	.vop_deleteextattr =	nfs_deleteextattr,
};
VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops_nosig);

/*
 * Bypass routine for newnfs_vnodeops: hand the generic operation to
 * vop_sigdefer() together with the real table and return its result.
 * NOTE(review): vop_sigdefer() is defined elsewhere; judging by its name
 * it defers signal handling around the operation - confirm.
 */
static int
nfs_vnodeops_bypass(struct vop_generic_args *a)
{

	return (vop_sigdefer(&newnfs_vnodeops_nosig, a));
}

/* Exported vector for NFS vnodes; everything goes through the bypass. */
struct vop_vector newnfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_bypass =		nfs_vnodeops_bypass,
};
VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops);

/*
 * The real operation table for fifos that live on an NFS mount.
 * Operations not listed here fall back to fifo_specops.
 */
static struct vop_vector newnfs_fifoops_nosig = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		ncl_inactive,
	.vop_pathconf =		nfs_pathconf,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		ncl_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};
VFS_VOP_VECTOR_REGISTER(newnfs_fifoops_nosig);

/*
 * Bypass routine for newnfs_fifoops; same scheme as
 * nfs_vnodeops_bypass() but using the fifo table.
 */
static int
nfs_fifoops_bypass(struct vop_generic_args *a)
{

	return (vop_sigdefer(&newnfs_fifoops_nosig, a));
}

/* Exported vector for NFS fifos; everything goes through the bypass. */
struct vop_vector newnfs_fifoops = {
	.vop_default =		&default_vnodeops,
	.vop_bypass =		nfs_fifoops_bypass,
};
VFS_VOP_VECTOR_REGISTER(newnfs_fifoops);

static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct vattr *vap);
static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td);
static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
    char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
    char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
    struct componentname *scnp, struct sillyrename *sp);

/*
 * Global variables
 */
SYSCTL_DECL(_vfs_nfs);

/*
 * Lifetime of an access cache entry; it is added to the entry's stamp
 * and compared against time_second.  A value <= 0 also stops
 * nfs_access() from asking the server for all access bits at once.
 */
static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

/* When non-zero, nfs_getattr() uses an ACCESS RPC to refresh attributes. */
static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
	   &nfs_prime_access_cache, 0,
	   "Prime NFS ACCESS cache when fetching attributes");

/* When non-zero, nfs_close() asks ncl_flush() to commit as well as write. */
static int	newnfs_commit_on_close = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
    &newnfs_commit_on_close, 0, "write+commit on close, else only write");

/* When non-zero, nfs_close() cleans the pages of the VM object first. */
static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

/* Global switch for the O_DIRECT handling in nfs_open()/nfs_close(). */
int newnfs_directio_enable = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
	   &newnfs_directio_enable, 0, "Enable NFS directio");

/* Not referenced in this part of the file; see the sysctl description. */
int nfs_keep_dirty_on_error;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW,
    &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
 * this by default to prevent DoS attacks - to prevent a malicious user from
 * opening up files O_DIRECT preventing other users from mmap'ing these
 * files.  "Protected" environments where stricter consistency guarantees are
 * required can disable this knob.  The process that opened the file O_DIRECT
 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
 * meaningful.
 */
int newnfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
	   &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");

/* Every access bit that can be requested in one ACCESS RPC. */
#define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY		\
			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)

/*
 * SMP Locking Note :
 * The list of locks after the description of the lock is the ordering
 * of other locks acquired with the lock held.
 * np->n_mtx : Protects the fields in the nfsnode.
       VM Object Lock
       VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
       rep->r_mtx
 * ncl_iod_mutex : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
       nmp->nm_mtx
       rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
322 */ 323 324 static int 325 nfs_lock(struct vop_lock1_args *ap) 326 { 327 struct vnode *vp; 328 struct nfsnode *np; 329 u_quad_t nsize; 330 int error, lktype; 331 bool onfault; 332 333 vp = ap->a_vp; 334 lktype = ap->a_flags & LK_TYPE_MASK; 335 error = VOP_LOCK1_APV(&default_vnodeops, ap); 336 if (error != 0 || vp->v_op != &newnfs_vnodeops) 337 return (error); 338 np = VTONFS(vp); 339 if (np == NULL) 340 return (0); 341 NFSLOCKNODE(np); 342 if ((np->n_flag & NVNSETSZSKIP) == 0 || (lktype != LK_SHARED && 343 lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE && 344 lktype != LK_TRYUPGRADE)) { 345 NFSUNLOCKNODE(np); 346 return (0); 347 } 348 onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT && 349 (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE && 350 (lktype == LK_SHARED || lktype == LK_EXCLUSIVE); 351 if (onfault && vp->v_vnlock->lk_recurse == 0) { 352 /* 353 * Force retry in vm_fault(), to make the lock request 354 * sleepable, which allows us to piggy-back the 355 * sleepable call to vnode_pager_setsize(). 
356 */ 357 NFSUNLOCKNODE(np); 358 VOP_UNLOCK(vp); 359 return (EBUSY); 360 } 361 if ((ap->a_flags & LK_NOWAIT) != 0 || 362 (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) { 363 NFSUNLOCKNODE(np); 364 return (0); 365 } 366 if (lktype == LK_SHARED) { 367 NFSUNLOCKNODE(np); 368 VOP_UNLOCK(vp); 369 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 370 ap->a_flags |= LK_EXCLUSIVE; 371 error = VOP_LOCK1_APV(&default_vnodeops, ap); 372 if (error != 0 || vp->v_op != &newnfs_vnodeops) 373 return (error); 374 if (vp->v_data == NULL) 375 goto downgrade; 376 MPASS(vp->v_data == np); 377 NFSLOCKNODE(np); 378 if ((np->n_flag & NVNSETSZSKIP) == 0) { 379 NFSUNLOCKNODE(np); 380 goto downgrade; 381 } 382 } 383 np->n_flag &= ~NVNSETSZSKIP; 384 nsize = np->n_size; 385 NFSUNLOCKNODE(np); 386 vnode_pager_setsize(vp, nsize); 387 downgrade: 388 if (lktype == LK_SHARED) { 389 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 390 ap->a_flags |= LK_DOWNGRADE; 391 (void)VOP_LOCK1_APV(&default_vnodeops, ap); 392 } 393 return (0); 394 } 395 396 static int 397 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 398 struct ucred *cred, u_int32_t *retmode) 399 { 400 int error = 0, attrflag, i, lrupos; 401 u_int32_t rmode; 402 struct nfsnode *np = VTONFS(vp); 403 struct nfsvattr nfsva; 404 405 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 406 &rmode, NULL); 407 if (attrflag) 408 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 409 if (!error) { 410 lrupos = 0; 411 NFSLOCKNODE(np); 412 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 413 if (np->n_accesscache[i].uid == cred->cr_uid) { 414 np->n_accesscache[i].mode = rmode; 415 np->n_accesscache[i].stamp = time_second; 416 break; 417 } 418 if (i > 0 && np->n_accesscache[i].stamp < 419 np->n_accesscache[lrupos].stamp) 420 lrupos = i; 421 } 422 if (i == NFS_ACCESSCACHESIZE) { 423 np->n_accesscache[lrupos].uid = cred->cr_uid; 424 np->n_accesscache[lrupos].mode = rmode; 425 np->n_accesscache[lrupos].stamp = 
time_second; 426 } 427 NFSUNLOCKNODE(np); 428 if (retmode != NULL) 429 *retmode = rmode; 430 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 431 } else if (NFS_ISV4(vp)) { 432 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 433 } 434 #ifdef KDTRACE_HOOKS 435 if (error != 0) 436 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 437 error); 438 #endif 439 return (error); 440 } 441 442 /* 443 * nfs access vnode op. 444 * For nfs version 2, just return ok. File accesses may fail later. 445 * For nfs version 3, use the access rpc to check accessibility. If file modes 446 * are changed on the server, accesses might still fail later. 447 */ 448 static int 449 nfs_access(struct vop_access_args *ap) 450 { 451 struct vnode *vp = ap->a_vp; 452 int error = 0, i, gotahit; 453 u_int32_t mode, wmode, rmode; 454 int v34 = NFS_ISV34(vp); 455 struct nfsnode *np = VTONFS(vp); 456 457 /* 458 * Disallow write attempts on filesystems mounted read-only; 459 * unless the file is a socket, fifo, or a block or character 460 * device resident on the filesystem. 461 */ 462 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 463 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 464 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 465 switch (vp->v_type) { 466 case VREG: 467 case VDIR: 468 case VLNK: 469 return (EROFS); 470 default: 471 break; 472 } 473 } 474 /* 475 * For nfs v3 or v4, check to see if we have done this recently, and if 476 * so return our cached result instead of making an ACCESS call. 477 * If not, do an access rpc, otherwise you are stuck emulating 478 * ufs_access() locally using the vattr. This may not be correct, 479 * since the server may apply other access criteria such as 480 * client uid-->server uid mapping that we do not know about. 
481 */ 482 if (v34) { 483 if (ap->a_accmode & VREAD) 484 mode = NFSACCESS_READ; 485 else 486 mode = 0; 487 if (vp->v_type != VDIR) { 488 if (ap->a_accmode & VWRITE) 489 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 490 if (ap->a_accmode & VAPPEND) 491 mode |= NFSACCESS_EXTEND; 492 if (ap->a_accmode & VEXEC) 493 mode |= NFSACCESS_EXECUTE; 494 if (ap->a_accmode & VDELETE) 495 mode |= NFSACCESS_DELETE; 496 } else { 497 if (ap->a_accmode & VWRITE) 498 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 499 if (ap->a_accmode & VAPPEND) 500 mode |= NFSACCESS_EXTEND; 501 if (ap->a_accmode & VEXEC) 502 mode |= NFSACCESS_LOOKUP; 503 if (ap->a_accmode & VDELETE) 504 mode |= NFSACCESS_DELETE; 505 if (ap->a_accmode & VDELETE_CHILD) 506 mode |= NFSACCESS_MODIFY; 507 } 508 /* XXX safety belt, only make blanket request if caching */ 509 if (nfsaccess_cache_timeout > 0) { 510 wmode = NFSACCESS_READ | NFSACCESS_MODIFY | 511 NFSACCESS_EXTEND | NFSACCESS_EXECUTE | 512 NFSACCESS_DELETE | NFSACCESS_LOOKUP; 513 } else { 514 wmode = mode; 515 } 516 517 /* 518 * Does our cached result allow us to give a definite yes to 519 * this request? 520 */ 521 gotahit = 0; 522 NFSLOCKNODE(np); 523 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 524 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 525 if (time_second < (np->n_accesscache[i].stamp 526 + nfsaccess_cache_timeout) && 527 (np->n_accesscache[i].mode & mode) == mode) { 528 NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); 529 gotahit = 1; 530 } 531 break; 532 } 533 } 534 NFSUNLOCKNODE(np); 535 #ifdef KDTRACE_HOOKS 536 if (gotahit != 0) 537 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 538 ap->a_cred->cr_uid, mode); 539 else 540 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 541 ap->a_cred->cr_uid, mode); 542 #endif 543 if (gotahit == 0) { 544 /* 545 * Either a no, or a don't know. Go to the wire. 
546 */ 547 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 548 error = nfs34_access_otw(vp, wmode, ap->a_td, 549 ap->a_cred, &rmode); 550 if (!error && 551 (rmode & mode) != mode) 552 error = EACCES; 553 } 554 return (error); 555 } else { 556 if ((error = nfsspec_access(ap)) != 0) { 557 return (error); 558 } 559 /* 560 * Attempt to prevent a mapped root from accessing a file 561 * which it shouldn't. We try to read a byte from the file 562 * if the user is root and the file is not zero length. 563 * After calling nfsspec_access, we should have the correct 564 * file size cached. 565 */ 566 NFSLOCKNODE(np); 567 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 568 && VTONFS(vp)->n_size > 0) { 569 struct iovec aiov; 570 struct uio auio; 571 char buf[1]; 572 573 NFSUNLOCKNODE(np); 574 aiov.iov_base = buf; 575 aiov.iov_len = 1; 576 auio.uio_iov = &aiov; 577 auio.uio_iovcnt = 1; 578 auio.uio_offset = 0; 579 auio.uio_resid = 1; 580 auio.uio_segflg = UIO_SYSSPACE; 581 auio.uio_rw = UIO_READ; 582 auio.uio_td = ap->a_td; 583 584 if (vp->v_type == VREG) 585 error = ncl_readrpc(vp, &auio, ap->a_cred); 586 else if (vp->v_type == VDIR) { 587 char* bp; 588 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 589 aiov.iov_base = bp; 590 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 591 error = ncl_readdirrpc(vp, &auio, ap->a_cred, 592 ap->a_td); 593 free(bp, M_TEMP); 594 } else if (vp->v_type == VLNK) 595 error = ncl_readlinkrpc(vp, &auio, ap->a_cred); 596 else 597 error = EACCES; 598 } else 599 NFSUNLOCKNODE(np); 600 return (error); 601 } 602 } 603 604 /* 605 * nfs open vnode op 606 * Check to see if the type is ok 607 * and that deletion is not in progress. 608 * For paged in text files, you will need to flush the page cache 609 * if consistency is lost. 
610 */ 611 /* ARGSUSED */ 612 static int 613 nfs_open(struct vop_open_args *ap) 614 { 615 struct vnode *vp = ap->a_vp; 616 struct nfsnode *np = VTONFS(vp); 617 struct vattr vattr; 618 int error; 619 int fmode = ap->a_mode; 620 struct ucred *cred; 621 vm_object_t obj; 622 623 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 624 return (EOPNOTSUPP); 625 626 /* 627 * For NFSv4, we need to do the Open Op before cache validation, 628 * so that we conform to RFC3530 Sec. 9.3.1. 629 */ 630 if (NFS_ISV4(vp)) { 631 error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); 632 if (error) { 633 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 634 (gid_t)0); 635 return (error); 636 } 637 } 638 639 /* 640 * Now, if this Open will be doing reading, re-validate/flush the 641 * cache, so that Close/Open coherency is maintained. 642 */ 643 NFSLOCKNODE(np); 644 if (np->n_flag & NMODIFIED) { 645 NFSUNLOCKNODE(np); 646 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 647 if (error == EINTR || error == EIO) { 648 if (NFS_ISV4(vp)) 649 (void) nfsrpc_close(vp, 0, ap->a_td); 650 return (error); 651 } 652 NFSLOCKNODE(np); 653 np->n_attrstamp = 0; 654 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 655 if (vp->v_type == VDIR) 656 np->n_direofoffset = 0; 657 NFSUNLOCKNODE(np); 658 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 659 if (error) { 660 if (NFS_ISV4(vp)) 661 (void) nfsrpc_close(vp, 0, ap->a_td); 662 return (error); 663 } 664 NFSLOCKNODE(np); 665 np->n_mtime = vattr.va_mtime; 666 if (NFS_ISV4(vp)) 667 np->n_change = vattr.va_filerev; 668 } else { 669 NFSUNLOCKNODE(np); 670 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 671 if (error) { 672 if (NFS_ISV4(vp)) 673 (void) nfsrpc_close(vp, 0, ap->a_td); 674 return (error); 675 } 676 NFSLOCKNODE(np); 677 if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || 678 NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 679 if (vp->v_type == VDIR) 680 np->n_direofoffset = 0; 681 NFSUNLOCKNODE(np); 682 error = ncl_vinvalbuf(vp, V_SAVE, 
ap->a_td, 1); 683 if (error == EINTR || error == EIO) { 684 if (NFS_ISV4(vp)) 685 (void) nfsrpc_close(vp, 0, ap->a_td); 686 return (error); 687 } 688 NFSLOCKNODE(np); 689 np->n_mtime = vattr.va_mtime; 690 if (NFS_ISV4(vp)) 691 np->n_change = vattr.va_filerev; 692 } 693 } 694 695 /* 696 * If the object has >= 1 O_DIRECT active opens, we disable caching. 697 */ 698 if (newnfs_directio_enable && (fmode & O_DIRECT) && 699 (vp->v_type == VREG)) { 700 if (np->n_directio_opens == 0) { 701 NFSUNLOCKNODE(np); 702 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 703 if (error) { 704 if (NFS_ISV4(vp)) 705 (void) nfsrpc_close(vp, 0, ap->a_td); 706 return (error); 707 } 708 NFSLOCKNODE(np); 709 np->n_flag |= NNONCACHE; 710 } 711 np->n_directio_opens++; 712 } 713 714 /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ 715 if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) 716 np->n_flag |= NWRITEOPENED; 717 718 /* 719 * If this is an open for writing, capture a reference to the 720 * credentials, so they can be used by ncl_putpages(). Using 721 * these write credentials is preferable to the credentials of 722 * whatever thread happens to be doing the VOP_PUTPAGES() since 723 * the write RPCs are less likely to fail with EACCES. 724 */ 725 if ((fmode & FWRITE) != 0) { 726 cred = np->n_writecred; 727 np->n_writecred = crhold(ap->a_cred); 728 } else 729 cred = NULL; 730 NFSUNLOCKNODE(np); 731 732 if (cred != NULL) 733 crfree(cred); 734 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 735 736 /* 737 * If the text file has been mmap'd, flush any dirty pages to the 738 * buffer cache and then... 739 * Make sure all writes are pushed to the NFS server. If this is not 740 * done, the modify time of the file can change while the text 741 * file is being executed. This will cause the process that is 742 * executing the text file to be terminated. 
743 */ 744 if (vp->v_writecount <= -1) { 745 if ((obj = vp->v_object) != NULL && 746 vm_object_mightbedirty(obj)) { 747 VM_OBJECT_WLOCK(obj); 748 vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); 749 VM_OBJECT_WUNLOCK(obj); 750 } 751 752 /* Now, flush the buffer cache. */ 753 ncl_flush(vp, MNT_WAIT, curthread, 0, 0); 754 755 /* And, finally, make sure that n_mtime is up to date. */ 756 np = VTONFS(vp); 757 NFSLOCKNODE(np); 758 np->n_mtime = np->n_vattr.na_mtime; 759 NFSUNLOCKNODE(np); 760 } 761 return (0); 762 } 763 764 /* 765 * nfs close vnode op 766 * What an NFS client should do upon close after writing is a debatable issue. 767 * Most NFS clients push delayed writes to the server upon close, basically for 768 * two reasons: 769 * 1 - So that any write errors may be reported back to the client process 770 * doing the close system call. By far the two most likely errors are 771 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 772 * 2 - To put a worst case upper bound on cache inconsistency between 773 * multiple clients for the file. 774 * There is also a consistency problem for Version 2 of the protocol w.r.t. 775 * not being able to tell if other clients are writing a file concurrently, 776 * since there is no way of knowing if the changed modify time in the reply 777 * is only due to the write for this client. 778 * (NFS Version 3 provides weak cache consistency data in the reply that 779 * should be sufficient to detect and handle this case.) 780 * 781 * The current code does the following: 782 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 783 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 784 * or commit them (this satisfies 1 and 2 except for the 785 * case where the server crashes after this close but 786 * before the commit RPC, which is felt to be "good 787 * enough". 
Changing the last argument to ncl_flush() to 788 * a 1 would force a commit operation, if it is felt a 789 * commit is necessary now. 790 * for NFS Version 4 - flush the dirty buffers and commit them, if 791 * nfscl_mustflush() says this is necessary. 792 * It is necessary if there is no write delegation held, 793 * in order to satisfy open/close coherency. 794 * If the file isn't cached on local stable storage, 795 * it may be necessary in order to detect "out of space" 796 * errors from the server, if the write delegation 797 * issued by the server doesn't allow the file to grow. 798 */ 799 /* ARGSUSED */ 800 static int 801 nfs_close(struct vop_close_args *ap) 802 { 803 struct vnode *vp = ap->a_vp; 804 struct nfsnode *np = VTONFS(vp); 805 struct nfsvattr nfsva; 806 struct ucred *cred; 807 int error = 0, ret, localcred = 0; 808 int fmode = ap->a_fflag; 809 810 if (NFSCL_FORCEDISM(vp->v_mount)) 811 return (0); 812 /* 813 * During shutdown, a_cred isn't valid, so just use root. 814 */ 815 if (ap->a_cred == NOCRED) { 816 cred = newnfs_getcred(); 817 localcred = 1; 818 } else { 819 cred = ap->a_cred; 820 } 821 if (vp->v_type == VREG) { 822 /* 823 * Examine and clean dirty pages, regardless of NMODIFIED. 824 * This closes a major hole in close-to-open consistency. 825 * We want to push out all dirty pages (and buffers) on 826 * close, regardless of whether they were dirtied by 827 * mmap'ed writes or via write(). 828 */ 829 if (nfs_clean_pages_on_close && vp->v_object) { 830 VM_OBJECT_WLOCK(vp->v_object); 831 vm_object_page_clean(vp->v_object, 0, 0, 0); 832 VM_OBJECT_WUNLOCK(vp->v_object); 833 } 834 NFSLOCKNODE(np); 835 if (np->n_flag & NMODIFIED) { 836 NFSUNLOCKNODE(np); 837 if (NFS_ISV3(vp)) { 838 /* 839 * Under NFSv3 we have dirty buffers to dispose of. We 840 * must flush them to the NFS server. We have the option 841 * of waiting all the way through the commit rpc or just 842 * waiting for the initial write. 
The default is to only 843 * wait through the initial write so the data is in the 844 * server's cache, which is roughly similar to the state 845 * a standard disk subsystem leaves the file in on close(). 846 * 847 * We cannot clear the NMODIFIED bit in np->n_flag due to 848 * potential races with other processes, and certainly 849 * cannot clear it if we don't commit. 850 * These races occur when there is no longer the old 851 * traditional vnode locking implemented for Vnode Ops. 852 */ 853 int cm = newnfs_commit_on_close ? 1 : 0; 854 error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); 855 /* np->n_flag &= ~NMODIFIED; */ 856 } else if (NFS_ISV4(vp)) { 857 if (nfscl_mustflush(vp) != 0) { 858 int cm = newnfs_commit_on_close ? 1 : 0; 859 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 860 cm, 0); 861 /* 862 * as above w.r.t races when clearing 863 * NMODIFIED. 864 * np->n_flag &= ~NMODIFIED; 865 */ 866 } 867 } else { 868 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 869 } 870 NFSLOCKNODE(np); 871 } 872 /* 873 * Invalidate the attribute cache in all cases. 874 * An open is going to fetch fresh attrs any way, other procs 875 * on this node that have file open will be forced to do an 876 * otw attr fetch, but this is safe. 877 * --> A user found that their RPC count dropped by 20% when 878 * this was commented out and I can't see any requirement 879 * for it, so I've disabled it when negative lookups are 880 * enabled. (What does this have to do with negative lookup 881 * caching? Well nothing, except it was reported by the 882 * same user that needed negative lookup caching and I wanted 883 * there to be a way to disable it to see if it 884 * is the cause of some caching/coherency issue that might 885 * crop up.) 
886 */ 887 if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { 888 np->n_attrstamp = 0; 889 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 890 } 891 if (np->n_flag & NWRITEERR) { 892 np->n_flag &= ~NWRITEERR; 893 error = np->n_error; 894 } 895 NFSUNLOCKNODE(np); 896 } 897 898 if (NFS_ISV4(vp)) { 899 /* 900 * Get attributes so "change" is up to date. 901 */ 902 if (error == 0 && nfscl_mustflush(vp) != 0 && 903 vp->v_type == VREG && 904 (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) { 905 ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva, 906 NULL); 907 if (!ret) { 908 np->n_change = nfsva.na_filerev; 909 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 910 NULL, 0, 0); 911 } 912 } 913 914 /* 915 * and do the close. 916 */ 917 ret = nfsrpc_close(vp, 0, ap->a_td); 918 if (!error && ret) 919 error = ret; 920 if (error) 921 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 922 (gid_t)0); 923 } 924 if (newnfs_directio_enable) 925 KASSERT((np->n_directio_asyncwr == 0), 926 ("nfs_close: dirty unflushed (%d) directio buffers\n", 927 np->n_directio_asyncwr)); 928 if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 929 NFSLOCKNODE(np); 930 KASSERT((np->n_directio_opens > 0), 931 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 932 np->n_directio_opens--; 933 if (np->n_directio_opens == 0) 934 np->n_flag &= ~NNONCACHE; 935 NFSUNLOCKNODE(np); 936 } 937 if (localcred) 938 NFSFREECRED(cred); 939 return (error); 940 } 941 942 /* 943 * nfs getattr call from vfs. 944 */ 945 static int 946 nfs_getattr(struct vop_getattr_args *ap) 947 { 948 struct vnode *vp = ap->a_vp; 949 struct thread *td = curthread; /* XXX */ 950 struct nfsnode *np = VTONFS(vp); 951 int error = 0; 952 struct nfsvattr nfsva; 953 struct vattr *vap = ap->a_vap; 954 struct vattr vattr; 955 956 /* 957 * Update local times for special files. 958 */ 959 NFSLOCKNODE(np); 960 if (np->n_flag & (NACC | NUPD)) 961 np->n_flag |= NCHG; 962 NFSUNLOCKNODE(np); 963 /* 964 * First look in the cache. 
965 */ 966 if (ncl_getattrcache(vp, &vattr) == 0) { 967 ncl_copy_vattr(vap, &vattr); 968 969 /* 970 * Get the local modify time for the case of a write 971 * delegation. 972 */ 973 nfscl_deleggetmodtime(vp, &vap->va_mtime); 974 return (0); 975 } 976 977 if (NFS_ISV34(vp) && nfs_prime_access_cache && 978 nfsaccess_cache_timeout > 0) { 979 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 980 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); 981 if (ncl_getattrcache(vp, ap->a_vap) == 0) { 982 nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); 983 return (0); 984 } 985 } 986 error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL); 987 if (!error) 988 error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0); 989 if (!error) { 990 /* 991 * Get the local modify time for the case of a write 992 * delegation. 993 */ 994 nfscl_deleggetmodtime(vp, &vap->va_mtime); 995 } else if (NFS_ISV4(vp)) { 996 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 997 } 998 return (error); 999 } 1000 1001 /* 1002 * nfs setattr call. 1003 */ 1004 static int 1005 nfs_setattr(struct vop_setattr_args *ap) 1006 { 1007 struct vnode *vp = ap->a_vp; 1008 struct nfsnode *np = VTONFS(vp); 1009 struct thread *td = curthread; /* XXX */ 1010 struct vattr *vap = ap->a_vap; 1011 int error = 0; 1012 u_quad_t tsize; 1013 1014 #ifndef nolint 1015 tsize = (u_quad_t)0; 1016 #endif 1017 1018 /* 1019 * Setting of flags and marking of atimes are not supported. 1020 */ 1021 if (vap->va_flags != VNOVAL) 1022 return (EOPNOTSUPP); 1023 1024 /* 1025 * Disallow write attempts if the filesystem is mounted read-only. 
1026 */ 1027 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 1028 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 1029 vap->va_mtime.tv_sec != VNOVAL || 1030 vap->va_birthtime.tv_sec != VNOVAL || 1031 vap->va_mode != (mode_t)VNOVAL) && 1032 (vp->v_mount->mnt_flag & MNT_RDONLY)) 1033 return (EROFS); 1034 if (vap->va_size != VNOVAL) { 1035 switch (vp->v_type) { 1036 case VDIR: 1037 return (EISDIR); 1038 case VCHR: 1039 case VBLK: 1040 case VSOCK: 1041 case VFIFO: 1042 if (vap->va_mtime.tv_sec == VNOVAL && 1043 vap->va_atime.tv_sec == VNOVAL && 1044 vap->va_birthtime.tv_sec == VNOVAL && 1045 vap->va_mode == (mode_t)VNOVAL && 1046 vap->va_uid == (uid_t)VNOVAL && 1047 vap->va_gid == (gid_t)VNOVAL) 1048 return (0); 1049 vap->va_size = VNOVAL; 1050 break; 1051 default: 1052 /* 1053 * Disallow write attempts if the filesystem is 1054 * mounted read-only. 1055 */ 1056 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1057 return (EROFS); 1058 /* 1059 * We run vnode_pager_setsize() early (why?), 1060 * we must set np->n_size now to avoid vinvalbuf 1061 * V_SAVE races that might setsize a lower 1062 * value. 1063 */ 1064 NFSLOCKNODE(np); 1065 tsize = np->n_size; 1066 NFSUNLOCKNODE(np); 1067 error = ncl_meta_setsize(vp, td, vap->va_size); 1068 NFSLOCKNODE(np); 1069 if (np->n_flag & NMODIFIED) { 1070 tsize = np->n_size; 1071 NFSUNLOCKNODE(np); 1072 error = ncl_vinvalbuf(vp, vap->va_size == 0 ? 1073 0 : V_SAVE, td, 1); 1074 if (error != 0) { 1075 vnode_pager_setsize(vp, tsize); 1076 return (error); 1077 } 1078 /* 1079 * Call nfscl_delegmodtime() to set the modify time 1080 * locally, as required. 1081 */ 1082 nfscl_delegmodtime(vp); 1083 } else 1084 NFSUNLOCKNODE(np); 1085 /* 1086 * np->n_size has already been set to vap->va_size 1087 * in ncl_meta_setsize(). We must set it again since 1088 * nfs_loadattrcache() could be called through 1089 * ncl_meta_setsize() and could modify np->n_size. 
1090 */ 1091 NFSLOCKNODE(np); 1092 np->n_vattr.na_size = np->n_size = vap->va_size; 1093 NFSUNLOCKNODE(np); 1094 } 1095 } else { 1096 NFSLOCKNODE(np); 1097 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 1098 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 1099 NFSUNLOCKNODE(np); 1100 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 1101 if (error == EINTR || error == EIO) 1102 return (error); 1103 } else 1104 NFSUNLOCKNODE(np); 1105 } 1106 error = nfs_setattrrpc(vp, vap, ap->a_cred, td); 1107 if (error && vap->va_size != VNOVAL) { 1108 NFSLOCKNODE(np); 1109 np->n_size = np->n_vattr.na_size = tsize; 1110 vnode_pager_setsize(vp, tsize); 1111 NFSUNLOCKNODE(np); 1112 } 1113 return (error); 1114 } 1115 1116 /* 1117 * Do an nfs setattr rpc. 1118 */ 1119 static int 1120 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, 1121 struct thread *td) 1122 { 1123 struct nfsnode *np = VTONFS(vp); 1124 int error, ret, attrflag, i; 1125 struct nfsvattr nfsva; 1126 1127 if (NFS_ISV34(vp)) { 1128 NFSLOCKNODE(np); 1129 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 1130 np->n_accesscache[i].stamp = 0; 1131 np->n_flag |= NDELEGMOD; 1132 NFSUNLOCKNODE(np); 1133 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 1134 } 1135 error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, 1136 NULL); 1137 if (attrflag) { 1138 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1139 if (ret && !error) 1140 error = ret; 1141 } 1142 if (error && NFS_ISV4(vp)) 1143 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); 1144 return (error); 1145 } 1146 1147 /* 1148 * nfs lookup call, one step at a time... 
1149 * First look in cache 1150 * If not found, unlock the directory nfsnode and do the rpc 1151 */ 1152 static int 1153 nfs_lookup(struct vop_lookup_args *ap) 1154 { 1155 struct componentname *cnp = ap->a_cnp; 1156 struct vnode *dvp = ap->a_dvp; 1157 struct vnode **vpp = ap->a_vpp; 1158 struct mount *mp = dvp->v_mount; 1159 int flags = cnp->cn_flags; 1160 struct vnode *newvp; 1161 struct nfsmount *nmp; 1162 struct nfsnode *np, *newnp; 1163 int error = 0, attrflag, dattrflag, ltype, ncticks; 1164 struct thread *td = cnp->cn_thread; 1165 struct nfsfh *nfhp; 1166 struct nfsvattr dnfsva, nfsva; 1167 struct vattr vattr; 1168 struct timespec nctime; 1169 uint32_t openmode; 1170 1171 *vpp = NULLVP; 1172 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 1173 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 1174 return (EROFS); 1175 if (dvp->v_type != VDIR) 1176 return (ENOTDIR); 1177 nmp = VFSTONFS(mp); 1178 np = VTONFS(dvp); 1179 1180 /* For NFSv4, wait until any remove is done. */ 1181 NFSLOCKNODE(np); 1182 while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { 1183 np->n_flag |= NREMOVEWANT; 1184 (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); 1185 } 1186 NFSUNLOCKNODE(np); 1187 1188 error = vn_dir_check_exec(dvp, cnp); 1189 if (error != 0) 1190 return (error); 1191 error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); 1192 if (error > 0 && error != ENOENT) 1193 return (error); 1194 if (error == -1) { 1195 /* 1196 * Lookups of "." are special and always return the 1197 * current directory. cache_lookup() already handles 1198 * associated locking bookkeeping, etc. 1199 */ 1200 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { 1201 /* XXX: Is this really correct? */ 1202 if (cnp->cn_nameiop != LOOKUP && 1203 (flags & ISLASTCN)) 1204 cnp->cn_flags |= SAVENAME; 1205 return (0); 1206 } 1207 1208 /* 1209 * We only accept a positive hit in the cache if the 1210 * change time of the file matches our cached copy. 
1211 * Otherwise, we discard the cache entry and fallback 1212 * to doing a lookup RPC. We also only trust cache 1213 * entries for less than nm_nametimeo seconds. 1214 * 1215 * To better handle stale file handles and attributes, 1216 * clear the attribute cache of this node if it is a 1217 * leaf component, part of an open() call, and not 1218 * locally modified before fetching the attributes. 1219 * This should allow stale file handles to be detected 1220 * here where we can fall back to a LOOKUP RPC to 1221 * recover rather than having nfs_open() detect the 1222 * stale file handle and failing open(2) with ESTALE. 1223 */ 1224 newvp = *vpp; 1225 newnp = VTONFS(newvp); 1226 if (!(nmp->nm_flag & NFSMNT_NOCTO) && 1227 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1228 !(newnp->n_flag & NMODIFIED)) { 1229 NFSLOCKNODE(newnp); 1230 newnp->n_attrstamp = 0; 1231 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1232 NFSUNLOCKNODE(newnp); 1233 } 1234 if (nfscl_nodeleg(newvp, 0) == 0 || 1235 ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && 1236 VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && 1237 timespeccmp(&vattr.va_ctime, &nctime, ==))) { 1238 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1239 if (cnp->cn_nameiop != LOOKUP && 1240 (flags & ISLASTCN)) 1241 cnp->cn_flags |= SAVENAME; 1242 return (0); 1243 } 1244 cache_purge(newvp); 1245 if (dvp != newvp) 1246 vput(newvp); 1247 else 1248 vrele(newvp); 1249 *vpp = NULLVP; 1250 } else if (error == ENOENT) { 1251 if (VN_IS_DOOMED(dvp)) 1252 return (ENOENT); 1253 /* 1254 * We only accept a negative hit in the cache if the 1255 * modification time of the parent directory matches 1256 * the cached copy in the name cache entry. 1257 * Otherwise, we discard all of the negative cache 1258 * entries for this directory. We also only trust 1259 * negative cache entries for up to nm_negnametimeo 1260 * seconds. 
1261 */ 1262 if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && 1263 VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 1264 timespeccmp(&vattr.va_mtime, &nctime, ==)) { 1265 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1266 return (ENOENT); 1267 } 1268 cache_purge_negative(dvp); 1269 } 1270 1271 /* 1272 * If this an NFSv4.1/4.2 mount using the "oneopenown" mount 1273 * option, it is possible to do the Open operation in the same 1274 * compound as Lookup, so long as delegations are not being 1275 * issued. This saves doing a separate RPC for Open. 1276 */ 1277 openmode = 0; 1278 NFSLOCKMNT(nmp); 1279 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && 1280 (nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 && 1281 (!NFSMNT_RDONLY(mp) || (flags & OPENWRITE) == 0) && 1282 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN)) { 1283 if ((flags & OPENREAD) != 0) 1284 openmode |= NFSV4OPEN_ACCESSREAD; 1285 if ((flags & OPENWRITE) != 0) 1286 openmode |= NFSV4OPEN_ACCESSWRITE; 1287 } 1288 NFSUNLOCKMNT(nmp); 1289 1290 newvp = NULLVP; 1291 NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); 1292 error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1293 cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1294 NULL, openmode); 1295 if (dattrflag) 1296 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1297 if (error) { 1298 if (newvp != NULLVP) { 1299 vput(newvp); 1300 *vpp = NULLVP; 1301 } 1302 1303 if (error != ENOENT) { 1304 if (NFS_ISV4(dvp)) 1305 error = nfscl_maperr(td, error, (uid_t)0, 1306 (gid_t)0); 1307 return (error); 1308 } 1309 1310 /* The requested file was not found. */ 1311 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 1312 (flags & ISLASTCN)) { 1313 /* 1314 * XXX: UFS does a full VOP_ACCESS(dvp, 1315 * VWRITE) here instead of just checking 1316 * MNT_RDONLY. 
1317 */ 1318 if (mp->mnt_flag & MNT_RDONLY) 1319 return (EROFS); 1320 cnp->cn_flags |= SAVENAME; 1321 return (EJUSTRETURN); 1322 } 1323 1324 if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) { 1325 /* 1326 * Cache the modification time of the parent 1327 * directory from the post-op attributes in 1328 * the name cache entry. The negative cache 1329 * entry will be ignored once the directory 1330 * has changed. Don't bother adding the entry 1331 * if the directory has already changed. 1332 */ 1333 NFSLOCKNODE(np); 1334 if (timespeccmp(&np->n_vattr.na_mtime, 1335 &dnfsva.na_mtime, ==)) { 1336 NFSUNLOCKNODE(np); 1337 cache_enter_time(dvp, NULL, cnp, 1338 &dnfsva.na_mtime, NULL); 1339 } else 1340 NFSUNLOCKNODE(np); 1341 } 1342 return (ENOENT); 1343 } 1344 1345 /* 1346 * Handle RENAME case... 1347 */ 1348 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1349 if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1350 free(nfhp, M_NFSFH); 1351 return (EISDIR); 1352 } 1353 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1354 LK_EXCLUSIVE); 1355 if (error) 1356 return (error); 1357 newvp = NFSTOV(np); 1358 if (attrflag) 1359 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1360 0, 1); 1361 *vpp = newvp; 1362 cnp->cn_flags |= SAVENAME; 1363 return (0); 1364 } 1365 1366 if (flags & ISDOTDOT) { 1367 ltype = NFSVOPISLOCKED(dvp); 1368 error = vfs_busy(mp, MBF_NOWAIT); 1369 if (error != 0) { 1370 vfs_ref(mp); 1371 NFSVOPUNLOCK(dvp); 1372 error = vfs_busy(mp, 0); 1373 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1374 vfs_rel(mp); 1375 if (error == 0 && VN_IS_DOOMED(dvp)) { 1376 vfs_unbusy(mp); 1377 error = ENOENT; 1378 } 1379 if (error != 0) 1380 return (error); 1381 } 1382 NFSVOPUNLOCK(dvp); 1383 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1384 cnp->cn_lkflags); 1385 if (error == 0) 1386 newvp = NFSTOV(np); 1387 vfs_unbusy(mp); 1388 if (newvp != dvp) 1389 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1390 if (VN_IS_DOOMED(dvp)) { 1391 if (error == 0) { 1392 if (newvp == dvp) 
1393 vrele(newvp); 1394 else 1395 vput(newvp); 1396 } 1397 error = ENOENT; 1398 } 1399 if (error != 0) 1400 return (error); 1401 if (attrflag) 1402 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1403 0, 1); 1404 } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1405 free(nfhp, M_NFSFH); 1406 VREF(dvp); 1407 newvp = dvp; 1408 if (attrflag) 1409 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1410 0, 1); 1411 } else { 1412 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1413 cnp->cn_lkflags); 1414 if (error) 1415 return (error); 1416 newvp = NFSTOV(np); 1417 if (attrflag) 1418 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1419 0, 1); 1420 else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1421 !(np->n_flag & NMODIFIED)) { 1422 /* 1423 * Flush the attribute cache when opening a 1424 * leaf node to ensure that fresh attributes 1425 * are fetched in nfs_open() since we did not 1426 * fetch attributes from the LOOKUP reply. 1427 */ 1428 NFSLOCKNODE(np); 1429 np->n_attrstamp = 0; 1430 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1431 NFSUNLOCKNODE(np); 1432 } 1433 } 1434 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) 1435 cnp->cn_flags |= SAVENAME; 1436 if ((cnp->cn_flags & MAKEENTRY) && dvp != newvp && 1437 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && 1438 attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) 1439 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 1440 newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); 1441 *vpp = newvp; 1442 return (0); 1443 } 1444 1445 /* 1446 * nfs read call. 1447 * Just call ncl_bioread() to do the work. 
1448 */ 1449 static int 1450 nfs_read(struct vop_read_args *ap) 1451 { 1452 struct vnode *vp = ap->a_vp; 1453 1454 switch (vp->v_type) { 1455 case VREG: 1456 return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); 1457 case VDIR: 1458 return (EISDIR); 1459 default: 1460 return (EOPNOTSUPP); 1461 } 1462 } 1463 1464 /* 1465 * nfs readlink call 1466 */ 1467 static int 1468 nfs_readlink(struct vop_readlink_args *ap) 1469 { 1470 struct vnode *vp = ap->a_vp; 1471 1472 if (vp->v_type != VLNK) 1473 return (EINVAL); 1474 return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); 1475 } 1476 1477 /* 1478 * Do a readlink rpc. 1479 * Called by ncl_doio() from below the buffer cache. 1480 */ 1481 int 1482 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1483 { 1484 int error, ret, attrflag; 1485 struct nfsvattr nfsva; 1486 1487 error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, 1488 &attrflag, NULL); 1489 if (attrflag) { 1490 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1491 if (ret && !error) 1492 error = ret; 1493 } 1494 if (error && NFS_ISV4(vp)) 1495 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1496 return (error); 1497 } 1498 1499 /* 1500 * nfs read rpc call 1501 * Ditto above 1502 */ 1503 int 1504 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1505 { 1506 int error, ret, attrflag; 1507 struct nfsvattr nfsva; 1508 struct nfsmount *nmp; 1509 1510 nmp = VFSTONFS(vp->v_mount); 1511 error = EIO; 1512 attrflag = 0; 1513 if (NFSHASPNFS(nmp)) 1514 error = nfscl_doiods(vp, uiop, NULL, NULL, 1515 NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td); 1516 NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); 1517 if (error != 0) 1518 error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, 1519 &attrflag, NULL); 1520 if (attrflag) { 1521 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1522 if (ret && !error) 1523 error = ret; 1524 } 1525 if (error && NFS_ISV4(vp)) 1526 error = 
nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1527 return (error); 1528 } 1529 1530 /* 1531 * nfs write call 1532 */ 1533 int 1534 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1535 int *iomode, int *must_commit, int called_from_strategy) 1536 { 1537 struct nfsvattr nfsva; 1538 int error, attrflag, ret; 1539 struct nfsmount *nmp; 1540 1541 nmp = VFSTONFS(vp->v_mount); 1542 error = EIO; 1543 attrflag = 0; 1544 if (NFSHASPNFS(nmp)) 1545 error = nfscl_doiods(vp, uiop, iomode, must_commit, 1546 NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td); 1547 NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); 1548 if (error != 0) 1549 error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, 1550 uiop->uio_td, &nfsva, &attrflag, NULL, 1551 called_from_strategy); 1552 if (attrflag) { 1553 if (VTONFS(vp)->n_flag & ND_NFSV4) 1554 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, 1555 1); 1556 else 1557 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1558 1); 1559 if (ret && !error) 1560 error = ret; 1561 } 1562 if (DOINGASYNC(vp)) 1563 *iomode = NFSWRITE_FILESYNC; 1564 if (error && NFS_ISV4(vp)) 1565 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1566 return (error); 1567 } 1568 1569 /* 1570 * nfs mknod rpc 1571 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1572 * mode set to specify the file type and the size field for rdev. 
1573 */ 1574 static int 1575 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1576 struct vattr *vap) 1577 { 1578 struct nfsvattr nfsva, dnfsva; 1579 struct vnode *newvp = NULL; 1580 struct nfsnode *np = NULL, *dnp; 1581 struct nfsfh *nfhp; 1582 struct vattr vattr; 1583 int error = 0, attrflag, dattrflag; 1584 u_int32_t rdev; 1585 1586 if (vap->va_type == VCHR || vap->va_type == VBLK) 1587 rdev = vap->va_rdev; 1588 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1589 rdev = 0xffffffff; 1590 else 1591 return (EOPNOTSUPP); 1592 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1593 return (error); 1594 error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, 1595 rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva, 1596 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); 1597 if (!error) { 1598 if (!nfhp) 1599 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1600 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, 1601 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1602 NULL, 0); 1603 if (nfhp) 1604 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1605 cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); 1606 } 1607 if (dattrflag) 1608 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1609 if (!error) { 1610 newvp = NFSTOV(np); 1611 if (attrflag != 0) { 1612 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1613 0, 1); 1614 if (error != 0) 1615 vput(newvp); 1616 } 1617 } 1618 if (!error) { 1619 *vpp = newvp; 1620 } else if (NFS_ISV4(dvp)) { 1621 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, 1622 vap->va_gid); 1623 } 1624 dnp = VTONFS(dvp); 1625 NFSLOCKNODE(dnp); 1626 dnp->n_flag |= NMODIFIED; 1627 if (!dattrflag) { 1628 dnp->n_attrstamp = 0; 1629 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1630 } 1631 NFSUNLOCKNODE(dnp); 1632 return (error); 1633 } 1634 1635 /* 1636 * nfs mknod vop 1637 * just call nfs_mknodrpc() to do the work. 
1638 */ 1639 /* ARGSUSED */ 1640 static int 1641 nfs_mknod(struct vop_mknod_args *ap) 1642 { 1643 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1644 } 1645 1646 static struct mtx nfs_cverf_mtx; 1647 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", 1648 MTX_DEF); 1649 1650 static nfsquad_t 1651 nfs_get_cverf(void) 1652 { 1653 static nfsquad_t cverf; 1654 nfsquad_t ret; 1655 static int cverf_initialized = 0; 1656 1657 mtx_lock(&nfs_cverf_mtx); 1658 if (cverf_initialized == 0) { 1659 cverf.lval[0] = arc4random(); 1660 cverf.lval[1] = arc4random(); 1661 cverf_initialized = 1; 1662 } else 1663 cverf.qval++; 1664 ret = cverf; 1665 mtx_unlock(&nfs_cverf_mtx); 1666 1667 return (ret); 1668 } 1669 1670 /* 1671 * nfs file create call 1672 */ 1673 static int 1674 nfs_create(struct vop_create_args *ap) 1675 { 1676 struct vnode *dvp = ap->a_dvp; 1677 struct vattr *vap = ap->a_vap; 1678 struct componentname *cnp = ap->a_cnp; 1679 struct nfsnode *np = NULL, *dnp; 1680 struct vnode *newvp = NULL; 1681 struct nfsmount *nmp; 1682 struct nfsvattr dnfsva, nfsva; 1683 struct nfsfh *nfhp; 1684 nfsquad_t cverf; 1685 int error = 0, attrflag, dattrflag, fmode = 0; 1686 struct vattr vattr; 1687 1688 /* 1689 * Oops, not for me.. 1690 */ 1691 if (vap->va_type == VSOCK) 1692 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1693 1694 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1695 return (error); 1696 if (vap->va_vaflags & VA_EXCLUSIVE) 1697 fmode |= O_EXCL; 1698 dnp = VTONFS(dvp); 1699 nmp = VFSTONFS(dvp->v_mount); 1700 again: 1701 /* For NFSv4, wait until any remove is done. 
*/ 1702 NFSLOCKNODE(dnp); 1703 while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { 1704 dnp->n_flag |= NREMOVEWANT; 1705 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); 1706 } 1707 NFSUNLOCKNODE(dnp); 1708 1709 cverf = nfs_get_cverf(); 1710 error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1711 vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, 1712 &nfhp, &attrflag, &dattrflag, NULL); 1713 if (!error) { 1714 if (nfhp == NULL) 1715 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1716 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, 1717 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1718 NULL, 0); 1719 if (nfhp != NULL) 1720 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1721 cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); 1722 } 1723 if (dattrflag) 1724 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1725 if (!error) { 1726 newvp = NFSTOV(np); 1727 if (attrflag == 0) 1728 error = nfsrpc_getattr(newvp, cnp->cn_cred, 1729 cnp->cn_thread, &nfsva, NULL); 1730 if (error == 0) 1731 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1732 0, 1); 1733 } 1734 if (error) { 1735 if (newvp != NULL) { 1736 vput(newvp); 1737 newvp = NULL; 1738 } 1739 if (NFS_ISV34(dvp) && (fmode & O_EXCL) && 1740 error == NFSERR_NOTSUPP) { 1741 fmode &= ~O_EXCL; 1742 goto again; 1743 } 1744 } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { 1745 if (nfscl_checksattr(vap, &nfsva)) { 1746 error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, 1747 cnp->cn_thread, &nfsva, &attrflag, NULL); 1748 if (error && (vap->va_uid != (uid_t)VNOVAL || 1749 vap->va_gid != (gid_t)VNOVAL)) { 1750 /* try again without setting uid/gid */ 1751 vap->va_uid = (uid_t)VNOVAL; 1752 vap->va_gid = (uid_t)VNOVAL; 1753 error = nfsrpc_setattr(newvp, vap, NULL, 1754 cnp->cn_cred, cnp->cn_thread, &nfsva, 1755 &attrflag, NULL); 1756 } 1757 if (attrflag) 1758 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1759 NULL, 0, 1); 1760 if (error != 0) 1761 vput(newvp); 1762 } 1763 } 
1764 if (!error) { 1765 if ((cnp->cn_flags & MAKEENTRY) && attrflag) { 1766 if (dvp != newvp) 1767 cache_enter_time(dvp, newvp, cnp, 1768 &nfsva.na_ctime, NULL); 1769 else 1770 printf("nfs_create: bogus NFS server returned " 1771 "the directory as the new file object\n"); 1772 } 1773 *ap->a_vpp = newvp; 1774 } else if (NFS_ISV4(dvp)) { 1775 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, 1776 vap->va_gid); 1777 } 1778 NFSLOCKNODE(dnp); 1779 dnp->n_flag |= NMODIFIED; 1780 if (!dattrflag) { 1781 dnp->n_attrstamp = 0; 1782 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1783 } 1784 NFSUNLOCKNODE(dnp); 1785 return (error); 1786 } 1787 1788 /* 1789 * nfs file remove call 1790 * To try and make nfs semantics closer to ufs semantics, a file that has 1791 * other processes using the vnode is renamed instead of removed and then 1792 * removed later on the last close. 1793 * - If v_usecount > 1 1794 * If a rename is not already in the works 1795 * call nfs_sillyrename() to set it up 1796 * else 1797 * do the remove rpc 1798 */ 1799 static int 1800 nfs_remove(struct vop_remove_args *ap) 1801 { 1802 struct vnode *vp = ap->a_vp; 1803 struct vnode *dvp = ap->a_dvp; 1804 struct componentname *cnp = ap->a_cnp; 1805 struct nfsnode *np = VTONFS(vp); 1806 int error = 0; 1807 struct vattr vattr; 1808 1809 KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name")); 1810 KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); 1811 if (vp->v_type == VDIR) 1812 error = EPERM; 1813 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1814 VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && 1815 vattr.va_nlink > 1)) { 1816 /* 1817 * Purge the name cache so that the chance of a lookup for 1818 * the name succeeding while the remove is in progress is 1819 * minimized. Without node locking it can still happen, such 1820 * that an I/O op returns ESTALE, but since you get this if 1821 * another host removes the file.. 
1822 */ 1823 cache_purge(vp); 1824 /* 1825 * throw away biocache buffers, mainly to avoid 1826 * unnecessary delayed writes later. 1827 */ 1828 error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1); 1829 if (error != EINTR && error != EIO) 1830 /* Do the rpc */ 1831 error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, 1832 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); 1833 /* 1834 * Kludge City: If the first reply to the remove rpc is lost.. 1835 * the reply to the retransmitted request will be ENOENT 1836 * since the file was in fact removed 1837 * Therefore, we cheat and return success. 1838 */ 1839 if (error == ENOENT) 1840 error = 0; 1841 } else if (!np->n_sillyrename) 1842 error = nfs_sillyrename(dvp, vp, cnp); 1843 NFSLOCKNODE(np); 1844 np->n_attrstamp = 0; 1845 NFSUNLOCKNODE(np); 1846 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1847 return (error); 1848 } 1849 1850 /* 1851 * nfs file remove rpc called from nfs_inactive 1852 */ 1853 int 1854 ncl_removeit(struct sillyrename *sp, struct vnode *vp) 1855 { 1856 /* 1857 * Make sure that the directory vnode is still valid. 1858 * XXX we should lock sp->s_dvp here. 1859 */ 1860 if (sp->s_dvp->v_type == VBAD) 1861 return (0); 1862 return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, 1863 sp->s_cred, NULL)); 1864 } 1865 1866 /* 1867 * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). 
1868 */ 1869 static int 1870 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 1871 int namelen, struct ucred *cred, struct thread *td) 1872 { 1873 struct nfsvattr dnfsva; 1874 struct nfsnode *dnp = VTONFS(dvp); 1875 int error = 0, dattrflag; 1876 1877 NFSLOCKNODE(dnp); 1878 dnp->n_flag |= NREMOVEINPROG; 1879 NFSUNLOCKNODE(dnp); 1880 error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, 1881 &dattrflag, NULL); 1882 NFSLOCKNODE(dnp); 1883 if ((dnp->n_flag & NREMOVEWANT)) { 1884 dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); 1885 NFSUNLOCKNODE(dnp); 1886 wakeup((caddr_t)dnp); 1887 } else { 1888 dnp->n_flag &= ~NREMOVEINPROG; 1889 NFSUNLOCKNODE(dnp); 1890 } 1891 if (dattrflag) 1892 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1893 NFSLOCKNODE(dnp); 1894 dnp->n_flag |= NMODIFIED; 1895 if (!dattrflag) { 1896 dnp->n_attrstamp = 0; 1897 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1898 } 1899 NFSUNLOCKNODE(dnp); 1900 if (error && NFS_ISV4(dvp)) 1901 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1902 return (error); 1903 } 1904 1905 /* 1906 * nfs file rename call 1907 */ 1908 static int 1909 nfs_rename(struct vop_rename_args *ap) 1910 { 1911 struct vnode *fvp = ap->a_fvp; 1912 struct vnode *tvp = ap->a_tvp; 1913 struct vnode *fdvp = ap->a_fdvp; 1914 struct vnode *tdvp = ap->a_tdvp; 1915 struct componentname *tcnp = ap->a_tcnp; 1916 struct componentname *fcnp = ap->a_fcnp; 1917 struct nfsnode *fnp = VTONFS(ap->a_fvp); 1918 struct nfsnode *tdnp = VTONFS(ap->a_tdvp); 1919 struct nfsv4node *newv4 = NULL; 1920 int error; 1921 1922 KASSERT((tcnp->cn_flags & HASBUF) != 0 && 1923 (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name")); 1924 /* Check for cross-device rename */ 1925 if ((fvp->v_mount != tdvp->v_mount) || 1926 (tvp && (fvp->v_mount != tvp->v_mount))) { 1927 error = EXDEV; 1928 goto out; 1929 } 1930 1931 if (fvp == tvp) { 1932 printf("nfs_rename: fvp == tvp (can't happen)\n"); 1933 error = 0; 1934 goto out; 1935 } 1936 if 
((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) 1937 goto out; 1938 1939 /* 1940 * We have to flush B_DELWRI data prior to renaming 1941 * the file. If we don't, the delayed-write buffers 1942 * can be flushed out later after the file has gone stale 1943 * under NFSV3. NFSV2 does not have this problem because 1944 * ( as far as I can tell ) it flushes dirty buffers more 1945 * often. 1946 * 1947 * Skip the rename operation if the fsync fails, this can happen 1948 * due to the server's volume being full, when we pushed out data 1949 * that was written back to our cache earlier. Not checking for 1950 * this condition can result in potential (silent) data loss. 1951 */ 1952 error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); 1953 NFSVOPUNLOCK(fvp); 1954 if (!error && tvp) 1955 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); 1956 if (error) 1957 goto out; 1958 1959 /* 1960 * If the tvp exists and is in use, sillyrename it before doing the 1961 * rename of the new file over it. 1962 * XXX Can't sillyrename a directory. 1963 */ 1964 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && 1965 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { 1966 vput(tvp); 1967 tvp = NULL; 1968 } 1969 1970 error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, 1971 tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, 1972 tcnp->cn_thread); 1973 1974 if (error == 0 && NFS_ISV4(tdvp)) { 1975 /* 1976 * For NFSv4, check to see if it is the same name and 1977 * replace the name, if it is different. 
1978 */ 1979 newv4 = malloc( 1980 sizeof (struct nfsv4node) + 1981 tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, 1982 M_NFSV4NODE, M_WAITOK); 1983 NFSLOCKNODE(tdnp); 1984 NFSLOCKNODE(fnp); 1985 if (fnp->n_v4 != NULL && fvp->v_type == VREG && 1986 (fnp->n_v4->n4_namelen != tcnp->cn_namelen || 1987 NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), 1988 tcnp->cn_namelen) || 1989 tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || 1990 NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 1991 tdnp->n_fhp->nfh_len))) { 1992 #ifdef notdef 1993 { char nnn[100]; int nnnl; 1994 nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99; 1995 bcopy(tcnp->cn_nameptr, nnn, nnnl); 1996 nnn[nnnl] = '\0'; 1997 printf("ren replace=%s\n",nnn); 1998 } 1999 #endif 2000 free(fnp->n_v4, M_NFSV4NODE); 2001 fnp->n_v4 = newv4; 2002 newv4 = NULL; 2003 fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; 2004 fnp->n_v4->n4_namelen = tcnp->cn_namelen; 2005 NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 2006 tdnp->n_fhp->nfh_len); 2007 NFSBCOPY(tcnp->cn_nameptr, 2008 NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); 2009 } 2010 NFSUNLOCKNODE(tdnp); 2011 NFSUNLOCKNODE(fnp); 2012 if (newv4 != NULL) 2013 free(newv4, M_NFSV4NODE); 2014 } 2015 2016 if (fvp->v_type == VDIR) { 2017 if (tvp != NULL && tvp->v_type == VDIR) 2018 cache_purge(tdvp); 2019 cache_purge(fdvp); 2020 } 2021 2022 out: 2023 if (tdvp == tvp) 2024 vrele(tdvp); 2025 else 2026 vput(tdvp); 2027 if (tvp) 2028 vput(tvp); 2029 vrele(fdvp); 2030 vrele(fvp); 2031 /* 2032 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 
2033 */ 2034 if (error == ENOENT) 2035 error = 0; 2036 return (error); 2037 } 2038 2039 /* 2040 * nfs file rename rpc called from nfs_remove() above 2041 */ 2042 static int 2043 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, 2044 struct sillyrename *sp) 2045 { 2046 2047 return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, 2048 sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, 2049 scnp->cn_thread)); 2050 } 2051 2052 /* 2053 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 2054 */ 2055 static int 2056 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, 2057 int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, 2058 int tnamelen, struct ucred *cred, struct thread *td) 2059 { 2060 struct nfsvattr fnfsva, tnfsva; 2061 struct nfsnode *fdnp = VTONFS(fdvp); 2062 struct nfsnode *tdnp = VTONFS(tdvp); 2063 int error = 0, fattrflag, tattrflag; 2064 2065 error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, 2066 tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, 2067 &tattrflag, NULL, NULL); 2068 NFSLOCKNODE(fdnp); 2069 fdnp->n_flag |= NMODIFIED; 2070 if (fattrflag != 0) { 2071 NFSUNLOCKNODE(fdnp); 2072 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); 2073 } else { 2074 fdnp->n_attrstamp = 0; 2075 NFSUNLOCKNODE(fdnp); 2076 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 2077 } 2078 NFSLOCKNODE(tdnp); 2079 tdnp->n_flag |= NMODIFIED; 2080 if (tattrflag != 0) { 2081 NFSUNLOCKNODE(tdnp); 2082 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); 2083 } else { 2084 tdnp->n_attrstamp = 0; 2085 NFSUNLOCKNODE(tdnp); 2086 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2087 } 2088 if (error && NFS_ISV4(fdvp)) 2089 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2090 return (error); 2091 } 2092 2093 /* 2094 * nfs hard link create call 2095 */ 2096 static int 2097 nfs_link(struct vop_link_args *ap) 2098 { 2099 struct vnode *vp = ap->a_vp; 2100 struct vnode 
*tdvp = ap->a_tdvp; 2101 struct componentname *cnp = ap->a_cnp; 2102 struct nfsnode *np, *tdnp; 2103 struct nfsvattr nfsva, dnfsva; 2104 int error = 0, attrflag, dattrflag; 2105 2106 /* 2107 * Push all writes to the server, so that the attribute cache 2108 * doesn't get "out of sync" with the server. 2109 * XXX There should be a better way! 2110 */ 2111 VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); 2112 2113 error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, 2114 cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag, 2115 &dattrflag, NULL); 2116 tdnp = VTONFS(tdvp); 2117 NFSLOCKNODE(tdnp); 2118 tdnp->n_flag |= NMODIFIED; 2119 if (dattrflag != 0) { 2120 NFSUNLOCKNODE(tdnp); 2121 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); 2122 } else { 2123 tdnp->n_attrstamp = 0; 2124 NFSUNLOCKNODE(tdnp); 2125 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2126 } 2127 if (attrflag) 2128 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2129 else { 2130 np = VTONFS(vp); 2131 NFSLOCKNODE(np); 2132 np->n_attrstamp = 0; 2133 NFSUNLOCKNODE(np); 2134 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 2135 } 2136 /* 2137 * If negative lookup caching is enabled, I might as well 2138 * add an entry for this node. Not necessary for correctness, 2139 * but if negative caching is enabled, then the system 2140 * must care about lookup caching hit rate, so... 
2141 */ 2142 if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && 2143 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2144 if (tdvp != vp) 2145 cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); 2146 else 2147 printf("nfs_link: bogus NFS server returned " 2148 "the directory as the new link\n"); 2149 } 2150 if (error && NFS_ISV4(vp)) 2151 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, 2152 (gid_t)0); 2153 return (error); 2154 } 2155 2156 /* 2157 * nfs symbolic link create call 2158 */ 2159 static int 2160 nfs_symlink(struct vop_symlink_args *ap) 2161 { 2162 struct vnode *dvp = ap->a_dvp; 2163 struct vattr *vap = ap->a_vap; 2164 struct componentname *cnp = ap->a_cnp; 2165 struct nfsvattr nfsva, dnfsva; 2166 struct nfsfh *nfhp; 2167 struct nfsnode *np = NULL, *dnp; 2168 struct vnode *newvp = NULL; 2169 int error = 0, attrflag, dattrflag, ret; 2170 2171 vap->va_type = VLNK; 2172 error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2173 ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, 2174 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); 2175 if (nfhp) { 2176 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, 2177 &np, NULL, LK_EXCLUSIVE); 2178 if (!ret) 2179 newvp = NFSTOV(np); 2180 else if (!error) 2181 error = ret; 2182 } 2183 if (newvp != NULL) { 2184 if (attrflag) 2185 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 2186 0, 1); 2187 } else if (!error) { 2188 /* 2189 * If we do not have an error and we could not extract the 2190 * newvp from the response due to the request being NFSv2, we 2191 * have to do a lookup in order to obtain a newvp to return. 
2192 */ 2193 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2194 cnp->cn_cred, cnp->cn_thread, &np); 2195 if (!error) 2196 newvp = NFSTOV(np); 2197 } 2198 if (error) { 2199 if (newvp) 2200 vput(newvp); 2201 if (NFS_ISV4(dvp)) 2202 error = nfscl_maperr(cnp->cn_thread, error, 2203 vap->va_uid, vap->va_gid); 2204 } else { 2205 *ap->a_vpp = newvp; 2206 } 2207 2208 dnp = VTONFS(dvp); 2209 NFSLOCKNODE(dnp); 2210 dnp->n_flag |= NMODIFIED; 2211 if (dattrflag != 0) { 2212 NFSUNLOCKNODE(dnp); 2213 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2214 } else { 2215 dnp->n_attrstamp = 0; 2216 NFSUNLOCKNODE(dnp); 2217 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2218 } 2219 /* 2220 * If negative lookup caching is enabled, I might as well 2221 * add an entry for this node. Not necessary for correctness, 2222 * but if negative caching is enabled, then the system 2223 * must care about lookup caching hit rate, so... 2224 */ 2225 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2226 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2227 if (dvp != newvp) 2228 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 2229 NULL); 2230 else 2231 printf("nfs_symlink: bogus NFS server returned " 2232 "the directory as the new file object\n"); 2233 } 2234 return (error); 2235 } 2236 2237 /* 2238 * nfs make dir call 2239 */ 2240 static int 2241 nfs_mkdir(struct vop_mkdir_args *ap) 2242 { 2243 struct vnode *dvp = ap->a_dvp; 2244 struct vattr *vap = ap->a_vap; 2245 struct componentname *cnp = ap->a_cnp; 2246 struct nfsnode *np = NULL, *dnp; 2247 struct vnode *newvp = NULL; 2248 struct vattr vattr; 2249 struct nfsfh *nfhp; 2250 struct nfsvattr nfsva, dnfsva; 2251 int error = 0, attrflag, dattrflag, ret; 2252 2253 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 2254 return (error); 2255 vap->va_type = VDIR; 2256 error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2257 vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, 2258 &attrflag, 
&dattrflag, NULL); 2259 dnp = VTONFS(dvp); 2260 NFSLOCKNODE(dnp); 2261 dnp->n_flag |= NMODIFIED; 2262 if (dattrflag != 0) { 2263 NFSUNLOCKNODE(dnp); 2264 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2265 } else { 2266 dnp->n_attrstamp = 0; 2267 NFSUNLOCKNODE(dnp); 2268 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2269 } 2270 if (nfhp) { 2271 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, 2272 &np, NULL, LK_EXCLUSIVE); 2273 if (!ret) { 2274 newvp = NFSTOV(np); 2275 if (attrflag) 2276 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 2277 NULL, 0, 1); 2278 } else if (!error) 2279 error = ret; 2280 } 2281 if (!error && newvp == NULL) { 2282 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2283 cnp->cn_cred, cnp->cn_thread, &np); 2284 if (!error) { 2285 newvp = NFSTOV(np); 2286 if (newvp->v_type != VDIR) 2287 error = EEXIST; 2288 } 2289 } 2290 if (error) { 2291 if (newvp) 2292 vput(newvp); 2293 if (NFS_ISV4(dvp)) 2294 error = nfscl_maperr(cnp->cn_thread, error, 2295 vap->va_uid, vap->va_gid); 2296 } else { 2297 /* 2298 * If negative lookup caching is enabled, I might as well 2299 * add an entry for this node. Not necessary for correctness, 2300 * but if negative caching is enabled, then the system 2301 * must care about lookup caching hit rate, so... 
2302 */ 2303 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2304 (cnp->cn_flags & MAKEENTRY) && 2305 attrflag != 0 && dattrflag != 0) { 2306 if (dvp != newvp) 2307 cache_enter_time(dvp, newvp, cnp, 2308 &nfsva.na_ctime, &dnfsva.na_ctime); 2309 else 2310 printf("nfs_mkdir: bogus NFS server returned " 2311 "the directory that the directory was " 2312 "created in as the new file object\n"); 2313 } 2314 *ap->a_vpp = newvp; 2315 } 2316 return (error); 2317 } 2318 2319 /* 2320 * nfs remove directory call 2321 */ 2322 static int 2323 nfs_rmdir(struct vop_rmdir_args *ap) 2324 { 2325 struct vnode *vp = ap->a_vp; 2326 struct vnode *dvp = ap->a_dvp; 2327 struct componentname *cnp = ap->a_cnp; 2328 struct nfsnode *dnp; 2329 struct nfsvattr dnfsva; 2330 int error, dattrflag; 2331 2332 if (dvp == vp) 2333 return (EINVAL); 2334 error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2335 cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL); 2336 dnp = VTONFS(dvp); 2337 NFSLOCKNODE(dnp); 2338 dnp->n_flag |= NMODIFIED; 2339 if (dattrflag != 0) { 2340 NFSUNLOCKNODE(dnp); 2341 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2342 } else { 2343 dnp->n_attrstamp = 0; 2344 NFSUNLOCKNODE(dnp); 2345 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2346 } 2347 2348 cache_purge(dvp); 2349 cache_purge(vp); 2350 if (error && NFS_ISV4(dvp)) 2351 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, 2352 (gid_t)0); 2353 /* 2354 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
2355 */ 2356 if (error == ENOENT) 2357 error = 0; 2358 return (error); 2359 } 2360 2361 /* 2362 * nfs readdir call 2363 */ 2364 static int 2365 nfs_readdir(struct vop_readdir_args *ap) 2366 { 2367 struct vnode *vp = ap->a_vp; 2368 struct nfsnode *np = VTONFS(vp); 2369 struct uio *uio = ap->a_uio; 2370 ssize_t tresid, left; 2371 int error = 0; 2372 struct vattr vattr; 2373 2374 if (ap->a_eofflag != NULL) 2375 *ap->a_eofflag = 0; 2376 if (vp->v_type != VDIR) 2377 return(EPERM); 2378 2379 /* 2380 * First, check for hit on the EOF offset cache 2381 */ 2382 NFSLOCKNODE(np); 2383 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2384 (np->n_flag & NMODIFIED) == 0) { 2385 NFSUNLOCKNODE(np); 2386 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2387 NFSLOCKNODE(np); 2388 if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || 2389 !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2390 NFSUNLOCKNODE(np); 2391 NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); 2392 if (ap->a_eofflag != NULL) 2393 *ap->a_eofflag = 1; 2394 return (0); 2395 } else 2396 NFSUNLOCKNODE(np); 2397 } 2398 } else 2399 NFSUNLOCKNODE(np); 2400 2401 /* 2402 * NFS always guarantees that directory entries don't straddle 2403 * DIRBLKSIZ boundaries. As such, we need to limit the size 2404 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial 2405 * directory entry. 2406 */ 2407 left = uio->uio_resid % DIRBLKSIZ; 2408 if (left == uio->uio_resid) 2409 return (EINVAL); 2410 uio->uio_resid -= left; 2411 2412 /* 2413 * Call ncl_bioread() to do the real work. 2414 */ 2415 tresid = uio->uio_resid; 2416 error = ncl_bioread(vp, uio, 0, ap->a_cred); 2417 2418 if (!error && uio->uio_resid == tresid) { 2419 NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); 2420 if (ap->a_eofflag != NULL) 2421 *ap->a_eofflag = 1; 2422 } 2423 2424 /* Add the partial DIRBLKSIZ (left) back in. */ 2425 uio->uio_resid += left; 2426 return (error); 2427 } 2428 2429 /* 2430 * Readdir rpc call. 
2431 * Called from below the buffer cache by ncl_doio(). 2432 */ 2433 int 2434 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2435 struct thread *td) 2436 { 2437 struct nfsvattr nfsva; 2438 nfsuint64 *cookiep, cookie; 2439 struct nfsnode *dnp = VTONFS(vp); 2440 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2441 int error = 0, eof, attrflag; 2442 2443 KASSERT(uiop->uio_iovcnt == 1 && 2444 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2445 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2446 ("nfs readdirrpc bad uio")); 2447 2448 /* 2449 * If there is no cookie, assume directory was stale. 2450 */ 2451 ncl_dircookie_lock(dnp); 2452 NFSUNLOCKNODE(dnp); 2453 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2454 if (cookiep) { 2455 cookie = *cookiep; 2456 ncl_dircookie_unlock(dnp); 2457 } else { 2458 ncl_dircookie_unlock(dnp); 2459 return (NFSERR_BAD_COOKIE); 2460 } 2461 2462 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2463 (void)ncl_fsinfo(nmp, vp, cred, td); 2464 2465 error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, 2466 &attrflag, &eof, NULL); 2467 if (attrflag) 2468 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2469 2470 if (!error) { 2471 /* 2472 * We are now either at the end of the directory or have filled 2473 * the block. 2474 */ 2475 if (eof) { 2476 NFSLOCKNODE(dnp); 2477 dnp->n_direofoffset = uiop->uio_offset; 2478 NFSUNLOCKNODE(dnp); 2479 } else { 2480 if (uiop->uio_resid > 0) 2481 printf("EEK! readdirrpc resid > 0\n"); 2482 ncl_dircookie_lock(dnp); 2483 NFSUNLOCKNODE(dnp); 2484 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2485 *cookiep = cookie; 2486 ncl_dircookie_unlock(dnp); 2487 } 2488 } else if (NFS_ISV4(vp)) { 2489 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2490 } 2491 return (error); 2492 } 2493 2494 /* 2495 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). 
2496 */ 2497 int 2498 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2499 struct thread *td) 2500 { 2501 struct nfsvattr nfsva; 2502 nfsuint64 *cookiep, cookie; 2503 struct nfsnode *dnp = VTONFS(vp); 2504 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2505 int error = 0, attrflag, eof; 2506 2507 KASSERT(uiop->uio_iovcnt == 1 && 2508 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2509 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2510 ("nfs readdirplusrpc bad uio")); 2511 2512 /* 2513 * If there is no cookie, assume directory was stale. 2514 */ 2515 ncl_dircookie_lock(dnp); 2516 NFSUNLOCKNODE(dnp); 2517 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2518 if (cookiep) { 2519 cookie = *cookiep; 2520 ncl_dircookie_unlock(dnp); 2521 } else { 2522 ncl_dircookie_unlock(dnp); 2523 return (NFSERR_BAD_COOKIE); 2524 } 2525 2526 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2527 (void)ncl_fsinfo(nmp, vp, cred, td); 2528 error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, 2529 &attrflag, &eof, NULL); 2530 if (attrflag) 2531 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2532 2533 if (!error) { 2534 /* 2535 * We are now either at end of the directory or have filled the 2536 * the block. 2537 */ 2538 if (eof) { 2539 NFSLOCKNODE(dnp); 2540 dnp->n_direofoffset = uiop->uio_offset; 2541 NFSUNLOCKNODE(dnp); 2542 } else { 2543 if (uiop->uio_resid > 0) 2544 printf("EEK! readdirplusrpc resid > 0\n"); 2545 ncl_dircookie_lock(dnp); 2546 NFSUNLOCKNODE(dnp); 2547 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2548 *cookiep = cookie; 2549 ncl_dircookie_unlock(dnp); 2550 } 2551 } else if (NFS_ISV4(vp)) { 2552 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2553 } 2554 return (error); 2555 } 2556 2557 /* 2558 * Silly rename. 
To make the NFS filesystem that is stateless look a little 2559 * more like the "ufs" a remove of an active vnode is translated to a rename 2560 * to a funny looking filename that is removed by nfs_inactive on the 2561 * nfsnode. There is the potential for another process on a different client 2562 * to create the same funny name between the nfs_lookitup() fails and the 2563 * nfs_rename() completes, but... 2564 */ 2565 static int 2566 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2567 { 2568 struct sillyrename *sp; 2569 struct nfsnode *np; 2570 int error; 2571 short pid; 2572 unsigned int lticks; 2573 2574 cache_purge(dvp); 2575 np = VTONFS(vp); 2576 KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); 2577 sp = malloc(sizeof (struct sillyrename), 2578 M_NEWNFSREQ, M_WAITOK); 2579 sp->s_cred = crhold(cnp->cn_cred); 2580 sp->s_dvp = dvp; 2581 VREF(dvp); 2582 2583 /* 2584 * Fudge together a funny name. 2585 * Changing the format of the funny name to accommodate more 2586 * sillynames per directory. 2587 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2588 * CPU ticks since boot. 2589 */ 2590 pid = cnp->cn_thread->td_proc->p_pid; 2591 lticks = (unsigned int)ticks; 2592 for ( ; ; ) { 2593 sp->s_namlen = sprintf(sp->s_name, 2594 ".nfs.%08x.%04x4.4", lticks, 2595 pid); 2596 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2597 cnp->cn_thread, NULL)) 2598 break; 2599 lticks++; 2600 } 2601 error = nfs_renameit(dvp, vp, cnp, sp); 2602 if (error) 2603 goto bad; 2604 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2605 cnp->cn_thread, &np); 2606 np->n_sillyrename = sp; 2607 return (0); 2608 bad: 2609 vrele(sp->s_dvp); 2610 crfree(sp->s_cred); 2611 free(sp, M_NEWNFSREQ); 2612 return (error); 2613 } 2614 2615 /* 2616 * Look up a file name and optionally either update the file handle or 2617 * allocate an nfsnode, depending on the value of npp. 
2618 * npp == NULL --> just do the lookup 2619 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2620 * handled too 2621 * *npp != NULL --> update the file handle in the vnode 2622 */ 2623 static int 2624 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, 2625 struct thread *td, struct nfsnode **npp) 2626 { 2627 struct vnode *newvp = NULL, *vp; 2628 struct nfsnode *np, *dnp = VTONFS(dvp); 2629 struct nfsfh *nfhp, *onfhp; 2630 struct nfsvattr nfsva, dnfsva; 2631 struct componentname cn; 2632 int error = 0, attrflag, dattrflag; 2633 u_int hash; 2634 2635 error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, 2636 &nfhp, &attrflag, &dattrflag, NULL, 0); 2637 if (dattrflag) 2638 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2639 if (npp && !error) { 2640 if (*npp != NULL) { 2641 np = *npp; 2642 vp = NFSTOV(np); 2643 /* 2644 * For NFSv4, check to see if it is the same name and 2645 * replace the name, if it is different. 2646 */ 2647 if (np->n_v4 != NULL && nfsva.na_type == VREG && 2648 (np->n_v4->n4_namelen != len || 2649 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || 2650 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || 2651 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2652 dnp->n_fhp->nfh_len))) { 2653 #ifdef notdef 2654 { char nnn[100]; int nnnl; 2655 nnnl = (len < 100) ? len : 99; 2656 bcopy(name, nnn, nnnl); 2657 nnn[nnnl] = '\0'; 2658 printf("replace=%s\n",nnn); 2659 } 2660 #endif 2661 free(np->n_v4, M_NFSV4NODE); 2662 np->n_v4 = malloc( 2663 sizeof (struct nfsv4node) + 2664 dnp->n_fhp->nfh_len + len - 1, 2665 M_NFSV4NODE, M_WAITOK); 2666 np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; 2667 np->n_v4->n4_namelen = len; 2668 NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2669 dnp->n_fhp->nfh_len); 2670 NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); 2671 } 2672 hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, 2673 FNV1_32_INIT); 2674 onfhp = np->n_fhp; 2675 /* 2676 * Rehash node for new file handle. 
2677 */ 2678 vfs_hash_rehash(vp, hash); 2679 np->n_fhp = nfhp; 2680 if (onfhp != NULL) 2681 free(onfhp, M_NFSFH); 2682 newvp = NFSTOV(np); 2683 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { 2684 free(nfhp, M_NFSFH); 2685 VREF(dvp); 2686 newvp = dvp; 2687 } else { 2688 cn.cn_nameptr = name; 2689 cn.cn_namelen = len; 2690 error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, 2691 &np, NULL, LK_EXCLUSIVE); 2692 if (error) 2693 return (error); 2694 newvp = NFSTOV(np); 2695 } 2696 if (!attrflag && *npp == NULL) { 2697 if (newvp == dvp) 2698 vrele(newvp); 2699 else 2700 vput(newvp); 2701 return (ENOENT); 2702 } 2703 if (attrflag) 2704 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 2705 0, 1); 2706 } 2707 if (npp && *npp == NULL) { 2708 if (error) { 2709 if (newvp) { 2710 if (newvp == dvp) 2711 vrele(newvp); 2712 else 2713 vput(newvp); 2714 } 2715 } else 2716 *npp = np; 2717 } 2718 if (error && NFS_ISV4(dvp)) 2719 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2720 return (error); 2721 } 2722 2723 /* 2724 * Nfs Version 3 and 4 commit rpc 2725 */ 2726 int 2727 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2728 struct thread *td) 2729 { 2730 struct nfsvattr nfsva; 2731 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2732 struct nfsnode *np; 2733 struct uio uio; 2734 int error, attrflag; 2735 2736 np = VTONFS(vp); 2737 error = EIO; 2738 attrflag = 0; 2739 if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) { 2740 uio.uio_offset = offset; 2741 uio.uio_resid = cnt; 2742 error = nfscl_doiods(vp, &uio, NULL, NULL, 2743 NFSV4OPEN_ACCESSWRITE, 1, cred, td); 2744 if (error != 0) { 2745 NFSLOCKNODE(np); 2746 np->n_flag &= ~NDSCOMMIT; 2747 NFSUNLOCKNODE(np); 2748 } 2749 } 2750 if (error != 0) { 2751 mtx_lock(&nmp->nm_mtx); 2752 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2753 mtx_unlock(&nmp->nm_mtx); 2754 return (0); 2755 } 2756 mtx_unlock(&nmp->nm_mtx); 2757 error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, 2758 
&attrflag, NULL); 2759 } 2760 if (attrflag != 0) 2761 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 2762 0, 1); 2763 if (error != 0 && NFS_ISV4(vp)) 2764 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2765 return (error); 2766 } 2767 2768 /* 2769 * Strategy routine. 2770 * For async requests when nfsiod(s) are running, queue the request by 2771 * calling ncl_asyncio(), otherwise just all ncl_doio() to do the 2772 * request. 2773 */ 2774 static int 2775 nfs_strategy(struct vop_strategy_args *ap) 2776 { 2777 struct buf *bp; 2778 struct vnode *vp; 2779 struct ucred *cr; 2780 2781 bp = ap->a_bp; 2782 vp = ap->a_vp; 2783 KASSERT(bp->b_vp == vp, ("missing b_getvp")); 2784 KASSERT(!(bp->b_flags & B_DONE), 2785 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2786 2787 if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno) 2788 bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize / 2789 DEV_BSIZE); 2790 if (bp->b_iocmd == BIO_READ) 2791 cr = bp->b_rcred; 2792 else 2793 cr = bp->b_wcred; 2794 2795 /* 2796 * If the op is asynchronous and an i/o daemon is waiting 2797 * queue the request, wake it up and wait for completion 2798 * otherwise just do it ourselves. 2799 */ 2800 if ((bp->b_flags & B_ASYNC) == 0 || 2801 ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread)) 2802 (void) ncl_doio(vp, bp, cr, curthread, 1); 2803 return (0); 2804 } 2805 2806 /* 2807 * fsync vnode op. Just call ncl_flush() with commit == 1. 2808 */ 2809 /* ARGSUSED */ 2810 static int 2811 nfs_fsync(struct vop_fsync_args *ap) 2812 { 2813 2814 if (ap->a_vp->v_type != VREG) { 2815 /* 2816 * For NFS, metadata is changed synchronously on the server, 2817 * so there is nothing to flush. Also, ncl_flush() clears 2818 * the NMODIFIED flag and that shouldn't be done here for 2819 * directories. 2820 */ 2821 return (0); 2822 } 2823 return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0)); 2824 } 2825 2826 /* 2827 * Flush all the blocks associated with a vnode. 
 * Walk through the buffer pool and push any dirty pages
 * associated with the vnode.
 * If the called_from_renewthread argument is TRUE, it has been called
 * from the NFSv4 renew thread and, as such, cannot block indefinitely
 * waiting for a buffer write to complete.
 *
 * vp       - vnode whose dirty buffers are flushed
 * waitfor  - MNT_WAIT makes the routine wait for locked buffers and for
 *            outstanding output to drain
 * td       - calling thread, used for the commit RPCs and signal checks
 * commit   - non-zero to also commit uncommitted (B_NEEDCOMMIT) buffers
 *
 * Returns 0 on success, EINTR when interrupted, EIO when a wait fails while
 * called from the renew thread, a recorded write error (np->n_error), or
 * EBUSY/EIO when buffers are still outstanding after the retries.
 */
int
ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
    int commit, int called_from_renewthread)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1, trycnt = 0;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
	struct bufobj *bo;
#ifndef NFS_COMMITBVECSIZ
#define	NFS_COMMITBVECSIZ	20
#endif
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	u_int bvecsize = 0, bveccount;

	/* The renew thread only ever sleeps for a bounded time. */
	if (called_from_renewthread != 0)
		slptimeo = hz;
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	/* Without a commit request there is no separate commit pass. */
	if (!commit)
		passone = 0;
	bo = &vp->v_bufobj;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, ncl_writebp() is called to do the
	 * job.
	 */
again:
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV34(vp) && commit) {
		/* Drop a vector left over from a previous pass. */
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers waiting for a commit.
		 */
		bveccount = 0;
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bveccount++;
		}
		/*
		 * Allocate space to remember the list of bufs to commit.  It is
		 * important to use M_NOWAIT here to avoid a race with nfs_write.
		 * If we can't get memory (for whatever reason), we will end up
		 * committing the buffers one-by-one in the loop below.
		 */
		if (bveccount > NFS_COMMITBVECSIZ) {
			/*
			 * Release the vnode interlock to avoid a lock
			 * order reversal.
			 */
			BO_UNLOCK(bo);
			bvec = (struct buf **)
				malloc(bveccount * sizeof(struct buf *),
				       M_TEMP, M_NOWAIT);
			BO_LOCK(bo);
			if (bvec == NULL) {
				/* Fall back to the fixed-size on-stack vector. */
				bvec = bvec_on_stack;
				bvecsize = NFS_COMMITBVECSIZ;
			} else
				bvecsize = bveccount;
		} else {
			bvec = bvec_on_stack;
			bvecsize = NFS_COMMITBVECSIZ;
		}
		/*
		 * Collect up to bvecsize lockable buffers that need a commit
		 * and work out the byte range [off, endoff) they cover.
		 */
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (bvecpos >= bvecsize)
				break;
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
			    (B_DELWRI | B_NEEDCOMMIT)) {
				BUF_UNLOCK(bp);
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			BO_UNLOCK(bo);
			bremfree(bp);
			/*
			 * Work out if all buffers are using the same cred
			 * so we can deal with them all with one commit.
			 *
			 * NOTE: we are not clearing B_DONE here, so we have
			 * to do it later on in this routine if we intend to
			 * initiate I/O on the bp.
			 *
			 * Note: to avoid loopback deadlocks, we do not
			 * assign b_runningbufspace.
			 */
			if (wcred == NULL)
				wcred = bp->b_wcred;
			else if (wcred != bp->b_wcred)
				wcred = NOCRED;
			vfs_busy_pages(bp, 1);

			BO_LOCK(bo);
			/*
			 * bp is protected by being locked, but nbp is not
			 * and vfs_busy_pages() may sleep.  We have to
			 * recalculate nbp.
			 */
			nbp = TAILQ_NEXT(bp, b_bobufs);

			/*
			 * A list of these buffers is kept so that the
			 * second loop knows which buffers have actually
			 * been committed. This is necessary, since there
			 * may be a race between the commit rpc and new
			 * uncommitted writes on the file.
			 */
			bvec[bvecpos++] = bp;
			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				bp->b_dirtyoff;
			if (toff < off)
				off = toff;
			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
			if (toff > endoff)
				endoff = toff;
		}
		BO_UNLOCK(bo);
	}
	if (bvecpos > 0) {
		/*
		 * Commit data on the server, as required.
		 * If all bufs are using the same wcred, then use that with
		 * one call for all of them, otherwise commit each one
		 * separately.
		 */
		if (wcred != NOCRED)
			retv = ncl_commit(vp, off, (int)(endoff - off),
					  wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				/* These shadow the function-level off. */
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
					bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
						  - bp->b_dirtyoff);
				retv = ncl_commit(vp, off, (int)size,
						  bp->b_wcred, td);
				if (retv) break;
			}
		}

		if (retv == NFSERR_STALEWRITEVERF)
			ncl_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			if (retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific.  We should probably move that
				 * into bundirty(). XXX
				 */
				bufobj_wref(bo);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	BO_LOCK(bo);
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			/* Busy buffer: skip it unless we must wait for it. */
			if (waitfor != MNT_WAIT || passone)
				continue;

			/*
			 * LK_INTERLOCK hands the bufobj lock to the lock
			 * call, so every path below restarts at "loop"
			 * (which re-takes it) or leaves via "done".
			 */
			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
			if (error == 0) {
				BUF_UNLOCK(bp);
				goto loop;
			}
			if (error == ENOLCK) {
				error = 0;
				goto loop;
			}
			if (called_from_renewthread != 0) {
				/*
				 * Return EIO so the flush will be retried
				 * later.
				 */
				error = EIO;
				goto done;
			}
			if (newnfs_sigintr(nmp, td)) {
				error = EINTR;
				goto done;
			}
			/* After one interruptible sleep, switch to timed ones. */
			if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs_fsync: not dirty");
		/* Leave uncommitted buffers for the commit pass. */
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		BO_UNLOCK(bo);
		bremfree(bp);
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		if (newnfs_sigintr(nmp, td)) {
			error = EINTR;
			goto done;
		}
		/* The list lock was dropped, so rescan from the start. */
		goto loop;
	}
	if (passone) {
		passone = 0;
		BO_UNLOCK(bo);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		while (bo->bo_numoutput) {
			error = bufobj_wwait(bo, slpflag, slptimeo);
			if (error) {
			    BO_UNLOCK(bo);
			    if (called_from_renewthread != 0) {
				/*
				 * Return EIO so that the flush will be
				 * retried later.
				 */
				error = EIO;
				goto done;
			    }
			    error = newnfs_sigintr(nmp, td);
			    if (error)
				goto done;
			    if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			    }
			    BO_LOCK(bo);
			}
		}
		/* More buffers were dirtied meanwhile: go write them too. */
		if (bo->bo_dirty.bv_cnt != 0 && commit) {
			BO_UNLOCK(bo);
			goto loop;
		}
		/*
		 * Wait for all the async IO requests to drain
		 */
		BO_UNLOCK(bo);
		NFSLOCKNODE(np);
		while (np->n_directio_asyncwr > 0) {
			np->n_flag |= NFSYNCWAIT;
			error = newnfs_msleep(td, &np->n_directio_asyncwr,
			    &np->n_mtx, slpflag | (PRIBIO + 1),
			    "nfsfsync", 0);
			if (error) {
				if (newnfs_sigintr(nmp, td)) {
					NFSUNLOCKNODE(np);
					error = EINTR;
					goto done;
				}
			}
		}
		NFSUNLOCKNODE(np);
	} else
		BO_UNLOCK(bo);
	/* Both branches below leave the node locked. */
	if (NFSHASPNFS(nmp)) {
		nfscl_layoutcommit(vp, td);
		/*
		 * Invalidate the attribute cache, since writes to a DS
		 * won't update the size attribute.
		 */
		NFSLOCKNODE(np);
		np->n_attrstamp = 0;
	} else
		NFSLOCKNODE(np);
	/* Report (and clear) a write error recorded on the node. */
	if (np->n_flag & NWRITEERR) {
		error = np->n_error;
		np->n_flag &= ~NWRITEERR;
	}
	/* Only a complete, committed flush clears NMODIFIED. */
	if (commit && bo->bo_dirty.bv_cnt == 0 &&
	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
		np->n_flag &= ~NMODIFIED;
	NFSUNLOCKNODE(np);
done:
	if (bvec != NULL && bvec != bvec_on_stack)
		free(bvec, M_TEMP);
	/*
	 * A synchronous commit flush that still has work outstanding is
	 * retried from the top a limited number of times before giving up.
	 */
	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
	    np->n_directio_asyncwr != 0)) {
		if (trycnt++ < 5) {
			/* try, try again... */
			passone = 1;
			wcred = NULL;
			bvec = NULL;
			bvecsize = 0;
			goto again;
		}
		vn_printf(vp, "ncl_flush failed");
		error = called_from_renewthread != 0 ? EIO : EBUSY;
	}
	return (error);
}

/*
 * NFS advisory byte-level locks.
 *
 * NFSv4 POSIX/flock requests are done with nfsrpc_advlock(); for NFSv2/3 the
 * request goes to lf_advlock() (NFSMNT_NOLOCKD mounts) or to the
 * nfs_advlock_p hook.  Anything else yields EOPNOTSUPP.
 * The vnode is locked on entry to this routine and unlocked on every
 * return path.
 */
static int
nfs_advlock(struct vop_advlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct ucred *cred;
	struct nfsnode *np = VTONFS(ap->a_vp);
	struct proc *p = (struct proc *)ap->a_id;
	struct thread *td = curthread;	/* XXX */
	struct vattr va;
	int ret, error;
	u_quad_t size;
	struct nfsmount *nmp;

	error = NFSVOPLOCK(vp, LK_SHARED);
	if (error != 0)
		return (EBADF);
	if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
		if (vp->v_type != VREG) {
			error = EINVAL;
			goto out;
		}
		/* POSIX locks use the owning process's credential. */
		if ((ap->a_flags & F_POSIX) != 0)
			cred = p->p_ucred;
		else
			cred = td->td_ucred;
		/* The vnode may have been reclaimed during the upgrade. */
		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
		if (VN_IS_DOOMED(vp)) {
			error = EBADF;
			goto out;
		}

		/*
		 * If this is unlocking a write locked region, flush and
		 * commit them before unlocking. This is required by
		 * RFC3530 Sec. 9.3.2.
		 */
		if (ap->a_op == F_UNLCK &&
		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
		    ap->a_flags))
			(void) ncl_flush(vp, MNT_WAIT, td, 1, 0);

		/*
		 * Mark NFS node as might have acquired a lock.
		 * This is separate from NHASBEENLOCKED, because it must
		 * be done before the nfsrpc_advlock() call, which might
		 * add a nfscllock structure to the client state.
		 * It is used to check for the case where a nfscllock
		 * state structure cannot exist for the file.
		 * Only done for "oneopenown" NFSv4.1/4.2 mounts.
		 */
		nmp = VFSTONFS(vp->v_mount);
		if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
			NFSLOCKNODE(np);
			np->n_flag |= NMIGHTBELOCKED;
			NFSUNLOCKNODE(np);
		}

		/*
		 * Loop around doing the lock op, while a blocking lock
		 * must wait for the lock op to succeed.
		 */
		do {
			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
			     ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
			    ap->a_op == F_SETLK) {
				/* Sleep unlocked; the vnode lock is retaken after. */
				NFSVOPUNLOCK(vp);
				error = nfs_catnap(PZERO | PCATCH, ret,
				    "ncladvl");
				if (error)
					return (EINTR);
				NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
				if (VN_IS_DOOMED(vp)) {
					error = EBADF;
					goto out;
				}
			}
		} while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
		     ap->a_op == F_SETLK);
		/* Translate the RPC result into the error returned to the caller. */
		if (ret == NFSERR_DENIED) {
			error = EAGAIN;
			goto out;
		} else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
			error = ret;
			goto out;
		} else if (ret != 0) {
			error = EACCES;
			goto out;
		}

		/*
		 * Now, if we just got a lock, invalidate data in the buffer
		 * cache, as required, so that the coherency conforms with
		 * RFC3530 Sec. 9.3.2.
		 */
		if (ap->a_op == F_SETLK) {
			if ((np->n_flag & NMODIFIED) == 0) {
				np->n_attrstamp = 0;
				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
				ret = VOP_GETATTR(vp, &va, cred);
			}
			if ((np->n_flag & NMODIFIED) || ret ||
			    np->n_change != va.va_filerev) {
				(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
				np->n_attrstamp = 0;
				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
				ret = VOP_GETATTR(vp, &va, cred);
				if (!ret) {
					np->n_mtime = va.va_mtime;
					np->n_change = va.va_filerev;
				}
			}
			/* Mark that a file lock has been acquired. */
			NFSLOCKNODE(np);
			np->n_flag |= NHASBEENLOCKED;
			NFSUNLOCKNODE(np);
		}
	} else if (!NFS_ISV4(vp)) {
		if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
			size = VTONFS(vp)->n_size;
			NFSVOPUNLOCK(vp);
			error = lf_advlock(ap, &(vp->v_lockf), size);
		} else {
			/*
			 * NOTE(review): the vnode is not unlocked here before
			 * calling nfs_advlock_p; presumably the hook unlocks
			 * it - confirm.
			 */
			if (nfs_advlock_p != NULL)
				error = nfs_advlock_p(ap);
			else {
				NFSVOPUNLOCK(vp);
				error = ENOLCK;
			}
		}
		if (error == 0 && ap->a_op == F_SETLK) {
			error = NFSVOPLOCK(vp, LK_SHARED);
			if (error == 0) {
				/* Mark that a file lock has been acquired. */
				NFSLOCKNODE(np);
				np->n_flag |= NHASBEENLOCKED;
				NFSUNLOCKNODE(np);
				NFSVOPUNLOCK(vp);
			}
		}
		return (error);
	} else
		error = EOPNOTSUPP;
out:
	NFSVOPUNLOCK(vp);
	return (error);
}

/*
 * NFS advisory byte-level locks.
3330 */ 3331 static int 3332 nfs_advlockasync(struct vop_advlockasync_args *ap) 3333 { 3334 struct vnode *vp = ap->a_vp; 3335 u_quad_t size; 3336 int error; 3337 3338 if (NFS_ISV4(vp)) 3339 return (EOPNOTSUPP); 3340 error = NFSVOPLOCK(vp, LK_SHARED); 3341 if (error) 3342 return (error); 3343 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3344 size = VTONFS(vp)->n_size; 3345 NFSVOPUNLOCK(vp); 3346 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3347 } else { 3348 NFSVOPUNLOCK(vp); 3349 error = EOPNOTSUPP; 3350 } 3351 return (error); 3352 } 3353 3354 /* 3355 * Print out the contents of an nfsnode. 3356 */ 3357 static int 3358 nfs_print(struct vop_print_args *ap) 3359 { 3360 struct vnode *vp = ap->a_vp; 3361 struct nfsnode *np = VTONFS(vp); 3362 3363 printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, 3364 (uintmax_t)np->n_vattr.na_fsid); 3365 if (vp->v_type == VFIFO) 3366 fifo_printinfo(vp); 3367 printf("\n"); 3368 return (0); 3369 } 3370 3371 /* 3372 * This is the "real" nfs::bwrite(struct buf*). 3373 * We set B_CACHE if this is a VMIO buffer. 3374 */ 3375 int 3376 ncl_writebp(struct buf *bp, int force __unused, struct thread *td) 3377 { 3378 int oldflags, rtval; 3379 3380 if (bp->b_flags & B_INVAL) { 3381 brelse(bp); 3382 return (0); 3383 } 3384 3385 oldflags = bp->b_flags; 3386 bp->b_flags |= B_CACHE; 3387 3388 /* 3389 * Undirty the bp. We will redirty it later if the I/O fails. 3390 */ 3391 bundirty(bp); 3392 bp->b_flags &= ~B_DONE; 3393 bp->b_ioflags &= ~BIO_ERROR; 3394 bp->b_iocmd = BIO_WRITE; 3395 3396 bufobj_wref(bp->b_bufobj); 3397 curthread->td_ru.ru_oublock++; 3398 3399 /* 3400 * Note: to avoid loopback deadlocks, we do not 3401 * assign b_runningbufspace. 
3402 */ 3403 vfs_busy_pages(bp, 1); 3404 3405 BUF_KERNPROC(bp); 3406 bp->b_iooffset = dbtob(bp->b_blkno); 3407 bstrategy(bp); 3408 3409 if ((oldflags & B_ASYNC) != 0) 3410 return (0); 3411 3412 rtval = bufwait(bp); 3413 if (oldflags & B_DELWRI) 3414 reassignbuf(bp); 3415 brelse(bp); 3416 return (rtval); 3417 } 3418 3419 /* 3420 * nfs special file access vnode op. 3421 * Essentially just get vattr and then imitate iaccess() since the device is 3422 * local to the client. 3423 */ 3424 static int 3425 nfsspec_access(struct vop_access_args *ap) 3426 { 3427 struct vattr *vap; 3428 struct ucred *cred = ap->a_cred; 3429 struct vnode *vp = ap->a_vp; 3430 accmode_t accmode = ap->a_accmode; 3431 struct vattr vattr; 3432 int error; 3433 3434 /* 3435 * Disallow write attempts on filesystems mounted read-only; 3436 * unless the file is a socket, fifo, or a block or character 3437 * device resident on the filesystem. 3438 */ 3439 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3440 switch (vp->v_type) { 3441 case VREG: 3442 case VDIR: 3443 case VLNK: 3444 return (EROFS); 3445 default: 3446 break; 3447 } 3448 } 3449 vap = &vattr; 3450 error = VOP_GETATTR(vp, vap, cred); 3451 if (error) 3452 goto out; 3453 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3454 accmode, cred); 3455 out: 3456 return error; 3457 } 3458 3459 /* 3460 * Read wrapper for fifos. 3461 */ 3462 static int 3463 nfsfifo_read(struct vop_read_args *ap) 3464 { 3465 struct nfsnode *np = VTONFS(ap->a_vp); 3466 int error; 3467 3468 /* 3469 * Set access flag. 3470 */ 3471 NFSLOCKNODE(np); 3472 np->n_flag |= NACC; 3473 vfs_timestamp(&np->n_atim); 3474 NFSUNLOCKNODE(np); 3475 error = fifo_specops.vop_read(ap); 3476 return error; 3477 } 3478 3479 /* 3480 * Write wrapper for fifos. 3481 */ 3482 static int 3483 nfsfifo_write(struct vop_write_args *ap) 3484 { 3485 struct nfsnode *np = VTONFS(ap->a_vp); 3486 3487 /* 3488 * Set update flag. 
3489 */ 3490 NFSLOCKNODE(np); 3491 np->n_flag |= NUPD; 3492 vfs_timestamp(&np->n_mtim); 3493 NFSUNLOCKNODE(np); 3494 return(fifo_specops.vop_write(ap)); 3495 } 3496 3497 /* 3498 * Close wrapper for fifos. 3499 * 3500 * Update the times on the nfsnode then do fifo close. 3501 */ 3502 static int 3503 nfsfifo_close(struct vop_close_args *ap) 3504 { 3505 struct vnode *vp = ap->a_vp; 3506 struct nfsnode *np = VTONFS(vp); 3507 struct vattr vattr; 3508 struct timespec ts; 3509 3510 NFSLOCKNODE(np); 3511 if (np->n_flag & (NACC | NUPD)) { 3512 vfs_timestamp(&ts); 3513 if (np->n_flag & NACC) 3514 np->n_atim = ts; 3515 if (np->n_flag & NUPD) 3516 np->n_mtim = ts; 3517 np->n_flag |= NCHG; 3518 if (vrefcnt(vp) == 1 && 3519 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3520 VATTR_NULL(&vattr); 3521 if (np->n_flag & NACC) 3522 vattr.va_atime = np->n_atim; 3523 if (np->n_flag & NUPD) 3524 vattr.va_mtime = np->n_mtim; 3525 NFSUNLOCKNODE(np); 3526 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3527 goto out; 3528 } 3529 } 3530 NFSUNLOCKNODE(np); 3531 out: 3532 return (fifo_specops.vop_close(ap)); 3533 } 3534 3535 /* 3536 * Just call ncl_writebp() with the force argument set to 1. 3537 * 3538 * NOTE: B_DONE may or may not be set in a_bp on call. 
3539 */ 3540 static int 3541 nfs_bwrite(struct buf *bp) 3542 { 3543 3544 return (ncl_writebp(bp, 1, curthread)); 3545 } 3546 3547 struct buf_ops buf_ops_newnfs = { 3548 .bop_name = "buf_ops_nfs", 3549 .bop_write = nfs_bwrite, 3550 .bop_strategy = bufstrategy, 3551 .bop_sync = bufsync, 3552 .bop_bdflush = bufbdflush, 3553 }; 3554 3555 static int 3556 nfs_getacl(struct vop_getacl_args *ap) 3557 { 3558 int error; 3559 3560 if (ap->a_type != ACL_TYPE_NFS4) 3561 return (EOPNOTSUPP); 3562 error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3563 NULL); 3564 if (error > NFSERR_STALE) { 3565 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3566 error = EPERM; 3567 } 3568 return (error); 3569 } 3570 3571 static int 3572 nfs_setacl(struct vop_setacl_args *ap) 3573 { 3574 int error; 3575 3576 if (ap->a_type != ACL_TYPE_NFS4) 3577 return (EOPNOTSUPP); 3578 error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3579 NULL); 3580 if (error > NFSERR_STALE) { 3581 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3582 error = EPERM; 3583 } 3584 return (error); 3585 } 3586 3587 /* 3588 * VOP_ADVISE for NFS. 3589 * Just return 0 for any errors, since it is just a hint. 3590 */ 3591 static int 3592 nfs_advise(struct vop_advise_args *ap) 3593 { 3594 struct thread *td = curthread; 3595 struct nfsmount *nmp; 3596 uint64_t len; 3597 int error; 3598 3599 /* 3600 * First do vop_stdadvise() to handle the buffer cache. 
3601 */ 3602 error = vop_stdadvise(ap); 3603 if (error != 0) 3604 return (error); 3605 if (ap->a_start < 0 || ap->a_end < 0) 3606 return (0); 3607 if (ap->a_end == OFF_MAX) 3608 len = 0; 3609 else if (ap->a_end < ap->a_start) 3610 return (0); 3611 else 3612 len = ap->a_end - ap->a_start + 1; 3613 nmp = VFSTONFS(ap->a_vp->v_mount); 3614 mtx_lock(&nmp->nm_mtx); 3615 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3616 (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) == 3617 0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) { 3618 mtx_unlock(&nmp->nm_mtx); 3619 return (0); 3620 } 3621 mtx_unlock(&nmp->nm_mtx); 3622 error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice, 3623 td->td_ucred, td); 3624 if (error == NFSERR_NOTSUPP) { 3625 mtx_lock(&nmp->nm_mtx); 3626 nmp->nm_privflag |= NFSMNTP_NOADVISE; 3627 mtx_unlock(&nmp->nm_mtx); 3628 } 3629 return (0); 3630 } 3631 3632 /* 3633 * nfs allocate call 3634 */ 3635 static int 3636 nfs_allocate(struct vop_allocate_args *ap) 3637 { 3638 struct vnode *vp = ap->a_vp; 3639 struct thread *td = curthread; 3640 struct nfsvattr nfsva; 3641 struct nfsmount *nmp; 3642 int attrflag, error, ret; 3643 3644 attrflag = 0; 3645 nmp = VFSTONFS(vp->v_mount); 3646 mtx_lock(&nmp->nm_mtx); 3647 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3648 (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) { 3649 mtx_unlock(&nmp->nm_mtx); 3650 /* 3651 * Flush first to ensure that the allocate adds to the 3652 * file's allocation on the server. 
3653 */ 3654 error = ncl_flush(vp, MNT_WAIT, td, 1, 0); 3655 if (error == 0) 3656 error = nfsrpc_allocate(vp, *ap->a_offset, *ap->a_len, 3657 &nfsva, &attrflag, td->td_ucred, td, NULL); 3658 if (error == 0) { 3659 *ap->a_offset += *ap->a_len; 3660 *ap->a_len = 0; 3661 } else if (error == NFSERR_NOTSUPP) { 3662 mtx_lock(&nmp->nm_mtx); 3663 nmp->nm_privflag |= NFSMNTP_NOALLOCATE; 3664 mtx_unlock(&nmp->nm_mtx); 3665 } 3666 } else { 3667 mtx_unlock(&nmp->nm_mtx); 3668 error = EIO; 3669 } 3670 /* 3671 * If the NFS server cannot perform the Allocate operation, just call 3672 * vop_stdallocate() to perform it. 3673 */ 3674 if (error != 0) 3675 error = vop_stdallocate(ap); 3676 if (attrflag != 0) { 3677 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 3678 if (error == 0 && ret != 0) 3679 error = ret; 3680 } 3681 if (error != 0) 3682 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3683 return (error); 3684 } 3685 3686 /* 3687 * nfs deallocate call 3688 */ 3689 static int 3690 nfs_deallocate(struct vop_deallocate_args *ap) 3691 { 3692 struct vnode *vp = ap->a_vp; 3693 struct thread *td = curthread; 3694 struct nfsvattr nfsva; 3695 struct nfsmount *nmp; 3696 off_t tlen; 3697 int attrflag, error, ret; 3698 3699 error = 0; 3700 attrflag = 0; 3701 nmp = VFSTONFS(vp->v_mount); 3702 mtx_lock(&nmp->nm_mtx); 3703 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3704 (nmp->nm_privflag & NFSMNTP_NODEALLOCATE) == 0) { 3705 mtx_unlock(&nmp->nm_mtx); 3706 tlen = omin(OFF_MAX - *ap->a_offset, *ap->a_len); 3707 NFSCL_DEBUG(4, "dealloc: off=%jd len=%jd maxfilesize=%ju\n", 3708 (intmax_t)*ap->a_offset, (intmax_t)tlen, 3709 (uintmax_t)nmp->nm_maxfilesize); 3710 if ((uint64_t)*ap->a_offset >= nmp->nm_maxfilesize) { 3711 /* Avoid EFBIG error return from the NFSv4.2 server. 
*/ 3712 *ap->a_len = 0; 3713 return (0); 3714 } 3715 if ((uint64_t)*ap->a_offset + tlen > nmp->nm_maxfilesize) 3716 tlen = nmp->nm_maxfilesize - *ap->a_offset; 3717 if (error == 0) 3718 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 3719 if (error == 0) { 3720 vnode_pager_purge_range(vp, *ap->a_offset, 3721 *ap->a_offset + tlen); 3722 error = nfsrpc_deallocate(vp, *ap->a_offset, tlen, 3723 &nfsva, &attrflag, ap->a_cred, td, NULL); 3724 NFSCL_DEBUG(4, "dealloc: rpc=%d\n", error); 3725 } 3726 if (error == 0) { 3727 NFSCL_DEBUG(4, "dealloc: attrflag=%d na_size=%ju\n", 3728 attrflag, (uintmax_t)nfsva.na_size); 3729 if (attrflag != 0) { 3730 if ((uint64_t)*ap->a_offset < nfsva.na_size) 3731 *ap->a_offset += omin((off_t) 3732 nfsva.na_size - *ap->a_offset, 3733 tlen); 3734 } 3735 *ap->a_len = 0; 3736 } else if (error == NFSERR_NOTSUPP) { 3737 mtx_lock(&nmp->nm_mtx); 3738 nmp->nm_privflag |= NFSMNTP_NODEALLOCATE; 3739 mtx_unlock(&nmp->nm_mtx); 3740 } 3741 } else { 3742 mtx_unlock(&nmp->nm_mtx); 3743 error = EIO; 3744 } 3745 /* 3746 * If the NFS server cannot perform the Deallocate operation, just call 3747 * vop_stddeallocate() to perform it. 
3748 */ 3749 if (error != 0 && error != NFSERR_FBIG && error != NFSERR_INVAL) { 3750 error = vop_stddeallocate(ap); 3751 NFSCL_DEBUG(4, "dealloc: stddeallocate=%d\n", error); 3752 } 3753 if (attrflag != 0) { 3754 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 3755 if (error == 0 && ret != 0) 3756 error = ret; 3757 } 3758 if (error != 0) 3759 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3760 return (error); 3761 } 3762 3763 /* 3764 * nfs copy_file_range call 3765 */ 3766 static int 3767 nfs_copy_file_range(struct vop_copy_file_range_args *ap) 3768 { 3769 struct vnode *invp = ap->a_invp; 3770 struct vnode *outvp = ap->a_outvp; 3771 struct mount *mp; 3772 struct nfsvattr innfsva, outnfsva; 3773 struct vattr *vap; 3774 struct uio io; 3775 struct nfsmount *nmp; 3776 size_t len, len2; 3777 int error, inattrflag, outattrflag, ret, ret2; 3778 off_t inoff, outoff; 3779 bool consecutive, must_commit, tryoutcred; 3780 3781 ret = ret2 = 0; 3782 nmp = VFSTONFS(invp->v_mount); 3783 mtx_lock(&nmp->nm_mtx); 3784 /* NFSv4.2 Copy is not permitted for infile == outfile. */ 3785 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3786 (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0 || invp == outvp) { 3787 mtx_unlock(&nmp->nm_mtx); 3788 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 3789 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 3790 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 3791 return (error); 3792 } 3793 mtx_unlock(&nmp->nm_mtx); 3794 3795 /* Lock both vnodes, avoiding risk of deadlock. 
*/ 3796 do { 3797 mp = NULL; 3798 error = vn_start_write(outvp, &mp, V_WAIT); 3799 if (error == 0) { 3800 error = vn_lock(outvp, LK_EXCLUSIVE); 3801 if (error == 0) { 3802 error = vn_lock(invp, LK_SHARED | LK_NOWAIT); 3803 if (error == 0) 3804 break; 3805 VOP_UNLOCK(outvp); 3806 if (mp != NULL) 3807 vn_finished_write(mp); 3808 mp = NULL; 3809 error = vn_lock(invp, LK_SHARED); 3810 if (error == 0) 3811 VOP_UNLOCK(invp); 3812 } 3813 } 3814 if (mp != NULL) 3815 vn_finished_write(mp); 3816 } while (error == 0); 3817 if (error != 0) 3818 return (error); 3819 3820 /* 3821 * Do the vn_rlimit_fsize() check. Should this be above the VOP layer? 3822 */ 3823 io.uio_offset = *ap->a_outoffp; 3824 io.uio_resid = *ap->a_lenp; 3825 error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); 3826 3827 /* 3828 * Flush the input file so that the data is up to date before 3829 * the copy. Flush writes for the output file so that they 3830 * do not overwrite the data copied to the output file by the Copy. 3831 * Set the commit argument for both flushes so that the data is on 3832 * stable storage before the Copy RPC. This is done in case the 3833 * server reboots during the Copy and needs to be redone. 3834 */ 3835 if (error == 0) 3836 error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0); 3837 if (error == 0) 3838 error = ncl_flush(outvp, MNT_WAIT, curthread, 1, 0); 3839 3840 /* Do the actual NFSv4.2 RPC. */ 3841 len = *ap->a_lenp; 3842 mtx_lock(&nmp->nm_mtx); 3843 if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0) 3844 consecutive = true; 3845 else 3846 consecutive = false; 3847 mtx_unlock(&nmp->nm_mtx); 3848 inoff = *ap->a_inoffp; 3849 outoff = *ap->a_outoffp; 3850 tryoutcred = true; 3851 must_commit = false; 3852 if (error == 0) { 3853 vap = &VTONFS(invp)->n_vattr.na_vattr; 3854 error = VOP_GETATTR(invp, vap, ap->a_incred); 3855 if (error == 0) { 3856 /* 3857 * Clip "len" at va_size so that RFC compliant servers 3858 * will not reply NFSERR_INVAL. 
3859 * Setting "len == 0" for the RPC would be preferred, 3860 * but some Linux servers do not support that. 3861 */ 3862 if (inoff >= vap->va_size) 3863 *ap->a_lenp = len = 0; 3864 else if (inoff + len > vap->va_size) 3865 *ap->a_lenp = len = vap->va_size - inoff; 3866 } else 3867 error = 0; 3868 } 3869 3870 /* 3871 * len will be set to 0 upon a successful Copy RPC. 3872 * As such, this only loops when the Copy RPC needs to be retried. 3873 */ 3874 while (len > 0 && error == 0) { 3875 inattrflag = outattrflag = 0; 3876 len2 = len; 3877 if (tryoutcred) 3878 error = nfsrpc_copy_file_range(invp, ap->a_inoffp, 3879 outvp, ap->a_outoffp, &len2, ap->a_flags, 3880 &inattrflag, &innfsva, &outattrflag, &outnfsva, 3881 ap->a_outcred, consecutive, &must_commit); 3882 else 3883 error = nfsrpc_copy_file_range(invp, ap->a_inoffp, 3884 outvp, ap->a_outoffp, &len2, ap->a_flags, 3885 &inattrflag, &innfsva, &outattrflag, &outnfsva, 3886 ap->a_incred, consecutive, &must_commit); 3887 if (inattrflag != 0) 3888 ret = nfscl_loadattrcache(&invp, &innfsva, NULL, NULL, 3889 0, 1); 3890 if (outattrflag != 0) 3891 ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL, 3892 NULL, 1, 1); 3893 if (error == 0) { 3894 if (consecutive == false) { 3895 if (len2 == len) { 3896 mtx_lock(&nmp->nm_mtx); 3897 nmp->nm_privflag |= 3898 NFSMNTP_NOCONSECUTIVE; 3899 mtx_unlock(&nmp->nm_mtx); 3900 } else 3901 error = NFSERR_OFFLOADNOREQS; 3902 } 3903 *ap->a_lenp = len2; 3904 len = 0; 3905 if (len2 > 0 && must_commit && error == 0) 3906 error = ncl_commit(outvp, outoff, *ap->a_lenp, 3907 ap->a_outcred, curthread); 3908 if (error == 0 && ret != 0) 3909 error = ret; 3910 if (error == 0 && ret2 != 0) 3911 error = ret2; 3912 } else if (error == NFSERR_OFFLOADNOREQS && consecutive) { 3913 /* 3914 * Try consecutive == false, which is ok only if all 3915 * bytes are copied. 3916 * If only some bytes were copied when consecutive 3917 * is false, there is no way to know which bytes 3918 * still need to be written. 
3919 */ 3920 consecutive = false; 3921 error = 0; 3922 } else if (error == NFSERR_ACCES && tryoutcred) { 3923 /* Try again with incred. */ 3924 tryoutcred = false; 3925 error = 0; 3926 } 3927 if (error == NFSERR_STALEWRITEVERF) { 3928 /* 3929 * Server rebooted, so do it all again. 3930 */ 3931 *ap->a_inoffp = inoff; 3932 *ap->a_outoffp = outoff; 3933 len = *ap->a_lenp; 3934 must_commit = false; 3935 error = 0; 3936 } 3937 } 3938 VOP_UNLOCK(invp); 3939 VOP_UNLOCK(outvp); 3940 if (mp != NULL) 3941 vn_finished_write(mp); 3942 if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS || 3943 error == NFSERR_ACCES) { 3944 /* 3945 * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can 3946 * use a_incred for the read and a_outcred for the write, so 3947 * try this for NFSERR_ACCES failures for the Copy. 3948 * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can 3949 * never succeed, so disable it. 3950 */ 3951 if (error != NFSERR_ACCES) { 3952 /* Can never do Copy on this mount. */ 3953 mtx_lock(&nmp->nm_mtx); 3954 nmp->nm_privflag |= NFSMNTP_NOCOPY; 3955 mtx_unlock(&nmp->nm_mtx); 3956 } 3957 *ap->a_inoffp = inoff; 3958 *ap->a_outoffp = outoff; 3959 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 3960 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 3961 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 3962 } else if (error != 0) 3963 *ap->a_lenp = 0; 3964 3965 if (error != 0) 3966 error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0); 3967 return (error); 3968 } 3969 3970 /* 3971 * nfs ioctl call 3972 */ 3973 static int 3974 nfs_ioctl(struct vop_ioctl_args *ap) 3975 { 3976 struct vnode *vp = ap->a_vp; 3977 struct nfsvattr nfsva; 3978 struct nfsmount *nmp; 3979 int attrflag, content, error, ret; 3980 bool eof = false; /* shut up compiler. 
*/ 3981 3982 if (vp->v_type != VREG) 3983 return (ENOTTY); 3984 nmp = VFSTONFS(vp->v_mount); 3985 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) { 3986 error = vop_stdioctl(ap); 3987 return (error); 3988 } 3989 3990 /* Do the actual NFSv4.2 RPC. */ 3991 switch (ap->a_command) { 3992 case FIOSEEKDATA: 3993 content = NFSV4CONTENT_DATA; 3994 break; 3995 case FIOSEEKHOLE: 3996 content = NFSV4CONTENT_HOLE; 3997 break; 3998 default: 3999 return (ENOTTY); 4000 } 4001 4002 error = vn_lock(vp, LK_SHARED); 4003 if (error != 0) 4004 return (EBADF); 4005 attrflag = 0; 4006 if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size) 4007 error = ENXIO; 4008 else { 4009 /* 4010 * Flush all writes, so that the server is up to date. 4011 * Although a Commit is not required, the commit argument 4012 * is set so that, for a pNFS File/Flexible File Layout 4013 * server, the LayoutCommit will be done to ensure the file 4014 * size is up to date on the Metadata Server. 4015 */ 4016 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0); 4017 if (error == 0) 4018 error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof, 4019 content, ap->a_cred, &nfsva, &attrflag); 4020 /* If at eof for FIOSEEKDATA, return ENXIO. 
*/ 4021 if (eof && error == 0 && content == NFSV4CONTENT_DATA) 4022 error = ENXIO; 4023 } 4024 if (attrflag != 0) { 4025 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4026 if (error == 0 && ret != 0) 4027 error = ret; 4028 } 4029 NFSVOPUNLOCK(vp); 4030 4031 if (error != 0) 4032 error = ENXIO; 4033 return (error); 4034 } 4035 4036 /* 4037 * nfs getextattr call 4038 */ 4039 static int 4040 nfs_getextattr(struct vop_getextattr_args *ap) 4041 { 4042 struct vnode *vp = ap->a_vp; 4043 struct nfsmount *nmp; 4044 struct ucred *cred; 4045 struct thread *td = ap->a_td; 4046 struct nfsvattr nfsva; 4047 ssize_t len; 4048 int attrflag, error, ret; 4049 4050 nmp = VFSTONFS(vp->v_mount); 4051 mtx_lock(&nmp->nm_mtx); 4052 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4053 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4054 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4055 mtx_unlock(&nmp->nm_mtx); 4056 return (EOPNOTSUPP); 4057 } 4058 mtx_unlock(&nmp->nm_mtx); 4059 4060 cred = ap->a_cred; 4061 if (cred == NULL) 4062 cred = td->td_ucred; 4063 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4064 attrflag = 0; 4065 error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva, 4066 &attrflag, cred, td); 4067 if (attrflag != 0) { 4068 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4069 if (error == 0 && ret != 0) 4070 error = ret; 4071 } 4072 if (error == 0 && ap->a_size != NULL) 4073 *ap->a_size = len; 4074 4075 switch (error) { 4076 case NFSERR_NOTSUPP: 4077 case NFSERR_OPILLEGAL: 4078 mtx_lock(&nmp->nm_mtx); 4079 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4080 mtx_unlock(&nmp->nm_mtx); 4081 error = EOPNOTSUPP; 4082 break; 4083 case NFSERR_NOXATTR: 4084 case NFSERR_XATTR2BIG: 4085 error = ENOATTR; 4086 break; 4087 default: 4088 error = nfscl_maperr(td, error, 0, 0); 4089 break; 4090 } 4091 return (error); 4092 } 4093 4094 /* 4095 * nfs setextattr call 4096 */ 4097 static int 4098 nfs_setextattr(struct vop_setextattr_args *ap) 4099 { 4100 struct vnode *vp = ap->a_vp; 4101 struct nfsmount *nmp; 4102 struct ucred *cred; 4103 struct thread *td = ap->a_td; 4104 struct nfsvattr nfsva; 4105 int attrflag, error, ret; 4106 4107 nmp = VFSTONFS(vp->v_mount); 4108 mtx_lock(&nmp->nm_mtx); 4109 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4110 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4111 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4112 mtx_unlock(&nmp->nm_mtx); 4113 return (EOPNOTSUPP); 4114 } 4115 mtx_unlock(&nmp->nm_mtx); 4116 4117 if (ap->a_uio->uio_resid < 0) 4118 return (EINVAL); 4119 cred = ap->a_cred; 4120 if (cred == NULL) 4121 cred = td->td_ucred; 4122 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4123 attrflag = 0; 4124 error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva, 4125 &attrflag, cred, td); 4126 if (attrflag != 0) { 4127 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4128 if (error == 0 && ret != 0) 4129 error = ret; 4130 } 4131 4132 switch (error) { 4133 case NFSERR_NOTSUPP: 4134 case NFSERR_OPILLEGAL: 4135 mtx_lock(&nmp->nm_mtx); 4136 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4137 mtx_unlock(&nmp->nm_mtx); 4138 error = EOPNOTSUPP; 4139 break; 4140 case NFSERR_NOXATTR: 4141 case NFSERR_XATTR2BIG: 4142 error = ENOATTR; 4143 break; 4144 default: 4145 error = nfscl_maperr(td, error, 0, 0); 4146 break; 4147 } 4148 return (error); 4149 } 4150 4151 /* 4152 * nfs listextattr call 4153 */ 4154 static int 4155 nfs_listextattr(struct vop_listextattr_args *ap) 4156 { 4157 struct vnode *vp = ap->a_vp; 4158 struct nfsmount *nmp; 4159 struct ucred *cred; 4160 struct thread *td = ap->a_td; 4161 struct nfsvattr nfsva; 4162 size_t len, len2; 4163 uint64_t cookie; 4164 int attrflag, error, ret; 4165 bool eof; 4166 4167 nmp = VFSTONFS(vp->v_mount); 4168 mtx_lock(&nmp->nm_mtx); 4169 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4170 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4171 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4172 mtx_unlock(&nmp->nm_mtx); 4173 return (EOPNOTSUPP); 4174 } 4175 mtx_unlock(&nmp->nm_mtx); 4176 4177 cred = ap->a_cred; 4178 if (cred == NULL) 4179 cred = td->td_ucred; 4180 4181 /* Loop around doing List Extended Attribute RPCs. 
*/ 4182 eof = false; 4183 cookie = 0; 4184 len2 = 0; 4185 error = 0; 4186 while (!eof && error == 0) { 4187 len = nmp->nm_rsize; 4188 attrflag = 0; 4189 error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof, 4190 &nfsva, &attrflag, cred, td); 4191 if (attrflag != 0) { 4192 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 4193 1); 4194 if (error == 0 && ret != 0) 4195 error = ret; 4196 } 4197 if (error == 0) { 4198 len2 += len; 4199 if (len2 > SSIZE_MAX) 4200 error = ENOATTR; 4201 } 4202 } 4203 if (error == 0 && ap->a_size != NULL) 4204 *ap->a_size = len2; 4205 4206 switch (error) { 4207 case NFSERR_NOTSUPP: 4208 case NFSERR_OPILLEGAL: 4209 mtx_lock(&nmp->nm_mtx); 4210 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4211 mtx_unlock(&nmp->nm_mtx); 4212 error = EOPNOTSUPP; 4213 break; 4214 case NFSERR_NOXATTR: 4215 case NFSERR_XATTR2BIG: 4216 error = ENOATTR; 4217 break; 4218 default: 4219 error = nfscl_maperr(td, error, 0, 0); 4220 break; 4221 } 4222 return (error); 4223 } 4224 4225 /* 4226 * nfs setextattr call 4227 */ 4228 static int 4229 nfs_deleteextattr(struct vop_deleteextattr_args *ap) 4230 { 4231 struct vnode *vp = ap->a_vp; 4232 struct nfsmount *nmp; 4233 struct nfsvattr nfsva; 4234 int attrflag, error, ret; 4235 4236 nmp = VFSTONFS(vp->v_mount); 4237 mtx_lock(&nmp->nm_mtx); 4238 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4239 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4240 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4241 mtx_unlock(&nmp->nm_mtx); 4242 return (EOPNOTSUPP); 4243 } 4244 mtx_unlock(&nmp->nm_mtx); 4245 4246 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4247 attrflag = 0; 4248 error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred, 4249 ap->a_td); 4250 if (attrflag != 0) { 4251 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4252 if (error == 0 && ret != 0) 4253 error = ret; 4254 } 4255 4256 switch (error) { 4257 case NFSERR_NOTSUPP: 4258 case NFSERR_OPILLEGAL: 4259 mtx_lock(&nmp->nm_mtx); 4260 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4261 mtx_unlock(&nmp->nm_mtx); 4262 error = EOPNOTSUPP; 4263 break; 4264 case NFSERR_NOXATTR: 4265 case NFSERR_XATTR2BIG: 4266 error = ENOATTR; 4267 break; 4268 default: 4269 error = nfscl_maperr(ap->a_td, error, 0, 0); 4270 break; 4271 } 4272 return (error); 4273 } 4274 4275 /* 4276 * Return POSIX pathconf information applicable to nfs filesystems. 4277 */ 4278 static int 4279 nfs_pathconf(struct vop_pathconf_args *ap) 4280 { 4281 struct nfsv3_pathconf pc; 4282 struct nfsvattr nfsva; 4283 struct vnode *vp = ap->a_vp; 4284 struct nfsmount *nmp; 4285 struct thread *td = curthread; 4286 off_t off; 4287 bool eof; 4288 int attrflag, error; 4289 4290 if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || 4291 ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || 4292 ap->a_name == _PC_NO_TRUNC)) || 4293 (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) { 4294 /* 4295 * Since only the above 4 a_names are returned by the NFSv3 4296 * Pathconf RPC, there is no point in doing it for others. 4297 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can 4298 * be used for _PC_NFS4_ACL as well. 4299 */ 4300 error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, 4301 &attrflag, NULL); 4302 if (attrflag != 0) 4303 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 4304 1); 4305 if (error != 0) 4306 return (error); 4307 } else { 4308 /* 4309 * For NFSv2 (or NFSv3 when not one of the above 4 a_names), 4310 * just fake them. 
4311 */ 4312 pc.pc_linkmax = NFS_LINK_MAX; 4313 pc.pc_namemax = NFS_MAXNAMLEN; 4314 pc.pc_notrunc = 1; 4315 pc.pc_chownrestricted = 1; 4316 pc.pc_caseinsensitive = 0; 4317 pc.pc_casepreserving = 1; 4318 error = 0; 4319 } 4320 switch (ap->a_name) { 4321 case _PC_LINK_MAX: 4322 #ifdef _LP64 4323 *ap->a_retval = pc.pc_linkmax; 4324 #else 4325 *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); 4326 #endif 4327 break; 4328 case _PC_NAME_MAX: 4329 *ap->a_retval = pc.pc_namemax; 4330 break; 4331 case _PC_PIPE_BUF: 4332 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 4333 *ap->a_retval = PIPE_BUF; 4334 else 4335 error = EINVAL; 4336 break; 4337 case _PC_CHOWN_RESTRICTED: 4338 *ap->a_retval = pc.pc_chownrestricted; 4339 break; 4340 case _PC_NO_TRUNC: 4341 *ap->a_retval = pc.pc_notrunc; 4342 break; 4343 case _PC_ACL_NFS4: 4344 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && 4345 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) 4346 *ap->a_retval = 1; 4347 else 4348 *ap->a_retval = 0; 4349 break; 4350 case _PC_ACL_PATH_MAX: 4351 if (NFS_ISV4(vp)) 4352 *ap->a_retval = ACL_MAX_ENTRIES; 4353 else 4354 *ap->a_retval = 3; 4355 break; 4356 case _PC_PRIO_IO: 4357 *ap->a_retval = 0; 4358 break; 4359 case _PC_SYNC_IO: 4360 *ap->a_retval = 0; 4361 break; 4362 case _PC_ALLOC_SIZE_MIN: 4363 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; 4364 break; 4365 case _PC_FILESIZEBITS: 4366 if (NFS_ISV34(vp)) 4367 *ap->a_retval = 64; 4368 else 4369 *ap->a_retval = 32; 4370 break; 4371 case _PC_REC_INCR_XFER_SIZE: 4372 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4373 break; 4374 case _PC_REC_MAX_XFER_SIZE: 4375 *ap->a_retval = -1; /* means ``unlimited'' */ 4376 break; 4377 case _PC_REC_MIN_XFER_SIZE: 4378 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4379 break; 4380 case _PC_REC_XFER_ALIGN: 4381 *ap->a_retval = PAGE_SIZE; 4382 break; 4383 case _PC_SYMLINK_MAX: 4384 *ap->a_retval = NFS_MAXPATHLEN; 4385 break; 4386 case _PC_MIN_HOLE_SIZE: 4387 /* Only some NFSv4.2 servers 
support Seek for Holes. */ 4388 *ap->a_retval = 0; 4389 nmp = VFSTONFS(vp->v_mount); 4390 if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) { 4391 /* 4392 * NFSv4.2 doesn't have an attribute for hole size, 4393 * so all we can do is see if the Seek operation is 4394 * supported and then use f_iosize as a "best guess". 4395 */ 4396 mtx_lock(&nmp->nm_mtx); 4397 if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) { 4398 mtx_unlock(&nmp->nm_mtx); 4399 off = 0; 4400 attrflag = 0; 4401 error = nfsrpc_seek(vp, &off, &eof, 4402 NFSV4CONTENT_HOLE, td->td_ucred, &nfsva, 4403 &attrflag); 4404 if (attrflag != 0) 4405 nfscl_loadattrcache(&vp, &nfsva, 4406 NULL, NULL, 0, 1); 4407 mtx_lock(&nmp->nm_mtx); 4408 if (error == NFSERR_NOTSUPP) 4409 nmp->nm_privflag |= NFSMNTP_SEEKTESTED; 4410 else 4411 nmp->nm_privflag |= NFSMNTP_SEEKTESTED | 4412 NFSMNTP_SEEK; 4413 error = 0; 4414 } 4415 if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0) 4416 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4417 mtx_unlock(&nmp->nm_mtx); 4418 } 4419 break; 4420 4421 default: 4422 error = vop_stdpathconf(ap); 4423 break; 4424 } 4425 return (error); 4426 } 4427