1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 35 */ 36 37 #include <sys/cdefs.h> 38 /* 39 * vnode op calls for Sun NFS version 2, 3 and 4 40 */ 41 42 #include "opt_inet.h" 43 44 #include <sys/param.h> 45 #include <sys/kernel.h> 46 #include <sys/systm.h> 47 #include <sys/resourcevar.h> 48 #include <sys/proc.h> 49 #include <sys/mount.h> 50 #include <sys/bio.h> 51 #include <sys/buf.h> 52 #include <sys/extattr.h> 53 #include <sys/filio.h> 54 #include <sys/jail.h> 55 #include <sys/malloc.h> 56 #include <sys/mbuf.h> 57 #include <sys/namei.h> 58 #include <sys/socket.h> 59 #include <sys/vnode.h> 60 #include <sys/dirent.h> 61 #include <sys/fcntl.h> 62 #include <sys/lockf.h> 63 #include <sys/stat.h> 64 #include <sys/sysctl.h> 65 #include <sys/signalvar.h> 66 67 #include <vm/vm.h> 68 #include <vm/vm_extern.h> 69 #include <vm/vm_object.h> 70 #include <vm/vnode_pager.h> 71 72 #include <fs/nfs/nfsport.h> 73 #include <fs/nfsclient/nfsnode.h> 74 #include <fs/nfsclient/nfsmount.h> 75 #include <fs/nfsclient/nfs.h> 76 #include <fs/nfsclient/nfs_kdtrace.h> 77 78 #include <net/if.h> 79 #include <netinet/in.h> 80 #include <netinet/in_var.h> 81 82 #include <nfs/nfs_lock.h> 83 84 #ifdef KDTRACE_HOOKS 85 #include <sys/dtrace_bsd.h> 86 87 dtrace_nfsclient_accesscache_flush_probe_func_t 88 dtrace_nfscl_accesscache_flush_done_probe; 89 uint32_t nfscl_accesscache_flush_done_id; 90 91 dtrace_nfsclient_accesscache_get_probe_func_t 92 dtrace_nfscl_accesscache_get_hit_probe, 93 dtrace_nfscl_accesscache_get_miss_probe; 94 uint32_t nfscl_accesscache_get_hit_id; 95 uint32_t nfscl_accesscache_get_miss_id; 96 97 dtrace_nfsclient_accesscache_load_probe_func_t 98 dtrace_nfscl_accesscache_load_done_probe; 99 uint32_t nfscl_accesscache_load_done_id; 100 #endif /* !KDTRACE_HOOKS */ 101 102 /* Defs */ 103 #define TRUE 1 104 #define FALSE 0 105 106 extern struct nfsstatsv1 nfsstatsv1; 107 extern int nfsrv_useacl; 108 extern int nfscl_debuglevel; 109 MALLOC_DECLARE(M_NEWNFSREQ); 110 111 static vop_read_t 
nfsfifo_read; 112 static vop_write_t nfsfifo_write; 113 static vop_close_t nfsfifo_close; 114 static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, 115 struct thread *); 116 static vop_lookup_t nfs_lookup; 117 static vop_create_t nfs_create; 118 static vop_mknod_t nfs_mknod; 119 static vop_open_t nfs_open; 120 static vop_pathconf_t nfs_pathconf; 121 static vop_close_t nfs_close; 122 static vop_access_t nfs_access; 123 static vop_getattr_t nfs_getattr; 124 static vop_setattr_t nfs_setattr; 125 static vop_read_t nfs_read; 126 static vop_fsync_t nfs_fsync; 127 static vop_remove_t nfs_remove; 128 static vop_link_t nfs_link; 129 static vop_rename_t nfs_rename; 130 static vop_mkdir_t nfs_mkdir; 131 static vop_rmdir_t nfs_rmdir; 132 static vop_symlink_t nfs_symlink; 133 static vop_readdir_t nfs_readdir; 134 static vop_strategy_t nfs_strategy; 135 static int nfs_lookitup(struct vnode *, char *, int, 136 struct ucred *, struct thread *, struct nfsnode **); 137 static int nfs_sillyrename(struct vnode *, struct vnode *, 138 struct componentname *); 139 static vop_access_t nfsspec_access; 140 static vop_readlink_t nfs_readlink; 141 static vop_print_t nfs_print; 142 static vop_advlock_t nfs_advlock; 143 static vop_advlockasync_t nfs_advlockasync; 144 static vop_getacl_t nfs_getacl; 145 static vop_setacl_t nfs_setacl; 146 static vop_advise_t nfs_advise; 147 static vop_allocate_t nfs_allocate; 148 static vop_deallocate_t nfs_deallocate; 149 static vop_copy_file_range_t nfs_copy_file_range; 150 static vop_ioctl_t nfs_ioctl; 151 static vop_getextattr_t nfs_getextattr; 152 static vop_setextattr_t nfs_setextattr; 153 static vop_listextattr_t nfs_listextattr; 154 static vop_deleteextattr_t nfs_deleteextattr; 155 static vop_lock1_t nfs_lock; 156 157 /* 158 * Global vfs data structures for nfs 159 */ 160 161 static struct vop_vector newnfs_vnodeops_nosig = { 162 .vop_default = &default_vnodeops, 163 .vop_access = nfs_access, 164 .vop_advlock = nfs_advlock, 165 
.vop_advlockasync = nfs_advlockasync, 166 .vop_close = nfs_close, 167 .vop_create = nfs_create, 168 .vop_fsync = nfs_fsync, 169 .vop_getattr = nfs_getattr, 170 .vop_getpages = ncl_getpages, 171 .vop_putpages = ncl_putpages, 172 .vop_inactive = ncl_inactive, 173 .vop_link = nfs_link, 174 .vop_lock1 = nfs_lock, 175 .vop_lookup = nfs_lookup, 176 .vop_mkdir = nfs_mkdir, 177 .vop_mknod = nfs_mknod, 178 .vop_open = nfs_open, 179 .vop_pathconf = nfs_pathconf, 180 .vop_print = nfs_print, 181 .vop_read = nfs_read, 182 .vop_readdir = nfs_readdir, 183 .vop_readlink = nfs_readlink, 184 .vop_reclaim = ncl_reclaim, 185 .vop_remove = nfs_remove, 186 .vop_rename = nfs_rename, 187 .vop_rmdir = nfs_rmdir, 188 .vop_setattr = nfs_setattr, 189 .vop_strategy = nfs_strategy, 190 .vop_symlink = nfs_symlink, 191 .vop_write = ncl_write, 192 .vop_getacl = nfs_getacl, 193 .vop_setacl = nfs_setacl, 194 .vop_advise = nfs_advise, 195 .vop_allocate = nfs_allocate, 196 .vop_deallocate = nfs_deallocate, 197 .vop_copy_file_range = nfs_copy_file_range, 198 .vop_ioctl = nfs_ioctl, 199 .vop_getextattr = nfs_getextattr, 200 .vop_setextattr = nfs_setextattr, 201 .vop_listextattr = nfs_listextattr, 202 .vop_deleteextattr = nfs_deleteextattr, 203 }; 204 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops_nosig); 205 206 static int 207 nfs_vnodeops_bypass(struct vop_generic_args *a) 208 { 209 210 return (vop_sigdefer(&newnfs_vnodeops_nosig, a)); 211 } 212 213 struct vop_vector newnfs_vnodeops = { 214 .vop_default = &default_vnodeops, 215 .vop_bypass = nfs_vnodeops_bypass, 216 }; 217 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops); 218 219 static struct vop_vector newnfs_fifoops_nosig = { 220 .vop_default = &fifo_specops, 221 .vop_access = nfsspec_access, 222 .vop_close = nfsfifo_close, 223 .vop_fsync = nfs_fsync, 224 .vop_getattr = nfs_getattr, 225 .vop_inactive = ncl_inactive, 226 .vop_pathconf = nfs_pathconf, 227 .vop_print = nfs_print, 228 .vop_read = nfsfifo_read, 229 .vop_reclaim = ncl_reclaim, 230 .vop_setattr = 
nfs_setattr, 231 .vop_write = nfsfifo_write, 232 }; 233 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops_nosig); 234 235 static int 236 nfs_fifoops_bypass(struct vop_generic_args *a) 237 { 238 239 return (vop_sigdefer(&newnfs_fifoops_nosig, a)); 240 } 241 242 struct vop_vector newnfs_fifoops = { 243 .vop_default = &default_vnodeops, 244 .vop_bypass = nfs_fifoops_bypass, 245 }; 246 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops); 247 248 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 249 struct componentname *cnp, struct vattr *vap); 250 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 251 int namelen, struct ucred *cred, struct thread *td); 252 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, 253 char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, 254 char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); 255 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, 256 struct componentname *scnp, struct sillyrename *sp); 257 258 /* 259 * Global variables 260 */ 261 SYSCTL_DECL(_vfs_nfs); 262 263 static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 264 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 265 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); 266 267 static int nfs_prime_access_cache = 0; 268 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, 269 &nfs_prime_access_cache, 0, 270 "Prime NFS ACCESS cache when fetching attributes"); 271 272 static int newnfs_commit_on_close = 0; 273 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, 274 &newnfs_commit_on_close, 0, "write+commit on close, else only write"); 275 276 static int nfs_clean_pages_on_close = 1; 277 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 278 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 279 280 int newnfs_directio_enable = 0; 281 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, 282 &newnfs_directio_enable, 0, "Enable NFS 
directio"); 283 284 int nfs_keep_dirty_on_error; 285 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, 286 &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); 287 288 /* 289 * This sysctl allows other processes to mmap a file that has been opened 290 * O_DIRECT by a process. In general, having processes mmap the file while 291 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 292 * this by default to prevent DoS attacks - to prevent a malicious user from 293 * opening up files O_DIRECT preventing other users from mmap'ing these 294 * files. "Protected" environments where stricter consistency guarantees are 295 * required can disable this knob. The process that opened the file O_DIRECT 296 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 297 * meaningful. 298 */ 299 int newnfs_directio_allow_mmap = 1; 300 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, 301 &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 302 303 static uint64_t nfs_maxalloclen = 64 * 1024 * 1024; 304 SYSCTL_U64(_vfs_nfs, OID_AUTO, maxalloclen, CTLFLAG_RW, 305 &nfs_maxalloclen, 0, "NFS max allocate/deallocate length"); 306 307 #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ 308 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ 309 | NFSACCESS_DELETE | NFSACCESS_LOOKUP) 310 311 /* 312 * SMP Locking Note : 313 * The list of locks after the description of the lock is the ordering 314 * of other locks acquired with the lock held. 315 * np->n_mtx : Protects the fields in the nfsnode. 316 VM Object Lock 317 VI_MTX (acquired indirectly) 318 * nmp->nm_mtx : Protects the fields in the nfsmount. 319 rep->r_mtx 320 * ncl_iod_mutex : Global lock, protects shared nfsiod state. 321 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 322 nmp->nm_mtx 323 rep->r_mtx 324 * rep->r_mtx : Protects the fields in an nfsreq. 
325 */ 326 327 static int 328 nfs_lock(struct vop_lock1_args *ap) 329 { 330 struct vnode *vp; 331 struct nfsnode *np; 332 u_quad_t nsize; 333 int error, lktype; 334 bool onfault; 335 336 vp = ap->a_vp; 337 lktype = ap->a_flags & LK_TYPE_MASK; 338 error = VOP_LOCK1_APV(&default_vnodeops, ap); 339 if (error != 0 || vp->v_op != &newnfs_vnodeops) 340 return (error); 341 np = VTONFS(vp); 342 if (np == NULL) 343 return (0); 344 NFSLOCKNODE(np); 345 if ((np->n_flag & NVNSETSZSKIP) == 0 || (lktype != LK_SHARED && 346 lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE && 347 lktype != LK_TRYUPGRADE)) { 348 NFSUNLOCKNODE(np); 349 return (0); 350 } 351 onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT && 352 (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE && 353 (lktype == LK_SHARED || lktype == LK_EXCLUSIVE); 354 if (onfault && vp->v_vnlock->lk_recurse == 0) { 355 /* 356 * Force retry in vm_fault(), to make the lock request 357 * sleepable, which allows us to piggy-back the 358 * sleepable call to vnode_pager_setsize(). 
359 */ 360 NFSUNLOCKNODE(np); 361 VOP_UNLOCK(vp); 362 return (EBUSY); 363 } 364 if ((ap->a_flags & LK_NOWAIT) != 0 || 365 (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) { 366 NFSUNLOCKNODE(np); 367 return (0); 368 } 369 if (lktype == LK_SHARED) { 370 NFSUNLOCKNODE(np); 371 VOP_UNLOCK(vp); 372 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 373 ap->a_flags |= LK_EXCLUSIVE; 374 error = VOP_LOCK1_APV(&default_vnodeops, ap); 375 if (error != 0 || vp->v_op != &newnfs_vnodeops) 376 return (error); 377 if (vp->v_data == NULL) 378 goto downgrade; 379 MPASS(vp->v_data == np); 380 NFSLOCKNODE(np); 381 if ((np->n_flag & NVNSETSZSKIP) == 0) { 382 NFSUNLOCKNODE(np); 383 goto downgrade; 384 } 385 } 386 np->n_flag &= ~NVNSETSZSKIP; 387 nsize = np->n_size; 388 NFSUNLOCKNODE(np); 389 vnode_pager_setsize(vp, nsize); 390 downgrade: 391 if (lktype == LK_SHARED) { 392 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 393 ap->a_flags |= LK_DOWNGRADE; 394 (void)VOP_LOCK1_APV(&default_vnodeops, ap); 395 } 396 return (0); 397 } 398 399 static int 400 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 401 struct ucred *cred, u_int32_t *retmode) 402 { 403 int error = 0, attrflag, i, lrupos; 404 u_int32_t rmode; 405 struct nfsnode *np = VTONFS(vp); 406 struct nfsvattr nfsva; 407 408 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 409 &rmode); 410 if (attrflag) 411 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 412 if (!error) { 413 lrupos = 0; 414 NFSLOCKNODE(np); 415 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 416 if (np->n_accesscache[i].uid == cred->cr_uid) { 417 np->n_accesscache[i].mode = rmode; 418 np->n_accesscache[i].stamp = time_second; 419 break; 420 } 421 if (i > 0 && np->n_accesscache[i].stamp < 422 np->n_accesscache[lrupos].stamp) 423 lrupos = i; 424 } 425 if (i == NFS_ACCESSCACHESIZE) { 426 np->n_accesscache[lrupos].uid = cred->cr_uid; 427 np->n_accesscache[lrupos].mode = rmode; 428 np->n_accesscache[lrupos].stamp = time_second; 429 } 430 
NFSUNLOCKNODE(np); 431 if (retmode != NULL) 432 *retmode = rmode; 433 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 434 } else if (NFS_ISV4(vp)) { 435 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 436 } 437 #ifdef KDTRACE_HOOKS 438 if (error != 0) 439 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 440 error); 441 #endif 442 return (error); 443 } 444 445 /* 446 * nfs access vnode op. 447 * For nfs version 2, just return ok. File accesses may fail later. 448 * For nfs version 3, use the access rpc to check accessibility. If file modes 449 * are changed on the server, accesses might still fail later. 450 */ 451 static int 452 nfs_access(struct vop_access_args *ap) 453 { 454 struct vnode *vp = ap->a_vp; 455 int error = 0, i, gotahit; 456 u_int32_t mode, wmode, rmode; 457 int v34 = NFS_ISV34(vp); 458 struct nfsnode *np = VTONFS(vp); 459 460 /* 461 * Disallow write attempts on filesystems mounted read-only; 462 * unless the file is a socket, fifo, or a block or character 463 * device resident on the filesystem. 464 */ 465 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 466 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 467 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 468 switch (vp->v_type) { 469 case VREG: 470 case VDIR: 471 case VLNK: 472 return (EROFS); 473 default: 474 break; 475 } 476 } 477 /* 478 * For nfs v3 or v4, check to see if we have done this recently, and if 479 * so return our cached result instead of making an ACCESS call. 480 * If not, do an access rpc, otherwise you are stuck emulating 481 * ufs_access() locally using the vattr. This may not be correct, 482 * since the server may apply other access criteria such as 483 * client uid-->server uid mapping that we do not know about. 
484 */ 485 if (v34) { 486 if (ap->a_accmode & VREAD) 487 mode = NFSACCESS_READ; 488 else 489 mode = 0; 490 if (vp->v_type != VDIR) { 491 if (ap->a_accmode & VWRITE) 492 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 493 if (ap->a_accmode & VAPPEND) 494 mode |= NFSACCESS_EXTEND; 495 if (ap->a_accmode & VEXEC) 496 mode |= NFSACCESS_EXECUTE; 497 if (ap->a_accmode & VDELETE) 498 mode |= NFSACCESS_DELETE; 499 } else { 500 if (ap->a_accmode & VWRITE) 501 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 502 if (ap->a_accmode & VAPPEND) 503 mode |= NFSACCESS_EXTEND; 504 if (ap->a_accmode & VEXEC) 505 mode |= NFSACCESS_LOOKUP; 506 if (ap->a_accmode & VDELETE) 507 mode |= NFSACCESS_DELETE; 508 if (ap->a_accmode & VDELETE_CHILD) 509 mode |= NFSACCESS_MODIFY; 510 } 511 /* XXX safety belt, only make blanket request if caching */ 512 if (nfsaccess_cache_timeout > 0) { 513 wmode = NFSACCESS_READ | NFSACCESS_MODIFY | 514 NFSACCESS_EXTEND | NFSACCESS_EXECUTE | 515 NFSACCESS_DELETE | NFSACCESS_LOOKUP; 516 } else { 517 wmode = mode; 518 } 519 520 /* 521 * Does our cached result allow us to give a definite yes to 522 * this request? 523 */ 524 gotahit = 0; 525 NFSLOCKNODE(np); 526 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 527 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 528 if (time_second < (np->n_accesscache[i].stamp 529 + nfsaccess_cache_timeout) && 530 (np->n_accesscache[i].mode & mode) == mode) { 531 NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); 532 gotahit = 1; 533 } 534 break; 535 } 536 } 537 NFSUNLOCKNODE(np); 538 #ifdef KDTRACE_HOOKS 539 if (gotahit != 0) 540 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 541 ap->a_cred->cr_uid, mode); 542 else 543 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 544 ap->a_cred->cr_uid, mode); 545 #endif 546 if (gotahit == 0) { 547 /* 548 * Either a no, or a don't know. Go to the wire. 
549 */ 550 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 551 error = nfs34_access_otw(vp, wmode, ap->a_td, 552 ap->a_cred, &rmode); 553 if (!error && 554 (rmode & mode) != mode) 555 error = EACCES; 556 } 557 return (error); 558 } else { 559 if ((error = nfsspec_access(ap)) != 0) { 560 return (error); 561 } 562 /* 563 * Attempt to prevent a mapped root from accessing a file 564 * which it shouldn't. We try to read a byte from the file 565 * if the user is root and the file is not zero length. 566 * After calling nfsspec_access, we should have the correct 567 * file size cached. 568 */ 569 NFSLOCKNODE(np); 570 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 571 && VTONFS(vp)->n_size > 0) { 572 struct iovec aiov; 573 struct uio auio; 574 char buf[1]; 575 576 NFSUNLOCKNODE(np); 577 aiov.iov_base = buf; 578 aiov.iov_len = 1; 579 auio.uio_iov = &aiov; 580 auio.uio_iovcnt = 1; 581 auio.uio_offset = 0; 582 auio.uio_resid = 1; 583 auio.uio_segflg = UIO_SYSSPACE; 584 auio.uio_rw = UIO_READ; 585 auio.uio_td = ap->a_td; 586 587 if (vp->v_type == VREG) 588 error = ncl_readrpc(vp, &auio, ap->a_cred); 589 else if (vp->v_type == VDIR) { 590 char* bp; 591 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 592 aiov.iov_base = bp; 593 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 594 error = ncl_readdirrpc(vp, &auio, ap->a_cred, 595 ap->a_td); 596 free(bp, M_TEMP); 597 } else if (vp->v_type == VLNK) 598 error = ncl_readlinkrpc(vp, &auio, ap->a_cred); 599 else 600 error = EACCES; 601 } else 602 NFSUNLOCKNODE(np); 603 return (error); 604 } 605 } 606 607 /* 608 * nfs open vnode op 609 * Check to see if the type is ok 610 * and that deletion is not in progress. 611 * For paged in text files, you will need to flush the page cache 612 * if consistency is lost. 
613 */ 614 /* ARGSUSED */ 615 static int 616 nfs_open(struct vop_open_args *ap) 617 { 618 struct vnode *vp = ap->a_vp; 619 struct nfsnode *np = VTONFS(vp); 620 struct vattr vattr; 621 int error; 622 int fmode = ap->a_mode; 623 struct ucred *cred; 624 vm_object_t obj; 625 626 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 627 return (EOPNOTSUPP); 628 629 /* 630 * For NFSv4, we need to do the Open Op before cache validation, 631 * so that we conform to RFC3530 Sec. 9.3.1. 632 */ 633 if (NFS_ISV4(vp)) { 634 error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); 635 if (error) { 636 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 637 (gid_t)0); 638 return (error); 639 } 640 } 641 642 /* 643 * Now, if this Open will be doing reading, re-validate/flush the 644 * cache, so that Close/Open coherency is maintained. 645 */ 646 NFSLOCKNODE(np); 647 if (np->n_flag & NMODIFIED) { 648 NFSUNLOCKNODE(np); 649 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 650 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 651 if (VN_IS_DOOMED(vp)) 652 return (EBADF); 653 } 654 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 655 if (error == EINTR || error == EIO) { 656 if (NFS_ISV4(vp)) 657 (void) nfsrpc_close(vp, 0, ap->a_td); 658 return (error); 659 } 660 NFSLOCKNODE(np); 661 np->n_attrstamp = 0; 662 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 663 if (vp->v_type == VDIR) 664 np->n_direofoffset = 0; 665 NFSUNLOCKNODE(np); 666 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 667 if (error) { 668 if (NFS_ISV4(vp)) 669 (void) nfsrpc_close(vp, 0, ap->a_td); 670 return (error); 671 } 672 NFSLOCKNODE(np); 673 np->n_mtime = vattr.va_mtime; 674 if (NFS_ISV4(vp)) 675 np->n_change = vattr.va_filerev; 676 } else { 677 NFSUNLOCKNODE(np); 678 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 679 if (error) { 680 if (NFS_ISV4(vp)) 681 (void) nfsrpc_close(vp, 0, ap->a_td); 682 return (error); 683 } 684 NFSLOCKNODE(np); 685 if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || 686 NFS_TIMESPEC_COMPARE(&np->n_mtime, 
&vattr.va_mtime)) { 687 if (vp->v_type == VDIR) 688 np->n_direofoffset = 0; 689 NFSUNLOCKNODE(np); 690 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 691 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 692 if (VN_IS_DOOMED(vp)) 693 return (EBADF); 694 } 695 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 696 if (error == EINTR || error == EIO) { 697 if (NFS_ISV4(vp)) 698 (void) nfsrpc_close(vp, 0, ap->a_td); 699 return (error); 700 } 701 NFSLOCKNODE(np); 702 np->n_mtime = vattr.va_mtime; 703 if (NFS_ISV4(vp)) 704 np->n_change = vattr.va_filerev; 705 } 706 } 707 708 /* 709 * If the object has >= 1 O_DIRECT active opens, we disable caching. 710 */ 711 if (newnfs_directio_enable && (fmode & O_DIRECT) && 712 (vp->v_type == VREG)) { 713 if (np->n_directio_opens == 0) { 714 NFSUNLOCKNODE(np); 715 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 716 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 717 if (VN_IS_DOOMED(vp)) 718 return (EBADF); 719 } 720 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 721 if (error) { 722 if (NFS_ISV4(vp)) 723 (void) nfsrpc_close(vp, 0, ap->a_td); 724 return (error); 725 } 726 NFSLOCKNODE(np); 727 np->n_flag |= NNONCACHE; 728 } 729 np->n_directio_opens++; 730 } 731 732 /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ 733 if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) 734 np->n_flag |= NWRITEOPENED; 735 736 /* 737 * If this is an open for writing, capture a reference to the 738 * credentials, so they can be used by ncl_putpages(). Using 739 * these write credentials is preferable to the credentials of 740 * whatever thread happens to be doing the VOP_PUTPAGES() since 741 * the write RPCs are less likely to fail with EACCES. 
742 */ 743 if ((fmode & FWRITE) != 0) { 744 cred = np->n_writecred; 745 np->n_writecred = crhold(ap->a_cred); 746 } else 747 cred = NULL; 748 NFSUNLOCKNODE(np); 749 750 if (cred != NULL) 751 crfree(cred); 752 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 753 754 /* 755 * If the text file has been mmap'd, flush any dirty pages to the 756 * buffer cache and then... 757 * Make sure all writes are pushed to the NFS server. If this is not 758 * done, the modify time of the file can change while the text 759 * file is being executed. This will cause the process that is 760 * executing the text file to be terminated. 761 */ 762 if (vp->v_writecount <= -1) { 763 if ((obj = vp->v_object) != NULL && 764 vm_object_mightbedirty(obj)) { 765 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 766 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 767 if (VN_IS_DOOMED(vp)) 768 return (EBADF); 769 } 770 vnode_pager_clean_sync(vp); 771 } 772 773 /* Now, flush the buffer cache. */ 774 ncl_flush(vp, MNT_WAIT, curthread, 0, 0); 775 776 /* And, finally, make sure that n_mtime is up to date. */ 777 np = VTONFS(vp); 778 NFSLOCKNODE(np); 779 np->n_mtime = np->n_vattr.na_mtime; 780 NFSUNLOCKNODE(np); 781 } 782 return (0); 783 } 784 785 /* 786 * nfs close vnode op 787 * What an NFS client should do upon close after writing is a debatable issue. 788 * Most NFS clients push delayed writes to the server upon close, basically for 789 * two reasons: 790 * 1 - So that any write errors may be reported back to the client process 791 * doing the close system call. By far the two most likely errors are 792 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 793 * 2 - To put a worst case upper bound on cache inconsistency between 794 * multiple clients for the file. 795 * There is also a consistency problem for Version 2 of the protocol w.r.t. 
796 * not being able to tell if other clients are writing a file concurrently, 797 * since there is no way of knowing if the changed modify time in the reply 798 * is only due to the write for this client. 799 * (NFS Version 3 provides weak cache consistency data in the reply that 800 * should be sufficient to detect and handle this case.) 801 * 802 * The current code does the following: 803 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 804 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 805 * or commit them (this satisfies 1 and 2 except for the 806 * case where the server crashes after this close but 807 * before the commit RPC, which is felt to be "good 808 * enough". Changing the last argument to ncl_flush() to 809 * a 1 would force a commit operation, if it is felt a 810 * commit is necessary now. 811 * for NFS Version 4 - flush the dirty buffers and commit them, if 812 * nfscl_mustflush() says this is necessary. 813 * It is necessary if there is no write delegation held, 814 * in order to satisfy open/close coherency. 815 * If the file isn't cached on local stable storage, 816 * it may be necessary in order to detect "out of space" 817 * errors from the server, if the write delegation 818 * issued by the server doesn't allow the file to grow. 819 */ 820 /* ARGSUSED */ 821 static int 822 nfs_close(struct vop_close_args *ap) 823 { 824 struct vnode *vp = ap->a_vp; 825 struct nfsnode *np = VTONFS(vp); 826 struct nfsvattr nfsva; 827 struct ucred *cred; 828 int error = 0, ret, localcred = 0; 829 int fmode = ap->a_fflag; 830 831 if (NFSCL_FORCEDISM(vp->v_mount)) 832 return (0); 833 /* 834 * During shutdown, a_cred isn't valid, so just use root. 835 */ 836 if (ap->a_cred == NOCRED) { 837 cred = newnfs_getcred(); 838 localcred = 1; 839 } else { 840 cred = ap->a_cred; 841 } 842 if (vp->v_type == VREG) { 843 /* 844 * Examine and clean dirty pages, regardless of NMODIFIED. 
845 * This closes a major hole in close-to-open consistency. 846 * We want to push out all dirty pages (and buffers) on 847 * close, regardless of whether they were dirtied by 848 * mmap'ed writes or via write(). 849 */ 850 if (nfs_clean_pages_on_close && vp->v_object) { 851 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 852 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 853 if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK) 854 return (EBADF); 855 } 856 vnode_pager_clean_async(vp); 857 } 858 NFSLOCKNODE(np); 859 if (np->n_flag & NMODIFIED) { 860 NFSUNLOCKNODE(np); 861 if (NFS_ISV3(vp)) { 862 /* 863 * Under NFSv3 we have dirty buffers to dispose of. We 864 * must flush them to the NFS server. We have the option 865 * of waiting all the way through the commit rpc or just 866 * waiting for the initial write. The default is to only 867 * wait through the initial write so the data is in the 868 * server's cache, which is roughly similar to the state 869 * a standard disk subsystem leaves the file in on close(). 870 * 871 * We cannot clear the NMODIFIED bit in np->n_flag due to 872 * potential races with other processes, and certainly 873 * cannot clear it if we don't commit. 874 * These races occur when there is no longer the old 875 * traditional vnode locking implemented for Vnode Ops. 876 */ 877 int cm = newnfs_commit_on_close ? 1 : 0; 878 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 879 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 880 if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK) 881 return (EBADF); 882 } 883 error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); 884 /* np->n_flag &= ~NMODIFIED; */ 885 } else if (NFS_ISV4(vp)) { 886 if (nfscl_mustflush(vp) != 0) { 887 int cm = newnfs_commit_on_close ? 
1 : 0; 888 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 889 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 890 if (VN_IS_DOOMED(vp) && ap->a_fflag != 891 FNONBLOCK) 892 return (EBADF); 893 } 894 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 895 cm, 0); 896 /* 897 * as above w.r.t races when clearing 898 * NMODIFIED. 899 * np->n_flag &= ~NMODIFIED; 900 */ 901 } 902 } else { 903 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 904 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 905 if (VN_IS_DOOMED(vp) && ap->a_fflag != 906 FNONBLOCK) 907 return (EBADF); 908 } 909 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 910 } 911 NFSLOCKNODE(np); 912 } 913 /* 914 * Invalidate the attribute cache in all cases. 915 * An open is going to fetch fresh attrs any way, other procs 916 * on this node that have file open will be forced to do an 917 * otw attr fetch, but this is safe. 918 * --> A user found that their RPC count dropped by 20% when 919 * this was commented out and I can't see any requirement 920 * for it, so I've disabled it when negative lookups are 921 * enabled. (What does this have to do with negative lookup 922 * caching? Well nothing, except it was reported by the 923 * same user that needed negative lookup caching and I wanted 924 * there to be a way to disable it to see if it 925 * is the cause of some caching/coherency issue that might 926 * crop up.) 927 */ 928 if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { 929 np->n_attrstamp = 0; 930 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 931 } 932 if (np->n_flag & NWRITEERR) { 933 np->n_flag &= ~NWRITEERR; 934 error = np->n_error; 935 } 936 NFSUNLOCKNODE(np); 937 } 938 939 if (NFS_ISV4(vp)) { 940 /* 941 * Get attributes so "change" is up to date. 
942 */ 943 if (error == 0 && nfscl_nodeleg(vp, 0) != 0 && 944 vp->v_type == VREG && 945 (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) { 946 ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva); 947 if (!ret) { 948 np->n_change = nfsva.na_filerev; 949 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 950 0, 0); 951 } 952 } 953 954 /* 955 * and do the close. 956 */ 957 ret = nfsrpc_close(vp, 0, ap->a_td); 958 if (!error && ret) 959 error = ret; 960 if (error) 961 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 962 (gid_t)0); 963 } 964 if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 965 NFSLOCKNODE(np); 966 KASSERT((np->n_directio_opens > 0), 967 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 968 np->n_directio_opens--; 969 if (np->n_directio_opens == 0) 970 np->n_flag &= ~NNONCACHE; 971 NFSUNLOCKNODE(np); 972 } 973 if (localcred) 974 NFSFREECRED(cred); 975 return (error); 976 } 977 978 /* 979 * nfs getattr call from vfs. 980 */ 981 static int 982 nfs_getattr(struct vop_getattr_args *ap) 983 { 984 struct vnode *vp = ap->a_vp; 985 struct thread *td = curthread; /* XXX */ 986 struct nfsnode *np = VTONFS(vp); 987 int error = 0; 988 struct nfsvattr nfsva; 989 struct vattr *vap = ap->a_vap; 990 struct vattr vattr; 991 struct nfsmount *nmp; 992 993 nmp = VFSTONFS(vp->v_mount); 994 /* 995 * Update local times for special files. 996 */ 997 NFSLOCKNODE(np); 998 if (np->n_flag & (NACC | NUPD)) 999 np->n_flag |= NCHG; 1000 NFSUNLOCKNODE(np); 1001 /* 1002 * First look in the cache. 1003 * For "syskrb5" mounts, nm_fhsize might still be zero and 1004 * cached attributes should be ignored. 1005 */ 1006 if (nmp->nm_fhsize > 0 && ncl_getattrcache(vp, &vattr) == 0) { 1007 ncl_copy_vattr(vap, &vattr); 1008 1009 /* 1010 * Get the local modify time for the case of a write 1011 * delegation. 
1012 */ 1013 nfscl_deleggetmodtime(vp, &vap->va_mtime); 1014 return (0); 1015 } 1016 1017 if (NFS_ISV34(vp) && nfs_prime_access_cache && 1018 nfsaccess_cache_timeout > 0) { 1019 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 1020 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); 1021 if (ncl_getattrcache(vp, ap->a_vap) == 0) { 1022 nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); 1023 return (0); 1024 } 1025 } 1026 error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva); 1027 if (!error) 1028 error = nfscl_loadattrcache(&vp, &nfsva, vap, 0, 0); 1029 if (!error) { 1030 /* 1031 * Get the local modify time for the case of a write 1032 * delegation. 1033 */ 1034 nfscl_deleggetmodtime(vp, &vap->va_mtime); 1035 } else if (NFS_ISV4(vp)) { 1036 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1037 } 1038 return (error); 1039 } 1040 1041 /* 1042 * nfs setattr call. 1043 */ 1044 static int 1045 nfs_setattr(struct vop_setattr_args *ap) 1046 { 1047 struct vnode *vp = ap->a_vp; 1048 struct nfsnode *np = VTONFS(vp); 1049 struct thread *td = curthread; /* XXX */ 1050 struct vattr *vap = ap->a_vap; 1051 int error = 0; 1052 u_quad_t tsize; 1053 struct timespec ts; 1054 1055 #ifndef nolint 1056 tsize = (u_quad_t)0; 1057 #endif 1058 1059 /* 1060 * Setting of flags and marking of atimes are not supported. 1061 */ 1062 if (vap->va_flags != VNOVAL) 1063 return (EOPNOTSUPP); 1064 1065 /* 1066 * Disallow write attempts if the filesystem is mounted read-only. 
1067 */ 1068 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 1069 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 1070 vap->va_mtime.tv_sec != VNOVAL || 1071 vap->va_birthtime.tv_sec != VNOVAL || 1072 vap->va_mode != (mode_t)VNOVAL) && 1073 (vp->v_mount->mnt_flag & MNT_RDONLY)) 1074 return (EROFS); 1075 if (vap->va_size != VNOVAL) { 1076 switch (vp->v_type) { 1077 case VDIR: 1078 return (EISDIR); 1079 case VCHR: 1080 case VBLK: 1081 case VSOCK: 1082 case VFIFO: 1083 if (vap->va_mtime.tv_sec == VNOVAL && 1084 vap->va_atime.tv_sec == VNOVAL && 1085 vap->va_birthtime.tv_sec == VNOVAL && 1086 vap->va_mode == (mode_t)VNOVAL && 1087 vap->va_uid == (uid_t)VNOVAL && 1088 vap->va_gid == (gid_t)VNOVAL) 1089 return (0); 1090 vap->va_size = VNOVAL; 1091 break; 1092 default: 1093 /* 1094 * Disallow write attempts if the filesystem is 1095 * mounted read-only. 1096 */ 1097 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1098 return (EROFS); 1099 /* 1100 * We run vnode_pager_setsize() early (why?), 1101 * we must set np->n_size now to avoid vinvalbuf 1102 * V_SAVE races that might setsize a lower 1103 * value. 1104 */ 1105 NFSLOCKNODE(np); 1106 tsize = np->n_size; 1107 NFSUNLOCKNODE(np); 1108 error = ncl_meta_setsize(vp, td, vap->va_size); 1109 NFSLOCKNODE(np); 1110 if (np->n_flag & NMODIFIED) { 1111 tsize = np->n_size; 1112 NFSUNLOCKNODE(np); 1113 error = ncl_vinvalbuf(vp, vap->va_size == 0 ? 1114 0 : V_SAVE, td, 1); 1115 if (error != 0) { 1116 vnode_pager_setsize(vp, tsize); 1117 return (error); 1118 } 1119 /* 1120 * Call nfscl_delegmodtime() to set the modify time 1121 * locally, as required. 1122 */ 1123 nfscl_delegmodtime(vp); 1124 } else 1125 NFSUNLOCKNODE(np); 1126 /* 1127 * np->n_size has already been set to vap->va_size 1128 * in ncl_meta_setsize(). We must set it again since 1129 * nfs_loadattrcache() could be called through 1130 * ncl_meta_setsize() and could modify np->n_size. 
1131 */ 1132 NFSLOCKNODE(np); 1133 np->n_vattr.na_size = np->n_size = vap->va_size; 1134 NFSUNLOCKNODE(np); 1135 } 1136 } else { 1137 NFSLOCKNODE(np); 1138 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 1139 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 1140 NFSUNLOCKNODE(np); 1141 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 1142 if (error == EINTR || error == EIO) 1143 return (error); 1144 } else 1145 NFSUNLOCKNODE(np); 1146 } 1147 error = nfs_setattrrpc(vp, vap, ap->a_cred, td); 1148 if (vap->va_size != VNOVAL) { 1149 if (error == 0) { 1150 nanouptime(&ts); 1151 NFSLOCKNODE(np); 1152 np->n_localmodtime = ts; 1153 NFSUNLOCKNODE(np); 1154 } else { 1155 NFSLOCKNODE(np); 1156 np->n_size = np->n_vattr.na_size = tsize; 1157 vnode_pager_setsize(vp, tsize); 1158 NFSUNLOCKNODE(np); 1159 } 1160 } 1161 return (error); 1162 } 1163 1164 /* 1165 * Do an nfs setattr rpc. 1166 */ 1167 static int 1168 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, 1169 struct thread *td) 1170 { 1171 struct nfsnode *np = VTONFS(vp); 1172 int error, ret, attrflag, i; 1173 struct nfsvattr nfsva; 1174 1175 if (NFS_ISV34(vp)) { 1176 NFSLOCKNODE(np); 1177 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 1178 np->n_accesscache[i].stamp = 0; 1179 np->n_flag |= NDELEGMOD; 1180 NFSUNLOCKNODE(np); 1181 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 1182 } 1183 error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag); 1184 if (attrflag) { 1185 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1186 if (ret && !error) 1187 error = ret; 1188 } 1189 if (error && NFS_ISV4(vp)) 1190 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); 1191 return (error); 1192 } 1193 1194 /* 1195 * nfs lookup call, one step at a time... 
1196 * First look in cache 1197 * If not found, unlock the directory nfsnode and do the rpc 1198 */ 1199 static int 1200 nfs_lookup(struct vop_lookup_args *ap) 1201 { 1202 struct componentname *cnp = ap->a_cnp; 1203 struct vnode *dvp = ap->a_dvp; 1204 struct vnode **vpp = ap->a_vpp; 1205 struct mount *mp = dvp->v_mount; 1206 int flags = cnp->cn_flags; 1207 struct vnode *newvp; 1208 struct nfsmount *nmp; 1209 struct nfsnode *np, *newnp; 1210 int error = 0, attrflag, dattrflag, ltype, ncticks; 1211 struct thread *td = curthread; 1212 struct nfsfh *nfhp; 1213 struct nfsvattr dnfsva, nfsva; 1214 struct vattr vattr; 1215 struct timespec nctime, ts; 1216 uint32_t openmode; 1217 1218 *vpp = NULLVP; 1219 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 1220 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 1221 return (EROFS); 1222 if (dvp->v_type != VDIR) 1223 return (ENOTDIR); 1224 nmp = VFSTONFS(mp); 1225 np = VTONFS(dvp); 1226 1227 /* For NFSv4, wait until any remove is done. */ 1228 NFSLOCKNODE(np); 1229 while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { 1230 np->n_flag |= NREMOVEWANT; 1231 (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); 1232 } 1233 NFSUNLOCKNODE(np); 1234 1235 error = vn_dir_check_exec(dvp, cnp); 1236 if (error != 0) 1237 return (error); 1238 error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); 1239 if (error > 0 && error != ENOENT) 1240 return (error); 1241 if (error == -1) { 1242 /* 1243 * Lookups of "." are special and always return the 1244 * current directory. cache_lookup() already handles 1245 * associated locking bookkeeping, etc. 1246 */ 1247 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { 1248 return (0); 1249 } 1250 1251 /* 1252 * We only accept a positive hit in the cache if the 1253 * change time of the file matches our cached copy. 1254 * Otherwise, we discard the cache entry and fallback 1255 * to doing a lookup RPC. 
We also only trust cache 1256 * entries for less than nm_nametimeo seconds. 1257 * 1258 * To better handle stale file handles and attributes, 1259 * clear the attribute cache of this node if it is a 1260 * leaf component, part of an open() call, and not 1261 * locally modified before fetching the attributes. 1262 * This should allow stale file handles to be detected 1263 * here where we can fall back to a LOOKUP RPC to 1264 * recover rather than having nfs_open() detect the 1265 * stale file handle and failing open(2) with ESTALE. 1266 */ 1267 newvp = *vpp; 1268 newnp = VTONFS(newvp); 1269 if (!(nmp->nm_flag & NFSMNT_NOCTO) && 1270 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1271 !(newnp->n_flag & NMODIFIED)) { 1272 NFSLOCKNODE(newnp); 1273 newnp->n_attrstamp = 0; 1274 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1275 NFSUNLOCKNODE(newnp); 1276 } 1277 if (nfscl_nodeleg(newvp, 0) == 0 || 1278 ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && 1279 VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && 1280 timespeccmp(&vattr.va_ctime, &nctime, ==))) { 1281 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1282 return (0); 1283 } 1284 cache_purge(newvp); 1285 if (dvp != newvp) 1286 vput(newvp); 1287 else 1288 vrele(newvp); 1289 *vpp = NULLVP; 1290 } else if (error == ENOENT) { 1291 if (VN_IS_DOOMED(dvp)) 1292 return (ENOENT); 1293 /* 1294 * We only accept a negative hit in the cache if the 1295 * modification time of the parent directory matches 1296 * the cached copy in the name cache entry. 1297 * Otherwise, we discard all of the negative cache 1298 * entries for this directory. We also only trust 1299 * negative cache entries for up to nm_negnametimeo 1300 * seconds. 
1301 */ 1302 if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && 1303 VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 1304 timespeccmp(&vattr.va_mtime, &nctime, ==)) { 1305 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1306 return (ENOENT); 1307 } 1308 cache_purge_negative(dvp); 1309 } 1310 1311 openmode = 0; 1312 /* 1313 * If this an NFSv4.1/4.2 mount using the "oneopenown" mount 1314 * option, it is possible to do the Open operation in the same 1315 * compound as Lookup, so long as delegations are not being 1316 * issued. This saves doing a separate RPC for Open. 1317 * For pnfs, do not do this, since the Open+LayoutGet will 1318 * be needed as a separate RPC. 1319 */ 1320 NFSLOCKMNT(nmp); 1321 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && !NFSHASPNFS(nmp) && 1322 (nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 && 1323 (!NFSMNT_RDONLY(mp) || (flags & OPENWRITE) == 0) && 1324 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN)) { 1325 if ((flags & OPENREAD) != 0) 1326 openmode |= NFSV4OPEN_ACCESSREAD; 1327 if ((flags & OPENWRITE) != 0) 1328 openmode |= NFSV4OPEN_ACCESSWRITE; 1329 } 1330 NFSUNLOCKMNT(nmp); 1331 1332 newvp = NULLVP; 1333 NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); 1334 nanouptime(&ts); 1335 error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1336 cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1337 openmode); 1338 if (dattrflag) 1339 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1340 if (error) { 1341 if (newvp != NULLVP) { 1342 vput(newvp); 1343 *vpp = NULLVP; 1344 } 1345 1346 if (error != ENOENT) { 1347 if (NFS_ISV4(dvp)) 1348 error = nfscl_maperr(td, error, (uid_t)0, 1349 (gid_t)0); 1350 return (error); 1351 } 1352 1353 /* The requested file was not found. */ 1354 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 1355 (flags & ISLASTCN)) { 1356 /* 1357 * XXX: UFS does a full VOP_ACCESS(dvp, 1358 * VWRITE) here instead of just checking 1359 * MNT_RDONLY. 
1360 */ 1361 if (mp->mnt_flag & MNT_RDONLY) 1362 return (EROFS); 1363 return (EJUSTRETURN); 1364 } 1365 1366 if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) { 1367 /* 1368 * Cache the modification time of the parent 1369 * directory from the post-op attributes in 1370 * the name cache entry. The negative cache 1371 * entry will be ignored once the directory 1372 * has changed. Don't bother adding the entry 1373 * if the directory has already changed. 1374 */ 1375 NFSLOCKNODE(np); 1376 if (timespeccmp(&np->n_vattr.na_mtime, 1377 &dnfsva.na_mtime, ==)) { 1378 NFSUNLOCKNODE(np); 1379 cache_enter_time(dvp, NULL, cnp, 1380 &dnfsva.na_mtime, NULL); 1381 } else 1382 NFSUNLOCKNODE(np); 1383 } 1384 return (ENOENT); 1385 } 1386 1387 /* 1388 * Handle RENAME case... 1389 */ 1390 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1391 if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1392 free(nfhp, M_NFSFH); 1393 return (EISDIR); 1394 } 1395 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, LK_EXCLUSIVE); 1396 if (error) 1397 return (error); 1398 newvp = NFSTOV(np); 1399 /* 1400 * If n_localmodtime >= time before RPC, then 1401 * a file modification operation, such as 1402 * VOP_SETATTR() of size, has occurred while 1403 * the Lookup RPC and acquisition of the vnode 1404 * happened. As such, the attributes might 1405 * be stale, with possibly an incorrect size. 
1406 */ 1407 NFSLOCKNODE(np); 1408 if (timespecisset(&np->n_localmodtime) && 1409 timespeccmp(&np->n_localmodtime, &ts, >=)) { 1410 NFSCL_DEBUG(4, "nfs_lookup: rename localmod " 1411 "stale attributes\n"); 1412 attrflag = 0; 1413 } 1414 NFSUNLOCKNODE(np); 1415 if (attrflag) 1416 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1417 *vpp = newvp; 1418 return (0); 1419 } 1420 1421 if (flags & ISDOTDOT) { 1422 ltype = NFSVOPISLOCKED(dvp); 1423 error = vfs_busy(mp, MBF_NOWAIT); 1424 if (error != 0) { 1425 vfs_ref(mp); 1426 NFSVOPUNLOCK(dvp); 1427 error = vfs_busy(mp, 0); 1428 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1429 vfs_rel(mp); 1430 if (error == 0 && VN_IS_DOOMED(dvp)) { 1431 vfs_unbusy(mp); 1432 error = ENOENT; 1433 } 1434 if (error != 0) 1435 return (error); 1436 } 1437 NFSVOPUNLOCK(dvp); 1438 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, 1439 cnp->cn_lkflags); 1440 if (error == 0) 1441 newvp = NFSTOV(np); 1442 vfs_unbusy(mp); 1443 if (newvp != dvp) 1444 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1445 if (VN_IS_DOOMED(dvp)) { 1446 if (error == 0) { 1447 if (newvp == dvp) 1448 vrele(newvp); 1449 else 1450 vput(newvp); 1451 } 1452 error = ENOENT; 1453 } 1454 if (error != 0) 1455 return (error); 1456 if (attrflag) 1457 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1458 } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1459 free(nfhp, M_NFSFH); 1460 VREF(dvp); 1461 newvp = dvp; 1462 if (attrflag) 1463 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1464 } else { 1465 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, 1466 cnp->cn_lkflags); 1467 if (error) 1468 return (error); 1469 newvp = NFSTOV(np); 1470 /* 1471 * If n_localmodtime >= time before RPC, then 1472 * a file modification operation, such as 1473 * VOP_SETATTR() of size, has occurred while 1474 * the Lookup RPC and acquisition of the vnode 1475 * happened. As such, the attributes might 1476 * be stale, with possibly an incorrect size. 
1477 */ 1478 NFSLOCKNODE(np); 1479 if (timespecisset(&np->n_localmodtime) && 1480 timespeccmp(&np->n_localmodtime, &ts, >=)) { 1481 NFSCL_DEBUG(4, "nfs_lookup: localmod " 1482 "stale attributes\n"); 1483 attrflag = 0; 1484 } 1485 NFSUNLOCKNODE(np); 1486 if (attrflag) 1487 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1488 else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1489 !(np->n_flag & NMODIFIED)) { 1490 /* 1491 * Flush the attribute cache when opening a 1492 * leaf node to ensure that fresh attributes 1493 * are fetched in nfs_open() since we did not 1494 * fetch attributes from the LOOKUP reply. 1495 */ 1496 NFSLOCKNODE(np); 1497 np->n_attrstamp = 0; 1498 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1499 NFSUNLOCKNODE(np); 1500 } 1501 } 1502 if ((cnp->cn_flags & MAKEENTRY) && dvp != newvp && 1503 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && 1504 attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) 1505 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 1506 newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); 1507 *vpp = newvp; 1508 return (0); 1509 } 1510 1511 /* 1512 * nfs read call. 1513 * Just call ncl_bioread() to do the work. 1514 */ 1515 static int 1516 nfs_read(struct vop_read_args *ap) 1517 { 1518 struct vnode *vp = ap->a_vp; 1519 1520 switch (vp->v_type) { 1521 case VREG: 1522 return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); 1523 case VDIR: 1524 return (EISDIR); 1525 default: 1526 return (EOPNOTSUPP); 1527 } 1528 } 1529 1530 /* 1531 * nfs readlink call 1532 */ 1533 static int 1534 nfs_readlink(struct vop_readlink_args *ap) 1535 { 1536 struct vnode *vp = ap->a_vp; 1537 1538 if (vp->v_type != VLNK) 1539 return (EINVAL); 1540 return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); 1541 } 1542 1543 /* 1544 * Do a readlink rpc. 1545 * Called by ncl_doio() from below the buffer cache. 
1546 */ 1547 int 1548 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1549 { 1550 int error, ret, attrflag; 1551 struct nfsvattr nfsva; 1552 1553 error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, 1554 &attrflag); 1555 if (attrflag) { 1556 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1557 if (ret && !error) 1558 error = ret; 1559 } 1560 if (error && NFS_ISV4(vp)) 1561 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1562 return (error); 1563 } 1564 1565 /* 1566 * nfs read rpc call 1567 * Ditto above 1568 */ 1569 int 1570 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1571 { 1572 int error, ret, attrflag; 1573 struct nfsvattr nfsva; 1574 struct nfsmount *nmp; 1575 1576 nmp = VFSTONFS(vp->v_mount); 1577 error = EIO; 1578 attrflag = 0; 1579 if (NFSHASPNFS(nmp)) 1580 error = nfscl_doiods(vp, uiop, NULL, NULL, 1581 NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td); 1582 NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); 1583 if (error != 0 && error != EFAULT) 1584 error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, 1585 &attrflag); 1586 if (attrflag) { 1587 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1588 if (ret && !error) 1589 error = ret; 1590 } 1591 if (error && NFS_ISV4(vp)) 1592 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1593 return (error); 1594 } 1595 1596 /* 1597 * nfs write call 1598 */ 1599 int 1600 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1601 int *iomode, int *must_commit, int called_from_strategy, int ioflag) 1602 { 1603 struct nfsvattr nfsva; 1604 int error, attrflag, ret; 1605 struct nfsmount *nmp; 1606 1607 nmp = VFSTONFS(vp->v_mount); 1608 error = EIO; 1609 attrflag = 0; 1610 if (NFSHASPNFS(nmp)) 1611 error = nfscl_doiods(vp, uiop, iomode, must_commit, 1612 NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td); 1613 NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); 1614 if (error != 0 && error != EFAULT) 1615 error = 
nfsrpc_write(vp, uiop, iomode, must_commit, cred, 1616 uiop->uio_td, &nfsva, &attrflag, called_from_strategy, 1617 ioflag); 1618 if (attrflag) { 1619 if (VTONFS(vp)->n_flag & ND_NFSV4) 1620 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 1, 1); 1621 else 1622 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1623 if (ret && !error) 1624 error = ret; 1625 } 1626 if (DOINGASYNC(vp)) 1627 *iomode = NFSWRITE_FILESYNC; 1628 if (error && NFS_ISV4(vp)) 1629 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1630 return (error); 1631 } 1632 1633 /* 1634 * nfs mknod rpc 1635 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1636 * mode set to specify the file type and the size field for rdev. 1637 */ 1638 static int 1639 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1640 struct vattr *vap) 1641 { 1642 struct nfsvattr nfsva, dnfsva; 1643 struct vnode *newvp = NULL; 1644 struct nfsnode *np = NULL, *dnp; 1645 struct nfsfh *nfhp; 1646 struct vattr vattr; 1647 int error = 0, attrflag, dattrflag; 1648 u_int32_t rdev; 1649 1650 if (vap->va_type == VCHR || vap->va_type == VBLK) 1651 rdev = vap->va_rdev; 1652 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1653 rdev = 0xffffffff; 1654 else 1655 return (EOPNOTSUPP); 1656 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1657 return (error); 1658 error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, 1659 rdev, vap->va_type, cnp->cn_cred, curthread, &dnfsva, 1660 &nfsva, &nfhp, &attrflag, &dattrflag); 1661 if (!error) { 1662 if (!nfhp) 1663 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1664 cnp->cn_namelen, cnp->cn_cred, curthread, 1665 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 0); 1666 if (nfhp) 1667 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1668 curthread, &np, LK_EXCLUSIVE); 1669 } 1670 if (dattrflag) 1671 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1672 if (!error) { 1673 newvp = NFSTOV(np); 1674 if (attrflag != 0) { 
1675 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1676 if (error != 0) 1677 vput(newvp); 1678 } 1679 } 1680 if (!error) { 1681 *vpp = newvp; 1682 } else if (NFS_ISV4(dvp)) { 1683 error = nfscl_maperr(curthread, error, vap->va_uid, 1684 vap->va_gid); 1685 } 1686 dnp = VTONFS(dvp); 1687 NFSLOCKNODE(dnp); 1688 dnp->n_flag |= NMODIFIED; 1689 if (!dattrflag) { 1690 dnp->n_attrstamp = 0; 1691 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1692 } 1693 NFSUNLOCKNODE(dnp); 1694 return (error); 1695 } 1696 1697 /* 1698 * nfs mknod vop 1699 * just call nfs_mknodrpc() to do the work. 1700 */ 1701 /* ARGSUSED */ 1702 static int 1703 nfs_mknod(struct vop_mknod_args *ap) 1704 { 1705 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1706 } 1707 1708 static struct mtx nfs_cverf_mtx; 1709 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", 1710 MTX_DEF); 1711 1712 static nfsquad_t 1713 nfs_get_cverf(void) 1714 { 1715 static nfsquad_t cverf; 1716 nfsquad_t ret; 1717 static int cverf_initialized = 0; 1718 1719 mtx_lock(&nfs_cverf_mtx); 1720 if (cverf_initialized == 0) { 1721 cverf.lval[0] = arc4random(); 1722 cverf.lval[1] = arc4random(); 1723 cverf_initialized = 1; 1724 } else 1725 cverf.qval++; 1726 ret = cverf; 1727 mtx_unlock(&nfs_cverf_mtx); 1728 1729 return (ret); 1730 } 1731 1732 /* 1733 * nfs file create call 1734 */ 1735 static int 1736 nfs_create(struct vop_create_args *ap) 1737 { 1738 struct vnode *dvp = ap->a_dvp; 1739 struct vattr *vap = ap->a_vap; 1740 struct componentname *cnp = ap->a_cnp; 1741 struct nfsnode *np = NULL, *dnp; 1742 struct vnode *newvp = NULL; 1743 struct nfsmount *nmp; 1744 struct nfsvattr dnfsva, nfsva; 1745 struct nfsfh *nfhp; 1746 nfsquad_t cverf; 1747 int error = 0, attrflag, dattrflag, fmode = 0; 1748 struct vattr vattr; 1749 1750 /* 1751 * Oops, not for me.. 
1752 */ 1753 if (vap->va_type == VSOCK) 1754 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1755 1756 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1757 return (error); 1758 if (vap->va_vaflags & VA_EXCLUSIVE) 1759 fmode |= O_EXCL; 1760 dnp = VTONFS(dvp); 1761 nmp = VFSTONFS(dvp->v_mount); 1762 again: 1763 /* For NFSv4, wait until any remove is done. */ 1764 NFSLOCKNODE(dnp); 1765 while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { 1766 dnp->n_flag |= NREMOVEWANT; 1767 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); 1768 } 1769 NFSUNLOCKNODE(dnp); 1770 1771 cverf = nfs_get_cverf(); 1772 error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1773 vap, cverf, fmode, cnp->cn_cred, curthread, &dnfsva, &nfsva, 1774 &nfhp, &attrflag, &dattrflag); 1775 if (!error) { 1776 if (nfhp == NULL) 1777 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1778 cnp->cn_namelen, cnp->cn_cred, curthread, 1779 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 0); 1780 if (nfhp != NULL) 1781 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1782 curthread, &np, LK_EXCLUSIVE); 1783 } 1784 if (dattrflag) 1785 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1786 if (!error) { 1787 newvp = NFSTOV(np); 1788 if (attrflag == 0) 1789 error = nfsrpc_getattr(newvp, cnp->cn_cred, curthread, 1790 &nfsva); 1791 if (error == 0) 1792 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1793 } 1794 if (error) { 1795 if (newvp != NULL) { 1796 vput(newvp); 1797 newvp = NULL; 1798 } 1799 if (NFS_ISV34(dvp) && (fmode & O_EXCL) && 1800 error == NFSERR_NOTSUPP) { 1801 fmode &= ~O_EXCL; 1802 goto again; 1803 } 1804 } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { 1805 if (nfscl_checksattr(vap, &nfsva)) { 1806 error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, 1807 curthread, &nfsva, &attrflag); 1808 if (error && (vap->va_uid != (uid_t)VNOVAL || 1809 vap->va_gid != (gid_t)VNOVAL)) { 1810 /* try again without setting uid/gid */ 1811 vap->va_uid = (uid_t)VNOVAL; 1812 
vap->va_gid = (uid_t)VNOVAL; 1813 error = nfsrpc_setattr(newvp, vap, NULL, 1814 cnp->cn_cred, curthread, &nfsva, &attrflag); 1815 } 1816 if (attrflag) 1817 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1818 0, 1); 1819 if (error != 0) 1820 vput(newvp); 1821 } 1822 } 1823 if (!error) { 1824 if ((cnp->cn_flags & MAKEENTRY) && attrflag) { 1825 if (dvp != newvp) 1826 cache_enter_time(dvp, newvp, cnp, 1827 &nfsva.na_ctime, NULL); 1828 else 1829 printf("nfs_create: bogus NFS server returned " 1830 "the directory as the new file object\n"); 1831 } 1832 *ap->a_vpp = newvp; 1833 } else if (NFS_ISV4(dvp)) { 1834 error = nfscl_maperr(curthread, error, vap->va_uid, 1835 vap->va_gid); 1836 } 1837 NFSLOCKNODE(dnp); 1838 dnp->n_flag |= NMODIFIED; 1839 if (!dattrflag) { 1840 dnp->n_attrstamp = 0; 1841 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1842 } 1843 NFSUNLOCKNODE(dnp); 1844 return (error); 1845 } 1846 1847 /* 1848 * nfs file remove call 1849 * To try and make nfs semantics closer to ufs semantics, a file that has 1850 * other processes using the vnode is renamed instead of removed and then 1851 * removed later on the last close. 1852 * - If v_usecount > 1 1853 * If a rename is not already in the works 1854 * call nfs_sillyrename() to set it up 1855 * else 1856 * do the remove rpc 1857 */ 1858 static int 1859 nfs_remove(struct vop_remove_args *ap) 1860 { 1861 struct vnode *vp = ap->a_vp; 1862 struct vnode *dvp = ap->a_dvp; 1863 struct componentname *cnp = ap->a_cnp; 1864 struct nfsnode *np = VTONFS(vp); 1865 int error = 0; 1866 struct vattr vattr; 1867 1868 KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); 1869 if (vp->v_type == VDIR) 1870 error = EPERM; 1871 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1872 VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && 1873 vattr.va_nlink > 1)) { 1874 /* 1875 * Purge the name cache so that the chance of a lookup for 1876 * the name succeeding while the remove is in progress is 1877 * minimized. 
Without node locking it can still happen, such 1878 * that an I/O op returns ESTALE, but since you get this if 1879 * another host removes the file.. 1880 */ 1881 cache_purge(vp); 1882 /* 1883 * throw away biocache buffers, mainly to avoid 1884 * unnecessary delayed writes later. 1885 */ 1886 error = ncl_vinvalbuf(vp, 0, curthread, 1); 1887 if (error != EINTR && error != EIO) 1888 /* Do the rpc */ 1889 error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, 1890 cnp->cn_namelen, cnp->cn_cred, curthread); 1891 /* 1892 * Kludge City: If the first reply to the remove rpc is lost.. 1893 * the reply to the retransmitted request will be ENOENT 1894 * since the file was in fact removed 1895 * Therefore, we cheat and return success. 1896 */ 1897 if (error == ENOENT) 1898 error = 0; 1899 } else if (!np->n_sillyrename) 1900 error = nfs_sillyrename(dvp, vp, cnp); 1901 NFSLOCKNODE(np); 1902 np->n_attrstamp = 0; 1903 NFSUNLOCKNODE(np); 1904 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1905 return (error); 1906 } 1907 1908 /* 1909 * nfs file remove rpc called from nfs_inactive 1910 */ 1911 int 1912 ncl_removeit(struct sillyrename *sp, struct vnode *vp) 1913 { 1914 /* 1915 * Make sure that the directory vnode is still valid. 1916 * XXX we should lock sp->s_dvp here. 1917 */ 1918 if (sp->s_dvp->v_type == VBAD) 1919 return (0); 1920 return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, 1921 sp->s_cred, NULL)); 1922 } 1923 1924 /* 1925 * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). 
1926 */ 1927 static int 1928 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 1929 int namelen, struct ucred *cred, struct thread *td) 1930 { 1931 struct nfsvattr dnfsva; 1932 struct nfsnode *dnp = VTONFS(dvp); 1933 int error = 0, dattrflag; 1934 1935 NFSLOCKNODE(dnp); 1936 dnp->n_flag |= NREMOVEINPROG; 1937 NFSUNLOCKNODE(dnp); 1938 error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, 1939 &dattrflag); 1940 NFSLOCKNODE(dnp); 1941 if ((dnp->n_flag & NREMOVEWANT)) { 1942 dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); 1943 NFSUNLOCKNODE(dnp); 1944 wakeup((caddr_t)dnp); 1945 } else { 1946 dnp->n_flag &= ~NREMOVEINPROG; 1947 NFSUNLOCKNODE(dnp); 1948 } 1949 if (dattrflag) 1950 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1951 NFSLOCKNODE(dnp); 1952 dnp->n_flag |= NMODIFIED; 1953 if (!dattrflag) { 1954 dnp->n_attrstamp = 0; 1955 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1956 } 1957 NFSUNLOCKNODE(dnp); 1958 if (error && NFS_ISV4(dvp)) 1959 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1960 return (error); 1961 } 1962 1963 /* 1964 * nfs file rename call 1965 */ 1966 static int 1967 nfs_rename(struct vop_rename_args *ap) 1968 { 1969 struct vnode *fvp = ap->a_fvp; 1970 struct vnode *tvp = ap->a_tvp; 1971 struct vnode *fdvp = ap->a_fdvp; 1972 struct vnode *tdvp = ap->a_tdvp; 1973 struct componentname *tcnp = ap->a_tcnp; 1974 struct componentname *fcnp = ap->a_fcnp; 1975 struct nfsnode *fnp = VTONFS(ap->a_fvp); 1976 struct nfsnode *tdnp = VTONFS(ap->a_tdvp); 1977 struct nfsv4node *newv4 = NULL; 1978 int error; 1979 1980 /* Check for cross-device rename */ 1981 if ((fvp->v_mount != tdvp->v_mount) || 1982 (tvp && (fvp->v_mount != tvp->v_mount))) { 1983 error = EXDEV; 1984 goto out; 1985 } 1986 1987 if (fvp == tvp) { 1988 printf("nfs_rename: fvp == tvp (can't happen)\n"); 1989 error = 0; 1990 goto out; 1991 } 1992 if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) 1993 goto out; 1994 1995 /* 1996 * We have to flush B_DELWRI data prior to 
renaming 1997 * the file. If we don't, the delayed-write buffers 1998 * can be flushed out later after the file has gone stale 1999 * under NFSV3. NFSV2 does not have this problem because 2000 * ( as far as I can tell ) it flushes dirty buffers more 2001 * often. 2002 * 2003 * Skip the rename operation if the fsync fails, this can happen 2004 * due to the server's volume being full, when we pushed out data 2005 * that was written back to our cache earlier. Not checking for 2006 * this condition can result in potential (silent) data loss. 2007 */ 2008 error = VOP_FSYNC(fvp, MNT_WAIT, curthread); 2009 NFSVOPUNLOCK(fvp); 2010 if (!error && tvp) 2011 error = VOP_FSYNC(tvp, MNT_WAIT, curthread); 2012 if (error) 2013 goto out; 2014 2015 /* 2016 * If the tvp exists and is in use, sillyrename it before doing the 2017 * rename of the new file over it. 2018 * XXX Can't sillyrename a directory. 2019 */ 2020 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && 2021 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { 2022 vput(tvp); 2023 tvp = NULL; 2024 } 2025 2026 error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, 2027 tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, 2028 curthread); 2029 2030 if (error == 0 && NFS_ISV4(tdvp)) { 2031 /* 2032 * For NFSv4, check to see if it is the same name and 2033 * replace the name, if it is different. 
2034 */ 2035 newv4 = malloc( 2036 sizeof (struct nfsv4node) + 2037 tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, 2038 M_NFSV4NODE, M_WAITOK); 2039 NFSLOCKNODE(tdnp); 2040 NFSLOCKNODE(fnp); 2041 if (fnp->n_v4 != NULL && fvp->v_type == VREG && 2042 (fnp->n_v4->n4_namelen != tcnp->cn_namelen || 2043 NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), 2044 tcnp->cn_namelen) || 2045 tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || 2046 NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 2047 tdnp->n_fhp->nfh_len))) { 2048 free(fnp->n_v4, M_NFSV4NODE); 2049 fnp->n_v4 = newv4; 2050 newv4 = NULL; 2051 fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; 2052 fnp->n_v4->n4_namelen = tcnp->cn_namelen; 2053 NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 2054 tdnp->n_fhp->nfh_len); 2055 NFSBCOPY(tcnp->cn_nameptr, 2056 NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); 2057 } 2058 NFSUNLOCKNODE(tdnp); 2059 NFSUNLOCKNODE(fnp); 2060 if (newv4 != NULL) 2061 free(newv4, M_NFSV4NODE); 2062 } 2063 2064 if (fvp->v_type == VDIR) { 2065 if (tvp != NULL && tvp->v_type == VDIR) 2066 cache_purge(tdvp); 2067 cache_purge(fdvp); 2068 } 2069 2070 out: 2071 if (tdvp == tvp) 2072 vrele(tdvp); 2073 else 2074 vput(tdvp); 2075 if (tvp) 2076 vput(tvp); 2077 vrele(fdvp); 2078 vrele(fvp); 2079 /* 2080 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 2081 */ 2082 if (error == ENOENT) 2083 error = 0; 2084 return (error); 2085 } 2086 2087 /* 2088 * nfs file rename rpc called from nfs_remove() above 2089 */ 2090 static int 2091 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, 2092 struct sillyrename *sp) 2093 { 2094 2095 return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, 2096 sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, 2097 curthread)); 2098 } 2099 2100 /* 2101 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
2102 */ 2103 static int 2104 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, 2105 int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, 2106 int tnamelen, struct ucred *cred, struct thread *td) 2107 { 2108 struct nfsvattr fnfsva, tnfsva; 2109 struct nfsnode *fdnp = VTONFS(fdvp); 2110 struct nfsnode *tdnp = VTONFS(tdvp); 2111 int error = 0, fattrflag, tattrflag; 2112 2113 error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, 2114 tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, 2115 &tattrflag); 2116 NFSLOCKNODE(fdnp); 2117 fdnp->n_flag |= NMODIFIED; 2118 if (fattrflag != 0) { 2119 NFSUNLOCKNODE(fdnp); 2120 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, 0, 1); 2121 } else { 2122 fdnp->n_attrstamp = 0; 2123 NFSUNLOCKNODE(fdnp); 2124 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 2125 } 2126 NFSLOCKNODE(tdnp); 2127 tdnp->n_flag |= NMODIFIED; 2128 if (tattrflag != 0) { 2129 NFSUNLOCKNODE(tdnp); 2130 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, 0, 1); 2131 } else { 2132 tdnp->n_attrstamp = 0; 2133 NFSUNLOCKNODE(tdnp); 2134 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2135 } 2136 if (error && NFS_ISV4(fdvp)) 2137 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2138 return (error); 2139 } 2140 2141 /* 2142 * nfs hard link create call 2143 */ 2144 static int 2145 nfs_link(struct vop_link_args *ap) 2146 { 2147 struct vnode *vp = ap->a_vp; 2148 struct vnode *tdvp = ap->a_tdvp; 2149 struct componentname *cnp = ap->a_cnp; 2150 struct nfsnode *np, *tdnp; 2151 struct nfsvattr nfsva, dnfsva; 2152 int error = 0, attrflag, dattrflag; 2153 2154 /* 2155 * Push all writes to the server, so that the attribute cache 2156 * doesn't get "out of sync" with the server. 2157 * XXX There should be a better way! 
2158 */ 2159 VOP_FSYNC(vp, MNT_WAIT, curthread); 2160 2161 error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, 2162 cnp->cn_cred, curthread, &dnfsva, &nfsva, &attrflag, &dattrflag); 2163 tdnp = VTONFS(tdvp); 2164 NFSLOCKNODE(tdnp); 2165 tdnp->n_flag |= NMODIFIED; 2166 if (dattrflag != 0) { 2167 NFSUNLOCKNODE(tdnp); 2168 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, 0, 1); 2169 } else { 2170 tdnp->n_attrstamp = 0; 2171 NFSUNLOCKNODE(tdnp); 2172 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2173 } 2174 if (attrflag) 2175 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 2176 else { 2177 np = VTONFS(vp); 2178 NFSLOCKNODE(np); 2179 np->n_attrstamp = 0; 2180 NFSUNLOCKNODE(np); 2181 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 2182 } 2183 /* 2184 * If negative lookup caching is enabled, I might as well 2185 * add an entry for this node. Not necessary for correctness, 2186 * but if negative caching is enabled, then the system 2187 * must care about lookup caching hit rate, so... 2188 */ 2189 if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && 2190 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2191 if (tdvp != vp) 2192 cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); 2193 else 2194 printf("nfs_link: bogus NFS server returned " 2195 "the directory as the new link\n"); 2196 } 2197 if (error && NFS_ISV4(vp)) 2198 error = nfscl_maperr(curthread, error, (uid_t)0, 2199 (gid_t)0); 2200 return (error); 2201 } 2202 2203 /* 2204 * nfs symbolic link create call 2205 */ 2206 static int 2207 nfs_symlink(struct vop_symlink_args *ap) 2208 { 2209 struct vnode *dvp = ap->a_dvp; 2210 struct vattr *vap = ap->a_vap; 2211 struct componentname *cnp = ap->a_cnp; 2212 struct nfsvattr nfsva, dnfsva; 2213 struct nfsfh *nfhp; 2214 struct nfsnode *np = NULL, *dnp; 2215 struct vnode *newvp = NULL; 2216 int error = 0, attrflag, dattrflag, ret; 2217 2218 vap->va_type = VLNK; 2219 error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2220 ap->a_target, vap, 
cnp->cn_cred, curthread, &dnfsva, 2221 &nfsva, &nfhp, &attrflag, &dattrflag); 2222 if (nfhp) { 2223 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread, 2224 &np, LK_EXCLUSIVE); 2225 if (!ret) 2226 newvp = NFSTOV(np); 2227 else if (!error) 2228 error = ret; 2229 } 2230 if (newvp != NULL) { 2231 if (attrflag) 2232 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 2233 } else if (!error) { 2234 /* 2235 * If we do not have an error and we could not extract the 2236 * newvp from the response due to the request being NFSv2, we 2237 * have to do a lookup in order to obtain a newvp to return. 2238 */ 2239 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2240 cnp->cn_cred, curthread, &np); 2241 if (!error) 2242 newvp = NFSTOV(np); 2243 } 2244 if (error) { 2245 if (newvp) 2246 vput(newvp); 2247 if (NFS_ISV4(dvp)) 2248 error = nfscl_maperr(curthread, error, 2249 vap->va_uid, vap->va_gid); 2250 } else { 2251 *ap->a_vpp = newvp; 2252 } 2253 2254 dnp = VTONFS(dvp); 2255 NFSLOCKNODE(dnp); 2256 dnp->n_flag |= NMODIFIED; 2257 if (dattrflag != 0) { 2258 NFSUNLOCKNODE(dnp); 2259 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 2260 } else { 2261 dnp->n_attrstamp = 0; 2262 NFSUNLOCKNODE(dnp); 2263 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2264 } 2265 /* 2266 * If negative lookup caching is enabled, I might as well 2267 * add an entry for this node. Not necessary for correctness, 2268 * but if negative caching is enabled, then the system 2269 * must care about lookup caching hit rate, so... 
2270 */ 2271 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2272 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2273 if (dvp != newvp) 2274 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 2275 NULL); 2276 else 2277 printf("nfs_symlink: bogus NFS server returned " 2278 "the directory as the new file object\n"); 2279 } 2280 return (error); 2281 } 2282 2283 /* 2284 * nfs make dir call 2285 */ 2286 static int 2287 nfs_mkdir(struct vop_mkdir_args *ap) 2288 { 2289 struct vnode *dvp = ap->a_dvp; 2290 struct vattr *vap = ap->a_vap; 2291 struct componentname *cnp = ap->a_cnp; 2292 struct nfsnode *np = NULL, *dnp; 2293 struct vnode *newvp = NULL; 2294 struct vattr vattr; 2295 struct nfsfh *nfhp; 2296 struct nfsvattr nfsva, dnfsva; 2297 int error = 0, attrflag, dattrflag, ret; 2298 2299 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 2300 return (error); 2301 vap->va_type = VDIR; 2302 error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2303 vap, cnp->cn_cred, curthread, &dnfsva, &nfsva, &nfhp, 2304 &attrflag, &dattrflag); 2305 dnp = VTONFS(dvp); 2306 NFSLOCKNODE(dnp); 2307 dnp->n_flag |= NMODIFIED; 2308 if (dattrflag != 0) { 2309 NFSUNLOCKNODE(dnp); 2310 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 2311 } else { 2312 dnp->n_attrstamp = 0; 2313 NFSUNLOCKNODE(dnp); 2314 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2315 } 2316 if (nfhp) { 2317 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread, 2318 &np, LK_EXCLUSIVE); 2319 if (!ret) { 2320 newvp = NFSTOV(np); 2321 if (attrflag) 2322 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 2323 0, 1); 2324 } else if (!error) 2325 error = ret; 2326 } 2327 if (!error && newvp == NULL) { 2328 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2329 cnp->cn_cred, curthread, &np); 2330 if (!error) { 2331 newvp = NFSTOV(np); 2332 if (newvp->v_type != VDIR) 2333 error = EEXIST; 2334 } 2335 } 2336 if (error) { 2337 if (newvp) 2338 vput(newvp); 2339 if (NFS_ISV4(dvp)) 2340 error = 
nfscl_maperr(curthread, error, 2341 vap->va_uid, vap->va_gid); 2342 } else { 2343 /* 2344 * If negative lookup caching is enabled, I might as well 2345 * add an entry for this node. Not necessary for correctness, 2346 * but if negative caching is enabled, then the system 2347 * must care about lookup caching hit rate, so... 2348 */ 2349 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2350 (cnp->cn_flags & MAKEENTRY) && 2351 attrflag != 0 && dattrflag != 0) { 2352 if (dvp != newvp) 2353 cache_enter_time(dvp, newvp, cnp, 2354 &nfsva.na_ctime, &dnfsva.na_ctime); 2355 else 2356 printf("nfs_mkdir: bogus NFS server returned " 2357 "the directory that the directory was " 2358 "created in as the new file object\n"); 2359 } 2360 *ap->a_vpp = newvp; 2361 } 2362 return (error); 2363 } 2364 2365 /* 2366 * nfs remove directory call 2367 */ 2368 static int 2369 nfs_rmdir(struct vop_rmdir_args *ap) 2370 { 2371 struct vnode *vp = ap->a_vp; 2372 struct vnode *dvp = ap->a_dvp; 2373 struct componentname *cnp = ap->a_cnp; 2374 struct nfsnode *dnp; 2375 struct nfsvattr dnfsva; 2376 int error, dattrflag; 2377 2378 if (dvp == vp) 2379 return (EINVAL); 2380 error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2381 cnp->cn_cred, curthread, &dnfsva, &dattrflag); 2382 dnp = VTONFS(dvp); 2383 NFSLOCKNODE(dnp); 2384 dnp->n_flag |= NMODIFIED; 2385 if (dattrflag != 0) { 2386 NFSUNLOCKNODE(dnp); 2387 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 2388 } else { 2389 dnp->n_attrstamp = 0; 2390 NFSUNLOCKNODE(dnp); 2391 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2392 } 2393 2394 cache_purge(dvp); 2395 cache_purge(vp); 2396 if (error && NFS_ISV4(dvp)) 2397 error = nfscl_maperr(curthread, error, (uid_t)0, 2398 (gid_t)0); 2399 /* 2400 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
2401 */ 2402 if (error == ENOENT) 2403 error = 0; 2404 return (error); 2405 } 2406 2407 /* 2408 * nfs readdir call 2409 */ 2410 static int 2411 nfs_readdir(struct vop_readdir_args *ap) 2412 { 2413 struct vnode *vp = ap->a_vp; 2414 struct nfsnode *np = VTONFS(vp); 2415 struct uio *uio = ap->a_uio; 2416 ssize_t tresid, left; 2417 int error = 0; 2418 struct vattr vattr; 2419 2420 if (ap->a_eofflag != NULL) 2421 *ap->a_eofflag = 0; 2422 if (vp->v_type != VDIR) 2423 return(EPERM); 2424 2425 /* 2426 * First, check for hit on the EOF offset cache 2427 */ 2428 NFSLOCKNODE(np); 2429 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2430 (np->n_flag & NMODIFIED) == 0) { 2431 NFSUNLOCKNODE(np); 2432 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2433 NFSLOCKNODE(np); 2434 if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || 2435 !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2436 NFSUNLOCKNODE(np); 2437 NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); 2438 if (ap->a_eofflag != NULL) 2439 *ap->a_eofflag = 1; 2440 return (0); 2441 } else 2442 NFSUNLOCKNODE(np); 2443 } 2444 } else 2445 NFSUNLOCKNODE(np); 2446 2447 /* 2448 * NFS always guarantees that directory entries don't straddle 2449 * DIRBLKSIZ boundaries. As such, we need to limit the size 2450 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial 2451 * directory entry. 2452 */ 2453 left = uio->uio_resid % DIRBLKSIZ; 2454 if (left == uio->uio_resid) 2455 return (EINVAL); 2456 uio->uio_resid -= left; 2457 2458 /* 2459 * For readdirplus, if starting to read the directory, 2460 * purge the name cache, since it will be reloaded by 2461 * this directory read. 2462 * This removes potentially stale name cache entries. 2463 */ 2464 if (uio->uio_offset == 0 && 2465 (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_RDIRPLUS) != 0) 2466 cache_purge(vp); 2467 2468 /* 2469 * Call ncl_bioread() to do the real work. 
2470 */ 2471 tresid = uio->uio_resid; 2472 error = ncl_bioread(vp, uio, 0, ap->a_cred); 2473 2474 if (!error && uio->uio_resid == tresid) { 2475 NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); 2476 if (ap->a_eofflag != NULL) 2477 *ap->a_eofflag = 1; 2478 } 2479 2480 /* Add the partial DIRBLKSIZ (left) back in. */ 2481 uio->uio_resid += left; 2482 return (error); 2483 } 2484 2485 /* 2486 * Readdir rpc call. 2487 * Called from below the buffer cache by ncl_doio(). 2488 */ 2489 int 2490 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2491 struct thread *td) 2492 { 2493 struct nfsvattr nfsva; 2494 nfsuint64 *cookiep, cookie; 2495 struct nfsnode *dnp = VTONFS(vp); 2496 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2497 int error = 0, eof, attrflag; 2498 2499 KASSERT(uiop->uio_iovcnt == 1 && 2500 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2501 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2502 ("nfs readdirrpc bad uio")); 2503 2504 /* 2505 * If there is no cookie, assume directory was stale. 2506 */ 2507 ncl_dircookie_lock(dnp); 2508 NFSUNLOCKNODE(dnp); 2509 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2510 if (cookiep) { 2511 cookie = *cookiep; 2512 ncl_dircookie_unlock(dnp); 2513 } else { 2514 ncl_dircookie_unlock(dnp); 2515 return (NFSERR_BAD_COOKIE); 2516 } 2517 2518 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2519 (void)ncl_fsinfo(nmp, vp, cred, td); 2520 2521 error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, 2522 &attrflag, &eof); 2523 if (attrflag) 2524 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 2525 2526 if (!error) { 2527 /* 2528 * We are now either at the end of the directory or have filled 2529 * the block. 2530 */ 2531 if (eof) { 2532 NFSLOCKNODE(dnp); 2533 dnp->n_direofoffset = uiop->uio_offset; 2534 NFSUNLOCKNODE(dnp); 2535 } else { 2536 if (uiop->uio_resid > 0) 2537 printf("EEK! 
readdirrpc resid > 0\n"); 2538 ncl_dircookie_lock(dnp); 2539 NFSUNLOCKNODE(dnp); 2540 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2541 *cookiep = cookie; 2542 ncl_dircookie_unlock(dnp); 2543 } 2544 } else if (NFS_ISV4(vp)) { 2545 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2546 } 2547 return (error); 2548 } 2549 2550 /* 2551 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). 2552 */ 2553 int 2554 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2555 struct thread *td) 2556 { 2557 struct nfsvattr nfsva; 2558 nfsuint64 *cookiep, cookie; 2559 struct nfsnode *dnp = VTONFS(vp); 2560 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2561 int error = 0, attrflag, eof; 2562 2563 KASSERT(uiop->uio_iovcnt == 1 && 2564 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2565 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2566 ("nfs readdirplusrpc bad uio")); 2567 2568 /* 2569 * If there is no cookie, assume directory was stale. 2570 */ 2571 ncl_dircookie_lock(dnp); 2572 NFSUNLOCKNODE(dnp); 2573 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2574 if (cookiep) { 2575 cookie = *cookiep; 2576 ncl_dircookie_unlock(dnp); 2577 } else { 2578 ncl_dircookie_unlock(dnp); 2579 return (NFSERR_BAD_COOKIE); 2580 } 2581 2582 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2583 (void)ncl_fsinfo(nmp, vp, cred, td); 2584 error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, 2585 &attrflag, &eof); 2586 if (attrflag) 2587 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 2588 2589 if (!error) { 2590 /* 2591 * We are now either at end of the directory or have filled the 2592 * the block. 2593 */ 2594 if (eof) { 2595 NFSLOCKNODE(dnp); 2596 dnp->n_direofoffset = uiop->uio_offset; 2597 NFSUNLOCKNODE(dnp); 2598 } else { 2599 if (uiop->uio_resid > 0) 2600 printf("EEK! 
readdirplusrpc resid > 0\n"); 2601 ncl_dircookie_lock(dnp); 2602 NFSUNLOCKNODE(dnp); 2603 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2604 *cookiep = cookie; 2605 ncl_dircookie_unlock(dnp); 2606 } 2607 } else if (NFS_ISV4(vp)) { 2608 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2609 } 2610 return (error); 2611 } 2612 2613 /* 2614 * Silly rename. To make the NFS filesystem that is stateless look a little 2615 * more like the "ufs" a remove of an active vnode is translated to a rename 2616 * to a funny looking filename that is removed by nfs_inactive on the 2617 * nfsnode. There is the potential for another process on a different client 2618 * to create the same funny name between the nfs_lookitup() fails and the 2619 * nfs_rename() completes, but... 2620 */ 2621 static int 2622 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2623 { 2624 struct sillyrename *sp; 2625 struct nfsnode *np; 2626 int error; 2627 short pid; 2628 unsigned int lticks; 2629 2630 cache_purge(dvp); 2631 np = VTONFS(vp); 2632 KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); 2633 sp = malloc(sizeof (struct sillyrename), 2634 M_NEWNFSREQ, M_WAITOK); 2635 sp->s_cred = crhold(cnp->cn_cred); 2636 sp->s_dvp = dvp; 2637 VREF(dvp); 2638 2639 /* 2640 * Fudge together a funny name. 2641 * Changing the format of the funny name to accommodate more 2642 * sillynames per directory. 2643 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2644 * CPU ticks since boot. 
2645 */ 2646 pid = curthread->td_proc->p_pid; 2647 lticks = (unsigned int)ticks; 2648 for ( ; ; ) { 2649 sp->s_namlen = sprintf(sp->s_name, 2650 ".nfs.%08x.%04x4.4", lticks, 2651 pid); 2652 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2653 curthread, NULL)) 2654 break; 2655 lticks++; 2656 } 2657 error = nfs_renameit(dvp, vp, cnp, sp); 2658 if (error) 2659 goto bad; 2660 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2661 curthread, &np); 2662 np->n_sillyrename = sp; 2663 return (0); 2664 bad: 2665 vrele(sp->s_dvp); 2666 crfree(sp->s_cred); 2667 free(sp, M_NEWNFSREQ); 2668 return (error); 2669 } 2670 2671 /* 2672 * Look up a file name and optionally either update the file handle or 2673 * allocate an nfsnode, depending on the value of npp. 2674 * npp == NULL --> just do the lookup 2675 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2676 * handled too 2677 * *npp != NULL --> update the file handle in the vnode 2678 */ 2679 static int 2680 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, 2681 struct thread *td, struct nfsnode **npp) 2682 { 2683 struct vnode *newvp = NULL, *vp; 2684 struct nfsnode *np, *dnp = VTONFS(dvp); 2685 struct nfsfh *nfhp, *onfhp; 2686 struct nfsvattr nfsva, dnfsva; 2687 struct componentname cn; 2688 int error = 0, attrflag, dattrflag; 2689 u_int hash; 2690 struct timespec ts; 2691 2692 nanouptime(&ts); 2693 error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, 2694 &nfhp, &attrflag, &dattrflag, 0); 2695 if (dattrflag) 2696 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 2697 if (npp && !error) { 2698 if (*npp != NULL) { 2699 np = *npp; 2700 vp = NFSTOV(np); 2701 /* 2702 * For NFSv4, check to see if it is the same name and 2703 * replace the name, if it is different. 
2704 */ 2705 if (np->n_v4 != NULL && nfsva.na_type == VREG && 2706 (np->n_v4->n4_namelen != len || 2707 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || 2708 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || 2709 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2710 dnp->n_fhp->nfh_len))) { 2711 free(np->n_v4, M_NFSV4NODE); 2712 np->n_v4 = malloc( 2713 sizeof (struct nfsv4node) + 2714 dnp->n_fhp->nfh_len + len - 1, 2715 M_NFSV4NODE, M_WAITOK); 2716 np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; 2717 np->n_v4->n4_namelen = len; 2718 NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2719 dnp->n_fhp->nfh_len); 2720 NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); 2721 } 2722 hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, 2723 FNV1_32_INIT); 2724 onfhp = np->n_fhp; 2725 /* 2726 * Rehash node for new file handle. 2727 */ 2728 vfs_hash_rehash(vp, hash); 2729 np->n_fhp = nfhp; 2730 if (onfhp != NULL) 2731 free(onfhp, M_NFSFH); 2732 newvp = NFSTOV(np); 2733 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { 2734 free(nfhp, M_NFSFH); 2735 VREF(dvp); 2736 newvp = dvp; 2737 } else { 2738 cn.cn_nameptr = name; 2739 cn.cn_namelen = len; 2740 error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, 2741 &np, LK_EXCLUSIVE); 2742 if (error) 2743 return (error); 2744 newvp = NFSTOV(np); 2745 /* 2746 * If n_localmodtime >= time before RPC, then 2747 * a file modification operation, such as 2748 * VOP_SETATTR() of size, has occurred while 2749 * the Lookup RPC and acquisition of the vnode 2750 * happened. As such, the attributes might 2751 * be stale, with possibly an incorrect size. 
2752 */ 2753 NFSLOCKNODE(np); 2754 if (timespecisset(&np->n_localmodtime) && 2755 timespeccmp(&np->n_localmodtime, &ts, >=)) { 2756 NFSCL_DEBUG(4, "nfs_lookitup: localmod " 2757 "stale attributes\n"); 2758 attrflag = 0; 2759 } 2760 NFSUNLOCKNODE(np); 2761 } 2762 if (!attrflag && *npp == NULL) { 2763 if (newvp == dvp) 2764 vrele(newvp); 2765 else 2766 vput(newvp); 2767 return (ENOENT); 2768 } 2769 if (attrflag) 2770 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 2771 } 2772 if (npp && *npp == NULL) { 2773 if (error) { 2774 if (newvp) { 2775 if (newvp == dvp) 2776 vrele(newvp); 2777 else 2778 vput(newvp); 2779 } 2780 } else 2781 *npp = np; 2782 } 2783 if (error && NFS_ISV4(dvp)) 2784 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2785 return (error); 2786 } 2787 2788 /* 2789 * Nfs Version 3 and 4 commit rpc 2790 */ 2791 int 2792 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2793 struct thread *td) 2794 { 2795 struct nfsvattr nfsva; 2796 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2797 struct nfsnode *np; 2798 struct uio uio; 2799 int error, attrflag; 2800 2801 np = VTONFS(vp); 2802 error = EIO; 2803 attrflag = 0; 2804 if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) { 2805 uio.uio_offset = offset; 2806 uio.uio_resid = cnt; 2807 error = nfscl_doiods(vp, &uio, NULL, NULL, 2808 NFSV4OPEN_ACCESSWRITE, 1, cred, td); 2809 if (error != 0) { 2810 NFSLOCKNODE(np); 2811 np->n_flag &= ~NDSCOMMIT; 2812 NFSUNLOCKNODE(np); 2813 } 2814 } 2815 if (error != 0) { 2816 mtx_lock(&nmp->nm_mtx); 2817 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2818 mtx_unlock(&nmp->nm_mtx); 2819 return (0); 2820 } 2821 mtx_unlock(&nmp->nm_mtx); 2822 error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, 2823 &attrflag); 2824 } 2825 if (attrflag != 0) 2826 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 2827 if (error != 0 && NFS_ISV4(vp)) 2828 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2829 return (error); 2830 } 2831 2832 /* 2833 * 
Strategy routine. 2834 * For async requests when nfsiod(s) are running, queue the request by 2835 * calling ncl_asyncio(), otherwise just all ncl_doio() to do the 2836 * request. 2837 */ 2838 static int 2839 nfs_strategy(struct vop_strategy_args *ap) 2840 { 2841 struct buf *bp; 2842 struct vnode *vp; 2843 struct ucred *cr; 2844 2845 bp = ap->a_bp; 2846 vp = ap->a_vp; 2847 KASSERT(bp->b_vp == vp, ("missing b_getvp")); 2848 KASSERT(!(bp->b_flags & B_DONE), 2849 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2850 2851 if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno) 2852 bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize / 2853 DEV_BSIZE); 2854 if (bp->b_iocmd == BIO_READ) 2855 cr = bp->b_rcred; 2856 else 2857 cr = bp->b_wcred; 2858 2859 /* 2860 * If the op is asynchronous and an i/o daemon is waiting 2861 * queue the request, wake it up and wait for completion 2862 * otherwise just do it ourselves. 2863 */ 2864 if ((bp->b_flags & B_ASYNC) == 0 || 2865 ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread)) 2866 (void) ncl_doio(vp, bp, cr, curthread, 1); 2867 return (0); 2868 } 2869 2870 /* 2871 * fsync vnode op. Just call ncl_flush() with commit == 1. 2872 */ 2873 /* ARGSUSED */ 2874 static int 2875 nfs_fsync(struct vop_fsync_args *ap) 2876 { 2877 2878 if (ap->a_vp->v_type != VREG) { 2879 /* 2880 * For NFS, metadata is changed synchronously on the server, 2881 * so there is nothing to flush. Also, ncl_flush() clears 2882 * the NMODIFIED flag and that shouldn't be done here for 2883 * directories. 2884 */ 2885 return (0); 2886 } 2887 return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0)); 2888 } 2889 2890 /* 2891 * Flush all the blocks associated with a vnode. 2892 * Walk through the buffer pool and push any dirty pages 2893 * associated with the vnode. 
2894 * If the called_from_renewthread argument is TRUE, it has been called 2895 * from the NFSv4 renew thread and, as such, cannot block indefinitely 2896 * waiting for a buffer write to complete. 2897 */ 2898 int 2899 ncl_flush(struct vnode *vp, int waitfor, struct thread *td, 2900 int commit, int called_from_renewthread) 2901 { 2902 struct nfsnode *np = VTONFS(vp); 2903 struct buf *bp; 2904 int i; 2905 struct buf *nbp; 2906 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2907 int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; 2908 int passone = 1, trycnt = 0; 2909 u_quad_t off, endoff, toff; 2910 struct ucred* wcred = NULL; 2911 struct buf **bvec = NULL; 2912 struct bufobj *bo; 2913 #ifndef NFS_COMMITBVECSIZ 2914 #define NFS_COMMITBVECSIZ 20 2915 #endif 2916 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; 2917 u_int bvecsize = 0, bveccount; 2918 struct timespec ts; 2919 2920 if (called_from_renewthread != 0) 2921 slptimeo = hz; 2922 if (nmp->nm_flag & NFSMNT_INT) 2923 slpflag = PCATCH; 2924 if (!commit) 2925 passone = 0; 2926 bo = &vp->v_bufobj; 2927 /* 2928 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the 2929 * server, but has not been committed to stable storage on the server 2930 * yet. On the first pass, the byte range is worked out and the commit 2931 * rpc is done. On the second pass, bwrite() is called to do the 2932 * job. 2933 */ 2934 again: 2935 off = (u_quad_t)-1; 2936 endoff = 0; 2937 bvecpos = 0; 2938 if (NFS_ISV34(vp) && commit) { 2939 if (bvec != NULL && bvec != bvec_on_stack) 2940 free(bvec, M_TEMP); 2941 /* 2942 * Count up how many buffers waiting for a commit. 2943 */ 2944 bveccount = 0; 2945 BO_LOCK(bo); 2946 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2947 if (!BUF_ISLOCKED(bp) && 2948 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 2949 == (B_DELWRI | B_NEEDCOMMIT)) 2950 bveccount++; 2951 } 2952 /* 2953 * Allocate space to remember the list of bufs to commit. 
It is 2954 * important to use M_NOWAIT here to avoid a race with nfs_write. 2955 * If we can't get memory (for whatever reason), we will end up 2956 * committing the buffers one-by-one in the loop below. 2957 */ 2958 if (bveccount > NFS_COMMITBVECSIZ) { 2959 /* 2960 * Release the vnode interlock to avoid a lock 2961 * order reversal. 2962 */ 2963 BO_UNLOCK(bo); 2964 bvec = (struct buf **) 2965 malloc(bveccount * sizeof(struct buf *), 2966 M_TEMP, M_NOWAIT); 2967 BO_LOCK(bo); 2968 if (bvec == NULL) { 2969 bvec = bvec_on_stack; 2970 bvecsize = NFS_COMMITBVECSIZ; 2971 } else 2972 bvecsize = bveccount; 2973 } else { 2974 bvec = bvec_on_stack; 2975 bvecsize = NFS_COMMITBVECSIZ; 2976 } 2977 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2978 if (bvecpos >= bvecsize) 2979 break; 2980 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 2981 nbp = TAILQ_NEXT(bp, b_bobufs); 2982 continue; 2983 } 2984 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 2985 (B_DELWRI | B_NEEDCOMMIT)) { 2986 BUF_UNLOCK(bp); 2987 nbp = TAILQ_NEXT(bp, b_bobufs); 2988 continue; 2989 } 2990 BO_UNLOCK(bo); 2991 bremfree(bp); 2992 /* 2993 * Work out if all buffers are using the same cred 2994 * so we can deal with them all with one commit. 2995 * 2996 * NOTE: we are not clearing B_DONE here, so we have 2997 * to do it later on in this routine if we intend to 2998 * initiate I/O on the bp. 2999 * 3000 * Note: to avoid loopback deadlocks, we do not 3001 * assign b_runningbufspace. 3002 */ 3003 if (wcred == NULL) 3004 wcred = bp->b_wcred; 3005 else if (wcred != bp->b_wcred) 3006 wcred = NOCRED; 3007 vfs_busy_pages(bp, 0); 3008 3009 BO_LOCK(bo); 3010 /* 3011 * bp is protected by being locked, but nbp is not 3012 * and vfs_busy_pages() may sleep. We have to 3013 * recalculate nbp. 3014 */ 3015 nbp = TAILQ_NEXT(bp, b_bobufs); 3016 3017 /* 3018 * A list of these buffers is kept so that the 3019 * second loop knows which buffers have actually 3020 * been committed. 
This is necessary, since there 3021 * may be a race between the commit rpc and new 3022 * uncommitted writes on the file. 3023 */ 3024 bvec[bvecpos++] = bp; 3025 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3026 bp->b_dirtyoff; 3027 if (toff < off) 3028 off = toff; 3029 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 3030 if (toff > endoff) 3031 endoff = toff; 3032 } 3033 BO_UNLOCK(bo); 3034 } 3035 if (bvecpos > 0) { 3036 /* 3037 * Commit data on the server, as required. 3038 * If all bufs are using the same wcred, then use that with 3039 * one call for all of them, otherwise commit each one 3040 * separately. 3041 */ 3042 if (wcred != NOCRED) 3043 retv = ncl_commit(vp, off, (int)(endoff - off), 3044 wcred, td); 3045 else { 3046 retv = 0; 3047 for (i = 0; i < bvecpos; i++) { 3048 off_t off, size; 3049 bp = bvec[i]; 3050 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3051 bp->b_dirtyoff; 3052 size = (u_quad_t)(bp->b_dirtyend 3053 - bp->b_dirtyoff); 3054 retv = ncl_commit(vp, off, (int)size, 3055 bp->b_wcred, td); 3056 if (retv) break; 3057 } 3058 } 3059 3060 if (retv == NFSERR_STALEWRITEVERF) 3061 ncl_clearcommit(vp->v_mount); 3062 3063 /* 3064 * Now, either mark the blocks I/O done or mark the 3065 * blocks dirty, depending on whether the commit 3066 * succeeded. 3067 */ 3068 for (i = 0; i < bvecpos; i++) { 3069 bp = bvec[i]; 3070 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 3071 if (!NFSCL_FORCEDISM(vp->v_mount) && retv) { 3072 /* 3073 * Error, leave B_DELWRI intact 3074 */ 3075 vfs_unbusy_pages(bp); 3076 brelse(bp); 3077 } else { 3078 /* 3079 * Success, remove B_DELWRI ( bundirty() ). 3080 * 3081 * b_dirtyoff/b_dirtyend seem to be NFS 3082 * specific. We should probably move that 3083 * into bundirty(). 
XXX 3084 */ 3085 bufobj_wref(bo); 3086 bp->b_flags |= B_ASYNC; 3087 bundirty(bp); 3088 bp->b_flags &= ~B_DONE; 3089 bp->b_ioflags &= ~BIO_ERROR; 3090 bp->b_dirtyoff = bp->b_dirtyend = 0; 3091 bufdone(bp); 3092 } 3093 } 3094 } 3095 3096 /* 3097 * Start/do any write(s) that are required. 3098 */ 3099 loop: 3100 BO_LOCK(bo); 3101 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3102 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 3103 if (waitfor != MNT_WAIT || passone) 3104 continue; 3105 3106 error = BUF_TIMELOCK(bp, 3107 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 3108 BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo); 3109 if (error == 0) { 3110 BUF_UNLOCK(bp); 3111 goto loop; 3112 } 3113 if (error == ENOLCK) { 3114 error = 0; 3115 goto loop; 3116 } 3117 if (called_from_renewthread != 0) { 3118 /* 3119 * Return EIO so the flush will be retried 3120 * later. 3121 */ 3122 error = EIO; 3123 goto done; 3124 } 3125 if (newnfs_sigintr(nmp, td)) { 3126 error = EINTR; 3127 goto done; 3128 } 3129 if (slpflag == PCATCH) { 3130 slpflag = 0; 3131 slptimeo = 2 * hz; 3132 } 3133 goto loop; 3134 } 3135 if ((bp->b_flags & B_DELWRI) == 0) 3136 panic("nfs_fsync: not dirty"); 3137 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { 3138 BUF_UNLOCK(bp); 3139 continue; 3140 } 3141 BO_UNLOCK(bo); 3142 bremfree(bp); 3143 bp->b_flags |= B_ASYNC; 3144 bwrite(bp); 3145 if (newnfs_sigintr(nmp, td)) { 3146 error = EINTR; 3147 goto done; 3148 } 3149 goto loop; 3150 } 3151 if (passone) { 3152 passone = 0; 3153 BO_UNLOCK(bo); 3154 goto again; 3155 } 3156 if (waitfor == MNT_WAIT) { 3157 while (bo->bo_numoutput) { 3158 error = bufobj_wwait(bo, slpflag, slptimeo); 3159 if (error) { 3160 BO_UNLOCK(bo); 3161 if (called_from_renewthread != 0) { 3162 /* 3163 * Return EIO so that the flush will be 3164 * retried later. 
3165 */ 3166 error = EIO; 3167 goto done; 3168 } 3169 error = newnfs_sigintr(nmp, td); 3170 if (error) 3171 goto done; 3172 if (slpflag == PCATCH) { 3173 slpflag = 0; 3174 slptimeo = 2 * hz; 3175 } 3176 BO_LOCK(bo); 3177 } 3178 } 3179 if (bo->bo_dirty.bv_cnt != 0 && commit) { 3180 BO_UNLOCK(bo); 3181 goto loop; 3182 } 3183 /* 3184 * Wait for all the async IO requests to drain 3185 */ 3186 BO_UNLOCK(bo); 3187 } else 3188 BO_UNLOCK(bo); 3189 if (NFSHASPNFS(nmp)) { 3190 nfscl_layoutcommit(vp, td); 3191 /* 3192 * Invalidate the attribute cache, since writes to a DS 3193 * won't update the size attribute. 3194 */ 3195 NFSLOCKNODE(np); 3196 np->n_attrstamp = 0; 3197 } else 3198 NFSLOCKNODE(np); 3199 if (np->n_flag & NWRITEERR) { 3200 error = np->n_error; 3201 np->n_flag &= ~NWRITEERR; 3202 } 3203 if (commit && bo->bo_dirty.bv_cnt == 0 && 3204 bo->bo_numoutput == 0) 3205 np->n_flag &= ~NMODIFIED; 3206 NFSUNLOCKNODE(np); 3207 done: 3208 if (bvec != NULL && bvec != bvec_on_stack) 3209 free(bvec, M_TEMP); 3210 if (error == 0 && commit != 0 && waitfor == MNT_WAIT && 3211 (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0)) { 3212 if (trycnt++ < 5) { 3213 /* try, try again... */ 3214 passone = 1; 3215 wcred = NULL; 3216 bvec = NULL; 3217 bvecsize = 0; 3218 goto again; 3219 } 3220 vn_printf(vp, "ncl_flush failed"); 3221 error = called_from_renewthread != 0 ? EIO : EBUSY; 3222 } 3223 if (error == 0) { 3224 nanouptime(&ts); 3225 NFSLOCKNODE(np); 3226 np->n_localmodtime = ts; 3227 NFSUNLOCKNODE(np); 3228 } 3229 return (error); 3230 } 3231 3232 /* 3233 * NFS advisory byte-level locks. 
3234 */ 3235 static int 3236 nfs_advlock(struct vop_advlock_args *ap) 3237 { 3238 struct vnode *vp = ap->a_vp; 3239 struct ucred *cred; 3240 struct nfsnode *np = VTONFS(ap->a_vp); 3241 struct proc *p = (struct proc *)ap->a_id; 3242 struct thread *td = curthread; /* XXX */ 3243 struct vattr va; 3244 int ret, error; 3245 u_quad_t size; 3246 struct nfsmount *nmp; 3247 3248 error = NFSVOPLOCK(vp, LK_SHARED); 3249 if (error != 0) 3250 return (EBADF); 3251 nmp = VFSTONFS(vp->v_mount); 3252 if (!NFS_ISV4(vp) || (nmp->nm_flag & NFSMNT_NOLOCKD) != 0) { 3253 if ((nmp->nm_flag & NFSMNT_NOLOCKD) != 0) { 3254 size = np->n_size; 3255 NFSVOPUNLOCK(vp); 3256 error = lf_advlock(ap, &(vp->v_lockf), size); 3257 } else { 3258 if (nfs_advlock_p != NULL) 3259 error = nfs_advlock_p(ap); 3260 else { 3261 NFSVOPUNLOCK(vp); 3262 error = ENOLCK; 3263 } 3264 } 3265 if (error == 0 && ap->a_op == F_SETLK) { 3266 error = NFSVOPLOCK(vp, LK_SHARED); 3267 if (error == 0) { 3268 /* Mark that a file lock has been acquired. */ 3269 NFSLOCKNODE(np); 3270 np->n_flag |= NHASBEENLOCKED; 3271 NFSUNLOCKNODE(np); 3272 NFSVOPUNLOCK(vp); 3273 } 3274 } 3275 return (error); 3276 } else if ((ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { 3277 if (vp->v_type != VREG) { 3278 error = EINVAL; 3279 goto out; 3280 } 3281 if ((ap->a_flags & F_POSIX) != 0) 3282 cred = p->p_ucred; 3283 else 3284 cred = td->td_ucred; 3285 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 3286 if (VN_IS_DOOMED(vp)) { 3287 error = EBADF; 3288 goto out; 3289 } 3290 3291 /* 3292 * If this is unlocking a write locked region, flush and 3293 * commit them before unlocking. This is required by 3294 * RFC3530 Sec. 9.3.2. 3295 */ 3296 if (ap->a_op == F_UNLCK && 3297 nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, 3298 ap->a_flags)) 3299 (void) ncl_flush(vp, MNT_WAIT, td, 1, 0); 3300 3301 /* 3302 * Mark NFS node as might have acquired a lock. 
3303 * This is separate from NHASBEENLOCKED, because it must 3304 * be done before the nfsrpc_advlock() call, which might 3305 * add a nfscllock structure to the client state. 3306 * It is used to check for the case where a nfscllock 3307 * state structure cannot exist for the file. 3308 * Only done for "oneopenown" NFSv4.1/4.2 mounts. 3309 */ 3310 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) { 3311 NFSLOCKNODE(np); 3312 np->n_flag |= NMIGHTBELOCKED; 3313 NFSUNLOCKNODE(np); 3314 } 3315 3316 /* 3317 * Loop around doing the lock op, while a blocking lock 3318 * must wait for the lock op to succeed. 3319 */ 3320 do { 3321 ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, 3322 ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); 3323 if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3324 ap->a_op == F_SETLK) { 3325 NFSVOPUNLOCK(vp); 3326 error = nfs_catnap(PZERO | PCATCH, ret, 3327 "ncladvl"); 3328 if (error) 3329 return (EINTR); 3330 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 3331 if (VN_IS_DOOMED(vp)) { 3332 error = EBADF; 3333 goto out; 3334 } 3335 } 3336 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3337 ap->a_op == F_SETLK); 3338 if (ret == NFSERR_DENIED) { 3339 error = EAGAIN; 3340 goto out; 3341 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { 3342 error = ret; 3343 goto out; 3344 } else if (ret != 0) { 3345 error = EACCES; 3346 goto out; 3347 } 3348 3349 /* 3350 * Now, if we just got a lock, invalidate data in the buffer 3351 * cache, as required, so that the coherency conforms with 3352 * RFC3530 Sec. 9.3.2. 
3353 */ 3354 if (ap->a_op == F_SETLK) { 3355 if ((np->n_flag & NMODIFIED) == 0) { 3356 np->n_attrstamp = 0; 3357 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3358 ret = VOP_GETATTR(vp, &va, cred); 3359 } 3360 if ((np->n_flag & NMODIFIED) || ret || 3361 np->n_change != va.va_filerev) { 3362 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); 3363 np->n_attrstamp = 0; 3364 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3365 ret = VOP_GETATTR(vp, &va, cred); 3366 if (!ret) { 3367 np->n_mtime = va.va_mtime; 3368 np->n_change = va.va_filerev; 3369 } 3370 } 3371 /* Mark that a file lock has been acquired. */ 3372 NFSLOCKNODE(np); 3373 np->n_flag |= NHASBEENLOCKED; 3374 NFSUNLOCKNODE(np); 3375 } 3376 } else 3377 error = EOPNOTSUPP; 3378 out: 3379 NFSVOPUNLOCK(vp); 3380 return (error); 3381 } 3382 3383 /* 3384 * NFS advisory byte-level locks. 3385 */ 3386 static int 3387 nfs_advlockasync(struct vop_advlockasync_args *ap) 3388 { 3389 struct vnode *vp = ap->a_vp; 3390 u_quad_t size; 3391 int error; 3392 3393 error = NFSVOPLOCK(vp, LK_SHARED); 3394 if (error) 3395 return (error); 3396 if (NFS_ISV4(vp)) { 3397 NFSVOPUNLOCK(vp); 3398 return (EOPNOTSUPP); 3399 } 3400 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3401 size = VTONFS(vp)->n_size; 3402 NFSVOPUNLOCK(vp); 3403 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3404 } else { 3405 NFSVOPUNLOCK(vp); 3406 error = EOPNOTSUPP; 3407 } 3408 return (error); 3409 } 3410 3411 /* 3412 * Print out the contents of an nfsnode. 3413 */ 3414 static int 3415 nfs_print(struct vop_print_args *ap) 3416 { 3417 struct vnode *vp = ap->a_vp; 3418 struct nfsnode *np = VTONFS(vp); 3419 3420 printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, 3421 (uintmax_t)np->n_vattr.na_fsid); 3422 if (vp->v_type == VFIFO) 3423 fifo_printinfo(vp); 3424 printf("\n"); 3425 return (0); 3426 } 3427 3428 /* 3429 * nfs special file access vnode op. 3430 * Essentially just get vattr and then imitate iaccess() since the device is 3431 * local to the client. 
3432 */ 3433 static int 3434 nfsspec_access(struct vop_access_args *ap) 3435 { 3436 struct vattr *vap; 3437 struct ucred *cred = ap->a_cred; 3438 struct vnode *vp = ap->a_vp; 3439 accmode_t accmode = ap->a_accmode; 3440 struct vattr vattr; 3441 int error; 3442 3443 /* 3444 * Disallow write attempts on filesystems mounted read-only; 3445 * unless the file is a socket, fifo, or a block or character 3446 * device resident on the filesystem. 3447 */ 3448 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3449 switch (vp->v_type) { 3450 case VREG: 3451 case VDIR: 3452 case VLNK: 3453 return (EROFS); 3454 default: 3455 break; 3456 } 3457 } 3458 vap = &vattr; 3459 error = VOP_GETATTR(vp, vap, cred); 3460 if (error) 3461 goto out; 3462 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3463 accmode, cred); 3464 out: 3465 return error; 3466 } 3467 3468 /* 3469 * Read wrapper for fifos. 3470 */ 3471 static int 3472 nfsfifo_read(struct vop_read_args *ap) 3473 { 3474 struct nfsnode *np = VTONFS(ap->a_vp); 3475 int error; 3476 3477 /* 3478 * Set access flag. 3479 */ 3480 NFSLOCKNODE(np); 3481 np->n_flag |= NACC; 3482 vfs_timestamp(&np->n_atim); 3483 NFSUNLOCKNODE(np); 3484 error = fifo_specops.vop_read(ap); 3485 return error; 3486 } 3487 3488 /* 3489 * Write wrapper for fifos. 3490 */ 3491 static int 3492 nfsfifo_write(struct vop_write_args *ap) 3493 { 3494 struct nfsnode *np = VTONFS(ap->a_vp); 3495 3496 /* 3497 * Set update flag. 3498 */ 3499 NFSLOCKNODE(np); 3500 np->n_flag |= NUPD; 3501 vfs_timestamp(&np->n_mtim); 3502 NFSUNLOCKNODE(np); 3503 return(fifo_specops.vop_write(ap)); 3504 } 3505 3506 /* 3507 * Close wrapper for fifos. 3508 * 3509 * Update the times on the nfsnode then do fifo close. 
 */
static int
nfsfifo_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	struct timespec ts;

	NFSLOCKNODE(np);
	if (np->n_flag & (NACC | NUPD)) {
		vfs_timestamp(&ts);
		if (np->n_flag & NACC)
			np->n_atim = ts;
		if (np->n_flag & NUPD)
			np->n_mtim = ts;
		np->n_flag |= NCHG;
		/*
		 * Only do the Setattr when this is the only reference to
		 * the vnode and the mount is not read-only.
		 */
		if (vrefcnt(vp) == 1 &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			VATTR_NULL(&vattr);
			if (np->n_flag & NACC)
				vattr.va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vattr.va_mtime = np->n_mtim;
			/* The node lock is dropped before VOP_SETATTR(). */
			NFSUNLOCKNODE(np);
			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
			goto out;
		}
	}
	NFSUNLOCKNODE(np);
out:
	return (fifo_specops.vop_close(ap));
}

/*
 * Get the ACL for a file via nfsrpc_getacl().
 * Only ACL_TYPE_NFS4 is handled; any other type gets EOPNOTSUPP.
 * An error value greater than NFSERR_STALE is handed to nfscl_maperr()
 * and then reported to the caller as EPERM.
 */
static int
nfs_getacl(struct vop_getacl_args *ap)
{
	int error;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EOPNOTSUPP);
	error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp);
	if (error > NFSERR_STALE) {
		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
		error = EPERM;
	}
	return (error);
}

/*
 * Set the ACL for a file via nfsrpc_setacl().
 * Only ACL_TYPE_NFS4 is handled; any other type gets EOPNOTSUPP.
 * An error value greater than NFSERR_STALE is handed to nfscl_maperr()
 * and then reported to the caller as EPERM.
 */
static int
nfs_setacl(struct vop_setacl_args *ap)
{
	int error;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EOPNOTSUPP);
	error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp);
	if (error > NFSERR_STALE) {
		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
		error = EPERM;
	}
	return (error);
}

/*
 * VOP_ADVISE for NFS.
 * Just return 0 for any errors, since it is just a hint.
 * (An error from vop_stdadvise() is the exception: it is returned.)
 */
static int
nfs_advise(struct vop_advise_args *ap)
{
	struct thread *td = curthread;
	struct nfsmount *nmp;
	uint64_t len;
	int error;

	/*
	 * First do vop_stdadvise() to handle the buffer cache.
	 */
	error = vop_stdadvise(ap);
	if (error != 0)
		return (error);
	/* Negative or inverted ranges are silently ignored. */
	if (ap->a_start < 0 || ap->a_end < 0)
		return (0);
	/* NOTE(review): len == 0 presumably means "to end of file". */
	if (ap->a_end == OFF_MAX)
		len = 0;
	else if (ap->a_end < ap->a_start)
		return (0);
	else
		len = ap->a_end - ap->a_start + 1;
	nmp = VFSTONFS(ap->a_vp->v_mount);
	mtx_lock(&nmp->nm_mtx);
	/*
	 * Skip the RPC unless this is an NFSv4.2 mount, it is either not
	 * pNFS or has NFSMNTP_IOADVISETHRUMDS set, and the server has not
	 * already been marked NFSMNTP_NOADVISE.
	 */
	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
	    (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) ==
	    0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) {
		mtx_unlock(&nmp->nm_mtx);
		return (0);
	}
	mtx_unlock(&nmp->nm_mtx);
	error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice,
	    td->td_ucred, td);
	if (error == NFSERR_NOTSUPP) {
		/* Remember the failure so later calls skip the RPC. */
		mtx_lock(&nmp->nm_mtx);
		nmp->nm_privflag |= NFSMNTP_NOADVISE;
		mtx_unlock(&nmp->nm_mtx);
	}
	return (0);
}

/*
 * nfs allocate call
 *
 * Done with an Allocate RPC on NFSv4.2 mounts that have not been marked
 * NFSMNTP_NOALLOCATE; otherwise EINVAL is returned.  At most
 * nfs_maxalloclen bytes are allocated per call.  On success *a_offset is
 * advanced and *a_len reduced by the amount allocated.
 */
static int
nfs_allocate(struct vop_allocate_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;
	struct nfsvattr nfsva;
	struct nfsmount *nmp;
	struct nfsnode *np;
	off_t alen;
	int attrflag, error, ret;
	struct timespec ts;
	struct uio io;

	attrflag = 0;
	nmp = VFSTONFS(vp->v_mount);
	np = VTONFS(vp);
	mtx_lock(&nmp->nm_mtx);
	if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION &&
	    (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		/* Clip the request to nfs_maxalloclen. */
		alen = *ap->a_len;
		if ((uint64_t)alen > nfs_maxalloclen)
			alen = nfs_maxalloclen;

		/* Check the file size limit. */
		io.uio_offset = *ap->a_offset;
		io.uio_resid = alen;
		error = vn_rlimit_fsize(vp, &io, td);

		/*
		 * Flush first to ensure that the allocate adds to the
		 * file's allocation on the server.
		 */
		if (error == 0) {
			vnode_pager_clean_sync(vp);
			error = ncl_flush(vp, MNT_WAIT, td, 1, 0);
		}
		if (error == 0)
			error = nfsrpc_allocate(vp, *ap->a_offset, alen,
			    &nfsva, &attrflag, ap->a_cred, td);
		if (error == 0) {
			*ap->a_offset += alen;
			*ap->a_len -= alen;
			nanouptime(&ts);
			NFSLOCKNODE(np);
			np->n_localmodtime = ts;
			NFSUNLOCKNODE(np);
		} else if (error == NFSERR_NOTSUPP) {
			/* Remember that the server cannot do Allocate. */
			mtx_lock(&nmp->nm_mtx);
			nmp->nm_privflag |= NFSMNTP_NOALLOCATE;
			mtx_unlock(&nmp->nm_mtx);
			error = EINVAL;
		}
	} else {
		mtx_unlock(&nmp->nm_mtx);
		error = EINVAL;
	}
	/* Load any attributes returned; report its error only if none yet. */
	if (attrflag != 0) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
		if (error == 0 && ret != 0)
			error = ret;
	}
	if (error != 0)
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs deallocate call
 *
 * Done with a Deallocate RPC on NFSv4.2 mounts that have not been marked
 * NFSMNTP_NODEALLOCATE.  When the RPC path is unavailable or fails with
 * anything other than NFSERR_FBIG or NFSERR_INVAL, vop_stddeallocate() is
 * used instead.  *a_offset and *a_len are updated to describe the part of
 * the request that remains.
 */
static int
nfs_deallocate(struct vop_deallocate_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;
	struct nfsvattr nfsva;
	struct nfsmount *nmp;
	struct nfsnode *np;
	off_t tlen, mlen;
	int attrflag, error, ret;
	bool clipped;
	struct timespec ts;

	error = 0;
	attrflag = 0;
	nmp = VFSTONFS(vp->v_mount);
	np = VTONFS(vp);
	mtx_lock(&nmp->nm_mtx);
	if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION &&
	    (nmp->nm_privflag & NFSMNTP_NODEALLOCATE) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		/* Keep offset + length from going past OFF_MAX. */
		tlen = omin(OFF_MAX - *ap->a_offset, *ap->a_len);
		NFSCL_DEBUG(4, "dealloc: off=%jd len=%jd maxfilesize=%ju\n",
		    (intmax_t)*ap->a_offset, (intmax_t)tlen,
		    (uintmax_t)nmp->nm_maxfilesize);
		if ((uint64_t)*ap->a_offset >= nmp->nm_maxfilesize) {
			/* Avoid EFBIG error return from the NFSv4.2 server. */
			*ap->a_len = 0;
			return (0);
		}
		clipped = false;
		/* Clip the range at nm_maxfilesize. */
		if ((uint64_t)*ap->a_offset + tlen > nmp->nm_maxfilesize)
			tlen = nmp->nm_maxfilesize - *ap->a_offset;
		if ((uint64_t)*ap->a_offset < np->n_size) {
			/* Limit the len to nfs_maxalloclen before EOF. */
			mlen = omin((off_t)np->n_size - *ap->a_offset, tlen);
			if ((uint64_t)mlen > nfs_maxalloclen) {
				NFSCL_DEBUG(4, "dealloc: tlen maxalloclen\n");
				tlen = nfs_maxalloclen;
				clipped = true;
			}
		}
		if (error == 0)
			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
		if (error == 0) {
			vnode_pager_purge_range(vp, *ap->a_offset,
			    *ap->a_offset + tlen);
			error = nfsrpc_deallocate(vp, *ap->a_offset, tlen,
			    &nfsva, &attrflag, ap->a_cred, td);
			NFSCL_DEBUG(4, "dealloc: rpc=%d\n", error);
		}
		if (error == 0) {
			NFSCL_DEBUG(4, "dealloc: attrflag=%d na_size=%ju\n",
			    attrflag, (uintmax_t)nfsva.na_size);
			nanouptime(&ts);
			NFSLOCKNODE(np);
			np->n_localmodtime = ts;
			NFSUNLOCKNODE(np);
			/*
			 * Advance the offset, but not beyond the file size
			 * reported in the returned attributes.
			 */
			if (attrflag != 0) {
				if ((uint64_t)*ap->a_offset < nfsva.na_size)
					*ap->a_offset += omin((off_t)
					    nfsva.na_size - *ap->a_offset,
					    tlen);
			}
			/*
			 * Only a request clipped to nfs_maxalloclen leaves
			 * a remainder for the caller; otherwise it is done.
			 */
			if (clipped && tlen < *ap->a_len)
				*ap->a_len -= tlen;
			else
				*ap->a_len = 0;
		} else if (error == NFSERR_NOTSUPP) {
			/* Remember that the server cannot do Deallocate. */
			mtx_lock(&nmp->nm_mtx);
			nmp->nm_privflag |= NFSMNTP_NODEALLOCATE;
			mtx_unlock(&nmp->nm_mtx);
		}
	} else {
		mtx_unlock(&nmp->nm_mtx);
		error = EIO;
	}
	/*
	 * If the NFS server cannot perform the Deallocate operation, just call
	 * vop_stddeallocate() to perform it.
	 */
	if (error != 0 && error != NFSERR_FBIG && error != NFSERR_INVAL) {
		error = vop_stddeallocate(ap);
		NFSCL_DEBUG(4, "dealloc: stddeallocate=%d\n", error);
	}
	if (attrflag != 0) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
		if (error == 0 && ret != 0)
			error = ret;
	}
	if (error != 0)
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs copy_file_range call
 *
 * Returns ENOSYS when the copy cannot be attempted with the NFSv4.2 Copy
 * operation at all: same vnode, different mounts, not an NFSv4.2 mount, or
 * the mount is marked NFSMNTP_NOCOPY.
 * NOTE(review): presumably the caller falls back to a generic copy on
 * ENOSYS -- confirm against the VOP_COPY_FILE_RANGE() caller.
 * When the Copy RPC itself fails with NFSERR_NOTSUPP, NFSERR_OFFLOADNOREQS
 * or NFSERR_ACCES, vn_generic_copy_file_range() is called from here.
 * On return *a_lenp holds the number of bytes copied, or 0 on other errors.
 */
static int
nfs_copy_file_range(struct vop_copy_file_range_args *ap)
{
	struct vnode *invp = ap->a_invp;
	struct vnode *outvp = ap->a_outvp;
	struct mount *mp;
	vm_object_t invp_obj;
	struct nfsvattr innfsva, outnfsva;
	struct vattr va, *vap;
	struct uio io;
	struct nfsmount *nmp;
	size_t len, len2;
	ssize_t r;
	int error, inattrflag, outattrflag, ret, ret2, invp_lock;
	off_t inoff, outoff;
	bool consecutive, must_commit, tryoutcred;

	/*
	 * NFSv4.2 Copy is not permitted for infile == outfile.
	 * TODO: copy_file_range() between multiple NFS mountpoints
	 */
	if (invp == outvp || invp->v_mount != outvp->v_mount) {
generic_copy:
		return (ENOSYS);
	}

	invp_lock = LK_SHARED;
relock:

	/*
	 * Lock both vnodes, avoiding risk of deadlock.
	 * outvp is locked first; invp is then tried with LK_NOWAIT.  If
	 * that fails, outvp is released, invp is locked and unlocked with
	 * a blocking request, and the whole sequence is retried.
	 */
	do {
		mp = NULL;
		error = vn_start_write(outvp, &mp, V_WAIT);
		if (error == 0) {
			error = vn_lock(outvp, LK_EXCLUSIVE);
			if (error == 0) {
				error = vn_lock(invp, invp_lock | LK_NOWAIT);
				if (error == 0)
					break;
				VOP_UNLOCK(outvp);
				if (mp != NULL)
					vn_finished_write(mp);
				mp = NULL;
				error = vn_lock(invp, invp_lock);
				if (error == 0)
					VOP_UNLOCK(invp);
			}
		}
		if (mp != NULL)
			vn_finished_write(mp);
	} while (error == 0);
	if (error != 0)
		return (error);

	/*
	 * More reasons to avoid nfs copy: not NFSv4.2, or explicitly
	 * disabled.
	 */
	nmp = VFSTONFS(invp->v_mount);
	mtx_lock(&nmp->nm_mtx);
	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
	    (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0) {
		mtx_unlock(&nmp->nm_mtx);
		VOP_UNLOCK(invp);
		VOP_UNLOCK(outvp);
		if (mp != NULL)
			vn_finished_write(mp);
		goto generic_copy;
	}
	mtx_unlock(&nmp->nm_mtx);

	/*
	 * Do the vn_rlimit_fsize() check.  Should this be above the VOP layer?
	 */
	io.uio_offset = *ap->a_outoffp;
	io.uio_resid = *ap->a_lenp;
	error = vn_rlimit_fsizex(outvp, &io, 0, &r, ap->a_fsizetd);
	*ap->a_lenp = io.uio_resid;
	/*
	 * No need to call vn_rlimit_fsizex_res before return, since the uio is
	 * local.
	 */

	/*
	 * Flush the input file so that the data is up to date before
	 * the copy.  Flush writes for the output file so that they
	 * do not overwrite the data copied to the output file by the Copy.
	 * Set the commit argument for both flushes so that the data is on
	 * stable storage before the Copy RPC.  This is done in case the
	 * server reboots during the Copy and needs to be redone.
	 */
	if (error == 0) {
		invp_obj = invp->v_object;
		if (invp_obj != NULL && vm_object_mightbedirty(invp_obj)) {
			/*
			 * If invp is not yet held exclusively, drop
			 * everything and redo the locking with
			 * LK_EXCLUSIVE before cleaning the pages.
			 */
			if (invp_lock != LK_EXCLUSIVE) {
				invp_lock = LK_EXCLUSIVE;
				VOP_UNLOCK(invp);
				VOP_UNLOCK(outvp);
				if (mp != NULL)
					vn_finished_write(mp);
				goto relock;
			}
			vnode_pager_clean_sync(invp);
		}
		error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0);
	}
	if (error == 0)
		error = ncl_vinvalbuf(outvp, V_SAVE, curthread, 0);

	/* Do the actual NFSv4.2 RPC. */
	ret = ret2 = 0;
	len = *ap->a_lenp;
	mtx_lock(&nmp->nm_mtx);
	if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0)
		consecutive = true;
	else
		consecutive = false;
	mtx_unlock(&nmp->nm_mtx);
	/* Save the starting offsets so a retry can restore them. */
	inoff = *ap->a_inoffp;
	outoff = *ap->a_outoffp;
	tryoutcred = true;
	must_commit = false;
	if (error == 0) {
		vap = &VTONFS(invp)->n_vattr.na_vattr;
		error = VOP_GETATTR(invp, vap, ap->a_incred);
		if (error == 0) {
			/*
			 * Clip "len" at va_size so that RFC compliant servers
			 * will not reply NFSERR_INVAL.
			 * Setting "len == 0" for the RPC would be preferred,
			 * but some Linux servers do not support that.
			 * If the len is being set to 0, do a Setattr RPC to
			 * set the server's atime.  This behaviour was the
			 * preferred one for the FreeBSD "collective".
			 */
			if (inoff >= vap->va_size) {
				*ap->a_lenp = len = 0;
				if ((nmp->nm_mountp->mnt_flag & MNT_NOATIME) ==
				    0) {
					VATTR_NULL(&va);
					va.va_atime.tv_sec = 0;
					va.va_atime.tv_nsec = 0;
					va.va_vaflags = VA_UTIMES_NULL;
					inattrflag = 0;
					error = nfsrpc_setattr(invp, &va, NULL,
					    ap->a_incred, curthread, &innfsva,
					    &inattrflag);
					if (inattrflag != 0)
						ret = nfscl_loadattrcache(&invp,
						    &innfsva, NULL, 0, 1);
					if (error == 0 && ret != 0)
						error = ret;
				}
			} else if (inoff + len > vap->va_size)
				*ap->a_lenp = len = vap->va_size - inoff;
		} else
			/* A failed Getattr only means "len" is not clipped. */
			error = 0;
	}

	/*
	 * len will be set to 0 upon a successful Copy RPC.
	 * As such, this only loops when the Copy RPC needs to be retried.
	 */
	while (len > 0 && error == 0) {
		inattrflag = outattrflag = 0;
		len2 = len;
		/* The two calls differ only in the credentials used. */
		if (tryoutcred)
			error = nfsrpc_copy_file_range(invp, ap->a_inoffp,
			    outvp, ap->a_outoffp, &len2, ap->a_flags,
			    &inattrflag, &innfsva, &outattrflag, &outnfsva,
			    ap->a_outcred, consecutive, &must_commit);
		else
			error = nfsrpc_copy_file_range(invp, ap->a_inoffp,
			    outvp, ap->a_outoffp, &len2, ap->a_flags,
			    &inattrflag, &innfsva, &outattrflag, &outnfsva,
			    ap->a_incred, consecutive, &must_commit);
		if (inattrflag != 0)
			ret = nfscl_loadattrcache(&invp, &innfsva, NULL, 0, 1);
		if (outattrflag != 0)
			ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL,
			    1, 1);
		if (error == 0) {
			if (consecutive == false) {
				/*
				 * A non-consecutive copy is only accepted
				 * when every byte was copied.
				 */
				if (len2 == len) {
					mtx_lock(&nmp->nm_mtx);
					nmp->nm_privflag |=
					    NFSMNTP_NOCONSECUTIVE;
					mtx_unlock(&nmp->nm_mtx);
				} else
					error = NFSERR_OFFLOADNOREQS;
			}
			*ap->a_lenp = len2;
			len = 0;
			if (len2 > 0 && must_commit && error == 0)
				error = ncl_commit(outvp, outoff, *ap->a_lenp,
				    ap->a_outcred, curthread);
			if (error == 0 && ret != 0)
				error = ret;
			if (error == 0 && ret2 != 0)
				error = ret2;
		} else if (error == NFSERR_OFFLOADNOREQS && consecutive) {
			/*
			 * Try consecutive == false, which is ok only if all
			 * bytes are copied.
			 * If only some bytes were copied when consecutive
			 * is false, there is no way to know which bytes
			 * still need to be written.
			 */
			consecutive = false;
			error = 0;
		} else if (error == NFSERR_ACCES && tryoutcred) {
			/* Try again with incred. */
			tryoutcred = false;
			error = 0;
		}
		if (error == NFSERR_STALEWRITEVERF) {
			/*
			 * Server rebooted, so do it all again.
			 */
			*ap->a_inoffp = inoff;
			*ap->a_outoffp = outoff;
			len = *ap->a_lenp;
			must_commit = false;
			error = 0;
		}
	}
	VOP_UNLOCK(invp);
	VOP_UNLOCK(outvp);
	if (mp != NULL)
		vn_finished_write(mp);
	if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS ||
	    error == NFSERR_ACCES) {
		/*
		 * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can
		 * use a_incred for the read and a_outcred for the write, so
		 * try this for NFSERR_ACCES failures for the Copy.
		 * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can
		 * never succeed, so disable it.
		 */
		if (error != NFSERR_ACCES) {
			/* Can never do Copy on this mount. */
			mtx_lock(&nmp->nm_mtx);
			nmp->nm_privflag |= NFSMNTP_NOCOPY;
			mtx_unlock(&nmp->nm_mtx);
		}
		/* Restart the generic copy from the original offsets. */
		*ap->a_inoffp = inoff;
		*ap->a_outoffp = outoff;
		error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
		    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
		    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
	} else if (error != 0)
		*ap->a_lenp = 0;

	if (error != 0)
		error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs ioctl call
 *
 * Only FIOSEEKDATA and FIOSEEKHOLE are handled; any other command, or a
 * vnode that is not a regular file, gets ENOTTY.  Mounts that are not
 * NFSv4.2 are passed on to vop_stdioctl().  For the Seek RPC path, every
 * failure is reported to the caller as ENXIO.
 */
static int
nfs_ioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsvattr nfsva;
	struct nfsmount *nmp;
	int attrflag, content, error, ret;
	bool eof = false;			/* shut up compiler. */

	/* Do the actual NFSv4.2 RPC. */
	switch (ap->a_command) {
	case FIOSEEKDATA:
		content = NFSV4CONTENT_DATA;
		break;
	case FIOSEEKHOLE:
		content = NFSV4CONTENT_HOLE;
		break;
	default:
		return (ENOTTY);
	}

	error = vn_lock(vp, LK_EXCLUSIVE);
	if (error != 0)
		return (EBADF);

	if (vp->v_type != VREG) {
		VOP_UNLOCK(vp);
		return (ENOTTY);
	}
	nmp = VFSTONFS(vp->v_mount);
	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) {
		/* vop_stdioctl() is called with the vnode unlocked. */
		VOP_UNLOCK(vp);
		error = vop_stdioctl(ap);
		return (error);
	}

	attrflag = 0;
	/* An offset at or beyond the cached size cannot be sought from. */
	if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size)
		error = ENXIO;
	else {
		/*
		 * Flush all writes, so that the server is up to date.
		 * Although a Commit is not required, the commit argument
		 * is set so that, for a pNFS File/Flexible File Layout
		 * server, the LayoutCommit will be done to ensure the file
		 * size is up to date on the Metadata Server.
		 */

		vnode_pager_clean_sync(vp);
		error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0);
		if (error == 0)
			error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof,
			    content, ap->a_cred, &nfsva, &attrflag);
		/* If at eof for FIOSEEKDATA, return ENXIO. */
		if (eof && error == 0 && content == NFSV4CONTENT_DATA)
			error = ENXIO;
	}
	if (attrflag != 0) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
		if (error == 0 && ret != 0)
			error = ret;
	}
	NFSVOPUNLOCK(vp);

	/* Every failure is reported as ENXIO. */
	if (error != 0)
		error = ENXIO;
	return (error);
}

/*
 * nfs getextattr call
 *
 * Supported only for EXTATTR_NAMESPACE_USER on NFSv4.2 mounts that have
 * not been marked NFSMNTP_NOXATTR; otherwise EOPNOTSUPP is returned.
 * On success the attribute length is stored in *a_size when a_size is
 * not NULL.
 */
static int
nfs_getextattr(struct vop_getextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsmount *nmp;
	struct ucred *cred;
	struct thread *td = ap->a_td;
	struct nfsvattr nfsva;
	ssize_t len;
	int attrflag, error, ret;

	nmp = VFSTONFS(vp->v_mount);
	mtx_lock(&nmp->nm_mtx);
	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
		mtx_unlock(&nmp->nm_mtx);
		return (EOPNOTSUPP);
	}
	mtx_unlock(&nmp->nm_mtx);

	/* Fall back to the thread's credentials when none were given. */
	cred = ap->a_cred;
	if (cred == NULL)
		cred = td->td_ucred;
	/* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. */
	attrflag = 0;
	error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva,
	    &attrflag, cred, td);
	if (attrflag != 0) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
		if (error == 0 && ret != 0)
			error = ret;
	}
	if (error == 0 && ap->a_size != NULL)
		*ap->a_size = len;

	switch (error) {
	case NFSERR_NOTSUPP:
	case NFSERR_OPILLEGAL:
		/* Remember that the server lacks extended attributes. */
		mtx_lock(&nmp->nm_mtx);
		nmp->nm_privflag |= NFSMNTP_NOXATTR;
		mtx_unlock(&nmp->nm_mtx);
		error = EOPNOTSUPP;
		break;
	case NFSERR_NOXATTR:
	case NFSERR_XATTR2BIG:
		error = ENOATTR;
		break;
	default:
		error = nfscl_maperr(td, error, 0, 0);
		break;
	}
	return (error);
}

/*
 * nfs setextattr call
 *
 * Supported only for EXTATTR_NAMESPACE_USER on NFSv4.2 mounts that have
 * not been marked NFSMNTP_NOXATTR; otherwise EOPNOTSUPP is returned.
 * A negative uio_resid is rejected with EINVAL.
 */
static int
nfs_setextattr(struct vop_setextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsmount *nmp;
	struct ucred *cred;
	struct thread *td = ap->a_td;
	struct nfsvattr nfsva;
	int attrflag, error, ret;

	nmp = VFSTONFS(vp->v_mount);
	mtx_lock(&nmp->nm_mtx);
	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
		mtx_unlock(&nmp->nm_mtx);
		return (EOPNOTSUPP);
	}
	mtx_unlock(&nmp->nm_mtx);

	if (ap->a_uio->uio_resid < 0)
		return (EINVAL);
	/* Fall back to the thread's credentials when none were given. */
	cred = ap->a_cred;
	if (cred == NULL)
		cred = td->td_ucred;
	/* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. */
	attrflag = 0;
	error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva,
	    &attrflag, cred, td);
	if (attrflag != 0) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
		if (error == 0 && ret != 0)
			error = ret;
	}

	switch (error) {
	case NFSERR_NOTSUPP:
	case NFSERR_OPILLEGAL:
		/* Remember that the server lacks extended attributes. */
		mtx_lock(&nmp->nm_mtx);
		nmp->nm_privflag |= NFSMNTP_NOXATTR;
		mtx_unlock(&nmp->nm_mtx);
		error = EOPNOTSUPP;
		break;
	case NFSERR_NOXATTR:
	case NFSERR_XATTR2BIG:
		error = ENOATTR;
		break;
	default:
		error = nfscl_maperr(td, error, 0, 0);
		break;
	}
	return (error);
}

/*
 * nfs listextattr call
 *
 * Supported only for EXTATTR_NAMESPACE_USER on NFSv4.2 mounts that have
 * not been marked NFSMNTP_NOXATTR; otherwise EOPNOTSUPP is returned.
 * On success the accumulated length is stored in *a_size when a_size is
 * not NULL.
 */
static int
nfs_listextattr(struct vop_listextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsmount *nmp;
	struct ucred *cred;
	struct thread *td = ap->a_td;
	struct nfsvattr nfsva;
	size_t len, len2;
	uint64_t cookie;
	int attrflag, error, ret;
	bool eof;

	nmp = VFSTONFS(vp->v_mount);
	mtx_lock(&nmp->nm_mtx);
	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
		mtx_unlock(&nmp->nm_mtx);
		return (EOPNOTSUPP);
	}
	mtx_unlock(&nmp->nm_mtx);

	/* Fall back to the thread's credentials when none were given. */
	cred = ap->a_cred;
	if (cred == NULL)
		cred = td->td_ucred;

	/* Loop around doing List Extended Attribute RPCs. */
	eof = false;
	cookie = 0;
	len2 = 0;
	error = 0;
	while (!eof && error == 0) {
		/*
		 * len is seeded with nm_rsize and passed by reference; its
		 * value after the call is accumulated into len2.
		 */
		len = nmp->nm_rsize;
		attrflag = 0;
		error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof,
		    &nfsva, &attrflag, cred, td);
		if (attrflag != 0) {
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
			if (error == 0 && ret != 0)
				error = ret;
		}
		if (error == 0) {
			len2 += len;
			if (len2 > SSIZE_MAX)
				error = ENOATTR;
		}
	}
	if (error == 0 && ap->a_size != NULL)
		*ap->a_size = len2;

	switch (error) {
	case NFSERR_NOTSUPP:
	case NFSERR_OPILLEGAL:
		/* Remember that the server lacks extended attributes. */
		mtx_lock(&nmp->nm_mtx);
		nmp->nm_privflag |= NFSMNTP_NOXATTR;
		mtx_unlock(&nmp->nm_mtx);
		error = EOPNOTSUPP;
		break;
	case NFSERR_NOXATTR:
	case NFSERR_XATTR2BIG:
		error = ENOATTR;
		break;
	default:
		error = nfscl_maperr(td, error, 0, 0);
		break;
	}
	return (error);
}

/*
 * nfs deleteextattr call
 *
 * Supported only for EXTATTR_NAMESPACE_USER on NFSv4.2 mounts that have
 * not been marked NFSMNTP_NOXATTR; otherwise EOPNOTSUPP is returned.
 * NOTE(review): unlike the get/set/list calls, a_cred is passed to the
 * RPC as is, with no fallback when it is NULL -- confirm callers never
 * pass NULL.
 */
static int
nfs_deleteextattr(struct vop_deleteextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsmount *nmp;
	struct nfsvattr nfsva;
	int attrflag, error, ret;

	nmp = VFSTONFS(vp->v_mount);
	mtx_lock(&nmp->nm_mtx);
	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
		mtx_unlock(&nmp->nm_mtx);
		return (EOPNOTSUPP);
	}
	mtx_unlock(&nmp->nm_mtx);

	/* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. */
	attrflag = 0;
	error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred,
	    ap->a_td);
	if (attrflag != 0) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
		if (error == 0 && ret != 0)
			error = ret;
	}

	switch (error) {
	case NFSERR_NOTSUPP:
	case NFSERR_OPILLEGAL:
		/* Remember that the server lacks extended attributes. */
		mtx_lock(&nmp->nm_mtx);
		nmp->nm_privflag |= NFSMNTP_NOXATTR;
		mtx_unlock(&nmp->nm_mtx);
		error = EOPNOTSUPP;
		break;
	case NFSERR_NOXATTR:
	case NFSERR_XATTR2BIG:
		error = ENOATTR;
		break;
	default:
		error = nfscl_maperr(ap->a_td, error, 0, 0);
		break;
	}
	return (error);
}

/*
 * Return POSIX pathconf information applicable to nfs filesystems.
 */
static int
nfs_pathconf(struct vop_pathconf_args *ap)
{
	struct nfsv3_pathconf pc;
	struct nfsvattr nfsva;
	struct vnode *vp = ap->a_vp;
	struct nfsmount *nmp;
	struct thread *td = curthread;
	off_t off;
	bool eof;
	int attrflag, error;

	if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
	    ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
	    ap->a_name == _PC_NO_TRUNC)) ||
	    (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
		/*
		 * Since only the above 4 a_names are returned by the NFSv3
		 * Pathconf RPC, there is no point in doing it for others.
		 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
		 * be used for _PC_NFS4_ACL as well.
		 */
		error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
		    &attrflag);
		if (attrflag != 0)
			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
		if (error != 0)
			return (error);
	} else {
		/*
		 * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
		 * just fake them.
4393 */ 4394 pc.pc_linkmax = NFS_LINK_MAX; 4395 pc.pc_namemax = NFS_MAXNAMLEN; 4396 pc.pc_notrunc = 1; 4397 pc.pc_chownrestricted = 1; 4398 pc.pc_caseinsensitive = 0; 4399 pc.pc_casepreserving = 1; 4400 error = 0; 4401 } 4402 switch (ap->a_name) { 4403 case _PC_LINK_MAX: 4404 #ifdef _LP64 4405 *ap->a_retval = pc.pc_linkmax; 4406 #else 4407 *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); 4408 #endif 4409 break; 4410 case _PC_NAME_MAX: 4411 *ap->a_retval = pc.pc_namemax; 4412 break; 4413 case _PC_PIPE_BUF: 4414 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 4415 *ap->a_retval = PIPE_BUF; 4416 else 4417 error = EINVAL; 4418 break; 4419 case _PC_CHOWN_RESTRICTED: 4420 *ap->a_retval = pc.pc_chownrestricted; 4421 break; 4422 case _PC_NO_TRUNC: 4423 *ap->a_retval = pc.pc_notrunc; 4424 break; 4425 case _PC_ACL_NFS4: 4426 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && 4427 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) 4428 *ap->a_retval = 1; 4429 else 4430 *ap->a_retval = 0; 4431 break; 4432 case _PC_ACL_PATH_MAX: 4433 if (NFS_ISV4(vp)) 4434 *ap->a_retval = ACL_MAX_ENTRIES; 4435 else 4436 *ap->a_retval = 3; 4437 break; 4438 case _PC_PRIO_IO: 4439 *ap->a_retval = 0; 4440 break; 4441 case _PC_SYNC_IO: 4442 *ap->a_retval = 0; 4443 break; 4444 case _PC_ALLOC_SIZE_MIN: 4445 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; 4446 break; 4447 case _PC_FILESIZEBITS: 4448 if (NFS_ISV34(vp)) 4449 *ap->a_retval = 64; 4450 else 4451 *ap->a_retval = 32; 4452 break; 4453 case _PC_REC_INCR_XFER_SIZE: 4454 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4455 break; 4456 case _PC_REC_MAX_XFER_SIZE: 4457 *ap->a_retval = -1; /* means ``unlimited'' */ 4458 break; 4459 case _PC_REC_MIN_XFER_SIZE: 4460 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4461 break; 4462 case _PC_REC_XFER_ALIGN: 4463 *ap->a_retval = PAGE_SIZE; 4464 break; 4465 case _PC_SYMLINK_MAX: 4466 *ap->a_retval = NFS_MAXPATHLEN; 4467 break; 4468 case _PC_MIN_HOLE_SIZE: 4469 /* Only some NFSv4.2 servers 
support Seek for Holes. */ 4470 *ap->a_retval = 0; 4471 nmp = VFSTONFS(vp->v_mount); 4472 if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) { 4473 /* 4474 * NFSv4.2 doesn't have an attribute for hole size, 4475 * so all we can do is see if the Seek operation is 4476 * supported and then use f_iosize as a "best guess". 4477 */ 4478 mtx_lock(&nmp->nm_mtx); 4479 if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) { 4480 mtx_unlock(&nmp->nm_mtx); 4481 off = 0; 4482 attrflag = 0; 4483 error = nfsrpc_seek(vp, &off, &eof, 4484 NFSV4CONTENT_HOLE, td->td_ucred, &nfsva, 4485 &attrflag); 4486 if (attrflag != 0) 4487 (void) nfscl_loadattrcache(&vp, &nfsva, 4488 NULL, 0, 1); 4489 mtx_lock(&nmp->nm_mtx); 4490 if (error == NFSERR_NOTSUPP) 4491 nmp->nm_privflag |= NFSMNTP_SEEKTESTED; 4492 else 4493 nmp->nm_privflag |= NFSMNTP_SEEKTESTED | 4494 NFSMNTP_SEEK; 4495 error = 0; 4496 } 4497 if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0) 4498 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4499 mtx_unlock(&nmp->nm_mtx); 4500 } 4501 break; 4502 4503 default: 4504 error = vop_stdpathconf(ap); 4505 break; 4506 } 4507 return (error); 4508 } 4509