1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
 */

#include <sys/cdefs.h>
/*
 * vnode op calls for Sun NFS version 2, 3 and 4
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/extattr.h>
#include <sys/filio.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/namei.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/signalvar.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>

#include <fs/nfs/nfsport.h>
#include <fs/nfsclient/nfsnode.h>
#include <fs/nfsclient/nfsmount.h>
#include <fs/nfsclient/nfs.h>
#include <fs/nfsclient/nfs_kdtrace.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

#include <nfs/nfs_lock.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * DTrace probe hooks and probe ids for the access cache: one "flush done"
 * probe, a "get hit"/"get miss" pair and one "load done" probe.
 */
dtrace_nfsclient_accesscache_flush_probe_func_t
		dtrace_nfscl_accesscache_flush_done_probe;
uint32_t	nfscl_accesscache_flush_done_id;

dtrace_nfsclient_accesscache_get_probe_func_t
		dtrace_nfscl_accesscache_get_hit_probe,
		dtrace_nfscl_accesscache_get_miss_probe;
uint32_t	nfscl_accesscache_get_hit_id;
uint32_t	nfscl_accesscache_get_miss_id;

dtrace_nfsclient_accesscache_load_probe_func_t
		dtrace_nfscl_accesscache_load_done_probe;
uint32_t	nfscl_accesscache_load_done_id;
#endif /* KDTRACE_HOOKS */

/* Defs */
#define	TRUE	1
#define	FALSE	0

extern struct nfsstatsv1 nfsstatsv1;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
MALLOC_DECLARE(M_NEWNFSREQ);

/* Forward declarations of the vnode operations implemented in this file. */
static vop_read_t	nfsfifo_read;
static vop_write_t	nfsfifo_write;
static vop_close_t	nfsfifo_close;
static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
		    struct thread *);
static vop_lookup_t	nfs_lookup;
static vop_create_t	nfs_create;
static vop_mknod_t	nfs_mknod;
static vop_open_t	nfs_open;
static vop_pathconf_t	nfs_pathconf;
static vop_close_t	nfs_close;
static vop_access_t	nfs_access;
static vop_getattr_t	nfs_getattr;
static vop_setattr_t	nfs_setattr;
static vop_read_t	nfs_read;
static vop_fsync_t	nfs_fsync;
static vop_remove_t	nfs_remove;
static vop_link_t	nfs_link;
static vop_rename_t	nfs_rename;
static vop_mkdir_t	nfs_mkdir;
static vop_rmdir_t	nfs_rmdir;
static vop_symlink_t	nfs_symlink;
static vop_readdir_t	nfs_readdir;
static vop_strategy_t	nfs_strategy;
static	int	nfs_lookitup(struct vnode *, char *, int,
		    struct ucred *, struct thread *, struct nfsnode **);
static	int	nfs_sillyrename(struct vnode *, struct vnode *,
		    struct componentname *);
static vop_access_t	nfsspec_access;
static vop_readlink_t	nfs_readlink;
static vop_print_t	nfs_print;
static vop_advlock_t	nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;
static vop_getacl_t nfs_getacl;
static vop_setacl_t nfs_setacl;
static vop_advise_t nfs_advise;
static vop_allocate_t nfs_allocate;
static vop_deallocate_t nfs_deallocate;
static vop_copy_file_range_t nfs_copy_file_range;
static vop_ioctl_t nfs_ioctl;
static vop_getextattr_t nfs_getextattr;
static vop_setextattr_t nfs_setextattr;
static vop_listextattr_t nfs_listextattr;
static vop_deleteextattr_t nfs_deleteextattr;
static vop_lock1_t	nfs_lock;

/*
 * Global vfs data structures for nfs
 */

/*
 * The operation table that actually implements the NFS vnode operations.
 * It is not installed on vnodes directly; newnfs_vnodeops below reaches it
 * through nfs_vnodeops_bypass().
 */
static struct vop_vector newnfs_vnodeops_nosig = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_advlockasync =	nfs_advlockasync,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		ncl_getpages,
	.vop_putpages =		ncl_putpages,
	.vop_inactive =		ncl_inactive,
	.vop_link =		nfs_link,
	.vop_lock1 =		nfs_lock,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_pathconf =		nfs_pathconf,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		ncl_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		ncl_write,
	.vop_getacl =		nfs_getacl,
	.vop_setacl =		nfs_setacl,
	.vop_advise =		nfs_advise,
	.vop_allocate =		nfs_allocate,
	.vop_deallocate =	nfs_deallocate,
	.vop_copy_file_range =	nfs_copy_file_range,
	.vop_ioctl =		nfs_ioctl,
	.vop_getextattr =	nfs_getextattr,
	.vop_setextattr =	nfs_setextattr,
	.vop_listextattr =	nfs_listextattr,
	.vop_deleteextattr =	nfs_deleteextattr,
};
VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops_nosig);

/*
 * Bypass routine for newnfs_vnodeops: hand the generic argument block to
 * vop_sigdefer() together with the real table above.
 * NOTE(review): vop_sigdefer() is defined elsewhere; presumably it runs the
 * operation with signal handling deferred - confirm against its definition.
 */
static int
nfs_vnodeops_bypass(struct vop_generic_args *a)
{

	return (vop_sigdefer(&newnfs_vnodeops_nosig, a));
}

/*
 * The exported vector for NFS vnodes.  It names no operation of its own,
 * only the bypass routine above.
 */
struct vop_vector newnfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_bypass =		nfs_vnodeops_bypass,
};
VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops);

/*
 * Operation table for fifos that live on an NFS mount.  Operations not
 * listed here default to fifo_specops.
 */
static struct vop_vector newnfs_fifoops_nosig = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		ncl_inactive,
	.vop_pathconf =		nfs_pathconf,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		ncl_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};
VFS_VOP_VECTOR_REGISTER(newnfs_fifoops_nosig);

/*
 * Bypass routine for newnfs_fifoops; the fifo counterpart of
 * nfs_vnodeops_bypass().
 */
static int
nfs_fifoops_bypass(struct vop_generic_args *a)
{

	return (vop_sigdefer(&newnfs_fifoops_nosig, a));
}

/*
 * The exported vector for NFS fifos.
 * NOTE(review): vop_default here is default_vnodeops while the _nosig table
 * defaults to fifo_specops - confirm this asymmetry is intended.
 */
struct vop_vector newnfs_fifoops = {
	.vop_default =		&default_vnodeops,
	.vop_bypass =		nfs_fifoops_bypass,
};
VFS_VOP_VECTOR_REGISTER(newnfs_fifoops);

static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct vattr *vap);
static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td);
static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
    char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
    char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
    struct componentname *scnp, struct sillyrename *sp);

/*
 * Global variables
 */
SYSCTL_DECL(_vfs_nfs);

/*
 * vfs.nfs.access_cache_timeout: how long (compared against time_second) a
 * cached ACCESS result stays usable; a value <= 0 disables the blanket
 * ACCESS request made by nfs_access().
 */
static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

/*
 * vfs.nfs.prime_access_cache: when set, nfs_getattr() issues an ACCESS RPC
 * on an attribute cache miss so the access cache is filled as well.
 */
static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
	   &nfs_prime_access_cache, 0,
	   "Prime NFS ACCESS cache when fetching attributes");

/* vfs.nfs.commit_on_close: passed to ncl_flush() by nfs_close(). */
static int	newnfs_commit_on_close = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
    &newnfs_commit_on_close, 0, "write+commit on close, else only write");

/*
 * vfs.nfs.clean_pages_on_close: nfs_close() cleans the dirty pages of a
 * regular file when this is non-zero.
 */
static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

/* vfs.nfs.nfs_directio_enable: gates the O_DIRECT handling in open/close. */
int newnfs_directio_enable = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
	   &newnfs_directio_enable, 0, "Enable NFS directio");

int nfs_keep_dirty_on_error;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW,
    &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * direct I/O is in progress can lead to data inconsistencies.  But, we allow
 * this by default to prevent DoS attacks - to prevent a malicious user from
 * opening up files O_DIRECT preventing other users from mmap'ing these
 * files.  "Protected" environments where stricter consistency guarantees are
 * required can disable this knob.  The process that opened the file O_DIRECT
 * cannot mmap() the file, because mmap'ed I/O on an O_DIRECT open() is not
 * meaningful.
 */
int newnfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
	   &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");

/* vfs.nfs.maxalloclen: tunable limit, initially 64 MiB. */
static uint64_t	nfs_maxalloclen = 64 * 1024 * 1024;
SYSCTL_U64(_vfs_nfs, OID_AUTO, maxalloclen, CTLFLAG_RW,
	   &nfs_maxalloclen, 0, "NFS max allocate/deallocate length");

/* Every NFS ACCESS bit this client ever asks about. */
#define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY		\
			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)

/*
 * SMP Locking Note :
 * The list of locks after the description of the lock is the ordering
 * of other locks acquired with the lock held.
 * np->n_mtx : Protects the fields in the nfsnode.
       VM Object Lock
       VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
       rep->r_mtx
 * ncl_iod_mutex : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
       nmp->nm_mtx
       rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
325 */ 326 327 static int 328 nfs_lock(struct vop_lock1_args *ap) 329 { 330 struct vnode *vp; 331 struct nfsnode *np; 332 u_quad_t nsize; 333 int error, lktype; 334 bool onfault; 335 336 vp = ap->a_vp; 337 lktype = ap->a_flags & LK_TYPE_MASK; 338 error = VOP_LOCK1_APV(&default_vnodeops, ap); 339 if (error != 0 || vp->v_op != &newnfs_vnodeops) 340 return (error); 341 np = VTONFS(vp); 342 if (np == NULL) 343 return (0); 344 NFSLOCKNODE(np); 345 if ((np->n_flag & NVNSETSZSKIP) == 0 || (lktype != LK_SHARED && 346 lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE && 347 lktype != LK_TRYUPGRADE)) { 348 NFSUNLOCKNODE(np); 349 return (0); 350 } 351 onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT && 352 (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE && 353 (lktype == LK_SHARED || lktype == LK_EXCLUSIVE); 354 if (onfault && vp->v_vnlock->lk_recurse == 0) { 355 /* 356 * Force retry in vm_fault(), to make the lock request 357 * sleepable, which allows us to piggy-back the 358 * sleepable call to vnode_pager_setsize(). 
359 */ 360 NFSUNLOCKNODE(np); 361 VOP_UNLOCK(vp); 362 return (EBUSY); 363 } 364 if ((ap->a_flags & LK_NOWAIT) != 0 || 365 (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) { 366 NFSUNLOCKNODE(np); 367 return (0); 368 } 369 if (lktype == LK_SHARED) { 370 NFSUNLOCKNODE(np); 371 VOP_UNLOCK(vp); 372 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 373 ap->a_flags |= LK_EXCLUSIVE; 374 error = VOP_LOCK1_APV(&default_vnodeops, ap); 375 if (error != 0 || vp->v_op != &newnfs_vnodeops) 376 return (error); 377 if (vp->v_data == NULL) 378 goto downgrade; 379 MPASS(vp->v_data == np); 380 NFSLOCKNODE(np); 381 if ((np->n_flag & NVNSETSZSKIP) == 0) { 382 NFSUNLOCKNODE(np); 383 goto downgrade; 384 } 385 } 386 np->n_flag &= ~NVNSETSZSKIP; 387 nsize = np->n_size; 388 NFSUNLOCKNODE(np); 389 vnode_pager_setsize(vp, nsize); 390 downgrade: 391 if (lktype == LK_SHARED) { 392 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 393 ap->a_flags |= LK_DOWNGRADE; 394 (void)VOP_LOCK1_APV(&default_vnodeops, ap); 395 } 396 return (0); 397 } 398 399 static int 400 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 401 struct ucred *cred, u_int32_t *retmode) 402 { 403 int error = 0, attrflag, i, lrupos; 404 u_int32_t rmode; 405 struct nfsnode *np = VTONFS(vp); 406 struct nfsvattr nfsva; 407 408 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 409 &rmode); 410 if (attrflag) 411 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 412 if (!error) { 413 lrupos = 0; 414 NFSLOCKNODE(np); 415 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 416 if (np->n_accesscache[i].uid == cred->cr_uid) { 417 np->n_accesscache[i].mode = rmode; 418 np->n_accesscache[i].stamp = time_second; 419 break; 420 } 421 if (i > 0 && np->n_accesscache[i].stamp < 422 np->n_accesscache[lrupos].stamp) 423 lrupos = i; 424 } 425 if (i == NFS_ACCESSCACHESIZE) { 426 np->n_accesscache[lrupos].uid = cred->cr_uid; 427 np->n_accesscache[lrupos].mode = rmode; 428 np->n_accesscache[lrupos].stamp = time_second; 429 } 430 
NFSUNLOCKNODE(np); 431 if (retmode != NULL) 432 *retmode = rmode; 433 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 434 } else if (NFS_ISV4(vp)) { 435 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 436 } 437 #ifdef KDTRACE_HOOKS 438 if (error != 0) 439 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 440 error); 441 #endif 442 return (error); 443 } 444 445 /* 446 * nfs access vnode op. 447 * For nfs version 2, just return ok. File accesses may fail later. 448 * For nfs version 3, use the access rpc to check accessibility. If file modes 449 * are changed on the server, accesses might still fail later. 450 */ 451 static int 452 nfs_access(struct vop_access_args *ap) 453 { 454 struct vnode *vp = ap->a_vp; 455 int error = 0, i, gotahit; 456 u_int32_t mode, wmode, rmode; 457 int v34 = NFS_ISV34(vp); 458 struct nfsnode *np = VTONFS(vp); 459 460 /* 461 * Disallow write attempts on filesystems mounted read-only; 462 * unless the file is a socket, fifo, or a block or character 463 * device resident on the filesystem. 464 */ 465 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 466 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 467 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 468 switch (vp->v_type) { 469 case VREG: 470 case VDIR: 471 case VLNK: 472 return (EROFS); 473 default: 474 break; 475 } 476 } 477 /* 478 * For nfs v3 or v4, check to see if we have done this recently, and if 479 * so return our cached result instead of making an ACCESS call. 480 * If not, do an access rpc, otherwise you are stuck emulating 481 * ufs_access() locally using the vattr. This may not be correct, 482 * since the server may apply other access criteria such as 483 * client uid-->server uid mapping that we do not know about. 
484 */ 485 if (v34) { 486 if (ap->a_accmode & VREAD) 487 mode = NFSACCESS_READ; 488 else 489 mode = 0; 490 if (vp->v_type != VDIR) { 491 if (ap->a_accmode & VWRITE) 492 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 493 if (ap->a_accmode & VAPPEND) 494 mode |= NFSACCESS_EXTEND; 495 if (ap->a_accmode & VEXEC) 496 mode |= NFSACCESS_EXECUTE; 497 if (ap->a_accmode & VDELETE) 498 mode |= NFSACCESS_DELETE; 499 } else { 500 if (ap->a_accmode & VWRITE) 501 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 502 if (ap->a_accmode & VAPPEND) 503 mode |= NFSACCESS_EXTEND; 504 if (ap->a_accmode & VEXEC) 505 mode |= NFSACCESS_LOOKUP; 506 if (ap->a_accmode & VDELETE) 507 mode |= NFSACCESS_DELETE; 508 if (ap->a_accmode & VDELETE_CHILD) 509 mode |= NFSACCESS_MODIFY; 510 } 511 /* XXX safety belt, only make blanket request if caching */ 512 if (nfsaccess_cache_timeout > 0) { 513 wmode = NFSACCESS_READ | NFSACCESS_MODIFY | 514 NFSACCESS_EXTEND | NFSACCESS_EXECUTE | 515 NFSACCESS_DELETE | NFSACCESS_LOOKUP; 516 } else { 517 wmode = mode; 518 } 519 520 /* 521 * Does our cached result allow us to give a definite yes to 522 * this request? 523 */ 524 gotahit = 0; 525 NFSLOCKNODE(np); 526 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 527 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 528 if (time_second < (np->n_accesscache[i].stamp 529 + nfsaccess_cache_timeout) && 530 (np->n_accesscache[i].mode & mode) == mode) { 531 NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); 532 gotahit = 1; 533 } 534 break; 535 } 536 } 537 NFSUNLOCKNODE(np); 538 #ifdef KDTRACE_HOOKS 539 if (gotahit != 0) 540 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 541 ap->a_cred->cr_uid, mode); 542 else 543 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 544 ap->a_cred->cr_uid, mode); 545 #endif 546 if (gotahit == 0) { 547 /* 548 * Either a no, or a don't know. Go to the wire. 
549 */ 550 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 551 error = nfs34_access_otw(vp, wmode, ap->a_td, 552 ap->a_cred, &rmode); 553 if (!error && 554 (rmode & mode) != mode) 555 error = EACCES; 556 } 557 return (error); 558 } else { 559 if ((error = nfsspec_access(ap)) != 0) { 560 return (error); 561 } 562 /* 563 * Attempt to prevent a mapped root from accessing a file 564 * which it shouldn't. We try to read a byte from the file 565 * if the user is root and the file is not zero length. 566 * After calling nfsspec_access, we should have the correct 567 * file size cached. 568 */ 569 NFSLOCKNODE(np); 570 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 571 && VTONFS(vp)->n_size > 0) { 572 struct iovec aiov; 573 struct uio auio; 574 char buf[1]; 575 576 NFSUNLOCKNODE(np); 577 aiov.iov_base = buf; 578 aiov.iov_len = 1; 579 auio.uio_iov = &aiov; 580 auio.uio_iovcnt = 1; 581 auio.uio_offset = 0; 582 auio.uio_resid = 1; 583 auio.uio_segflg = UIO_SYSSPACE; 584 auio.uio_rw = UIO_READ; 585 auio.uio_td = ap->a_td; 586 587 if (vp->v_type == VREG) 588 error = ncl_readrpc(vp, &auio, ap->a_cred); 589 else if (vp->v_type == VDIR) { 590 char* bp; 591 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 592 aiov.iov_base = bp; 593 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 594 error = ncl_readdirrpc(vp, &auio, ap->a_cred, 595 ap->a_td); 596 free(bp, M_TEMP); 597 } else if (vp->v_type == VLNK) 598 error = ncl_readlinkrpc(vp, &auio, ap->a_cred); 599 else 600 error = EACCES; 601 } else 602 NFSUNLOCKNODE(np); 603 return (error); 604 } 605 } 606 607 /* 608 * nfs open vnode op 609 * Check to see if the type is ok 610 * and that deletion is not in progress. 611 * For paged in text files, you will need to flush the page cache 612 * if consistency is lost. 
 */
/* ARGSUSED */
static int
nfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;
	int fmode = ap->a_mode;
	struct ucred *cred;
	vm_object_t obj;

	/* Only regular files, directories and symlinks can be opened here. */
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
		return (EOPNOTSUPP);

	/*
	 * For NFSv4, we need to do the Open Op before cache validation,
	 * so that we conform to RFC3530 Sec. 9.3.1.
	 *
	 * From here on, the error returns below undo this Open with
	 * nfsrpc_close().
	 * NOTE(review): the VN_IS_DOOMED() returns (EBADF) do not - confirm
	 * that is intended for a reclaimed vnode.
	 */
	if (NFS_ISV4(vp)) {
		error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
		if (error) {
			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
			    (gid_t)0);
			return (error);
		}
	}

	/*
	 * Now, if this Open will be doing reading, re-validate/flush the
	 * cache, so that Close/Open coherency is maintained.
	 */
	NFSLOCKNODE(np);
	if (np->n_flag & NMODIFIED) {
		/*
		 * Locally modified: push/invalidate the buffers first, then
		 * drop the attribute cache and fetch fresh attributes.
		 */
		NFSUNLOCKNODE(np);
		/* The buffer invalidation is done with the vnode lock exclusive. */
		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
			NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
			if (VN_IS_DOOMED(vp))
				return (EBADF);
		}
		error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
		if (error == EINTR || error == EIO) {
			if (NFS_ISV4(vp))
				(void) nfsrpc_close(vp, 0, ap->a_td);
			return (error);
		}
		NFSLOCKNODE(np);
		np->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
		if (vp->v_type == VDIR)
			np->n_direofoffset = 0;
		NFSUNLOCKNODE(np);
		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
		if (error) {
			if (NFS_ISV4(vp))
				(void) nfsrpc_close(vp, 0, ap->a_td);
			return (error);
		}
		NFSLOCKNODE(np);
		np->n_mtime = vattr.va_mtime;
		if (NFS_ISV4(vp))
			np->n_change = vattr.va_filerev;
	} else {
		/*
		 * Not modified locally: get the attributes and invalidate
		 * the buffers only if the change attribute (NFSv4) or the
		 * modify time differs from what was recorded.
		 */
		NFSUNLOCKNODE(np);
		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
		if (error) {
			if (NFS_ISV4(vp))
				(void) nfsrpc_close(vp, 0, ap->a_td);
			return (error);
		}
		NFSLOCKNODE(np);
		if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
		    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
			if (vp->v_type == VDIR)
				np->n_direofoffset = 0;
			NFSUNLOCKNODE(np);
			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
				NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
				if (VN_IS_DOOMED(vp))
					return (EBADF);
			}
			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error == EINTR || error == EIO) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			NFSLOCKNODE(np);
			np->n_mtime = vattr.va_mtime;
			if (NFS_ISV4(vp))
				np->n_change = vattr.va_filerev;
		}
	}

	/*
	 * The node mutex is held on every path that reaches this point and
	 * stays held until the NFSUNLOCKNODE() after the write credential
	 * has been captured below.
	 */

	/*
	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
	 */
	if (newnfs_directio_enable && (fmode & O_DIRECT) &&
	    (vp->v_type == VREG)) {
		if (np->n_directio_opens == 0) {
			/* First O_DIRECT open: empty the buffer cache. */
			NFSUNLOCKNODE(np);
			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
				NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
				if (VN_IS_DOOMED(vp))
					return (EBADF);
			}
			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			NFSLOCKNODE(np);
			np->n_flag |= NNONCACHE;
		}
		np->n_directio_opens++;
	}

	/* If opened for writing via NFSv4.1 or later, mark that for pNFS. */
	if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0)
		np->n_flag |= NWRITEOPENED;

	/*
	 * If this is an open for writing, capture a reference to the
	 * credentials, so they can be used by ncl_putpages(). Using
	 * these write credentials is preferable to the credentials of
	 * whatever thread happens to be doing the VOP_PUTPAGES() since
	 * the write RPCs are less likely to fail with EACCES.
	 */
	if ((fmode & FWRITE) != 0) {
		cred = np->n_writecred;
		np->n_writecred = crhold(ap->a_cred);
	} else
		cred = NULL;
	NFSUNLOCKNODE(np);

	/* Release the replaced write credential outside the node mutex. */
	if (cred != NULL)
		crfree(cred);
	vnode_create_vobject(vp, vattr.va_size, ap->a_td);

	/*
	 * If the text file has been mmap'd, flush any dirty pages to the
	 * buffer cache and then...
	 * Make sure all writes are pushed to the NFS server.  If this is not
	 * done, the modify time of the file can change while the text
	 * file is being executed.  This will cause the process that is
	 * executing the text file to be terminated.
	 */
	if (vp->v_writecount <= -1) {
		if ((obj = vp->v_object) != NULL &&
		    vm_object_mightbedirty(obj)) {
			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
				NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
				if (VN_IS_DOOMED(vp))
					return (EBADF);
			}
			vnode_pager_clean_sync(vp);
		}

		/* Now, flush the buffer cache. */
		ncl_flush(vp, MNT_WAIT, curthread, 0, 0);

		/* And, finally, make sure that n_mtime is up to date. */
		np = VTONFS(vp);
		NFSLOCKNODE(np);
		np->n_mtime = np->n_vattr.na_mtime;
		NFSUNLOCKNODE(np);
	}
	return (0);
}

/*
 * nfs close vnode op
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
796 * not being able to tell if other clients are writing a file concurrently, 797 * since there is no way of knowing if the changed modify time in the reply 798 * is only due to the write for this client. 799 * (NFS Version 3 provides weak cache consistency data in the reply that 800 * should be sufficient to detect and handle this case.) 801 * 802 * The current code does the following: 803 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 804 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 805 * or commit them (this satisfies 1 and 2 except for the 806 * case where the server crashes after this close but 807 * before the commit RPC, which is felt to be "good 808 * enough". Changing the last argument to ncl_flush() to 809 * a 1 would force a commit operation, if it is felt a 810 * commit is necessary now. 811 * for NFS Version 4 - flush the dirty buffers and commit them, if 812 * nfscl_mustflush() says this is necessary. 813 * It is necessary if there is no write delegation held, 814 * in order to satisfy open/close coherency. 815 * If the file isn't cached on local stable storage, 816 * it may be necessary in order to detect "out of space" 817 * errors from the server, if the write delegation 818 * issued by the server doesn't allow the file to grow. 819 */ 820 /* ARGSUSED */ 821 static int 822 nfs_close(struct vop_close_args *ap) 823 { 824 struct vnode *vp = ap->a_vp; 825 struct nfsnode *np = VTONFS(vp); 826 struct nfsvattr nfsva; 827 struct ucred *cred; 828 int error = 0, ret, localcred = 0; 829 int fmode = ap->a_fflag; 830 831 if (NFSCL_FORCEDISM(vp->v_mount)) 832 return (0); 833 /* 834 * During shutdown, a_cred isn't valid, so just use root. 835 */ 836 if (ap->a_cred == NOCRED) { 837 cred = newnfs_getcred(); 838 localcred = 1; 839 } else { 840 cred = ap->a_cred; 841 } 842 if (vp->v_type == VREG) { 843 /* 844 * Examine and clean dirty pages, regardless of NMODIFIED. 
845 * This closes a major hole in close-to-open consistency. 846 * We want to push out all dirty pages (and buffers) on 847 * close, regardless of whether they were dirtied by 848 * mmap'ed writes or via write(). 849 */ 850 if (nfs_clean_pages_on_close && vp->v_object) { 851 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 852 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 853 if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK) 854 return (EBADF); 855 } 856 vnode_pager_clean_async(vp); 857 } 858 NFSLOCKNODE(np); 859 if (np->n_flag & NMODIFIED) { 860 NFSUNLOCKNODE(np); 861 if (NFS_ISV3(vp)) { 862 /* 863 * Under NFSv3 we have dirty buffers to dispose of. We 864 * must flush them to the NFS server. We have the option 865 * of waiting all the way through the commit rpc or just 866 * waiting for the initial write. The default is to only 867 * wait through the initial write so the data is in the 868 * server's cache, which is roughly similar to the state 869 * a standard disk subsystem leaves the file in on close(). 870 * 871 * We cannot clear the NMODIFIED bit in np->n_flag due to 872 * potential races with other processes, and certainly 873 * cannot clear it if we don't commit. 874 * These races occur when there is no longer the old 875 * traditional vnode locking implemented for Vnode Ops. 876 */ 877 int cm = newnfs_commit_on_close ? 1 : 0; 878 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 879 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 880 if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK) 881 return (EBADF); 882 } 883 error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); 884 /* np->n_flag &= ~NMODIFIED; */ 885 } else if (NFS_ISV4(vp)) { 886 if (nfscl_mustflush(vp) != 0) { 887 int cm = newnfs_commit_on_close ? 
1 : 0; 888 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 889 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 890 if (VN_IS_DOOMED(vp) && ap->a_fflag != 891 FNONBLOCK) 892 return (EBADF); 893 } 894 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 895 cm, 0); 896 /* 897 * as above w.r.t races when clearing 898 * NMODIFIED. 899 * np->n_flag &= ~NMODIFIED; 900 */ 901 } 902 } else { 903 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 904 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 905 if (VN_IS_DOOMED(vp) && ap->a_fflag != 906 FNONBLOCK) 907 return (EBADF); 908 } 909 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 910 } 911 NFSLOCKNODE(np); 912 } 913 /* 914 * Invalidate the attribute cache in all cases. 915 * An open is going to fetch fresh attrs any way, other procs 916 * on this node that have file open will be forced to do an 917 * otw attr fetch, but this is safe. 918 * --> A user found that their RPC count dropped by 20% when 919 * this was commented out and I can't see any requirement 920 * for it, so I've disabled it when negative lookups are 921 * enabled. (What does this have to do with negative lookup 922 * caching? Well nothing, except it was reported by the 923 * same user that needed negative lookup caching and I wanted 924 * there to be a way to disable it to see if it 925 * is the cause of some caching/coherency issue that might 926 * crop up.) 927 */ 928 if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { 929 np->n_attrstamp = 0; 930 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 931 } 932 if (np->n_flag & NWRITEERR) { 933 np->n_flag &= ~NWRITEERR; 934 error = np->n_error; 935 } 936 NFSUNLOCKNODE(np); 937 } 938 939 if (NFS_ISV4(vp)) { 940 /* 941 * Get attributes so "change" is up to date. 
942 */ 943 if (error == 0 && nfscl_nodeleg(vp, 0) != 0 && 944 vp->v_type == VREG && 945 (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) { 946 ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva); 947 if (!ret) { 948 np->n_change = nfsva.na_filerev; 949 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 950 0, 0); 951 } 952 } 953 954 /* 955 * and do the close. 956 */ 957 ret = nfsrpc_close(vp, 0, ap->a_td); 958 if (!error && ret) 959 error = ret; 960 if (error) 961 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 962 (gid_t)0); 963 } 964 if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 965 NFSLOCKNODE(np); 966 KASSERT((np->n_directio_opens > 0), 967 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 968 np->n_directio_opens--; 969 if (np->n_directio_opens == 0) 970 np->n_flag &= ~NNONCACHE; 971 NFSUNLOCKNODE(np); 972 } 973 if (localcred) 974 NFSFREECRED(cred); 975 return (error); 976 } 977 978 /* 979 * nfs getattr call from vfs. 980 */ 981 static int 982 nfs_getattr(struct vop_getattr_args *ap) 983 { 984 struct vnode *vp = ap->a_vp; 985 struct thread *td = curthread; /* XXX */ 986 struct nfsnode *np = VTONFS(vp); 987 int error = 0; 988 struct nfsvattr nfsva; 989 struct vattr *vap = ap->a_vap; 990 struct vattr vattr; 991 struct nfsmount *nmp; 992 993 nmp = VFSTONFS(vp->v_mount); 994 /* 995 * Update local times for special files. 996 */ 997 NFSLOCKNODE(np); 998 if (np->n_flag & (NACC | NUPD)) 999 np->n_flag |= NCHG; 1000 NFSUNLOCKNODE(np); 1001 /* 1002 * First look in the cache. 1003 * For "syskrb5" mounts, nm_fhsize might still be zero and 1004 * cached attributes should be ignored. 1005 */ 1006 if (nmp->nm_fhsize > 0 && ncl_getattrcache(vp, &vattr) == 0) { 1007 ncl_copy_vattr(vap, &vattr); 1008 1009 /* 1010 * Get the local modify time for the case of a write 1011 * delegation. 
1012 */ 1013 nfscl_deleggetmodtime(vp, &vap->va_mtime); 1014 return (0); 1015 } 1016 1017 if (NFS_ISV34(vp) && nfs_prime_access_cache && 1018 nfsaccess_cache_timeout > 0) { 1019 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 1020 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); 1021 if (ncl_getattrcache(vp, ap->a_vap) == 0) { 1022 nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); 1023 return (0); 1024 } 1025 } 1026 error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva); 1027 if (!error) 1028 error = nfscl_loadattrcache(&vp, &nfsva, vap, 0, 0); 1029 if (!error) { 1030 /* 1031 * Get the local modify time for the case of a write 1032 * delegation. 1033 */ 1034 nfscl_deleggetmodtime(vp, &vap->va_mtime); 1035 } else if (NFS_ISV4(vp)) { 1036 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1037 } 1038 return (error); 1039 } 1040 1041 /* 1042 * nfs setattr call. 1043 */ 1044 static int 1045 nfs_setattr(struct vop_setattr_args *ap) 1046 { 1047 struct vnode *vp = ap->a_vp; 1048 struct nfsnode *np = VTONFS(vp); 1049 struct thread *td = curthread; /* XXX */ 1050 struct vattr *vap = ap->a_vap; 1051 int error = 0; 1052 u_quad_t tsize; 1053 struct timespec ts; 1054 1055 #ifndef nolint 1056 tsize = (u_quad_t)0; 1057 #endif 1058 1059 /* 1060 * Setting of flags and marking of atimes are not supported. 1061 */ 1062 if (vap->va_flags != VNOVAL) 1063 return (EOPNOTSUPP); 1064 1065 /* 1066 * Disallow write attempts if the filesystem is mounted read-only. 
1067 */ 1068 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 1069 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 1070 vap->va_mtime.tv_sec != VNOVAL || 1071 vap->va_birthtime.tv_sec != VNOVAL || 1072 vap->va_mode != (mode_t)VNOVAL) && 1073 (vp->v_mount->mnt_flag & MNT_RDONLY)) 1074 return (EROFS); 1075 if (vap->va_size != VNOVAL) { 1076 switch (vp->v_type) { 1077 case VDIR: 1078 return (EISDIR); 1079 case VCHR: 1080 case VBLK: 1081 case VSOCK: 1082 case VFIFO: 1083 if (vap->va_mtime.tv_sec == VNOVAL && 1084 vap->va_atime.tv_sec == VNOVAL && 1085 vap->va_birthtime.tv_sec == VNOVAL && 1086 vap->va_mode == (mode_t)VNOVAL && 1087 vap->va_uid == (uid_t)VNOVAL && 1088 vap->va_gid == (gid_t)VNOVAL) 1089 return (0); 1090 vap->va_size = VNOVAL; 1091 break; 1092 default: 1093 /* 1094 * Disallow write attempts if the filesystem is 1095 * mounted read-only. 1096 */ 1097 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1098 return (EROFS); 1099 /* 1100 * We run vnode_pager_setsize() early (why?), 1101 * we must set np->n_size now to avoid vinvalbuf 1102 * V_SAVE races that might setsize a lower 1103 * value. 1104 */ 1105 NFSLOCKNODE(np); 1106 tsize = np->n_size; 1107 NFSUNLOCKNODE(np); 1108 error = ncl_meta_setsize(vp, td, vap->va_size); 1109 NFSLOCKNODE(np); 1110 if (np->n_flag & NMODIFIED) { 1111 tsize = np->n_size; 1112 NFSUNLOCKNODE(np); 1113 error = ncl_vinvalbuf(vp, vap->va_size == 0 ? 1114 0 : V_SAVE, td, 1); 1115 if (error != 0) { 1116 vnode_pager_setsize(vp, tsize); 1117 return (error); 1118 } 1119 /* 1120 * Call nfscl_delegmodtime() to set the modify time 1121 * locally, as required. 1122 */ 1123 nfscl_delegmodtime(vp); 1124 } else 1125 NFSUNLOCKNODE(np); 1126 /* 1127 * np->n_size has already been set to vap->va_size 1128 * in ncl_meta_setsize(). We must set it again since 1129 * nfs_loadattrcache() could be called through 1130 * ncl_meta_setsize() and could modify np->n_size. 
1131 */ 1132 NFSLOCKNODE(np); 1133 np->n_vattr.na_size = np->n_size = vap->va_size; 1134 NFSUNLOCKNODE(np); 1135 } 1136 } else { 1137 NFSLOCKNODE(np); 1138 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 1139 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 1140 NFSUNLOCKNODE(np); 1141 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 1142 if (error == EINTR || error == EIO) 1143 return (error); 1144 } else 1145 NFSUNLOCKNODE(np); 1146 } 1147 error = nfs_setattrrpc(vp, vap, ap->a_cred, td); 1148 if (vap->va_size != VNOVAL) { 1149 if (error == 0) { 1150 nanouptime(&ts); 1151 NFSLOCKNODE(np); 1152 np->n_localmodtime = ts; 1153 NFSUNLOCKNODE(np); 1154 } else { 1155 NFSLOCKNODE(np); 1156 np->n_size = np->n_vattr.na_size = tsize; 1157 vnode_pager_setsize(vp, tsize); 1158 NFSUNLOCKNODE(np); 1159 } 1160 } 1161 return (error); 1162 } 1163 1164 /* 1165 * Do an nfs setattr rpc. 1166 */ 1167 static int 1168 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, 1169 struct thread *td) 1170 { 1171 struct nfsnode *np = VTONFS(vp); 1172 int error, ret, attrflag, i; 1173 struct nfsvattr nfsva; 1174 1175 if (NFS_ISV34(vp)) { 1176 NFSLOCKNODE(np); 1177 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 1178 np->n_accesscache[i].stamp = 0; 1179 np->n_flag |= NDELEGMOD; 1180 NFSUNLOCKNODE(np); 1181 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 1182 } 1183 error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag); 1184 if (attrflag) { 1185 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1186 if (ret && !error) 1187 error = ret; 1188 } 1189 if (error && NFS_ISV4(vp)) 1190 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); 1191 return (error); 1192 } 1193 1194 /* 1195 * nfs lookup call, one step at a time... 
1196 * First look in cache 1197 * If not found, unlock the directory nfsnode and do the rpc 1198 */ 1199 static int 1200 nfs_lookup(struct vop_lookup_args *ap) 1201 { 1202 struct componentname *cnp = ap->a_cnp; 1203 struct vnode *dvp = ap->a_dvp; 1204 struct vnode **vpp = ap->a_vpp; 1205 struct mount *mp = dvp->v_mount; 1206 int flags = cnp->cn_flags; 1207 struct vnode *newvp; 1208 struct nfsmount *nmp; 1209 struct nfsnode *np, *newnp; 1210 int error = 0, attrflag, dattrflag, ltype, ncticks; 1211 struct thread *td = curthread; 1212 struct nfsfh *nfhp; 1213 struct nfsvattr dnfsva, nfsva; 1214 struct vattr vattr; 1215 struct timespec nctime, ts; 1216 uint32_t openmode; 1217 1218 *vpp = NULLVP; 1219 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 1220 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 1221 return (EROFS); 1222 if (dvp->v_type != VDIR) 1223 return (ENOTDIR); 1224 nmp = VFSTONFS(mp); 1225 np = VTONFS(dvp); 1226 1227 /* For NFSv4, wait until any remove is done. */ 1228 NFSLOCKNODE(np); 1229 while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { 1230 np->n_flag |= NREMOVEWANT; 1231 (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); 1232 } 1233 NFSUNLOCKNODE(np); 1234 1235 error = vn_dir_check_exec(dvp, cnp); 1236 if (error != 0) 1237 return (error); 1238 error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); 1239 if (error > 0 && error != ENOENT) 1240 return (error); 1241 if (error == -1) { 1242 /* 1243 * Lookups of "." are special and always return the 1244 * current directory. cache_lookup() already handles 1245 * associated locking bookkeeping, etc. 1246 */ 1247 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { 1248 return (0); 1249 } 1250 1251 /* 1252 * We only accept a positive hit in the cache if the 1253 * change time of the file matches our cached copy. 1254 * Otherwise, we discard the cache entry and fallback 1255 * to doing a lookup RPC. 
We also only trust cache 1256 * entries for less than nm_nametimeo seconds. 1257 * 1258 * To better handle stale file handles and attributes, 1259 * clear the attribute cache of this node if it is a 1260 * leaf component, part of an open() call, and not 1261 * locally modified before fetching the attributes. 1262 * This should allow stale file handles to be detected 1263 * here where we can fall back to a LOOKUP RPC to 1264 * recover rather than having nfs_open() detect the 1265 * stale file handle and failing open(2) with ESTALE. 1266 */ 1267 newvp = *vpp; 1268 newnp = VTONFS(newvp); 1269 if (!(nmp->nm_flag & NFSMNT_NOCTO) && 1270 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1271 !(newnp->n_flag & NMODIFIED)) { 1272 NFSLOCKNODE(newnp); 1273 newnp->n_attrstamp = 0; 1274 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1275 NFSUNLOCKNODE(newnp); 1276 } 1277 if (nfscl_nodeleg(newvp, 0) == 0 || 1278 ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && 1279 VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && 1280 timespeccmp(&vattr.va_ctime, &nctime, ==))) { 1281 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1282 return (0); 1283 } 1284 cache_purge(newvp); 1285 if (dvp != newvp) 1286 vput(newvp); 1287 else 1288 vrele(newvp); 1289 *vpp = NULLVP; 1290 } else if (error == ENOENT) { 1291 if (VN_IS_DOOMED(dvp)) 1292 return (ENOENT); 1293 /* 1294 * We only accept a negative hit in the cache if the 1295 * modification time of the parent directory matches 1296 * the cached copy in the name cache entry. 1297 * Otherwise, we discard all of the negative cache 1298 * entries for this directory. We also only trust 1299 * negative cache entries for up to nm_negnametimeo 1300 * seconds. 
1301 */ 1302 if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && 1303 VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 1304 timespeccmp(&vattr.va_mtime, &nctime, ==)) { 1305 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1306 return (ENOENT); 1307 } 1308 cache_purge_negative(dvp); 1309 } 1310 1311 openmode = 0; 1312 #if 0 1313 /* 1314 * The use of LookupOpen breaks some builds. It is disabled 1315 * until that is fixed. 1316 */ 1317 /* 1318 * If this an NFSv4.1/4.2 mount using the "oneopenown" mount 1319 * option, it is possible to do the Open operation in the same 1320 * compound as Lookup, so long as delegations are not being 1321 * issued. This saves doing a separate RPC for Open. 1322 * For pnfs, do not do this, since the Open+LayoutGet will 1323 * be needed as a separate RPC. 1324 */ 1325 NFSLOCKMNT(nmp); 1326 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && !NFSHASPNFS(nmp) && 1327 (nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 && 1328 (!NFSMNT_RDONLY(mp) || (flags & OPENWRITE) == 0) && 1329 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN)) { 1330 if ((flags & OPENREAD) != 0) 1331 openmode |= NFSV4OPEN_ACCESSREAD; 1332 if ((flags & OPENWRITE) != 0) 1333 openmode |= NFSV4OPEN_ACCESSWRITE; 1334 } 1335 NFSUNLOCKMNT(nmp); 1336 #endif 1337 1338 newvp = NULLVP; 1339 NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); 1340 nanouptime(&ts); 1341 error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1342 cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1343 openmode); 1344 if (dattrflag) 1345 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1346 if (error) { 1347 if (newvp != NULLVP) { 1348 vput(newvp); 1349 *vpp = NULLVP; 1350 } 1351 1352 if (error != ENOENT) { 1353 if (NFS_ISV4(dvp)) 1354 error = nfscl_maperr(td, error, (uid_t)0, 1355 (gid_t)0); 1356 return (error); 1357 } 1358 1359 /* The requested file was not found. 
*/ 1360 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 1361 (flags & ISLASTCN)) { 1362 /* 1363 * XXX: UFS does a full VOP_ACCESS(dvp, 1364 * VWRITE) here instead of just checking 1365 * MNT_RDONLY. 1366 */ 1367 if (mp->mnt_flag & MNT_RDONLY) 1368 return (EROFS); 1369 return (EJUSTRETURN); 1370 } 1371 1372 if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) { 1373 /* 1374 * Cache the modification time of the parent 1375 * directory from the post-op attributes in 1376 * the name cache entry. The negative cache 1377 * entry will be ignored once the directory 1378 * has changed. Don't bother adding the entry 1379 * if the directory has already changed. 1380 */ 1381 NFSLOCKNODE(np); 1382 if (timespeccmp(&np->n_vattr.na_mtime, 1383 &dnfsva.na_mtime, ==)) { 1384 NFSUNLOCKNODE(np); 1385 cache_enter_time(dvp, NULL, cnp, 1386 &dnfsva.na_mtime, NULL); 1387 } else 1388 NFSUNLOCKNODE(np); 1389 } 1390 return (ENOENT); 1391 } 1392 1393 /* 1394 * Handle RENAME case... 1395 */ 1396 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1397 if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1398 free(nfhp, M_NFSFH); 1399 return (EISDIR); 1400 } 1401 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, LK_EXCLUSIVE); 1402 if (error) 1403 return (error); 1404 newvp = NFSTOV(np); 1405 /* 1406 * If n_localmodtime >= time before RPC, then 1407 * a file modification operation, such as 1408 * VOP_SETATTR() of size, has occurred while 1409 * the Lookup RPC and acquisition of the vnode 1410 * happened. As such, the attributes might 1411 * be stale, with possibly an incorrect size. 
1412 */ 1413 NFSLOCKNODE(np); 1414 if (timespecisset(&np->n_localmodtime) && 1415 timespeccmp(&np->n_localmodtime, &ts, >=)) { 1416 NFSCL_DEBUG(4, "nfs_lookup: rename localmod " 1417 "stale attributes\n"); 1418 attrflag = 0; 1419 } 1420 NFSUNLOCKNODE(np); 1421 if (attrflag) 1422 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1423 *vpp = newvp; 1424 return (0); 1425 } 1426 1427 if (flags & ISDOTDOT) { 1428 ltype = NFSVOPISLOCKED(dvp); 1429 error = vfs_busy(mp, MBF_NOWAIT); 1430 if (error != 0) { 1431 vfs_ref(mp); 1432 NFSVOPUNLOCK(dvp); 1433 error = vfs_busy(mp, 0); 1434 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1435 vfs_rel(mp); 1436 if (error == 0 && VN_IS_DOOMED(dvp)) { 1437 vfs_unbusy(mp); 1438 error = ENOENT; 1439 } 1440 if (error != 0) 1441 return (error); 1442 } 1443 NFSVOPUNLOCK(dvp); 1444 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, 1445 cnp->cn_lkflags); 1446 if (error == 0) 1447 newvp = NFSTOV(np); 1448 vfs_unbusy(mp); 1449 if (newvp != dvp) 1450 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1451 if (VN_IS_DOOMED(dvp)) { 1452 if (error == 0) { 1453 if (newvp == dvp) 1454 vrele(newvp); 1455 else 1456 vput(newvp); 1457 } 1458 error = ENOENT; 1459 } 1460 if (error != 0) 1461 return (error); 1462 if (attrflag) 1463 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1464 } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1465 free(nfhp, M_NFSFH); 1466 VREF(dvp); 1467 newvp = dvp; 1468 if (attrflag) 1469 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1470 } else { 1471 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, 1472 cnp->cn_lkflags); 1473 if (error) 1474 return (error); 1475 newvp = NFSTOV(np); 1476 /* 1477 * If n_localmodtime >= time before RPC, then 1478 * a file modification operation, such as 1479 * VOP_SETATTR() of size, has occurred while 1480 * the Lookup RPC and acquisition of the vnode 1481 * happened. As such, the attributes might 1482 * be stale, with possibly an incorrect size. 
1483 */ 1484 NFSLOCKNODE(np); 1485 if (timespecisset(&np->n_localmodtime) && 1486 timespeccmp(&np->n_localmodtime, &ts, >=)) { 1487 NFSCL_DEBUG(4, "nfs_lookup: localmod " 1488 "stale attributes\n"); 1489 attrflag = 0; 1490 } 1491 NFSUNLOCKNODE(np); 1492 if (attrflag) 1493 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1494 else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1495 !(np->n_flag & NMODIFIED)) { 1496 /* 1497 * Flush the attribute cache when opening a 1498 * leaf node to ensure that fresh attributes 1499 * are fetched in nfs_open() since we did not 1500 * fetch attributes from the LOOKUP reply. 1501 */ 1502 NFSLOCKNODE(np); 1503 np->n_attrstamp = 0; 1504 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1505 NFSUNLOCKNODE(np); 1506 } 1507 } 1508 if ((cnp->cn_flags & MAKEENTRY) && dvp != newvp && 1509 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && 1510 attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) 1511 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 1512 newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); 1513 *vpp = newvp; 1514 return (0); 1515 } 1516 1517 /* 1518 * nfs read call. 1519 * Just call ncl_bioread() to do the work. 1520 */ 1521 static int 1522 nfs_read(struct vop_read_args *ap) 1523 { 1524 struct vnode *vp = ap->a_vp; 1525 1526 switch (vp->v_type) { 1527 case VREG: 1528 return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); 1529 case VDIR: 1530 return (EISDIR); 1531 default: 1532 return (EOPNOTSUPP); 1533 } 1534 } 1535 1536 /* 1537 * nfs readlink call 1538 */ 1539 static int 1540 nfs_readlink(struct vop_readlink_args *ap) 1541 { 1542 struct vnode *vp = ap->a_vp; 1543 1544 if (vp->v_type != VLNK) 1545 return (EINVAL); 1546 return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); 1547 } 1548 1549 /* 1550 * Do a readlink rpc. 1551 * Called by ncl_doio() from below the buffer cache. 
1552 */ 1553 int 1554 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1555 { 1556 int error, ret, attrflag; 1557 struct nfsvattr nfsva; 1558 1559 error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, 1560 &attrflag); 1561 if (attrflag) { 1562 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1563 if (ret && !error) 1564 error = ret; 1565 } 1566 if (error && NFS_ISV4(vp)) 1567 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1568 return (error); 1569 } 1570 1571 /* 1572 * nfs read rpc call 1573 * Ditto above 1574 */ 1575 int 1576 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1577 { 1578 int error, ret, attrflag; 1579 struct nfsvattr nfsva; 1580 struct nfsmount *nmp; 1581 1582 nmp = VFSTONFS(vp->v_mount); 1583 error = EIO; 1584 attrflag = 0; 1585 if (NFSHASPNFS(nmp)) 1586 error = nfscl_doiods(vp, uiop, NULL, NULL, 1587 NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td); 1588 NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); 1589 if (error != 0 && error != EFAULT) 1590 error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, 1591 &attrflag); 1592 if (attrflag) { 1593 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1594 if (ret && !error) 1595 error = ret; 1596 } 1597 if (error && NFS_ISV4(vp)) 1598 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1599 return (error); 1600 } 1601 1602 /* 1603 * nfs write call 1604 */ 1605 int 1606 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1607 int *iomode, int *must_commit, int called_from_strategy, int ioflag) 1608 { 1609 struct nfsvattr nfsva; 1610 int error, attrflag, ret; 1611 struct nfsmount *nmp; 1612 1613 nmp = VFSTONFS(vp->v_mount); 1614 error = EIO; 1615 attrflag = 0; 1616 if (NFSHASPNFS(nmp)) 1617 error = nfscl_doiods(vp, uiop, iomode, must_commit, 1618 NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td); 1619 NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); 1620 if (error != 0 && error != EFAULT) 1621 error = 
nfsrpc_write(vp, uiop, iomode, must_commit, cred, 1622 uiop->uio_td, &nfsva, &attrflag, called_from_strategy, 1623 ioflag); 1624 if (attrflag) { 1625 if (VTONFS(vp)->n_flag & ND_NFSV4) 1626 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 1, 1); 1627 else 1628 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1629 if (ret && !error) 1630 error = ret; 1631 } 1632 if (DOINGASYNC(vp)) 1633 *iomode = NFSWRITE_FILESYNC; 1634 if (error && NFS_ISV4(vp)) 1635 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1636 return (error); 1637 } 1638 1639 /* 1640 * nfs mknod rpc 1641 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1642 * mode set to specify the file type and the size field for rdev. 1643 */ 1644 static int 1645 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1646 struct vattr *vap) 1647 { 1648 struct nfsvattr nfsva, dnfsva; 1649 struct vnode *newvp = NULL; 1650 struct nfsnode *np = NULL, *dnp; 1651 struct nfsfh *nfhp; 1652 struct vattr vattr; 1653 int error = 0, attrflag, dattrflag; 1654 u_int32_t rdev; 1655 1656 if (vap->va_type == VCHR || vap->va_type == VBLK) 1657 rdev = vap->va_rdev; 1658 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1659 rdev = 0xffffffff; 1660 else 1661 return (EOPNOTSUPP); 1662 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1663 return (error); 1664 error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, 1665 rdev, vap->va_type, cnp->cn_cred, curthread, &dnfsva, 1666 &nfsva, &nfhp, &attrflag, &dattrflag); 1667 if (!error) { 1668 if (!nfhp) 1669 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1670 cnp->cn_namelen, cnp->cn_cred, curthread, 1671 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 0); 1672 if (nfhp) 1673 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1674 curthread, &np, LK_EXCLUSIVE); 1675 } 1676 if (dattrflag) 1677 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1678 if (!error) { 1679 newvp = NFSTOV(np); 1680 if (attrflag != 0) { 
1681 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1682 if (error != 0) 1683 vput(newvp); 1684 } 1685 } 1686 if (!error) { 1687 *vpp = newvp; 1688 } else if (NFS_ISV4(dvp)) { 1689 error = nfscl_maperr(curthread, error, vap->va_uid, 1690 vap->va_gid); 1691 } 1692 dnp = VTONFS(dvp); 1693 NFSLOCKNODE(dnp); 1694 dnp->n_flag |= NMODIFIED; 1695 if (!dattrflag) { 1696 dnp->n_attrstamp = 0; 1697 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1698 } 1699 NFSUNLOCKNODE(dnp); 1700 return (error); 1701 } 1702 1703 /* 1704 * nfs mknod vop 1705 * just call nfs_mknodrpc() to do the work. 1706 */ 1707 /* ARGSUSED */ 1708 static int 1709 nfs_mknod(struct vop_mknod_args *ap) 1710 { 1711 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1712 } 1713 1714 static struct mtx nfs_cverf_mtx; 1715 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", 1716 MTX_DEF); 1717 1718 static nfsquad_t 1719 nfs_get_cverf(void) 1720 { 1721 static nfsquad_t cverf; 1722 nfsquad_t ret; 1723 static int cverf_initialized = 0; 1724 1725 mtx_lock(&nfs_cverf_mtx); 1726 if (cverf_initialized == 0) { 1727 cverf.lval[0] = arc4random(); 1728 cverf.lval[1] = arc4random(); 1729 cverf_initialized = 1; 1730 } else 1731 cverf.qval++; 1732 ret = cverf; 1733 mtx_unlock(&nfs_cverf_mtx); 1734 1735 return (ret); 1736 } 1737 1738 /* 1739 * nfs file create call 1740 */ 1741 static int 1742 nfs_create(struct vop_create_args *ap) 1743 { 1744 struct vnode *dvp = ap->a_dvp; 1745 struct vattr *vap = ap->a_vap; 1746 struct componentname *cnp = ap->a_cnp; 1747 struct nfsnode *np = NULL, *dnp; 1748 struct vnode *newvp = NULL; 1749 struct nfsmount *nmp; 1750 struct nfsvattr dnfsva, nfsva; 1751 struct nfsfh *nfhp; 1752 nfsquad_t cverf; 1753 int error = 0, attrflag, dattrflag, fmode = 0; 1754 struct vattr vattr; 1755 1756 /* 1757 * Oops, not for me.. 
1758 */ 1759 if (vap->va_type == VSOCK) 1760 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1761 1762 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1763 return (error); 1764 if (vap->va_vaflags & VA_EXCLUSIVE) 1765 fmode |= O_EXCL; 1766 dnp = VTONFS(dvp); 1767 nmp = VFSTONFS(dvp->v_mount); 1768 again: 1769 /* For NFSv4, wait until any remove is done. */ 1770 NFSLOCKNODE(dnp); 1771 while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { 1772 dnp->n_flag |= NREMOVEWANT; 1773 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); 1774 } 1775 NFSUNLOCKNODE(dnp); 1776 1777 cverf = nfs_get_cverf(); 1778 error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1779 vap, cverf, fmode, cnp->cn_cred, curthread, &dnfsva, &nfsva, 1780 &nfhp, &attrflag, &dattrflag); 1781 if (!error) { 1782 if (nfhp == NULL) 1783 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1784 cnp->cn_namelen, cnp->cn_cred, curthread, 1785 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 0); 1786 if (nfhp != NULL) 1787 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1788 curthread, &np, LK_EXCLUSIVE); 1789 } 1790 if (dattrflag) 1791 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1792 if (!error) { 1793 newvp = NFSTOV(np); 1794 if (attrflag == 0) 1795 error = nfsrpc_getattr(newvp, cnp->cn_cred, curthread, 1796 &nfsva); 1797 if (error == 0) 1798 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1799 } 1800 if (error) { 1801 if (newvp != NULL) { 1802 vput(newvp); 1803 newvp = NULL; 1804 } 1805 if (NFS_ISV34(dvp) && (fmode & O_EXCL) && 1806 error == NFSERR_NOTSUPP) { 1807 fmode &= ~O_EXCL; 1808 goto again; 1809 } 1810 } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { 1811 if (nfscl_checksattr(vap, &nfsva)) { 1812 error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, 1813 curthread, &nfsva, &attrflag); 1814 if (error && (vap->va_uid != (uid_t)VNOVAL || 1815 vap->va_gid != (gid_t)VNOVAL)) { 1816 /* try again without setting uid/gid */ 1817 vap->va_uid = (uid_t)VNOVAL; 1818 
vap->va_gid = (uid_t)VNOVAL; 1819 error = nfsrpc_setattr(newvp, vap, NULL, 1820 cnp->cn_cred, curthread, &nfsva, &attrflag); 1821 } 1822 if (attrflag) 1823 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1824 0, 1); 1825 if (error != 0) 1826 vput(newvp); 1827 } 1828 } 1829 if (!error) { 1830 if ((cnp->cn_flags & MAKEENTRY) && attrflag) { 1831 if (dvp != newvp) 1832 cache_enter_time(dvp, newvp, cnp, 1833 &nfsva.na_ctime, NULL); 1834 else 1835 printf("nfs_create: bogus NFS server returned " 1836 "the directory as the new file object\n"); 1837 } 1838 *ap->a_vpp = newvp; 1839 } else if (NFS_ISV4(dvp)) { 1840 error = nfscl_maperr(curthread, error, vap->va_uid, 1841 vap->va_gid); 1842 } 1843 NFSLOCKNODE(dnp); 1844 dnp->n_flag |= NMODIFIED; 1845 if (!dattrflag) { 1846 dnp->n_attrstamp = 0; 1847 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1848 } 1849 NFSUNLOCKNODE(dnp); 1850 return (error); 1851 } 1852 1853 /* 1854 * nfs file remove call 1855 * To try and make nfs semantics closer to ufs semantics, a file that has 1856 * other processes using the vnode is renamed instead of removed and then 1857 * removed later on the last close. 1858 * - If v_usecount > 1 1859 * If a rename is not already in the works 1860 * call nfs_sillyrename() to set it up 1861 * else 1862 * do the remove rpc 1863 */ 1864 static int 1865 nfs_remove(struct vop_remove_args *ap) 1866 { 1867 struct vnode *vp = ap->a_vp; 1868 struct vnode *dvp = ap->a_dvp; 1869 struct componentname *cnp = ap->a_cnp; 1870 struct nfsnode *np = VTONFS(vp); 1871 int error = 0; 1872 struct vattr vattr; 1873 1874 KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); 1875 if (vp->v_type == VDIR) 1876 error = EPERM; 1877 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1878 VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && 1879 vattr.va_nlink > 1)) { 1880 /* 1881 * Purge the name cache so that the chance of a lookup for 1882 * the name succeeding while the remove is in progress is 1883 * minimized. 
Without node locking it can still happen, such 1884 * that an I/O op returns ESTALE, but since you get this if 1885 * another host removes the file.. 1886 */ 1887 cache_purge(vp); 1888 /* 1889 * throw away biocache buffers, mainly to avoid 1890 * unnecessary delayed writes later. 1891 */ 1892 error = ncl_vinvalbuf(vp, 0, curthread, 1); 1893 if (error != EINTR && error != EIO) 1894 /* Do the rpc */ 1895 error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, 1896 cnp->cn_namelen, cnp->cn_cred, curthread); 1897 /* 1898 * Kludge City: If the first reply to the remove rpc is lost.. 1899 * the reply to the retransmitted request will be ENOENT 1900 * since the file was in fact removed 1901 * Therefore, we cheat and return success. 1902 */ 1903 if (error == ENOENT) 1904 error = 0; 1905 } else if (!np->n_sillyrename) 1906 error = nfs_sillyrename(dvp, vp, cnp); 1907 NFSLOCKNODE(np); 1908 np->n_attrstamp = 0; 1909 NFSUNLOCKNODE(np); 1910 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1911 return (error); 1912 } 1913 1914 /* 1915 * nfs file remove rpc called from nfs_inactive 1916 */ 1917 int 1918 ncl_removeit(struct sillyrename *sp, struct vnode *vp) 1919 { 1920 /* 1921 * Make sure that the directory vnode is still valid. 1922 * XXX we should lock sp->s_dvp here. 1923 */ 1924 if (sp->s_dvp->v_type == VBAD) 1925 return (0); 1926 return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, 1927 sp->s_cred, NULL)); 1928 } 1929 1930 /* 1931 * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). 
1932 */ 1933 static int 1934 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 1935 int namelen, struct ucred *cred, struct thread *td) 1936 { 1937 struct nfsvattr dnfsva; 1938 struct nfsnode *dnp = VTONFS(dvp); 1939 int error = 0, dattrflag; 1940 1941 NFSLOCKNODE(dnp); 1942 dnp->n_flag |= NREMOVEINPROG; 1943 NFSUNLOCKNODE(dnp); 1944 error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, 1945 &dattrflag); 1946 NFSLOCKNODE(dnp); 1947 if ((dnp->n_flag & NREMOVEWANT)) { 1948 dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); 1949 NFSUNLOCKNODE(dnp); 1950 wakeup((caddr_t)dnp); 1951 } else { 1952 dnp->n_flag &= ~NREMOVEINPROG; 1953 NFSUNLOCKNODE(dnp); 1954 } 1955 if (dattrflag) 1956 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1957 NFSLOCKNODE(dnp); 1958 dnp->n_flag |= NMODIFIED; 1959 if (!dattrflag) { 1960 dnp->n_attrstamp = 0; 1961 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1962 } 1963 NFSUNLOCKNODE(dnp); 1964 if (error && NFS_ISV4(dvp)) 1965 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1966 return (error); 1967 } 1968 1969 /* 1970 * nfs file rename call 1971 */ 1972 static int 1973 nfs_rename(struct vop_rename_args *ap) 1974 { 1975 struct vnode *fvp = ap->a_fvp; 1976 struct vnode *tvp = ap->a_tvp; 1977 struct vnode *fdvp = ap->a_fdvp; 1978 struct vnode *tdvp = ap->a_tdvp; 1979 struct componentname *tcnp = ap->a_tcnp; 1980 struct componentname *fcnp = ap->a_fcnp; 1981 struct nfsnode *fnp = VTONFS(ap->a_fvp); 1982 struct nfsnode *tdnp = VTONFS(ap->a_tdvp); 1983 struct nfsv4node *newv4 = NULL; 1984 int error; 1985 1986 /* Check for cross-device rename */ 1987 if ((fvp->v_mount != tdvp->v_mount) || 1988 (tvp && (fvp->v_mount != tvp->v_mount))) { 1989 error = EXDEV; 1990 goto out; 1991 } 1992 1993 if (fvp == tvp) { 1994 printf("nfs_rename: fvp == tvp (can't happen)\n"); 1995 error = 0; 1996 goto out; 1997 } 1998 if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) 1999 goto out; 2000 2001 /* 2002 * We have to flush B_DELWRI data prior to 
renaming 2003 * the file. If we don't, the delayed-write buffers 2004 * can be flushed out later after the file has gone stale 2005 * under NFSV3. NFSV2 does not have this problem because 2006 * ( as far as I can tell ) it flushes dirty buffers more 2007 * often. 2008 * 2009 * Skip the rename operation if the fsync fails, this can happen 2010 * due to the server's volume being full, when we pushed out data 2011 * that was written back to our cache earlier. Not checking for 2012 * this condition can result in potential (silent) data loss. 2013 */ 2014 error = VOP_FSYNC(fvp, MNT_WAIT, curthread); 2015 NFSVOPUNLOCK(fvp); 2016 if (!error && tvp) 2017 error = VOP_FSYNC(tvp, MNT_WAIT, curthread); 2018 if (error) 2019 goto out; 2020 2021 /* 2022 * If the tvp exists and is in use, sillyrename it before doing the 2023 * rename of the new file over it. 2024 * XXX Can't sillyrename a directory. 2025 */ 2026 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && 2027 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { 2028 vput(tvp); 2029 tvp = NULL; 2030 } 2031 2032 error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, 2033 tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, 2034 curthread); 2035 2036 if (error == 0 && NFS_ISV4(tdvp)) { 2037 /* 2038 * For NFSv4, check to see if it is the same name and 2039 * replace the name, if it is different. 
2040 */ 2041 newv4 = malloc( 2042 sizeof (struct nfsv4node) + 2043 tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, 2044 M_NFSV4NODE, M_WAITOK); 2045 NFSLOCKNODE(tdnp); 2046 NFSLOCKNODE(fnp); 2047 if (fnp->n_v4 != NULL && fvp->v_type == VREG && 2048 (fnp->n_v4->n4_namelen != tcnp->cn_namelen || 2049 NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), 2050 tcnp->cn_namelen) || 2051 tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || 2052 NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 2053 tdnp->n_fhp->nfh_len))) { 2054 free(fnp->n_v4, M_NFSV4NODE); 2055 fnp->n_v4 = newv4; 2056 newv4 = NULL; 2057 fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; 2058 fnp->n_v4->n4_namelen = tcnp->cn_namelen; 2059 NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 2060 tdnp->n_fhp->nfh_len); 2061 NFSBCOPY(tcnp->cn_nameptr, 2062 NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); 2063 } 2064 NFSUNLOCKNODE(tdnp); 2065 NFSUNLOCKNODE(fnp); 2066 if (newv4 != NULL) 2067 free(newv4, M_NFSV4NODE); 2068 } 2069 2070 if (fvp->v_type == VDIR) { 2071 if (tvp != NULL && tvp->v_type == VDIR) 2072 cache_purge(tdvp); 2073 cache_purge(fdvp); 2074 } 2075 2076 out: 2077 if (tdvp == tvp) 2078 vrele(tdvp); 2079 else 2080 vput(tdvp); 2081 if (tvp) 2082 vput(tvp); 2083 vrele(fdvp); 2084 vrele(fvp); 2085 /* 2086 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 2087 */ 2088 if (error == ENOENT) 2089 error = 0; 2090 return (error); 2091 } 2092 2093 /* 2094 * nfs file rename rpc called from nfs_remove() above 2095 */ 2096 static int 2097 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, 2098 struct sillyrename *sp) 2099 { 2100 2101 return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, 2102 sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, 2103 curthread)); 2104 } 2105 2106 /* 2107 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
2108 */ 2109 static int 2110 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, 2111 int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, 2112 int tnamelen, struct ucred *cred, struct thread *td) 2113 { 2114 struct nfsvattr fnfsva, tnfsva; 2115 struct nfsnode *fdnp = VTONFS(fdvp); 2116 struct nfsnode *tdnp = VTONFS(tdvp); 2117 int error = 0, fattrflag, tattrflag; 2118 2119 error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, 2120 tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, 2121 &tattrflag); 2122 NFSLOCKNODE(fdnp); 2123 fdnp->n_flag |= NMODIFIED; 2124 if (fattrflag != 0) { 2125 NFSUNLOCKNODE(fdnp); 2126 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, 0, 1); 2127 } else { 2128 fdnp->n_attrstamp = 0; 2129 NFSUNLOCKNODE(fdnp); 2130 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 2131 } 2132 NFSLOCKNODE(tdnp); 2133 tdnp->n_flag |= NMODIFIED; 2134 if (tattrflag != 0) { 2135 NFSUNLOCKNODE(tdnp); 2136 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, 0, 1); 2137 } else { 2138 tdnp->n_attrstamp = 0; 2139 NFSUNLOCKNODE(tdnp); 2140 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2141 } 2142 if (error && NFS_ISV4(fdvp)) 2143 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2144 return (error); 2145 } 2146 2147 /* 2148 * nfs hard link create call 2149 */ 2150 static int 2151 nfs_link(struct vop_link_args *ap) 2152 { 2153 struct vnode *vp = ap->a_vp; 2154 struct vnode *tdvp = ap->a_tdvp; 2155 struct componentname *cnp = ap->a_cnp; 2156 struct nfsnode *np, *tdnp; 2157 struct nfsvattr nfsva, dnfsva; 2158 int error = 0, attrflag, dattrflag; 2159 2160 /* 2161 * Push all writes to the server, so that the attribute cache 2162 * doesn't get "out of sync" with the server. 2163 * XXX There should be a better way! 
2164 */ 2165 VOP_FSYNC(vp, MNT_WAIT, curthread); 2166 2167 error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, 2168 cnp->cn_cred, curthread, &dnfsva, &nfsva, &attrflag, &dattrflag); 2169 tdnp = VTONFS(tdvp); 2170 NFSLOCKNODE(tdnp); 2171 tdnp->n_flag |= NMODIFIED; 2172 if (dattrflag != 0) { 2173 NFSUNLOCKNODE(tdnp); 2174 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, 0, 1); 2175 } else { 2176 tdnp->n_attrstamp = 0; 2177 NFSUNLOCKNODE(tdnp); 2178 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2179 } 2180 if (attrflag) 2181 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 2182 else { 2183 np = VTONFS(vp); 2184 NFSLOCKNODE(np); 2185 np->n_attrstamp = 0; 2186 NFSUNLOCKNODE(np); 2187 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 2188 } 2189 /* 2190 * If negative lookup caching is enabled, I might as well 2191 * add an entry for this node. Not necessary for correctness, 2192 * but if negative caching is enabled, then the system 2193 * must care about lookup caching hit rate, so... 2194 */ 2195 if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && 2196 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2197 if (tdvp != vp) 2198 cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); 2199 else 2200 printf("nfs_link: bogus NFS server returned " 2201 "the directory as the new link\n"); 2202 } 2203 if (error && NFS_ISV4(vp)) 2204 error = nfscl_maperr(curthread, error, (uid_t)0, 2205 (gid_t)0); 2206 return (error); 2207 } 2208 2209 /* 2210 * nfs symbolic link create call 2211 */ 2212 static int 2213 nfs_symlink(struct vop_symlink_args *ap) 2214 { 2215 struct vnode *dvp = ap->a_dvp; 2216 struct vattr *vap = ap->a_vap; 2217 struct componentname *cnp = ap->a_cnp; 2218 struct nfsvattr nfsva, dnfsva; 2219 struct nfsfh *nfhp; 2220 struct nfsnode *np = NULL, *dnp; 2221 struct vnode *newvp = NULL; 2222 int error = 0, attrflag, dattrflag, ret; 2223 2224 vap->va_type = VLNK; 2225 error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2226 ap->a_target, vap, 
cnp->cn_cred, curthread, &dnfsva, 2227 &nfsva, &nfhp, &attrflag, &dattrflag); 2228 if (nfhp) { 2229 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread, 2230 &np, LK_EXCLUSIVE); 2231 if (!ret) 2232 newvp = NFSTOV(np); 2233 else if (!error) 2234 error = ret; 2235 } 2236 if (newvp != NULL) { 2237 if (attrflag) 2238 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 2239 } else if (!error) { 2240 /* 2241 * If we do not have an error and we could not extract the 2242 * newvp from the response due to the request being NFSv2, we 2243 * have to do a lookup in order to obtain a newvp to return. 2244 */ 2245 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2246 cnp->cn_cred, curthread, &np); 2247 if (!error) 2248 newvp = NFSTOV(np); 2249 } 2250 if (error) { 2251 if (newvp) 2252 vput(newvp); 2253 if (NFS_ISV4(dvp)) 2254 error = nfscl_maperr(curthread, error, 2255 vap->va_uid, vap->va_gid); 2256 } else { 2257 *ap->a_vpp = newvp; 2258 } 2259 2260 dnp = VTONFS(dvp); 2261 NFSLOCKNODE(dnp); 2262 dnp->n_flag |= NMODIFIED; 2263 if (dattrflag != 0) { 2264 NFSUNLOCKNODE(dnp); 2265 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 2266 } else { 2267 dnp->n_attrstamp = 0; 2268 NFSUNLOCKNODE(dnp); 2269 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2270 } 2271 /* 2272 * If negative lookup caching is enabled, I might as well 2273 * add an entry for this node. Not necessary for correctness, 2274 * but if negative caching is enabled, then the system 2275 * must care about lookup caching hit rate, so... 
2276 */ 2277 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2278 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2279 if (dvp != newvp) 2280 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 2281 NULL); 2282 else 2283 printf("nfs_symlink: bogus NFS server returned " 2284 "the directory as the new file object\n"); 2285 } 2286 return (error); 2287 } 2288 2289 /* 2290 * nfs make dir call 2291 */ 2292 static int 2293 nfs_mkdir(struct vop_mkdir_args *ap) 2294 { 2295 struct vnode *dvp = ap->a_dvp; 2296 struct vattr *vap = ap->a_vap; 2297 struct componentname *cnp = ap->a_cnp; 2298 struct nfsnode *np = NULL, *dnp; 2299 struct vnode *newvp = NULL; 2300 struct vattr vattr; 2301 struct nfsfh *nfhp; 2302 struct nfsvattr nfsva, dnfsva; 2303 int error = 0, attrflag, dattrflag, ret; 2304 2305 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 2306 return (error); 2307 vap->va_type = VDIR; 2308 error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2309 vap, cnp->cn_cred, curthread, &dnfsva, &nfsva, &nfhp, 2310 &attrflag, &dattrflag); 2311 dnp = VTONFS(dvp); 2312 NFSLOCKNODE(dnp); 2313 dnp->n_flag |= NMODIFIED; 2314 if (dattrflag != 0) { 2315 NFSUNLOCKNODE(dnp); 2316 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 2317 } else { 2318 dnp->n_attrstamp = 0; 2319 NFSUNLOCKNODE(dnp); 2320 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2321 } 2322 if (nfhp) { 2323 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread, 2324 &np, LK_EXCLUSIVE); 2325 if (!ret) { 2326 newvp = NFSTOV(np); 2327 if (attrflag) 2328 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 2329 0, 1); 2330 } else if (!error) 2331 error = ret; 2332 } 2333 if (!error && newvp == NULL) { 2334 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2335 cnp->cn_cred, curthread, &np); 2336 if (!error) { 2337 newvp = NFSTOV(np); 2338 if (newvp->v_type != VDIR) 2339 error = EEXIST; 2340 } 2341 } 2342 if (error) { 2343 if (newvp) 2344 vput(newvp); 2345 if (NFS_ISV4(dvp)) 2346 error = 
nfscl_maperr(curthread, error, 2347 vap->va_uid, vap->va_gid); 2348 } else { 2349 /* 2350 * If negative lookup caching is enabled, I might as well 2351 * add an entry for this node. Not necessary for correctness, 2352 * but if negative caching is enabled, then the system 2353 * must care about lookup caching hit rate, so... 2354 */ 2355 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2356 (cnp->cn_flags & MAKEENTRY) && 2357 attrflag != 0 && dattrflag != 0) { 2358 if (dvp != newvp) 2359 cache_enter_time(dvp, newvp, cnp, 2360 &nfsva.na_ctime, &dnfsva.na_ctime); 2361 else 2362 printf("nfs_mkdir: bogus NFS server returned " 2363 "the directory that the directory was " 2364 "created in as the new file object\n"); 2365 } 2366 *ap->a_vpp = newvp; 2367 } 2368 return (error); 2369 } 2370 2371 /* 2372 * nfs remove directory call 2373 */ 2374 static int 2375 nfs_rmdir(struct vop_rmdir_args *ap) 2376 { 2377 struct vnode *vp = ap->a_vp; 2378 struct vnode *dvp = ap->a_dvp; 2379 struct componentname *cnp = ap->a_cnp; 2380 struct nfsnode *dnp; 2381 struct nfsvattr dnfsva; 2382 int error, dattrflag; 2383 2384 if (dvp == vp) 2385 return (EINVAL); 2386 error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2387 cnp->cn_cred, curthread, &dnfsva, &dattrflag); 2388 dnp = VTONFS(dvp); 2389 NFSLOCKNODE(dnp); 2390 dnp->n_flag |= NMODIFIED; 2391 if (dattrflag != 0) { 2392 NFSUNLOCKNODE(dnp); 2393 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 2394 } else { 2395 dnp->n_attrstamp = 0; 2396 NFSUNLOCKNODE(dnp); 2397 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2398 } 2399 2400 cache_purge(dvp); 2401 cache_purge(vp); 2402 if (error && NFS_ISV4(dvp)) 2403 error = nfscl_maperr(curthread, error, (uid_t)0, 2404 (gid_t)0); 2405 /* 2406 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
2407 */ 2408 if (error == ENOENT) 2409 error = 0; 2410 return (error); 2411 } 2412 2413 /* 2414 * nfs readdir call 2415 */ 2416 static int 2417 nfs_readdir(struct vop_readdir_args *ap) 2418 { 2419 struct vnode *vp = ap->a_vp; 2420 struct nfsnode *np = VTONFS(vp); 2421 struct uio *uio = ap->a_uio; 2422 ssize_t tresid, left; 2423 int error = 0; 2424 struct vattr vattr; 2425 2426 if (ap->a_eofflag != NULL) 2427 *ap->a_eofflag = 0; 2428 if (vp->v_type != VDIR) 2429 return(EPERM); 2430 2431 /* 2432 * First, check for hit on the EOF offset cache 2433 */ 2434 NFSLOCKNODE(np); 2435 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2436 (np->n_flag & NMODIFIED) == 0) { 2437 NFSUNLOCKNODE(np); 2438 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2439 NFSLOCKNODE(np); 2440 if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || 2441 !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2442 NFSUNLOCKNODE(np); 2443 NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); 2444 if (ap->a_eofflag != NULL) 2445 *ap->a_eofflag = 1; 2446 return (0); 2447 } else 2448 NFSUNLOCKNODE(np); 2449 } 2450 } else 2451 NFSUNLOCKNODE(np); 2452 2453 /* 2454 * NFS always guarantees that directory entries don't straddle 2455 * DIRBLKSIZ boundaries. As such, we need to limit the size 2456 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial 2457 * directory entry. 2458 */ 2459 left = uio->uio_resid % DIRBLKSIZ; 2460 if (left == uio->uio_resid) 2461 return (EINVAL); 2462 uio->uio_resid -= left; 2463 2464 /* 2465 * For readdirplus, if starting to read the directory, 2466 * purge the name cache, since it will be reloaded by 2467 * this directory read. 2468 * This removes potentially stale name cache entries. 2469 */ 2470 if (uio->uio_offset == 0 && 2471 (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_RDIRPLUS) != 0) 2472 cache_purge(vp); 2473 2474 /* 2475 * Call ncl_bioread() to do the real work. 
2476 */ 2477 tresid = uio->uio_resid; 2478 error = ncl_bioread(vp, uio, 0, ap->a_cred); 2479 2480 if (!error && uio->uio_resid == tresid) { 2481 NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); 2482 if (ap->a_eofflag != NULL) 2483 *ap->a_eofflag = 1; 2484 } 2485 2486 /* Add the partial DIRBLKSIZ (left) back in. */ 2487 uio->uio_resid += left; 2488 return (error); 2489 } 2490 2491 /* 2492 * Readdir rpc call. 2493 * Called from below the buffer cache by ncl_doio(). 2494 */ 2495 int 2496 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2497 struct thread *td) 2498 { 2499 struct nfsvattr nfsva; 2500 nfsuint64 *cookiep, cookie; 2501 struct nfsnode *dnp = VTONFS(vp); 2502 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2503 int error = 0, eof, attrflag; 2504 2505 KASSERT(uiop->uio_iovcnt == 1 && 2506 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2507 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2508 ("nfs readdirrpc bad uio")); 2509 2510 /* 2511 * If there is no cookie, assume directory was stale. 2512 */ 2513 ncl_dircookie_lock(dnp); 2514 NFSUNLOCKNODE(dnp); 2515 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2516 if (cookiep) { 2517 cookie = *cookiep; 2518 ncl_dircookie_unlock(dnp); 2519 } else { 2520 ncl_dircookie_unlock(dnp); 2521 return (NFSERR_BAD_COOKIE); 2522 } 2523 2524 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2525 (void)ncl_fsinfo(nmp, vp, cred, td); 2526 2527 error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, 2528 &attrflag, &eof); 2529 if (attrflag) 2530 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 2531 2532 if (!error) { 2533 /* 2534 * We are now either at the end of the directory or have filled 2535 * the block. 2536 */ 2537 if (eof) { 2538 NFSLOCKNODE(dnp); 2539 dnp->n_direofoffset = uiop->uio_offset; 2540 NFSUNLOCKNODE(dnp); 2541 } else { 2542 if (uiop->uio_resid > 0) 2543 printf("EEK! 
readdirrpc resid > 0\n"); 2544 ncl_dircookie_lock(dnp); 2545 NFSUNLOCKNODE(dnp); 2546 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2547 *cookiep = cookie; 2548 ncl_dircookie_unlock(dnp); 2549 } 2550 } else if (NFS_ISV4(vp)) { 2551 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2552 } 2553 return (error); 2554 } 2555 2556 /* 2557 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). 2558 */ 2559 int 2560 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2561 struct thread *td) 2562 { 2563 struct nfsvattr nfsva; 2564 nfsuint64 *cookiep, cookie; 2565 struct nfsnode *dnp = VTONFS(vp); 2566 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2567 int error = 0, attrflag, eof; 2568 2569 KASSERT(uiop->uio_iovcnt == 1 && 2570 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2571 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2572 ("nfs readdirplusrpc bad uio")); 2573 2574 /* 2575 * If there is no cookie, assume directory was stale. 2576 */ 2577 ncl_dircookie_lock(dnp); 2578 NFSUNLOCKNODE(dnp); 2579 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2580 if (cookiep) { 2581 cookie = *cookiep; 2582 ncl_dircookie_unlock(dnp); 2583 } else { 2584 ncl_dircookie_unlock(dnp); 2585 return (NFSERR_BAD_COOKIE); 2586 } 2587 2588 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2589 (void)ncl_fsinfo(nmp, vp, cred, td); 2590 error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, 2591 &attrflag, &eof); 2592 if (attrflag) 2593 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 2594 2595 if (!error) { 2596 /* 2597 * We are now either at end of the directory or have filled the 2598 * the block. 2599 */ 2600 if (eof) { 2601 NFSLOCKNODE(dnp); 2602 dnp->n_direofoffset = uiop->uio_offset; 2603 NFSUNLOCKNODE(dnp); 2604 } else { 2605 if (uiop->uio_resid > 0) 2606 printf("EEK! 
readdirplusrpc resid > 0\n"); 2607 ncl_dircookie_lock(dnp); 2608 NFSUNLOCKNODE(dnp); 2609 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2610 *cookiep = cookie; 2611 ncl_dircookie_unlock(dnp); 2612 } 2613 } else if (NFS_ISV4(vp)) { 2614 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2615 } 2616 return (error); 2617 } 2618 2619 /* 2620 * Silly rename. To make the NFS filesystem that is stateless look a little 2621 * more like the "ufs" a remove of an active vnode is translated to a rename 2622 * to a funny looking filename that is removed by nfs_inactive on the 2623 * nfsnode. There is the potential for another process on a different client 2624 * to create the same funny name between the nfs_lookitup() fails and the 2625 * nfs_rename() completes, but... 2626 */ 2627 static int 2628 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2629 { 2630 struct sillyrename *sp; 2631 struct nfsnode *np; 2632 int error; 2633 short pid; 2634 unsigned int lticks; 2635 2636 cache_purge(dvp); 2637 np = VTONFS(vp); 2638 KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); 2639 sp = malloc(sizeof (struct sillyrename), 2640 M_NEWNFSREQ, M_WAITOK); 2641 sp->s_cred = crhold(cnp->cn_cred); 2642 sp->s_dvp = dvp; 2643 VREF(dvp); 2644 2645 /* 2646 * Fudge together a funny name. 2647 * Changing the format of the funny name to accommodate more 2648 * sillynames per directory. 2649 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2650 * CPU ticks since boot. 
2651 */ 2652 pid = curthread->td_proc->p_pid; 2653 lticks = (unsigned int)ticks; 2654 for ( ; ; ) { 2655 sp->s_namlen = sprintf(sp->s_name, 2656 ".nfs.%08x.%04x4.4", lticks, 2657 pid); 2658 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2659 curthread, NULL)) 2660 break; 2661 lticks++; 2662 } 2663 error = nfs_renameit(dvp, vp, cnp, sp); 2664 if (error) 2665 goto bad; 2666 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2667 curthread, &np); 2668 np->n_sillyrename = sp; 2669 return (0); 2670 bad: 2671 vrele(sp->s_dvp); 2672 crfree(sp->s_cred); 2673 free(sp, M_NEWNFSREQ); 2674 return (error); 2675 } 2676 2677 /* 2678 * Look up a file name and optionally either update the file handle or 2679 * allocate an nfsnode, depending on the value of npp. 2680 * npp == NULL --> just do the lookup 2681 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2682 * handled too 2683 * *npp != NULL --> update the file handle in the vnode 2684 */ 2685 static int 2686 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, 2687 struct thread *td, struct nfsnode **npp) 2688 { 2689 struct vnode *newvp = NULL, *vp; 2690 struct nfsnode *np, *dnp = VTONFS(dvp); 2691 struct nfsfh *nfhp, *onfhp; 2692 struct nfsvattr nfsva, dnfsva; 2693 struct componentname cn; 2694 int error = 0, attrflag, dattrflag; 2695 u_int hash; 2696 struct timespec ts; 2697 2698 nanouptime(&ts); 2699 error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, 2700 &nfhp, &attrflag, &dattrflag, 0); 2701 if (dattrflag) 2702 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 2703 if (npp && !error) { 2704 if (*npp != NULL) { 2705 np = *npp; 2706 vp = NFSTOV(np); 2707 /* 2708 * For NFSv4, check to see if it is the same name and 2709 * replace the name, if it is different. 
2710 */ 2711 if (np->n_v4 != NULL && nfsva.na_type == VREG && 2712 (np->n_v4->n4_namelen != len || 2713 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || 2714 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || 2715 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2716 dnp->n_fhp->nfh_len))) { 2717 free(np->n_v4, M_NFSV4NODE); 2718 np->n_v4 = malloc( 2719 sizeof (struct nfsv4node) + 2720 dnp->n_fhp->nfh_len + len - 1, 2721 M_NFSV4NODE, M_WAITOK); 2722 np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; 2723 np->n_v4->n4_namelen = len; 2724 NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2725 dnp->n_fhp->nfh_len); 2726 NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); 2727 } 2728 hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, 2729 FNV1_32_INIT); 2730 onfhp = np->n_fhp; 2731 /* 2732 * Rehash node for new file handle. 2733 */ 2734 vfs_hash_rehash(vp, hash); 2735 np->n_fhp = nfhp; 2736 if (onfhp != NULL) 2737 free(onfhp, M_NFSFH); 2738 newvp = NFSTOV(np); 2739 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { 2740 free(nfhp, M_NFSFH); 2741 VREF(dvp); 2742 newvp = dvp; 2743 } else { 2744 cn.cn_nameptr = name; 2745 cn.cn_namelen = len; 2746 error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, 2747 &np, LK_EXCLUSIVE); 2748 if (error) 2749 return (error); 2750 newvp = NFSTOV(np); 2751 /* 2752 * If n_localmodtime >= time before RPC, then 2753 * a file modification operation, such as 2754 * VOP_SETATTR() of size, has occurred while 2755 * the Lookup RPC and acquisition of the vnode 2756 * happened. As such, the attributes might 2757 * be stale, with possibly an incorrect size. 
2758 */ 2759 NFSLOCKNODE(np); 2760 if (timespecisset(&np->n_localmodtime) && 2761 timespeccmp(&np->n_localmodtime, &ts, >=)) { 2762 NFSCL_DEBUG(4, "nfs_lookitup: localmod " 2763 "stale attributes\n"); 2764 attrflag = 0; 2765 } 2766 NFSUNLOCKNODE(np); 2767 } 2768 if (!attrflag && *npp == NULL) { 2769 if (newvp == dvp) 2770 vrele(newvp); 2771 else 2772 vput(newvp); 2773 return (ENOENT); 2774 } 2775 if (attrflag) 2776 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 2777 } 2778 if (npp && *npp == NULL) { 2779 if (error) { 2780 if (newvp) { 2781 if (newvp == dvp) 2782 vrele(newvp); 2783 else 2784 vput(newvp); 2785 } 2786 } else 2787 *npp = np; 2788 } 2789 if (error && NFS_ISV4(dvp)) 2790 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2791 return (error); 2792 } 2793 2794 /* 2795 * Nfs Version 3 and 4 commit rpc 2796 */ 2797 int 2798 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2799 struct thread *td) 2800 { 2801 struct nfsvattr nfsva; 2802 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2803 struct nfsnode *np; 2804 struct uio uio; 2805 int error, attrflag; 2806 2807 np = VTONFS(vp); 2808 error = EIO; 2809 attrflag = 0; 2810 if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) { 2811 uio.uio_offset = offset; 2812 uio.uio_resid = cnt; 2813 error = nfscl_doiods(vp, &uio, NULL, NULL, 2814 NFSV4OPEN_ACCESSWRITE, 1, cred, td); 2815 if (error != 0) { 2816 NFSLOCKNODE(np); 2817 np->n_flag &= ~NDSCOMMIT; 2818 NFSUNLOCKNODE(np); 2819 } 2820 } 2821 if (error != 0) { 2822 mtx_lock(&nmp->nm_mtx); 2823 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2824 mtx_unlock(&nmp->nm_mtx); 2825 return (0); 2826 } 2827 mtx_unlock(&nmp->nm_mtx); 2828 error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, 2829 &attrflag); 2830 } 2831 if (attrflag != 0) 2832 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 2833 if (error != 0 && NFS_ISV4(vp)) 2834 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2835 return (error); 2836 } 2837 2838 /* 2839 * 
Strategy routine. 2840 * For async requests when nfsiod(s) are running, queue the request by 2841 * calling ncl_asyncio(), otherwise just all ncl_doio() to do the 2842 * request. 2843 */ 2844 static int 2845 nfs_strategy(struct vop_strategy_args *ap) 2846 { 2847 struct buf *bp; 2848 struct vnode *vp; 2849 struct ucred *cr; 2850 2851 bp = ap->a_bp; 2852 vp = ap->a_vp; 2853 KASSERT(bp->b_vp == vp, ("missing b_getvp")); 2854 KASSERT(!(bp->b_flags & B_DONE), 2855 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2856 2857 if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno) 2858 bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize / 2859 DEV_BSIZE); 2860 if (bp->b_iocmd == BIO_READ) 2861 cr = bp->b_rcred; 2862 else 2863 cr = bp->b_wcred; 2864 2865 /* 2866 * If the op is asynchronous and an i/o daemon is waiting 2867 * queue the request, wake it up and wait for completion 2868 * otherwise just do it ourselves. 2869 */ 2870 if ((bp->b_flags & B_ASYNC) == 0 || 2871 ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread)) 2872 (void) ncl_doio(vp, bp, cr, curthread, 1); 2873 return (0); 2874 } 2875 2876 /* 2877 * fsync vnode op. Just call ncl_flush() with commit == 1. 2878 */ 2879 /* ARGSUSED */ 2880 static int 2881 nfs_fsync(struct vop_fsync_args *ap) 2882 { 2883 2884 if (ap->a_vp->v_type != VREG) { 2885 /* 2886 * For NFS, metadata is changed synchronously on the server, 2887 * so there is nothing to flush. Also, ncl_flush() clears 2888 * the NMODIFIED flag and that shouldn't be done here for 2889 * directories. 2890 */ 2891 return (0); 2892 } 2893 return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0)); 2894 } 2895 2896 /* 2897 * Flush all the blocks associated with a vnode. 2898 * Walk through the buffer pool and push any dirty pages 2899 * associated with the vnode. 
* If the called_from_renewthread argument is TRUE, it has been called
 * from the NFSv4 renew thread and, as such, cannot block indefinitely
 * waiting for a buffer write to complete.
 *
 * waitfor == MNT_WAIT makes the routine wait for locked buffers and for
 * outstanding writes to drain.  A non-zero commit enables the first pass
 * that issues Commit RPCs for buffers marked B_NEEDCOMMIT.
 * Returns 0 on success, EINTR when interrupted by a signal, EIO when the
 * renew thread's wait failed or its retries were exhausted, EBUSY when
 * other callers exhaust their retries, or a previously recorded write
 * error (NWRITEERR) from the nfsnode.
 */
int
ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
    int commit, int called_from_renewthread)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1, trycnt = 0;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
	struct bufobj *bo;
#ifndef NFS_COMMITBVECSIZ
#define	NFS_COMMITBVECSIZ	20
#endif
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	u_int bvecsize = 0, bveccount;
	struct timespec ts;

	/* The renew thread only ever sleeps with a timeout. */
	if (called_from_renewthread != 0)
		slptimeo = hz;
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	/* Without commit there is no separate commit pass. */
	if (!commit)
		passone = 0;
	bo = &vp->v_bufobj;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, bwrite() is called to do the
	 * job.
	 */
again:
	/* Reset the byte range [off, endoff) and the collected buffer count. */
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV34(vp) && commit) {
		/* Release a heap vector left over from a previous pass. */
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers waiting for a commit.
		 */
		bveccount = 0;
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bveccount++;
		}
		/*
		 * Allocate space to remember the list of bufs to commit.  It is
		 * important to use M_NOWAIT here to avoid a race with nfs_write.
		 * If we can't get memory (for whatever reason), we will end up
		 * committing the buffers one-by-one in the loop below.
		 */
		if (bveccount > NFS_COMMITBVECSIZ) {
			/*
			 * Release the vnode interlock to avoid a lock
			 * order reversal.
			 */
			BO_UNLOCK(bo);
			bvec = (struct buf **)
				malloc(bveccount * sizeof(struct buf *),
				       M_TEMP, M_NOWAIT);
			BO_LOCK(bo);
			if (bvec == NULL) {
				bvec = bvec_on_stack;
				bvecsize = NFS_COMMITBVECSIZ;
			} else
				bvecsize = bveccount;
		} else {
			bvec = bvec_on_stack;
			bvecsize = NFS_COMMITBVECSIZ;
		}
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			/* The vector is full; the rest waits for a later pass. */
			if (bvecpos >= bvecsize)
				break;
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
			    (B_DELWRI | B_NEEDCOMMIT)) {
				BUF_UNLOCK(bp);
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			BO_UNLOCK(bo);
			bremfree(bp);
			/*
			 * Work out if all buffers are using the same cred
			 * so we can deal with them all with one commit.
			 *
			 * NOTE: we are not clearing B_DONE here, so we have
			 * to do it later on in this routine if we intend to
			 * initiate I/O on the bp.
			 *
			 * Note: to avoid loopback deadlocks, we do not
			 * assign b_runningbufspace.
			 */
			if (wcred == NULL)
				wcred = bp->b_wcred;
			else if (wcred != bp->b_wcred)
				wcred = NOCRED;
			vfs_busy_pages(bp, 0);

			BO_LOCK(bo);
			/*
			 * bp is protected by being locked, but nbp is not
			 * and vfs_busy_pages() may sleep.  We have to
			 * recalculate nbp.
			 */
			nbp = TAILQ_NEXT(bp, b_bobufs);

			/*
			 * A list of these buffers is kept so that the
			 * second loop knows which buffers have actually
			 * been committed. This is necessary, since there
			 * may be a race between the commit rpc and new
			 * uncommitted writes on the file.
			 */
			bvec[bvecpos++] = bp;
			/* Widen [off, endoff) to cover this buffer's dirty range. */
			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				bp->b_dirtyoff;
			if (toff < off)
				off = toff;
			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
			if (toff > endoff)
				endoff = toff;
		}
		BO_UNLOCK(bo);
	}
	if (bvecpos > 0) {
		/*
		 * Commit data on the server, as required.
		 * If all bufs are using the same wcred, then use that with
		 * one call for all of them, otherwise commit each one
		 * separately.
		 */
		if (wcred != NOCRED)
			retv = ncl_commit(vp, off, (int)(endoff - off),
					  wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				/* These shadow the function-level "off". */
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
					bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
						  - bp->b_dirtyoff);
				retv = ncl_commit(vp, off, (int)size,
						  bp->b_wcred, td);
				if (retv) break;
			}
		}

		if (retv == NFSERR_STALEWRITEVERF)
			ncl_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			if (!NFSCL_FORCEDISM(vp->v_mount) && retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific.  We should probably move that
				 * into bundirty(). XXX
				 */
				bufobj_wref(bo);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	BO_LOCK(bo);
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			/* Busy buffers are only waited for on a final MNT_WAIT pass. */
			if (waitfor != MNT_WAIT || passone)
				continue;

			/*
			 * LK_INTERLOCK hands the bufobj lock to
			 * BUF_TIMELOCK(); every path below restarts at
			 * "loop" (which retakes it) or leaves via "done".
			 */
			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
			if (error == 0) {
				BUF_UNLOCK(bp);
				goto loop;
			}
			if (error == ENOLCK) {
				error = 0;
				goto loop;
			}
			if (called_from_renewthread != 0) {
				/*
				 * Return EIO so the flush will be retried
				 * later.
				 */
				error = EIO;
				goto done;
			}
			if (newnfs_sigintr(nmp, td)) {
				error = EINTR;
				goto done;
			}
			/* After one interruptible sleep, switch to timed sleeps. */
			if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs_fsync: not dirty");
		/* Leave B_NEEDCOMMIT buffers alone on pass one or without commit. */
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		BO_UNLOCK(bo);
		bremfree(bp);
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		if (newnfs_sigintr(nmp, td)) {
			error = EINTR;
			goto done;
		}
		/* The list may have changed while unlocked; rescan from the top. */
		goto loop;
	}
	if (passone) {
		passone = 0;
		BO_UNLOCK(bo);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		while (bo->bo_numoutput) {
			error = bufobj_wwait(bo, slpflag, slptimeo);
			if (error) {
				BO_UNLOCK(bo);
				if (called_from_renewthread != 0) {
					/*
					 * Return EIO so that the flush will be
					 * retried later.
					 */
					error = EIO;
					goto done;
				}
				error = newnfs_sigintr(nmp, td);
				if (error)
					goto done;
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
				BO_LOCK(bo);
			}
		}
		if (bo->bo_dirty.bv_cnt != 0 && commit) {
			BO_UNLOCK(bo);
			goto loop;
		}
		/*
		 * Wait for all the async IO requests to drain
		 */
		BO_UNLOCK(bo);
	} else
		BO_UNLOCK(bo);
	if (NFSHASPNFS(nmp)) {
		nfscl_layoutcommit(vp, td);
		/*
		 * Invalidate the attribute cache, since writes to a DS
		 * won't update the size attribute.
		 */
		NFSLOCKNODE(np);
		np->n_attrstamp = 0;
	} else
		NFSLOCKNODE(np);
	/* Report, and clear, a write error recorded on the node. */
	if (np->n_flag & NWRITEERR) {
		error = np->n_error;
		np->n_flag &= ~NWRITEERR;
	}
	if (commit && bo->bo_dirty.bv_cnt == 0 &&
	    bo->bo_numoutput == 0)
		np->n_flag &= ~NMODIFIED;
	NFSUNLOCKNODE(np);
done:
	if (bvec != NULL && bvec != bvec_on_stack)
		free(bvec, M_TEMP);
	/*
	 * A waiting, committing flush that left dirty or in-flight buffers
	 * behind is retried a limited number of times before giving up.
	 */
	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0)) {
		if (trycnt++ < 5) {
			/* try, try again... */
			passone = 1;
			wcred = NULL;
			bvec = NULL;
			bvecsize = 0;
			goto again;
		}
		vn_printf(vp, "ncl_flush failed");
		error = called_from_renewthread != 0 ? EIO : EBUSY;
	}
	if (error == 0) {
		/* Record when this successful flush completed. */
		nanouptime(&ts);
		NFSLOCKNODE(np);
		np->n_localmodtime = ts;
		NFSUNLOCKNODE(np);
	}
	return (error);
}

/*
 * NFS advisory byte-level locks.
3240 */ 3241 static int 3242 nfs_advlock(struct vop_advlock_args *ap) 3243 { 3244 struct vnode *vp = ap->a_vp; 3245 struct ucred *cred; 3246 struct nfsnode *np = VTONFS(ap->a_vp); 3247 struct proc *p = (struct proc *)ap->a_id; 3248 struct thread *td = curthread; /* XXX */ 3249 struct vattr va; 3250 int ret, error; 3251 u_quad_t size; 3252 struct nfsmount *nmp; 3253 3254 error = NFSVOPLOCK(vp, LK_SHARED); 3255 if (error != 0) 3256 return (EBADF); 3257 nmp = VFSTONFS(vp->v_mount); 3258 if (!NFS_ISV4(vp) || (nmp->nm_flag & NFSMNT_NOLOCKD) != 0) { 3259 if ((nmp->nm_flag & NFSMNT_NOLOCKD) != 0) { 3260 size = np->n_size; 3261 NFSVOPUNLOCK(vp); 3262 error = lf_advlock(ap, &(vp->v_lockf), size); 3263 } else { 3264 if (nfs_advlock_p != NULL) 3265 error = nfs_advlock_p(ap); 3266 else { 3267 NFSVOPUNLOCK(vp); 3268 error = ENOLCK; 3269 } 3270 } 3271 if (error == 0 && ap->a_op == F_SETLK) { 3272 error = NFSVOPLOCK(vp, LK_SHARED); 3273 if (error == 0) { 3274 /* Mark that a file lock has been acquired. */ 3275 NFSLOCKNODE(np); 3276 np->n_flag |= NHASBEENLOCKED; 3277 NFSUNLOCKNODE(np); 3278 NFSVOPUNLOCK(vp); 3279 } 3280 } 3281 return (error); 3282 } else if ((ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { 3283 if (vp->v_type != VREG) { 3284 error = EINVAL; 3285 goto out; 3286 } 3287 if ((ap->a_flags & F_POSIX) != 0) 3288 cred = p->p_ucred; 3289 else 3290 cred = td->td_ucred; 3291 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 3292 if (VN_IS_DOOMED(vp)) { 3293 error = EBADF; 3294 goto out; 3295 } 3296 3297 /* 3298 * If this is unlocking a write locked region, flush and 3299 * commit them before unlocking. This is required by 3300 * RFC3530 Sec. 9.3.2. 3301 */ 3302 if (ap->a_op == F_UNLCK && 3303 nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, 3304 ap->a_flags)) 3305 (void) ncl_flush(vp, MNT_WAIT, td, 1, 0); 3306 3307 /* 3308 * Mark NFS node as might have acquired a lock. 
3309 * This is separate from NHASBEENLOCKED, because it must 3310 * be done before the nfsrpc_advlock() call, which might 3311 * add a nfscllock structure to the client state. 3312 * It is used to check for the case where a nfscllock 3313 * state structure cannot exist for the file. 3314 * Only done for "oneopenown" NFSv4.1/4.2 mounts. 3315 */ 3316 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) { 3317 NFSLOCKNODE(np); 3318 np->n_flag |= NMIGHTBELOCKED; 3319 NFSUNLOCKNODE(np); 3320 } 3321 3322 /* 3323 * Loop around doing the lock op, while a blocking lock 3324 * must wait for the lock op to succeed. 3325 */ 3326 do { 3327 ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, 3328 ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); 3329 if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3330 ap->a_op == F_SETLK) { 3331 NFSVOPUNLOCK(vp); 3332 error = nfs_catnap(PZERO | PCATCH, ret, 3333 "ncladvl"); 3334 if (error) 3335 return (EINTR); 3336 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 3337 if (VN_IS_DOOMED(vp)) { 3338 error = EBADF; 3339 goto out; 3340 } 3341 } 3342 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3343 ap->a_op == F_SETLK); 3344 if (ret == NFSERR_DENIED) { 3345 error = EAGAIN; 3346 goto out; 3347 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { 3348 error = ret; 3349 goto out; 3350 } else if (ret != 0) { 3351 error = EACCES; 3352 goto out; 3353 } 3354 3355 /* 3356 * Now, if we just got a lock, invalidate data in the buffer 3357 * cache, as required, so that the coherency conforms with 3358 * RFC3530 Sec. 9.3.2. 
3359 */ 3360 if (ap->a_op == F_SETLK) { 3361 if ((np->n_flag & NMODIFIED) == 0) { 3362 np->n_attrstamp = 0; 3363 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3364 ret = VOP_GETATTR(vp, &va, cred); 3365 } 3366 if ((np->n_flag & NMODIFIED) || ret || 3367 np->n_change != va.va_filerev) { 3368 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); 3369 np->n_attrstamp = 0; 3370 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3371 ret = VOP_GETATTR(vp, &va, cred); 3372 if (!ret) { 3373 np->n_mtime = va.va_mtime; 3374 np->n_change = va.va_filerev; 3375 } 3376 } 3377 /* Mark that a file lock has been acquired. */ 3378 NFSLOCKNODE(np); 3379 np->n_flag |= NHASBEENLOCKED; 3380 NFSUNLOCKNODE(np); 3381 } 3382 } else 3383 error = EOPNOTSUPP; 3384 out: 3385 NFSVOPUNLOCK(vp); 3386 return (error); 3387 } 3388 3389 /* 3390 * NFS advisory byte-level locks. 3391 */ 3392 static int 3393 nfs_advlockasync(struct vop_advlockasync_args *ap) 3394 { 3395 struct vnode *vp = ap->a_vp; 3396 u_quad_t size; 3397 int error; 3398 3399 error = NFSVOPLOCK(vp, LK_SHARED); 3400 if (error) 3401 return (error); 3402 if (NFS_ISV4(vp)) { 3403 NFSVOPUNLOCK(vp); 3404 return (EOPNOTSUPP); 3405 } 3406 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3407 size = VTONFS(vp)->n_size; 3408 NFSVOPUNLOCK(vp); 3409 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3410 } else { 3411 NFSVOPUNLOCK(vp); 3412 error = EOPNOTSUPP; 3413 } 3414 return (error); 3415 } 3416 3417 /* 3418 * Print out the contents of an nfsnode. 3419 */ 3420 static int 3421 nfs_print(struct vop_print_args *ap) 3422 { 3423 struct vnode *vp = ap->a_vp; 3424 struct nfsnode *np = VTONFS(vp); 3425 3426 printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, 3427 (uintmax_t)np->n_vattr.na_fsid); 3428 if (vp->v_type == VFIFO) 3429 fifo_printinfo(vp); 3430 printf("\n"); 3431 return (0); 3432 } 3433 3434 /* 3435 * nfs special file access vnode op. 3436 * Essentially just get vattr and then imitate iaccess() since the device is 3437 * local to the client. 
3438 */ 3439 static int 3440 nfsspec_access(struct vop_access_args *ap) 3441 { 3442 struct vattr *vap; 3443 struct ucred *cred = ap->a_cred; 3444 struct vnode *vp = ap->a_vp; 3445 accmode_t accmode = ap->a_accmode; 3446 struct vattr vattr; 3447 int error; 3448 3449 /* 3450 * Disallow write attempts on filesystems mounted read-only; 3451 * unless the file is a socket, fifo, or a block or character 3452 * device resident on the filesystem. 3453 */ 3454 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3455 switch (vp->v_type) { 3456 case VREG: 3457 case VDIR: 3458 case VLNK: 3459 return (EROFS); 3460 default: 3461 break; 3462 } 3463 } 3464 vap = &vattr; 3465 error = VOP_GETATTR(vp, vap, cred); 3466 if (error) 3467 goto out; 3468 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3469 accmode, cred); 3470 out: 3471 return error; 3472 } 3473 3474 /* 3475 * Read wrapper for fifos. 3476 */ 3477 static int 3478 nfsfifo_read(struct vop_read_args *ap) 3479 { 3480 struct nfsnode *np = VTONFS(ap->a_vp); 3481 int error; 3482 3483 /* 3484 * Set access flag. 3485 */ 3486 NFSLOCKNODE(np); 3487 np->n_flag |= NACC; 3488 vfs_timestamp(&np->n_atim); 3489 NFSUNLOCKNODE(np); 3490 error = fifo_specops.vop_read(ap); 3491 return error; 3492 } 3493 3494 /* 3495 * Write wrapper for fifos. 3496 */ 3497 static int 3498 nfsfifo_write(struct vop_write_args *ap) 3499 { 3500 struct nfsnode *np = VTONFS(ap->a_vp); 3501 3502 /* 3503 * Set update flag. 3504 */ 3505 NFSLOCKNODE(np); 3506 np->n_flag |= NUPD; 3507 vfs_timestamp(&np->n_mtim); 3508 NFSUNLOCKNODE(np); 3509 return(fifo_specops.vop_write(ap)); 3510 } 3511 3512 /* 3513 * Close wrapper for fifos. 3514 * 3515 * Update the times on the nfsnode then do fifo close. 
3516 */ 3517 static int 3518 nfsfifo_close(struct vop_close_args *ap) 3519 { 3520 struct vnode *vp = ap->a_vp; 3521 struct nfsnode *np = VTONFS(vp); 3522 struct vattr vattr; 3523 struct timespec ts; 3524 3525 NFSLOCKNODE(np); 3526 if (np->n_flag & (NACC | NUPD)) { 3527 vfs_timestamp(&ts); 3528 if (np->n_flag & NACC) 3529 np->n_atim = ts; 3530 if (np->n_flag & NUPD) 3531 np->n_mtim = ts; 3532 np->n_flag |= NCHG; 3533 if (vrefcnt(vp) == 1 && 3534 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3535 VATTR_NULL(&vattr); 3536 if (np->n_flag & NACC) 3537 vattr.va_atime = np->n_atim; 3538 if (np->n_flag & NUPD) 3539 vattr.va_mtime = np->n_mtim; 3540 NFSUNLOCKNODE(np); 3541 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3542 goto out; 3543 } 3544 } 3545 NFSUNLOCKNODE(np); 3546 out: 3547 return (fifo_specops.vop_close(ap)); 3548 } 3549 3550 static int 3551 nfs_getacl(struct vop_getacl_args *ap) 3552 { 3553 int error; 3554 3555 if (ap->a_type != ACL_TYPE_NFS4) 3556 return (EOPNOTSUPP); 3557 error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp); 3558 if (error > NFSERR_STALE) { 3559 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3560 error = EPERM; 3561 } 3562 return (error); 3563 } 3564 3565 static int 3566 nfs_setacl(struct vop_setacl_args *ap) 3567 { 3568 int error; 3569 3570 if (ap->a_type != ACL_TYPE_NFS4) 3571 return (EOPNOTSUPP); 3572 error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp); 3573 if (error > NFSERR_STALE) { 3574 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3575 error = EPERM; 3576 } 3577 return (error); 3578 } 3579 3580 /* 3581 * VOP_ADVISE for NFS. 3582 * Just return 0 for any errors, since it is just a hint. 3583 */ 3584 static int 3585 nfs_advise(struct vop_advise_args *ap) 3586 { 3587 struct thread *td = curthread; 3588 struct nfsmount *nmp; 3589 uint64_t len; 3590 int error; 3591 3592 /* 3593 * First do vop_stdadvise() to handle the buffer cache. 
3594 */ 3595 error = vop_stdadvise(ap); 3596 if (error != 0) 3597 return (error); 3598 if (ap->a_start < 0 || ap->a_end < 0) 3599 return (0); 3600 if (ap->a_end == OFF_MAX) 3601 len = 0; 3602 else if (ap->a_end < ap->a_start) 3603 return (0); 3604 else 3605 len = ap->a_end - ap->a_start + 1; 3606 nmp = VFSTONFS(ap->a_vp->v_mount); 3607 mtx_lock(&nmp->nm_mtx); 3608 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3609 (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) == 3610 0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) { 3611 mtx_unlock(&nmp->nm_mtx); 3612 return (0); 3613 } 3614 mtx_unlock(&nmp->nm_mtx); 3615 error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice, 3616 td->td_ucred, td); 3617 if (error == NFSERR_NOTSUPP) { 3618 mtx_lock(&nmp->nm_mtx); 3619 nmp->nm_privflag |= NFSMNTP_NOADVISE; 3620 mtx_unlock(&nmp->nm_mtx); 3621 } 3622 return (0); 3623 } 3624 3625 /* 3626 * nfs allocate call 3627 */ 3628 static int 3629 nfs_allocate(struct vop_allocate_args *ap) 3630 { 3631 struct vnode *vp = ap->a_vp; 3632 struct thread *td = curthread; 3633 struct nfsvattr nfsva; 3634 struct nfsmount *nmp; 3635 struct nfsnode *np; 3636 off_t alen; 3637 int attrflag, error, ret; 3638 struct timespec ts; 3639 struct uio io; 3640 3641 attrflag = 0; 3642 nmp = VFSTONFS(vp->v_mount); 3643 np = VTONFS(vp); 3644 mtx_lock(&nmp->nm_mtx); 3645 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3646 (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) { 3647 mtx_unlock(&nmp->nm_mtx); 3648 alen = *ap->a_len; 3649 if ((uint64_t)alen > nfs_maxalloclen) 3650 alen = nfs_maxalloclen; 3651 3652 /* Check the file size limit. */ 3653 io.uio_offset = *ap->a_offset; 3654 io.uio_resid = alen; 3655 error = vn_rlimit_fsize(vp, &io, td); 3656 3657 /* 3658 * Flush first to ensure that the allocate adds to the 3659 * file's allocation on the server. 
3660 */ 3661 if (error == 0) { 3662 vnode_pager_clean_sync(vp); 3663 error = ncl_flush(vp, MNT_WAIT, td, 1, 0); 3664 } 3665 if (error == 0) 3666 error = nfsrpc_allocate(vp, *ap->a_offset, alen, 3667 &nfsva, &attrflag, ap->a_cred, td); 3668 if (error == 0) { 3669 *ap->a_offset += alen; 3670 *ap->a_len -= alen; 3671 nanouptime(&ts); 3672 NFSLOCKNODE(np); 3673 np->n_localmodtime = ts; 3674 NFSUNLOCKNODE(np); 3675 } else if (error == NFSERR_NOTSUPP) { 3676 mtx_lock(&nmp->nm_mtx); 3677 nmp->nm_privflag |= NFSMNTP_NOALLOCATE; 3678 mtx_unlock(&nmp->nm_mtx); 3679 error = EINVAL; 3680 } 3681 } else { 3682 mtx_unlock(&nmp->nm_mtx); 3683 error = EINVAL; 3684 } 3685 if (attrflag != 0) { 3686 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 3687 if (error == 0 && ret != 0) 3688 error = ret; 3689 } 3690 if (error != 0) 3691 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3692 return (error); 3693 } 3694 3695 /* 3696 * nfs deallocate call 3697 */ 3698 static int 3699 nfs_deallocate(struct vop_deallocate_args *ap) 3700 { 3701 struct vnode *vp = ap->a_vp; 3702 struct thread *td = curthread; 3703 struct nfsvattr nfsva; 3704 struct nfsmount *nmp; 3705 struct nfsnode *np; 3706 off_t tlen, mlen; 3707 int attrflag, error, ret; 3708 bool clipped; 3709 struct timespec ts; 3710 3711 error = 0; 3712 attrflag = 0; 3713 nmp = VFSTONFS(vp->v_mount); 3714 np = VTONFS(vp); 3715 mtx_lock(&nmp->nm_mtx); 3716 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3717 (nmp->nm_privflag & NFSMNTP_NODEALLOCATE) == 0) { 3718 mtx_unlock(&nmp->nm_mtx); 3719 tlen = omin(OFF_MAX - *ap->a_offset, *ap->a_len); 3720 NFSCL_DEBUG(4, "dealloc: off=%jd len=%jd maxfilesize=%ju\n", 3721 (intmax_t)*ap->a_offset, (intmax_t)tlen, 3722 (uintmax_t)nmp->nm_maxfilesize); 3723 if ((uint64_t)*ap->a_offset >= nmp->nm_maxfilesize) { 3724 /* Avoid EFBIG error return from the NFSv4.2 server. 
*/ 3725 *ap->a_len = 0; 3726 return (0); 3727 } 3728 clipped = false; 3729 if ((uint64_t)*ap->a_offset + tlen > nmp->nm_maxfilesize) 3730 tlen = nmp->nm_maxfilesize - *ap->a_offset; 3731 if ((uint64_t)*ap->a_offset < np->n_size) { 3732 /* Limit the len to nfs_maxalloclen before EOF. */ 3733 mlen = omin((off_t)np->n_size - *ap->a_offset, tlen); 3734 if ((uint64_t)mlen > nfs_maxalloclen) { 3735 NFSCL_DEBUG(4, "dealloc: tlen maxalloclen\n"); 3736 tlen = nfs_maxalloclen; 3737 clipped = true; 3738 } 3739 } 3740 if (error == 0) 3741 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 3742 if (error == 0) { 3743 vnode_pager_purge_range(vp, *ap->a_offset, 3744 *ap->a_offset + tlen); 3745 error = nfsrpc_deallocate(vp, *ap->a_offset, tlen, 3746 &nfsva, &attrflag, ap->a_cred, td); 3747 NFSCL_DEBUG(4, "dealloc: rpc=%d\n", error); 3748 } 3749 if (error == 0) { 3750 NFSCL_DEBUG(4, "dealloc: attrflag=%d na_size=%ju\n", 3751 attrflag, (uintmax_t)nfsva.na_size); 3752 nanouptime(&ts); 3753 NFSLOCKNODE(np); 3754 np->n_localmodtime = ts; 3755 NFSUNLOCKNODE(np); 3756 if (attrflag != 0) { 3757 if ((uint64_t)*ap->a_offset < nfsva.na_size) 3758 *ap->a_offset += omin((off_t) 3759 nfsva.na_size - *ap->a_offset, 3760 tlen); 3761 } 3762 if (clipped && tlen < *ap->a_len) 3763 *ap->a_len -= tlen; 3764 else 3765 *ap->a_len = 0; 3766 } else if (error == NFSERR_NOTSUPP) { 3767 mtx_lock(&nmp->nm_mtx); 3768 nmp->nm_privflag |= NFSMNTP_NODEALLOCATE; 3769 mtx_unlock(&nmp->nm_mtx); 3770 } 3771 } else { 3772 mtx_unlock(&nmp->nm_mtx); 3773 error = EIO; 3774 } 3775 /* 3776 * If the NFS server cannot perform the Deallocate operation, just call 3777 * vop_stddeallocate() to perform it. 
3778 */ 3779 if (error != 0 && error != NFSERR_FBIG && error != NFSERR_INVAL) { 3780 error = vop_stddeallocate(ap); 3781 NFSCL_DEBUG(4, "dealloc: stddeallocate=%d\n", error); 3782 } 3783 if (attrflag != 0) { 3784 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 3785 if (error == 0 && ret != 0) 3786 error = ret; 3787 } 3788 if (error != 0) 3789 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3790 return (error); 3791 } 3792 3793 /* 3794 * nfs copy_file_range call 3795 */ 3796 static int 3797 nfs_copy_file_range(struct vop_copy_file_range_args *ap) 3798 { 3799 struct vnode *invp = ap->a_invp; 3800 struct vnode *outvp = ap->a_outvp; 3801 struct mount *mp; 3802 vm_object_t invp_obj; 3803 struct nfsvattr innfsva, outnfsva; 3804 struct vattr va, *vap; 3805 struct uio io; 3806 struct nfsmount *nmp; 3807 size_t len, len2; 3808 ssize_t r; 3809 int error, inattrflag, outattrflag, ret, ret2, invp_lock; 3810 off_t inoff, outoff; 3811 bool consecutive, must_commit, tryoutcred; 3812 3813 /* 3814 * NFSv4.2 Copy is not permitted for infile == outfile. 3815 * TODO: copy_file_range() between multiple NFS mountpoints 3816 */ 3817 if (invp == outvp || invp->v_mount != outvp->v_mount) { 3818 generic_copy: 3819 return (ENOSYS); 3820 } 3821 3822 invp_lock = LK_SHARED; 3823 relock: 3824 3825 /* Lock both vnodes, avoiding risk of deadlock. */ 3826 do { 3827 mp = NULL; 3828 error = vn_start_write(outvp, &mp, V_WAIT); 3829 if (error == 0) { 3830 error = vn_lock(outvp, LK_EXCLUSIVE); 3831 if (error == 0) { 3832 error = vn_lock(invp, invp_lock | LK_NOWAIT); 3833 if (error == 0) 3834 break; 3835 VOP_UNLOCK(outvp); 3836 if (mp != NULL) 3837 vn_finished_write(mp); 3838 mp = NULL; 3839 error = vn_lock(invp, invp_lock); 3840 if (error == 0) 3841 VOP_UNLOCK(invp); 3842 } 3843 } 3844 if (mp != NULL) 3845 vn_finished_write(mp); 3846 } while (error == 0); 3847 if (error != 0) 3848 return (error); 3849 3850 /* 3851 * More reasons to avoid nfs copy: not NFSv4.2, or explicitly 3852 * disabled. 
3853 */ 3854 nmp = VFSTONFS(invp->v_mount); 3855 mtx_lock(&nmp->nm_mtx); 3856 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3857 (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0) { 3858 mtx_unlock(&nmp->nm_mtx); 3859 VOP_UNLOCK(invp); 3860 VOP_UNLOCK(outvp); 3861 if (mp != NULL) 3862 vn_finished_write(mp); 3863 goto generic_copy; 3864 } 3865 mtx_unlock(&nmp->nm_mtx); 3866 3867 /* 3868 * Do the vn_rlimit_fsize() check. Should this be above the VOP layer? 3869 */ 3870 io.uio_offset = *ap->a_outoffp; 3871 io.uio_resid = *ap->a_lenp; 3872 error = vn_rlimit_fsizex(outvp, &io, 0, &r, ap->a_fsizetd); 3873 *ap->a_lenp = io.uio_resid; 3874 /* 3875 * No need to call vn_rlimit_fsizex_res before return, since the uio is 3876 * local. 3877 */ 3878 3879 /* 3880 * Flush the input file so that the data is up to date before 3881 * the copy. Flush writes for the output file so that they 3882 * do not overwrite the data copied to the output file by the Copy. 3883 * Set the commit argument for both flushes so that the data is on 3884 * stable storage before the Copy RPC. This is done in case the 3885 * server reboots during the Copy and needs to be redone. 3886 */ 3887 if (error == 0) { 3888 invp_obj = invp->v_object; 3889 if (invp_obj != NULL && vm_object_mightbedirty(invp_obj)) { 3890 if (invp_lock != LK_EXCLUSIVE) { 3891 invp_lock = LK_EXCLUSIVE; 3892 VOP_UNLOCK(invp); 3893 VOP_UNLOCK(outvp); 3894 if (mp != NULL) 3895 vn_finished_write(mp); 3896 goto relock; 3897 } 3898 vnode_pager_clean_sync(invp); 3899 } 3900 error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0); 3901 } 3902 if (error == 0) 3903 error = ncl_vinvalbuf(outvp, V_SAVE, curthread, 0); 3904 3905 /* Do the actual NFSv4.2 RPC. 
*/ 3906 ret = ret2 = 0; 3907 len = *ap->a_lenp; 3908 mtx_lock(&nmp->nm_mtx); 3909 if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0) 3910 consecutive = true; 3911 else 3912 consecutive = false; 3913 mtx_unlock(&nmp->nm_mtx); 3914 inoff = *ap->a_inoffp; 3915 outoff = *ap->a_outoffp; 3916 tryoutcred = true; 3917 must_commit = false; 3918 if (error == 0) { 3919 vap = &VTONFS(invp)->n_vattr.na_vattr; 3920 error = VOP_GETATTR(invp, vap, ap->a_incred); 3921 if (error == 0) { 3922 /* 3923 * Clip "len" at va_size so that RFC compliant servers 3924 * will not reply NFSERR_INVAL. 3925 * Setting "len == 0" for the RPC would be preferred, 3926 * but some Linux servers do not support that. 3927 * If the len is being set to 0, do a Setattr RPC to 3928 * set the server's atime. This behaviour was the 3929 * preferred one for the FreeBSD "collective". 3930 */ 3931 if (inoff >= vap->va_size) { 3932 *ap->a_lenp = len = 0; 3933 if ((nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 3934 0) { 3935 VATTR_NULL(&va); 3936 va.va_atime.tv_sec = 0; 3937 va.va_atime.tv_nsec = 0; 3938 va.va_vaflags = VA_UTIMES_NULL; 3939 inattrflag = 0; 3940 error = nfsrpc_setattr(invp, &va, NULL, 3941 ap->a_incred, curthread, &innfsva, 3942 &inattrflag); 3943 if (inattrflag != 0) 3944 ret = nfscl_loadattrcache(&invp, 3945 &innfsva, NULL, 0, 1); 3946 if (error == 0 && ret != 0) 3947 error = ret; 3948 } 3949 } else if (inoff + len > vap->va_size) 3950 *ap->a_lenp = len = vap->va_size - inoff; 3951 } else 3952 error = 0; 3953 } 3954 3955 /* 3956 * len will be set to 0 upon a successful Copy RPC. 3957 * As such, this only loops when the Copy RPC needs to be retried. 
3958 */ 3959 while (len > 0 && error == 0) { 3960 inattrflag = outattrflag = 0; 3961 len2 = len; 3962 if (tryoutcred) 3963 error = nfsrpc_copy_file_range(invp, ap->a_inoffp, 3964 outvp, ap->a_outoffp, &len2, ap->a_flags, 3965 &inattrflag, &innfsva, &outattrflag, &outnfsva, 3966 ap->a_outcred, consecutive, &must_commit); 3967 else 3968 error = nfsrpc_copy_file_range(invp, ap->a_inoffp, 3969 outvp, ap->a_outoffp, &len2, ap->a_flags, 3970 &inattrflag, &innfsva, &outattrflag, &outnfsva, 3971 ap->a_incred, consecutive, &must_commit); 3972 if (inattrflag != 0) 3973 ret = nfscl_loadattrcache(&invp, &innfsva, NULL, 0, 1); 3974 if (outattrflag != 0) 3975 ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL, 3976 1, 1); 3977 if (error == 0) { 3978 if (consecutive == false) { 3979 if (len2 == len) { 3980 mtx_lock(&nmp->nm_mtx); 3981 nmp->nm_privflag |= 3982 NFSMNTP_NOCONSECUTIVE; 3983 mtx_unlock(&nmp->nm_mtx); 3984 } else 3985 error = NFSERR_OFFLOADNOREQS; 3986 } 3987 *ap->a_lenp = len2; 3988 len = 0; 3989 if (len2 > 0 && must_commit && error == 0) 3990 error = ncl_commit(outvp, outoff, *ap->a_lenp, 3991 ap->a_outcred, curthread); 3992 if (error == 0 && ret != 0) 3993 error = ret; 3994 if (error == 0 && ret2 != 0) 3995 error = ret2; 3996 } else if (error == NFSERR_OFFLOADNOREQS && consecutive) { 3997 /* 3998 * Try consecutive == false, which is ok only if all 3999 * bytes are copied. 4000 * If only some bytes were copied when consecutive 4001 * is false, there is no way to know which bytes 4002 * still need to be written. 4003 */ 4004 consecutive = false; 4005 error = 0; 4006 } else if (error == NFSERR_ACCES && tryoutcred) { 4007 /* Try again with incred. */ 4008 tryoutcred = false; 4009 error = 0; 4010 } 4011 if (error == NFSERR_STALEWRITEVERF) { 4012 /* 4013 * Server rebooted, so do it all again. 
4014 */ 4015 *ap->a_inoffp = inoff; 4016 *ap->a_outoffp = outoff; 4017 len = *ap->a_lenp; 4018 must_commit = false; 4019 error = 0; 4020 } 4021 } 4022 VOP_UNLOCK(invp); 4023 VOP_UNLOCK(outvp); 4024 if (mp != NULL) 4025 vn_finished_write(mp); 4026 if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS || 4027 error == NFSERR_ACCES) { 4028 /* 4029 * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can 4030 * use a_incred for the read and a_outcred for the write, so 4031 * try this for NFSERR_ACCES failures for the Copy. 4032 * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can 4033 * never succeed, so disable it. 4034 */ 4035 if (error != NFSERR_ACCES) { 4036 /* Can never do Copy on this mount. */ 4037 mtx_lock(&nmp->nm_mtx); 4038 nmp->nm_privflag |= NFSMNTP_NOCOPY; 4039 mtx_unlock(&nmp->nm_mtx); 4040 } 4041 *ap->a_inoffp = inoff; 4042 *ap->a_outoffp = outoff; 4043 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 4044 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 4045 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 4046 } else if (error != 0) 4047 *ap->a_lenp = 0; 4048 4049 if (error != 0) 4050 error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0); 4051 return (error); 4052 } 4053 4054 /* 4055 * nfs ioctl call 4056 */ 4057 static int 4058 nfs_ioctl(struct vop_ioctl_args *ap) 4059 { 4060 struct vnode *vp = ap->a_vp; 4061 struct nfsvattr nfsva; 4062 struct nfsmount *nmp; 4063 int attrflag, content, error, ret; 4064 bool eof = false; /* shut up compiler. */ 4065 4066 /* Do the actual NFSv4.2 RPC. 
*/ 4067 switch (ap->a_command) { 4068 case FIOSEEKDATA: 4069 content = NFSV4CONTENT_DATA; 4070 break; 4071 case FIOSEEKHOLE: 4072 content = NFSV4CONTENT_HOLE; 4073 break; 4074 default: 4075 return (ENOTTY); 4076 } 4077 4078 error = vn_lock(vp, LK_EXCLUSIVE); 4079 if (error != 0) 4080 return (EBADF); 4081 4082 if (vp->v_type != VREG) { 4083 VOP_UNLOCK(vp); 4084 return (ENOTTY); 4085 } 4086 nmp = VFSTONFS(vp->v_mount); 4087 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) { 4088 VOP_UNLOCK(vp); 4089 error = vop_stdioctl(ap); 4090 return (error); 4091 } 4092 4093 attrflag = 0; 4094 if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size) 4095 error = ENXIO; 4096 else { 4097 /* 4098 * Flush all writes, so that the server is up to date. 4099 * Although a Commit is not required, the commit argument 4100 * is set so that, for a pNFS File/Flexible File Layout 4101 * server, the LayoutCommit will be done to ensure the file 4102 * size is up to date on the Metadata Server. 4103 */ 4104 4105 vnode_pager_clean_sync(vp); 4106 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0); 4107 if (error == 0) 4108 error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof, 4109 content, ap->a_cred, &nfsva, &attrflag); 4110 /* If at eof for FIOSEEKDATA, return ENXIO. 
*/ 4111 if (eof && error == 0 && content == NFSV4CONTENT_DATA) 4112 error = ENXIO; 4113 } 4114 if (attrflag != 0) { 4115 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4116 if (error == 0 && ret != 0) 4117 error = ret; 4118 } 4119 NFSVOPUNLOCK(vp); 4120 4121 if (error != 0) 4122 error = ENXIO; 4123 return (error); 4124 } 4125 4126 /* 4127 * nfs getextattr call 4128 */ 4129 static int 4130 nfs_getextattr(struct vop_getextattr_args *ap) 4131 { 4132 struct vnode *vp = ap->a_vp; 4133 struct nfsmount *nmp; 4134 struct ucred *cred; 4135 struct thread *td = ap->a_td; 4136 struct nfsvattr nfsva; 4137 ssize_t len; 4138 int attrflag, error, ret; 4139 4140 nmp = VFSTONFS(vp->v_mount); 4141 mtx_lock(&nmp->nm_mtx); 4142 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4143 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4144 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4145 mtx_unlock(&nmp->nm_mtx); 4146 return (EOPNOTSUPP); 4147 } 4148 mtx_unlock(&nmp->nm_mtx); 4149 4150 cred = ap->a_cred; 4151 if (cred == NULL) 4152 cred = td->td_ucred; 4153 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4154 attrflag = 0; 4155 error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva, 4156 &attrflag, cred, td); 4157 if (attrflag != 0) { 4158 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4159 if (error == 0 && ret != 0) 4160 error = ret; 4161 } 4162 if (error == 0 && ap->a_size != NULL) 4163 *ap->a_size = len; 4164 4165 switch (error) { 4166 case NFSERR_NOTSUPP: 4167 case NFSERR_OPILLEGAL: 4168 mtx_lock(&nmp->nm_mtx); 4169 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4170 mtx_unlock(&nmp->nm_mtx); 4171 error = EOPNOTSUPP; 4172 break; 4173 case NFSERR_NOXATTR: 4174 case NFSERR_XATTR2BIG: 4175 error = ENOATTR; 4176 break; 4177 default: 4178 error = nfscl_maperr(td, error, 0, 0); 4179 break; 4180 } 4181 return (error); 4182 } 4183 4184 /* 4185 * nfs setextattr call 4186 */ 4187 static int 4188 nfs_setextattr(struct vop_setextattr_args *ap) 4189 { 4190 struct vnode *vp = ap->a_vp; 4191 struct nfsmount *nmp; 4192 struct ucred *cred; 4193 struct thread *td = ap->a_td; 4194 struct nfsvattr nfsva; 4195 int attrflag, error, ret; 4196 4197 nmp = VFSTONFS(vp->v_mount); 4198 mtx_lock(&nmp->nm_mtx); 4199 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4200 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4201 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4202 mtx_unlock(&nmp->nm_mtx); 4203 return (EOPNOTSUPP); 4204 } 4205 mtx_unlock(&nmp->nm_mtx); 4206 4207 if (ap->a_uio->uio_resid < 0) 4208 return (EINVAL); 4209 cred = ap->a_cred; 4210 if (cred == NULL) 4211 cred = td->td_ucred; 4212 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4213 attrflag = 0; 4214 error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva, 4215 &attrflag, cred, td); 4216 if (attrflag != 0) { 4217 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4218 if (error == 0 && ret != 0) 4219 error = ret; 4220 } 4221 4222 switch (error) { 4223 case NFSERR_NOTSUPP: 4224 case NFSERR_OPILLEGAL: 4225 mtx_lock(&nmp->nm_mtx); 4226 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4227 mtx_unlock(&nmp->nm_mtx); 4228 error = EOPNOTSUPP; 4229 break; 4230 case NFSERR_NOXATTR: 4231 case NFSERR_XATTR2BIG: 4232 error = ENOATTR; 4233 break; 4234 default: 4235 error = nfscl_maperr(td, error, 0, 0); 4236 break; 4237 } 4238 return (error); 4239 } 4240 4241 /* 4242 * nfs listextattr call 4243 */ 4244 static int 4245 nfs_listextattr(struct vop_listextattr_args *ap) 4246 { 4247 struct vnode *vp = ap->a_vp; 4248 struct nfsmount *nmp; 4249 struct ucred *cred; 4250 struct thread *td = ap->a_td; 4251 struct nfsvattr nfsva; 4252 size_t len, len2; 4253 uint64_t cookie; 4254 int attrflag, error, ret; 4255 bool eof; 4256 4257 nmp = VFSTONFS(vp->v_mount); 4258 mtx_lock(&nmp->nm_mtx); 4259 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4260 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4261 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4262 mtx_unlock(&nmp->nm_mtx); 4263 return (EOPNOTSUPP); 4264 } 4265 mtx_unlock(&nmp->nm_mtx); 4266 4267 cred = ap->a_cred; 4268 if (cred == NULL) 4269 cred = td->td_ucred; 4270 4271 /* Loop around doing List Extended Attribute RPCs. 
*/ 4272 eof = false; 4273 cookie = 0; 4274 len2 = 0; 4275 error = 0; 4276 while (!eof && error == 0) { 4277 len = nmp->nm_rsize; 4278 attrflag = 0; 4279 error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof, 4280 &nfsva, &attrflag, cred, td); 4281 if (attrflag != 0) { 4282 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4283 if (error == 0 && ret != 0) 4284 error = ret; 4285 } 4286 if (error == 0) { 4287 len2 += len; 4288 if (len2 > SSIZE_MAX) 4289 error = ENOATTR; 4290 } 4291 } 4292 if (error == 0 && ap->a_size != NULL) 4293 *ap->a_size = len2; 4294 4295 switch (error) { 4296 case NFSERR_NOTSUPP: 4297 case NFSERR_OPILLEGAL: 4298 mtx_lock(&nmp->nm_mtx); 4299 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4300 mtx_unlock(&nmp->nm_mtx); 4301 error = EOPNOTSUPP; 4302 break; 4303 case NFSERR_NOXATTR: 4304 case NFSERR_XATTR2BIG: 4305 error = ENOATTR; 4306 break; 4307 default: 4308 error = nfscl_maperr(td, error, 0, 0); 4309 break; 4310 } 4311 return (error); 4312 } 4313 4314 /* 4315 * nfs setextattr call 4316 */ 4317 static int 4318 nfs_deleteextattr(struct vop_deleteextattr_args *ap) 4319 { 4320 struct vnode *vp = ap->a_vp; 4321 struct nfsmount *nmp; 4322 struct nfsvattr nfsva; 4323 int attrflag, error, ret; 4324 4325 nmp = VFSTONFS(vp->v_mount); 4326 mtx_lock(&nmp->nm_mtx); 4327 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4328 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4329 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4330 mtx_unlock(&nmp->nm_mtx); 4331 return (EOPNOTSUPP); 4332 } 4333 mtx_unlock(&nmp->nm_mtx); 4334 4335 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4336 attrflag = 0; 4337 error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred, 4338 ap->a_td); 4339 if (attrflag != 0) { 4340 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4341 if (error == 0 && ret != 0) 4342 error = ret; 4343 } 4344 4345 switch (error) { 4346 case NFSERR_NOTSUPP: 4347 case NFSERR_OPILLEGAL: 4348 mtx_lock(&nmp->nm_mtx); 4349 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4350 mtx_unlock(&nmp->nm_mtx); 4351 error = EOPNOTSUPP; 4352 break; 4353 case NFSERR_NOXATTR: 4354 case NFSERR_XATTR2BIG: 4355 error = ENOATTR; 4356 break; 4357 default: 4358 error = nfscl_maperr(ap->a_td, error, 0, 0); 4359 break; 4360 } 4361 return (error); 4362 } 4363 4364 /* 4365 * Return POSIX pathconf information applicable to nfs filesystems. 4366 */ 4367 static int 4368 nfs_pathconf(struct vop_pathconf_args *ap) 4369 { 4370 struct nfsv3_pathconf pc; 4371 struct nfsvattr nfsva; 4372 struct vnode *vp = ap->a_vp; 4373 struct nfsmount *nmp; 4374 struct thread *td = curthread; 4375 off_t off; 4376 bool eof; 4377 int attrflag, error; 4378 4379 if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || 4380 ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || 4381 ap->a_name == _PC_NO_TRUNC)) || 4382 (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) { 4383 /* 4384 * Since only the above 4 a_names are returned by the NFSv3 4385 * Pathconf RPC, there is no point in doing it for others. 4386 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can 4387 * be used for _PC_NFS4_ACL as well. 4388 */ 4389 error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, 4390 &attrflag); 4391 if (attrflag != 0) 4392 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4393 if (error != 0) 4394 return (error); 4395 } else { 4396 /* 4397 * For NFSv2 (or NFSv3 when not one of the above 4 a_names), 4398 * just fake them. 
4399 */ 4400 pc.pc_linkmax = NFS_LINK_MAX; 4401 pc.pc_namemax = NFS_MAXNAMLEN; 4402 pc.pc_notrunc = 1; 4403 pc.pc_chownrestricted = 1; 4404 pc.pc_caseinsensitive = 0; 4405 pc.pc_casepreserving = 1; 4406 error = 0; 4407 } 4408 switch (ap->a_name) { 4409 case _PC_LINK_MAX: 4410 #ifdef _LP64 4411 *ap->a_retval = pc.pc_linkmax; 4412 #else 4413 *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); 4414 #endif 4415 break; 4416 case _PC_NAME_MAX: 4417 *ap->a_retval = pc.pc_namemax; 4418 break; 4419 case _PC_PIPE_BUF: 4420 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 4421 *ap->a_retval = PIPE_BUF; 4422 else 4423 error = EINVAL; 4424 break; 4425 case _PC_CHOWN_RESTRICTED: 4426 *ap->a_retval = pc.pc_chownrestricted; 4427 break; 4428 case _PC_NO_TRUNC: 4429 *ap->a_retval = pc.pc_notrunc; 4430 break; 4431 case _PC_ACL_NFS4: 4432 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && 4433 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) 4434 *ap->a_retval = 1; 4435 else 4436 *ap->a_retval = 0; 4437 break; 4438 case _PC_ACL_PATH_MAX: 4439 if (NFS_ISV4(vp)) 4440 *ap->a_retval = ACL_MAX_ENTRIES; 4441 else 4442 *ap->a_retval = 3; 4443 break; 4444 case _PC_PRIO_IO: 4445 *ap->a_retval = 0; 4446 break; 4447 case _PC_SYNC_IO: 4448 *ap->a_retval = 0; 4449 break; 4450 case _PC_ALLOC_SIZE_MIN: 4451 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; 4452 break; 4453 case _PC_FILESIZEBITS: 4454 if (NFS_ISV34(vp)) 4455 *ap->a_retval = 64; 4456 else 4457 *ap->a_retval = 32; 4458 break; 4459 case _PC_REC_INCR_XFER_SIZE: 4460 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4461 break; 4462 case _PC_REC_MAX_XFER_SIZE: 4463 *ap->a_retval = -1; /* means ``unlimited'' */ 4464 break; 4465 case _PC_REC_MIN_XFER_SIZE: 4466 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4467 break; 4468 case _PC_REC_XFER_ALIGN: 4469 *ap->a_retval = PAGE_SIZE; 4470 break; 4471 case _PC_SYMLINK_MAX: 4472 *ap->a_retval = NFS_MAXPATHLEN; 4473 break; 4474 case _PC_MIN_HOLE_SIZE: 4475 /* Only some NFSv4.2 servers 
support Seek for Holes. */ 4476 *ap->a_retval = 0; 4477 nmp = VFSTONFS(vp->v_mount); 4478 if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) { 4479 /* 4480 * NFSv4.2 doesn't have an attribute for hole size, 4481 * so all we can do is see if the Seek operation is 4482 * supported and then use f_iosize as a "best guess". 4483 */ 4484 mtx_lock(&nmp->nm_mtx); 4485 if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) { 4486 mtx_unlock(&nmp->nm_mtx); 4487 off = 0; 4488 attrflag = 0; 4489 error = nfsrpc_seek(vp, &off, &eof, 4490 NFSV4CONTENT_HOLE, td->td_ucred, &nfsva, 4491 &attrflag); 4492 if (attrflag != 0) 4493 (void) nfscl_loadattrcache(&vp, &nfsva, 4494 NULL, 0, 1); 4495 mtx_lock(&nmp->nm_mtx); 4496 if (error == NFSERR_NOTSUPP) 4497 nmp->nm_privflag |= NFSMNTP_SEEKTESTED; 4498 else 4499 nmp->nm_privflag |= NFSMNTP_SEEKTESTED | 4500 NFSMNTP_SEEK; 4501 error = 0; 4502 } 4503 if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0) 4504 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4505 mtx_unlock(&nmp->nm_mtx); 4506 } 4507 break; 4508 4509 default: 4510 error = vop_stdpathconf(ap); 4511 break; 4512 } 4513 return (error); 4514 } 4515