1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 /* 41 * vnode op calls for Sun NFS version 2, 3 and 4 42 */ 43 44 #include "opt_inet.h" 45 46 #include <sys/param.h> 47 #include <sys/kernel.h> 48 #include <sys/systm.h> 49 #include <sys/resourcevar.h> 50 #include <sys/proc.h> 51 #include <sys/mount.h> 52 #include <sys/bio.h> 53 #include <sys/buf.h> 54 #include <sys/extattr.h> 55 #include <sys/filio.h> 56 #include <sys/jail.h> 57 #include <sys/malloc.h> 58 #include <sys/mbuf.h> 59 #include <sys/namei.h> 60 #include <sys/socket.h> 61 #include <sys/vnode.h> 62 #include <sys/dirent.h> 63 #include <sys/fcntl.h> 64 #include <sys/lockf.h> 65 #include <sys/stat.h> 66 #include <sys/sysctl.h> 67 #include <sys/signalvar.h> 68 69 #include <vm/vm.h> 70 #include <vm/vm_extern.h> 71 #include <vm/vm_object.h> 72 73 #include <fs/nfs/nfsport.h> 74 #include <fs/nfsclient/nfsnode.h> 75 #include <fs/nfsclient/nfsmount.h> 76 #include <fs/nfsclient/nfs.h> 77 #include <fs/nfsclient/nfs_kdtrace.h> 78 79 #include <net/if.h> 80 #include <netinet/in.h> 81 #include <netinet/in_var.h> 82 83 #include <nfs/nfs_lock.h> 84 85 #ifdef KDTRACE_HOOKS 86 #include <sys/dtrace_bsd.h> 87 88 dtrace_nfsclient_accesscache_flush_probe_func_t 89 dtrace_nfscl_accesscache_flush_done_probe; 90 uint32_t nfscl_accesscache_flush_done_id; 91 92 dtrace_nfsclient_accesscache_get_probe_func_t 93 dtrace_nfscl_accesscache_get_hit_probe, 94 dtrace_nfscl_accesscache_get_miss_probe; 95 uint32_t nfscl_accesscache_get_hit_id; 96 uint32_t nfscl_accesscache_get_miss_id; 97 98 dtrace_nfsclient_accesscache_load_probe_func_t 99 dtrace_nfscl_accesscache_load_done_probe; 100 uint32_t nfscl_accesscache_load_done_id; 101 #endif /* !KDTRACE_HOOKS */ 102 103 /* Defs */ 104 #define TRUE 1 105 #define FALSE 0 106 107 extern struct nfsstatsv1 nfsstatsv1; 108 extern int nfsrv_useacl; 109 extern int nfscl_debuglevel; 110 MALLOC_DECLARE(M_NEWNFSREQ); 111 112 static vop_read_t 
nfsfifo_read; 113 static vop_write_t nfsfifo_write; 114 static vop_close_t nfsfifo_close; 115 static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, 116 struct thread *); 117 static vop_lookup_t nfs_lookup; 118 static vop_create_t nfs_create; 119 static vop_mknod_t nfs_mknod; 120 static vop_open_t nfs_open; 121 static vop_pathconf_t nfs_pathconf; 122 static vop_close_t nfs_close; 123 static vop_access_t nfs_access; 124 static vop_getattr_t nfs_getattr; 125 static vop_setattr_t nfs_setattr; 126 static vop_read_t nfs_read; 127 static vop_fsync_t nfs_fsync; 128 static vop_remove_t nfs_remove; 129 static vop_link_t nfs_link; 130 static vop_rename_t nfs_rename; 131 static vop_mkdir_t nfs_mkdir; 132 static vop_rmdir_t nfs_rmdir; 133 static vop_symlink_t nfs_symlink; 134 static vop_readdir_t nfs_readdir; 135 static vop_strategy_t nfs_strategy; 136 static int nfs_lookitup(struct vnode *, char *, int, 137 struct ucred *, struct thread *, struct nfsnode **); 138 static int nfs_sillyrename(struct vnode *, struct vnode *, 139 struct componentname *); 140 static vop_access_t nfsspec_access; 141 static vop_readlink_t nfs_readlink; 142 static vop_print_t nfs_print; 143 static vop_advlock_t nfs_advlock; 144 static vop_advlockasync_t nfs_advlockasync; 145 static vop_getacl_t nfs_getacl; 146 static vop_setacl_t nfs_setacl; 147 static vop_advise_t nfs_advise; 148 static vop_allocate_t nfs_allocate; 149 static vop_deallocate_t nfs_deallocate; 150 static vop_copy_file_range_t nfs_copy_file_range; 151 static vop_ioctl_t nfs_ioctl; 152 static vop_getextattr_t nfs_getextattr; 153 static vop_setextattr_t nfs_setextattr; 154 static vop_listextattr_t nfs_listextattr; 155 static vop_deleteextattr_t nfs_deleteextattr; 156 static vop_lock1_t nfs_lock; 157 158 /* 159 * Global vfs data structures for nfs 160 */ 161 162 static struct vop_vector newnfs_vnodeops_nosig = { 163 .vop_default = &default_vnodeops, 164 .vop_access = nfs_access, 165 .vop_advlock = nfs_advlock, 166 
.vop_advlockasync = nfs_advlockasync, 167 .vop_close = nfs_close, 168 .vop_create = nfs_create, 169 .vop_fsync = nfs_fsync, 170 .vop_getattr = nfs_getattr, 171 .vop_getpages = ncl_getpages, 172 .vop_putpages = ncl_putpages, 173 .vop_inactive = ncl_inactive, 174 .vop_link = nfs_link, 175 .vop_lock1 = nfs_lock, 176 .vop_lookup = nfs_lookup, 177 .vop_mkdir = nfs_mkdir, 178 .vop_mknod = nfs_mknod, 179 .vop_open = nfs_open, 180 .vop_pathconf = nfs_pathconf, 181 .vop_print = nfs_print, 182 .vop_read = nfs_read, 183 .vop_readdir = nfs_readdir, 184 .vop_readlink = nfs_readlink, 185 .vop_reclaim = ncl_reclaim, 186 .vop_remove = nfs_remove, 187 .vop_rename = nfs_rename, 188 .vop_rmdir = nfs_rmdir, 189 .vop_setattr = nfs_setattr, 190 .vop_strategy = nfs_strategy, 191 .vop_symlink = nfs_symlink, 192 .vop_write = ncl_write, 193 .vop_getacl = nfs_getacl, 194 .vop_setacl = nfs_setacl, 195 .vop_advise = nfs_advise, 196 .vop_allocate = nfs_allocate, 197 .vop_deallocate = nfs_deallocate, 198 .vop_copy_file_range = nfs_copy_file_range, 199 .vop_ioctl = nfs_ioctl, 200 .vop_getextattr = nfs_getextattr, 201 .vop_setextattr = nfs_setextattr, 202 .vop_listextattr = nfs_listextattr, 203 .vop_deleteextattr = nfs_deleteextattr, 204 }; 205 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops_nosig); 206 207 static int 208 nfs_vnodeops_bypass(struct vop_generic_args *a) 209 { 210 211 return (vop_sigdefer(&newnfs_vnodeops_nosig, a)); 212 } 213 214 struct vop_vector newnfs_vnodeops = { 215 .vop_default = &default_vnodeops, 216 .vop_bypass = nfs_vnodeops_bypass, 217 }; 218 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops); 219 220 static struct vop_vector newnfs_fifoops_nosig = { 221 .vop_default = &fifo_specops, 222 .vop_access = nfsspec_access, 223 .vop_close = nfsfifo_close, 224 .vop_fsync = nfs_fsync, 225 .vop_getattr = nfs_getattr, 226 .vop_inactive = ncl_inactive, 227 .vop_pathconf = nfs_pathconf, 228 .vop_print = nfs_print, 229 .vop_read = nfsfifo_read, 230 .vop_reclaim = ncl_reclaim, 231 .vop_setattr = 
nfs_setattr, 232 .vop_write = nfsfifo_write, 233 }; 234 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops_nosig); 235 236 static int 237 nfs_fifoops_bypass(struct vop_generic_args *a) 238 { 239 240 return (vop_sigdefer(&newnfs_fifoops_nosig, a)); 241 } 242 243 struct vop_vector newnfs_fifoops = { 244 .vop_default = &default_vnodeops, 245 .vop_bypass = nfs_fifoops_bypass, 246 }; 247 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops); 248 249 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 250 struct componentname *cnp, struct vattr *vap); 251 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 252 int namelen, struct ucred *cred, struct thread *td); 253 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, 254 char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, 255 char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); 256 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, 257 struct componentname *scnp, struct sillyrename *sp); 258 259 /* 260 * Global variables 261 */ 262 SYSCTL_DECL(_vfs_nfs); 263 264 static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 265 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 266 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); 267 268 static int nfs_prime_access_cache = 0; 269 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, 270 &nfs_prime_access_cache, 0, 271 "Prime NFS ACCESS cache when fetching attributes"); 272 273 static int newnfs_commit_on_close = 0; 274 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, 275 &newnfs_commit_on_close, 0, "write+commit on close, else only write"); 276 277 static int nfs_clean_pages_on_close = 1; 278 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 279 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 280 281 int newnfs_directio_enable = 0; 282 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, 283 &newnfs_directio_enable, 0, "Enable NFS 
directio"); 284 285 int nfs_keep_dirty_on_error; 286 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, 287 &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); 288 289 /* 290 * This sysctl allows other processes to mmap a file that has been opened 291 * O_DIRECT by a process. In general, having processes mmap the file while 292 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 293 * this by default to prevent DoS attacks - to prevent a malicious user from 294 * opening up files O_DIRECT preventing other users from mmap'ing these 295 * files. "Protected" environments where stricter consistency guarantees are 296 * required can disable this knob. The process that opened the file O_DIRECT 297 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 298 * meaningful. 299 */ 300 int newnfs_directio_allow_mmap = 1; 301 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, 302 &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 303 304 static uint64_t nfs_maxalloclen = 64 * 1024 * 1024; 305 SYSCTL_U64(_vfs_nfs, OID_AUTO, maxalloclen, CTLFLAG_RW, 306 &nfs_maxalloclen, 0, "NFS max allocate/deallocate length"); 307 308 #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ 309 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ 310 | NFSACCESS_DELETE | NFSACCESS_LOOKUP) 311 312 /* 313 * SMP Locking Note : 314 * The list of locks after the description of the lock is the ordering 315 * of other locks acquired with the lock held. 316 * np->n_mtx : Protects the fields in the nfsnode. 317 VM Object Lock 318 VI_MTX (acquired indirectly) 319 * nmp->nm_mtx : Protects the fields in the nfsmount. 320 rep->r_mtx 321 * ncl_iod_mutex : Global lock, protects shared nfsiod state. 322 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 323 nmp->nm_mtx 324 rep->r_mtx 325 * rep->r_mtx : Protects the fields in an nfsreq. 
326 */ 327 328 static int 329 nfs_lock(struct vop_lock1_args *ap) 330 { 331 struct vnode *vp; 332 struct nfsnode *np; 333 u_quad_t nsize; 334 int error, lktype; 335 bool onfault; 336 337 vp = ap->a_vp; 338 lktype = ap->a_flags & LK_TYPE_MASK; 339 error = VOP_LOCK1_APV(&default_vnodeops, ap); 340 if (error != 0 || vp->v_op != &newnfs_vnodeops) 341 return (error); 342 np = VTONFS(vp); 343 if (np == NULL) 344 return (0); 345 NFSLOCKNODE(np); 346 if ((np->n_flag & NVNSETSZSKIP) == 0 || (lktype != LK_SHARED && 347 lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE && 348 lktype != LK_TRYUPGRADE)) { 349 NFSUNLOCKNODE(np); 350 return (0); 351 } 352 onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT && 353 (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE && 354 (lktype == LK_SHARED || lktype == LK_EXCLUSIVE); 355 if (onfault && vp->v_vnlock->lk_recurse == 0) { 356 /* 357 * Force retry in vm_fault(), to make the lock request 358 * sleepable, which allows us to piggy-back the 359 * sleepable call to vnode_pager_setsize(). 
360 */ 361 NFSUNLOCKNODE(np); 362 VOP_UNLOCK(vp); 363 return (EBUSY); 364 } 365 if ((ap->a_flags & LK_NOWAIT) != 0 || 366 (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) { 367 NFSUNLOCKNODE(np); 368 return (0); 369 } 370 if (lktype == LK_SHARED) { 371 NFSUNLOCKNODE(np); 372 VOP_UNLOCK(vp); 373 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 374 ap->a_flags |= LK_EXCLUSIVE; 375 error = VOP_LOCK1_APV(&default_vnodeops, ap); 376 if (error != 0 || vp->v_op != &newnfs_vnodeops) 377 return (error); 378 if (vp->v_data == NULL) 379 goto downgrade; 380 MPASS(vp->v_data == np); 381 NFSLOCKNODE(np); 382 if ((np->n_flag & NVNSETSZSKIP) == 0) { 383 NFSUNLOCKNODE(np); 384 goto downgrade; 385 } 386 } 387 np->n_flag &= ~NVNSETSZSKIP; 388 nsize = np->n_size; 389 NFSUNLOCKNODE(np); 390 vnode_pager_setsize(vp, nsize); 391 downgrade: 392 if (lktype == LK_SHARED) { 393 ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); 394 ap->a_flags |= LK_DOWNGRADE; 395 (void)VOP_LOCK1_APV(&default_vnodeops, ap); 396 } 397 return (0); 398 } 399 400 static int 401 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 402 struct ucred *cred, u_int32_t *retmode) 403 { 404 int error = 0, attrflag, i, lrupos; 405 u_int32_t rmode; 406 struct nfsnode *np = VTONFS(vp); 407 struct nfsvattr nfsva; 408 409 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 410 &rmode, NULL); 411 if (attrflag) 412 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 413 if (!error) { 414 lrupos = 0; 415 NFSLOCKNODE(np); 416 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 417 if (np->n_accesscache[i].uid == cred->cr_uid) { 418 np->n_accesscache[i].mode = rmode; 419 np->n_accesscache[i].stamp = time_second; 420 break; 421 } 422 if (i > 0 && np->n_accesscache[i].stamp < 423 np->n_accesscache[lrupos].stamp) 424 lrupos = i; 425 } 426 if (i == NFS_ACCESSCACHESIZE) { 427 np->n_accesscache[lrupos].uid = cred->cr_uid; 428 np->n_accesscache[lrupos].mode = rmode; 429 np->n_accesscache[lrupos].stamp = 
time_second; 430 } 431 NFSUNLOCKNODE(np); 432 if (retmode != NULL) 433 *retmode = rmode; 434 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 435 } else if (NFS_ISV4(vp)) { 436 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 437 } 438 #ifdef KDTRACE_HOOKS 439 if (error != 0) 440 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 441 error); 442 #endif 443 return (error); 444 } 445 446 /* 447 * nfs access vnode op. 448 * For nfs version 2, just return ok. File accesses may fail later. 449 * For nfs version 3, use the access rpc to check accessibility. If file modes 450 * are changed on the server, accesses might still fail later. 451 */ 452 static int 453 nfs_access(struct vop_access_args *ap) 454 { 455 struct vnode *vp = ap->a_vp; 456 int error = 0, i, gotahit; 457 u_int32_t mode, wmode, rmode; 458 int v34 = NFS_ISV34(vp); 459 struct nfsnode *np = VTONFS(vp); 460 461 /* 462 * Disallow write attempts on filesystems mounted read-only; 463 * unless the file is a socket, fifo, or a block or character 464 * device resident on the filesystem. 465 */ 466 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 467 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 468 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 469 switch (vp->v_type) { 470 case VREG: 471 case VDIR: 472 case VLNK: 473 return (EROFS); 474 default: 475 break; 476 } 477 } 478 /* 479 * For nfs v3 or v4, check to see if we have done this recently, and if 480 * so return our cached result instead of making an ACCESS call. 481 * If not, do an access rpc, otherwise you are stuck emulating 482 * ufs_access() locally using the vattr. This may not be correct, 483 * since the server may apply other access criteria such as 484 * client uid-->server uid mapping that we do not know about. 
485 */ 486 if (v34) { 487 if (ap->a_accmode & VREAD) 488 mode = NFSACCESS_READ; 489 else 490 mode = 0; 491 if (vp->v_type != VDIR) { 492 if (ap->a_accmode & VWRITE) 493 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 494 if (ap->a_accmode & VAPPEND) 495 mode |= NFSACCESS_EXTEND; 496 if (ap->a_accmode & VEXEC) 497 mode |= NFSACCESS_EXECUTE; 498 if (ap->a_accmode & VDELETE) 499 mode |= NFSACCESS_DELETE; 500 } else { 501 if (ap->a_accmode & VWRITE) 502 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 503 if (ap->a_accmode & VAPPEND) 504 mode |= NFSACCESS_EXTEND; 505 if (ap->a_accmode & VEXEC) 506 mode |= NFSACCESS_LOOKUP; 507 if (ap->a_accmode & VDELETE) 508 mode |= NFSACCESS_DELETE; 509 if (ap->a_accmode & VDELETE_CHILD) 510 mode |= NFSACCESS_MODIFY; 511 } 512 /* XXX safety belt, only make blanket request if caching */ 513 if (nfsaccess_cache_timeout > 0) { 514 wmode = NFSACCESS_READ | NFSACCESS_MODIFY | 515 NFSACCESS_EXTEND | NFSACCESS_EXECUTE | 516 NFSACCESS_DELETE | NFSACCESS_LOOKUP; 517 } else { 518 wmode = mode; 519 } 520 521 /* 522 * Does our cached result allow us to give a definite yes to 523 * this request? 524 */ 525 gotahit = 0; 526 NFSLOCKNODE(np); 527 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 528 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 529 if (time_second < (np->n_accesscache[i].stamp 530 + nfsaccess_cache_timeout) && 531 (np->n_accesscache[i].mode & mode) == mode) { 532 NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); 533 gotahit = 1; 534 } 535 break; 536 } 537 } 538 NFSUNLOCKNODE(np); 539 #ifdef KDTRACE_HOOKS 540 if (gotahit != 0) 541 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 542 ap->a_cred->cr_uid, mode); 543 else 544 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 545 ap->a_cred->cr_uid, mode); 546 #endif 547 if (gotahit == 0) { 548 /* 549 * Either a no, or a don't know. Go to the wire. 
550 */ 551 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 552 error = nfs34_access_otw(vp, wmode, ap->a_td, 553 ap->a_cred, &rmode); 554 if (!error && 555 (rmode & mode) != mode) 556 error = EACCES; 557 } 558 return (error); 559 } else { 560 if ((error = nfsspec_access(ap)) != 0) { 561 return (error); 562 } 563 /* 564 * Attempt to prevent a mapped root from accessing a file 565 * which it shouldn't. We try to read a byte from the file 566 * if the user is root and the file is not zero length. 567 * After calling nfsspec_access, we should have the correct 568 * file size cached. 569 */ 570 NFSLOCKNODE(np); 571 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 572 && VTONFS(vp)->n_size > 0) { 573 struct iovec aiov; 574 struct uio auio; 575 char buf[1]; 576 577 NFSUNLOCKNODE(np); 578 aiov.iov_base = buf; 579 aiov.iov_len = 1; 580 auio.uio_iov = &aiov; 581 auio.uio_iovcnt = 1; 582 auio.uio_offset = 0; 583 auio.uio_resid = 1; 584 auio.uio_segflg = UIO_SYSSPACE; 585 auio.uio_rw = UIO_READ; 586 auio.uio_td = ap->a_td; 587 588 if (vp->v_type == VREG) 589 error = ncl_readrpc(vp, &auio, ap->a_cred); 590 else if (vp->v_type == VDIR) { 591 char* bp; 592 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 593 aiov.iov_base = bp; 594 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 595 error = ncl_readdirrpc(vp, &auio, ap->a_cred, 596 ap->a_td); 597 free(bp, M_TEMP); 598 } else if (vp->v_type == VLNK) 599 error = ncl_readlinkrpc(vp, &auio, ap->a_cred); 600 else 601 error = EACCES; 602 } else 603 NFSUNLOCKNODE(np); 604 return (error); 605 } 606 } 607 608 /* 609 * nfs open vnode op 610 * Check to see if the type is ok 611 * and that deletion is not in progress. 612 * For paged in text files, you will need to flush the page cache 613 * if consistency is lost. 
614 */ 615 /* ARGSUSED */ 616 static int 617 nfs_open(struct vop_open_args *ap) 618 { 619 struct vnode *vp = ap->a_vp; 620 struct nfsnode *np = VTONFS(vp); 621 struct vattr vattr; 622 int error; 623 int fmode = ap->a_mode; 624 struct ucred *cred; 625 vm_object_t obj; 626 627 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 628 return (EOPNOTSUPP); 629 630 /* 631 * For NFSv4, we need to do the Open Op before cache validation, 632 * so that we conform to RFC3530 Sec. 9.3.1. 633 */ 634 if (NFS_ISV4(vp)) { 635 error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); 636 if (error) { 637 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 638 (gid_t)0); 639 return (error); 640 } 641 } 642 643 /* 644 * Now, if this Open will be doing reading, re-validate/flush the 645 * cache, so that Close/Open coherency is maintained. 646 */ 647 NFSLOCKNODE(np); 648 if (np->n_flag & NMODIFIED) { 649 NFSUNLOCKNODE(np); 650 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 651 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 652 if (VN_IS_DOOMED(vp)) 653 return (EBADF); 654 } 655 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 656 if (error == EINTR || error == EIO) { 657 if (NFS_ISV4(vp)) 658 (void) nfsrpc_close(vp, 0, ap->a_td); 659 return (error); 660 } 661 NFSLOCKNODE(np); 662 np->n_attrstamp = 0; 663 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 664 if (vp->v_type == VDIR) 665 np->n_direofoffset = 0; 666 NFSUNLOCKNODE(np); 667 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 668 if (error) { 669 if (NFS_ISV4(vp)) 670 (void) nfsrpc_close(vp, 0, ap->a_td); 671 return (error); 672 } 673 NFSLOCKNODE(np); 674 np->n_mtime = vattr.va_mtime; 675 if (NFS_ISV4(vp)) 676 np->n_change = vattr.va_filerev; 677 } else { 678 NFSUNLOCKNODE(np); 679 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 680 if (error) { 681 if (NFS_ISV4(vp)) 682 (void) nfsrpc_close(vp, 0, ap->a_td); 683 return (error); 684 } 685 NFSLOCKNODE(np); 686 if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || 687 NFS_TIMESPEC_COMPARE(&np->n_mtime, 
&vattr.va_mtime)) { 688 if (vp->v_type == VDIR) 689 np->n_direofoffset = 0; 690 NFSUNLOCKNODE(np); 691 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 692 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 693 if (VN_IS_DOOMED(vp)) 694 return (EBADF); 695 } 696 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 697 if (error == EINTR || error == EIO) { 698 if (NFS_ISV4(vp)) 699 (void) nfsrpc_close(vp, 0, ap->a_td); 700 return (error); 701 } 702 NFSLOCKNODE(np); 703 np->n_mtime = vattr.va_mtime; 704 if (NFS_ISV4(vp)) 705 np->n_change = vattr.va_filerev; 706 } 707 } 708 709 /* 710 * If the object has >= 1 O_DIRECT active opens, we disable caching. 711 */ 712 if (newnfs_directio_enable && (fmode & O_DIRECT) && 713 (vp->v_type == VREG)) { 714 if (np->n_directio_opens == 0) { 715 NFSUNLOCKNODE(np); 716 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 717 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 718 if (VN_IS_DOOMED(vp)) 719 return (EBADF); 720 } 721 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 722 if (error) { 723 if (NFS_ISV4(vp)) 724 (void) nfsrpc_close(vp, 0, ap->a_td); 725 return (error); 726 } 727 NFSLOCKNODE(np); 728 np->n_flag |= NNONCACHE; 729 } 730 np->n_directio_opens++; 731 } 732 733 /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ 734 if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) 735 np->n_flag |= NWRITEOPENED; 736 737 /* 738 * If this is an open for writing, capture a reference to the 739 * credentials, so they can be used by ncl_putpages(). Using 740 * these write credentials is preferable to the credentials of 741 * whatever thread happens to be doing the VOP_PUTPAGES() since 742 * the write RPCs are less likely to fail with EACCES. 
743 */ 744 if ((fmode & FWRITE) != 0) { 745 cred = np->n_writecred; 746 np->n_writecred = crhold(ap->a_cred); 747 } else 748 cred = NULL; 749 NFSUNLOCKNODE(np); 750 751 if (cred != NULL) 752 crfree(cred); 753 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 754 755 /* 756 * If the text file has been mmap'd, flush any dirty pages to the 757 * buffer cache and then... 758 * Make sure all writes are pushed to the NFS server. If this is not 759 * done, the modify time of the file can change while the text 760 * file is being executed. This will cause the process that is 761 * executing the text file to be terminated. 762 */ 763 if (vp->v_writecount <= -1) { 764 if ((obj = vp->v_object) != NULL && 765 vm_object_mightbedirty(obj)) { 766 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 767 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 768 if (VN_IS_DOOMED(vp)) 769 return (EBADF); 770 } 771 VM_OBJECT_WLOCK(obj); 772 vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); 773 VM_OBJECT_WUNLOCK(obj); 774 } 775 776 /* Now, flush the buffer cache. */ 777 ncl_flush(vp, MNT_WAIT, curthread, 0, 0); 778 779 /* And, finally, make sure that n_mtime is up to date. */ 780 np = VTONFS(vp); 781 NFSLOCKNODE(np); 782 np->n_mtime = np->n_vattr.na_mtime; 783 NFSUNLOCKNODE(np); 784 } 785 return (0); 786 } 787 788 /* 789 * nfs close vnode op 790 * What an NFS client should do upon close after writing is a debatable issue. 791 * Most NFS clients push delayed writes to the server upon close, basically for 792 * two reasons: 793 * 1 - So that any write errors may be reported back to the client process 794 * doing the close system call. By far the two most likely errors are 795 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 796 * 2 - To put a worst case upper bound on cache inconsistency between 797 * multiple clients for the file. 798 * There is also a consistency problem for Version 2 of the protocol w.r.t. 
799 * not being able to tell if other clients are writing a file concurrently, 800 * since there is no way of knowing if the changed modify time in the reply 801 * is only due to the write for this client. 802 * (NFS Version 3 provides weak cache consistency data in the reply that 803 * should be sufficient to detect and handle this case.) 804 * 805 * The current code does the following: 806 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 807 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 808 * or commit them (this satisfies 1 and 2 except for the 809 * case where the server crashes after this close but 810 * before the commit RPC, which is felt to be "good 811 * enough". Changing the last argument to ncl_flush() to 812 * a 1 would force a commit operation, if it is felt a 813 * commit is necessary now. 814 * for NFS Version 4 - flush the dirty buffers and commit them, if 815 * nfscl_mustflush() says this is necessary. 816 * It is necessary if there is no write delegation held, 817 * in order to satisfy open/close coherency. 818 * If the file isn't cached on local stable storage, 819 * it may be necessary in order to detect "out of space" 820 * errors from the server, if the write delegation 821 * issued by the server doesn't allow the file to grow. 822 */ 823 /* ARGSUSED */ 824 static int 825 nfs_close(struct vop_close_args *ap) 826 { 827 struct vnode *vp = ap->a_vp; 828 struct nfsnode *np = VTONFS(vp); 829 struct nfsvattr nfsva; 830 struct ucred *cred; 831 int error = 0, ret, localcred = 0; 832 int fmode = ap->a_fflag; 833 834 if (NFSCL_FORCEDISM(vp->v_mount)) 835 return (0); 836 /* 837 * During shutdown, a_cred isn't valid, so just use root. 838 */ 839 if (ap->a_cred == NOCRED) { 840 cred = newnfs_getcred(); 841 localcred = 1; 842 } else { 843 cred = ap->a_cred; 844 } 845 if (vp->v_type == VREG) { 846 /* 847 * Examine and clean dirty pages, regardless of NMODIFIED. 
848 * This closes a major hole in close-to-open consistency. 849 * We want to push out all dirty pages (and buffers) on 850 * close, regardless of whether they were dirtied by 851 * mmap'ed writes or via write(). 852 */ 853 if (nfs_clean_pages_on_close && vp->v_object) { 854 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 855 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 856 if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK) 857 return (EBADF); 858 } 859 VM_OBJECT_WLOCK(vp->v_object); 860 vm_object_page_clean(vp->v_object, 0, 0, 0); 861 VM_OBJECT_WUNLOCK(vp->v_object); 862 } 863 NFSLOCKNODE(np); 864 if (np->n_flag & NMODIFIED) { 865 NFSUNLOCKNODE(np); 866 if (NFS_ISV3(vp)) { 867 /* 868 * Under NFSv3 we have dirty buffers to dispose of. We 869 * must flush them to the NFS server. We have the option 870 * of waiting all the way through the commit rpc or just 871 * waiting for the initial write. The default is to only 872 * wait through the initial write so the data is in the 873 * server's cache, which is roughly similar to the state 874 * a standard disk subsystem leaves the file in on close(). 875 * 876 * We cannot clear the NMODIFIED bit in np->n_flag due to 877 * potential races with other processes, and certainly 878 * cannot clear it if we don't commit. 879 * These races occur when there is no longer the old 880 * traditional vnode locking implemented for Vnode Ops. 881 */ 882 int cm = newnfs_commit_on_close ? 1 : 0; 883 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 884 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 885 if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK) 886 return (EBADF); 887 } 888 error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); 889 /* np->n_flag &= ~NMODIFIED; */ 890 } else if (NFS_ISV4(vp)) { 891 if (nfscl_mustflush(vp) != 0) { 892 int cm = newnfs_commit_on_close ? 
1 : 0; 893 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 894 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 895 if (VN_IS_DOOMED(vp) && ap->a_fflag != 896 FNONBLOCK) 897 return (EBADF); 898 } 899 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 900 cm, 0); 901 /* 902 * as above w.r.t races when clearing 903 * NMODIFIED. 904 * np->n_flag &= ~NMODIFIED; 905 */ 906 } 907 } else { 908 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 909 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 910 if (VN_IS_DOOMED(vp) && ap->a_fflag != 911 FNONBLOCK) 912 return (EBADF); 913 } 914 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 915 } 916 NFSLOCKNODE(np); 917 } 918 /* 919 * Invalidate the attribute cache in all cases. 920 * An open is going to fetch fresh attrs any way, other procs 921 * on this node that have file open will be forced to do an 922 * otw attr fetch, but this is safe. 923 * --> A user found that their RPC count dropped by 20% when 924 * this was commented out and I can't see any requirement 925 * for it, so I've disabled it when negative lookups are 926 * enabled. (What does this have to do with negative lookup 927 * caching? Well nothing, except it was reported by the 928 * same user that needed negative lookup caching and I wanted 929 * there to be a way to disable it to see if it 930 * is the cause of some caching/coherency issue that might 931 * crop up.) 932 */ 933 if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { 934 np->n_attrstamp = 0; 935 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 936 } 937 if (np->n_flag & NWRITEERR) { 938 np->n_flag &= ~NWRITEERR; 939 error = np->n_error; 940 } 941 NFSUNLOCKNODE(np); 942 } 943 944 if (NFS_ISV4(vp)) { 945 /* 946 * Get attributes so "change" is up to date. 
		 */
		if (error == 0 && nfscl_mustflush(vp) != 0 &&
		    vp->v_type == VREG &&
		    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
			    NULL);
			if (!ret) {
				np->n_change = nfsva.na_filerev;
				(void) nfscl_loadattrcache(&vp, &nfsva, NULL,
				    NULL, 0, 0);
			}
		}

		/*
		 * and do the close.
		 */
		ret = nfsrpc_close(vp, 0, ap->a_td);
		if (!error && ret)
			error = ret;
		if (error)
			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
			    (gid_t)0);
	}
	if (newnfs_directio_enable)
		KASSERT((np->n_directio_asyncwr == 0),
		    ("nfs_close: dirty unflushed (%d) directio buffers\n",
		    np->n_directio_asyncwr));
	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		NFSLOCKNODE(np);
		KASSERT((np->n_directio_opens > 0),
		    ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
		np->n_directio_opens--;
		if (np->n_directio_opens == 0)
			np->n_flag &= ~NNONCACHE;
		NFSUNLOCKNODE(np);
	}
	if (localcred)
		NFSFREECRED(cred);
	return (error);
}

/*
 * nfs getattr call from vfs.
 *
 * Fills in *ap->a_vap with the attributes of ap->a_vp.  The attribute
 * cache is tried first.  On a miss, when NFS_ISV34(vp) holds and access
 * cache priming is enabled, nfs34_access_otw() is called and the cache
 * is tried again (presumably the access reply refreshes the cached
 * attributes -- confirm in nfs34_access_otw()).  Failing that, a Getattr
 * RPC is done and its reply is loaded into the attribute cache.
 *
 * Returns 0 on success, otherwise the error from the RPC or from loading
 * the attribute cache; for NFSv4 that error is passed through
 * nfscl_maperr().
 */
static int
nfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;	/* XXX */
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct nfsvattr nfsva;
	struct vattr *vap = ap->a_vap;
	struct vattr vattr;

	/*
	 * Update local times for special files.
	 */
	NFSLOCKNODE(np);
	if (np->n_flag & (NACC | NUPD))
		np->n_flag |= NCHG;
	NFSUNLOCKNODE(np);
	/*
	 * First look in the cache.
	 */
	if (ncl_getattrcache(vp, &vattr) == 0) {
		ncl_copy_vattr(vap, &vattr);

		/*
		 * Get the local modify time for the case of a write
		 * delegation.
		 */
		nfscl_deleggetmodtime(vp, &vap->va_mtime);
		return (0);
	}

	/*
	 * Cache miss: optionally do the access call, counted as an
	 * access cache miss, and see if that made the attribute cache
	 * usable.
	 */
	if (NFS_ISV34(vp) && nfs_prime_access_cache &&
	    nfsaccess_cache_timeout > 0) {
		NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
		nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
		if (ncl_getattrcache(vp, ap->a_vap) == 0) {
			nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
			return (0);
		}
	}
	/* Last resort: an explicit Getattr RPC. */
	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
	if (!error)
		error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
	if (!error) {
		/*
		 * Get the local modify time for the case of a write
		 * delegation.
		 */
		nfscl_deleggetmodtime(vp, &vap->va_mtime);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

/*
 * nfs setattr call.
 *
 * Validates the request, flushes or invalidates cached buffers where
 * the change requires it, and then does the Setattr RPC through
 * nfs_setattrrpc().
 *
 * For a size change on anything other than a directory or special file,
 * the size held before the call is remembered in tsize, the new size is
 * installed locally before the RPC, and the remembered size is put back
 * (np->n_size, np->n_vattr.na_size and the pager size) if the RPC
 * fails.  On success np->n_localmodtime is set to the current uptime.
 *
 * Returns 0 or an errno: EOPNOTSUPP for a flags change, EROFS on a
 * read-only mount, EISDIR for a size change on a directory, or whatever
 * the buffer invalidation or the RPC returned.
 */
static int
nfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct thread *td = curthread;	/* XXX */
	struct vattr *vap = ap->a_vap;
	int error = 0;
	u_quad_t tsize;
	struct timespec ts;

#ifndef nolint
	tsize = (u_quad_t)0;
#endif

	/*
	 * Setting of flags and marking of atimes are not supported.
	 * (Only a va_flags change is actually rejected by this test.)
	 */
	if (vap->va_flags != VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Disallow write attempts if the filesystem is mounted read-only.
	 * The va_flags term can no longer be true here, since a flags
	 * change has already been rejected above.
	 */
	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL ||
	    vap->va_birthtime.tv_sec != VNOVAL ||
	    vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			/*
			 * A size makes no sense for these types.  If the
			 * size was the only thing being set, there is
			 * nothing to do; otherwise drop the size and set
			 * the remaining attributes.
			 */
			if (vap->va_mtime.tv_sec == VNOVAL &&
			    vap->va_atime.tv_sec == VNOVAL &&
			    vap->va_birthtime.tv_sec == VNOVAL &&
			    vap->va_mode == (mode_t)VNOVAL &&
			    vap->va_uid == (uid_t)VNOVAL &&
			    vap->va_gid == (gid_t)VNOVAL)
				return (0);
			vap->va_size = VNOVAL;
			break;
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			/*
			 *  We run vnode_pager_setsize() early (why?),
			 * we must set np->n_size now to avoid vinvalbuf
			 * V_SAVE races that might setsize a lower
			 * value.
			 */
			NFSLOCKNODE(np);
			tsize = np->n_size;
			NFSUNLOCKNODE(np);
			/*
			 * NOTE(review): the value returned here is
			 * overwritten below without being examined.
			 */
			error = ncl_meta_setsize(vp, td, vap->va_size);
			NFSLOCKNODE(np);
			if (np->n_flag & NMODIFIED) {
				tsize = np->n_size;
				NFSUNLOCKNODE(np);
				/*
				 * Truncating to zero: the dirty data can
				 * be discarded rather than written back.
				 */
				error = ncl_vinvalbuf(vp, vap->va_size == 0 ?
				    0 : V_SAVE, td, 1);
				if (error != 0) {
					vnode_pager_setsize(vp, tsize);
					return (error);
				}
				/*
				 * Call nfscl_delegmodtime() to set the modify time
				 * locally, as required.
				 */
				nfscl_delegmodtime(vp);
			} else
				NFSUNLOCKNODE(np);
			/*
			 * np->n_size has already been set to vap->va_size
			 * in ncl_meta_setsize(). We must set it again since
			 * nfs_loadattrcache() could be called through
			 * ncl_meta_setsize() and could modify np->n_size.
			 */
			NFSLOCKNODE(np);
			np->n_vattr.na_size = np->n_size = vap->va_size;
			NFSUNLOCKNODE(np);
		}
	} else {
		/*
		 * No size change.  If a time is being set on a regular
		 * file with modified data, write that data back first.
		 */
		NFSLOCKNODE(np);
		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
			NFSUNLOCKNODE(np);
			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
			if (error == EINTR || error == EIO)
				return (error);
		} else
			NFSUNLOCKNODE(np);
	}
	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
	if (vap->va_size != VNOVAL) {
		if (error == 0) {
			/* Record when the size was last changed locally. */
			nanouptime(&ts);
			NFSLOCKNODE(np);
			np->n_localmodtime = ts;
			NFSUNLOCKNODE(np);
		} else {
			/* The RPC failed: restore the previous size. */
			NFSLOCKNODE(np);
			np->n_size = np->n_vattr.na_size = tsize;
			vnode_pager_setsize(vp, tsize);
			NFSUNLOCKNODE(np);
		}
	}
	return (error);
}

/*
 * Do an nfs setattr rpc.
 *
 * When NFS_ISV34(vp) holds, every entry of the node's access cache is
 * invalidated (stamp set to 0) and NDELEGMOD is set before the RPC;
 * presumably because the new attributes can change the access rights.
 * Attributes returned with the reply are loaded into the attribute
 * cache; a failure to load them is reported only when the RPC itself
 * succeeded.  For NFSv4 an error is mapped by nfscl_maperr() using the
 * uid/gid that were being set.
 */
static int
nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	struct nfsnode *np = VTONFS(vp);
	int error, ret, attrflag, i;
	struct nfsvattr nfsva;

	if (NFS_ISV34(vp)) {
		NFSLOCKNODE(np);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
			np->n_accesscache[i].stamp = 0;
		np->n_flag |= NDELEGMOD;
		NFSUNLOCKNODE(np);
		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
	}
	error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
	    NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
	return (error);
}

/*
 * nfs lookup call, one step at a time...
 * First look in cache
 * If not found, unlock the directory nfsnode and do the rpc
 *
 * On success 0 is returned and *ap->a_vpp holds the vnode found.
 * ENOENT means the name does not exist.  EJUSTRETURN is returned when
 * the last component of a CREATE or RENAME does not exist and the mount
 * is writable.  EROFS is returned for a DELETE or RENAME of the last
 * component (or a CREATE/RENAME of a missing name) on a read-only mount,
 * ENOTDIR when the directory vnode is not a directory, and EISDIR when a
 * RENAME names the directory itself.
 */
static int
nfs_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct mount *mp = dvp->v_mount;
	int flags = cnp->cn_flags;
	struct vnode *newvp;
	struct nfsmount *nmp;
	struct nfsnode *np, *newnp;
	int error = 0, attrflag, dattrflag, ltype, ncticks;
	struct thread *td = curthread;
	struct nfsfh *nfhp;
	struct nfsvattr dnfsva, nfsva;
	struct vattr vattr;
	struct timespec nctime, ts;
	uint32_t openmode;

	*vpp = NULLVP;
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(mp);
	np = VTONFS(dvp);

	/* For NFSv4, wait until any remove is done. */
	NFSLOCKNODE(np);
	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
		np->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
	}
	NFSUNLOCKNODE(np);

	error = vn_dir_check_exec(dvp, cnp);
	if (error != 0)
		return (error);
	/*
	 * As used below, cache_lookup() yields -1 for a positive hit,
	 * ENOENT for a negative hit and 0 for a miss; any other positive
	 * value is returned to the caller as an error.
	 */
	error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
	if (error > 0 && error != ENOENT)
		return (error);
	if (error == -1) {
		/*
		 * Lookups of "." are special and always return the
		 * current directory.  cache_lookup() already handles
		 * associated locking bookkeeping, etc.
		 */
		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
			/* XXX: Is this really correct? */
			if (cnp->cn_nameiop != LOOKUP &&
			    (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			return (0);
		}

		/*
		 * We only accept a positive hit in the cache if the
		 * change time of the file matches our cached copy.
		 * Otherwise, we discard the cache entry and fallback
		 * to doing a lookup RPC.  We also only trust cache
		 * entries for less than nm_nametimeo seconds.
		 *
		 * To better handle stale file handles and attributes,
		 * clear the attribute cache of this node if it is a
		 * leaf component, part of an open() call, and not
		 * locally modified before fetching the attributes.
		 * This should allow stale file handles to be detected
		 * here where we can fall back to a LOOKUP RPC to
		 * recover rather than having nfs_open() detect the
		 * stale file handle and failing open(2) with ESTALE.
		 */
		newvp = *vpp;
		newnp = VTONFS(newvp);
		if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
		    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
		    !(newnp->n_flag & NMODIFIED)) {
			NFSLOCKNODE(newnp);
			newnp->n_attrstamp = 0;
			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
			NFSUNLOCKNODE(newnp);
		}
		/*
		 * The hit is accepted outright when nfscl_nodeleg()
		 * returns 0 (presumably: a delegation is held for the
		 * file -- confirm), otherwise only if the entry is young
		 * enough and the file's ctime still matches.
		 */
		if (nfscl_nodeleg(newvp, 0) == 0 ||
		    ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
		    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
		    timespeccmp(&vattr.va_ctime, &nctime, ==))) {
			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
			if (cnp->cn_nameiop != LOOKUP &&
			    (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			return (0);
		}
		/* Stale entry: drop it and fall through to the RPC. */
		cache_purge(newvp);
		if (dvp != newvp)
			vput(newvp);
		else
			vrele(newvp);
		*vpp = NULLVP;
	} else if (error == ENOENT) {
		if (VN_IS_DOOMED(dvp))
			return (ENOENT);
		/*
		 * We only accept a negative hit in the cache if the
		 * modification time of the parent directory matches
		 * the cached copy in the name cache entry.
		 * Otherwise, we discard all of the negative cache
		 * entries for this directory.  We also only trust
		 * negative cache entries for up to nm_negnametimeo
		 * seconds.
		 */
		if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
			return (ENOENT);
		}
		cache_purge_negative(dvp);
	}

	/* With the block below disabled, openmode is always 0. */
	openmode = 0;
#if 0
	/*
	 * The use of LookupOpen breaks some builds.  It is disabled
	 * until that is fixed.
	 */
	/*
	 * If this an NFSv4.1/4.2 mount using the "oneopenown" mount
	 * option, it is possible to do the Open operation in the same
	 * compound as Lookup, so long as delegations are not being
	 * issued.  This saves doing a separate RPC for Open.
	 * For pnfs, do not do this, since the Open+LayoutGet will
	 * be needed as a separate RPC.
	 */
	NFSLOCKMNT(nmp);
	if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && !NFSHASPNFS(nmp) &&
	    (nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 &&
	    (!NFSMNT_RDONLY(mp) || (flags & OPENWRITE) == 0) &&
	    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN)) {
		if ((flags & OPENREAD) != 0)
			openmode |= NFSV4OPEN_ACCESSREAD;
		if ((flags & OPENWRITE) != 0)
			openmode |= NFSV4OPEN_ACCESSWRITE;
	}
	NFSUNLOCKMNT(nmp);
#endif

	newvp = NULLVP;
	NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
	/*
	 * Remember the time before the RPC; it is compared with
	 * n_localmodtime below to detect stale reply attributes.
	 */
	nanouptime(&ts);
	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
	    NULL, openmode);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (error) {
		/*
		 * NOTE(review): newvp was set to NULLVP just before the
		 * RPC and is not passed to it, so this test looks like
		 * it can never be true.
		 */
		if (newvp != NULLVP) {
			vput(newvp);
			*vpp = NULLVP;
		}

		if (error != ENOENT) {
			if (NFS_ISV4(dvp))
				error = nfscl_maperr(td, error, (uid_t)0,
				    (gid_t)0);
			return (error);
		}

		/* The requested file was not found. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN)) {
			/*
			 * XXX: UFS does a full VOP_ACCESS(dvp,
			 * VWRITE) here instead of just checking
			 * MNT_RDONLY.
			 */
			if (mp->mnt_flag & MNT_RDONLY)
				return (EROFS);
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}

		if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
			/*
			 * Cache the modification time of the parent
			 * directory from the post-op attributes in
			 * the name cache entry.  The negative cache
			 * entry will be ignored once the directory
			 * has changed.  Don't bother adding the entry
			 * if the directory has already changed.
			 */
			NFSLOCKNODE(np);
			if (timespeccmp(&np->n_vattr.na_mtime,
			    &dnfsva.na_mtime, ==)) {
				NFSUNLOCKNODE(np);
				cache_enter_time(dvp, NULL, cnp,
				    &dnfsva.na_mtime, NULL);
			} else
				NFSUNLOCKNODE(np);
		}
		return (ENOENT);
	}

	/*
	 * Handle RENAME case...
	 */
	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
		/* The name resolves to the directory itself. */
		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
			free(nfhp, M_NFSFH);
			return (EISDIR);
		}
		/* From here on np refers to the node that was found. */
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
		    LK_EXCLUSIVE);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		/*
		 * If n_localmodtime >= time before RPC, then
		 * a file modification operation, such as
		 * VOP_SETATTR() of size, has occurred while
		 * the Lookup RPC and acquisition of the vnode
		 * happened.  As such, the attributes might
		 * be stale, with possibly an incorrect size.
		 */
		NFSLOCKNODE(np);
		if (timespecisset(&np->n_localmodtime) &&
		    timespeccmp(&np->n_localmodtime, &ts, >=)) {
			NFSCL_DEBUG(4, "nfs_lookup: rename localmod "
			    "stale attributes\n");
			attrflag = 0;
		}
		NFSUNLOCKNODE(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
		*vpp = newvp;
		cnp->cn_flags |= SAVENAME;
		return (0);
	}

	if (flags & ISDOTDOT) {
		/*
		 * Looking up "..": dvp is unlocked while the parent's
		 * vnode is obtained and relocked afterwards.  The mount
		 * is kept busy across that window, and dvp is rechecked
		 * for having been doomed after each relock.
		 */
		ltype = NFSVOPISLOCKED(dvp);
		error = vfs_busy(mp, MBF_NOWAIT);
		if (error != 0) {
			vfs_ref(mp);
			NFSVOPUNLOCK(dvp);
			error = vfs_busy(mp, 0);
			NFSVOPLOCK(dvp, ltype | LK_RETRY);
			vfs_rel(mp);
			if (error == 0 && VN_IS_DOOMED(dvp)) {
				vfs_unbusy(mp);
				error = ENOENT;
			}
			if (error != 0)
				return (error);
		}
		NFSVOPUNLOCK(dvp);
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
		    cnp->cn_lkflags);
		if (error == 0)
			newvp = NFSTOV(np);
		vfs_unbusy(mp);
		if (newvp != dvp)
			NFSVOPLOCK(dvp, ltype | LK_RETRY);
		if (VN_IS_DOOMED(dvp)) {
			if (error == 0) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
			error = ENOENT;
		}
		if (error != 0)
			return (error);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
		/* The name resolves to dvp itself: just add a reference. */
		free(nfhp, M_NFSFH);
		VREF(dvp);
		newvp = dvp;
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else {
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
		    cnp->cn_lkflags);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		/*
		 * If n_localmodtime >= time before RPC, then
		 * a file modification operation, such as
		 * VOP_SETATTR() of size, has occurred while
		 * the Lookup RPC and acquisition of the vnode
		 * happened.  As such, the attributes might
		 * be stale, with possibly an incorrect size.
		 */
		NFSLOCKNODE(np);
		if (timespecisset(&np->n_localmodtime) &&
		    timespeccmp(&np->n_localmodtime, &ts, >=)) {
			NFSCL_DEBUG(4, "nfs_lookup: localmod "
			    "stale attributes\n");
			attrflag = 0;
		}
		NFSUNLOCKNODE(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
		else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
		    !(np->n_flag & NMODIFIED)) {
			/*
			 * Flush the attribute cache when opening a
			 * leaf node to ensure that fresh attributes
			 * are fetched in nfs_open() since we did not
			 * fetch attributes from the LOOKUP reply.
			 */
			NFSLOCKNODE(np);
			np->n_attrstamp = 0;
			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
			NFSUNLOCKNODE(np);
		}
	}
	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	/*
	 * Enter the name in the name cache only when attributes for the
	 * new vnode were loaded (and, for a directory, attributes for
	 * dvp as well), and not for the last component of a DELETE.
	 */
	if ((cnp->cn_flags & MAKEENTRY) && dvp != newvp &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
	    attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
		    newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime);
	*vpp = newvp;
	return (0);
}

/*
 * nfs read call.
 * Just call ncl_bioread() to do the work.
 * Only regular files can be read this way: a directory yields EISDIR
 * and any other vnode type yields EOPNOTSUPP.
 */
static int
nfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 * Returns EINVAL unless the vnode is a symbolic link; otherwise the
 * link is read through ncl_bioread() with no I/O flags.
 */
static int
nfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);
	return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by ncl_doio() from below the buffer cache.
 * Attributes returned with the reply are loaded into the attribute
 * cache; a failure to load them is reported only when the RPC itself
 * succeeded.  NFSv4 errors are mapped by nfscl_maperr().
 */
int
ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;

	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
	    &attrflag, NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs read rpc call
 * Ditto above
 *
 * On a pNFS mount the read is first attempted through nfscl_doiods().
 * error starts out as EIO so that the ordinary Read RPC is done both
 * when the mount is not pNFS and when nfscl_doiods() fails.
 */
int
ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;
	struct nfsmount *nmp;

	nmp = VFSTONFS(vp->v_mount);
	error = EIO;
	attrflag = 0;
	if (NFSHASPNFS(nmp))
		error = nfscl_doiods(vp, uiop, NULL, NULL,
		    NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td);
	NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error);
	if (error != 0)
		error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva,
		    &attrflag, NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs write call
 *
 * Same structure as ncl_readrpc(): nfscl_doiods() is tried first on a
 * pNFS mount and the ordinary Write RPC is done when that is not
 * applicable or fails.  *iomode and *must_commit are passed through to
 * the RPC layer; when DOINGASYNC(vp) holds, *iomode is forced to
 * NFSWRITE_FILESYNC before returning.
 */
int
ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    int *iomode, int *must_commit, int called_from_strategy)
{
	struct nfsvattr nfsva;
	int error, attrflag, ret;
	struct nfsmount *nmp;

	nmp = VFSTONFS(vp->v_mount);
	error = EIO;
	attrflag = 0;
	if (NFSHASPNFS(nmp))
		error = nfscl_doiods(vp, uiop, iomode, must_commit,
		    NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td);
	NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error);
	if (error != 0)
		error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
		    uiop->uio_td, &nfsva, &attrflag, NULL,
		    called_from_strategy);
	if (attrflag) {
		/*
		 * NOTE(review): ND_NFSV4 is tested against the nfsnode's
		 * n_flag here, although its name suggests a flag of a
		 * different structure -- confirm this is intended.  The
		 * two calls differ only in the fifth argument.
		 */
		if (VTONFS(vp)->n_flag & ND_NFSV4)
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
			    1);
		else
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
			    1);
		if (ret && !error)
			error = ret;
	}
	if (DOINGASYNC(vp))
		*iomode = NFSWRITE_FILESYNC;
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs mknod rpc
 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 * mode set to specify the file type and the size field for rdev.
 *
 * Only VCHR, VBLK, VFIFO and VSOCK are accepted; anything else yields
 * EOPNOTSUPP.  On success *vpp is set to the new vnode, which was
 * obtained with LK_EXCLUSIVE.  Whatever the outcome, the directory node
 * is marked NMODIFIED, and its attribute cache is invalidated when no
 * directory attributes came back with the reply.
 */
static int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsvattr nfsva, dnfsva;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL, *dnp;
	struct nfsfh *nfhp;
	struct vattr vattr;
	int error = 0, attrflag, dattrflag;
	u_int32_t rdev;

	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = vap->va_rdev;
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = 0xffffffff;
	else
		return (EOPNOTSUPP);
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
	    rdev, vap->va_type, cnp->cn_cred, curthread, &dnfsva,
	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
	if (!error) {
		/* No file handle in the reply: look the new name up. */
		if (!nfhp)
			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, curthread,
			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
			    NULL, 0);
		if (nfhp)
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
			    curthread, &np, NULL, LK_EXCLUSIVE);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (!error) {
		/*
		 * NOTE(review): if no file handle was obtained above, np
		 * is still NULL at this point while error is 0 -- confirm
		 * that cannot happen or that NFSTOV() tolerates it.
		 */
		newvp = NFSTOV(np);
		if (attrflag != 0) {
			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
			if (error != 0)
				vput(newvp);
		}
	}
	if (!error) {
		*vpp = newvp;
	} else if (NFS_ISV4(dvp)) {
		error = nfscl_maperr(curthread, error, vap->va_uid,
		    vap->va_gid);
	}
	dnp = VTONFS(dvp);
	NFSLOCKNODE(dnp);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag) {
		dnp->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	NFSUNLOCKNODE(dnp);
	return (error);
}

/*
 * nfs mknod vop
 * just call nfs_mknodrpc() to do the work.
 */
/* ARGSUSED */
static int
nfs_mknod(struct vop_mknod_args *ap)
{
	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
}

/* Protects the static verifier state in nfs_get_cverf(). */
static struct mtx nfs_cverf_mtx;
MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex",
    MTX_DEF);

/*
 * Return a create verifier.  The first call seeds the verifier with two
 * arc4random() words; every later call increments it by one.  The
 * update and the copy-out are done under nfs_cverf_mtx.
 */
static nfsquad_t
nfs_get_cverf(void)
{
	static nfsquad_t cverf;
	nfsquad_t ret;
	static int cverf_initialized = 0;

	mtx_lock(&nfs_cverf_mtx);
	if (cverf_initialized == 0) {
		cverf.lval[0] = arc4random();
		cverf.lval[1] = arc4random();
		cverf_initialized = 1;
	} else
		cverf.qval++;
	ret = cverf;
	mtx_unlock(&nfs_cverf_mtx);

	return (ret);
}

/*
 * nfs file create call
 *
 * Sockets are handed to nfs_mknodrpc().  On success *ap->a_vpp is set
 * to the new vnode, obtained with LK_EXCLUSIVE.
 */
static int
nfs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	struct nfsmount *nmp;
	struct nfsvattr dnfsva, nfsva;
	struct nfsfh *nfhp;
	nfsquad_t cverf;
	int error = 0, attrflag, dattrflag, fmode = 0;
	struct vattr vattr;

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK)
		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
	dnp = VTONFS(dvp);
	nmp = VFSTONFS(dvp->v_mount);
again:
	/* For NFSv4, wait until any remove is done. */
	NFSLOCKNODE(dnp);
	while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
		dnp->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
	}
	NFSUNLOCKNODE(dnp);

	/* A fresh verifier is used for every attempt. */
	cverf = nfs_get_cverf();
	error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    vap, cverf, fmode, cnp->cn_cred, curthread, &dnfsva, &nfsva,
	    &nfhp, &attrflag, &dattrflag, NULL);
	if (!error) {
		/* No file handle in the reply: look the new name up. */
		if (nfhp == NULL)
			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, curthread,
			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
			    NULL, 0);
		if (nfhp != NULL)
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
			    curthread, &np, NULL, LK_EXCLUSIVE);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (!error) {
		/*
		 * NOTE(review): if no file handle was obtained above, np
		 * is still NULL at this point while error is 0 -- confirm
		 * that cannot happen or that NFSTOV() tolerates it.
		 */
		newvp = NFSTOV(np);
		if (attrflag == 0)
			error = nfsrpc_getattr(newvp, cnp->cn_cred,
			    curthread, &nfsva, NULL);
		if (error == 0)
			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (error) {
		if (newvp != NULL) {
			vput(newvp);
			newvp = NULL;
		}
		/*
		 * The server does not support exclusive create: retry
		 * as an ordinary create.
		 */
		if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
		    error == NFSERR_NOTSUPP) {
			fmode &= ~O_EXCL;
			goto again;
		}
	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
		/*
		 * After an exclusive create, set the requested
		 * attributes with a separate Setattr RPC when
		 * nfscl_checksattr() says that is needed.
		 */
		if (nfscl_checksattr(vap, &nfsva)) {
			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
			    curthread, &nfsva, &attrflag, NULL);
			if (error && (vap->va_uid != (uid_t)VNOVAL ||
			    vap->va_gid != (gid_t)VNOVAL)) {
				/* try again without setting uid/gid */
				vap->va_uid = (uid_t)VNOVAL;
				/*
				 * NOTE(review): the cast is uid_t although
				 * the field is va_gid; the comparison just
				 * above uses gid_t.
				 */
				vap->va_gid = (uid_t)VNOVAL;
				error = nfsrpc_setattr(newvp, vap, NULL,
				    cnp->cn_cred, curthread, &nfsva,
				    &attrflag, NULL);
			}
			if (attrflag)
				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
				    NULL, 0, 1);
			if (error != 0)
				vput(newvp);
		}
	}
	if (!error) {
		if ((cnp->cn_flags & MAKEENTRY) && attrflag) {
			if (dvp != newvp)
				cache_enter_time(dvp, newvp, cnp,
				    &nfsva.na_ctime, NULL);
			else
				printf("nfs_create: bogus NFS server returned "
				    "the directory as the new file object\n");
		}
		*ap->a_vpp = newvp;
	} else if (NFS_ISV4(dvp)) {
		error = nfscl_maperr(curthread, error, vap->va_uid,
		    vap->va_gid);
	}
	/*
	 * The directory has changed: mark it modified and, when no
	 * directory attributes came back, invalidate its attribute cache.
	 */
	NFSLOCKNODE(dnp);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag) {
		dnp->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	NFSUNLOCKNODE(dnp);
	return (error);
}

/*
 * nfs file remove call
 * To try and make nfs semantics closer to ufs semantics, a file that has
 * other processes using the vnode is renamed instead of removed and then
 * removed later on the last close.
 * - If v_usecount > 1
 *	  If a rename is not already in the works
 *	     call nfs_sillyrename() to set it up
 *     else
 *	  do the remove rpc
 *
 * The remove rpc is also done for a file that has already been
 * sillyrenamed when a Getattr shows it still has more than one link.
 * Removing a directory through this vop yields EPERM.  The file's
 * attribute cache is invalidated in every case.
 */
static int
nfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

	KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = ncl_vinvalbuf(vp, 0, curthread, 1);
		if (error != EINTR && error != EIO)
			/* Do the rpc */
			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, curthread);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	NFSLOCKNODE(np);
	np->n_attrstamp = 0;
	NFSUNLOCKNODE(np);
	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 *
 * Removes the name recorded in the sillyrename structure, using the
 * directory, name and credentials saved there.  Nothing is done, and 0
 * is returned, when the saved directory vnode has type VBAD.
 */
int
ncl_removeit(struct sillyrename *sp, struct vnode *vp)
{
	/*
	 * Make sure that the directory vnode is still valid.
	 * XXX we should lock sp->s_dvp here.
	 */
	if (sp->s_dvp->v_type == VBAD)
		return (0);
	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
	    sp->s_cred, NULL));
}

/*
 * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
 *
 * NREMOVEINPROG is set on the directory node for the duration of the
 * RPC.  Afterwards it is cleared and, if NREMOVEWANT was set meanwhile,
 * the threads sleeping on the node are woken up.  The directory is then
 * marked NMODIFIED, and its attribute cache is either reloaded from the
 * reply or invalidated.
 */
static int
nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td)
{
	struct nfsvattr dnfsva;
	struct nfsnode *dnp = VTONFS(dvp);
	int error = 0, dattrflag;

	NFSLOCKNODE(dnp);
	dnp->n_flag |= NREMOVEINPROG;
	NFSUNLOCKNODE(dnp);
	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
	    &dattrflag, NULL);
	NFSLOCKNODE(dnp);
	if ((dnp->n_flag & NREMOVEWANT)) {
		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
		NFSUNLOCKNODE(dnp);
		wakeup((caddr_t)dnp);
	} else {
		dnp->n_flag &= ~NREMOVEINPROG;
		NFSUNLOCKNODE(dnp);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	NFSLOCKNODE(dnp);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag) {
		dnp->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	NFSUNLOCKNODE(dnp);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs file rename call
 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct nfsnode *fnp = VTONFS(ap->a_fvp);
	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
	struct nfsv4node *newv4 = NULL;
	int error;

	KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
	    (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file.  If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	error = VOP_FSYNC(fvp, MNT_WAIT, curthread);
	NFSVOPUNLOCK(fvp);
	if (!error && tvp)
		error = VOP_FSYNC(tvp, MNT_WAIT, curthread);
	if (error)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 * When the sillyrename succeeds (returns 0), the target is released
	 * and the rename proceeds as if there were no target.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
	    curthread);

	if (error == 0 && NFS_ISV4(tdvp)) {
		/*
		 * For NFSv4, check to see if it is the same name and
		 * replace the name, if it is different.
		 * The replacement is allocated up front, before the node
		 * locks are taken, and freed again below if it turns out
		 * not to be needed.
		 */
		newv4 = malloc(
		    sizeof (struct nfsv4node) +
		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
		    M_NFSV4NODE, M_WAITOK);
		NFSLOCKNODE(tdnp);
		NFSLOCKNODE(fnp);
		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
		      NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
		      tcnp->cn_namelen) ||
		      tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
		      NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			tdnp->n_fhp->nfh_len))) {
#ifdef notdef
{ char nnn[100]; int nnnl;
nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
bcopy(tcnp->cn_nameptr, nnn, nnnl);
nnn[nnnl] = '\0';
printf("ren replace=%s\n",nnn);
}
#endif
			/*
			 * Record the new parent directory file handle
			 * and the new name in the file's nfsv4node.
			 */
			free(fnp->n_v4, M_NFSV4NODE);
			fnp->n_v4 = newv4;
			newv4 = NULL;
			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			    tdnp->n_fhp->nfh_len);
			NFSBCOPY(tcnp->cn_nameptr,
			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
		}
		NFSUNLOCKNODE(tdnp);
		NFSUNLOCKNODE(fnp);
		if (newv4 != NULL)
			free(newv4, M_NFSV4NODE);
	}

	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	/*
	 * Release all four vnodes handed to the vop.  tvp may have been
	 * released and cleared above already.
	 */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc called from nfs_remove() above
 *
 * Renames the file within its own directory to the name held in the
 * sillyrename structure.  (Presumably reached from nfs_remove() by way
 * of nfs_sillyrename() -- confirm; nfs_remove() does not call this
 * function directly.)
 */
static int
nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
    struct sillyrename *sp)
{

	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
	    curthread));
}

/*
 * Do an nfs rename rpc.  Called from nfs_rename() and nfs_renameit().
2149 */ 2150 static int 2151 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, 2152 int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, 2153 int tnamelen, struct ucred *cred, struct thread *td) 2154 { 2155 struct nfsvattr fnfsva, tnfsva; 2156 struct nfsnode *fdnp = VTONFS(fdvp); 2157 struct nfsnode *tdnp = VTONFS(tdvp); 2158 int error = 0, fattrflag, tattrflag; 2159 2160 error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, 2161 tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, 2162 &tattrflag, NULL, NULL); 2163 NFSLOCKNODE(fdnp); 2164 fdnp->n_flag |= NMODIFIED; 2165 if (fattrflag != 0) { 2166 NFSUNLOCKNODE(fdnp); 2167 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); 2168 } else { 2169 fdnp->n_attrstamp = 0; 2170 NFSUNLOCKNODE(fdnp); 2171 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 2172 } 2173 NFSLOCKNODE(tdnp); 2174 tdnp->n_flag |= NMODIFIED; 2175 if (tattrflag != 0) { 2176 NFSUNLOCKNODE(tdnp); 2177 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); 2178 } else { 2179 tdnp->n_attrstamp = 0; 2180 NFSUNLOCKNODE(tdnp); 2181 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2182 } 2183 if (error && NFS_ISV4(fdvp)) 2184 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2185 return (error); 2186 } 2187 2188 /* 2189 * nfs hard link create call 2190 */ 2191 static int 2192 nfs_link(struct vop_link_args *ap) 2193 { 2194 struct vnode *vp = ap->a_vp; 2195 struct vnode *tdvp = ap->a_tdvp; 2196 struct componentname *cnp = ap->a_cnp; 2197 struct nfsnode *np, *tdnp; 2198 struct nfsvattr nfsva, dnfsva; 2199 int error = 0, attrflag, dattrflag; 2200 2201 /* 2202 * Push all writes to the server, so that the attribute cache 2203 * doesn't get "out of sync" with the server. 2204 * XXX There should be a better way! 
2205 */ 2206 VOP_FSYNC(vp, MNT_WAIT, curthread); 2207 2208 error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, 2209 cnp->cn_cred, curthread, &dnfsva, &nfsva, &attrflag, 2210 &dattrflag, NULL); 2211 tdnp = VTONFS(tdvp); 2212 NFSLOCKNODE(tdnp); 2213 tdnp->n_flag |= NMODIFIED; 2214 if (dattrflag != 0) { 2215 NFSUNLOCKNODE(tdnp); 2216 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); 2217 } else { 2218 tdnp->n_attrstamp = 0; 2219 NFSUNLOCKNODE(tdnp); 2220 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2221 } 2222 if (attrflag) 2223 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2224 else { 2225 np = VTONFS(vp); 2226 NFSLOCKNODE(np); 2227 np->n_attrstamp = 0; 2228 NFSUNLOCKNODE(np); 2229 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 2230 } 2231 /* 2232 * If negative lookup caching is enabled, I might as well 2233 * add an entry for this node. Not necessary for correctness, 2234 * but if negative caching is enabled, then the system 2235 * must care about lookup caching hit rate, so... 
2236 */ 2237 if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && 2238 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2239 if (tdvp != vp) 2240 cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); 2241 else 2242 printf("nfs_link: bogus NFS server returned " 2243 "the directory as the new link\n"); 2244 } 2245 if (error && NFS_ISV4(vp)) 2246 error = nfscl_maperr(curthread, error, (uid_t)0, 2247 (gid_t)0); 2248 return (error); 2249 } 2250 2251 /* 2252 * nfs symbolic link create call 2253 */ 2254 static int 2255 nfs_symlink(struct vop_symlink_args *ap) 2256 { 2257 struct vnode *dvp = ap->a_dvp; 2258 struct vattr *vap = ap->a_vap; 2259 struct componentname *cnp = ap->a_cnp; 2260 struct nfsvattr nfsva, dnfsva; 2261 struct nfsfh *nfhp; 2262 struct nfsnode *np = NULL, *dnp; 2263 struct vnode *newvp = NULL; 2264 int error = 0, attrflag, dattrflag, ret; 2265 2266 vap->va_type = VLNK; 2267 error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2268 ap->a_target, vap, cnp->cn_cred, curthread, &dnfsva, 2269 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); 2270 if (nfhp) { 2271 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread, 2272 &np, NULL, LK_EXCLUSIVE); 2273 if (!ret) 2274 newvp = NFSTOV(np); 2275 else if (!error) 2276 error = ret; 2277 } 2278 if (newvp != NULL) { 2279 if (attrflag) 2280 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 2281 0, 1); 2282 } else if (!error) { 2283 /* 2284 * If we do not have an error and we could not extract the 2285 * newvp from the response due to the request being NFSv2, we 2286 * have to do a lookup in order to obtain a newvp to return. 
2287 */ 2288 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2289 cnp->cn_cred, curthread, &np); 2290 if (!error) 2291 newvp = NFSTOV(np); 2292 } 2293 if (error) { 2294 if (newvp) 2295 vput(newvp); 2296 if (NFS_ISV4(dvp)) 2297 error = nfscl_maperr(curthread, error, 2298 vap->va_uid, vap->va_gid); 2299 } else { 2300 *ap->a_vpp = newvp; 2301 } 2302 2303 dnp = VTONFS(dvp); 2304 NFSLOCKNODE(dnp); 2305 dnp->n_flag |= NMODIFIED; 2306 if (dattrflag != 0) { 2307 NFSUNLOCKNODE(dnp); 2308 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2309 } else { 2310 dnp->n_attrstamp = 0; 2311 NFSUNLOCKNODE(dnp); 2312 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2313 } 2314 /* 2315 * If negative lookup caching is enabled, I might as well 2316 * add an entry for this node. Not necessary for correctness, 2317 * but if negative caching is enabled, then the system 2318 * must care about lookup caching hit rate, so... 2319 */ 2320 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2321 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2322 if (dvp != newvp) 2323 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 2324 NULL); 2325 else 2326 printf("nfs_symlink: bogus NFS server returned " 2327 "the directory as the new file object\n"); 2328 } 2329 return (error); 2330 } 2331 2332 /* 2333 * nfs make dir call 2334 */ 2335 static int 2336 nfs_mkdir(struct vop_mkdir_args *ap) 2337 { 2338 struct vnode *dvp = ap->a_dvp; 2339 struct vattr *vap = ap->a_vap; 2340 struct componentname *cnp = ap->a_cnp; 2341 struct nfsnode *np = NULL, *dnp; 2342 struct vnode *newvp = NULL; 2343 struct vattr vattr; 2344 struct nfsfh *nfhp; 2345 struct nfsvattr nfsva, dnfsva; 2346 int error = 0, attrflag, dattrflag, ret; 2347 2348 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 2349 return (error); 2350 vap->va_type = VDIR; 2351 error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2352 vap, cnp->cn_cred, curthread, &dnfsva, &nfsva, &nfhp, 2353 &attrflag, &dattrflag, NULL); 
2354 dnp = VTONFS(dvp); 2355 NFSLOCKNODE(dnp); 2356 dnp->n_flag |= NMODIFIED; 2357 if (dattrflag != 0) { 2358 NFSUNLOCKNODE(dnp); 2359 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2360 } else { 2361 dnp->n_attrstamp = 0; 2362 NFSUNLOCKNODE(dnp); 2363 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2364 } 2365 if (nfhp) { 2366 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread, 2367 &np, NULL, LK_EXCLUSIVE); 2368 if (!ret) { 2369 newvp = NFSTOV(np); 2370 if (attrflag) 2371 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 2372 NULL, 0, 1); 2373 } else if (!error) 2374 error = ret; 2375 } 2376 if (!error && newvp == NULL) { 2377 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2378 cnp->cn_cred, curthread, &np); 2379 if (!error) { 2380 newvp = NFSTOV(np); 2381 if (newvp->v_type != VDIR) 2382 error = EEXIST; 2383 } 2384 } 2385 if (error) { 2386 if (newvp) 2387 vput(newvp); 2388 if (NFS_ISV4(dvp)) 2389 error = nfscl_maperr(curthread, error, 2390 vap->va_uid, vap->va_gid); 2391 } else { 2392 /* 2393 * If negative lookup caching is enabled, I might as well 2394 * add an entry for this node. Not necessary for correctness, 2395 * but if negative caching is enabled, then the system 2396 * must care about lookup caching hit rate, so... 
2397 */ 2398 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2399 (cnp->cn_flags & MAKEENTRY) && 2400 attrflag != 0 && dattrflag != 0) { 2401 if (dvp != newvp) 2402 cache_enter_time(dvp, newvp, cnp, 2403 &nfsva.na_ctime, &dnfsva.na_ctime); 2404 else 2405 printf("nfs_mkdir: bogus NFS server returned " 2406 "the directory that the directory was " 2407 "created in as the new file object\n"); 2408 } 2409 *ap->a_vpp = newvp; 2410 } 2411 return (error); 2412 } 2413 2414 /* 2415 * nfs remove directory call 2416 */ 2417 static int 2418 nfs_rmdir(struct vop_rmdir_args *ap) 2419 { 2420 struct vnode *vp = ap->a_vp; 2421 struct vnode *dvp = ap->a_dvp; 2422 struct componentname *cnp = ap->a_cnp; 2423 struct nfsnode *dnp; 2424 struct nfsvattr dnfsva; 2425 int error, dattrflag; 2426 2427 if (dvp == vp) 2428 return (EINVAL); 2429 error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2430 cnp->cn_cred, curthread, &dnfsva, &dattrflag, NULL); 2431 dnp = VTONFS(dvp); 2432 NFSLOCKNODE(dnp); 2433 dnp->n_flag |= NMODIFIED; 2434 if (dattrflag != 0) { 2435 NFSUNLOCKNODE(dnp); 2436 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2437 } else { 2438 dnp->n_attrstamp = 0; 2439 NFSUNLOCKNODE(dnp); 2440 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2441 } 2442 2443 cache_purge(dvp); 2444 cache_purge(vp); 2445 if (error && NFS_ISV4(dvp)) 2446 error = nfscl_maperr(curthread, error, (uid_t)0, 2447 (gid_t)0); 2448 /* 2449 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
2450 */ 2451 if (error == ENOENT) 2452 error = 0; 2453 return (error); 2454 } 2455 2456 /* 2457 * nfs readdir call 2458 */ 2459 static int 2460 nfs_readdir(struct vop_readdir_args *ap) 2461 { 2462 struct vnode *vp = ap->a_vp; 2463 struct nfsnode *np = VTONFS(vp); 2464 struct uio *uio = ap->a_uio; 2465 ssize_t tresid, left; 2466 int error = 0; 2467 struct vattr vattr; 2468 2469 if (ap->a_eofflag != NULL) 2470 *ap->a_eofflag = 0; 2471 if (vp->v_type != VDIR) 2472 return(EPERM); 2473 2474 /* 2475 * First, check for hit on the EOF offset cache 2476 */ 2477 NFSLOCKNODE(np); 2478 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2479 (np->n_flag & NMODIFIED) == 0) { 2480 NFSUNLOCKNODE(np); 2481 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2482 NFSLOCKNODE(np); 2483 if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || 2484 !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2485 NFSUNLOCKNODE(np); 2486 NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); 2487 if (ap->a_eofflag != NULL) 2488 *ap->a_eofflag = 1; 2489 return (0); 2490 } else 2491 NFSUNLOCKNODE(np); 2492 } 2493 } else 2494 NFSUNLOCKNODE(np); 2495 2496 /* 2497 * NFS always guarantees that directory entries don't straddle 2498 * DIRBLKSIZ boundaries. As such, we need to limit the size 2499 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial 2500 * directory entry. 2501 */ 2502 left = uio->uio_resid % DIRBLKSIZ; 2503 if (left == uio->uio_resid) 2504 return (EINVAL); 2505 uio->uio_resid -= left; 2506 2507 /* 2508 * Call ncl_bioread() to do the real work. 2509 */ 2510 tresid = uio->uio_resid; 2511 error = ncl_bioread(vp, uio, 0, ap->a_cred); 2512 2513 if (!error && uio->uio_resid == tresid) { 2514 NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); 2515 if (ap->a_eofflag != NULL) 2516 *ap->a_eofflag = 1; 2517 } 2518 2519 /* Add the partial DIRBLKSIZ (left) back in. */ 2520 uio->uio_resid += left; 2521 return (error); 2522 } 2523 2524 /* 2525 * Readdir rpc call. 
2526 * Called from below the buffer cache by ncl_doio(). 2527 */ 2528 int 2529 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2530 struct thread *td) 2531 { 2532 struct nfsvattr nfsva; 2533 nfsuint64 *cookiep, cookie; 2534 struct nfsnode *dnp = VTONFS(vp); 2535 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2536 int error = 0, eof, attrflag; 2537 2538 KASSERT(uiop->uio_iovcnt == 1 && 2539 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2540 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2541 ("nfs readdirrpc bad uio")); 2542 2543 /* 2544 * If there is no cookie, assume directory was stale. 2545 */ 2546 ncl_dircookie_lock(dnp); 2547 NFSUNLOCKNODE(dnp); 2548 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2549 if (cookiep) { 2550 cookie = *cookiep; 2551 ncl_dircookie_unlock(dnp); 2552 } else { 2553 ncl_dircookie_unlock(dnp); 2554 return (NFSERR_BAD_COOKIE); 2555 } 2556 2557 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2558 (void)ncl_fsinfo(nmp, vp, cred, td); 2559 2560 error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, 2561 &attrflag, &eof, NULL); 2562 if (attrflag) 2563 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2564 2565 if (!error) { 2566 /* 2567 * We are now either at the end of the directory or have filled 2568 * the block. 2569 */ 2570 if (eof) { 2571 NFSLOCKNODE(dnp); 2572 dnp->n_direofoffset = uiop->uio_offset; 2573 NFSUNLOCKNODE(dnp); 2574 } else { 2575 if (uiop->uio_resid > 0) 2576 printf("EEK! readdirrpc resid > 0\n"); 2577 ncl_dircookie_lock(dnp); 2578 NFSUNLOCKNODE(dnp); 2579 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2580 *cookiep = cookie; 2581 ncl_dircookie_unlock(dnp); 2582 } 2583 } else if (NFS_ISV4(vp)) { 2584 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2585 } 2586 return (error); 2587 } 2588 2589 /* 2590 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). 
2591 */ 2592 int 2593 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2594 struct thread *td) 2595 { 2596 struct nfsvattr nfsva; 2597 nfsuint64 *cookiep, cookie; 2598 struct nfsnode *dnp = VTONFS(vp); 2599 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2600 int error = 0, attrflag, eof; 2601 2602 KASSERT(uiop->uio_iovcnt == 1 && 2603 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2604 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2605 ("nfs readdirplusrpc bad uio")); 2606 2607 /* 2608 * If there is no cookie, assume directory was stale. 2609 */ 2610 ncl_dircookie_lock(dnp); 2611 NFSUNLOCKNODE(dnp); 2612 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2613 if (cookiep) { 2614 cookie = *cookiep; 2615 ncl_dircookie_unlock(dnp); 2616 } else { 2617 ncl_dircookie_unlock(dnp); 2618 return (NFSERR_BAD_COOKIE); 2619 } 2620 2621 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2622 (void)ncl_fsinfo(nmp, vp, cred, td); 2623 error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, 2624 &attrflag, &eof, NULL); 2625 if (attrflag) 2626 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2627 2628 if (!error) { 2629 /* 2630 * We are now either at end of the directory or have filled the 2631 * the block. 2632 */ 2633 if (eof) { 2634 NFSLOCKNODE(dnp); 2635 dnp->n_direofoffset = uiop->uio_offset; 2636 NFSUNLOCKNODE(dnp); 2637 } else { 2638 if (uiop->uio_resid > 0) 2639 printf("EEK! readdirplusrpc resid > 0\n"); 2640 ncl_dircookie_lock(dnp); 2641 NFSUNLOCKNODE(dnp); 2642 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2643 *cookiep = cookie; 2644 ncl_dircookie_unlock(dnp); 2645 } 2646 } else if (NFS_ISV4(vp)) { 2647 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2648 } 2649 return (error); 2650 } 2651 2652 /* 2653 * Silly rename. 
To make the NFS filesystem that is stateless look a little 2654 * more like the "ufs" a remove of an active vnode is translated to a rename 2655 * to a funny looking filename that is removed by nfs_inactive on the 2656 * nfsnode. There is the potential for another process on a different client 2657 * to create the same funny name between the nfs_lookitup() fails and the 2658 * nfs_rename() completes, but... 2659 */ 2660 static int 2661 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2662 { 2663 struct sillyrename *sp; 2664 struct nfsnode *np; 2665 int error; 2666 short pid; 2667 unsigned int lticks; 2668 2669 cache_purge(dvp); 2670 np = VTONFS(vp); 2671 KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); 2672 sp = malloc(sizeof (struct sillyrename), 2673 M_NEWNFSREQ, M_WAITOK); 2674 sp->s_cred = crhold(cnp->cn_cred); 2675 sp->s_dvp = dvp; 2676 VREF(dvp); 2677 2678 /* 2679 * Fudge together a funny name. 2680 * Changing the format of the funny name to accommodate more 2681 * sillynames per directory. 2682 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2683 * CPU ticks since boot. 2684 */ 2685 pid = curthread->td_proc->p_pid; 2686 lticks = (unsigned int)ticks; 2687 for ( ; ; ) { 2688 sp->s_namlen = sprintf(sp->s_name, 2689 ".nfs.%08x.%04x4.4", lticks, 2690 pid); 2691 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2692 curthread, NULL)) 2693 break; 2694 lticks++; 2695 } 2696 error = nfs_renameit(dvp, vp, cnp, sp); 2697 if (error) 2698 goto bad; 2699 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2700 curthread, &np); 2701 np->n_sillyrename = sp; 2702 return (0); 2703 bad: 2704 vrele(sp->s_dvp); 2705 crfree(sp->s_cred); 2706 free(sp, M_NEWNFSREQ); 2707 return (error); 2708 } 2709 2710 /* 2711 * Look up a file name and optionally either update the file handle or 2712 * allocate an nfsnode, depending on the value of npp. 
2713 * npp == NULL --> just do the lookup 2714 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2715 * handled too 2716 * *npp != NULL --> update the file handle in the vnode 2717 */ 2718 static int 2719 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, 2720 struct thread *td, struct nfsnode **npp) 2721 { 2722 struct vnode *newvp = NULL, *vp; 2723 struct nfsnode *np, *dnp = VTONFS(dvp); 2724 struct nfsfh *nfhp, *onfhp; 2725 struct nfsvattr nfsva, dnfsva; 2726 struct componentname cn; 2727 int error = 0, attrflag, dattrflag; 2728 u_int hash; 2729 struct timespec ts; 2730 2731 nanouptime(&ts); 2732 error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, 2733 &nfhp, &attrflag, &dattrflag, NULL, 0); 2734 if (dattrflag) 2735 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2736 if (npp && !error) { 2737 if (*npp != NULL) { 2738 np = *npp; 2739 vp = NFSTOV(np); 2740 /* 2741 * For NFSv4, check to see if it is the same name and 2742 * replace the name, if it is different. 2743 */ 2744 if (np->n_v4 != NULL && nfsva.na_type == VREG && 2745 (np->n_v4->n4_namelen != len || 2746 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || 2747 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || 2748 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2749 dnp->n_fhp->nfh_len))) { 2750 #ifdef notdef 2751 { char nnn[100]; int nnnl; 2752 nnnl = (len < 100) ? 
len : 99; 2753 bcopy(name, nnn, nnnl); 2754 nnn[nnnl] = '\0'; 2755 printf("replace=%s\n",nnn); 2756 } 2757 #endif 2758 free(np->n_v4, M_NFSV4NODE); 2759 np->n_v4 = malloc( 2760 sizeof (struct nfsv4node) + 2761 dnp->n_fhp->nfh_len + len - 1, 2762 M_NFSV4NODE, M_WAITOK); 2763 np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; 2764 np->n_v4->n4_namelen = len; 2765 NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2766 dnp->n_fhp->nfh_len); 2767 NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); 2768 } 2769 hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, 2770 FNV1_32_INIT); 2771 onfhp = np->n_fhp; 2772 /* 2773 * Rehash node for new file handle. 2774 */ 2775 vfs_hash_rehash(vp, hash); 2776 np->n_fhp = nfhp; 2777 if (onfhp != NULL) 2778 free(onfhp, M_NFSFH); 2779 newvp = NFSTOV(np); 2780 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { 2781 free(nfhp, M_NFSFH); 2782 VREF(dvp); 2783 newvp = dvp; 2784 } else { 2785 cn.cn_nameptr = name; 2786 cn.cn_namelen = len; 2787 error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, 2788 &np, NULL, LK_EXCLUSIVE); 2789 if (error) 2790 return (error); 2791 newvp = NFSTOV(np); 2792 /* 2793 * If n_localmodtime >= time before RPC, then 2794 * a file modification operation, such as 2795 * VOP_SETATTR() of size, has occurred while 2796 * the Lookup RPC and acquisition of the vnode 2797 * happened. As such, the attributes might 2798 * be stale, with possibly an incorrect size. 
2799 */ 2800 NFSLOCKNODE(np); 2801 if (timespecisset(&np->n_localmodtime) && 2802 timespeccmp(&np->n_localmodtime, &ts, >=)) { 2803 NFSCL_DEBUG(4, "nfs_lookitup: localmod " 2804 "stale attributes\n"); 2805 attrflag = 0; 2806 } 2807 NFSUNLOCKNODE(np); 2808 } 2809 if (!attrflag && *npp == NULL) { 2810 if (newvp == dvp) 2811 vrele(newvp); 2812 else 2813 vput(newvp); 2814 return (ENOENT); 2815 } 2816 if (attrflag) 2817 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 2818 0, 1); 2819 } 2820 if (npp && *npp == NULL) { 2821 if (error) { 2822 if (newvp) { 2823 if (newvp == dvp) 2824 vrele(newvp); 2825 else 2826 vput(newvp); 2827 } 2828 } else 2829 *npp = np; 2830 } 2831 if (error && NFS_ISV4(dvp)) 2832 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2833 return (error); 2834 } 2835 2836 /* 2837 * Nfs Version 3 and 4 commit rpc 2838 */ 2839 int 2840 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2841 struct thread *td) 2842 { 2843 struct nfsvattr nfsva; 2844 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2845 struct nfsnode *np; 2846 struct uio uio; 2847 int error, attrflag; 2848 2849 np = VTONFS(vp); 2850 error = EIO; 2851 attrflag = 0; 2852 if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) { 2853 uio.uio_offset = offset; 2854 uio.uio_resid = cnt; 2855 error = nfscl_doiods(vp, &uio, NULL, NULL, 2856 NFSV4OPEN_ACCESSWRITE, 1, cred, td); 2857 if (error != 0) { 2858 NFSLOCKNODE(np); 2859 np->n_flag &= ~NDSCOMMIT; 2860 NFSUNLOCKNODE(np); 2861 } 2862 } 2863 if (error != 0) { 2864 mtx_lock(&nmp->nm_mtx); 2865 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2866 mtx_unlock(&nmp->nm_mtx); 2867 return (0); 2868 } 2869 mtx_unlock(&nmp->nm_mtx); 2870 error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, 2871 &attrflag, NULL); 2872 } 2873 if (attrflag != 0) 2874 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 2875 0, 1); 2876 if (error != 0 && NFS_ISV4(vp)) 2877 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2878 return 
(error); 2879 } 2880 2881 /* 2882 * Strategy routine. 2883 * For async requests when nfsiod(s) are running, queue the request by 2884 * calling ncl_asyncio(), otherwise just all ncl_doio() to do the 2885 * request. 2886 */ 2887 static int 2888 nfs_strategy(struct vop_strategy_args *ap) 2889 { 2890 struct buf *bp; 2891 struct vnode *vp; 2892 struct ucred *cr; 2893 2894 bp = ap->a_bp; 2895 vp = ap->a_vp; 2896 KASSERT(bp->b_vp == vp, ("missing b_getvp")); 2897 KASSERT(!(bp->b_flags & B_DONE), 2898 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2899 2900 if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno) 2901 bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize / 2902 DEV_BSIZE); 2903 if (bp->b_iocmd == BIO_READ) 2904 cr = bp->b_rcred; 2905 else 2906 cr = bp->b_wcred; 2907 2908 /* 2909 * If the op is asynchronous and an i/o daemon is waiting 2910 * queue the request, wake it up and wait for completion 2911 * otherwise just do it ourselves. 2912 */ 2913 if ((bp->b_flags & B_ASYNC) == 0 || 2914 ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread)) 2915 (void) ncl_doio(vp, bp, cr, curthread, 1); 2916 return (0); 2917 } 2918 2919 /* 2920 * fsync vnode op. Just call ncl_flush() with commit == 1. 2921 */ 2922 /* ARGSUSED */ 2923 static int 2924 nfs_fsync(struct vop_fsync_args *ap) 2925 { 2926 2927 if (ap->a_vp->v_type != VREG) { 2928 /* 2929 * For NFS, metadata is changed synchronously on the server, 2930 * so there is nothing to flush. Also, ncl_flush() clears 2931 * the NMODIFIED flag and that shouldn't be done here for 2932 * directories. 2933 */ 2934 return (0); 2935 } 2936 return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0)); 2937 } 2938 2939 /* 2940 * Flush all the blocks associated with a vnode. 2941 * Walk through the buffer pool and push any dirty pages 2942 * associated with the vnode. 
* If the called_from_renewthread argument is TRUE, it has been called
 * from the NFSv4 renew thread and, as such, cannot block indefinitely
 * waiting for a buffer write to complete.
 *
 *	waitfor	 MNT_WAIT makes the routine sleep for busy buffers and wait
 *		 until outstanding output and direct-I/O async writes have
 *		 drained.
 *	commit	 non-zero enables the commit pass and allows NMODIFIED to be
 *		 cleared once nothing dirty remains.
 *
 * Returns 0 on success, EINTR when interrupted, EIO when a wait fails in
 * the renew thread case, a saved write error (n_error) if NWRITEERR was
 * set, or EBUSY/EIO if the vnode still has dirty or in-flight buffers
 * after the retries are exhausted.  On success n_localmodtime is set to
 * the current uptime.
 */
int
ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
    int commit, int called_from_renewthread)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1, trycnt = 0;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
	struct bufobj *bo;
#ifndef NFS_COMMITBVECSIZ
#define	NFS_COMMITBVECSIZ	20
#endif
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	u_int bvecsize = 0, bveccount;
	struct timespec ts;

	/* The renew thread only ever sleeps with a one second timeout. */
	if (called_from_renewthread != 0)
		slptimeo = hz;
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	/* Without commit there is no separate first (commit) pass. */
	if (!commit)
		passone = 0;
	bo = &vp->v_bufobj;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, ncl_writebp() is called to do the
	 * job.
	 */
again:
	/* off/endoff accumulate the byte range covered by the commit. */
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV34(vp) && commit) {
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers waiting for a commit.
		 */
		bveccount = 0;
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bveccount++;
		}
		/*
		 * Allocate space to remember the list of bufs to commit.  It is
		 * important to use M_NOWAIT here to avoid a race with nfs_write.
		 * If we can't get memory (for whatever reason), we will end up
		 * committing the buffers one-by-one in the loop below.
		 */
		if (bveccount > NFS_COMMITBVECSIZ) {
			/*
			 * Release the vnode interlock to avoid a lock
			 * order reversal.
			 */
			BO_UNLOCK(bo);
			bvec = (struct buf **)
				malloc(bveccount * sizeof(struct buf *),
				       M_TEMP, M_NOWAIT);
			BO_LOCK(bo);
			if (bvec == NULL) {
				bvec = bvec_on_stack;
				bvecsize = NFS_COMMITBVECSIZ;
			} else
				bvecsize = bveccount;
		} else {
			bvec = bvec_on_stack;
			bvecsize = NFS_COMMITBVECSIZ;
		}
		/* Collect up to bvecsize lockable buffers awaiting commit. */
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (bvecpos >= bvecsize)
				break;
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
			    (B_DELWRI | B_NEEDCOMMIT)) {
				BUF_UNLOCK(bp);
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			BO_UNLOCK(bo);
			bremfree(bp);
			/*
			 * Work out if all buffers are using the same cred
			 * so we can deal with them all with one commit.
			 *
			 * NOTE: we are not clearing B_DONE here, so we have
			 * to do it later on in this routine if we intend to
			 * initiate I/O on the bp.
			 *
			 * Note: to avoid loopback deadlocks, we do not
			 * assign b_runningbufspace.
			 */
			if (wcred == NULL)
				wcred = bp->b_wcred;
			else if (wcred != bp->b_wcred)
				wcred = NOCRED;
			vfs_busy_pages(bp, 1);

			BO_LOCK(bo);
			/*
			 * bp is protected by being locked, but nbp is not
			 * and vfs_busy_pages() may sleep.  We have to
			 * recalculate nbp.
			 */
			nbp = TAILQ_NEXT(bp, b_bobufs);

			/*
			 * A list of these buffers is kept so that the
			 * second loop knows which buffers have actually
			 * been committed. This is necessary, since there
			 * may be a race between the commit rpc and new
			 * uncommitted writes on the file.
			 */
			bvec[bvecpos++] = bp;
			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				bp->b_dirtyoff;
			if (toff < off)
				off = toff;
			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
			if (toff > endoff)
				endoff = toff;
		}
		BO_UNLOCK(bo);
	}
	if (bvecpos > 0) {
		/*
		 * Commit data on the server, as required.
		 * If all bufs are using the same wcred, then use that with
		 * one call for all of them, otherwise commit each one
		 * separately.
		 */
		if (wcred != NOCRED)
			retv = ncl_commit(vp, off, (int)(endoff - off),
					  wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				/* These shadow the function-level 'off'. */
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
					bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
						  - bp->b_dirtyoff);
				retv = ncl_commit(vp, off, (int)size,
						  bp->b_wcred, td);
				if (retv) break;
			}
		}

		if (retv == NFSERR_STALEWRITEVERF)
			ncl_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			if (!NFSCL_FORCEDISM(vp->v_mount) && retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific.  We should probably move that
				 * into bundirty(). XXX
				 */
				bufobj_wref(bo);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	BO_LOCK(bo);
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			/* Busy buffer: only sleep for it on a waiting pass. */
			if (waitfor != MNT_WAIT || passone)
				continue;

			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
			if (error == 0) {
				BUF_UNLOCK(bp);
				goto loop;
			}
			if (error == ENOLCK) {
				error = 0;
				goto loop;
			}
			if (called_from_renewthread != 0) {
				/*
				 * Return EIO so the flush will be retried
				 * later.
				 */
				error = EIO;
				goto done;
			}
			if (newnfs_sigintr(nmp, td)) {
				error = EINTR;
				goto done;
			}
			/* From now on sleep uninterruptibly, 2 seconds at a time. */
			if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs_fsync: not dirty");
		/* Buffers awaiting commit are left for the commit pass. */
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		BO_UNLOCK(bo);
		bremfree(bp);
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		if (newnfs_sigintr(nmp, td)) {
			error = EINTR;
			goto done;
		}
		/* The bufobj lock was dropped, so rescan from the start. */
		goto loop;
	}
	if (passone) {
		passone = 0;
		BO_UNLOCK(bo);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		while (bo->bo_numoutput) {
			error = bufobj_wwait(bo, slpflag, slptimeo);
			if (error) {
			    BO_UNLOCK(bo);
			    if (called_from_renewthread != 0) {
				/*
				 * Return EIO so that the flush will be
				 * retried later.
				 */
				error = EIO;
				goto done;
			    }
			    error = newnfs_sigintr(nmp, td);
			    if (error)
				goto done;
			    if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			    }
			    BO_LOCK(bo);
			}
		}
		if (bo->bo_dirty.bv_cnt != 0 && commit) {
			BO_UNLOCK(bo);
			goto loop;
		}
		/*
		 * Wait for all the async IO requests to drain
		 */
		BO_UNLOCK(bo);
		NFSLOCKNODE(np);
		while (np->n_directio_asyncwr > 0) {
			np->n_flag |= NFSYNCWAIT;
			error = newnfs_msleep(td, &np->n_directio_asyncwr,
			    &np->n_mtx, slpflag | (PRIBIO + 1),
			    "nfsfsync", 0);
			if (error) {
				if (newnfs_sigintr(nmp, td)) {
					NFSUNLOCKNODE(np);
					error = EINTR;
					goto done;
				}
			}
		}
		NFSUNLOCKNODE(np);
	} else
		BO_UNLOCK(bo);
	/* Both arms below leave the node mutex held. */
	if (NFSHASPNFS(nmp)) {
		nfscl_layoutcommit(vp, td);
		/*
		 * Invalidate the attribute cache, since writes to a DS
		 * won't update the size attribute.
		 */
		NFSLOCKNODE(np);
		np->n_attrstamp = 0;
	} else
		NFSLOCKNODE(np);
	/* Report (and clear) a write error recorded on the node. */
	if (np->n_flag & NWRITEERR) {
		error = np->n_error;
		np->n_flag &= ~NWRITEERR;
	}
	if (commit && bo->bo_dirty.bv_cnt == 0 &&
	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
		np->n_flag &= ~NMODIFIED;
	NFSUNLOCKNODE(np);
done:
	if (bvec != NULL && bvec != bvec_on_stack)
		free(bvec, M_TEMP);
	/* A waiting commit flush that left work behind is retried 5 times. */
	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
	    np->n_directio_asyncwr != 0)) {
		if (trycnt++ < 5) {
			/* try, try again... */
			passone = 1;
			wcred = NULL;
			bvec = NULL;
			bvecsize = 0;
			goto again;
		}
		vn_printf(vp, "ncl_flush failed");
		error = called_from_renewthread != 0 ? EIO : EBUSY;
	}
	if (error == 0) {
		nanouptime(&ts);
		NFSLOCKNODE(np);
		np->n_localmodtime = ts;
		NFSUNLOCKNODE(np);
	}
	return (error);
}

/*
 * NFS advisory byte-level locks.
 */
static int
nfs_advlock(struct vop_advlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct ucred *cred;
	struct nfsnode *np = VTONFS(ap->a_vp);
	struct proc *p = (struct proc *)ap->a_id;
	struct thread *td = curthread;	/* XXX */
	struct vattr va;
	int ret, error;
	u_quad_t size;
	struct nfsmount *nmp;

	error = NFSVOPLOCK(vp, LK_SHARED);
	if (error != 0)
		return (EBADF);
	if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
		if (vp->v_type != VREG) {
			error = EINVAL;
			goto out;
		}
		if ((ap->a_flags & F_POSIX) != 0)
			cred = p->p_ucred;
		else
			cred = td->td_ucred;
		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
		if (VN_IS_DOOMED(vp)) {
			error = EBADF;
			goto out;
		}

		/*
		 * If this is unlocking a write locked region, flush and
		 * commit them before unlocking. This is required by
		 * RFC3530 Sec. 9.3.2.
		 */
		if (ap->a_op == F_UNLCK &&
		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
		    ap->a_flags))
			(void) ncl_flush(vp, MNT_WAIT, td, 1, 0);

		/*
		 * Mark NFS node as might have acquired a lock.
		 * This is separate from NHASBEENLOCKED, because it must
		 * be done before the nfsrpc_advlock() call, which might
		 * add a nfscllock structure to the client state.
		 * It is used to check for the case where a nfscllock
		 * state structure cannot exist for the file.
		 * Only done for "oneopenown" NFSv4.1/4.2 mounts.
3349 */ 3350 nmp = VFSTONFS(vp->v_mount); 3351 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) { 3352 NFSLOCKNODE(np); 3353 np->n_flag |= NMIGHTBELOCKED; 3354 NFSUNLOCKNODE(np); 3355 } 3356 3357 /* 3358 * Loop around doing the lock op, while a blocking lock 3359 * must wait for the lock op to succeed. 3360 */ 3361 do { 3362 ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, 3363 ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); 3364 if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3365 ap->a_op == F_SETLK) { 3366 NFSVOPUNLOCK(vp); 3367 error = nfs_catnap(PZERO | PCATCH, ret, 3368 "ncladvl"); 3369 if (error) 3370 return (EINTR); 3371 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 3372 if (VN_IS_DOOMED(vp)) { 3373 error = EBADF; 3374 goto out; 3375 } 3376 } 3377 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3378 ap->a_op == F_SETLK); 3379 if (ret == NFSERR_DENIED) { 3380 error = EAGAIN; 3381 goto out; 3382 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { 3383 error = ret; 3384 goto out; 3385 } else if (ret != 0) { 3386 error = EACCES; 3387 goto out; 3388 } 3389 3390 /* 3391 * Now, if we just got a lock, invalidate data in the buffer 3392 * cache, as required, so that the coherency conforms with 3393 * RFC3530 Sec. 9.3.2. 3394 */ 3395 if (ap->a_op == F_SETLK) { 3396 if ((np->n_flag & NMODIFIED) == 0) { 3397 np->n_attrstamp = 0; 3398 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3399 ret = VOP_GETATTR(vp, &va, cred); 3400 } 3401 if ((np->n_flag & NMODIFIED) || ret || 3402 np->n_change != va.va_filerev) { 3403 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); 3404 np->n_attrstamp = 0; 3405 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3406 ret = VOP_GETATTR(vp, &va, cred); 3407 if (!ret) { 3408 np->n_mtime = va.va_mtime; 3409 np->n_change = va.va_filerev; 3410 } 3411 } 3412 /* Mark that a file lock has been acquired. 
*/ 3413 NFSLOCKNODE(np); 3414 np->n_flag |= NHASBEENLOCKED; 3415 NFSUNLOCKNODE(np); 3416 } 3417 } else if (!NFS_ISV4(vp)) { 3418 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3419 size = VTONFS(vp)->n_size; 3420 NFSVOPUNLOCK(vp); 3421 error = lf_advlock(ap, &(vp->v_lockf), size); 3422 } else { 3423 if (nfs_advlock_p != NULL) 3424 error = nfs_advlock_p(ap); 3425 else { 3426 NFSVOPUNLOCK(vp); 3427 error = ENOLCK; 3428 } 3429 } 3430 if (error == 0 && ap->a_op == F_SETLK) { 3431 error = NFSVOPLOCK(vp, LK_SHARED); 3432 if (error == 0) { 3433 /* Mark that a file lock has been acquired. */ 3434 NFSLOCKNODE(np); 3435 np->n_flag |= NHASBEENLOCKED; 3436 NFSUNLOCKNODE(np); 3437 NFSVOPUNLOCK(vp); 3438 } 3439 } 3440 return (error); 3441 } else 3442 error = EOPNOTSUPP; 3443 out: 3444 NFSVOPUNLOCK(vp); 3445 return (error); 3446 } 3447 3448 /* 3449 * NFS advisory byte-level locks. 3450 */ 3451 static int 3452 nfs_advlockasync(struct vop_advlockasync_args *ap) 3453 { 3454 struct vnode *vp = ap->a_vp; 3455 u_quad_t size; 3456 int error; 3457 3458 if (NFS_ISV4(vp)) 3459 return (EOPNOTSUPP); 3460 error = NFSVOPLOCK(vp, LK_SHARED); 3461 if (error) 3462 return (error); 3463 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3464 size = VTONFS(vp)->n_size; 3465 NFSVOPUNLOCK(vp); 3466 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3467 } else { 3468 NFSVOPUNLOCK(vp); 3469 error = EOPNOTSUPP; 3470 } 3471 return (error); 3472 } 3473 3474 /* 3475 * Print out the contents of an nfsnode. 3476 */ 3477 static int 3478 nfs_print(struct vop_print_args *ap) 3479 { 3480 struct vnode *vp = ap->a_vp; 3481 struct nfsnode *np = VTONFS(vp); 3482 3483 printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, 3484 (uintmax_t)np->n_vattr.na_fsid); 3485 if (vp->v_type == VFIFO) 3486 fifo_printinfo(vp); 3487 printf("\n"); 3488 return (0); 3489 } 3490 3491 /* 3492 * This is the "real" nfs::bwrite(struct buf*). 3493 * We set B_CACHE if this is a VMIO buffer. 
3494 */ 3495 int 3496 ncl_writebp(struct buf *bp, int force __unused, struct thread *td) 3497 { 3498 int oldflags, rtval; 3499 3500 if (bp->b_flags & B_INVAL) { 3501 brelse(bp); 3502 return (0); 3503 } 3504 3505 oldflags = bp->b_flags; 3506 bp->b_flags |= B_CACHE; 3507 3508 /* 3509 * Undirty the bp. We will redirty it later if the I/O fails. 3510 */ 3511 bundirty(bp); 3512 bp->b_flags &= ~B_DONE; 3513 bp->b_ioflags &= ~BIO_ERROR; 3514 bp->b_iocmd = BIO_WRITE; 3515 3516 bufobj_wref(bp->b_bufobj); 3517 curthread->td_ru.ru_oublock++; 3518 3519 /* 3520 * Note: to avoid loopback deadlocks, we do not 3521 * assign b_runningbufspace. 3522 */ 3523 vfs_busy_pages(bp, 1); 3524 3525 BUF_KERNPROC(bp); 3526 bp->b_iooffset = dbtob(bp->b_blkno); 3527 bstrategy(bp); 3528 3529 if ((oldflags & B_ASYNC) != 0) 3530 return (0); 3531 3532 rtval = bufwait(bp); 3533 if (oldflags & B_DELWRI) 3534 reassignbuf(bp); 3535 brelse(bp); 3536 return (rtval); 3537 } 3538 3539 /* 3540 * nfs special file access vnode op. 3541 * Essentially just get vattr and then imitate iaccess() since the device is 3542 * local to the client. 3543 */ 3544 static int 3545 nfsspec_access(struct vop_access_args *ap) 3546 { 3547 struct vattr *vap; 3548 struct ucred *cred = ap->a_cred; 3549 struct vnode *vp = ap->a_vp; 3550 accmode_t accmode = ap->a_accmode; 3551 struct vattr vattr; 3552 int error; 3553 3554 /* 3555 * Disallow write attempts on filesystems mounted read-only; 3556 * unless the file is a socket, fifo, or a block or character 3557 * device resident on the filesystem. 
3558 */ 3559 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3560 switch (vp->v_type) { 3561 case VREG: 3562 case VDIR: 3563 case VLNK: 3564 return (EROFS); 3565 default: 3566 break; 3567 } 3568 } 3569 vap = &vattr; 3570 error = VOP_GETATTR(vp, vap, cred); 3571 if (error) 3572 goto out; 3573 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3574 accmode, cred); 3575 out: 3576 return error; 3577 } 3578 3579 /* 3580 * Read wrapper for fifos. 3581 */ 3582 static int 3583 nfsfifo_read(struct vop_read_args *ap) 3584 { 3585 struct nfsnode *np = VTONFS(ap->a_vp); 3586 int error; 3587 3588 /* 3589 * Set access flag. 3590 */ 3591 NFSLOCKNODE(np); 3592 np->n_flag |= NACC; 3593 vfs_timestamp(&np->n_atim); 3594 NFSUNLOCKNODE(np); 3595 error = fifo_specops.vop_read(ap); 3596 return error; 3597 } 3598 3599 /* 3600 * Write wrapper for fifos. 3601 */ 3602 static int 3603 nfsfifo_write(struct vop_write_args *ap) 3604 { 3605 struct nfsnode *np = VTONFS(ap->a_vp); 3606 3607 /* 3608 * Set update flag. 3609 */ 3610 NFSLOCKNODE(np); 3611 np->n_flag |= NUPD; 3612 vfs_timestamp(&np->n_mtim); 3613 NFSUNLOCKNODE(np); 3614 return(fifo_specops.vop_write(ap)); 3615 } 3616 3617 /* 3618 * Close wrapper for fifos. 3619 * 3620 * Update the times on the nfsnode then do fifo close. 
3621 */ 3622 static int 3623 nfsfifo_close(struct vop_close_args *ap) 3624 { 3625 struct vnode *vp = ap->a_vp; 3626 struct nfsnode *np = VTONFS(vp); 3627 struct vattr vattr; 3628 struct timespec ts; 3629 3630 NFSLOCKNODE(np); 3631 if (np->n_flag & (NACC | NUPD)) { 3632 vfs_timestamp(&ts); 3633 if (np->n_flag & NACC) 3634 np->n_atim = ts; 3635 if (np->n_flag & NUPD) 3636 np->n_mtim = ts; 3637 np->n_flag |= NCHG; 3638 if (vrefcnt(vp) == 1 && 3639 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3640 VATTR_NULL(&vattr); 3641 if (np->n_flag & NACC) 3642 vattr.va_atime = np->n_atim; 3643 if (np->n_flag & NUPD) 3644 vattr.va_mtime = np->n_mtim; 3645 NFSUNLOCKNODE(np); 3646 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3647 goto out; 3648 } 3649 } 3650 NFSUNLOCKNODE(np); 3651 out: 3652 return (fifo_specops.vop_close(ap)); 3653 } 3654 3655 /* 3656 * Just call ncl_writebp() with the force argument set to 1. 3657 * 3658 * NOTE: B_DONE may or may not be set in a_bp on call. 3659 */ 3660 static int 3661 nfs_bwrite(struct buf *bp) 3662 { 3663 3664 return (ncl_writebp(bp, 1, curthread)); 3665 } 3666 3667 struct buf_ops buf_ops_newnfs = { 3668 .bop_name = "buf_ops_nfs", 3669 .bop_write = nfs_bwrite, 3670 .bop_strategy = bufstrategy, 3671 .bop_sync = bufsync, 3672 .bop_bdflush = bufbdflush, 3673 }; 3674 3675 static int 3676 nfs_getacl(struct vop_getacl_args *ap) 3677 { 3678 int error; 3679 3680 if (ap->a_type != ACL_TYPE_NFS4) 3681 return (EOPNOTSUPP); 3682 error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3683 NULL); 3684 if (error > NFSERR_STALE) { 3685 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3686 error = EPERM; 3687 } 3688 return (error); 3689 } 3690 3691 static int 3692 nfs_setacl(struct vop_setacl_args *ap) 3693 { 3694 int error; 3695 3696 if (ap->a_type != ACL_TYPE_NFS4) 3697 return (EOPNOTSUPP); 3698 error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3699 NULL); 3700 if (error > NFSERR_STALE) { 3701 (void) nfscl_maperr(ap->a_td, 
error, (uid_t)0, (gid_t)0); 3702 error = EPERM; 3703 } 3704 return (error); 3705 } 3706 3707 /* 3708 * VOP_ADVISE for NFS. 3709 * Just return 0 for any errors, since it is just a hint. 3710 */ 3711 static int 3712 nfs_advise(struct vop_advise_args *ap) 3713 { 3714 struct thread *td = curthread; 3715 struct nfsmount *nmp; 3716 uint64_t len; 3717 int error; 3718 3719 /* 3720 * First do vop_stdadvise() to handle the buffer cache. 3721 */ 3722 error = vop_stdadvise(ap); 3723 if (error != 0) 3724 return (error); 3725 if (ap->a_start < 0 || ap->a_end < 0) 3726 return (0); 3727 if (ap->a_end == OFF_MAX) 3728 len = 0; 3729 else if (ap->a_end < ap->a_start) 3730 return (0); 3731 else 3732 len = ap->a_end - ap->a_start + 1; 3733 nmp = VFSTONFS(ap->a_vp->v_mount); 3734 mtx_lock(&nmp->nm_mtx); 3735 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3736 (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) == 3737 0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) { 3738 mtx_unlock(&nmp->nm_mtx); 3739 return (0); 3740 } 3741 mtx_unlock(&nmp->nm_mtx); 3742 error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice, 3743 td->td_ucred, td); 3744 if (error == NFSERR_NOTSUPP) { 3745 mtx_lock(&nmp->nm_mtx); 3746 nmp->nm_privflag |= NFSMNTP_NOADVISE; 3747 mtx_unlock(&nmp->nm_mtx); 3748 } 3749 return (0); 3750 } 3751 3752 /* 3753 * nfs allocate call 3754 */ 3755 static int 3756 nfs_allocate(struct vop_allocate_args *ap) 3757 { 3758 struct vnode *vp = ap->a_vp; 3759 struct thread *td = curthread; 3760 struct nfsvattr nfsva; 3761 struct nfsmount *nmp; 3762 struct nfsnode *np; 3763 off_t alen; 3764 int attrflag, error, ret; 3765 struct timespec ts; 3766 struct uio io; 3767 3768 attrflag = 0; 3769 nmp = VFSTONFS(vp->v_mount); 3770 np = VTONFS(vp); 3771 mtx_lock(&nmp->nm_mtx); 3772 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3773 (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) { 3774 mtx_unlock(&nmp->nm_mtx); 3775 alen = *ap->a_len; 3776 
if ((uint64_t)alen > nfs_maxalloclen) 3777 alen = nfs_maxalloclen; 3778 3779 /* Check the file size limit. */ 3780 io.uio_offset = *ap->a_offset; 3781 io.uio_resid = alen; 3782 error = vn_rlimit_fsize(vp, &io, td); 3783 3784 /* 3785 * Flush first to ensure that the allocate adds to the 3786 * file's allocation on the server. 3787 */ 3788 if (error == 0) 3789 error = ncl_flush(vp, MNT_WAIT, td, 1, 0); 3790 if (error == 0) 3791 error = nfsrpc_allocate(vp, *ap->a_offset, alen, 3792 &nfsva, &attrflag, ap->a_cred, td, NULL); 3793 if (error == 0) { 3794 *ap->a_offset += alen; 3795 *ap->a_len -= alen; 3796 nanouptime(&ts); 3797 NFSLOCKNODE(np); 3798 np->n_localmodtime = ts; 3799 NFSUNLOCKNODE(np); 3800 } else if (error == NFSERR_NOTSUPP) { 3801 mtx_lock(&nmp->nm_mtx); 3802 nmp->nm_privflag |= NFSMNTP_NOALLOCATE; 3803 mtx_unlock(&nmp->nm_mtx); 3804 error = EINVAL; 3805 } 3806 } else { 3807 mtx_unlock(&nmp->nm_mtx); 3808 error = EINVAL; 3809 } 3810 if (attrflag != 0) { 3811 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 3812 if (error == 0 && ret != 0) 3813 error = ret; 3814 } 3815 if (error != 0) 3816 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3817 return (error); 3818 } 3819 3820 /* 3821 * nfs deallocate call 3822 */ 3823 static int 3824 nfs_deallocate(struct vop_deallocate_args *ap) 3825 { 3826 struct vnode *vp = ap->a_vp; 3827 struct thread *td = curthread; 3828 struct nfsvattr nfsva; 3829 struct nfsmount *nmp; 3830 struct nfsnode *np; 3831 off_t tlen, mlen; 3832 int attrflag, error, ret; 3833 bool clipped; 3834 struct timespec ts; 3835 3836 error = 0; 3837 attrflag = 0; 3838 nmp = VFSTONFS(vp->v_mount); 3839 np = VTONFS(vp); 3840 mtx_lock(&nmp->nm_mtx); 3841 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3842 (nmp->nm_privflag & NFSMNTP_NODEALLOCATE) == 0) { 3843 mtx_unlock(&nmp->nm_mtx); 3844 tlen = omin(OFF_MAX - *ap->a_offset, *ap->a_len); 3845 NFSCL_DEBUG(4, "dealloc: off=%jd len=%jd maxfilesize=%ju\n", 3846 
(intmax_t)*ap->a_offset, (intmax_t)tlen, 3847 (uintmax_t)nmp->nm_maxfilesize); 3848 if ((uint64_t)*ap->a_offset >= nmp->nm_maxfilesize) { 3849 /* Avoid EFBIG error return from the NFSv4.2 server. */ 3850 *ap->a_len = 0; 3851 return (0); 3852 } 3853 clipped = false; 3854 if ((uint64_t)*ap->a_offset + tlen > nmp->nm_maxfilesize) 3855 tlen = nmp->nm_maxfilesize - *ap->a_offset; 3856 if ((uint64_t)*ap->a_offset < np->n_size) { 3857 /* Limit the len to nfs_maxalloclen before EOF. */ 3858 mlen = omin((off_t)np->n_size - *ap->a_offset, tlen); 3859 if ((uint64_t)mlen > nfs_maxalloclen) { 3860 NFSCL_DEBUG(4, "dealloc: tlen maxalloclen\n"); 3861 tlen = nfs_maxalloclen; 3862 clipped = true; 3863 } 3864 } 3865 if (error == 0) 3866 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 3867 if (error == 0) { 3868 vnode_pager_purge_range(vp, *ap->a_offset, 3869 *ap->a_offset + tlen); 3870 error = nfsrpc_deallocate(vp, *ap->a_offset, tlen, 3871 &nfsva, &attrflag, ap->a_cred, td, NULL); 3872 NFSCL_DEBUG(4, "dealloc: rpc=%d\n", error); 3873 } 3874 if (error == 0) { 3875 NFSCL_DEBUG(4, "dealloc: attrflag=%d na_size=%ju\n", 3876 attrflag, (uintmax_t)nfsva.na_size); 3877 nanouptime(&ts); 3878 NFSLOCKNODE(np); 3879 np->n_localmodtime = ts; 3880 NFSUNLOCKNODE(np); 3881 if (attrflag != 0) { 3882 if ((uint64_t)*ap->a_offset < nfsva.na_size) 3883 *ap->a_offset += omin((off_t) 3884 nfsva.na_size - *ap->a_offset, 3885 tlen); 3886 } 3887 if (clipped && tlen < *ap->a_len) 3888 *ap->a_len -= tlen; 3889 else 3890 *ap->a_len = 0; 3891 } else if (error == NFSERR_NOTSUPP) { 3892 mtx_lock(&nmp->nm_mtx); 3893 nmp->nm_privflag |= NFSMNTP_NODEALLOCATE; 3894 mtx_unlock(&nmp->nm_mtx); 3895 } 3896 } else { 3897 mtx_unlock(&nmp->nm_mtx); 3898 error = EIO; 3899 } 3900 /* 3901 * If the NFS server cannot perform the Deallocate operation, just call 3902 * vop_stddeallocate() to perform it. 
3903 */ 3904 if (error != 0 && error != NFSERR_FBIG && error != NFSERR_INVAL) { 3905 error = vop_stddeallocate(ap); 3906 NFSCL_DEBUG(4, "dealloc: stddeallocate=%d\n", error); 3907 } 3908 if (attrflag != 0) { 3909 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 3910 if (error == 0 && ret != 0) 3911 error = ret; 3912 } 3913 if (error != 0) 3914 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3915 return (error); 3916 } 3917 3918 /* 3919 * nfs copy_file_range call 3920 */ 3921 static int 3922 nfs_copy_file_range(struct vop_copy_file_range_args *ap) 3923 { 3924 struct vnode *invp = ap->a_invp; 3925 struct vnode *outvp = ap->a_outvp; 3926 struct mount *mp; 3927 struct nfsvattr innfsva, outnfsva; 3928 struct vattr *vap; 3929 struct uio io; 3930 struct nfsmount *nmp; 3931 size_t len, len2; 3932 int error, inattrflag, outattrflag, ret, ret2; 3933 off_t inoff, outoff; 3934 bool consecutive, must_commit, tryoutcred; 3935 3936 ret = ret2 = 0; 3937 nmp = VFSTONFS(invp->v_mount); 3938 mtx_lock(&nmp->nm_mtx); 3939 /* NFSv4.2 Copy is not permitted for infile == outfile. */ 3940 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3941 (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0 || invp == outvp) { 3942 mtx_unlock(&nmp->nm_mtx); 3943 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 3944 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 3945 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 3946 return (error); 3947 } 3948 mtx_unlock(&nmp->nm_mtx); 3949 3950 /* Lock both vnodes, avoiding risk of deadlock. 
*/ 3951 do { 3952 mp = NULL; 3953 error = vn_start_write(outvp, &mp, V_WAIT); 3954 if (error == 0) { 3955 error = vn_lock(outvp, LK_EXCLUSIVE); 3956 if (error == 0) { 3957 error = vn_lock(invp, LK_SHARED | LK_NOWAIT); 3958 if (error == 0) 3959 break; 3960 VOP_UNLOCK(outvp); 3961 if (mp != NULL) 3962 vn_finished_write(mp); 3963 mp = NULL; 3964 error = vn_lock(invp, LK_SHARED); 3965 if (error == 0) 3966 VOP_UNLOCK(invp); 3967 } 3968 } 3969 if (mp != NULL) 3970 vn_finished_write(mp); 3971 } while (error == 0); 3972 if (error != 0) 3973 return (error); 3974 3975 /* 3976 * Do the vn_rlimit_fsize() check. Should this be above the VOP layer? 3977 */ 3978 io.uio_offset = *ap->a_outoffp; 3979 io.uio_resid = *ap->a_lenp; 3980 error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd); 3981 3982 /* 3983 * Flush the input file so that the data is up to date before 3984 * the copy. Flush writes for the output file so that they 3985 * do not overwrite the data copied to the output file by the Copy. 3986 * Set the commit argument for both flushes so that the data is on 3987 * stable storage before the Copy RPC. This is done in case the 3988 * server reboots during the Copy and needs to be redone. 3989 */ 3990 if (error == 0) 3991 error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0); 3992 if (error == 0) 3993 error = ncl_flush(outvp, MNT_WAIT, curthread, 1, 0); 3994 3995 /* Do the actual NFSv4.2 RPC. */ 3996 len = *ap->a_lenp; 3997 mtx_lock(&nmp->nm_mtx); 3998 if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0) 3999 consecutive = true; 4000 else 4001 consecutive = false; 4002 mtx_unlock(&nmp->nm_mtx); 4003 inoff = *ap->a_inoffp; 4004 outoff = *ap->a_outoffp; 4005 tryoutcred = true; 4006 must_commit = false; 4007 if (error == 0) { 4008 vap = &VTONFS(invp)->n_vattr.na_vattr; 4009 error = VOP_GETATTR(invp, vap, ap->a_incred); 4010 if (error == 0) { 4011 /* 4012 * Clip "len" at va_size so that RFC compliant servers 4013 * will not reply NFSERR_INVAL. 
4014 * Setting "len == 0" for the RPC would be preferred, 4015 * but some Linux servers do not support that. 4016 */ 4017 if (inoff >= vap->va_size) 4018 *ap->a_lenp = len = 0; 4019 else if (inoff + len > vap->va_size) 4020 *ap->a_lenp = len = vap->va_size - inoff; 4021 } else 4022 error = 0; 4023 } 4024 4025 /* 4026 * len will be set to 0 upon a successful Copy RPC. 4027 * As such, this only loops when the Copy RPC needs to be retried. 4028 */ 4029 while (len > 0 && error == 0) { 4030 inattrflag = outattrflag = 0; 4031 len2 = len; 4032 if (tryoutcred) 4033 error = nfsrpc_copy_file_range(invp, ap->a_inoffp, 4034 outvp, ap->a_outoffp, &len2, ap->a_flags, 4035 &inattrflag, &innfsva, &outattrflag, &outnfsva, 4036 ap->a_outcred, consecutive, &must_commit); 4037 else 4038 error = nfsrpc_copy_file_range(invp, ap->a_inoffp, 4039 outvp, ap->a_outoffp, &len2, ap->a_flags, 4040 &inattrflag, &innfsva, &outattrflag, &outnfsva, 4041 ap->a_incred, consecutive, &must_commit); 4042 if (inattrflag != 0) 4043 ret = nfscl_loadattrcache(&invp, &innfsva, NULL, NULL, 4044 0, 1); 4045 if (outattrflag != 0) 4046 ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL, 4047 NULL, 1, 1); 4048 if (error == 0) { 4049 if (consecutive == false) { 4050 if (len2 == len) { 4051 mtx_lock(&nmp->nm_mtx); 4052 nmp->nm_privflag |= 4053 NFSMNTP_NOCONSECUTIVE; 4054 mtx_unlock(&nmp->nm_mtx); 4055 } else 4056 error = NFSERR_OFFLOADNOREQS; 4057 } 4058 *ap->a_lenp = len2; 4059 len = 0; 4060 if (len2 > 0 && must_commit && error == 0) 4061 error = ncl_commit(outvp, outoff, *ap->a_lenp, 4062 ap->a_outcred, curthread); 4063 if (error == 0 && ret != 0) 4064 error = ret; 4065 if (error == 0 && ret2 != 0) 4066 error = ret2; 4067 } else if (error == NFSERR_OFFLOADNOREQS && consecutive) { 4068 /* 4069 * Try consecutive == false, which is ok only if all 4070 * bytes are copied. 4071 * If only some bytes were copied when consecutive 4072 * is false, there is no way to know which bytes 4073 * still need to be written. 
4074 */ 4075 consecutive = false; 4076 error = 0; 4077 } else if (error == NFSERR_ACCES && tryoutcred) { 4078 /* Try again with incred. */ 4079 tryoutcred = false; 4080 error = 0; 4081 } 4082 if (error == NFSERR_STALEWRITEVERF) { 4083 /* 4084 * Server rebooted, so do it all again. 4085 */ 4086 *ap->a_inoffp = inoff; 4087 *ap->a_outoffp = outoff; 4088 len = *ap->a_lenp; 4089 must_commit = false; 4090 error = 0; 4091 } 4092 } 4093 VOP_UNLOCK(invp); 4094 VOP_UNLOCK(outvp); 4095 if (mp != NULL) 4096 vn_finished_write(mp); 4097 if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS || 4098 error == NFSERR_ACCES) { 4099 /* 4100 * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can 4101 * use a_incred for the read and a_outcred for the write, so 4102 * try this for NFSERR_ACCES failures for the Copy. 4103 * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can 4104 * never succeed, so disable it. 4105 */ 4106 if (error != NFSERR_ACCES) { 4107 /* Can never do Copy on this mount. */ 4108 mtx_lock(&nmp->nm_mtx); 4109 nmp->nm_privflag |= NFSMNTP_NOCOPY; 4110 mtx_unlock(&nmp->nm_mtx); 4111 } 4112 *ap->a_inoffp = inoff; 4113 *ap->a_outoffp = outoff; 4114 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 4115 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 4116 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 4117 } else if (error != 0) 4118 *ap->a_lenp = 0; 4119 4120 if (error != 0) 4121 error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0); 4122 return (error); 4123 } 4124 4125 /* 4126 * nfs ioctl call 4127 */ 4128 static int 4129 nfs_ioctl(struct vop_ioctl_args *ap) 4130 { 4131 struct vnode *vp = ap->a_vp; 4132 struct nfsvattr nfsva; 4133 struct nfsmount *nmp; 4134 int attrflag, content, error, ret; 4135 bool eof = false; /* shut up compiler. 
*/ 4136 4137 if (vp->v_type != VREG) 4138 return (ENOTTY); 4139 nmp = VFSTONFS(vp->v_mount); 4140 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) { 4141 error = vop_stdioctl(ap); 4142 return (error); 4143 } 4144 4145 /* Do the actual NFSv4.2 RPC. */ 4146 switch (ap->a_command) { 4147 case FIOSEEKDATA: 4148 content = NFSV4CONTENT_DATA; 4149 break; 4150 case FIOSEEKHOLE: 4151 content = NFSV4CONTENT_HOLE; 4152 break; 4153 default: 4154 return (ENOTTY); 4155 } 4156 4157 error = vn_lock(vp, LK_SHARED); 4158 if (error != 0) 4159 return (EBADF); 4160 attrflag = 0; 4161 if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size) 4162 error = ENXIO; 4163 else { 4164 /* 4165 * Flush all writes, so that the server is up to date. 4166 * Although a Commit is not required, the commit argument 4167 * is set so that, for a pNFS File/Flexible File Layout 4168 * server, the LayoutCommit will be done to ensure the file 4169 * size is up to date on the Metadata Server. 4170 */ 4171 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0); 4172 if (error == 0) 4173 error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof, 4174 content, ap->a_cred, &nfsva, &attrflag); 4175 /* If at eof for FIOSEEKDATA, return ENXIO. 
*/ 4176 if (eof && error == 0 && content == NFSV4CONTENT_DATA) 4177 error = ENXIO; 4178 } 4179 if (attrflag != 0) { 4180 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4181 if (error == 0 && ret != 0) 4182 error = ret; 4183 } 4184 NFSVOPUNLOCK(vp); 4185 4186 if (error != 0) 4187 error = ENXIO; 4188 return (error); 4189 } 4190 4191 /* 4192 * nfs getextattr call 4193 */ 4194 static int 4195 nfs_getextattr(struct vop_getextattr_args *ap) 4196 { 4197 struct vnode *vp = ap->a_vp; 4198 struct nfsmount *nmp; 4199 struct ucred *cred; 4200 struct thread *td = ap->a_td; 4201 struct nfsvattr nfsva; 4202 ssize_t len; 4203 int attrflag, error, ret; 4204 4205 nmp = VFSTONFS(vp->v_mount); 4206 mtx_lock(&nmp->nm_mtx); 4207 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4208 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4209 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4210 mtx_unlock(&nmp->nm_mtx); 4211 return (EOPNOTSUPP); 4212 } 4213 mtx_unlock(&nmp->nm_mtx); 4214 4215 cred = ap->a_cred; 4216 if (cred == NULL) 4217 cred = td->td_ucred; 4218 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4219 attrflag = 0; 4220 error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva, 4221 &attrflag, cred, td); 4222 if (attrflag != 0) { 4223 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4224 if (error == 0 && ret != 0) 4225 error = ret; 4226 } 4227 if (error == 0 && ap->a_size != NULL) 4228 *ap->a_size = len; 4229 4230 switch (error) { 4231 case NFSERR_NOTSUPP: 4232 case NFSERR_OPILLEGAL: 4233 mtx_lock(&nmp->nm_mtx); 4234 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4235 mtx_unlock(&nmp->nm_mtx); 4236 error = EOPNOTSUPP; 4237 break; 4238 case NFSERR_NOXATTR: 4239 case NFSERR_XATTR2BIG: 4240 error = ENOATTR; 4241 break; 4242 default: 4243 error = nfscl_maperr(td, error, 0, 0); 4244 break; 4245 } 4246 return (error); 4247 } 4248 4249 /* 4250 * nfs setextattr call 4251 */ 4252 static int 4253 nfs_setextattr(struct vop_setextattr_args *ap) 4254 { 4255 struct vnode *vp = ap->a_vp; 4256 struct nfsmount *nmp; 4257 struct ucred *cred; 4258 struct thread *td = ap->a_td; 4259 struct nfsvattr nfsva; 4260 int attrflag, error, ret; 4261 4262 nmp = VFSTONFS(vp->v_mount); 4263 mtx_lock(&nmp->nm_mtx); 4264 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4265 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4266 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4267 mtx_unlock(&nmp->nm_mtx); 4268 return (EOPNOTSUPP); 4269 } 4270 mtx_unlock(&nmp->nm_mtx); 4271 4272 if (ap->a_uio->uio_resid < 0) 4273 return (EINVAL); 4274 cred = ap->a_cred; 4275 if (cred == NULL) 4276 cred = td->td_ucred; 4277 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4278 attrflag = 0; 4279 error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva, 4280 &attrflag, cred, td); 4281 if (attrflag != 0) { 4282 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4283 if (error == 0 && ret != 0) 4284 error = ret; 4285 } 4286 4287 switch (error) { 4288 case NFSERR_NOTSUPP: 4289 case NFSERR_OPILLEGAL: 4290 mtx_lock(&nmp->nm_mtx); 4291 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4292 mtx_unlock(&nmp->nm_mtx); 4293 error = EOPNOTSUPP; 4294 break; 4295 case NFSERR_NOXATTR: 4296 case NFSERR_XATTR2BIG: 4297 error = ENOATTR; 4298 break; 4299 default: 4300 error = nfscl_maperr(td, error, 0, 0); 4301 break; 4302 } 4303 return (error); 4304 } 4305 4306 /* 4307 * nfs listextattr call 4308 */ 4309 static int 4310 nfs_listextattr(struct vop_listextattr_args *ap) 4311 { 4312 struct vnode *vp = ap->a_vp; 4313 struct nfsmount *nmp; 4314 struct ucred *cred; 4315 struct thread *td = ap->a_td; 4316 struct nfsvattr nfsva; 4317 size_t len, len2; 4318 uint64_t cookie; 4319 int attrflag, error, ret; 4320 bool eof; 4321 4322 nmp = VFSTONFS(vp->v_mount); 4323 mtx_lock(&nmp->nm_mtx); 4324 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4325 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4326 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4327 mtx_unlock(&nmp->nm_mtx); 4328 return (EOPNOTSUPP); 4329 } 4330 mtx_unlock(&nmp->nm_mtx); 4331 4332 cred = ap->a_cred; 4333 if (cred == NULL) 4334 cred = td->td_ucred; 4335 4336 /* Loop around doing List Extended Attribute RPCs. 
*/ 4337 eof = false; 4338 cookie = 0; 4339 len2 = 0; 4340 error = 0; 4341 while (!eof && error == 0) { 4342 len = nmp->nm_rsize; 4343 attrflag = 0; 4344 error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof, 4345 &nfsva, &attrflag, cred, td); 4346 if (attrflag != 0) { 4347 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 4348 1); 4349 if (error == 0 && ret != 0) 4350 error = ret; 4351 } 4352 if (error == 0) { 4353 len2 += len; 4354 if (len2 > SSIZE_MAX) 4355 error = ENOATTR; 4356 } 4357 } 4358 if (error == 0 && ap->a_size != NULL) 4359 *ap->a_size = len2; 4360 4361 switch (error) { 4362 case NFSERR_NOTSUPP: 4363 case NFSERR_OPILLEGAL: 4364 mtx_lock(&nmp->nm_mtx); 4365 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4366 mtx_unlock(&nmp->nm_mtx); 4367 error = EOPNOTSUPP; 4368 break; 4369 case NFSERR_NOXATTR: 4370 case NFSERR_XATTR2BIG: 4371 error = ENOATTR; 4372 break; 4373 default: 4374 error = nfscl_maperr(td, error, 0, 0); 4375 break; 4376 } 4377 return (error); 4378 } 4379 4380 /* 4381 * nfs setextattr call 4382 */ 4383 static int 4384 nfs_deleteextattr(struct vop_deleteextattr_args *ap) 4385 { 4386 struct vnode *vp = ap->a_vp; 4387 struct nfsmount *nmp; 4388 struct nfsvattr nfsva; 4389 int attrflag, error, ret; 4390 4391 nmp = VFSTONFS(vp->v_mount); 4392 mtx_lock(&nmp->nm_mtx); 4393 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4394 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4395 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4396 mtx_unlock(&nmp->nm_mtx); 4397 return (EOPNOTSUPP); 4398 } 4399 mtx_unlock(&nmp->nm_mtx); 4400 4401 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4402 attrflag = 0; 4403 error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred, 4404 ap->a_td); 4405 if (attrflag != 0) { 4406 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 4407 if (error == 0 && ret != 0) 4408 error = ret; 4409 } 4410 4411 switch (error) { 4412 case NFSERR_NOTSUPP: 4413 case NFSERR_OPILLEGAL: 4414 mtx_lock(&nmp->nm_mtx); 4415 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4416 mtx_unlock(&nmp->nm_mtx); 4417 error = EOPNOTSUPP; 4418 break; 4419 case NFSERR_NOXATTR: 4420 case NFSERR_XATTR2BIG: 4421 error = ENOATTR; 4422 break; 4423 default: 4424 error = nfscl_maperr(ap->a_td, error, 0, 0); 4425 break; 4426 } 4427 return (error); 4428 } 4429 4430 /* 4431 * Return POSIX pathconf information applicable to nfs filesystems. 4432 */ 4433 static int 4434 nfs_pathconf(struct vop_pathconf_args *ap) 4435 { 4436 struct nfsv3_pathconf pc; 4437 struct nfsvattr nfsva; 4438 struct vnode *vp = ap->a_vp; 4439 struct nfsmount *nmp; 4440 struct thread *td = curthread; 4441 off_t off; 4442 bool eof; 4443 int attrflag, error; 4444 4445 if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || 4446 ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || 4447 ap->a_name == _PC_NO_TRUNC)) || 4448 (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) { 4449 /* 4450 * Since only the above 4 a_names are returned by the NFSv3 4451 * Pathconf RPC, there is no point in doing it for others. 4452 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can 4453 * be used for _PC_NFS4_ACL as well. 4454 */ 4455 error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, 4456 &attrflag, NULL); 4457 if (attrflag != 0) 4458 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 4459 1); 4460 if (error != 0) 4461 return (error); 4462 } else { 4463 /* 4464 * For NFSv2 (or NFSv3 when not one of the above 4 a_names), 4465 * just fake them. 
4466 */ 4467 pc.pc_linkmax = NFS_LINK_MAX; 4468 pc.pc_namemax = NFS_MAXNAMLEN; 4469 pc.pc_notrunc = 1; 4470 pc.pc_chownrestricted = 1; 4471 pc.pc_caseinsensitive = 0; 4472 pc.pc_casepreserving = 1; 4473 error = 0; 4474 } 4475 switch (ap->a_name) { 4476 case _PC_LINK_MAX: 4477 #ifdef _LP64 4478 *ap->a_retval = pc.pc_linkmax; 4479 #else 4480 *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); 4481 #endif 4482 break; 4483 case _PC_NAME_MAX: 4484 *ap->a_retval = pc.pc_namemax; 4485 break; 4486 case _PC_PIPE_BUF: 4487 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 4488 *ap->a_retval = PIPE_BUF; 4489 else 4490 error = EINVAL; 4491 break; 4492 case _PC_CHOWN_RESTRICTED: 4493 *ap->a_retval = pc.pc_chownrestricted; 4494 break; 4495 case _PC_NO_TRUNC: 4496 *ap->a_retval = pc.pc_notrunc; 4497 break; 4498 case _PC_ACL_NFS4: 4499 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && 4500 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) 4501 *ap->a_retval = 1; 4502 else 4503 *ap->a_retval = 0; 4504 break; 4505 case _PC_ACL_PATH_MAX: 4506 if (NFS_ISV4(vp)) 4507 *ap->a_retval = ACL_MAX_ENTRIES; 4508 else 4509 *ap->a_retval = 3; 4510 break; 4511 case _PC_PRIO_IO: 4512 *ap->a_retval = 0; 4513 break; 4514 case _PC_SYNC_IO: 4515 *ap->a_retval = 0; 4516 break; 4517 case _PC_ALLOC_SIZE_MIN: 4518 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; 4519 break; 4520 case _PC_FILESIZEBITS: 4521 if (NFS_ISV34(vp)) 4522 *ap->a_retval = 64; 4523 else 4524 *ap->a_retval = 32; 4525 break; 4526 case _PC_REC_INCR_XFER_SIZE: 4527 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4528 break; 4529 case _PC_REC_MAX_XFER_SIZE: 4530 *ap->a_retval = -1; /* means ``unlimited'' */ 4531 break; 4532 case _PC_REC_MIN_XFER_SIZE: 4533 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4534 break; 4535 case _PC_REC_XFER_ALIGN: 4536 *ap->a_retval = PAGE_SIZE; 4537 break; 4538 case _PC_SYMLINK_MAX: 4539 *ap->a_retval = NFS_MAXPATHLEN; 4540 break; 4541 case _PC_MIN_HOLE_SIZE: 4542 /* Only some NFSv4.2 servers 
support Seek for Holes. */ 4543 *ap->a_retval = 0; 4544 nmp = VFSTONFS(vp->v_mount); 4545 if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) { 4546 /* 4547 * NFSv4.2 doesn't have an attribute for hole size, 4548 * so all we can do is see if the Seek operation is 4549 * supported and then use f_iosize as a "best guess". 4550 */ 4551 mtx_lock(&nmp->nm_mtx); 4552 if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) { 4553 mtx_unlock(&nmp->nm_mtx); 4554 off = 0; 4555 attrflag = 0; 4556 error = nfsrpc_seek(vp, &off, &eof, 4557 NFSV4CONTENT_HOLE, td->td_ucred, &nfsva, 4558 &attrflag); 4559 if (attrflag != 0) 4560 nfscl_loadattrcache(&vp, &nfsva, 4561 NULL, NULL, 0, 1); 4562 mtx_lock(&nmp->nm_mtx); 4563 if (error == NFSERR_NOTSUPP) 4564 nmp->nm_privflag |= NFSMNTP_SEEKTESTED; 4565 else 4566 nmp->nm_privflag |= NFSMNTP_SEEKTESTED | 4567 NFSMNTP_SEEK; 4568 error = 0; 4569 } 4570 if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0) 4571 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4572 mtx_unlock(&nmp->nm_mtx); 4573 } 4574 break; 4575 4576 default: 4577 error = vop_stdpathconf(ap); 4578 break; 4579 } 4580 return (error); 4581 } 4582