1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 35 */ 36 37 #include <sys/cdefs.h> 38 /* 39 * vnode op calls for Sun NFS version 2, 3 and 4 40 */ 41 42 #include "opt_inet.h" 43 44 #include <sys/param.h> 45 #include <sys/kernel.h> 46 #include <sys/systm.h> 47 #include <sys/resourcevar.h> 48 #include <sys/proc.h> 49 #include <sys/mount.h> 50 #include <sys/bio.h> 51 #include <sys/buf.h> 52 #include <sys/extattr.h> 53 #include <sys/filio.h> 54 #include <sys/jail.h> 55 #include <sys/malloc.h> 56 #include <sys/mbuf.h> 57 #include <sys/namei.h> 58 #include <sys/socket.h> 59 #include <sys/vnode.h> 60 #include <sys/dirent.h> 61 #include <sys/fcntl.h> 62 #include <sys/lockf.h> 63 #include <sys/stat.h> 64 #include <sys/sysctl.h> 65 #include <sys/signalvar.h> 66 67 #include <vm/vm.h> 68 #include <vm/vm_extern.h> 69 #include <vm/vm_object.h> 70 #include <vm/vnode_pager.h> 71 72 #include <fs/nfs/nfsport.h> 73 #include <fs/nfsclient/nfsnode.h> 74 #include <fs/nfsclient/nfsmount.h> 75 #include <fs/nfsclient/nfs.h> 76 #include <fs/nfsclient/nfs_kdtrace.h> 77 78 #include <net/if.h> 79 #include <netinet/in.h> 80 #include <netinet/in_var.h> 81 82 #include <nfs/nfs_lock.h> 83 84 #ifdef KDTRACE_HOOKS 85 #include <sys/dtrace_bsd.h> 86 87 dtrace_nfsclient_accesscache_flush_probe_func_t 88 dtrace_nfscl_accesscache_flush_done_probe; 89 uint32_t nfscl_accesscache_flush_done_id; 90 91 dtrace_nfsclient_accesscache_get_probe_func_t 92 dtrace_nfscl_accesscache_get_hit_probe, 93 dtrace_nfscl_accesscache_get_miss_probe; 94 uint32_t nfscl_accesscache_get_hit_id; 95 uint32_t nfscl_accesscache_get_miss_id; 96 97 dtrace_nfsclient_accesscache_load_probe_func_t 98 dtrace_nfscl_accesscache_load_done_probe; 99 uint32_t nfscl_accesscache_load_done_id; 100 #endif /* !KDTRACE_HOOKS */ 101 102 /* Defs */ 103 #define TRUE 1 104 #define FALSE 0 105 106 extern struct nfsstatsv1 nfsstatsv1; 107 extern int nfsrv_useacl; 108 extern int nfscl_debuglevel; 109 NFSCLSTATEMUTEX; 110 MALLOC_DECLARE(M_NEWNFSREQ); 111 
112 static vop_read_t nfsfifo_read; 113 static vop_write_t nfsfifo_write; 114 static vop_close_t nfsfifo_close; 115 static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, 116 struct thread *); 117 static int nfs_get_namedattrdir(struct vnode *, struct componentname *, 118 struct vnode **); 119 static vop_lookup_t nfs_lookup; 120 static vop_create_t nfs_create; 121 static vop_mknod_t nfs_mknod; 122 static vop_open_t nfs_open; 123 static vop_pathconf_t nfs_pathconf; 124 static vop_close_t nfs_close; 125 static vop_access_t nfs_access; 126 static vop_getattr_t nfs_getattr; 127 static vop_setattr_t nfs_setattr; 128 static vop_read_t nfs_read; 129 static vop_fsync_t nfs_fsync; 130 static vop_remove_t nfs_remove; 131 static vop_link_t nfs_link; 132 static vop_rename_t nfs_rename; 133 static vop_mkdir_t nfs_mkdir; 134 static vop_rmdir_t nfs_rmdir; 135 static vop_symlink_t nfs_symlink; 136 static vop_readdir_t nfs_readdir; 137 static vop_strategy_t nfs_strategy; 138 static int nfs_lookitup(struct vnode *, char *, int, 139 struct ucred *, struct thread *, struct nfsnode **); 140 static int nfs_sillyrename(struct vnode *, struct vnode *, 141 struct componentname *); 142 static vop_access_t nfsspec_access; 143 static vop_readlink_t nfs_readlink; 144 static vop_print_t nfs_print; 145 static vop_advlock_t nfs_advlock; 146 static vop_advlockasync_t nfs_advlockasync; 147 static vop_getacl_t nfs_getacl; 148 static vop_setacl_t nfs_setacl; 149 static vop_advise_t nfs_advise; 150 static vop_allocate_t nfs_allocate; 151 static vop_deallocate_t nfs_deallocate; 152 static vop_copy_file_range_t nfs_copy_file_range; 153 static vop_ioctl_t nfs_ioctl; 154 static vop_getextattr_t nfs_getextattr; 155 static vop_setextattr_t nfs_setextattr; 156 static vop_listextattr_t nfs_listextattr; 157 static vop_deleteextattr_t nfs_deleteextattr; 158 static vop_delayed_setsize_t nfs_delayed_setsize; 159 160 /* 161 * Global vfs data structures for nfs 162 */ 163 164 static struct 
vop_vector newnfs_vnodeops_nosig = { 165 .vop_default = &default_vnodeops, 166 .vop_access = nfs_access, 167 .vop_advlock = nfs_advlock, 168 .vop_advlockasync = nfs_advlockasync, 169 .vop_close = nfs_close, 170 .vop_create = nfs_create, 171 .vop_delayed_setsize = nfs_delayed_setsize, 172 .vop_fsync = nfs_fsync, 173 .vop_getattr = nfs_getattr, 174 .vop_getpages = ncl_getpages, 175 .vop_putpages = ncl_putpages, 176 .vop_inactive = ncl_inactive, 177 .vop_link = nfs_link, 178 .vop_lookup = nfs_lookup, 179 .vop_mkdir = nfs_mkdir, 180 .vop_mknod = nfs_mknod, 181 .vop_open = nfs_open, 182 .vop_pathconf = nfs_pathconf, 183 .vop_print = nfs_print, 184 .vop_read = nfs_read, 185 .vop_readdir = nfs_readdir, 186 .vop_readlink = nfs_readlink, 187 .vop_reclaim = ncl_reclaim, 188 .vop_remove = nfs_remove, 189 .vop_rename = nfs_rename, 190 .vop_rmdir = nfs_rmdir, 191 .vop_setattr = nfs_setattr, 192 .vop_strategy = nfs_strategy, 193 .vop_symlink = nfs_symlink, 194 .vop_write = ncl_write, 195 .vop_getacl = nfs_getacl, 196 .vop_setacl = nfs_setacl, 197 .vop_advise = nfs_advise, 198 .vop_allocate = nfs_allocate, 199 .vop_deallocate = nfs_deallocate, 200 .vop_copy_file_range = nfs_copy_file_range, 201 .vop_ioctl = nfs_ioctl, 202 .vop_getextattr = nfs_getextattr, 203 .vop_setextattr = nfs_setextattr, 204 .vop_listextattr = nfs_listextattr, 205 .vop_deleteextattr = nfs_deleteextattr, 206 }; 207 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops_nosig); 208 209 static int 210 nfs_vnodeops_bypass(struct vop_generic_args *a) 211 { 212 213 return (vop_sigdefer(&newnfs_vnodeops_nosig, a)); 214 } 215 216 struct vop_vector newnfs_vnodeops = { 217 .vop_default = &default_vnodeops, 218 .vop_bypass = nfs_vnodeops_bypass, 219 }; 220 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops); 221 222 static struct vop_vector newnfs_fifoops_nosig = { 223 .vop_default = &fifo_specops, 224 .vop_access = nfsspec_access, 225 .vop_close = nfsfifo_close, 226 .vop_fsync = nfs_fsync, 227 .vop_getattr = nfs_getattr, 228 .vop_inactive = 
ncl_inactive, 229 .vop_pathconf = nfs_pathconf, 230 .vop_print = nfs_print, 231 .vop_read = nfsfifo_read, 232 .vop_reclaim = ncl_reclaim, 233 .vop_setattr = nfs_setattr, 234 .vop_write = nfsfifo_write, 235 }; 236 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops_nosig); 237 238 static int 239 nfs_fifoops_bypass(struct vop_generic_args *a) 240 { 241 242 return (vop_sigdefer(&newnfs_fifoops_nosig, a)); 243 } 244 245 struct vop_vector newnfs_fifoops = { 246 .vop_default = &default_vnodeops, 247 .vop_bypass = nfs_fifoops_bypass, 248 }; 249 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops); 250 251 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 252 struct componentname *cnp, struct vattr *vap); 253 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 254 int namelen, struct ucred *cred, struct thread *td, bool silly); 255 static void nfs_removestatus(struct vnode *vp, nfsremove_status file_status, 256 bool silly, struct thread *td); 257 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, 258 char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, 259 char *tnameptr, int tnamelen, bool silly, struct ucred *cred, 260 struct thread *td); 261 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, 262 struct componentname *scnp, struct sillyrename *sp); 263 264 /* 265 * Global variables 266 */ 267 SYSCTL_DECL(_vfs_nfs); 268 269 static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 270 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 271 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); 272 273 static int nfs_prime_access_cache = 0; 274 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, 275 &nfs_prime_access_cache, 0, 276 "Prime NFS ACCESS cache when fetching attributes"); 277 278 static int newnfs_commit_on_close = 0; 279 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, 280 &newnfs_commit_on_close, 0, "write+commit on close, else only write"); 281 282 static int 
nfs_clean_pages_on_close = 1; 283 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 284 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 285 286 int newnfs_directio_enable = 0; 287 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, 288 &newnfs_directio_enable, 0, "Enable NFS directio"); 289 290 int nfs_keep_dirty_on_error; 291 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, 292 &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); 293 294 /* 295 * This sysctl allows other processes to mmap a file that has been opened 296 * O_DIRECT by a process. In general, having processes mmap the file while 297 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 298 * this by default to prevent DoS attacks - to prevent a malicious user from 299 * opening up files O_DIRECT preventing other users from mmap'ing these 300 * files. "Protected" environments where stricter consistency guarantees are 301 * required can disable this knob. The process that opened the file O_DIRECT 302 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 303 * meaningful. 304 */ 305 int newnfs_directio_allow_mmap = 1; 306 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, 307 &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 308 309 static uint64_t nfs_maxalloclen = 64 * 1024 * 1024; 310 SYSCTL_U64(_vfs_nfs, OID_AUTO, maxalloclen, CTLFLAG_RW, 311 &nfs_maxalloclen, 0, "NFS max allocate/deallocate length"); 312 313 #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ 314 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ 315 | NFSACCESS_DELETE | NFSACCESS_LOOKUP) 316 317 /* 318 * SMP Locking Note : 319 * The list of locks after the description of the lock is the ordering 320 * of other locks acquired with the lock held. 321 * np->n_mtx : Protects the fields in the nfsnode. 
322 VM Object Lock 323 VI_MTX (acquired indirectly) 324 * nmp->nm_mtx : Protects the fields in the nfsmount. 325 rep->r_mtx 326 * ncl_iod_mutex : Global lock, protects shared nfsiod state. 327 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 328 nmp->nm_mtx 329 rep->r_mtx 330 * rep->r_mtx : Protects the fields in an nfsreq. 331 */ 332 333 static int 334 nfs_delayed_setsize(struct vop_delayed_setsize_args *ap) 335 { 336 struct vnode *vp; 337 struct nfsnode *np; 338 u_quad_t nsize; 339 340 vp = ap->a_vp; 341 np = VTONFS(vp); 342 if (np != NULL) { 343 NFSLOCKNODE(np); 344 nsize = np->n_size; 345 NFSUNLOCKNODE(np); 346 vnode_pager_setsize(vp, nsize); 347 } 348 return (0); 349 } 350 351 static int 352 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 353 struct ucred *cred, u_int32_t *retmode) 354 { 355 int error = 0, attrflag, i, lrupos; 356 u_int32_t rmode; 357 struct nfsnode *np = VTONFS(vp); 358 struct nfsvattr nfsva; 359 360 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 361 &rmode); 362 if (attrflag) 363 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 364 if (!error) { 365 lrupos = 0; 366 NFSLOCKNODE(np); 367 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 368 if (np->n_accesscache[i].uid == cred->cr_uid) { 369 np->n_accesscache[i].mode = rmode; 370 np->n_accesscache[i].stamp = time_second; 371 break; 372 } 373 if (i > 0 && np->n_accesscache[i].stamp < 374 np->n_accesscache[lrupos].stamp) 375 lrupos = i; 376 } 377 if (i == NFS_ACCESSCACHESIZE) { 378 np->n_accesscache[lrupos].uid = cred->cr_uid; 379 np->n_accesscache[lrupos].mode = rmode; 380 np->n_accesscache[lrupos].stamp = time_second; 381 } 382 NFSUNLOCKNODE(np); 383 if (retmode != NULL) 384 *retmode = rmode; 385 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 386 } else if (NFS_ISV4(vp)) { 387 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 388 } 389 #ifdef KDTRACE_HOOKS 390 if (error != 0) 391 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 
392 error); 393 #endif 394 return (error); 395 } 396 397 /* 398 * nfs access vnode op. 399 * For nfs version 2, just return ok. File accesses may fail later. 400 * For nfs version 3, use the access rpc to check accessibility. If file modes 401 * are changed on the server, accesses might still fail later. 402 */ 403 static int 404 nfs_access(struct vop_access_args *ap) 405 { 406 struct vnode *vp = ap->a_vp; 407 int error = 0, i, gotahit; 408 u_int32_t mode, wmode, rmode; 409 int v34 = NFS_ISV34(vp); 410 struct nfsnode *np = VTONFS(vp); 411 412 /* 413 * Disallow write attempts on filesystems mounted read-only; 414 * unless the file is a socket, fifo, or a block or character 415 * device resident on the filesystem. 416 */ 417 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 418 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 419 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 420 switch (vp->v_type) { 421 case VREG: 422 case VDIR: 423 case VLNK: 424 return (EROFS); 425 default: 426 break; 427 } 428 } 429 430 /* 431 * For NFSv4, check for a delegation with an Allow ACE, to see 432 * if that permits access. 433 */ 434 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) != 0) { 435 error = nfscl_delegacecheck(vp, ap->a_accmode, ap->a_cred); 436 if (error == 0) 437 return (error); 438 error = 0; 439 } 440 441 /* 442 * For nfs v3 or v4, check to see if we have done this recently, and if 443 * so return our cached result instead of making an ACCESS call. 444 * If not, do an access rpc, otherwise you are stuck emulating 445 * ufs_access() locally using the vattr. This may not be correct, 446 * since the server may apply other access criteria such as 447 * client uid-->server uid mapping that we do not know about. 
448 */ 449 if (v34) { 450 if (ap->a_accmode & VREAD) 451 mode = NFSACCESS_READ; 452 else 453 mode = 0; 454 if (vp->v_type != VDIR) { 455 if (ap->a_accmode & VWRITE) 456 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 457 if (ap->a_accmode & VAPPEND) 458 mode |= NFSACCESS_EXTEND; 459 if (ap->a_accmode & VEXEC) 460 mode |= NFSACCESS_EXECUTE; 461 if (ap->a_accmode & VDELETE) 462 mode |= NFSACCESS_DELETE; 463 } else { 464 if (ap->a_accmode & VWRITE) 465 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 466 if (ap->a_accmode & VAPPEND) 467 mode |= NFSACCESS_EXTEND; 468 if (ap->a_accmode & VEXEC) 469 mode |= NFSACCESS_LOOKUP; 470 if (ap->a_accmode & VDELETE) 471 mode |= NFSACCESS_DELETE; 472 if (ap->a_accmode & VDELETE_CHILD) 473 mode |= NFSACCESS_MODIFY; 474 } 475 /* XXX safety belt, only make blanket request if caching */ 476 if (nfsaccess_cache_timeout > 0) { 477 wmode = NFSACCESS_READ | NFSACCESS_MODIFY | 478 NFSACCESS_EXTEND | NFSACCESS_EXECUTE | 479 NFSACCESS_DELETE | NFSACCESS_LOOKUP; 480 } else { 481 wmode = mode; 482 } 483 484 /* 485 * Does our cached result allow us to give a definite yes to 486 * this request? 487 */ 488 gotahit = 0; 489 NFSLOCKNODE(np); 490 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 491 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 492 if (time_second < (np->n_accesscache[i].stamp 493 + nfsaccess_cache_timeout) && 494 (np->n_accesscache[i].mode & mode) == mode) { 495 NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); 496 gotahit = 1; 497 } 498 break; 499 } 500 } 501 NFSUNLOCKNODE(np); 502 #ifdef KDTRACE_HOOKS 503 if (gotahit != 0) 504 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 505 ap->a_cred->cr_uid, mode); 506 else 507 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 508 ap->a_cred->cr_uid, mode); 509 #endif 510 if (gotahit == 0) { 511 /* 512 * Either a no, or a don't know. Go to the wire. 
513 */ 514 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 515 error = nfs34_access_otw(vp, wmode, ap->a_td, 516 ap->a_cred, &rmode); 517 if (!error && 518 (rmode & mode) != mode) 519 error = EACCES; 520 } 521 return (error); 522 } else { 523 if ((error = nfsspec_access(ap)) != 0) { 524 return (error); 525 } 526 /* 527 * Attempt to prevent a mapped root from accessing a file 528 * which it shouldn't. We try to read a byte from the file 529 * if the user is root and the file is not zero length. 530 * After calling nfsspec_access, we should have the correct 531 * file size cached. 532 */ 533 NFSLOCKNODE(np); 534 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 535 && VTONFS(vp)->n_size > 0) { 536 struct iovec aiov; 537 struct uio auio; 538 char buf[1]; 539 540 NFSUNLOCKNODE(np); 541 aiov.iov_base = buf; 542 aiov.iov_len = 1; 543 auio.uio_iov = &aiov; 544 auio.uio_iovcnt = 1; 545 auio.uio_offset = 0; 546 auio.uio_resid = 1; 547 auio.uio_segflg = UIO_SYSSPACE; 548 auio.uio_rw = UIO_READ; 549 auio.uio_td = ap->a_td; 550 551 if (vp->v_type == VREG) 552 error = ncl_readrpc(vp, &auio, ap->a_cred); 553 else if (vp->v_type == VDIR) { 554 char* bp; 555 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 556 aiov.iov_base = bp; 557 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 558 error = ncl_readdirrpc(vp, &auio, ap->a_cred, 559 ap->a_td); 560 free(bp, M_TEMP); 561 } else if (vp->v_type == VLNK) 562 error = ncl_readlinkrpc(vp, &auio, ap->a_cred); 563 else 564 error = EACCES; 565 } else 566 NFSUNLOCKNODE(np); 567 return (error); 568 } 569 } 570 571 /* 572 * nfs open vnode op 573 * Check to see if the type is ok 574 * and that deletion is not in progress. 575 * For paged in text files, you will need to flush the page cache 576 * if consistency is lost. 
577 */ 578 /* ARGSUSED */ 579 static int 580 nfs_open(struct vop_open_args *ap) 581 { 582 struct vnode *vp = ap->a_vp; 583 struct nfsnode *np = VTONFS(vp); 584 struct vattr vattr; 585 int error; 586 int fmode = ap->a_mode; 587 struct ucred *cred; 588 vm_object_t obj; 589 590 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 591 return (EOPNOTSUPP); 592 593 /* 594 * For NFSv4, we need to do the Open Op before cache validation, 595 * so that we conform to RFC3530 Sec. 9.3.1. 596 */ 597 if (NFS_ISV4(vp)) { 598 error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); 599 if (error) { 600 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 601 (gid_t)0); 602 return (error); 603 } 604 } 605 606 /* 607 * Now, if this Open will be doing reading, re-validate/flush the 608 * cache, so that Close/Open coherency is maintained. 609 */ 610 NFSLOCKNODE(np); 611 if (np->n_flag & NMODIFIED) { 612 NFSUNLOCKNODE(np); 613 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 614 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 615 if (VN_IS_DOOMED(vp)) 616 return (EBADF); 617 } 618 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 619 if (error == EINTR || error == EIO) { 620 if (NFS_ISV4(vp)) 621 (void) nfsrpc_close(vp, 0, ap->a_td); 622 return (error); 623 } 624 NFSLOCKNODE(np); 625 np->n_attrstamp = 0; 626 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 627 if (vp->v_type == VDIR) 628 np->n_direofoffset = 0; 629 NFSUNLOCKNODE(np); 630 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 631 if (error) { 632 if (NFS_ISV4(vp)) 633 (void) nfsrpc_close(vp, 0, ap->a_td); 634 return (error); 635 } 636 NFSLOCKNODE(np); 637 np->n_mtime = vattr.va_mtime; 638 if (NFS_ISV4(vp)) 639 np->n_change = vattr.va_filerev; 640 } else { 641 NFSUNLOCKNODE(np); 642 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 643 if (error) { 644 if (NFS_ISV4(vp)) 645 (void) nfsrpc_close(vp, 0, ap->a_td); 646 return (error); 647 } 648 NFSLOCKNODE(np); 649 if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || 650 NFS_TIMESPEC_COMPARE(&np->n_mtime, 
&vattr.va_mtime)) { 651 if (vp->v_type == VDIR) 652 np->n_direofoffset = 0; 653 NFSUNLOCKNODE(np); 654 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 655 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 656 if (VN_IS_DOOMED(vp)) 657 return (EBADF); 658 } 659 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 660 if (error == EINTR || error == EIO) { 661 if (NFS_ISV4(vp)) 662 (void) nfsrpc_close(vp, 0, ap->a_td); 663 return (error); 664 } 665 NFSLOCKNODE(np); 666 np->n_mtime = vattr.va_mtime; 667 if (NFS_ISV4(vp)) 668 np->n_change = vattr.va_filerev; 669 } 670 } 671 672 /* 673 * If the object has >= 1 O_DIRECT active opens, we disable caching. 674 */ 675 if (newnfs_directio_enable && (fmode & O_DIRECT) && 676 (vp->v_type == VREG)) { 677 if (np->n_directio_opens == 0) { 678 NFSUNLOCKNODE(np); 679 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 680 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 681 if (VN_IS_DOOMED(vp)) 682 return (EBADF); 683 } 684 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 685 if (error) { 686 if (NFS_ISV4(vp)) 687 (void) nfsrpc_close(vp, 0, ap->a_td); 688 return (error); 689 } 690 NFSLOCKNODE(np); 691 np->n_flag |= NNONCACHE; 692 } 693 np->n_directio_opens++; 694 } 695 696 /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ 697 if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) 698 np->n_flag |= NWRITEOPENED; 699 700 /* 701 * If this is an open for writing, capture a reference to the 702 * credentials, so they can be used by ncl_putpages(). Using 703 * these write credentials is preferable to the credentials of 704 * whatever thread happens to be doing the VOP_PUTPAGES() since 705 * the write RPCs are less likely to fail with EACCES. 
706 */ 707 if ((fmode & FWRITE) != 0) { 708 cred = np->n_writecred; 709 np->n_writecred = crhold(ap->a_cred); 710 } else 711 cred = NULL; 712 NFSUNLOCKNODE(np); 713 714 if (cred != NULL) 715 crfree(cred); 716 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 717 718 /* 719 * If the text file has been mmap'd, flush any dirty pages to the 720 * buffer cache and then... 721 * Make sure all writes are pushed to the NFS server. If this is not 722 * done, the modify time of the file can change while the text 723 * file is being executed. This will cause the process that is 724 * executing the text file to be terminated. 725 */ 726 if (vp->v_writecount <= -1) { 727 if ((obj = vp->v_object) != NULL && 728 vm_object_mightbedirty(obj)) { 729 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 730 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 731 if (VN_IS_DOOMED(vp)) 732 return (EBADF); 733 } 734 vnode_pager_clean_sync(vp); 735 } 736 737 /* Now, flush the buffer cache. */ 738 ncl_flush(vp, MNT_WAIT, curthread, 0, 0); 739 740 /* And, finally, make sure that n_mtime is up to date. */ 741 np = VTONFS(vp); 742 NFSLOCKNODE(np); 743 np->n_mtime = np->n_vattr.na_mtime; 744 NFSUNLOCKNODE(np); 745 } 746 return (0); 747 } 748 749 /* 750 * nfs close vnode op 751 * What an NFS client should do upon close after writing is a debatable issue. 752 * Most NFS clients push delayed writes to the server upon close, basically for 753 * two reasons: 754 * 1 - So that any write errors may be reported back to the client process 755 * doing the close system call. By far the two most likely errors are 756 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 757 * 2 - To put a worst case upper bound on cache inconsistency between 758 * multiple clients for the file. 759 * There is also a consistency problem for Version 2 of the protocol w.r.t. 
760 * not being able to tell if other clients are writing a file concurrently, 761 * since there is no way of knowing if the changed modify time in the reply 762 * is only due to the write for this client. 763 * (NFS Version 3 provides weak cache consistency data in the reply that 764 * should be sufficient to detect and handle this case.) 765 * 766 * The current code does the following: 767 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 768 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 769 * or commit them (this satisfies 1 and 2 except for the 770 * case where the server crashes after this close but 771 * before the commit RPC, which is felt to be "good 772 * enough". Changing the last argument to ncl_flush() to 773 * a 1 would force a commit operation, if it is felt a 774 * commit is necessary now. 775 * for NFS Version 4 - flush the dirty buffers and commit them, if 776 * nfscl_mustflush() says this is necessary. 777 * It is necessary if there is no write delegation held, 778 * in order to satisfy open/close coherency. 779 * If the file isn't cached on local stable storage, 780 * it may be necessary in order to detect "out of space" 781 * errors from the server, if the write delegation 782 * issued by the server doesn't allow the file to grow. 783 */ 784 /* ARGSUSED */ 785 static int 786 nfs_close(struct vop_close_args *ap) 787 { 788 struct vnode *vp = ap->a_vp; 789 struct nfsnode *np = VTONFS(vp); 790 struct nfsvattr nfsva; 791 struct ucred *cred; 792 int error = 0, ret, localcred = 0; 793 int fmode = ap->a_fflag; 794 struct nfsmount *nmp; 795 796 if (NFSCL_FORCEDISM(vp->v_mount)) 797 return (0); 798 nmp = VFSTONFS(vp->v_mount); 799 /* 800 * During shutdown, a_cred isn't valid, so just use root. 
801 */ 802 if (ap->a_cred == NOCRED) { 803 cred = newnfs_getcred(); 804 localcred = 1; 805 } else { 806 cred = ap->a_cred; 807 } 808 if (vp->v_type == VREG) { 809 /* 810 * Examine and clean dirty pages, regardless of NMODIFIED. 811 * This closes a major hole in close-to-open consistency. 812 * We want to push out all dirty pages (and buffers) on 813 * close, regardless of whether they were dirtied by 814 * mmap'ed writes or via write(). 815 */ 816 if (nfs_clean_pages_on_close && vp->v_object) { 817 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 818 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 819 if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK) 820 return (EBADF); 821 } 822 vnode_pager_clean_async(vp); 823 } 824 NFSLOCKNODE(np); 825 if (np->n_flag & NMODIFIED) { 826 NFSUNLOCKNODE(np); 827 if (NFS_ISV3(vp)) { 828 /* 829 * Under NFSv3 we have dirty buffers to dispose of. We 830 * must flush them to the NFS server. We have the option 831 * of waiting all the way through the commit rpc or just 832 * waiting for the initial write. The default is to only 833 * wait through the initial write so the data is in the 834 * server's cache, which is roughly similar to the state 835 * a standard disk subsystem leaves the file in on close(). 836 * 837 * We cannot clear the NMODIFIED bit in np->n_flag due to 838 * potential races with other processes, and certainly 839 * cannot clear it if we don't commit. 840 * These races occur when there is no longer the old 841 * traditional vnode locking implemented for Vnode Ops. 842 */ 843 int cm = newnfs_commit_on_close ? 1 : 0; 844 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 845 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 846 if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK) 847 return (EBADF); 848 } 849 error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); 850 /* np->n_flag &= ~NMODIFIED; */ 851 } else if (NFS_ISV4(vp)) { 852 if (!NFSHASNFSV4N(nmp) || 853 (nmp->nm_flag & NFSMNT_NOCTO) == 0 || 854 nfscl_mustflush(vp) != 0) { 855 int cm = newnfs_commit_on_close ? 
1 : 0; 856 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 857 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 858 if (VN_IS_DOOMED(vp) && ap->a_fflag != 859 FNONBLOCK) 860 return (EBADF); 861 } 862 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 863 cm, 0); 864 /* 865 * as above w.r.t races when clearing 866 * NMODIFIED. 867 * np->n_flag &= ~NMODIFIED; 868 */ 869 } 870 } else { 871 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 872 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); 873 if (VN_IS_DOOMED(vp) && ap->a_fflag != 874 FNONBLOCK) 875 return (EBADF); 876 } 877 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 878 } 879 NFSLOCKNODE(np); 880 } 881 /* 882 * Invalidate the attribute cache in all cases. 883 * An open is going to fetch fresh attrs any way, other procs 884 * on this node that have file open will be forced to do an 885 * otw attr fetch, but this is safe. 886 * --> A user found that their RPC count dropped by 20% when 887 * this was commented out and I can't see any requirement 888 * for it, so I've disabled it when negative lookups are 889 * enabled. (What does this have to do with negative lookup 890 * caching? Well nothing, except it was reported by the 891 * same user that needed negative lookup caching and I wanted 892 * there to be a way to disable it to see if it 893 * is the cause of some caching/coherency issue that might 894 * crop up.) 895 */ 896 if (nmp->nm_negnametimeo == 0) { 897 np->n_attrstamp = 0; 898 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 899 } 900 if (np->n_flag & NWRITEERR) { 901 np->n_flag &= ~NWRITEERR; 902 error = np->n_error; 903 } 904 NFSUNLOCKNODE(np); 905 } 906 907 if (NFS_ISV4(vp)) { 908 /* 909 * Get attributes so "change" is up to date. 
910 */ 911 if (error == 0 && nfscl_nodeleg(vp, 0) != 0 && 912 vp->v_type == VREG && 913 (nmp->nm_flag & NFSMNT_NOCTO) == 0) { 914 ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva); 915 if (!ret) { 916 np->n_change = nfsva.na_filerev; 917 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 918 0, 0); 919 } 920 } 921 922 /* 923 * and do the close. 924 */ 925 ret = nfsrpc_close(vp, 0, ap->a_td); 926 if (!error && ret) 927 error = ret; 928 if (error) 929 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 930 (gid_t)0); 931 } 932 if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 933 NFSLOCKNODE(np); 934 KASSERT((np->n_directio_opens > 0), 935 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 936 np->n_directio_opens--; 937 if (np->n_directio_opens == 0) 938 np->n_flag &= ~NNONCACHE; 939 NFSUNLOCKNODE(np); 940 } 941 if (localcred) 942 NFSFREECRED(cred); 943 return (error); 944 } 945 946 /* 947 * nfs getattr call from vfs. 948 */ 949 static int 950 nfs_getattr(struct vop_getattr_args *ap) 951 { 952 struct vnode *vp = ap->a_vp; 953 struct thread *td = curthread; /* XXX */ 954 struct nfsnode *np = VTONFS(vp); 955 int error = 0; 956 struct nfsvattr nfsva; 957 struct vattr *vap = ap->a_vap; 958 struct vattr vattr; 959 struct nfsmount *nmp; 960 961 nmp = VFSTONFS(vp->v_mount); 962 /* 963 * Update local times for special files. 964 */ 965 NFSLOCKNODE(np); 966 if (np->n_flag & (NACC | NUPD)) 967 np->n_flag |= NCHG; 968 NFSUNLOCKNODE(np); 969 /* 970 * First look in the cache. 971 * For "syskrb5" mounts, nm_fhsize might still be zero and 972 * cached attributes should be ignored. 973 */ 974 if (nmp->nm_fhsize > 0 && ncl_getattrcache(vp, &vattr) == 0) { 975 ncl_copy_vattr(vp, vap, &vattr); 976 977 /* 978 * Get the local modify time for the case of a write 979 * delegation. 
980 */ 981 nfscl_deleggetmodtime(vp, &vap->va_mtime); 982 return (0); 983 } 984 985 if (NFS_ISV34(vp) && nfs_prime_access_cache && 986 nfsaccess_cache_timeout > 0) { 987 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 988 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); 989 if (ncl_getattrcache(vp, ap->a_vap) == 0) { 990 nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); 991 return (0); 992 } 993 } 994 995 error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva); 996 if (error == 0) 997 error = nfscl_loadattrcache(&vp, &nfsva, vap, 0, 0); 998 if (!error) { 999 /* 1000 * Get the local modify time for the case of a write 1001 * delegation. 1002 */ 1003 nfscl_deleggetmodtime(vp, &vap->va_mtime); 1004 } else if (NFS_ISV4(vp)) { 1005 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1006 } 1007 return (error); 1008 } 1009 1010 /* 1011 * nfs setattr call. 1012 */ 1013 static int 1014 nfs_setattr(struct vop_setattr_args *ap) 1015 { 1016 struct vnode *vp = ap->a_vp; 1017 struct nfsnode *np = VTONFS(vp); 1018 struct thread *td = curthread; /* XXX */ 1019 struct vattr *vap = ap->a_vap; 1020 int error = 0; 1021 u_quad_t tsize; 1022 struct timespec ts; 1023 struct nfsmount *nmp; 1024 1025 #ifndef nolint 1026 tsize = (u_quad_t)0; 1027 #endif 1028 1029 /* 1030 * Only setting of UF_ARCHIVE, UF_HIDDEN and UF_SYSTEM are supported and 1031 * only for NFSv4 servers that support them. 1032 */ 1033 nmp = VFSTONFS(vp->v_mount); 1034 if (vap->va_flags != VNOVAL && (!NFSHASNFSV4(nmp) || 1035 (vap->va_flags & ~(UF_ARCHIVE | UF_HIDDEN | UF_SYSTEM)) != 0 || 1036 !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_ARCHIVE) || 1037 !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_HIDDEN) || 1038 !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_SYSTEM))) 1039 return (EOPNOTSUPP); 1040 1041 /* 1042 * Disallow write attempts if the filesystem is mounted read-only. 
1043 */ 1044 if ((vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL || 1045 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 1046 vap->va_mtime.tv_sec != VNOVAL || 1047 vap->va_birthtime.tv_sec != VNOVAL || 1048 vap->va_mode != (mode_t)VNOVAL) && 1049 (vp->v_mount->mnt_flag & MNT_RDONLY)) 1050 return (EROFS); 1051 if (vap->va_size != VNOVAL) { 1052 switch (vp->v_type) { 1053 case VDIR: 1054 return (EISDIR); 1055 case VCHR: 1056 case VBLK: 1057 case VSOCK: 1058 case VFIFO: 1059 if (vap->va_mtime.tv_sec == VNOVAL && 1060 vap->va_atime.tv_sec == VNOVAL && 1061 vap->va_birthtime.tv_sec == VNOVAL && 1062 vap->va_mode == (mode_t)VNOVAL && 1063 vap->va_uid == (uid_t)VNOVAL && 1064 vap->va_gid == (gid_t)VNOVAL) 1065 return (0); 1066 vap->va_size = VNOVAL; 1067 break; 1068 default: 1069 /* 1070 * Disallow write attempts if the filesystem is 1071 * mounted read-only. 1072 */ 1073 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1074 return (EROFS); 1075 /* 1076 * We run vnode_pager_setsize() early (why?), 1077 * we must set np->n_size now to avoid vinvalbuf 1078 * V_SAVE races that might setsize a lower 1079 * value. 1080 */ 1081 NFSLOCKNODE(np); 1082 tsize = np->n_size; 1083 NFSUNLOCKNODE(np); 1084 error = ncl_meta_setsize(vp, td, vap->va_size); 1085 NFSLOCKNODE(np); 1086 if (np->n_flag & NMODIFIED) { 1087 tsize = np->n_size; 1088 NFSUNLOCKNODE(np); 1089 error = ncl_vinvalbuf(vp, vap->va_size == 0 ? 1090 0 : V_SAVE, td, 1); 1091 if (error != 0) { 1092 vnode_pager_setsize(vp, tsize); 1093 return (error); 1094 } 1095 /* 1096 * Call nfscl_delegmodtime() to set the modify time 1097 * locally, as required. 1098 */ 1099 nfscl_delegmodtime(vp, NULL); 1100 } else 1101 NFSUNLOCKNODE(np); 1102 /* 1103 * np->n_size has already been set to vap->va_size 1104 * in ncl_meta_setsize(). We must set it again since 1105 * nfs_loadattrcache() could be called through 1106 * ncl_meta_setsize() and could modify np->n_size. 
1107 */ 1108 NFSLOCKNODE(np); 1109 np->n_vattr.na_size = np->n_size = vap->va_size; 1110 NFSUNLOCKNODE(np); 1111 } 1112 } else { 1113 NFSLOCKNODE(np); 1114 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 1115 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 1116 NFSUNLOCKNODE(np); 1117 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 1118 if (error == EINTR || error == EIO) 1119 return (error); 1120 } else 1121 NFSUNLOCKNODE(np); 1122 } 1123 error = nfs_setattrrpc(vp, vap, ap->a_cred, td); 1124 if (vap->va_size != VNOVAL) { 1125 if (error == 0) { 1126 nanouptime(&ts); 1127 NFSLOCKNODE(np); 1128 np->n_localmodtime = ts; 1129 NFSUNLOCKNODE(np); 1130 } else { 1131 NFSLOCKNODE(np); 1132 np->n_size = np->n_vattr.na_size = tsize; 1133 vnode_pager_setsize(vp, tsize); 1134 NFSUNLOCKNODE(np); 1135 } 1136 } 1137 if (vap->va_mtime.tv_sec != VNOVAL && error == 0) 1138 nfscl_delegmodtime(vp, &vap->va_mtime); 1139 return (error); 1140 } 1141 1142 /* 1143 * Do an nfs setattr rpc. 1144 */ 1145 static int 1146 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, 1147 struct thread *td) 1148 { 1149 struct nfsnode *np = VTONFS(vp); 1150 int error, ret, attrflag, i; 1151 struct nfsvattr nfsva; 1152 1153 if (NFS_ISV34(vp)) { 1154 NFSLOCKNODE(np); 1155 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 1156 np->n_accesscache[i].stamp = 0; 1157 np->n_flag |= NDELEGMOD; 1158 NFSUNLOCKNODE(np); 1159 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 1160 } 1161 error = nfsrpc_setattr(vp, vap, NULL, 0, cred, td, &nfsva, &attrflag); 1162 if (attrflag) { 1163 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1164 if (ret && !error) 1165 error = ret; 1166 } 1167 if (error && NFS_ISV4(vp)) 1168 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); 1169 return (error); 1170 } 1171 1172 /* 1173 * Get a named attribute directory for the vnode. 
1174 */ 1175 static int 1176 nfs_get_namedattrdir(struct vnode *vp, struct componentname *cnp, 1177 struct vnode **vpp) 1178 { 1179 struct nfsfh *nfhp; 1180 struct nfsnode *np; 1181 struct vnode *newvp; 1182 struct nfsvattr nfsva; 1183 int attrflag, error; 1184 1185 attrflag = 0; 1186 *vpp = NULL; 1187 np = VTONFS(vp); 1188 error = nfsrpc_openattr(VFSTONFS(vp->v_mount), vp, np->n_fhp->nfh_fh, 1189 np->n_fhp->nfh_len, (cnp->cn_flags & CREATENAMED), 1190 cnp->cn_cred, curthread, &nfsva, &nfhp, &attrflag); 1191 if (error == NFSERR_NOTSUPP) 1192 error = ENOATTR; 1193 if (error == 0) 1194 error = nfscl_nget(vp->v_mount, vp, nfhp, cnp, curthread, &np, 1195 cnp->cn_lkflags); 1196 if (error != 0) 1197 return (error); 1198 newvp = NFSTOV(np); 1199 vn_irflag_set_cond(newvp, VIRF_NAMEDDIR); 1200 if (attrflag != 0) 1201 (void)nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1202 *vpp = newvp; 1203 return (0); 1204 } 1205 1206 /* 1207 * nfs lookup call, one step at a time... 1208 * First look in cache 1209 * If not found, unlock the directory nfsnode and do the rpc 1210 */ 1211 static int 1212 nfs_lookup(struct vop_lookup_args *ap) 1213 { 1214 struct componentname *cnp = ap->a_cnp; 1215 struct vnode *dvp = ap->a_dvp; 1216 struct vnode **vpp = ap->a_vpp; 1217 struct mount *mp = dvp->v_mount; 1218 uint64_t flags = cnp->cn_flags; 1219 struct vnode *newvp; 1220 struct nfsmount *nmp; 1221 struct nfsnode *np, *newnp; 1222 int error = 0, attrflag, dattrflag, ltype, ncticks; 1223 struct thread *td = curthread; 1224 struct nfsfh *nfhp; 1225 struct nfsvattr dnfsva, nfsva; 1226 struct vattr vattr; 1227 struct timespec nctime, ts; 1228 uint32_t openmode; 1229 bool is_nameddir, needs_nameddir, opennamed; 1230 1231 dattrflag = 0; 1232 *vpp = NULL; 1233 nmp = VFSTONFS(mp); 1234 opennamed = (flags & (OPENNAMED | ISLASTCN)) == (OPENNAMED | ISLASTCN); 1235 if (opennamed && (!NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp))) 1236 return (ENOATTR); 1237 is_nameddir = (vn_irflag_read(dvp) & VIRF_NAMEDDIR) 
!= 0; 1238 if ((is_nameddir && (flags & ISLASTCN) == 0 && (cnp->cn_namelen > 1 || 1239 *cnp->cn_nameptr != '.')) || 1240 (opennamed && !is_nameddir && (flags & ISDOTDOT) != 0)) 1241 return (ENOATTR); 1242 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 1243 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 1244 return (EROFS); 1245 np = VTONFS(dvp); 1246 1247 needs_nameddir = false; 1248 if (opennamed || is_nameddir) { 1249 cnp->cn_flags &= ~MAKEENTRY; 1250 if (!is_nameddir) 1251 needs_nameddir = true; 1252 } 1253 1254 /* 1255 * If the named attribute directory is needed, acquire it now. 1256 */ 1257 newvp = NULL; 1258 if (needs_nameddir) { 1259 KASSERT(np->n_v4 == NULL, ("nfs_lookup: O_NAMEDATTR when" 1260 " n_v4 not NULL")); 1261 error = nfs_get_namedattrdir(dvp, cnp, &newvp); 1262 if (error != 0) 1263 goto handle_error; 1264 if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') { 1265 *vpp = newvp; 1266 return (0); 1267 } 1268 dvp = newvp; 1269 np = VTONFS(dvp); 1270 newvp = NULL; 1271 } else if (opennamed && cnp->cn_namelen == 1 && 1272 *cnp->cn_nameptr == '.') { 1273 vref(dvp); 1274 *vpp = dvp; 1275 return (0); 1276 } 1277 1278 if (dvp->v_type != VDIR) 1279 return (ENOTDIR); 1280 1281 /* For NFSv4, wait until any remove is done. */ 1282 NFSLOCKNODE(np); 1283 while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { 1284 np->n_flag |= NREMOVEWANT; 1285 (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); 1286 } 1287 NFSUNLOCKNODE(np); 1288 1289 error = vn_dir_check_exec(dvp, cnp); 1290 if (error != 0) 1291 return (error); 1292 1293 if (!opennamed && !is_nameddir) { 1294 error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); 1295 if (error > 0 && error != ENOENT) 1296 return (error); 1297 if (error == -1) { 1298 /* 1299 * Lookups of "." are special and always return the 1300 * current directory. cache_lookup() already handles 1301 * associated locking bookkeeping, etc. 
1302 */ 1303 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { 1304 return (0); 1305 } 1306 1307 /* 1308 * We only accept a positive hit in the cache if the 1309 * change time of the file matches our cached copy. 1310 * Otherwise, we discard the cache entry and fallback 1311 * to doing a lookup RPC. We also only trust cache 1312 * entries for less than nm_nametimeo seconds. 1313 * 1314 * To better handle stale file handles and attributes, 1315 * clear the attribute cache of this node if it is a 1316 * leaf component, part of an open() call, and not 1317 * locally modified before fetching the attributes. 1318 * This should allow stale file handles to be detected 1319 * here where we can fall back to a LOOKUP RPC to 1320 * recover rather than having nfs_open() detect the 1321 * stale file handle and failing open(2) with ESTALE. 1322 */ 1323 newvp = *vpp; 1324 newnp = VTONFS(newvp); 1325 if (!(nmp->nm_flag & NFSMNT_NOCTO) && 1326 (flags & (ISLASTCN | ISOPEN)) == 1327 (ISLASTCN | ISOPEN) && 1328 !(newnp->n_flag & NMODIFIED)) { 1329 NFSLOCKNODE(newnp); 1330 newnp->n_attrstamp = 0; 1331 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1332 NFSUNLOCKNODE(newnp); 1333 } 1334 if (nfscl_nodeleg(newvp, 0) == 0 || 1335 ((u_int)(ticks - ncticks) < 1336 (nmp->nm_nametimeo * hz) && 1337 VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && 1338 timespeccmp(&vattr.va_ctime, &nctime, ==))) { 1339 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1340 return (0); 1341 } 1342 cache_purge(newvp); 1343 if (dvp != newvp) 1344 vput(newvp); 1345 else 1346 vrele(newvp); 1347 *vpp = NULL; 1348 } else if (error == ENOENT) { 1349 if (VN_IS_DOOMED(dvp)) 1350 return (ENOENT); 1351 /* 1352 * We only accept a negative hit in the cache if the 1353 * modification time of the parent directory matches 1354 * the cached copy in the name cache entry. 1355 * Otherwise, we discard all of the negative cache 1356 * entries for this directory. 
We also only trust 1357 * negative cache entries for up to nm_negnametimeo 1358 * seconds. 1359 */ 1360 if ((u_int)(ticks - ncticks) < 1361 (nmp->nm_negnametimeo * hz) && 1362 VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 1363 timespeccmp(&vattr.va_mtime, &nctime, ==)) { 1364 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1365 return (ENOENT); 1366 } 1367 cache_purge_negative(dvp); 1368 } 1369 } 1370 1371 openmode = 0; 1372 #if 0 1373 /* 1374 * The use of LookupOpen breaks some builds. It is disabled 1375 * until that is fixed. 1376 */ 1377 /* 1378 * If this an NFSv4.1/4.2 mount using the "oneopenown" mount 1379 * option, it is possible to do the Open operation in the same 1380 * compound as Lookup, so long as delegations are not being 1381 * issued. This saves doing a separate RPC for Open. 1382 * For pnfs, do not do this, since the Open+LayoutGet will 1383 * be needed as a separate RPC. 1384 */ 1385 NFSLOCKMNT(nmp); 1386 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && !NFSHASPNFS(nmp) && 1387 (nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 && 1388 (!NFSMNT_RDONLY(mp) || (flags & OPENWRITE) == 0) && 1389 (flags & (ISLASTCN | ISOPEN | OPENNAMED))) == (ISLASTCN | ISOPEN)) { 1390 if ((flags & OPENREAD) != 0) 1391 openmode |= NFSV4OPEN_ACCESSREAD; 1392 if ((flags & OPENWRITE) != 0) 1393 openmode |= NFSV4OPEN_ACCESSWRITE; 1394 } 1395 NFSUNLOCKMNT(nmp); 1396 #endif 1397 1398 newvp = NULL; 1399 NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); 1400 nanouptime(&ts); 1401 error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1402 cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1403 openmode); 1404 if (dattrflag) 1405 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1406 if (needs_nameddir) { 1407 vput(dvp); 1408 dvp = ap->a_dvp; 1409 } 1410 handle_error: 1411 if (error) { 1412 if (newvp != NULL) { 1413 vput(newvp); 1414 *vpp = NULL; 1415 } 1416 1417 if (error != ENOENT) { 1418 if (NFS_ISV4(dvp)) 1419 error = nfscl_maperr(td, error, 1420 (uid_t)0, 
(gid_t)0); 1421 return (error); 1422 } 1423 1424 /* The requested file was not found. */ 1425 if ((cnp->cn_nameiop == CREATE || 1426 cnp->cn_nameiop == RENAME) && 1427 (flags & ISLASTCN)) { 1428 /* 1429 * XXX: UFS does a full VOP_ACCESS(dvp, 1430 * VWRITE) here instead of just checking 1431 * MNT_RDONLY. 1432 */ 1433 if (mp->mnt_flag & MNT_RDONLY) 1434 return (EROFS); 1435 return (EJUSTRETURN); 1436 } 1437 1438 if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag && 1439 !NFSHASCASEINSENSITIVE(nmp)) { 1440 /* 1441 * Cache the modification time of the parent 1442 * directory from the post-op attributes in 1443 * the name cache entry. The negative cache 1444 * entry will be ignored once the directory 1445 * has changed. Don't bother adding the entry 1446 * if the directory has already changed. 1447 */ 1448 NFSLOCKNODE(np); 1449 if (timespeccmp(&np->n_vattr.na_mtime, 1450 &dnfsva.na_mtime, ==)) { 1451 NFSUNLOCKNODE(np); 1452 cache_enter_time(dvp, NULL, cnp, 1453 &dnfsva.na_mtime, NULL); 1454 } else 1455 NFSUNLOCKNODE(np); 1456 } 1457 return (ENOENT); 1458 } 1459 1460 /* 1461 * Handle RENAME case... 1462 */ 1463 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1464 if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1465 free(nfhp, M_NFSFH); 1466 return (EISDIR); 1467 } 1468 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, 1469 LK_EXCLUSIVE); 1470 if (error) 1471 return (error); 1472 newvp = NFSTOV(np); 1473 /* 1474 * If n_localmodtime >= time before RPC, then 1475 * a file modification operation, such as 1476 * VOP_SETATTR() of size, has occurred while 1477 * the Lookup RPC and acquisition of the vnode 1478 * happened. As such, the attributes might 1479 * be stale, with possibly an incorrect size. 
1480 */ 1481 NFSLOCKNODE(np); 1482 if (timespecisset(&np->n_localmodtime) && 1483 timespeccmp(&np->n_localmodtime, &ts, >=)) { 1484 NFSCL_DEBUG(4, "nfs_lookup: rename localmod " 1485 "stale attributes\n"); 1486 attrflag = 0; 1487 } 1488 NFSUNLOCKNODE(np); 1489 if (attrflag) 1490 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1491 0, 1); 1492 *vpp = newvp; 1493 return (0); 1494 } 1495 1496 if (flags & ISDOTDOT) { 1497 ltype = NFSVOPISLOCKED(dvp); 1498 error = vfs_busy(mp, MBF_NOWAIT); 1499 if (error != 0) { 1500 vfs_ref(mp); 1501 NFSVOPUNLOCK(dvp); 1502 error = vfs_busy(mp, 0); 1503 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1504 vfs_rel(mp); 1505 if (error == 0 && VN_IS_DOOMED(dvp)) { 1506 vfs_unbusy(mp); 1507 error = ENOENT; 1508 } 1509 if (error != 0) 1510 return (error); 1511 } 1512 NFSVOPUNLOCK(dvp); 1513 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, 1514 cnp->cn_lkflags); 1515 if (error == 0) 1516 newvp = NFSTOV(np); 1517 vfs_unbusy(mp); 1518 if (newvp != dvp) 1519 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1520 if (VN_IS_DOOMED(dvp)) { 1521 if (error == 0) { 1522 if (newvp == dvp) 1523 vrele(newvp); 1524 else 1525 vput(newvp); 1526 } 1527 error = ENOENT; 1528 } 1529 if (error != 0) 1530 return (error); 1531 if (attrflag) 1532 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1533 0, 1); 1534 } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1535 free(nfhp, M_NFSFH); 1536 vref(dvp); 1537 newvp = dvp; 1538 if (attrflag) 1539 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1540 0, 1); 1541 } else { 1542 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, 1543 cnp->cn_lkflags); 1544 if (error) 1545 return (error); 1546 newvp = NFSTOV(np); 1547 if (opennamed) 1548 vn_irflag_set_cond(newvp, VIRF_NAMEDATTR); 1549 /* 1550 * If n_localmodtime >= time before RPC, then 1551 * a file modification operation, such as 1552 * VOP_SETATTR() of size, has occurred while 1553 * the Lookup RPC and acquisition of the vnode 1554 * happened. 
As such, the attributes might 1555 * be stale, with possibly an incorrect size. 1556 */ 1557 NFSLOCKNODE(np); 1558 if (timespecisset(&np->n_localmodtime) && 1559 timespeccmp(&np->n_localmodtime, &ts, >=)) { 1560 NFSCL_DEBUG(4, "nfs_lookup: localmod " 1561 "stale attributes\n"); 1562 attrflag = 0; 1563 } 1564 NFSUNLOCKNODE(np); 1565 if (attrflag) 1566 (void)nfscl_loadattrcache(&newvp, &nfsva, NULL, 1567 0, 1); 1568 else if ((flags & (ISLASTCN | ISOPEN)) == 1569 (ISLASTCN | ISOPEN) && 1570 !(np->n_flag & NMODIFIED)) { 1571 /* 1572 * Flush the attribute cache when opening a 1573 * leaf node to ensure that fresh attributes 1574 * are fetched in nfs_open() since we did not 1575 * fetch attributes from the LOOKUP reply. 1576 */ 1577 NFSLOCKNODE(np); 1578 np->n_attrstamp = 0; 1579 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1580 NFSUNLOCKNODE(np); 1581 } 1582 } 1583 if ((cnp->cn_flags & MAKEENTRY) && dvp != newvp && 1584 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && 1585 attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) 1586 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 1587 newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); 1588 *vpp = newvp; 1589 return (0); 1590 } 1591 1592 /* 1593 * nfs read call. 1594 * Just call ncl_bioread() to do the work. 1595 */ 1596 static int 1597 nfs_read(struct vop_read_args *ap) 1598 { 1599 struct vnode *vp = ap->a_vp; 1600 1601 switch (vp->v_type) { 1602 case VREG: 1603 return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); 1604 case VDIR: 1605 return (EISDIR); 1606 default: 1607 return (EOPNOTSUPP); 1608 } 1609 } 1610 1611 /* 1612 * nfs readlink call 1613 */ 1614 static int 1615 nfs_readlink(struct vop_readlink_args *ap) 1616 { 1617 struct vnode *vp = ap->a_vp; 1618 1619 if (vp->v_type != VLNK) 1620 return (EINVAL); 1621 return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); 1622 } 1623 1624 /* 1625 * Do a readlink rpc. 1626 * Called by ncl_doio() from below the buffer cache. 
1627 */ 1628 int 1629 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1630 { 1631 int error, ret, attrflag; 1632 struct nfsvattr nfsva; 1633 1634 error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, 1635 &attrflag); 1636 if (attrflag) { 1637 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1638 if (ret && !error) 1639 error = ret; 1640 } 1641 if (error && NFS_ISV4(vp)) 1642 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1643 return (error); 1644 } 1645 1646 /* 1647 * nfs read rpc call 1648 * Ditto above 1649 */ 1650 int 1651 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1652 { 1653 int error, ret, attrflag; 1654 struct nfsvattr nfsva; 1655 struct nfsmount *nmp; 1656 1657 nmp = VFSTONFS(vp->v_mount); 1658 error = EIO; 1659 attrflag = 0; 1660 if (NFSHASPNFS(nmp)) 1661 error = nfscl_doiods(vp, uiop, NULL, NULL, 1662 NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td); 1663 NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); 1664 if (error != 0 && error != EFAULT) 1665 error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, 1666 &attrflag); 1667 if (attrflag) { 1668 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1669 if (ret && !error) 1670 error = ret; 1671 } 1672 if (error && NFS_ISV4(vp)) 1673 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1674 return (error); 1675 } 1676 1677 /* 1678 * nfs write call 1679 */ 1680 int 1681 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1682 int *iomode, int *must_commit, int called_from_strategy, int ioflag) 1683 { 1684 struct nfsvattr nfsva; 1685 int error, attrflag, ret; 1686 struct nfsmount *nmp; 1687 1688 nmp = VFSTONFS(vp->v_mount); 1689 error = EIO; 1690 attrflag = 0; 1691 if (NFSHASPNFS(nmp)) 1692 error = nfscl_doiods(vp, uiop, iomode, must_commit, 1693 NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td); 1694 NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); 1695 if (error != 0 && error != EFAULT) 1696 error = 
nfsrpc_write(vp, uiop, iomode, must_commit, cred, 1697 uiop->uio_td, &nfsva, &attrflag, called_from_strategy, 1698 ioflag); 1699 if (attrflag) { 1700 if (VTONFS(vp)->n_flag & ND_NFSV4) 1701 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 1, 1); 1702 else 1703 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 1704 if (ret && !error) 1705 error = ret; 1706 } 1707 if (DOINGASYNC(vp)) 1708 *iomode = NFSWRITE_FILESYNC; 1709 if (error && NFS_ISV4(vp)) 1710 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1711 return (error); 1712 } 1713 1714 /* 1715 * nfs mknod rpc 1716 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1717 * mode set to specify the file type and the size field for rdev. 1718 */ 1719 static int 1720 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1721 struct vattr *vap) 1722 { 1723 struct nfsvattr nfsva, dnfsva; 1724 struct vnode *newvp = NULL; 1725 struct nfsnode *np = NULL, *dnp; 1726 struct nfsfh *nfhp; 1727 struct vattr vattr; 1728 int error = 0, attrflag, dattrflag; 1729 u_int32_t rdev; 1730 1731 if (VATTR_ISDEV(vap)) 1732 rdev = vap->va_rdev; 1733 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1734 rdev = 0xffffffff; 1735 else 1736 return (EOPNOTSUPP); 1737 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1738 return (error); 1739 error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, 1740 rdev, vap->va_type, cnp->cn_cred, curthread, &dnfsva, 1741 &nfsva, &nfhp, &attrflag, &dattrflag); 1742 if (!error) { 1743 if (!nfhp) 1744 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1745 cnp->cn_namelen, cnp->cn_cred, curthread, 1746 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 0); 1747 if (nfhp) 1748 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1749 curthread, &np, LK_EXCLUSIVE); 1750 } 1751 if (dattrflag) 1752 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1753 if (!error) { 1754 newvp = NFSTOV(np); 1755 if (attrflag != 0) { 1756 error = 
nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1757 if (error != 0) 1758 vput(newvp); 1759 } 1760 } 1761 if (!error) { 1762 *vpp = newvp; 1763 } else if (NFS_ISV4(dvp)) { 1764 error = nfscl_maperr(curthread, error, vap->va_uid, 1765 vap->va_gid); 1766 } 1767 dnp = VTONFS(dvp); 1768 NFSLOCKNODE(dnp); 1769 dnp->n_flag |= NMODIFIED; 1770 if (!dattrflag) { 1771 dnp->n_attrstamp = 0; 1772 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1773 } 1774 NFSUNLOCKNODE(dnp); 1775 return (error); 1776 } 1777 1778 /* 1779 * nfs mknod vop 1780 * just call nfs_mknodrpc() to do the work. 1781 */ 1782 /* ARGSUSED */ 1783 static int 1784 nfs_mknod(struct vop_mknod_args *ap) 1785 { 1786 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1787 } 1788 1789 static struct mtx nfs_cverf_mtx; 1790 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", 1791 MTX_DEF); 1792 1793 static nfsquad_t 1794 nfs_get_cverf(void) 1795 { 1796 static nfsquad_t cverf; 1797 nfsquad_t ret; 1798 static int cverf_initialized = 0; 1799 1800 mtx_lock(&nfs_cverf_mtx); 1801 if (cverf_initialized == 0) { 1802 cverf.lval[0] = arc4random(); 1803 cverf.lval[1] = arc4random(); 1804 cverf_initialized = 1; 1805 } else 1806 cverf.qval++; 1807 ret = cverf; 1808 mtx_unlock(&nfs_cverf_mtx); 1809 1810 return (ret); 1811 } 1812 1813 /* 1814 * nfs file create call 1815 */ 1816 static int 1817 nfs_create(struct vop_create_args *ap) 1818 { 1819 struct vnode *dvp = ap->a_dvp; 1820 struct vattr *vap = ap->a_vap; 1821 struct componentname *cnp = ap->a_cnp; 1822 struct nfsnode *np = NULL, *dnp; 1823 struct vnode *newvp = NULL; 1824 struct nfsmount *nmp; 1825 struct nfsvattr dnfsva, nfsva; 1826 struct nfsfh *nfhp; 1827 nfsquad_t cverf; 1828 int error = 0, attrflag, dattrflag, fmode = 0; 1829 struct vattr vattr; 1830 bool is_nameddir, needs_nameddir, opennamed; 1831 1832 /* 1833 * Oops, not for me.. 
1834 */ 1835 if (vap->va_type == VSOCK) 1836 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1837 1838 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1839 return (error); 1840 if (vap->va_vaflags & VA_EXCLUSIVE) 1841 fmode |= O_EXCL; 1842 dnp = VTONFS(dvp); 1843 nmp = VFSTONFS(dvp->v_mount); 1844 needs_nameddir = false; 1845 if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp)) { 1846 opennamed = (cnp->cn_flags & (OPENNAMED | ISLASTCN)) == 1847 (OPENNAMED | ISLASTCN); 1848 is_nameddir = (vn_irflag_read(dvp) & VIRF_NAMEDDIR) != 0; 1849 if (opennamed || is_nameddir) { 1850 cnp->cn_flags &= ~MAKEENTRY; 1851 if (!is_nameddir) 1852 needs_nameddir = true; 1853 } 1854 } 1855 1856 /* 1857 * If the named attribute directory is needed, acquire it now. 1858 */ 1859 if (needs_nameddir) { 1860 KASSERT(dnp->n_v4 == NULL, ("nfs_create: O_NAMEDATTR when" 1861 " n_v4 not NULL")); 1862 error = nfs_get_namedattrdir(dvp, cnp, &newvp); 1863 if (error != 0) 1864 return (error); 1865 dvp = newvp; 1866 dnp = VTONFS(dvp); 1867 newvp = NULL; 1868 } 1869 1870 again: 1871 /* For NFSv4, wait until any remove is done. 
*/ 1872 NFSLOCKNODE(dnp); 1873 while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { 1874 dnp->n_flag |= NREMOVEWANT; 1875 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); 1876 } 1877 NFSUNLOCKNODE(dnp); 1878 1879 cverf = nfs_get_cverf(); 1880 error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1881 vap, cverf, fmode, cnp->cn_cred, curthread, &dnfsva, &nfsva, 1882 &nfhp, &attrflag, &dattrflag); 1883 if (!error) { 1884 if (nfhp == NULL) 1885 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1886 cnp->cn_namelen, cnp->cn_cred, curthread, 1887 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 0); 1888 if (nfhp != NULL) 1889 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1890 curthread, &np, LK_EXCLUSIVE); 1891 } 1892 if (dattrflag) 1893 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1); 1894 if (!error) { 1895 newvp = NFSTOV(np); 1896 if (attrflag == 0) 1897 error = nfsrpc_getattr(newvp, cnp->cn_cred, curthread, 1898 &nfsva); 1899 if (error == 0) 1900 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1); 1901 } 1902 if (error) { 1903 if (newvp != NULL) { 1904 vput(newvp); 1905 newvp = NULL; 1906 } 1907 if (NFS_ISV34(dvp) && (fmode & O_EXCL) && 1908 error == NFSERR_NOTSUPP) { 1909 fmode &= ~O_EXCL; 1910 goto again; 1911 } 1912 } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { 1913 if (nfscl_checksattr(vap, &nfsva)) { 1914 error = nfsrpc_setattr(newvp, vap, NULL, 0, 1915 cnp->cn_cred, curthread, &nfsva, &attrflag); 1916 if (error && (vap->va_uid != (uid_t)VNOVAL || 1917 vap->va_gid != (gid_t)VNOVAL)) { 1918 /* try again without setting uid/gid */ 1919 vap->va_uid = (uid_t)VNOVAL; 1920 vap->va_gid = (uid_t)VNOVAL; 1921 error = nfsrpc_setattr(newvp, vap, NULL, 0, 1922 cnp->cn_cred, curthread, &nfsva, &attrflag); 1923 } 1924 if (attrflag) 1925 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1926 0, 1); 1927 if (error != 0) 1928 vput(newvp); 1929 } 1930 } 1931 if (!error) { 1932 if ((cnp->cn_flags & MAKEENTRY) && attrflag) { 1933 if (dvp != newvp) 
1934 cache_enter_time(dvp, newvp, cnp, 1935 &nfsva.na_ctime, NULL); 1936 else 1937 printf("nfs_create: bogus NFS server returned " 1938 "the directory as the new file object\n"); 1939 } 1940 *ap->a_vpp = newvp; 1941 } else if (NFS_ISV4(dvp)) { 1942 error = nfscl_maperr(curthread, error, vap->va_uid, 1943 vap->va_gid); 1944 } 1945 NFSLOCKNODE(dnp); 1946 dnp->n_flag |= NMODIFIED; 1947 if (!dattrflag) { 1948 dnp->n_attrstamp = 0; 1949 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1950 } 1951 NFSUNLOCKNODE(dnp); 1952 if (needs_nameddir) 1953 vput(dvp); 1954 return (error); 1955 } 1956 1957 /* 1958 * nfs file remove call 1959 * To try and make nfs semantics closer to ufs semantics, a file that has 1960 * other processes using the vnode is renamed instead of removed and then 1961 * removed later on the last close. 1962 * - If v_usecount > 1 1963 * If a rename is not already in the works 1964 * call nfs_sillyrename() to set it up 1965 * else 1966 * do the remove rpc 1967 */ 1968 static int 1969 nfs_remove(struct vop_remove_args *ap) 1970 { 1971 struct vnode *vp = ap->a_vp; 1972 struct vnode *dvp = ap->a_dvp; 1973 struct componentname *cnp = ap->a_cnp; 1974 struct nfsnode *np = VTONFS(vp); 1975 int error = 0; 1976 struct vattr vattr; 1977 struct nfsmount *nmp; 1978 1979 KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); 1980 if (vp->v_type == VDIR) 1981 error = EPERM; 1982 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1983 VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && 1984 vattr.va_nlink > 1)) { 1985 nmp = VFSTONFS(vp->v_mount); 1986 /* 1987 * Purge the name cache so that the chance of a lookup for 1988 * the name succeeding while the remove is in progress is 1989 * minimized. Without node locking it can still happen, such 1990 * that an I/O op returns ESTALE, but since you get this if 1991 * another host removes the file.. 1992 */ 1993 cache_purge(vp); 1994 /* 1995 * throw away biocache buffers, mainly to avoid 1996 * unnecessary delayed writes later. 
		 * Flushing here would be more correct for the case
		 * where nfs_close() did not do a flush.  However, it
		 * could be a large performance hit for some servers
		 * and only matters when the file name being removed is
		 * one of multiple hard links.
		 */
		if (!NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp) ||
		    (nmp->nm_flag & NFSMNT_NOCTO) == 0)
			error = ncl_vinvalbuf(vp, 0, curthread, 1);
		if (error != EINTR && error != EIO)
			/* Do the rpc */
			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, curthread, false);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	NFSLOCKNODE(np);
	np->n_attrstamp = 0;
	NFSUNLOCKNODE(np);
	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 *
 * Issues the Remove RPC for the sillyrenamed file described by sp, using
 * the directory vnode, name and credentials saved in sp.  The RPC is done
 * with a NULL thread pointer and silly == true.
 *
 * Returns 0 without doing the RPC when the saved directory vnode has type
 * VBAD; otherwise returns whatever nfs_removerpc() returns.
 */
int
ncl_removeit(struct sillyrename *sp, struct vnode *vp)
{
	/*
	 * Make sure that the directory vnode is still valid.
	 * XXX we should lock sp->s_dvp here.
	 */
	if (sp->s_dvp->v_type == VBAD)
		return (0);
	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
	    sp->s_cred, NULL, true));
}

/*
 * Handle the nfsremove_status reply from the RPC function.
 *
 *	vp		vnode the status refers to
 *	file_status	status reported for that file by the RPC function
 *	silly		accepted for the callers' convenience; not examined here
 *	td		thread handed to the delegation return and to the
 *			buffer invalidation
 *
 * NLINK_ZERO returns any delegation held for vp and then, exactly like
 * DELETED, throws away vp's buffer cache blocks.  VALID and any other
 * value leave vp untouched.
 */
static void
nfs_removestatus(struct vnode *vp, nfsremove_status file_status,
    bool silly, struct thread *td)
{

	switch (file_status) {
	case NLINK_ZERO:
		/* Get rid of any delegation. */
		nfscl_delegreturnvp(vp, false, td);
		/* FALLTHROUGH */
	case DELETED:
		/* Throw away buffer cache blocks. */
		(void)ncl_vinvalbuf(vp, 0, td, 1);
		break;
	case VALID:
		/* Nothing to do, delegation is still ok. */
		break;
	default:
		break;
	}
}

/*
 * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
 *
 * dvp/name/namelen identify the directory entry to remove and vp is the
 * vnode of the file being removed.  The directory's nfsnode carries
 * NREMOVEINPROG for the duration of the RPC; afterwards the flag is
 * cleared and, if NREMOVEWANT was set meanwhile, that flag is cleared too
 * and anything sleeping on the nfsnode is woken up.
 *
 * The directory is always marked NMODIFIED.  Returns the error from
 * nfsrpc_remove(), passed through nfscl_maperr() when it is non-zero and
 * NFS_ISV4(dvp) holds.
 */
static int
nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td, bool silly)
{
	struct nfsvattr dnfsva, nfsva;
	struct nfsnode *dnp = VTONFS(dvp);
	struct nfsmount *nmp;
	int attrflag, error = 0, dattrflag;
	nfsremove_status file_status;

	nmp = VFSTONFS(dvp->v_mount);
	/* Advertise that a remove is in flight on this directory. */
	NFSLOCKNODE(dnp);
	dnp->n_flag |= NREMOVEINPROG;
	NFSUNLOCKNODE(dnp);
	error = nfsrpc_remove(dvp, name, namelen, vp, &nfsva, &attrflag,
	    &file_status, &dnfsva, &dattrflag, cred, td);
	NFSLOCKNODE(dnp);
	if ((dnp->n_flag & NREMOVEWANT)) {
		/* Someone asked to be told; the wakeup is done unlocked. */
		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
		NFSUNLOCKNODE(dnp);
		wakeup((caddr_t)dnp);
	} else {
		dnp->n_flag &= ~NREMOVEINPROG;
		NFSUNLOCKNODE(dnp);
	}

	/*
	 * file_status and the file's post-op attributes are only acted
	 * upon when both NFSHASNFSV4() and NFSHASNFSV4N() hold for the
	 * mount; the status handling additionally requires the "nocto"
	 * mount flag.
	 */
	if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp)) {
		if (file_status != DELETED && attrflag != 0)
			(void)nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
		if ((nmp->nm_flag & NFSMNT_NOCTO) != 0)
			nfs_removestatus(vp, file_status, silly, td);
	}

	if (dattrflag != 0)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
	NFSLOCKNODE(dnp);
	dnp->n_flag |= NMODIFIED;
	if (dattrflag == 0) {
		/* No fresh directory attributes: drop the cached ones. */
		dnp->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	NFSUNLOCKNODE(dnp);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs file rename call
 *
 * Renames a_fvp (entry a_fcnp in a_fdvp) to the entry a_tcnp in a_tdvp,
 * replacing a_tvp when it is non-NULL.
 *
 * Fails with EXDEV when the vnodes involved are not all on one mount and
 * with EOPNOTSUPP when any bit of a_flags is set.  An ENOENT result is
 * reported to the caller as success.
 *
 * Every exit goes through "out", which releases all four vnodes: tdvp is
 * vput() (only vrele()'d when it is the same vnode as tvp), tvp is vput()
 * when still non-NULL, and fdvp and fvp are vrele()'d.
 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct nfsnode *fnp = VTONFS(ap->a_fvp);
	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
	struct nfsv4node *newv4 = NULL;
	struct nfsmount *nmp;
	int error;

	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (ap->a_flags != 0) {
		error = EOPNOTSUPP;
		goto out;
	}

	nmp = VFSTONFS(fvp->v_mount);

	if (fvp == tvp) {
		printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	/* fvp is locked only for the cache purge and fsync below. */
	if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
		goto out;

	/*
	 * For case insensitive file systems, there may be multiple
	 * names cached for the one name being rename'd, so purge
	 * all names from the cache.
	 */
	if (NFSHASCASEINSENSITIVE(nmp))
		cache_purge(fvp);

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file.  If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	if ((nmp->nm_flag & NFSMNT_NOCTO) == 0 || !NFSHASNFSV4(nmp) ||
	    !NFSHASNFSV4N(nmp) || nfscl_mustflush(fvp) != 0)
		error = VOP_FSYNC(fvp, MNT_WAIT, curthread);

	NFSVOPUNLOCK(fvp);
	if (error == 0 && tvp != NULL && ((nmp->nm_flag & NFSMNT_NOCTO) == 0 ||
	    !NFSHASNFSV4(nmp) || !NFSHASNFSV4N(nmp) ||
	    nfscl_mustflush(tvp) != 0))
		error = VOP_FSYNC(tvp, MNT_WAIT, curthread);
	if (error != 0)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 *
	 * When the sillyrename succeeds (returns 0), tvp is released here
	 * and a NULL target vnode is handed to the rename RPC.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, false, tcnp->cn_cred,
	    curthread);

	if (error == 0 && NFS_ISV4(tdvp)) {
		/*
		 * For NFSv4, check to see if it is the same name and
		 * replace the name, if it is different.
		 *
		 * The replacement record is allocated up front, before
		 * the node locks are taken (presumably because an
		 * M_WAITOK allocation may sleep), and is freed again
		 * below if it turns out not to be needed.
		 */
		newv4 = malloc(
		    sizeof (struct nfsv4node) +
		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
		    M_NFSV4NODE, M_WAITOK);
		NFSLOCKNODE(tdnp);
		NFSLOCKNODE(fnp);
		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
		      NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
		      tcnp->cn_namelen) ||
		      tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
		      NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			tdnp->n_fhp->nfh_len))) {
			free(fnp->n_v4, M_NFSV4NODE);
			fnp->n_v4 = newv4;
			newv4 = NULL;
			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			    tdnp->n_fhp->nfh_len);
			NFSBCOPY(tcnp->cn_nameptr,
			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
		}
		NFSUNLOCKNODE(tdnp);
		NFSUNLOCKNODE(fnp);
		if (newv4 != NULL)
			free(newv4, M_NFSV4NODE);
	}

	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc used for sillyrename; nfs_sillyrename() calls this
 * to move the entry scnp in sdvp to the silly name held in sp, within the
 * same directory.  No target vnode is passed and silly is true.
 */
static int
nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
    struct sillyrename *sp)
{

	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
	    sdvp, NULL, sp->s_name, sp->s_namlen, true, scnp->cn_cred,
	    curthread));
}

/*
 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
 */
/*
 * The "from" and "to" directories are both marked NMODIFIED.  For each of
 * them the attributes returned by the RPC are loaded into the attribute
 * cache when present; otherwise the cached attributes are dropped by
 * zeroing n_attrstamp.  When a target vnode is supplied, its remove
 * status is handled (only for mounts where NFSHASNFSV4(), NFSHASNFSV4N()
 * and the "nocto" flag all hold) and, unless this is a silly rename, its
 * returned attributes are loaded.
 *
 * Returns the error from nfsrpc_rename(), passed through nfscl_maperr()
 * when it is non-zero and NFS_ISV4(fdvp) holds.
 */
static int
nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
    int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
    int tnamelen, bool silly, struct ucred *cred, struct thread *td)
{
	struct nfsvattr fnfsva, tnfsva, tvpnfsva;
	struct nfsnode *fdnp = VTONFS(fdvp);
	struct nfsnode *tdnp = VTONFS(tdvp);
	struct nfsmount *nmp;
	int error = 0, fattrflag, tattrflag, tvpattrflag;
	nfsremove_status tvp_status;

	nmp = VFSTONFS(fdvp->v_mount);
	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
	    tnameptr, tnamelen, &tvp_status, &fnfsva, &tnfsva, &fattrflag,
	    &tattrflag, &tvpnfsva, &tvpattrflag, cred, td);
	/* Source directory bookkeeping. */
	NFSLOCKNODE(fdnp);
	fdnp->n_flag |= NMODIFIED;
	if (fattrflag != 0) {
		NFSUNLOCKNODE(fdnp);
		(void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, 0, 1);
	} else {
		fdnp->n_attrstamp = 0;
		NFSUNLOCKNODE(fdnp);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
	}
	/* Target directory bookkeeping. */
	NFSLOCKNODE(tdnp);
	tdnp->n_flag |= NMODIFIED;
	if (tattrflag != 0) {
		NFSUNLOCKNODE(tdnp);
		(void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, 0, 1);
	} else {
		tdnp->n_attrstamp = 0;
		NFSUNLOCKNODE(tdnp);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
	}

	if (tvp != NULL) {
		if (NFSHASNFSV4(nmp) && NFSHASNFSV4N(nmp) &&
		    (nmp->nm_flag & NFSMNT_NOCTO) != 0)
			nfs_removestatus(tvp, tvp_status, silly, td);
		if (!silly && tvpattrflag != 0)
			(void)nfscl_loadattrcache(&tvp, &tvpnfsva, NULL, 0, 1);
	}

	if (error && NFS_ISV4(fdvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs hard link create call
 *
 * Creates the name a_cnp in directory a_tdvp as a link to a_vp.  The
 * target directory is marked NMODIFIED; for both the directory and the
 * file, returned attributes are loaded when present and the cached ones
 * are dropped otherwise.  Returns the error from nfsrpc_link(), mapped
 * through nfscl_maperr() for NFSv4.
 */
static int
nfs_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np, *tdnp;
	struct nfsvattr nfsva, dnfsva;
	int error = 0, attrflag, dattrflag;

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
#ifdef notnow
	VOP_FSYNC(vp, MNT_WAIT, curthread);
#endif

	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, curthread, &dnfsva, &nfsva, &attrflag, &dattrflag);
	tdnp = VTONFS(tdvp);
	NFSLOCKNODE(tdnp);
	tdnp->n_flag |= NMODIFIED;
	if (dattrflag != 0) {
		NFSUNLOCKNODE(tdnp);
		(void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, 0, 1);
	} else {
		tdnp->n_attrstamp = 0;
		NFSUNLOCKNODE(tdnp);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
	}
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
	else {
		np = VTONFS(vp);
		NFSLOCKNODE(np);
		np->n_attrstamp = 0;
		NFSUNLOCKNODE(np);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	}
	/*
	 * If negative lookup caching is enabled, I might as well
	 * add an entry for this node. Not necessary for correctness,
	 * but if negative caching is enabled, then the system
	 * must care about lookup caching hit rate, so...
	 */
	if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
		if (tdvp != vp)
			cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL);
		else
			printf("nfs_link: bogus NFS server returned "
			    "the directory as the new link\n");
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(curthread, error, (uid_t)0,
		    (gid_t)0);
	return (error);
}

/*
 * nfs symbolic link create call
 *
 * Creates the symlink a_cnp in a_dvp pointing at a_target.  On success
 * the new vnode is returned through *a_vpp; on failure any vnode that
 * was obtained is vput() and *a_vpp is left alone.  The directory is
 * marked NMODIFIED in either case.
 */
static int
nfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsvattr nfsva, dnfsva;
	struct nfsfh *nfhp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	int error = 0, attrflag, dattrflag, ret;

	vap->va_type = VLNK;
	error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    ap->a_target, vap, cnp->cn_cred, curthread, &dnfsva,
	    &nfsva, &nfhp, &attrflag, &dattrflag);
	/* A file handle came back: turn it into a locked vnode. */
	if (nfhp) {
		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread,
		    &np, LK_EXCLUSIVE);
		if (!ret)
			newvp = NFSTOV(np);
		else if (!error)
			error = ret;
	}
	if (newvp != NULL) {
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
	} else if (!error) {
		/*
		 * If we do not have an error and we could not extract the
		 * newvp from the response due to the request being NFSv2, we
		 * have to do a lookup in order to obtain a newvp to return.
		 */
		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, curthread, &np);
		if (!error)
			newvp = NFSTOV(np);
	}
	if (error) {
		if (newvp)
			vput(newvp);
		if (NFS_ISV4(dvp))
			error = nfscl_maperr(curthread, error,
			    vap->va_uid, vap->va_gid);
	} else {
		*ap->a_vpp = newvp;
	}

	dnp = VTONFS(dvp);
	NFSLOCKNODE(dnp);
	dnp->n_flag |= NMODIFIED;
	if (dattrflag != 0) {
		NFSUNLOCKNODE(dnp);
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
	} else {
		dnp->n_attrstamp = 0;
		NFSUNLOCKNODE(dnp);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	/*
	 * If negative lookup caching is enabled, I might as well
	 * add an entry for this node. Not necessary for correctness,
	 * but if negative caching is enabled, then the system
	 * must care about lookup caching hit rate, so...
	 */
	if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
		if (dvp != newvp)
			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
			    NULL);
		else
			printf("nfs_symlink: bogus NFS server returned "
			    "the directory as the new file object\n");
	}
	return (error);
}

/*
 * nfs make dir call
 *
 * Creates directory a_cnp in a_dvp.  On success the new vnode is
 * returned through *a_vpp.  If the RPC reply yields no vnode, the name is
 * looked up instead and EEXIST is returned when what is found is not a
 * directory.  On failure any vnode obtained is vput().
 */
static int
nfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	struct vattr vattr;
	struct nfsfh *nfhp;
	struct nfsvattr nfsva, dnfsva;
	int error = 0, attrflag, dattrflag, ret;

	/* Only the success/failure of this getattr is used. */
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
		return (error);
	vap->va_type = VDIR;
	error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    vap, cnp->cn_cred, curthread, &dnfsva, &nfsva, &nfhp,
	    &attrflag, &dattrflag);
	dnp = VTONFS(dvp);
	NFSLOCKNODE(dnp);
	dnp->n_flag |= NMODIFIED;
	if (dattrflag != 0) {
		NFSUNLOCKNODE(dnp);
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
	} else {
		dnp->n_attrstamp = 0;
		NFSUNLOCKNODE(dnp);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	if (nfhp) {
		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread,
		    &np, LK_EXCLUSIVE);
		if (!ret) {
			newvp = NFSTOV(np);
			if (attrflag)
				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
				    0, 1);
		} else if (!error)
			error = ret;
	}
	if (!error && newvp == NULL) {
		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, curthread, &np);
		if (!error) {
			newvp = NFSTOV(np);
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
		if (NFS_ISV4(dvp))
			error = nfscl_maperr(curthread, error,
			    vap->va_uid, vap->va_gid);
	} else {
		/*
		 * If negative lookup caching is enabled, I might as well
		 * add an entry for this node. Not necessary for correctness,
		 * but if negative caching is enabled, then the system
		 * must care about lookup caching hit rate, so...
		 */
		if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
		    (cnp->cn_flags & MAKEENTRY) &&
		    attrflag != 0 && dattrflag != 0) {
			if (dvp != newvp)
				cache_enter_time(dvp, newvp, cnp,
				    &nfsva.na_ctime, &dnfsva.na_ctime);
			else
				printf("nfs_mkdir: bogus NFS server returned "
				    "the directory that the directory was "
				    "created in as the new file object\n");
		}
		*ap->a_vpp = newvp;
	}
	return (error);
}

/*
 * nfs remove directory call
 *
 * Removes directory a_vp, named a_cnp, from a_dvp.  Returns EINVAL when
 * both vnodes are the same.  The name cache is purged for both vnodes
 * whatever the RPC result, and an ENOENT result is reported as success.
 */
static int
nfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *dnp;
	struct nfsvattr dnfsva;
	int error, dattrflag;

	if (dvp == vp)
		return (EINVAL);
	error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, curthread, &dnfsva, &dattrflag);
	dnp = VTONFS(dvp);
	NFSLOCKNODE(dnp);
	dnp->n_flag |= NMODIFIED;
	if (dattrflag != 0) {
		NFSUNLOCKNODE(dnp);
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
	} else {
		dnp->n_attrstamp = 0;
		NFSUNLOCKNODE(dnp);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}

	cache_purge(dvp);
	cache_purge(vp);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(curthread, error, (uid_t)0,
		    (gid_t)0);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs readdir call
 *
 * Reads directory entries from a_vp into a_uio via ncl_bioread().
 * *a_eofflag (when the pointer is non-NULL) starts out as 0 and is set to
 * 1 either on a hit in the cached EOF offset or when the read transfers
 * nothing.  Returns EPERM for a non-directory vnode and EINVAL when the
 * request is smaller than DIRBLKSIZ.
 */
static int
nfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct uio *uio = ap->a_uio;
	ssize_t tresid, left;
	int error = 0;
	struct vattr vattr;

	if (ap->a_eofflag != NULL)
		*ap->a_eofflag = 0;
	if (vp->v_type != VDIR)
		return(EPERM);

	/*
	 * First, check for hit on the EOF offset cache
	 *
	 * The node lock is dropped around VOP_GETATTR() and retaken to
	 * compare the cached change/modify information with the result.
	 */
	NFSLOCKNODE(np);
	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
	    (np->n_flag & NMODIFIED) == 0) {
		NFSUNLOCKNODE(np);
		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
			NFSLOCKNODE(np);
			if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
			    !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
				NFSUNLOCKNODE(np);
				NFSINCRGLOBAL(nfsstatsv1.direofcache_hits);
				if (ap->a_eofflag != NULL)
					*ap->a_eofflag = 1;
				return (0);
			} else
				NFSUNLOCKNODE(np);
		}
	} else
		NFSUNLOCKNODE(np);

	/*
	 * NFS always guarantees that directory entries don't straddle
	 * DIRBLKSIZ boundaries.  As such, we need to limit the size
	 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial
	 * directory entry.
	 */
	left = uio->uio_resid % DIRBLKSIZ;
	if (left == uio->uio_resid)
		return (EINVAL);
	uio->uio_resid -= left;

	/*
	 * For readdirplus, if starting to read the directory,
	 * purge the name cache, since it will be reloaded by
	 * this directory read.
	 * This removes potentially stale name cache entries.
	 */
	if (uio->uio_offset == 0 &&
	    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_RDIRPLUS) != 0)
		cache_purge(vp);

	/*
	 * Call ncl_bioread() to do the real work.
	 */
	tresid = uio->uio_resid;
	error = ncl_bioread(vp, uio, 0, ap->a_cred);

	/* Nothing was transferred and no error: treat it as end of dir. */
	if (!error && uio->uio_resid == tresid) {
		NFSINCRGLOBAL(nfsstatsv1.direofcache_misses);
		if (ap->a_eofflag != NULL)
			*ap->a_eofflag = 1;
	}

	/* Add the partial DIRBLKSIZ (left) back in. */
	uio->uio_resid += left;
	return (error);
}

/*
 * Readdir rpc call.
 * Called from below the buffer cache by ncl_doio().
 *
 * The uio must describe a single iovec whose offset and residual count
 * are multiples of DIRBLKSIZ (asserted).  Returns NFSERR_BAD_COOKIE when
 * no cookie is recorded for the starting offset.  On success either the
 * directory EOF offset is recorded in the nfsnode or the cookie for the
 * next offset is stored.
 */
int
ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    struct thread *td)
{
	struct nfsvattr nfsva;
	nfsuint64 *cookiep, cookie;
	struct nfsnode *dnp = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, eof, attrflag;

	KASSERT(uiop->uio_iovcnt == 1 &&
	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
	    ("nfs readdirrpc bad uio"));

	/*
	 * If there is no cookie, assume directory was stale.
	 *
	 * NOTE(review): the node lock is released right after
	 * ncl_dircookie_lock(), so that function presumably returns with
	 * it held -- confirm against its definition.
	 */
	ncl_dircookie_lock(dnp);
	NFSUNLOCKNODE(dnp);
	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		ncl_dircookie_unlock(dnp);
	} else {
		ncl_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}

	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
		(void)ncl_fsinfo(nmp, vp, cred, td);

	error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
	    &attrflag, &eof);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);

	if (!error) {
		/*
		 * We are now either at the end of the directory or have filled
		 * the block.
		 */
		if (eof) {
			NFSLOCKNODE(dnp);
			dnp->n_direofoffset = uiop->uio_offset;
			NFSUNLOCKNODE(dnp);
		} else {
			if (uiop->uio_resid > 0)
				printf("EEK! readdirrpc resid > 0\n");
			ncl_dircookie_lock(dnp);
			NFSUNLOCKNODE(dnp);
			/* Remember where the next block starts. */
			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
			*cookiep = cookie;
			ncl_dircookie_unlock(dnp);
		}
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

/*
 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc().
 *
 * Same contract as ncl_readdirrpc(), but the entries are fetched with
 * nfsrpc_readdirplus().
 */
int
ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    struct thread *td)
{
	struct nfsvattr nfsva;
	nfsuint64 *cookiep, cookie;
	struct nfsnode *dnp = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, attrflag, eof;

	KASSERT(uiop->uio_iovcnt == 1 &&
	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
	    ("nfs readdirplusrpc bad uio"));

	/*
	 * If there is no cookie, assume directory was stale.
	 *
	 * NOTE(review): the node lock is released right after
	 * ncl_dircookie_lock(), so that function presumably returns with
	 * it held -- confirm against its definition.
	 */
	ncl_dircookie_lock(dnp);
	NFSUNLOCKNODE(dnp);
	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		ncl_dircookie_unlock(dnp);
	} else {
		ncl_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}

	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
		(void)ncl_fsinfo(nmp, vp, cred, td);
	error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
	    &attrflag, &eof);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);

	if (!error) {
		/*
		 * We are now either at end of the directory or have filled
		 * the block.
		 */
		if (eof) {
			NFSLOCKNODE(dnp);
			dnp->n_direofoffset = uiop->uio_offset;
			NFSUNLOCKNODE(dnp);
		} else {
			if (uiop->uio_resid > 0)
				printf("EEK! readdirplusrpc resid > 0\n");
			ncl_dircookie_lock(dnp);
			NFSUNLOCKNODE(dnp);
			/* Remember where the next block starts. */
			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
			*cookiep = cookie;
			ncl_dircookie_unlock(dnp);
		}
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

/*
 * Silly rename. To make the NFS filesystem that is stateless look a little
 * more like the "ufs" a remove of an active vnode is translated to a rename
 * to a funny looking filename that is removed by nfs_inactive on the
 * nfsnode. There is the potential for another process on a different client
 * to create the same funny name between the time nfs_lookitup() fails and
 * the time the rename completes, but...
 *
 * On success the sillyrename record (holding a reference on dvp and on
 * the credentials) is attached to the file's nfsnode and 0 is returned.
 * If the rename RPC fails, the record and both references are released
 * and the error is returned.
 */
static int
nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct sillyrename *sp;
	struct nfsnode *np;
	int error;
	short pid;
	unsigned int lticks;

	cache_purge(dvp);
	np = VTONFS(vp);
	KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
	sp = malloc(sizeof (struct sillyrename),
	    M_NEWNFSREQ, M_WAITOK);
	sp->s_cred = crhold(cnp->cn_cred);
	sp->s_dvp = dvp;
	vref(dvp);

	/*
	 * Fudge together a funny name.
	 * Changing the format of the funny name to accommodate more
	 * sillynames per directory.
	 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is
	 * CPU ticks since boot.
	 *
	 * Note that the format string below emits a literal '4' directly
	 * after the pid, followed by ".4".  The loop bumps lticks until a
	 * lookup of the candidate name fails, i.e. the name is unused.
	 */
	pid = curthread->td_proc->p_pid;
	lticks = (unsigned int)ticks;
	for ( ; ; ) {
		sp->s_namlen = sprintf(sp->s_name,
		    ".nfs.%08x.%04x4.4", lticks,
		    pid);
		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		    curthread, NULL))
			break;
		lticks++;
	}
	error = nfs_renameit(dvp, vp, cnp, sp);
	if (error)
		goto bad;
	/*
	 * Look the new name up with the existing nfsnode so that its file
	 * handle is updated.  The result of this lookup is stored in error
	 * but not acted upon: once the rename succeeded, 0 is returned.
	 */
	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
	    curthread, &np);
	np->n_sillyrename = sp;
	return (0);
bad:
	vrele(sp->s_dvp);
	crfree(sp->s_cred);
	free(sp, M_NEWNFSREQ);
	return (error);
}

/*
 * Look up a file name and optionally either update the file handle or
 * allocate an nfsnode, depending on the value of npp.
 * npp == NULL	--> just do the lookup
 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 *			handled too
 * *npp != NULL --> update the file handle in the vnode
 *
 * Returns 0 or an error; for NFSv4 the error is passed through
 * nfscl_maperr().  In the "*npp == NULL" case ENOENT is returned, and
 * the vnode released, when no usable attributes came back for the file.
 */
static int
nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *newvp = NULL, *vp;
	struct nfsnode *np, *dnp = VTONFS(dvp);
	struct nfsfh *nfhp, *onfhp;
	struct nfsvattr nfsva, dnfsva;
	struct componentname cn;
	int error = 0, attrflag, dattrflag;
	u_int hash;
	struct timespec ts;

	/* Timestamp taken before the RPC, for the staleness check below. */
	nanouptime(&ts);
	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
	    &nfhp, &attrflag, &dattrflag, 0);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
	if (npp && !error) {
		if (*npp != NULL) {
			np = *npp;
			vp = NFSTOV(np);
			/*
			 * For NFSv4, check to see if it is the same name and
			 * replace the name, if it is different.
			 */
			if (np->n_v4 != NULL && nfsva.na_type == VREG &&
			    (np->n_v4->n4_namelen != len ||
			     NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
			     dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
			     NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
			     dnp->n_fhp->nfh_len))) {
				free(np->n_v4, M_NFSV4NODE);
				np->n_v4 = malloc(
				    sizeof (struct nfsv4node) +
				    dnp->n_fhp->nfh_len + len - 1,
				    M_NFSV4NODE, M_WAITOK);
				np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
				np->n_v4->n4_namelen = len;
				NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
				    dnp->n_fhp->nfh_len);
				NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
			}
			hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
			    FNV1_32_INIT);
			onfhp = np->n_fhp;
			/*
			 * Rehash node for new file handle.
			 */
			vfs_hash_rehash(vp, hash);
			np->n_fhp = nfhp;
			if (onfhp != NULL)
				free(onfhp, M_NFSFH);
			newvp = NFSTOV(np);
		} else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
			/* The name resolves to the directory itself. */
			free(nfhp, M_NFSFH);
			vref(dvp);
			newvp = dvp;
		} else {
			cn.cn_nameptr = name;
			cn.cn_namelen = len;
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
			    &np, LK_EXCLUSIVE);
			if (error)
				return (error);
			newvp = NFSTOV(np);
			/*
			 * If n_localmodtime >= time before RPC, then
			 * a file modification operation, such as
			 * VOP_SETATTR() of size, has occurred while
			 * the Lookup RPC and acquisition of the vnode
			 * happened.  As such, the attributes might
			 * be stale, with possibly an incorrect size.
			 */
			NFSLOCKNODE(np);
			if (timespecisset(&np->n_localmodtime) &&
			    timespeccmp(&np->n_localmodtime, &ts, >=)) {
				NFSCL_DEBUG(4, "nfs_lookitup: localmod "
				    "stale attributes\n");
				attrflag = 0;
			}
			NFSUNLOCKNODE(np);
		}
		if (!attrflag && *npp == NULL) {
			/* dvp only holds a reference; others are locked. */
			if (newvp == dvp)
				vrele(newvp);
			else
				vput(newvp);
			return (ENOENT);
		}
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
	}
	if (npp && *npp == NULL) {
		if (error) {
			if (newvp) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
		} else
			*npp = np;
	}
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * Nfs Version 3 and 4 commit rpc
 *
 * Commits cnt bytes starting at offset for vp.  error starts out as EIO
 * so that the plain Commit RPC is issued unless the pNFS data-server
 * commit path below is taken and succeeds.  Returns 0 without doing the
 * plain Commit RPC when the mount has no write verifier recorded
 * (NFSSTA_HASWRITEVERF clear).
 */
int
ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
{
	struct nfsvattr nfsva;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *np;
	struct uio uio;
	int error, attrflag;

	np = VTONFS(vp);
	error = EIO;
	attrflag = 0;
	if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) {
		/* Only the offset and length of the uio are filled in. */
		uio.uio_offset = offset;
		uio.uio_resid = cnt;
		error = nfscl_doiods(vp, &uio, NULL, NULL,
		    NFSV4OPEN_ACCESSWRITE, 1, cred, td);
		if (error != 0) {
			NFSLOCKNODE(np);
			np->n_flag &= ~NDSCOMMIT;
			NFSUNLOCKNODE(np);
		}
	}
	if (error != 0) {
		mtx_lock(&nmp->nm_mtx);
		if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
			mtx_unlock(&nmp->nm_mtx);
			return (0);
		}
		mtx_unlock(&nmp->nm_mtx);
		error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva,
		    &attrflag);
	}
	if (attrflag != 0)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
	if (error != 0 && NFS_ISV4(vp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 *
Strategy routine. 3024 * For async requests when nfsiod(s) are running, queue the request by 3025 * calling ncl_asyncio(), otherwise just all ncl_doio() to do the 3026 * request. 3027 */ 3028 static int 3029 nfs_strategy(struct vop_strategy_args *ap) 3030 { 3031 struct buf *bp; 3032 struct vnode *vp; 3033 struct ucred *cr; 3034 3035 bp = ap->a_bp; 3036 vp = ap->a_vp; 3037 KASSERT(bp->b_vp == vp, ("missing b_getvp")); 3038 KASSERT(!(bp->b_flags & B_DONE), 3039 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 3040 3041 if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno) 3042 bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize / 3043 DEV_BSIZE); 3044 if (bp->b_iocmd == BIO_READ) 3045 cr = bp->b_rcred; 3046 else 3047 cr = bp->b_wcred; 3048 3049 /* 3050 * If the op is asynchronous and an i/o daemon is waiting 3051 * queue the request, wake it up and wait for completion 3052 * otherwise just do it ourselves. 3053 */ 3054 if ((bp->b_flags & B_ASYNC) == 0 || 3055 ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread)) 3056 (void) ncl_doio(vp, bp, cr, curthread, 1); 3057 return (0); 3058 } 3059 3060 /* 3061 * fsync vnode op. Just call ncl_flush() with commit == 1. 3062 */ 3063 /* ARGSUSED */ 3064 static int 3065 nfs_fsync(struct vop_fsync_args *ap) 3066 { 3067 3068 if (ap->a_vp->v_type != VREG) { 3069 /* 3070 * For NFS, metadata is changed synchronously on the server, 3071 * so there is nothing to flush. Also, ncl_flush() clears 3072 * the NMODIFIED flag and that shouldn't be done here for 3073 * directories. 3074 */ 3075 return (0); 3076 } 3077 return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0)); 3078 } 3079 3080 /* 3081 * Flush all the blocks associated with a vnode. 3082 * Walk through the buffer pool and push any dirty pages 3083 * associated with the vnode. 
3084 * If the called_from_renewthread argument is TRUE, it has been called 3085 * from the NFSv4 renew thread and, as such, cannot block indefinitely 3086 * waiting for a buffer write to complete. 3087 */ 3088 int 3089 ncl_flush(struct vnode *vp, int waitfor, struct thread *td, 3090 int commit, int called_from_renewthread) 3091 { 3092 struct nfsnode *np = VTONFS(vp); 3093 struct buf *bp; 3094 int i; 3095 struct buf *nbp; 3096 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 3097 int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; 3098 int passone = 1, trycnt = 0; 3099 u_quad_t off, endoff, toff; 3100 struct ucred* wcred = NULL; 3101 struct buf **bvec = NULL; 3102 struct bufobj *bo; 3103 #ifndef NFS_COMMITBVECSIZ 3104 #define NFS_COMMITBVECSIZ 20 3105 #endif 3106 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; 3107 u_int bvecsize = 0, bveccount; 3108 struct timespec ts; 3109 3110 if (called_from_renewthread != 0) 3111 slptimeo = hz; 3112 if (nmp->nm_flag & NFSMNT_INT) 3113 slpflag = PCATCH; 3114 if (!commit) 3115 passone = 0; 3116 bo = &vp->v_bufobj; 3117 /* 3118 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the 3119 * server, but has not been committed to stable storage on the server 3120 * yet. On the first pass, the byte range is worked out and the commit 3121 * rpc is done. On the second pass, bwrite() is called to do the 3122 * job. 3123 */ 3124 again: 3125 off = (u_quad_t)-1; 3126 endoff = 0; 3127 bvecpos = 0; 3128 if (NFS_ISV34(vp) && commit) { 3129 if (bvec != NULL && bvec != bvec_on_stack) 3130 free(bvec, M_TEMP); 3131 /* 3132 * Count up how many buffers waiting for a commit. 3133 */ 3134 bveccount = 0; 3135 BO_LOCK(bo); 3136 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3137 if (!BUF_ISLOCKED(bp) && 3138 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 3139 == (B_DELWRI | B_NEEDCOMMIT)) 3140 bveccount++; 3141 } 3142 /* 3143 * Allocate space to remember the list of bufs to commit. 
It is 3144 * important to use M_NOWAIT here to avoid a race with nfs_write. 3145 * If we can't get memory (for whatever reason), we will end up 3146 * committing the buffers one-by-one in the loop below. 3147 */ 3148 if (bveccount > NFS_COMMITBVECSIZ) { 3149 /* 3150 * Release the vnode interlock to avoid a lock 3151 * order reversal. 3152 */ 3153 BO_UNLOCK(bo); 3154 bvec = (struct buf **) 3155 malloc(bveccount * sizeof(struct buf *), 3156 M_TEMP, M_NOWAIT); 3157 BO_LOCK(bo); 3158 if (bvec == NULL) { 3159 bvec = bvec_on_stack; 3160 bvecsize = NFS_COMMITBVECSIZ; 3161 } else 3162 bvecsize = bveccount; 3163 } else { 3164 bvec = bvec_on_stack; 3165 bvecsize = NFS_COMMITBVECSIZ; 3166 } 3167 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3168 if (bvecpos >= bvecsize) 3169 break; 3170 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 3171 nbp = TAILQ_NEXT(bp, b_bobufs); 3172 continue; 3173 } 3174 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 3175 (B_DELWRI | B_NEEDCOMMIT)) { 3176 BUF_UNLOCK(bp); 3177 nbp = TAILQ_NEXT(bp, b_bobufs); 3178 continue; 3179 } 3180 BO_UNLOCK(bo); 3181 bremfree(bp); 3182 /* 3183 * Work out if all buffers are using the same cred 3184 * so we can deal with them all with one commit. 3185 * 3186 * NOTE: we are not clearing B_DONE here, so we have 3187 * to do it later on in this routine if we intend to 3188 * initiate I/O on the bp. 3189 * 3190 * Note: to avoid loopback deadlocks, we do not 3191 * assign b_runningbufspace. 3192 */ 3193 if (wcred == NULL) 3194 wcred = bp->b_wcred; 3195 else if (wcred != bp->b_wcred) 3196 wcred = NOCRED; 3197 vfs_busy_pages(bp, 0); 3198 3199 BO_LOCK(bo); 3200 /* 3201 * bp is protected by being locked, but nbp is not 3202 * and vfs_busy_pages() may sleep. We have to 3203 * recalculate nbp. 3204 */ 3205 nbp = TAILQ_NEXT(bp, b_bobufs); 3206 3207 /* 3208 * A list of these buffers is kept so that the 3209 * second loop knows which buffers have actually 3210 * been committed. 
This is necessary, since there 3211 * may be a race between the commit rpc and new 3212 * uncommitted writes on the file. 3213 */ 3214 bvec[bvecpos++] = bp; 3215 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3216 bp->b_dirtyoff; 3217 if (toff < off) 3218 off = toff; 3219 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 3220 if (toff > endoff) 3221 endoff = toff; 3222 } 3223 BO_UNLOCK(bo); 3224 } 3225 if (bvecpos > 0) { 3226 /* 3227 * Commit data on the server, as required. 3228 * If all bufs are using the same wcred, then use that with 3229 * one call for all of them, otherwise commit each one 3230 * separately. 3231 */ 3232 if (wcred != NOCRED) 3233 retv = ncl_commit(vp, off, (int)(endoff - off), 3234 wcred, td); 3235 else { 3236 retv = 0; 3237 for (i = 0; i < bvecpos; i++) { 3238 off_t off, size; 3239 bp = bvec[i]; 3240 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3241 bp->b_dirtyoff; 3242 size = (u_quad_t)(bp->b_dirtyend 3243 - bp->b_dirtyoff); 3244 retv = ncl_commit(vp, off, (int)size, 3245 bp->b_wcred, td); 3246 if (retv) break; 3247 } 3248 } 3249 3250 if (retv == NFSERR_STALEWRITEVERF) 3251 ncl_clearcommit(vp->v_mount); 3252 3253 /* 3254 * Now, either mark the blocks I/O done or mark the 3255 * blocks dirty, depending on whether the commit 3256 * succeeded. 3257 */ 3258 for (i = 0; i < bvecpos; i++) { 3259 bp = bvec[i]; 3260 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 3261 if (!NFSCL_FORCEDISM(vp->v_mount) && retv) { 3262 /* 3263 * Error, leave B_DELWRI intact 3264 */ 3265 vfs_unbusy_pages(bp); 3266 brelse(bp); 3267 } else { 3268 /* 3269 * Success, remove B_DELWRI ( bundirty() ). 3270 * 3271 * b_dirtyoff/b_dirtyend seem to be NFS 3272 * specific. We should probably move that 3273 * into bundirty(). 
XXX 3274 */ 3275 bufobj_wref(bo); 3276 bp->b_flags |= B_ASYNC; 3277 bundirty(bp); 3278 bp->b_flags &= ~B_DONE; 3279 bp->b_ioflags &= ~BIO_ERROR; 3280 bp->b_dirtyoff = bp->b_dirtyend = 0; 3281 bufdone(bp); 3282 } 3283 } 3284 } 3285 3286 /* 3287 * Start/do any write(s) that are required. 3288 */ 3289 loop: 3290 BO_LOCK(bo); 3291 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3292 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 3293 if (waitfor != MNT_WAIT || passone) 3294 continue; 3295 3296 error = BUF_TIMELOCK(bp, 3297 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 3298 BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo); 3299 if (error == 0) { 3300 BUF_UNLOCK(bp); 3301 goto loop; 3302 } 3303 if (error == ENOLCK) { 3304 error = 0; 3305 goto loop; 3306 } 3307 if (called_from_renewthread != 0) { 3308 /* 3309 * Return EIO so the flush will be retried 3310 * later. 3311 */ 3312 error = EIO; 3313 goto done; 3314 } 3315 if (newnfs_sigintr(nmp, td)) { 3316 error = EINTR; 3317 goto done; 3318 } 3319 if (slpflag == PCATCH) { 3320 slpflag = 0; 3321 slptimeo = 2 * hz; 3322 } 3323 goto loop; 3324 } 3325 if ((bp->b_flags & B_DELWRI) == 0) 3326 panic("nfs_fsync: not dirty"); 3327 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { 3328 BUF_UNLOCK(bp); 3329 continue; 3330 } 3331 BO_UNLOCK(bo); 3332 bremfree(bp); 3333 bp->b_flags |= B_ASYNC; 3334 bwrite(bp); 3335 if (newnfs_sigintr(nmp, td)) { 3336 error = EINTR; 3337 goto done; 3338 } 3339 goto loop; 3340 } 3341 if (passone) { 3342 passone = 0; 3343 BO_UNLOCK(bo); 3344 goto again; 3345 } 3346 if (waitfor == MNT_WAIT) { 3347 while (bo->bo_numoutput) { 3348 error = bufobj_wwait(bo, slpflag, slptimeo); 3349 if (error) { 3350 BO_UNLOCK(bo); 3351 if (called_from_renewthread != 0) { 3352 /* 3353 * Return EIO so that the flush will be 3354 * retried later. 
3355 */ 3356 error = EIO; 3357 goto done; 3358 } 3359 error = newnfs_sigintr(nmp, td); 3360 if (error) 3361 goto done; 3362 if (slpflag == PCATCH) { 3363 slpflag = 0; 3364 slptimeo = 2 * hz; 3365 } 3366 BO_LOCK(bo); 3367 } 3368 } 3369 if (bo->bo_dirty.bv_cnt != 0 && commit) { 3370 BO_UNLOCK(bo); 3371 goto loop; 3372 } 3373 /* 3374 * Wait for all the async IO requests to drain 3375 */ 3376 BO_UNLOCK(bo); 3377 } else 3378 BO_UNLOCK(bo); 3379 if (NFSHASPNFS(nmp)) { 3380 nfscl_layoutcommit(vp, td); 3381 /* 3382 * Invalidate the attribute cache, since writes to a DS 3383 * won't update the size attribute. 3384 */ 3385 NFSLOCKNODE(np); 3386 np->n_attrstamp = 0; 3387 } else 3388 NFSLOCKNODE(np); 3389 if (np->n_flag & NWRITEERR) { 3390 error = np->n_error; 3391 np->n_flag &= ~NWRITEERR; 3392 } 3393 if (commit && bo->bo_dirty.bv_cnt == 0 && 3394 bo->bo_numoutput == 0) 3395 np->n_flag &= ~NMODIFIED; 3396 NFSUNLOCKNODE(np); 3397 done: 3398 if (bvec != NULL && bvec != bvec_on_stack) 3399 free(bvec, M_TEMP); 3400 if (error == 0 && commit != 0 && waitfor == MNT_WAIT && 3401 (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0)) { 3402 if (trycnt++ < 5) { 3403 /* try, try again... */ 3404 passone = 1; 3405 wcred = NULL; 3406 bvec = NULL; 3407 bvecsize = 0; 3408 goto again; 3409 } 3410 vn_printf(vp, "ncl_flush failed"); 3411 error = called_from_renewthread != 0 ? EIO : EBUSY; 3412 } 3413 if (error == 0) { 3414 nanouptime(&ts); 3415 NFSLOCKNODE(np); 3416 np->n_localmodtime = ts; 3417 NFSUNLOCKNODE(np); 3418 } 3419 return (error); 3420 } 3421 3422 /* 3423 * NFS advisory byte-level locks. 
3424 */ 3425 static int 3426 nfs_advlock(struct vop_advlock_args *ap) 3427 { 3428 struct vnode *vp = ap->a_vp; 3429 struct ucred *cred; 3430 struct nfsnode *np = VTONFS(ap->a_vp); 3431 struct proc *p = (struct proc *)ap->a_id; 3432 struct thread *td = curthread; /* XXX */ 3433 struct vattr va; 3434 int ret, error; 3435 u_quad_t size; 3436 struct nfsmount *nmp; 3437 3438 error = NFSVOPLOCK(vp, LK_EXCLUSIVE); 3439 if (error != 0) 3440 return (EBADF); 3441 nmp = VFSTONFS(vp->v_mount); 3442 if (!NFS_ISV4(vp) || (nmp->nm_flag & NFSMNT_NOLOCKD) != 0) { 3443 if ((nmp->nm_flag & NFSMNT_NOLOCKD) != 0) { 3444 size = np->n_size; 3445 NFSVOPUNLOCK(vp); 3446 error = lf_advlock(ap, &(vp->v_lockf), size); 3447 } else { 3448 if (nfs_advlock_p != NULL) 3449 error = nfs_advlock_p(ap); 3450 else { 3451 NFSVOPUNLOCK(vp); 3452 error = ENOLCK; 3453 } 3454 } 3455 if (error == 0 && ap->a_op == F_SETLK) { 3456 error = NFSVOPLOCK(vp, LK_SHARED); 3457 if (error == 0) { 3458 /* Mark that a file lock has been acquired. */ 3459 NFSLOCKNODE(np); 3460 np->n_flag |= NHASBEENLOCKED; 3461 NFSUNLOCKNODE(np); 3462 NFSVOPUNLOCK(vp); 3463 } 3464 } 3465 return (error); 3466 } else if ((ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { 3467 if (vp->v_type != VREG) { 3468 error = EINVAL; 3469 goto out; 3470 } 3471 if ((ap->a_flags & F_POSIX) != 0) 3472 cred = p->p_ucred; 3473 else 3474 cred = td->td_ucred; 3475 3476 /* 3477 * If this is unlocking a write locked region, flush and 3478 * commit them before unlocking. This is required by 3479 * RFC3530 Sec. 9.3.2. 3480 */ 3481 if (ap->a_op == F_UNLCK && 3482 nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, 3483 ap->a_flags)) 3484 (void) ncl_flush(vp, MNT_WAIT, td, 1, 0); 3485 3486 /* 3487 * Mark NFS node as might have acquired a lock. 3488 * This is separate from NHASBEENLOCKED, because it must 3489 * be done before the nfsrpc_advlock() call, which might 3490 * add a nfscllock structure to the client state. 
3491 * It is used to check for the case where a nfscllock 3492 * state structure cannot exist for the file. 3493 * Only done for "oneopenown" NFSv4.1/4.2 mounts. 3494 */ 3495 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) { 3496 NFSLOCKNODE(np); 3497 np->n_flag |= NMIGHTBELOCKED; 3498 NFSUNLOCKNODE(np); 3499 } 3500 3501 /* 3502 * Loop around doing the lock op, while a blocking lock 3503 * must wait for the lock op to succeed. 3504 */ 3505 do { 3506 ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, 3507 ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); 3508 if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3509 ap->a_op == F_SETLK) { 3510 NFSVOPUNLOCK(vp); 3511 error = nfs_catnap(PZERO | PCATCH, ret, 3512 "ncladvl"); 3513 if (error) 3514 return (EINTR); 3515 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 3516 if (VN_IS_DOOMED(vp)) { 3517 error = EBADF; 3518 goto out; 3519 } 3520 } 3521 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3522 ap->a_op == F_SETLK); 3523 if (ret == NFSERR_DENIED) { 3524 error = EAGAIN; 3525 goto out; 3526 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { 3527 error = ret; 3528 goto out; 3529 } else if (ret != 0) { 3530 error = EACCES; 3531 goto out; 3532 } 3533 3534 /* 3535 * Now, if we just got a lock, invalidate data in the buffer 3536 * cache, as required, so that the coherency conforms with 3537 * RFC3530 Sec. 9.3.2. 3538 */ 3539 if (ap->a_op == F_SETLK) { 3540 if ((np->n_flag & NMODIFIED) == 0) { 3541 np->n_attrstamp = 0; 3542 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3543 ret = VOP_GETATTR(vp, &va, cred); 3544 } 3545 if ((np->n_flag & NMODIFIED) || ret || 3546 np->n_change != va.va_filerev) { 3547 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); 3548 np->n_attrstamp = 0; 3549 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3550 ret = VOP_GETATTR(vp, &va, cred); 3551 if (!ret) { 3552 np->n_mtime = va.va_mtime; 3553 np->n_change = va.va_filerev; 3554 } 3555 } 3556 /* Mark that a file lock has been acquired. 
*/ 3557 NFSLOCKNODE(np); 3558 np->n_flag |= NHASBEENLOCKED; 3559 NFSUNLOCKNODE(np); 3560 } 3561 } else 3562 error = EOPNOTSUPP; 3563 out: 3564 NFSVOPUNLOCK(vp); 3565 return (error); 3566 } 3567 3568 /* 3569 * NFS advisory byte-level locks. 3570 */ 3571 static int 3572 nfs_advlockasync(struct vop_advlockasync_args *ap) 3573 { 3574 struct vnode *vp = ap->a_vp; 3575 u_quad_t size; 3576 int error; 3577 3578 error = NFSVOPLOCK(vp, LK_SHARED); 3579 if (error) 3580 return (error); 3581 if (NFS_ISV4(vp)) { 3582 NFSVOPUNLOCK(vp); 3583 return (EOPNOTSUPP); 3584 } 3585 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3586 size = VTONFS(vp)->n_size; 3587 NFSVOPUNLOCK(vp); 3588 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3589 } else { 3590 NFSVOPUNLOCK(vp); 3591 error = EOPNOTSUPP; 3592 } 3593 return (error); 3594 } 3595 3596 /* 3597 * Print out the contents of an nfsnode. 3598 */ 3599 static int 3600 nfs_print(struct vop_print_args *ap) 3601 { 3602 struct vnode *vp = ap->a_vp; 3603 struct nfsnode *np = VTONFS(vp); 3604 3605 printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, 3606 (uintmax_t)np->n_vattr.na_fsid); 3607 if (vp->v_type == VFIFO) 3608 fifo_printinfo(vp); 3609 printf("\n"); 3610 return (0); 3611 } 3612 3613 /* 3614 * nfs special file access vnode op. 3615 * Essentially just get vattr and then imitate iaccess() since the device is 3616 * local to the client. 3617 */ 3618 static int 3619 nfsspec_access(struct vop_access_args *ap) 3620 { 3621 struct vattr *vap; 3622 struct ucred *cred = ap->a_cred; 3623 struct vnode *vp = ap->a_vp; 3624 accmode_t accmode = ap->a_accmode; 3625 struct vattr vattr; 3626 int error; 3627 3628 /* 3629 * Disallow write attempts on filesystems mounted read-only; 3630 * unless the file is a socket, fifo, or a block or character 3631 * device resident on the filesystem. 
3632 */ 3633 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3634 switch (vp->v_type) { 3635 case VREG: 3636 case VDIR: 3637 case VLNK: 3638 return (EROFS); 3639 default: 3640 break; 3641 } 3642 } 3643 vap = &vattr; 3644 error = VOP_GETATTR(vp, vap, cred); 3645 if (error) 3646 goto out; 3647 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3648 accmode, cred); 3649 out: 3650 return error; 3651 } 3652 3653 /* 3654 * Read wrapper for fifos. 3655 */ 3656 static int 3657 nfsfifo_read(struct vop_read_args *ap) 3658 { 3659 struct nfsnode *np = VTONFS(ap->a_vp); 3660 int error; 3661 3662 /* 3663 * Set access flag. 3664 */ 3665 NFSLOCKNODE(np); 3666 np->n_flag |= NACC; 3667 vfs_timestamp(&np->n_atim); 3668 NFSUNLOCKNODE(np); 3669 error = fifo_specops.vop_read(ap); 3670 return error; 3671 } 3672 3673 /* 3674 * Write wrapper for fifos. 3675 */ 3676 static int 3677 nfsfifo_write(struct vop_write_args *ap) 3678 { 3679 struct nfsnode *np = VTONFS(ap->a_vp); 3680 3681 /* 3682 * Set update flag. 3683 */ 3684 NFSLOCKNODE(np); 3685 np->n_flag |= NUPD; 3686 vfs_timestamp(&np->n_mtim); 3687 NFSUNLOCKNODE(np); 3688 return(fifo_specops.vop_write(ap)); 3689 } 3690 3691 /* 3692 * Close wrapper for fifos. 3693 * 3694 * Update the times on the nfsnode then do fifo close. 
3695 */ 3696 static int 3697 nfsfifo_close(struct vop_close_args *ap) 3698 { 3699 struct vnode *vp = ap->a_vp; 3700 struct nfsnode *np = VTONFS(vp); 3701 struct vattr vattr; 3702 struct timespec ts; 3703 3704 NFSLOCKNODE(np); 3705 if (np->n_flag & (NACC | NUPD)) { 3706 vfs_timestamp(&ts); 3707 if (np->n_flag & NACC) 3708 np->n_atim = ts; 3709 if (np->n_flag & NUPD) 3710 np->n_mtim = ts; 3711 np->n_flag |= NCHG; 3712 if (vrefcnt(vp) == 1 && 3713 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3714 VATTR_NULL(&vattr); 3715 if (np->n_flag & NACC) 3716 vattr.va_atime = np->n_atim; 3717 if (np->n_flag & NUPD) 3718 vattr.va_mtime = np->n_mtim; 3719 NFSUNLOCKNODE(np); 3720 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3721 goto out; 3722 } 3723 } 3724 NFSUNLOCKNODE(np); 3725 out: 3726 return (fifo_specops.vop_close(ap)); 3727 } 3728 3729 static int 3730 nfs_getacl(struct vop_getacl_args *ap) 3731 { 3732 int error; 3733 3734 if (ap->a_type != ACL_TYPE_NFS4 && ap->a_type != ACL_TYPE_ACCESS && 3735 ap->a_type != ACL_TYPE_DEFAULT) 3736 return (EOPNOTSUPP); 3737 if (ap->a_type == ACL_TYPE_DEFAULT && ap->a_vp->v_type != VDIR) 3738 return (EINVAL); 3739 error = nfsrpc_getacl(ap->a_vp, ap->a_type, ap->a_cred, ap->a_td, 3740 ap->a_aclp); 3741 if (error == 0 && ap->a_aclp->acl_cnt == 0 && 3742 ap->a_type != ACL_TYPE_DEFAULT) 3743 return (EOPNOTSUPP); 3744 if (error > NFSERR_STALE) { 3745 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3746 error = EPERM; 3747 } 3748 return (error); 3749 } 3750 3751 static int 3752 nfs_setacl(struct vop_setacl_args *ap) 3753 { 3754 int error; 3755 3756 if (ap->a_type != ACL_TYPE_NFS4 && ap->a_type != ACL_TYPE_ACCESS && 3757 ap->a_type != ACL_TYPE_DEFAULT) 3758 return (EOPNOTSUPP); 3759 if (ap->a_aclp == NULL) { 3760 if (ap->a_type != ACL_TYPE_DEFAULT) 3761 return (EINVAL); 3762 if (ap->a_vp->v_type != VDIR) 3763 return (ENOTDIR); 3764 } 3765 error = nfsrpc_setacl(ap->a_vp, ap->a_type, ap->a_cred, ap->a_td, 3766 ap->a_aclp); 3767 if (error > 
NFSERR_STALE) { 3768 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3769 error = EPERM; 3770 } 3771 return (error); 3772 } 3773 3774 /* 3775 * VOP_ADVISE for NFS. 3776 * Just return 0 for any errors, since it is just a hint. 3777 */ 3778 static int 3779 nfs_advise(struct vop_advise_args *ap) 3780 { 3781 struct thread *td = curthread; 3782 struct nfsmount *nmp; 3783 uint64_t len; 3784 int error; 3785 3786 /* 3787 * First do vop_stdadvise() to handle the buffer cache. 3788 */ 3789 error = vop_stdadvise(ap); 3790 if (error != 0) 3791 return (error); 3792 if (ap->a_start < 0 || ap->a_end < 0) 3793 return (0); 3794 if (ap->a_end == OFF_MAX) 3795 len = 0; 3796 else if (ap->a_end < ap->a_start) 3797 return (0); 3798 else 3799 len = ap->a_end - ap->a_start + 1; 3800 nmp = VFSTONFS(ap->a_vp->v_mount); 3801 mtx_lock(&nmp->nm_mtx); 3802 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 3803 (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) == 3804 0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) { 3805 mtx_unlock(&nmp->nm_mtx); 3806 return (0); 3807 } 3808 mtx_unlock(&nmp->nm_mtx); 3809 error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice, 3810 td->td_ucred, td); 3811 if (error == NFSERR_NOTSUPP) { 3812 mtx_lock(&nmp->nm_mtx); 3813 nmp->nm_privflag |= NFSMNTP_NOADVISE; 3814 mtx_unlock(&nmp->nm_mtx); 3815 } 3816 return (0); 3817 } 3818 3819 /* 3820 * nfs allocate call 3821 */ 3822 static int 3823 nfs_allocate(struct vop_allocate_args *ap) 3824 { 3825 struct vnode *vp = ap->a_vp; 3826 struct thread *td = curthread; 3827 struct nfsvattr nfsva; 3828 struct nfsmount *nmp; 3829 struct nfsnode *np; 3830 off_t alen; 3831 int attrflag, error, ret; 3832 struct timespec ts; 3833 struct uio io; 3834 3835 attrflag = 0; 3836 nmp = VFSTONFS(vp->v_mount); 3837 np = VTONFS(vp); 3838 mtx_lock(&nmp->nm_mtx); 3839 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3840 (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) { 3841 
mtx_unlock(&nmp->nm_mtx); 3842 alen = *ap->a_len; 3843 if ((uint64_t)alen > nfs_maxalloclen) 3844 alen = nfs_maxalloclen; 3845 3846 /* Check the file size limit. */ 3847 io.uio_offset = *ap->a_offset; 3848 io.uio_resid = alen; 3849 error = vn_rlimit_fsize(vp, &io, td); 3850 3851 /* 3852 * Flush first to ensure that the allocate adds to the 3853 * file's allocation on the server. 3854 */ 3855 if (error == 0) { 3856 vnode_pager_clean_sync(vp); 3857 error = ncl_flush(vp, MNT_WAIT, td, 1, 0); 3858 } 3859 if (error == 0) 3860 error = nfsrpc_allocate(vp, *ap->a_offset, alen, 3861 &nfsva, &attrflag, ap->a_cred, td); 3862 if (error == 0) { 3863 *ap->a_offset += alen; 3864 *ap->a_len -= alen; 3865 nanouptime(&ts); 3866 NFSLOCKNODE(np); 3867 np->n_localmodtime = ts; 3868 NFSUNLOCKNODE(np); 3869 } else if (error == NFSERR_NOTSUPP) { 3870 mtx_lock(&nmp->nm_mtx); 3871 nmp->nm_privflag |= NFSMNTP_NOALLOCATE; 3872 mtx_unlock(&nmp->nm_mtx); 3873 error = EOPNOTSUPP; 3874 } 3875 } else { 3876 /* 3877 * Pre-v4.2 NFS server that doesn't support it, or a newer 3878 * NFS server that has indicated that it doesn't support it. 
3879 */ 3880 mtx_unlock(&nmp->nm_mtx); 3881 error = EOPNOTSUPP; 3882 } 3883 if (attrflag != 0) { 3884 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 3885 if (error == 0 && ret != 0) 3886 error = ret; 3887 } 3888 if (error != 0) 3889 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3890 return (error); 3891 } 3892 3893 /* 3894 * nfs deallocate call 3895 */ 3896 static int 3897 nfs_deallocate(struct vop_deallocate_args *ap) 3898 { 3899 struct vnode *vp = ap->a_vp; 3900 struct thread *td = curthread; 3901 struct nfsvattr nfsva; 3902 struct nfsmount *nmp; 3903 struct nfsnode *np; 3904 off_t tlen, mlen; 3905 int attrflag, error, ret; 3906 bool clipped; 3907 struct timespec ts; 3908 3909 error = 0; 3910 attrflag = 0; 3911 nmp = VFSTONFS(vp->v_mount); 3912 np = VTONFS(vp); 3913 mtx_lock(&nmp->nm_mtx); 3914 if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION && 3915 (nmp->nm_privflag & NFSMNTP_NODEALLOCATE) == 0) { 3916 mtx_unlock(&nmp->nm_mtx); 3917 tlen = omin(OFF_MAX - *ap->a_offset, *ap->a_len); 3918 NFSCL_DEBUG(4, "dealloc: off=%jd len=%jd maxfilesize=%ju\n", 3919 (intmax_t)*ap->a_offset, (intmax_t)tlen, 3920 (uintmax_t)nmp->nm_maxfilesize); 3921 if ((uint64_t)*ap->a_offset >= nmp->nm_maxfilesize) { 3922 /* Avoid EFBIG error return from the NFSv4.2 server. */ 3923 *ap->a_len = 0; 3924 return (0); 3925 } 3926 clipped = false; 3927 if ((uint64_t)*ap->a_offset + tlen > nmp->nm_maxfilesize) 3928 tlen = nmp->nm_maxfilesize - *ap->a_offset; 3929 if ((uint64_t)*ap->a_offset < np->n_size) { 3930 /* Limit the len to nfs_maxalloclen before EOF. 
*/ 3931 mlen = omin((off_t)np->n_size - *ap->a_offset, tlen); 3932 if ((uint64_t)mlen > nfs_maxalloclen) { 3933 NFSCL_DEBUG(4, "dealloc: tlen maxalloclen\n"); 3934 tlen = nfs_maxalloclen; 3935 clipped = true; 3936 } 3937 } 3938 if (error == 0) 3939 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 3940 if (error == 0) { 3941 vnode_pager_purge_range(vp, *ap->a_offset, 3942 *ap->a_offset + tlen); 3943 error = nfsrpc_deallocate(vp, *ap->a_offset, tlen, 3944 &nfsva, &attrflag, ap->a_cred, td); 3945 NFSCL_DEBUG(4, "dealloc: rpc=%d\n", error); 3946 } 3947 if (error == 0) { 3948 NFSCL_DEBUG(4, "dealloc: attrflag=%d na_size=%ju\n", 3949 attrflag, (uintmax_t)nfsva.na_size); 3950 nanouptime(&ts); 3951 NFSLOCKNODE(np); 3952 np->n_localmodtime = ts; 3953 NFSUNLOCKNODE(np); 3954 if (attrflag != 0) { 3955 if ((uint64_t)*ap->a_offset < nfsva.na_size) 3956 *ap->a_offset += omin((off_t) 3957 nfsva.na_size - *ap->a_offset, 3958 tlen); 3959 } 3960 if (clipped && tlen < *ap->a_len) 3961 *ap->a_len -= tlen; 3962 else 3963 *ap->a_len = 0; 3964 } else if (error == NFSERR_NOTSUPP) { 3965 mtx_lock(&nmp->nm_mtx); 3966 nmp->nm_privflag |= NFSMNTP_NODEALLOCATE; 3967 mtx_unlock(&nmp->nm_mtx); 3968 } 3969 } else { 3970 mtx_unlock(&nmp->nm_mtx); 3971 error = EIO; 3972 } 3973 /* 3974 * If the NFS server cannot perform the Deallocate operation, just call 3975 * vop_stddeallocate() to perform it. 
3976 */ 3977 if (error != 0 && error != NFSERR_FBIG && error != NFSERR_INVAL) { 3978 error = vop_stddeallocate(ap); 3979 NFSCL_DEBUG(4, "dealloc: stddeallocate=%d\n", error); 3980 } 3981 if (attrflag != 0) { 3982 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 3983 if (error == 0 && ret != 0) 3984 error = ret; 3985 } 3986 if (error != 0) 3987 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 3988 return (error); 3989 } 3990 3991 /* 3992 * nfs copy_file_range call 3993 */ 3994 static int 3995 nfs_copy_file_range(struct vop_copy_file_range_args *ap) 3996 { 3997 struct vnode *invp = ap->a_invp; 3998 struct vnode *outvp = ap->a_outvp; 3999 struct mount *mp; 4000 vm_object_t invp_obj; 4001 struct nfsvattr innfsva, outnfsva; 4002 struct vattr va, *vap; 4003 struct uio io; 4004 struct nfsmount *nmp; 4005 struct nfsnode *np; 4006 size_t len, len2; 4007 ssize_t r; 4008 int error, inattrflag, outattrflag, ret, ret2, invp_lock; 4009 off_t inoff, outoff; 4010 bool consecutive, must_commit, onevp, toeof, tryclone, tryoutcred; 4011 bool mustclone; 4012 4013 /* 4014 * NFSv4.2 Copy is not permitted for infile == outfile. 4015 * The NFSv4.2 Clone operation does work on non-overlapping 4016 * byte ranges in the same file, but only if offsets 4017 * (and len if not to EOF) are aligned properly. 4018 * TODO: copy_file_range() between multiple NFS mountpoints 4019 * --> This is not possible now, since each mount appears to 4020 * the NFSv4.n server as a separate client. 
4021 */ 4022 if ((invp == outvp && (ap->a_flags & COPY_FILE_RANGE_CLONE) == 0) || 4023 (invp != outvp && invp->v_mount != outvp->v_mount)) { 4024 generic_copy: 4025 return (ENOSYS); 4026 } 4027 if (invp == outvp) { 4028 onevp = true; 4029 invp_lock = LK_EXCLUSIVE; 4030 } else { 4031 onevp = false; 4032 invp_lock = LK_SHARED; 4033 } 4034 mustclone = false; 4035 if (onevp || (ap->a_flags & COPY_FILE_RANGE_CLONE) != 0) 4036 mustclone = true; 4037 relock: 4038 inoff = *ap->a_inoffp; 4039 outoff = *ap->a_outoffp; 4040 4041 /* Lock vnode(s), avoiding risk of deadlock. */ 4042 do { 4043 mp = NULL; 4044 error = vn_start_write(outvp, &mp, V_WAIT); 4045 if (error == 0) { 4046 error = vn_lock(outvp, LK_EXCLUSIVE); 4047 if (error == 0) { 4048 if (onevp) 4049 break; 4050 error = vn_lock(invp, invp_lock | LK_NOWAIT); 4051 if (error == 0) 4052 break; 4053 VOP_UNLOCK(outvp); 4054 if (mp != NULL) 4055 vn_finished_write(mp); 4056 mp = NULL; 4057 error = vn_lock(invp, invp_lock); 4058 if (error == 0) 4059 VOP_UNLOCK(invp); 4060 } 4061 } 4062 if (mp != NULL) 4063 vn_finished_write(mp); 4064 } while (error == 0); 4065 if (error != 0) 4066 return (error); 4067 4068 /* 4069 * More reasons to avoid nfs copy/clone: not NFSv4.2, explicitly 4070 * disabled or requires cloning and unable to clone. 4071 * Only clone if the clone_blksize attribute is supported 4072 * and the clone_blksize is greater than 0. 4073 * Alignment of offsets and length will be checked later. 4074 */ 4075 nmp = VFSTONFS(invp->v_mount); 4076 np = VTONFS(invp); 4077 mtx_lock(&nmp->nm_mtx); 4078 if ((nmp->nm_privflag & NFSMNTP_NOCOPY) != 0) 4079 mustclone = true; 4080 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4081 (mustclone && (!NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, 4082 NFSATTRBIT_CLONEBLKSIZE) || nmp->nm_cloneblksize == 0))) { 4083 mtx_unlock(&nmp->nm_mtx); 4084 VOP_UNLOCK(invp); 4085 if (!onevp) 4086 VOP_UNLOCK(outvp); /* For onevp, same as invp. 
*/ 4087 if (mp != NULL) 4088 vn_finished_write(mp); 4089 goto generic_copy; 4090 } 4091 mtx_unlock(&nmp->nm_mtx); 4092 4093 /* 4094 * Do the vn_rlimit_fsize() check. Should this be above the VOP layer? 4095 */ 4096 io.uio_offset = *ap->a_outoffp; 4097 io.uio_resid = *ap->a_lenp; 4098 error = vn_rlimit_fsizex(outvp, &io, 0, &r, ap->a_fsizetd); 4099 *ap->a_lenp = io.uio_resid; 4100 /* 4101 * No need to call vn_rlimit_fsizex_res before return, since the uio is 4102 * local. 4103 */ 4104 4105 /* 4106 * Flush the input file so that the data is up to date before 4107 * the copy. Flush writes for the output file so that they 4108 * do not overwrite the data copied to the output file by the Copy. 4109 * Set the commit argument for both flushes so that the data is on 4110 * stable storage before the Copy RPC. This is done in case the 4111 * server reboots during the Copy and needs to be redone. 4112 */ 4113 if (error == 0) { 4114 invp_obj = invp->v_object; 4115 if (invp_obj != NULL && vm_object_mightbedirty(invp_obj)) { 4116 if (invp_lock != LK_EXCLUSIVE) { 4117 KASSERT(!onevp, ("nfs_copy_file_range: " 4118 "invp_lock LK_SHARED for onevp")); 4119 invp_lock = LK_EXCLUSIVE; 4120 VOP_UNLOCK(invp); 4121 VOP_UNLOCK(outvp); 4122 if (mp != NULL) 4123 vn_finished_write(mp); 4124 goto relock; 4125 } 4126 vnode_pager_clean_sync(invp); 4127 } 4128 error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0); 4129 } 4130 if (error == 0) 4131 error = ncl_vinvalbuf(outvp, V_SAVE, curthread, 0); 4132 4133 /* Do the actual NFSv4.2 RPC. 
*/ 4134 ret = ret2 = 0; 4135 len = *ap->a_lenp; 4136 mtx_lock(&nmp->nm_mtx); 4137 if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0) 4138 consecutive = true; 4139 else 4140 consecutive = false; 4141 mtx_unlock(&nmp->nm_mtx); 4142 tryoutcred = true; 4143 must_commit = false; 4144 toeof = false; 4145 4146 if (error == 0) { 4147 vap = &VTONFS(invp)->n_vattr.na_vattr; 4148 error = VOP_GETATTR(invp, vap, ap->a_incred); 4149 if (error == 0) { 4150 /* 4151 * Clip "len" at va_size so that RFC compliant servers 4152 * will not reply NFSERR_INVAL. 4153 * Setting "len == 0" for the RPC would be preferred, 4154 * but some Linux servers do not support that. 4155 * If the len is being set to 0, do a Setattr RPC to 4156 * set the server's atime. This behaviour was the 4157 * preferred one for the FreeBSD "collective". 4158 */ 4159 if (inoff >= vap->va_size) { 4160 *ap->a_lenp = len = 0; 4161 if ((nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 4162 0) { 4163 VATTR_NULL(&va); 4164 va.va_atime.tv_sec = 0; 4165 va.va_atime.tv_nsec = 0; 4166 va.va_vaflags = VA_UTIMES_NULL; 4167 inattrflag = 0; 4168 error = nfsrpc_setattr(invp, &va, NULL, 4169 0, ap->a_incred, curthread, 4170 &innfsva, &inattrflag); 4171 if (inattrflag != 0) 4172 ret = nfscl_loadattrcache(&invp, 4173 &innfsva, NULL, 0, 1); 4174 if (error == 0 && ret != 0) 4175 error = ret; 4176 } 4177 } else if (inoff + len >= vap->va_size) { 4178 toeof = true; 4179 *ap->a_lenp = len = vap->va_size - inoff; 4180 } 4181 } else 4182 error = 0; 4183 } 4184 4185 /* 4186 * For cloning, the offsets must be clone blksize aligned and 4187 * the len must be blksize aligned unless it goes to EOF on 4188 * the input file. 
4189 */ 4190 tryclone = false; 4191 if (len > 0) { 4192 if (error == 0 && NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, 4193 NFSATTRBIT_CLONEBLKSIZE) && nmp->nm_cloneblksize != 0 && 4194 (inoff % nmp->nm_cloneblksize) == 0 && 4195 (outoff % nmp->nm_cloneblksize) == 0 && 4196 (toeof || (len % nmp->nm_cloneblksize) == 0)) 4197 tryclone = true; 4198 else if (mustclone) 4199 error = ENOSYS; 4200 } 4201 4202 /* 4203 * len will be set to 0 upon a successful Copy RPC. 4204 * As such, this only loops when the Copy/Clone RPC needs to be retried. 4205 */ 4206 while (len > 0 && error == 0) { 4207 inattrflag = outattrflag = 0; 4208 len2 = len; 4209 if (tryclone) { 4210 if (tryoutcred) 4211 error = nfsrpc_clone(invp, ap->a_inoffp, outvp, 4212 ap->a_outoffp, &len2, toeof, &inattrflag, 4213 &innfsva, &outattrflag, &outnfsva, 4214 ap->a_outcred); 4215 else 4216 error = nfsrpc_clone(invp, ap->a_inoffp, outvp, 4217 ap->a_outoffp, &len2, toeof, &inattrflag, 4218 &innfsva, &outattrflag, &outnfsva, 4219 ap->a_incred); 4220 } else { 4221 if (tryoutcred) 4222 error = nfsrpc_copy_file_range(invp, 4223 ap->a_inoffp, outvp, ap->a_outoffp, &len2, 4224 ap->a_flags, &inattrflag, &innfsva, 4225 &outattrflag, &outnfsva, 4226 ap->a_outcred, consecutive, &must_commit); 4227 else 4228 error = nfsrpc_copy_file_range(invp, 4229 ap->a_inoffp, outvp, ap->a_outoffp, &len2, 4230 ap->a_flags, &inattrflag, &innfsva, 4231 &outattrflag, &outnfsva, 4232 ap->a_incred, consecutive, &must_commit); 4233 } 4234 if (inattrflag != 0) 4235 ret = nfscl_loadattrcache(&invp, &innfsva, NULL, 0, 1); 4236 if (outattrflag != 0) 4237 ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL, 4238 1, 1); 4239 if (error == 0) { 4240 if (consecutive == false) { 4241 if (len2 == len) { 4242 mtx_lock(&nmp->nm_mtx); 4243 nmp->nm_privflag |= 4244 NFSMNTP_NOCONSECUTIVE; 4245 mtx_unlock(&nmp->nm_mtx); 4246 } else 4247 error = NFSERR_OFFLOADNOREQS; 4248 } 4249 *ap->a_lenp = len2; 4250 len = 0; 4251 if (len2 > 0 && must_commit && error == 0) 
4252 error = ncl_commit(outvp, outoff, *ap->a_lenp, 4253 ap->a_outcred, curthread); 4254 if (error == 0 && ret != 0) 4255 error = ret; 4256 if (error == 0 && ret2 != 0) 4257 error = ret2; 4258 } else if (error == NFSERR_OFFLOADNOREQS && consecutive) { 4259 /* 4260 * Try consecutive == false, which is ok only if all 4261 * bytes are copied. 4262 * If only some bytes were copied when consecutive 4263 * is false, there is no way to know which bytes 4264 * still need to be written. 4265 */ 4266 consecutive = false; 4267 error = 0; 4268 } else if (error == NFSERR_ACCES && tryoutcred) { 4269 /* Try again with incred. */ 4270 tryoutcred = false; 4271 error = 0; 4272 } else if (tryclone && error != 0) { 4273 if (mustclone) { 4274 error = ENOSYS; 4275 } else { 4276 tryclone = false; 4277 error = 0; 4278 } 4279 } 4280 if (error == NFSERR_STALEWRITEVERF) { 4281 /* 4282 * Server rebooted, so do it all again. 4283 */ 4284 *ap->a_inoffp = inoff; 4285 *ap->a_outoffp = outoff; 4286 len = *ap->a_lenp; 4287 must_commit = false; 4288 error = 0; 4289 } 4290 } 4291 VOP_UNLOCK(invp); 4292 if (!onevp) 4293 VOP_UNLOCK(outvp); /* For onevp, same as invp. */ 4294 if (mp != NULL) 4295 vn_finished_write(mp); 4296 if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS || 4297 error == NFSERR_ACCES || error == ENOSYS) { 4298 /* 4299 * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can 4300 * use a_incred for the read and a_outcred for the write, so 4301 * try this for NFSERR_ACCES failures for the Copy. 4302 * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can 4303 * never succeed, so disable it. 4304 */ 4305 if (error != NFSERR_ACCES && error != ENOSYS) { 4306 /* Can never do Copy on this mount. 
*/ 4307 mtx_lock(&nmp->nm_mtx); 4308 nmp->nm_privflag |= NFSMNTP_NOCOPY; 4309 mtx_unlock(&nmp->nm_mtx); 4310 } 4311 *ap->a_inoffp = inoff; 4312 *ap->a_outoffp = outoff; 4313 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, 4314 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, 4315 ap->a_incred, ap->a_outcred, ap->a_fsizetd); 4316 } else if (error != 0) 4317 *ap->a_lenp = 0; 4318 4319 if (error != 0) 4320 error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0); 4321 return (error); 4322 } 4323 4324 /* 4325 * nfs ioctl call 4326 */ 4327 static int 4328 nfs_ioctl(struct vop_ioctl_args *ap) 4329 { 4330 struct vnode *vp = ap->a_vp; 4331 struct nfsvattr nfsva; 4332 struct nfsmount *nmp; 4333 int attrflag, content, error, ret; 4334 bool eof = false; /* shut up compiler. */ 4335 4336 /* Do the actual NFSv4.2 RPC. */ 4337 switch (ap->a_command) { 4338 case FIOSEEKDATA: 4339 content = NFSV4CONTENT_DATA; 4340 break; 4341 case FIOSEEKHOLE: 4342 content = NFSV4CONTENT_HOLE; 4343 break; 4344 default: 4345 return (ENOTTY); 4346 } 4347 4348 error = vn_lock(vp, LK_EXCLUSIVE); 4349 if (error != 0) 4350 return (EBADF); 4351 4352 if (vp->v_type != VREG) { 4353 VOP_UNLOCK(vp); 4354 return (ENOTTY); 4355 } 4356 nmp = VFSTONFS(vp->v_mount); 4357 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) { 4358 VOP_UNLOCK(vp); 4359 error = vop_stdioctl(ap); 4360 return (error); 4361 } 4362 4363 attrflag = 0; 4364 if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size) 4365 error = ENXIO; 4366 else { 4367 /* 4368 * Flush all writes, so that the server is up to date. 4369 * Although a Commit is not required, the commit argument 4370 * is set so that, for a pNFS File/Flexible File Layout 4371 * server, the LayoutCommit will be done to ensure the file 4372 * size is up to date on the Metadata Server. 
4373 */ 4374 4375 vnode_pager_clean_sync(vp); 4376 error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0); 4377 if (error == 0) 4378 error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof, 4379 content, ap->a_cred, &nfsva, &attrflag); 4380 /* If at eof for FIOSEEKDATA, return ENXIO. */ 4381 if (eof && error == 0 && content == NFSV4CONTENT_DATA) 4382 error = ENXIO; 4383 } 4384 if (attrflag != 0) { 4385 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4386 if (error == 0 && ret != 0) 4387 error = ret; 4388 } 4389 NFSVOPUNLOCK(vp); 4390 4391 if (error != 0) 4392 error = ENXIO; 4393 return (error); 4394 } 4395 4396 /* 4397 * nfs getextattr call 4398 */ 4399 static int 4400 nfs_getextattr(struct vop_getextattr_args *ap) 4401 { 4402 struct vnode *vp = ap->a_vp; 4403 struct nfsmount *nmp; 4404 struct ucred *cred; 4405 struct thread *td = ap->a_td; 4406 struct nfsvattr nfsva; 4407 ssize_t len; 4408 int attrflag, error, ret; 4409 4410 nmp = VFSTONFS(vp->v_mount); 4411 mtx_lock(&nmp->nm_mtx); 4412 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4413 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4414 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4415 mtx_unlock(&nmp->nm_mtx); 4416 return (EOPNOTSUPP); 4417 } 4418 mtx_unlock(&nmp->nm_mtx); 4419 4420 cred = ap->a_cred; 4421 if (cred == NULL) 4422 cred = td->td_ucred; 4423 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4424 attrflag = 0; 4425 error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva, 4426 &attrflag, cred, td); 4427 if (attrflag != 0) { 4428 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4429 if (error == 0 && ret != 0) 4430 error = ret; 4431 } 4432 if (error == 0 && ap->a_size != NULL) 4433 *ap->a_size = len; 4434 4435 switch (error) { 4436 case NFSERR_NOTSUPP: 4437 case NFSERR_OPILLEGAL: 4438 mtx_lock(&nmp->nm_mtx); 4439 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4440 mtx_unlock(&nmp->nm_mtx); 4441 error = EOPNOTSUPP; 4442 break; 4443 case NFSERR_NOXATTR: 4444 case NFSERR_XATTR2BIG: 4445 error = ENOATTR; 4446 break; 4447 default: 4448 error = nfscl_maperr(td, error, 0, 0); 4449 break; 4450 } 4451 return (error); 4452 } 4453 4454 /* 4455 * nfs setextattr call 4456 */ 4457 static int 4458 nfs_setextattr(struct vop_setextattr_args *ap) 4459 { 4460 struct vnode *vp = ap->a_vp; 4461 struct nfsmount *nmp; 4462 struct ucred *cred; 4463 struct thread *td = ap->a_td; 4464 struct nfsvattr nfsva; 4465 int attrflag, error, ret; 4466 4467 nmp = VFSTONFS(vp->v_mount); 4468 mtx_lock(&nmp->nm_mtx); 4469 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4470 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4471 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4472 mtx_unlock(&nmp->nm_mtx); 4473 return (EOPNOTSUPP); 4474 } 4475 mtx_unlock(&nmp->nm_mtx); 4476 4477 if (ap->a_uio->uio_resid < 0) 4478 return (EINVAL); 4479 cred = ap->a_cred; 4480 if (cred == NULL) 4481 cred = td->td_ucred; 4482 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4483 attrflag = 0; 4484 error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva, 4485 &attrflag, cred, td); 4486 if (attrflag != 0) { 4487 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4488 if (error == 0 && ret != 0) 4489 error = ret; 4490 } 4491 4492 switch (error) { 4493 case NFSERR_NOTSUPP: 4494 case NFSERR_OPILLEGAL: 4495 mtx_lock(&nmp->nm_mtx); 4496 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4497 mtx_unlock(&nmp->nm_mtx); 4498 error = EOPNOTSUPP; 4499 break; 4500 case NFSERR_NOXATTR: 4501 case NFSERR_XATTR2BIG: 4502 error = ENOATTR; 4503 break; 4504 default: 4505 error = nfscl_maperr(td, error, 0, 0); 4506 break; 4507 } 4508 return (error); 4509 } 4510 4511 /* 4512 * nfs listextattr call 4513 */ 4514 static int 4515 nfs_listextattr(struct vop_listextattr_args *ap) 4516 { 4517 struct vnode *vp = ap->a_vp; 4518 struct nfsmount *nmp; 4519 struct ucred *cred; 4520 struct thread *td = ap->a_td; 4521 struct nfsvattr nfsva; 4522 size_t len, len2; 4523 uint64_t cookie; 4524 int attrflag, error, ret; 4525 bool eof; 4526 4527 nmp = VFSTONFS(vp->v_mount); 4528 mtx_lock(&nmp->nm_mtx); 4529 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4530 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4531 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4532 mtx_unlock(&nmp->nm_mtx); 4533 return (EOPNOTSUPP); 4534 } 4535 mtx_unlock(&nmp->nm_mtx); 4536 4537 cred = ap->a_cred; 4538 if (cred == NULL) 4539 cred = td->td_ucred; 4540 4541 /* Loop around doing List Extended Attribute RPCs. 
*/ 4542 eof = false; 4543 cookie = 0; 4544 len2 = 0; 4545 error = 0; 4546 while (!eof && error == 0) { 4547 len = nmp->nm_rsize; 4548 attrflag = 0; 4549 error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof, 4550 &nfsva, &attrflag, cred, td); 4551 if (attrflag != 0) { 4552 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4553 if (error == 0 && ret != 0) 4554 error = ret; 4555 } 4556 if (error == 0) { 4557 len2 += len; 4558 if (len2 > SSIZE_MAX) 4559 error = ENOATTR; 4560 } 4561 } 4562 if (error == 0 && ap->a_size != NULL) 4563 *ap->a_size = len2; 4564 4565 switch (error) { 4566 case NFSERR_NOTSUPP: 4567 case NFSERR_OPILLEGAL: 4568 mtx_lock(&nmp->nm_mtx); 4569 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4570 mtx_unlock(&nmp->nm_mtx); 4571 error = EOPNOTSUPP; 4572 break; 4573 case NFSERR_NOXATTR: 4574 case NFSERR_XATTR2BIG: 4575 error = ENOATTR; 4576 break; 4577 default: 4578 error = nfscl_maperr(td, error, 0, 0); 4579 break; 4580 } 4581 return (error); 4582 } 4583 4584 /* 4585 * nfs setextattr call 4586 */ 4587 static int 4588 nfs_deleteextattr(struct vop_deleteextattr_args *ap) 4589 { 4590 struct vnode *vp = ap->a_vp; 4591 struct nfsmount *nmp; 4592 struct nfsvattr nfsva; 4593 int attrflag, error, ret; 4594 4595 nmp = VFSTONFS(vp->v_mount); 4596 mtx_lock(&nmp->nm_mtx); 4597 if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION || 4598 (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 || 4599 ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) { 4600 mtx_unlock(&nmp->nm_mtx); 4601 return (EOPNOTSUPP); 4602 } 4603 mtx_unlock(&nmp->nm_mtx); 4604 4605 /* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. 
*/ 4606 attrflag = 0; 4607 error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred, 4608 ap->a_td); 4609 if (attrflag != 0) { 4610 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4611 if (error == 0 && ret != 0) 4612 error = ret; 4613 } 4614 4615 switch (error) { 4616 case NFSERR_NOTSUPP: 4617 case NFSERR_OPILLEGAL: 4618 mtx_lock(&nmp->nm_mtx); 4619 nmp->nm_privflag |= NFSMNTP_NOXATTR; 4620 mtx_unlock(&nmp->nm_mtx); 4621 error = EOPNOTSUPP; 4622 break; 4623 case NFSERR_NOXATTR: 4624 case NFSERR_XATTR2BIG: 4625 error = ENOATTR; 4626 break; 4627 default: 4628 error = nfscl_maperr(ap->a_td, error, 0, 0); 4629 break; 4630 } 4631 return (error); 4632 } 4633 4634 /* 4635 * Return POSIX pathconf information applicable to nfs filesystems. 4636 */ 4637 static int 4638 nfs_pathconf(struct vop_pathconf_args *ap) 4639 { 4640 struct nfsv3_pathconf pc; 4641 struct nfsvattr nfsva; 4642 struct vnode *vp = ap->a_vp; 4643 struct nfsmount *nmp; 4644 struct thread *td = curthread; 4645 off_t off; 4646 uint32_t clone_blksize; 4647 bool eof, has_namedattr, named_enabled; 4648 int attrflag, error; 4649 struct nfsnode *np; 4650 uint32_t trueform; 4651 4652 nmp = VFSTONFS(vp->v_mount); 4653 np = VTONFS(vp); 4654 named_enabled = false; 4655 has_namedattr = false; 4656 clone_blksize = 0; 4657 if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || 4658 ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || 4659 ap->a_name == _PC_NO_TRUNC || 4660 ap->a_name == _PC_CASE_INSENSITIVE)) || 4661 (NFS_ISV4(vp) && (ap->a_name == _PC_ACL_NFS4 || 4662 ap->a_name == _PC_HAS_NAMEDATTR || 4663 ap->a_name == _PC_CLONE_BLKSIZE || 4664 ap->a_name == _PC_ACL_EXTENDED))) { 4665 /* 4666 * Since only the above 5 a_names are returned by the NFSv3 4667 * Pathconf RPC, there is no point in doing it for others. 4668 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can 4669 * be used for _PC_ACL_NFS4, _PC_HAS_NAMEDATTR, 4670 * and _PC_ACL_EXTENDED as well. 
4671 */ 4672 trueform = UINT32_MAX; 4673 error = nfsrpc_pathconf(vp, &pc, &has_namedattr, &clone_blksize, 4674 td->td_ucred, td, &nfsva, &attrflag, &trueform); 4675 if (attrflag != 0) 4676 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); 4677 if (error != 0) 4678 return (error); 4679 } else if (NFS_ISV4(vp) && ap->a_name == _PC_NAMEDATTR_ENABLED && 4680 (np->n_flag & NNAMEDNOTSUPP) == 0) { 4681 struct nfsfh *nfhp; 4682 4683 error = nfsrpc_openattr(nmp, vp, np->n_fhp->nfh_fh, 4684 np->n_fhp->nfh_len, false, td->td_ucred, td, &nfsva, &nfhp, 4685 &attrflag); 4686 named_enabled = true; 4687 if (error == 0) { 4688 free(nfhp, M_NFSFH); 4689 } else if (error == NFSERR_NOTSUPP) { 4690 named_enabled = false; 4691 NFSLOCKNODE(np); 4692 np->n_flag |= NNAMEDNOTSUPP; 4693 NFSUNLOCKNODE(np); 4694 } 4695 error = 0; 4696 } else { 4697 /* 4698 * For NFSv2 (or NFSv3 when not one of the above 4 a_names), 4699 * just fake them. 4700 */ 4701 pc.pc_linkmax = NFS_LINK_MAX; 4702 pc.pc_namemax = NFS_MAXNAMLEN; 4703 pc.pc_notrunc = 1; 4704 pc.pc_chownrestricted = 1; 4705 pc.pc_caseinsensitive = 0; 4706 pc.pc_casepreserving = 1; 4707 error = 0; 4708 } 4709 switch (ap->a_name) { 4710 case _PC_LINK_MAX: 4711 #ifdef _LP64 4712 *ap->a_retval = pc.pc_linkmax; 4713 #else 4714 *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); 4715 #endif 4716 break; 4717 case _PC_NAME_MAX: 4718 *ap->a_retval = pc.pc_namemax; 4719 break; 4720 case _PC_PIPE_BUF: 4721 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) 4722 *ap->a_retval = PIPE_BUF; 4723 else 4724 error = EINVAL; 4725 break; 4726 case _PC_CHOWN_RESTRICTED: 4727 *ap->a_retval = pc.pc_chownrestricted; 4728 break; 4729 case _PC_NO_TRUNC: 4730 *ap->a_retval = pc.pc_notrunc; 4731 break; 4732 case _PC_ACL_NFS4: 4733 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && 4734 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL) && 4735 (trueform == NFSV4_ACL_MODEL_NFS4 || 4736 trueform == UINT32_MAX)) 4737 *ap->a_retval = 1; 4738 else 4739 
*ap->a_retval = 0; 4740 break; 4741 case _PC_ACL_EXTENDED: 4742 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && 4743 NFSISSET_ATTRBIT(&nfsva.na_suppattr, 4744 NFSATTRBIT_POSIXACCESSACL) && 4745 NFSISSET_ATTRBIT(&nfsva.na_suppattr, 4746 NFSATTRBIT_POSIXDEFAULTACL) && 4747 trueform == NFSV4_ACL_MODEL_POSIX_DRAFT) 4748 *ap->a_retval = 1; 4749 else 4750 *ap->a_retval = 0; 4751 break; 4752 case _PC_ACL_PATH_MAX: 4753 if (NFS_ISV4(vp)) 4754 *ap->a_retval = ACL_MAX_ENTRIES; 4755 else 4756 *ap->a_retval = 3; 4757 break; 4758 case _PC_PRIO_IO: 4759 *ap->a_retval = 0; 4760 break; 4761 case _PC_SYNC_IO: 4762 *ap->a_retval = 0; 4763 break; 4764 case _PC_ALLOC_SIZE_MIN: 4765 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; 4766 break; 4767 case _PC_FILESIZEBITS: 4768 if (NFS_ISV34(vp)) 4769 *ap->a_retval = 64; 4770 else 4771 *ap->a_retval = 32; 4772 break; 4773 case _PC_REC_INCR_XFER_SIZE: 4774 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4775 break; 4776 case _PC_REC_MAX_XFER_SIZE: 4777 *ap->a_retval = -1; /* means ``unlimited'' */ 4778 break; 4779 case _PC_REC_MIN_XFER_SIZE: 4780 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4781 break; 4782 case _PC_REC_XFER_ALIGN: 4783 *ap->a_retval = PAGE_SIZE; 4784 break; 4785 case _PC_SYMLINK_MAX: 4786 *ap->a_retval = NFS_MAXPATHLEN; 4787 break; 4788 case _PC_MIN_HOLE_SIZE: 4789 /* Only some NFSv4.2 servers support Seek for Holes. */ 4790 *ap->a_retval = 0; 4791 if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) { 4792 /* 4793 * NFSv4.2 doesn't have an attribute for hole size, 4794 * so all we can do is see if the Seek operation is 4795 * supported and then use f_iosize as a "best guess". 
4796 */ 4797 mtx_lock(&nmp->nm_mtx); 4798 if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) { 4799 mtx_unlock(&nmp->nm_mtx); 4800 off = 0; 4801 attrflag = 0; 4802 error = nfsrpc_seek(vp, &off, &eof, 4803 NFSV4CONTENT_HOLE, td->td_ucred, &nfsva, 4804 &attrflag); 4805 if (attrflag != 0) 4806 (void) nfscl_loadattrcache(&vp, &nfsva, 4807 NULL, 0, 1); 4808 mtx_lock(&nmp->nm_mtx); 4809 if (error == NFSERR_NOTSUPP) 4810 nmp->nm_privflag |= NFSMNTP_SEEKTESTED; 4811 else 4812 nmp->nm_privflag |= NFSMNTP_SEEKTESTED | 4813 NFSMNTP_SEEK; 4814 error = 0; 4815 } 4816 if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0) 4817 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 4818 mtx_unlock(&nmp->nm_mtx); 4819 } 4820 break; 4821 case _PC_NAMEDATTR_ENABLED: 4822 if (named_enabled) 4823 *ap->a_retval = 1; 4824 else 4825 *ap->a_retval = 0; 4826 break; 4827 case _PC_HAS_NAMEDATTR: 4828 if (has_namedattr) 4829 *ap->a_retval = 1; 4830 else 4831 *ap->a_retval = 0; 4832 break; 4833 case _PC_HAS_HIDDENSYSTEM: 4834 if (NFS_ISV4(vp) && NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, 4835 NFSATTRBIT_ARCHIVE) && 4836 NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, 4837 NFSATTRBIT_HIDDEN) && 4838 NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, 4839 NFSATTRBIT_SYSTEM)) 4840 *ap->a_retval = 1; 4841 else 4842 *ap->a_retval = 0; 4843 break; 4844 case _PC_CLONE_BLKSIZE: 4845 *ap->a_retval = clone_blksize; 4846 break; 4847 case _PC_CASE_INSENSITIVE: 4848 *ap->a_retval = pc.pc_caseinsensitive; 4849 break; 4850 4851 default: 4852 error = vop_stdpathconf(ap); 4853 break; 4854 } 4855 return (error); 4856 } 4857