/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * vnode op calls for Sun NFS version 2, 3 and 4
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/namei.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/signalvar.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>

#include <fs/nfs/nfsport.h>
#include <fs/nfsclient/nfsnode.h>
#include <fs/nfsclient/nfsmount.h>
#include <fs/nfsclient/nfs.h>
#include <fs/nfsclient/nfs_kdtrace.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

#include <nfs/nfs_lock.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

dtrace_nfsclient_accesscache_flush_probe_func_t
		dtrace_nfscl_accesscache_flush_done_probe;
uint32_t	nfscl_accesscache_flush_done_id;

dtrace_nfsclient_accesscache_get_probe_func_t
		dtrace_nfscl_accesscache_get_hit_probe,
		dtrace_nfscl_accesscache_get_miss_probe;
uint32_t	nfscl_accesscache_get_hit_id;
uint32_t	nfscl_accesscache_get_miss_id;

dtrace_nfsclient_accesscache_load_probe_func_t
		dtrace_nfscl_accesscache_load_done_probe;
uint32_t	nfscl_accesscache_load_done_id;
#endif /* !KDTRACE_HOOKS */

/* Defs */
#define	TRUE	1
#define	FALSE	0

extern struct nfsstatsv1 nfsstatsv1;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
MALLOC_DECLARE(M_NEWNFSREQ);

static vop_read_t	nfsfifo_read;
static vop_write_t	nfsfifo_write;
static vop_close_t	nfsfifo_close;
static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
		    struct thread *);
static vop_lookup_t	nfs_lookup;
static vop_create_t	nfs_create;
static vop_mknod_t	nfs_mknod;
static vop_open_t	nfs_open;
static vop_pathconf_t	nfs_pathconf;
static vop_close_t	nfs_close;
static vop_access_t	nfs_access;
static vop_getattr_t	nfs_getattr;
static vop_setattr_t	nfs_setattr;
static vop_read_t	nfs_read;
static vop_fsync_t	nfs_fsync;
static vop_remove_t	nfs_remove;
static vop_link_t	nfs_link;
static vop_rename_t	nfs_rename;
static vop_mkdir_t	nfs_mkdir;
static vop_rmdir_t	nfs_rmdir;
static vop_symlink_t	nfs_symlink;
static vop_readdir_t	nfs_readdir;
static vop_strategy_t	nfs_strategy;
static	int	nfs_lookitup(struct vnode *, char *, int,
		    struct ucred *, struct thread *, struct nfsnode **);
static	int	nfs_sillyrename(struct vnode *, struct vnode *,
		    struct componentname *);
static vop_access_t	nfsspec_access;
static vop_readlink_t	nfs_readlink;
static vop_print_t	nfs_print;
static vop_advlock_t	nfs_advlock;
static vop_advlockasync_t	nfs_advlockasync;
static vop_getacl_t	nfs_getacl;
static vop_setacl_t	nfs_setacl;
static vop_set_text_t	nfs_set_text;

/*
 * Global vfs data structures for nfs
 */
struct vop_vector newnfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_advlockasync =	nfs_advlockasync,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		ncl_getpages,
	.vop_putpages =		ncl_putpages,
	.vop_inactive =		ncl_inactive,
	.vop_link =		nfs_link,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_pathconf =		nfs_pathconf,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		ncl_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		ncl_write,
	.vop_getacl =		nfs_getacl,
	.vop_setacl =		nfs_setacl,
	.vop_set_text =		nfs_set_text,
};

struct vop_vector newnfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		ncl_inactive,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		ncl_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};

static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct vattr *vap);
static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td);
static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
    char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
    char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
    struct componentname *scnp, struct sillyrename *sp);

/*
 * Global variables
 */
SYSCTL_DECL(_vfs_nfs);

static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
    &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
    &nfs_prime_access_cache, 0,
    "Prime NFS ACCESS cache when fetching attributes");

static int	newnfs_commit_on_close = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
    &newnfs_commit_on_close, 0, "write+commit on close, else only write");

static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
    &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

int newnfs_directio_enable = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
    &newnfs_directio_enable, 0, "Enable NFS directio");

int nfs_keep_dirty_on_error;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW,
    &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
 * this by default to prevent DoS attacks - to prevent a malicious user from
 * opening up files O_DIRECT preventing other users from mmap'ing these
 * files.  "Protected" environments where stricter consistency guarantees are
 * required can disable this knob.  The process that opened the file O_DIRECT
 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
 * meaningful.
 */
int newnfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
    &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");

#define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY		\
			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)

/*
 * SMP Locking Note :
 * The list of locks after the description of the lock is the ordering
 * of other locks acquired with the lock held.
 * np->n_mtx : Protects the fields in the nfsnode.
 *       VM Object Lock
 *       VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
 *       rep->r_mtx
 * ncl_iod_mutex : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
 *       nmp->nm_mtx
 *       rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
 */

static int
nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td,
    struct ucred *cred, u_int32_t *retmode)
{
	int error = 0, attrflag, i, lrupos;
	u_int32_t rmode;
	struct nfsnode *np = VTONFS(vp);
	struct nfsvattr nfsva;

	error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag,
	    &rmode, NULL);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
	if (!error) {
		lrupos = 0;
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			if (np->n_accesscache[i].uid == cred->cr_uid) {
				np->n_accesscache[i].mode = rmode;
				np->n_accesscache[i].stamp = time_second;
				break;
			}
			if (i > 0 && np->n_accesscache[i].stamp <
			    np->n_accesscache[lrupos].stamp)
				lrupos = i;
		}
		if (i == NFS_ACCESSCACHESIZE) {
			np->n_accesscache[lrupos].uid = cred->cr_uid;
			np->n_accesscache[lrupos].mode = rmode;
			np->n_accesscache[lrupos].stamp = time_second;
		}
		mtx_unlock(&np->n_mtx);
		if (retmode != NULL)
			*retmode = rmode;
		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
#ifdef KDTRACE_HOOKS
	if (error != 0)
		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
		    error);
#endif
	return (error);
}

/*
 * nfs access vnode op.
 * For nfs version 2, just return ok. File accesses may fail later.
 * For nfs version 3, use the access rpc to check accessibility. If file modes
 * are changed on the server, accesses might still fail later.
 */
static int
nfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0, i, gotahit;
	u_int32_t mode, wmode, rmode;
	int v34 = NFS_ISV34(vp);
	struct nfsnode *np = VTONFS(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS |
	    VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL |
	    VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	/*
	 * For nfs v3 or v4, check to see if we have done this recently, and if
	 * so return our cached result instead of making an ACCESS call.
	 * If not, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about.
	 */
	if (v34) {
		if (ap->a_accmode & VREAD)
			mode = NFSACCESS_READ;
		else
			mode = 0;
		if (vp->v_type != VDIR) {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
			if (ap->a_accmode & VAPPEND)
				mode |= NFSACCESS_EXTEND;
			if (ap->a_accmode & VEXEC)
				mode |= NFSACCESS_EXECUTE;
			if (ap->a_accmode & VDELETE)
				mode |= NFSACCESS_DELETE;
		} else {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
			if (ap->a_accmode & VAPPEND)
				mode |= NFSACCESS_EXTEND;
			if (ap->a_accmode & VEXEC)
				mode |= NFSACCESS_LOOKUP;
			if (ap->a_accmode & VDELETE)
				mode |= NFSACCESS_DELETE;
			if (ap->a_accmode & VDELETE_CHILD)
				mode |= NFSACCESS_MODIFY;
		}
		/* XXX safety belt, only make blanket request if caching */
		if (nfsaccess_cache_timeout > 0) {
			wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
			    NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
			    NFSACCESS_DELETE | NFSACCESS_LOOKUP;
		} else {
			wmode = mode;
		}

		/*
		 * Does our cached result allow us to give a definite yes to
		 * this request?
		 */
		gotahit = 0;
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
				if (time_second < (np->n_accesscache[i].stamp
				    + nfsaccess_cache_timeout) &&
				    (np->n_accesscache[i].mode & mode) == mode) {
					NFSINCRGLOBAL(nfsstatsv1.accesscache_hits);
					gotahit = 1;
				}
				break;
			}
		}
		mtx_unlock(&np->n_mtx);
#ifdef KDTRACE_HOOKS
		if (gotahit != 0)
			KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
			    ap->a_cred->cr_uid, mode);
		else
			KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
			    ap->a_cred->cr_uid, mode);
#endif
		if (gotahit == 0) {
			/*
			 * Either a no, or a don't know.  Go to the wire.
			 */
			NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
			error = nfs34_access_otw(vp, wmode, ap->a_td,
			    ap->a_cred, &rmode);
			if (!error &&
			    (rmode & mode) != mode)
				error = EACCES;
		}
		return (error);
	} else {
		if ((error = nfsspec_access(ap)) != 0) {
			return (error);
		}
		/*
		 * Attempt to prevent a mapped root from accessing a file
		 * which it shouldn't.  We try to read a byte from the file
		 * if the user is root and the file is not zero length.
		 * After calling nfsspec_access, we should have the correct
		 * file size cached.
		 */
		mtx_lock(&np->n_mtx);
		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
		    && VTONFS(vp)->n_size > 0) {
			struct iovec aiov;
			struct uio auio;
			char buf[1];

			mtx_unlock(&np->n_mtx);
			aiov.iov_base = buf;
			aiov.iov_len = 1;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = 0;
			auio.uio_resid = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = ap->a_td;

			if (vp->v_type == VREG)
				error = ncl_readrpc(vp, &auio, ap->a_cred);
			else if (vp->v_type == VDIR) {
				char *bp;
				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
				aiov.iov_base = bp;
				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
				error = ncl_readdirrpc(vp, &auio, ap->a_cred,
				    ap->a_td);
				free(bp, M_TEMP);
			} else if (vp->v_type == VLNK)
				error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
			else
				error = EACCES;
		} else
			mtx_unlock(&np->n_mtx);
		return (error);
	}
}

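/*
 * Descriptive summary of the ACCESS caching above (editorial comment,
 * derived from the code): n_accesscache is a small per-nfsnode array of
 * { uid, mode, stamp } entries.  nfs34_access_otw() either refreshes the
 * caller's existing entry or evicts the entry with the oldest stamp
 * (approximate LRU, tracked in lrupos), and nfs_access() answers from a
 * cached entry without any RPC while time_second is still within
 * nfsaccess_cache_timeout of the stamp and the cached mode bits cover
 * the rights being requested.
 */
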
/*
 * nfs open vnode op
 * Check to see if the type is ok
 * and that deletion is not in progress.
 * For paged in text files, you will need to flush the page cache
 * if consistency is lost.
 */
/* ARGSUSED */
static int
nfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;
	int fmode = ap->a_mode;
	struct ucred *cred;

	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
		return (EOPNOTSUPP);

	/*
	 * For NFSv4, we need to do the Open Op before cache validation,
	 * so that we conform to RFC3530 Sec. 9.3.1.
	 */
	if (NFS_ISV4(vp)) {
		error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
		if (error) {
			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
			    (gid_t)0);
			return (error);
		}
	}

	/*
	 * Now, if this Open will be doing reading, re-validate/flush the
	 * cache, so that Close/Open coherency is maintained.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & NMODIFIED) {
		mtx_unlock(&np->n_mtx);
		error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
		if (error == EINTR || error == EIO) {
			if (NFS_ISV4(vp))
				(void) nfsrpc_close(vp, 0, ap->a_td);
			return (error);
		}
		mtx_lock(&np->n_mtx);
		np->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
		if (vp->v_type == VDIR)
			np->n_direofoffset = 0;
		mtx_unlock(&np->n_mtx);
		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
		if (error) {
			if (NFS_ISV4(vp))
				(void) nfsrpc_close(vp, 0, ap->a_td);
			return (error);
		}
		mtx_lock(&np->n_mtx);
		np->n_mtime = vattr.va_mtime;
		if (NFS_ISV4(vp))
			np->n_change = vattr.va_filerev;
	} else {
		mtx_unlock(&np->n_mtx);
		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
		if (error) {
			if (NFS_ISV4(vp))
				(void) nfsrpc_close(vp, 0, ap->a_td);
			return (error);
		}
		mtx_lock(&np->n_mtx);
		if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
		    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
			if (vp->v_type == VDIR)
				np->n_direofoffset = 0;
			mtx_unlock(&np->n_mtx);
			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error == EINTR || error == EIO) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_mtime = vattr.va_mtime;
			if (NFS_ISV4(vp))
				np->n_change = vattr.va_filerev;
		}
	}

	/*
	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
	 */
	if (newnfs_directio_enable && (fmode & O_DIRECT) &&
	    (vp->v_type == VREG)) {
		if (np->n_directio_opens == 0) {
			mtx_unlock(&np->n_mtx);
			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_flag |= NNONCACHE;
		}
		np->n_directio_opens++;
	}

	/* If opened for writing via NFSv4.1 or later, mark that for pNFS. */
	if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0)
		np->n_flag |= NWRITEOPENED;

	/*
	 * If this is an open for writing, capture a reference to the
	 * credentials, so they can be used by ncl_putpages(). Using
	 * these write credentials is preferable to the credentials of
	 * whatever thread happens to be doing the VOP_PUTPAGES() since
	 * the write RPCs are less likely to fail with EACCES.
	 */
	if ((fmode & FWRITE) != 0) {
		cred = np->n_writecred;
		np->n_writecred = crhold(ap->a_cred);
	} else
		cred = NULL;
	mtx_unlock(&np->n_mtx);

	if (cred != NULL)
		crfree(cred);
	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
	return (0);
}

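/*
 * Sketch of the close-to-open model implemented here (editorial note):
 * an open(2) must observe the data present at the most recent close(2)
 * by any client.  nfs_open() above supplies the "open" half by
 * comparing the server's mtime (and the change attribute for NFSv4)
 * against the cached copy and invalidating cached buffers on mismatch;
 * nfs_close() below supplies the "close" half by pushing dirty data
 * back to the server.
 */
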
/*
 * nfs close vnode op
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 *  should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
 *                     or commit them (this satisfies 1 and 2 except for the
 *                     case where the server crashes after this close but
 *                     before the commit RPC, which is felt to be "good
 *                     enough").  Changing the last argument to ncl_flush()
 *                     to a 1 would force a commit operation, if it is felt
 *                     a commit is necessary now.
 * for NFS Version 4 - flush the dirty buffers and commit them, if
 *                     nfscl_mustflush() says this is necessary.
 *                     It is necessary if there is no write delegation held,
 *                     in order to satisfy open/close coherency.
 *                     If the file isn't cached on local stable storage,
 *                     it may be necessary in order to detect "out of space"
 *                     errors from the server, if the write delegation
 *                     issued by the server doesn't allow the file to grow.
 */
/* ARGSUSED */
static int
nfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsvattr nfsva;
	struct ucred *cred;
	int error = 0, ret, localcred = 0;
	int fmode = ap->a_fflag;

	if (NFSCL_FORCEDISM(vp->v_mount))
		return (0);
	/*
	 * During shutdown, a_cred isn't valid, so just use root.
	 */
	if (ap->a_cred == NOCRED) {
		cred = newnfs_getcred();
		localcred = 1;
	} else {
		cred = ap->a_cred;
	}
	if (vp->v_type == VREG) {
		/*
		 * Examine and clean dirty pages, regardless of NMODIFIED.
		 * This closes a major hole in close-to-open consistency.
		 * We want to push out all dirty pages (and buffers) on
		 * close, regardless of whether they were dirtied by
		 * mmap'ed writes or via write().
		 */
		if (nfs_clean_pages_on_close && vp->v_object) {
			VM_OBJECT_WLOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, 0, 0, 0);
			VM_OBJECT_WUNLOCK(vp->v_object);
		}
		mtx_lock(&np->n_mtx);
		if (np->n_flag & NMODIFIED) {
			mtx_unlock(&np->n_mtx);
			if (NFS_ISV3(vp)) {
				/*
				 * Under NFSv3 we have dirty buffers to
				 * dispose of.  We must flush them to the
				 * NFS server.  We have the option of waiting
				 * all the way through the commit rpc or just
				 * waiting for the initial write.  The
				 * default is to only wait through the
				 * initial write so the data is in the
				 * server's cache, which is roughly similar
				 * to the state a standard disk subsystem
				 * leaves the file in on close().
				 *
				 * We cannot clear the NMODIFIED bit in
				 * np->n_flag due to potential races with
				 * other processes, and certainly cannot
				 * clear it if we don't commit.
				 * These races occur when there is no longer
				 * the old traditional vnode locking
				 * implemented for Vnode Ops.
				 */
				int cm = newnfs_commit_on_close ? 1 : 0;
				error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0);
				/* np->n_flag &= ~NMODIFIED; */
			} else if (NFS_ISV4(vp)) {
				if (nfscl_mustflush(vp) != 0) {
					int cm = newnfs_commit_on_close ? 1 : 0;
					error = ncl_flush(vp, MNT_WAIT,
					    ap->a_td, cm, 0);
					/*
					 * as above w.r.t races when clearing
					 * NMODIFIED.
					 * np->n_flag &= ~NMODIFIED;
					 */
				}
			} else {
				error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			}
			mtx_lock(&np->n_mtx);
		}
		/*
		 * Invalidate the attribute cache in all cases.
		 * An open is going to fetch fresh attrs any way, other procs
		 * on this node that have file open will be forced to do an
		 * otw attr fetch, but this is safe.
		 * --> A user found that their RPC count dropped by 20% when
		 * this was commented out and I can't see any requirement
		 * for it, so I've disabled it when negative lookups are
		 * enabled. (What does this have to do with negative lookup
		 * caching? Well nothing, except it was reported by the
		 * same user that needed negative lookup caching and I wanted
		 * there to be a way to disable it to see if it
		 * is the cause of some caching/coherency issue that might
		 * crop up.)
		 */
		if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
			np->n_attrstamp = 0;
			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
		}
		if (np->n_flag & NWRITEERR) {
			np->n_flag &= ~NWRITEERR;
			error = np->n_error;
		}
		mtx_unlock(&np->n_mtx);
	}

	if (NFS_ISV4(vp)) {
		/*
		 * Get attributes so "change" is up to date.
		 */
		if (error == 0 && nfscl_mustflush(vp) != 0 &&
		    vp->v_type == VREG &&
		    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
			    NULL);
			if (!ret) {
				np->n_change = nfsva.na_filerev;
				(void) nfscl_loadattrcache(&vp, &nfsva, NULL,
				    NULL, 0, 0);
			}
		}

		/*
		 * and do the close.
		 */
		ret = nfsrpc_close(vp, 0, ap->a_td);
		if (!error && ret)
			error = ret;
		if (error)
			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
			    (gid_t)0);
	}
	if (newnfs_directio_enable)
		KASSERT((np->n_directio_asyncwr == 0),
		    ("nfs_close: dirty unflushed (%d) directio buffers\n",
		     np->n_directio_asyncwr));
	if (newnfs_directio_enable && (fmode & O_DIRECT) &&
	    (vp->v_type == VREG)) {
		mtx_lock(&np->n_mtx);
		KASSERT((np->n_directio_opens > 0),
		    ("nfs_close: unexpected value (0) of n_directio_opens\n"));
		np->n_directio_opens--;
		if (np->n_directio_opens == 0)
			np->n_flag &= ~NNONCACHE;
		mtx_unlock(&np->n_mtx);
	}
	if (localcred)
		NFSFREECRED(cred);
	return (error);
}

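/*
 * Note (editorial): the NWRITEERR handling in nfs_close() above is
 * where deferred write errors finally surface.  A failed asynchronous
 * WRITE (commonly ENOSPC or EDQUOT on the server) is recorded in
 * np->n_error, and that saved error becomes the return value of the
 * application's close(2).
 */
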
/*
 * nfs getattr call from vfs.
 */
static int
nfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;	/* XXX */
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct nfsvattr nfsva;
	struct vattr *vap = ap->a_vap;
	struct vattr vattr;

	/*
	 * Update local times for special files.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD))
		np->n_flag |= NCHG;
	mtx_unlock(&np->n_mtx);
	/*
	 * First look in the cache.
	 */
	if (ncl_getattrcache(vp, &vattr) == 0) {
		vap->va_type = vattr.va_type;
		vap->va_mode = vattr.va_mode;
		vap->va_nlink = vattr.va_nlink;
		vap->va_uid = vattr.va_uid;
		vap->va_gid = vattr.va_gid;
		vap->va_fsid = vattr.va_fsid;
		vap->va_fileid = vattr.va_fileid;
		vap->va_size = vattr.va_size;
		vap->va_blocksize = vattr.va_blocksize;
		vap->va_atime = vattr.va_atime;
		vap->va_mtime = vattr.va_mtime;
		vap->va_ctime = vattr.va_ctime;
		vap->va_gen = vattr.va_gen;
		vap->va_flags = vattr.va_flags;
		vap->va_rdev = vattr.va_rdev;
		vap->va_bytes = vattr.va_bytes;
		vap->va_filerev = vattr.va_filerev;
		/*
		 * Get the local modify time for the case of a write
		 * delegation.
		 */
		nfscl_deleggetmodtime(vp, &vap->va_mtime);
		return (0);
	}

	if (NFS_ISV34(vp) && nfs_prime_access_cache &&
	    nfsaccess_cache_timeout > 0) {
		NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
		nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
		if (ncl_getattrcache(vp, ap->a_vap) == 0) {
			nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
			return (0);
		}
	}
	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
	if (!error)
		error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
	if (!error) {
		/*
		 * Get the local modify time for the case of a write
		 * delegation.
		 */
		nfscl_deleggetmodtime(vp, &vap->va_mtime);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

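/*
 * In the common case the copy-out in nfs_getattr() above is served
 * entirely from the attribute cache, so a stat(2) on an NFS file need
 * not generate an RPC at all; only a cache miss (or the optional
 * priming of the ACCESS cache) goes over the wire.
 */
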
/*
 * nfs setattr call.
 */
static int
nfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct thread *td = curthread;	/* XXX */
	struct vattr *vap = ap->a_vap;
	int error = 0;
	u_quad_t tsize;

#ifndef nolint
	tsize = (u_quad_t)0;
#endif

	/*
	 * Setting of flags and marking of atimes are not supported.
	 */
	if (vap->va_flags != VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Disallow write attempts if the filesystem is mounted read-only.
	 */
	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			if (vap->va_mtime.tv_sec == VNOVAL &&
			    vap->va_atime.tv_sec == VNOVAL &&
			    vap->va_mode == (mode_t)VNOVAL &&
			    vap->va_uid == (uid_t)VNOVAL &&
			    vap->va_gid == (gid_t)VNOVAL)
				return (0);
			vap->va_size = VNOVAL;
			break;
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			/*
			 * We run vnode_pager_setsize() early (why?),
			 * we must set np->n_size now to avoid vinvalbuf
			 * V_SAVE races that might setsize a lower
			 * value.
			 */
			mtx_lock(&np->n_mtx);
			tsize = np->n_size;
			mtx_unlock(&np->n_mtx);
			error = ncl_meta_setsize(vp, ap->a_cred, td,
			    vap->va_size);
			mtx_lock(&np->n_mtx);
			if (np->n_flag & NMODIFIED) {
				tsize = np->n_size;
				mtx_unlock(&np->n_mtx);
				error = ncl_vinvalbuf(vp, vap->va_size == 0 ?
				    0 : V_SAVE, td, 1);
				if (error != 0) {
					vnode_pager_setsize(vp, tsize);
					return (error);
				}
				/*
				 * Call nfscl_delegmodtime() to set the modify
				 * time locally, as required.
				 */
				nfscl_delegmodtime(vp);
			} else
				mtx_unlock(&np->n_mtx);
			/*
			 * np->n_size has already been set to vap->va_size
			 * in ncl_meta_setsize(). We must set it again since
			 * nfs_loadattrcache() could be called through
			 * ncl_meta_setsize() and could modify np->n_size.
			 */
			mtx_lock(&np->n_mtx);
			np->n_vattr.na_size = np->n_size = vap->va_size;
			mtx_unlock(&np->n_mtx);
		}
	} else {
		mtx_lock(&np->n_mtx);
		if ((vap->va_mtime.tv_sec != VNOVAL ||
		    vap->va_atime.tv_sec != VNOVAL) &&
		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
			mtx_unlock(&np->n_mtx);
			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
			if (error == EINTR || error == EIO)
				return (error);
		} else
			mtx_unlock(&np->n_mtx);
	}
	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
	if (error && vap->va_size != VNOVAL) {
		mtx_lock(&np->n_mtx);
		np->n_size = np->n_vattr.na_size = tsize;
		vnode_pager_setsize(vp, tsize);
		mtx_unlock(&np->n_mtx);
	}
	return (error);
}

/*
 * Do an nfs setattr rpc.
 */
static int
nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	struct nfsnode *np = VTONFS(vp);
	int error, ret, attrflag, i;
	struct nfsvattr nfsva;

	if (NFS_ISV34(vp)) {
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
			np->n_accesscache[i].stamp = 0;
		np->n_flag |= NDELEGMOD;
		mtx_unlock(&np->n_mtx);
		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
	}
	error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
	    NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
	return (error);
}

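/*
 * Observe that nfs_setattrrpc() zeroes every n_accesscache stamp before
 * issuing the SETATTR (NFSv3/v4): changing mode or ownership can change
 * what the server will grant, so previously cached ACCESS replies can
 * no longer be trusted.
 */
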
/*
 * nfs lookup call, one step at a time...
 * First look in cache
 * If not found, unlock the directory nfsnode and do the rpc
 */
static int
nfs_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct mount *mp = dvp->v_mount;
	int flags = cnp->cn_flags;
	struct vnode *newvp;
	struct nfsmount *nmp;
	struct nfsnode *np, *newnp;
	int error = 0, attrflag, dattrflag, ltype, ncticks;
	struct thread *td = cnp->cn_thread;
	struct nfsfh *nfhp;
	struct nfsvattr dnfsva, nfsva;
	struct vattr vattr;
	struct timespec nctime;

	*vpp = NULLVP;
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(mp);
	np = VTONFS(dvp);

	/* For NFSv4, wait until any remove is done. */
	mtx_lock(&np->n_mtx);
	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
		np->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
	}
	mtx_unlock(&np->n_mtx);

	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
		return (error);
	error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
	if (error > 0 && error != ENOENT)
		return (error);
	if (error == -1) {
		/*
		 * Lookups of "." are special and always return the
		 * current directory.  cache_lookup() already handles
		 * associated locking bookkeeping, etc.
		 */
		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
			/* XXX: Is this really correct? */
			if (cnp->cn_nameiop != LOOKUP &&
			    (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			return (0);
		}

		/*
		 * We only accept a positive hit in the cache if the
		 * change time of the file matches our cached copy.
		 * Otherwise, we discard the cache entry and fallback
		 * to doing a lookup RPC.  We also only trust cache
		 * entries for less than nm_nametimeo seconds.
		 *
		 * To better handle stale file handles and attributes,
		 * clear the attribute cache of this node if it is a
		 * leaf component, part of an open() call, and not
		 * locally modified before fetching the attributes.
		 * This should allow stale file handles to be detected
		 * here where we can fall back to a LOOKUP RPC to
		 * recover rather than having nfs_open() detect the
		 * stale file handle and failing open(2) with ESTALE.
		 */
		newvp = *vpp;
		newnp = VTONFS(newvp);
		if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
		    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
		    !(newnp->n_flag & NMODIFIED)) {
			mtx_lock(&newnp->n_mtx);
			newnp->n_attrstamp = 0;
			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
			mtx_unlock(&newnp->n_mtx);
		}
		if (nfscl_nodeleg(newvp, 0) == 0 ||
		    ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
		    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
		    timespeccmp(&vattr.va_ctime, &nctime, ==))) {
			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
			if (cnp->cn_nameiop != LOOKUP &&
			    (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			return (0);
		}
		cache_purge(newvp);
		if (dvp != newvp)
			vput(newvp);
		else
			vrele(newvp);
		*vpp = NULLVP;
	} else if (error == ENOENT) {
		if (dvp->v_iflag & VI_DOOMED)
			return (ENOENT);
		/*
		 * We only accept a negative hit in the cache if the
		 * modification time of the parent directory matches
		 * the cached copy in the name cache entry.
		 * Otherwise, we discard all of the negative cache
		 * entries for this directory.  We also only trust
		 * negative cache entries for up to nm_negnametimeo
		 * seconds.
		 */
		if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
			return (ENOENT);
		}
		cache_purge_negative(dvp);
	}

	error = 0;
	newvp = NULLVP;
	NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
	    NULL);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (error) {
		if (newvp != NULLVP) {
			vput(newvp);
			*vpp = NULLVP;
		}

		if (error != ENOENT) {
			if (NFS_ISV4(dvp))
				error = nfscl_maperr(td, error, (uid_t)0,
				    (gid_t)0);
			return (error);
		}

		/* The requested file was not found. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN)) {
			/*
			 * XXX: UFS does a full VOP_ACCESS(dvp,
			 * VWRITE) here instead of just checking
			 * MNT_RDONLY.
			 */
			if (mp->mnt_flag & MNT_RDONLY)
				return (EROFS);
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}

		if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
			/*
			 * Cache the modification time of the parent
			 * directory from the post-op attributes in
			 * the name cache entry.  The negative cache
			 * entry will be ignored once the directory
			 * has changed.  Don't bother adding the entry
			 * if the directory has already changed.
			 */
			mtx_lock(&np->n_mtx);
			if (timespeccmp(&np->n_vattr.na_mtime,
			    &dnfsva.na_mtime, ==)) {
				mtx_unlock(&np->n_mtx);
				cache_enter_time(dvp, NULL, cnp,
				    &dnfsva.na_mtime, NULL);
			} else
				mtx_unlock(&np->n_mtx);
		}
		return (ENOENT);
	}

	/*
	 * Handle RENAME case...
	 */
	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
			FREE((caddr_t)nfhp, M_NFSFH);
			return (EISDIR);
		}
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
		    LK_EXCLUSIVE);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
		*vpp = newvp;
		cnp->cn_flags |= SAVENAME;
		return (0);
	}

	if (flags & ISDOTDOT) {
		ltype = NFSVOPISLOCKED(dvp);
		error = vfs_busy(mp, MBF_NOWAIT);
		if (error != 0) {
			vfs_ref(mp);
			NFSVOPUNLOCK(dvp, 0);
			error = vfs_busy(mp, 0);
			NFSVOPLOCK(dvp, ltype | LK_RETRY);
			vfs_rel(mp);
			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
				vfs_unbusy(mp);
				error = ENOENT;
			}
			if (error != 0)
				return (error);
		}
		NFSVOPUNLOCK(dvp, 0);
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
		    cnp->cn_lkflags);
		if (error == 0)
			newvp = NFSTOV(np);
		vfs_unbusy(mp);
		if (newvp != dvp)
			NFSVOPLOCK(dvp, ltype | LK_RETRY);
		if (dvp->v_iflag & VI_DOOMED) {
			if (error == 0) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
			error = ENOENT;
		}
		if (error != 0)
			return (error);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
		FREE((caddr_t)nfhp, M_NFSFH);
		VREF(dvp);
		newvp = dvp;
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else {
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
		    cnp->cn_lkflags);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
		else if ((flags & (ISLASTCN | ISOPEN)) ==
		    (ISLASTCN | ISOPEN) && !(np->n_flag & NMODIFIED)) {
			/*
			 * Flush the attribute cache when opening a
			 * leaf node to ensure that fresh attributes
			 * are fetched in nfs_open() since we did not
			 * fetch attributes from the LOOKUP reply.
			 */
			mtx_lock(&np->n_mtx);
			np->n_attrstamp = 0;
			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
			mtx_unlock(&np->n_mtx);
		}
	}
	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	if ((cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
	    attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
		    newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime);
	*vpp = newvp;
	return (0);
}

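/*
 * The ISDOTDOT block in nfs_lookup() above is the classic ".." lock
 * dance: the result of the lookup is dvp's parent, which lock ordering
 * says may not be acquired while dvp is held.  So dvp is unlocked
 * first, the mount point is vfs_busy()'d so it cannot be unmounted
 * meanwhile, and dvp is re-checked for VI_DOOMED once its lock has been
 * reacquired.
 */
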
/*
 * nfs read call.
 * Just call ncl_bioread() to do the work.
 */
static int
nfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 */
static int
nfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);
	return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by ncl_doio() from below the buffer cache.
 */
int
ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;

	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
	    &attrflag, NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs read rpc call
 * Ditto above
 */
int
ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;
	struct nfsmount *nmp;

	nmp = VFSTONFS(vnode_mount(vp));
	error = EIO;
	attrflag = 0;
	if (NFSHASPNFS(nmp))
		error = nfscl_doiods(vp, uiop, NULL, NULL,
		    NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td);
	NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error);
	if (error != 0)
		error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva,
		    &attrflag, NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs write call
 */
int
ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    int *iomode, int *must_commit, int called_from_strategy)
{
	struct nfsvattr nfsva;
	int error, attrflag, ret;
	struct nfsmount *nmp;

	nmp = VFSTONFS(vnode_mount(vp));
	error = EIO;
	attrflag = 0;
	if (NFSHASPNFS(nmp))
		error = nfscl_doiods(vp, uiop, iomode, must_commit,
		    NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td);
	NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error);
	if (error != 0)
		error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
		    uiop->uio_td, &nfsva, &attrflag, NULL,
		    called_from_strategy);
	if (attrflag) {
		if (VTONFS(vp)->n_flag & ND_NFSV4)
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
			    1);
		else
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
			    1);
		if (ret && !error)
			error = ret;
	}
	if (DOINGASYNC(vp))
		*iomode = NFSWRITE_FILESYNC;
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

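/*
 * A pattern shared by ncl_readrpc() and ncl_writerpc() above: when pNFS
 * is in use, error is preset to EIO and nfscl_doiods() is tried first,
 * doing the I/O directly against the data servers; any failure there
 * falls back silently to a conventional READ/WRITE RPC against the
 * server holding the metadata.
 */
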
/*
 * nfs mknod rpc
 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 * mode set to specify the file type and the size field for rdev.
 */
static int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsvattr nfsva, dnfsva;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL, *dnp;
	struct nfsfh *nfhp;
	struct vattr vattr;
	int error = 0, attrflag, dattrflag;
	u_int32_t rdev;

	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = vap->va_rdev;
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = 0xffffffff;
	else
		return (EOPNOTSUPP);
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
	    rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva,
	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
	if (!error) {
		if (!nfhp)
			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
			    NULL);
		if (nfhp)
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
			    cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (!error) {
		newvp = NFSTOV(np);
		if (attrflag != 0) {
			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
			if (error != 0)
				vput(newvp);
		}
	}
	if (!error) {
		*vpp = newvp;
	} else if (NFS_ISV4(dvp)) {
		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
		    vap->va_gid);
	}
	dnp = VTONFS(dvp);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag) {
		dnp->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	mtx_unlock(&dnp->n_mtx);
	return (error);
}

/*
 * nfs mknod vop
 * just call nfs_mknodrpc() to do the work.
 */
/* ARGSUSED */
static int
nfs_mknod(struct vop_mknod_args *ap)
{
	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
}

static struct mtx nfs_cverf_mtx;
MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex",
    MTX_DEF);

static nfsquad_t
nfs_get_cverf(void)
{
	static nfsquad_t cverf;
	nfsquad_t ret;
	static int cverf_initialized = 0;

	mtx_lock(&nfs_cverf_mtx);
	if (cverf_initialized == 0) {
		cverf.lval[0] = arc4random();
		cverf.lval[1] = arc4random();
		cverf_initialized = 1;
	} else
		cverf.qval++;
	ret = cverf;
	mtx_unlock(&nfs_cverf_mtx);

	return (ret);
}

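/*
 * The verifier from nfs_get_cverf() is what makes NFSv3/v4 exclusive
 * create safe across retransmits: the server stores the 64-bit value
 * with the newly created file, so a retried CREATE bearing the same
 * verifier succeeds instead of failing with EEXIST.  Seeding from
 * arc4random() and incrementing afterwards keeps the values unique for
 * this boot.
 */
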
/*
 * nfs file create call
 */
static int
nfs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	struct nfsmount *nmp;
	struct nfsvattr dnfsva, nfsva;
	struct nfsfh *nfhp;
	nfsquad_t cverf;
	int error = 0, attrflag, dattrflag, fmode = 0;
	struct vattr vattr;

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK)
		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
	dnp = VTONFS(dvp);
	nmp = VFSTONFS(vnode_mount(dvp));
again:
	/* For NFSv4, wait until any remove is done. */
	mtx_lock(&dnp->n_mtx);
	while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
		dnp->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
	}
	mtx_unlock(&dnp->n_mtx);

	cverf = nfs_get_cverf();
	error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva,
	    &nfhp, &attrflag, &dattrflag, NULL);
	if (!error) {
		if (nfhp == NULL)
			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
			    NULL);
		if (nfhp != NULL)
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
			    cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (!error) {
		newvp = NFSTOV(np);
		if (attrflag == 0)
			error = nfsrpc_getattr(newvp, cnp->cn_cred,
			    cnp->cn_thread, &nfsva, NULL);
		if (error == 0)
			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (error) {
		if (newvp != NULL) {
			vput(newvp);
			newvp = NULL;
		}
		if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
		    error == NFSERR_NOTSUPP) {
			fmode &= ~O_EXCL;
			goto again;
		}
	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
		if (nfscl_checksattr(vap, &nfsva)) {
			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
			    cnp->cn_thread, &nfsva, &attrflag, NULL);
			if (error && (vap->va_uid != (uid_t)VNOVAL ||
			    vap->va_gid != (gid_t)VNOVAL)) {
				/* try again without setting uid/gid */
				vap->va_uid = (uid_t)VNOVAL;
				vap->va_gid = (gid_t)VNOVAL;
				error = nfsrpc_setattr(newvp, vap, NULL,
				    cnp->cn_cred, cnp->cn_thread, &nfsva,
				    &attrflag, NULL);
			}
			if (attrflag)
				(void) nfscl_loadattrcache(&newvp, &nfsva,
				    NULL, NULL, 0, 1);
			if (error != 0)
				vput(newvp);
		}
	}
	if (!error) {
		if ((cnp->cn_flags & MAKEENTRY) && attrflag)
			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
			    NULL);
		*ap->a_vpp = newvp;
	} else if (NFS_ISV4(dvp)) {
		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
		    vap->va_gid);
	}
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag) {
		dnp->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	mtx_unlock(&dnp->n_mtx);
	return (error);
}

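/*
 * Note the two fallbacks in nfs_create() above: a server that rejects
 * exclusive create with NFSERR_NOTSUPP causes a retry ("goto again")
 * without O_EXCL, and since an exclusive create cannot carry
 * attributes, a follow-up SETATTR applies them afterwards (retried once
 * without uid/gid if that part fails).
 */
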
/*
 * nfs file remove call
 * To try and make nfs semantics closer to ufs semantics, a file that has
 * other processes using the vnode is renamed instead of removed and then
 * removed later on the last close.
 * - If v_usecount > 1
 *	  If a rename is not already in the works
 *	     call nfs_sillyrename() to set it up
 *   else
 *	  do the remove rpc
 */
static int
nfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

	KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
		if (error != EINTR && error != EIO)
			/* Do the rpc */
			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	mtx_lock(&np->n_mtx);
	np->n_attrstamp = 0;
	mtx_unlock(&np->n_mtx);
	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 */
int
ncl_removeit(struct sillyrename *sp, struct vnode *vp)
{
	/*
	 * Make sure that the directory vnode is still valid.
	 * XXX we should lock sp->s_dvp here.
	 */
	if (sp->s_dvp->v_type == VBAD)
		return (0);
	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
	    sp->s_cred, NULL));
}

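/*
 * Sillyrename, briefly: unlink(2) of a file that other processes still
 * have open is turned into a RENAME to a hidden temporary name
 * (typically a ".nfs..." entry), and ncl_removeit() above issues the
 * real REMOVE later, from the inactive vnode op, once the last
 * reference is gone.
 */
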
/*
 * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
 */
static int
nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td)
{
	struct nfsvattr dnfsva;
	struct nfsnode *dnp = VTONFS(dvp);
	int error = 0, dattrflag;

	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NREMOVEINPROG;
	mtx_unlock(&dnp->n_mtx);
	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
	    &dattrflag, NULL);
	mtx_lock(&dnp->n_mtx);
	if ((dnp->n_flag & NREMOVEWANT)) {
		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
		mtx_unlock(&dnp->n_mtx);
		wakeup((caddr_t)dnp);
	} else {
		dnp->n_flag &= ~NREMOVEINPROG;
		mtx_unlock(&dnp->n_mtx);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag) {
		dnp->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	mtx_unlock(&dnp->n_mtx);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs file rename call
 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct nfsnode *fnp = VTONFS(ap->a_fvp);
	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
	struct nfsv4node *newv4 = NULL;
	int error;

	KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
	    (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file. If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
	NFSVOPUNLOCK(fvp, 0);
	if (!error && tvp)
		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
	if (error)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
	    tcnp->cn_thread);

	if (error == 0 && NFS_ISV4(tdvp)) {
		/*
		 * For NFSv4, check to see if it is the same name and
		 * replace the name, if it is different.
		 */
		MALLOC(newv4, struct nfsv4node *,
		    sizeof (struct nfsv4node) +
		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
		    M_NFSV4NODE, M_WAITOK);
		mtx_lock(&tdnp->n_mtx);
		mtx_lock(&fnp->n_mtx);
		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
		     NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
		       tcnp->cn_namelen) ||
		     tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
		     NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
		       tdnp->n_fhp->nfh_len))) {
#ifdef notdef
{ char nnn[100]; int nnnl;
nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
bcopy(tcnp->cn_nameptr, nnn, nnnl);
nnn[nnnl] = '\0';
printf("ren replace=%s\n",nnn);
}
#endif
			FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
			fnp->n_v4 = newv4;
			newv4 = NULL;
			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			    tdnp->n_fhp->nfh_len);
			NFSBCOPY(tcnp->cn_nameptr,
			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
		}
		mtx_unlock(&tdnp->n_mtx);
		mtx_unlock(&fnp->n_mtx);
		if (newv4 != NULL)
			FREE((caddr_t)newv4, M_NFSV4NODE);
	}

	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc called from nfs_remove() above
 */
static int
nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
    struct sillyrename *sp)
{

	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
	    scnp->cn_thread));
}

1894 */ 1895 static int 1896 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, 1897 int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, 1898 int tnamelen, struct ucred *cred, struct thread *td) 1899 { 1900 struct nfsvattr fnfsva, tnfsva; 1901 struct nfsnode *fdnp = VTONFS(fdvp); 1902 struct nfsnode *tdnp = VTONFS(tdvp); 1903 int error = 0, fattrflag, tattrflag; 1904 1905 error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, 1906 tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, 1907 &tattrflag, NULL, NULL); 1908 mtx_lock(&fdnp->n_mtx); 1909 fdnp->n_flag |= NMODIFIED; 1910 if (fattrflag != 0) { 1911 mtx_unlock(&fdnp->n_mtx); 1912 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); 1913 } else { 1914 fdnp->n_attrstamp = 0; 1915 mtx_unlock(&fdnp->n_mtx); 1916 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 1917 } 1918 mtx_lock(&tdnp->n_mtx); 1919 tdnp->n_flag |= NMODIFIED; 1920 if (tattrflag != 0) { 1921 mtx_unlock(&tdnp->n_mtx); 1922 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); 1923 } else { 1924 tdnp->n_attrstamp = 0; 1925 mtx_unlock(&tdnp->n_mtx); 1926 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1927 } 1928 if (error && NFS_ISV4(fdvp)) 1929 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1930 return (error); 1931 } 1932 1933 /* 1934 * nfs hard link create call 1935 */ 1936 static int 1937 nfs_link(struct vop_link_args *ap) 1938 { 1939 struct vnode *vp = ap->a_vp; 1940 struct vnode *tdvp = ap->a_tdvp; 1941 struct componentname *cnp = ap->a_cnp; 1942 struct nfsnode *np, *tdnp; 1943 struct nfsvattr nfsva, dnfsva; 1944 int error = 0, attrflag, dattrflag; 1945 1946 /* 1947 * Push all writes to the server, so that the attribute cache 1948 * doesn't get "out of sync" with the server. 1949 * XXX There should be a better way! 1950 */ 1951 VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); 1952 1953 error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, 1954 cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag, 1955 &dattrflag, NULL); 1956 tdnp = VTONFS(tdvp); 1957 mtx_lock(&tdnp->n_mtx); 1958 tdnp->n_flag |= NMODIFIED; 1959 if (dattrflag != 0) { 1960 mtx_unlock(&tdnp->n_mtx); 1961 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); 1962 } else { 1963 tdnp->n_attrstamp = 0; 1964 mtx_unlock(&tdnp->n_mtx); 1965 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1966 } 1967 if (attrflag) 1968 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1969 else { 1970 np = VTONFS(vp); 1971 mtx_lock(&np->n_mtx); 1972 np->n_attrstamp = 0; 1973 mtx_unlock(&np->n_mtx); 1974 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1975 } 1976 /* 1977 * If negative lookup caching is enabled, I might as well 1978 * add an entry for this node. Not necessary for correctness, 1979 * but if negative caching is enabled, then the system 1980 * must care about lookup caching hit rate, so... 
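	 * The entry maps the new name in tdvp to the existing vnode vp
	 * and is stamped with the file's change time (na_ctime), which
	 * is why the post-op attributes (attrflag) are required.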
1981 */ 1982 if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && 1983 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 1984 cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); 1985 } 1986 if (error && NFS_ISV4(vp)) 1987 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, 1988 (gid_t)0); 1989 return (error); 1990 } 1991 1992 /* 1993 * nfs symbolic link create call 1994 */ 1995 static int 1996 nfs_symlink(struct vop_symlink_args *ap) 1997 { 1998 struct vnode *dvp = ap->a_dvp; 1999 struct vattr *vap = ap->a_vap; 2000 struct componentname *cnp = ap->a_cnp; 2001 struct nfsvattr nfsva, dnfsva; 2002 struct nfsfh *nfhp; 2003 struct nfsnode *np = NULL, *dnp; 2004 struct vnode *newvp = NULL; 2005 int error = 0, attrflag, dattrflag, ret; 2006 2007 vap->va_type = VLNK; 2008 error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2009 ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, 2010 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); 2011 if (nfhp) { 2012 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, 2013 &np, NULL, LK_EXCLUSIVE); 2014 if (!ret) 2015 newvp = NFSTOV(np); 2016 else if (!error) 2017 error = ret; 2018 } 2019 if (newvp != NULL) { 2020 if (attrflag) 2021 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 2022 0, 1); 2023 } else if (!error) { 2024 /* 2025 * If we do not have an error and we could not extract the 2026 * newvp from the response due to the request being NFSv2, we 2027 * have to do a lookup in order to obtain a newvp to return. 2028 */ 2029 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2030 cnp->cn_cred, cnp->cn_thread, &np); 2031 if (!error) 2032 newvp = NFSTOV(np); 2033 } 2034 if (error) { 2035 if (newvp) 2036 vput(newvp); 2037 if (NFS_ISV4(dvp)) 2038 error = nfscl_maperr(cnp->cn_thread, error, 2039 vap->va_uid, vap->va_gid); 2040 } else { 2041 *ap->a_vpp = newvp; 2042 } 2043 2044 dnp = VTONFS(dvp); 2045 mtx_lock(&dnp->n_mtx); 2046 dnp->n_flag |= NMODIFIED; 2047 if (dattrflag != 0) { 2048 mtx_unlock(&dnp->n_mtx); 2049 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2050 } else { 2051 dnp->n_attrstamp = 0; 2052 mtx_unlock(&dnp->n_mtx); 2053 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2054 } 2055 /* 2056 * If negative lookup caching is enabled, I might as well 2057 * add an entry for this node. Not necessary for correctness, 2058 * but if negative caching is enabled, then the system 2059 * must care about lookup caching hit rate, so... 
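	 * As in nfs_link(), the entry can only be made when the server
	 * returned attributes for the new symlink (attrflag != 0),
	 * since na_ctime is used as the entry's timestamp.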
2060 */ 2061 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2062 (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { 2063 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); 2064 } 2065 return (error); 2066 } 2067 2068 /* 2069 * nfs make dir call 2070 */ 2071 static int 2072 nfs_mkdir(struct vop_mkdir_args *ap) 2073 { 2074 struct vnode *dvp = ap->a_dvp; 2075 struct vattr *vap = ap->a_vap; 2076 struct componentname *cnp = ap->a_cnp; 2077 struct nfsnode *np = NULL, *dnp; 2078 struct vnode *newvp = NULL; 2079 struct vattr vattr; 2080 struct nfsfh *nfhp; 2081 struct nfsvattr nfsva, dnfsva; 2082 int error = 0, attrflag, dattrflag, ret; 2083 2084 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 2085 return (error); 2086 vap->va_type = VDIR; 2087 error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2088 vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, 2089 &attrflag, &dattrflag, NULL); 2090 dnp = VTONFS(dvp); 2091 mtx_lock(&dnp->n_mtx); 2092 dnp->n_flag |= NMODIFIED; 2093 if (dattrflag != 0) { 2094 mtx_unlock(&dnp->n_mtx); 2095 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2096 } else { 2097 dnp->n_attrstamp = 0; 2098 mtx_unlock(&dnp->n_mtx); 2099 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2100 } 2101 if (nfhp) { 2102 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, 2103 &np, NULL, LK_EXCLUSIVE); 2104 if (!ret) { 2105 newvp = NFSTOV(np); 2106 if (attrflag) 2107 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 2108 NULL, 0, 1); 2109 } else if (!error) 2110 error = ret; 2111 } 2112 if (!error && newvp == NULL) { 2113 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2114 cnp->cn_cred, cnp->cn_thread, &np); 2115 if (!error) { 2116 newvp = NFSTOV(np); 2117 if (newvp->v_type != VDIR) 2118 error = EEXIST; 2119 } 2120 } 2121 if (error) { 2122 if (newvp) 2123 vput(newvp); 2124 if (NFS_ISV4(dvp)) 2125 error = nfscl_maperr(cnp->cn_thread, error, 2126 vap->va_uid, vap->va_gid); 2127 } else { 2128 /* 2129 * If negative lookup caching is enabled, I might as well 2130 * add an entry for this node. Not necessary for correctness, 2131 * but if negative caching is enabled, then the system 2132 * must care about lookup caching hit rate, so... 
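		 * Here both attrflag and dattrflag must be set, since
		 * the ctimes of both the new directory and its parent
		 * are passed to cache_enter_time().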
2133 */ 2134 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2135 (cnp->cn_flags & MAKEENTRY) && 2136 attrflag != 0 && dattrflag != 0) 2137 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 2138 &dnfsva.na_ctime); 2139 *ap->a_vpp = newvp; 2140 } 2141 return (error); 2142 } 2143 2144 /* 2145 * nfs remove directory call 2146 */ 2147 static int 2148 nfs_rmdir(struct vop_rmdir_args *ap) 2149 { 2150 struct vnode *vp = ap->a_vp; 2151 struct vnode *dvp = ap->a_dvp; 2152 struct componentname *cnp = ap->a_cnp; 2153 struct nfsnode *dnp; 2154 struct nfsvattr dnfsva; 2155 int error, dattrflag; 2156 2157 if (dvp == vp) 2158 return (EINVAL); 2159 error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2160 cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL); 2161 dnp = VTONFS(dvp); 2162 mtx_lock(&dnp->n_mtx); 2163 dnp->n_flag |= NMODIFIED; 2164 if (dattrflag != 0) { 2165 mtx_unlock(&dnp->n_mtx); 2166 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2167 } else { 2168 dnp->n_attrstamp = 0; 2169 mtx_unlock(&dnp->n_mtx); 2170 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2171 } 2172 2173 cache_purge(dvp); 2174 cache_purge(vp); 2175 if (error && NFS_ISV4(dvp)) 2176 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, 2177 (gid_t)0); 2178 /* 2179 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 2180 */ 2181 if (error == ENOENT) 2182 error = 0; 2183 return (error); 2184 } 2185 2186 /* 2187 * nfs readdir call 2188 */ 2189 static int 2190 nfs_readdir(struct vop_readdir_args *ap) 2191 { 2192 struct vnode *vp = ap->a_vp; 2193 struct nfsnode *np = VTONFS(vp); 2194 struct uio *uio = ap->a_uio; 2195 ssize_t tresid, left; 2196 int error = 0; 2197 struct vattr vattr; 2198 2199 if (ap->a_eofflag != NULL) 2200 *ap->a_eofflag = 0; 2201 if (vp->v_type != VDIR) 2202 return(EPERM); 2203 2204 /* 2205 * First, check for hit on the EOF offset cache 2206 */ 2207 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2208 (np->n_flag & NMODIFIED) == 0) { 2209 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2210 mtx_lock(&np->n_mtx); 2211 if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || 2212 !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2213 mtx_unlock(&np->n_mtx); 2214 NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); 2215 if (ap->a_eofflag != NULL) 2216 *ap->a_eofflag = 1; 2217 return (0); 2218 } else 2219 mtx_unlock(&np->n_mtx); 2220 } 2221 } 2222 2223 /* 2224 * NFS always guarantees that directory entries don't straddle 2225 * DIRBLKSIZ boundaries. As such, we need to limit the size 2226 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial 2227 * directory entry. 2228 */ 2229 left = uio->uio_resid % DIRBLKSIZ; 2230 if (left == uio->uio_resid) 2231 return (EINVAL); 2232 uio->uio_resid -= left; 2233 2234 /* 2235 * Call ncl_bioread() to do the real work. 2236 */ 2237 tresid = uio->uio_resid; 2238 error = ncl_bioread(vp, uio, 0, ap->a_cred); 2239 2240 if (!error && uio->uio_resid == tresid) { 2241 NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); 2242 if (ap->a_eofflag != NULL) 2243 *ap->a_eofflag = 1; 2244 } 2245 2246 /* Add the partial DIRBLKSIZ (left) back in. */ 2247 uio->uio_resid += left; 2248 return (error); 2249 } 2250 2251 /* 2252 * Readdir rpc call. 2253 * Called from below the buffer cache by ncl_doio(). 
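 * The uio offset is translated into an NFS directory cookie via the
 * per-nfsnode cookie cache; if no cookie is found, the cached block is
 * considered stale and NFSERR_BAD_COOKIE is returned.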
 */
int
ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    struct thread *td)
{
	struct nfsvattr nfsva;
	nfsuint64 *cookiep, cookie;
	struct nfsnode *dnp = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, eof, attrflag;

	KASSERT(uiop->uio_iovcnt == 1 &&
	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
	    ("nfs readdirrpc bad uio"));

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	ncl_dircookie_lock(dnp);
	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		ncl_dircookie_unlock(dnp);
	} else {
		ncl_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}

	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
		(void)ncl_fsinfo(nmp, vp, cred, td);

	error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
	    &attrflag, &eof, NULL);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);

	if (!error) {
		/*
		 * We are now either at the end of the directory or have filled
		 * the block.
		 */
		if (eof)
			dnp->n_direofoffset = uiop->uio_offset;
		else {
			if (uiop->uio_resid > 0)
				printf("EEK! readdirrpc resid > 0\n");
			ncl_dircookie_lock(dnp);
			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
			*cookiep = cookie;
			ncl_dircookie_unlock(dnp);
		}
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

/*
 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc().
 */
int
ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    struct thread *td)
{
	struct nfsvattr nfsva;
	nfsuint64 *cookiep, cookie;
	struct nfsnode *dnp = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, attrflag, eof;

	KASSERT(uiop->uio_iovcnt == 1 &&
	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
	    ("nfs readdirplusrpc bad uio"));

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	ncl_dircookie_lock(dnp);
	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		ncl_dircookie_unlock(dnp);
	} else {
		ncl_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}

	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
		(void)ncl_fsinfo(nmp, vp, cred, td);
	error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
	    &attrflag, &eof, NULL);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);

	if (!error) {
		/*
		 * We are now either at the end of the directory or have
		 * filled the block.
		 */
		if (eof)
			dnp->n_direofoffset = uiop->uio_offset;
		else {
			if (uiop->uio_resid > 0)
				printf("EEK! readdirplusrpc resid > 0\n");
			ncl_dircookie_lock(dnp);
			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
			*cookiep = cookie;
			ncl_dircookie_unlock(dnp);
		}
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

/*
 * Silly rename. To make the NFS filesystem that is stateless look a little
 * more like the "ufs" filesystem, a remove of an active vnode is translated
 * to a rename to a funny looking filename that is removed by nfs_inactive on
 * the nfsnode. There is the potential for another process on a different
 * client to create the same funny name between the time that nfs_lookitup()
 * fails and nfs_rename() completes, but...
 */
static int
nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct sillyrename *sp;
	struct nfsnode *np;
	int error;
	short pid;
	unsigned int lticks;

	cache_purge(dvp);
	np = VTONFS(vp);
	KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
	    M_NEWNFSREQ, M_WAITOK);
	sp->s_cred = crhold(cnp->cn_cred);
	sp->s_dvp = dvp;
	VREF(dvp);

	/*
	 * Fudge together a funny name.
	 * The format of the funny name was changed to accommodate more
	 * sillynames per directory; the name is now .nfs.<ticks>.<pid>.4,
	 * where ticks is the CPU tick count since boot.
	 */
	pid = cnp->cn_thread->td_proc->p_pid;
	lticks = (unsigned int)ticks;
	for ( ; ; ) {
		sp->s_namlen = sprintf(sp->s_name,
		    ".nfs.%08x.%04x4.4", lticks,
		    pid);
		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		    cnp->cn_thread, NULL))
			break;
		lticks++;
	}
	error = nfs_renameit(dvp, vp, cnp, sp);
	if (error)
		goto bad;
	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
	    cnp->cn_thread, &np);
	np->n_sillyrename = sp;
	return (0);
bad:
	vrele(sp->s_dvp);
	crfree(sp->s_cred);
	free((caddr_t)sp, M_NEWNFSREQ);
	return (error);
}

/*
 * Look up a file name and optionally either update the file handle or
 * allocate an nfsnode, depending on the value of npp.
 * npp == NULL	--> just do the lookup
 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 *		    handled too
 * *npp != NULL --> update the file handle in the vnode
 */
static int
nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *newvp = NULL, *vp;
	struct nfsnode *np, *dnp = VTONFS(dvp);
	struct nfsfh *nfhp, *onfhp;
	struct nfsvattr nfsva, dnfsva;
	struct componentname cn;
	int error = 0, attrflag, dattrflag;
	u_int hash;

	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
	    &nfhp, &attrflag, &dattrflag, NULL);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (npp && !error) {
		if (*npp != NULL) {
			np = *npp;
			vp = NFSTOV(np);
			/*
			 * For NFSv4, check to see if it is the same name and
			 * replace the name, if it is different.
			 */
			if (np->n_v4 != NULL && nfsva.na_type == VREG &&
			    (np->n_v4->n4_namelen != len ||
			     NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
			     dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
			     NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
			     dnp->n_fhp->nfh_len))) {
#ifdef notdef
{ char nnn[100]; int nnnl;
nnnl = (len < 100) ? len : 99;
bcopy(name, nnn, nnnl);
nnn[nnnl] = '\0';
printf("replace=%s\n",nnn);
}
#endif
				FREE((caddr_t)np->n_v4, M_NFSV4NODE);
				MALLOC(np->n_v4, struct nfsv4node *,
				    sizeof (struct nfsv4node) +
				    dnp->n_fhp->nfh_len + len - 1,
				    M_NFSV4NODE, M_WAITOK);
				np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
				np->n_v4->n4_namelen = len;
				NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
				    dnp->n_fhp->nfh_len);
				NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
			}
			hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
			    FNV1_32_INIT);
			onfhp = np->n_fhp;
			/*
			 * Rehash node for new file handle.
			 */
			vfs_hash_rehash(vp, hash);
			np->n_fhp = nfhp;
			if (onfhp != NULL)
				FREE((caddr_t)onfhp, M_NFSFH);
			newvp = NFSTOV(np);
		} else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
			FREE((caddr_t)nfhp, M_NFSFH);
			VREF(dvp);
			newvp = dvp;
		} else {
			cn.cn_nameptr = name;
			cn.cn_namelen = len;
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
			    &np, NULL, LK_EXCLUSIVE);
			if (error)
				return (error);
			newvp = NFSTOV(np);
		}
		if (!attrflag && *npp == NULL) {
			if (newvp == dvp)
				vrele(newvp);
			else
				vput(newvp);
			return (ENOENT);
		}
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (npp && *npp == NULL) {
		if (error) {
			if (newvp) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
		} else
			*npp = np;
	}
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * Nfs Version 3 and 4 commit rpc
 */
int
ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
{
	struct nfsvattr nfsva;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *np;
	struct uio uio;
	int error, attrflag;

	np = VTONFS(vp);
	error = EIO;
	attrflag = 0;
	if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) {
		uio.uio_offset = offset;
		uio.uio_resid = cnt;
		error = nfscl_doiods(vp, &uio, NULL, NULL,
		    NFSV4OPEN_ACCESSWRITE, 1, cred, td);
		if (error != 0) {
			mtx_lock(&np->n_mtx);
			np->n_flag &= ~NDSCOMMIT;
			mtx_unlock(&np->n_mtx);
		}
	}
	if (error != 0) {
		mtx_lock(&nmp->nm_mtx);
		if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
			mtx_unlock(&nmp->nm_mtx);
			return (0);
		}
		mtx_unlock(&nmp->nm_mtx);
		error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva,
		    &attrflag, NULL);
	}
	if (attrflag != 0)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
		    0, 1);
	if (error != 0 && NFS_ISV4(vp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * Strategy routine.
 * For async requests when nfsiod(s) are running, queue the request by
 * calling ncl_asyncio(), otherwise just call ncl_doio() to do the
 * request.
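 * If no nfsiod is available, ncl_asyncio() fails and the I/O is
 * simply performed synchronously in the context of the caller.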
 */
static int
nfs_strategy(struct vop_strategy_args *ap)
{
	struct buf *bp;
	struct vnode *vp;
	struct ucred *cr;

	bp = ap->a_bp;
	vp = ap->a_vp;
	KASSERT(bp->b_vp == vp, ("missing b_getvp"));
	KASSERT(!(bp->b_flags & B_DONE),
	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
	BUF_ASSERT_HELD(bp);

	if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno)
		bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize /
		    DEV_BSIZE);
	if (bp->b_iocmd == BIO_READ)
		cr = bp->b_rcred;
	else
		cr = bp->b_wcred;

	/*
	 * If the op is asynchronous and an i/o daemon is waiting,
	 * queue the request, wake it up and wait for completion;
	 * otherwise just do it ourselves.
	 */
	if ((bp->b_flags & B_ASYNC) == 0 ||
	    ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread))
		(void) ncl_doio(vp, bp, cr, curthread, 1);
	return (0);
}

/*
 * fsync vnode op. Just call ncl_flush() with commit == 1.
 */
/* ARGSUSED */
static int
nfs_fsync(struct vop_fsync_args *ap)
{

	if (ap->a_vp->v_type != VREG) {
		/*
		 * For NFS, metadata is changed synchronously on the server,
		 * so there is nothing to flush. Also, ncl_flush() clears
		 * the NMODIFIED flag and that shouldn't be done here for
		 * directories.
		 */
		return (0);
	}
	return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0));
}

/*
 * Flush all the blocks associated with a vnode.
 * Walk through the buffer pool and push any dirty pages
 * associated with the vnode.
 * If the called_from_renewthread argument is TRUE, it has been called
 * from the NFSv4 renew thread and, as such, cannot block indefinitely
 * waiting for a buffer write to complete.
 */
int
ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
    int commit, int called_from_renewthread)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1, trycnt = 0;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
	struct bufobj *bo;
#ifndef NFS_COMMITBVECSIZ
#define	NFS_COMMITBVECSIZ	20
#endif
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	int bvecsize = 0, bveccount;

	if (called_from_renewthread != 0)
		slptimeo = hz;
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	if (!commit)
		passone = 0;
	bo = &vp->v_bufobj;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, ncl_writebp() is called to do the
	 * job.
	 */
again:
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV34(vp) && commit) {
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers are waiting for a commit.
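		 * Such buffers carry B_DELWRI | B_NEEDCOMMIT: they have
		 * been written to the server but not yet committed to
		 * stable storage there.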
2693 */ 2694 bveccount = 0; 2695 BO_LOCK(bo); 2696 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2697 if (!BUF_ISLOCKED(bp) && 2698 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 2699 == (B_DELWRI | B_NEEDCOMMIT)) 2700 bveccount++; 2701 } 2702 /* 2703 * Allocate space to remember the list of bufs to commit. It is 2704 * important to use M_NOWAIT here to avoid a race with nfs_write. 2705 * If we can't get memory (for whatever reason), we will end up 2706 * committing the buffers one-by-one in the loop below. 2707 */ 2708 if (bveccount > NFS_COMMITBVECSIZ) { 2709 /* 2710 * Release the vnode interlock to avoid a lock 2711 * order reversal. 2712 */ 2713 BO_UNLOCK(bo); 2714 bvec = (struct buf **) 2715 malloc(bveccount * sizeof(struct buf *), 2716 M_TEMP, M_NOWAIT); 2717 BO_LOCK(bo); 2718 if (bvec == NULL) { 2719 bvec = bvec_on_stack; 2720 bvecsize = NFS_COMMITBVECSIZ; 2721 } else 2722 bvecsize = bveccount; 2723 } else { 2724 bvec = bvec_on_stack; 2725 bvecsize = NFS_COMMITBVECSIZ; 2726 } 2727 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2728 if (bvecpos >= bvecsize) 2729 break; 2730 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 2731 nbp = TAILQ_NEXT(bp, b_bobufs); 2732 continue; 2733 } 2734 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 2735 (B_DELWRI | B_NEEDCOMMIT)) { 2736 BUF_UNLOCK(bp); 2737 nbp = TAILQ_NEXT(bp, b_bobufs); 2738 continue; 2739 } 2740 BO_UNLOCK(bo); 2741 bremfree(bp); 2742 /* 2743 * Work out if all buffers are using the same cred 2744 * so we can deal with them all with one commit. 2745 * 2746 * NOTE: we are not clearing B_DONE here, so we have 2747 * to do it later on in this routine if we intend to 2748 * initiate I/O on the bp. 2749 * 2750 * Note: to avoid loopback deadlocks, we do not 2751 * assign b_runningbufspace. 2752 */ 2753 if (wcred == NULL) 2754 wcred = bp->b_wcred; 2755 else if (wcred != bp->b_wcred) 2756 wcred = NOCRED; 2757 vfs_busy_pages(bp, 1); 2758 2759 BO_LOCK(bo); 2760 /* 2761 * bp is protected by being locked, but nbp is not 2762 * and vfs_busy_pages() may sleep. We have to 2763 * recalculate nbp. 2764 */ 2765 nbp = TAILQ_NEXT(bp, b_bobufs); 2766 2767 /* 2768 * A list of these buffers is kept so that the 2769 * second loop knows which buffers have actually 2770 * been committed. This is necessary, since there 2771 * may be a race between the commit rpc and new 2772 * uncommitted writes on the file. 2773 */ 2774 bvec[bvecpos++] = bp; 2775 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 2776 bp->b_dirtyoff; 2777 if (toff < off) 2778 off = toff; 2779 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 2780 if (toff > endoff) 2781 endoff = toff; 2782 } 2783 BO_UNLOCK(bo); 2784 } 2785 if (bvecpos > 0) { 2786 /* 2787 * Commit data on the server, as required. 2788 * If all bufs are using the same wcred, then use that with 2789 * one call for all of them, otherwise commit each one 2790 * separately. 
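		 * (wcred was computed while collecting the buffers above:
		 * it holds the common write cred, or NOCRED when the
		 * buffers' creds differed.)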
		 */
		if (wcred != NOCRED)
			retv = ncl_commit(vp, off, (int)(endoff - off),
			    wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				    bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
				    - bp->b_dirtyoff);
				retv = ncl_commit(vp, off, (int)size,
				    bp->b_wcred, td);
				if (retv)
					break;
			}
		}

		if (retv == NFSERR_STALEWRITEVERF)
			ncl_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			if (retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific. We should probably move that
				 * into bundirty(). XXX
				 */
				bufobj_wref(bo);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	BO_LOCK(bo);
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (waitfor != MNT_WAIT || passone)
				continue;

			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
			if (error == 0) {
				BUF_UNLOCK(bp);
				goto loop;
			}
			if (error == ENOLCK) {
				error = 0;
				goto loop;
			}
			if (called_from_renewthread != 0) {
				/*
				 * Return EIO so the flush will be retried
				 * later.
				 */
				error = EIO;
				goto done;
			}
			if (newnfs_sigintr(nmp, td)) {
				error = EINTR;
				goto done;
			}
			if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs_fsync: not dirty");
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		BO_UNLOCK(bo);
		bremfree(bp);
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		if (newnfs_sigintr(nmp, td)) {
			error = EINTR;
			goto done;
		}
		goto loop;
	}
	if (passone) {
		passone = 0;
		BO_UNLOCK(bo);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		while (bo->bo_numoutput) {
			error = bufobj_wwait(bo, slpflag, slptimeo);
			if (error) {
				BO_UNLOCK(bo);
				if (called_from_renewthread != 0) {
					/*
					 * Return EIO so that the flush will be
					 * retried later.
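					 * (The renew thread cannot block
					 * here indefinitely.)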
2918 */ 2919 error = EIO; 2920 goto done; 2921 } 2922 error = newnfs_sigintr(nmp, td); 2923 if (error) 2924 goto done; 2925 if (slpflag == PCATCH) { 2926 slpflag = 0; 2927 slptimeo = 2 * hz; 2928 } 2929 BO_LOCK(bo); 2930 } 2931 } 2932 if (bo->bo_dirty.bv_cnt != 0 && commit) { 2933 BO_UNLOCK(bo); 2934 goto loop; 2935 } 2936 /* 2937 * Wait for all the async IO requests to drain 2938 */ 2939 BO_UNLOCK(bo); 2940 mtx_lock(&np->n_mtx); 2941 while (np->n_directio_asyncwr > 0) { 2942 np->n_flag |= NFSYNCWAIT; 2943 error = newnfs_msleep(td, &np->n_directio_asyncwr, 2944 &np->n_mtx, slpflag | (PRIBIO + 1), 2945 "nfsfsync", 0); 2946 if (error) { 2947 if (newnfs_sigintr(nmp, td)) { 2948 mtx_unlock(&np->n_mtx); 2949 error = EINTR; 2950 goto done; 2951 } 2952 } 2953 } 2954 mtx_unlock(&np->n_mtx); 2955 } else 2956 BO_UNLOCK(bo); 2957 if (NFSHASPNFS(nmp)) { 2958 nfscl_layoutcommit(vp, td); 2959 /* 2960 * Invalidate the attribute cache, since writes to a DS 2961 * won't update the size attribute. 2962 */ 2963 mtx_lock(&np->n_mtx); 2964 np->n_attrstamp = 0; 2965 } else 2966 mtx_lock(&np->n_mtx); 2967 if (np->n_flag & NWRITEERR) { 2968 error = np->n_error; 2969 np->n_flag &= ~NWRITEERR; 2970 } 2971 if (commit && bo->bo_dirty.bv_cnt == 0 && 2972 bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) 2973 np->n_flag &= ~NMODIFIED; 2974 mtx_unlock(&np->n_mtx); 2975 done: 2976 if (bvec != NULL && bvec != bvec_on_stack) 2977 free(bvec, M_TEMP); 2978 if (error == 0 && commit != 0 && waitfor == MNT_WAIT && 2979 (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 || 2980 np->n_directio_asyncwr != 0)) { 2981 if (trycnt++ < 5) { 2982 /* try, try again... */ 2983 passone = 1; 2984 wcred = NULL; 2985 bvec = NULL; 2986 bvecsize = 0; 2987 goto again; 2988 } 2989 vn_printf(vp, "ncl_flush failed"); 2990 error = called_from_renewthread != 0 ? EIO : EBUSY; 2991 } 2992 return (error); 2993 } 2994 2995 /* 2996 * NFS advisory byte-level locks. 2997 */ 2998 static int 2999 nfs_advlock(struct vop_advlock_args *ap) 3000 { 3001 struct vnode *vp = ap->a_vp; 3002 struct ucred *cred; 3003 struct nfsnode *np = VTONFS(ap->a_vp); 3004 struct proc *p = (struct proc *)ap->a_id; 3005 struct thread *td = curthread; /* XXX */ 3006 struct vattr va; 3007 int ret, error = EOPNOTSUPP; 3008 u_quad_t size; 3009 3010 if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { 3011 if (vp->v_type != VREG) 3012 return (EINVAL); 3013 if ((ap->a_flags & F_POSIX) != 0) 3014 cred = p->p_ucred; 3015 else 3016 cred = td->td_ucred; 3017 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 3018 if (vp->v_iflag & VI_DOOMED) { 3019 NFSVOPUNLOCK(vp, 0); 3020 return (EBADF); 3021 } 3022 3023 /* 3024 * If this is unlocking a write locked region, flush and 3025 * commit them before unlocking. This is required by 3026 * RFC3530 Sec. 9.3.2. 3027 */ 3028 if (ap->a_op == F_UNLCK && 3029 nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, 3030 ap->a_flags)) 3031 (void) ncl_flush(vp, MNT_WAIT, td, 1, 0); 3032 3033 /* 3034 * Loop around doing the lock op, while a blocking lock 3035 * must wait for the lock op to succeed. 
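		 * For a blocking request (F_WAIT), a denied lock is
		 * retried after a short nfs_catnap() instead of having
		 * the thread block on the server.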
3036 */ 3037 do { 3038 ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, 3039 ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); 3040 if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3041 ap->a_op == F_SETLK) { 3042 NFSVOPUNLOCK(vp, 0); 3043 error = nfs_catnap(PZERO | PCATCH, ret, 3044 "ncladvl"); 3045 if (error) 3046 return (EINTR); 3047 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); 3048 if (vp->v_iflag & VI_DOOMED) { 3049 NFSVOPUNLOCK(vp, 0); 3050 return (EBADF); 3051 } 3052 } 3053 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 3054 ap->a_op == F_SETLK); 3055 if (ret == NFSERR_DENIED) { 3056 NFSVOPUNLOCK(vp, 0); 3057 return (EAGAIN); 3058 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { 3059 NFSVOPUNLOCK(vp, 0); 3060 return (ret); 3061 } else if (ret != 0) { 3062 NFSVOPUNLOCK(vp, 0); 3063 return (EACCES); 3064 } 3065 3066 /* 3067 * Now, if we just got a lock, invalidate data in the buffer 3068 * cache, as required, so that the coherency conforms with 3069 * RFC3530 Sec. 9.3.2. 3070 */ 3071 if (ap->a_op == F_SETLK) { 3072 if ((np->n_flag & NMODIFIED) == 0) { 3073 np->n_attrstamp = 0; 3074 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3075 ret = VOP_GETATTR(vp, &va, cred); 3076 } 3077 if ((np->n_flag & NMODIFIED) || ret || 3078 np->n_change != va.va_filerev) { 3079 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); 3080 np->n_attrstamp = 0; 3081 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 3082 ret = VOP_GETATTR(vp, &va, cred); 3083 if (!ret) { 3084 np->n_mtime = va.va_mtime; 3085 np->n_change = va.va_filerev; 3086 } 3087 } 3088 /* Mark that a file lock has been acquired. */ 3089 mtx_lock(&np->n_mtx); 3090 np->n_flag |= NHASBEENLOCKED; 3091 mtx_unlock(&np->n_mtx); 3092 } 3093 NFSVOPUNLOCK(vp, 0); 3094 return (0); 3095 } else if (!NFS_ISV4(vp)) { 3096 error = NFSVOPLOCK(vp, LK_SHARED); 3097 if (error) 3098 return (error); 3099 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3100 size = VTONFS(vp)->n_size; 3101 NFSVOPUNLOCK(vp, 0); 3102 error = lf_advlock(ap, &(vp->v_lockf), size); 3103 } else { 3104 if (nfs_advlock_p != NULL) 3105 error = nfs_advlock_p(ap); 3106 else { 3107 NFSVOPUNLOCK(vp, 0); 3108 error = ENOLCK; 3109 } 3110 } 3111 if (error == 0 && ap->a_op == F_SETLK) { 3112 error = NFSVOPLOCK(vp, LK_SHARED); 3113 if (error == 0) { 3114 /* Mark that a file lock has been acquired. */ 3115 mtx_lock(&np->n_mtx); 3116 np->n_flag |= NHASBEENLOCKED; 3117 mtx_unlock(&np->n_mtx); 3118 NFSVOPUNLOCK(vp, 0); 3119 } 3120 } 3121 } 3122 return (error); 3123 } 3124 3125 /* 3126 * NFS advisory byte-level locks. 3127 */ 3128 static int 3129 nfs_advlockasync(struct vop_advlockasync_args *ap) 3130 { 3131 struct vnode *vp = ap->a_vp; 3132 u_quad_t size; 3133 int error; 3134 3135 if (NFS_ISV4(vp)) 3136 return (EOPNOTSUPP); 3137 error = NFSVOPLOCK(vp, LK_SHARED); 3138 if (error) 3139 return (error); 3140 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3141 size = VTONFS(vp)->n_size; 3142 NFSVOPUNLOCK(vp, 0); 3143 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3144 } else { 3145 NFSVOPUNLOCK(vp, 0); 3146 error = EOPNOTSUPP; 3147 } 3148 return (error); 3149 } 3150 3151 /* 3152 * Print out the contents of an nfsnode. 
3153 */ 3154 static int 3155 nfs_print(struct vop_print_args *ap) 3156 { 3157 struct vnode *vp = ap->a_vp; 3158 struct nfsnode *np = VTONFS(vp); 3159 3160 printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, 3161 (uintmax_t)np->n_vattr.na_fsid); 3162 if (vp->v_type == VFIFO) 3163 fifo_printinfo(vp); 3164 printf("\n"); 3165 return (0); 3166 } 3167 3168 /* 3169 * This is the "real" nfs::bwrite(struct buf*). 3170 * We set B_CACHE if this is a VMIO buffer. 3171 */ 3172 int 3173 ncl_writebp(struct buf *bp, int force __unused, struct thread *td) 3174 { 3175 int oldflags, rtval; 3176 3177 BUF_ASSERT_HELD(bp); 3178 3179 if (bp->b_flags & B_INVAL) { 3180 brelse(bp); 3181 return (0); 3182 } 3183 3184 oldflags = bp->b_flags; 3185 bp->b_flags |= B_CACHE; 3186 3187 /* 3188 * Undirty the bp. We will redirty it later if the I/O fails. 3189 */ 3190 bundirty(bp); 3191 bp->b_flags &= ~B_DONE; 3192 bp->b_ioflags &= ~BIO_ERROR; 3193 bp->b_iocmd = BIO_WRITE; 3194 3195 bufobj_wref(bp->b_bufobj); 3196 curthread->td_ru.ru_oublock++; 3197 3198 /* 3199 * Note: to avoid loopback deadlocks, we do not 3200 * assign b_runningbufspace. 3201 */ 3202 vfs_busy_pages(bp, 1); 3203 3204 BUF_KERNPROC(bp); 3205 bp->b_iooffset = dbtob(bp->b_blkno); 3206 bstrategy(bp); 3207 3208 if ((oldflags & B_ASYNC) != 0) 3209 return (0); 3210 3211 rtval = bufwait(bp); 3212 if (oldflags & B_DELWRI) 3213 reassignbuf(bp); 3214 brelse(bp); 3215 return (rtval); 3216 } 3217 3218 /* 3219 * nfs special file access vnode op. 3220 * Essentially just get vattr and then imitate iaccess() since the device is 3221 * local to the client. 3222 */ 3223 static int 3224 nfsspec_access(struct vop_access_args *ap) 3225 { 3226 struct vattr *vap; 3227 struct ucred *cred = ap->a_cred; 3228 struct vnode *vp = ap->a_vp; 3229 accmode_t accmode = ap->a_accmode; 3230 struct vattr vattr; 3231 int error; 3232 3233 /* 3234 * Disallow write attempts on filesystems mounted read-only; 3235 * unless the file is a socket, fifo, or a block or character 3236 * device resident on the filesystem. 3237 */ 3238 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3239 switch (vp->v_type) { 3240 case VREG: 3241 case VDIR: 3242 case VLNK: 3243 return (EROFS); 3244 default: 3245 break; 3246 } 3247 } 3248 vap = &vattr; 3249 error = VOP_GETATTR(vp, vap, cred); 3250 if (error) 3251 goto out; 3252 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3253 accmode, cred, NULL); 3254 out: 3255 return error; 3256 } 3257 3258 /* 3259 * Read wrapper for fifos. 3260 */ 3261 static int 3262 nfsfifo_read(struct vop_read_args *ap) 3263 { 3264 struct nfsnode *np = VTONFS(ap->a_vp); 3265 int error; 3266 3267 /* 3268 * Set access flag. 3269 */ 3270 mtx_lock(&np->n_mtx); 3271 np->n_flag |= NACC; 3272 vfs_timestamp(&np->n_atim); 3273 mtx_unlock(&np->n_mtx); 3274 error = fifo_specops.vop_read(ap); 3275 return error; 3276 } 3277 3278 /* 3279 * Write wrapper for fifos. 3280 */ 3281 static int 3282 nfsfifo_write(struct vop_write_args *ap) 3283 { 3284 struct nfsnode *np = VTONFS(ap->a_vp); 3285 3286 /* 3287 * Set update flag. 3288 */ 3289 mtx_lock(&np->n_mtx); 3290 np->n_flag |= NUPD; 3291 vfs_timestamp(&np->n_mtim); 3292 mtx_unlock(&np->n_mtx); 3293 return(fifo_specops.vop_write(ap)); 3294 } 3295 3296 /* 3297 * Close wrapper for fifos. 3298 * 3299 * Update the times on the nfsnode then do fifo close. 
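 * The times are only pushed to the server, via VOP_SETATTR(), on the
 * last close of a fifo on a read-write mounted filesystem; otherwise
 * they are just left in the nfsnode.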
3300 */ 3301 static int 3302 nfsfifo_close(struct vop_close_args *ap) 3303 { 3304 struct vnode *vp = ap->a_vp; 3305 struct nfsnode *np = VTONFS(vp); 3306 struct vattr vattr; 3307 struct timespec ts; 3308 3309 mtx_lock(&np->n_mtx); 3310 if (np->n_flag & (NACC | NUPD)) { 3311 vfs_timestamp(&ts); 3312 if (np->n_flag & NACC) 3313 np->n_atim = ts; 3314 if (np->n_flag & NUPD) 3315 np->n_mtim = ts; 3316 np->n_flag |= NCHG; 3317 if (vrefcnt(vp) == 1 && 3318 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3319 VATTR_NULL(&vattr); 3320 if (np->n_flag & NACC) 3321 vattr.va_atime = np->n_atim; 3322 if (np->n_flag & NUPD) 3323 vattr.va_mtime = np->n_mtim; 3324 mtx_unlock(&np->n_mtx); 3325 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3326 goto out; 3327 } 3328 } 3329 mtx_unlock(&np->n_mtx); 3330 out: 3331 return (fifo_specops.vop_close(ap)); 3332 } 3333 3334 /* 3335 * Just call ncl_writebp() with the force argument set to 1. 3336 * 3337 * NOTE: B_DONE may or may not be set in a_bp on call. 3338 */ 3339 static int 3340 nfs_bwrite(struct buf *bp) 3341 { 3342 3343 return (ncl_writebp(bp, 1, curthread)); 3344 } 3345 3346 struct buf_ops buf_ops_newnfs = { 3347 .bop_name = "buf_ops_nfs", 3348 .bop_write = nfs_bwrite, 3349 .bop_strategy = bufstrategy, 3350 .bop_sync = bufsync, 3351 .bop_bdflush = bufbdflush, 3352 }; 3353 3354 static int 3355 nfs_getacl(struct vop_getacl_args *ap) 3356 { 3357 int error; 3358 3359 if (ap->a_type != ACL_TYPE_NFS4) 3360 return (EOPNOTSUPP); 3361 error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3362 NULL); 3363 if (error > NFSERR_STALE) { 3364 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3365 error = EPERM; 3366 } 3367 return (error); 3368 } 3369 3370 static int 3371 nfs_setacl(struct vop_setacl_args *ap) 3372 { 3373 int error; 3374 3375 if (ap->a_type != ACL_TYPE_NFS4) 3376 return (EOPNOTSUPP); 3377 error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3378 NULL); 3379 if (error > NFSERR_STALE) { 3380 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3381 error = EPERM; 3382 } 3383 return (error); 3384 } 3385 3386 static int 3387 nfs_set_text(struct vop_set_text_args *ap) 3388 { 3389 struct vnode *vp = ap->a_vp; 3390 struct nfsnode *np; 3391 3392 /* 3393 * If the text file has been mmap'd, flush any dirty pages to the 3394 * buffer cache and then... 3395 * Make sure all writes are pushed to the NFS server. If this is not 3396 * done, the modify time of the file can change while the text 3397 * file is being executed. This will cause the process that is 3398 * executing the text file to be terminated. 3399 */ 3400 if (vp->v_object != NULL) { 3401 VM_OBJECT_WLOCK(vp->v_object); 3402 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); 3403 VM_OBJECT_WUNLOCK(vp->v_object); 3404 } 3405 3406 /* Now, flush the buffer cache. */ 3407 ncl_flush(vp, MNT_WAIT, curthread, 0, 0); 3408 3409 /* And, finally, make sure that n_mtime is up to date. */ 3410 np = VTONFS(vp); 3411 mtx_lock(&np->n_mtx); 3412 np->n_mtime = np->n_vattr.na_mtime; 3413 mtx_unlock(&np->n_mtx); 3414 3415 vp->v_vflag |= VV_TEXT; 3416 return (0); 3417 } 3418 3419 /* 3420 * Return POSIX pathconf information applicable to nfs filesystems. 
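 * Only a handful of names are actually asked of the server (via the
 * NFSv3 Pathconf RPC or an NFSv4 Getattr); the rest are answered
 * locally with fixed values.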
 */
static int
nfs_pathconf(struct vop_pathconf_args *ap)
{
	struct nfsv3_pathconf pc;
	struct nfsvattr nfsva;
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;
	int attrflag, error;

	if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
	    ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
	    ap->a_name == _PC_NO_TRUNC)) ||
	    (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
		/*
		 * Since only the above 4 a_names are returned by the NFSv3
		 * Pathconf RPC, there is no point in doing it for others.
		 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
		 * be used for _PC_ACL_NFS4 as well.
		 */
		error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
		    &attrflag, NULL);
		if (attrflag != 0)
			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
			    1);
		if (error != 0)
			return (error);
	} else {
		/*
		 * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
		 * just fake them.
		 */
		pc.pc_linkmax = LINK_MAX;
		pc.pc_namemax = NFS_MAXNAMLEN;
		pc.pc_notrunc = 1;
		pc.pc_chownrestricted = 1;
		pc.pc_caseinsensitive = 0;
		pc.pc_casepreserving = 1;
		error = 0;
	}
	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = pc.pc_linkmax;
		break;
	case _PC_NAME_MAX:
		*ap->a_retval = pc.pc_namemax;
		break;
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = pc.pc_chownrestricted;
		break;
	case _PC_NO_TRUNC:
		*ap->a_retval = pc.pc_notrunc;
		break;
	case _PC_ACL_EXTENDED:
		*ap->a_retval = 0;
		break;
	case _PC_ACL_NFS4:
		if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL))
			*ap->a_retval = 1;
		else
			*ap->a_retval = 0;
		break;
	case _PC_ACL_PATH_MAX:
		if (NFS_ISV4(vp))
			*ap->a_retval = ACL_MAX_ENTRIES;
		else
			*ap->a_retval = 3;
		break;
	case _PC_MAC_PRESENT:
		*ap->a_retval = 0;
		break;
	case _PC_PRIO_IO:
		*ap->a_retval = 0;
		break;
	case _PC_SYNC_IO:
		*ap->a_retval = 0;
		break;
	case _PC_ALLOC_SIZE_MIN:
		*ap->a_retval = vp->v_mount->mnt_stat.f_bsize;
		break;
	case _PC_FILESIZEBITS:
		if (NFS_ISV34(vp))
			*ap->a_retval = 64;
		else
			*ap->a_retval = 32;
		break;
	case _PC_REC_INCR_XFER_SIZE:
		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
		break;
	case _PC_REC_MAX_XFER_SIZE:
		*ap->a_retval = -1;	/* means ``unlimited'' */
		break;
	case _PC_REC_MIN_XFER_SIZE:
		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
		break;
	case _PC_REC_XFER_ALIGN:
		*ap->a_retval = PAGE_SIZE;
		break;
	case _PC_SYMLINK_MAX:
		*ap->a_retval = NFS_MAXPATHLEN;
		break;

	default:
		error = vop_stdpathconf(ap);
		break;
	}
	return (error);
}