/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * vnode op calls for Sun NFS version 2, 3 and 4
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/namei.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/signalvar.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <fs/nfs/nfsport.h>
#include <fs/nfsclient/nfsnode.h>
#include <fs/nfsclient/nfsmount.h>
#include <fs/nfsclient/nfs.h>
#include <fs/nfsclient/nfs_lock.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

/* Defs */
#define	TRUE	1
#define	FALSE	0

extern struct nfsstats newnfsstats;
MALLOC_DECLARE(M_NEWNFSREQ);
vop_advlock_t *ncl_advlock_p = ncl_dolock;

/*
 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
 * calls are not in getblk() and brelse() so that they would not be necessary
 * here.
 */
#ifndef B_VMIO
#define	vfs_busy_pages(bp, f)
#endif

static vop_read_t	nfsfifo_read;
static vop_write_t	nfsfifo_write;
static vop_close_t	nfsfifo_close;
static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
		    struct thread *);
static vop_lookup_t	nfs_lookup;
static vop_create_t	nfs_create;
static vop_mknod_t	nfs_mknod;
static vop_open_t	nfs_open;
static vop_close_t	nfs_close;
static vop_access_t	nfs_access;
static vop_getattr_t	nfs_getattr;
static vop_setattr_t	nfs_setattr;
static vop_read_t	nfs_read;
static vop_fsync_t	nfs_fsync;
static vop_remove_t	nfs_remove;
static vop_link_t	nfs_link;
static vop_rename_t	nfs_rename;
static vop_mkdir_t	nfs_mkdir;
static vop_rmdir_t	nfs_rmdir;
static vop_symlink_t	nfs_symlink;
static vop_readdir_t	nfs_readdir;
static vop_strategy_t	nfs_strategy;
static vop_lock1_t	nfs_lock1;
static int	nfs_lookitup(struct vnode *, char *, int,
		    struct ucred *, struct thread *, struct nfsnode **);
static int	nfs_sillyrename(struct vnode *, struct vnode *,
		    struct componentname *);
static vop_access_t	nfsspec_access;
static vop_readlink_t	nfs_readlink;
static vop_print_t	nfs_print;
static vop_advlock_t	nfs_advlock;
static vop_advlockasync_t	nfs_advlockasync;
static vop_getacl_t	nfs_getacl;
static vop_setacl_t	nfs_setacl;

/*
 * Global vfs data structures for nfs
 */
struct vop_vector newnfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_advlockasync =	nfs_advlockasync,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		ncl_getpages,
	.vop_putpages =		ncl_putpages,
	.vop_inactive =		ncl_inactive,
	.vop_link =		nfs_link,
	.vop_lock1 =		nfs_lock1,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		ncl_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		ncl_write,
	.vop_getacl =		nfs_getacl,
	.vop_setacl =		nfs_setacl,
};

struct vop_vector newnfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		ncl_inactive,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		ncl_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};

static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct vattr *vap);
static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td);
static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
    char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
    char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
    struct componentname *scnp, struct sillyrename *sp);

/*
 * Global variables
 */
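/*
 * A note on DIRHDSIZ below: it is the size of the fixed, name-independent
 * header portion of a struct dirent, i.e. everything that precedes the
 * d_name[MAXNAMLEN + 1] buffer, a helper historically used when packing
 * directory entries of varying name lengths.
 */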
#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

SYSCTL_DECL(_vfs_newnfs);

static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
    &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
    &nfs_prime_access_cache, 0,
    "Prime NFS ACCESS cache when fetching attributes");

static int	newnfs_commit_on_close = 0;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
    &newnfs_commit_on_close, 0, "write+commit on close, else only write");

static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
    &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

int newnfs_directio_enable = 0;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, directio_enable, CTLFLAG_RW,
    &newnfs_directio_enable, 0, "Enable NFS directio");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
 * this by default to prevent DoS attacks - to prevent a malicious user from
 * opening up files O_DIRECT preventing other users from mmap'ing these
 * files.  "Protected" environments where stricter consistency guarantees are
 * required can disable this knob.  The process that opened the file O_DIRECT
 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
 * meaningful.
 */
int newnfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, directio_allow_mmap, CTLFLAG_RW,
    &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");

#if 0
SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
    &newnfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");

SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
    &newnfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
#endif

#define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY	\
			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)

/*
 * SMP Locking Note :
 * The list of locks after the description of the lock is the ordering
 * of other locks acquired with the lock held.
 * np->n_mtx : Protects the fields in the nfsnode.
 *       VM Object Lock
 *       VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
 *       rep->r_mtx
 * ncl_iod_mutex : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
 *       nmp->nm_mtx
 *       rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
 */
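
/*
 * nfs34_access_otw() below does the ACCESS RPC over the wire and caches
 * the result in the nfsnode's small per-uid cache,
 * n_accesscache[NFS_ACCESSCACHESIZE], where each entry holds the uid,
 * the access bits the server granted and a timestamp.  When no entry
 * matches the credential's uid, the least recently updated entry (the
 * one with the smallest stamp) is, in effect, recycled for it.
 */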
264 */ 265 266 static int 267 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 268 struct ucred *cred, u_int32_t *retmode) 269 { 270 int error = 0, attrflag, i, lrupos; 271 u_int32_t rmode; 272 struct nfsnode *np = VTONFS(vp); 273 struct nfsvattr nfsva; 274 275 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 276 &rmode, NULL); 277 if (attrflag) 278 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 279 if (!error) { 280 lrupos = 0; 281 mtx_lock(&np->n_mtx); 282 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 283 if (np->n_accesscache[i].uid == cred->cr_uid) { 284 np->n_accesscache[i].mode = rmode; 285 np->n_accesscache[i].stamp = time_second; 286 break; 287 } 288 if (i > 0 && np->n_accesscache[i].stamp < 289 np->n_accesscache[lrupos].stamp) 290 lrupos = i; 291 } 292 if (i == NFS_ACCESSCACHESIZE) { 293 np->n_accesscache[lrupos].uid = cred->cr_uid; 294 np->n_accesscache[lrupos].mode = rmode; 295 np->n_accesscache[lrupos].stamp = time_second; 296 } 297 mtx_unlock(&np->n_mtx); 298 if (retmode != NULL) 299 *retmode = rmode; 300 } else if (NFS_ISV4(vp)) { 301 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 302 } 303 return (error); 304 } 305 306 /* 307 * nfs access vnode op. 308 * For nfs version 2, just return ok. File accesses may fail later. 309 * For nfs version 3, use the access rpc to check accessibility. If file modes 310 * are changed on the server, accesses might still fail later. 311 */ 312 static int 313 nfs_access(struct vop_access_args *ap) 314 { 315 struct vnode *vp = ap->a_vp; 316 int error = 0, i, gotahit; 317 u_int32_t mode, wmode, rmode; 318 int v34 = NFS_ISV34(vp); 319 struct nfsnode *np = VTONFS(vp); 320 321 /* 322 * Disallow write attempts on filesystems mounted read-only; 323 * unless the file is a socket, fifo, or a block or character 324 * device resident on the filesystem. 325 */ 326 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 327 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 328 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 329 switch (vp->v_type) { 330 case VREG: 331 case VDIR: 332 case VLNK: 333 return (EROFS); 334 default: 335 break; 336 } 337 } 338 /* 339 * For nfs v3 or v4, check to see if we have done this recently, and if 340 * so return our cached result instead of making an ACCESS call. 341 * If not, do an access rpc, otherwise you are stuck emulating 342 * ufs_access() locally using the vattr. This may not be correct, 343 * since the server may apply other access criteria such as 344 * client uid-->server uid mapping that we do not know about. 
/*
 * nfs access vnode op.
 * For nfs version 2, just return ok. File accesses may fail later.
 * For nfs version 3 or 4, use the access rpc to check accessibility. If file
 * modes are changed on the server, accesses might still fail later.
 */
static int
nfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0, i, gotahit;
	u_int32_t mode, wmode, rmode;
	int v34 = NFS_ISV34(vp);
	struct nfsnode *np = VTONFS(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS |
	    VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL |
	    VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	/*
	 * For nfs v3 or v4, check to see if we have done this recently, and if
	 * so return our cached result instead of making an ACCESS call.
	 * If not, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about.
	 */
	if (v34) {
		if (ap->a_accmode & VREAD)
			mode = NFSACCESS_READ;
		else
			mode = 0;
		if (vp->v_type != VDIR) {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
			if (ap->a_accmode & VAPPEND)
				mode |= NFSACCESS_EXTEND;
			if (ap->a_accmode & VEXEC)
				mode |= NFSACCESS_EXECUTE;
			if (ap->a_accmode & VDELETE)
				mode |= NFSACCESS_DELETE;
		} else {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
			if (ap->a_accmode & VAPPEND)
				mode |= NFSACCESS_EXTEND;
			if (ap->a_accmode & VEXEC)
				mode |= NFSACCESS_LOOKUP;
			if (ap->a_accmode & VDELETE)
				mode |= NFSACCESS_DELETE;
			if (ap->a_accmode & VDELETE_CHILD)
				mode |= NFSACCESS_MODIFY;
		}
		/* XXX safety belt, only make blanket request if caching */
		if (nfsaccess_cache_timeout > 0) {
			wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
			    NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
			    NFSACCESS_DELETE | NFSACCESS_LOOKUP;
		} else {
			wmode = mode;
		}

		/*
		 * Does our cached result allow us to give a definite yes to
		 * this request?
		 */
		gotahit = 0;
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
				if (time_second < (np->n_accesscache[i].stamp
				    + nfsaccess_cache_timeout) &&
				    (np->n_accesscache[i].mode & mode) == mode) {
					NFSINCRGLOBAL(newnfsstats.accesscache_hits);
					gotahit = 1;
				}
				break;
			}
		}
		mtx_unlock(&np->n_mtx);
		if (gotahit == 0) {
			/*
			 * Either a no, or a don't know.  Go to the wire.
			 */
			NFSINCRGLOBAL(newnfsstats.accesscache_misses);
			error = nfs34_access_otw(vp, wmode, ap->a_td,
			    ap->a_cred, &rmode);
			if (!error &&
			    (rmode & mode) != mode)
				error = EACCES;
		}
		return (error);
	} else {
		if ((error = nfsspec_access(ap)) != 0) {
			return (error);
		}
		/*
		 * Attempt to prevent a mapped root from accessing a file
		 * which it shouldn't.  We try to read a byte from the file
		 * if the user is root and the file is not zero length.
		 * After calling nfsspec_access, we should have the correct
		 * file size cached.
		 */
		mtx_lock(&np->n_mtx);
		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
		    && VTONFS(vp)->n_size > 0) {
			struct iovec aiov;
			struct uio auio;
			char buf[1];

			mtx_unlock(&np->n_mtx);
			aiov.iov_base = buf;
			aiov.iov_len = 1;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = 0;
			auio.uio_resid = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = ap->a_td;

			if (vp->v_type == VREG)
				error = ncl_readrpc(vp, &auio, ap->a_cred);
			else if (vp->v_type == VDIR) {
				char* bp;
				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
				aiov.iov_base = bp;
				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
				error = ncl_readdirrpc(vp, &auio, ap->a_cred,
				    ap->a_td);
				free(bp, M_TEMP);
			} else if (vp->v_type == VLNK)
				error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
			else
				error = EACCES;
		} else
			mtx_unlock(&np->n_mtx);
		return (error);
	}
}

/*
 * nfs open vnode op
 * Check to see if the type is ok
 * and that deletion is not in progress.
 * For paged in text files, you will need to flush the page cache
 * if consistency is lost.
 */
/* ARGSUSED */
static int
nfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;
	int fmode = ap->a_mode;

	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
		return (EOPNOTSUPP);

	/*
	 * For NFSv4, we need to do the Open Op before cache validation,
	 * so that we conform to RFC3530 Sec. 9.3.1.
	 */
	if (NFS_ISV4(vp)) {
		error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
		if (error) {
			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
			    (gid_t)0);
			return (error);
		}
	}

	/*
	 * Now, if this Open will be doing reading, re-validate/flush the
	 * cache, so that Close/Open coherency is maintained.
	 */
	if ((fmode & FREAD) && (!NFS_ISV4(vp) || nfscl_mustflush(vp))) {
		mtx_lock(&np->n_mtx);
		if (np->n_flag & NMODIFIED) {
			mtx_unlock(&np->n_mtx);
			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error == EINTR || error == EIO) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			np->n_attrstamp = 0;
			if (vp->v_type == VDIR)
				np->n_direofoffset = 0;
			error = VOP_GETATTR(vp, &vattr, ap->a_cred);
			if (error) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_mtime = vattr.va_mtime;
			if (NFS_ISV4(vp))
				np->n_change = vattr.va_filerev;
			mtx_unlock(&np->n_mtx);
		} else {
			struct thread *td = curthread;

			if (np->n_ac_ts_syscalls != td->td_syscalls ||
			    np->n_ac_ts_tid != td->td_tid ||
			    td->td_proc == NULL ||
			    np->n_ac_ts_pid != td->td_proc->p_pid) {
				np->n_attrstamp = 0;
			}
			mtx_unlock(&np->n_mtx);
			error = VOP_GETATTR(vp, &vattr, ap->a_cred);
			if (error) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			mtx_lock(&np->n_mtx);
			if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
			    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
				if (vp->v_type == VDIR)
					np->n_direofoffset = 0;
				mtx_unlock(&np->n_mtx);
				error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
				if (error == EINTR || error == EIO) {
					if (NFS_ISV4(vp))
						(void) nfsrpc_close(vp, 0,
						    ap->a_td);
					return (error);
				}
				mtx_lock(&np->n_mtx);
				np->n_mtime = vattr.va_mtime;
				if (NFS_ISV4(vp))
					np->n_change = vattr.va_filerev;
			}
			mtx_unlock(&np->n_mtx);
		}
	}

	/*
	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
	 */
	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		if (np->n_directio_opens == 0) {
			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_flag |= NNONCACHE;
		} else {
			mtx_lock(&np->n_mtx);
		}
		np->n_directio_opens++;
		mtx_unlock(&np->n_mtx);
	}
	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
	return (0);
}
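
/*
 * Close/Open coherency, in brief: nfs_open() above compares the
 * attributes fetched at open time (the modify time, or the NFSv4 change
 * attribute) against the values cached from the last use of the file,
 * and a mismatch invalidates the buffer cache, so a newly opened file
 * reflects any writes that other clients flushed to the server when
 * they closed it.
 */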

/*
 * nfs close vnode op
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 * should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
 *                     or commit them (this satisfies 1 and 2 except for the
 *                     case where the server crashes after this close but
 *                     before the commit RPC, which is felt to be "good
 *                     enough").  Changing the last argument to ncl_flush() to
 *                     a 1 would force a commit operation, if it is felt a
 *                     commit is necessary now.
 * for NFS Version 4 - flush the dirty buffers and commit them, if
 *                     nfscl_mustflush() says this is necessary.
 *                     It is necessary if there is no write delegation held,
 *                     in order to satisfy open/close coherency.
 *                     If the file isn't cached on local stable storage,
 *                     it may be necessary in order to detect "out of space"
 *                     errors from the server, if the write delegation
 *                     issued by the server doesn't allow the file to grow.
 */
/* ARGSUSED */
static int
nfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsvattr nfsva;
	struct ucred *cred;
	int error = 0, ret, localcred = 0;
	int fmode = ap->a_fflag;

	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF))
		return (0);
	/*
	 * During shutdown, a_cred isn't valid, so just use root.
	 */
	if (ap->a_cred == NOCRED) {
		cred = newnfs_getcred();
		localcred = 1;
	} else {
		cred = ap->a_cred;
	}
	if (vp->v_type == VREG) {
		/*
		 * Examine and clean dirty pages, regardless of NMODIFIED.
		 * This closes a major hole in close-to-open consistency.
		 * We want to push out all dirty pages (and buffers) on
		 * close, regardless of whether they were dirtied by
		 * mmap'ed writes or via write().
		 */
		if (nfs_clean_pages_on_close && vp->v_object) {
			VM_OBJECT_LOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, 0, 0, 0);
			VM_OBJECT_UNLOCK(vp->v_object);
		}
		mtx_lock(&np->n_mtx);
		if (np->n_flag & NMODIFIED) {
			mtx_unlock(&np->n_mtx);
			if (NFS_ISV3(vp)) {
				/*
				 * Under NFSv3 we have dirty buffers to
				 * dispose of.  We must flush them to the
				 * NFS server.  We have the option of waiting
				 * all the way through the commit rpc or just
				 * waiting for the initial write.  The
				 * default is to only wait through the
				 * initial write so the data is in the
				 * server's cache, which is roughly similar
				 * to the state a standard disk subsystem
				 * leaves the file in on close().
				 *
				 * We cannot clear the NMODIFIED bit in
				 * np->n_flag due to potential races with
				 * other processes, and certainly cannot
				 * clear it if we don't commit.
				 * These races occur when there is no longer
				 * the old traditional vnode locking
				 * implemented for Vnode Ops.
				 */
				int cm = newnfs_commit_on_close ? 1 : 0;
				error = ncl_flush(vp, MNT_WAIT, cred,
				    ap->a_td, cm, 0);
				/* np->n_flag &= ~NMODIFIED; */
			} else if (NFS_ISV4(vp)) {
				if (nfscl_mustflush(vp)) {
					int cm = newnfs_commit_on_close ? 1 : 0;
					error = ncl_flush(vp, MNT_WAIT, cred,
					    ap->a_td, cm, 0);
					/*
					 * as above w.r.t races when clearing
					 * NMODIFIED.
					 * np->n_flag &= ~NMODIFIED;
					 */
				}
			} else
				error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			mtx_lock(&np->n_mtx);
		}
		/*
		 * Invalidate the attribute cache in all cases.
		 * An open is going to fetch fresh attrs any way, other procs
		 * on this node that have file open will be forced to do an
		 * otw attr fetch, but this is safe.
		 * --> A user found that their RPC count dropped by 20% when
		 * this was commented out and I can't see any requirement
		 * for it, so I've disabled it when negative lookups are
		 * enabled. (What does this have to do with negative lookup
		 * caching? Well nothing, except it was reported by the
		 * same user that needed negative lookup caching and I wanted
		 * there to be a way to disable it to see if it
		 * is the cause of some caching/coherency issue that might
		 * crop up.)
		 */
		if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0)
			np->n_attrstamp = 0;
		if (np->n_flag & NWRITEERR) {
			np->n_flag &= ~NWRITEERR;
			error = np->n_error;
		}
		mtx_unlock(&np->n_mtx);
	}

	if (NFS_ISV4(vp)) {
		/*
		 * Get attributes so "change" is up to date.
		 */
		if (!error) {
			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
			    NULL);
			if (!ret) {
				np->n_change = nfsva.na_filerev;
				(void) nfscl_loadattrcache(&vp, &nfsva, NULL,
				    NULL, 0, 0);
			}
		}

		/*
		 * and do the close.
		 */
		ret = nfsrpc_close(vp, 0, ap->a_td);
		if (!error && ret)
			error = ret;
		if (error)
			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
			    (gid_t)0);
	}
	if (newnfs_directio_enable)
		KASSERT((np->n_directio_asyncwr == 0),
		    ("nfs_close: dirty unflushed (%d) directio buffers\n",
		    np->n_directio_asyncwr));
	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		mtx_lock(&np->n_mtx);
		KASSERT((np->n_directio_opens > 0),
		    ("nfs_close: unexpected value (0) of n_directio_opens\n"));
		np->n_directio_opens--;
		if (np->n_directio_opens == 0)
			np->n_flag &= ~NNONCACHE;
		mtx_unlock(&np->n_mtx);
	}
	if (localcred)
		NFSFREECRED(cred);
	return (error);
}
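
/*
 * A note on the nfs_prime_access_cache path in nfs_getattr() below: an
 * NFSv3/v4 ACCESS reply piggybacks post-operation attributes, so when
 * the sysctl is set an attribute cache miss is serviced with an ACCESS
 * RPC rather than a plain GETATTR, filling both the access cache and
 * the attribute cache with a single round trip.
 */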

/*
 * nfs getattr call from vfs.
 */
static int
nfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;	/* XXX */
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct nfsvattr nfsva;
	struct vattr *vap = ap->a_vap;
	struct vattr vattr;

	/*
	 * Update local times for special files.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD))
		np->n_flag |= NCHG;
	mtx_unlock(&np->n_mtx);
	/*
	 * First look in the cache.
	 */
	if (ncl_getattrcache(vp, &vattr) == 0) {
		vap->va_type = vattr.va_type;
		vap->va_mode = vattr.va_mode;
		vap->va_nlink = vattr.va_nlink;
		vap->va_uid = vattr.va_uid;
		vap->va_gid = vattr.va_gid;
		vap->va_fsid = vattr.va_fsid;
		vap->va_fileid = vattr.va_fileid;
		vap->va_size = vattr.va_size;
		vap->va_blocksize = vattr.va_blocksize;
		vap->va_atime = vattr.va_atime;
		vap->va_mtime = vattr.va_mtime;
		vap->va_ctime = vattr.va_ctime;
		vap->va_gen = vattr.va_gen;
		vap->va_flags = vattr.va_flags;
		vap->va_rdev = vattr.va_rdev;
		vap->va_bytes = vattr.va_bytes;
		vap->va_filerev = vattr.va_filerev;
		/*
		 * Get the local modify time for the case of a write
		 * delegation.
		 */
		nfscl_deleggetmodtime(vp, &vap->va_mtime);
		return (0);
	}

	if (NFS_ISV34(vp) && nfs_prime_access_cache &&
	    nfsaccess_cache_timeout > 0) {
		NFSINCRGLOBAL(newnfsstats.accesscache_misses);
		nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
		if (ncl_getattrcache(vp, ap->a_vap) == 0) {
			nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
			return (0);
		}
	}
	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
	if (!error)
		error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
	if (!error) {
		/*
		 * Get the local modify time for the case of a write
		 * delegation.
		 */
		nfscl_deleggetmodtime(vp, &vap->va_mtime);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}
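
/*
 * A note on truncation in nfs_setattr() below: the local idea of the
 * file size (np->n_size and the vnode pager) is adjusted before the
 * SETATTR RPC is sent, and is rolled back to the saved size (tsize)
 * if the RPC fails.
 */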

/*
 * nfs setattr call.
 */
static int
nfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct thread *td = curthread;	/* XXX */
	struct vattr *vap = ap->a_vap;
	int error = 0;
	u_quad_t tsize;

#ifndef nolint
	tsize = (u_quad_t)0;
#endif

	/*
	 * Setting of flags and marking of atimes are not supported.
	 */
	if (vap->va_flags != VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Disallow write attempts if the filesystem is mounted read-only.
	 */
	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			if (vap->va_mtime.tv_sec == VNOVAL &&
			    vap->va_atime.tv_sec == VNOVAL &&
			    vap->va_mode == (mode_t)VNOVAL &&
			    vap->va_uid == (uid_t)VNOVAL &&
			    vap->va_gid == (gid_t)VNOVAL)
				return (0);
			vap->va_size = VNOVAL;
			break;
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			/*
			 * We run vnode_pager_setsize() early (why?),
			 * we must set np->n_size now to avoid vinvalbuf
			 * V_SAVE races that might setsize a lower
			 * value.
			 */
			mtx_lock(&np->n_mtx);
			tsize = np->n_size;
			mtx_unlock(&np->n_mtx);
			error = ncl_meta_setsize(vp, ap->a_cred, td,
			    vap->va_size);
			mtx_lock(&np->n_mtx);
			if (np->n_flag & NMODIFIED) {
				tsize = np->n_size;
				mtx_unlock(&np->n_mtx);
				if (vap->va_size == 0)
					error = ncl_vinvalbuf(vp, 0, td, 1);
				else
					error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
				if (error) {
					vnode_pager_setsize(vp, tsize);
					return (error);
				}
				/*
				 * Call nfscl_delegmodtime() to set the modify
				 * time locally, as required.
				 */
				nfscl_delegmodtime(vp);
			} else
				mtx_unlock(&np->n_mtx);
			/*
			 * np->n_size has already been set to vap->va_size
			 * in ncl_meta_setsize(). We must set it again since
			 * nfs_loadattrcache() could be called through
			 * ncl_meta_setsize() and could modify np->n_size.
			 */
			mtx_lock(&np->n_mtx);
			np->n_vattr.na_size = np->n_size = vap->va_size;
			mtx_unlock(&np->n_mtx);
		}
	} else {
		mtx_lock(&np->n_mtx);
		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
			mtx_unlock(&np->n_mtx);
			if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
			    (error == EINTR || error == EIO))
				return (error);
		} else
			mtx_unlock(&np->n_mtx);
	}
	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
	if (error && vap->va_size != VNOVAL) {
		mtx_lock(&np->n_mtx);
		np->n_size = np->n_vattr.na_size = tsize;
		vnode_pager_setsize(vp, tsize);
		mtx_unlock(&np->n_mtx);
	}
	return (error);
}

/*
 * Do an nfs setattr rpc.
 */
static int
nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	struct nfsnode *np = VTONFS(vp);
	int error, ret, attrflag, i;
	struct nfsvattr nfsva;

	if (NFS_ISV34(vp)) {
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
			np->n_accesscache[i].stamp = 0;
		np->n_flag |= NDELEGMOD;
		mtx_unlock(&np->n_mtx);
	}
	error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
	    NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
	return (error);
}
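
/*
 * A note on how nfs_lookup() below uses the name cache: a positive
 * entry is only trusted while the cached vnode's change time (n_ctime)
 * still matches the file's attributes (or a delegation is held), and a
 * negative entry only while the parent directory's modification time
 * (n_dmtime) matches and the entry is younger than nm_negnametimeo.
 * Anything else falls through to a LOOKUP RPC.
 */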

/*
 * nfs lookup call, one step at a time...
 * First look in cache
 * If not found, unlock the directory nfsnode and do the rpc
 */
static int
nfs_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct mount *mp = dvp->v_mount;
	int flags = cnp->cn_flags;
	struct vnode *newvp;
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error = 0, attrflag, dattrflag, ltype;
	struct thread *td = cnp->cn_thread;
	struct nfsfh *nfhp;
	struct nfsvattr dnfsva, nfsva;
	struct vattr vattr;
	time_t dmtime;

	*vpp = NULLVP;
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(mp);
	np = VTONFS(dvp);

	/* For NFSv4, wait until any remove is done. */
	mtx_lock(&np->n_mtx);
	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
		np->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
	}
	mtx_unlock(&np->n_mtx);

	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
		return (error);
	error = cache_lookup(dvp, vpp, cnp);
	if (error > 0 && error != ENOENT)
		return (error);
	if (error == -1) {
		/*
		 * We only accept a positive hit in the cache if the
		 * change time of the file matches our cached copy.
		 * Otherwise, we discard the cache entry and fallback
		 * to doing a lookup RPC.
		 */
		newvp = *vpp;
		if (nfscl_nodeleg(newvp, 0) == 0 ||
		    (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred)
		    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime)) {
			NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
			if (cnp->cn_nameiop != LOOKUP &&
			    (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			return (0);
		}
		cache_purge(newvp);
		if (dvp != newvp)
			vput(newvp);
		else
			vrele(newvp);
		*vpp = NULLVP;
	} else if (error == ENOENT) {
		if (dvp->v_iflag & VI_DOOMED)
			return (ENOENT);
		/*
		 * We only accept a negative hit in the cache if the
		 * modification time of the parent directory matches
		 * our cached copy.  Otherwise, we discard all of the
		 * negative cache entries for this directory.  We also
		 * only trust -ve cache entries for less than
		 * nm_negnametimeo seconds.
		 */
		if ((u_int)(ticks - np->n_dmtime_ticks) <
		    (nmp->nm_negnametimeo * hz) &&
		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
		    vattr.va_mtime.tv_sec == np->n_dmtime) {
			NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
			return (ENOENT);
		}
		cache_purge_negative(dvp);
		mtx_lock(&np->n_mtx);
		np->n_dmtime = 0;
		mtx_unlock(&np->n_mtx);
	}

	/*
	 * Cache the modification time of the parent directory in case
	 * the lookup fails and results in adding the first negative
	 * name cache entry for the directory.  Since this is reading
	 * a single time_t, don't bother with locking.  The
	 * modification time may be a bit stale, but it must be read
	 * before performing the lookup RPC to prevent a race where
	 * another lookup updates the timestamp on the directory after
	 * the lookup RPC has been performed on the server but before
	 * n_dmtime is set at the end of this function.
	 */
	dmtime = np->n_vattr.na_mtime.tv_sec;
	error = 0;
	newvp = NULLVP;
	NFSINCRGLOBAL(newnfsstats.lookupcache_misses);
	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
	    NULL);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (error) {
		if (newvp != NULLVP) {
			vput(newvp);
			*vpp = NULLVP;
		}

		if (error != ENOENT) {
			if (NFS_ISV4(dvp))
				error = nfscl_maperr(td, error, (uid_t)0,
				    (gid_t)0);
			return (error);
		}

		/* The requested file was not found. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN)) {
			/*
			 * XXX: UFS does a full VOP_ACCESS(dvp,
			 * VWRITE) here instead of just checking
			 * MNT_RDONLY.
			 */
			if (mp->mnt_flag & MNT_RDONLY)
				return (EROFS);
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}

		if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
			/*
			 * Maintain n_dmtime as the modification time
			 * of the parent directory when the oldest -ve
			 * name cache entry for this directory was
			 * added.  If a -ve cache entry has already
			 * been added with a newer modification time
			 * by a concurrent lookup, then don't bother
			 * adding a cache entry.  The modification
			 * time of the directory might have changed
			 * due to the file this lookup failed to find
			 * being created.  In that case a subsequent
			 * lookup would incorrectly use the entry
			 * added here instead of doing an extra
			 * lookup.
			 */
			mtx_lock(&np->n_mtx);
			if (np->n_dmtime <= dmtime) {
				if (np->n_dmtime == 0) {
					np->n_dmtime = dmtime;
					np->n_dmtime_ticks = ticks;
				}
				mtx_unlock(&np->n_mtx);
				cache_enter(dvp, NULL, cnp);
			} else
				mtx_unlock(&np->n_mtx);
		}
		return (ENOENT);
	}

	/*
	 * Handle RENAME case...
	 */
	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
			FREE((caddr_t)nfhp, M_NFSFH);
			return (EISDIR);
		}
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
		*vpp = newvp;
		cnp->cn_flags |= SAVENAME;
		return (0);
	}

	if (flags & ISDOTDOT) {
		ltype = VOP_ISLOCKED(dvp);
		error = vfs_busy(mp, MBF_NOWAIT);
		if (error != 0) {
			vfs_ref(mp);
			VOP_UNLOCK(dvp, 0);
			error = vfs_busy(mp, 0);
			vn_lock(dvp, ltype | LK_RETRY);
			vfs_rel(mp);
			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
				vfs_unbusy(mp);
				error = ENOENT;
			}
			if (error != 0)
				return (error);
		}
		VOP_UNLOCK(dvp, 0);
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error == 0)
			newvp = NFSTOV(np);
		vfs_unbusy(mp);
		if (newvp != dvp)
			vn_lock(dvp, ltype | LK_RETRY);
		if (dvp->v_iflag & VI_DOOMED) {
			if (error == 0) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
			error = ENOENT;
		}
		if (error != 0)
			return (error);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
		FREE((caddr_t)nfhp, M_NFSFH);
		VREF(dvp);
		newvp = dvp;
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else {
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	if ((cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
		np->n_ctime = np->n_vattr.na_vattr.va_ctime.tv_sec;
		cache_enter(dvp, newvp, cnp);
	}
	*vpp = newvp;
	return (0);
}

/*
 * nfs read call.
 * Just call ncl_bioread() to do the work.
 */
static int
nfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 */
static int
nfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);
	return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by ncl_doio() from below the buffer cache.
 */
int
ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;

	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
	    &attrflag, NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs read rpc call
 * Ditto above
 */
int
ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;

	error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag,
	    NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs write call
 */
int
ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    int *iomode, int *must_commit, int called_from_strategy)
{
	struct nfsvattr nfsva;
	int error = 0, attrflag, ret;
	u_char verf[NFSX_VERF];
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	*must_commit = 0;
	error = nfsrpc_write(vp, uiop, iomode, verf, cred,
	    uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy);
	NFSLOCKMNT(nmp);
	if (!error && NFSHASWRITEVERF(nmp) &&
	    NFSBCMP(verf, nmp->nm_verf, NFSX_VERF)) {
		*must_commit = 1;
		NFSBCOPY(verf, nmp->nm_verf, NFSX_VERF);
	}
	NFSUNLOCKMNT(nmp);
	if (attrflag) {
		if (VTONFS(vp)->n_flag & ND_NFSV4)
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
			    1);
		else
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
			    1);
		if (ret && !error)
			error = ret;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
		*iomode = NFSWRITE_FILESYNC;
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}
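
/*
 * A note on the write verifier handling in ncl_writerpc() above: an
 * NFSv3/v4 server returns a verifier that changes whenever the server
 * reboots.  If the verifier in a WRITE reply differs from the one
 * recorded for the mount, data written UNSTABLE since the last commit
 * may have been lost, so *must_commit tells the caller that dirty
 * buffers have to be written (and committed) again.
 */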
1347 */ 1348 static int 1349 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1350 struct vattr *vap) 1351 { 1352 struct nfsvattr nfsva, dnfsva; 1353 struct vnode *newvp = NULL; 1354 struct nfsnode *np = NULL, *dnp; 1355 struct nfsfh *nfhp; 1356 struct vattr vattr; 1357 int error = 0, attrflag, dattrflag; 1358 u_int32_t rdev; 1359 1360 if (vap->va_type == VCHR || vap->va_type == VBLK) 1361 rdev = vap->va_rdev; 1362 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1363 rdev = 0xffffffff; 1364 else 1365 return (EOPNOTSUPP); 1366 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1367 return (error); 1368 error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, 1369 rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva, 1370 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); 1371 if (!error) { 1372 if (!nfhp) 1373 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1374 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, 1375 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1376 NULL); 1377 if (nfhp) 1378 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1379 cnp->cn_thread, &np, NULL); 1380 } 1381 if (dattrflag) 1382 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1383 if (!error) { 1384 newvp = NFSTOV(np); 1385 if (attrflag) 1386 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1387 0, 1); 1388 } 1389 if (!error) { 1390 if ((cnp->cn_flags & MAKEENTRY)) 1391 cache_enter(dvp, newvp, cnp); 1392 *vpp = newvp; 1393 } else if (NFS_ISV4(dvp)) { 1394 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, 1395 vap->va_gid); 1396 } 1397 dnp = VTONFS(dvp); 1398 mtx_lock(&dnp->n_mtx); 1399 dnp->n_flag |= NMODIFIED; 1400 if (!dattrflag) 1401 dnp->n_attrstamp = 0; 1402 mtx_unlock(&dnp->n_mtx); 1403 return (error); 1404 } 1405 1406 /* 1407 * nfs mknod vop 1408 * just call nfs_mknodrpc() to do the work. 1409 */ 1410 /* ARGSUSED */ 1411 static int 1412 nfs_mknod(struct vop_mknod_args *ap) 1413 { 1414 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1415 } 1416 1417 static struct mtx nfs_cverf_mtx; 1418 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", 1419 MTX_DEF); 1420 1421 static nfsquad_t 1422 nfs_get_cverf(void) 1423 { 1424 static nfsquad_t cverf; 1425 nfsquad_t ret; 1426 static int cverf_initialized = 0; 1427 1428 mtx_lock(&nfs_cverf_mtx); 1429 if (cverf_initialized == 0) { 1430 cverf.lval[0] = arc4random(); 1431 cverf.lval[1] = arc4random(); 1432 cverf_initialized = 1; 1433 } else 1434 cverf.qval++; 1435 ret = cverf; 1436 mtx_unlock(&nfs_cverf_mtx); 1437 1438 return (ret); 1439 } 1440 1441 /* 1442 * nfs file create call 1443 */ 1444 static int 1445 nfs_create(struct vop_create_args *ap) 1446 { 1447 struct vnode *dvp = ap->a_dvp; 1448 struct vattr *vap = ap->a_vap; 1449 struct componentname *cnp = ap->a_cnp; 1450 struct nfsnode *np = NULL, *dnp; 1451 struct vnode *newvp = NULL; 1452 struct nfsmount *nmp; 1453 struct nfsvattr dnfsva, nfsva; 1454 struct nfsfh *nfhp; 1455 nfsquad_t cverf; 1456 int error = 0, attrflag, dattrflag, fmode = 0; 1457 struct vattr vattr; 1458 1459 /* 1460 * Oops, not for me.. 1461 */ 1462 if (vap->va_type == VSOCK) 1463 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1464 1465 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1466 return (error); 1467 if (vap->va_vaflags & VA_EXCLUSIVE) 1468 fmode |= O_EXCL; 1469 dnp = VTONFS(dvp); 1470 nmp = VFSTONFS(vnode_mount(dvp)); 1471 again: 1472 /* For NFSv4, wait until any remove is done. 

/*
 * nfs file create call
 */
static int
nfs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	struct nfsmount *nmp;
	struct nfsvattr dnfsva, nfsva;
	struct nfsfh *nfhp;
	nfsquad_t cverf;
	int error = 0, attrflag, dattrflag, fmode = 0;
	struct vattr vattr;

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK)
		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
	dnp = VTONFS(dvp);
	nmp = VFSTONFS(vnode_mount(dvp));
again:
	/* For NFSv4, wait until any remove is done. */
	mtx_lock(&dnp->n_mtx);
	while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
		dnp->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
	}
	mtx_unlock(&dnp->n_mtx);

	cverf = nfs_get_cverf();
	error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva,
	    &nfhp, &attrflag, &dattrflag, NULL);
	if (!error) {
		if (nfhp == NULL)
			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
			    NULL);
		if (nfhp != NULL)
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
			    cnp->cn_thread, &np, NULL);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (!error) {
		newvp = NFSTOV(np);
		if (attrflag)
			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (error) {
		if (newvp != NULL) {
			vrele(newvp);
			newvp = NULL;
		}
		if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
		    error == NFSERR_NOTSUPP) {
			fmode &= ~O_EXCL;
			goto again;
		}
	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
		if (nfscl_checksattr(vap, &nfsva)) {
			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
			    cnp->cn_thread, &nfsva, &attrflag, NULL);
			if (error && (vap->va_uid != (uid_t)VNOVAL ||
			    vap->va_gid != (gid_t)VNOVAL)) {
				/* try again without setting uid/gid */
				vap->va_uid = (uid_t)VNOVAL;
				vap->va_gid = (gid_t)VNOVAL;
				error = nfsrpc_setattr(newvp, vap, NULL,
				    cnp->cn_cred, cnp->cn_thread, &nfsva,
				    &attrflag, NULL);
			}
			if (attrflag)
				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
				    NULL, 0, 1);
		}
	}
	if (!error) {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	} else if (NFS_ISV4(dvp)) {
		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
		    vap->va_gid);
	}
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag)
		dnp->n_attrstamp = 0;
	mtx_unlock(&dnp->n_mtx);
	return (error);
}
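
/*
 * A note on sillyrename, used by nfs_remove() below: NFS servers have
 * no notion of an open-but-unlinked file, so removing a file that is
 * still in use locally is faked by renaming it to a unique temporary
 * name (traditionally of the form ".nfsXXXX") via nfs_sillyrename();
 * the renamed file is removed later, when the last reference goes away
 * (see ncl_removeit()).
 */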

/*
 * nfs file remove call
 * To try and make nfs semantics closer to ufs semantics, a file that has
 * other processes using the vnode is renamed instead of removed and then
 * removed later on the last close.
 * - If v_usecount > 1
 *	If a rename is not already in the works
 *	   call nfs_sillyrename() to set it up
 *   else
 *	do the remove rpc
 */
static int
nfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("nfs_remove: no name");
	if (vrefcnt(vp) < 1)
		panic("nfs_remove: bad v_usecount");
#endif
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized.  Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
		/* Do the rpc */
		if (error != EINTR && error != EIO)
			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	np->n_attrstamp = 0;
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 */
int
ncl_removeit(struct sillyrename *sp, struct vnode *vp)
{
	/*
	 * Make sure that the directory vnode is still valid.
	 * XXX we should lock sp->s_dvp here.
	 */
	if (sp->s_dvp->v_type == VBAD)
		return (0);
	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
	    sp->s_cred, NULL));
}

/*
 * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
 */
static int
nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td)
{
	struct nfsvattr dnfsva;
	struct nfsnode *dnp = VTONFS(dvp);
	int error = 0, dattrflag;

	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NREMOVEINPROG;
	mtx_unlock(&dnp->n_mtx);
	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
	    &dattrflag, NULL);
	mtx_lock(&dnp->n_mtx);
	if ((dnp->n_flag & NREMOVEWANT)) {
		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
		mtx_unlock(&dnp->n_mtx);
		wakeup((caddr_t)dnp);
	} else {
		dnp->n_flag &= ~NREMOVEINPROG;
		mtx_unlock(&dnp->n_mtx);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag)
		dnp->n_attrstamp = 0;
	mtx_unlock(&dnp->n_mtx);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}
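
/*
 * Note that nfs_remove() above (and nfs_rename() below) map ENOENT to
 * success: if the reply to a remove or rename is lost, the reply to the
 * retransmitted request is ENOENT because the first request already did
 * the work, and on a server without a duplicate request cache this
 * cannot be told apart from a genuine ENOENT, so success is assumed.
 */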

/*
 * nfs file rename call
 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct nfsnode *fnp = VTONFS(ap->a_fvp);
	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
	struct nfsv4node *newv4 = NULL;
	int error;

#ifdef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("nfs_rename: no name");
#endif
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		ncl_printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = vn_lock(fvp, LK_EXCLUSIVE)))
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file.  If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
	VOP_UNLOCK(fvp, 0);
	if (!error && tvp)
		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
	if (error)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
	    tcnp->cn_thread);

	if (!error) {
		/*
		 * For NFSv4, check to see if it is the same name and
		 * replace the name, if it is different.
		 */
		MALLOC(newv4, struct nfsv4node *,
		    sizeof (struct nfsv4node) +
		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
		    M_NFSV4NODE, M_WAITOK);
		mtx_lock(&tdnp->n_mtx);
		mtx_lock(&fnp->n_mtx);
		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
		     NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
		     tcnp->cn_namelen) ||
		     tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
		     NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
		     tdnp->n_fhp->nfh_len))) {
#ifdef notdef
			{ char nnn[100]; int nnnl;
			nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
			bcopy(tcnp->cn_nameptr, nnn, nnnl);
			nnn[nnnl] = '\0';
			printf("ren replace=%s\n", nnn);
			}
#endif
			FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
			fnp->n_v4 = newv4;
			newv4 = NULL;
			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			    tdnp->n_fhp->nfh_len);
			NFSBCOPY(tcnp->cn_nameptr,
			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
		}
		mtx_unlock(&tdnp->n_mtx);
		mtx_unlock(&fnp->n_mtx);
		if (newv4 != NULL)
			FREE((caddr_t)newv4, M_NFSV4NODE);
	}

	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc called from nfs_sillyrename()
 */
static int
nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
    struct sillyrename *sp)
{

	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
	    scnp->cn_thread));
}

/*
 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
 */
static int
nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
    int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
    int tnamelen, struct ucred *cred, struct thread *td)
{
	struct nfsvattr fnfsva, tnfsva;
	struct nfsnode *fdnp = VTONFS(fdvp);
	struct nfsnode *tdnp = VTONFS(tdvp);
	int error = 0, fattrflag, tattrflag;

	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
	    tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
	    &tattrflag, NULL, NULL);
	mtx_lock(&fdnp->n_mtx);
	fdnp->n_flag |= NMODIFIED;
	mtx_unlock(&fdnp->n_mtx);
	mtx_lock(&tdnp->n_mtx);
	tdnp->n_flag |= NMODIFIED;
	mtx_unlock(&tdnp->n_mtx);
	if (fattrflag)
		(void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1);
	else
		fdnp->n_attrstamp = 0;
	if (tattrflag)
		(void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1);
	else
		tdnp->n_attrstamp = 0;
	if (error && NFS_ISV4(fdvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs hard link create call
 */
static int
nfs_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *tdnp;
	struct nfsvattr nfsva, dnfsva;
	int error = 0, attrflag, dattrflag;

	if (vp->v_mount != tdvp->v_mount) {
		return (EXDEV);
	}

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);

	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag,
	    &dattrflag, NULL);
	tdnp = VTONFS(tdvp);
	mtx_lock(&tdnp->n_mtx);
	tdnp->n_flag |= NMODIFIED;
	mtx_unlock(&tdnp->n_mtx);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
	else
		VTONFS(vp)->n_attrstamp = 0;
	if (dattrflag)
		(void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1);
	else
		tdnp->n_attrstamp = 0;
	/*
	 * If negative lookup caching is enabled, I might as well
	 * add an entry for this node. Not necessary for correctness,
	 * but if negative caching is enabled, then the system
	 * must care about lookup caching hit rate, so...
	 */
1891 */ 1892 if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && 1893 (cnp->cn_flags & MAKEENTRY)) 1894 cache_enter(tdvp, vp, cnp); 1895 if (error && NFS_ISV4(vp)) 1896 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, 1897 (gid_t)0); 1898 return (error); 1899 } 1900 1901 /* 1902 * nfs symbolic link create call 1903 */ 1904 static int 1905 nfs_symlink(struct vop_symlink_args *ap) 1906 { 1907 struct vnode *dvp = ap->a_dvp; 1908 struct vattr *vap = ap->a_vap; 1909 struct componentname *cnp = ap->a_cnp; 1910 struct nfsvattr nfsva, dnfsva; 1911 struct nfsfh *nfhp; 1912 struct nfsnode *np = NULL, *dnp; 1913 struct vnode *newvp = NULL; 1914 int error = 0, attrflag, dattrflag, ret; 1915 1916 vap->va_type = VLNK; 1917 error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1918 ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, 1919 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); 1920 if (nfhp) { 1921 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, 1922 &np, NULL); 1923 if (!ret) 1924 newvp = NFSTOV(np); 1925 else if (!error) 1926 error = ret; 1927 } 1928 if (newvp != NULL) { 1929 if (attrflag) 1930 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1931 0, 1); 1932 } else if (!error) { 1933 /* 1934 * If we do not have an error and we could not extract the 1935 * newvp from the response due to the request being NFSv2, we 1936 * have to do a lookup in order to obtain a newvp to return. 1937 */ 1938 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1939 cnp->cn_cred, cnp->cn_thread, &np); 1940 if (!error) 1941 newvp = NFSTOV(np); 1942 } 1943 if (error) { 1944 if (newvp) 1945 vput(newvp); 1946 if (NFS_ISV4(dvp)) 1947 error = nfscl_maperr(cnp->cn_thread, error, 1948 vap->va_uid, vap->va_gid); 1949 } else { 1950 /* 1951 * If negative lookup caching is enabled, I might as well 1952 * add an entry for this node. Not necessary for correctness, 1953 * but if negative caching is enabled, then the system 1954 * must care about lookup caching hit rate, so... 
1955 */ 1956 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 1957 (cnp->cn_flags & MAKEENTRY)) 1958 cache_enter(dvp, newvp, cnp); 1959 *ap->a_vpp = newvp; 1960 } 1961 1962 dnp = VTONFS(dvp); 1963 mtx_lock(&dnp->n_mtx); 1964 dnp->n_flag |= NMODIFIED; 1965 mtx_unlock(&dnp->n_mtx); 1966 if (dattrflag) 1967 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1968 else 1969 dnp->n_attrstamp = 0; 1970 return (error); 1971 } 1972 1973 /* 1974 * nfs make dir call 1975 */ 1976 static int 1977 nfs_mkdir(struct vop_mkdir_args *ap) 1978 { 1979 struct vnode *dvp = ap->a_dvp; 1980 struct vattr *vap = ap->a_vap; 1981 struct componentname *cnp = ap->a_cnp; 1982 struct nfsnode *np = NULL, *dnp; 1983 struct vnode *newvp = NULL; 1984 struct vattr vattr; 1985 struct nfsfh *nfhp; 1986 struct nfsvattr nfsva, dnfsva; 1987 int error = 0, attrflag, dattrflag, ret; 1988 1989 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1990 return (error); 1991 vap->va_type = VDIR; 1992 error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1993 vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, 1994 &attrflag, &dattrflag, NULL); 1995 dnp = VTONFS(dvp); 1996 mtx_lock(&dnp->n_mtx); 1997 dnp->n_flag |= NMODIFIED; 1998 mtx_unlock(&dnp->n_mtx); 1999 if (dattrflag) 2000 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2001 else 2002 dnp->n_attrstamp = 0; 2003 if (nfhp) { 2004 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, 2005 &np, NULL); 2006 if (!ret) { 2007 newvp = NFSTOV(np); 2008 if (attrflag) 2009 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 2010 NULL, 0, 1); 2011 } else if (!error) 2012 error = ret; 2013 } 2014 if (!error && newvp == NULL) { 2015 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2016 cnp->cn_cred, cnp->cn_thread, &np); 2017 if (!error) { 2018 newvp = NFSTOV(np); 2019 if (newvp->v_type != VDIR) 2020 error = EEXIST; 2021 } 2022 } 2023 if (error) { 2024 if (newvp) 2025 vput(newvp); 2026 if (NFS_ISV4(dvp)) 2027 error = nfscl_maperr(cnp->cn_thread, error, 2028 vap->va_uid, vap->va_gid); 2029 } else { 2030 /* 2031 * If negative lookup caching is enabled, I might as well 2032 * add an entry for this node. Not necessary for correctness, 2033 * but if negative caching is enabled, then the system 2034 * must care about lookup caching hit rate, so... 2035 */ 2036 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && 2037 (cnp->cn_flags & MAKEENTRY)) 2038 cache_enter(dvp, newvp, cnp); 2039 *ap->a_vpp = newvp; 2040 } 2041 return (error); 2042 } 2043 2044 /* 2045 * nfs remove directory call 2046 */ 2047 static int 2048 nfs_rmdir(struct vop_rmdir_args *ap) 2049 { 2050 struct vnode *vp = ap->a_vp; 2051 struct vnode *dvp = ap->a_dvp; 2052 struct componentname *cnp = ap->a_cnp; 2053 struct nfsnode *dnp; 2054 struct nfsvattr dnfsva; 2055 int error, dattrflag; 2056 2057 if (dvp == vp) 2058 return (EINVAL); 2059 error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2060 cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL); 2061 dnp = VTONFS(dvp); 2062 mtx_lock(&dnp->n_mtx); 2063 dnp->n_flag |= NMODIFIED; 2064 mtx_unlock(&dnp->n_mtx); 2065 if (dattrflag) 2066 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2067 else 2068 dnp->n_attrstamp = 0; 2069 2070 cache_purge(dvp); 2071 cache_purge(vp); 2072 if (error && NFS_ISV4(dvp)) 2073 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, 2074 (gid_t)0); 2075 /* 2076 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
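 * (Over UDP a non-idempotent op such as RMDIR can be retransmitted
 * after the server has already performed it but the reply was lost;
 * the retry then fails with ENOENT even though the directory was
 * removed, so the error is suppressed here.)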
2077 */ 2078 if (error == ENOENT) 2079 error = 0; 2080 return (error); 2081 } 2082 2083 /* 2084 * nfs readdir call 2085 */ 2086 static int 2087 nfs_readdir(struct vop_readdir_args *ap) 2088 { 2089 struct vnode *vp = ap->a_vp; 2090 struct nfsnode *np = VTONFS(vp); 2091 struct uio *uio = ap->a_uio; 2092 int tresid, error = 0; 2093 struct vattr vattr; 2094 2095 if (vp->v_type != VDIR) 2096 return(EPERM); 2097 2098 /* 2099 * First, check for hit on the EOF offset cache 2100 */ 2101 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2102 (np->n_flag & NMODIFIED) == 0) { 2103 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2104 mtx_lock(&np->n_mtx); 2105 if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || 2106 !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2107 mtx_unlock(&np->n_mtx); 2108 NFSINCRGLOBAL(newnfsstats.direofcache_hits); 2109 return (0); 2110 } else 2111 mtx_unlock(&np->n_mtx); 2112 } 2113 } 2114 2115 /* 2116 * Call ncl_bioread() to do the real work. 2117 */ 2118 tresid = uio->uio_resid; 2119 error = ncl_bioread(vp, uio, 0, ap->a_cred); 2120 2121 if (!error && uio->uio_resid == tresid) 2122 NFSINCRGLOBAL(newnfsstats.direofcache_misses); 2123 return (error); 2124 } 2125 2126 /* 2127 * Readdir rpc call. 2128 * Called from below the buffer cache by ncl_doio(). 2129 */ 2130 int 2131 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2132 struct thread *td) 2133 { 2134 struct nfsvattr nfsva; 2135 nfsuint64 *cookiep, cookie; 2136 struct nfsnode *dnp = VTONFS(vp); 2137 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2138 int error = 0, eof, attrflag; 2139 2140 #ifndef DIAGNOSTIC 2141 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || 2142 (uiop->uio_resid & (DIRBLKSIZ - 1))) 2143 panic("nfs readdirrpc bad uio"); 2144 #endif 2145 2146 /* 2147 * If there is no cookie, assume directory was stale. 2148 */ 2149 ncl_dircookie_lock(dnp); 2150 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2151 if (cookiep) { 2152 cookie = *cookiep; 2153 ncl_dircookie_unlock(dnp); 2154 } else { 2155 ncl_dircookie_unlock(dnp); 2156 return (NFSERR_BAD_COOKIE); 2157 } 2158 2159 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2160 (void)ncl_fsinfo(nmp, vp, cred, td); 2161 2162 error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, 2163 &attrflag, &eof, NULL); 2164 if (attrflag) 2165 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2166 2167 if (!error) { 2168 /* 2169 * We are now either at the end of the directory or have filled 2170 * the block. 2171 */ 2172 if (eof) 2173 dnp->n_direofoffset = uiop->uio_offset; 2174 else { 2175 if (uiop->uio_resid > 0) 2176 ncl_printf("EEK! readdirrpc resid > 0\n"); 2177 ncl_dircookie_lock(dnp); 2178 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2179 *cookiep = cookie; 2180 ncl_dircookie_unlock(dnp); 2181 } 2182 } else if (NFS_ISV4(vp)) { 2183 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2184 } 2185 return (error); 2186 } 2187 2188 /* 2189 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). 
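 * Note: like ncl_readdirrpc() above, this routine requires the uio to be
 * directory-block aligned. Since DIRBLKSIZ is a power of two, the masking
 * test used in the sanity check below is a cheap modulus; a sketch with a
 * hypothetical helper (not part of this file):
 *
 *	static int
 *	nfs_dirblk_aligned(off_t off)
 *	{
 *
 *		return ((off & (DIRBLKSIZ - 1)) == 0);
 *	}
 *
 * which returns 1 exactly when off % DIRBLKSIZ == 0.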
2190 */
2191 int
2192 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
2193 struct thread *td)
2194 {
2195 struct nfsvattr nfsva;
2196 nfsuint64 *cookiep, cookie;
2197 struct nfsnode *dnp = VTONFS(vp);
2198 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2199 int error = 0, attrflag, eof;
2200
2201 #ifdef DIAGNOSTIC
2202 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2203 (uiop->uio_resid & (DIRBLKSIZ - 1)))
2204 panic("nfs readdirplusrpc bad uio");
2205 #endif
2206
2207 /*
2208 * If there is no cookie, assume the directory was stale.
2209 */
2210 ncl_dircookie_lock(dnp);
2211 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
2212 if (cookiep) {
2213 cookie = *cookiep;
2214 ncl_dircookie_unlock(dnp);
2215 } else {
2216 ncl_dircookie_unlock(dnp);
2217 return (NFSERR_BAD_COOKIE);
2218 }
2219
2220 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
2221 (void)ncl_fsinfo(nmp, vp, cred, td);
2222 error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
2223 &attrflag, &eof, NULL);
2224 if (attrflag)
2225 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
2226
2227 if (!error) {
2228 /*
2229 * We are now either at the end of the directory or have filled
2230 * the block.
2231 */
2232 if (eof)
2233 dnp->n_direofoffset = uiop->uio_offset;
2234 else {
2235 if (uiop->uio_resid > 0)
2236 ncl_printf("EEK! readdirplusrpc resid > 0\n");
2237 ncl_dircookie_lock(dnp);
2238 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
2239 *cookiep = cookie;
2240 ncl_dircookie_unlock(dnp);
2241 }
2242 } else if (NFS_ISV4(vp)) {
2243 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
2244 }
2245 return (error);
2246 }
2247
2248 /*
2249 * Silly rename. To make the NFS filesystem, which is stateless, look a
2250 * little more like "ufs", a remove of an active vnode is translated into a
2251 * rename to a funny looking filename that is removed later by nfs_inactive()
2252 * on the nfsnode. There is the potential for another process on a different
2253 * client to create the same funny name between the time the nfs_lookitup()
2254 * fails and the nfs_rename() completes, but...
2255 */
2256 static int
2257 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2258 {
2259 struct sillyrename *sp;
2260 struct nfsnode *np;
2261 int error;
2262 short pid;
2263 unsigned int lticks;
2264
2265 cache_purge(dvp);
2266 np = VTONFS(vp);
2267 #ifdef DIAGNOSTIC
2268 if (vp->v_type == VDIR)
2269 panic("nfs: sillyrename dir");
2270 #endif
2271 MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
2272 M_NEWNFSREQ, M_WAITOK);
2273 sp->s_cred = crhold(cnp->cn_cred);
2274 sp->s_dvp = dvp;
2275 VREF(dvp);
2276
2277 /*
2278 * Fudge together a funny name.
2279 * Changing the format of the funny name to accommodate more
2280 * sillynames per directory.
2281 * The name is now changed to .nfs.<ticks>.<pid>4.4, where ticks is
2282 * CPU ticks since boot.
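 *
 * For example (hypothetical values), lticks 0x0000beef and pid 0x01a4
 * yield the 21 character name ".nfs.0000beef.01a44.4"; note that the
 * trailing "4.4" comes from the format string itself, so the pid field
 * reads as "<pid>4.4" rather than the "<pid>.4" one might expect.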
2283 */ 2284 pid = cnp->cn_thread->td_proc->p_pid; 2285 lticks = (unsigned int)ticks; 2286 for ( ; ; ) { 2287 sp->s_namlen = sprintf(sp->s_name, 2288 ".nfs.%08x.%04x4.4", lticks, 2289 pid); 2290 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2291 cnp->cn_thread, NULL)) 2292 break; 2293 lticks++; 2294 } 2295 error = nfs_renameit(dvp, vp, cnp, sp); 2296 if (error) 2297 goto bad; 2298 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2299 cnp->cn_thread, &np); 2300 np->n_sillyrename = sp; 2301 return (0); 2302 bad: 2303 vrele(sp->s_dvp); 2304 crfree(sp->s_cred); 2305 free((caddr_t)sp, M_NEWNFSREQ); 2306 return (error); 2307 } 2308 2309 /* 2310 * Look up a file name and optionally either update the file handle or 2311 * allocate an nfsnode, depending on the value of npp. 2312 * npp == NULL --> just do the lookup 2313 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2314 * handled too 2315 * *npp != NULL --> update the file handle in the vnode 2316 */ 2317 static int 2318 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, 2319 struct thread *td, struct nfsnode **npp) 2320 { 2321 struct vnode *newvp = NULL, *vp; 2322 struct nfsnode *np, *dnp = VTONFS(dvp); 2323 struct nfsfh *nfhp, *onfhp; 2324 struct nfsvattr nfsva, dnfsva; 2325 struct componentname cn; 2326 int error = 0, attrflag, dattrflag; 2327 u_int hash; 2328 2329 error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, 2330 &nfhp, &attrflag, &dattrflag, NULL); 2331 if (dattrflag) 2332 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2333 if (npp && !error) { 2334 if (*npp != NULL) { 2335 np = *npp; 2336 vp = NFSTOV(np); 2337 /* 2338 * For NFSv4, check to see if it is the same name and 2339 * replace the name, if it is different. 2340 */ 2341 if (np->n_v4 != NULL && nfsva.na_type == VREG && 2342 (np->n_v4->n4_namelen != len || 2343 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || 2344 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || 2345 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2346 dnp->n_fhp->nfh_len))) { 2347 #ifdef notdef 2348 { char nnn[100]; int nnnl; 2349 nnnl = (len < 100) ? len : 99; 2350 bcopy(name, nnn, nnnl); 2351 nnn[nnnl] = '\0'; 2352 printf("replace=%s\n",nnn); 2353 } 2354 #endif 2355 FREE((caddr_t)np->n_v4, M_NFSV4NODE); 2356 MALLOC(np->n_v4, struct nfsv4node *, 2357 sizeof (struct nfsv4node) + 2358 dnp->n_fhp->nfh_len + len - 1, 2359 M_NFSV4NODE, M_WAITOK); 2360 np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; 2361 np->n_v4->n4_namelen = len; 2362 NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2363 dnp->n_fhp->nfh_len); 2364 NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); 2365 } 2366 hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, 2367 FNV1_32_INIT); 2368 onfhp = np->n_fhp; 2369 /* 2370 * Rehash node for new file handle. 
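 * The vnode hash table is keyed on an FNV hash of the file handle
 * (see the fnv_32_buf() call above), so once the handle stored in
 * np->n_fhp changes the vnode must be moved to its new hash chain
 * before the old handle is freed.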
2371 */
2372 vfs_hash_rehash(vp, hash);
2373 np->n_fhp = nfhp;
2374 if (onfhp != NULL)
2375 FREE((caddr_t)onfhp, M_NFSFH);
2376 newvp = NFSTOV(np);
2377 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
2378 FREE((caddr_t)nfhp, M_NFSFH);
2379 VREF(dvp);
2380 newvp = dvp;
2381 } else {
2382 cn.cn_nameptr = name;
2383 cn.cn_namelen = len;
2384 error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
2385 &np, NULL);
2386 if (error)
2387 return (error);
2388 newvp = NFSTOV(np);
2389 }
2390 if (!attrflag && *npp == NULL) {
2391 vrele(newvp);
2392 return (ENOENT);
2393 }
2394 if (attrflag)
2395 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
2396 0, 1);
2397 }
2398 if (npp && *npp == NULL) {
2399 if (error) {
2400 if (newvp) {
2401 if (newvp == dvp)
2402 vrele(newvp);
2403 else
2404 vput(newvp);
2405 }
2406 } else
2407 *npp = np;
2408 }
2409 if (error && NFS_ISV4(dvp))
2410 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
2411 return (error);
2412 }
2413
2414 /*
2415 * NFS Version 3 and 4 commit rpc
2416 */
2417 int
2418 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
2419 struct thread *td)
2420 {
2421 struct nfsvattr nfsva;
2422 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2423 int error, attrflag;
2424 u_char verf[NFSX_VERF];
2425
2426 mtx_lock(&nmp->nm_mtx);
2427 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
2428 mtx_unlock(&nmp->nm_mtx);
2429 return (0);
2430 }
2431 mtx_unlock(&nmp->nm_mtx);
2432 error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva,
2433 &attrflag, NULL);
2434 if (!error) {
2435 if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) {
2436 NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF);
2437 error = NFSERR_STALEWRITEVERF;
2438 }
2439 if (!error && attrflag)
2440 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
2441 0, 1);
2442 } else if (NFS_ISV4(vp)) {
2443 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
2444 }
2445 return (error);
2446 }
2447
2448 /*
2449 * Strategy routine.
2450 * For async requests when nfsiod(s) are running, queue the request by
2451 * calling ncl_asyncio(), otherwise just call ncl_doio() to do the
2452 * request.
2453 */
2454 static int
2455 nfs_strategy(struct vop_strategy_args *ap)
2456 {
2457 struct buf *bp = ap->a_bp;
2458 struct ucred *cr;
2459
2460 KASSERT(!(bp->b_flags & B_DONE),
2461 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2462 BUF_ASSERT_HELD(bp);
2463
2464 if (bp->b_iocmd == BIO_READ)
2465 cr = bp->b_rcred;
2466 else
2467 cr = bp->b_wcred;
2468
2469 /*
2470 * If the op is asynchronous and an i/o daemon is waiting,
2471 * queue the request, wake it up and wait for completion;
2472 * otherwise just do it ourselves.
2473 */
2474 if ((bp->b_flags & B_ASYNC) == 0 ||
2475 ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
2476 (void) ncl_doio(ap->a_vp, bp, cr, curthread, 1);
2477 return (0);
2478 }
2479
2480 /*
2481 * fsync vnode op. Just call ncl_flush() with commit == 1.
2482 */
2483 /* ARGSUSED */
2484 static int
2485 nfs_fsync(struct vop_fsync_args *ap)
2486 {
2487 return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0));
2488 }
2489
2490 /*
2491 * Flush all the blocks associated with a vnode.
2492 * Walk through the buffer pool and push any dirty pages
2493 * associated with the vnode.
2494 * If the called_from_renewthread argument is TRUE, it has been called
2495 * from the NFSv4 renew thread and, as such, cannot block indefinitely
2496 * waiting for a buffer write to complete.
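 *
 * Background for the commit handling below: an NFSv3/v4 UNSTABLE write
 * is only durable once a later COMMIT returns the same write verifier
 * that the WRITE did. Buffers in that window are marked B_DELWRI |
 * B_NEEDCOMMIT; if the verifier changes (e.g. the server rebooted),
 * ncl_commit() returns NFSERR_STALEWRITEVERF and the data must be
 * written again (see the ncl_clearcommit() call below).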
2497 */ 2498 int 2499 ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td, 2500 int commit, int called_from_renewthread) 2501 { 2502 struct nfsnode *np = VTONFS(vp); 2503 struct buf *bp; 2504 int i; 2505 struct buf *nbp; 2506 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2507 int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; 2508 int passone = 1, trycnt = 0; 2509 u_quad_t off, endoff, toff; 2510 struct ucred* wcred = NULL; 2511 struct buf **bvec = NULL; 2512 struct bufobj *bo; 2513 #ifndef NFS_COMMITBVECSIZ 2514 #define NFS_COMMITBVECSIZ 20 2515 #endif 2516 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; 2517 int bvecsize = 0, bveccount; 2518 2519 if (called_from_renewthread != 0) 2520 slptimeo = hz; 2521 if (nmp->nm_flag & NFSMNT_INT) 2522 slpflag = NFS_PCATCH; 2523 if (!commit) 2524 passone = 0; 2525 bo = &vp->v_bufobj; 2526 /* 2527 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the 2528 * server, but has not been committed to stable storage on the server 2529 * yet. On the first pass, the byte range is worked out and the commit 2530 * rpc is done. On the second pass, ncl_writebp() is called to do the 2531 * job. 2532 */ 2533 again: 2534 off = (u_quad_t)-1; 2535 endoff = 0; 2536 bvecpos = 0; 2537 if (NFS_ISV34(vp) && commit) { 2538 if (bvec != NULL && bvec != bvec_on_stack) 2539 free(bvec, M_TEMP); 2540 /* 2541 * Count up how many buffers waiting for a commit. 2542 */ 2543 bveccount = 0; 2544 BO_LOCK(bo); 2545 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2546 if (!BUF_ISLOCKED(bp) && 2547 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 2548 == (B_DELWRI | B_NEEDCOMMIT)) 2549 bveccount++; 2550 } 2551 /* 2552 * Allocate space to remember the list of bufs to commit. It is 2553 * important to use M_NOWAIT here to avoid a race with nfs_write. 2554 * If we can't get memory (for whatever reason), we will end up 2555 * committing the buffers one-by-one in the loop below. 2556 */ 2557 if (bveccount > NFS_COMMITBVECSIZ) { 2558 /* 2559 * Release the vnode interlock to avoid a lock 2560 * order reversal. 2561 */ 2562 BO_UNLOCK(bo); 2563 bvec = (struct buf **) 2564 malloc(bveccount * sizeof(struct buf *), 2565 M_TEMP, M_NOWAIT); 2566 BO_LOCK(bo); 2567 if (bvec == NULL) { 2568 bvec = bvec_on_stack; 2569 bvecsize = NFS_COMMITBVECSIZ; 2570 } else 2571 bvecsize = bveccount; 2572 } else { 2573 bvec = bvec_on_stack; 2574 bvecsize = NFS_COMMITBVECSIZ; 2575 } 2576 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2577 if (bvecpos >= bvecsize) 2578 break; 2579 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 2580 nbp = TAILQ_NEXT(bp, b_bobufs); 2581 continue; 2582 } 2583 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 2584 (B_DELWRI | B_NEEDCOMMIT)) { 2585 BUF_UNLOCK(bp); 2586 nbp = TAILQ_NEXT(bp, b_bobufs); 2587 continue; 2588 } 2589 BO_UNLOCK(bo); 2590 bremfree(bp); 2591 /* 2592 * Work out if all buffers are using the same cred 2593 * so we can deal with them all with one commit. 2594 * 2595 * NOTE: we are not clearing B_DONE here, so we have 2596 * to do it later on in this routine if we intend to 2597 * initiate I/O on the bp. 2598 * 2599 * Note: to avoid loopback deadlocks, we do not 2600 * assign b_runningbufspace. 2601 */ 2602 if (wcred == NULL) 2603 wcred = bp->b_wcred; 2604 else if (wcred != bp->b_wcred) 2605 wcred = NOCRED; 2606 vfs_busy_pages(bp, 1); 2607 2608 BO_LOCK(bo); 2609 /* 2610 * bp is protected by being locked, but nbp is not 2611 * and vfs_busy_pages() may sleep. We have to 2612 * recalculate nbp. 
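 * (TAILQ_FOREACH_SAFE() only protects against the removal of bp
 * itself, not against other list changes made while sleeping.)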
2613 */ 2614 nbp = TAILQ_NEXT(bp, b_bobufs); 2615 2616 /* 2617 * A list of these buffers is kept so that the 2618 * second loop knows which buffers have actually 2619 * been committed. This is necessary, since there 2620 * may be a race between the commit rpc and new 2621 * uncommitted writes on the file. 2622 */ 2623 bvec[bvecpos++] = bp; 2624 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 2625 bp->b_dirtyoff; 2626 if (toff < off) 2627 off = toff; 2628 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 2629 if (toff > endoff) 2630 endoff = toff; 2631 } 2632 BO_UNLOCK(bo); 2633 } 2634 if (bvecpos > 0) { 2635 /* 2636 * Commit data on the server, as required. 2637 * If all bufs are using the same wcred, then use that with 2638 * one call for all of them, otherwise commit each one 2639 * separately. 2640 */ 2641 if (wcred != NOCRED) 2642 retv = ncl_commit(vp, off, (int)(endoff - off), 2643 wcred, td); 2644 else { 2645 retv = 0; 2646 for (i = 0; i < bvecpos; i++) { 2647 off_t off, size; 2648 bp = bvec[i]; 2649 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 2650 bp->b_dirtyoff; 2651 size = (u_quad_t)(bp->b_dirtyend 2652 - bp->b_dirtyoff); 2653 retv = ncl_commit(vp, off, (int)size, 2654 bp->b_wcred, td); 2655 if (retv) break; 2656 } 2657 } 2658 2659 if (retv == NFSERR_STALEWRITEVERF) 2660 ncl_clearcommit(vp->v_mount); 2661 2662 /* 2663 * Now, either mark the blocks I/O done or mark the 2664 * blocks dirty, depending on whether the commit 2665 * succeeded. 2666 */ 2667 for (i = 0; i < bvecpos; i++) { 2668 bp = bvec[i]; 2669 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 2670 if (retv) { 2671 /* 2672 * Error, leave B_DELWRI intact 2673 */ 2674 vfs_unbusy_pages(bp); 2675 brelse(bp); 2676 } else { 2677 /* 2678 * Success, remove B_DELWRI ( bundirty() ). 2679 * 2680 * b_dirtyoff/b_dirtyend seem to be NFS 2681 * specific. We should probably move that 2682 * into bundirty(). XXX 2683 */ 2684 bufobj_wref(bo); 2685 bp->b_flags |= B_ASYNC; 2686 bundirty(bp); 2687 bp->b_flags &= ~B_DONE; 2688 bp->b_ioflags &= ~BIO_ERROR; 2689 bp->b_dirtyoff = bp->b_dirtyend = 0; 2690 bufdone(bp); 2691 } 2692 } 2693 } 2694 2695 /* 2696 * Start/do any write(s) that are required. 2697 */ 2698 loop: 2699 BO_LOCK(bo); 2700 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2701 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 2702 if (waitfor != MNT_WAIT || passone) 2703 continue; 2704 2705 error = BUF_TIMELOCK(bp, 2706 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 2707 BO_MTX(bo), "nfsfsync", slpflag, slptimeo); 2708 if (error == 0) { 2709 BUF_UNLOCK(bp); 2710 goto loop; 2711 } 2712 if (error == ENOLCK) { 2713 error = 0; 2714 goto loop; 2715 } 2716 if (called_from_renewthread != 0) { 2717 /* 2718 * Return EIO so the flush will be retried 2719 * later. 
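 * (Blocking here would stall lease renewal for the whole mount,
 * which is why the renew thread gives up rather than waits.)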
2720 */
2721 error = EIO;
2722 goto done;
2723 }
2724 if (newnfs_sigintr(nmp, td)) {
2725 error = EINTR;
2726 goto done;
2727 }
2728 if (slpflag & PCATCH) {
2729 slpflag = 0;
2730 slptimeo = 2 * hz;
2731 }
2732 goto loop;
2733 }
2734 if ((bp->b_flags & B_DELWRI) == 0)
2735 panic("nfs_fsync: not dirty");
2736 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
2737 BUF_UNLOCK(bp);
2738 continue;
2739 }
2740 BO_UNLOCK(bo);
2741 bremfree(bp);
2742 bp->b_flags |= B_ASYNC;
2746 bwrite(bp);
2747 if (newnfs_sigintr(nmp, td)) {
2748 error = EINTR;
2749 goto done;
2750 }
2751 goto loop;
2752 }
2753 if (passone) {
2754 passone = 0;
2755 BO_UNLOCK(bo);
2756 goto again;
2757 }
2758 if (waitfor == MNT_WAIT) {
2759 while (bo->bo_numoutput) {
2760 error = bufobj_wwait(bo, slpflag, slptimeo);
2761 if (error) {
2762 BO_UNLOCK(bo);
2763 if (called_from_renewthread != 0) {
2764 /*
2765 * Return EIO so that the flush will be
2766 * retried later.
2767 */
2768 error = EIO;
2769 goto done;
2770 }
2771 error = newnfs_sigintr(nmp, td);
2772 if (error)
2773 goto done;
2774 if (slpflag & PCATCH) {
2775 slpflag = 0;
2776 slptimeo = 2 * hz;
2777 }
2778 BO_LOCK(bo);
2779 }
2780 }
2781 if (bo->bo_dirty.bv_cnt != 0 && commit) {
2782 BO_UNLOCK(bo);
2783 goto loop;
2784 }
2785 /*
2786 * Wait for all the async IO requests to drain
2787 */
2788 BO_UNLOCK(bo);
2789 mtx_lock(&np->n_mtx);
2790 while (np->n_directio_asyncwr > 0) {
2791 np->n_flag |= NFSYNCWAIT;
2792 error = newnfs_msleep(td, &np->n_directio_asyncwr,
2793 &np->n_mtx, slpflag | (PRIBIO + 1),
2794 "nfsfsync", 0);
2795 if (error) {
2796 if (newnfs_sigintr(nmp, td)) {
2797 mtx_unlock(&np->n_mtx);
2798 error = EINTR;
2799 goto done;
2800 }
2801 }
2802 }
2803 mtx_unlock(&np->n_mtx);
2804 } else
2805 BO_UNLOCK(bo);
2806 mtx_lock(&np->n_mtx);
2807 if (np->n_flag & NWRITEERR) {
2808 error = np->n_error;
2809 np->n_flag &= ~NWRITEERR;
2810 }
2811 if (commit && bo->bo_dirty.bv_cnt == 0 &&
2812 bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
2813 np->n_flag &= ~NMODIFIED;
2814 mtx_unlock(&np->n_mtx);
2815 done:
2816 if (bvec != NULL && bvec != bvec_on_stack)
2817 free(bvec, M_TEMP);
2818 if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
2819 (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
2820 np->n_directio_asyncwr != 0) && trycnt++ < 5) {
2821 /* try, try again... */
2822 passone = 1;
2823 wcred = NULL;
2824 bvec = NULL;
2825 bvecsize = 0;
2826 printf("try%d\n", trycnt);
2827 goto again;
2828 }
2829 return (error);
2830 }
2831
2832 /*
2833 * NFS advisory byte-level locks.
2834 */
2835 static int
2836 nfs_advlock(struct vop_advlock_args *ap)
2837 {
2838 struct vnode *vp = ap->a_vp;
2839 struct ucred *cred;
2840 struct nfsnode *np = VTONFS(ap->a_vp);
2841 struct proc *p = (struct proc *)ap->a_id;
2842 struct thread *td = curthread; /* XXX */
2843 struct vattr va;
2844 int ret, error = EOPNOTSUPP;
2845 u_quad_t size;
2846
2847 if (NFS_ISV4(vp) && (ap->a_flags & F_POSIX)) {
2848 cred = p->p_ucred;
2849 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2850 if (vp->v_iflag & VI_DOOMED) {
2851 VOP_UNLOCK(vp, 0);
2852 return (EBADF);
2853 }
2854
2855 /*
2856 * If this is unlocking a write locked region, flush and
2857 * commit the dirty buffers before unlocking. This is required
2858 * by RFC3530 Sec. 9.3.2.
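 * (The flush pushes any locally cached writes for the locked range
 * to the server before the lock is dropped, so that another client
 * acquiring the lock afterwards is guaranteed to read them.)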
2859 */ 2860 if (ap->a_op == F_UNLCK && 2861 nfscl_checkwritelocked(vp, ap->a_fl, cred, td)) 2862 (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0); 2863 2864 /* 2865 * Loop around doing the lock op, while a blocking lock 2866 * must wait for the lock op to succeed. 2867 */ 2868 do { 2869 ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, 2870 ap->a_fl, 0, cred, td); 2871 if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 2872 ap->a_op == F_SETLK) { 2873 VOP_UNLOCK(vp, 0); 2874 error = nfs_catnap(PZERO | PCATCH, ret, 2875 "ncladvl"); 2876 if (error) 2877 return (EINTR); 2878 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2879 if (vp->v_iflag & VI_DOOMED) { 2880 VOP_UNLOCK(vp, 0); 2881 return (EBADF); 2882 } 2883 } 2884 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && 2885 ap->a_op == F_SETLK); 2886 if (ret == NFSERR_DENIED) { 2887 VOP_UNLOCK(vp, 0); 2888 return (EAGAIN); 2889 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { 2890 VOP_UNLOCK(vp, 0); 2891 return (ret); 2892 } else if (ret != 0) { 2893 VOP_UNLOCK(vp, 0); 2894 return (EACCES); 2895 } 2896 2897 /* 2898 * Now, if we just got a lock, invalidate data in the buffer 2899 * cache, as required, so that the coherency conforms with 2900 * RFC3530 Sec. 9.3.2. 2901 */ 2902 if (ap->a_op == F_SETLK) { 2903 if ((np->n_flag & NMODIFIED) == 0) { 2904 np->n_attrstamp = 0; 2905 ret = VOP_GETATTR(vp, &va, cred); 2906 } 2907 if ((np->n_flag & NMODIFIED) || ret || 2908 np->n_change != va.va_filerev) { 2909 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); 2910 np->n_attrstamp = 0; 2911 ret = VOP_GETATTR(vp, &va, cred); 2912 if (!ret) { 2913 np->n_mtime = va.va_mtime; 2914 np->n_change = va.va_filerev; 2915 } 2916 } 2917 } 2918 VOP_UNLOCK(vp, 0); 2919 return (0); 2920 } else if (!NFS_ISV4(vp)) { 2921 error = vn_lock(vp, LK_SHARED); 2922 if (error) 2923 return (error); 2924 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 2925 size = VTONFS(vp)->n_size; 2926 VOP_UNLOCK(vp, 0); 2927 error = lf_advlock(ap, &(vp->v_lockf), size); 2928 } else { 2929 if (ncl_advlock_p) 2930 error = ncl_advlock_p(ap); 2931 else 2932 error = ENOLCK; 2933 } 2934 } 2935 return (error); 2936 } 2937 2938 /* 2939 * NFS advisory byte-level locks. 2940 */ 2941 static int 2942 nfs_advlockasync(struct vop_advlockasync_args *ap) 2943 { 2944 struct vnode *vp = ap->a_vp; 2945 u_quad_t size; 2946 int error; 2947 2948 if (NFS_ISV4(vp)) 2949 return (EOPNOTSUPP); 2950 error = vn_lock(vp, LK_SHARED); 2951 if (error) 2952 return (error); 2953 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 2954 size = VTONFS(vp)->n_size; 2955 VOP_UNLOCK(vp, 0); 2956 error = lf_advlockasync(ap, &(vp->v_lockf), size); 2957 } else { 2958 VOP_UNLOCK(vp, 0); 2959 error = EOPNOTSUPP; 2960 } 2961 return (error); 2962 } 2963 2964 /* 2965 * Print out the contents of an nfsnode. 2966 */ 2967 static int 2968 nfs_print(struct vop_print_args *ap) 2969 { 2970 struct vnode *vp = ap->a_vp; 2971 struct nfsnode *np = VTONFS(vp); 2972 2973 ncl_printf("\tfileid %ld fsid 0x%x", 2974 np->n_vattr.na_fileid, np->n_vattr.na_fsid); 2975 if (vp->v_type == VFIFO) 2976 fifo_printinfo(vp); 2977 printf("\n"); 2978 return (0); 2979 } 2980 2981 /* 2982 * This is the "real" nfs::bwrite(struct buf*). 2983 * We set B_CACHE if this is a VMIO buffer. 
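 * (B_CACHE indicates that the buffer's contents are valid; without it
 * a later read would go back to the server for data we just wrote.)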
2984 */ 2985 int 2986 ncl_writebp(struct buf *bp, int force __unused, struct thread *td) 2987 { 2988 int s; 2989 int oldflags = bp->b_flags; 2990 #if 0 2991 int retv = 1; 2992 off_t off; 2993 #endif 2994 2995 BUF_ASSERT_HELD(bp); 2996 2997 if (bp->b_flags & B_INVAL) { 2998 brelse(bp); 2999 return(0); 3000 } 3001 3002 bp->b_flags |= B_CACHE; 3003 3004 /* 3005 * Undirty the bp. We will redirty it later if the I/O fails. 3006 */ 3007 3008 s = splbio(); 3009 bundirty(bp); 3010 bp->b_flags &= ~B_DONE; 3011 bp->b_ioflags &= ~BIO_ERROR; 3012 bp->b_iocmd = BIO_WRITE; 3013 3014 bufobj_wref(bp->b_bufobj); 3015 curthread->td_ru.ru_oublock++; 3016 splx(s); 3017 3018 /* 3019 * Note: to avoid loopback deadlocks, we do not 3020 * assign b_runningbufspace. 3021 */ 3022 vfs_busy_pages(bp, 1); 3023 3024 BUF_KERNPROC(bp); 3025 bp->b_iooffset = dbtob(bp->b_blkno); 3026 bstrategy(bp); 3027 3028 if( (oldflags & B_ASYNC) == 0) { 3029 int rtval = bufwait(bp); 3030 3031 if (oldflags & B_DELWRI) { 3032 s = splbio(); 3033 reassignbuf(bp); 3034 splx(s); 3035 } 3036 brelse(bp); 3037 return (rtval); 3038 } 3039 3040 return (0); 3041 } 3042 3043 /* 3044 * nfs special file access vnode op. 3045 * Essentially just get vattr and then imitate iaccess() since the device is 3046 * local to the client. 3047 */ 3048 static int 3049 nfsspec_access(struct vop_access_args *ap) 3050 { 3051 struct vattr *vap; 3052 struct ucred *cred = ap->a_cred; 3053 struct vnode *vp = ap->a_vp; 3054 accmode_t accmode = ap->a_accmode; 3055 struct vattr vattr; 3056 int error; 3057 3058 /* 3059 * Disallow write attempts on filesystems mounted read-only; 3060 * unless the file is a socket, fifo, or a block or character 3061 * device resident on the filesystem. 3062 */ 3063 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3064 switch (vp->v_type) { 3065 case VREG: 3066 case VDIR: 3067 case VLNK: 3068 return (EROFS); 3069 default: 3070 break; 3071 } 3072 } 3073 vap = &vattr; 3074 error = VOP_GETATTR(vp, vap, cred); 3075 if (error) 3076 goto out; 3077 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3078 accmode, cred, NULL); 3079 out: 3080 return error; 3081 } 3082 3083 /* 3084 * Read wrapper for fifos. 3085 */ 3086 static int 3087 nfsfifo_read(struct vop_read_args *ap) 3088 { 3089 struct nfsnode *np = VTONFS(ap->a_vp); 3090 int error; 3091 3092 /* 3093 * Set access flag. 3094 */ 3095 mtx_lock(&np->n_mtx); 3096 np->n_flag |= NACC; 3097 getnanotime(&np->n_atim); 3098 mtx_unlock(&np->n_mtx); 3099 error = fifo_specops.vop_read(ap); 3100 return error; 3101 } 3102 3103 /* 3104 * Write wrapper for fifos. 3105 */ 3106 static int 3107 nfsfifo_write(struct vop_write_args *ap) 3108 { 3109 struct nfsnode *np = VTONFS(ap->a_vp); 3110 3111 /* 3112 * Set update flag. 3113 */ 3114 mtx_lock(&np->n_mtx); 3115 np->n_flag |= NUPD; 3116 getnanotime(&np->n_mtim); 3117 mtx_unlock(&np->n_mtx); 3118 return(fifo_specops.vop_write(ap)); 3119 } 3120 3121 /* 3122 * Close wrapper for fifos. 3123 * 3124 * Update the times on the nfsnode then do fifo close. 
3125 */ 3126 static int 3127 nfsfifo_close(struct vop_close_args *ap) 3128 { 3129 struct vnode *vp = ap->a_vp; 3130 struct nfsnode *np = VTONFS(vp); 3131 struct vattr vattr; 3132 struct timespec ts; 3133 3134 mtx_lock(&np->n_mtx); 3135 if (np->n_flag & (NACC | NUPD)) { 3136 getnanotime(&ts); 3137 if (np->n_flag & NACC) 3138 np->n_atim = ts; 3139 if (np->n_flag & NUPD) 3140 np->n_mtim = ts; 3141 np->n_flag |= NCHG; 3142 if (vrefcnt(vp) == 1 && 3143 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3144 VATTR_NULL(&vattr); 3145 if (np->n_flag & NACC) 3146 vattr.va_atime = np->n_atim; 3147 if (np->n_flag & NUPD) 3148 vattr.va_mtime = np->n_mtim; 3149 mtx_unlock(&np->n_mtx); 3150 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3151 goto out; 3152 } 3153 } 3154 mtx_unlock(&np->n_mtx); 3155 out: 3156 return (fifo_specops.vop_close(ap)); 3157 } 3158 3159 /* 3160 * Just call ncl_writebp() with the force argument set to 1. 3161 * 3162 * NOTE: B_DONE may or may not be set in a_bp on call. 3163 */ 3164 static int 3165 nfs_bwrite(struct buf *bp) 3166 { 3167 3168 return (ncl_writebp(bp, 1, curthread)); 3169 } 3170 3171 struct buf_ops buf_ops_newnfs = { 3172 .bop_name = "buf_ops_nfs", 3173 .bop_write = nfs_bwrite, 3174 .bop_strategy = bufstrategy, 3175 .bop_sync = bufsync, 3176 .bop_bdflush = bufbdflush, 3177 }; 3178 3179 /* 3180 * Cloned from vop_stdlock(), and then the ugly hack added. 3181 */ 3182 static int 3183 nfs_lock1(struct vop_lock1_args *ap) 3184 { 3185 struct vnode *vp = ap->a_vp; 3186 int error = 0; 3187 3188 /* 3189 * Since vfs_hash_get() calls vget() and it will no longer work 3190 * for FreeBSD8 with flags == 0, I can only think of this horrible 3191 * hack to work around it. I call vfs_hash_get() with LK_EXCLOTHER 3192 * and then handle it here. All I want for this case is a v_usecount 3193 * on the vnode to use for recovery, while another thread might 3194 * hold a lock on the vnode. I have the other threads blocked, so 3195 * there isn't any race problem. 3196 */ 3197 if ((ap->a_flags & LK_TYPE_MASK) == LK_EXCLOTHER) { 3198 if ((ap->a_flags & LK_INTERLOCK) == 0) 3199 panic("ncllock1"); 3200 if ((vp->v_iflag & VI_DOOMED)) 3201 error = ENOENT; 3202 VI_UNLOCK(vp); 3203 return (error); 3204 } 3205 return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp), 3206 LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file, 3207 ap->a_line)); 3208 } 3209 3210 static int 3211 nfs_getacl(struct vop_getacl_args *ap) 3212 { 3213 int error; 3214 3215 if (ap->a_type != ACL_TYPE_NFS4) 3216 return (EOPNOTSUPP); 3217 error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3218 NULL); 3219 if (error > NFSERR_STALE) { 3220 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3221 error = EPERM; 3222 } 3223 return (error); 3224 } 3225 3226 static int 3227 nfs_setacl(struct vop_setacl_args *ap) 3228 { 3229 int error; 3230 3231 if (ap->a_type != ACL_TYPE_NFS4) 3232 return (EOPNOTSUPP); 3233 error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3234 NULL); 3235 if (error > NFSERR_STALE) { 3236 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3237 error = EPERM; 3238 } 3239 return (error); 3240 } 3241