/*-
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"
#include "opt_ffs.h"

#ifdef DIRECTIO
extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static vop_fsync_t	ffs_fsync;
static vop_lock1_t	ffs_lock;
static vop_getpages_t	ffs_getpages;
static vop_read_t	ffs_read;
static vop_write_t	ffs_write;
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static vop_strategy_t	ffsext_strategy;
static vop_closeextattr_t	ffs_closeextattr;
static vop_deleteextattr_t	ffs_deleteextattr;
static vop_getextattr_t	ffs_getextattr;
static vop_listextattr_t	ffs_listextattr;
static vop_openextattr_t	ffs_openextattr;
static vop_setextattr_t	ffs_setextattr;
static vop_vptofh_t	ffs_vptofh;


/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops1 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock1 =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_vptofh =		ffs_vptofh,
};

struct vop_vector ffs_fifoops1 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_reallocblks =	ffs_reallocblks, /* XXX: really ??? */
	.vop_vptofh =		ffs_vptofh,
};

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops2 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock1 =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};

struct vop_vector ffs_fifoops2 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_lock1 =		ffs_lock,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};
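/*
 * The "1" vectors above serve vnodes on UFS1 file systems, which have
 * no extended attribute area; the "2" vectors add the extattr
 * operations for UFS2 (compare the FS_UFS2_MAGIC assertions in
 * ffs_extread() and ffs_extwrite() below).  A minimal sketch of how
 * the mount code might pick a vector when a vnode is created; this is
 * illustrative only, and the field and constant names are assumptions,
 * not the actual ffs_vfsops.c code:
 */
#if 0
	if (ip->i_ump->um_fstype == UFS1)	/* assumed field/constant */
		vp->v_op = &ffs_vnodeops1;
	else
		vp->v_op = &ffs_vnodeops2;
#endif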

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(struct vop_fsync_args *ap)
{
	int error;

	error = ffs_syncvnode(ap->a_vp, ap->a_waitfor);
	if (error)
		return (error);
	if (ap->a_waitfor == MNT_WAIT &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_SOFTDEP))
		error = softdep_fsync(ap->a_vp);
	return (error);
}

int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
	struct inode *ip = VTOI(vp);
	struct bufobj *bo;
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (waitfor == MNT_WAIT);
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
	bo = &vp->v_bufobj;

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	BO_LOCK(bo);
loop:
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		if (!wait && !LIST_EMPTY(&bp->b_dep) &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			BO_LOCK(bo);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		BO_LOCK(bo);
		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		bufobj_wwait(bo, 3, 0);
		BO_UNLOCK(bo);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(vp)) != 0)
			return (error);
		s = splbio();

		BO_LOCK(bo);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean.  Thus we give block devices a
			 * good effort, then just give up.  For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef INVARIANTS
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	BO_UNLOCK(bo);
	splx(s);
	return (ffs_update(vp, wait));
}
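/*
 * A hedged usage sketch (not compiled): callers reach ffs_fsync()
 * through VOP_FSYNC() with the vnode locked.  The exact signatures
 * below are assumptions about the surrounding kernel interfaces of
 * this vintage, not code taken from a caller.
 */
#if 0
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(vp, MNT_WAIT, td);	/* wait for all dirty buffers */
	VOP_UNLOCK(vp, 0);
#endif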

static int
ffs_lock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
		char *file;
		int line;
	} */ *ap;
{
#ifndef NO_FFS_SNAPSHOT
	struct vnode *vp;
	int flags;
	struct lock *lkp;
	int result;

	switch (ap->a_flags & LK_TYPE_MASK) {
	case LK_SHARED:
	case LK_UPGRADE:
	case LK_EXCLUSIVE:
		vp = ap->a_vp;
		flags = ap->a_flags;
		for (;;) {
#ifdef DEBUG_VFS_LOCKS
			KASSERT(vp->v_holdcnt != 0,
			    ("ffs_lock %p: zero hold count", vp));
#endif
			lkp = vp->v_vnlock;
			result = _lockmgr_args(lkp, flags, VI_MTX(vp),
			    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
			    ap->a_file, ap->a_line);
			if (lkp == vp->v_vnlock || result != 0)
				break;
			/*
			 * Apparent success, except that the vnode
			 * mutated between snapshot file vnode and
			 * regular file vnode while this process
			 * slept.  The lock currently held is not the
			 * right lock.  Release it, and try to get the
			 * new lock.
			 */
			(void) _lockmgr_args(lkp, LK_RELEASE, NULL,
			    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
			    ap->a_file, ap->a_line);
			if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
			    (LK_INTERLOCK | LK_NOWAIT))
				return (EBUSY);
			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
			flags &= ~LK_INTERLOCK;
		}
		break;
	default:
		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
	}
	return (result);
#else
	return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
#endif
}

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return error;
	}
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, blkoffset + uio->uio_resid,
			    seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for.  Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0 &&
	    (ip->i_flag & IN_ACCESS) == 0) {
		VI_LOCK(vp);
		ip->i_flag |= IN_ACCESS;
		VI_UNLOCK(vp);
	}
	return (error);
}
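/*
 * Worked example (illustrative numbers only) of the per-block carving
 * done by the loop in ffs_read() above, with fs_bsize = 8192:
 *
 *	uio_offset = 10000, uio_resid = 20000
 *	pass 1: lbn = 1, blkoffset = 1808, xfersize = 8192 - 1808 = 6384
 *	pass 2: offset 16384 -> lbn = 2, blkoffset = 0, xfersize = 8192
 *	pass 3: offset 24576 -> lbn = 3, blkoffset = 0, xfersize = 5424
 *
 * xfersize is additionally clamped by bytesinfile, so the loop never
 * moves data from beyond i_size.
 */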

/*
 * Vnode op for writing.
 */
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct thread *td;
	ufs_lbn_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset,
		    (int)uio->uio_resid
		);
	}

	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td != NULL) {
		PROC_LOCK(td->td_proc);
		if (uio->uio_offset + uio->uio_resid >
		    lim_cur(td->td_proc, RLIMIT_FSIZE)) {
			psignal(td->td_proc, SIGXFSZ);
			PROC_UNLOCK(td->td_proc);
			return (EFBIG);
		}
		PROC_UNLOCK(td->td_proc);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
			bp->b_flags |= B_NOCACHE;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_SET(ip, i_size, ip->i_size);
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
	    ap->a_cred) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) {
			ip->i_mode &= ~(ISUID | ISGID);
			DIP_SET(ip, i_mode, ip->i_mode);
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}

/*
 * get page routine
 */
static int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	int i;
	vm_page_t mreq;
	int pcount;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	VM_OBJECT_LOCK(mreq->object);
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(mreq->object);
		return VM_PAGER_OK;
	}
	VM_OBJECT_UNLOCK(mreq->object);

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count,
	    ap->a_reqpage);
}
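/*
 * The extended attribute area handled by the routines below is
 * addressed with negative logical block numbers: block lbn of the
 * external area is requested as (-1 - lbn), i.e. blocks -1 down to
 * -NXADDR.  ffsext_strategy() further down relies on the same
 * convention to tell extattr I/O apart from ordinary fifo I/O.
 */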

/*
 * Extended attribute area reading.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");

#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0 &&
	    (ip->i_flag & IN_ACCESS) == 0) {
		VI_LOCK(vp);
		ip->i_flag |= IN_ACCESS;
		VI_UNLOCK(vp);
	}
	return (error);
}

/*
 * Extended attribute area writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	int blkoffset, error, flags, resid, size, xfersize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

	KASSERT(!(ip->i_flag & IN_SPACECOUNTED), ("inode %u: inode is dead",
	    ip->i_number));

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID, 0)) {
			ip->i_mode &= ~(ISUID | ISGID);
			dp->di_mode = ip->i_mode;
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}
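/*
 * Layout of a single extended attribute record, as reconstructed from
 * ffs_findextattr() and ffs_setextattr() below (the total length ul is
 * always a multiple of 8):
 *
 *	uint32_t ul;		total record length, including padding
 *	uint8_t	 nspace;	attribute namespace
 *	uint8_t	 contentpadlen;	zero bytes following the content (eapad2)
 *	uint8_t	 namelen;	length of the attribute name
 *	char	 name[namelen];	name, followed by eapad1 zero bytes
 *	char	 content[];	data, followed by contentpadlen zero bytes
 *
 * The header is sizeof(uint32_t) + 3 + namelen bytes, padded out with
 * eapad1 = (8 - header % 8) % 8 zero bytes before the content, so the
 * content length is ul - header - eapad1 - contentpadlen.
 */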
/*
 * Vnode operation to retrieve a named extended attribute.
 *
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return(-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct uio luio;
	struct iovec liovec;
	int easize, error;
	u_char *eae;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;
	easize = dp->di_extsize;
	if ((uoff_t)easize + extra > NXADDR * fs->fs_bsize)
		return (EFBIG);

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return(error);
	}
	*p = eae;
	return (0);
}
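/*
 * ffs_open_ea() and ffs_close_ea() below implement a simple
 * transaction model for the EA area: open snapshots the on-disk area
 * into ip->i_ea_area, the extattr operations edit that in-memory copy,
 * and close either writes it back (commit != 0) or discards it.  The
 * "stand_alone" pattern in ffs_deleteextattr(), ffs_getextattr(),
 * ffs_listextattr() and ffs_setextattr() wraps a single operation in
 * such a transaction when no surrounding VOP_OPENEXTATTR() /
 * VOP_CLOSEEXTATTR() pair is active.
 */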
static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	if (ip->i_ea_area != NULL)
		return (EBUSY);
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error)
		return (error);
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);
	if (ip->i_ea_area == NULL)
		return (EINVAL);
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred, td);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	free(ip->i_ea_area, M_TEMP);
	ip->i_ea_area = NULL;
	ip->i_ea_len = 0;
	ip->i_ea_error = 0;
	return (error);
}

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
	    lbn < 0 && lbn >= -NXADDR)
		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
	if (vp->v_type == VFIFO)
		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}


/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealength = eapad1 = ealen = eapad2 = 0;

	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return(ENOATTR);
	}
	bcopy(p, &ul, sizeof ul);
	i = p - eae + ul;
	if (ul != ealength) {
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p;
	unsigned easize;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    NULL, &p);
	if (ealen >= 0) {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = ealen;
		else if (ap->a_uio != NULL)
			error = uiomove(p, ealen, ap->a_uio);
	} else
		error = ENOATTR;
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	unsigned easize;
	uint32_t ul;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;
	pe = eae + easize;
	for(p = eae; error == 0 && p < pe; p = pn) {
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		if (pn > pe)
			break;
		p += sizeof(ul);
		if (*p++ != ap->a_attrnamespace)
			continue;
		p++;	/* pad2 */
		ealen = *p;
		if (ap->a_size != NULL) {
			*ap->a_size += ealen + 1;
		} else if (ap->a_uio != NULL) {
			error = uiomove(p, ealen + 1, ap->a_uio);
		}
	}
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return(error);
}
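/*
 * Note on the output format of ffs_listextattr() above: for each
 * attribute in the requested namespace it emits one length byte
 * followed by that many bytes of name, with no NUL terminator; this
 * appears to be the record format that the extattr_list_*(2) system
 * calls pass through to userland.
 */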

/*
 * Vnode operation to set a named attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealen = ap->a_uio->uio_resid;
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return(error);
	}
	p += ealen;
	bzero(p, eapad2);

	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}
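/*
 * ffs_vptofh() below packs the inode number and generation number into
 * the opaque file handle used for NFS exports; the generation number
 * is what lets a later lookup detect a stale handle.  A hedged sketch
 * of the round trip (the signatures are assumptions about this vintage
 * of the VFS; the decode side lives in ffs_fhtovp() elsewhere):
 */
#if 0
	struct fid fh;
	struct vnode *nvp;

	error = VOP_VPTOFH(vp, &fh);		/* encode vnode -> handle */
	error = VFS_FHTOVP(mp, &fh, &nvp);	/* decode handle -> vnode */
#endif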
/*
 * Vnode pointer to File handle
 */
static int
ffs_vptofh(struct vop_vptofh_args *ap)
/*
vop_vptofh {
	IN struct vnode *a_vp;
	IN struct fid *a_fhp;
};
*/
{
	struct inode *ip;
	struct ufid *ufhp;

	ip = VTOI(ap->a_vp);
	ufhp = (struct ufid *)ap->a_fhp;
	ufhp->ufid_len = sizeof(struct ufid);
	ufhp->ufid_ino = ip->i_number;
	ufhp->ufid_gen = ip->i_gen;
	return (0);
}