/*-
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"
#include "opt_ffs.h"

#ifdef DIRECTIO
extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static vop_fsync_t	ffs_fsync;
static vop_lock1_t	ffs_lock;
static vop_getpages_t	ffs_getpages;
static vop_read_t	ffs_read;
static vop_write_t	ffs_write;
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static vop_strategy_t	ffsext_strategy;
static vop_closeextattr_t	ffs_closeextattr;
static vop_deleteextattr_t	ffs_deleteextattr;
static vop_getextattr_t	ffs_getextattr;
static vop_listextattr_t	ffs_listextattr;
static vop_openextattr_t	ffs_openextattr;
static vop_setextattr_t	ffs_setextattr;
static vop_vptofh_t	ffs_vptofh;


/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops1 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock1 =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_vptofh =		ffs_vptofh,
};

struct vop_vector ffs_fifoops1 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_reallocblks =	ffs_reallocblks, /* XXX: really ??? */
	.vop_vptofh =		ffs_vptofh,
};

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops2 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock1 =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};

struct vop_vector ffs_fifoops2 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_lock1 =		ffs_lock,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};
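
/*
 * Note: the "1" vectors omit the extended attribute operations, which
 * require the external attribute area carried only by the UFS2 dinode;
 * the "2" vectors add them.  The appropriate vector is presumably
 * selected when the vnode is created (see ffs_vfsops.c), according to
 * whether the filesystem is UFS1 or UFS2.
 */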

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(struct vop_fsync_args *ap)
{
	int error;

	error = ffs_syncvnode(ap->a_vp, ap->a_waitfor);
	if (error)
		return (error);
	if (ap->a_waitfor == MNT_WAIT &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_SOFTDEP))
		error = softdep_fsync(ap->a_vp);
	return (error);
}
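
/*
 * Flush all the dirty buffers associated with a vnode.  On a
 * synchronous (MNT_WAIT) request the data blocks are written on the
 * first pass and up to NIADDR + 1 further passes are made over the
 * metadata before giving up on block devices; the inode itself is
 * then written out via ffs_update().
 */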
int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
	struct inode *ip = VTOI(vp);
	struct bufobj *bo;
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (waitfor == MNT_WAIT);
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
	bo = &vp->v_bufobj;

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	BO_LOCK(bo);
loop:
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		if (!wait && !LIST_EMPTY(&bp->b_dep) &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			BO_LOCK(bo);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		BO_LOCK(bo);
		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		bufobj_wwait(bo, 3, 0);
		BO_UNLOCK(bo);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(vp)) != 0)
			return (error);
		s = splbio();

		BO_LOCK(bo);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef INVARIANTS
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	BO_UNLOCK(bo);
	splx(s);
	return (ffs_update(vp, wait));
}
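
/*
 * Acquire the vnode lock.  This cannot simply defer to lockmgr because
 * v_vnlock may change identity while we sleep, e.g. when a vnode
 * mutates between a snapshot file and a regular file, so the attempt
 * is retried until the lock we hold is the vnode's current lock.
 */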
static int
ffs_lock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
		char *file;
		int line;
	} */ *ap;
{
#ifndef NO_FFS_SNAPSHOT
	struct vnode *vp;
	int flags;
	struct lock *lkp;
	int result;

	switch (ap->a_flags & LK_TYPE_MASK) {
	case LK_SHARED:
	case LK_UPGRADE:
	case LK_EXCLUSIVE:
		vp = ap->a_vp;
		flags = ap->a_flags;
		for (;;) {
			/*
			 * vnode interlock must be held to ensure that
			 * the possibly external lock isn't freed,
			 * e.g. when mutating from snapshot file vnode
			 * to regular file vnode.
			 */
			if ((flags & LK_INTERLOCK) == 0) {
				VI_LOCK(vp);
				flags |= LK_INTERLOCK;
			}
			lkp = vp->v_vnlock;
			result = _lockmgr_args(lkp, flags, VI_MTX(vp),
			    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
			    ap->a_file, ap->a_line);
			if (lkp == vp->v_vnlock || result != 0)
				break;
			/*
			 * Apparent success, except that the vnode
			 * mutated between snapshot file vnode and
			 * regular file vnode while this process
			 * slept.  The lock currently held is not the
			 * right lock.  Release it, and try to get the
			 * new lock.
			 */
			(void) _lockmgr_args(lkp, LK_RELEASE, VI_MTX(vp),
			    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
			    ap->a_file, ap->a_line);
			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
			flags &= ~LK_INTERLOCK;
		}
		break;
	default:
		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
	}
	return (result);
#else
	return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
#endif
}

/*
 * Vnode op for reading.
 */
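/*
 * Reads are satisfied a block at a time from the buffer cache;
 * depending on the mount options and the detected access pattern,
 * each block is brought in with cluster_read(), with breadn()
 * readahead, or with a plain bread().
 */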
/* ARGSUSED */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return error;
	}
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, blkoffset + uio->uio_resid,
			    seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for.  Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0 &&
	    (ip->i_flag & IN_ACCESS) == 0) {
		VI_LOCK(vp);
		ip->i_flag |= IN_ACCESS;
		VI_UNLOCK(vp);
	}
	return (error);
}

/*
 * Vnode op for writing.
 */
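/*
 * Each iteration of the loop below allocates the block with
 * UFS_BALLOC(), copies the user data into it with uiomove(), and then
 * queues the buffer according to IO_SYNC, memory pressure, and
 * clustering eligibility.
 */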
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct thread *td;
	ufs_lbn_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset,
		    (int)uio->uio_resid
		);
	}

	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td != NULL) {
		PROC_LOCK(td->td_proc);
		if (uio->uio_offset + uio->uio_resid >
		    lim_cur(td->td_proc, RLIMIT_FSIZE)) {
			psignal(td->td_proc, SIGXFSZ);
			PROC_UNLOCK(td->td_proc);
			return (EFBIG);
		}
		PROC_UNLOCK(td->td_proc);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
			bp->b_flags |= B_NOCACHE;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_SET(ip, i_size, ip->i_size);
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
	    ap->a_cred) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) {
			ip->i_mode &= ~(ISUID | ISGID);
			DIP_SET(ip, i_mode, ip->i_mode);
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}

/*
 * get page routine
 */
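/*
 * If any DEV_BSIZE chunk of the requested page is already valid, the
 * page is completed here (zeroing the invalid portions) and the other
 * pages in the run are freed; otherwise the request falls through to
 * the generic vnode pager.
 */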
static int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	int i;
	vm_page_t mreq;
	int pcount;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	VM_OBJECT_LOCK(mreq->object);
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(mreq->object);
		return VM_PAGER_OK;
	}
	VM_OBJECT_UNLOCK(mreq->object);

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count,
	    ap->a_reqpage);
}


/*
 * Extended attribute area reading.
 */
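/*
 * This mirrors ffs_read(), but transfers from the external attribute
 * area of a UFS2 inode (di_extsize bytes), which is addressed with
 * negative logical block numbers (-1 - lbn).
 */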
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");

#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0 &&
	    (ip->i_flag & IN_ACCESS) == 0) {
		VI_LOCK(vp);
		ip->i_flag |= IN_ACCESS;
		VI_UNLOCK(vp);
	}
	return (error);
}

/*
 * Extended attribute area writing.
 */
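/*
 * As with ffs_extread(), this parallels the regular write path; the
 * attribute area is limited to NXADDR blocks, so writes beyond
 * NXADDR * fs_bsize bytes fail with EFBIG.
 */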
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	int blkoffset, error, flags, resid, size, xfersize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

	KASSERT(!(ip->i_flag & IN_SPACECOUNTED), ("inode %u: inode is dead",
	    ip->i_number));

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID, 0)) {
			ip->i_mode &= ~(ISUID | ISGID);
			dp->di_mode = ip->i_mode;
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}


/*
 * Vnode operation to retrieve a named extended attribute.
 *
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
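/*
 * As assumed by ffs_findextattr() and constructed by ffs_setextattr(),
 * each record in the attribute area is laid out as:
 *	uint32_t  total record length, including all padding
 *	uint8_t   attribute namespace
 *	uint8_t   length of the padding after the value (eapad2)
 *	uint8_t   name length
 *	name, zero padded to an 8 byte boundary (eapad1)
 *	value, zero padded to an 8 byte boundary (eapad2)
 */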
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return(-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct uio luio;
	struct iovec liovec;
	int easize, error;
	u_char *eae;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;
	easize = dp->di_extsize;
	if ((uoff_t)easize + extra > NXADDR * fs->fs_bsize)
		return (EFBIG);

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return(error);
	}
	*p = eae;
	return (0);
}
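
/*
 * Vnode extattr transaction begin: read the entire attribute area into
 * a malloc'ed copy (i_ea_area) that the operations below modify and
 * that ffs_close_ea() later commits or discards.
 */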
static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	if (ip->i_ea_area != NULL)
		return (EBUSY);
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error)
		return (error);
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);
	if (ip->i_ea_area == NULL)
		return (EINVAL);
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred, td);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	free(ip->i_ea_area, M_TEMP);
	ip->i_ea_area = NULL;
	ip->i_ea_len = 0;
	ip->i_ea_error = 0;
	return (error);
}

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
	    lbn < 0 && lbn >= -NXADDR)
		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
	if (vp->v_type == VFIFO)
		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction begin.
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}


/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}
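
/*
 * The operations below may be called with an EA transaction already
 * open on the vnode; when not ("stand alone"), they open one
 * themselves and commit or abort it before returning.
 */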

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealength = eapad1 = ealen = eapad2 = 0;

	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return(ENOATTR);
	}
	bcopy(p, &ul, sizeof ul);
	i = p - eae + ul;
	if (ul != ealength) {
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
 */
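/*
 * ENOATTR is returned when the attribute is not found; the attribute
 * length is reported via *a_size when requested, otherwise the value
 * is copied out through the uio.
 */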
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p;
	unsigned easize;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    NULL, &p);
	if (ealen >= 0) {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = ealen;
		else if (ap->a_uio != NULL)
			error = uiomove(p, ealen, ap->a_uio);
	} else
		error = ENOATTR;
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	unsigned easize;
	uint32_t ul;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;
	pe = eae + easize;
	for(p = eae; error == 0 && p < pe; p = pn) {
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		if (pn > pe)
			break;
		p += sizeof(ul);
		if (*p++ != ap->a_attrnamespace)
			continue;
		p++;	/* pad2 */
		ealen = *p;
		if (ap->a_size != NULL) {
			*ap->a_size += ealen + 1;
		} else if (ap->a_uio != NULL) {
			error = uiomove(p, ealen + 1, ap->a_uio);
		}
	}
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode operation to set a named attribute.
 */
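/*
 * The record is rebuilt in a temporary copy of the EA area: an
 * existing attribute of the same record size is overwritten in place,
 * otherwise the area is spliced or extended, then the copy is swapped
 * in and (in the stand alone case) committed by ffs_close_ea().
 */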
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealen = ap->a_uio->uio_resid;
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return(error);
	}
	p += ealen;
	bzero(p, eapad2);

	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode pointer to File handle
 */
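/*
 * The handle identifies the file by inode number and generation; the
 * generation presumably lets consumers such as the NFS server detect
 * a stale handle after the inode has been freed and reused.
 */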
(0); 1746 } 1747