/*-
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"
#include "opt_ffs.h"

#ifdef DIRECTIO
extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static vop_fsync_t	ffs_fsync;
static vop_lock_t	ffs_lock;
static vop_getpages_t	ffs_getpages;
static vop_read_t	ffs_read;
static vop_write_t	ffs_write;
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static vop_strategy_t	ffsext_strategy;
static vop_closeextattr_t	ffs_closeextattr;
static vop_deleteextattr_t	ffs_deleteextattr;
static vop_getextattr_t	ffs_getextattr;
static vop_listextattr_t	ffs_listextattr;
static vop_openextattr_t	ffs_openextattr;
static vop_setextattr_t	ffs_setextattr;


/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops1 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
};

struct vop_vector ffs_fifoops1 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_reallocblks =	ffs_reallocblks, /* XXX: really ??? */
};

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops2 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
};

struct vop_vector ffs_fifoops2 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_lock =		ffs_lock,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
};
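
/*
 * Which vector a vnode gets is decided when the vnode is created:
 * roughly (a hedged sketch of the selection done in ffs_vfsops.c,
 * not a verbatim quote of it), UFS2 vnodes receive the *2 vectors so
 * the extended attribute operations become reachable:
 *
 *	vp->v_op = (fs->fs_magic == FS_UFS2_MAGIC) ?
 *	    &ffs_vnodeops2 : &ffs_vnodeops1;
 *
 * Any operation not listed in a vector falls through to its
 * .vop_default, so the generic UFS code in ufs_vnodeops/ufs_fifoops
 * still backs everything FFS does not override here.
 */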

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(struct vop_fsync_args *ap)
{
	int error;

	error = ffs_syncvnode(ap->a_vp, ap->a_waitfor);
	if (error)
		return (error);
	if (ap->a_waitfor == MNT_WAIT &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_SOFTDEP))
		error = softdep_fsync(ap->a_vp);
	return (error);
}

int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (waitfor == MNT_WAIT);
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			VI_LOCK(vp);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		bufobj_wwait(&vp->v_bufobj, 3, 0);
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(vp)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	return (ffs_update(vp, wait));
}
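
/*
 * For orientation (a hedged sketch of the usual call chain, not a
 * contract): fsync(2) arrives here as VOP_FSYNC(vp, MNT_WAIT, td) ->
 * ffs_fsync() -> ffs_syncvnode(), while periodic flushing by the
 * syncer takes a non-waiting path, which is what the asynchronous
 * best-effort branches above exist for.
 */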

/*
 * Snapshots require all lock requests to be exclusive.
 */
static int
ffs_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if ((VTOI(vp)->i_flags & SF_SNAPSHOT) &&
	    ((ap->a_flags & LK_TYPE_MASK) == LK_SHARED)) {
		ap->a_flags &= ~LK_TYPE_MASK;
		ap->a_flags |= LK_EXCLUSIVE;
	}
	return (VOP_LOCK_APV(&ufs_vnodeops, ap));
}
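
/*
 * Example of the effect (hedged): vn_lock(vp, LK_SHARED, td) on a
 * snapshot file returns with the lock held exclusively, exactly as
 * if the caller had asked for LK_EXCLUSIVE in the first place.
 */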

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return error;
	}
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * Size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block,
		 * depending).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have come
	 * from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
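
/*
 * Worked example of the block arithmetic in the loop above, assuming
 * a hypothetical fs_bsize of 16384: a 4096-byte read at offset 20480
 * gives lbn = lblkno() = 1 and blkoffset = blkoff() = 4096, so
 * xfersize starts at 16384 - 4096 = 12288 and is then clipped to the
 * 4096 bytes of uio_resid actually requested.
 */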

/*
 * Vnode op for writing.
 */
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct thread *td;
	ufs_lbn_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	extended = 0;
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset,
		    (int)uio->uio_resid
		);
	}

	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td != NULL) {
		PROC_LOCK(td->td_proc);
		if (uio->uio_offset + uio->uio_resid >
		    lim_cur(td->td_proc, RLIMIT_FSIZE)) {
			psignal(td->td_proc, SIGXFSZ);
			PROC_UNLOCK(td->td_proc);
			return (EFBIG);
		}
		PROC_UNLOCK(td->td_proc);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
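		/*
		 * Worked example of the rule above (hedged, assuming a
		 * hypothetical 8K fs_bsize): a 100-byte write sets
		 * BA_CLRBUF, so the allocator must hand back a fully
		 * valid buffer (reading or zeroing the other 8092
		 * bytes) before the 100 bytes are copied in; an
		 * aligned full-block write clears BA_CLRBUF and skips
		 * that work.
		 */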
		/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
			bp->b_flags |= B_NOCACHE;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_SET(ip, i_size, ip->i_size);
			extended = 1;
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred &&
	    suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP_SET(ip, i_mode, ip->i_mode);
	}
	if (resid > uio->uio_resid)
		VN_KNOTE_UNLOCKED(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}
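
/*
 * Summary of the buffer disposition policy in the write loop above:
 *
 *	IO_SYNC				-> bwrite()  (synchronous)
 *	page or dirty-buffer shortage,
 *	    or IO_ASYNC			-> bawrite() (asynchronous)
 *	full block, clustering allowed	-> cluster_write()
 *	full block, MNT_NOCLUSTERW	-> bawrite()
 *	partial block, IO_DIRECT	-> bawrite()
 *	otherwise			-> bdwrite() (delayed)
 */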

/*
 * get page routine
 */
static int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	int i;
	vm_page_t mreq;
	int pcount;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	VM_OBJECT_LOCK(mreq->object);
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(mreq->object);
		return VM_PAGER_OK;
	}
	VM_OBJECT_UNLOCK(mreq->object);

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count,
	    ap->a_reqpage);
}
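
/*
 * In ffs_extread() and ffs_extwrite() below, the UFS2 extended
 * attribute area is addressed with negative logical block numbers:
 * external attribute block N is requested as (-1 - N).  That is the
 * range (lbn < 0 && lbn >= -NXADDR) which ffsext_strategy() later
 * recognizes and routes to the inode's external attribute block
 * pointers rather than to the regular data blocks.
 */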

/*
 * Extended attribute area reading.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * Size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block,
		 * depending).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have come
	 * from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Extended attribute area writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	int blkoffset, error, flags, resid, size, xfersize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
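	/*
	 * The whole attribute area must fit in the inode's NXADDR
	 * external attribute blocks (two on UFS2), so with a typical
	 * 16K block size the limit checked below works out to 32K of
	 * packed attribute records.
	 */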
	if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ucred &&
	    suser_cred(ucred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		dp->di_mode = ip->i_mode;
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}
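
/*
 * On-disk extended attribute record layout, as parsed by
 * ffs_findextattr() below and built by ffs_setextattr():
 *
 *	uint32_t  length	total record length, a multiple of 8
 *	uint8_t   namespace	attribute namespace
 *	uint8_t   contentpadlen	bytes of padding after the content (eapad2)
 *	uint8_t   namelen	length of the name
 *	char      name[]	name, padded so the header ends on an
 *				8-byte boundary (eapad1)
 *	content[]		the attribute data itself
 *
 * so the usable content length is length - header - contentpadlen.
 */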

/*
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return (-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct uio luio;
	struct iovec liovec;
	int easize, error;
	u_char *eae;

	ip = VTOI(vp);
	dp = ip->i_din2;
	easize = dp->di_extsize;

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	*p = eae;
	return (0);
}

static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	if (ip->i_ea_area != NULL)
		return (EBUSY);
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error)
		return (error);
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);
	if (ip->i_ea_area == NULL)
		return (EINVAL);
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred, td);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	free(ip->i_ea_area, M_TEMP);
	ip->i_ea_area = NULL;
	ip->i_ea_len = 0;
	ip->i_ea_error = 0;
	return (error);
}
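
/*
 * Together these helpers form a small transaction facility: a sketch
 * of the stand-alone pattern used by the vnode operations below
 * (hedged pseudocode, error handling omitted):
 *
 *	error = ffs_open_ea(vp, cred, td);	 snapshot EAs into memory
 *	...modify ip->i_ea_area / ip->i_ea_len...
 *	error = ffs_close_ea(vp, 1, cred, td);	 1 = commit, 0 = abort
 *
 * Callers that arrive with a transaction already open (i_ea_area !=
 * NULL) simply batch their changes and leave the final commit to
 * VOP_CLOSEEXTATTR().
 */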

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
	    lbn < 0 && lbn >= -NXADDR)
		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
	if (vp->v_type == VFIFO)
		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction start.
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}


/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealength = eapad1 = ealen = eapad2 = 0;

	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return (ENOATTR);
	}
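	/*
	 * Splice the record out.  ealength is still zero here --
	 * deletion is a replacement with a zero-length record -- so
	 * the bcopy() below slides everything after the victim down
	 * over it and easize shrinks by the old record length ul.
	 */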
	bcopy(p, &ul, sizeof ul);
	i = p - eae + ul;
	if (ul != ealength) {
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p;
	unsigned easize;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    NULL, &p);
	if (ealen >= 0) {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = ealen;
		else if (ap->a_uio != NULL)
			error = uiomove(p, ealen, ap->a_uio);
	} else
		error = ENOATTR;
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	unsigned easize;
	uint32_t ul;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;
	pe = eae + easize;
	for (p = eae; error == 0 && p < pe; p = pn) {
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		if (pn > pe)
			break;
		p += sizeof(ul);
		if (*p++ != ap->a_attrnamespace)
			continue;
		p++;	/* pad2 */
		ealen = *p;
		if (ap->a_size != NULL) {
			*ap->a_size += ealen + 1;
		} else if (ap->a_uio != NULL) {
			error = uiomove(p, ealen + 1, ap->a_uio);
		}
	}
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}
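
/*
 * Note the list format produced above: one length byte followed by
 * that many name bytes for each attribute, with no NUL terminators.
 * This is the same representation that extattr_list_file(2) delivers
 * to userland.
 */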

/*
 * Vnode operation to set a named attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealen = ap->a_uio->uio_resid;
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}
	p += ealen;
	bzero(p, eapad2);

	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}
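
/*
 * Worked example of the record sizing in ffs_setextattr() above
 * (hedged, for a hypothetical 3-character name carrying 20 bytes of
 * data): the header is 4 (length word) + 3 (namespace, content pad
 * length, name length) + 3 (name) = 10 bytes, padded by eapad1 = 6 to
 * a 16-byte boundary; the 20 data bytes are padded by eapad2 = 4; the
 * total record length written into the leading uint32_t is 40.
 */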