/*
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 * @(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"

#ifdef DIRECTIO
extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static vop_fsync_t	ffs_fsync;
static vop_getpages_t	ffs_getpages;
static vop_read_t	ffs_read;
static vop_write_t	ffs_write;
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static vop_strategy_t	ffsext_strategy;
static vop_closeextattr_t	ffs_closeextattr;
static vop_deleteextattr_t	ffs_deleteextattr;
static vop_getextattr_t	ffs_getextattr;
static vop_listextattr_t	ffs_listextattr;
static vop_openextattr_t	ffs_openextattr;
static vop_setextattr_t	ffs_setextattr;

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
};

struct vop_vector ffs_fifoops = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
};
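
/*
 * Anything not listed in the vectors above falls through to .vop_default,
 * so FFS vnodes are served by the generic UFS code for every operation
 * FFS does not override.  A minimal sketch of that fallback, assuming the
 * usual vop_vector chaining semantics (illustration only, not the kernel's
 * actual dispatch code):
 *
 *	static int
 *	vop_read_dispatch(struct vop_read_args *ap)
 *	{
 *		struct vop_vector *op = ap->a_vp->v_op;
 *
 *		while (op->vop_read == NULL)
 *			op = op->vop_default;	// e.g. ffs -> ufs
 *		return (op->vop_read(ap));
 *	}
 */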
/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (ap->a_waitfor == MNT_WAIT);
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			VI_LOCK(vp);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		bufobj_wwait(&vp->v_bufobj, 3, 0);
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	return (UFS_UPDATE(vp, wait));
}
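
/*
 * A note on the pass count in ffs_fsync() (an inference from the code, not
 * a statement from the original authors): passes starts at NIADDR + 1
 * because flushing a dirty data block can redirty buffers at up to NIADDR
 * levels of indirect blocks above it, and each additional sweep of the
 * dirty list can retire one more level of that chain.
 */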

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return (error);
	}
#endif

	GIANT_REQUIRED;

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);

			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for.  Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it; so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
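
/*
 * A worked example of the per-iteration transfer arithmetic above (the
 * numbers are illustrative, not from the sources): with fs_bsize = 16384,
 * a large read starting at offset 20000 computes
 *
 *	lbn       = lblkno(fs, 20000) = 1
 *	blkoffset = blkoff(fs, 20000) = 20000 - 16384 = 3616
 *	xfersize  = 16384 - 3616      = 12768
 *
 * i.e. the first iteration finishes out logical block 1, and subsequent
 * iterations proceed a full block at a time unless clipped by uio_resid
 * or by bytesinfile at the end of the file.
 */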

/*
 * Vnode op for writing.
 */
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct thread *td;
	ufs_lbn_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	GIANT_REQUIRED;

	extended = 0;
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset, (int)uio->uio_resid);
	}

	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td != NULL) {
		PROC_LOCK(td->td_proc);
		if (uio->uio_offset + uio->uio_resid >
		    lim_cur(td->td_proc, RLIMIT_FSIZE)) {
			psignal(td->td_proc, SIGXFSZ);
			PROC_UNLOCK(td->td_proc);
			return (EFBIG);
		}
		PROC_UNLOCK(td->td_proc);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
			bp->b_flags |= B_NOCACHE;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_SET(ip, i_size, ip->i_size);
			extended = 1;
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if IO_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
		    buf_dirty_count_severe() ||
		    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred &&
	    suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP_SET(ip, i_mode, ip->i_mode);
	}
	if (resid > uio->uio_resid)
		VN_KNOTE_UNLOCKED(vp, NOTE_WRITE |
		    (extended ? NOTE_EXTEND : 0));
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);
	return (error);
}
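
/*
 * The buffer-release policy in the write loop above, summarized (a reading
 * of the code, not an authoritative table):
 *
 *	IO_SYNC                            -> bwrite()  (synchronous)
 *	page/buffer shortage or IO_ASYNC   -> bawrite() (async, clusterable)
 *	full block written, clustering on  -> cluster_write()
 *	full block written, MNT_NOCLUSTERW -> bawrite()
 *	partial block, IO_DIRECT           -> bawrite()
 *	otherwise                          -> bdwrite() (delayed write)
 */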

/*
 * get page routine
 */
static int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	int i;
	vm_page_t mreq;
	int pcount;

	GIANT_REQUIRED;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	VM_OBJECT_LOCK(mreq->object);
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(mreq->object);
		return (VM_PAGER_OK);
	}
	VM_OBJECT_UNLOCK(mreq->object);

	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage));
}

/*
 * Extended attribute area reading.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;

	GIANT_REQUIRED;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it; so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
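
/*
 * A note on the "-1 - lbn" addressing above (inferred from the code): the
 * extended attribute area shares the vnode's buffer space with file data,
 * so its logical blocks are mapped to negative block numbers, with ext
 * block 0 at lbn -1, block 1 at lbn -2, and so on down to -NXADDR.  This
 * keeps ext-area buffers distinct from data buffers on the same vnode,
 * and is what ffsext_strategy() keys on below.
 */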

/*
 * Extended attribute area writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	int blkoffset, error, flags, resid, size, xfersize;

	GIANT_REQUIRED;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if IO_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
		    buf_dirty_count_severe() ||
		    xfersize + blkoffset == fs->fs_bsize ||
		    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ucred &&
	    suser_cred(ucred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		dp->di_mode = ip->i_mode;
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    IO_EXT | (ioflag & IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);
	return (error);
}

/*
 * Helper to retrieve a named extended attribute.
 *
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
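/*
 * The EA area scanned below is a packed sequence of variable-length
 * records.  Their shape, as a sketch derived from ffs_findextattr() and
 * ffs_setextattr() rather than a formal on-disk specification:
 *
 *	uint32_t length;	// total record size, a multiple of 8
 *	uint8_t  namespace;	// attribute namespace
 *	uint8_t  contentpadlen;	// zero bytes after the content
 *	uint8_t  namelen;	// length of name[]
 *	char     name[namelen];
 *	char     pad1[];	// zeros aligning the content to 8 bytes
 *	char     content[];	// the attribute value
 *	char     pad2[contentpadlen];
 *
 * ffs_findextattr() returns the content length and, optionally, a pointer
 * to the whole record (*eap) and to the content (*eac).
 */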
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return (-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct uio luio;
	struct iovec liovec;
	int easize, error;
	u_char *eae;

	ip = VTOI(vp);
	dp = ip->i_din2;
	easize = dp->di_extsize;

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	*p = eae;
	return (0);
}

static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	if (ip->i_ea_area != NULL)
		return (EBUSY);
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error)
		return (error);
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);
	if (ip->i_ea_area == NULL)
		return (EINVAL);
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred, td);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	free(ip->i_ea_area, M_TEMP);
	ip->i_ea_area = NULL;
	ip->i_ea_len = 0;
	ip->i_ea_error = 0;
	return (error);
}
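
/*
 * The two helpers above form a small transaction model (a summary of the
 * code, not an authoritative protocol description): ffs_open_ea() snapshots
 * the whole EA area into ip->i_ea_area, callers mutate that in-core copy,
 * and ffs_close_ea() either writes it back (commit) or discards it (abort).
 * A typical stand-alone caller, in sketch form:
 *
 *	error = ffs_open_ea(vp, cred, td);
 *	if (error == 0) {
 *		// ... edit ip->i_ea_area / ip->i_ea_len ...
 *		error = ffs_close_ea(vp, 1, cred, td);	// 1 = commit
 *	}
 */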

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
	    lbn < 0 && lbn >= -NXADDR)
		return (ufs_vnodeops.vop_strategy(ap));
	if (vp->v_type == VFIFO)
		return (ufs_fifoops.vop_strategy(ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction start (open).
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnodeops.vop_openextattr(ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnodeops.vop_closeextattr(ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnodeops.vop_deleteextattr(ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealength = eapad1 = ealen = eapad2 = 0;

	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return (ENOATTR);
	}
	bcopy(p, &ul, sizeof ul);
	i = p - eae + ul;
	if (ul != ealength) {
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}
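
/*
 * The splice above is worth spelling out (a reading of the code): for a
 * delete, ealength stays 0, so with ul the on-disk length of the doomed
 * record and i its end offset within the copy,
 *
 *	bcopy(p + ul, p + 0, easize - i);	// slide the tail down over it
 *	easize += (0 - ul);			// the area shrinks by ul bytes
 *
 * ffs_setextattr() reuses the same arithmetic with a nonzero ealength to
 * grow or shrink an existing record in place.
 */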

/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p;
	unsigned easize;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnodeops.vop_getextattr(ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    NULL, &p);
	if (ealen >= 0) {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = ealen;
		else if (ap->a_uio != NULL)
			error = uiomove(p, ealen, ap->a_uio);
	} else
		error = ENOATTR;
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	unsigned easize;
	uint32_t ul;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnodeops.vop_listextattr(ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;
	pe = eae + easize;
	for (p = eae; error == 0 && p < pe; p = pn) {
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		if (pn > pe)
			break;
		p += sizeof(ul);
		if (*p++ != ap->a_attrnamespace)
			continue;
		p++;	/* pad2 */
		ealen = *p;
		if (ap->a_size != NULL) {
			*ap->a_size += ealen + 1;
		} else if (ap->a_uio != NULL) {
			error = uiomove(p, ealen + 1, ap->a_uio);
		}
	}
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}
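
/*
 * Note the list format emitted above: one length byte followed by the name,
 * for each attribute in the requested namespace.  By the record layout, p
 * points at the namelen byte just before the name, so the single
 * uiomove(p, ealen + 1, ...) copies both at once.  For attributes named
 * "md5" and "sha1" (illustrative names only) the caller would receive
 *
 *	\3 m d 5 \4 s h a 1
 */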

/*
 * Vnode operation to set a named attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnodeops.vop_setextattr(ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealen = ap->a_uio->uio_resid;
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}
	p += ealen;
	bzero(p, eapad2);

	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}