/*-
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"
#include "opt_ffs.h"

#ifdef DIRECTIO
extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static vop_fsync_t	ffs_fsync;
static vop_lock_t	ffs_lock;
static vop_getpages_t	ffs_getpages;
static vop_read_t	ffs_read;
static vop_write_t	ffs_write;
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static vop_strategy_t	ffsext_strategy;
static vop_closeextattr_t	ffs_closeextattr;
static vop_deleteextattr_t	ffs_deleteextattr;
static vop_getextattr_t	ffs_getextattr;
static vop_listextattr_t	ffs_listextattr;
static vop_openextattr_t	ffs_openextattr;
static vop_setextattr_t	ffs_setextattr;


/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops1 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
};

struct vop_vector ffs_fifoops1 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_reallocblks =	ffs_reallocblks, /* XXX: really ??? */
};

/* Global vfs data structures for ufs. */
struct vop_vector ffs_vnodeops2 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
};

struct vop_vector ffs_fifoops2 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_lock =		ffs_lock,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
};
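
/*
 * The "...1" tables above serve UFS1 mounts and the "...2" tables UFS2
 * (the expected split; the actual selection happens at mount time, outside
 * this file).  Only the "...2" tables carry the extended attribute
 * operations, since only UFS2 inodes have an extended attribute area.
 */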

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(struct vop_fsync_args *ap)
{
	int error;

	error = ffs_syncvnode(ap->a_vp, ap->a_waitfor);
	if (error)
		return (error);
	if (ap->a_waitfor == MNT_WAIT &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_SOFTDEP))
		error = softdep_fsync(ap->a_vp);
	return (error);
}

int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (waitfor == MNT_WAIT);
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (skipmeta == 1 && bp->b_lblkno < 0)
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			VI_LOCK(vp);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		bufobj_wwait(&vp->v_bufobj, 3, 0);
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(vp)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	return (ffs_update(vp, wait));
}

static int
ffs_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	return (VOP_LOCK_APV(&ufs_vnodeops, ap));
}

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return (error);
	}
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block,
		 * depending).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have come
	 * from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Vnode op for writing.
 */
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct thread *td;
	ufs_lbn_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	extended = 0;
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffs_write: dir write");
		break;
	default:
		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset, (int)uio->uio_resid);
	}

	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td != NULL) {
		PROC_LOCK(td->td_proc);
		if (uio->uio_offset + uio->uio_resid >
		    lim_cur(td->td_proc, RLIMIT_FSIZE)) {
			psignal(td->td_proc, SIGXFSZ);
			PROC_UNLOCK(td->td_proc);
			return (EFBIG);
		}
		PROC_UNLOCK(td->td_proc);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
			bp->b_flags |= B_NOCACHE;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_SET(ip, i_size, ip->i_size);
			extended = 1;
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(vp, bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred &&
	    suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP_SET(ip, i_mode, ip->i_mode);
	}
	if (resid > uio->uio_resid)
		VN_KNOTE_UNLOCKED(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}

/*
 * get page routine
 */
static int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	int i;
	vm_page_t mreq;
	int pcount;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	VM_OBJECT_LOCK(mreq->object);
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(mreq->object);
		return (VM_PAGER_OK);
	}
	VM_OBJECT_UNLOCK(mreq->object);

	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage));
}


/*
 * Extended attribute area reading.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type (fragment or full block,
		 * depending).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/*
		 * The extended attribute area is addressed with negative
		 * logical block numbers, hence the -1 - lbn below; see
		 * the range check in ffsext_strategy().
		 */
		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have come
	 * from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Extended attribute area writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	int blkoffset, error, flags, resid, size, xfersize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ucred &&
	    suser_cred(ucred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		dp->di_mode = ip->i_mode;
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag & IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}


/*
 * Helper for the extended attribute vnode operations below.
 *
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
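/*
 * A sketch of the record layout the parser below assumes, reconstructed
 * here from the reads in ffs_findextattr() and the writes in
 * ffs_setextattr() rather than taken from a formal definition:
 *
 *	uint32_t length;	total record length, a multiple of 8
 *	uint8_t  namespace;	attribute namespace
 *	uint8_t  contentpadlen;	pad bytes following the content (eapad2)
 *	uint8_t  namelength;	length of the name that follows
 *	char     name[];	name, zero-padded to an 8-byte boundary (eapad1)
 *	u_char   content[];	data, followed by contentpadlen pad bytes
 */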
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return (-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct uio luio;
	struct iovec liovec;
	int easize, error;
	u_char *eae;

	ip = VTOI(vp);
	dp = ip->i_din2;
	easize = dp->di_extsize;

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	*p = eae;
	return (0);
}

static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	if (ip->i_ea_area != NULL)
		return (EBUSY);
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error)
		return (error);
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);
	if (ip->i_ea_area == NULL)
		return (EINVAL);
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred, td);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	free(ip->i_ea_area, M_TEMP);
	ip->i_ea_area = NULL;
	ip->i_ea_len = 0;
	ip->i_ea_error = 0;
	return (error);
}
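
/*
 * The extattr operations below share a transaction pattern: each is
 * bracketed by ffs_open_ea() and ffs_close_ea().  ffs_open_ea() reads the
 * whole attribute area into the in-core copy hung off the inode
 * (i_ea_area/i_ea_len), the operation mutates only that copy, and
 * ffs_close_ea() writes it back (commit) or discards it (abort).  When the
 * area was already opened through VOP_OPENEXTATTR(), an operation joins
 * the caller's transaction instead; the stand_alone flag in each routine
 * tracks which case applies.
 */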

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
	    lbn < 0 && lbn >= -NXADDR)
		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
	if (vp->v_type == VFIFO)
		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction begin.
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}


/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealength = eapad1 = ealen = eapad2 = 0;

	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return (ENOATTR);
	}
	/*
	 * ealength is zero here, so the bcopy() below splices the record
	 * out by moving the tail of the area down over it.
	 */
	bcopy(p, &ul, sizeof ul);
	i = p - eae + ul;
	if (ul != ealength) {
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p;
	unsigned easize;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    NULL, &p);
	if (ealen >= 0) {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = ealen;
		else if (ap->a_uio != NULL)
			error = uiomove(p, ealen, ap->a_uio);
	} else
		error = ENOATTR;
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	unsigned easize;
	uint32_t ul;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;
	pe = eae + easize;
	for (p = eae; error == 0 && p < pe; p = pn) {
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		if (pn > pe)
			break;
		p += sizeof(ul);
		if (*p++ != ap->a_attrnamespace)
			continue;
		p++;	/* pad2 */
		ealen = *p;
		/* Each entry is returned as a length byte followed by the name. */
		if (ap->a_size != NULL) {
			*ap->a_size += ealen + 1;
		} else if (ap->a_uio != NULL) {
			error = uiomove(p, ealen + 1, ap->a_uio);
		}
	}
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to set a named attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio. */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	ealen = ap->a_uio->uio_resid;
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	/* Write the new record in place: header, name, pad, then content. */
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}
	p += ealen;
	bzero(p, eapad2);

	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}