/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

static int ffs_fsync(struct vop_fsync_args *);
static int ffs_getpages(struct vop_getpages_args *);
static int ffs_read(struct vop_read_args *);
static int ffs_write(struct vop_write_args *);
static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
	    struct ucred *cred);
static int ffsext_strategy(struct vop_strategy_args *);
static int ffs_closeextattr(struct vop_closeextattr_args *);
static int ffs_getextattr(struct vop_getextattr_args *);
static int ffs_openextattr(struct vop_openextattr_args *);
static int ffs_setextattr(struct vop_setextattr_args *);


/* Global vfs data structures for ufs. */
vop_t **ffs_vnodeop_p;
static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc, (vop_t *) ufs_vnoperate },
	{ &vop_fsync_desc, (vop_t *) ffs_fsync },
	{ &vop_getpages_desc, (vop_t *) ffs_getpages },
	{ &vop_read_desc, (vop_t *) ffs_read },
	{ &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
	{ &vop_write_desc, (vop_t *) ffs_write },
	{ &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
	{ &vop_getextattr_desc, (vop_t *) ffs_getextattr },
	{ &vop_openextattr_desc, (vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc, (vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };

vop_t **ffs_specop_p;
static struct vnodeopv_entry_desc ffs_specop_entries[] = {
	{ &vop_default_desc, (vop_t *) ufs_vnoperatespec },
	{ &vop_fsync_desc, (vop_t *) ffs_fsync },
	{ &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
	{ &vop_strategy_desc, (vop_t *) ffsext_strategy },
	{ &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
	{ &vop_getextattr_desc, (vop_t *) ffs_getextattr },
	{ &vop_openextattr_desc, (vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc, (vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_specop_opv_desc =
	{ &ffs_specop_p, ffs_specop_entries };

vop_t **ffs_fifoop_p;
static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc, (vop_t *) ufs_vnoperatefifo },
	{ &vop_fsync_desc, (vop_t *) ffs_fsync },
	{ &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
	{ &vop_strategy_desc, (vop_t *) ffsext_strategy },
	{ &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
	{ &vop_getextattr_desc, (vop_t *) ffs_getextattr },
	{ &vop_openextattr_desc, (vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc, (vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };

VNODEOP_SET(ffs_vnodeop_opv_desc);
VNODEOP_SET(ffs_specop_opv_desc);
VNODEOP_SET(ffs_fifoop_opv_desc);
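
/*
 * Orientation note (a summary of the tables above, not part of the
 * original file): each table only overrides the operations FFS cares
 * about; anything not listed falls through to the vop_default_desc
 * entry and is handled by the generic UFS dispatchers (ufs_vnoperate,
 * ufs_vnoperatespec, ufs_vnoperatefifo).  The VNODEOP_SET() macros
 * arrange for these vectors to be registered with the VFS when the
 * filesystem is initialized.
 */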

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (ap->a_waitfor == MNT_WAIT);
	if (vn_isdisk(vp, NULL)) {
		lbn = INT_MAX;
		if (vp->v_rdev->si_mountpoint != NULL &&
		    (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
			softdep_fsync_mountdev(vp);
	} else {
		lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
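	/*
	 * A sketch of the flushing strategy used below (a summary of the
	 * code that follows): for a synchronous request (MNT_WAIT) the
	 * first scan skips metadata buffers (skipmeta) so file data goes
	 * out first, a second scan then picks up the metadata, and up to
	 * NIADDR + 1 passes in total are made to catch buffers that soft
	 * updates redirties behind us.  Block devices only get this best
	 * effort before we give up, since new I/O may be posted to them
	 * at any time.
	 */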
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
		bp->b_vflags &= ~BV_SCANNED;
	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = TAILQ_NEXT(bp, b_vnbufs);
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			continue;
		}
		VI_UNLOCK(vp);
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		if (vp != bp->b_vp)
			panic("ffs_fsync: vp != vp->b_vp");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					BUF_UNLOCK(bp);
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else {
			BUF_UNLOCK(bp);
			vfs_bio_awrite(bp);
		}
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		while (vp->v_numoutput) {
			vp->v_iflag |= VI_BWAIT;
			msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
			    PRIBIO + 4, "ffsfsn", 0);
		}
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean.  Thus we give block devices a
			 * good effort, then just give up.  For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	return (UFS_UPDATE(vp, wait));
}


/*
 * Vnode op for reading.
 */
/* ARGSUSED */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	mode_t mode;
	int seqcount;
	int ioflag;
	vm_object_t object;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif

	GIANT_REQUIRED;

	seqcount = ap->a_ioflag >> 16;
	ip = VTOI(vp);
	mode = ip->i_mode;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->i_fs;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);

	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	object = vp->v_object;

	bytesinfile = ip->i_size - uio->uio_offset;
	if (bytesinfile <= 0) {
		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
			ip->i_flag |= IN_ACCESS;
		return 0;
	}

	if (object) {
		vm_object_reference(object);
	}

	/*
	 * Ok so we couldn't do it all in one vm trick...
	 * so cycle around trying smaller bites..
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;

		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;
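		/*
		 * For example (illustrative numbers, not taken from any
		 * particular mount): on a filesystem with fs_bsize = 16384,
		 * a read starting at offset 20000 maps to lbn 1 with
		 * blkoffset = 20000 - 16384 = 3616, so at most
		 * xfersize = 16384 - 3616 = 12768 bytes can come out of
		 * this block before the loop advances to the next one.
		 */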

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for.  Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		{
			/*
			 * otherwise use the general form
			 */
			error =
			    uiomove((char *)bp->b_data + blkoffset,
			    (int)xfersize, uio);
		}

		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error because the loop
	 * above resets bp to NULL on each iteration and on normal
	 * completion has not set a new value into it, so it must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if (object) {
		vm_object_vndeallocate(object);
	}
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Vnode op for writing.
 */
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct thread *td;
	ufs_lbn_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
	vm_object_t object;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	GIANT_REQUIRED;

	extended = 0;
	seqcount = ap->a_ioflag >> 16;
	ip = VTOI(vp);

	object = vp->v_object;
	if (object) {
		vm_object_reference(object);
	}

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffswrite: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
			if (object) {
				vm_object_vndeallocate(object);
			}
			return (EPERM);
		}
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffswrite: dir write");
		break;
	default:
		panic("ffswrite: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset,
		    (int)uio->uio_resid
		);
	}

	fs = ip->i_fs;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
		if (object) {
			vm_object_vndeallocate(object);
		}
		return (EFBIG);
	}
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td &&
	    uio->uio_offset + uio->uio_resid >
	    td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		PROC_LOCK(td->td_proc);
		psignal(td->td_proc, SIGXFSZ);
		PROC_UNLOCK(td->td_proc);
		if (object) {
			vm_object_vndeallocate(object);
		}
		return (EFBIG);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;
	if (seqcount > BA_SEQMAX)
		flags = BA_SEQMAX << BA_SEQSHIFT;
	else
		flags = seqcount << BA_SEQSHIFT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		/* XXX is uio->uio_offset the right thing here? */
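		/*
		 * For instance (hypothetical numbers): with an 8K fs_bsize,
		 * a 512-byte write at offset 100 leaves xfersize = 512 <
		 * fs_bsize, so BA_CLRBUF is set and the allocation below
		 * hands back a buffer whose existing contents are valid,
		 * keeping the untouched bytes of the block intact around
		 * the range we overwrite.
		 */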
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if (ioflag & IO_NOWDRAIN)
			bp->b_flags |= B_NOWDRAIN;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP(ip, i_size) = ip->i_size;
			extended = 1;
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
		    buf_dirty_count_severe() ||
		    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred &&
	    suser_cred(ap->a_cred, PRISON_ROOT)) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP(ip, i_mode) = ip->i_mode;
	}
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);

	if (object) {
		vm_object_vndeallocate(object);
	}

	return (error);
}

/*
 * get page routine
 */
static int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	off_t foff, physoffset;
	int i, size, bsize;
	struct vnode *dp, *vp;
	vm_object_t obj;
	vm_pindex_t pindex, firstindex;
	vm_page_t mreq;
	int bbackwards, bforwards;
	int pbackwards, pforwards;
	int firstpage;
	ufs2_daddr_t reqblkno, reqlblkno;
	int poff;
	int pcount;
	int rtval;
	int pagesperblock;

	GIANT_REQUIRED;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];
	firstindex = ap->a_m[0]->pindex;

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		vm_page_unlock_queues();
		return VM_PAGER_OK;
	}

	vp = ap->a_vp;
	obj = vp->v_object;
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pindex = mreq->pindex;
	foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;

	if (bsize < PAGE_SIZE)
		return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
		    ap->a_count,
		    ap->a_reqpage);

	/*
	 * foff is the file offset of the required page
	 * reqlblkno is the logical block that contains the page
	 * poff is the index of the page into the logical block
	 */
	reqlblkno = foff / bsize;
	poff = (foff % bsize) / PAGE_SIZE;

	dp = VTOI(vp)->i_devvp;
	if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
	    || (reqblkno == -1)) {
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage)
				vm_page_free(ap->a_m[i]);
		}
		vm_page_unlock_queues();
		if (reqblkno == -1) {
			if ((mreq->flags & PG_ZERO) == 0)
				pmap_zero_page(mreq);
			vm_page_undirty(mreq);
			mreq->valid = VM_PAGE_BITS_ALL;
			return VM_PAGER_OK;
		} else {
			return VM_PAGER_ERROR;
		}
	}

	physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
	pagesperblock = bsize / PAGE_SIZE;
	/*
	 * find the first page that is contiguous...
	 * note that pbackwards is the number of pages that are contiguous
	 * backwards.
	 */
	firstpage = 0;
	if (ap->a_count) {
		pbackwards = poff + bbackwards * pagesperblock;
		if (ap->a_reqpage > pbackwards) {
			firstpage = ap->a_reqpage - pbackwards;
			vm_page_lock_queues();
			for (i = 0; i < firstpage; i++)
				vm_page_free(ap->a_m[i]);
			vm_page_unlock_queues();
		}
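
		/*
		 * An illustrative example (made-up values): with 8K
		 * filesystem blocks and 4K pages, pagesperblock = 2.  If the
		 * requested page is the second page of its block (poff = 1)
		 * and ufs_bmaparray() reported one physically contiguous
		 * block before it (bbackwards = 1), then
		 * pbackwards = 1 + 1 * 2 = 3, so up to three of the
		 * preceding pages in the array can be covered by the same
		 * device I/O; pforwards below is computed the same way in
		 * the other direction.
		 */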

		/*
		 * pforwards is the number of pages that are contiguous
		 * after the current page.
		 */
		pforwards = (pagesperblock - (poff + 1)) +
		    bforwards * pagesperblock;
		if (pforwards < (pcount - (ap->a_reqpage + 1))) {
			vm_page_lock_queues();
			for (i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
				vm_page_free(ap->a_m[i]);
			vm_page_unlock_queues();
			pcount = ap->a_reqpage + pforwards + 1;
		}

		/*
		 * number of pages for I/O corrected for the non-contig pages
		 * at the beginning of the array.
		 */
		pcount -= firstpage;
	}

	/*
	 * calculate the size of the transfer
	 */

	size = pcount * PAGE_SIZE;

	if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
	    obj->un_pager.vnp.vnp_size)
		size = obj->un_pager.vnp.vnp_size -
		    IDX_TO_OFF(ap->a_m[firstpage]->pindex);

	physoffset -= foff;
	rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
	    (ap->a_reqpage - firstpage), physoffset);

	return (rtval);
}

/*
 * Extended attribute area reading.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	mode_t mode;

	GIANT_REQUIRED;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;
	mode = ip->i_mode;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");

#endif
	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	bytesinfile = dp->di_extsize - uio->uio_offset;
	if (bytesinfile <= 0) {
		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
			ip->i_flag |= IN_ACCESS;
		return 0;
	}

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;

		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
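
		/*
		 * Note (summarizing the convention used above): the
		 * external attribute area is addressed with negative
		 * logical block numbers, -1 - lbn, which keeps it disjoint
		 * from the regular data blocks of the vnode;
		 * ffsext_strategy() below relies on this range check to
		 * route such requests.
		 */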
This 1015 * will cause us to attempt to release the buffer later on 1016 * and will cause the buffer cache to attempt to free the 1017 * underlying pages. 1018 */ 1019 if (ioflag & IO_DIRECT) 1020 bp->b_flags |= B_DIRECT; 1021 1022 /* 1023 * We should only get non-zero b_resid when an I/O error 1024 * has occurred, which should cause us to break above. 1025 * However, if the short read did not cause an error, 1026 * then we want to ensure that we do not uiomove bad 1027 * or uninitialized data. 1028 */ 1029 size -= bp->b_resid; 1030 if (size < xfersize) { 1031 if (size == 0) 1032 break; 1033 xfersize = size; 1034 } 1035 1036 error = uiomove((char *)bp->b_data + blkoffset, 1037 (int)xfersize, uio); 1038 if (error) 1039 break; 1040 1041 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 1042 (LIST_FIRST(&bp->b_dep) == NULL)) { 1043 /* 1044 * If there are no dependencies, and it's VMIO, 1045 * then we don't need the buf, mark it available 1046 * for freeing. The VM has the data. 1047 */ 1048 bp->b_flags |= B_RELBUF; 1049 brelse(bp); 1050 } else { 1051 /* 1052 * Otherwise let whoever 1053 * made the request take care of 1054 * freeing it. We just queue 1055 * it onto another list. 1056 */ 1057 bqrelse(bp); 1058 } 1059 } 1060 1061 /* 1062 * This can only happen in the case of an error 1063 * because the loop above resets bp to NULL on each iteration 1064 * and on normal completion has not set a new value into it. 1065 * so it must have come from a 'break' statement 1066 */ 1067 if (bp != NULL) { 1068 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 1069 (LIST_FIRST(&bp->b_dep) == NULL)) { 1070 bp->b_flags |= B_RELBUF; 1071 brelse(bp); 1072 } else { 1073 bqrelse(bp); 1074 } 1075 } 1076 1077 if ((error == 0 || uio->uio_resid != orig_resid) && 1078 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) 1079 ip->i_flag |= IN_ACCESS; 1080 return (error); 1081 } 1082 1083 /* 1084 * Extended attribute area writing. 1085 */ 1086 static int 1087 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred) 1088 { 1089 struct inode *ip; 1090 struct ufs2_dinode *dp; 1091 struct fs *fs; 1092 struct buf *bp; 1093 ufs_lbn_t lbn; 1094 off_t osize; 1095 int blkoffset, error, flags, resid, size, xfersize; 1096 1097 GIANT_REQUIRED; 1098 1099 ip = VTOI(vp); 1100 fs = ip->i_fs; 1101 dp = ip->i_din2; 1102 1103 #ifdef DIAGNOSTIC 1104 if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC) 1105 panic("ext_write: mode"); 1106 #endif 1107 1108 if (ioflag & IO_APPEND) 1109 uio->uio_offset = dp->di_extsize; 1110 1111 if (uio->uio_offset < 0 || 1112 (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize) 1113 return (EFBIG); 1114 1115 resid = uio->uio_resid; 1116 osize = dp->di_extsize; 1117 flags = IO_EXT; 1118 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 1119 flags |= IO_SYNC; 1120 1121 for (error = 0; uio->uio_resid > 0;) { 1122 lbn = lblkno(fs, uio->uio_offset); 1123 blkoffset = blkoff(fs, uio->uio_offset); 1124 xfersize = fs->fs_bsize - blkoffset; 1125 if (uio->uio_resid < xfersize) 1126 xfersize = uio->uio_resid; 1127 1128 /* 1129 * We must perform a read-before-write if the transfer size 1130 * does not cover the entire buffer. 1131 */ 1132 if (fs->fs_bsize > xfersize) 1133 flags |= BA_CLRBUF; 1134 else 1135 flags &= ~BA_CLRBUF; 1136 error = UFS_BALLOC(vp, uio->uio_offset, xfersize, 1137 ucred, flags, &bp); 1138 if (error != 0) 1139 break; 1140 /* 1141 * If the buffer is not valid we have to clear out any 1142 * garbage data from the pages instantiated for the buffer. 
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if (ioflag & IO_NOWDRAIN)
			bp->b_flags |= B_NOWDRAIN;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
		    buf_dirty_count_severe() ||
		    xfersize + blkoffset == fs->fs_bsize ||
		    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ucred &&
	    suser_cred(ucred, PRISON_ROOT)) {
		ip->i_mode &= ~(ISUID | ISGID);
		dp->di_mode = ip->i_mode;
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    IO_EXT | (ioflag & IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);
	return (error);
}


/*
 * Locate a named extended attribute.
 *
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
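
/*
 * Each attribute in the external area is stored as one variable-length
 * record.  The layout below is inferred from the parsing in
 * ffs_findextattr() and the construction in ffs_setextattr(); the field
 * names are descriptive labels only, not structures defined elsewhere:
 *
 *	uint32_t  ea_length;		total record length, a multiple of 8
 *	uint8_t   ea_namespace;		attribute namespace
 *	uint8_t   ea_contentpadlen;	bytes of padding after the content
 *	uint8_t   ea_namelength;	length of the name that follows
 *	char      ea_name[];		name, padded so the header ends on
 *					an 8-byte boundary (eapad1)
 *	u_char    ea_content[];		attribute data, padded to an 8-byte
 *					boundary (eapad2)
 */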
static int
ffs_findextattr(u_char *ptr, uint length, int nspace, const char *name,
    u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return (-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct fs *fs;
	struct ufs2_dinode *dp;
	struct uio luio;
	struct iovec liovec;
	int easize, error;
	u_char *eae;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;
	easize = dp->di_extsize;

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	*p = eae;
	return (0);
}
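
/*
 * A short summary of the helpers that follow: extended attribute
 * handling works as a small in-memory transaction.  ffs_open_ea()
 * reads the whole attribute area into ip->i_ea_area, the get/set
 * operations then work on that copy, and ffs_close_ea() either commits
 * it back to disk with ffs_extwrite() or discards it.  The first error
 * seen while the area is open is remembered in ip->i_ea_error and
 * suppresses the commit.
 */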
static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct fs *fs;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);
	fs = ip->i_fs;

	if (ip->i_ea_area != NULL)
		return (EBUSY);
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error)
		return (error);
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct fs *fs;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);
	fs = ip->i_fs;
	if (ip->i_ea_area == NULL)
		return (EINVAL);
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred, td);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	free(ip->i_ea_area, M_TEMP);
	ip->i_ea_area = NULL;
	ip->i_ea_len = 0;
	ip->i_ea_error = 0;
	return (error);
}

/*
 * Vnode extattr strategy routine for special devices and fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
	    lbn < 0 && lbn >= -NXADDR)
		return (ufs_vnoperate((struct vop_generic_args *)ap));
	if (vp->v_type == VFIFO)
		return (ufs_vnoperatefifo((struct vop_generic_args *)ap));
	return (ufs_vnoperatespec((struct vop_generic_args *)ap));
}

/*
 * Vnode extattr transaction start.
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));
	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}


/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));
	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}



/*
 * Vnode operation to retrieve a named extended attribute.
 */
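
/*
 * When a_name is the empty string this doubles as a "list attributes"
 * operation (see the else branch of the lookup below): for every
 * attribute in the requested namespace a one-byte name length followed
 * by the name itself is copied to a_uio, or the total of those lengths
 * is accumulated into *a_size.
 */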
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	struct ufs2_dinode *dp;
	unsigned easize;
	uint32_t ul;
	int error, ealen, stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IREAD);
	if (error)
		return (error);

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}
	dp = ip->i_din2;
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;
	if (strlen(ap->a_name) > 0) {
		ealen = ffs_findextattr(eae, easize,
		    ap->a_attrnamespace, ap->a_name, NULL, &p);
		if (ealen >= 0) {
			error = 0;
			if (ap->a_size != NULL)
				*ap->a_size = ealen;
			else if (ap->a_uio != NULL)
				error = uiomove(p, ealen, ap->a_uio);
		} else {
			error = ENOATTR;
		}
	} else {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = 0;
		pe = eae + easize;
		for (p = eae; error == 0 && p < pe; p = pn) {
			bcopy(p, &ul, sizeof(ul));
			pn = p + ul;
			if (pn > pe)
				break;
			p += sizeof(ul);
			if (*p++ != ap->a_attrnamespace)
				continue;
			p++;	/* pad2 */
			ealen = *p;
			if (ap->a_size != NULL) {
				*ap->a_size += ealen + 1;
			} else if (ap->a_uio != NULL) {
				error = uiomove(p, ealen + 1, ap->a_uio);
			}
		}
	}
	if (stand_alone)
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to set a named attribute.
 */
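
/*
 * A worked example of the record sizing done below (hypothetical
 * attribute, three-character name, 16 bytes of data): the header is
 * sizeof(uint32_t) + 3 + 3 = 10 bytes, so eapad1 = 6 pads it to 16;
 * the 16 data bytes already end on an 8-byte boundary, so eapad2 = 0
 * and the complete record occupies ealength = 32 bytes.
 */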
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eacont, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	struct ufs2_dinode *dp;
	struct ucred *cred;
	int stand_alone;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, IWRITE);
	if (error) {
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (ap->a_cred != NOCRED)
		cred = ap->a_cred;
	else
		cred = ap->a_vp->v_mount->mnt_cred;

	dp = ip->i_din2;

	if (ip->i_ea_area == NULL) {
		error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		stand_alone = 1;
	} else {
		stand_alone = 0;
	}

	/* Calculate the length of the EA entry */
	if (ap->a_uio == NULL) {
		/* delete */
		ealength = eapad1 = ealen = eapad2 = eacont = 0;
	} else {
		ealen = ap->a_uio->uio_resid;
		ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		eacont = ealength + eapad1;
		eapad2 = 8 - (ealen % 8);
		if (eapad2 == 8)
			eapad2 = 0;
		ealength += eapad1 + ealen + eapad2;
	}

	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (olen == -1 && ealength == 0) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return (ENOATTR);
	}
	if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		if (stand_alone)
			ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		else if (ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return (ENOSPC);
	}
	if (ealength != 0) {
		bcopy(&ealength, p, sizeof(ealength));
		p += sizeof(ealength);
		*p++ = ap->a_attrnamespace;
		*p++ = eapad2;
		*p++ = strlen(ap->a_name);
		strcpy(p, ap->a_name);
		p += strlen(ap->a_name);
		bzero(p, eapad1);
		p += eapad1;
		error = uiomove(p, ealen, ap->a_uio);
		if (error) {
			free(eae, M_TEMP);
			if (stand_alone)
				ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
			else if (ip->i_ea_error == 0)
				ip->i_ea_error = error;
			return (error);
		}
		p += ealen;
		bzero(p, eapad2);
	}
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	if (stand_alone)
		error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return (error);
}