/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/conf.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

int ffs_fsync(struct vop_fsync_args *);
static int ffs_getpages(struct vop_getpages_args *);
static int ffs_read(struct vop_read_args *);
static int ffs_write(struct vop_write_args *);
static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
    struct ucred *cred);
static int ffs_getextattr(struct vop_getextattr_args *);
static int ffs_setextattr(struct vop_setextattr_args *);

/* Global vfs data structures for ufs. */
vop_t **ffs_vnodeop_p;
static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc, (vop_t *) ufs_vnoperate },
	{ &vop_fsync_desc, (vop_t *) ffs_fsync },
	{ &vop_getpages_desc, (vop_t *) ffs_getpages },
	{ &vop_read_desc, (vop_t *) ffs_read },
	{ &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
	{ &vop_write_desc, (vop_t *) ffs_write },
	{ &vop_getextattr_desc, (vop_t *) ffs_getextattr },
	{ &vop_setextattr_desc, (vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };

vop_t **ffs_specop_p;
static struct vnodeopv_entry_desc ffs_specop_entries[] = {
	{ &vop_default_desc, (vop_t *) ufs_vnoperatespec },
	{ &vop_fsync_desc, (vop_t *) ffs_fsync },
	{ &vop_getextattr_desc, (vop_t *) ffs_getextattr },
	{ &vop_setextattr_desc, (vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_specop_opv_desc =
	{ &ffs_specop_p, ffs_specop_entries };

vop_t **ffs_fifoop_p;
static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc, (vop_t *) ufs_vnoperatefifo },
	{ &vop_fsync_desc, (vop_t *) ffs_fsync },
	{ &vop_getextattr_desc, (vop_t *) ffs_getextattr },
	{ &vop_setextattr_desc, (vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };

VNODEOP_SET(ffs_vnodeop_opv_desc);
VNODEOP_SET(ffs_specop_opv_desc);
VNODEOP_SET(ffs_fifoop_opv_desc);

/*
 * Synch an open file.
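 *
 * Strategy, as implemented below: dirty data buffers are flushed on a
 * first pass; for a MNT_WAIT request a second pass then flushes the
 * metadata buffers as well, and up to NIADDR + 1 further passes are made
 * for buffers that were redirtied behind us before giving up.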
 */
/* ARGSUSED */
int
ffs_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (ap->a_waitfor == MNT_WAIT);
	if (vn_isdisk(vp, NULL)) {
		lbn = INT_MAX;
		if (vp->v_rdev->si_mountpoint != NULL &&
		    (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
			softdep_fsync_mountdev(vp);
	} else {
		lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
loop:
	TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
		bp->b_flags &= ~B_SCANNED;
	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = TAILQ_NEXT(bp, b_vnbufs);
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_flags & B_SCANNED) != 0)
			continue;
		bp->b_flags |= B_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			continue;
		}
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		if (vp != bp->b_vp)
			panic("ffs_fsync: vp != bp->b_vp");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					BUF_UNLOCK(bp);
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else {
			BUF_UNLOCK(bp);
			vfs_bio_awrite(bp);
		}
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		VI_LOCK(vp);
		while (vp->v_numoutput) {
			vp->v_iflag |= VI_BWAIT;
			msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
			    PRIBIO + 4, "ffsfsn", 0);
		}
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();

		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	splx(s);
	return (UFS_UPDATE(vp, wait));
}


/*
 * Vnode op for reading.
 */
/* ARGSUSED */
int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	mode_t mode;
	int seqcount;
	int ioflag;
	vm_object_t object;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif

	GIANT_REQUIRED;

	seqcount = ap->a_ioflag >> 16;
	ip = VTOI(vp);
	mode = ip->i_mode;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->i_fs;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);

	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	object = vp->v_object;

	bytesinfile = ip->i_size - uio->uio_offset;
	if (bytesinfile <= 0) {
		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
			ip->i_flag |= IN_ACCESS;
		return 0;
	}

	if (object) {
		vm_object_reference(object);
	}

#ifdef ENABLE_VFS_IOOPT
	/*
	 * If IO optimisation is turned on,
	 * and we are NOT a VM based IO request,
	 * (i.e. not headed for the buffer cache)
	 * but there IS a vm object associated with it.
	 */
	if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
		int nread, toread;

		toread = uio->uio_resid;
		if (toread > bytesinfile)
			toread = bytesinfile;
		if (toread >= PAGE_SIZE) {
			/*
			 * Then if it's at least a page in size, try to
			 * get the data from the object using vm tricks.
			 */
			error = uioread(toread, uio, object, &nread);
			if ((uio->uio_resid == 0) || (error != 0)) {
				/*
				 * If we finished or there was an error
				 * then finish up (the reference previously
				 * obtained on object must be released).
				 */
				if ((error == 0 ||
				    uio->uio_resid != orig_resid) &&
				    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
					ip->i_flag |= IN_ACCESS;

				if (object) {
					vm_object_vndeallocate(object);
				}
				return error;
			}
		}
	}
#endif

	/*
	 * Ok so we couldn't do it all in one vm trick...
	 * so cycle around trying smaller bites..
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
#ifdef ENABLE_VFS_IOOPT
		if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) {
			/*
			 * Obviously we didn't finish above, but we
			 * didn't get an error either. Try the same trick again,
			 * but this time we are looping.
			 */
			int nread, toread;
			toread = uio->uio_resid;
			if (toread > bytesinfile)
				toread = bytesinfile;

			/*
			 * Once again, if there isn't enough for a
			 * whole page, don't try optimising.
			 */
			if (toread >= PAGE_SIZE) {
				error = uioread(toread, uio, object, &nread);
				if ((uio->uio_resid == 0) || (error != 0)) {
					/*
					 * If we finished or there was an
					 * error then finish up (the reference
					 * previously obtained on object must
					 * be released).
					 */
					if ((error == 0 ||
					    uio->uio_resid != orig_resid) &&
					    (vp->v_mount->mnt_flag &
					    MNT_NOATIME) == 0)
						ip->i_flag |= IN_ACCESS;
					if (object) {
						vm_object_vndeallocate(object);
					}
					return error;
				}
				/*
				 * To get here we didn't finish or err.
				 * If we did get some data,
				 * loop to try another bite.
				 */
				if (nread > 0) {
					continue;
				}
			}
		}
#endif

		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for.  Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

#ifdef ENABLE_VFS_IOOPT
		if (vfs_ioopt && object &&
		    (bp->b_flags & B_VMIO) &&
		    ((blkoffset & PAGE_MASK) == 0) &&
		    ((xfersize & PAGE_MASK) == 0)) {
			/*
			 * If VFS IO optimisation is turned on,
			 * and it's an exact page multiple
			 * and a normal VM based op,
			 * then use uiomoveco().
			 */
			error =
			    uiomoveco((char *)bp->b_data + blkoffset,
			    (int)xfersize, uio, object, 0);
		} else
#endif
		{
			/*
			 * otherwise use the general form
			 */
			error =
			    uiomove((char *)bp->b_data + blkoffset,
			    (int)xfersize, uio);
		}

		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing. The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it,
	 * so it must have come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if (object) {
		vm_object_vndeallocate(object);
	}
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Vnode op for writing.
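 *
 * Each iteration of the loop below allocates the underlying filesystem
 * block with UFS_BALLOC(), copies the user data in with uiomove(), and
 * then pushes the buffer out synchronously, asynchronously, clustered,
 * or delayed depending on IO_SYNC, IO_ASYNC, IO_DIRECT and memory
 * pressure (see the comment above the write selection in the loop).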
 */
int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct thread *td;
	ufs_lbn_t lbn;
	off_t osize;
	int seqcount;
	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
	vm_object_t object;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	GIANT_REQUIRED;

	extended = 0;
	seqcount = ap->a_ioflag >> 16;
	ip = VTOI(vp);

	object = vp->v_object;
	if (object) {
		vm_object_reference(object);
	}

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffswrite: mode");
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
			if (object) {
				vm_object_vndeallocate(object);
			}
			return (EPERM);
		}
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		panic("ffswrite: dir write");
		break;
	default:
		panic("ffswrite: type %p %d (%d,%d)", vp, (int)vp->v_type,
		    (int)uio->uio_offset,
		    (int)uio->uio_resid
		);
	}

	fs = ip->i_fs;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
		if (object) {
			vm_object_vndeallocate(object);
		}
		return (EFBIG);
	}
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	td = uio->uio_td;
	if (vp->v_type == VREG && td &&
	    uio->uio_offset + uio->uio_resid >
	    td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		PROC_LOCK(td->td_proc);
		psignal(td->td_proc, SIGXFSZ);
		PROC_UNLOCK(td->td_proc);
		if (object) {
			vm_object_vndeallocate(object);
		}
		return (EFBIG);
	}

	resid = uio->uio_resid;
	osize = ip->i_size;
	flags = 0;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags = IO_SYNC;

#ifdef ENABLE_VFS_IOOPT
	if (object && (object->flags & OBJ_OPT)) {
		vm_freeze_copyopts(object,
		    OFF_TO_IDX(uio->uio_offset),
		    OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK));
	}
#endif
	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		if (uio->uio_offset + xfersize > ip->i_size)
			vnode_pager_setsize(vp, uio->uio_offset + xfersize);

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		/* XXX is uio->uio_offset the right thing here? */
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().
		 * XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if (ioflag & IO_NOWDRAIN)
			bp->b_flags |= B_NOWDRAIN;

		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP(ip, i_size) = ip->i_size;
			extended = 1;
		}

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
		    buf_dirty_count_severe() ||
		    (ioflag & IO_ASYNC)) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else if (xfersize + blkoffset == fs->fs_bsize) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, ip->i_size, seqcount);
			} else {
				bawrite(bp);
			}
		} else if (ioflag & IO_DIRECT) {
			bp->b_flags |= B_CLUSTEROK;
			bawrite(bp);
		} else {
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred &&
	    suser_cred(ap->a_cred, PRISON_ROOT)) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP(ip, i_mode) = ip->i_mode;
	}
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);

	if (object) {
		vm_object_vndeallocate(object);
	}

	return (error);
}

/*
 * get page routine
 */
int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	off_t foff, physoffset;
	int i, size, bsize;
	struct vnode *dp, *vp;
	vm_object_t obj;
	vm_pindex_t pindex, firstindex;
	vm_page_t mreq;
	int bbackwards, bforwards;
	int pbackwards, pforwards;
	int firstpage;
	ufs2_daddr_t reqblkno, reqlblkno;
	int poff;
	int pcount;
	int rtval;
	int pagesperblock;

	GIANT_REQUIRED;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];
	firstindex = ap->a_m[0]->pindex;

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	if (mreq->valid) {
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_free(ap->a_m[i]);
			}
		}
		vm_page_unlock_queues();
		return VM_PAGER_OK;
	}

	vp = ap->a_vp;
	obj = vp->v_object;
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pindex = mreq->pindex;
	foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;

	if (bsize < PAGE_SIZE)
		return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
		    ap->a_count, ap->a_reqpage);

	/*
	 * foff is the file offset of the required page
	 * reqlblkno is the logical block that contains the page
	 * poff is the index of the page into the logical block
	 */
	reqlblkno = foff / bsize;
	poff = (foff % bsize) / PAGE_SIZE;

	dp = VTOI(vp)->i_devvp;
	if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
	    || (reqblkno == -1)) {
		vm_page_lock_queues();
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage)
				vm_page_free(ap->a_m[i]);
		}
		vm_page_unlock_queues();
		if (reqblkno == -1) {
			if ((mreq->flags & PG_ZERO) == 0)
				vm_page_zero_fill(mreq);
			vm_page_undirty(mreq);
			mreq->valid = VM_PAGE_BITS_ALL;
			return VM_PAGER_OK;
		} else {
			return VM_PAGER_ERROR;
		}
	}

	physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
	pagesperblock = bsize / PAGE_SIZE;
	/*
	 * find the first page that is contiguous...
	 * note that pbackwards is the number of pages that are contiguous
	 * backwards.
	 */
	firstpage = 0;
	if (ap->a_count) {
		pbackwards = poff + bbackwards * pagesperblock;
		if (ap->a_reqpage > pbackwards) {
			firstpage = ap->a_reqpage - pbackwards;
			vm_page_lock_queues();
			for (i = 0; i < firstpage; i++)
				vm_page_free(ap->a_m[i]);
			vm_page_unlock_queues();
		}

		/*
		 * pforwards is the number of pages that are contiguous
		 * after the current page.
		 */
		pforwards = (pagesperblock - (poff + 1)) +
		    bforwards * pagesperblock;
		if (pforwards < (pcount - (ap->a_reqpage + 1))) {
			vm_page_lock_queues();
			for (i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
				vm_page_free(ap->a_m[i]);
			vm_page_unlock_queues();
			pcount = ap->a_reqpage + pforwards + 1;
		}

		/*
		 * number of pages for I/O corrected for the non-contiguous
		 * pages at the beginning of the array.
		 */
		pcount -= firstpage;
	}

	/*
	 * calculate the size of the transfer
	 */

	size = pcount * PAGE_SIZE;

	if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
	    obj->un_pager.vnp.vnp_size)
		size = obj->un_pager.vnp.vnp_size -
		    IDX_TO_OFF(ap->a_m[firstpage]->pindex);

	physoffset -= foff;
	rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
	    (ap->a_reqpage - firstpage), physoffset);

	return (rtval);
}

/*
 * Extended attribute reading.
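 *
 * The extended attribute area is read through the buffer cache like file
 * data, but is addressed with negative logical block numbers (-1 - lbn)
 * to keep it distinct from ordinary data blocks; its size is bounded by
 * NXADDR blocks (see ffs_extwrite() below).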
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	mode_t mode;

	GIANT_REQUIRED;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;
	mode = ip->i_mode;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");
#endif
	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	bytesinfile = dp->di_extsize - uio->uio_offset;
	if (bytesinfile <= 0) {
		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
			ip->i_flag |= IN_ACCESS;
		return 0;
	}

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;

		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing. The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it,
	 * so it must have come from a 'break' statement.
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}

/*
 * Extended attribute writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
    struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	int blkoffset, error, flags, resid, size, xfersize;

	GIANT_REQUIRED;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;

	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;
		if (ioflag & IO_NOWDRAIN)
			bp->b_flags |= B_NOWDRAIN;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
		    buf_dirty_count_severe() ||
		    xfersize + blkoffset == fs->fs_bsize ||
		    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ucred &&
	    suser_cred(ucred, PRISON_ROOT)) {
		ip->i_mode &= ~(ISUID | ISGID);
		dp->di_mode = ip->i_mode;
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    IO_EXT | (ioflag & IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);
	return (error);
}


/*
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
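 *
 * The record layout assumed by this parser, and written out by
 * ffs_setextattr() below, is:
 *
 *	uint32_t  total record length, including all padding
 *	uint8_t   attribute namespace
 *	uint8_t   length of the padding that follows the content
 *	uint8_t   length of the attribute name
 *	  name    zero-padded so the content starts on an 8 byte boundary
 *	 content  zero-padded to an 8 byte boundary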
 */
static int
ffs_findextattr(u_char *ptr, uint length, int nspace, const char *name,
    u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return (0);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct fs *fs;
	struct ufs2_dinode *dp;
	struct uio luio;
	struct iovec liovec;
	int easize, error;
	u_char *eae;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;
	easize = dp->di_extsize;

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	*p = eae;
	return (0);
}

/*
 * Vnode operation to retrieve a named extended attribute.
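 *
 * If the requested name is empty, the names of all attributes in the
 * requested namespace are returned (or, with a_size, sized) instead,
 * each preceded by a single length byte.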
 */
int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	struct ufs2_dinode *dp;
	unsigned easize;
	uint32_t ul;
	int error, ealen;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	dp = ip->i_din2;
	error = ffs_rdextattr(&eae, ap->a_vp, ap->a_td, 0);
	if (error)
		return (error);
	easize = dp->di_extsize;
	if (strlen(ap->a_name) > 0) {
		error = ENOATTR;
		ealen = ffs_findextattr(eae, easize,
		    ap->a_attrnamespace, ap->a_name, NULL, &p);
		if (ealen != 0) {
			error = 0;
			if (ap->a_size != NULL)
				*ap->a_size = ealen;
			else if (ap->a_uio != NULL)
				error = uiomove(p, ealen, ap->a_uio);
		}
	} else {
		error = 0;
		if (ap->a_size != NULL)
			*ap->a_size = 0;
		pe = eae + easize;
		for (p = eae; error == 0 && p < pe; p = pn) {
			bcopy(p, &ul, sizeof(ul));
			pn = p + ul;
			if (pn > pe)
				break;
			p += sizeof(ul);
			if (*p++ != ap->a_attrnamespace)
				continue;
			p++;	/* pad2 */
			ealen = *p;
			if (ap->a_size != NULL) {
				*ap->a_size += ealen + 1;
			} else if (ap->a_uio != NULL) {
				error = uiomove(p, ealen + 1, ap->a_uio);
			}
		}
	}
	free(eae, M_TEMP);
	return (error);
}

/*
 * Vnode operation to set a named attribute.
 */
int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, eacont, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;
	struct uio luio;
	struct iovec liovec;
	struct ufs2_dinode *dp;
	struct ucred *cred;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	if (ap->a_cred != NOCRED)
		cred = ap->a_cred;
	else
		cred = ap->a_vp->v_mount->mnt_cred;
	dp = ip->i_din2;

	/* Calculate the length of the EA entry */
	if (ap->a_uio == NULL || ap->a_uio->uio_resid == 0) {
		/* delete */
		ealength = eapad1 = ealen = eapad2 = eacont = 0;
	} else {
		ealen = ap->a_uio->uio_resid;
		ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		eacont = ealength + eapad1;
		eapad2 = 8 - (ealen % 8);
		if (eapad2 == 8)
			eapad2 = 0;
		ealength += eapad1 + ealen + eapad2;
	}

	error = ffs_rdextattr(&eae, ap->a_vp, ap->a_td, ealength);
	if (error)
		return (error);

	easize = dp->di_extsize;
	ul = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
	if (ul == 0 && ealength == 0) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		return (ENOATTR);
	} else if (ul == 0) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else if (ul != ealength) {
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		bcopy(p + ul, p + ealength, easize - i);
		easize -= ul;
		easize += ealength;
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		return (ENOSPC);
	}
	if (ealength != 0) {
		bcopy(&ealength, p, sizeof(ealength));
		p += sizeof(ealength);
		*p++ = ap->a_attrnamespace;
		*p++ = eapad2;
		*p++ = strlen(ap->a_name);
		strcpy(p, ap->a_name);
		p += strlen(ap->a_name);
		bzero(p, eapad1);
		p += eapad1;
		error = uiomove(p, ealen, ap->a_uio);
		if (error) {
			free(eae, M_TEMP);
			return (error);
		}
		p += ealen;
		bzero(p, eapad2);
	}
	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_WRITE;
	luio.uio_td = ap->a_td;
	/* XXX: I'm not happy about truncating to zero size */
	if (easize < dp->di_extsize)
		error = ffs_truncate(ap->a_vp, 0, IO_EXT, cred, ap->a_td);
	error = ffs_extwrite(ap->a_vp, &luio, IO_EXT | IO_SYNC, cred);
	free(eae, M_TEMP);
	return (error);
}