/*
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"

#ifdef DIRECTIO
extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static int ffs_fsync(struct vop_fsync_args *);
static int ffs_getpages(struct vop_getpages_args *);
static int ffs_read(struct vop_read_args *);
static int ffs_write(struct vop_write_args *);
static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
	    struct ucred *cred);
static int ffsext_strategy(struct vop_strategy_args *);
static int ffs_closeextattr(struct vop_closeextattr_args *);
static int ffs_deleteextattr(struct vop_deleteextattr_args *);
static int ffs_getextattr(struct vop_getextattr_args *);
static int ffs_listextattr(struct vop_listextattr_args *);
static int ffs_openextattr(struct vop_openextattr_args *);
static int ffs_setextattr(struct vop_setextattr_args *);

/* Global vfs data structures for ufs. */
vop_t **ffs_vnodeop_p;
static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_getpages_desc,		(vop_t *) ffs_getpages },
	{ &vop_read_desc,		(vop_t *) ffs_read },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_write_desc,		(vop_t *) ffs_write },
	{ &vop_closeextattr_desc,	(vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc,	(vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc,	(vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc,	(vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };

vop_t **ffs_fifoop_p;
static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperatefifo },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_strategy_desc,		(vop_t *) ffsext_strategy },
	{ &vop_closeextattr_desc,	(vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc,	(vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc,		(vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc,	(vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc,	(vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc,		(vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };

VNODEOP_SET(ffs_vnodeop_opv_desc);
VNODEOP_SET(ffs_fifoop_opv_desc);

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (ap->a_waitfor == MNT_WAIT);
	if (vn_isdisk(vp, NULL)) {
		lbn = INT_MAX;
		if (vp->v_rdev->si_mountpoint != NULL &&
		    (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
			softdep_fsync_mountdev(vp);
	} else {
		lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = TAILQ_NEXT(bp, b_bobufs);
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (skipmeta == 1 && bp->b_lblkno < 0)
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			continue;
		}
		VI_UNLOCK(vp);
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		if (vp != bp->b_vp)
			panic("ffs_fsync: vp != bp->b_vp");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {

			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		bufobj_wwait(&vp->v_bufobj, 3, 0);
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean.  Thus we give block devices a
			 * good effort, then just give up.  For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	return (UFS_UPDATE(vp, wait));
}
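
/*
 * Illustrative sketch (not part of the original file): how the "lbn"
 * cutoff computed in ffs_fsync() rounds the inode size up to a whole
 * block.  lblkno(fs, loc) is (loc >> fs_bshift) per <ufs/ffs/fs.h>;
 * the block size and file size below are example values only.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	long fs_bsize = 16384;		/* example 16K filesystem block */
	int fs_bshift = 14;		/* log2(fs_bsize) */
	long i_size = 20000;		/* example file size in bytes */
	/* Same arithmetic as lblkno(fs, i_size + fs_bsize - 1). */
	long lbn = (i_size + fs_bsize - 1) >> fs_bshift;

	/* Prints 2: lbns 0 and 1 hold data, so b_lblkno >= 2 is past
	 * EOF and the truncation check above may discard that buffer. */
	printf("first lbn past EOF: %ld\n", lbn);
	return (0);
}
#endif
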
328 */ 329 /* ARGSUSED */ 330 static int 331 ffs_read(ap) 332 struct vop_read_args /* { 333 struct vnode *a_vp; 334 struct uio *a_uio; 335 int a_ioflag; 336 struct ucred *a_cred; 337 } */ *ap; 338 { 339 struct vnode *vp; 340 struct inode *ip; 341 struct uio *uio; 342 struct fs *fs; 343 struct buf *bp; 344 ufs_lbn_t lbn, nextlbn; 345 off_t bytesinfile; 346 long size, xfersize, blkoffset; 347 int error, orig_resid; 348 int seqcount; 349 int ioflag; 350 351 vp = ap->a_vp; 352 uio = ap->a_uio; 353 ioflag = ap->a_ioflag; 354 if (ap->a_ioflag & IO_EXT) 355 #ifdef notyet 356 return (ffs_extread(vp, uio, ioflag)); 357 #else 358 panic("ffs_read+IO_EXT"); 359 #endif 360 #ifdef DIRECTIO 361 if ((ioflag & IO_DIRECT) != 0) { 362 int workdone; 363 364 error = ffs_rawread(vp, uio, &workdone); 365 if (error != 0 || workdone != 0) 366 return error; 367 } 368 #endif 369 370 GIANT_REQUIRED; 371 372 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 373 ip = VTOI(vp); 374 375 #ifdef DIAGNOSTIC 376 if (uio->uio_rw != UIO_READ) 377 panic("ffs_read: mode"); 378 379 if (vp->v_type == VLNK) { 380 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 381 panic("ffs_read: short symlink"); 382 } else if (vp->v_type != VREG && vp->v_type != VDIR) 383 panic("ffs_read: type %d", vp->v_type); 384 #endif 385 orig_resid = uio->uio_resid; 386 KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0")); 387 if (orig_resid == 0) 388 return (0); 389 KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0")); 390 fs = ip->i_fs; 391 if (uio->uio_offset < ip->i_size && 392 uio->uio_offset >= fs->fs_maxfilesize) 393 return (EOVERFLOW); 394 395 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 396 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 397 break; 398 lbn = lblkno(fs, uio->uio_offset); 399 nextlbn = lbn + 1; 400 401 /* 402 * size of buffer. The buffer representing the 403 * end of the file is rounded up to the size of 404 * the block type ( fragment or full block, 405 * depending ). 406 */ 407 size = blksize(fs, ip, lbn); 408 blkoffset = blkoff(fs, uio->uio_offset); 409 410 /* 411 * The amount we want to transfer in this iteration is 412 * one FS block less the amount of the data before 413 * our startpoint (duh!) 414 */ 415 xfersize = fs->fs_bsize - blkoffset; 416 417 /* 418 * But if we actually want less than the block, 419 * or the file doesn't have a whole block more of data, 420 * then use the lesser number. 421 */ 422 if (uio->uio_resid < xfersize) 423 xfersize = uio->uio_resid; 424 if (bytesinfile < xfersize) 425 xfersize = bytesinfile; 426 427 if (lblktosize(fs, nextlbn) >= ip->i_size) { 428 /* 429 * Don't do readahead if this is the end of the file. 430 */ 431 error = bread(vp, lbn, size, NOCRED, &bp); 432 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 433 /* 434 * Otherwise if we are allowed to cluster, 435 * grab as much as we can. 436 * 437 * XXX This may not be a win if we are not 438 * doing sequential access. 439 */ 440 error = cluster_read(vp, ip->i_size, lbn, 441 size, NOCRED, uio->uio_resid, seqcount, &bp); 442 } else if (seqcount > 1) { 443 /* 444 * If we are NOT allowed to cluster, then 445 * if we appear to be acting sequentially, 446 * fire off a request for a readahead 447 * as well as a read. Note that the 4th and 5th 448 * arguments point to arrays of the size specified in 449 * the 6th argument. 
450 */ 451 int nextsize = blksize(fs, ip, nextlbn); 452 error = breadn(vp, lbn, 453 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 454 } else { 455 /* 456 * Failing all of the above, just read what the 457 * user asked for. Interestingly, the same as 458 * the first option above. 459 */ 460 error = bread(vp, lbn, size, NOCRED, &bp); 461 } 462 if (error) { 463 brelse(bp); 464 bp = NULL; 465 break; 466 } 467 468 /* 469 * If IO_DIRECT then set B_DIRECT for the buffer. This 470 * will cause us to attempt to release the buffer later on 471 * and will cause the buffer cache to attempt to free the 472 * underlying pages. 473 */ 474 if (ioflag & IO_DIRECT) 475 bp->b_flags |= B_DIRECT; 476 477 /* 478 * We should only get non-zero b_resid when an I/O error 479 * has occurred, which should cause us to break above. 480 * However, if the short read did not cause an error, 481 * then we want to ensure that we do not uiomove bad 482 * or uninitialized data. 483 */ 484 size -= bp->b_resid; 485 if (size < xfersize) { 486 if (size == 0) 487 break; 488 xfersize = size; 489 } 490 491 error = uiomove((char *)bp->b_data + blkoffset, 492 (int)xfersize, uio); 493 if (error) 494 break; 495 496 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 497 (LIST_FIRST(&bp->b_dep) == NULL)) { 498 /* 499 * If there are no dependencies, and it's VMIO, 500 * then we don't need the buf, mark it available 501 * for freeing. The VM has the data. 502 */ 503 bp->b_flags |= B_RELBUF; 504 brelse(bp); 505 } else { 506 /* 507 * Otherwise let whoever 508 * made the request take care of 509 * freeing it. We just queue 510 * it onto another list. 511 */ 512 bqrelse(bp); 513 } 514 } 515 516 /* 517 * This can only happen in the case of an error 518 * because the loop above resets bp to NULL on each iteration 519 * and on normal completion has not set a new value into it. 520 * so it must have come from a 'break' statement 521 */ 522 if (bp != NULL) { 523 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 524 (LIST_FIRST(&bp->b_dep) == NULL)) { 525 bp->b_flags |= B_RELBUF; 526 brelse(bp); 527 } else { 528 bqrelse(bp); 529 } 530 } 531 532 if ((error == 0 || uio->uio_resid != orig_resid) && 533 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) 534 ip->i_flag |= IN_ACCESS; 535 return (error); 536 } 537 538 /* 539 * Vnode op for writing. 
540 */ 541 static int 542 ffs_write(ap) 543 struct vop_write_args /* { 544 struct vnode *a_vp; 545 struct uio *a_uio; 546 int a_ioflag; 547 struct ucred *a_cred; 548 } */ *ap; 549 { 550 struct vnode *vp; 551 struct uio *uio; 552 struct inode *ip; 553 struct fs *fs; 554 struct buf *bp; 555 struct thread *td; 556 ufs_lbn_t lbn; 557 off_t osize; 558 int seqcount; 559 int blkoffset, error, extended, flags, ioflag, resid, size, xfersize; 560 561 vp = ap->a_vp; 562 uio = ap->a_uio; 563 ioflag = ap->a_ioflag; 564 if (ap->a_ioflag & IO_EXT) 565 #ifdef notyet 566 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred)); 567 #else 568 panic("ffs_write+IO_EXT"); 569 #endif 570 571 GIANT_REQUIRED; 572 573 extended = 0; 574 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 575 ip = VTOI(vp); 576 577 #ifdef DIAGNOSTIC 578 if (uio->uio_rw != UIO_WRITE) 579 panic("ffs_write: mode"); 580 #endif 581 582 switch (vp->v_type) { 583 case VREG: 584 if (ioflag & IO_APPEND) 585 uio->uio_offset = ip->i_size; 586 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 587 return (EPERM); 588 /* FALLTHROUGH */ 589 case VLNK: 590 break; 591 case VDIR: 592 panic("ffs_write: dir write"); 593 break; 594 default: 595 panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type, 596 (int)uio->uio_offset, 597 (int)uio->uio_resid 598 ); 599 } 600 601 KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0")); 602 KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0")); 603 fs = ip->i_fs; 604 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) 605 return (EFBIG); 606 /* 607 * Maybe this should be above the vnode op call, but so long as 608 * file servers have no limits, I don't think it matters. 609 */ 610 td = uio->uio_td; 611 if (vp->v_type == VREG && td != NULL) { 612 PROC_LOCK(td->td_proc); 613 if (uio->uio_offset + uio->uio_resid > 614 lim_cur(td->td_proc, RLIMIT_FSIZE)) { 615 psignal(td->td_proc, SIGXFSZ); 616 PROC_UNLOCK(td->td_proc); 617 return (EFBIG); 618 } 619 PROC_UNLOCK(td->td_proc); 620 } 621 622 resid = uio->uio_resid; 623 osize = ip->i_size; 624 if (seqcount > BA_SEQMAX) 625 flags = BA_SEQMAX << BA_SEQSHIFT; 626 else 627 flags = seqcount << BA_SEQSHIFT; 628 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 629 flags |= IO_SYNC; 630 631 for (error = 0; uio->uio_resid > 0;) { 632 lbn = lblkno(fs, uio->uio_offset); 633 blkoffset = blkoff(fs, uio->uio_offset); 634 xfersize = fs->fs_bsize - blkoffset; 635 if (uio->uio_resid < xfersize) 636 xfersize = uio->uio_resid; 637 if (uio->uio_offset + xfersize > ip->i_size) 638 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 639 640 /* 641 * We must perform a read-before-write if the transfer size 642 * does not cover the entire buffer. 643 */ 644 if (fs->fs_bsize > xfersize) 645 flags |= BA_CLRBUF; 646 else 647 flags &= ~BA_CLRBUF; 648 /* XXX is uio->uio_offset the right thing here? */ 649 error = UFS_BALLOC(vp, uio->uio_offset, xfersize, 650 ap->a_cred, flags, &bp); 651 if (error != 0) 652 break; 653 /* 654 * If the buffer is not valid we have to clear out any 655 * garbage data from the pages instantiated for the buffer. 656 * If we do not, a failed uiomove() during a write can leave 657 * the prior contents of the pages exposed to a userland 658 * mmap(). XXX deal with uiomove() errors a better way. 
659 */ 660 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize) 661 vfs_bio_clrbuf(bp); 662 if (ioflag & IO_DIRECT) 663 bp->b_flags |= B_DIRECT; 664 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) 665 bp->b_flags |= B_NOCACHE; 666 667 if (uio->uio_offset + xfersize > ip->i_size) { 668 ip->i_size = uio->uio_offset + xfersize; 669 DIP_SET(ip, i_size, ip->i_size); 670 extended = 1; 671 } 672 673 size = blksize(fs, ip, lbn) - bp->b_resid; 674 if (size < xfersize) 675 xfersize = size; 676 677 error = 678 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 679 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 680 (LIST_FIRST(&bp->b_dep) == NULL)) { 681 bp->b_flags |= B_RELBUF; 682 } 683 684 /* 685 * If IO_SYNC each buffer is written synchronously. Otherwise 686 * if we have a severe page deficiency write the buffer 687 * asynchronously. Otherwise try to cluster, and if that 688 * doesn't do it then either do an async write (if O_DIRECT), 689 * or a delayed write (if not). 690 */ 691 if (ioflag & IO_SYNC) { 692 (void)bwrite(bp); 693 } else if (vm_page_count_severe() || 694 buf_dirty_count_severe() || 695 (ioflag & IO_ASYNC)) { 696 bp->b_flags |= B_CLUSTEROK; 697 bawrite(bp); 698 } else if (xfersize + blkoffset == fs->fs_bsize) { 699 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 700 bp->b_flags |= B_CLUSTEROK; 701 cluster_write(vp, bp, ip->i_size, seqcount); 702 } else { 703 bawrite(bp); 704 } 705 } else if (ioflag & IO_DIRECT) { 706 bp->b_flags |= B_CLUSTEROK; 707 bawrite(bp); 708 } else { 709 bp->b_flags |= B_CLUSTEROK; 710 bdwrite(bp); 711 } 712 if (error || xfersize == 0) 713 break; 714 ip->i_flag |= IN_CHANGE | IN_UPDATE; 715 } 716 /* 717 * If we successfully wrote any data, and we are not the superuser 718 * we clear the setuid and setgid bits as a precaution against 719 * tampering. 720 */ 721 if (resid > uio->uio_resid && ap->a_cred && 722 suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) { 723 ip->i_mode &= ~(ISUID | ISGID); 724 DIP_SET(ip, i_mode, ip->i_mode); 725 } 726 if (resid > uio->uio_resid) 727 VN_KNOTE_UNLOCKED(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); 728 if (error) { 729 if (ioflag & IO_UNIT) { 730 (void)UFS_TRUNCATE(vp, osize, 731 IO_NORMAL | (ioflag & IO_SYNC), 732 ap->a_cred, uio->uio_td); 733 uio->uio_offset -= resid - uio->uio_resid; 734 uio->uio_resid = resid; 735 } 736 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) 737 error = UFS_UPDATE(vp, 1); 738 return (error); 739 } 740 741 /* 742 * get page routine 743 */ 744 static int 745 ffs_getpages(ap) 746 struct vop_getpages_args *ap; 747 { 748 int i; 749 vm_page_t mreq; 750 int pcount; 751 752 GIANT_REQUIRED; 753 754 pcount = round_page(ap->a_count) / PAGE_SIZE; 755 mreq = ap->a_m[ap->a_reqpage]; 756 757 /* 758 * if ANY DEV_BSIZE blocks are valid on a large filesystem block, 759 * then the entire page is valid. Since the page may be mapped, 760 * user programs might reference data beyond the actual end of file 761 * occuring within the page. We have to zero that data. 
762 */ 763 VM_OBJECT_LOCK(mreq->object); 764 if (mreq->valid) { 765 if (mreq->valid != VM_PAGE_BITS_ALL) 766 vm_page_zero_invalid(mreq, TRUE); 767 vm_page_lock_queues(); 768 for (i = 0; i < pcount; i++) { 769 if (i != ap->a_reqpage) { 770 vm_page_free(ap->a_m[i]); 771 } 772 } 773 vm_page_unlock_queues(); 774 VM_OBJECT_UNLOCK(mreq->object); 775 return VM_PAGER_OK; 776 } 777 VM_OBJECT_UNLOCK(mreq->object); 778 779 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 780 ap->a_count, 781 ap->a_reqpage); 782 } 783 784 785 /* 786 * Extended attribute area reading. 787 */ 788 static int 789 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag) 790 { 791 struct inode *ip; 792 struct ufs2_dinode *dp; 793 struct fs *fs; 794 struct buf *bp; 795 ufs_lbn_t lbn, nextlbn; 796 off_t bytesinfile; 797 long size, xfersize, blkoffset; 798 int error, orig_resid; 799 800 GIANT_REQUIRED; 801 802 ip = VTOI(vp); 803 fs = ip->i_fs; 804 dp = ip->i_din2; 805 806 #ifdef DIAGNOSTIC 807 if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC) 808 panic("ffs_extread: mode"); 809 810 #endif 811 orig_resid = uio->uio_resid; 812 KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0")); 813 if (orig_resid == 0) 814 return (0); 815 KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0")); 816 817 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 818 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0) 819 break; 820 lbn = lblkno(fs, uio->uio_offset); 821 nextlbn = lbn + 1; 822 823 /* 824 * size of buffer. The buffer representing the 825 * end of the file is rounded up to the size of 826 * the block type ( fragment or full block, 827 * depending ). 828 */ 829 size = sblksize(fs, dp->di_extsize, lbn); 830 blkoffset = blkoff(fs, uio->uio_offset); 831 832 /* 833 * The amount we want to transfer in this iteration is 834 * one FS block less the amount of the data before 835 * our startpoint (duh!) 836 */ 837 xfersize = fs->fs_bsize - blkoffset; 838 839 /* 840 * But if we actually want less than the block, 841 * or the file doesn't have a whole block more of data, 842 * then use the lesser number. 843 */ 844 if (uio->uio_resid < xfersize) 845 xfersize = uio->uio_resid; 846 if (bytesinfile < xfersize) 847 xfersize = bytesinfile; 848 849 if (lblktosize(fs, nextlbn) >= dp->di_extsize) { 850 /* 851 * Don't do readahead if this is the end of the info. 852 */ 853 error = bread(vp, -1 - lbn, size, NOCRED, &bp); 854 } else { 855 /* 856 * If we have a second block, then 857 * fire off a request for a readahead 858 * as well as a read. Note that the 4th and 5th 859 * arguments point to arrays of the size specified in 860 * the 6th argument. 861 */ 862 int nextsize = sblksize(fs, dp->di_extsize, nextlbn); 863 864 nextlbn = -1 - nextlbn; 865 error = breadn(vp, -1 - lbn, 866 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 867 } 868 if (error) { 869 brelse(bp); 870 bp = NULL; 871 break; 872 } 873 874 /* 875 * If IO_DIRECT then set B_DIRECT for the buffer. This 876 * will cause us to attempt to release the buffer later on 877 * and will cause the buffer cache to attempt to free the 878 * underlying pages. 879 */ 880 if (ioflag & IO_DIRECT) 881 bp->b_flags |= B_DIRECT; 882 883 /* 884 * We should only get non-zero b_resid when an I/O error 885 * has occurred, which should cause us to break above. 886 * However, if the short read did not cause an error, 887 * then we want to ensure that we do not uiomove bad 888 * or uninitialized data. 
889 */ 890 size -= bp->b_resid; 891 if (size < xfersize) { 892 if (size == 0) 893 break; 894 xfersize = size; 895 } 896 897 error = uiomove((char *)bp->b_data + blkoffset, 898 (int)xfersize, uio); 899 if (error) 900 break; 901 902 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 903 (LIST_FIRST(&bp->b_dep) == NULL)) { 904 /* 905 * If there are no dependencies, and it's VMIO, 906 * then we don't need the buf, mark it available 907 * for freeing. The VM has the data. 908 */ 909 bp->b_flags |= B_RELBUF; 910 brelse(bp); 911 } else { 912 /* 913 * Otherwise let whoever 914 * made the request take care of 915 * freeing it. We just queue 916 * it onto another list. 917 */ 918 bqrelse(bp); 919 } 920 } 921 922 /* 923 * This can only happen in the case of an error 924 * because the loop above resets bp to NULL on each iteration 925 * and on normal completion has not set a new value into it. 926 * so it must have come from a 'break' statement 927 */ 928 if (bp != NULL) { 929 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 930 (LIST_FIRST(&bp->b_dep) == NULL)) { 931 bp->b_flags |= B_RELBUF; 932 brelse(bp); 933 } else { 934 bqrelse(bp); 935 } 936 } 937 938 if ((error == 0 || uio->uio_resid != orig_resid) && 939 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) 940 ip->i_flag |= IN_ACCESS; 941 return (error); 942 } 943 944 /* 945 * Extended attribute area writing. 946 */ 947 static int 948 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred) 949 { 950 struct inode *ip; 951 struct ufs2_dinode *dp; 952 struct fs *fs; 953 struct buf *bp; 954 ufs_lbn_t lbn; 955 off_t osize; 956 int blkoffset, error, flags, resid, size, xfersize; 957 958 GIANT_REQUIRED; 959 960 ip = VTOI(vp); 961 fs = ip->i_fs; 962 dp = ip->i_din2; 963 964 #ifdef DIAGNOSTIC 965 if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC) 966 panic("ffs_extwrite: mode"); 967 #endif 968 969 if (ioflag & IO_APPEND) 970 uio->uio_offset = dp->di_extsize; 971 KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0")); 972 KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0")); 973 if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize) 974 return (EFBIG); 975 976 resid = uio->uio_resid; 977 osize = dp->di_extsize; 978 flags = IO_EXT; 979 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 980 flags |= IO_SYNC; 981 982 for (error = 0; uio->uio_resid > 0;) { 983 lbn = lblkno(fs, uio->uio_offset); 984 blkoffset = blkoff(fs, uio->uio_offset); 985 xfersize = fs->fs_bsize - blkoffset; 986 if (uio->uio_resid < xfersize) 987 xfersize = uio->uio_resid; 988 989 /* 990 * We must perform a read-before-write if the transfer size 991 * does not cover the entire buffer. 992 */ 993 if (fs->fs_bsize > xfersize) 994 flags |= BA_CLRBUF; 995 else 996 flags &= ~BA_CLRBUF; 997 error = UFS_BALLOC(vp, uio->uio_offset, xfersize, 998 ucred, flags, &bp); 999 if (error != 0) 1000 break; 1001 /* 1002 * If the buffer is not valid we have to clear out any 1003 * garbage data from the pages instantiated for the buffer. 1004 * If we do not, a failed uiomove() during a write can leave 1005 * the prior contents of the pages exposed to a userland 1006 * mmap(). XXX deal with uiomove() errors a better way. 
1007 */ 1008 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize) 1009 vfs_bio_clrbuf(bp); 1010 if (ioflag & IO_DIRECT) 1011 bp->b_flags |= B_DIRECT; 1012 1013 if (uio->uio_offset + xfersize > dp->di_extsize) 1014 dp->di_extsize = uio->uio_offset + xfersize; 1015 1016 size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid; 1017 if (size < xfersize) 1018 xfersize = size; 1019 1020 error = 1021 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 1022 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 1023 (LIST_FIRST(&bp->b_dep) == NULL)) { 1024 bp->b_flags |= B_RELBUF; 1025 } 1026 1027 /* 1028 * If IO_SYNC each buffer is written synchronously. Otherwise 1029 * if we have a severe page deficiency write the buffer 1030 * asynchronously. Otherwise try to cluster, and if that 1031 * doesn't do it then either do an async write (if O_DIRECT), 1032 * or a delayed write (if not). 1033 */ 1034 if (ioflag & IO_SYNC) { 1035 (void)bwrite(bp); 1036 } else if (vm_page_count_severe() || 1037 buf_dirty_count_severe() || 1038 xfersize + blkoffset == fs->fs_bsize || 1039 (ioflag & (IO_ASYNC | IO_DIRECT))) 1040 bawrite(bp); 1041 else 1042 bdwrite(bp); 1043 if (error || xfersize == 0) 1044 break; 1045 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1046 } 1047 /* 1048 * If we successfully wrote any data, and we are not the superuser 1049 * we clear the setuid and setgid bits as a precaution against 1050 * tampering. 1051 */ 1052 if (resid > uio->uio_resid && ucred && 1053 suser_cred(ucred, SUSER_ALLOWJAIL)) { 1054 ip->i_mode &= ~(ISUID | ISGID); 1055 dp->di_mode = ip->i_mode; 1056 } 1057 if (error) { 1058 if (ioflag & IO_UNIT) { 1059 (void)UFS_TRUNCATE(vp, osize, 1060 IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td); 1061 uio->uio_offset -= resid - uio->uio_resid; 1062 uio->uio_resid = resid; 1063 } 1064 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) 1065 error = UFS_UPDATE(vp, 1); 1066 return (error); 1067 } 1068 1069 1070 /* 1071 * Vnode operating to retrieve a named extended attribute. 1072 * 1073 * Locate a particular EA (nspace:name) in the area (ptr:length), and return 1074 * the length of the EA, and possibly the pointer to the entry and to the data. 
1075 */ 1076 static int 1077 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac) 1078 { 1079 u_char *p, *pe, *pn, *p0; 1080 int eapad1, eapad2, ealength, ealen, nlen; 1081 uint32_t ul; 1082 1083 pe = ptr + length; 1084 nlen = strlen(name); 1085 1086 for (p = ptr; p < pe; p = pn) { 1087 p0 = p; 1088 bcopy(p, &ul, sizeof(ul)); 1089 pn = p + ul; 1090 /* make sure this entry is complete */ 1091 if (pn > pe) 1092 break; 1093 p += sizeof(uint32_t); 1094 if (*p != nspace) 1095 continue; 1096 p++; 1097 eapad2 = *p++; 1098 if (*p != nlen) 1099 continue; 1100 p++; 1101 if (bcmp(p, name, nlen)) 1102 continue; 1103 ealength = sizeof(uint32_t) + 3 + nlen; 1104 eapad1 = 8 - (ealength % 8); 1105 if (eapad1 == 8) 1106 eapad1 = 0; 1107 ealength += eapad1; 1108 ealen = ul - ealength - eapad2; 1109 p += nlen + eapad1; 1110 if (eap != NULL) 1111 *eap = p0; 1112 if (eac != NULL) 1113 *eac = p; 1114 return (ealen); 1115 } 1116 return(-1); 1117 } 1118 1119 static int 1120 ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra) 1121 { 1122 struct inode *ip; 1123 struct ufs2_dinode *dp; 1124 struct uio luio; 1125 struct iovec liovec; 1126 int easize, error; 1127 u_char *eae; 1128 1129 ip = VTOI(vp); 1130 dp = ip->i_din2; 1131 easize = dp->di_extsize; 1132 1133 eae = malloc(easize + extra, M_TEMP, M_WAITOK); 1134 1135 liovec.iov_base = eae; 1136 liovec.iov_len = easize; 1137 luio.uio_iov = &liovec; 1138 luio.uio_iovcnt = 1; 1139 luio.uio_offset = 0; 1140 luio.uio_resid = easize; 1141 luio.uio_segflg = UIO_SYSSPACE; 1142 luio.uio_rw = UIO_READ; 1143 luio.uio_td = td; 1144 1145 error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC); 1146 if (error) { 1147 free(eae, M_TEMP); 1148 return(error); 1149 } 1150 *p = eae; 1151 return (0); 1152 } 1153 1154 static int 1155 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td) 1156 { 1157 struct inode *ip; 1158 struct ufs2_dinode *dp; 1159 int error; 1160 1161 ip = VTOI(vp); 1162 1163 if (ip->i_ea_area != NULL) 1164 return (EBUSY); 1165 dp = ip->i_din2; 1166 error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0); 1167 if (error) 1168 return (error); 1169 ip->i_ea_len = dp->di_extsize; 1170 ip->i_ea_error = 0; 1171 return (0); 1172 } 1173 1174 /* 1175 * Vnode extattr transaction commit/abort 1176 */ 1177 static int 1178 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td) 1179 { 1180 struct inode *ip; 1181 struct uio luio; 1182 struct iovec liovec; 1183 int error; 1184 struct ufs2_dinode *dp; 1185 1186 ip = VTOI(vp); 1187 if (ip->i_ea_area == NULL) 1188 return (EINVAL); 1189 dp = ip->i_din2; 1190 error = ip->i_ea_error; 1191 if (commit && error == 0) { 1192 if (cred == NOCRED) 1193 cred = vp->v_mount->mnt_cred; 1194 liovec.iov_base = ip->i_ea_area; 1195 liovec.iov_len = ip->i_ea_len; 1196 luio.uio_iov = &liovec; 1197 luio.uio_iovcnt = 1; 1198 luio.uio_offset = 0; 1199 luio.uio_resid = ip->i_ea_len; 1200 luio.uio_segflg = UIO_SYSSPACE; 1201 luio.uio_rw = UIO_WRITE; 1202 luio.uio_td = td; 1203 /* XXX: I'm not happy about truncating to zero size */ 1204 if (ip->i_ea_len < dp->di_extsize) 1205 error = ffs_truncate(vp, 0, IO_EXT, cred, td); 1206 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred); 1207 } 1208 free(ip->i_ea_area, M_TEMP); 1209 ip->i_ea_area = NULL; 1210 ip->i_ea_len = 0; 1211 ip->i_ea_error = 0; 1212 return (error); 1213 } 1214 1215 /* 1216 * Vnode extattr strategy routine for special devices and fifos. 
1217 * 1218 * We need to check for a read or write of the external attributes. 1219 * Otherwise we just fall through and do the usual thing. 1220 */ 1221 static int 1222 ffsext_strategy(struct vop_strategy_args *ap) 1223 /* 1224 struct vop_strategy_args { 1225 struct vnodeop_desc *a_desc; 1226 struct vnode *a_vp; 1227 struct buf *a_bp; 1228 }; 1229 */ 1230 { 1231 struct vnode *vp; 1232 daddr_t lbn; 1233 1234 KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)", 1235 __func__, ap->a_vp, ap->a_bp->b_vp)); 1236 vp = ap->a_vp; 1237 lbn = ap->a_bp->b_lblkno; 1238 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC && 1239 lbn < 0 && lbn >= -NXADDR) 1240 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1241 if (vp->v_type == VFIFO) 1242 return (ufs_vnoperatefifo((struct vop_generic_args *)ap)); 1243 panic("spec nodes went here"); 1244 } 1245 1246 /* 1247 * Vnode extattr transaction commit/abort 1248 */ 1249 static int 1250 ffs_openextattr(struct vop_openextattr_args *ap) 1251 /* 1252 struct vop_openextattr_args { 1253 struct vnodeop_desc *a_desc; 1254 struct vnode *a_vp; 1255 IN struct ucred *a_cred; 1256 IN struct thread *a_td; 1257 }; 1258 */ 1259 { 1260 struct inode *ip; 1261 struct fs *fs; 1262 1263 ip = VTOI(ap->a_vp); 1264 fs = ip->i_fs; 1265 if (fs->fs_magic == FS_UFS1_MAGIC) 1266 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1267 1268 if (ap->a_vp->v_type == VCHR) 1269 return (EOPNOTSUPP); 1270 1271 return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td)); 1272 } 1273 1274 1275 /* 1276 * Vnode extattr transaction commit/abort 1277 */ 1278 static int 1279 ffs_closeextattr(struct vop_closeextattr_args *ap) 1280 /* 1281 struct vop_closeextattr_args { 1282 struct vnodeop_desc *a_desc; 1283 struct vnode *a_vp; 1284 int a_commit; 1285 IN struct ucred *a_cred; 1286 IN struct thread *a_td; 1287 }; 1288 */ 1289 { 1290 struct inode *ip; 1291 struct fs *fs; 1292 1293 ip = VTOI(ap->a_vp); 1294 fs = ip->i_fs; 1295 if (fs->fs_magic == FS_UFS1_MAGIC) 1296 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1297 1298 if (ap->a_vp->v_type == VCHR) 1299 return (EOPNOTSUPP); 1300 1301 return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td)); 1302 } 1303 1304 /* 1305 * Vnode operation to remove a named attribute. 
1306 */ 1307 static int 1308 ffs_deleteextattr(struct vop_deleteextattr_args *ap) 1309 /* 1310 vop_deleteextattr { 1311 IN struct vnode *a_vp; 1312 IN int a_attrnamespace; 1313 IN const char *a_name; 1314 IN struct ucred *a_cred; 1315 IN struct thread *a_td; 1316 }; 1317 */ 1318 { 1319 struct inode *ip; 1320 struct fs *fs; 1321 uint32_t ealength, ul; 1322 int ealen, olen, eapad1, eapad2, error, i, easize; 1323 u_char *eae, *p; 1324 int stand_alone; 1325 1326 ip = VTOI(ap->a_vp); 1327 fs = ip->i_fs; 1328 1329 if (fs->fs_magic == FS_UFS1_MAGIC) 1330 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1331 1332 if (ap->a_vp->v_type == VCHR) 1333 return (EOPNOTSUPP); 1334 1335 if (strlen(ap->a_name) == 0) 1336 return (EINVAL); 1337 1338 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1339 ap->a_cred, ap->a_td, IWRITE); 1340 if (error) { 1341 if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1342 ip->i_ea_error = error; 1343 return (error); 1344 } 1345 1346 if (ip->i_ea_area == NULL) { 1347 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1348 if (error) 1349 return (error); 1350 stand_alone = 1; 1351 } else { 1352 stand_alone = 0; 1353 } 1354 1355 ealength = eapad1 = ealen = eapad2 = 0; 1356 1357 eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK); 1358 bcopy(ip->i_ea_area, eae, ip->i_ea_len); 1359 easize = ip->i_ea_len; 1360 1361 olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, 1362 &p, NULL); 1363 if (olen == -1) { 1364 /* delete but nonexistent */ 1365 free(eae, M_TEMP); 1366 if (stand_alone) 1367 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1368 return(ENOATTR); 1369 } 1370 bcopy(p, &ul, sizeof ul); 1371 i = p - eae + ul; 1372 if (ul != ealength) { 1373 bcopy(p + ul, p + ealength, easize - i); 1374 easize += (ealength - ul); 1375 } 1376 if (easize > NXADDR * fs->fs_bsize) { 1377 free(eae, M_TEMP); 1378 if (stand_alone) 1379 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1380 else if (ip->i_ea_error == 0) 1381 ip->i_ea_error = ENOSPC; 1382 return(ENOSPC); 1383 } 1384 p = ip->i_ea_area; 1385 ip->i_ea_area = eae; 1386 ip->i_ea_len = easize; 1387 free(p, M_TEMP); 1388 if (stand_alone) 1389 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); 1390 return(error); 1391 } 1392 1393 /* 1394 * Vnode operation to retrieve a named extended attribute. 
1395 */ 1396 static int 1397 ffs_getextattr(struct vop_getextattr_args *ap) 1398 /* 1399 vop_getextattr { 1400 IN struct vnode *a_vp; 1401 IN int a_attrnamespace; 1402 IN const char *a_name; 1403 INOUT struct uio *a_uio; 1404 OUT size_t *a_size; 1405 IN struct ucred *a_cred; 1406 IN struct thread *a_td; 1407 }; 1408 */ 1409 { 1410 struct inode *ip; 1411 struct fs *fs; 1412 u_char *eae, *p; 1413 unsigned easize; 1414 int error, ealen, stand_alone; 1415 1416 ip = VTOI(ap->a_vp); 1417 fs = ip->i_fs; 1418 1419 if (fs->fs_magic == FS_UFS1_MAGIC) 1420 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1421 1422 if (ap->a_vp->v_type == VCHR) 1423 return (EOPNOTSUPP); 1424 1425 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1426 ap->a_cred, ap->a_td, IREAD); 1427 if (error) 1428 return (error); 1429 1430 if (ip->i_ea_area == NULL) { 1431 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1432 if (error) 1433 return (error); 1434 stand_alone = 1; 1435 } else { 1436 stand_alone = 0; 1437 } 1438 eae = ip->i_ea_area; 1439 easize = ip->i_ea_len; 1440 1441 ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, 1442 NULL, &p); 1443 if (ealen >= 0) { 1444 error = 0; 1445 if (ap->a_size != NULL) 1446 *ap->a_size = ealen; 1447 else if (ap->a_uio != NULL) 1448 error = uiomove(p, ealen, ap->a_uio); 1449 } else 1450 error = ENOATTR; 1451 if (stand_alone) 1452 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1453 return(error); 1454 } 1455 1456 /* 1457 * Vnode operation to retrieve extended attributes on a vnode. 1458 */ 1459 static int 1460 ffs_listextattr(struct vop_listextattr_args *ap) 1461 /* 1462 vop_listextattr { 1463 IN struct vnode *a_vp; 1464 IN int a_attrnamespace; 1465 INOUT struct uio *a_uio; 1466 OUT size_t *a_size; 1467 IN struct ucred *a_cred; 1468 IN struct thread *a_td; 1469 }; 1470 */ 1471 { 1472 struct inode *ip; 1473 struct fs *fs; 1474 u_char *eae, *p, *pe, *pn; 1475 unsigned easize; 1476 uint32_t ul; 1477 int error, ealen, stand_alone; 1478 1479 ip = VTOI(ap->a_vp); 1480 fs = ip->i_fs; 1481 1482 if (fs->fs_magic == FS_UFS1_MAGIC) 1483 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1484 1485 if (ap->a_vp->v_type == VCHR) 1486 return (EOPNOTSUPP); 1487 1488 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1489 ap->a_cred, ap->a_td, IREAD); 1490 if (error) 1491 return (error); 1492 1493 if (ip->i_ea_area == NULL) { 1494 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1495 if (error) 1496 return (error); 1497 stand_alone = 1; 1498 } else { 1499 stand_alone = 0; 1500 } 1501 eae = ip->i_ea_area; 1502 easize = ip->i_ea_len; 1503 1504 error = 0; 1505 if (ap->a_size != NULL) 1506 *ap->a_size = 0; 1507 pe = eae + easize; 1508 for(p = eae; error == 0 && p < pe; p = pn) { 1509 bcopy(p, &ul, sizeof(ul)); 1510 pn = p + ul; 1511 if (pn > pe) 1512 break; 1513 p += sizeof(ul); 1514 if (*p++ != ap->a_attrnamespace) 1515 continue; 1516 p++; /* pad2 */ 1517 ealen = *p; 1518 if (ap->a_size != NULL) { 1519 *ap->a_size += ealen + 1; 1520 } else if (ap->a_uio != NULL) { 1521 error = uiomove(p, ealen + 1, ap->a_uio); 1522 } 1523 } 1524 if (stand_alone) 1525 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1526 return(error); 1527 } 1528 1529 /* 1530 * Vnode operation to set a named attribute. 
1531 */ 1532 static int 1533 ffs_setextattr(struct vop_setextattr_args *ap) 1534 /* 1535 vop_setextattr { 1536 IN struct vnode *a_vp; 1537 IN int a_attrnamespace; 1538 IN const char *a_name; 1539 INOUT struct uio *a_uio; 1540 IN struct ucred *a_cred; 1541 IN struct thread *a_td; 1542 }; 1543 */ 1544 { 1545 struct inode *ip; 1546 struct fs *fs; 1547 uint32_t ealength, ul; 1548 int ealen, olen, eapad1, eapad2, error, i, easize; 1549 u_char *eae, *p; 1550 int stand_alone; 1551 1552 ip = VTOI(ap->a_vp); 1553 fs = ip->i_fs; 1554 1555 if (fs->fs_magic == FS_UFS1_MAGIC) 1556 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1557 1558 if (ap->a_vp->v_type == VCHR) 1559 return (EOPNOTSUPP); 1560 1561 if (strlen(ap->a_name) == 0) 1562 return (EINVAL); 1563 1564 /* XXX Now unsupported API to delete EAs using NULL uio. */ 1565 if (ap->a_uio == NULL) 1566 return (EOPNOTSUPP); 1567 1568 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1569 ap->a_cred, ap->a_td, IWRITE); 1570 if (error) { 1571 if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1572 ip->i_ea_error = error; 1573 return (error); 1574 } 1575 1576 if (ip->i_ea_area == NULL) { 1577 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1578 if (error) 1579 return (error); 1580 stand_alone = 1; 1581 } else { 1582 stand_alone = 0; 1583 } 1584 1585 ealen = ap->a_uio->uio_resid; 1586 ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name); 1587 eapad1 = 8 - (ealength % 8); 1588 if (eapad1 == 8) 1589 eapad1 = 0; 1590 eapad2 = 8 - (ealen % 8); 1591 if (eapad2 == 8) 1592 eapad2 = 0; 1593 ealength += eapad1 + ealen + eapad2; 1594 1595 eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK); 1596 bcopy(ip->i_ea_area, eae, ip->i_ea_len); 1597 easize = ip->i_ea_len; 1598 1599 olen = ffs_findextattr(eae, easize, 1600 ap->a_attrnamespace, ap->a_name, &p, NULL); 1601 if (olen == -1) { 1602 /* new, append at end */ 1603 p = eae + easize; 1604 easize += ealength; 1605 } else { 1606 bcopy(p, &ul, sizeof ul); 1607 i = p - eae + ul; 1608 if (ul != ealength) { 1609 bcopy(p + ul, p + ealength, easize - i); 1610 easize += (ealength - ul); 1611 } 1612 } 1613 if (easize > NXADDR * fs->fs_bsize) { 1614 free(eae, M_TEMP); 1615 if (stand_alone) 1616 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1617 else if (ip->i_ea_error == 0) 1618 ip->i_ea_error = ENOSPC; 1619 return(ENOSPC); 1620 } 1621 bcopy(&ealength, p, sizeof(ealength)); 1622 p += sizeof(ealength); 1623 *p++ = ap->a_attrnamespace; 1624 *p++ = eapad2; 1625 *p++ = strlen(ap->a_name); 1626 strcpy(p, ap->a_name); 1627 p += strlen(ap->a_name); 1628 bzero(p, eapad1); 1629 p += eapad1; 1630 error = uiomove(p, ealen, ap->a_uio); 1631 if (error) { 1632 free(eae, M_TEMP); 1633 if (stand_alone) 1634 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1635 else if (ip->i_ea_error == 0) 1636 ip->i_ea_error = error; 1637 return(error); 1638 } 1639 p += ealen; 1640 bzero(p, eapad2); 1641 1642 p = ip->i_ea_area; 1643 ip->i_ea_area = eae; 1644 ip->i_ea_len = easize; 1645 free(p, M_TEMP); 1646 if (stand_alone) 1647 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); 1648 return(error); 1649 } 1650