/*
 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
 *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/extattr.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"

#ifdef DIRECTIO
extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
static int	ffs_fsync(struct vop_fsync_args *);
static int	ffs_getpages(struct vop_getpages_args *);
static int	ffs_read(struct vop_read_args *);
static int	ffs_write(struct vop_write_args *);
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static int	ffsext_strategy(struct vop_strategy_args *);
static int	ffs_closeextattr(struct vop_closeextattr_args *);
static int	ffs_deleteextattr(struct vop_deleteextattr_args *);
static int	ffs_getextattr(struct vop_getextattr_args *);
static int	ffs_listextattr(struct vop_listextattr_args *);
static int	ffs_openextattr(struct vop_openextattr_args *);
static int	ffs_setextattr(struct vop_setextattr_args *);

/*
 * Global vfs data structures for ufs.
 */
vop_t **ffs_vnodeop_p;
static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc, (vop_t *) ufs_vnoperate },
	{ &vop_fsync_desc, (vop_t *) ffs_fsync },
	{ &vop_getpages_desc, (vop_t *) ffs_getpages },
	{ &vop_read_desc, (vop_t *) ffs_read },
	{ &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
	{ &vop_write_desc, (vop_t *) ffs_write },
	{ &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc, (vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc, (vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc, (vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc, (vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };

vop_t **ffs_fifoop_p;
static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc, (vop_t *) ufs_vnoperatefifo },
	{ &vop_fsync_desc, (vop_t *) ffs_fsync },
	{ &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
	{ &vop_strategy_desc, (vop_t *) ffsext_strategy },
	{ &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
	{ &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr },
	{ &vop_getextattr_desc, (vop_t *) ffs_getextattr },
	{ &vop_listextattr_desc, (vop_t *) ffs_listextattr },
	{ &vop_openextattr_desc, (vop_t *) ffs_openextattr },
	{ &vop_setextattr_desc, (vop_t *) ffs_setextattr },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };

VNODEOP_SET(ffs_vnodeop_opv_desc);
VNODEOP_SET(ffs_fifoop_opv_desc);

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct buf *bp;
	struct buf *nbp;
	int s, error, wait, passes, skipmeta;
	ufs_lbn_t lbn;

	wait = (ap->a_waitfor == MNT_WAIT);
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (wait)
		skipmeta = 1;
	s = splbio();
	VI_LOCK(vp);
loop:
	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if ((skipmeta == 1 && bp->b_lblkno < 0))
			continue;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0)) {
			bp->b_flags |= B_DEFERRED;
			BUF_UNLOCK(bp);
			VI_LOCK(vp);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If this is a synchronous flush request, or it is not a
		 * file or device, start the write on this buffer immediately.
		 */
		if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
			/*
			 * On our final pass through, do all I/O synchronously
			 * so that we can find out if our flush is failing
			 * because of write errors.
			 */
			if (passes > 0 || !wait) {
				if ((bp->b_flags & B_CLUSTEROK) && !wait) {
					(void) vfs_bio_awrite(bp);
				} else {
					bremfree(bp);
					splx(s);
					(void) bawrite(bp);
					s = splbio();
				}
			} else {
				bremfree(bp);
				splx(s);
				if ((error = bwrite(bp)) != 0)
					return (error);
				s = splbio();
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_INVAL | B_NOCACHE;
			splx(s);
			brelse(bp);
			s = splbio();
		} else
			vfs_bio_awrite(bp);

		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		VI_LOCK(vp);
		nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
	}
	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}

	if (wait) {
		bufobj_wwait(&vp->v_bufobj, 3, 0);
		VI_UNLOCK(vp);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();

		VI_LOCK(vp);
		if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean.  Thus we give block devices a
			 * good effort, then just give up.  For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (!vn_isdisk(vp, NULL))
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	VI_UNLOCK(vp);
	splx(s);
	return (UFS_UPDATE(vp, wait));
}
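
/*
 * A worked sketch of the EOF rounding in ffs_fsync() above, assuming a
 * hypothetical filesystem with fs_bsize == 16384 and a file with
 * i_size == 20000 (illustrative values, not from any real configuration):
 *
 *	lbn = lblkno(fs, 20000 + 16384 - 1) = lblkno(fs, 36383) = 2
 *
 * Blocks 0 and 1 hold live data, so any dirty buffer with b_lblkno >= 2
 * covers only truncated-off data and may be discarded rather than written.
 */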
319 */ 320 /* ARGSUSED */ 321 static int 322 ffs_read(ap) 323 struct vop_read_args /* { 324 struct vnode *a_vp; 325 struct uio *a_uio; 326 int a_ioflag; 327 struct ucred *a_cred; 328 } */ *ap; 329 { 330 struct vnode *vp; 331 struct inode *ip; 332 struct uio *uio; 333 struct fs *fs; 334 struct buf *bp; 335 ufs_lbn_t lbn, nextlbn; 336 off_t bytesinfile; 337 long size, xfersize, blkoffset; 338 int error, orig_resid; 339 int seqcount; 340 int ioflag; 341 342 vp = ap->a_vp; 343 uio = ap->a_uio; 344 ioflag = ap->a_ioflag; 345 if (ap->a_ioflag & IO_EXT) 346 #ifdef notyet 347 return (ffs_extread(vp, uio, ioflag)); 348 #else 349 panic("ffs_read+IO_EXT"); 350 #endif 351 #ifdef DIRECTIO 352 if ((ioflag & IO_DIRECT) != 0) { 353 int workdone; 354 355 error = ffs_rawread(vp, uio, &workdone); 356 if (error != 0 || workdone != 0) 357 return error; 358 } 359 #endif 360 361 GIANT_REQUIRED; 362 363 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 364 ip = VTOI(vp); 365 366 #ifdef DIAGNOSTIC 367 if (uio->uio_rw != UIO_READ) 368 panic("ffs_read: mode"); 369 370 if (vp->v_type == VLNK) { 371 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) 372 panic("ffs_read: short symlink"); 373 } else if (vp->v_type != VREG && vp->v_type != VDIR) 374 panic("ffs_read: type %d", vp->v_type); 375 #endif 376 orig_resid = uio->uio_resid; 377 KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0")); 378 if (orig_resid == 0) 379 return (0); 380 KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0")); 381 fs = ip->i_fs; 382 if (uio->uio_offset < ip->i_size && 383 uio->uio_offset >= fs->fs_maxfilesize) 384 return (EOVERFLOW); 385 386 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 387 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) 388 break; 389 lbn = lblkno(fs, uio->uio_offset); 390 nextlbn = lbn + 1; 391 392 /* 393 * size of buffer. The buffer representing the 394 * end of the file is rounded up to the size of 395 * the block type ( fragment or full block, 396 * depending ). 397 */ 398 size = blksize(fs, ip, lbn); 399 blkoffset = blkoff(fs, uio->uio_offset); 400 401 /* 402 * The amount we want to transfer in this iteration is 403 * one FS block less the amount of the data before 404 * our startpoint (duh!) 405 */ 406 xfersize = fs->fs_bsize - blkoffset; 407 408 /* 409 * But if we actually want less than the block, 410 * or the file doesn't have a whole block more of data, 411 * then use the lesser number. 412 */ 413 if (uio->uio_resid < xfersize) 414 xfersize = uio->uio_resid; 415 if (bytesinfile < xfersize) 416 xfersize = bytesinfile; 417 418 if (lblktosize(fs, nextlbn) >= ip->i_size) { 419 /* 420 * Don't do readahead if this is the end of the file. 421 */ 422 error = bread(vp, lbn, size, NOCRED, &bp); 423 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 424 /* 425 * Otherwise if we are allowed to cluster, 426 * grab as much as we can. 427 * 428 * XXX This may not be a win if we are not 429 * doing sequential access. 430 */ 431 error = cluster_read(vp, ip->i_size, lbn, 432 size, NOCRED, uio->uio_resid, seqcount, &bp); 433 } else if (seqcount > 1) { 434 /* 435 * If we are NOT allowed to cluster, then 436 * if we appear to be acting sequentially, 437 * fire off a request for a readahead 438 * as well as a read. Note that the 4th and 5th 439 * arguments point to arrays of the size specified in 440 * the 6th argument. 
441 */ 442 int nextsize = blksize(fs, ip, nextlbn); 443 error = breadn(vp, lbn, 444 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 445 } else { 446 /* 447 * Failing all of the above, just read what the 448 * user asked for. Interestingly, the same as 449 * the first option above. 450 */ 451 error = bread(vp, lbn, size, NOCRED, &bp); 452 } 453 if (error) { 454 brelse(bp); 455 bp = NULL; 456 break; 457 } 458 459 /* 460 * If IO_DIRECT then set B_DIRECT for the buffer. This 461 * will cause us to attempt to release the buffer later on 462 * and will cause the buffer cache to attempt to free the 463 * underlying pages. 464 */ 465 if (ioflag & IO_DIRECT) 466 bp->b_flags |= B_DIRECT; 467 468 /* 469 * We should only get non-zero b_resid when an I/O error 470 * has occurred, which should cause us to break above. 471 * However, if the short read did not cause an error, 472 * then we want to ensure that we do not uiomove bad 473 * or uninitialized data. 474 */ 475 size -= bp->b_resid; 476 if (size < xfersize) { 477 if (size == 0) 478 break; 479 xfersize = size; 480 } 481 482 error = uiomove((char *)bp->b_data + blkoffset, 483 (int)xfersize, uio); 484 if (error) 485 break; 486 487 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 488 (LIST_FIRST(&bp->b_dep) == NULL)) { 489 /* 490 * If there are no dependencies, and it's VMIO, 491 * then we don't need the buf, mark it available 492 * for freeing. The VM has the data. 493 */ 494 bp->b_flags |= B_RELBUF; 495 brelse(bp); 496 } else { 497 /* 498 * Otherwise let whoever 499 * made the request take care of 500 * freeing it. We just queue 501 * it onto another list. 502 */ 503 bqrelse(bp); 504 } 505 } 506 507 /* 508 * This can only happen in the case of an error 509 * because the loop above resets bp to NULL on each iteration 510 * and on normal completion has not set a new value into it. 511 * so it must have come from a 'break' statement 512 */ 513 if (bp != NULL) { 514 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 515 (LIST_FIRST(&bp->b_dep) == NULL)) { 516 bp->b_flags |= B_RELBUF; 517 brelse(bp); 518 } else { 519 bqrelse(bp); 520 } 521 } 522 523 if ((error == 0 || uio->uio_resid != orig_resid) && 524 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) 525 ip->i_flag |= IN_ACCESS; 526 return (error); 527 } 528 529 /* 530 * Vnode op for writing. 
531 */ 532 static int 533 ffs_write(ap) 534 struct vop_write_args /* { 535 struct vnode *a_vp; 536 struct uio *a_uio; 537 int a_ioflag; 538 struct ucred *a_cred; 539 } */ *ap; 540 { 541 struct vnode *vp; 542 struct uio *uio; 543 struct inode *ip; 544 struct fs *fs; 545 struct buf *bp; 546 struct thread *td; 547 ufs_lbn_t lbn; 548 off_t osize; 549 int seqcount; 550 int blkoffset, error, extended, flags, ioflag, resid, size, xfersize; 551 552 vp = ap->a_vp; 553 uio = ap->a_uio; 554 ioflag = ap->a_ioflag; 555 if (ap->a_ioflag & IO_EXT) 556 #ifdef notyet 557 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred)); 558 #else 559 panic("ffs_write+IO_EXT"); 560 #endif 561 562 GIANT_REQUIRED; 563 564 extended = 0; 565 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 566 ip = VTOI(vp); 567 568 #ifdef DIAGNOSTIC 569 if (uio->uio_rw != UIO_WRITE) 570 panic("ffs_write: mode"); 571 #endif 572 573 switch (vp->v_type) { 574 case VREG: 575 if (ioflag & IO_APPEND) 576 uio->uio_offset = ip->i_size; 577 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 578 return (EPERM); 579 /* FALLTHROUGH */ 580 case VLNK: 581 break; 582 case VDIR: 583 panic("ffs_write: dir write"); 584 break; 585 default: 586 panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type, 587 (int)uio->uio_offset, 588 (int)uio->uio_resid 589 ); 590 } 591 592 KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0")); 593 KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0")); 594 fs = ip->i_fs; 595 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) 596 return (EFBIG); 597 /* 598 * Maybe this should be above the vnode op call, but so long as 599 * file servers have no limits, I don't think it matters. 600 */ 601 td = uio->uio_td; 602 if (vp->v_type == VREG && td != NULL) { 603 PROC_LOCK(td->td_proc); 604 if (uio->uio_offset + uio->uio_resid > 605 lim_cur(td->td_proc, RLIMIT_FSIZE)) { 606 psignal(td->td_proc, SIGXFSZ); 607 PROC_UNLOCK(td->td_proc); 608 return (EFBIG); 609 } 610 PROC_UNLOCK(td->td_proc); 611 } 612 613 resid = uio->uio_resid; 614 osize = ip->i_size; 615 if (seqcount > BA_SEQMAX) 616 flags = BA_SEQMAX << BA_SEQSHIFT; 617 else 618 flags = seqcount << BA_SEQSHIFT; 619 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 620 flags |= IO_SYNC; 621 622 for (error = 0; uio->uio_resid > 0;) { 623 lbn = lblkno(fs, uio->uio_offset); 624 blkoffset = blkoff(fs, uio->uio_offset); 625 xfersize = fs->fs_bsize - blkoffset; 626 if (uio->uio_resid < xfersize) 627 xfersize = uio->uio_resid; 628 if (uio->uio_offset + xfersize > ip->i_size) 629 vnode_pager_setsize(vp, uio->uio_offset + xfersize); 630 631 /* 632 * We must perform a read-before-write if the transfer size 633 * does not cover the entire buffer. 634 */ 635 if (fs->fs_bsize > xfersize) 636 flags |= BA_CLRBUF; 637 else 638 flags &= ~BA_CLRBUF; 639 /* XXX is uio->uio_offset the right thing here? */ 640 error = UFS_BALLOC(vp, uio->uio_offset, xfersize, 641 ap->a_cred, flags, &bp); 642 if (error != 0) 643 break; 644 /* 645 * If the buffer is not valid we have to clear out any 646 * garbage data from the pages instantiated for the buffer. 647 * If we do not, a failed uiomove() during a write can leave 648 * the prior contents of the pages exposed to a userland 649 * mmap(). XXX deal with uiomove() errors a better way. 
650 */ 651 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize) 652 vfs_bio_clrbuf(bp); 653 if (ioflag & IO_DIRECT) 654 bp->b_flags |= B_DIRECT; 655 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) 656 bp->b_flags |= B_NOCACHE; 657 658 if (uio->uio_offset + xfersize > ip->i_size) { 659 ip->i_size = uio->uio_offset + xfersize; 660 DIP_SET(ip, i_size, ip->i_size); 661 extended = 1; 662 } 663 664 size = blksize(fs, ip, lbn) - bp->b_resid; 665 if (size < xfersize) 666 xfersize = size; 667 668 error = 669 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); 670 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 671 (LIST_FIRST(&bp->b_dep) == NULL)) { 672 bp->b_flags |= B_RELBUF; 673 } 674 675 /* 676 * If IO_SYNC each buffer is written synchronously. Otherwise 677 * if we have a severe page deficiency write the buffer 678 * asynchronously. Otherwise try to cluster, and if that 679 * doesn't do it then either do an async write (if O_DIRECT), 680 * or a delayed write (if not). 681 */ 682 if (ioflag & IO_SYNC) { 683 (void)bwrite(bp); 684 } else if (vm_page_count_severe() || 685 buf_dirty_count_severe() || 686 (ioflag & IO_ASYNC)) { 687 bp->b_flags |= B_CLUSTEROK; 688 bawrite(bp); 689 } else if (xfersize + blkoffset == fs->fs_bsize) { 690 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { 691 bp->b_flags |= B_CLUSTEROK; 692 cluster_write(vp, bp, ip->i_size, seqcount); 693 } else { 694 bawrite(bp); 695 } 696 } else if (ioflag & IO_DIRECT) { 697 bp->b_flags |= B_CLUSTEROK; 698 bawrite(bp); 699 } else { 700 bp->b_flags |= B_CLUSTEROK; 701 bdwrite(bp); 702 } 703 if (error || xfersize == 0) 704 break; 705 ip->i_flag |= IN_CHANGE | IN_UPDATE; 706 } 707 /* 708 * If we successfully wrote any data, and we are not the superuser 709 * we clear the setuid and setgid bits as a precaution against 710 * tampering. 711 */ 712 if (resid > uio->uio_resid && ap->a_cred && 713 suser_cred(ap->a_cred, SUSER_ALLOWJAIL)) { 714 ip->i_mode &= ~(ISUID | ISGID); 715 DIP_SET(ip, i_mode, ip->i_mode); 716 } 717 if (resid > uio->uio_resid) 718 VN_KNOTE_UNLOCKED(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); 719 if (error) { 720 if (ioflag & IO_UNIT) { 721 (void)UFS_TRUNCATE(vp, osize, 722 IO_NORMAL | (ioflag & IO_SYNC), 723 ap->a_cred, uio->uio_td); 724 uio->uio_offset -= resid - uio->uio_resid; 725 uio->uio_resid = resid; 726 } 727 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) 728 error = UFS_UPDATE(vp, 1); 729 return (error); 730 } 731 732 /* 733 * get page routine 734 */ 735 static int 736 ffs_getpages(ap) 737 struct vop_getpages_args *ap; 738 { 739 int i; 740 vm_page_t mreq; 741 int pcount; 742 743 GIANT_REQUIRED; 744 745 pcount = round_page(ap->a_count) / PAGE_SIZE; 746 mreq = ap->a_m[ap->a_reqpage]; 747 748 /* 749 * if ANY DEV_BSIZE blocks are valid on a large filesystem block, 750 * then the entire page is valid. Since the page may be mapped, 751 * user programs might reference data beyond the actual end of file 752 * occuring within the page. We have to zero that data. 
753 */ 754 VM_OBJECT_LOCK(mreq->object); 755 if (mreq->valid) { 756 if (mreq->valid != VM_PAGE_BITS_ALL) 757 vm_page_zero_invalid(mreq, TRUE); 758 vm_page_lock_queues(); 759 for (i = 0; i < pcount; i++) { 760 if (i != ap->a_reqpage) { 761 vm_page_free(ap->a_m[i]); 762 } 763 } 764 vm_page_unlock_queues(); 765 VM_OBJECT_UNLOCK(mreq->object); 766 return VM_PAGER_OK; 767 } 768 VM_OBJECT_UNLOCK(mreq->object); 769 770 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, 771 ap->a_count, 772 ap->a_reqpage); 773 } 774 775 776 /* 777 * Extended attribute area reading. 778 */ 779 static int 780 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag) 781 { 782 struct inode *ip; 783 struct ufs2_dinode *dp; 784 struct fs *fs; 785 struct buf *bp; 786 ufs_lbn_t lbn, nextlbn; 787 off_t bytesinfile; 788 long size, xfersize, blkoffset; 789 int error, orig_resid; 790 791 GIANT_REQUIRED; 792 793 ip = VTOI(vp); 794 fs = ip->i_fs; 795 dp = ip->i_din2; 796 797 #ifdef DIAGNOSTIC 798 if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC) 799 panic("ffs_extread: mode"); 800 801 #endif 802 orig_resid = uio->uio_resid; 803 KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0")); 804 if (orig_resid == 0) 805 return (0); 806 KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0")); 807 808 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 809 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0) 810 break; 811 lbn = lblkno(fs, uio->uio_offset); 812 nextlbn = lbn + 1; 813 814 /* 815 * size of buffer. The buffer representing the 816 * end of the file is rounded up to the size of 817 * the block type ( fragment or full block, 818 * depending ). 819 */ 820 size = sblksize(fs, dp->di_extsize, lbn); 821 blkoffset = blkoff(fs, uio->uio_offset); 822 823 /* 824 * The amount we want to transfer in this iteration is 825 * one FS block less the amount of the data before 826 * our startpoint (duh!) 827 */ 828 xfersize = fs->fs_bsize - blkoffset; 829 830 /* 831 * But if we actually want less than the block, 832 * or the file doesn't have a whole block more of data, 833 * then use the lesser number. 834 */ 835 if (uio->uio_resid < xfersize) 836 xfersize = uio->uio_resid; 837 if (bytesinfile < xfersize) 838 xfersize = bytesinfile; 839 840 if (lblktosize(fs, nextlbn) >= dp->di_extsize) { 841 /* 842 * Don't do readahead if this is the end of the info. 843 */ 844 error = bread(vp, -1 - lbn, size, NOCRED, &bp); 845 } else { 846 /* 847 * If we have a second block, then 848 * fire off a request for a readahead 849 * as well as a read. Note that the 4th and 5th 850 * arguments point to arrays of the size specified in 851 * the 6th argument. 852 */ 853 int nextsize = sblksize(fs, dp->di_extsize, nextlbn); 854 855 nextlbn = -1 - nextlbn; 856 error = breadn(vp, -1 - lbn, 857 size, &nextlbn, &nextsize, 1, NOCRED, &bp); 858 } 859 if (error) { 860 brelse(bp); 861 bp = NULL; 862 break; 863 } 864 865 /* 866 * If IO_DIRECT then set B_DIRECT for the buffer. This 867 * will cause us to attempt to release the buffer later on 868 * and will cause the buffer cache to attempt to free the 869 * underlying pages. 870 */ 871 if (ioflag & IO_DIRECT) 872 bp->b_flags |= B_DIRECT; 873 874 /* 875 * We should only get non-zero b_resid when an I/O error 876 * has occurred, which should cause us to break above. 877 * However, if the short read did not cause an error, 878 * then we want to ensure that we do not uiomove bad 879 * or uninitialized data. 
880 */ 881 size -= bp->b_resid; 882 if (size < xfersize) { 883 if (size == 0) 884 break; 885 xfersize = size; 886 } 887 888 error = uiomove((char *)bp->b_data + blkoffset, 889 (int)xfersize, uio); 890 if (error) 891 break; 892 893 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 894 (LIST_FIRST(&bp->b_dep) == NULL)) { 895 /* 896 * If there are no dependencies, and it's VMIO, 897 * then we don't need the buf, mark it available 898 * for freeing. The VM has the data. 899 */ 900 bp->b_flags |= B_RELBUF; 901 brelse(bp); 902 } else { 903 /* 904 * Otherwise let whoever 905 * made the request take care of 906 * freeing it. We just queue 907 * it onto another list. 908 */ 909 bqrelse(bp); 910 } 911 } 912 913 /* 914 * This can only happen in the case of an error 915 * because the loop above resets bp to NULL on each iteration 916 * and on normal completion has not set a new value into it. 917 * so it must have come from a 'break' statement 918 */ 919 if (bp != NULL) { 920 if ((ioflag & (IO_VMIO|IO_DIRECT)) && 921 (LIST_FIRST(&bp->b_dep) == NULL)) { 922 bp->b_flags |= B_RELBUF; 923 brelse(bp); 924 } else { 925 bqrelse(bp); 926 } 927 } 928 929 if ((error == 0 || uio->uio_resid != orig_resid) && 930 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) 931 ip->i_flag |= IN_ACCESS; 932 return (error); 933 } 934 935 /* 936 * Extended attribute area writing. 937 */ 938 static int 939 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred) 940 { 941 struct inode *ip; 942 struct ufs2_dinode *dp; 943 struct fs *fs; 944 struct buf *bp; 945 ufs_lbn_t lbn; 946 off_t osize; 947 int blkoffset, error, flags, resid, size, xfersize; 948 949 GIANT_REQUIRED; 950 951 ip = VTOI(vp); 952 fs = ip->i_fs; 953 dp = ip->i_din2; 954 955 #ifdef DIAGNOSTIC 956 if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC) 957 panic("ffs_extwrite: mode"); 958 #endif 959 960 if (ioflag & IO_APPEND) 961 uio->uio_offset = dp->di_extsize; 962 KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0")); 963 KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0")); 964 if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize) 965 return (EFBIG); 966 967 resid = uio->uio_resid; 968 osize = dp->di_extsize; 969 flags = IO_EXT; 970 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 971 flags |= IO_SYNC; 972 973 for (error = 0; uio->uio_resid > 0;) { 974 lbn = lblkno(fs, uio->uio_offset); 975 blkoffset = blkoff(fs, uio->uio_offset); 976 xfersize = fs->fs_bsize - blkoffset; 977 if (uio->uio_resid < xfersize) 978 xfersize = uio->uio_resid; 979 980 /* 981 * We must perform a read-before-write if the transfer size 982 * does not cover the entire buffer. 983 */ 984 if (fs->fs_bsize > xfersize) 985 flags |= BA_CLRBUF; 986 else 987 flags &= ~BA_CLRBUF; 988 error = UFS_BALLOC(vp, uio->uio_offset, xfersize, 989 ucred, flags, &bp); 990 if (error != 0) 991 break; 992 /* 993 * If the buffer is not valid we have to clear out any 994 * garbage data from the pages instantiated for the buffer. 995 * If we do not, a failed uiomove() during a write can leave 996 * the prior contents of the pages exposed to a userland 997 * mmap(). XXX deal with uiomove() errors a better way. 
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_FIRST(&bp->b_dep) == NULL)) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ucred &&
	    suser_cred(ucred, SUSER_ALLOWJAIL)) {
		ip->i_mode &= ~(ISUID | ISGID);
		dp->di_mode = ip->i_mode;
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(vp, osize,
			    IO_EXT | (ioflag & IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = UFS_UPDATE(vp, 1);
	return (error);
}
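
/*
 * The extended attribute area manipulated below is a packed sequence of
 * variable-length records.  The layout implied by the parser in
 * ffs_findextattr() and the writer in ffs_setextattr() is:
 *
 *	uint32_t ul		record length in bytes, including the
 *				length word and all padding
 *	u_char   namespace	attribute namespace
 *	u_char   eapad2		bytes of padding after the data
 *	u_char   namelen	length of the name
 *	char     name[namelen]	attribute name
 *	         pad1		zero bytes padding the header out to an
 *				8-byte boundary
 *	u_char   data[ealen]	where ealen = ul - header - eapad2
 *	         pad2		eapad2 zero bytes
 *
 * so every record, and hence the whole area, stays a multiple of 8 bytes
 * long, and ul is the offset from one record to the next.
 */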

/*
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return (-1);
}

static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct uio luio;
	struct iovec liovec;
	int easize, error;
	u_char *eae;

	ip = VTOI(vp);
	dp = ip->i_din2;
	easize = dp->di_extsize;

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	*p = eae;
	return (0);
}

static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	if (ip->i_ea_area != NULL)
		return (EBUSY);
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error)
		return (error);
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);
	if (ip->i_ea_area == NULL)
		return (EINVAL);
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred, td);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	free(ip->i_ea_area, M_TEMP);
	ip->i_ea_area = NULL;
	ip->i_ea_len = 0;
	ip->i_ea_error = 0;
	return (error);
}
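
/*
 * ffs_open_ea() and ffs_close_ea() bracket every attribute access as a
 * simple transaction on an in-memory copy of the area.  In outline, each
 * vnode operation below follows this pattern (a sketch of the code that
 * follows, not additional logic):
 *
 *	if (ip->i_ea_area == NULL) {
 *		error = ffs_open_ea(vp, cred, td);   // snapshot the area
 *		stand_alone = 1;                     // we own the close
 *	} else
 *		stand_alone = 0;                     // caller opened it
 *	... operate on ip->i_ea_area / ip->i_ea_len ...
 *	if (stand_alone)
 *		ffs_close_ea(vp, commit, cred, td);  // commit != 0 writes back
 *
 * A caller that opens the transaction itself (via VOP_OPENEXTATTR) can
 * batch several operations under one read and one write of the area.
 */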

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
	    lbn < 0 && lbn >= -NXADDR)
		return (ufs_vnoperate((struct vop_generic_args *)ap));
	if (vp->v_type == VFIFO)
		return (ufs_vnoperatefifo((struct vop_generic_args *)ap));
	panic("spec nodes went here");
}

/*
 * Vnode operation to open an extended attribute transaction.
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to commit or abort an extended attribute transaction.
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (ufs_vnoperate((struct vop_generic_args *)ap));

	if (ap->a_vp->v_type == VCHR)
		return (EOPNOTSUPP);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}
1295 */ 1296 static int 1297 ffs_deleteextattr(struct vop_deleteextattr_args *ap) 1298 /* 1299 vop_deleteextattr { 1300 IN struct vnode *a_vp; 1301 IN int a_attrnamespace; 1302 IN const char *a_name; 1303 IN struct ucred *a_cred; 1304 IN struct thread *a_td; 1305 }; 1306 */ 1307 { 1308 struct inode *ip; 1309 struct fs *fs; 1310 uint32_t ealength, ul; 1311 int ealen, olen, eapad1, eapad2, error, i, easize; 1312 u_char *eae, *p; 1313 int stand_alone; 1314 1315 ip = VTOI(ap->a_vp); 1316 fs = ip->i_fs; 1317 1318 if (fs->fs_magic == FS_UFS1_MAGIC) 1319 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1320 1321 if (ap->a_vp->v_type == VCHR) 1322 return (EOPNOTSUPP); 1323 1324 if (strlen(ap->a_name) == 0) 1325 return (EINVAL); 1326 1327 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1328 ap->a_cred, ap->a_td, IWRITE); 1329 if (error) { 1330 if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1331 ip->i_ea_error = error; 1332 return (error); 1333 } 1334 1335 if (ip->i_ea_area == NULL) { 1336 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1337 if (error) 1338 return (error); 1339 stand_alone = 1; 1340 } else { 1341 stand_alone = 0; 1342 } 1343 1344 ealength = eapad1 = ealen = eapad2 = 0; 1345 1346 eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK); 1347 bcopy(ip->i_ea_area, eae, ip->i_ea_len); 1348 easize = ip->i_ea_len; 1349 1350 olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, 1351 &p, NULL); 1352 if (olen == -1) { 1353 /* delete but nonexistent */ 1354 free(eae, M_TEMP); 1355 if (stand_alone) 1356 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1357 return(ENOATTR); 1358 } 1359 bcopy(p, &ul, sizeof ul); 1360 i = p - eae + ul; 1361 if (ul != ealength) { 1362 bcopy(p + ul, p + ealength, easize - i); 1363 easize += (ealength - ul); 1364 } 1365 if (easize > NXADDR * fs->fs_bsize) { 1366 free(eae, M_TEMP); 1367 if (stand_alone) 1368 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1369 else if (ip->i_ea_error == 0) 1370 ip->i_ea_error = ENOSPC; 1371 return(ENOSPC); 1372 } 1373 p = ip->i_ea_area; 1374 ip->i_ea_area = eae; 1375 ip->i_ea_len = easize; 1376 free(p, M_TEMP); 1377 if (stand_alone) 1378 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); 1379 return(error); 1380 } 1381 1382 /* 1383 * Vnode operation to retrieve a named extended attribute. 
1384 */ 1385 static int 1386 ffs_getextattr(struct vop_getextattr_args *ap) 1387 /* 1388 vop_getextattr { 1389 IN struct vnode *a_vp; 1390 IN int a_attrnamespace; 1391 IN const char *a_name; 1392 INOUT struct uio *a_uio; 1393 OUT size_t *a_size; 1394 IN struct ucred *a_cred; 1395 IN struct thread *a_td; 1396 }; 1397 */ 1398 { 1399 struct inode *ip; 1400 struct fs *fs; 1401 u_char *eae, *p; 1402 unsigned easize; 1403 int error, ealen, stand_alone; 1404 1405 ip = VTOI(ap->a_vp); 1406 fs = ip->i_fs; 1407 1408 if (fs->fs_magic == FS_UFS1_MAGIC) 1409 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1410 1411 if (ap->a_vp->v_type == VCHR) 1412 return (EOPNOTSUPP); 1413 1414 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1415 ap->a_cred, ap->a_td, IREAD); 1416 if (error) 1417 return (error); 1418 1419 if (ip->i_ea_area == NULL) { 1420 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1421 if (error) 1422 return (error); 1423 stand_alone = 1; 1424 } else { 1425 stand_alone = 0; 1426 } 1427 eae = ip->i_ea_area; 1428 easize = ip->i_ea_len; 1429 1430 ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, 1431 NULL, &p); 1432 if (ealen >= 0) { 1433 error = 0; 1434 if (ap->a_size != NULL) 1435 *ap->a_size = ealen; 1436 else if (ap->a_uio != NULL) 1437 error = uiomove(p, ealen, ap->a_uio); 1438 } else 1439 error = ENOATTR; 1440 if (stand_alone) 1441 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1442 return(error); 1443 } 1444 1445 /* 1446 * Vnode operation to retrieve extended attributes on a vnode. 1447 */ 1448 static int 1449 ffs_listextattr(struct vop_listextattr_args *ap) 1450 /* 1451 vop_listextattr { 1452 IN struct vnode *a_vp; 1453 IN int a_attrnamespace; 1454 INOUT struct uio *a_uio; 1455 OUT size_t *a_size; 1456 IN struct ucred *a_cred; 1457 IN struct thread *a_td; 1458 }; 1459 */ 1460 { 1461 struct inode *ip; 1462 struct fs *fs; 1463 u_char *eae, *p, *pe, *pn; 1464 unsigned easize; 1465 uint32_t ul; 1466 int error, ealen, stand_alone; 1467 1468 ip = VTOI(ap->a_vp); 1469 fs = ip->i_fs; 1470 1471 if (fs->fs_magic == FS_UFS1_MAGIC) 1472 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1473 1474 if (ap->a_vp->v_type == VCHR) 1475 return (EOPNOTSUPP); 1476 1477 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1478 ap->a_cred, ap->a_td, IREAD); 1479 if (error) 1480 return (error); 1481 1482 if (ip->i_ea_area == NULL) { 1483 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1484 if (error) 1485 return (error); 1486 stand_alone = 1; 1487 } else { 1488 stand_alone = 0; 1489 } 1490 eae = ip->i_ea_area; 1491 easize = ip->i_ea_len; 1492 1493 error = 0; 1494 if (ap->a_size != NULL) 1495 *ap->a_size = 0; 1496 pe = eae + easize; 1497 for(p = eae; error == 0 && p < pe; p = pn) { 1498 bcopy(p, &ul, sizeof(ul)); 1499 pn = p + ul; 1500 if (pn > pe) 1501 break; 1502 p += sizeof(ul); 1503 if (*p++ != ap->a_attrnamespace) 1504 continue; 1505 p++; /* pad2 */ 1506 ealen = *p; 1507 if (ap->a_size != NULL) { 1508 *ap->a_size += ealen + 1; 1509 } else if (ap->a_uio != NULL) { 1510 error = uiomove(p, ealen + 1, ap->a_uio); 1511 } 1512 } 1513 if (stand_alone) 1514 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1515 return(error); 1516 } 1517 1518 /* 1519 * Vnode operation to set a named attribute. 
1520 */ 1521 static int 1522 ffs_setextattr(struct vop_setextattr_args *ap) 1523 /* 1524 vop_setextattr { 1525 IN struct vnode *a_vp; 1526 IN int a_attrnamespace; 1527 IN const char *a_name; 1528 INOUT struct uio *a_uio; 1529 IN struct ucred *a_cred; 1530 IN struct thread *a_td; 1531 }; 1532 */ 1533 { 1534 struct inode *ip; 1535 struct fs *fs; 1536 uint32_t ealength, ul; 1537 int ealen, olen, eapad1, eapad2, error, i, easize; 1538 u_char *eae, *p; 1539 int stand_alone; 1540 1541 ip = VTOI(ap->a_vp); 1542 fs = ip->i_fs; 1543 1544 if (fs->fs_magic == FS_UFS1_MAGIC) 1545 return (ufs_vnoperate((struct vop_generic_args *)ap)); 1546 1547 if (ap->a_vp->v_type == VCHR) 1548 return (EOPNOTSUPP); 1549 1550 if (strlen(ap->a_name) == 0) 1551 return (EINVAL); 1552 1553 /* XXX Now unsupported API to delete EAs using NULL uio. */ 1554 if (ap->a_uio == NULL) 1555 return (EOPNOTSUPP); 1556 1557 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 1558 ap->a_cred, ap->a_td, IWRITE); 1559 if (error) { 1560 if (ip->i_ea_area != NULL && ip->i_ea_error == 0) 1561 ip->i_ea_error = error; 1562 return (error); 1563 } 1564 1565 if (ip->i_ea_area == NULL) { 1566 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); 1567 if (error) 1568 return (error); 1569 stand_alone = 1; 1570 } else { 1571 stand_alone = 0; 1572 } 1573 1574 ealen = ap->a_uio->uio_resid; 1575 ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name); 1576 eapad1 = 8 - (ealength % 8); 1577 if (eapad1 == 8) 1578 eapad1 = 0; 1579 eapad2 = 8 - (ealen % 8); 1580 if (eapad2 == 8) 1581 eapad2 = 0; 1582 ealength += eapad1 + ealen + eapad2; 1583 1584 eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK); 1585 bcopy(ip->i_ea_area, eae, ip->i_ea_len); 1586 easize = ip->i_ea_len; 1587 1588 olen = ffs_findextattr(eae, easize, 1589 ap->a_attrnamespace, ap->a_name, &p, NULL); 1590 if (olen == -1) { 1591 /* new, append at end */ 1592 p = eae + easize; 1593 easize += ealength; 1594 } else { 1595 bcopy(p, &ul, sizeof ul); 1596 i = p - eae + ul; 1597 if (ul != ealength) { 1598 bcopy(p + ul, p + ealength, easize - i); 1599 easize += (ealength - ul); 1600 } 1601 } 1602 if (easize > NXADDR * fs->fs_bsize) { 1603 free(eae, M_TEMP); 1604 if (stand_alone) 1605 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1606 else if (ip->i_ea_error == 0) 1607 ip->i_ea_error = ENOSPC; 1608 return(ENOSPC); 1609 } 1610 bcopy(&ealength, p, sizeof(ealength)); 1611 p += sizeof(ealength); 1612 *p++ = ap->a_attrnamespace; 1613 *p++ = eapad2; 1614 *p++ = strlen(ap->a_name); 1615 strcpy(p, ap->a_name); 1616 p += strlen(ap->a_name); 1617 bzero(p, eapad1); 1618 p += eapad1; 1619 error = uiomove(p, ealen, ap->a_uio); 1620 if (error) { 1621 free(eae, M_TEMP); 1622 if (stand_alone) 1623 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); 1624 else if (ip->i_ea_error == 0) 1625 ip->i_ea_error = error; 1626 return(error); 1627 } 1628 p += ealen; 1629 bzero(p, eapad2); 1630 1631 p = ip->i_ea_area; 1632 ip->i_ea_area = eae; 1633 ip->i_ea_len = easize; 1634 free(p, M_TEMP); 1635 if (stand_alone) 1636 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); 1637 return(error); 1638 } 1639