/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/mode.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/dnlc.h>
#include <sys/conf.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/fbuf.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/dirent.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/statvfs.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/policy.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <vm/page.h>
#include <sys/swap.h>

#include <fs/fs_subr.h>

#include <sys/fs/udf_volume.h>
#include <sys/fs/udf_inode.h>

static int32_t udf_open(struct vnode **,
	int32_t, struct cred *);
static int32_t udf_close(struct vnode *,
	int32_t, int32_t, offset_t, struct cred *);
static int32_t udf_read(struct vnode *,
	struct uio *, int32_t, struct cred *, struct caller_context *);
static int32_t udf_write(struct vnode *,
	struct uio *, int32_t, struct cred *, struct caller_context *);
static int32_t udf_ioctl(struct vnode *,
	int32_t, intptr_t, int32_t, struct cred *, int32_t *);
static int32_t udf_getattr(struct vnode *,
	struct vattr *, int32_t, struct cred *);
static int32_t udf_setattr(struct vnode *,
	struct vattr *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_access(struct vnode *,
	int32_t, int32_t, struct cred *);
static int32_t udf_lookup(struct vnode *,
	char *, struct vnode **, struct pathname *,
	int32_t, struct vnode *, struct cred *);
static int32_t udf_create(struct vnode *,
	char *, struct vattr *, enum vcexcl,
	int32_t, struct vnode **, struct cred *, int32_t);
static int32_t udf_remove(struct vnode *,
	char *, struct cred *);
static int32_t udf_link(struct vnode *,
	struct vnode *, char *, struct cred *);
static int32_t udf_rename(struct vnode *,
	char *, struct vnode *, char *, struct cred *);
static int32_t udf_mkdir(struct vnode *,
	char *, struct vattr *, struct vnode **, struct cred *);
static int32_t udf_rmdir(struct vnode *,
	char *, struct vnode *, struct cred *);
static int32_t udf_readdir(struct vnode *,
	struct uio *, struct cred *, int32_t *);
static int32_t udf_symlink(struct vnode *,
	char *, struct vattr *, char *, struct cred *);
static int32_t udf_readlink(struct vnode *,
	struct uio *, struct cred *);
static int32_t udf_fsync(struct vnode *,
	int32_t, struct cred *);
static void udf_inactive(struct vnode *,
	struct cred *);
static int32_t udf_fid(struct vnode *, struct fid *);
static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
static int32_t udf_seek(struct vnode *, offset_t, offset_t *);
static int32_t udf_frlock(struct vnode *, int32_t,
	struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *);
static int32_t udf_space(struct vnode *, int32_t,
	struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
static int32_t udf_getpage(struct vnode *, offset_t,
	size_t, uint32_t *, struct page **, size_t,
	struct seg *, caddr_t, enum seg_rw, struct cred *);
static int32_t udf_putpage(struct vnode *, offset_t,
	size_t, int32_t, struct cred *);
static int32_t udf_map(struct vnode *, offset_t, struct as *,
	caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *);
static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
	caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *);
static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
	caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *);
static int32_t udf_l_pathconf(struct vnode *, int32_t,
	ulong_t *, struct cred *);
static int32_t udf_pageio(struct vnode *, struct page *,
	u_offset_t, size_t, int32_t, struct cred *);

int32_t ud_getpage_miss(struct vnode *, u_offset_t,
	size_t, struct seg *, caddr_t, page_t *pl[],
	size_t, enum seg_rw, int32_t);
void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
int32_t ud_page_fill(struct ud_inode *, page_t *,
	u_offset_t, uint32_t, u_offset_t *);
int32_t ud_iodone(struct buf *);
int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
int32_t ud_slave_done(struct buf *);

/*
 * Structures to control multiple IO operations to get or put pages
 * that are backed by discontiguous blocks. The master struct is
 * a dummy that holds the original bp from pageio_setup. The
 * slave struct holds the working bp's to do the actual IO. Once
 * all the slave IOs complete, the master is processed as if a single
 * IO op had completed.
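 * See ud_multi_strat() and ud_slave_done() for how the slave buffers
 * are set up and completed.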
 */
uint32_t master_index = 0;
typedef struct mio_master {
	kmutex_t	mm_mutex;	/* protect the fields below */
	int32_t		mm_size;
	buf_t		*mm_bp;		/* original bp */
	int32_t		mm_resid;	/* bytes remaining to transfer */
	int32_t		mm_error;	/* accumulated error from slaves */
	int32_t		mm_index;	/* XXX debugging */
} mio_master_t;

typedef struct mio_slave {
	buf_t		ms_buf;		/* working buffer for this IO chunk */
	mio_master_t	*ms_ptr;	/* pointer to master */
} mio_slave_t;

struct vnodeops *udf_vnodeops;

const fs_operation_def_t udf_vnodeops_template[] = {
	VOPNAME_OPEN, { .vop_open = udf_open },
	VOPNAME_CLOSE, { .vop_close = udf_close },
	VOPNAME_READ, { .vop_read = udf_read },
	VOPNAME_WRITE, { .vop_write = udf_write },
	VOPNAME_IOCTL, { .vop_ioctl = udf_ioctl },
	VOPNAME_GETATTR, { .vop_getattr = udf_getattr },
	VOPNAME_SETATTR, { .vop_setattr = udf_setattr },
	VOPNAME_ACCESS, { .vop_access = udf_access },
	VOPNAME_LOOKUP, { .vop_lookup = udf_lookup },
	VOPNAME_CREATE, { .vop_create = udf_create },
	VOPNAME_REMOVE, { .vop_remove = udf_remove },
	VOPNAME_LINK, { .vop_link = udf_link },
	VOPNAME_RENAME, { .vop_rename = udf_rename },
	VOPNAME_MKDIR, { .vop_mkdir = udf_mkdir },
	VOPNAME_RMDIR, { .vop_rmdir = udf_rmdir },
	VOPNAME_READDIR, { .vop_readdir = udf_readdir },
	VOPNAME_SYMLINK, { .vop_symlink = udf_symlink },
	VOPNAME_READLINK, { .vop_readlink = udf_readlink },
	VOPNAME_FSYNC, { .vop_fsync = udf_fsync },
	VOPNAME_INACTIVE, { .vop_inactive = udf_inactive },
	VOPNAME_FID, { .vop_fid = udf_fid },
	VOPNAME_RWLOCK, { .vop_rwlock = udf_rwlock },
	VOPNAME_RWUNLOCK, { .vop_rwunlock = udf_rwunlock },
	VOPNAME_SEEK, { .vop_seek = udf_seek },
	VOPNAME_FRLOCK, { .vop_frlock = udf_frlock },
	VOPNAME_SPACE, { .vop_space = udf_space },
	VOPNAME_GETPAGE, { .vop_getpage = udf_getpage },
	VOPNAME_PUTPAGE, { .vop_putpage = udf_putpage },
	VOPNAME_MAP, { .vop_map = udf_map },
	VOPNAME_ADDMAP, { .vop_addmap = udf_addmap },
	VOPNAME_DELMAP, { .vop_delmap = udf_delmap },
	VOPNAME_PATHCONF, { .vop_pathconf = udf_l_pathconf },
	VOPNAME_PAGEIO, { .vop_pageio = udf_pageio },
	VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
	NULL, NULL
};

/* ARGSUSED */
static int32_t
udf_open(struct vnode **vpp, int32_t flag, struct cred *cr)
{
	ud_printf("udf_open\n");

	return (0);
}

/* ARGSUSED */
static int32_t
udf_close(struct vnode *vp, int32_t flag,
	int32_t count, offset_t offset, struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_close\n");

	ITIMES(ip);

	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);

	/*
	 * Push partially filled cluster at last close.
	 * ``last close'' is approximated because the dnlc
	 * may have a hold on the vnode.
	 */
	if (vp->v_count <= 2 && vp->v_type != VBAD) {
		struct ud_inode *ip = VTOI(vp);
		if (ip->i_delaylen) {
			(void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
			    B_ASYNC | B_FREE, cr);
			ip->i_delaylen = 0;
		}
	}

	return (0);
}

static int32_t
udf_read(struct vnode *vp, struct uio *uiop,
	int32_t ioflag, struct cred *cr, struct caller_context *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error;

	ud_printf("udf_read\n");

#ifdef	__lock_lint
	rw_enter(&ip->i_rwlock, RW_READER);
#endif

	ASSERT(RW_READ_HELD(&ip->i_rwlock));

	if (MANDLOCK(vp, ip->i_char)) {
		/*
		 * udf_getattr ends up being called by chklock
		 */
		error = chklock(vp, FREAD, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error) {
			goto end;
		}
	}

	rw_enter(&ip->i_contents, RW_READER);
	error = ud_rdip(ip, uiop, ioflag, cr);
	rw_exit(&ip->i_contents);

end:
#ifdef	__lock_lint
	rw_exit(&ip->i_rwlock);
#endif

	return (error);
}


int32_t ud_WRITES = 1;
int32_t ud_HW = 96 * 1024;
int32_t ud_LW = 64 * 1024;
int32_t ud_throttles = 0;

static int32_t
udf_write(struct vnode *vp, struct uio *uiop,
	int32_t ioflag, struct cred *cr, struct caller_context *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error = 0;

	ud_printf("udf_write\n");

#ifdef	__lock_lint
	rw_enter(&ip->i_rwlock, RW_WRITER);
#endif

	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));

	if (MANDLOCK(vp, ip->i_char)) {
		/*
		 * ud_getattr ends up being called by chklock
		 */
		error = chklock(vp, FWRITE, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error) {
			goto end;
		}
	}
	/*
	 * Throttle writes.
	 */
	mutex_enter(&ip->i_tlock);
	if (ud_WRITES && (ip->i_writes > ud_HW)) {
		while (ip->i_writes > ud_HW) {
			ud_throttles++;
			cv_wait(&ip->i_wrcv, &ip->i_tlock);
		}
	}
	mutex_exit(&ip->i_tlock);

	/*
	 * Write to the file
	 */
	rw_enter(&ip->i_contents, RW_WRITER);
	if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
		/*
		 * In append mode start at end of file.
		 */
		uiop->uio_loffset = ip->i_size;
	}
	error = ud_wrip(ip, uiop, ioflag, cr);
	rw_exit(&ip->i_contents);

end:
#ifdef	__lock_lint
	rw_exit(&ip->i_rwlock);
#endif

	return (error);
}

/* ARGSUSED */
static int32_t
udf_ioctl(struct vnode *vp, int32_t cmd, intptr_t arg,
	int32_t flag, struct cred *cr, int32_t *rvalp)
{
	return (ENOTTY);
}

/* ARGSUSED */
static int32_t
udf_getattr(struct vnode *vp,
	struct vattr *vap, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_getattr\n");

	if (vap->va_mask == AT_SIZE) {
		/*
		 * for performance, if only the size is requested don't bother
		 * with anything else.
		 */
		vap->va_size = ip->i_size;
		return (0);
	}

	rw_enter(&ip->i_contents, RW_READER);

	vap->va_type = vp->v_type;
	vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;

	vap->va_uid = ip->i_uid;
	vap->va_gid = ip->i_gid;
	vap->va_fsid = ip->i_dev;
	vap->va_nodeid = ip->i_icb_lbano;
	vap->va_nlink = ip->i_nlink;
	vap->va_size = ip->i_size;
	vap->va_seq = ip->i_seq;
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vap->va_rdev = ip->i_rdev;
	} else {
		vap->va_rdev = 0;
	}

	mutex_enter(&ip->i_tlock);
	ITIMES_NOLOCK(ip);	/* mark correct time in inode */
	vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
	vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
	vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
	vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
	vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
	vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
	mutex_exit(&ip->i_tlock);

	switch (ip->i_type) {
	case VBLK:
		vap->va_blksize = MAXBSIZE;
		break;
	case VCHR:
		vap->va_blksize = MAXBSIZE;
		break;
	default:
		vap->va_blksize = ip->i_udf->udf_lbsize;
		break;
	}
	vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;

	rw_exit(&ip->i_contents);

	return (0);
}

static int
ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
{
	return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr));
}

/*ARGSUSED4*/
static int32_t
udf_setattr(
	struct vnode *vp,
	struct vattr *vap,
	int32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	int32_t error = 0;
	uint32_t mask = vap->va_mask;
	struct ud_inode *ip;
	timestruc_t now;
	struct vattr ovap;

	ud_printf("udf_setattr\n");

	ip = VTOI(vp);

	/*
	 * No updates allowed to 4096 files
	 */
	if (ip->i_astrat == STRAT_TYPE4096) {
		return (EINVAL);
	}

	/*
	 * Cannot set these attributes
	 */
	if (mask & AT_NOSET) {
		return (EINVAL);
	}

	rw_enter(&ip->i_rwlock, RW_WRITER);
	rw_enter(&ip->i_contents, RW_WRITER);

	ovap.va_uid = ip->i_uid;
	ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
	error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
	    ud_iaccess_vmode, ip);
	if (error)
		goto update_inode;

	mask = vap->va_mask;
	/*
	 * Change file access modes.
	 */
	if (mask & AT_MODE) {
		ip->i_perm = VA2UD_PERM(vap->va_mode);
		ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= ICHG;
		mutex_exit(&ip->i_tlock);
	}
	if (mask & (AT_UID|AT_GID)) {
		if (mask & AT_UID) {
			ip->i_uid = vap->va_uid;
		}
		if (mask & AT_GID) {
			ip->i_gid = vap->va_gid;
		}
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= ICHG;
		mutex_exit(&ip->i_tlock);
	}
	/*
	 * Truncate file. Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto update_inode;
		}
		if (error = ud_iaccess(ip, IWRITE, cr)) {
			goto update_inode;
		}
		if (vap->va_size > MAXOFFSET_T) {
			error = EFBIG;
			goto update_inode;
		}
		if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
			goto update_inode;
		}
	}
	/*
	 * Change file access or modified times.
	 */
	if (mask & (AT_ATIME|AT_MTIME)) {
		mutex_enter(&ip->i_tlock);
		if (mask & AT_ATIME) {
			ip->i_atime.tv_sec = vap->va_atime.tv_sec;
			ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
			ip->i_flag &= ~IACC;
		}
		if (mask & AT_MTIME) {
			ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
			ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
			gethrestime(&now);
			ip->i_ctime.tv_sec = now.tv_sec;
			ip->i_ctime.tv_nsec = now.tv_nsec;
			ip->i_flag &= ~(IUPD|ICHG);
			ip->i_flag |= IMODTIME;
		}
		ip->i_flag |= IMOD;
		mutex_exit(&ip->i_tlock);
	}

update_inode:
	if (curthread->t_flag & T_DONTPEND) {
		ud_iupdat(ip, 1);
	} else {
		ITIMES_NOLOCK(ip);
	}
	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_rwlock);

	return (error);
}

/* ARGSUSED */
static int32_t
udf_access(struct vnode *vp,
	int32_t mode, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error;

	ud_printf("udf_access\n");

	if (ip->i_udf == NULL) {
		return (EIO);
	}

	error = ud_iaccess(ip, UD_UPERM2DPERM(mode), cr);

	return (error);
}

int32_t udfs_stickyhack = 1;

/* ARGSUSED */
static int32_t
udf_lookup(struct vnode *dvp,
	char *nm, struct vnode **vpp, struct pathname *pnp,
	int32_t flags, struct vnode *rdir, struct cred *cr)
{
	int32_t error;
	struct vnode *vp;
	struct ud_inode *ip, *xip;

	ud_printf("udf_lookup\n");
	/*
	 * Null component name is a synonym for directory being searched.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		error = 0;
		goto out;
	}

	/*
	 * Fast path: Check the directory name lookup cache.
	 */
	ip = VTOI(dvp);
	if (vp = dnlc_lookup(dvp, nm)) {
		/*
		 * Check accessibility of directory.
		 */
		if ((error = ud_iaccess(ip, IEXEC, cr)) != 0) {
			VN_RELE(vp);
		}
		xip = VTOI(vp);
	} else {
		error = ud_dirlook(ip, nm, &xip, cr, 1);
		ITIMES(ip);
	}

	if (error == 0) {
		ip = xip;
		*vpp = ITOV(ip);
		if ((ip->i_type != VDIR) &&
		    (ip->i_char & ISVTX) &&
		    ((ip->i_perm & IEXEC) == 0) &&
		    udfs_stickyhack) {
			mutex_enter(&(*vpp)->v_lock);
			(*vpp)->v_flag |= VISSWAP;
			mutex_exit(&(*vpp)->v_lock);
		}
		ITIMES(ip);
		/*
		 * If vnode is a device return special vnode instead.
		 */
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;
			newvp = specvp(*vpp, (*vpp)->v_rdev,
			    (*vpp)->v_type, cr);
			VN_RELE(*vpp);
			if (newvp == NULL) {
				error = ENOSYS;
			} else {
				*vpp = newvp;
			}
		}
	}
out:
	return (error);
}

/* ARGSUSED */
static int32_t
udf_create(struct vnode *dvp,
	char *name, struct vattr *vap, enum vcexcl excl,
	int32_t mode, struct vnode **vpp, struct cred *cr, int32_t flag)
{
	int32_t error;
	struct ud_inode *ip = VTOI(dvp), *xip;

	ud_printf("udf_create\n");

	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		ITIMES(ip);
		error = EEXIST;
	} else {
		xip = NULL;
		rw_enter(&ip->i_rwlock, RW_WRITER);
		error = ud_direnter(ip, name, DE_CREATE,
		    (struct ud_inode *)0, (struct ud_inode *)0,
		    vap, &xip, cr);
		rw_exit(&ip->i_rwlock);
		ITIMES(ip);
		ip = xip;
	}
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_WRITER);
#else
	if (ip != NULL) {
		rw_enter(&ip->i_contents, RW_WRITER);
	}
#endif

	/*
	 * If the file already exists and this is a non-exclusive create,
	 * check permissions and allow access for non-directories.
	 * Read-only create of an existing directory is also allowed.
	 * We fail an exclusive create of anything which already exists.
	 */
	if (error == EEXIST) {
		if (excl == NONEXCL) {
			if ((ip->i_type == VDIR) && (mode & VWRITE)) {
				error = EISDIR;
			} else if (mode) {
				error = ud_iaccess(ip,
				    UD_UPERM2DPERM(mode), cr);
			} else {
				error = 0;
			}
		}
		if (error) {
			rw_exit(&ip->i_contents);
			VN_RELE(ITOV(ip));
			goto out;
		} else if ((ip->i_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
			/*
			 * Truncate regular files, if requested by caller.
			 * Grab i_rwlock to make sure no one else is
			 * currently writing to the file (we promised
			 * bmap we would do this).
			 * Must get the locks in the correct order.
			 */
			if (ip->i_size == 0) {
				ip->i_flag |= ICHG | IUPD;
			} else {
				rw_exit(&ip->i_contents);
				rw_enter(&ip->i_rwlock, RW_WRITER);
				rw_enter(&ip->i_contents, RW_WRITER);
				(void) ud_itrunc(ip, 0, 0, cr);
				rw_exit(&ip->i_rwlock);
			}
		}
	}

	if (error == 0) {
		*vpp = ITOV(ip);
		ITIMES(ip);
	}
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (ip != NULL) {
		rw_exit(&ip->i_contents);
	}
#endif
	if (error) {
		goto out;
	}

	/*
	 * If vnode is a device return special vnode instead.
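	 * specvp() hands back a specfs vnode layered over this one so that
	 * subsequent device access goes through the device driver.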
	 */
	if (!error && IS_DEVVP(*vpp)) {
		struct vnode *newvp;

		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (newvp == NULL) {
			error = ENOSYS;
			goto out;
		}
		*vpp = newvp;
	}
out:
	return (error);
}

static int32_t
udf_remove(struct vnode *vp, char *nm, struct cred *cr)
{
	int32_t error;
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_remove\n");

	rw_enter(&ip->i_rwlock, RW_WRITER);
	error = ud_dirremove(ip, nm,
	    (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr);
	rw_exit(&ip->i_rwlock);
	ITIMES(ip);

	return (error);
}

static int32_t
udf_link(struct vnode *tdvp,
	struct vnode *svp, char *tnm, struct cred *cr)
{
	int32_t error;
	struct vnode *realvp;
	struct ud_inode *sip;
	struct ud_inode *tdp;

	ud_printf("udf_link\n");
	if (VOP_REALVP(svp, &realvp) == 0) {
		svp = realvp;
	}

	/*
	 * Do not allow links to directories
	 */
	if (svp->v_type == VDIR) {
		return (EPERM);
	}

	sip = VTOI(svp);

	if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
		return (EPERM);

	tdp = VTOI(tdvp);

	rw_enter(&tdp->i_rwlock, RW_WRITER);
	error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
	    sip, (struct vattr *)0, (struct ud_inode **)0, cr);
	rw_exit(&tdp->i_rwlock);
	ITIMES(sip);
	ITIMES(tdp);

	return (error);
}

/* ARGSUSED */
static int32_t
udf_rename(struct vnode *sdvp,
	char *snm, struct vnode *tdvp,
	char *tnm, struct cred *cr)
{
	int32_t error = 0;
	struct udf_vfs *udf_vfsp;
	struct ud_inode *sip;		/* source inode */
	struct ud_inode *sdp, *tdp;	/* source and target parent inode */
	struct vnode *realvp;

	ud_printf("udf_rename\n");

	if (VOP_REALVP(tdvp, &realvp) == 0) {
		tdvp = realvp;
	}

	sdp = VTOI(sdvp);
	tdp = VTOI(tdvp);

	udf_vfsp = sdp->i_udf;

	mutex_enter(&udf_vfsp->udf_rename_lck);
	/*
	 * Look up inode of file we're supposed to rename.
	 */
	if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
		mutex_exit(&udf_vfsp->udf_rename_lck);
		return (error);
	}
	/*
	 * Be sure this is not a directory with another file system mounted
	 * over it. If it is, just give up the locks and return EBUSY.
	 */
	if (vn_mountedvfs(ITOV(sip)) != NULL) {
		error = EBUSY;
		goto errout;
	}
	/*
	 * Make sure we can delete the source entry. This requires
	 * write permission on the containing directory. If that
	 * directory is "sticky" it further requires (except for
	 * privileged users) that the user own the directory or the
	 * source entry, or else have permission to write the source
	 * entry.
	 */
	rw_enter(&sdp->i_contents, RW_READER);
	rw_enter(&sip->i_contents, RW_READER);
	if ((error = ud_iaccess(sdp, IWRITE, cr)) != 0 ||
	    (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
		rw_exit(&sip->i_contents);
		rw_exit(&sdp->i_contents);
		ITIMES(sip);
		goto errout;
	}

	/*
	 * Check for renaming '.' or '..' or alias of '.'
	 */
	if ((strcmp(snm, ".") == 0) ||
	    (strcmp(snm, "..") == 0) ||
	    (sdp == sip)) {
		error = EINVAL;
		rw_exit(&sip->i_contents);
		rw_exit(&sdp->i_contents);
		goto errout;
	}
	rw_exit(&sip->i_contents);
	rw_exit(&sdp->i_contents);


	/*
	 * Link source to the target.
	 */
	rw_enter(&tdp->i_rwlock, RW_WRITER);
	if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
	    (struct vattr *)0, (struct ud_inode **)0, cr)) {
		/*
		 * ESAME isn't really an error; it indicates that the
		 * operation should not be done because the source and target
		 * are the same file, but that no error should be reported.
		 */
		if (error == ESAME) {
			error = 0;
		}
		rw_exit(&tdp->i_rwlock);
		goto errout;
	}
	rw_exit(&tdp->i_rwlock);

	rw_enter(&sdp->i_rwlock, RW_WRITER);
	/*
	 * Unlink the source.
	 * Remove the source entry. ud_dirremove() checks that the entry
	 * still reflects sip, and returns an error if it doesn't.
	 * If the entry has changed just forget about it. Release
	 * the source inode.
	 */
	if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
	    DR_RENAME, cr)) == ENOENT) {
		error = 0;
	}
	rw_exit(&sdp->i_rwlock);
errout:
	ITIMES(sdp);
	ITIMES(tdp);
	VN_RELE(ITOV(sip));
	mutex_exit(&udf_vfsp->udf_rename_lck);

	return (error);
}

static int32_t
udf_mkdir(struct vnode *dvp,
	char *dirname, struct vattr *vap,
	struct vnode **vpp, struct cred *cr)
{
	int32_t error;
	struct ud_inode *ip;
	struct ud_inode *xip;

	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	ud_printf("udf_mkdir\n");

	ip = VTOI(dvp);
	rw_enter(&ip->i_rwlock, RW_WRITER);
	error = ud_direnter(ip, dirname, DE_MKDIR,
	    (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr);
	rw_exit(&ip->i_rwlock);
	ITIMES(ip);
	if (error == 0) {
		ip = xip;
		*vpp = ITOV(ip);
		ITIMES(ip);
	} else if (error == EEXIST) {
		ITIMES(xip);
		VN_RELE(ITOV(xip));
	}

	return (error);
}

static int32_t
udf_rmdir(struct vnode *vp,
	char *nm, struct vnode *cdir, struct cred *cr)
{
	int32_t error;
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_rmdir\n");

	rw_enter(&ip->i_rwlock, RW_WRITER);
	error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR, cr);
	rw_exit(&ip->i_rwlock);
	ITIMES(ip);

	return (error);
}

/* ARGSUSED */
static int32_t
udf_readdir(struct vnode *vp,
	struct uio *uiop, struct cred *cr, int32_t *eofp)
{
	struct ud_inode *ip;
	struct dirent64 *nd;
	struct udf_vfs *udf_vfsp;
	int32_t error = 0, len, outcount = 0;
	uint32_t dirsiz, offset;
	uint32_t bufsize, ndlen, dummy;
	caddr_t outbuf;
	caddr_t outb, end_outb;
	struct iovec *iovp;

	uint8_t *dname;
	int32_t length;

	uint8_t *buf = NULL;

	struct fbuf *fbp = NULL;
	struct file_id *fid;
	uint8_t *name;


	ud_printf("udf_readdir\n");

	ip = VTOI(vp);
	udf_vfsp = ip->i_udf;

	dirsiz = ip->i_size;
	if ((uiop->uio_offset >= dirsiz) ||
	    (ip->i_nlink <= 0)) {
		if (eofp) {
			*eofp = 1;
		}
		return (0);
	}

	offset = uiop->uio_offset;
	iovp = uiop->uio_iov;
	bufsize = iovp->iov_len;

	outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
	end_outb = outb + bufsize;
	nd = (struct dirent64 *)outbuf;

	dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
	buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

	if (offset == 0) {
		len = DIRENT64_RECLEN(1);
		if (((caddr_t)nd + len) >= end_outb) {
			error = EINVAL;
			goto end;
		}
		nd->d_ino = ip->i_icb_lbano;
		nd->d_reclen = (uint16_t)len;
		nd->d_off = 0x10;
		nd->d_name[0] = '.';
		bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
		nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
		outcount++;
	} else if (offset == 0x10) {
		offset = 0;
	}

	while (offset < dirsiz) {
		error = ud_get_next_fid(ip, &fbp,
		    offset, &fid, &name, buf);
		if (error != 0) {
			break;
		}

		if ((fid->fid_flags & FID_DELETED) == 0) {
			if (fid->fid_flags & FID_PARENT) {

				len = DIRENT64_RECLEN(2);
				if (((caddr_t)nd + len) >= end_outb) {
					error = EINVAL;
					break;
				}

				nd->d_ino = ip->i_icb_lbano;
				nd->d_reclen = (uint16_t)len;
				nd->d_off = offset + FID_LEN(fid);
				nd->d_name[0] = '.';
				nd->d_name[1] = '.';
				bzero(&nd->d_name[2],
				    DIRENT64_NAMELEN(len) - 2);
				nd = (struct dirent64 *)
				    ((char *)nd + nd->d_reclen);
			} else {
				if ((error = ud_uncompress(fid->fid_idlen,
				    &length, name, dname)) != 0) {
					break;
				}
				if (length == 0) {
					offset += FID_LEN(fid);
					continue;
				}
				len = DIRENT64_RECLEN(length);
				if (((caddr_t)nd + len) >= end_outb) {
					if (!outcount) {
						error = EINVAL;
					}
					break;
				}
				(void) strncpy(nd->d_name,
				    (caddr_t)dname, length);
				bzero(&nd->d_name[length],
				    DIRENT64_NAMELEN(len) - length);
				nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
				    SWAP_16(fid->fid_icb.lad_ext_prn),
				    SWAP_32(fid->fid_icb.lad_ext_loc), 1,
				    &dummy);
				nd->d_reclen = (uint16_t)len;
				nd->d_off = offset + FID_LEN(fid);
				nd = (struct dirent64 *)
				    ((char *)nd + nd->d_reclen);
			}
			outcount++;
		}

		offset += FID_LEN(fid);
	}

end:
	if (fbp != NULL) {
		fbrelse(fbp, S_OTHER);
	}
	ndlen = ((char *)nd - outbuf);
	/*
	 * In case of error do not call uiomove.
	 * Return the error to the caller.
	 */
	if ((error == 0) && (ndlen != 0)) {
		error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
		uiop->uio_offset = offset;
	}
	kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
	kmem_free((caddr_t)dname, 1024);
	kmem_free(outbuf, (uint32_t)bufsize);
	if (eofp && error == 0) {
		*eofp = (uiop->uio_offset >= dirsiz);
	}
	return (error);
}

/* ARGSUSED */
static int32_t
udf_symlink(struct vnode *dvp,
	char *linkname, struct vattr *vap,
	char *target, struct cred *cr)
{
	int32_t error = 0, outlen;
	uint32_t ioflag = 0;
	struct ud_inode *ip, *dip = VTOI(dvp);

	struct path_comp *pc;
	int8_t *dname = NULL, *uname = NULL, *sp;

	ud_printf("udf_symlink\n");

	ip = (struct ud_inode *)0;
	vap->va_type = VLNK;
	vap->va_rdev = 0;

	rw_enter(&dip->i_rwlock, RW_WRITER);
	error = ud_direnter(dip, linkname, DE_CREATE,
	    (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr);
	rw_exit(&dip->i_rwlock);
	if (error == 0) {
		dname = kmem_zalloc(1024, KM_SLEEP);
		uname = kmem_zalloc(PAGESIZE, KM_SLEEP);

		pc = (struct path_comp *)uname;
		/*
		 * If the first character in target is "/"
		 * then skip it and create entry for it
		 */
		if (*target == '/') {
			pc->pc_type = 2;
			pc->pc_len = 0;
			pc = (struct path_comp *)(((char *)pc) + 4);
			while (*target == '/') {
				target++;
			}
		}

		while (*target != NULL) {
			sp = target;
			while ((*target != '/') && (*target != '\0')) {
				target ++;
			}
			/*
			 * We got the next component of the
			 * path name. Create path_comp of
			 * appropriate type
			 */
			if (((target - sp) == 1) && (*sp == '.')) {
				/*
				 * Dot entry.
				 */
				pc->pc_type = 4;
				pc = (struct path_comp *)(((char *)pc) + 4);
			} else if (((target - sp) == 2) &&
			    (*sp == '.') && ((*(sp + 1)) == '.')) {
				/*
				 * DotDot entry.
				 */
				pc->pc_type = 3;
				pc = (struct path_comp *)(((char *)pc) + 4);
			} else {
				/*
				 * convert the user given name
				 * into appropriate form to be put
				 * on the media
				 */
				outlen = 1024;	/* set to size of dname */
				if (error = ud_compress(target - sp, &outlen,
				    (uint8_t *)sp, (uint8_t *)dname)) {
					break;
				}
				pc->pc_type = 5;
				/* LINTED */
				pc->pc_len = outlen;
				dname[outlen] = '\0';
				(void) strcpy((char *)pc->pc_id, dname);
				pc = (struct path_comp *)
				    (((char *)pc) + 4 + outlen);
			}
			while (*target == '/') {
				target++;
			}
			if (*target == NULL) {
				break;
			}
		}

		rw_enter(&ip->i_contents, RW_WRITER);
		if (error == 0) {
			ioflag = FWRITE;
			if (curthread->t_flag & T_DONTPEND) {
				ioflag |= FDSYNC;
			}
			error = ud_rdwri(UIO_WRITE, ioflag, ip,
			    uname, ((int8_t *)pc) - uname,
			    (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
		}
		if (error) {
			ud_idrop(ip);
			rw_exit(&ip->i_contents);
			rw_enter(&dip->i_rwlock, RW_WRITER);
			(void) ud_dirremove(dip, linkname,
			    (struct ud_inode *)0,
			    (struct vnode *)0, DR_REMOVE, cr);
			rw_exit(&dip->i_rwlock);
			goto update_inode;
		}
		rw_exit(&ip->i_contents);
	}

	if ((error == 0) || (error == EEXIST)) {
		VN_RELE(ITOV(ip));
	}

update_inode:
	ITIMES(VTOI(dvp));
	if (uname != NULL) {
		kmem_free(uname, PAGESIZE);
	}
	if (dname != NULL) {
		kmem_free(dname, 1024);
	}

	return (error);
}

/* ARGSUSED */
static int32_t
udf_readlink(struct vnode *vp,
	struct uio *uiop, struct cred *cr)
{
	int32_t error = 0, off, id_len, size, len;
	int8_t *dname = NULL, *uname = NULL;
	struct ud_inode *ip;
	struct fbuf *fbp = NULL;
	struct path_comp *pc;

	ud_printf("udf_readlink\n");

	if (vp->v_type != VLNK) {
		return (EINVAL);
	}

	ip = VTOI(vp);
	size = ip->i_size;
	if (size > PAGESIZE) {
		return (EIO);
	}

	if (size == 0) {
		return (0);
	}

	dname = kmem_zalloc(1024, KM_SLEEP);
	uname = kmem_zalloc(PAGESIZE, KM_SLEEP);

	rw_enter(&ip->i_contents, RW_READER);

	if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
		goto end;
	}

	off = 0;

	while (off < size) {
		pc = (struct path_comp *)(fbp->fb_addr + off);
		switch (pc->pc_type) {
		case 1 :
			(void) strcpy(uname, ip->i_udf->udf_fsmnt);
			(void) strcat(uname, "/");
			break;
		case 2 :
			if (pc->pc_len != 0) {
				goto end;
			}
			uname[0] = '/';
			uname[1] = '\0';
			break;
		case 3 :
			(void) strcat(uname, "../");
			break;
		case 4 :
			(void) strcat(uname, "./");
			break;
		case 5 :
			if ((error = ud_uncompress(pc->pc_len, &id_len,
			    pc->pc_id, (uint8_t *)dname)) != 0) {
				break;
			}
			dname[id_len] = '\0';
			(void) strcat(uname, dname);
			(void) strcat(uname, "/");
			break;
		default :
			error = EINVAL;
			goto end;
		}
		off += 4 + pc->pc_len;
	}
	len = strlen(uname) - 1;
	if (uname[len] == '/') {
		if (len == 0) {
			/*
			 * special case link to /
			 */
			len = 1;
		} else {
			uname[len] = '\0';
		}
	}

	error = uiomove(uname, len, UIO_READ, uiop);

	ITIMES(ip);

end:
	if (fbp != NULL) {
		fbrelse(fbp, S_OTHER);
	}
	rw_exit(&ip->i_contents);
	if (uname != NULL) {
		kmem_free(uname, PAGESIZE);
	}
	if (dname != NULL) {
		kmem_free(dname, 1024);
	}
	return (error);
}

/* ARGSUSED */
static int32_t
udf_fsync(struct vnode *vp,
	int32_t syncflag, struct cred *cr)
{
	int32_t error = 0;
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_fsync\n");

	rw_enter(&ip->i_contents, RW_WRITER);
	if (!(IS_SWAPVP(vp))) {
		error = ud_syncip(ip, 0, I_SYNC);	/* Do synchronous writes */
	}
	if (error == 0) {
		error = ud_sync_indir(ip);
	}
	ITIMES(ip);		/* XXX: is this necessary ??? */
	rw_exit(&ip->i_contents);

	return (error);
}

/* ARGSUSED */
static void
udf_inactive(struct vnode *vp, struct cred *cr)
{
	ud_printf("udf_iinactive\n");

	ud_iinactive(VTOI(vp), cr);
}

static int32_t
udf_fid(struct vnode *vp, struct fid *fidp)
{
	struct udf_fid *udfidp;
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_fid\n");

	if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
		fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
		return (ENOSPC);
	}

	udfidp = (struct udf_fid *)fidp;
	bzero((char *)udfidp, sizeof (struct udf_fid));
	rw_enter(&ip->i_contents, RW_READER);
	udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
	udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
	udfidp->udfid_prn = ip->i_icb_prn;
	udfidp->udfid_icb_lbn = ip->i_icb_block;
	rw_exit(&ip->i_contents);

	return (0);
}

/* ARGSUSED2 */
static int
udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_rwlock\n");

	if (write_lock) {
		rw_enter(&ip->i_rwlock, RW_WRITER);
	} else {
		rw_enter(&ip->i_rwlock, RW_READER);
	}
#ifdef	__lock_lint
	rw_exit(&ip->i_rwlock);
#endif
	return (write_lock);
}

/* ARGSUSED */
static void
udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_rwunlock\n");

#ifdef	__lock_lint
	rw_enter(&ip->i_rwlock, RW_WRITER);
#endif

	rw_exit(&ip->i_rwlock);

}

/* ARGSUSED */
static int32_t
udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp)
{
	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
}

static int32_t
udf_frlock(struct vnode *vp, int32_t cmd, struct flock64 *bfp,
	int32_t flag, offset_t offset, struct flk_callback *flk_cbp,
	cred_t *cr)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_frlock\n");

	/*
	 * If file is being mapped, disallow frlock.
	 * XXX I am not holding tlock while checking i_mapcnt because the
	 * current locking strategy drops all locks before calling fs_frlock.
	 * So, mapcnt could change before we enter fs_frlock making it
	 * meaningless to have held tlock in the first place.
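	 * fs_frlock() does the generic record-locking work.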
	 */
	if ((ip->i_mapcnt > 0) &&
	    (MANDLOCK(vp, ip->i_char))) {
		return (EAGAIN);
	}

	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr));
}

/*ARGSUSED6*/
static int32_t
udf_space(
	struct vnode *vp,
	int32_t cmd,
	struct flock64 *bfp,
	int32_t flag,
	offset_t offset,
	cred_t *cr,
	caller_context_t *ct)
{
	int32_t error = 0;

	ud_printf("udf_space\n");

	if (cmd != F_FREESP) {
		error = EINVAL;
	} else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
		error = ud_freesp(vp, bfp, flag, cr);
	}

	return (error);
}

/* ARGSUSED */
static int32_t
udf_getpage(struct vnode *vp, offset_t off,
	size_t len, uint32_t *protp, struct page **plarr,
	size_t plsz, struct seg *seg, caddr_t addr,
	enum seg_rw rw, struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error, has_holes, beyond_eof, seqmode, dolock;
	int32_t pgsize = PAGESIZE;
	struct udf_vfs *udf_vfsp = ip->i_udf;
	page_t **pl;
	u_offset_t pgoff, eoff, uoff;
	krw_t rwtype;
	caddr_t pgaddr;

	ud_printf("udf_getpage\n");

	uoff = (u_offset_t)off;	/* type conversion */
	if (protp) {
		*protp = PROT_ALL;
	}
	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}
	seqmode = ip->i_nextr == uoff && rw != S_CREATE;

	rwtype = RW_READER;
	dolock = (rw_owner(&ip->i_contents) != curthread);
retrylock:
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, rwtype);
#else
	if (dolock) {
		rw_enter(&ip->i_contents, rwtype);
	}
#endif

	/*
	 * We may be getting called as a side effect of a bmap using
	 * fbread() when the blocks might be being allocated and the
	 * size has not yet been up'ed. In this case we want to be
	 * able to return zero pages if we get back UDF_HOLE from
	 * calling bmap for a non write case here. We also might have
	 * to read some frags from the disk into a page if we are
	 * extending the number of frags for a given lbn in bmap().
	 */
	beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
	if (beyond_eof && seg != segkmap) {
#ifdef	__lock_lint
		rw_exit(&ip->i_contents);
#else
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
#endif
		return (EFAULT);
	}

	/*
	 * Must hold i_contents lock throughout the call to pvn_getpages
	 * since locked pages are returned from each call to ud_getapage.
	 * Must *not* return locked pages and then try for contents lock
	 * due to lock ordering requirements (inode > page)
	 */

	has_holes = ud_bmap_has_holes(ip);

	if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
		int32_t	blk_size, count;
		u_offset_t offset;

		/*
		 * We must acquire the RW_WRITER lock in order to
		 * call bmap_write().
		 */
		if (dolock && rwtype == RW_READER) {
			rwtype = RW_WRITER;

			if (!rw_tryupgrade(&ip->i_contents)) {

				rw_exit(&ip->i_contents);

				goto retrylock;
			}
		}

		/*
		 * May be allocating disk blocks for holes here as
		 * a result of mmap faults. write(2) does the bmap_write
		 * in rdip/wrip, not here. We are not dealing with frags
		 * in this case.
		 */
		offset = uoff;
		while ((offset < uoff + len) &&
		    (offset < ip->i_size)) {
			/*
			 * the variable "bnp" is to simplify the expression for
			 * the compiler; just passing in &bn to bmap_write
			 * causes a compiler "loop"
			 */

			blk_size = udf_vfsp->udf_lbsize;
			if ((offset + blk_size) > ip->i_size) {
				count = ip->i_size - offset;
			} else {
				count = blk_size;
			}
			error = ud_bmap_write(ip, offset, count, 0, cr);
			if (error) {
				goto update_inode;
			}
			offset += count;	/* XXX - make this contig */
		}
	}

	/*
	 * Can be a reader from now on.
	 */
#ifdef	__lock_lint
	if (rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#else
	if (dolock && rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}
#endif

	/*
	 * We remove PROT_WRITE in cases when the file has UDF holes
	 * because we don't want to call bmap_read() to check each
	 * page if it is backed with a disk block.
	 */
	if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
		*protp &= ~PROT_WRITE;
	}

	error = 0;

	/*
	 * The loop looks up pages in the range <off, off + len).
	 * For each page, we first check if we should initiate an asynchronous
	 * read ahead before we call page_lookup (we may sleep in page_lookup
	 * for a previously initiated disk read).
	 */
	eoff = (uoff + len);
	for (pgoff = uoff, pgaddr = addr, pl = plarr;
	    pgoff < eoff; /* empty */) {
		page_t	*pp;
		u_offset_t	nextrio;
		se_t	se;

		se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);

		/*
		 * Handle async getpage (faultahead)
		 */
		if (plarr == NULL) {
			ip->i_nextrio = pgoff;
			ud_getpage_ra(vp, pgoff, seg, pgaddr);
			pgoff += pgsize;
			pgaddr += pgsize;
			continue;
		}

		/*
		 * Check if we should initiate read ahead of next cluster.
		 * We call page_exists only when we need to confirm that
		 * we have the current page before we initiate the read ahead.
		 */
		nextrio = ip->i_nextrio;
		if (seqmode &&
		    pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
		    nextrio < ip->i_size && page_exists(vp, pgoff))
			ud_getpage_ra(vp, pgoff, seg, pgaddr);

		if ((pp = page_lookup(vp, pgoff, se)) != NULL) {

			/*
			 * We found the page in the page cache.
			 */
			*pl++ = pp;
			pgoff += pgsize;
			pgaddr += pgsize;
			len -= pgsize;
			plsz -= pgsize;
		} else {

			/*
			 * We have to create the page, or read it from disk.
			 */
			if (error = ud_getpage_miss(vp, pgoff, len,
			    seg, pgaddr, pl, plsz, rw, seqmode)) {
				goto error_out;
			}

			while (*pl != NULL) {
				pl++;
				pgoff += pgsize;
				pgaddr += pgsize;
				len -= pgsize;
				plsz -= pgsize;
			}
		}
	}

	/*
	 * Return pages up to plsz if they are in the page cache.
	 * We cannot return pages if there is a chance that they are
	 * backed with a UDF hole and rw is S_WRITE or S_CREATE.
	 */
	if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {

		ASSERT((protp == NULL) ||
		    !(has_holes && (*protp & PROT_WRITE)));

		eoff = pgoff + plsz;
		while (pgoff < eoff) {
			page_t *pp;

			if ((pp = page_lookup_nowait(vp, pgoff,
			    SE_SHARED)) == NULL)
				break;

			*pl++ = pp;
			pgoff += pgsize;
			plsz -= pgsize;
		}
	}

	if (plarr)
		*pl = NULL;			/* Terminate page list */
	ip->i_nextr = pgoff;

error_out:
	if (error && plarr) {
		/*
		 * Release any pages we have locked.
		 */
		while (pl > &plarr[0])
			page_unlock(*--pl);

		plarr[0] = NULL;
	}

update_inode:
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif

	/*
	 * If the inode is not already marked for IACC (in rwip() for read)
	 * and the inode is not marked for no access time update (in rwip()
	 * for write) then update the inode access time and mod time now.
	 */
	mutex_enter(&ip->i_tlock);
	if ((ip->i_flag & (IACC | INOACC)) == 0) {
		if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
			ip->i_flag |= IACC;
		}
		if (rw == S_WRITE) {
			ip->i_flag |= IUPD;
		}
		ITIMES_NOLOCK(ip);
	}
	mutex_exit(&ip->i_tlock);

	return (error);
}

int32_t ud_delay = 1;

/* ARGSUSED */
static int32_t
udf_putpage(struct vnode *vp, offset_t off,
	size_t len, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip;
	int32_t error = 0;

	ud_printf("udf_putpage\n");

	ip = VTOI(vp);
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_WRITER);
#endif

	if (vp->v_count == 0) {
		cmn_err(CE_WARN, "ud_putpage : bad v_count");
		error = EINVAL;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}

	if (flags & B_ASYNC) {
		if (ud_delay && len &&
		    (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
			mutex_enter(&ip->i_tlock);

			/*
			 * If nobody stalled, start a new cluster.
			 */
			if (ip->i_delaylen == 0) {
				ip->i_delayoff = off;
				ip->i_delaylen = len;
				mutex_exit(&ip->i_tlock);
				goto out;
			}

			/*
			 * If we have a full cluster or they are not contig,
			 * then push last cluster and start over.
			 */
			if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
			    ip->i_delayoff + ip->i_delaylen != off) {
				u_offset_t doff;
				size_t dlen;

				doff = ip->i_delayoff;
				dlen = ip->i_delaylen;
				ip->i_delayoff = off;
				ip->i_delaylen = len;
				mutex_exit(&ip->i_tlock);
				error = ud_putpages(vp, doff, dlen, flags, cr);
				/* LMXXX - flags are new val, not old */
				goto out;
			}

			/*
			 * There is something there, it's not full, and
			 * it is contig.
			 */
			ip->i_delaylen += len;
			mutex_exit(&ip->i_tlock);
			goto out;
		}

		/*
		 * Must have weird flags or we are not clustering.
		 */
	}

	error = ud_putpages(vp, off, len, flags, cr);

out:
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#endif
	return (error);
}

static int32_t
udf_map(struct vnode *vp, offset_t off,
	struct as *as, caddr_t *addrp, size_t len,
	uint8_t prot, uint8_t maxprot, uint32_t flags,
	struct cred *cr)
{
	struct segvn_crargs vn_a;
	int32_t error = 0;

	ud_printf("udf_map\n");

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto end;
	}

	if ((off < (offset_t)0) ||
	    ((off + len) < (offset_t)0)) {
		error = EINVAL;
		goto end;
	}

	if (vp->v_type != VREG) {
		error = ENODEV;
		goto end;
	}

	/*
	 * If file is being locked, disallow mapping.
	 */
	if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
		error = EAGAIN;
		goto end;
	}

	as_rangelock(as);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, off, 1, flags);
		if (*addrp == NULL) {
			as_rangeunlock(as);
			error = ENOMEM;
			goto end;
		}
	} else {
		/*
		 * User specified address - blow away any previous mappings
		 */
		(void) as_unmap(as, *addrp, len);
	}

	vn_a.vp = vp;
	vn_a.offset = off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.cred = cr;
	vn_a.amp = NULL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
	as_rangeunlock(as);

end:
	return (error);
}

/* ARGSUSED */
static int32_t
udf_addmap(struct vnode *vp, offset_t off,
	struct as *as, caddr_t addr, size_t len,
	uint8_t prot, uint8_t maxprot, uint32_t flags,
	struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_addmap\n");

	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}

	mutex_enter(&ip->i_tlock);
	ip->i_mapcnt += btopr(len);
	mutex_exit(&ip->i_tlock);

	return (0);
}

/* ARGSUSED */
static int32_t
udf_delmap(struct vnode *vp, offset_t off,
	struct as *as, caddr_t addr, size_t len,
	uint32_t prot, uint32_t maxprot, uint32_t flags,
	struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_delmap\n");

	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}

	mutex_enter(&ip->i_tlock);
	ip->i_mapcnt -= btopr(len);	/* Count released mappings */
	ASSERT(ip->i_mapcnt >= 0);
	mutex_exit(&ip->i_tlock);

	return (0);
}

static int32_t
udf_l_pathconf(struct vnode *vp, int32_t cmd,
	ulong_t *valp, struct cred *cr)
{
	int32_t error = 0;

	ud_printf("udf_l_pathconf\n");

	if (cmd == _PC_FILESIZEBITS) {
		/*
		 * UDF supports 64-bit file sizes, but there are several
		 * other restrictions: it only supports 32-bit block numbers,
		 * and daddr32_t is only an int32_t, so taking these into
		 * account we can stay just where ufs is.
		 */
		*valp = 41;
	} else {
		error = fs_pathconf(vp, cmd, valp, cr);
	}

	return (error);
}

uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
#ifndef	__lint
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
#endif
/*
 * The assumption is that there will not be a pageio request
 * to an embedded file.
 */
/* ARGSUSED */
static int32_t
udf_pageio(struct vnode *vp, struct page *pp,
	u_offset_t io_off, size_t io_len,
	int32_t flags, struct cred *cr)
{
	daddr_t bn;
	struct buf *bp;
	struct ud_inode *ip = VTOI(vp);
	int32_t dolock, error = 0, contig, multi_io;
	size_t done_len = 0, cur_len = 0;
	page_t *npp = NULL, *opp = NULL, *cpp = pp;

	if (pp == NULL) {
		return (EINVAL);
	}

	dolock = (rw_owner(&ip->i_contents) != curthread);

	/*
	 * We need a better check. Ideally, we would use another
	 * vnodeops so that hlocked and forcibly unmounted file
	 * systems would return EIO where appropriate and w/o the
	 * need for these checks.
	 */
	if (ip->i_udf == NULL) {
		return (EIO);
	}

#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_READER);
#else
	if (dolock) {
		rw_enter(&ip->i_contents, RW_READER);
	}
#endif

	/*
	 * Break the io request into chunks, one for each contiguous
	 * stretch of disk blocks in the target file.
	 */
	while (done_len < io_len) {
		ASSERT(cpp);
		bp = NULL;
		contig = 0;
		if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
		    &bn, &contig)) {
			break;
		}

		if (bn == UDF_HOLE) {	/* No holey swapfiles */
			cmn_err(CE_WARN, "SWAP file has HOLES");
			error = EINVAL;
			break;
		}

		cur_len = MIN(io_len - done_len, contig);

		/*
		 * Check if more than one I/O is
		 * required to complete the given
		 * I/O operation
		 */
		if (ip->i_udf->udf_lbsize < PAGESIZE) {
			if (cur_len >= PAGESIZE) {
				multi_io = 0;
				cur_len &= PAGEMASK;
			} else {
				multi_io = 1;
				cur_len = MIN(io_len - done_len, PAGESIZE);
			}
		}
		page_list_break(&cpp, &npp, btop(cur_len));

		bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
		ASSERT(bp != NULL);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_un.b_addr = (caddr_t)0;
		bp->b_file = vp;
		bp->b_offset = (offset_t)(io_off + done_len);

		/*
		 * ub.ub_pageios.value.ul++;
		 */
		if (multi_io == 0) {
			(void) bdev_strategy(bp);
		} else {
			error = ud_multi_strat(ip, cpp, bp,
			    (u_offset_t)(io_off + done_len));
			if (error != 0) {
				pageio_done(bp);
				break;
			}
		}
		if (flags & B_READ) {
			ud_pageio_reads++;
		} else {
			ud_pageio_writes++;
		}

		/*
		 * If the request is not B_ASYNC, wait for i/o to complete
		 * and re-assemble the page list to return to the caller.
		 * If it is B_ASYNC we leave the page list in pieces and
		 * cleanup() will dispose of them.
		 */
		if ((flags & B_ASYNC) == 0) {
			error = biowait(bp);
			pageio_done(bp);
			if (error) {
				break;
			}
			page_list_concat(&opp, &cpp);
		}
		cpp = npp;
		npp = NULL;
		done_len += cur_len;
	}

	ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
	if (error) {
		if (flags & B_ASYNC) {
			/* Cleanup unprocessed parts of list */
			page_list_concat(&cpp, &npp);
			if (flags & B_READ) {
				pvn_read_done(cpp, B_ERROR);
			} else {
				pvn_write_done(cpp, B_ERROR);
			}
		} else {
			/* Re-assemble list and let caller clean up */
			page_list_concat(&opp, &cpp);
			page_list_concat(&opp, &npp);
		}
	}

#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif
	return (error);
}




/* -------------------- local functions --------------------------- */



int32_t
ud_rdwri(enum uio_rw rw, int32_t ioflag,
	struct ud_inode *ip, caddr_t base, int32_t len,
	offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
{
	int32_t error;
	struct uio auio;
	struct iovec aiov;

	ud_printf("ud_rdwri\n");

	bzero((caddr_t)&auio, sizeof (uio_t));
	bzero((caddr_t)&aiov, sizeof (iovec_t));

	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_loffset = offset;
	auio.uio_segflg = (int16_t)seg;
	auio.uio_resid = len;

	if (rw == UIO_WRITE) {
		auio.uio_fmode = FWRITE;
		auio.uio_extflg = UIO_COPY_DEFAULT;
		auio.uio_llimit = curproc->p_fsz_ctl;
		error = ud_wrip(ip, &auio, ioflag, cr);
	} else {
		auio.uio_fmode = FREAD;
		auio.uio_extflg = UIO_COPY_CACHED;
		auio.uio_llimit = MAXOFFSET_T;
		error = ud_rdip(ip, &auio, ioflag, cr);
	}

	if (aresid) {
		*aresid = auio.uio_resid;
	} else if (auio.uio_resid) {
		error = EIO;
	}
	return (error);
}

/*
 * Free behind hacks. The pager is busted.
 * XXX - need to pass the information down to writedone() in a flag like B_SEQ
 * or B_FREE_IF_TIGHT_ON_MEMORY.
 */
int32_t ud_freebehind = 1;
int32_t ud_smallfile = 32 * 1024;

/* ARGSUSED */
int32_t
ud_getpage_miss(struct vnode *vp, u_offset_t off,
	size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
	size_t plsz, enum seg_rw rw, int32_t seq)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t err = 0;
	size_t io_len;
	u_offset_t io_off;
	u_offset_t pgoff;
	page_t *pp;

	pl[0] = NULL;

	/*
	 * Figure out whether the page can be created, or must be
	 * read from the disk
	 */
	if (rw == S_CREATE) {
		if ((pp = page_create_va(vp, off,
		    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
			cmn_err(CE_WARN, "ud_getpage_miss: page_create");
			return (EINVAL);
		}
		io_len = PAGESIZE;
	} else {
		pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
		    &io_len, off, PAGESIZE, 0);

		/*
		 * Some other thread has entered the page.
		 * ud_getpage will retry page_lookup.
		 */
		if (pp == NULL) {
			return (0);
		}

		/*
		 * Fill the page with as much data as we can from the file.
		 */
		err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
		if (err) {
			pvn_read_done(pp, B_ERROR);
			return (err);
		}

		/*
		 * XXX ???
ufs has io_len instead of pgoff below 2277 */ 2278 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2279 2280 /* 2281 * If the file access is sequential, initiate read ahead 2282 * of the next cluster. 2283 */ 2284 if (seq && ip->i_nextrio < ip->i_size) { 2285 ud_getpage_ra(vp, off, seg, addr); 2286 } 2287 } 2288 2289 outmiss: 2290 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw); 2291 return (err); 2292 } 2293 2294 /* ARGSUSED */ 2295 void 2296 ud_getpage_ra(struct vnode *vp, 2297 u_offset_t off, struct seg *seg, caddr_t addr) 2298 { 2299 page_t *pp; 2300 size_t io_len; 2301 struct ud_inode *ip = VTOI(vp); 2302 u_offset_t io_off = ip->i_nextrio, pgoff; 2303 caddr_t addr2 = addr + (io_off - off); 2304 daddr_t bn; 2305 int32_t contig = 0; 2306 2307 /* 2308 * Is this test needed? 2309 */ 2310 2311 if (addr2 >= seg->s_base + seg->s_size) { 2312 return; 2313 } 2314 2315 contig = 0; 2316 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) { 2317 return; 2318 } 2319 2320 pp = pvn_read_kluster(vp, io_off, seg, addr2, 2321 &io_off, &io_len, io_off, PAGESIZE, 1); 2322 2323 /* 2324 * Some other thread has entered the page. 2325 * So no read head done here (ie we will have to and wait 2326 * for the read when needed). 2327 */ 2328 2329 if (pp == NULL) { 2330 return; 2331 } 2332 2333 (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff); 2334 ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2335 } 2336 2337 int 2338 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off, 2339 uint32_t bflgs, u_offset_t *pg_off) 2340 { 2341 daddr_t bn; 2342 struct buf *bp; 2343 caddr_t kaddr, caddr; 2344 int32_t error = 0, contig = 0, multi_io = 0; 2345 int32_t lbsize = ip->i_udf->udf_lbsize; 2346 int32_t lbmask = ip->i_udf->udf_lbmask; 2347 uint64_t isize; 2348 2349 isize = (ip->i_size + lbmask) & (~lbmask); 2350 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2351 2352 /* 2353 * Embedded file read file_entry 2354 * from buffer cache and copy the required 2355 * portions 2356 */ 2357 bp = ud_bread(ip->i_dev, 2358 ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize); 2359 if ((bp->b_error == 0) && 2360 (bp->b_resid == 0)) { 2361 2362 caddr = bp->b_un.b_addr + ip->i_data_off; 2363 2364 /* 2365 * mapin to kvm 2366 */ 2367 kaddr = (caddr_t)ppmapin(pp, 2368 PROT_READ | PROT_WRITE, (caddr_t)-1); 2369 (void) kcopy(caddr, kaddr, ip->i_size); 2370 2371 /* 2372 * mapout of kvm 2373 */ 2374 ppmapout(kaddr); 2375 } 2376 brelse(bp); 2377 contig = ip->i_size; 2378 } else { 2379 2380 /* 2381 * Get the continuous size and block number 2382 * at offset "off" 2383 */ 2384 if (error = ud_bmap_read(ip, off, &bn, &contig)) 2385 goto out; 2386 contig = MIN(contig, PAGESIZE); 2387 contig = (contig + lbmask) & (~lbmask); 2388 2389 /* 2390 * Zero part of the page which we are not 2391 * going to read from the disk. 2392 */ 2393 2394 if (bn == UDF_HOLE) { 2395 2396 /* 2397 * This is a HOLE. 
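 * No data block is allocated for this part of the file, so
 * there is nothing to read from the device.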
Just zero out 2398 * the page 2399 */ 2400 if (((off + contig) == isize) || 2401 (contig == PAGESIZE)) { 2402 pagezero(pp->p_prev, 0, PAGESIZE); 2403 goto out; 2404 } 2405 } 2406 2407 if (contig < PAGESIZE) { 2408 uint64_t count; 2409 2410 count = isize - off; 2411 if (contig != count) { 2412 multi_io = 1; 2413 contig = (int32_t)(MIN(count, PAGESIZE)); 2414 } else { 2415 pagezero(pp->p_prev, contig, PAGESIZE - contig); 2416 } 2417 } 2418 2419 /* 2420 * Get a bp and initialize it 2421 */ 2422 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs); 2423 ASSERT(bp != NULL); 2424 2425 bp->b_edev = ip->i_dev; 2426 bp->b_dev = cmpdev(ip->i_dev); 2427 bp->b_blkno = bn; 2428 bp->b_un.b_addr = 0; 2429 bp->b_file = ip->i_vnode; 2430 2431 /* 2432 * Start I/O 2433 */ 2434 if (multi_io == 0) { 2435 2436 /* 2437 * Single I/O is sufficient for this page 2438 */ 2439 (void) bdev_strategy(bp); 2440 } else { 2441 2442 /* 2443 * We need to do the I/O in 2444 * piece's 2445 */ 2446 error = ud_multi_strat(ip, pp, bp, off); 2447 if (error != 0) { 2448 goto out; 2449 } 2450 } 2451 if ((bflgs & B_ASYNC) == 0) { 2452 2453 /* 2454 * Wait for i/o to complete. 2455 */ 2456 2457 error = biowait(bp); 2458 pageio_done(bp); 2459 if (error) { 2460 goto out; 2461 } 2462 } 2463 } 2464 if ((off + contig) >= ip->i_size) { 2465 contig = ip->i_size - off; 2466 } 2467 2468 out: 2469 *pg_off = contig; 2470 return (error); 2471 } 2472 2473 int32_t 2474 ud_putpages(struct vnode *vp, offset_t off, 2475 size_t len, int32_t flags, struct cred *cr) 2476 { 2477 struct ud_inode *ip; 2478 page_t *pp; 2479 u_offset_t io_off; 2480 size_t io_len; 2481 u_offset_t eoff; 2482 int32_t err = 0; 2483 int32_t dolock; 2484 2485 ud_printf("ud_putpages\n"); 2486 2487 if (vp->v_count == 0) { 2488 cmn_err(CE_WARN, "ud_putpages: bad v_count"); 2489 return (EINVAL); 2490 } 2491 2492 ip = VTOI(vp); 2493 2494 /* 2495 * Acquire the readers/write inode lock before locking 2496 * any pages in this inode. 2497 * The inode lock is held during i/o. 2498 */ 2499 if (len == 0) { 2500 mutex_enter(&ip->i_tlock); 2501 ip->i_delayoff = ip->i_delaylen = 0; 2502 mutex_exit(&ip->i_tlock); 2503 } 2504 #ifdef __lock_lint 2505 rw_enter(&ip->i_contents, RW_READER); 2506 #else 2507 dolock = (rw_owner(&ip->i_contents) != curthread); 2508 if (dolock) { 2509 rw_enter(&ip->i_contents, RW_READER); 2510 } 2511 #endif 2512 2513 if (!vn_has_cached_data(vp)) { 2514 #ifdef __lock_lint 2515 rw_exit(&ip->i_contents); 2516 #else 2517 if (dolock) { 2518 rw_exit(&ip->i_contents); 2519 } 2520 #endif 2521 return (0); 2522 } 2523 2524 if (len == 0) { 2525 /* 2526 * Search the entire vp list for pages >= off. 2527 */ 2528 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage, 2529 flags, cr); 2530 } else { 2531 /* 2532 * Loop over all offsets in the range looking for 2533 * pages to deal with. 2534 */ 2535 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) { 2536 eoff = MIN(off + len, eoff); 2537 } else { 2538 eoff = off + len; 2539 } 2540 2541 for (io_off = off; io_off < eoff; io_off += io_len) { 2542 /* 2543 * If we are not invalidating, synchronously 2544 * freeing or writing pages, use the routine 2545 * page_lookup_nowait() to prevent reclaiming 2546 * them from the free list. 2547 */ 2548 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 2549 pp = page_lookup(vp, io_off, 2550 (flags & (B_INVAL | B_FREE)) ? 2551 SE_EXCL : SE_SHARED); 2552 } else { 2553 pp = page_lookup_nowait(vp, io_off, 2554 (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 2555 } 2556 2557 if (pp == NULL || pvn_getdirty(pp, flags) == 0) { 2558 io_len = PAGESIZE; 2559 } else { 2560 2561 err = ud_putapage(vp, pp, 2562 &io_off, &io_len, flags, cr); 2563 if (err != 0) { 2564 break; 2565 } 2566 /* 2567 * "io_off" and "io_len" are returned as 2568 * the range of pages we actually wrote. 2569 * This allows us to skip ahead more quickly 2570 * since several pages may've been dealt 2571 * with by this iteration of the loop. 2572 */ 2573 } 2574 } 2575 } 2576 if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) { 2577 /* 2578 * We have just sync'ed back all the pages on 2579 * the inode, turn off the IMODTIME flag. 2580 */ 2581 mutex_enter(&ip->i_tlock); 2582 ip->i_flag &= ~IMODTIME; 2583 mutex_exit(&ip->i_tlock); 2584 } 2585 #ifdef __lock_lint 2586 rw_exit(&ip->i_contents); 2587 #else 2588 if (dolock) { 2589 rw_exit(&ip->i_contents); 2590 } 2591 #endif 2592 return (err); 2593 } 2594 2595 /* ARGSUSED */ 2596 int32_t 2597 ud_putapage(struct vnode *vp, 2598 page_t *pp, u_offset_t *offp, 2599 size_t *lenp, int32_t flags, struct cred *cr) 2600 { 2601 daddr_t bn; 2602 size_t io_len; 2603 struct ud_inode *ip; 2604 int32_t error = 0, contig, multi_io = 0; 2605 struct udf_vfs *udf_vfsp; 2606 u_offset_t off, io_off; 2607 caddr_t kaddr, caddr; 2608 struct buf *bp = NULL; 2609 int32_t lbmask; 2610 uint64_t isize; 2611 int32_t crc_len; 2612 struct file_entry *fe; 2613 2614 ud_printf("ud_putapage\n"); 2615 2616 ip = VTOI(vp); 2617 ASSERT(ip); 2618 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2619 lbmask = ip->i_udf->udf_lbmask; 2620 isize = (ip->i_size + lbmask) & (~lbmask); 2621 2622 udf_vfsp = ip->i_udf; 2623 ASSERT(udf_vfsp->udf_flags & UDF_FL_RW); 2624 2625 /* 2626 * If the modified time on the inode has not already been 2627 * set elsewhere (e.g. for write/setattr) we set the time now. 2628 * This gives us approximate modified times for mmap'ed files 2629 * which are modified via stores in the user address space. 2630 */ 2631 if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) { 2632 mutex_enter(&ip->i_tlock); 2633 ip->i_flag |= IUPD; 2634 ITIMES_NOLOCK(ip); 2635 mutex_exit(&ip->i_tlock); 2636 } 2637 2638 2639 /* 2640 * Align the request to a block boundry (for old file systems), 2641 * and go ask bmap() how contiguous things are for this file. 
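 * For an embedded file (ICB_FLAG_ONE_AD) there is no separate data
 * extent: the page is copied back into the cached file_entry block
 * and written with bwrite(). For a normal file the dirty pages are
 * klustered and pushed with bdev_strategy(), or with ud_multi_strat()
 * when the range is not contiguous on disk.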
2642 */ 2643 off = pp->p_offset & ~(offset_t)lbmask; 2644 /* block align it */ 2645 2646 2647 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2648 ASSERT(ip->i_size <= ip->i_max_emb); 2649 2650 pp = pvn_write_kluster(vp, pp, &io_off, 2651 &io_len, off, PAGESIZE, flags); 2652 if (io_len == 0) { 2653 io_len = PAGESIZE; 2654 } 2655 2656 bp = ud_bread(ip->i_dev, 2657 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift, 2658 udf_vfsp->udf_lbsize); 2659 fe = (struct file_entry *)bp->b_un.b_addr; 2660 if ((bp->b_flags & B_ERROR) || 2661 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY, 2662 ip->i_icb_block, 2663 1, udf_vfsp->udf_lbsize) != 0)) { 2664 if (pp != NULL) 2665 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2666 if (bp->b_flags & B_ERROR) { 2667 error = EIO; 2668 } else { 2669 error = EINVAL; 2670 } 2671 brelse(bp); 2672 return (error); 2673 } 2674 if ((bp->b_error == 0) && 2675 (bp->b_resid == 0)) { 2676 2677 caddr = bp->b_un.b_addr + ip->i_data_off; 2678 kaddr = (caddr_t)ppmapin(pp, 2679 PROT_READ | PROT_WRITE, (caddr_t)-1); 2680 (void) kcopy(kaddr, caddr, ip->i_size); 2681 ppmapout(kaddr); 2682 } 2683 crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) + 2684 SWAP_32(fe->fe_len_ear); 2685 crc_len += ip->i_size; 2686 ud_make_tag(ip->i_udf, &fe->fe_tag, 2687 UD_FILE_ENTRY, ip->i_icb_block, crc_len); 2688 2689 bwrite(bp); 2690 2691 if (flags & B_ASYNC) { 2692 pvn_write_done(pp, flags); 2693 } 2694 contig = ip->i_size; 2695 } else { 2696 2697 if (error = ud_bmap_read(ip, off, &bn, &contig)) { 2698 goto out; 2699 } 2700 contig = MIN(contig, PAGESIZE); 2701 contig = (contig + lbmask) & (~lbmask); 2702 2703 if (contig < PAGESIZE) { 2704 uint64_t count; 2705 2706 count = isize - off; 2707 if (contig != count) { 2708 multi_io = 1; 2709 contig = (int32_t)(MIN(count, PAGESIZE)); 2710 } 2711 } 2712 2713 if ((off + contig) > isize) { 2714 contig = isize - off; 2715 } 2716 2717 if (contig > PAGESIZE) { 2718 if (contig & PAGEOFFSET) { 2719 contig &= PAGEMASK; 2720 } 2721 } 2722 2723 pp = pvn_write_kluster(vp, pp, &io_off, 2724 &io_len, off, contig, flags); 2725 if (io_len == 0) { 2726 io_len = PAGESIZE; 2727 } 2728 2729 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags); 2730 ASSERT(bp != NULL); 2731 2732 bp->b_edev = ip->i_dev; 2733 bp->b_dev = cmpdev(ip->i_dev); 2734 bp->b_blkno = bn; 2735 bp->b_un.b_addr = 0; 2736 bp->b_file = vp; 2737 bp->b_offset = (offset_t)off; 2738 2739 2740 /* 2741 * write throttle 2742 */ 2743 ASSERT(bp->b_iodone == NULL); 2744 bp->b_iodone = ud_iodone; 2745 mutex_enter(&ip->i_tlock); 2746 ip->i_writes += bp->b_bcount; 2747 mutex_exit(&ip->i_tlock); 2748 2749 if (multi_io == 0) { 2750 2751 (void) bdev_strategy(bp); 2752 } else { 2753 error = ud_multi_strat(ip, pp, bp, off); 2754 if (error != 0) { 2755 goto out; 2756 } 2757 } 2758 2759 if ((flags & B_ASYNC) == 0) { 2760 /* 2761 * Wait for i/o to complete. 2762 */ 2763 error = biowait(bp); 2764 pageio_done(bp); 2765 } 2766 } 2767 2768 if ((flags & B_ASYNC) == 0) { 2769 pvn_write_done(pp, ((error) ? 
B_ERROR : 0) | B_WRITE | flags); 2770 } 2771 2772 pp = NULL; 2773 2774 out: 2775 if (error != 0 && pp != NULL) { 2776 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2777 } 2778 2779 if (offp) { 2780 *offp = io_off; 2781 } 2782 if (lenp) { 2783 *lenp = io_len; 2784 } 2785 2786 return (error); 2787 } 2788 2789 2790 int32_t 2791 ud_iodone(struct buf *bp) 2792 { 2793 struct ud_inode *ip; 2794 2795 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ)); 2796 2797 bp->b_iodone = NULL; 2798 2799 ip = VTOI(bp->b_pages->p_vnode); 2800 2801 mutex_enter(&ip->i_tlock); 2802 if (ip->i_writes >= ud_LW) { 2803 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) { 2804 if (ud_WRITES) { 2805 cv_broadcast(&ip->i_wrcv); /* wake all up */ 2806 } 2807 } 2808 } else { 2809 ip->i_writes -= bp->b_bcount; 2810 } 2811 mutex_exit(&ip->i_tlock); 2812 iodone(bp); 2813 return (0); 2814 } 2815 2816 /* ARGSUSED3 */ 2817 int32_t 2818 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr) 2819 { 2820 struct vnode *vp; 2821 struct udf_vfs *udf_vfsp; 2822 krw_t rwtype; 2823 caddr_t base; 2824 uint32_t flags; 2825 int32_t error, n, on, mapon, dofree; 2826 u_offset_t off; 2827 long oresid = uio->uio_resid; 2828 2829 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2830 if ((ip->i_type != VREG) && 2831 (ip->i_type != VDIR) && 2832 (ip->i_type != VLNK)) { 2833 return (EIO); 2834 } 2835 2836 if (uio->uio_loffset > MAXOFFSET_T) { 2837 return (0); 2838 } 2839 2840 if ((uio->uio_loffset < (offset_t)0) || 2841 ((uio->uio_loffset + uio->uio_resid) < 0)) { 2842 return (EINVAL); 2843 } 2844 if (uio->uio_resid == 0) { 2845 return (0); 2846 } 2847 2848 vp = ITOV(ip); 2849 udf_vfsp = ip->i_udf; 2850 mutex_enter(&ip->i_tlock); 2851 ip->i_flag |= IACC; 2852 mutex_exit(&ip->i_tlock); 2853 2854 rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER); 2855 2856 do { 2857 offset_t diff; 2858 u_offset_t uoff = uio->uio_loffset; 2859 off = uoff & (offset_t)MAXBMASK; 2860 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 2861 on = (int)blkoff(udf_vfsp, uoff); 2862 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 2863 2864 diff = ip->i_size - uoff; 2865 2866 if (diff <= (offset_t)0) { 2867 error = 0; 2868 goto out; 2869 } 2870 if (diff < (offset_t)n) { 2871 n = (int)diff; 2872 } 2873 dofree = ud_freebehind && 2874 ip->i_nextr == (off & PAGEMASK) && 2875 off > ud_smallfile; 2876 2877 #ifndef __lock_lint 2878 if (rwtype == RW_READER) { 2879 rw_exit(&ip->i_contents); 2880 } 2881 #endif 2882 2883 base = segmap_getmapflt(segkmap, vp, (off + mapon), 2884 (uint32_t)n, 1, S_READ); 2885 error = uiomove(base + mapon, (long)n, UIO_READ, uio); 2886 2887 flags = 0; 2888 if (!error) { 2889 /* 2890 * If read a whole block, or read to eof, 2891 * won't need this buffer again soon. 2892 */ 2893 if (n + on == MAXBSIZE && ud_freebehind && dofree && 2894 freemem < lotsfree + pages_before_pager) { 2895 flags = SM_FREE | SM_DONTNEED |SM_ASYNC; 2896 } 2897 /* 2898 * In POSIX SYNC (FSYNC and FDSYNC) read mode, 2899 * we want to make sure that the page which has 2900 * been read, is written on disk if it is dirty. 2901 * And corresponding indirect blocks should also 2902 * be flushed out. 
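 * Setting SM_WRITE and clearing SM_ASYNC below makes
 * segmap_release() push the page out synchronously.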
2903 */ 2904 if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) { 2905 flags &= ~SM_ASYNC; 2906 flags |= SM_WRITE; 2907 } 2908 error = segmap_release(segkmap, base, flags); 2909 } else { 2910 (void) segmap_release(segkmap, base, flags); 2911 } 2912 2913 #ifndef __lock_lint 2914 if (rwtype == RW_READER) { 2915 rw_enter(&ip->i_contents, rwtype); 2916 } 2917 #endif 2918 } while (error == 0 && uio->uio_resid > 0 && n != 0); 2919 out: 2920 /* 2921 * Inode is updated according to this table if FRSYNC is set. 2922 * 2923 * FSYNC FDSYNC(posix.4) 2924 * -------------------------- 2925 * always IATTCHG|IBDWRITE 2926 */ 2927 if (ioflag & FRSYNC) { 2928 if ((ioflag & FSYNC) || 2929 ((ioflag & FDSYNC) && (ip->i_flag & (IATTCHG|IBDWRITE)))) { 2930 rw_exit(&ip->i_contents); 2931 rw_enter(&ip->i_contents, RW_WRITER); 2932 ud_iupdat(ip, 1); 2933 } 2934 } 2935 /* 2936 * If we've already done a partial read, terminate 2937 * the read but return no error. 2938 */ 2939 if (oresid != uio->uio_resid) { 2940 error = 0; 2941 } 2942 ITIMES(ip); 2943 2944 return (error); 2945 } 2946 2947 int32_t 2948 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr) 2949 { 2950 caddr_t base; 2951 struct vnode *vp; 2952 struct udf_vfs *udf_vfsp; 2953 uint32_t flags; 2954 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0; 2955 int32_t pagecreate, newpage; 2956 uint64_t old_i_size; 2957 u_offset_t off; 2958 long start_resid = uio->uio_resid, premove_resid; 2959 rlim64_t limit = uio->uio_limit; 2960 2961 2962 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 2963 if ((ip->i_type != VREG) && 2964 (ip->i_type != VDIR) && 2965 (ip->i_type != VLNK)) { 2966 return (EIO); 2967 } 2968 2969 if (uio->uio_loffset >= MAXOFFSET_T) { 2970 return (EFBIG); 2971 } 2972 /* 2973 * see udf_l_pathconf 2974 */ 2975 if (limit > (((uint64_t)1 << 40) - 1)) { 2976 limit = ((uint64_t)1 << 40) - 1; 2977 } 2978 if (uio->uio_loffset >= limit) { 2979 proc_t *p = ttoproc(curthread); 2980 2981 mutex_enter(&p->p_lock); 2982 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 2983 p, RCA_UNSAFE_SIGINFO); 2984 mutex_exit(&p->p_lock); 2985 return (EFBIG); 2986 } 2987 if ((uio->uio_loffset < (offset_t)0) || 2988 ((uio->uio_loffset + uio->uio_resid) < 0)) { 2989 return (EINVAL); 2990 } 2991 if (uio->uio_resid == 0) { 2992 return (0); 2993 } 2994 2995 mutex_enter(&ip->i_tlock); 2996 ip->i_flag |= INOACC; 2997 2998 if (ioflag & (FSYNC | FDSYNC)) { 2999 ip->i_flag |= ISYNC; 3000 iupdat_flag = 1; 3001 } 3002 mutex_exit(&ip->i_tlock); 3003 3004 udf_vfsp = ip->i_udf; 3005 vp = ITOV(ip); 3006 3007 do { 3008 u_offset_t uoff = uio->uio_loffset; 3009 off = uoff & (offset_t)MAXBMASK; 3010 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3011 on = (int)blkoff(udf_vfsp, uoff); 3012 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3013 3014 if (ip->i_type == VREG && uoff + n >= limit) { 3015 if (uoff >= limit) { 3016 error = EFBIG; 3017 goto out; 3018 } 3019 n = (int)(limit - (rlim64_t)uoff); 3020 } 3021 if (uoff + n > ip->i_size) { 3022 /* 3023 * We are extending the length of the file. 3024 * bmap is used so that we are sure that 3025 * if we need to allocate new blocks, that it 3026 * is done here before we up the file size. 
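 * The current size is remembered in old_i_size so that, if the
 * copy-in fails later, ud_itrunc() can restore the file to its
 * original length.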
3027 */ 3028 error = ud_bmap_write(ip, uoff, 3029 (int)(on + n), mapon == 0, cr); 3030 if (error) { 3031 break; 3032 } 3033 i_size_changed = 1; 3034 old_i_size = ip->i_size; 3035 ip->i_size = uoff + n; 3036 /* 3037 * If we are writing from the beginning of 3038 * the mapping, we can just create the 3039 * pages without having to read them. 3040 */ 3041 pagecreate = (mapon == 0); 3042 } else if (n == MAXBSIZE) { 3043 /* 3044 * Going to do a whole mappings worth, 3045 * so we can just create the pages w/o 3046 * having to read them in. But before 3047 * we do that, we need to make sure any 3048 * needed blocks are allocated first. 3049 */ 3050 error = ud_bmap_write(ip, uoff, 3051 (int)(on + n), 1, cr); 3052 if (error) { 3053 break; 3054 } 3055 pagecreate = 1; 3056 } else { 3057 pagecreate = 0; 3058 } 3059 3060 rw_exit(&ip->i_contents); 3061 3062 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3063 (uint32_t)n, !pagecreate, S_WRITE); 3064 3065 /* 3066 * segmap_pagecreate() returns 1 if it calls 3067 * page_create_va() to allocate any pages. 3068 */ 3069 newpage = 0; 3070 if (pagecreate) { 3071 newpage = segmap_pagecreate(segkmap, base, 3072 (size_t)n, 0); 3073 } 3074 3075 premove_resid = uio->uio_resid; 3076 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); 3077 3078 if (pagecreate && 3079 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) { 3080 /* 3081 * We created pages w/o initializing them completely, 3082 * thus we need to zero the part that wasn't set up. 3083 * This happens on most EOF write cases and if 3084 * we had some sort of error during the uiomove. 3085 */ 3086 int nzero, nmoved; 3087 3088 nmoved = (int)(uio->uio_loffset - (off + mapon)); 3089 ASSERT(nmoved >= 0 && nmoved <= n); 3090 nzero = roundup(on + n, PAGESIZE) - nmoved; 3091 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE); 3092 (void) kzero(base + mapon + nmoved, (uint32_t)nzero); 3093 } 3094 3095 /* 3096 * Unlock the pages allocated by page_create_va() 3097 * in segmap_pagecreate() 3098 */ 3099 if (newpage) { 3100 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE); 3101 } 3102 3103 if (error) { 3104 /* 3105 * If we failed on a write, we may have already 3106 * allocated file blocks as well as pages. It's 3107 * hard to undo the block allocation, but we must 3108 * be sure to invalidate any pages that may have 3109 * been allocated. 3110 */ 3111 (void) segmap_release(segkmap, base, SM_INVAL); 3112 } else { 3113 flags = 0; 3114 /* 3115 * Force write back for synchronous write cases. 3116 */ 3117 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) { 3118 /* 3119 * If the sticky bit is set but the 3120 * execute bit is not set, we do a 3121 * synchronous write back and free 3122 * the page when done. We set up swap 3123 * files to be handled this way to 3124 * prevent servers from keeping around 3125 * the client's swap pages too long. 3126 * XXX - there ought to be a better way. 3127 */ 3128 if (IS_SWAPVP(vp)) { 3129 flags = SM_WRITE | SM_FREE | 3130 SM_DONTNEED; 3131 iupdat_flag = 0; 3132 } else { 3133 flags = SM_WRITE; 3134 } 3135 } else if (((mapon + n) == MAXBSIZE) || 3136 IS_SWAPVP(vp)) { 3137 /* 3138 * Have written a whole block. 3139 * Start an asynchronous write and 3140 * mark the buffer to indicate that 3141 * it won't be needed again soon. 
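 * SM_DONTNEED is a hint that the mapping will not be referenced
 * again soon, so the segmap slot and its pages may be reclaimed
 * early.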
3142 */ 3143 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED; 3144 } 3145 error = segmap_release(segkmap, base, flags); 3146 3147 /* 3148 * If the operation failed and is synchronous, 3149 * then we need to unwind what uiomove() last 3150 * did so we can potentially return an error to 3151 * the caller. If this write operation was 3152 * done in two pieces and the first succeeded, 3153 * then we won't return an error for the second 3154 * piece that failed. However, we only want to 3155 * return a resid value that reflects what was 3156 * really done. 3157 * 3158 * Failures for non-synchronous operations can 3159 * be ignored since the page subsystem will 3160 * retry the operation until it succeeds or the 3161 * file system is unmounted. 3162 */ 3163 if (error) { 3164 if ((ioflag & (FSYNC | FDSYNC)) || 3165 ip->i_type == VDIR) { 3166 uio->uio_resid = premove_resid; 3167 } else { 3168 error = 0; 3169 } 3170 } 3171 } 3172 3173 /* 3174 * Re-acquire contents lock. 3175 */ 3176 rw_enter(&ip->i_contents, RW_WRITER); 3177 /* 3178 * If the uiomove() failed or if a synchronous 3179 * page push failed, fix up i_size. 3180 */ 3181 if (error) { 3182 if (i_size_changed) { 3183 /* 3184 * The uiomove failed, and we 3185 * allocated blocks,so get rid 3186 * of them. 3187 */ 3188 (void) ud_itrunc(ip, old_i_size, 0, cr); 3189 } 3190 } else { 3191 /* 3192 * XXX - Can this be out of the loop? 3193 */ 3194 ip->i_flag |= IUPD | ICHG; 3195 if (i_size_changed) { 3196 ip->i_flag |= IATTCHG; 3197 } 3198 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) | 3199 (IEXEC >> 10))) != 0 && 3200 (ip->i_char & (ISUID | ISGID)) != 0 && 3201 secpolicy_vnode_setid_retain(cr, 3202 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) { 3203 /* 3204 * Clear Set-UID & Set-GID bits on 3205 * successful write if not privileged 3206 * and at least one of the execute bits 3207 * is set. If we always clear Set-GID, 3208 * mandatory file and record locking is 3209 * unuseable. 3210 */ 3211 ip->i_char &= ~(ISUID | ISGID); 3212 } 3213 } 3214 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3215 3216 out: 3217 /* 3218 * Inode is updated according to this table - 3219 * 3220 * FSYNC FDSYNC(posix.4) 3221 * -------------------------- 3222 * always@ IATTCHG|IBDWRITE 3223 * 3224 * @ - If we are doing synchronous write the only time we should 3225 * not be sync'ing the ip here is if we have the stickyhack 3226 * activated, the file is marked with the sticky bit and 3227 * no exec bit, the file length has not been changed and 3228 * no new blocks have been allocated during this write. 3229 */ 3230 if ((ip->i_flag & ISYNC) != 0) { 3231 /* 3232 * we have eliminated nosync 3233 */ 3234 if ((ip->i_flag & (IATTCHG|IBDWRITE)) || 3235 ((ioflag & FSYNC) && iupdat_flag)) { 3236 ud_iupdat(ip, 1); 3237 } 3238 } 3239 3240 /* 3241 * If we've already done a partial-write, terminate 3242 * the write but return no error. 
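 * The caller can still detect the short write from the residual
 * count left in the uio structure.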
3243 */ 3244 if (start_resid != uio->uio_resid) { 3245 error = 0; 3246 } 3247 ip->i_flag &= ~(INOACC | ISYNC); 3248 ITIMES_NOLOCK(ip); 3249 3250 return (error); 3251 } 3252 3253 int32_t 3254 ud_multi_strat(struct ud_inode *ip, 3255 page_t *pp, struct buf *bp, u_offset_t start) 3256 { 3257 daddr_t bn; 3258 int32_t error = 0, io_count, contig, alloc_sz, i; 3259 uint32_t io_off; 3260 mio_master_t *mm = NULL; 3261 mio_slave_t *ms = NULL; 3262 struct buf *rbp; 3263 3264 ASSERT(!(start & PAGEOFFSET)); 3265 3266 /* 3267 * Figure out how many buffers to allocate 3268 */ 3269 io_count = 0; 3270 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3271 contig = 0; 3272 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off), 3273 &bn, &contig)) { 3274 goto end; 3275 } 3276 if (contig == 0) { 3277 goto end; 3278 } 3279 contig = MIN(contig, PAGESIZE - io_off); 3280 if (bn != UDF_HOLE) { 3281 io_count ++; 3282 } else { 3283 /* 3284 * HOLE 3285 */ 3286 if (bp->b_flags & B_READ) { 3287 3288 /* 3289 * This is a hole and is read 3290 * it should be filled with 0's 3291 */ 3292 pagezero(pp, io_off, contig); 3293 } 3294 } 3295 } 3296 3297 3298 if (io_count != 0) { 3299 3300 /* 3301 * Allocate memory for all the 3302 * required number of buffers 3303 */ 3304 alloc_sz = sizeof (mio_master_t) + 3305 (sizeof (mio_slave_t) * io_count); 3306 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP); 3307 if (mm == NULL) { 3308 error = ENOMEM; 3309 goto end; 3310 } 3311 3312 /* 3313 * initialize master 3314 */ 3315 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL); 3316 mm->mm_size = alloc_sz; 3317 mm->mm_bp = bp; 3318 mm->mm_resid = 0; 3319 mm->mm_error = 0; 3320 mm->mm_index = master_index++; 3321 3322 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3323 3324 /* 3325 * Initialize buffers 3326 */ 3327 io_count = 0; 3328 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3329 contig = 0; 3330 if (error = ud_bmap_read(ip, 3331 (u_offset_t)(start + io_off), 3332 &bn, &contig)) { 3333 goto end; 3334 } 3335 ASSERT(contig); 3336 if ((io_off + contig) > bp->b_bcount) { 3337 contig = bp->b_bcount - io_off; 3338 } 3339 if (bn != UDF_HOLE) { 3340 /* 3341 * Clone the buffer 3342 * and prepare to start I/O 3343 */ 3344 ms->ms_ptr = mm; 3345 bioinit(&ms->ms_buf); 3346 rbp = bioclone(bp, io_off, (size_t)contig, 3347 bp->b_edev, bn, ud_slave_done, 3348 &ms->ms_buf, KM_NOSLEEP); 3349 ASSERT(rbp == &ms->ms_buf); 3350 mm->mm_resid += contig; 3351 io_count++; 3352 ms ++; 3353 } 3354 } 3355 3356 /* 3357 * Start I/O's 3358 */ 3359 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3360 for (i = 0; i < io_count; i++) { 3361 (void) bdev_strategy(&ms->ms_buf); 3362 ms ++; 3363 } 3364 } 3365 3366 end: 3367 if (error != 0) { 3368 bp->b_flags |= B_ERROR; 3369 bp->b_error = error; 3370 if (mm != NULL) { 3371 mutex_destroy(&mm->mm_mutex); 3372 kmem_free(mm, mm->mm_size); 3373 } 3374 } 3375 return (error); 3376 } 3377 3378 int32_t 3379 ud_slave_done(struct buf *bp) 3380 { 3381 mio_master_t *mm; 3382 int32_t resid; 3383 3384 ASSERT(SEMA_HELD(&bp->b_sem)); 3385 ASSERT((bp->b_flags & B_DONE) == 0); 3386 3387 mm = ((mio_slave_t *)bp)->ms_ptr; 3388 3389 /* 3390 * Propagate error and byte count info from slave struct to 3391 * the master struct 3392 */ 3393 mutex_enter(&mm->mm_mutex); 3394 if (bp->b_flags & B_ERROR) { 3395 3396 /* 3397 * If multiple slave buffers get 3398 * error we forget the old errors 3399 * this is ok because we any way 3400 * cannot return multiple errors 3401 */ 3402 mm->mm_error = 
bp->b_error; 3403 } 3404 mm->mm_resid -= bp->b_bcount; 3405 resid = mm->mm_resid; 3406 mutex_exit(&mm->mm_mutex); 3407 3408 /* 3409 * Free up the resources allocated to the cloned buffer. 3410 */ 3411 bp_mapout(bp); 3412 biofini(bp); 3413 3414 if (resid == 0) { 3415 3416 /* 3417 * This is the last I/O operation; 3418 * clean up and return the original buffer. 3419 */ 3420 if (mm->mm_error) { 3421 mm->mm_bp->b_flags |= B_ERROR; 3422 mm->mm_bp->b_error = mm->mm_error; 3423 } 3424 biodone(mm->mm_bp); 3425 mutex_destroy(&mm->mm_mutex); 3426 kmem_free(mm, mm->mm_size); 3427 } 3428 return (0); 3429 } 3430
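/*
 * Illustrative sketch (not part of the original source): one way a
 * caller inside this file system could use ud_rdwri() to read the
 * first logical block of a file into a kernel buffer. The guard
 * macro, the helper name and the zero offset are assumptions made
 * for illustration only; the caller must hold ip->i_contents, as
 * ud_rdip()/ud_wrip() assert.
 */
#ifdef	UD_RDWRI_EXAMPLE
static int32_t
ud_example_read_first_block(struct ud_inode *ip, struct cred *cr)
{
	caddr_t buf;
	int32_t error, resid;
	int32_t lbsize = ip->i_udf->udf_lbsize;

	buf = (caddr_t)kmem_zalloc(lbsize, KM_SLEEP);

	/* Kernel-space read of one logical block starting at offset 0 */
	error = ud_rdwri(UIO_READ, 0, ip, buf, lbsize,
	    (offset_t)0, UIO_SYSSPACE, &resid, cr);

	kmem_free(buf, lbsize);
	return (error);
}
#endif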