/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/mode.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/dnlc.h>
#include <sys/conf.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/fbuf.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/dirent.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/statvfs.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/policy.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <vm/page.h>
#include <sys/swap.h>

#include <fs/fs_subr.h>

#include <sys/fs/udf_volume.h>
#include <sys/fs/udf_inode.h>

static int32_t udf_open(struct vnode **,
	int32_t, struct cred *);
static int32_t udf_close(struct vnode *,
	int32_t, int32_t, offset_t, struct cred *);
static int32_t udf_read(struct vnode *,
	struct uio *, int32_t, struct cred *, struct caller_context *);
static int32_t udf_write(struct vnode *,
	struct uio *, int32_t, struct cred *, struct caller_context *);
static int32_t udf_ioctl(struct vnode *,
	int32_t, intptr_t, int32_t, struct cred *, int32_t *);
static int32_t udf_getattr(struct vnode *,
	struct vattr *, int32_t, struct cred *);
static int32_t udf_setattr(struct vnode *,
	struct vattr *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_access(struct vnode *,
	int32_t, int32_t, struct cred *);
static int32_t udf_lookup(struct vnode *,
	char *, struct vnode **, struct pathname *,
	int32_t, struct vnode *, struct cred *);
static int32_t udf_create(struct vnode *,
	char *, struct vattr *, enum vcexcl,
	int32_t, struct vnode **, struct cred *, int32_t);
static int32_t udf_remove(struct vnode *,
	char *, struct cred *);
static int32_t udf_link(struct vnode *,
	struct vnode *, char *, struct cred *);
static int32_t udf_rename(struct vnode *,
	char *, struct vnode *, char *, struct cred *);
static int32_t udf_mkdir(struct vnode *,
	char *, struct vattr *, struct vnode **, struct cred *);
static int32_t udf_rmdir(struct vnode *,
	char *, struct vnode *, struct cred *);
static int32_t udf_readdir(struct vnode *,
	struct uio *, struct cred *, int32_t *);
static int32_t udf_symlink(struct vnode *,
	char *, struct vattr *, char *, struct cred *);
static int32_t udf_readlink(struct vnode *,
	struct uio *, struct cred *);
static int32_t udf_fsync(struct vnode *,
	int32_t, struct cred *);
static void udf_inactive(struct vnode *,
	struct cred *);
static int32_t udf_fid(struct vnode *, struct fid *);
static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
static int32_t udf_seek(struct vnode *, offset_t, offset_t *);
static int32_t udf_frlock(struct vnode *, int32_t,
	struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *);
static int32_t udf_space(struct vnode *, int32_t,
	struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
static int32_t udf_getpage(struct vnode *, offset_t,
	size_t, uint32_t *, struct page **, size_t,
	struct seg *, caddr_t, enum seg_rw, struct cred *);
static int32_t udf_putpage(struct vnode *, offset_t,
	size_t, int32_t, struct cred *);
static int32_t udf_map(struct vnode *, offset_t, struct as *,
	caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *);
static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
	caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *);
static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
	caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *);
static int32_t udf_l_pathconf(struct vnode *, int32_t,
	ulong_t *, struct cred *);
static int32_t udf_pageio(struct vnode *, struct page *,
	u_offset_t, size_t, int32_t, struct cred *);

int32_t ud_getpage_miss(struct vnode *, u_offset_t,
	size_t, struct seg *, caddr_t, page_t *pl[],
	size_t, enum seg_rw, int32_t);
void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
int32_t ud_page_fill(struct ud_inode *, page_t *,
	u_offset_t, uint32_t, u_offset_t *);
int32_t ud_iodone(struct buf *);
int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
int32_t ud_slave_done(struct buf *);

/*
 * Structures to control multiple IO operations to get or put pages
 * that are backed by discontiguous blocks. The master struct is
 * a dummy that holds the original bp from pageio_setup. The
 * slave struct holds the working bp's to do the actual IO. Once
 * all the slave IOs complete, the master is processed as if a single
 * IO op has completed.
 */
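
/*
 * Rough shape of the scheme (a summary inferred from the declarations
 * of ud_multi_strat()/ud_slave_done() above and the fields below, not
 * from a separate specification): ud_multi_strat() wraps the original
 * bp in a mio_master_t, carves the request into mio_slave_t buffers,
 * one per contiguous run of blocks, and issues each through
 * bdev_strategy().  ud_slave_done() runs at each slave's completion,
 * folding that slave's error into mm_error and subtracting its byte
 * count from mm_resid; the slave that brings mm_resid to zero then
 * completes the master bp as if it had been a single IO.
 */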
uint32_t master_index = 0;
typedef struct mio_master {
	kmutex_t	mm_mutex;	/* protect the fields below */
	int32_t		mm_size;
	buf_t		*mm_bp;		/* original bp */
	int32_t		mm_resid;	/* bytes remaining to transfer */
	int32_t		mm_error;	/* accumulated error from slaves */
	int32_t		mm_index;	/* XXX debugging */
} mio_master_t;

typedef struct mio_slave {
	buf_t		ms_buf;		/* working buffer for this IO chunk */
	mio_master_t	*ms_ptr;	/* pointer to master */
} mio_slave_t;

struct vnodeops *udf_vnodeops;

const fs_operation_def_t udf_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = udf_open },
	VOPNAME_CLOSE,		{ .vop_close = udf_close },
	VOPNAME_READ,		{ .vop_read = udf_read },
	VOPNAME_WRITE,		{ .vop_write = udf_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = udf_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = udf_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = udf_setattr },
	VOPNAME_ACCESS,		{ .vop_access = udf_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = udf_lookup },
	VOPNAME_CREATE,		{ .vop_create = udf_create },
	VOPNAME_REMOVE,		{ .vop_remove = udf_remove },
	VOPNAME_LINK,		{ .vop_link = udf_link },
	VOPNAME_RENAME,		{ .vop_rename = udf_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = udf_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = udf_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = udf_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = udf_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = udf_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = udf_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = udf_inactive },
	VOPNAME_FID,		{ .vop_fid = udf_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = udf_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = udf_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = udf_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = udf_frlock },
	VOPNAME_SPACE,		{ .vop_space = udf_space },
	VOPNAME_GETPAGE,	{ .vop_getpage = udf_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = udf_putpage },
	VOPNAME_MAP,		{ .vop_map = udf_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = udf_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = udf_delmap },
	VOPNAME_PATHCONF,	{ .vop_pathconf = udf_l_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = udf_pageio },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};

/* ARGSUSED */
static int32_t
udf_open(struct vnode **vpp, int32_t flag, struct cred *cr)
{
	ud_printf("udf_open\n");

	return (0);
}

/* ARGSUSED */
static int32_t
udf_close(struct vnode *vp, int32_t flag,
	int32_t count, offset_t offset, struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_close\n");

	ITIMES(ip);

	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);

	/*
	 * Push partially filled cluster at last close.
	 * ``last close'' is approximated because the dnlc
	 * may have a hold on the vnode.
	 */
	if (vp->v_count <= 2 && vp->v_type != VBAD) {
		struct ud_inode *ip = VTOI(vp);
		if (ip->i_delaylen) {
			(void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
			    B_ASYNC | B_FREE, cr);
			ip->i_delaylen = 0;
		}
	}

	return (0);
}
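
/*
 * Locking note (a summary inferred from the code below, not from a
 * separate specification): the VFS layer calls VOP_RWLOCK (udf_rwlock)
 * before VOP_READ/VOP_WRITE, so udf_read() and udf_write() are entered
 * with i_rwlock already held as reader or writer; the ASSERTs below
 * check this.  The rw_enter()/rw_exit() calls under #ifdef __lock_lint
 * exist only so lock-analysis tools see a balanced enter/exit pair;
 * they are not compiled into a normal kernel.
 */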

static int32_t
udf_read(struct vnode *vp, struct uio *uiop,
	int32_t ioflag, struct cred *cr, struct caller_context *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error;

	ud_printf("udf_read\n");

#ifdef	__lock_lint
	rw_enter(&ip->i_rwlock, RW_READER);
#endif

	ASSERT(RW_READ_HELD(&ip->i_rwlock));

	if (MANDLOCK(vp, ip->i_char)) {
		/*
		 * udf_getattr ends up being called by chklock
		 */
		error = chklock(vp, FREAD, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error) {
			goto end;
		}
	}

	rw_enter(&ip->i_contents, RW_READER);
	error = ud_rdip(ip, uiop, ioflag, cr);
	rw_exit(&ip->i_contents);

end:
#ifdef	__lock_lint
	rw_exit(&ip->i_rwlock);
#endif

	return (error);
}


int32_t ud_WRITES = 1;
int32_t ud_HW = 96 * 1024;
int32_t ud_LW = 64 * 1024;
int32_t ud_throttles = 0;

static int32_t
udf_write(struct vnode *vp, struct uio *uiop,
	int32_t ioflag, struct cred *cr, struct caller_context *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error = 0;

	ud_printf("udf_write\n");

#ifdef	__lock_lint
	rw_enter(&ip->i_rwlock, RW_WRITER);
#endif

	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));

	if (MANDLOCK(vp, ip->i_char)) {
		/*
		 * udf_getattr ends up being called by chklock
		 */
		error = chklock(vp, FWRITE, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error) {
			goto end;
		}
	}
	/*
	 * Throttle writes.
	 */
	mutex_enter(&ip->i_tlock);
	if (ud_WRITES && (ip->i_writes > ud_HW)) {
		while (ip->i_writes > ud_HW) {
			ud_throttles++;
			cv_wait(&ip->i_wrcv, &ip->i_tlock);
		}
	}
	mutex_exit(&ip->i_tlock);

	/*
	 * Write to the file
	 */
	rw_enter(&ip->i_contents, RW_WRITER);
	if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
		/*
		 * In append mode start at end of file.
		 */
		uiop->uio_loffset = ip->i_size;
	}
	error = ud_wrip(ip, uiop, ioflag, cr);
	rw_exit(&ip->i_contents);

end:
#ifdef	__lock_lint
	rw_exit(&ip->i_rwlock);
#endif

	return (error);
}

/* ARGSUSED */
static int32_t
udf_ioctl(struct vnode *vp, int32_t cmd, intptr_t arg,
	int32_t flag, struct cred *cr, int32_t *rvalp)
{
	return (ENOTTY);
}

/* ARGSUSED */
static int32_t
udf_getattr(struct vnode *vp,
	struct vattr *vap, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_getattr\n");

	if (vap->va_mask == AT_SIZE) {
		/*
		 * for performance, if only the size is requested don't bother
		 * with anything else.
		 */
		vap->va_size = ip->i_size;
		return (0);
	}

	rw_enter(&ip->i_contents, RW_READER);

	vap->va_type = vp->v_type;
	vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;

	vap->va_uid = ip->i_uid;
	vap->va_gid = ip->i_gid;
	vap->va_fsid = ip->i_dev;
	vap->va_nodeid = ip->i_icb_lbano;
	vap->va_nlink = ip->i_nlink;
	vap->va_size = ip->i_size;
	vap->va_seq = ip->i_seq;
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vap->va_rdev = ip->i_rdev;
	} else {
		vap->va_rdev = 0;
	}

	mutex_enter(&ip->i_tlock);
	ITIMES_NOLOCK(ip);	/* mark correct time in inode */
	vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
	vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
	vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
	vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
	vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
	vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
	mutex_exit(&ip->i_tlock);

	switch (ip->i_type) {
	case VBLK:
		vap->va_blksize = MAXBSIZE;
		break;
	case VCHR:
		vap->va_blksize = MAXBSIZE;
		break;
	default:
		vap->va_blksize = ip->i_udf->udf_lbsize;
		break;
	}
	vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;

	rw_exit(&ip->i_contents);

	return (0);
}

static int
ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
{
	return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr));
}

/*ARGSUSED4*/
static int32_t
udf_setattr(
	struct vnode *vp,
	struct vattr *vap,
	int32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	int32_t error = 0;
	uint32_t mask = vap->va_mask;
	struct ud_inode *ip;
	timestruc_t now;
	struct vattr ovap;

	ud_printf("udf_setattr\n");

	ip = VTOI(vp);

	/*
	 * No updates allowed to 4096 files
	 */
	if (ip->i_astrat == STRAT_TYPE4096) {
		return (EINVAL);
	}

	/*
	 * Cannot set these attributes
	 */
	if (mask & AT_NOSET) {
		return (EINVAL);
	}

	rw_enter(&ip->i_rwlock, RW_WRITER);
	rw_enter(&ip->i_contents, RW_WRITER);

	ovap.va_uid = ip->i_uid;
	ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
	error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
	    ud_iaccess_vmode, ip);
	if (error)
		goto update_inode;

	mask = vap->va_mask;
	/*
	 * Change file access modes.
	 */
	if (mask & AT_MODE) {
		ip->i_perm = VA2UD_PERM(vap->va_mode);
		ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= ICHG;
		mutex_exit(&ip->i_tlock);
	}
	if (mask & (AT_UID|AT_GID)) {
		if (mask & AT_UID) {
			ip->i_uid = vap->va_uid;
		}
		if (mask & AT_GID) {
			ip->i_gid = vap->va_gid;
		}
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= ICHG;
		mutex_exit(&ip->i_tlock);
	}
	/*
	 * Truncate file. Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto update_inode;
		}
		if (error = ud_iaccess(ip, IWRITE, cr)) {
			goto update_inode;
		}
		if (vap->va_size > MAXOFFSET_T) {
			error = EFBIG;
			goto update_inode;
		}
		if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
			goto update_inode;
		}
	}
	/*
	 * Change file access or modified times.
	 */
	if (mask & (AT_ATIME|AT_MTIME)) {
		mutex_enter(&ip->i_tlock);
		if (mask & AT_ATIME) {
			ip->i_atime.tv_sec = vap->va_atime.tv_sec;
			ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
			ip->i_flag &= ~IACC;
		}
		if (mask & AT_MTIME) {
			ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
			ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
			gethrestime(&now);
			ip->i_ctime.tv_sec = now.tv_sec;
			ip->i_ctime.tv_nsec = now.tv_nsec;
			ip->i_flag &= ~(IUPD|ICHG);
			ip->i_flag |= IMODTIME;
		}
		ip->i_flag |= IMOD;
		mutex_exit(&ip->i_tlock);
	}

update_inode:
	if (curthread->t_flag & T_DONTPEND) {
		ud_iupdat(ip, 1);
	} else {
		ITIMES_NOLOCK(ip);
	}
	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_rwlock);

	return (error);
}

/* ARGSUSED */
static int32_t
udf_access(struct vnode *vp,
	int32_t mode, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error;

	ud_printf("udf_access\n");

	if (ip->i_udf == NULL) {
		return (EIO);
	}

	error = ud_iaccess(ip, UD_UPERM2DPERM(mode), cr);

	return (error);
}

int32_t udfs_stickyhack = 1;

/* ARGSUSED */
static int32_t
udf_lookup(struct vnode *dvp,
	char *nm, struct vnode **vpp, struct pathname *pnp,
	int32_t flags, struct vnode *rdir, struct cred *cr)
{
	int32_t error;
	struct vnode *vp;
	struct ud_inode *ip, *xip;

	ud_printf("udf_lookup\n");
	/*
	 * Null component name is a synonym for directory being searched.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		error = 0;
		goto out;
	}

	/*
	 * Fast path: Check the directory name lookup cache.
	 */
	ip = VTOI(dvp);
	if (vp = dnlc_lookup(dvp, nm)) {
		/*
		 * Check accessibility of directory.
		 */
		if ((error = ud_iaccess(ip, IEXEC, cr)) != 0) {
			VN_RELE(vp);
		}
		xip = VTOI(vp);
	} else {
		error = ud_dirlook(ip, nm, &xip, cr, 1);
		ITIMES(ip);
	}

	if (error == 0) {
		ip = xip;
		*vpp = ITOV(ip);
		if ((ip->i_type != VDIR) &&
		    (ip->i_char & ISVTX) &&
		    ((ip->i_perm & IEXEC) == 0) &&
		    udfs_stickyhack) {
			mutex_enter(&(*vpp)->v_lock);
			(*vpp)->v_flag |= VISSWAP;
			mutex_exit(&(*vpp)->v_lock);
		}
		ITIMES(ip);
		/*
		 * If vnode is a device return special vnode instead.
		 */
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;
			newvp = specvp(*vpp, (*vpp)->v_rdev,
			    (*vpp)->v_type, cr);
			VN_RELE(*vpp);
			if (newvp == NULL) {
				error = ENOSYS;
			} else {
				*vpp = newvp;
			}
		}
	}
out:
	return (error);
}

/* ARGSUSED */
static int32_t
udf_create(struct vnode *dvp,
	char *name, struct vattr *vap, enum vcexcl excl,
	int32_t mode, struct vnode **vpp, struct cred *cr, int32_t flag)
{
	int32_t error;
	struct ud_inode *ip = VTOI(dvp), *xip;

	ud_printf("udf_create\n");

	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		ITIMES(ip);
		error = EEXIST;
	} else {
		xip = NULL;
		rw_enter(&ip->i_rwlock, RW_WRITER);
		error = ud_direnter(ip, name, DE_CREATE,
		    (struct ud_inode *)0, (struct ud_inode *)0,
		    vap, &xip, cr);
		rw_exit(&ip->i_rwlock);
		ITIMES(ip);
		ip = xip;
	}
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_WRITER);
#else
	if (ip != NULL) {
		rw_enter(&ip->i_contents, RW_WRITER);
	}
#endif

	/*
	 * If the file already exists and this is a non-exclusive create,
	 * check permissions and allow access for non-directories.
	 * Read-only create of an existing directory is also allowed.
	 * We fail an exclusive create of anything which already exists.
	 */
	if (error == EEXIST) {
		if (excl == NONEXCL) {
			if ((ip->i_type == VDIR) && (mode & VWRITE)) {
				error = EISDIR;
			} else if (mode) {
				error = ud_iaccess(ip,
				    UD_UPERM2DPERM(mode), cr);
			} else {
				error = 0;
			}
		}
		if (error) {
			rw_exit(&ip->i_contents);
			VN_RELE(ITOV(ip));
			goto out;
		} else if ((ip->i_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
			/*
			 * Truncate regular files, if requested by caller.
			 * Grab i_rwlock to make sure no one else is
			 * currently writing to the file (we promised
			 * bmap we would do this).
			 * Must get the locks in the correct order.
			 */
			if (ip->i_size == 0) {
				ip->i_flag |= ICHG | IUPD;
			} else {
				rw_exit(&ip->i_contents);
				rw_enter(&ip->i_rwlock, RW_WRITER);
				rw_enter(&ip->i_contents, RW_WRITER);
				(void) ud_itrunc(ip, 0, 0, cr);
				rw_exit(&ip->i_rwlock);
			}
			vnevent_create(ITOV(ip));
		}
	}

	if (error == 0) {
		*vpp = ITOV(ip);
		ITIMES(ip);
	}
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (ip != NULL) {
		rw_exit(&ip->i_contents);
	}
#endif
	if (error) {
		goto out;
	}

	/*
	 * If vnode is a device return special vnode instead.
	 */
	if (!error && IS_DEVVP(*vpp)) {
		struct vnode *newvp;

		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (newvp == NULL) {
			error = ENOSYS;
			goto out;
		}
		*vpp = newvp;
	}
out:
	return (error);
}

static int32_t
udf_remove(struct vnode *vp, char *nm, struct cred *cr)
{
	int32_t error;
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_remove\n");

	rw_enter(&ip->i_rwlock, RW_WRITER);
	error = ud_dirremove(ip, nm,
	    (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr);
	rw_exit(&ip->i_rwlock);
	ITIMES(ip);

	return (error);
}

static int32_t
udf_link(struct vnode *tdvp,
	struct vnode *svp, char *tnm, struct cred *cr)
{
	int32_t error;
	struct vnode *realvp;
	struct ud_inode *sip;
	struct ud_inode *tdp;

	ud_printf("udf_link\n");
	if (VOP_REALVP(svp, &realvp) == 0) {
		svp = realvp;
	}

	/*
	 * Do not allow links to directories
	 */
	if (svp->v_type == VDIR) {
		return (EPERM);
	}

	sip = VTOI(svp);

	if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
		return (EPERM);

	tdp = VTOI(tdvp);

	rw_enter(&tdp->i_rwlock, RW_WRITER);
	error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
	    sip, (struct vattr *)0, (struct ud_inode **)0, cr);
	rw_exit(&tdp->i_rwlock);
	ITIMES(sip);
	ITIMES(tdp);

	if (error == 0) {
		vnevent_link(svp);
	}

	return (error);
}

/* ARGSUSED */
static int32_t
udf_rename(struct vnode *sdvp,
	char *snm, struct vnode *tdvp,
	char *tnm, struct cred *cr)
{
	int32_t error = 0;
	struct udf_vfs *udf_vfsp;
	struct ud_inode *sip;		/* source inode */
	struct ud_inode *sdp, *tdp;	/* source and target parent inode */
	struct vnode *realvp;

	ud_printf("udf_rename\n");

	if (VOP_REALVP(tdvp, &realvp) == 0) {
		tdvp = realvp;
	}

	sdp = VTOI(sdvp);
	tdp = VTOI(tdvp);

	udf_vfsp = sdp->i_udf;

	mutex_enter(&udf_vfsp->udf_rename_lck);
	/*
	 * Look up inode of file we're supposed to rename.
	 */
	if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
		mutex_exit(&udf_vfsp->udf_rename_lck);
		return (error);
	}
	/*
	 * be sure this is not a directory with another file system mounted
	 * over it. If it is just give up the locks, and return with
	 * EBUSY
	 */
	if (vn_mountedvfs(ITOV(sip)) != NULL) {
		error = EBUSY;
		goto errout;
	}
	/*
	 * Make sure we can delete the source entry. This requires
	 * write permission on the containing directory. If that
	 * directory is "sticky" it further requires (except for
	 * privileged users) that the user own the directory or the
	 * source entry, or else have permission to write the source
	 * entry.
	 */
	rw_enter(&sdp->i_contents, RW_READER);
	rw_enter(&sip->i_contents, RW_READER);
	if ((error = ud_iaccess(sdp, IWRITE, cr)) != 0 ||
	    (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
		rw_exit(&sip->i_contents);
		rw_exit(&sdp->i_contents);
		ITIMES(sip);
		goto errout;
	}

	/*
	 * Check for renaming '.' or '..' or alias of '.'
891 */ 892 if ((strcmp(snm, ".") == 0) || 893 (strcmp(snm, "..") == 0) || 894 (sdp == sip)) { 895 error = EINVAL; 896 rw_exit(&sip->i_contents); 897 rw_exit(&sdp->i_contents); 898 goto errout; 899 } 900 rw_exit(&sip->i_contents); 901 rw_exit(&sdp->i_contents); 902 903 904 /* 905 * Link source to the target. 906 */ 907 rw_enter(&tdp->i_rwlock, RW_WRITER); 908 if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip, 909 (struct vattr *)0, (struct ud_inode **)0, cr)) { 910 /* 911 * ESAME isn't really an error; it indicates that the 912 * operation should not be done because the source and target 913 * are the same file, but that no error should be reported. 914 */ 915 if (error == ESAME) { 916 error = 0; 917 } 918 rw_exit(&tdp->i_rwlock); 919 goto errout; 920 } 921 vnevent_rename_src(ITOV(sip), sdvp, snm); 922 rw_exit(&tdp->i_rwlock); 923 924 rw_enter(&sdp->i_rwlock, RW_WRITER); 925 /* 926 * Unlink the source. 927 * Remove the source entry. ud_dirremove() checks that the entry 928 * still reflects sip, and returns an error if it doesn't. 929 * If the entry has changed just forget about it. Release 930 * the source inode. 931 */ 932 if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0, 933 DR_RENAME, cr)) == ENOENT) { 934 error = 0; 935 } 936 rw_exit(&sdp->i_rwlock); 937 errout: 938 ITIMES(sdp); 939 ITIMES(tdp); 940 VN_RELE(ITOV(sip)); 941 mutex_exit(&udf_vfsp->udf_rename_lck); 942 943 return (error); 944 } 945 946 static int32_t 947 udf_mkdir(struct vnode *dvp, 948 char *dirname, struct vattr *vap, 949 struct vnode **vpp, struct cred *cr) 950 { 951 int32_t error; 952 struct ud_inode *ip; 953 struct ud_inode *xip; 954 955 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 956 957 ud_printf("udf_mkdir\n"); 958 959 ip = VTOI(dvp); 960 rw_enter(&ip->i_rwlock, RW_WRITER); 961 error = ud_direnter(ip, dirname, DE_MKDIR, 962 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr); 963 rw_exit(&ip->i_rwlock); 964 ITIMES(ip); 965 if (error == 0) { 966 ip = xip; 967 *vpp = ITOV(ip); 968 ITIMES(ip); 969 } else if (error == EEXIST) { 970 ITIMES(xip); 971 VN_RELE(ITOV(xip)); 972 } 973 974 return (error); 975 } 976 977 static int32_t 978 udf_rmdir(struct vnode *vp, 979 char *nm, struct vnode *cdir, struct cred *cr) 980 { 981 int32_t error; 982 struct ud_inode *ip = VTOI(vp); 983 984 ud_printf("udf_rmdir\n"); 985 986 rw_enter(&ip->i_rwlock, RW_WRITER); 987 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR, cr); 988 rw_exit(&ip->i_rwlock); 989 ITIMES(ip); 990 991 return (error); 992 } 993 994 /* ARGSUSED */ 995 static int32_t 996 udf_readdir(struct vnode *vp, 997 struct uio *uiop, struct cred *cr, int32_t *eofp) 998 { 999 struct ud_inode *ip; 1000 struct dirent64 *nd; 1001 struct udf_vfs *udf_vfsp; 1002 int32_t error = 0, len, outcount = 0; 1003 uint32_t dirsiz, offset; 1004 uint32_t bufsize, ndlen, dummy; 1005 caddr_t outbuf; 1006 caddr_t outb, end_outb; 1007 struct iovec *iovp; 1008 1009 uint8_t *dname; 1010 int32_t length; 1011 1012 uint8_t *buf = NULL; 1013 1014 struct fbuf *fbp = NULL; 1015 struct file_id *fid; 1016 uint8_t *name; 1017 1018 1019 ud_printf("udf_readdir\n"); 1020 1021 ip = VTOI(vp); 1022 udf_vfsp = ip->i_udf; 1023 1024 dirsiz = ip->i_size; 1025 if ((uiop->uio_offset >= dirsiz) || 1026 (ip->i_nlink <= 0)) { 1027 if (eofp) { 1028 *eofp = 1; 1029 } 1030 return (0); 1031 } 1032 1033 offset = uiop->uio_offset; 1034 iovp = uiop->uio_iov; 1035 bufsize = iovp->iov_len; 1036 1037 outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP); 1038 end_outb = 
outb + bufsize; 1039 nd = (struct dirent64 *)outbuf; 1040 1041 dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP); 1042 buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP); 1043 1044 if (offset == 0) { 1045 len = DIRENT64_RECLEN(1); 1046 if (((caddr_t)nd + len) >= end_outb) { 1047 error = EINVAL; 1048 goto end; 1049 } 1050 nd->d_ino = ip->i_icb_lbano; 1051 nd->d_reclen = (uint16_t)len; 1052 nd->d_off = 0x10; 1053 nd->d_name[0] = '.'; 1054 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1); 1055 nd = (struct dirent64 *)((char *)nd + nd->d_reclen); 1056 outcount++; 1057 } else if (offset == 0x10) { 1058 offset = 0; 1059 } 1060 1061 while (offset < dirsiz) { 1062 error = ud_get_next_fid(ip, &fbp, 1063 offset, &fid, &name, buf); 1064 if (error != 0) { 1065 break; 1066 } 1067 1068 if ((fid->fid_flags & FID_DELETED) == 0) { 1069 if (fid->fid_flags & FID_PARENT) { 1070 1071 len = DIRENT64_RECLEN(2); 1072 if (((caddr_t)nd + len) >= end_outb) { 1073 error = EINVAL; 1074 break; 1075 } 1076 1077 nd->d_ino = ip->i_icb_lbano; 1078 nd->d_reclen = (uint16_t)len; 1079 nd->d_off = offset + FID_LEN(fid); 1080 nd->d_name[0] = '.'; 1081 nd->d_name[1] = '.'; 1082 bzero(&nd->d_name[2], 1083 DIRENT64_NAMELEN(len) - 2); 1084 nd = (struct dirent64 *) 1085 ((char *)nd + nd->d_reclen); 1086 } else { 1087 if ((error = ud_uncompress(fid->fid_idlen, 1088 &length, name, dname)) != 0) { 1089 break; 1090 } 1091 if (length == 0) { 1092 offset += FID_LEN(fid); 1093 continue; 1094 } 1095 len = DIRENT64_RECLEN(length); 1096 if (((caddr_t)nd + len) >= end_outb) { 1097 if (!outcount) { 1098 error = EINVAL; 1099 } 1100 break; 1101 } 1102 (void) strncpy(nd->d_name, 1103 (caddr_t)dname, length); 1104 bzero(&nd->d_name[length], 1105 DIRENT64_NAMELEN(len) - length); 1106 nd->d_ino = ud_xlate_to_daddr(udf_vfsp, 1107 SWAP_16(fid->fid_icb.lad_ext_prn), 1108 SWAP_32(fid->fid_icb.lad_ext_loc), 1, 1109 &dummy); 1110 nd->d_reclen = (uint16_t)len; 1111 nd->d_off = offset + FID_LEN(fid); 1112 nd = (struct dirent64 *) 1113 ((char *)nd + nd->d_reclen); 1114 } 1115 outcount++; 1116 } 1117 1118 offset += FID_LEN(fid); 1119 } 1120 1121 end: 1122 if (fbp != NULL) { 1123 fbrelse(fbp, S_OTHER); 1124 } 1125 ndlen = ((char *)nd - outbuf); 1126 /* 1127 * In case of error do not call uiomove. 1128 * Return the error to the caller. 
1129 */ 1130 if ((error == 0) && (ndlen != 0)) { 1131 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop); 1132 uiop->uio_offset = offset; 1133 } 1134 kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize); 1135 kmem_free((caddr_t)dname, 1024); 1136 kmem_free(outbuf, (uint32_t)bufsize); 1137 if (eofp && error == 0) { 1138 *eofp = (uiop->uio_offset >= dirsiz); 1139 } 1140 return (error); 1141 } 1142 1143 /* ARGSUSED */ 1144 static int32_t 1145 udf_symlink(struct vnode *dvp, 1146 char *linkname, struct vattr *vap, 1147 char *target, struct cred *cr) 1148 { 1149 int32_t error = 0, outlen; 1150 uint32_t ioflag = 0; 1151 struct ud_inode *ip, *dip = VTOI(dvp); 1152 1153 struct path_comp *pc; 1154 int8_t *dname = NULL, *uname = NULL, *sp; 1155 1156 ud_printf("udf_symlink\n"); 1157 1158 ip = (struct ud_inode *)0; 1159 vap->va_type = VLNK; 1160 vap->va_rdev = 0; 1161 1162 rw_enter(&dip->i_rwlock, RW_WRITER); 1163 error = ud_direnter(dip, linkname, DE_CREATE, 1164 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr); 1165 rw_exit(&dip->i_rwlock); 1166 if (error == 0) { 1167 dname = kmem_zalloc(1024, KM_SLEEP); 1168 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1169 1170 pc = (struct path_comp *)uname; 1171 /* 1172 * If the first character in target is "/" 1173 * then skip it and create entry for it 1174 */ 1175 if (*target == '/') { 1176 pc->pc_type = 2; 1177 pc->pc_len = 0; 1178 pc = (struct path_comp *)(((char *)pc) + 4); 1179 while (*target == '/') { 1180 target++; 1181 } 1182 } 1183 1184 while (*target != NULL) { 1185 sp = target; 1186 while ((*target != '/') && (*target != '\0')) { 1187 target ++; 1188 } 1189 /* 1190 * We got the next component of the 1191 * path name. Create path_comp of 1192 * appropriate type 1193 */ 1194 if (((target - sp) == 1) && (*sp == '.')) { 1195 /* 1196 * Dot entry. 1197 */ 1198 pc->pc_type = 4; 1199 pc = (struct path_comp *)(((char *)pc) + 4); 1200 } else if (((target - sp) == 2) && 1201 (*sp == '.') && ((*(sp + 1)) == '.')) { 1202 /* 1203 * DotDot entry. 
1204 */ 1205 pc->pc_type = 3; 1206 pc = (struct path_comp *)(((char *)pc) + 4); 1207 } else { 1208 /* 1209 * convert the user given name 1210 * into appropriate form to be put 1211 * on the media 1212 */ 1213 outlen = 1024; /* set to size of dname */ 1214 if (error = ud_compress(target - sp, &outlen, 1215 (uint8_t *)sp, (uint8_t *)dname)) { 1216 break; 1217 } 1218 pc->pc_type = 5; 1219 /* LINTED */ 1220 pc->pc_len = outlen; 1221 dname[outlen] = '\0'; 1222 (void) strcpy((char *)pc->pc_id, dname); 1223 pc = (struct path_comp *) 1224 (((char *)pc) + 4 + outlen); 1225 } 1226 while (*target == '/') { 1227 target++; 1228 } 1229 if (*target == NULL) { 1230 break; 1231 } 1232 } 1233 1234 rw_enter(&ip->i_contents, RW_WRITER); 1235 if (error == 0) { 1236 ioflag = FWRITE; 1237 if (curthread->t_flag & T_DONTPEND) { 1238 ioflag |= FDSYNC; 1239 } 1240 error = ud_rdwri(UIO_WRITE, ioflag, ip, 1241 uname, ((int8_t *)pc) - uname, 1242 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr); 1243 } 1244 if (error) { 1245 ud_idrop(ip); 1246 rw_exit(&ip->i_contents); 1247 rw_enter(&dip->i_rwlock, RW_WRITER); 1248 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0, 1249 (struct vnode *)0, DR_REMOVE, cr); 1250 rw_exit(&dip->i_rwlock); 1251 goto update_inode; 1252 } 1253 rw_exit(&ip->i_contents); 1254 } 1255 1256 if ((error == 0) || (error == EEXIST)) { 1257 VN_RELE(ITOV(ip)); 1258 } 1259 1260 update_inode: 1261 ITIMES(VTOI(dvp)); 1262 if (uname != NULL) { 1263 kmem_free(uname, PAGESIZE); 1264 } 1265 if (dname != NULL) { 1266 kmem_free(dname, 1024); 1267 } 1268 1269 return (error); 1270 } 1271 1272 /* ARGSUSED */ 1273 static int32_t 1274 udf_readlink(struct vnode *vp, 1275 struct uio *uiop, struct cred *cr) 1276 { 1277 int32_t error = 0, off, id_len, size, len; 1278 int8_t *dname = NULL, *uname = NULL; 1279 struct ud_inode *ip; 1280 struct fbuf *fbp = NULL; 1281 struct path_comp *pc; 1282 1283 ud_printf("udf_readlink\n"); 1284 1285 if (vp->v_type != VLNK) { 1286 return (EINVAL); 1287 } 1288 1289 ip = VTOI(vp); 1290 size = ip->i_size; 1291 if (size > PAGESIZE) { 1292 return (EIO); 1293 } 1294 1295 if (size == 0) { 1296 return (0); 1297 } 1298 1299 dname = kmem_zalloc(1024, KM_SLEEP); 1300 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1301 1302 rw_enter(&ip->i_contents, RW_READER); 1303 1304 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) { 1305 goto end; 1306 } 1307 1308 off = 0; 1309 1310 while (off < size) { 1311 pc = (struct path_comp *)(fbp->fb_addr + off); 1312 switch (pc->pc_type) { 1313 case 1 : 1314 (void) strcpy(uname, ip->i_udf->udf_fsmnt); 1315 (void) strcat(uname, "/"); 1316 break; 1317 case 2 : 1318 if (pc->pc_len != 0) { 1319 goto end; 1320 } 1321 uname[0] = '/'; 1322 uname[1] = '\0'; 1323 break; 1324 case 3 : 1325 (void) strcat(uname, "../"); 1326 break; 1327 case 4 : 1328 (void) strcat(uname, "./"); 1329 break; 1330 case 5 : 1331 if ((error = ud_uncompress(pc->pc_len, &id_len, 1332 pc->pc_id, (uint8_t *)dname)) != 0) { 1333 break; 1334 } 1335 dname[id_len] = '\0'; 1336 (void) strcat(uname, dname); 1337 (void) strcat(uname, "/"); 1338 break; 1339 default : 1340 error = EINVAL; 1341 goto end; 1342 } 1343 off += 4 + pc->pc_len; 1344 } 1345 len = strlen(uname) - 1; 1346 if (uname[len] == '/') { 1347 if (len == 0) { 1348 /* 1349 * special case link to / 1350 */ 1351 len = 1; 1352 } else { 1353 uname[len] = '\0'; 1354 } 1355 } 1356 1357 error = uiomove(uname, len, UIO_READ, uiop); 1358 1359 ITIMES(ip); 1360 1361 end: 1362 if (fbp != NULL) { 1363 fbrelse(fbp, S_OTHER); 1364 } 1365 rw_exit(&ip->i_contents); 
1366 if (uname != NULL) { 1367 kmem_free(uname, PAGESIZE); 1368 } 1369 if (dname != NULL) { 1370 kmem_free(dname, 1024); 1371 } 1372 return (error); 1373 } 1374 1375 /* ARGSUSED */ 1376 static int32_t 1377 udf_fsync(struct vnode *vp, 1378 int32_t syncflag, struct cred *cr) 1379 { 1380 int32_t error = 0; 1381 struct ud_inode *ip = VTOI(vp); 1382 1383 ud_printf("udf_fsync\n"); 1384 1385 rw_enter(&ip->i_contents, RW_WRITER); 1386 if (!(IS_SWAPVP(vp))) { 1387 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */ 1388 } 1389 if (error == 0) { 1390 error = ud_sync_indir(ip); 1391 } 1392 ITIMES(ip); /* XXX: is this necessary ??? */ 1393 rw_exit(&ip->i_contents); 1394 1395 return (error); 1396 } 1397 1398 /* ARGSUSED */ 1399 static void 1400 udf_inactive(struct vnode *vp, struct cred *cr) 1401 { 1402 ud_printf("udf_iinactive\n"); 1403 1404 ud_iinactive(VTOI(vp), cr); 1405 } 1406 1407 static int32_t 1408 udf_fid(struct vnode *vp, struct fid *fidp) 1409 { 1410 struct udf_fid *udfidp; 1411 struct ud_inode *ip = VTOI(vp); 1412 1413 ud_printf("udf_fid\n"); 1414 1415 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) { 1416 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1417 return (ENOSPC); 1418 } 1419 1420 udfidp = (struct udf_fid *)fidp; 1421 bzero((char *)udfidp, sizeof (struct udf_fid)); 1422 rw_enter(&ip->i_contents, RW_READER); 1423 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1424 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff; 1425 udfidp->udfid_prn = ip->i_icb_prn; 1426 udfidp->udfid_icb_lbn = ip->i_icb_block; 1427 rw_exit(&ip->i_contents); 1428 1429 return (0); 1430 } 1431 1432 /* ARGSUSED2 */ 1433 static int 1434 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1435 { 1436 struct ud_inode *ip = VTOI(vp); 1437 1438 ud_printf("udf_rwlock\n"); 1439 1440 if (write_lock) { 1441 rw_enter(&ip->i_rwlock, RW_WRITER); 1442 } else { 1443 rw_enter(&ip->i_rwlock, RW_READER); 1444 } 1445 #ifdef __lock_lint 1446 rw_exit(&ip->i_rwlock); 1447 #endif 1448 return (write_lock); 1449 } 1450 1451 /* ARGSUSED */ 1452 static void 1453 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1454 { 1455 struct ud_inode *ip = VTOI(vp); 1456 1457 ud_printf("udf_rwunlock\n"); 1458 1459 #ifdef __lock_lint 1460 rw_enter(&ip->i_rwlock, RW_WRITER); 1461 #endif 1462 1463 rw_exit(&ip->i_rwlock); 1464 1465 } 1466 1467 /* ARGSUSED */ 1468 static int32_t 1469 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp) 1470 { 1471 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1472 } 1473 1474 static int32_t 1475 udf_frlock(struct vnode *vp, int32_t cmd, struct flock64 *bfp, 1476 int32_t flag, offset_t offset, struct flk_callback *flk_cbp, 1477 cred_t *cr) 1478 { 1479 struct ud_inode *ip = VTOI(vp); 1480 1481 ud_printf("udf_frlock\n"); 1482 1483 /* 1484 * If file is being mapped, disallow frlock. 1485 * XXX I am not holding tlock while checking i_mapcnt because the 1486 * current locking strategy drops all locks before calling fs_frlock. 1487 * So, mapcnt could change before we enter fs_frlock making is 1488 * meaningless to have held tlock in the first place. 
1489 */ 1490 if ((ip->i_mapcnt > 0) && 1491 (MANDLOCK(vp, ip->i_char))) { 1492 return (EAGAIN); 1493 } 1494 1495 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr)); 1496 } 1497 1498 /*ARGSUSED6*/ 1499 static int32_t 1500 udf_space( 1501 struct vnode *vp, 1502 int32_t cmd, 1503 struct flock64 *bfp, 1504 int32_t flag, 1505 offset_t offset, 1506 cred_t *cr, 1507 caller_context_t *ct) 1508 { 1509 int32_t error = 0; 1510 1511 ud_printf("udf_space\n"); 1512 1513 if (cmd != F_FREESP) { 1514 error = EINVAL; 1515 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) { 1516 error = ud_freesp(vp, bfp, flag, cr); 1517 } 1518 1519 return (error); 1520 } 1521 1522 /* ARGSUSED */ 1523 static int32_t 1524 udf_getpage(struct vnode *vp, offset_t off, 1525 size_t len, uint32_t *protp, struct page **plarr, 1526 size_t plsz, struct seg *seg, caddr_t addr, 1527 enum seg_rw rw, struct cred *cr) 1528 { 1529 struct ud_inode *ip = VTOI(vp); 1530 int32_t error, has_holes, beyond_eof, seqmode, dolock; 1531 int32_t pgsize = PAGESIZE; 1532 struct udf_vfs *udf_vfsp = ip->i_udf; 1533 page_t **pl; 1534 u_offset_t pgoff, eoff, uoff; 1535 krw_t rwtype; 1536 caddr_t pgaddr; 1537 1538 ud_printf("udf_getpage\n"); 1539 1540 uoff = (u_offset_t)off; /* type conversion */ 1541 if (protp) { 1542 *protp = PROT_ALL; 1543 } 1544 if (vp->v_flag & VNOMAP) { 1545 return (ENOSYS); 1546 } 1547 seqmode = ip->i_nextr == uoff && rw != S_CREATE; 1548 1549 rwtype = RW_READER; 1550 dolock = (rw_owner(&ip->i_contents) != curthread); 1551 retrylock: 1552 #ifdef __lock_lint 1553 rw_enter(&ip->i_contents, rwtype); 1554 #else 1555 if (dolock) { 1556 rw_enter(&ip->i_contents, rwtype); 1557 } 1558 #endif 1559 1560 /* 1561 * We may be getting called as a side effect of a bmap using 1562 * fbread() when the blocks might be being allocated and the 1563 * size has not yet been up'ed. In this case we want to be 1564 * able to return zero pages if we get back UDF_HOLE from 1565 * calling bmap for a non write case here. We also might have 1566 * to read some frags from the disk into a page if we are 1567 * extending the number of frags for a given lbn in bmap(). 1568 */ 1569 beyond_eof = uoff + len > ip->i_size + PAGEOFFSET; 1570 if (beyond_eof && seg != segkmap) { 1571 #ifdef __lock_lint 1572 rw_exit(&ip->i_contents); 1573 #else 1574 if (dolock) { 1575 rw_exit(&ip->i_contents); 1576 } 1577 #endif 1578 return (EFAULT); 1579 } 1580 1581 /* 1582 * Must hold i_contents lock throughout the call to pvn_getpages 1583 * since locked pages are returned from each call to ud_getapage. 1584 * Must *not* return locked pages and then try for contents lock 1585 * due to lock ordering requirements (inode > page) 1586 */ 1587 1588 has_holes = ud_bmap_has_holes(ip); 1589 1590 if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) { 1591 int32_t blk_size, count; 1592 u_offset_t offset; 1593 1594 /* 1595 * We must acquire the RW_WRITER lock in order to 1596 * call bmap_write(). 1597 */ 1598 if (dolock && rwtype == RW_READER) { 1599 rwtype = RW_WRITER; 1600 1601 if (!rw_tryupgrade(&ip->i_contents)) { 1602 1603 rw_exit(&ip->i_contents); 1604 1605 goto retrylock; 1606 } 1607 } 1608 1609 /* 1610 * May be allocating disk blocks for holes here as 1611 * a result of mmap faults. write(2) does the bmap_write 1612 * in rdip/wrip, not here. We are not dealing with frags 1613 * in this case. 
1614 */ 1615 offset = uoff; 1616 while ((offset < uoff + len) && 1617 (offset < ip->i_size)) { 1618 /* 1619 * the variable "bnp" is to simplify the expression for 1620 * the compiler; * just passing in &bn to bmap_write 1621 * causes a compiler "loop" 1622 */ 1623 1624 blk_size = udf_vfsp->udf_lbsize; 1625 if ((offset + blk_size) > ip->i_size) { 1626 count = ip->i_size - offset; 1627 } else { 1628 count = blk_size; 1629 } 1630 error = ud_bmap_write(ip, offset, count, 0, cr); 1631 if (error) { 1632 goto update_inode; 1633 } 1634 offset += count; /* XXX - make this contig */ 1635 } 1636 } 1637 1638 /* 1639 * Can be a reader from now on. 1640 */ 1641 #ifdef __lock_lint 1642 if (rwtype == RW_WRITER) { 1643 rw_downgrade(&ip->i_contents); 1644 } 1645 #else 1646 if (dolock && rwtype == RW_WRITER) { 1647 rw_downgrade(&ip->i_contents); 1648 } 1649 #endif 1650 1651 /* 1652 * We remove PROT_WRITE in cases when the file has UDF holes 1653 * because we don't want to call bmap_read() to check each 1654 * page if it is backed with a disk block. 1655 */ 1656 if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) { 1657 *protp &= ~PROT_WRITE; 1658 } 1659 1660 error = 0; 1661 1662 /* 1663 * The loop looks up pages in the range <off, off + len). 1664 * For each page, we first check if we should initiate an asynchronous 1665 * read ahead before we call page_lookup (we may sleep in page_lookup 1666 * for a previously initiated disk read). 1667 */ 1668 eoff = (uoff + len); 1669 for (pgoff = uoff, pgaddr = addr, pl = plarr; 1670 pgoff < eoff; /* empty */) { 1671 page_t *pp; 1672 u_offset_t nextrio; 1673 se_t se; 1674 1675 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED); 1676 1677 /* 1678 * Handle async getpage (faultahead) 1679 */ 1680 if (plarr == NULL) { 1681 ip->i_nextrio = pgoff; 1682 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1683 pgoff += pgsize; 1684 pgaddr += pgsize; 1685 continue; 1686 } 1687 1688 /* 1689 * Check if we should initiate read ahead of next cluster. 1690 * We call page_exists only when we need to confirm that 1691 * we have the current page before we initiate the read ahead. 1692 */ 1693 nextrio = ip->i_nextrio; 1694 if (seqmode && 1695 pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio && 1696 nextrio < ip->i_size && page_exists(vp, pgoff)) 1697 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1698 1699 if ((pp = page_lookup(vp, pgoff, se)) != NULL) { 1700 1701 /* 1702 * We found the page in the page cache. 1703 */ 1704 *pl++ = pp; 1705 pgoff += pgsize; 1706 pgaddr += pgsize; 1707 len -= pgsize; 1708 plsz -= pgsize; 1709 } else { 1710 1711 /* 1712 * We have to create the page, or read it from disk. 1713 */ 1714 if (error = ud_getpage_miss(vp, pgoff, len, 1715 seg, pgaddr, pl, plsz, rw, seqmode)) { 1716 goto error_out; 1717 } 1718 1719 while (*pl != NULL) { 1720 pl++; 1721 pgoff += pgsize; 1722 pgaddr += pgsize; 1723 len -= pgsize; 1724 plsz -= pgsize; 1725 } 1726 } 1727 } 1728 1729 /* 1730 * Return pages up to plsz if they are in the page cache. 1731 * We cannot return pages if there is a chance that they are 1732 * backed with a UDF hole and rw is S_WRITE or S_CREATE. 
1733 */ 1734 if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) { 1735 1736 ASSERT((protp == NULL) || 1737 !(has_holes && (*protp & PROT_WRITE))); 1738 1739 eoff = pgoff + plsz; 1740 while (pgoff < eoff) { 1741 page_t *pp; 1742 1743 if ((pp = page_lookup_nowait(vp, pgoff, 1744 SE_SHARED)) == NULL) 1745 break; 1746 1747 *pl++ = pp; 1748 pgoff += pgsize; 1749 plsz -= pgsize; 1750 } 1751 } 1752 1753 if (plarr) 1754 *pl = NULL; /* Terminate page list */ 1755 ip->i_nextr = pgoff; 1756 1757 error_out: 1758 if (error && plarr) { 1759 /* 1760 * Release any pages we have locked. 1761 */ 1762 while (pl > &plarr[0]) 1763 page_unlock(*--pl); 1764 1765 plarr[0] = NULL; 1766 } 1767 1768 update_inode: 1769 #ifdef __lock_lint 1770 rw_exit(&ip->i_contents); 1771 #else 1772 if (dolock) { 1773 rw_exit(&ip->i_contents); 1774 } 1775 #endif 1776 1777 /* 1778 * If the inode is not already marked for IACC (in rwip() for read) 1779 * and the inode is not marked for no access time update (in rwip() 1780 * for write) then update the inode access time and mod time now. 1781 */ 1782 mutex_enter(&ip->i_tlock); 1783 if ((ip->i_flag & (IACC | INOACC)) == 0) { 1784 if ((rw != S_OTHER) && (ip->i_type != VDIR)) { 1785 ip->i_flag |= IACC; 1786 } 1787 if (rw == S_WRITE) { 1788 ip->i_flag |= IUPD; 1789 } 1790 ITIMES_NOLOCK(ip); 1791 } 1792 mutex_exit(&ip->i_tlock); 1793 1794 return (error); 1795 } 1796 1797 int32_t ud_delay = 1; 1798 1799 /* ARGSUSED */ 1800 static int32_t 1801 udf_putpage(struct vnode *vp, offset_t off, 1802 size_t len, int32_t flags, struct cred *cr) 1803 { 1804 struct ud_inode *ip; 1805 int32_t error = 0; 1806 1807 ud_printf("udf_putpage\n"); 1808 1809 ip = VTOI(vp); 1810 #ifdef __lock_lint 1811 rw_enter(&ip->i_contents, RW_WRITER); 1812 #endif 1813 1814 if (vp->v_count == 0) { 1815 cmn_err(CE_WARN, "ud_putpage : bad v_count"); 1816 error = EINVAL; 1817 goto out; 1818 } 1819 1820 if (vp->v_flag & VNOMAP) { 1821 error = ENOSYS; 1822 goto out; 1823 } 1824 1825 if (flags & B_ASYNC) { 1826 if (ud_delay && len && 1827 (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) { 1828 mutex_enter(&ip->i_tlock); 1829 1830 /* 1831 * If nobody stalled, start a new cluster. 1832 */ 1833 if (ip->i_delaylen == 0) { 1834 ip->i_delayoff = off; 1835 ip->i_delaylen = len; 1836 mutex_exit(&ip->i_tlock); 1837 goto out; 1838 } 1839 1840 /* 1841 * If we have a full cluster or they are not contig, 1842 * then push last cluster and start over. 1843 */ 1844 if (ip->i_delaylen >= WR_CLUSTSZ(ip) || 1845 ip->i_delayoff + ip->i_delaylen != off) { 1846 u_offset_t doff; 1847 size_t dlen; 1848 1849 doff = ip->i_delayoff; 1850 dlen = ip->i_delaylen; 1851 ip->i_delayoff = off; 1852 ip->i_delaylen = len; 1853 mutex_exit(&ip->i_tlock); 1854 error = ud_putpages(vp, doff, dlen, flags, cr); 1855 /* LMXXX - flags are new val, not old */ 1856 goto out; 1857 } 1858 1859 /* 1860 * There is something there, it's not full, and 1861 * it is contig. 1862 */ 1863 ip->i_delaylen += len; 1864 mutex_exit(&ip->i_tlock); 1865 goto out; 1866 } 1867 1868 /* 1869 * Must have weird flags or we are not clustering. 
1870 */ 1871 } 1872 1873 error = ud_putpages(vp, off, len, flags, cr); 1874 1875 out: 1876 #ifdef __lock_lint 1877 rw_exit(&ip->i_contents); 1878 #endif 1879 return (error); 1880 } 1881 1882 static int32_t 1883 udf_map(struct vnode *vp, offset_t off, 1884 struct as *as, caddr_t *addrp, size_t len, 1885 uint8_t prot, uint8_t maxprot, uint32_t flags, 1886 struct cred *cr) 1887 { 1888 struct segvn_crargs vn_a; 1889 int32_t error = 0; 1890 1891 ud_printf("udf_map\n"); 1892 1893 if (vp->v_flag & VNOMAP) { 1894 error = ENOSYS; 1895 goto end; 1896 } 1897 1898 if ((off < (offset_t)0) || 1899 ((off + len) < (offset_t)0)) { 1900 error = EINVAL; 1901 goto end; 1902 } 1903 1904 if (vp->v_type != VREG) { 1905 error = ENODEV; 1906 goto end; 1907 } 1908 1909 /* 1910 * If file is being locked, disallow mapping. 1911 */ 1912 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) { 1913 error = EAGAIN; 1914 goto end; 1915 } 1916 1917 as_rangelock(as); 1918 if ((flags & MAP_FIXED) == 0) { 1919 map_addr(addrp, len, off, 1, flags); 1920 if (*addrp == NULL) { 1921 as_rangeunlock(as); 1922 error = ENOMEM; 1923 goto end; 1924 } 1925 } else { 1926 /* 1927 * User specified address - blow away any previous mappings 1928 */ 1929 (void) as_unmap(as, *addrp, len); 1930 } 1931 1932 vn_a.vp = vp; 1933 vn_a.offset = off; 1934 vn_a.type = flags & MAP_TYPE; 1935 vn_a.prot = prot; 1936 vn_a.maxprot = maxprot; 1937 vn_a.cred = cr; 1938 vn_a.amp = NULL; 1939 vn_a.flags = flags & ~MAP_TYPE; 1940 vn_a.szc = 0; 1941 vn_a.lgrp_mem_policy_flags = 0; 1942 1943 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a); 1944 as_rangeunlock(as); 1945 1946 end: 1947 return (error); 1948 } 1949 1950 /* ARGSUSED */ 1951 static int32_t 1952 udf_addmap(struct vnode *vp, offset_t off, 1953 struct as *as, caddr_t addr, size_t len, 1954 uint8_t prot, uint8_t maxprot, uint32_t flags, 1955 struct cred *cr) 1956 { 1957 struct ud_inode *ip = VTOI(vp); 1958 1959 ud_printf("udf_addmap\n"); 1960 1961 if (vp->v_flag & VNOMAP) { 1962 return (ENOSYS); 1963 } 1964 1965 mutex_enter(&ip->i_tlock); 1966 ip->i_mapcnt += btopr(len); 1967 mutex_exit(&ip->i_tlock); 1968 1969 return (0); 1970 } 1971 1972 /* ARGSUSED */ 1973 static int32_t 1974 udf_delmap(struct vnode *vp, offset_t off, 1975 struct as *as, caddr_t addr, size_t len, 1976 uint32_t prot, uint32_t maxprot, uint32_t flags, 1977 struct cred *cr) 1978 { 1979 struct ud_inode *ip = VTOI(vp); 1980 1981 ud_printf("udf_delmap\n"); 1982 1983 if (vp->v_flag & VNOMAP) { 1984 return (ENOSYS); 1985 } 1986 1987 mutex_enter(&ip->i_tlock); 1988 ip->i_mapcnt -= btopr(len); /* Count released mappings */ 1989 ASSERT(ip->i_mapcnt >= 0); 1990 mutex_exit(&ip->i_tlock); 1991 1992 return (0); 1993 } 1994 1995 static int32_t 1996 udf_l_pathconf(struct vnode *vp, int32_t cmd, 1997 ulong_t *valp, struct cred *cr) 1998 { 1999 int32_t error = 0; 2000 2001 ud_printf("udf_l_pathconf\n"); 2002 2003 if (cmd == _PC_FILESIZEBITS) { 2004 /* 2005 * udf supports 64 bits as file size 2006 * but there are several other restrictions 2007 * it only supports 32-bit block numbers and 2008 * daddr32_t is only and int32_t so taking these 2009 * into account we can stay just as where ufs is 2010 */ 2011 *valp = 41; 2012 } else { 2013 error = fs_pathconf(vp, cmd, valp, cr); 2014 } 2015 2016 return (error); 2017 } 2018 2019 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0; 2020 #ifndef __lint 2021 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads)) 2022 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes)) 2023 #endif 2024 /* 2025 
* Assumption is that there will not be a pageio request 2026 * to a enbedded file 2027 */ 2028 /* ARGSUSED */ 2029 static int32_t 2030 udf_pageio(struct vnode *vp, struct page *pp, 2031 u_offset_t io_off, size_t io_len, 2032 int32_t flags, struct cred *cr) 2033 { 2034 daddr_t bn; 2035 struct buf *bp; 2036 struct ud_inode *ip = VTOI(vp); 2037 int32_t dolock, error = 0, contig, multi_io; 2038 size_t done_len = 0, cur_len = 0; 2039 page_t *npp = NULL, *opp = NULL, *cpp = pp; 2040 2041 if (pp == NULL) { 2042 return (EINVAL); 2043 } 2044 2045 dolock = (rw_owner(&ip->i_contents) != curthread); 2046 2047 /* 2048 * We need a better check. Ideally, we would use another 2049 * vnodeops so that hlocked and forcibly unmounted file 2050 * systems would return EIO where appropriate and w/o the 2051 * need for these checks. 2052 */ 2053 if (ip->i_udf == NULL) { 2054 return (EIO); 2055 } 2056 2057 #ifdef __lock_lint 2058 rw_enter(&ip->i_contents, RW_READER); 2059 #else 2060 if (dolock) { 2061 rw_enter(&ip->i_contents, RW_READER); 2062 } 2063 #endif 2064 2065 /* 2066 * Break the io request into chunks, one for each contiguous 2067 * stretch of disk blocks in the target file. 2068 */ 2069 while (done_len < io_len) { 2070 ASSERT(cpp); 2071 bp = NULL; 2072 contig = 0; 2073 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len), 2074 &bn, &contig)) { 2075 break; 2076 } 2077 2078 if (bn == UDF_HOLE) { /* No holey swapfiles */ 2079 cmn_err(CE_WARN, "SWAP file has HOLES"); 2080 error = EINVAL; 2081 break; 2082 } 2083 2084 cur_len = MIN(io_len - done_len, contig); 2085 2086 /* 2087 * Check if more than one I/O is 2088 * required to complete the given 2089 * I/O operation 2090 */ 2091 if (ip->i_udf->udf_lbsize < PAGESIZE) { 2092 if (cur_len >= PAGESIZE) { 2093 multi_io = 0; 2094 cur_len &= PAGEMASK; 2095 } else { 2096 multi_io = 1; 2097 cur_len = MIN(io_len - done_len, PAGESIZE); 2098 } 2099 } 2100 page_list_break(&cpp, &npp, btop(cur_len)); 2101 2102 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags); 2103 ASSERT(bp != NULL); 2104 2105 bp->b_edev = ip->i_dev; 2106 bp->b_dev = cmpdev(ip->i_dev); 2107 bp->b_blkno = bn; 2108 bp->b_un.b_addr = (caddr_t)0; 2109 bp->b_file = vp; 2110 bp->b_offset = (offset_t)(io_off + done_len); 2111 2112 /* 2113 * ub.ub_pageios.value.ul++; 2114 */ 2115 if (multi_io == 0) { 2116 (void) bdev_strategy(bp); 2117 } else { 2118 error = ud_multi_strat(ip, cpp, bp, 2119 (u_offset_t)(io_off + done_len)); 2120 if (error != 0) { 2121 pageio_done(bp); 2122 break; 2123 } 2124 } 2125 if (flags & B_READ) { 2126 ud_pageio_reads++; 2127 } else { 2128 ud_pageio_writes++; 2129 } 2130 2131 /* 2132 * If the request is not B_ASYNC, wait for i/o to complete 2133 * and re-assemble the page list to return to the caller. 2134 * If it is B_ASYNC we leave the page list in pieces and 2135 * cleanup() will dispose of them. 
2136 */ 2137 if ((flags & B_ASYNC) == 0) { 2138 error = biowait(bp); 2139 pageio_done(bp); 2140 if (error) { 2141 break; 2142 } 2143 page_list_concat(&opp, &cpp); 2144 } 2145 cpp = npp; 2146 npp = NULL; 2147 done_len += cur_len; 2148 } 2149 2150 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len)); 2151 if (error) { 2152 if (flags & B_ASYNC) { 2153 /* Cleanup unprocessed parts of list */ 2154 page_list_concat(&cpp, &npp); 2155 if (flags & B_READ) { 2156 pvn_read_done(cpp, B_ERROR); 2157 } else { 2158 pvn_write_done(cpp, B_ERROR); 2159 } 2160 } else { 2161 /* Re-assemble list and let caller clean up */ 2162 page_list_concat(&opp, &cpp); 2163 page_list_concat(&opp, &npp); 2164 } 2165 } 2166 2167 #ifdef __lock_lint 2168 rw_exit(&ip->i_contents); 2169 #else 2170 if (dolock) { 2171 rw_exit(&ip->i_contents); 2172 } 2173 #endif 2174 return (error); 2175 } 2176 2177 2178 2179 2180 /* -------------------- local functions --------------------------- */ 2181 2182 2183 2184 int32_t 2185 ud_rdwri(enum uio_rw rw, int32_t ioflag, 2186 struct ud_inode *ip, caddr_t base, int32_t len, 2187 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr) 2188 { 2189 int32_t error; 2190 struct uio auio; 2191 struct iovec aiov; 2192 2193 ud_printf("ud_rdwri\n"); 2194 2195 bzero((caddr_t)&auio, sizeof (uio_t)); 2196 bzero((caddr_t)&aiov, sizeof (iovec_t)); 2197 2198 aiov.iov_base = base; 2199 aiov.iov_len = len; 2200 auio.uio_iov = &aiov; 2201 auio.uio_iovcnt = 1; 2202 auio.uio_loffset = offset; 2203 auio.uio_segflg = (int16_t)seg; 2204 auio.uio_resid = len; 2205 2206 if (rw == UIO_WRITE) { 2207 auio.uio_fmode = FWRITE; 2208 auio.uio_extflg = UIO_COPY_DEFAULT; 2209 auio.uio_llimit = curproc->p_fsz_ctl; 2210 error = ud_wrip(ip, &auio, ioflag, cr); 2211 } else { 2212 auio.uio_fmode = FREAD; 2213 auio.uio_extflg = UIO_COPY_CACHED; 2214 auio.uio_llimit = MAXOFFSET_T; 2215 error = ud_rdip(ip, &auio, ioflag, cr); 2216 } 2217 2218 if (aresid) { 2219 *aresid = auio.uio_resid; 2220 } else if (auio.uio_resid) { 2221 error = EIO; 2222 } 2223 return (error); 2224 } 2225 2226 /* 2227 * Free behind hacks. The pager is busted. 2228 * XXX - need to pass the information down to writedone() in a flag like B_SEQ 2229 * or B_FREE_IF_TIGHT_ON_MEMORY. 2230 */ 2231 int32_t ud_freebehind = 1; 2232 int32_t ud_smallfile = 32 * 1024; 2233 2234 /* ARGSUSED */ 2235 int32_t 2236 ud_getpage_miss(struct vnode *vp, u_offset_t off, 2237 size_t len, struct seg *seg, caddr_t addr, page_t *pl[], 2238 size_t plsz, enum seg_rw rw, int32_t seq) 2239 { 2240 struct ud_inode *ip = VTOI(vp); 2241 int32_t err = 0; 2242 size_t io_len; 2243 u_offset_t io_off; 2244 u_offset_t pgoff; 2245 page_t *pp; 2246 2247 pl[0] = NULL; 2248 2249 /* 2250 * Figure out whether the page can be created, or must be 2251 * read from the disk 2252 */ 2253 if (rw == S_CREATE) { 2254 if ((pp = page_create_va(vp, off, 2255 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 2256 cmn_err(CE_WARN, "ud_getpage_miss: page_create"); 2257 return (EINVAL); 2258 } 2259 io_len = PAGESIZE; 2260 } else { 2261 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 2262 &io_len, off, PAGESIZE, 0); 2263 2264 /* 2265 * Some other thread has entered the page. 2266 * ud_getpage will retry page_lookup. 2267 */ 2268 if (pp == NULL) { 2269 return (0); 2270 } 2271 2272 /* 2273 * Fill the page with as much data as we can from the file. 2274 */ 2275 err = ud_page_fill(ip, pp, off, B_READ, &pgoff); 2276 if (err) { 2277 pvn_read_done(pp, B_ERROR); 2278 return (err); 2279 } 2280 2281 /* 2282 * XXX ??? 
/*
 * Free behind hacks. The pager is busted.
 * XXX - need to pass the information down to writedone() in a flag like B_SEQ
 * or B_FREE_IF_TIGHT_ON_MEMORY.
 */
int32_t ud_freebehind = 1;
int32_t ud_smallfile = 32 * 1024;

/* ARGSUSED */
int32_t
ud_getpage_miss(struct vnode *vp, u_offset_t off,
	size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
	size_t plsz, enum seg_rw rw, int32_t seq)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t err = 0;
	size_t io_len;
	u_offset_t io_off;
	u_offset_t pgoff;
	page_t *pp;

	pl[0] = NULL;

	/*
	 * Figure out whether the page can be created, or must be
	 * read from the disk.
	 */
	if (rw == S_CREATE) {
		if ((pp = page_create_va(vp, off,
		    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
			cmn_err(CE_WARN, "ud_getpage_miss: page_create");
			return (EINVAL);
		}
		io_len = PAGESIZE;
	} else {
		pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
		    &io_len, off, PAGESIZE, 0);

		/*
		 * Some other thread has entered the page.
		 * ud_getpage will retry page_lookup.
		 */
		if (pp == NULL) {
			return (0);
		}

		/*
		 * Fill the page with as much data as we can from the file.
		 */
		err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
		if (err) {
			pvn_read_done(pp, B_ERROR);
			return (err);
		}

		/*
		 * XXX ???
		 * ufs has io_len instead of pgoff below
		 */
		ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);

		/*
		 * If the file access is sequential, initiate read ahead
		 * of the next cluster.
		 */
		if (seq && ip->i_nextrio < ip->i_size) {
			ud_getpage_ra(vp, off, seg, addr);
		}
	}

outmiss:
	pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
	return (err);
}

/* ARGSUSED */
void
ud_getpage_ra(struct vnode *vp,
	u_offset_t off, struct seg *seg, caddr_t addr)
{
	page_t *pp;
	size_t io_len;
	struct ud_inode *ip = VTOI(vp);
	u_offset_t io_off = ip->i_nextrio, pgoff;
	caddr_t addr2 = addr + (io_off - off);
	daddr_t bn;
	int32_t contig = 0;

	/*
	 * Is this test needed?
	 */
	if (addr2 >= seg->s_base + seg->s_size) {
		return;
	}

	contig = 0;
	if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
		return;
	}

	pp = pvn_read_kluster(vp, io_off, seg, addr2,
	    &io_off, &io_len, io_off, PAGESIZE, 1);

	/*
	 * Some other thread has entered the page,
	 * so no read ahead is done here (i.e. we will
	 * have to wait for the read when it is needed).
	 */
	if (pp == NULL) {
		return;
	}

	(void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
	ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
}
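/*
 * Fill page "pp" with data from the file starting at offset "off".
 * For an embedded file the data is copied out of the in-core file_entry;
 * otherwise the extent covering "off" is looked up with ud_bmap_read()
 * and read with a single pageio buf, or through ud_multi_strat() when
 * the page spans more than one extent.  A hole covering the page is
 * simply zero-filled.  On return *pg_off holds the number of bytes of
 * the page that contain valid file data.
 */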
int
ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
	uint32_t bflgs, u_offset_t *pg_off)
{
	daddr_t bn;
	struct buf *bp;
	caddr_t kaddr, caddr;
	int32_t error = 0, contig = 0, multi_io = 0;
	int32_t lbsize = ip->i_udf->udf_lbsize;
	int32_t lbmask = ip->i_udf->udf_lbmask;
	uint64_t isize;

	isize = (ip->i_size + lbmask) & (~lbmask);
	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {

		/*
		 * Embedded file: read the file_entry
		 * from the buffer cache and copy the
		 * required portion.
		 */
		bp = ud_bread(ip->i_dev,
		    ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
		if ((bp->b_error == 0) &&
		    (bp->b_resid == 0)) {

			caddr = bp->b_un.b_addr + ip->i_data_off;

			/*
			 * mapin to kvm
			 */
			kaddr = (caddr_t)ppmapin(pp,
			    PROT_READ | PROT_WRITE, (caddr_t)-1);
			(void) kcopy(caddr, kaddr, ip->i_size);

			/*
			 * mapout of kvm
			 */
			ppmapout(kaddr);
		}
		brelse(bp);
		contig = ip->i_size;
	} else {

		/*
		 * Get the contiguous size and block number
		 * at offset "off"
		 */
		if (error = ud_bmap_read(ip, off, &bn, &contig))
			goto out;
		contig = MIN(contig, PAGESIZE);
		contig = (contig + lbmask) & (~lbmask);

		/*
		 * Zero the part of the page which we are not
		 * going to read from the disk.
		 */
		if (bn == UDF_HOLE) {

			/*
			 * This is a HOLE.  Just zero out
			 * the page.
			 */
			if (((off + contig) == isize) ||
			    (contig == PAGESIZE)) {
				pagezero(pp->p_prev, 0, PAGESIZE);
				goto out;
			}
		}

		if (contig < PAGESIZE) {
			uint64_t count;

			count = isize - off;
			if (contig != count) {
				multi_io = 1;
				contig = (int32_t)(MIN(count, PAGESIZE));
			} else {
				pagezero(pp->p_prev, contig, PAGESIZE - contig);
			}
		}

		/*
		 * Get a bp and initialize it
		 */
		bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
		ASSERT(bp != NULL);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_un.b_addr = 0;
		bp->b_file = ip->i_vnode;

		/*
		 * Start I/O
		 */
		if (multi_io == 0) {

			/*
			 * A single I/O is sufficient for this page.
			 */
			(void) bdev_strategy(bp);
		} else {

			/*
			 * We need to do the I/O in pieces.
			 */
			error = ud_multi_strat(ip, pp, bp, off);
			if (error != 0) {
				goto out;
			}
		}
		if ((bflgs & B_ASYNC) == 0) {

			/*
			 * Wait for i/o to complete.
			 */
			error = biowait(bp);
			pageio_done(bp);
			if (error) {
				goto out;
			}
		}
	}
	if ((off + contig) >= ip->i_size) {
		contig = ip->i_size - off;
	}

out:
	*pg_off = contig;
	return (error);
}
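/*
 * Flush dirty pages of the inode in the range [off, off + len), or all
 * cached pages at or beyond "off" when len is zero.  "flags" carries
 * the usual B_INVAL/B_FREE/B_ASYNC page flags and is passed through to
 * ud_putapage() for each dirty page found.
 */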
int32_t
ud_putpages(struct vnode *vp, offset_t off,
	size_t len, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip;
	page_t *pp;
	u_offset_t io_off;
	size_t io_len;
	u_offset_t eoff;
	int32_t err = 0;
	int32_t dolock;

	ud_printf("ud_putpages\n");

	if (vp->v_count == 0) {
		cmn_err(CE_WARN, "ud_putpages: bad v_count");
		return (EINVAL);
	}

	ip = VTOI(vp);

	/*
	 * Acquire the readers/writer inode lock before locking
	 * any pages in this inode.
	 * The inode lock is held during i/o.
	 */
	if (len == 0) {
		mutex_enter(&ip->i_tlock);
		ip->i_delayoff = ip->i_delaylen = 0;
		mutex_exit(&ip->i_tlock);
	}
#ifdef	__lock_lint
	rw_enter(&ip->i_contents, RW_READER);
#else
	dolock = (rw_owner(&ip->i_contents) != curthread);
	if (dolock) {
		rw_enter(&ip->i_contents, RW_READER);
	}
#endif

	if (!vn_has_cached_data(vp)) {
#ifdef	__lock_lint
		rw_exit(&ip->i_contents);
#else
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
#endif
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off.
		 */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
		    flags, cr);
	} else {
		/*
		 * Loop over all offsets in the range looking for
		 * pages to deal with.
		 */
		if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
			eoff = MIN(off + len, eoff);
		} else {
			eoff = off + len;
		}

		for (io_off = off; io_off < eoff; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages, use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ?
				    SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
				io_len = PAGESIZE;
			} else {

				err = ud_putapage(vp, pp,
				    &io_off, &io_len, flags, cr);
				if (err != 0) {
					break;
				}
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
		/*
		 * We have just sync'ed back all the pages on
		 * the inode, turn off the IMODTIME flag.
		 */
		mutex_enter(&ip->i_tlock);
		ip->i_flag &= ~IMODTIME;
		mutex_exit(&ip->i_tlock);
	}
#ifdef	__lock_lint
	rw_exit(&ip->i_contents);
#else
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
#endif
	return (err);
}
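/*
 * Write out a single dirty page of the inode.  The page is klustered
 * with neighbouring dirty pages via pvn_write_kluster(); for embedded
 * files the data is copied back into the file_entry and the descriptor
 * is rewritten, otherwise the page is pushed with a pageio buf (split
 * through ud_multi_strat() when the range is not contiguous on disk).
 * The offset and length actually written are returned through *offp
 * and *lenp.
 */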
/* ARGSUSED */
int32_t
ud_putapage(struct vnode *vp,
	page_t *pp, u_offset_t *offp,
	size_t *lenp, int32_t flags, struct cred *cr)
{
	daddr_t bn;
	size_t io_len;
	struct ud_inode *ip;
	int32_t error = 0, contig, multi_io = 0;
	struct udf_vfs *udf_vfsp;
	u_offset_t off, io_off;
	caddr_t kaddr, caddr;
	struct buf *bp = NULL;
	int32_t lbmask;
	uint64_t isize;
	int32_t crc_len;
	struct file_entry *fe;

	ud_printf("ud_putapage\n");

	ip = VTOI(vp);
	ASSERT(ip);
	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	lbmask = ip->i_udf->udf_lbmask;
	isize = (ip->i_size + lbmask) & (~lbmask);

	udf_vfsp = ip->i_udf;
	ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);

	/*
	 * If the modified time on the inode has not already been
	 * set elsewhere (e.g. for write/setattr) we set the time now.
	 * This gives us approximate modified times for mmap'ed files
	 * which are modified via stores in the user address space.
	 */
	if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= IUPD;
		ITIMES_NOLOCK(ip);
		mutex_exit(&ip->i_tlock);
	}

	/*
	 * Align the request to a block boundary (for old file systems),
	 * and go ask bmap() how contiguous things are for this file.
	 */
	off = pp->p_offset & ~(offset_t)lbmask;	/* block align it */

	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
		ASSERT(ip->i_size <= ip->i_max_emb);

		pp = pvn_write_kluster(vp, pp, &io_off,
		    &io_len, off, PAGESIZE, flags);
		if (io_len == 0) {
			io_len = PAGESIZE;
		}

		bp = ud_bread(ip->i_dev,
		    ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
		    udf_vfsp->udf_lbsize);
		fe = (struct file_entry *)bp->b_un.b_addr;
		if ((bp->b_flags & B_ERROR) ||
		    (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
		    ip->i_icb_block,
		    1, udf_vfsp->udf_lbsize) != 0)) {
			if (pp != NULL)
				pvn_write_done(pp, B_ERROR | B_WRITE | flags);
			if (bp->b_flags & B_ERROR) {
				error = EIO;
			} else {
				error = EINVAL;
			}
			brelse(bp);
			return (error);
		}
		if ((bp->b_error == 0) &&
		    (bp->b_resid == 0)) {

			caddr = bp->b_un.b_addr + ip->i_data_off;
			kaddr = (caddr_t)ppmapin(pp,
			    PROT_READ | PROT_WRITE, (caddr_t)-1);
			(void) kcopy(kaddr, caddr, ip->i_size);
			ppmapout(kaddr);
		}
		crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) +
		    SWAP_32(fe->fe_len_ear);
		crc_len += ip->i_size;
		ud_make_tag(ip->i_udf, &fe->fe_tag,
		    UD_FILE_ENTRY, ip->i_icb_block, crc_len);

		bwrite(bp);

		if (flags & B_ASYNC) {
			pvn_write_done(pp, flags);
		}
		contig = ip->i_size;
	} else {

		if (error = ud_bmap_read(ip, off, &bn, &contig)) {
			goto out;
		}
		contig = MIN(contig, PAGESIZE);
		contig = (contig + lbmask) & (~lbmask);

		if (contig < PAGESIZE) {
			uint64_t count;

			count = isize - off;
			if (contig != count) {
				multi_io = 1;
				contig = (int32_t)(MIN(count, PAGESIZE));
			}
		}

		if ((off + contig) > isize) {
			contig = isize - off;
		}

		if (contig > PAGESIZE) {
			if (contig & PAGEOFFSET) {
				contig &= PAGEMASK;
			}
		}

		pp = pvn_write_kluster(vp, pp, &io_off,
		    &io_len, off, contig, flags);
		if (io_len == 0) {
			io_len = PAGESIZE;
		}

		bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
		ASSERT(bp != NULL);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_un.b_addr = 0;
		bp->b_file = vp;
		bp->b_offset = (offset_t)off;

		/*
		 * write throttle
		 */
		ASSERT(bp->b_iodone == NULL);
		bp->b_iodone = ud_iodone;
		mutex_enter(&ip->i_tlock);
		ip->i_writes += bp->b_bcount;
		mutex_exit(&ip->i_tlock);

		if (multi_io == 0) {

			(void) bdev_strategy(bp);
		} else {
			error = ud_multi_strat(ip, pp, bp, off);
			if (error != 0) {
				goto out;
			}
		}

		if ((flags & B_ASYNC) == 0) {
			/*
			 * Wait for i/o to complete.
			 */
			error = biowait(bp);
			pageio_done(bp);
		}
	}

	if ((flags & B_ASYNC) == 0) {
		pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
	}

	pp = NULL;

out:
	if (error != 0 && pp != NULL) {
		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
	}

	if (offp) {
		*offp = io_off;
	}
	if (lenp) {
		*lenp = io_len;
	}

	return (error);
}

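/*
 * b_iodone callback for the buffers issued by ud_putapage().  It undoes
 * the write-throttle accounting done above: the completed buffer's byte
 * count is subtracted from ip->i_writes, and if throttling is enabled
 * (ud_WRITES) and the count drops from above the ud_LW low-water mark
 * to at or below it, any threads waiting on ip->i_wrcv are woken up.
 */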
int32_t
ud_iodone(struct buf *bp)
{
	struct ud_inode *ip;

	ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));

	bp->b_iodone = NULL;

	ip = VTOI(bp->b_pages->p_vnode);

	mutex_enter(&ip->i_tlock);
	if (ip->i_writes >= ud_LW) {
		if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
			if (ud_WRITES) {
				cv_broadcast(&ip->i_wrcv); /* wake all up */
			}
		}
	} else {
		ip->i_writes -= bp->b_bcount;
	}
	mutex_exit(&ip->i_tlock);
	iodone(bp);
	return (0);
}
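/*
 * Kernel-level read of an inode (regular file, directory or symlink).
 * The request is serviced through segkmap a chunk at a time: each chunk
 * is mapped with segmap_getmapflt(), copied to the caller with
 * uiomove(), and released with flags that enable free-behind for large
 * sequential reads or a synchronous push when FRSYNC semantics are
 * requested.
 */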
/* ARGSUSED3 */
int32_t
ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
{
	struct vnode *vp;
	struct udf_vfs *udf_vfsp;
	krw_t rwtype;
	caddr_t base;
	uint32_t flags;
	int32_t error, n, on, mapon, dofree;
	u_offset_t off;
	long oresid = uio->uio_resid;

	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	if ((ip->i_type != VREG) &&
	    (ip->i_type != VDIR) &&
	    (ip->i_type != VLNK)) {
		return (EIO);
	}

	if (uio->uio_loffset > MAXOFFSET_T) {
		return (0);
	}

	if ((uio->uio_loffset < (offset_t)0) ||
	    ((uio->uio_loffset + uio->uio_resid) < 0)) {
		return (EINVAL);
	}
	if (uio->uio_resid == 0) {
		return (0);
	}

	vp = ITOV(ip);
	udf_vfsp = ip->i_udf;
	mutex_enter(&ip->i_tlock);
	ip->i_flag |= IACC;
	mutex_exit(&ip->i_tlock);

	rwtype = (rw_write_held(&ip->i_contents) ? RW_WRITER : RW_READER);

	do {
		offset_t diff;
		u_offset_t uoff = uio->uio_loffset;
		off = uoff & (offset_t)MAXBMASK;
		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
		on = (int)blkoff(udf_vfsp, uoff);
		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);

		diff = ip->i_size - uoff;

		if (diff <= (offset_t)0) {
			error = 0;
			goto out;
		}
		if (diff < (offset_t)n) {
			n = (int)diff;
		}
		dofree = ud_freebehind &&
		    ip->i_nextr == (off & PAGEMASK) &&
		    off > ud_smallfile;

#ifndef	__lock_lint
		if (rwtype == RW_READER) {
			rw_exit(&ip->i_contents);
		}
#endif

		base = segmap_getmapflt(segkmap, vp, (off + mapon),
		    (uint32_t)n, 1, S_READ);
		error = uiomove(base + mapon, (long)n, UIO_READ, uio);

		flags = 0;
		if (!error) {
			/*
			 * If we read a whole block, or read to EOF,
			 * we won't need this buffer again soon.
			 */
			if (n + on == MAXBSIZE && ud_freebehind && dofree &&
			    freemem < lotsfree + pages_before_pager) {
				flags = SM_FREE | SM_DONTNEED | SM_ASYNC;
			}
			/*
			 * In POSIX SYNC (FSYNC and FDSYNC) read mode,
			 * we want to make sure that the page which has
			 * been read is written to disk if it is dirty,
			 * and that the corresponding indirect blocks
			 * are also flushed out.
			 */
			if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
				flags &= ~SM_ASYNC;
				flags |= SM_WRITE;
			}
			error = segmap_release(segkmap, base, flags);
		} else {
			(void) segmap_release(segkmap, base, flags);
		}

#ifndef	__lock_lint
		if (rwtype == RW_READER) {
			rw_enter(&ip->i_contents, rwtype);
		}
#endif
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
out:
	/*
	 * Inode is updated according to this table if FRSYNC is set.
	 *
	 *	FSYNC	FDSYNC(posix.4)
	 *	--------------------------
	 *	always	IATTCHG|IBDWRITE
	 */
	if (ioflag & FRSYNC) {
		if ((ioflag & FSYNC) ||
		    ((ioflag & FDSYNC) && (ip->i_flag & (IATTCHG|IBDWRITE)))) {
			rw_exit(&ip->i_contents);
			rw_enter(&ip->i_contents, RW_WRITER);
			ud_iupdat(ip, 1);
		}
	}
	/*
	 * If we've already done a partial read, terminate
	 * the read but return no error.
	 */
	if (oresid != uio->uio_resid) {
		error = 0;
	}
	ITIMES(ip);

	return (error);
}

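/*
 * Kernel-level write to an inode.  The write is done through segkmap
 * windows: blocks are allocated with ud_bmap_write() before the file
 * size is extended, whole-window writes create pages directly with
 * segmap_pagecreate() instead of faulting them in, and synchronous
 * writes (FSYNC/FDSYNC, and all directory writes) are pushed back with
 * segmap_release(..., SM_WRITE).  On failure, any size extension done
 * in the current iteration is undone with ud_itrunc().
 */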
int32_t
ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
{
	caddr_t base;
	struct vnode *vp;
	struct udf_vfs *udf_vfsp;
	uint32_t flags;
	int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
	int32_t pagecreate, newpage;
	uint64_t old_i_size;
	u_offset_t off;
	long start_resid = uio->uio_resid, premove_resid;
	rlim64_t limit = uio->uio_limit;


	ASSERT(RW_WRITE_HELD(&ip->i_contents));
	if ((ip->i_type != VREG) &&
	    (ip->i_type != VDIR) &&
	    (ip->i_type != VLNK)) {
		return (EIO);
	}

	if (uio->uio_loffset >= MAXOFFSET_T) {
		return (EFBIG);
	}
	/*
	 * see udf_l_pathconf
	 */
	if (limit > (((uint64_t)1 << 40) - 1)) {
		limit = ((uint64_t)1 << 40) - 1;
	}
	if (uio->uio_loffset >= limit) {
		proc_t *p = ttoproc(curthread);

		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
		    p, RCA_UNSAFE_SIGINFO);
		mutex_exit(&p->p_lock);
		return (EFBIG);
	}
	if ((uio->uio_loffset < (offset_t)0) ||
	    ((uio->uio_loffset + uio->uio_resid) < 0)) {
		return (EINVAL);
	}
	if (uio->uio_resid == 0) {
		return (0);
	}

	mutex_enter(&ip->i_tlock);
	ip->i_flag |= INOACC;

	if (ioflag & (FSYNC | FDSYNC)) {
		ip->i_flag |= ISYNC;
		iupdat_flag = 1;
	}
	mutex_exit(&ip->i_tlock);

	udf_vfsp = ip->i_udf;
	vp = ITOV(ip);

	do {
		u_offset_t uoff = uio->uio_loffset;
		off = uoff & (offset_t)MAXBMASK;
		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
		on = (int)blkoff(udf_vfsp, uoff);
		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);

		if (ip->i_type == VREG && uoff + n >= limit) {
			if (uoff >= limit) {
				error = EFBIG;
				goto out;
			}
			n = (int)(limit - (rlim64_t)uoff);
		}
		if (uoff + n > ip->i_size) {
			/*
			 * We are extending the length of the file.
			 * bmap is used so that we are sure that
			 * if we need to allocate new blocks, it
			 * is done here before we up the file size.
			 */
			error = ud_bmap_write(ip, uoff,
			    (int)(on + n), mapon == 0, cr);
			if (error) {
				break;
			}
			i_size_changed = 1;
			old_i_size = ip->i_size;
			ip->i_size = uoff + n;
			/*
			 * If we are writing from the beginning of
			 * the mapping, we can just create the
			 * pages without having to read them.
			 */
			pagecreate = (mapon == 0);
		} else if (n == MAXBSIZE) {
			/*
			 * Going to do a whole mapping's worth,
			 * so we can just create the pages w/o
			 * having to read them in.  But before
			 * we do that, we need to make sure any
			 * needed blocks are allocated first.
			 */
			error = ud_bmap_write(ip, uoff,
			    (int)(on + n), 1, cr);
			if (error) {
				break;
			}
			pagecreate = 1;
		} else {
			pagecreate = 0;
		}

		rw_exit(&ip->i_contents);

		base = segmap_getmapflt(segkmap, vp, (off + mapon),
		    (uint32_t)n, !pagecreate, S_WRITE);

		/*
		 * segmap_pagecreate() returns 1 if it calls
		 * page_create_va() to allocate any pages.
		 */
		newpage = 0;
		if (pagecreate) {
			newpage = segmap_pagecreate(segkmap, base,
			    (size_t)n, 0);
		}

		premove_resid = uio->uio_resid;
		error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);

		if (pagecreate &&
		    uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
			/*
			 * We created pages w/o initializing them completely,
			 * thus we need to zero the part that wasn't set up.
			 * This happens on most EOF write cases and if
			 * we had some sort of error during the uiomove.
			 */
			int nzero, nmoved;

			nmoved = (int)(uio->uio_loffset - (off + mapon));
			ASSERT(nmoved >= 0 && nmoved <= n);
			nzero = roundup(on + n, PAGESIZE) - nmoved;
			ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
			(void) kzero(base + mapon + nmoved, (uint32_t)nzero);
		}

		/*
		 * Unlock the pages allocated by page_create_va()
		 * in segmap_pagecreate()
		 */
		if (newpage) {
			segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
		}

		if (error) {
			/*
			 * If we failed on a write, we may have already
			 * allocated file blocks as well as pages.  It's
			 * hard to undo the block allocation, but we must
			 * be sure to invalidate any pages that may have
			 * been allocated.
			 */
			(void) segmap_release(segkmap, base, SM_INVAL);
		} else {
			flags = 0;
			/*
			 * Force write back for synchronous write cases.
			 */
			if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
				/*
				 * If the sticky bit is set but the
				 * execute bit is not set, we do a
				 * synchronous write back and free
				 * the page when done.  We set up swap
				 * files to be handled this way to
				 * prevent servers from keeping around
				 * the client's swap pages too long.
				 * XXX - there ought to be a better way.
				 */
				if (IS_SWAPVP(vp)) {
					flags = SM_WRITE | SM_FREE |
					    SM_DONTNEED;
					iupdat_flag = 0;
				} else {
					flags = SM_WRITE;
				}
			} else if (((mapon + n) == MAXBSIZE) ||
			    IS_SWAPVP(vp)) {
				/*
				 * Have written a whole block.
				 * Start an asynchronous write and
				 * mark the buffer to indicate that
				 * it won't be needed again soon.
				 */
				flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
			}
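			/*
			 * Push the window back to the page cache with the
			 * flags chosen above: SM_WRITE forces the push now,
			 * SM_ASYNC makes it asynchronous, and SM_FREE /
			 * SM_DONTNEED tell segmap the pages are unlikely
			 * to be needed again soon.
			 */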
			error = segmap_release(segkmap, base, flags);

			/*
			 * If the operation failed and is synchronous,
			 * then we need to unwind what uiomove() last
			 * did so we can potentially return an error to
			 * the caller.  If this write operation was
			 * done in two pieces and the first succeeded,
			 * then we won't return an error for the second
			 * piece that failed.  However, we only want to
			 * return a resid value that reflects what was
			 * really done.
			 *
			 * Failures for non-synchronous operations can
			 * be ignored since the page subsystem will
			 * retry the operation until it succeeds or the
			 * file system is unmounted.
			 */
			if (error) {
				if ((ioflag & (FSYNC | FDSYNC)) ||
				    ip->i_type == VDIR) {
					uio->uio_resid = premove_resid;
				} else {
					error = 0;
				}
			}
		}

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&ip->i_contents, RW_WRITER);
		/*
		 * If the uiomove() failed or if a synchronous
		 * page push failed, fix up i_size.
		 */
		if (error) {
			if (i_size_changed) {
				/*
				 * The uiomove failed, and we
				 * allocated blocks, so get rid
				 * of them.
				 */
				(void) ud_itrunc(ip, old_i_size, 0, cr);
			}
		} else {
			/*
			 * XXX - Can this be out of the loop?
			 */
			ip->i_flag |= IUPD | ICHG;
			if (i_size_changed) {
				ip->i_flag |= IATTCHG;
			}
			if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
			    (IEXEC >> 10))) != 0 &&
			    (ip->i_char & (ISUID | ISGID)) != 0 &&
			    secpolicy_vnode_setid_retain(cr,
			    (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
				/*
				 * Clear Set-UID & Set-GID bits on
				 * successful write if not privileged
				 * and at least one of the execute bits
				 * is set.  If we always clear Set-GID,
				 * mandatory file and record locking is
				 * unusable.
				 */
				ip->i_char &= ~(ISUID | ISGID);
			}
		}
	} while (error == 0 && uio->uio_resid > 0 && n != 0);

out:
	/*
	 * Inode is updated according to this table -
	 *
	 *	FSYNC	FDSYNC(posix.4)
	 *	--------------------------
	 *	always@	IATTCHG|IBDWRITE
	 *
	 * @ - If we are doing a synchronous write the only time we should
	 *	not be sync'ing the ip here is if we have the stickyhack
	 *	activated, the file is marked with the sticky bit and
	 *	no exec bit, the file length has not been changed and
	 *	no new blocks have been allocated during this write.
	 */
	if ((ip->i_flag & ISYNC) != 0) {
		/*
		 * we have eliminated nosync
		 */
		if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
		    ((ioflag & FSYNC) && iupdat_flag)) {
			ud_iupdat(ip, 1);
		}
	}

	/*
	 * If we've already done a partial write, terminate
	 * the write but return no error.
	 */
	if (start_resid != uio->uio_resid) {
		error = 0;
	}
	ip->i_flag &= ~(INOACC | ISYNC);
	ITIMES_NOLOCK(ip);

	return (error);
}

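/*
 * Perform the I/O for a page whose backing store is not contiguous on
 * disk.  The original pageio buf "bp" is split into one cloned buf per
 * allocated extent (holes in a read are simply zero-filled); the clones
 * share a master structure that tracks the outstanding byte count, and
 * ud_slave_done() completes the original buf once every clone has
 * finished.
 */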
int32_t
ud_multi_strat(struct ud_inode *ip,
	page_t *pp, struct buf *bp, u_offset_t start)
{
	daddr_t bn;
	int32_t error = 0, io_count, contig, alloc_sz, i;
	uint32_t io_off;
	mio_master_t *mm = NULL;
	mio_slave_t *ms = NULL;
	struct buf *rbp;

	ASSERT(!(start & PAGEOFFSET));

	/*
	 * Figure out how many buffers to allocate
	 */
	io_count = 0;
	for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
		contig = 0;
		if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
		    &bn, &contig)) {
			goto end;
		}
		if (contig == 0) {
			goto end;
		}
		contig = MIN(contig, PAGESIZE - io_off);
		if (bn != UDF_HOLE) {
			io_count++;
		} else {
			/*
			 * HOLE
			 */
			if (bp->b_flags & B_READ) {

				/*
				 * This is a hole that is being read;
				 * it should be filled with 0's.
				 */
				pagezero(pp, io_off, contig);
			}
		}
	}


	if (io_count != 0) {

		/*
		 * Allocate memory for all the
		 * required number of buffers
		 */
		alloc_sz = sizeof (mio_master_t) +
		    (sizeof (mio_slave_t) * io_count);
		mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
		if (mm == NULL) {
			error = ENOMEM;
			goto end;
		}

		/*
		 * initialize master
		 */
		mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
		mm->mm_size = alloc_sz;
		mm->mm_bp = bp;
		mm->mm_resid = 0;
		mm->mm_error = 0;
		mm->mm_index = master_index++;

		ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));

		/*
		 * Initialize buffers
		 */
		io_count = 0;
		for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
			contig = 0;
			if (error = ud_bmap_read(ip,
			    (u_offset_t)(start + io_off),
			    &bn, &contig)) {
				goto end;
			}
			ASSERT(contig);
			if ((io_off + contig) > bp->b_bcount) {
				contig = bp->b_bcount - io_off;
			}
			if (bn != UDF_HOLE) {
				/*
				 * Clone the buffer
				 * and prepare to start I/O
				 */
				ms->ms_ptr = mm;
				bioinit(&ms->ms_buf);
				rbp = bioclone(bp, io_off, (size_t)contig,
				    bp->b_edev, bn, ud_slave_done,
				    &ms->ms_buf, KM_NOSLEEP);
				ASSERT(rbp == &ms->ms_buf);
				mm->mm_resid += contig;
				io_count++;
				ms++;
			}
		}

		/*
		 * Start I/O's
		 */
		ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
		for (i = 0; i < io_count; i++) {
			(void) bdev_strategy(&ms->ms_buf);
			ms++;
		}
	}

end:
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		if (mm != NULL) {
			mutex_destroy(&mm->mm_mutex);
			kmem_free(mm, mm->mm_size);
		}
	}
	return (error);
}

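/*
 * Completion routine for the cloned bufs issued by ud_multi_strat().
 * Each completing clone folds its error and byte count into the shared
 * master structure; the clone's resources are then released, and when
 * the last clone finishes the original buf is biodone()'d with the
 * accumulated error and the master structure is freed.
 */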
int32_t
ud_slave_done(struct buf *bp)
{
	mio_master_t *mm;
	int32_t resid;

	ASSERT(SEMA_HELD(&bp->b_sem));
	ASSERT((bp->b_flags & B_DONE) == 0);

	mm = ((mio_slave_t *)bp)->ms_ptr;

	/*
	 * Propagate error and byte count info from slave struct to
	 * the master struct
	 */
	mutex_enter(&mm->mm_mutex);
	if (bp->b_flags & B_ERROR) {

		/*
		 * If multiple slave buffers get errors, we
		 * forget the old errors; this is OK because
		 * we cannot return multiple errors anyway.
		 */
		mm->mm_error = bp->b_error;
	}
	mm->mm_resid -= bp->b_bcount;
	resid = mm->mm_resid;
	mutex_exit(&mm->mm_mutex);

	/*
	 * Free up the resources allocated to cloned buffers.
	 */
	bp_mapout(bp);
	biofini(bp);

	if (resid == 0) {

		/*
		 * This is the last I/O operation;
		 * clean up and return the original buffer.
		 */
		if (mm->mm_error) {
			mm->mm_bp->b_flags |= B_ERROR;
			mm->mm_bp->b_error = mm->mm_error;
		}
		biodone(mm->mm_bp);
		mutex_destroy(&mm->mm_mutex);
		kmem_free(mm, mm->mm_size);
	}
	return (0);
}