/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/mode.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/dnlc.h>
#include <sys/conf.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/fbuf.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/dirent.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/statvfs.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/policy.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_kmem.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <vm/page.h>
#include <sys/swap.h>

#include <fs/fs_subr.h>

#include <sys/fs/udf_volume.h>
#include <sys/fs/udf_inode.h>

static int32_t udf_open(struct vnode **,
	int32_t, struct cred *, caller_context_t *);
static int32_t udf_close(struct vnode *,
	int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
static int32_t udf_read(struct vnode *,
	struct uio *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_write(struct vnode *,
	struct uio *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_ioctl(struct vnode *,
	int32_t, intptr_t, int32_t, struct cred *, int32_t *,
	caller_context_t *);
static int32_t udf_getattr(struct vnode *,
	struct vattr *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_setattr(struct vnode *,
	struct vattr *, int32_t, struct cred *, caller_context_t *);
static int32_t udf_access(struct vnode *,
	int32_t, int32_t, struct cred *, caller_context_t *);
static int32_t udf_lookup(struct vnode *,
	char *, struct vnode **, struct pathname *,
	int32_t, struct vnode *, struct cred *,
	caller_context_t *, int *, pathname_t *);
static int32_t udf_create(struct vnode *,
	char *, struct vattr *, enum vcexcl,
	int32_t, struct vnode **,
	struct cred *, int32_t,
	caller_context_t *, vsecattr_t *);
static int32_t udf_remove(struct vnode *,
	char *, struct cred *, caller_context_t *, int);
static int32_t udf_link(struct vnode *,
	struct vnode *, char *, struct cred *, caller_context_t *, int);
static int32_t udf_rename(struct vnode *,
	char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
static int32_t udf_mkdir(struct vnode *,
	char *, struct vattr *, struct vnode **, struct cred *,
	caller_context_t *, int, vsecattr_t *);
static int32_t udf_rmdir(struct vnode *,
	char *, struct vnode *, struct cred *, caller_context_t *, int);
static int32_t udf_readdir(struct vnode *,
	struct uio *, struct cred *, int32_t *, caller_context_t *, int);
static int32_t udf_symlink(struct vnode *,
	char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
static int32_t udf_readlink(struct vnode *,
	struct uio *, struct cred *, caller_context_t *);
static int32_t udf_fsync(struct vnode *,
	int32_t, struct cred *, caller_context_t *);
static void udf_inactive(struct vnode *,
	struct cred *, caller_context_t *);
static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
	caller_context_t *);
static int32_t udf_frlock(struct vnode *, int32_t,
	struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
	caller_context_t *);
static int32_t udf_space(struct vnode *, int32_t,
	struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
static int32_t udf_getpage(struct vnode *, offset_t,
	size_t, uint32_t *, struct page **, size_t,
	struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
static int32_t udf_putpage(struct vnode *, offset_t,
	size_t, int32_t, struct cred *, caller_context_t *);
static int32_t udf_map(struct vnode *, offset_t, struct as *,
	caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
	caller_context_t *);
static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
	caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
	caller_context_t *);
static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
	caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
	caller_context_t *);
static int32_t udf_l_pathconf(struct vnode *, int32_t,
	ulong_t *, struct cred *, caller_context_t *);
static int32_t udf_pageio(struct vnode *, struct page *,
	u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);

int32_t ud_getpage_miss(struct vnode *, u_offset_t,
	size_t, struct seg *, caddr_t, page_t *pl[],
	size_t, enum seg_rw, int32_t);
void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
int32_t ud_page_fill(struct ud_inode *, page_t *,
	u_offset_t, uint32_t, u_offset_t *);
int32_t ud_iodone(struct buf *);
int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
int32_t ud_slave_done(struct buf *);

/*
 * Structures to control multiple IO operations
 * to get or put pages that are backed by discontiguous blocks.
 * The master struct is a dummy that holds the original bp from
 * pageio_setup. The slave struct holds the working bp's to do the
 * actual IO. Once all the slave IOs complete, the master is
 * processed as if a single IO op has completed.
 */
uint32_t master_index = 0;
typedef struct mio_master {
	kmutex_t	mm_mutex;	/* protect the fields below */
	int32_t		mm_size;
	buf_t		*mm_bp;		/* original bp */
	int32_t		mm_resid;	/* bytes remaining to transfer */
	int32_t		mm_error;	/* accumulated error from slaves */
	int32_t		mm_index;	/* XXX debugging */
} mio_master_t;

typedef struct mio_slave {
	buf_t		ms_buf;		/* working buffer for this IO chunk */
	mio_master_t	*ms_ptr;	/* pointer to master */
} mio_slave_t;

struct vnodeops *udf_vnodeops;

const fs_operation_def_t udf_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = udf_open },
	VOPNAME_CLOSE,		{ .vop_close = udf_close },
	VOPNAME_READ,		{ .vop_read = udf_read },
	VOPNAME_WRITE,		{ .vop_write = udf_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = udf_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = udf_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = udf_setattr },
	VOPNAME_ACCESS,		{ .vop_access = udf_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = udf_lookup },
	VOPNAME_CREATE,		{ .vop_create = udf_create },
	VOPNAME_REMOVE,		{ .vop_remove = udf_remove },
	VOPNAME_LINK,		{ .vop_link = udf_link },
	VOPNAME_RENAME,		{ .vop_rename = udf_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = udf_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = udf_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = udf_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = udf_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = udf_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = udf_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = udf_inactive },
	VOPNAME_FID,		{ .vop_fid = udf_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = udf_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = udf_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = udf_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = udf_frlock },
	VOPNAME_SPACE,		{ .vop_space = udf_space },
	VOPNAME_GETPAGE,	{ .vop_getpage = udf_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = udf_putpage },
	VOPNAME_MAP,		{ .vop_map = udf_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = udf_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = udf_delmap },
	VOPNAME_PATHCONF,	{ .vop_pathconf = udf_l_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = udf_pageio },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};

/* ARGSUSED */
static int32_t
udf_open(
	struct vnode **vpp,
	int32_t flag,
	struct cred *cr,
	caller_context_t *ct)
{
	ud_printf("udf_open\n");

	return (0);
}

/* ARGSUSED */
static int32_t
udf_close(
	struct vnode *vp,
	int32_t flag,
	int32_t count,
	offset_t offset,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_close\n");

	ITIMES(ip);

	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);

	/*
	 * Push partially filled cluster at last close.
	 * ``last close'' is approximated because the dnlc
	 * may have a hold on the vnode.
271 */ 272 if (vp->v_count <= 2 && vp->v_type != VBAD) { 273 struct ud_inode *ip = VTOI(vp); 274 if (ip->i_delaylen) { 275 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen, 276 B_ASYNC | B_FREE, cr); 277 ip->i_delaylen = 0; 278 } 279 } 280 281 return (0); 282 } 283 284 /* ARGSUSED */ 285 static int32_t 286 udf_read( 287 struct vnode *vp, 288 struct uio *uiop, 289 int32_t ioflag, 290 struct cred *cr, 291 caller_context_t *ct) 292 { 293 struct ud_inode *ip = VTOI(vp); 294 int32_t error; 295 296 ud_printf("udf_read\n"); 297 298 #ifdef __lock_lint 299 rw_enter(&ip->i_rwlock, RW_READER); 300 #endif 301 302 ASSERT(RW_READ_HELD(&ip->i_rwlock)); 303 304 if (MANDLOCK(vp, ip->i_char)) { 305 /* 306 * udf_getattr ends up being called by chklock 307 */ 308 error = chklock(vp, FREAD, uiop->uio_loffset, 309 uiop->uio_resid, uiop->uio_fmode, ct); 310 if (error) { 311 goto end; 312 } 313 } 314 315 rw_enter(&ip->i_contents, RW_READER); 316 error = ud_rdip(ip, uiop, ioflag, cr); 317 rw_exit(&ip->i_contents); 318 319 end: 320 #ifdef __lock_lint 321 rw_exit(&ip->i_rwlock); 322 #endif 323 324 return (error); 325 } 326 327 328 int32_t ud_WRITES = 1; 329 int32_t ud_HW = 96 * 1024; 330 int32_t ud_LW = 64 * 1024; 331 int32_t ud_throttles = 0; 332 333 /* ARGSUSED */ 334 static int32_t 335 udf_write( 336 struct vnode *vp, 337 struct uio *uiop, 338 int32_t ioflag, 339 struct cred *cr, 340 caller_context_t *ct) 341 { 342 struct ud_inode *ip = VTOI(vp); 343 int32_t error = 0; 344 345 ud_printf("udf_write\n"); 346 347 #ifdef __lock_lint 348 rw_enter(&ip->i_rwlock, RW_WRITER); 349 #endif 350 351 ASSERT(RW_WRITE_HELD(&ip->i_rwlock)); 352 353 if (MANDLOCK(vp, ip->i_char)) { 354 /* 355 * ud_getattr ends up being called by chklock 356 */ 357 error = chklock(vp, FWRITE, uiop->uio_loffset, 358 uiop->uio_resid, uiop->uio_fmode, ct); 359 if (error) { 360 goto end; 361 } 362 } 363 /* 364 * Throttle writes. 365 */ 366 mutex_enter(&ip->i_tlock); 367 if (ud_WRITES && (ip->i_writes > ud_HW)) { 368 while (ip->i_writes > ud_HW) { 369 ud_throttles++; 370 cv_wait(&ip->i_wrcv, &ip->i_tlock); 371 } 372 } 373 mutex_exit(&ip->i_tlock); 374 375 /* 376 * Write to the file 377 */ 378 rw_enter(&ip->i_contents, RW_WRITER); 379 if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) { 380 /* 381 * In append mode start at end of file. 382 */ 383 uiop->uio_loffset = ip->i_size; 384 } 385 error = ud_wrip(ip, uiop, ioflag, cr); 386 rw_exit(&ip->i_contents); 387 388 end: 389 #ifdef __lock_lint 390 rw_exit(&ip->i_rwlock); 391 #endif 392 393 return (error); 394 } 395 396 /* ARGSUSED */ 397 static int32_t 398 udf_ioctl( 399 struct vnode *vp, 400 int32_t cmd, 401 intptr_t arg, 402 int32_t flag, 403 struct cred *cr, 404 int32_t *rvalp, 405 caller_context_t *ct) 406 { 407 return (ENOTTY); 408 } 409 410 /* ARGSUSED */ 411 static int32_t 412 udf_getattr( 413 struct vnode *vp, 414 struct vattr *vap, 415 int32_t flags, 416 struct cred *cr, 417 caller_context_t *ct) 418 { 419 struct ud_inode *ip = VTOI(vp); 420 421 ud_printf("udf_getattr\n"); 422 423 if (vap->va_mask == AT_SIZE) { 424 /* 425 * for performance, if only the size is requested don't bother 426 * with anything else. 
427 */ 428 vap->va_size = ip->i_size; 429 return (0); 430 } 431 432 rw_enter(&ip->i_contents, RW_READER); 433 434 vap->va_type = vp->v_type; 435 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 436 437 vap->va_uid = ip->i_uid; 438 vap->va_gid = ip->i_gid; 439 vap->va_fsid = ip->i_dev; 440 vap->va_nodeid = ip->i_icb_lbano; 441 vap->va_nlink = ip->i_nlink; 442 vap->va_size = ip->i_size; 443 vap->va_seq = ip->i_seq; 444 if (vp->v_type == VCHR || vp->v_type == VBLK) { 445 vap->va_rdev = ip->i_rdev; 446 } else { 447 vap->va_rdev = 0; 448 } 449 450 mutex_enter(&ip->i_tlock); 451 ITIMES_NOLOCK(ip); /* mark correct time in inode */ 452 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec; 453 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec; 454 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec; 455 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec; 456 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec; 457 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec; 458 mutex_exit(&ip->i_tlock); 459 460 switch (ip->i_type) { 461 case VBLK: 462 vap->va_blksize = MAXBSIZE; 463 break; 464 case VCHR: 465 vap->va_blksize = MAXBSIZE; 466 break; 467 default: 468 vap->va_blksize = ip->i_udf->udf_lbsize; 469 break; 470 } 471 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift; 472 473 rw_exit(&ip->i_contents); 474 475 return (0); 476 } 477 478 static int 479 ud_iaccess_vmode(void *ip, int mode, struct cred *cr) 480 { 481 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr)); 482 } 483 484 /*ARGSUSED4*/ 485 static int32_t 486 udf_setattr( 487 struct vnode *vp, 488 struct vattr *vap, 489 int32_t flags, 490 struct cred *cr, 491 caller_context_t *ct) 492 { 493 int32_t error = 0; 494 uint32_t mask = vap->va_mask; 495 struct ud_inode *ip; 496 timestruc_t now; 497 struct vattr ovap; 498 499 ud_printf("udf_setattr\n"); 500 501 ip = VTOI(vp); 502 503 /* 504 * not updates allowed to 4096 files 505 */ 506 if (ip->i_astrat == STRAT_TYPE4096) { 507 return (EINVAL); 508 } 509 510 /* 511 * Cannot set these attributes 512 */ 513 if (mask & AT_NOSET) { 514 return (EINVAL); 515 } 516 517 rw_enter(&ip->i_rwlock, RW_WRITER); 518 rw_enter(&ip->i_contents, RW_WRITER); 519 520 ovap.va_uid = ip->i_uid; 521 ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 522 error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags, 523 ud_iaccess_vmode, ip); 524 if (error) 525 goto update_inode; 526 527 mask = vap->va_mask; 528 /* 529 * Change file access modes. 530 */ 531 if (mask & AT_MODE) { 532 ip->i_perm = VA2UD_PERM(vap->va_mode); 533 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX); 534 mutex_enter(&ip->i_tlock); 535 ip->i_flag |= ICHG; 536 mutex_exit(&ip->i_tlock); 537 } 538 if (mask & (AT_UID|AT_GID)) { 539 if (mask & AT_UID) { 540 ip->i_uid = vap->va_uid; 541 } 542 if (mask & AT_GID) { 543 ip->i_gid = vap->va_gid; 544 } 545 mutex_enter(&ip->i_tlock); 546 ip->i_flag |= ICHG; 547 mutex_exit(&ip->i_tlock); 548 } 549 /* 550 * Truncate file. Must have write permission and not be a directory. 551 */ 552 if (mask & AT_SIZE) { 553 if (vp->v_type == VDIR) { 554 error = EISDIR; 555 goto update_inode; 556 } 557 if (error = ud_iaccess(ip, IWRITE, cr)) { 558 goto update_inode; 559 } 560 if (vap->va_size > MAXOFFSET_T) { 561 error = EFBIG; 562 goto update_inode; 563 } 564 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) { 565 goto update_inode; 566 } 567 } 568 /* 569 * Change file access or modified times. 
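 * Setting AT_MTIME below also stamps i_ctime with the current time.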
570 */ 571 if (mask & (AT_ATIME|AT_MTIME)) { 572 mutex_enter(&ip->i_tlock); 573 if (mask & AT_ATIME) { 574 ip->i_atime.tv_sec = vap->va_atime.tv_sec; 575 ip->i_atime.tv_nsec = vap->va_atime.tv_nsec; 576 ip->i_flag &= ~IACC; 577 } 578 if (mask & AT_MTIME) { 579 ip->i_mtime.tv_sec = vap->va_mtime.tv_sec; 580 ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec; 581 gethrestime(&now); 582 ip->i_ctime.tv_sec = now.tv_sec; 583 ip->i_ctime.tv_nsec = now.tv_nsec; 584 ip->i_flag &= ~(IUPD|ICHG); 585 ip->i_flag |= IMODTIME; 586 } 587 ip->i_flag |= IMOD; 588 mutex_exit(&ip->i_tlock); 589 } 590 591 update_inode: 592 if (curthread->t_flag & T_DONTPEND) { 593 ud_iupdat(ip, 1); 594 } else { 595 ITIMES_NOLOCK(ip); 596 } 597 rw_exit(&ip->i_contents); 598 rw_exit(&ip->i_rwlock); 599 600 return (error); 601 } 602 603 /* ARGSUSED */ 604 static int32_t 605 udf_access( 606 struct vnode *vp, 607 int32_t mode, 608 int32_t flags, 609 struct cred *cr, 610 caller_context_t *ct) 611 { 612 struct ud_inode *ip = VTOI(vp); 613 int32_t error; 614 615 ud_printf("udf_access\n"); 616 617 if (ip->i_udf == NULL) { 618 return (EIO); 619 } 620 621 error = ud_iaccess(ip, UD_UPERM2DPERM(mode), cr); 622 623 return (error); 624 } 625 626 int32_t udfs_stickyhack = 1; 627 628 /* ARGSUSED */ 629 static int32_t 630 udf_lookup( 631 struct vnode *dvp, 632 char *nm, 633 struct vnode **vpp, 634 struct pathname *pnp, 635 int32_t flags, 636 struct vnode *rdir, 637 struct cred *cr, 638 caller_context_t *ct, 639 int *direntflags, 640 pathname_t *realpnp) 641 { 642 int32_t error; 643 struct vnode *vp; 644 struct ud_inode *ip, *xip; 645 646 ud_printf("udf_lookup\n"); 647 /* 648 * Null component name is a synonym for directory being searched. 649 */ 650 if (*nm == '\0') { 651 VN_HOLD(dvp); 652 *vpp = dvp; 653 error = 0; 654 goto out; 655 } 656 657 /* 658 * Fast path: Check the directory name lookup cache. 659 */ 660 ip = VTOI(dvp); 661 if (vp = dnlc_lookup(dvp, nm)) { 662 /* 663 * Check accessibility of directory. 664 */ 665 if ((error = ud_iaccess(ip, IEXEC, cr)) != 0) { 666 VN_RELE(vp); 667 } 668 xip = VTOI(vp); 669 } else { 670 error = ud_dirlook(ip, nm, &xip, cr, 1); 671 ITIMES(ip); 672 } 673 674 if (error == 0) { 675 ip = xip; 676 *vpp = ITOV(ip); 677 if ((ip->i_type != VDIR) && 678 (ip->i_char & ISVTX) && 679 ((ip->i_perm & IEXEC) == 0) && 680 udfs_stickyhack) { 681 mutex_enter(&(*vpp)->v_lock); 682 (*vpp)->v_flag |= VISSWAP; 683 mutex_exit(&(*vpp)->v_lock); 684 } 685 ITIMES(ip); 686 /* 687 * If vnode is a device return special vnode instead. 688 */ 689 if (IS_DEVVP(*vpp)) { 690 struct vnode *newvp; 691 newvp = specvp(*vpp, (*vpp)->v_rdev, 692 (*vpp)->v_type, cr); 693 VN_RELE(*vpp); 694 if (newvp == NULL) { 695 error = ENOSYS; 696 } else { 697 *vpp = newvp; 698 } 699 } 700 } 701 out: 702 return (error); 703 } 704 705 /* ARGSUSED */ 706 static int32_t 707 udf_create( 708 struct vnode *dvp, 709 char *name, 710 struct vattr *vap, 711 enum vcexcl excl, 712 int32_t mode, 713 struct vnode **vpp, 714 struct cred *cr, 715 int32_t flag, 716 caller_context_t *ct, 717 vsecattr_t *vsecp) 718 { 719 int32_t error; 720 struct ud_inode *ip = VTOI(dvp), *xip; 721 722 ud_printf("udf_create\n"); 723 724 if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0) 725 vap->va_mode &= ~VSVTX; 726 727 if (*name == '\0') { 728 /* 729 * Null component name refers to the directory itself. 
730 */ 731 VN_HOLD(dvp); 732 ITIMES(ip); 733 error = EEXIST; 734 } else { 735 xip = NULL; 736 rw_enter(&ip->i_rwlock, RW_WRITER); 737 error = ud_direnter(ip, name, DE_CREATE, 738 (struct ud_inode *)0, (struct ud_inode *)0, 739 vap, &xip, cr, ct); 740 rw_exit(&ip->i_rwlock); 741 ITIMES(ip); 742 ip = xip; 743 } 744 #ifdef __lock_lint 745 rw_enter(&ip->i_contents, RW_WRITER); 746 #else 747 if (ip != NULL) { 748 rw_enter(&ip->i_contents, RW_WRITER); 749 } 750 #endif 751 752 /* 753 * If the file already exists and this is a non-exclusive create, 754 * check permissions and allow access for non-directories. 755 * Read-only create of an existing directory is also allowed. 756 * We fail an exclusive create of anything which already exists. 757 */ 758 if (error == EEXIST) { 759 if (excl == NONEXCL) { 760 if ((ip->i_type == VDIR) && (mode & VWRITE)) { 761 error = EISDIR; 762 } else if (mode) { 763 error = ud_iaccess(ip, 764 UD_UPERM2DPERM(mode), cr); 765 } else { 766 error = 0; 767 } 768 } 769 if (error) { 770 rw_exit(&ip->i_contents); 771 VN_RELE(ITOV(ip)); 772 goto out; 773 } else if ((ip->i_type == VREG) && 774 (vap->va_mask & AT_SIZE) && vap->va_size == 0) { 775 /* 776 * Truncate regular files, if requested by caller. 777 * Grab i_rwlock to make sure no one else is 778 * currently writing to the file (we promised 779 * bmap we would do this). 780 * Must get the locks in the correct order. 781 */ 782 if (ip->i_size == 0) { 783 ip->i_flag |= ICHG | IUPD; 784 } else { 785 rw_exit(&ip->i_contents); 786 rw_enter(&ip->i_rwlock, RW_WRITER); 787 rw_enter(&ip->i_contents, RW_WRITER); 788 (void) ud_itrunc(ip, 0, 0, cr); 789 rw_exit(&ip->i_rwlock); 790 } 791 vnevent_create(ITOV(ip), ct); 792 } 793 } 794 795 if (error == 0) { 796 *vpp = ITOV(ip); 797 ITIMES(ip); 798 } 799 #ifdef __lock_lint 800 rw_exit(&ip->i_contents); 801 #else 802 if (ip != NULL) { 803 rw_exit(&ip->i_contents); 804 } 805 #endif 806 if (error) { 807 goto out; 808 } 809 810 /* 811 * If vnode is a device return special vnode instead. 
812 */ 813 if (!error && IS_DEVVP(*vpp)) { 814 struct vnode *newvp; 815 816 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 817 VN_RELE(*vpp); 818 if (newvp == NULL) { 819 error = ENOSYS; 820 goto out; 821 } 822 *vpp = newvp; 823 } 824 out: 825 return (error); 826 } 827 828 /* ARGSUSED */ 829 static int32_t 830 udf_remove( 831 struct vnode *vp, 832 char *nm, 833 struct cred *cr, 834 caller_context_t *ct, 835 int flags) 836 { 837 int32_t error; 838 struct ud_inode *ip = VTOI(vp); 839 840 ud_printf("udf_remove\n"); 841 842 rw_enter(&ip->i_rwlock, RW_WRITER); 843 error = ud_dirremove(ip, nm, 844 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct); 845 rw_exit(&ip->i_rwlock); 846 ITIMES(ip); 847 848 return (error); 849 } 850 851 /* ARGSUSED */ 852 static int32_t 853 udf_link( 854 struct vnode *tdvp, 855 struct vnode *svp, 856 char *tnm, 857 struct cred *cr, 858 caller_context_t *ct, 859 int flags) 860 { 861 int32_t error; 862 struct vnode *realvp; 863 struct ud_inode *sip; 864 struct ud_inode *tdp; 865 866 ud_printf("udf_link\n"); 867 if (VOP_REALVP(svp, &realvp, ct) == 0) { 868 svp = realvp; 869 } 870 871 /* 872 * Do not allow links to directories 873 */ 874 if (svp->v_type == VDIR) { 875 return (EPERM); 876 } 877 878 sip = VTOI(svp); 879 880 if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0) 881 return (EPERM); 882 883 tdp = VTOI(tdvp); 884 885 rw_enter(&tdp->i_rwlock, RW_WRITER); 886 error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0, 887 sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct); 888 rw_exit(&tdp->i_rwlock); 889 ITIMES(sip); 890 ITIMES(tdp); 891 892 if (error == 0) { 893 vnevent_link(svp, ct); 894 } 895 896 return (error); 897 } 898 899 /* ARGSUSED */ 900 static int32_t 901 udf_rename( 902 struct vnode *sdvp, 903 char *snm, 904 struct vnode *tdvp, 905 char *tnm, 906 struct cred *cr, 907 caller_context_t *ct, 908 int flags) 909 { 910 int32_t error = 0; 911 struct udf_vfs *udf_vfsp; 912 struct ud_inode *sip; /* source inode */ 913 struct ud_inode *sdp, *tdp; /* source and target parent inode */ 914 struct vnode *realvp; 915 916 ud_printf("udf_rename\n"); 917 918 if (VOP_REALVP(tdvp, &realvp, ct) == 0) { 919 tdvp = realvp; 920 } 921 922 sdp = VTOI(sdvp); 923 tdp = VTOI(tdvp); 924 925 udf_vfsp = sdp->i_udf; 926 927 mutex_enter(&udf_vfsp->udf_rename_lck); 928 /* 929 * Look up inode of file we're supposed to rename. 930 */ 931 if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) { 932 mutex_exit(&udf_vfsp->udf_rename_lck); 933 return (error); 934 } 935 /* 936 * be sure this is not a directory with another file system mounted 937 * over it. If it is just give up the locks, and return with 938 * EBUSY 939 */ 940 if (vn_mountedvfs(ITOV(sip)) != NULL) { 941 error = EBUSY; 942 goto errout; 943 } 944 /* 945 * Make sure we can delete the source entry. This requires 946 * write permission on the containing directory. If that 947 * directory is "sticky" it further requires (except for 948 * privileged users) that the user own the directory or the 949 * source entry, or else have permission to write the source 950 * entry. 951 */ 952 rw_enter(&sdp->i_contents, RW_READER); 953 rw_enter(&sip->i_contents, RW_READER); 954 if ((error = ud_iaccess(sdp, IWRITE, cr)) != 0 || 955 (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) { 956 rw_exit(&sip->i_contents); 957 rw_exit(&sdp->i_contents); 958 ITIMES(sip); 959 goto errout; 960 } 961 962 /* 963 * Check for renaming '.' or '..' or alias of '.' 
964 */ 965 if ((strcmp(snm, ".") == 0) || 966 (strcmp(snm, "..") == 0) || 967 (sdp == sip)) { 968 error = EINVAL; 969 rw_exit(&sip->i_contents); 970 rw_exit(&sdp->i_contents); 971 goto errout; 972 } 973 rw_exit(&sip->i_contents); 974 rw_exit(&sdp->i_contents); 975 976 977 /* 978 * Link source to the target. 979 */ 980 rw_enter(&tdp->i_rwlock, RW_WRITER); 981 if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip, 982 (struct vattr *)0, (struct ud_inode **)0, cr, ct)) { 983 /* 984 * ESAME isn't really an error; it indicates that the 985 * operation should not be done because the source and target 986 * are the same file, but that no error should be reported. 987 */ 988 if (error == ESAME) { 989 error = 0; 990 } 991 rw_exit(&tdp->i_rwlock); 992 goto errout; 993 } 994 vnevent_rename_src(ITOV(sip), sdvp, snm, ct); 995 rw_exit(&tdp->i_rwlock); 996 997 rw_enter(&sdp->i_rwlock, RW_WRITER); 998 /* 999 * Unlink the source. 1000 * Remove the source entry. ud_dirremove() checks that the entry 1001 * still reflects sip, and returns an error if it doesn't. 1002 * If the entry has changed just forget about it. Release 1003 * the source inode. 1004 */ 1005 if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0, 1006 DR_RENAME, cr, ct)) == ENOENT) { 1007 error = 0; 1008 } 1009 rw_exit(&sdp->i_rwlock); 1010 errout: 1011 ITIMES(sdp); 1012 ITIMES(tdp); 1013 VN_RELE(ITOV(sip)); 1014 mutex_exit(&udf_vfsp->udf_rename_lck); 1015 1016 return (error); 1017 } 1018 1019 /* ARGSUSED */ 1020 static int32_t 1021 udf_mkdir( 1022 struct vnode *dvp, 1023 char *dirname, 1024 struct vattr *vap, 1025 struct vnode **vpp, 1026 struct cred *cr, 1027 caller_context_t *ct, 1028 int flags, 1029 vsecattr_t *vsecp) 1030 { 1031 int32_t error; 1032 struct ud_inode *ip; 1033 struct ud_inode *xip; 1034 1035 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 1036 1037 ud_printf("udf_mkdir\n"); 1038 1039 ip = VTOI(dvp); 1040 rw_enter(&ip->i_rwlock, RW_WRITER); 1041 error = ud_direnter(ip, dirname, DE_MKDIR, 1042 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct); 1043 rw_exit(&ip->i_rwlock); 1044 ITIMES(ip); 1045 if (error == 0) { 1046 ip = xip; 1047 *vpp = ITOV(ip); 1048 ITIMES(ip); 1049 } else if (error == EEXIST) { 1050 ITIMES(xip); 1051 VN_RELE(ITOV(xip)); 1052 } 1053 1054 return (error); 1055 } 1056 1057 /* ARGSUSED */ 1058 static int32_t 1059 udf_rmdir( 1060 struct vnode *vp, 1061 char *nm, 1062 struct vnode *cdir, 1063 struct cred *cr, 1064 caller_context_t *ct, 1065 int flags) 1066 { 1067 int32_t error; 1068 struct ud_inode *ip = VTOI(vp); 1069 1070 ud_printf("udf_rmdir\n"); 1071 1072 rw_enter(&ip->i_rwlock, RW_WRITER); 1073 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR, 1074 cr, ct); 1075 rw_exit(&ip->i_rwlock); 1076 ITIMES(ip); 1077 1078 return (error); 1079 } 1080 1081 /* ARGSUSED */ 1082 static int32_t 1083 udf_readdir( 1084 struct vnode *vp, 1085 struct uio *uiop, 1086 struct cred *cr, 1087 int32_t *eofp, 1088 caller_context_t *ct, 1089 int flags) 1090 { 1091 struct ud_inode *ip; 1092 struct dirent64 *nd; 1093 struct udf_vfs *udf_vfsp; 1094 int32_t error = 0, len, outcount = 0; 1095 uint32_t dirsiz, offset; 1096 uint32_t bufsize, ndlen, dummy; 1097 caddr_t outbuf; 1098 caddr_t outb, end_outb; 1099 struct iovec *iovp; 1100 1101 uint8_t *dname; 1102 int32_t length; 1103 1104 uint8_t *buf = NULL; 1105 1106 struct fbuf *fbp = NULL; 1107 struct file_id *fid; 1108 uint8_t *name; 1109 1110 1111 ud_printf("udf_readdir\n"); 1112 1113 ip = VTOI(vp); 1114 udf_vfsp = ip->i_udf; 1115 1116 
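	/*
	 * The code below walks the directory one file_id descriptor at a
	 * time via ud_get_next_fid(), synthesizing the "." and ".." entries,
	 * uncompressing on-media names with ud_uncompress(), and packing
	 * the results into dirent64 records that are copied out to the
	 * caller with uiomove().
	 */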
dirsiz = ip->i_size; 1117 if ((uiop->uio_offset >= dirsiz) || 1118 (ip->i_nlink <= 0)) { 1119 if (eofp) { 1120 *eofp = 1; 1121 } 1122 return (0); 1123 } 1124 1125 offset = uiop->uio_offset; 1126 iovp = uiop->uio_iov; 1127 bufsize = iovp->iov_len; 1128 1129 outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP); 1130 end_outb = outb + bufsize; 1131 nd = (struct dirent64 *)outbuf; 1132 1133 dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP); 1134 buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP); 1135 1136 if (offset == 0) { 1137 len = DIRENT64_RECLEN(1); 1138 if (((caddr_t)nd + len) >= end_outb) { 1139 error = EINVAL; 1140 goto end; 1141 } 1142 nd->d_ino = ip->i_icb_lbano; 1143 nd->d_reclen = (uint16_t)len; 1144 nd->d_off = 0x10; 1145 nd->d_name[0] = '.'; 1146 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1); 1147 nd = (struct dirent64 *)((char *)nd + nd->d_reclen); 1148 outcount++; 1149 } else if (offset == 0x10) { 1150 offset = 0; 1151 } 1152 1153 while (offset < dirsiz) { 1154 error = ud_get_next_fid(ip, &fbp, 1155 offset, &fid, &name, buf); 1156 if (error != 0) { 1157 break; 1158 } 1159 1160 if ((fid->fid_flags & FID_DELETED) == 0) { 1161 if (fid->fid_flags & FID_PARENT) { 1162 1163 len = DIRENT64_RECLEN(2); 1164 if (((caddr_t)nd + len) >= end_outb) { 1165 error = EINVAL; 1166 break; 1167 } 1168 1169 nd->d_ino = ip->i_icb_lbano; 1170 nd->d_reclen = (uint16_t)len; 1171 nd->d_off = offset + FID_LEN(fid); 1172 nd->d_name[0] = '.'; 1173 nd->d_name[1] = '.'; 1174 bzero(&nd->d_name[2], 1175 DIRENT64_NAMELEN(len) - 2); 1176 nd = (struct dirent64 *) 1177 ((char *)nd + nd->d_reclen); 1178 } else { 1179 if ((error = ud_uncompress(fid->fid_idlen, 1180 &length, name, dname)) != 0) { 1181 break; 1182 } 1183 if (length == 0) { 1184 offset += FID_LEN(fid); 1185 continue; 1186 } 1187 len = DIRENT64_RECLEN(length); 1188 if (((caddr_t)nd + len) >= end_outb) { 1189 if (!outcount) { 1190 error = EINVAL; 1191 } 1192 break; 1193 } 1194 (void) strncpy(nd->d_name, 1195 (caddr_t)dname, length); 1196 bzero(&nd->d_name[length], 1197 DIRENT64_NAMELEN(len) - length); 1198 nd->d_ino = ud_xlate_to_daddr(udf_vfsp, 1199 SWAP_16(fid->fid_icb.lad_ext_prn), 1200 SWAP_32(fid->fid_icb.lad_ext_loc), 1, 1201 &dummy); 1202 nd->d_reclen = (uint16_t)len; 1203 nd->d_off = offset + FID_LEN(fid); 1204 nd = (struct dirent64 *) 1205 ((char *)nd + nd->d_reclen); 1206 } 1207 outcount++; 1208 } 1209 1210 offset += FID_LEN(fid); 1211 } 1212 1213 end: 1214 if (fbp != NULL) { 1215 fbrelse(fbp, S_OTHER); 1216 } 1217 ndlen = ((char *)nd - outbuf); 1218 /* 1219 * In case of error do not call uiomove. 1220 * Return the error to the caller. 
1221 */ 1222 if ((error == 0) && (ndlen != 0)) { 1223 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop); 1224 uiop->uio_offset = offset; 1225 } 1226 kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize); 1227 kmem_free((caddr_t)dname, 1024); 1228 kmem_free(outbuf, (uint32_t)bufsize); 1229 if (eofp && error == 0) { 1230 *eofp = (uiop->uio_offset >= dirsiz); 1231 } 1232 return (error); 1233 } 1234 1235 /* ARGSUSED */ 1236 static int32_t 1237 udf_symlink( 1238 struct vnode *dvp, 1239 char *linkname, 1240 struct vattr *vap, 1241 char *target, 1242 struct cred *cr, 1243 caller_context_t *ct, 1244 int flags) 1245 { 1246 int32_t error = 0, outlen; 1247 uint32_t ioflag = 0; 1248 struct ud_inode *ip, *dip = VTOI(dvp); 1249 1250 struct path_comp *pc; 1251 int8_t *dname = NULL, *uname = NULL, *sp; 1252 1253 ud_printf("udf_symlink\n"); 1254 1255 ip = (struct ud_inode *)0; 1256 vap->va_type = VLNK; 1257 vap->va_rdev = 0; 1258 1259 rw_enter(&dip->i_rwlock, RW_WRITER); 1260 error = ud_direnter(dip, linkname, DE_CREATE, 1261 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct); 1262 rw_exit(&dip->i_rwlock); 1263 if (error == 0) { 1264 dname = kmem_zalloc(1024, KM_SLEEP); 1265 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1266 1267 pc = (struct path_comp *)uname; 1268 /* 1269 * If the first character in target is "/" 1270 * then skip it and create entry for it 1271 */ 1272 if (*target == '/') { 1273 pc->pc_type = 2; 1274 pc->pc_len = 0; 1275 pc = (struct path_comp *)(((char *)pc) + 4); 1276 while (*target == '/') { 1277 target++; 1278 } 1279 } 1280 1281 while (*target != NULL) { 1282 sp = target; 1283 while ((*target != '/') && (*target != '\0')) { 1284 target ++; 1285 } 1286 /* 1287 * We got the next component of the 1288 * path name. Create path_comp of 1289 * appropriate type 1290 */ 1291 if (((target - sp) == 1) && (*sp == '.')) { 1292 /* 1293 * Dot entry. 1294 */ 1295 pc->pc_type = 4; 1296 pc = (struct path_comp *)(((char *)pc) + 4); 1297 } else if (((target - sp) == 2) && 1298 (*sp == '.') && ((*(sp + 1)) == '.')) { 1299 /* 1300 * DotDot entry. 
1301 */ 1302 pc->pc_type = 3; 1303 pc = (struct path_comp *)(((char *)pc) + 4); 1304 } else { 1305 /* 1306 * convert the user given name 1307 * into appropriate form to be put 1308 * on the media 1309 */ 1310 outlen = 1024; /* set to size of dname */ 1311 if (error = ud_compress(target - sp, &outlen, 1312 (uint8_t *)sp, (uint8_t *)dname)) { 1313 break; 1314 } 1315 pc->pc_type = 5; 1316 /* LINTED */ 1317 pc->pc_len = outlen; 1318 dname[outlen] = '\0'; 1319 (void) strcpy((char *)pc->pc_id, dname); 1320 pc = (struct path_comp *) 1321 (((char *)pc) + 4 + outlen); 1322 } 1323 while (*target == '/') { 1324 target++; 1325 } 1326 if (*target == NULL) { 1327 break; 1328 } 1329 } 1330 1331 rw_enter(&ip->i_contents, RW_WRITER); 1332 if (error == 0) { 1333 ioflag = FWRITE; 1334 if (curthread->t_flag & T_DONTPEND) { 1335 ioflag |= FDSYNC; 1336 } 1337 error = ud_rdwri(UIO_WRITE, ioflag, ip, 1338 uname, ((int8_t *)pc) - uname, 1339 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr); 1340 } 1341 if (error) { 1342 ud_idrop(ip); 1343 rw_exit(&ip->i_contents); 1344 rw_enter(&dip->i_rwlock, RW_WRITER); 1345 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0, 1346 (struct vnode *)0, DR_REMOVE, cr, ct); 1347 rw_exit(&dip->i_rwlock); 1348 goto update_inode; 1349 } 1350 rw_exit(&ip->i_contents); 1351 } 1352 1353 if ((error == 0) || (error == EEXIST)) { 1354 VN_RELE(ITOV(ip)); 1355 } 1356 1357 update_inode: 1358 ITIMES(VTOI(dvp)); 1359 if (uname != NULL) { 1360 kmem_free(uname, PAGESIZE); 1361 } 1362 if (dname != NULL) { 1363 kmem_free(dname, 1024); 1364 } 1365 1366 return (error); 1367 } 1368 1369 /* ARGSUSED */ 1370 static int32_t 1371 udf_readlink( 1372 struct vnode *vp, 1373 struct uio *uiop, 1374 struct cred *cr, 1375 caller_context_t *ct) 1376 { 1377 int32_t error = 0, off, id_len, size, len; 1378 int8_t *dname = NULL, *uname = NULL; 1379 struct ud_inode *ip; 1380 struct fbuf *fbp = NULL; 1381 struct path_comp *pc; 1382 1383 ud_printf("udf_readlink\n"); 1384 1385 if (vp->v_type != VLNK) { 1386 return (EINVAL); 1387 } 1388 1389 ip = VTOI(vp); 1390 size = ip->i_size; 1391 if (size > PAGESIZE) { 1392 return (EIO); 1393 } 1394 1395 if (size == 0) { 1396 return (0); 1397 } 1398 1399 dname = kmem_zalloc(1024, KM_SLEEP); 1400 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1401 1402 rw_enter(&ip->i_contents, RW_READER); 1403 1404 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) { 1405 goto end; 1406 } 1407 1408 off = 0; 1409 1410 while (off < size) { 1411 pc = (struct path_comp *)(fbp->fb_addr + off); 1412 switch (pc->pc_type) { 1413 case 1 : 1414 (void) strcpy(uname, ip->i_udf->udf_fsmnt); 1415 (void) strcat(uname, "/"); 1416 break; 1417 case 2 : 1418 if (pc->pc_len != 0) { 1419 goto end; 1420 } 1421 uname[0] = '/'; 1422 uname[1] = '\0'; 1423 break; 1424 case 3 : 1425 (void) strcat(uname, "../"); 1426 break; 1427 case 4 : 1428 (void) strcat(uname, "./"); 1429 break; 1430 case 5 : 1431 if ((error = ud_uncompress(pc->pc_len, &id_len, 1432 pc->pc_id, (uint8_t *)dname)) != 0) { 1433 break; 1434 } 1435 dname[id_len] = '\0'; 1436 (void) strcat(uname, dname); 1437 (void) strcat(uname, "/"); 1438 break; 1439 default : 1440 error = EINVAL; 1441 goto end; 1442 } 1443 off += 4 + pc->pc_len; 1444 } 1445 len = strlen(uname) - 1; 1446 if (uname[len] == '/') { 1447 if (len == 0) { 1448 /* 1449 * special case link to / 1450 */ 1451 len = 1; 1452 } else { 1453 uname[len] = '\0'; 1454 } 1455 } 1456 1457 error = uiomove(uname, len, UIO_READ, uiop); 1458 1459 ITIMES(ip); 1460 1461 end: 1462 if (fbp != NULL) { 1463 fbrelse(fbp, S_OTHER); 
1464 } 1465 rw_exit(&ip->i_contents); 1466 if (uname != NULL) { 1467 kmem_free(uname, PAGESIZE); 1468 } 1469 if (dname != NULL) { 1470 kmem_free(dname, 1024); 1471 } 1472 return (error); 1473 } 1474 1475 /* ARGSUSED */ 1476 static int32_t 1477 udf_fsync( 1478 struct vnode *vp, 1479 int32_t syncflag, 1480 struct cred *cr, 1481 caller_context_t *ct) 1482 { 1483 int32_t error = 0; 1484 struct ud_inode *ip = VTOI(vp); 1485 1486 ud_printf("udf_fsync\n"); 1487 1488 rw_enter(&ip->i_contents, RW_WRITER); 1489 if (!(IS_SWAPVP(vp))) { 1490 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */ 1491 } 1492 if (error == 0) { 1493 error = ud_sync_indir(ip); 1494 } 1495 ITIMES(ip); /* XXX: is this necessary ??? */ 1496 rw_exit(&ip->i_contents); 1497 1498 return (error); 1499 } 1500 1501 /* ARGSUSED */ 1502 static void 1503 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 1504 { 1505 ud_printf("udf_iinactive\n"); 1506 1507 ud_iinactive(VTOI(vp), cr); 1508 } 1509 1510 /* ARGSUSED */ 1511 static int32_t 1512 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1513 { 1514 struct udf_fid *udfidp; 1515 struct ud_inode *ip = VTOI(vp); 1516 1517 ud_printf("udf_fid\n"); 1518 1519 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) { 1520 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1521 return (ENOSPC); 1522 } 1523 1524 udfidp = (struct udf_fid *)fidp; 1525 bzero((char *)udfidp, sizeof (struct udf_fid)); 1526 rw_enter(&ip->i_contents, RW_READER); 1527 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1528 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff; 1529 udfidp->udfid_prn = ip->i_icb_prn; 1530 udfidp->udfid_icb_lbn = ip->i_icb_block; 1531 rw_exit(&ip->i_contents); 1532 1533 return (0); 1534 } 1535 1536 /* ARGSUSED2 */ 1537 static int 1538 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1539 { 1540 struct ud_inode *ip = VTOI(vp); 1541 1542 ud_printf("udf_rwlock\n"); 1543 1544 if (write_lock) { 1545 rw_enter(&ip->i_rwlock, RW_WRITER); 1546 } else { 1547 rw_enter(&ip->i_rwlock, RW_READER); 1548 } 1549 #ifdef __lock_lint 1550 rw_exit(&ip->i_rwlock); 1551 #endif 1552 return (write_lock); 1553 } 1554 1555 /* ARGSUSED */ 1556 static void 1557 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1558 { 1559 struct ud_inode *ip = VTOI(vp); 1560 1561 ud_printf("udf_rwunlock\n"); 1562 1563 #ifdef __lock_lint 1564 rw_enter(&ip->i_rwlock, RW_WRITER); 1565 #endif 1566 1567 rw_exit(&ip->i_rwlock); 1568 1569 } 1570 1571 /* ARGSUSED */ 1572 static int32_t 1573 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 1574 { 1575 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1576 } 1577 1578 static int32_t 1579 udf_frlock( 1580 struct vnode *vp, 1581 int32_t cmd, 1582 struct flock64 *bfp, 1583 int32_t flag, 1584 offset_t offset, 1585 struct flk_callback *flk_cbp, 1586 cred_t *cr, 1587 caller_context_t *ct) 1588 { 1589 struct ud_inode *ip = VTOI(vp); 1590 1591 ud_printf("udf_frlock\n"); 1592 1593 /* 1594 * If file is being mapped, disallow frlock. 1595 * XXX I am not holding tlock while checking i_mapcnt because the 1596 * current locking strategy drops all locks before calling fs_frlock. 1597 * So, mapcnt could change before we enter fs_frlock making is 1598 * meaningless to have held tlock in the first place. 
1599 */ 1600 if ((ip->i_mapcnt > 0) && 1601 (MANDLOCK(vp, ip->i_char))) { 1602 return (EAGAIN); 1603 } 1604 1605 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 1606 } 1607 1608 /*ARGSUSED6*/ 1609 static int32_t 1610 udf_space( 1611 struct vnode *vp, 1612 int32_t cmd, 1613 struct flock64 *bfp, 1614 int32_t flag, 1615 offset_t offset, 1616 cred_t *cr, 1617 caller_context_t *ct) 1618 { 1619 int32_t error = 0; 1620 1621 ud_printf("udf_space\n"); 1622 1623 if (cmd != F_FREESP) { 1624 error = EINVAL; 1625 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) { 1626 error = ud_freesp(vp, bfp, flag, cr); 1627 } 1628 1629 return (error); 1630 } 1631 1632 /* ARGSUSED */ 1633 static int32_t 1634 udf_getpage( 1635 struct vnode *vp, 1636 offset_t off, 1637 size_t len, 1638 uint32_t *protp, 1639 struct page **plarr, 1640 size_t plsz, 1641 struct seg *seg, 1642 caddr_t addr, 1643 enum seg_rw rw, 1644 struct cred *cr, 1645 caller_context_t *ct) 1646 { 1647 struct ud_inode *ip = VTOI(vp); 1648 int32_t error, has_holes, beyond_eof, seqmode, dolock; 1649 int32_t pgsize = PAGESIZE; 1650 struct udf_vfs *udf_vfsp = ip->i_udf; 1651 page_t **pl; 1652 u_offset_t pgoff, eoff, uoff; 1653 krw_t rwtype; 1654 caddr_t pgaddr; 1655 1656 ud_printf("udf_getpage\n"); 1657 1658 uoff = (u_offset_t)off; /* type conversion */ 1659 if (protp) { 1660 *protp = PROT_ALL; 1661 } 1662 if (vp->v_flag & VNOMAP) { 1663 return (ENOSYS); 1664 } 1665 seqmode = ip->i_nextr == uoff && rw != S_CREATE; 1666 1667 rwtype = RW_READER; 1668 dolock = (rw_owner(&ip->i_contents) != curthread); 1669 retrylock: 1670 #ifdef __lock_lint 1671 rw_enter(&ip->i_contents, rwtype); 1672 #else 1673 if (dolock) { 1674 rw_enter(&ip->i_contents, rwtype); 1675 } 1676 #endif 1677 1678 /* 1679 * We may be getting called as a side effect of a bmap using 1680 * fbread() when the blocks might be being allocated and the 1681 * size has not yet been up'ed. In this case we want to be 1682 * able to return zero pages if we get back UDF_HOLE from 1683 * calling bmap for a non write case here. We also might have 1684 * to read some frags from the disk into a page if we are 1685 * extending the number of frags for a given lbn in bmap(). 1686 */ 1687 beyond_eof = uoff + len > ip->i_size + PAGEOFFSET; 1688 if (beyond_eof && seg != segkmap) { 1689 #ifdef __lock_lint 1690 rw_exit(&ip->i_contents); 1691 #else 1692 if (dolock) { 1693 rw_exit(&ip->i_contents); 1694 } 1695 #endif 1696 return (EFAULT); 1697 } 1698 1699 /* 1700 * Must hold i_contents lock throughout the call to pvn_getpages 1701 * since locked pages are returned from each call to ud_getapage. 1702 * Must *not* return locked pages and then try for contents lock 1703 * due to lock ordering requirements (inode > page) 1704 */ 1705 1706 has_holes = ud_bmap_has_holes(ip); 1707 1708 if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) { 1709 int32_t blk_size, count; 1710 u_offset_t offset; 1711 1712 /* 1713 * We must acquire the RW_WRITER lock in order to 1714 * call bmap_write(). 1715 */ 1716 if (dolock && rwtype == RW_READER) { 1717 rwtype = RW_WRITER; 1718 1719 if (!rw_tryupgrade(&ip->i_contents)) { 1720 1721 rw_exit(&ip->i_contents); 1722 1723 goto retrylock; 1724 } 1725 } 1726 1727 /* 1728 * May be allocating disk blocks for holes here as 1729 * a result of mmap faults. write(2) does the bmap_write 1730 * in rdip/wrip, not here. We are not dealing with frags 1731 * in this case. 
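 * The loop below calls ud_bmap_write() one logical block at a time,
 * up to the end of the request or i_size, whichever comes first.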
1732 */ 1733 offset = uoff; 1734 while ((offset < uoff + len) && 1735 (offset < ip->i_size)) { 1736 /* 1737 * the variable "bnp" is to simplify the expression for 1738 * the compiler; * just passing in &bn to bmap_write 1739 * causes a compiler "loop" 1740 */ 1741 1742 blk_size = udf_vfsp->udf_lbsize; 1743 if ((offset + blk_size) > ip->i_size) { 1744 count = ip->i_size - offset; 1745 } else { 1746 count = blk_size; 1747 } 1748 error = ud_bmap_write(ip, offset, count, 0, cr); 1749 if (error) { 1750 goto update_inode; 1751 } 1752 offset += count; /* XXX - make this contig */ 1753 } 1754 } 1755 1756 /* 1757 * Can be a reader from now on. 1758 */ 1759 #ifdef __lock_lint 1760 if (rwtype == RW_WRITER) { 1761 rw_downgrade(&ip->i_contents); 1762 } 1763 #else 1764 if (dolock && rwtype == RW_WRITER) { 1765 rw_downgrade(&ip->i_contents); 1766 } 1767 #endif 1768 1769 /* 1770 * We remove PROT_WRITE in cases when the file has UDF holes 1771 * because we don't want to call bmap_read() to check each 1772 * page if it is backed with a disk block. 1773 */ 1774 if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) { 1775 *protp &= ~PROT_WRITE; 1776 } 1777 1778 error = 0; 1779 1780 /* 1781 * The loop looks up pages in the range <off, off + len). 1782 * For each page, we first check if we should initiate an asynchronous 1783 * read ahead before we call page_lookup (we may sleep in page_lookup 1784 * for a previously initiated disk read). 1785 */ 1786 eoff = (uoff + len); 1787 for (pgoff = uoff, pgaddr = addr, pl = plarr; 1788 pgoff < eoff; /* empty */) { 1789 page_t *pp; 1790 u_offset_t nextrio; 1791 se_t se; 1792 1793 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED); 1794 1795 /* 1796 * Handle async getpage (faultahead) 1797 */ 1798 if (plarr == NULL) { 1799 ip->i_nextrio = pgoff; 1800 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1801 pgoff += pgsize; 1802 pgaddr += pgsize; 1803 continue; 1804 } 1805 1806 /* 1807 * Check if we should initiate read ahead of next cluster. 1808 * We call page_exists only when we need to confirm that 1809 * we have the current page before we initiate the read ahead. 1810 */ 1811 nextrio = ip->i_nextrio; 1812 if (seqmode && 1813 pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio && 1814 nextrio < ip->i_size && page_exists(vp, pgoff)) 1815 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1816 1817 if ((pp = page_lookup(vp, pgoff, se)) != NULL) { 1818 1819 /* 1820 * We found the page in the page cache. 1821 */ 1822 *pl++ = pp; 1823 pgoff += pgsize; 1824 pgaddr += pgsize; 1825 len -= pgsize; 1826 plsz -= pgsize; 1827 } else { 1828 1829 /* 1830 * We have to create the page, or read it from disk. 1831 */ 1832 if (error = ud_getpage_miss(vp, pgoff, len, 1833 seg, pgaddr, pl, plsz, rw, seqmode)) { 1834 goto error_out; 1835 } 1836 1837 while (*pl != NULL) { 1838 pl++; 1839 pgoff += pgsize; 1840 pgaddr += pgsize; 1841 len -= pgsize; 1842 plsz -= pgsize; 1843 } 1844 } 1845 } 1846 1847 /* 1848 * Return pages up to plsz if they are in the page cache. 1849 * We cannot return pages if there is a chance that they are 1850 * backed with a UDF hole and rw is S_WRITE or S_CREATE. 
1851 */ 1852 if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) { 1853 1854 ASSERT((protp == NULL) || 1855 !(has_holes && (*protp & PROT_WRITE))); 1856 1857 eoff = pgoff + plsz; 1858 while (pgoff < eoff) { 1859 page_t *pp; 1860 1861 if ((pp = page_lookup_nowait(vp, pgoff, 1862 SE_SHARED)) == NULL) 1863 break; 1864 1865 *pl++ = pp; 1866 pgoff += pgsize; 1867 plsz -= pgsize; 1868 } 1869 } 1870 1871 if (plarr) 1872 *pl = NULL; /* Terminate page list */ 1873 ip->i_nextr = pgoff; 1874 1875 error_out: 1876 if (error && plarr) { 1877 /* 1878 * Release any pages we have locked. 1879 */ 1880 while (pl > &plarr[0]) 1881 page_unlock(*--pl); 1882 1883 plarr[0] = NULL; 1884 } 1885 1886 update_inode: 1887 #ifdef __lock_lint 1888 rw_exit(&ip->i_contents); 1889 #else 1890 if (dolock) { 1891 rw_exit(&ip->i_contents); 1892 } 1893 #endif 1894 1895 /* 1896 * If the inode is not already marked for IACC (in rwip() for read) 1897 * and the inode is not marked for no access time update (in rwip() 1898 * for write) then update the inode access time and mod time now. 1899 */ 1900 mutex_enter(&ip->i_tlock); 1901 if ((ip->i_flag & (IACC | INOACC)) == 0) { 1902 if ((rw != S_OTHER) && (ip->i_type != VDIR)) { 1903 ip->i_flag |= IACC; 1904 } 1905 if (rw == S_WRITE) { 1906 ip->i_flag |= IUPD; 1907 } 1908 ITIMES_NOLOCK(ip); 1909 } 1910 mutex_exit(&ip->i_tlock); 1911 1912 return (error); 1913 } 1914 1915 int32_t ud_delay = 1; 1916 1917 /* ARGSUSED */ 1918 static int32_t 1919 udf_putpage( 1920 struct vnode *vp, 1921 offset_t off, 1922 size_t len, 1923 int32_t flags, 1924 struct cred *cr, 1925 caller_context_t *ct) 1926 { 1927 struct ud_inode *ip; 1928 int32_t error = 0; 1929 1930 ud_printf("udf_putpage\n"); 1931 1932 ip = VTOI(vp); 1933 #ifdef __lock_lint 1934 rw_enter(&ip->i_contents, RW_WRITER); 1935 #endif 1936 1937 if (vp->v_count == 0) { 1938 cmn_err(CE_WARN, "ud_putpage : bad v_count"); 1939 error = EINVAL; 1940 goto out; 1941 } 1942 1943 if (vp->v_flag & VNOMAP) { 1944 error = ENOSYS; 1945 goto out; 1946 } 1947 1948 if (flags & B_ASYNC) { 1949 if (ud_delay && len && 1950 (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) { 1951 mutex_enter(&ip->i_tlock); 1952 1953 /* 1954 * If nobody stalled, start a new cluster. 1955 */ 1956 if (ip->i_delaylen == 0) { 1957 ip->i_delayoff = off; 1958 ip->i_delaylen = len; 1959 mutex_exit(&ip->i_tlock); 1960 goto out; 1961 } 1962 1963 /* 1964 * If we have a full cluster or they are not contig, 1965 * then push last cluster and start over. 1966 */ 1967 if (ip->i_delaylen >= WR_CLUSTSZ(ip) || 1968 ip->i_delayoff + ip->i_delaylen != off) { 1969 u_offset_t doff; 1970 size_t dlen; 1971 1972 doff = ip->i_delayoff; 1973 dlen = ip->i_delaylen; 1974 ip->i_delayoff = off; 1975 ip->i_delaylen = len; 1976 mutex_exit(&ip->i_tlock); 1977 error = ud_putpages(vp, doff, dlen, flags, cr); 1978 /* LMXXX - flags are new val, not old */ 1979 goto out; 1980 } 1981 1982 /* 1983 * There is something there, it's not full, and 1984 * it is contig. 1985 */ 1986 ip->i_delaylen += len; 1987 mutex_exit(&ip->i_tlock); 1988 goto out; 1989 } 1990 1991 /* 1992 * Must have weird flags or we are not clustering. 
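 * Fall through and push the pages with ud_putpages() using the
 * caller's flags.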
1993 */ 1994 } 1995 1996 error = ud_putpages(vp, off, len, flags, cr); 1997 1998 out: 1999 #ifdef __lock_lint 2000 rw_exit(&ip->i_contents); 2001 #endif 2002 return (error); 2003 } 2004 2005 /* ARGSUSED */ 2006 static int32_t 2007 udf_map( 2008 struct vnode *vp, 2009 offset_t off, 2010 struct as *as, 2011 caddr_t *addrp, 2012 size_t len, 2013 uint8_t prot, 2014 uint8_t maxprot, 2015 uint32_t flags, 2016 struct cred *cr, 2017 caller_context_t *ct) 2018 { 2019 struct segvn_crargs vn_a; 2020 int32_t error = 0; 2021 2022 ud_printf("udf_map\n"); 2023 2024 if (vp->v_flag & VNOMAP) { 2025 error = ENOSYS; 2026 goto end; 2027 } 2028 2029 if ((off < (offset_t)0) || 2030 ((off + len) < (offset_t)0)) { 2031 error = EINVAL; 2032 goto end; 2033 } 2034 2035 if (vp->v_type != VREG) { 2036 error = ENODEV; 2037 goto end; 2038 } 2039 2040 /* 2041 * If file is being locked, disallow mapping. 2042 */ 2043 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) { 2044 error = EAGAIN; 2045 goto end; 2046 } 2047 2048 as_rangelock(as); 2049 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 2050 if (error != 0) { 2051 as_rangeunlock(as); 2052 goto end; 2053 } 2054 2055 vn_a.vp = vp; 2056 vn_a.offset = off; 2057 vn_a.type = flags & MAP_TYPE; 2058 vn_a.prot = prot; 2059 vn_a.maxprot = maxprot; 2060 vn_a.cred = cr; 2061 vn_a.amp = NULL; 2062 vn_a.flags = flags & ~MAP_TYPE; 2063 vn_a.szc = 0; 2064 vn_a.lgrp_mem_policy_flags = 0; 2065 2066 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a); 2067 as_rangeunlock(as); 2068 2069 end: 2070 return (error); 2071 } 2072 2073 /* ARGSUSED */ 2074 static int32_t 2075 udf_addmap(struct vnode *vp, 2076 offset_t off, 2077 struct as *as, 2078 caddr_t addr, 2079 size_t len, 2080 uint8_t prot, 2081 uint8_t maxprot, 2082 uint32_t flags, 2083 struct cred *cr, 2084 caller_context_t *ct) 2085 { 2086 struct ud_inode *ip = VTOI(vp); 2087 2088 ud_printf("udf_addmap\n"); 2089 2090 if (vp->v_flag & VNOMAP) { 2091 return (ENOSYS); 2092 } 2093 2094 mutex_enter(&ip->i_tlock); 2095 ip->i_mapcnt += btopr(len); 2096 mutex_exit(&ip->i_tlock); 2097 2098 return (0); 2099 } 2100 2101 /* ARGSUSED */ 2102 static int32_t 2103 udf_delmap( 2104 struct vnode *vp, offset_t off, 2105 struct as *as, 2106 caddr_t addr, 2107 size_t len, 2108 uint32_t prot, 2109 uint32_t maxprot, 2110 uint32_t flags, 2111 struct cred *cr, 2112 caller_context_t *ct) 2113 { 2114 struct ud_inode *ip = VTOI(vp); 2115 2116 ud_printf("udf_delmap\n"); 2117 2118 if (vp->v_flag & VNOMAP) { 2119 return (ENOSYS); 2120 } 2121 2122 mutex_enter(&ip->i_tlock); 2123 ip->i_mapcnt -= btopr(len); /* Count released mappings */ 2124 ASSERT(ip->i_mapcnt >= 0); 2125 mutex_exit(&ip->i_tlock); 2126 2127 return (0); 2128 } 2129 2130 /* ARGSUSED */ 2131 static int32_t 2132 udf_l_pathconf( 2133 struct vnode *vp, 2134 int32_t cmd, 2135 ulong_t *valp, 2136 struct cred *cr, 2137 caller_context_t *ct) 2138 { 2139 int32_t error = 0; 2140 2141 ud_printf("udf_l_pathconf\n"); 2142 2143 if (cmd == _PC_FILESIZEBITS) { 2144 /* 2145 * udf supports 64 bits as file size 2146 * but there are several other restrictions 2147 * it only supports 32-bit block numbers and 2148 * daddr32_t is only and int32_t so taking these 2149 * into account we can stay just as where ufs is 2150 */ 2151 *valp = 41; 2152 } else { 2153 error = fs_pathconf(vp, cmd, valp, cr, ct); 2154 } 2155 2156 return (error); 2157 } 2158 2159 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0; 2160 #ifndef __lint 2161 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads)) 2162 
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes)) 2163 #endif 2164 /* 2165 * Assumption is that there will not be a pageio request 2166 * to a enbedded file 2167 */ 2168 /* ARGSUSED */ 2169 static int32_t 2170 udf_pageio( 2171 struct vnode *vp, 2172 struct page *pp, 2173 u_offset_t io_off, 2174 size_t io_len, 2175 int32_t flags, 2176 struct cred *cr, 2177 caller_context_t *ct) 2178 { 2179 daddr_t bn; 2180 struct buf *bp; 2181 struct ud_inode *ip = VTOI(vp); 2182 int32_t dolock, error = 0, contig, multi_io; 2183 size_t done_len = 0, cur_len = 0; 2184 page_t *npp = NULL, *opp = NULL, *cpp = pp; 2185 2186 if (pp == NULL) { 2187 return (EINVAL); 2188 } 2189 2190 dolock = (rw_owner(&ip->i_contents) != curthread); 2191 2192 /* 2193 * We need a better check. Ideally, we would use another 2194 * vnodeops so that hlocked and forcibly unmounted file 2195 * systems would return EIO where appropriate and w/o the 2196 * need for these checks. 2197 */ 2198 if (ip->i_udf == NULL) { 2199 return (EIO); 2200 } 2201 2202 #ifdef __lock_lint 2203 rw_enter(&ip->i_contents, RW_READER); 2204 #else 2205 if (dolock) { 2206 rw_enter(&ip->i_contents, RW_READER); 2207 } 2208 #endif 2209 2210 /* 2211 * Break the io request into chunks, one for each contiguous 2212 * stretch of disk blocks in the target file. 2213 */ 2214 while (done_len < io_len) { 2215 ASSERT(cpp); 2216 bp = NULL; 2217 contig = 0; 2218 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len), 2219 &bn, &contig)) { 2220 break; 2221 } 2222 2223 if (bn == UDF_HOLE) { /* No holey swapfiles */ 2224 cmn_err(CE_WARN, "SWAP file has HOLES"); 2225 error = EINVAL; 2226 break; 2227 } 2228 2229 cur_len = MIN(io_len - done_len, contig); 2230 2231 /* 2232 * Check if more than one I/O is 2233 * required to complete the given 2234 * I/O operation 2235 */ 2236 if (ip->i_udf->udf_lbsize < PAGESIZE) { 2237 if (cur_len >= PAGESIZE) { 2238 multi_io = 0; 2239 cur_len &= PAGEMASK; 2240 } else { 2241 multi_io = 1; 2242 cur_len = MIN(io_len - done_len, PAGESIZE); 2243 } 2244 } 2245 page_list_break(&cpp, &npp, btop(cur_len)); 2246 2247 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags); 2248 ASSERT(bp != NULL); 2249 2250 bp->b_edev = ip->i_dev; 2251 bp->b_dev = cmpdev(ip->i_dev); 2252 bp->b_blkno = bn; 2253 bp->b_un.b_addr = (caddr_t)0; 2254 bp->b_file = vp; 2255 bp->b_offset = (offset_t)(io_off + done_len); 2256 2257 /* 2258 * ub.ub_pageios.value.ul++; 2259 */ 2260 if (multi_io == 0) { 2261 (void) bdev_strategy(bp); 2262 } else { 2263 error = ud_multi_strat(ip, cpp, bp, 2264 (u_offset_t)(io_off + done_len)); 2265 if (error != 0) { 2266 pageio_done(bp); 2267 break; 2268 } 2269 } 2270 if (flags & B_READ) { 2271 ud_pageio_reads++; 2272 } else { 2273 ud_pageio_writes++; 2274 } 2275 2276 /* 2277 * If the request is not B_ASYNC, wait for i/o to complete 2278 * and re-assemble the page list to return to the caller. 2279 * If it is B_ASYNC we leave the page list in pieces and 2280 * cleanup() will dispose of them. 
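 * In the synchronous case each chunk is biowait()ed and the pages are
 * re-linked onto opp for return to the caller.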
2281 */ 2282 if ((flags & B_ASYNC) == 0) { 2283 error = biowait(bp); 2284 pageio_done(bp); 2285 if (error) { 2286 break; 2287 } 2288 page_list_concat(&opp, &cpp); 2289 } 2290 cpp = npp; 2291 npp = NULL; 2292 done_len += cur_len; 2293 } 2294 2295 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len)); 2296 if (error) { 2297 if (flags & B_ASYNC) { 2298 /* Cleanup unprocessed parts of list */ 2299 page_list_concat(&cpp, &npp); 2300 if (flags & B_READ) { 2301 pvn_read_done(cpp, B_ERROR); 2302 } else { 2303 pvn_write_done(cpp, B_ERROR); 2304 } 2305 } else { 2306 /* Re-assemble list and let caller clean up */ 2307 page_list_concat(&opp, &cpp); 2308 page_list_concat(&opp, &npp); 2309 } 2310 } 2311 2312 #ifdef __lock_lint 2313 rw_exit(&ip->i_contents); 2314 #else 2315 if (dolock) { 2316 rw_exit(&ip->i_contents); 2317 } 2318 #endif 2319 return (error); 2320 } 2321 2322 2323 2324 2325 /* -------------------- local functions --------------------------- */ 2326 2327 2328 2329 int32_t 2330 ud_rdwri(enum uio_rw rw, int32_t ioflag, 2331 struct ud_inode *ip, caddr_t base, int32_t len, 2332 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr) 2333 { 2334 int32_t error; 2335 struct uio auio; 2336 struct iovec aiov; 2337 2338 ud_printf("ud_rdwri\n"); 2339 2340 bzero((caddr_t)&auio, sizeof (uio_t)); 2341 bzero((caddr_t)&aiov, sizeof (iovec_t)); 2342 2343 aiov.iov_base = base; 2344 aiov.iov_len = len; 2345 auio.uio_iov = &aiov; 2346 auio.uio_iovcnt = 1; 2347 auio.uio_loffset = offset; 2348 auio.uio_segflg = (int16_t)seg; 2349 auio.uio_resid = len; 2350 2351 if (rw == UIO_WRITE) { 2352 auio.uio_fmode = FWRITE; 2353 auio.uio_extflg = UIO_COPY_DEFAULT; 2354 auio.uio_llimit = curproc->p_fsz_ctl; 2355 error = ud_wrip(ip, &auio, ioflag, cr); 2356 } else { 2357 auio.uio_fmode = FREAD; 2358 auio.uio_extflg = UIO_COPY_CACHED; 2359 auio.uio_llimit = MAXOFFSET_T; 2360 error = ud_rdip(ip, &auio, ioflag, cr); 2361 } 2362 2363 if (aresid) { 2364 *aresid = auio.uio_resid; 2365 } else if (auio.uio_resid) { 2366 error = EIO; 2367 } 2368 return (error); 2369 } 2370 2371 /* 2372 * Free behind hacks. The pager is busted. 2373 * XXX - need to pass the information down to writedone() in a flag like B_SEQ 2374 * or B_FREE_IF_TIGHT_ON_MEMORY. 2375 */ 2376 int32_t ud_freebehind = 1; 2377 int32_t ud_smallfile = 32 * 1024; 2378 2379 /* ARGSUSED */ 2380 int32_t 2381 ud_getpage_miss(struct vnode *vp, u_offset_t off, 2382 size_t len, struct seg *seg, caddr_t addr, page_t *pl[], 2383 size_t plsz, enum seg_rw rw, int32_t seq) 2384 { 2385 struct ud_inode *ip = VTOI(vp); 2386 int32_t err = 0; 2387 size_t io_len; 2388 u_offset_t io_off; 2389 u_offset_t pgoff; 2390 page_t *pp; 2391 2392 pl[0] = NULL; 2393 2394 /* 2395 * Figure out whether the page can be created, or must be 2396 * read from the disk 2397 */ 2398 if (rw == S_CREATE) { 2399 if ((pp = page_create_va(vp, off, 2400 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 2401 cmn_err(CE_WARN, "ud_getpage_miss: page_create"); 2402 return (EINVAL); 2403 } 2404 io_len = PAGESIZE; 2405 } else { 2406 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 2407 &io_len, off, PAGESIZE, 0); 2408 2409 /* 2410 * Some other thread has entered the page. 2411 * ud_getpage will retry page_lookup. 2412 */ 2413 if (pp == NULL) { 2414 return (0); 2415 } 2416 2417 /* 2418 * Fill the page with as much data as we can from the file. 2419 */ 2420 err = ud_page_fill(ip, pp, off, B_READ, &pgoff); 2421 if (err) { 2422 pvn_read_done(pp, B_ERROR); 2423 return (err); 2424 } 2425 2426 /* 2427 * XXX ??? 
ufs has io_len instead of pgoff below 2428 */ 2429 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2430 2431 /* 2432 * If the file access is sequential, initiate read ahead 2433 * of the next cluster. 2434 */ 2435 if (seq && ip->i_nextrio < ip->i_size) { 2436 ud_getpage_ra(vp, off, seg, addr); 2437 } 2438 } 2439 2440 outmiss: 2441 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw); 2442 return (err); 2443 } 2444 2445 /* ARGSUSED */ 2446 void 2447 ud_getpage_ra(struct vnode *vp, 2448 u_offset_t off, struct seg *seg, caddr_t addr) 2449 { 2450 page_t *pp; 2451 size_t io_len; 2452 struct ud_inode *ip = VTOI(vp); 2453 u_offset_t io_off = ip->i_nextrio, pgoff; 2454 caddr_t addr2 = addr + (io_off - off); 2455 daddr_t bn; 2456 int32_t contig = 0; 2457 2458 /* 2459 * Is this test needed? 2460 */ 2461 2462 if (addr2 >= seg->s_base + seg->s_size) { 2463 return; 2464 } 2465 2466 contig = 0; 2467 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) { 2468 return; 2469 } 2470 2471 pp = pvn_read_kluster(vp, io_off, seg, addr2, 2472 &io_off, &io_len, io_off, PAGESIZE, 1); 2473 2474 /* 2475 * Some other thread has entered the page. 2476 * So no read head done here (ie we will have to and wait 2477 * for the read when needed). 2478 */ 2479 2480 if (pp == NULL) { 2481 return; 2482 } 2483 2484 (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff); 2485 ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2486 } 2487 2488 int 2489 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off, 2490 uint32_t bflgs, u_offset_t *pg_off) 2491 { 2492 daddr_t bn; 2493 struct buf *bp; 2494 caddr_t kaddr, caddr; 2495 int32_t error = 0, contig = 0, multi_io = 0; 2496 int32_t lbsize = ip->i_udf->udf_lbsize; 2497 int32_t lbmask = ip->i_udf->udf_lbmask; 2498 uint64_t isize; 2499 2500 isize = (ip->i_size + lbmask) & (~lbmask); 2501 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2502 2503 /* 2504 * Embedded file read file_entry 2505 * from buffer cache and copy the required 2506 * portions 2507 */ 2508 bp = ud_bread(ip->i_dev, 2509 ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize); 2510 if ((bp->b_error == 0) && 2511 (bp->b_resid == 0)) { 2512 2513 caddr = bp->b_un.b_addr + ip->i_data_off; 2514 2515 /* 2516 * mapin to kvm 2517 */ 2518 kaddr = (caddr_t)ppmapin(pp, 2519 PROT_READ | PROT_WRITE, (caddr_t)-1); 2520 (void) kcopy(caddr, kaddr, ip->i_size); 2521 2522 /* 2523 * mapout of kvm 2524 */ 2525 ppmapout(kaddr); 2526 } 2527 brelse(bp); 2528 contig = ip->i_size; 2529 } else { 2530 2531 /* 2532 * Get the continuous size and block number 2533 * at offset "off" 2534 */ 2535 if (error = ud_bmap_read(ip, off, &bn, &contig)) 2536 goto out; 2537 contig = MIN(contig, PAGESIZE); 2538 contig = (contig + lbmask) & (~lbmask); 2539 2540 /* 2541 * Zero part of the page which we are not 2542 * going to read from the disk. 2543 */ 2544 2545 if (bn == UDF_HOLE) { 2546 2547 /* 2548 * This is a HOLE. 
Just zero out 2549 * the page 2550 */ 2551 if (((off + contig) == isize) || 2552 (contig == PAGESIZE)) { 2553 pagezero(pp->p_prev, 0, PAGESIZE); 2554 goto out; 2555 } 2556 } 2557 2558 if (contig < PAGESIZE) { 2559 uint64_t count; 2560 2561 count = isize - off; 2562 if (contig != count) { 2563 multi_io = 1; 2564 contig = (int32_t)(MIN(count, PAGESIZE)); 2565 } else { 2566 pagezero(pp->p_prev, contig, PAGESIZE - contig); 2567 } 2568 } 2569 2570 /* 2571 * Get a bp and initialize it 2572 */ 2573 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs); 2574 ASSERT(bp != NULL); 2575 2576 bp->b_edev = ip->i_dev; 2577 bp->b_dev = cmpdev(ip->i_dev); 2578 bp->b_blkno = bn; 2579 bp->b_un.b_addr = 0; 2580 bp->b_file = ip->i_vnode; 2581 2582 /* 2583 * Start I/O 2584 */ 2585 if (multi_io == 0) { 2586 2587 /* 2588 * Single I/O is sufficient for this page 2589 */ 2590 (void) bdev_strategy(bp); 2591 } else { 2592 2593 /* 2594 * We need to do the I/O in 2595 * piece's 2596 */ 2597 error = ud_multi_strat(ip, pp, bp, off); 2598 if (error != 0) { 2599 goto out; 2600 } 2601 } 2602 if ((bflgs & B_ASYNC) == 0) { 2603 2604 /* 2605 * Wait for i/o to complete. 2606 */ 2607 2608 error = biowait(bp); 2609 pageio_done(bp); 2610 if (error) { 2611 goto out; 2612 } 2613 } 2614 } 2615 if ((off + contig) >= ip->i_size) { 2616 contig = ip->i_size - off; 2617 } 2618 2619 out: 2620 *pg_off = contig; 2621 return (error); 2622 } 2623 2624 int32_t 2625 ud_putpages(struct vnode *vp, offset_t off, 2626 size_t len, int32_t flags, struct cred *cr) 2627 { 2628 struct ud_inode *ip; 2629 page_t *pp; 2630 u_offset_t io_off; 2631 size_t io_len; 2632 u_offset_t eoff; 2633 int32_t err = 0; 2634 int32_t dolock; 2635 2636 ud_printf("ud_putpages\n"); 2637 2638 if (vp->v_count == 0) { 2639 cmn_err(CE_WARN, "ud_putpages: bad v_count"); 2640 return (EINVAL); 2641 } 2642 2643 ip = VTOI(vp); 2644 2645 /* 2646 * Acquire the readers/write inode lock before locking 2647 * any pages in this inode. 2648 * The inode lock is held during i/o. 2649 */ 2650 if (len == 0) { 2651 mutex_enter(&ip->i_tlock); 2652 ip->i_delayoff = ip->i_delaylen = 0; 2653 mutex_exit(&ip->i_tlock); 2654 } 2655 #ifdef __lock_lint 2656 rw_enter(&ip->i_contents, RW_READER); 2657 #else 2658 dolock = (rw_owner(&ip->i_contents) != curthread); 2659 if (dolock) { 2660 rw_enter(&ip->i_contents, RW_READER); 2661 } 2662 #endif 2663 2664 if (!vn_has_cached_data(vp)) { 2665 #ifdef __lock_lint 2666 rw_exit(&ip->i_contents); 2667 #else 2668 if (dolock) { 2669 rw_exit(&ip->i_contents); 2670 } 2671 #endif 2672 return (0); 2673 } 2674 2675 if (len == 0) { 2676 /* 2677 * Search the entire vp list for pages >= off. 2678 */ 2679 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage, 2680 flags, cr); 2681 } else { 2682 /* 2683 * Loop over all offsets in the range looking for 2684 * pages to deal with. 2685 */ 2686 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) { 2687 eoff = MIN(off + len, eoff); 2688 } else { 2689 eoff = off + len; 2690 } 2691 2692 for (io_off = off; io_off < eoff; io_off += io_len) { 2693 /* 2694 * If we are not invalidating, synchronously 2695 * freeing or writing pages, use the routine 2696 * page_lookup_nowait() to prevent reclaiming 2697 * them from the free list. 2698 */ 2699 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 2700 pp = page_lookup(vp, io_off, 2701 (flags & (B_INVAL | B_FREE)) ? 2702 SE_EXCL : SE_SHARED); 2703 } else { 2704 pp = page_lookup_nowait(vp, io_off, 2705 (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 2706 } 2707 2708 if (pp == NULL || pvn_getdirty(pp, flags) == 0) { 2709 io_len = PAGESIZE; 2710 } else { 2711 2712 err = ud_putapage(vp, pp, 2713 &io_off, &io_len, flags, cr); 2714 if (err != 0) { 2715 break; 2716 } 2717 /* 2718 * "io_off" and "io_len" are returned as 2719 * the range of pages we actually wrote. 2720 * This allows us to skip ahead more quickly 2721 * since several pages may've been dealt 2722 * with by this iteration of the loop. 2723 */ 2724 } 2725 } 2726 } 2727 if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) { 2728 /* 2729 * We have just sync'ed back all the pages on 2730 * the inode, turn off the IMODTIME flag. 2731 */ 2732 mutex_enter(&ip->i_tlock); 2733 ip->i_flag &= ~IMODTIME; 2734 mutex_exit(&ip->i_tlock); 2735 } 2736 #ifdef __lock_lint 2737 rw_exit(&ip->i_contents); 2738 #else 2739 if (dolock) { 2740 rw_exit(&ip->i_contents); 2741 } 2742 #endif 2743 return (err); 2744 } 2745 2746 /* ARGSUSED */ 2747 int32_t 2748 ud_putapage(struct vnode *vp, 2749 page_t *pp, u_offset_t *offp, 2750 size_t *lenp, int32_t flags, struct cred *cr) 2751 { 2752 daddr_t bn; 2753 size_t io_len; 2754 struct ud_inode *ip; 2755 int32_t error = 0, contig, multi_io = 0; 2756 struct udf_vfs *udf_vfsp; 2757 u_offset_t off, io_off; 2758 caddr_t kaddr, caddr; 2759 struct buf *bp = NULL; 2760 int32_t lbmask; 2761 uint64_t isize; 2762 int32_t crc_len; 2763 struct file_entry *fe; 2764 2765 ud_printf("ud_putapage\n"); 2766 2767 ip = VTOI(vp); 2768 ASSERT(ip); 2769 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2770 lbmask = ip->i_udf->udf_lbmask; 2771 isize = (ip->i_size + lbmask) & (~lbmask); 2772 2773 udf_vfsp = ip->i_udf; 2774 ASSERT(udf_vfsp->udf_flags & UDF_FL_RW); 2775 2776 /* 2777 * If the modified time on the inode has not already been 2778 * set elsewhere (e.g. for write/setattr) we set the time now. 2779 * This gives us approximate modified times for mmap'ed files 2780 * which are modified via stores in the user address space. 2781 */ 2782 if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) { 2783 mutex_enter(&ip->i_tlock); 2784 ip->i_flag |= IUPD; 2785 ITIMES_NOLOCK(ip); 2786 mutex_exit(&ip->i_tlock); 2787 } 2788 2789 2790 /* 2791 * Align the request to a block boundry (for old file systems), 2792 * and go ask bmap() how contiguous things are for this file. 
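 * For an embedded file (ICB_FLAG_ONE_AD) there is no separate data
 * extent at all; the branch below copies the page into the in-core
 * file_entry block with kcopy() and rewrites that block, rather
 * than doing page I/O against a data extent.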
2793 */ 2794 off = pp->p_offset & ~(offset_t)lbmask; 2795 /* block align it */ 2796 2797 2798 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2799 ASSERT(ip->i_size <= ip->i_max_emb); 2800 2801 pp = pvn_write_kluster(vp, pp, &io_off, 2802 &io_len, off, PAGESIZE, flags); 2803 if (io_len == 0) { 2804 io_len = PAGESIZE; 2805 } 2806 2807 bp = ud_bread(ip->i_dev, 2808 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift, 2809 udf_vfsp->udf_lbsize); 2810 fe = (struct file_entry *)bp->b_un.b_addr; 2811 if ((bp->b_flags & B_ERROR) || 2812 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY, 2813 ip->i_icb_block, 2814 1, udf_vfsp->udf_lbsize) != 0)) { 2815 if (pp != NULL) 2816 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2817 if (bp->b_flags & B_ERROR) { 2818 error = EIO; 2819 } else { 2820 error = EINVAL; 2821 } 2822 brelse(bp); 2823 return (error); 2824 } 2825 if ((bp->b_error == 0) && 2826 (bp->b_resid == 0)) { 2827 2828 caddr = bp->b_un.b_addr + ip->i_data_off; 2829 kaddr = (caddr_t)ppmapin(pp, 2830 PROT_READ | PROT_WRITE, (caddr_t)-1); 2831 (void) kcopy(kaddr, caddr, ip->i_size); 2832 ppmapout(kaddr); 2833 } 2834 crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) + 2835 SWAP_32(fe->fe_len_ear); 2836 crc_len += ip->i_size; 2837 ud_make_tag(ip->i_udf, &fe->fe_tag, 2838 UD_FILE_ENTRY, ip->i_icb_block, crc_len); 2839 2840 bwrite(bp); 2841 2842 if (flags & B_ASYNC) { 2843 pvn_write_done(pp, flags); 2844 } 2845 contig = ip->i_size; 2846 } else { 2847 2848 if (error = ud_bmap_read(ip, off, &bn, &contig)) { 2849 goto out; 2850 } 2851 contig = MIN(contig, PAGESIZE); 2852 contig = (contig + lbmask) & (~lbmask); 2853 2854 if (contig < PAGESIZE) { 2855 uint64_t count; 2856 2857 count = isize - off; 2858 if (contig != count) { 2859 multi_io = 1; 2860 contig = (int32_t)(MIN(count, PAGESIZE)); 2861 } 2862 } 2863 2864 if ((off + contig) > isize) { 2865 contig = isize - off; 2866 } 2867 2868 if (contig > PAGESIZE) { 2869 if (contig & PAGEOFFSET) { 2870 contig &= PAGEMASK; 2871 } 2872 } 2873 2874 pp = pvn_write_kluster(vp, pp, &io_off, 2875 &io_len, off, contig, flags); 2876 if (io_len == 0) { 2877 io_len = PAGESIZE; 2878 } 2879 2880 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags); 2881 ASSERT(bp != NULL); 2882 2883 bp->b_edev = ip->i_dev; 2884 bp->b_dev = cmpdev(ip->i_dev); 2885 bp->b_blkno = bn; 2886 bp->b_un.b_addr = 0; 2887 bp->b_file = vp; 2888 bp->b_offset = (offset_t)off; 2889 2890 2891 /* 2892 * write throttle 2893 */ 2894 ASSERT(bp->b_iodone == NULL); 2895 bp->b_iodone = ud_iodone; 2896 mutex_enter(&ip->i_tlock); 2897 ip->i_writes += bp->b_bcount; 2898 mutex_exit(&ip->i_tlock); 2899 2900 if (multi_io == 0) { 2901 2902 (void) bdev_strategy(bp); 2903 } else { 2904 error = ud_multi_strat(ip, pp, bp, off); 2905 if (error != 0) { 2906 goto out; 2907 } 2908 } 2909 2910 if ((flags & B_ASYNC) == 0) { 2911 /* 2912 * Wait for i/o to complete. 2913 */ 2914 error = biowait(bp); 2915 pageio_done(bp); 2916 } 2917 } 2918 2919 if ((flags & B_ASYNC) == 0) { 2920 pvn_write_done(pp, ((error) ? 
B_ERROR : 0) | B_WRITE | flags); 2921 } 2922 2923 pp = NULL; 2924 2925 out: 2926 if (error != 0 && pp != NULL) { 2927 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2928 } 2929 2930 if (offp) { 2931 *offp = io_off; 2932 } 2933 if (lenp) { 2934 *lenp = io_len; 2935 } 2936 2937 return (error); 2938 } 2939 2940 2941 int32_t 2942 ud_iodone(struct buf *bp) 2943 { 2944 struct ud_inode *ip; 2945 2946 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ)); 2947 2948 bp->b_iodone = NULL; 2949 2950 ip = VTOI(bp->b_pages->p_vnode); 2951 2952 mutex_enter(&ip->i_tlock); 2953 if (ip->i_writes >= ud_LW) { 2954 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) { 2955 if (ud_WRITES) { 2956 cv_broadcast(&ip->i_wrcv); /* wake all up */ 2957 } 2958 } 2959 } else { 2960 ip->i_writes -= bp->b_bcount; 2961 } 2962 mutex_exit(&ip->i_tlock); 2963 iodone(bp); 2964 return (0); 2965 } 2966 2967 /* ARGSUSED3 */ 2968 int32_t 2969 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr) 2970 { 2971 struct vnode *vp; 2972 struct udf_vfs *udf_vfsp; 2973 krw_t rwtype; 2974 caddr_t base; 2975 uint32_t flags; 2976 int32_t error, n, on, mapon, dofree; 2977 u_offset_t off; 2978 long oresid = uio->uio_resid; 2979 2980 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2981 if ((ip->i_type != VREG) && 2982 (ip->i_type != VDIR) && 2983 (ip->i_type != VLNK)) { 2984 return (EIO); 2985 } 2986 2987 if (uio->uio_loffset > MAXOFFSET_T) { 2988 return (0); 2989 } 2990 2991 if ((uio->uio_loffset < (offset_t)0) || 2992 ((uio->uio_loffset + uio->uio_resid) < 0)) { 2993 return (EINVAL); 2994 } 2995 if (uio->uio_resid == 0) { 2996 return (0); 2997 } 2998 2999 vp = ITOV(ip); 3000 udf_vfsp = ip->i_udf; 3001 mutex_enter(&ip->i_tlock); 3002 ip->i_flag |= IACC; 3003 mutex_exit(&ip->i_tlock); 3004 3005 rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER); 3006 3007 do { 3008 offset_t diff; 3009 u_offset_t uoff = uio->uio_loffset; 3010 off = uoff & (offset_t)MAXBMASK; 3011 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3012 on = (int)blkoff(udf_vfsp, uoff); 3013 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3014 3015 diff = ip->i_size - uoff; 3016 3017 if (diff <= (offset_t)0) { 3018 error = 0; 3019 goto out; 3020 } 3021 if (diff < (offset_t)n) { 3022 n = (int)diff; 3023 } 3024 dofree = ud_freebehind && 3025 ip->i_nextr == (off & PAGEMASK) && 3026 off > ud_smallfile; 3027 3028 #ifndef __lock_lint 3029 if (rwtype == RW_READER) { 3030 rw_exit(&ip->i_contents); 3031 } 3032 #endif 3033 3034 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3035 (uint32_t)n, 1, S_READ); 3036 error = uiomove(base + mapon, (long)n, UIO_READ, uio); 3037 3038 flags = 0; 3039 if (!error) { 3040 /* 3041 * If read a whole block, or read to eof, 3042 * won't need this buffer again soon. 3043 */ 3044 if (n + on == MAXBSIZE && ud_freebehind && dofree && 3045 freemem < lotsfree + pages_before_pager) { 3046 flags = SM_FREE | SM_DONTNEED |SM_ASYNC; 3047 } 3048 /* 3049 * In POSIX SYNC (FSYNC and FDSYNC) read mode, 3050 * we want to make sure that the page which has 3051 * been read, is written on disk if it is dirty. 3052 * And corresponding indirect blocks should also 3053 * be flushed out. 
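 * Setting SM_WRITE (and clearing SM_ASYNC) below makes
 * segmap_release() push the dirty pages out synchronously.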
3054 */ 3055 if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) { 3056 flags &= ~SM_ASYNC; 3057 flags |= SM_WRITE; 3058 } 3059 error = segmap_release(segkmap, base, flags); 3060 } else { 3061 (void) segmap_release(segkmap, base, flags); 3062 } 3063 3064 #ifndef __lock_lint 3065 if (rwtype == RW_READER) { 3066 rw_enter(&ip->i_contents, rwtype); 3067 } 3068 #endif 3069 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3070 out: 3071 /* 3072 * Inode is updated according to this table if FRSYNC is set. 3073 * 3074 * FSYNC FDSYNC(posix.4) 3075 * -------------------------- 3076 * always IATTCHG|IBDWRITE 3077 */ 3078 if (ioflag & FRSYNC) { 3079 if ((ioflag & FSYNC) || 3080 ((ioflag & FDSYNC) && 3081 (ip->i_flag & (IATTCHG|IBDWRITE)))) { 3082 rw_exit(&ip->i_contents); 3083 rw_enter(&ip->i_contents, RW_WRITER); 3084 ud_iupdat(ip, 1); 3085 } 3086 } 3087 /* 3088 * If we've already done a partial read, terminate 3089 * the read but return no error. 3090 */ 3091 if (oresid != uio->uio_resid) { 3092 error = 0; 3093 } 3094 ITIMES(ip); 3095 3096 return (error); 3097 } 3098 3099 int32_t 3100 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr) 3101 { 3102 caddr_t base; 3103 struct vnode *vp; 3104 struct udf_vfs *udf_vfsp; 3105 uint32_t flags; 3106 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0; 3107 int32_t pagecreate, newpage; 3108 uint64_t old_i_size; 3109 u_offset_t off; 3110 long start_resid = uio->uio_resid, premove_resid; 3111 rlim64_t limit = uio->uio_limit; 3112 3113 3114 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 3115 if ((ip->i_type != VREG) && 3116 (ip->i_type != VDIR) && 3117 (ip->i_type != VLNK)) { 3118 return (EIO); 3119 } 3120 3121 if (uio->uio_loffset >= MAXOFFSET_T) { 3122 return (EFBIG); 3123 } 3124 /* 3125 * see udf_l_pathconf 3126 */ 3127 if (limit > (((uint64_t)1 << 40) - 1)) { 3128 limit = ((uint64_t)1 << 40) - 1; 3129 } 3130 if (uio->uio_loffset >= limit) { 3131 proc_t *p = ttoproc(curthread); 3132 3133 mutex_enter(&p->p_lock); 3134 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 3135 p, RCA_UNSAFE_SIGINFO); 3136 mutex_exit(&p->p_lock); 3137 return (EFBIG); 3138 } 3139 if ((uio->uio_loffset < (offset_t)0) || 3140 ((uio->uio_loffset + uio->uio_resid) < 0)) { 3141 return (EINVAL); 3142 } 3143 if (uio->uio_resid == 0) { 3144 return (0); 3145 } 3146 3147 mutex_enter(&ip->i_tlock); 3148 ip->i_flag |= INOACC; 3149 3150 if (ioflag & (FSYNC | FDSYNC)) { 3151 ip->i_flag |= ISYNC; 3152 iupdat_flag = 1; 3153 } 3154 mutex_exit(&ip->i_tlock); 3155 3156 udf_vfsp = ip->i_udf; 3157 vp = ITOV(ip); 3158 3159 do { 3160 u_offset_t uoff = uio->uio_loffset; 3161 off = uoff & (offset_t)MAXBMASK; 3162 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3163 on = (int)blkoff(udf_vfsp, uoff); 3164 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3165 3166 if (ip->i_type == VREG && uoff + n >= limit) { 3167 if (uoff >= limit) { 3168 error = EFBIG; 3169 goto out; 3170 } 3171 n = (int)(limit - (rlim64_t)uoff); 3172 } 3173 if (uoff + n > ip->i_size) { 3174 /* 3175 * We are extending the length of the file. 3176 * bmap is used so that we are sure that 3177 * if we need to allocate new blocks, that it 3178 * is done here before we up the file size. 
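 * The current size is saved in old_i_size first, so that if the
 * uiomove() or a synchronous page push fails later, ud_itrunc()
 * at the bottom of the loop can roll i_size back and free the
 * blocks that were just allocated.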
3179 */ 3180 error = ud_bmap_write(ip, uoff, 3181 (int)(on + n), mapon == 0, cr); 3182 if (error) { 3183 break; 3184 } 3185 i_size_changed = 1; 3186 old_i_size = ip->i_size; 3187 ip->i_size = uoff + n; 3188 /* 3189 * If we are writing from the beginning of 3190 * the mapping, we can just create the 3191 * pages without having to read them. 3192 */ 3193 pagecreate = (mapon == 0); 3194 } else if (n == MAXBSIZE) { 3195 /* 3196 * Going to do a whole mappings worth, 3197 * so we can just create the pages w/o 3198 * having to read them in. But before 3199 * we do that, we need to make sure any 3200 * needed blocks are allocated first. 3201 */ 3202 error = ud_bmap_write(ip, uoff, 3203 (int)(on + n), 1, cr); 3204 if (error) { 3205 break; 3206 } 3207 pagecreate = 1; 3208 } else { 3209 pagecreate = 0; 3210 } 3211 3212 rw_exit(&ip->i_contents); 3213 3214 /* 3215 * Touch the page and fault it in if it is not in 3216 * core before segmap_getmapflt can lock it. This 3217 * is to avoid the deadlock if the buffer is mapped 3218 * to the same file through mmap which we want to 3219 * write to. 3220 */ 3221 uio_prefaultpages((long)n, uio); 3222 3223 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3224 (uint32_t)n, !pagecreate, S_WRITE); 3225 3226 /* 3227 * segmap_pagecreate() returns 1 if it calls 3228 * page_create_va() to allocate any pages. 3229 */ 3230 newpage = 0; 3231 if (pagecreate) { 3232 newpage = segmap_pagecreate(segkmap, base, 3233 (size_t)n, 0); 3234 } 3235 3236 premove_resid = uio->uio_resid; 3237 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); 3238 3239 if (pagecreate && 3240 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) { 3241 /* 3242 * We created pages w/o initializing them completely, 3243 * thus we need to zero the part that wasn't set up. 3244 * This happens on most EOF write cases and if 3245 * we had some sort of error during the uiomove. 3246 */ 3247 int nzero, nmoved; 3248 3249 nmoved = (int)(uio->uio_loffset - (off + mapon)); 3250 ASSERT(nmoved >= 0 && nmoved <= n); 3251 nzero = roundup(on + n, PAGESIZE) - nmoved; 3252 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE); 3253 (void) kzero(base + mapon + nmoved, (uint32_t)nzero); 3254 } 3255 3256 /* 3257 * Unlock the pages allocated by page_create_va() 3258 * in segmap_pagecreate() 3259 */ 3260 if (newpage) { 3261 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE); 3262 } 3263 3264 if (error) { 3265 /* 3266 * If we failed on a write, we may have already 3267 * allocated file blocks as well as pages. It's 3268 * hard to undo the block allocation, but we must 3269 * be sure to invalidate any pages that may have 3270 * been allocated. 3271 */ 3272 (void) segmap_release(segkmap, base, SM_INVAL); 3273 } else { 3274 flags = 0; 3275 /* 3276 * Force write back for synchronous write cases. 3277 */ 3278 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) { 3279 /* 3280 * If the sticky bit is set but the 3281 * execute bit is not set, we do a 3282 * synchronous write back and free 3283 * the page when done. We set up swap 3284 * files to be handled this way to 3285 * prevent servers from keeping around 3286 * the client's swap pages too long. 3287 * XXX - there ought to be a better way. 3288 */ 3289 if (IS_SWAPVP(vp)) { 3290 flags = SM_WRITE | SM_FREE | 3291 SM_DONTNEED; 3292 iupdat_flag = 0; 3293 } else { 3294 flags = SM_WRITE; 3295 } 3296 } else if (((mapon + n) == MAXBSIZE) || 3297 IS_SWAPVP(vp)) { 3298 /* 3299 * Have written a whole block. 
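 * (i.e. "mapon + n" filled the whole MAXBSIZE segmap window).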
3300 * Start an asynchronous write and 3301 * mark the buffer to indicate that 3302 * it won't be needed again soon. 3303 */ 3304 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED; 3305 } 3306 error = segmap_release(segkmap, base, flags); 3307 3308 /* 3309 * If the operation failed and is synchronous, 3310 * then we need to unwind what uiomove() last 3311 * did so we can potentially return an error to 3312 * the caller. If this write operation was 3313 * done in two pieces and the first succeeded, 3314 * then we won't return an error for the second 3315 * piece that failed. However, we only want to 3316 * return a resid value that reflects what was 3317 * really done. 3318 * 3319 * Failures for non-synchronous operations can 3320 * be ignored since the page subsystem will 3321 * retry the operation until it succeeds or the 3322 * file system is unmounted. 3323 */ 3324 if (error) { 3325 if ((ioflag & (FSYNC | FDSYNC)) || 3326 ip->i_type == VDIR) { 3327 uio->uio_resid = premove_resid; 3328 } else { 3329 error = 0; 3330 } 3331 } 3332 } 3333 3334 /* 3335 * Re-acquire contents lock. 3336 */ 3337 rw_enter(&ip->i_contents, RW_WRITER); 3338 /* 3339 * If the uiomove() failed or if a synchronous 3340 * page push failed, fix up i_size. 3341 */ 3342 if (error) { 3343 if (i_size_changed) { 3344 /* 3345 * The uiomove failed, and we 3346 * allocated blocks,so get rid 3347 * of them. 3348 */ 3349 (void) ud_itrunc(ip, old_i_size, 0, cr); 3350 } 3351 } else { 3352 /* 3353 * XXX - Can this be out of the loop? 3354 */ 3355 ip->i_flag |= IUPD | ICHG; 3356 if (i_size_changed) { 3357 ip->i_flag |= IATTCHG; 3358 } 3359 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) | 3360 (IEXEC >> 10))) != 0 && 3361 (ip->i_char & (ISUID | ISGID)) != 0 && 3362 secpolicy_vnode_setid_retain(cr, 3363 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) { 3364 /* 3365 * Clear Set-UID & Set-GID bits on 3366 * successful write if not privileged 3367 * and at least one of the execute bits 3368 * is set. If we always clear Set-GID, 3369 * mandatory file and record locking is 3370 * unuseable. 3371 */ 3372 ip->i_char &= ~(ISUID | ISGID); 3373 } 3374 } 3375 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3376 3377 out: 3378 /* 3379 * Inode is updated according to this table - 3380 * 3381 * FSYNC FDSYNC(posix.4) 3382 * -------------------------- 3383 * always@ IATTCHG|IBDWRITE 3384 * 3385 * @ - If we are doing synchronous write the only time we should 3386 * not be sync'ing the ip here is if we have the stickyhack 3387 * activated, the file is marked with the sticky bit and 3388 * no exec bit, the file length has not been changed and 3389 * no new blocks have been allocated during this write. 3390 */ 3391 if ((ip->i_flag & ISYNC) != 0) { 3392 /* 3393 * we have eliminated nosync 3394 */ 3395 if ((ip->i_flag & (IATTCHG|IBDWRITE)) || 3396 ((ioflag & FSYNC) && iupdat_flag)) { 3397 ud_iupdat(ip, 1); 3398 } 3399 } 3400 3401 /* 3402 * If we've already done a partial-write, terminate 3403 * the write but return no error. 
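 * uio_resid still reflects how much was left undone, so the caller
 * observes a short write rather than a failure.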
3404 */ 3405 if (start_resid != uio->uio_resid) { 3406 error = 0; 3407 } 3408 ip->i_flag &= ~(INOACC | ISYNC); 3409 ITIMES_NOLOCK(ip); 3410 3411 return (error); 3412 } 3413 3414 int32_t 3415 ud_multi_strat(struct ud_inode *ip, 3416 page_t *pp, struct buf *bp, u_offset_t start) 3417 { 3418 daddr_t bn; 3419 int32_t error = 0, io_count, contig, alloc_sz, i; 3420 uint32_t io_off; 3421 mio_master_t *mm = NULL; 3422 mio_slave_t *ms = NULL; 3423 struct buf *rbp; 3424 3425 ASSERT(!(start & PAGEOFFSET)); 3426 3427 /* 3428 * Figure out how many buffers to allocate 3429 */ 3430 io_count = 0; 3431 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3432 contig = 0; 3433 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off), 3434 &bn, &contig)) { 3435 goto end; 3436 } 3437 if (contig == 0) { 3438 goto end; 3439 } 3440 contig = MIN(contig, PAGESIZE - io_off); 3441 if (bn != UDF_HOLE) { 3442 io_count ++; 3443 } else { 3444 /* 3445 * HOLE 3446 */ 3447 if (bp->b_flags & B_READ) { 3448 3449 /* 3450 * This is a hole and is read 3451 * it should be filled with 0's 3452 */ 3453 pagezero(pp, io_off, contig); 3454 } 3455 } 3456 } 3457 3458 3459 if (io_count != 0) { 3460 3461 /* 3462 * Allocate memory for all the 3463 * required number of buffers 3464 */ 3465 alloc_sz = sizeof (mio_master_t) + 3466 (sizeof (mio_slave_t) * io_count); 3467 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP); 3468 if (mm == NULL) { 3469 error = ENOMEM; 3470 goto end; 3471 } 3472 3473 /* 3474 * initialize master 3475 */ 3476 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL); 3477 mm->mm_size = alloc_sz; 3478 mm->mm_bp = bp; 3479 mm->mm_resid = 0; 3480 mm->mm_error = 0; 3481 mm->mm_index = master_index++; 3482 3483 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3484 3485 /* 3486 * Initialize buffers 3487 */ 3488 io_count = 0; 3489 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3490 contig = 0; 3491 if (error = ud_bmap_read(ip, 3492 (u_offset_t)(start + io_off), 3493 &bn, &contig)) { 3494 goto end; 3495 } 3496 ASSERT(contig); 3497 if ((io_off + contig) > bp->b_bcount) { 3498 contig = bp->b_bcount - io_off; 3499 } 3500 if (bn != UDF_HOLE) { 3501 /* 3502 * Clone the buffer 3503 * and prepare to start I/O 3504 */ 3505 ms->ms_ptr = mm; 3506 bioinit(&ms->ms_buf); 3507 rbp = bioclone(bp, io_off, (size_t)contig, 3508 bp->b_edev, bn, ud_slave_done, 3509 &ms->ms_buf, KM_NOSLEEP); 3510 ASSERT(rbp == &ms->ms_buf); 3511 mm->mm_resid += contig; 3512 io_count++; 3513 ms ++; 3514 } 3515 } 3516 3517 /* 3518 * Start I/O's 3519 */ 3520 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3521 for (i = 0; i < io_count; i++) { 3522 (void) bdev_strategy(&ms->ms_buf); 3523 ms ++; 3524 } 3525 } 3526 3527 end: 3528 if (error != 0) { 3529 bp->b_flags |= B_ERROR; 3530 bp->b_error = error; 3531 if (mm != NULL) { 3532 mutex_destroy(&mm->mm_mutex); 3533 kmem_free(mm, mm->mm_size); 3534 } 3535 } 3536 return (error); 3537 } 3538 3539 int32_t 3540 ud_slave_done(struct buf *bp) 3541 { 3542 mio_master_t *mm; 3543 int32_t resid; 3544 3545 ASSERT(SEMA_HELD(&bp->b_sem)); 3546 ASSERT((bp->b_flags & B_DONE) == 0); 3547 3548 mm = ((mio_slave_t *)bp)->ms_ptr; 3549 3550 /* 3551 * Propagate error and byte count info from slave struct to 3552 * the master struct 3553 */ 3554 mutex_enter(&mm->mm_mutex); 3555 if (bp->b_flags & B_ERROR) { 3556 3557 /* 3558 * If multiple slave buffers get 3559 * error we forget the old errors 3560 * this is ok because we any way 3561 * cannot return multiple errors 3562 */ 3563 mm->mm_error = 
bp->b_error; 3564 } 3565 mm->mm_resid -= bp->b_bcount; 3566 resid = mm->mm_resid; 3567 mutex_exit(&mm->mm_mutex); 3568 3569 /* 3570 * Free up the resources allocated to the cloned buffer. 3571 */ 3572 bp_mapout(bp); 3573 biofini(bp); 3574 3575 if (resid == 0) { 3576 3577 /* 3578 * This is the last I/O operation; 3579 * clean up and return the original buffer. 3580 */ 3581 if (mm->mm_error) { 3582 mm->mm_bp->b_flags |= B_ERROR; 3583 mm->mm_bp->b_error = mm->mm_error; 3584 } 3585 biodone(mm->mm_bp); 3586 mutex_destroy(&mm->mm_mutex); 3587 kmem_free(mm, mm->mm_size); 3588 } 3589 return (0); 3590 } 3591
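
/*
 * Notes on the multi-I/O path (ud_multi_strat()/ud_slave_done()):
 *
 * ud_multi_strat() is used when one page cannot be covered by a single
 * contiguous device transfer, e.g. when the logical block size is
 * smaller than PAGESIZE and the file's extents are discontiguous.  It
 * walks the page with ud_bmap_read(), zero-fills holes directly for
 * reads, and clones the original buf once per allocated extent with
 * bioclone(), adding each clone's size to mm_resid before issuing it
 * with bdev_strategy().
 *
 * Completion fans back in through ud_slave_done(): each slave buf
 * subtracts its b_bcount from mm_resid under mm_mutex and records any
 * error (a later error overwrites an earlier one, since only one can
 * be returned).  The slave that drives mm_resid to zero propagates the
 * saved error, if any, to the master buf, biodone()s it and frees the
 * mio_master_t.
 *
 * As a hypothetical example, a 4K page backed by two discontiguous 2K
 * extents is split into two cloned bufs; whichever of the two
 * completes last is the one that ends up completing the original
 * request.
 */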