1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/t_lock.h> 28 #include <sys/param.h> 29 #include <sys/time.h> 30 #include <sys/systm.h> 31 #include <sys/sysmacros.h> 32 #include <sys/resource.h> 33 #include <sys/signal.h> 34 #include <sys/cred.h> 35 #include <sys/user.h> 36 #include <sys/buf.h> 37 #include <sys/vfs.h> 38 #include <sys/vfs_opreg.h> 39 #include <sys/stat.h> 40 #include <sys/vnode.h> 41 #include <sys/mode.h> 42 #include <sys/proc.h> 43 #include <sys/disp.h> 44 #include <sys/file.h> 45 #include <sys/fcntl.h> 46 #include <sys/flock.h> 47 #include <sys/kmem.h> 48 #include <sys/uio.h> 49 #include <sys/dnlc.h> 50 #include <sys/conf.h> 51 #include <sys/errno.h> 52 #include <sys/mman.h> 53 #include <sys/fbuf.h> 54 #include <sys/pathname.h> 55 #include <sys/debug.h> 56 #include <sys/vmsystm.h> 57 #include <sys/cmn_err.h> 58 #include <sys/dirent.h> 59 #include <sys/errno.h> 60 #include <sys/modctl.h> 61 #include <sys/statvfs.h> 62 #include <sys/mount.h> 63 #include <sys/sunddi.h> 64 #include <sys/bootconf.h> 65 #include <sys/policy.h> 66 67 #include <vm/hat.h> 68 #include <vm/page.h> 69 #include <vm/pvn.h> 70 #include <vm/as.h> 71 #include <vm/seg.h> 72 #include <vm/seg_map.h> 73 #include <vm/seg_kmem.h> 74 #include <vm/seg_vn.h> 75 #include <vm/rm.h> 76 #include <vm/page.h> 77 #include <sys/swap.h> 78 79 #include <fs/fs_subr.h> 80 81 #include <sys/fs/udf_volume.h> 82 #include <sys/fs/udf_inode.h> 83 84 static int32_t udf_open(struct vnode **, 85 int32_t, struct cred *, caller_context_t *); 86 static int32_t udf_close(struct vnode *, 87 int32_t, int32_t, offset_t, struct cred *, caller_context_t *); 88 static int32_t udf_read(struct vnode *, 89 struct uio *, int32_t, struct cred *, caller_context_t *); 90 static int32_t udf_write(struct vnode *, 91 struct uio *, int32_t, struct cred *, caller_context_t *); 92 static int32_t udf_ioctl(struct vnode *, 93 int32_t, intptr_t, int32_t, struct cred *, int32_t *, 94 caller_context_t *); 95 static int32_t udf_getattr(struct vnode *, 96 struct vattr *, int32_t, struct cred *, caller_context_t *); 97 static int32_t udf_setattr(struct vnode *, 98 struct vattr *, int32_t, struct cred *, caller_context_t *); 99 static int32_t udf_access(struct vnode *, 100 int32_t, int32_t, struct cred *, caller_context_t *); 101 static int32_t udf_lookup(struct vnode *, 102 char *, struct vnode **, struct pathname *, 103 int32_t, struct vnode *, struct cred *, 104 caller_context_t *, int *, pathname_t *); 105 static int32_t udf_create(struct vnode *, 106 char *, struct vattr *, enum vcexcl, 107 int32_t, struct vnode **, struct cred *, 
int32_t, 108 caller_context_t *, vsecattr_t *); 109 static int32_t udf_remove(struct vnode *, 110 char *, struct cred *, caller_context_t *, int); 111 static int32_t udf_link(struct vnode *, 112 struct vnode *, char *, struct cred *, caller_context_t *, int); 113 static int32_t udf_rename(struct vnode *, 114 char *, struct vnode *, char *, struct cred *, caller_context_t *, int); 115 static int32_t udf_mkdir(struct vnode *, 116 char *, struct vattr *, struct vnode **, struct cred *, 117 caller_context_t *, int, vsecattr_t *); 118 static int32_t udf_rmdir(struct vnode *, 119 char *, struct vnode *, struct cred *, caller_context_t *, int); 120 static int32_t udf_readdir(struct vnode *, 121 struct uio *, struct cred *, int32_t *, caller_context_t *, int); 122 static int32_t udf_symlink(struct vnode *, 123 char *, struct vattr *, char *, struct cred *, caller_context_t *, int); 124 static int32_t udf_readlink(struct vnode *, 125 struct uio *, struct cred *, caller_context_t *); 126 static int32_t udf_fsync(struct vnode *, 127 int32_t, struct cred *, caller_context_t *); 128 static void udf_inactive(struct vnode *, 129 struct cred *, caller_context_t *); 130 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *); 131 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *); 132 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *); 133 static int32_t udf_seek(struct vnode *, offset_t, offset_t *, 134 caller_context_t *); 135 static int32_t udf_frlock(struct vnode *, int32_t, 136 struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *, 137 caller_context_t *); 138 static int32_t udf_space(struct vnode *, int32_t, 139 struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *); 140 static int32_t udf_getpage(struct vnode *, offset_t, 141 size_t, uint32_t *, struct page **, size_t, 142 struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *); 143 static int32_t udf_putpage(struct vnode *, offset_t, 144 size_t, int32_t, struct cred *, caller_context_t *); 145 static int32_t udf_map(struct vnode *, offset_t, struct as *, 146 caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *, 147 caller_context_t *); 148 static int32_t udf_addmap(struct vnode *, offset_t, struct as *, 149 caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *, 150 caller_context_t *); 151 static int32_t udf_delmap(struct vnode *, offset_t, struct as *, 152 caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *, 153 caller_context_t *); 154 static int32_t udf_l_pathconf(struct vnode *, int32_t, 155 ulong_t *, struct cred *, caller_context_t *); 156 static int32_t udf_pageio(struct vnode *, struct page *, 157 u_offset_t, size_t, int32_t, struct cred *, caller_context_t *); 158 159 int32_t ud_getpage_miss(struct vnode *, u_offset_t, 160 size_t, struct seg *, caddr_t, page_t *pl[], 161 size_t, enum seg_rw, int32_t); 162 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t); 163 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *); 164 int32_t ud_page_fill(struct ud_inode *, page_t *, 165 u_offset_t, uint32_t, u_offset_t *); 166 int32_t ud_iodone(struct buf *); 167 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *); 168 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *); 169 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t); 170 int32_t ud_slave_done(struct buf *); 171 172 /* 173 * Structures to control multiple IO operations to get or put 
pages 174 * that are backed by discontiguous blocks. The master struct is 175 * a dummy that holds the original bp from pageio_setup. The 176 * slave struct holds the working bp's to do the actual IO. Once 177 * all the slave IOs complete. The master is processed as if a single 178 * IO op has completed. 179 */ 180 uint32_t master_index = 0; 181 typedef struct mio_master { 182 kmutex_t mm_mutex; /* protect the fields below */ 183 int32_t mm_size; 184 buf_t *mm_bp; /* original bp */ 185 int32_t mm_resid; /* bytes remaining to transfer */ 186 int32_t mm_error; /* accumulated error from slaves */ 187 int32_t mm_index; /* XXX debugging */ 188 } mio_master_t; 189 190 typedef struct mio_slave { 191 buf_t ms_buf; /* working buffer for this IO chunk */ 192 mio_master_t *ms_ptr; /* pointer to master */ 193 } mio_slave_t; 194 195 struct vnodeops *udf_vnodeops; 196 197 const fs_operation_def_t udf_vnodeops_template[] = { 198 VOPNAME_OPEN, { .vop_open = udf_open }, 199 VOPNAME_CLOSE, { .vop_close = udf_close }, 200 VOPNAME_READ, { .vop_read = udf_read }, 201 VOPNAME_WRITE, { .vop_write = udf_write }, 202 VOPNAME_IOCTL, { .vop_ioctl = udf_ioctl }, 203 VOPNAME_GETATTR, { .vop_getattr = udf_getattr }, 204 VOPNAME_SETATTR, { .vop_setattr = udf_setattr }, 205 VOPNAME_ACCESS, { .vop_access = udf_access }, 206 VOPNAME_LOOKUP, { .vop_lookup = udf_lookup }, 207 VOPNAME_CREATE, { .vop_create = udf_create }, 208 VOPNAME_REMOVE, { .vop_remove = udf_remove }, 209 VOPNAME_LINK, { .vop_link = udf_link }, 210 VOPNAME_RENAME, { .vop_rename = udf_rename }, 211 VOPNAME_MKDIR, { .vop_mkdir = udf_mkdir }, 212 VOPNAME_RMDIR, { .vop_rmdir = udf_rmdir }, 213 VOPNAME_READDIR, { .vop_readdir = udf_readdir }, 214 VOPNAME_SYMLINK, { .vop_symlink = udf_symlink }, 215 VOPNAME_READLINK, { .vop_readlink = udf_readlink }, 216 VOPNAME_FSYNC, { .vop_fsync = udf_fsync }, 217 VOPNAME_INACTIVE, { .vop_inactive = udf_inactive }, 218 VOPNAME_FID, { .vop_fid = udf_fid }, 219 VOPNAME_RWLOCK, { .vop_rwlock = udf_rwlock }, 220 VOPNAME_RWUNLOCK, { .vop_rwunlock = udf_rwunlock }, 221 VOPNAME_SEEK, { .vop_seek = udf_seek }, 222 VOPNAME_FRLOCK, { .vop_frlock = udf_frlock }, 223 VOPNAME_SPACE, { .vop_space = udf_space }, 224 VOPNAME_GETPAGE, { .vop_getpage = udf_getpage }, 225 VOPNAME_PUTPAGE, { .vop_putpage = udf_putpage }, 226 VOPNAME_MAP, { .vop_map = udf_map }, 227 VOPNAME_ADDMAP, { .vop_addmap = udf_addmap }, 228 VOPNAME_DELMAP, { .vop_delmap = udf_delmap }, 229 VOPNAME_PATHCONF, { .vop_pathconf = udf_l_pathconf }, 230 VOPNAME_PAGEIO, { .vop_pageio = udf_pageio }, 231 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 232 NULL, NULL 233 }; 234 235 /* ARGSUSED */ 236 static int32_t 237 udf_open( 238 struct vnode **vpp, 239 int32_t flag, 240 struct cred *cr, 241 caller_context_t *ct) 242 { 243 ud_printf("udf_open\n"); 244 245 return (0); 246 } 247 248 /* ARGSUSED */ 249 static int32_t 250 udf_close( 251 struct vnode *vp, 252 int32_t flag, 253 int32_t count, 254 offset_t offset, 255 struct cred *cr, 256 caller_context_t *ct) 257 { 258 struct ud_inode *ip = VTOI(vp); 259 260 ud_printf("udf_close\n"); 261 262 ITIMES(ip); 263 264 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 265 cleanshares(vp, ttoproc(curthread)->p_pid); 266 267 /* 268 * Push partially filled cluster at last close. 269 * ``last close'' is approximated because the dnlc 270 * may have a hold on the vnode. 
271 */ 272 if (vp->v_count <= 2 && vp->v_type != VBAD) { 273 struct ud_inode *ip = VTOI(vp); 274 if (ip->i_delaylen) { 275 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen, 276 B_ASYNC | B_FREE, cr); 277 ip->i_delaylen = 0; 278 } 279 } 280 281 return (0); 282 } 283 284 /* ARGSUSED */ 285 static int32_t 286 udf_read( 287 struct vnode *vp, 288 struct uio *uiop, 289 int32_t ioflag, 290 struct cred *cr, 291 caller_context_t *ct) 292 { 293 struct ud_inode *ip = VTOI(vp); 294 int32_t error; 295 296 ud_printf("udf_read\n"); 297 298 #ifdef __lock_lint 299 rw_enter(&ip->i_rwlock, RW_READER); 300 #endif 301 302 ASSERT(RW_READ_HELD(&ip->i_rwlock)); 303 304 if (MANDLOCK(vp, ip->i_char)) { 305 /* 306 * udf_getattr ends up being called by chklock 307 */ 308 error = chklock(vp, FREAD, uiop->uio_loffset, 309 uiop->uio_resid, uiop->uio_fmode, ct); 310 if (error) { 311 goto end; 312 } 313 } 314 315 rw_enter(&ip->i_contents, RW_READER); 316 error = ud_rdip(ip, uiop, ioflag, cr); 317 rw_exit(&ip->i_contents); 318 319 end: 320 #ifdef __lock_lint 321 rw_exit(&ip->i_rwlock); 322 #endif 323 324 return (error); 325 } 326 327 328 int32_t ud_WRITES = 1; 329 int32_t ud_HW = 96 * 1024; 330 int32_t ud_LW = 64 * 1024; 331 int32_t ud_throttles = 0; 332 333 /* ARGSUSED */ 334 static int32_t 335 udf_write( 336 struct vnode *vp, 337 struct uio *uiop, 338 int32_t ioflag, 339 struct cred *cr, 340 caller_context_t *ct) 341 { 342 struct ud_inode *ip = VTOI(vp); 343 int32_t error = 0; 344 345 ud_printf("udf_write\n"); 346 347 #ifdef __lock_lint 348 rw_enter(&ip->i_rwlock, RW_WRITER); 349 #endif 350 351 ASSERT(RW_WRITE_HELD(&ip->i_rwlock)); 352 353 if (MANDLOCK(vp, ip->i_char)) { 354 /* 355 * ud_getattr ends up being called by chklock 356 */ 357 error = chklock(vp, FWRITE, uiop->uio_loffset, 358 uiop->uio_resid, uiop->uio_fmode, ct); 359 if (error) { 360 goto end; 361 } 362 } 363 /* 364 * Throttle writes. 365 */ 366 mutex_enter(&ip->i_tlock); 367 if (ud_WRITES && (ip->i_writes > ud_HW)) { 368 while (ip->i_writes > ud_HW) { 369 ud_throttles++; 370 cv_wait(&ip->i_wrcv, &ip->i_tlock); 371 } 372 } 373 mutex_exit(&ip->i_tlock); 374 375 /* 376 * Write to the file 377 */ 378 rw_enter(&ip->i_contents, RW_WRITER); 379 if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) { 380 /* 381 * In append mode start at end of file. 382 */ 383 uiop->uio_loffset = ip->i_size; 384 } 385 error = ud_wrip(ip, uiop, ioflag, cr); 386 rw_exit(&ip->i_contents); 387 388 end: 389 #ifdef __lock_lint 390 rw_exit(&ip->i_rwlock); 391 #endif 392 393 return (error); 394 } 395 396 /* ARGSUSED */ 397 static int32_t 398 udf_ioctl( 399 struct vnode *vp, 400 int32_t cmd, 401 intptr_t arg, 402 int32_t flag, 403 struct cred *cr, 404 int32_t *rvalp, 405 caller_context_t *ct) 406 { 407 return (ENOTTY); 408 } 409 410 /* ARGSUSED */ 411 static int32_t 412 udf_getattr( 413 struct vnode *vp, 414 struct vattr *vap, 415 int32_t flags, 416 struct cred *cr, 417 caller_context_t *ct) 418 { 419 struct ud_inode *ip = VTOI(vp); 420 421 ud_printf("udf_getattr\n"); 422 423 if (vap->va_mask == AT_SIZE) { 424 /* 425 * for performance, if only the size is requested don't bother 426 * with anything else. 
427 */ 428 vap->va_size = ip->i_size; 429 return (0); 430 } 431 432 rw_enter(&ip->i_contents, RW_READER); 433 434 vap->va_type = vp->v_type; 435 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 436 437 vap->va_uid = ip->i_uid; 438 vap->va_gid = ip->i_gid; 439 vap->va_fsid = ip->i_dev; 440 vap->va_nodeid = ip->i_icb_lbano; 441 vap->va_nlink = ip->i_nlink; 442 vap->va_size = ip->i_size; 443 vap->va_seq = ip->i_seq; 444 if (vp->v_type == VCHR || vp->v_type == VBLK) { 445 vap->va_rdev = ip->i_rdev; 446 } else { 447 vap->va_rdev = 0; 448 } 449 450 mutex_enter(&ip->i_tlock); 451 ITIMES_NOLOCK(ip); /* mark correct time in inode */ 452 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec; 453 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec; 454 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec; 455 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec; 456 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec; 457 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec; 458 mutex_exit(&ip->i_tlock); 459 460 switch (ip->i_type) { 461 case VBLK: 462 vap->va_blksize = MAXBSIZE; 463 break; 464 case VCHR: 465 vap->va_blksize = MAXBSIZE; 466 break; 467 default: 468 vap->va_blksize = ip->i_udf->udf_lbsize; 469 break; 470 } 471 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift; 472 473 rw_exit(&ip->i_contents); 474 475 return (0); 476 } 477 478 static int 479 ud_iaccess_vmode(void *ip, int mode, struct cred *cr) 480 { 481 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0)); 482 } 483 484 /*ARGSUSED4*/ 485 static int32_t 486 udf_setattr( 487 struct vnode *vp, 488 struct vattr *vap, 489 int32_t flags, 490 struct cred *cr, 491 caller_context_t *ct) 492 { 493 int32_t error = 0; 494 uint32_t mask = vap->va_mask; 495 struct ud_inode *ip; 496 timestruc_t now; 497 struct vattr ovap; 498 499 ud_printf("udf_setattr\n"); 500 501 ip = VTOI(vp); 502 503 /* 504 * not updates allowed to 4096 files 505 */ 506 if (ip->i_astrat == STRAT_TYPE4096) { 507 return (EINVAL); 508 } 509 510 /* 511 * Cannot set these attributes 512 */ 513 if (mask & AT_NOSET) { 514 return (EINVAL); 515 } 516 517 rw_enter(&ip->i_rwlock, RW_WRITER); 518 rw_enter(&ip->i_contents, RW_WRITER); 519 520 ovap.va_uid = ip->i_uid; 521 ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 522 error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags, 523 ud_iaccess_vmode, ip); 524 if (error) 525 goto update_inode; 526 527 mask = vap->va_mask; 528 /* 529 * Change file access modes. 530 */ 531 if (mask & AT_MODE) { 532 ip->i_perm = VA2UD_PERM(vap->va_mode); 533 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX); 534 mutex_enter(&ip->i_tlock); 535 ip->i_flag |= ICHG; 536 mutex_exit(&ip->i_tlock); 537 } 538 if (mask & (AT_UID|AT_GID)) { 539 if (mask & AT_UID) { 540 ip->i_uid = vap->va_uid; 541 } 542 if (mask & AT_GID) { 543 ip->i_gid = vap->va_gid; 544 } 545 mutex_enter(&ip->i_tlock); 546 ip->i_flag |= ICHG; 547 mutex_exit(&ip->i_tlock); 548 } 549 /* 550 * Truncate file. Must have write permission and not be a directory. 551 */ 552 if (mask & AT_SIZE) { 553 if (vp->v_type == VDIR) { 554 error = EISDIR; 555 goto update_inode; 556 } 557 if (error = ud_iaccess(ip, IWRITE, cr, 0)) { 558 goto update_inode; 559 } 560 if (vap->va_size > MAXOFFSET_T) { 561 error = EFBIG; 562 goto update_inode; 563 } 564 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) { 565 goto update_inode; 566 } 567 } 568 /* 569 * Change file access or modified times. 
570 */ 571 if (mask & (AT_ATIME|AT_MTIME)) { 572 mutex_enter(&ip->i_tlock); 573 if (mask & AT_ATIME) { 574 ip->i_atime.tv_sec = vap->va_atime.tv_sec; 575 ip->i_atime.tv_nsec = vap->va_atime.tv_nsec; 576 ip->i_flag &= ~IACC; 577 } 578 if (mask & AT_MTIME) { 579 ip->i_mtime.tv_sec = vap->va_mtime.tv_sec; 580 ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec; 581 gethrestime(&now); 582 ip->i_ctime.tv_sec = now.tv_sec; 583 ip->i_ctime.tv_nsec = now.tv_nsec; 584 ip->i_flag &= ~(IUPD|ICHG); 585 ip->i_flag |= IMODTIME; 586 } 587 ip->i_flag |= IMOD; 588 mutex_exit(&ip->i_tlock); 589 } 590 591 update_inode: 592 if (curthread->t_flag & T_DONTPEND) { 593 ud_iupdat(ip, 1); 594 } else { 595 ITIMES_NOLOCK(ip); 596 } 597 rw_exit(&ip->i_contents); 598 rw_exit(&ip->i_rwlock); 599 600 return (error); 601 } 602 603 /* ARGSUSED */ 604 static int32_t 605 udf_access( 606 struct vnode *vp, 607 int32_t mode, 608 int32_t flags, 609 struct cred *cr, 610 caller_context_t *ct) 611 { 612 struct ud_inode *ip = VTOI(vp); 613 614 ud_printf("udf_access\n"); 615 616 if (ip->i_udf == NULL) { 617 return (EIO); 618 } 619 620 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1)); 621 } 622 623 int32_t udfs_stickyhack = 1; 624 625 /* ARGSUSED */ 626 static int32_t 627 udf_lookup( 628 struct vnode *dvp, 629 char *nm, 630 struct vnode **vpp, 631 struct pathname *pnp, 632 int32_t flags, 633 struct vnode *rdir, 634 struct cred *cr, 635 caller_context_t *ct, 636 int *direntflags, 637 pathname_t *realpnp) 638 { 639 int32_t error; 640 struct vnode *vp; 641 struct ud_inode *ip, *xip; 642 643 ud_printf("udf_lookup\n"); 644 /* 645 * Null component name is a synonym for directory being searched. 646 */ 647 if (*nm == '\0') { 648 VN_HOLD(dvp); 649 *vpp = dvp; 650 error = 0; 651 goto out; 652 } 653 654 /* 655 * Fast path: Check the directory name lookup cache. 656 */ 657 ip = VTOI(dvp); 658 if (vp = dnlc_lookup(dvp, nm)) { 659 /* 660 * Check accessibility of directory. 661 */ 662 if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) { 663 VN_RELE(vp); 664 } 665 xip = VTOI(vp); 666 } else { 667 error = ud_dirlook(ip, nm, &xip, cr, 1); 668 ITIMES(ip); 669 } 670 671 if (error == 0) { 672 ip = xip; 673 *vpp = ITOV(ip); 674 if ((ip->i_type != VDIR) && 675 (ip->i_char & ISVTX) && 676 ((ip->i_perm & IEXEC) == 0) && 677 udfs_stickyhack) { 678 mutex_enter(&(*vpp)->v_lock); 679 (*vpp)->v_flag |= VISSWAP; 680 mutex_exit(&(*vpp)->v_lock); 681 } 682 ITIMES(ip); 683 /* 684 * If vnode is a device return special vnode instead. 685 */ 686 if (IS_DEVVP(*vpp)) { 687 struct vnode *newvp; 688 newvp = specvp(*vpp, (*vpp)->v_rdev, 689 (*vpp)->v_type, cr); 690 VN_RELE(*vpp); 691 if (newvp == NULL) { 692 error = ENOSYS; 693 } else { 694 *vpp = newvp; 695 } 696 } 697 } 698 out: 699 return (error); 700 } 701 702 /* ARGSUSED */ 703 static int32_t 704 udf_create( 705 struct vnode *dvp, 706 char *name, 707 struct vattr *vap, 708 enum vcexcl excl, 709 int32_t mode, 710 struct vnode **vpp, 711 struct cred *cr, 712 int32_t flag, 713 caller_context_t *ct, 714 vsecattr_t *vsecp) 715 { 716 int32_t error; 717 struct ud_inode *ip = VTOI(dvp), *xip; 718 719 ud_printf("udf_create\n"); 720 721 if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0) 722 vap->va_mode &= ~VSVTX; 723 724 if (*name == '\0') { 725 /* 726 * Null component name refers to the directory itself. 
727 */ 728 VN_HOLD(dvp); 729 ITIMES(ip); 730 error = EEXIST; 731 } else { 732 xip = NULL; 733 rw_enter(&ip->i_rwlock, RW_WRITER); 734 error = ud_direnter(ip, name, DE_CREATE, 735 (struct ud_inode *)0, (struct ud_inode *)0, 736 vap, &xip, cr, ct); 737 rw_exit(&ip->i_rwlock); 738 ITIMES(ip); 739 ip = xip; 740 } 741 #ifdef __lock_lint 742 rw_enter(&ip->i_contents, RW_WRITER); 743 #else 744 if (ip != NULL) { 745 rw_enter(&ip->i_contents, RW_WRITER); 746 } 747 #endif 748 749 /* 750 * If the file already exists and this is a non-exclusive create, 751 * check permissions and allow access for non-directories. 752 * Read-only create of an existing directory is also allowed. 753 * We fail an exclusive create of anything which already exists. 754 */ 755 if (error == EEXIST) { 756 if (excl == NONEXCL) { 757 if ((ip->i_type == VDIR) && (mode & VWRITE)) { 758 error = EISDIR; 759 } else if (mode) { 760 error = ud_iaccess(ip, 761 UD_UPERM2DPERM(mode), cr, 0); 762 } else { 763 error = 0; 764 } 765 } 766 if (error) { 767 rw_exit(&ip->i_contents); 768 VN_RELE(ITOV(ip)); 769 goto out; 770 } else if ((ip->i_type == VREG) && 771 (vap->va_mask & AT_SIZE) && vap->va_size == 0) { 772 /* 773 * Truncate regular files, if requested by caller. 774 * Grab i_rwlock to make sure no one else is 775 * currently writing to the file (we promised 776 * bmap we would do this). 777 * Must get the locks in the correct order. 778 */ 779 if (ip->i_size == 0) { 780 ip->i_flag |= ICHG | IUPD; 781 } else { 782 rw_exit(&ip->i_contents); 783 rw_enter(&ip->i_rwlock, RW_WRITER); 784 rw_enter(&ip->i_contents, RW_WRITER); 785 (void) ud_itrunc(ip, 0, 0, cr); 786 rw_exit(&ip->i_rwlock); 787 } 788 vnevent_create(ITOV(ip), ct); 789 } 790 } 791 792 if (error == 0) { 793 *vpp = ITOV(ip); 794 ITIMES(ip); 795 } 796 #ifdef __lock_lint 797 rw_exit(&ip->i_contents); 798 #else 799 if (ip != NULL) { 800 rw_exit(&ip->i_contents); 801 } 802 #endif 803 if (error) { 804 goto out; 805 } 806 807 /* 808 * If vnode is a device return special vnode instead. 
809 */ 810 if (!error && IS_DEVVP(*vpp)) { 811 struct vnode *newvp; 812 813 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 814 VN_RELE(*vpp); 815 if (newvp == NULL) { 816 error = ENOSYS; 817 goto out; 818 } 819 *vpp = newvp; 820 } 821 out: 822 return (error); 823 } 824 825 /* ARGSUSED */ 826 static int32_t 827 udf_remove( 828 struct vnode *vp, 829 char *nm, 830 struct cred *cr, 831 caller_context_t *ct, 832 int flags) 833 { 834 int32_t error; 835 struct ud_inode *ip = VTOI(vp); 836 837 ud_printf("udf_remove\n"); 838 839 rw_enter(&ip->i_rwlock, RW_WRITER); 840 error = ud_dirremove(ip, nm, 841 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct); 842 rw_exit(&ip->i_rwlock); 843 ITIMES(ip); 844 845 return (error); 846 } 847 848 /* ARGSUSED */ 849 static int32_t 850 udf_link( 851 struct vnode *tdvp, 852 struct vnode *svp, 853 char *tnm, 854 struct cred *cr, 855 caller_context_t *ct, 856 int flags) 857 { 858 int32_t error; 859 struct vnode *realvp; 860 struct ud_inode *sip; 861 struct ud_inode *tdp; 862 863 ud_printf("udf_link\n"); 864 if (VOP_REALVP(svp, &realvp, ct) == 0) { 865 svp = realvp; 866 } 867 868 /* 869 * Do not allow links to directories 870 */ 871 if (svp->v_type == VDIR) { 872 return (EPERM); 873 } 874 875 sip = VTOI(svp); 876 877 if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0) 878 return (EPERM); 879 880 tdp = VTOI(tdvp); 881 882 rw_enter(&tdp->i_rwlock, RW_WRITER); 883 error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0, 884 sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct); 885 rw_exit(&tdp->i_rwlock); 886 ITIMES(sip); 887 ITIMES(tdp); 888 889 if (error == 0) { 890 vnevent_link(svp, ct); 891 } 892 893 return (error); 894 } 895 896 /* ARGSUSED */ 897 static int32_t 898 udf_rename( 899 struct vnode *sdvp, 900 char *snm, 901 struct vnode *tdvp, 902 char *tnm, 903 struct cred *cr, 904 caller_context_t *ct, 905 int flags) 906 { 907 int32_t error = 0; 908 struct udf_vfs *udf_vfsp; 909 struct ud_inode *sip; /* source inode */ 910 struct ud_inode *sdp, *tdp; /* source and target parent inode */ 911 struct vnode *realvp; 912 913 ud_printf("udf_rename\n"); 914 915 if (VOP_REALVP(tdvp, &realvp, ct) == 0) { 916 tdvp = realvp; 917 } 918 919 sdp = VTOI(sdvp); 920 tdp = VTOI(tdvp); 921 922 udf_vfsp = sdp->i_udf; 923 924 mutex_enter(&udf_vfsp->udf_rename_lck); 925 /* 926 * Look up inode of file we're supposed to rename. 927 */ 928 if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) { 929 mutex_exit(&udf_vfsp->udf_rename_lck); 930 return (error); 931 } 932 /* 933 * be sure this is not a directory with another file system mounted 934 * over it. If it is just give up the locks, and return with 935 * EBUSY 936 */ 937 if (vn_mountedvfs(ITOV(sip)) != NULL) { 938 error = EBUSY; 939 goto errout; 940 } 941 /* 942 * Make sure we can delete the source entry. This requires 943 * write permission on the containing directory. If that 944 * directory is "sticky" it further requires (except for 945 * privileged users) that the user own the directory or the 946 * source entry, or else have permission to write the source 947 * entry. 948 */ 949 rw_enter(&sdp->i_contents, RW_READER); 950 rw_enter(&sip->i_contents, RW_READER); 951 if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 || 952 (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) { 953 rw_exit(&sip->i_contents); 954 rw_exit(&sdp->i_contents); 955 ITIMES(sip); 956 goto errout; 957 } 958 959 /* 960 * Check for renaming '.' or '..' or alias of '.' 
961 */ 962 if ((strcmp(snm, ".") == 0) || 963 (strcmp(snm, "..") == 0) || 964 (sdp == sip)) { 965 error = EINVAL; 966 rw_exit(&sip->i_contents); 967 rw_exit(&sdp->i_contents); 968 goto errout; 969 } 970 rw_exit(&sip->i_contents); 971 rw_exit(&sdp->i_contents); 972 973 974 /* 975 * Link source to the target. 976 */ 977 rw_enter(&tdp->i_rwlock, RW_WRITER); 978 if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip, 979 (struct vattr *)0, (struct ud_inode **)0, cr, ct)) { 980 /* 981 * ESAME isn't really an error; it indicates that the 982 * operation should not be done because the source and target 983 * are the same file, but that no error should be reported. 984 */ 985 if (error == ESAME) { 986 error = 0; 987 } 988 rw_exit(&tdp->i_rwlock); 989 goto errout; 990 } 991 vnevent_rename_src(ITOV(sip), sdvp, snm, ct); 992 rw_exit(&tdp->i_rwlock); 993 994 rw_enter(&sdp->i_rwlock, RW_WRITER); 995 /* 996 * Unlink the source. 997 * Remove the source entry. ud_dirremove() checks that the entry 998 * still reflects sip, and returns an error if it doesn't. 999 * If the entry has changed just forget about it. Release 1000 * the source inode. 1001 */ 1002 if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0, 1003 DR_RENAME, cr, ct)) == ENOENT) { 1004 error = 0; 1005 } 1006 rw_exit(&sdp->i_rwlock); 1007 errout: 1008 ITIMES(sdp); 1009 ITIMES(tdp); 1010 VN_RELE(ITOV(sip)); 1011 mutex_exit(&udf_vfsp->udf_rename_lck); 1012 1013 return (error); 1014 } 1015 1016 /* ARGSUSED */ 1017 static int32_t 1018 udf_mkdir( 1019 struct vnode *dvp, 1020 char *dirname, 1021 struct vattr *vap, 1022 struct vnode **vpp, 1023 struct cred *cr, 1024 caller_context_t *ct, 1025 int flags, 1026 vsecattr_t *vsecp) 1027 { 1028 int32_t error; 1029 struct ud_inode *ip; 1030 struct ud_inode *xip; 1031 1032 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 1033 1034 ud_printf("udf_mkdir\n"); 1035 1036 ip = VTOI(dvp); 1037 rw_enter(&ip->i_rwlock, RW_WRITER); 1038 error = ud_direnter(ip, dirname, DE_MKDIR, 1039 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct); 1040 rw_exit(&ip->i_rwlock); 1041 ITIMES(ip); 1042 if (error == 0) { 1043 ip = xip; 1044 *vpp = ITOV(ip); 1045 ITIMES(ip); 1046 } else if (error == EEXIST) { 1047 ITIMES(xip); 1048 VN_RELE(ITOV(xip)); 1049 } 1050 1051 return (error); 1052 } 1053 1054 /* ARGSUSED */ 1055 static int32_t 1056 udf_rmdir( 1057 struct vnode *vp, 1058 char *nm, 1059 struct vnode *cdir, 1060 struct cred *cr, 1061 caller_context_t *ct, 1062 int flags) 1063 { 1064 int32_t error; 1065 struct ud_inode *ip = VTOI(vp); 1066 1067 ud_printf("udf_rmdir\n"); 1068 1069 rw_enter(&ip->i_rwlock, RW_WRITER); 1070 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR, 1071 cr, ct); 1072 rw_exit(&ip->i_rwlock); 1073 ITIMES(ip); 1074 1075 return (error); 1076 } 1077 1078 /* ARGSUSED */ 1079 static int32_t 1080 udf_readdir( 1081 struct vnode *vp, 1082 struct uio *uiop, 1083 struct cred *cr, 1084 int32_t *eofp, 1085 caller_context_t *ct, 1086 int flags) 1087 { 1088 struct ud_inode *ip; 1089 struct dirent64 *nd; 1090 struct udf_vfs *udf_vfsp; 1091 int32_t error = 0, len, outcount = 0; 1092 uint32_t dirsiz, offset; 1093 uint32_t bufsize, ndlen, dummy; 1094 caddr_t outbuf; 1095 caddr_t outb, end_outb; 1096 struct iovec *iovp; 1097 1098 uint8_t *dname; 1099 int32_t length; 1100 1101 uint8_t *buf = NULL; 1102 1103 struct fbuf *fbp = NULL; 1104 struct file_id *fid; 1105 uint8_t *name; 1106 1107 1108 ud_printf("udf_readdir\n"); 1109 1110 ip = VTOI(vp); 1111 udf_vfsp = ip->i_udf; 1112 1113 
dirsiz = ip->i_size; 1114 if ((uiop->uio_offset >= dirsiz) || 1115 (ip->i_nlink <= 0)) { 1116 if (eofp) { 1117 *eofp = 1; 1118 } 1119 return (0); 1120 } 1121 1122 offset = uiop->uio_offset; 1123 iovp = uiop->uio_iov; 1124 bufsize = iovp->iov_len; 1125 1126 outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP); 1127 end_outb = outb + bufsize; 1128 nd = (struct dirent64 *)outbuf; 1129 1130 dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP); 1131 buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP); 1132 1133 if (offset == 0) { 1134 len = DIRENT64_RECLEN(1); 1135 if (((caddr_t)nd + len) >= end_outb) { 1136 error = EINVAL; 1137 goto end; 1138 } 1139 nd->d_ino = ip->i_icb_lbano; 1140 nd->d_reclen = (uint16_t)len; 1141 nd->d_off = 0x10; 1142 nd->d_name[0] = '.'; 1143 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1); 1144 nd = (struct dirent64 *)((char *)nd + nd->d_reclen); 1145 outcount++; 1146 } else if (offset == 0x10) { 1147 offset = 0; 1148 } 1149 1150 while (offset < dirsiz) { 1151 error = ud_get_next_fid(ip, &fbp, 1152 offset, &fid, &name, buf); 1153 if (error != 0) { 1154 break; 1155 } 1156 1157 if ((fid->fid_flags & FID_DELETED) == 0) { 1158 if (fid->fid_flags & FID_PARENT) { 1159 1160 len = DIRENT64_RECLEN(2); 1161 if (((caddr_t)nd + len) >= end_outb) { 1162 error = EINVAL; 1163 break; 1164 } 1165 1166 nd->d_ino = ip->i_icb_lbano; 1167 nd->d_reclen = (uint16_t)len; 1168 nd->d_off = offset + FID_LEN(fid); 1169 nd->d_name[0] = '.'; 1170 nd->d_name[1] = '.'; 1171 bzero(&nd->d_name[2], 1172 DIRENT64_NAMELEN(len) - 2); 1173 nd = (struct dirent64 *) 1174 ((char *)nd + nd->d_reclen); 1175 } else { 1176 if ((error = ud_uncompress(fid->fid_idlen, 1177 &length, name, dname)) != 0) { 1178 break; 1179 } 1180 if (length == 0) { 1181 offset += FID_LEN(fid); 1182 continue; 1183 } 1184 len = DIRENT64_RECLEN(length); 1185 if (((caddr_t)nd + len) >= end_outb) { 1186 if (!outcount) { 1187 error = EINVAL; 1188 } 1189 break; 1190 } 1191 (void) strncpy(nd->d_name, 1192 (caddr_t)dname, length); 1193 bzero(&nd->d_name[length], 1194 DIRENT64_NAMELEN(len) - length); 1195 nd->d_ino = ud_xlate_to_daddr(udf_vfsp, 1196 SWAP_16(fid->fid_icb.lad_ext_prn), 1197 SWAP_32(fid->fid_icb.lad_ext_loc), 1, 1198 &dummy); 1199 nd->d_reclen = (uint16_t)len; 1200 nd->d_off = offset + FID_LEN(fid); 1201 nd = (struct dirent64 *) 1202 ((char *)nd + nd->d_reclen); 1203 } 1204 outcount++; 1205 } 1206 1207 offset += FID_LEN(fid); 1208 } 1209 1210 end: 1211 if (fbp != NULL) { 1212 fbrelse(fbp, S_OTHER); 1213 } 1214 ndlen = ((char *)nd - outbuf); 1215 /* 1216 * In case of error do not call uiomove. 1217 * Return the error to the caller. 
1218 */ 1219 if ((error == 0) && (ndlen != 0)) { 1220 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop); 1221 uiop->uio_offset = offset; 1222 } 1223 kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize); 1224 kmem_free((caddr_t)dname, 1024); 1225 kmem_free(outbuf, (uint32_t)bufsize); 1226 if (eofp && error == 0) { 1227 *eofp = (uiop->uio_offset >= dirsiz); 1228 } 1229 return (error); 1230 } 1231 1232 /* ARGSUSED */ 1233 static int32_t 1234 udf_symlink( 1235 struct vnode *dvp, 1236 char *linkname, 1237 struct vattr *vap, 1238 char *target, 1239 struct cred *cr, 1240 caller_context_t *ct, 1241 int flags) 1242 { 1243 int32_t error = 0, outlen; 1244 uint32_t ioflag = 0; 1245 struct ud_inode *ip, *dip = VTOI(dvp); 1246 1247 struct path_comp *pc; 1248 int8_t *dname = NULL, *uname = NULL, *sp; 1249 1250 ud_printf("udf_symlink\n"); 1251 1252 ip = (struct ud_inode *)0; 1253 vap->va_type = VLNK; 1254 vap->va_rdev = 0; 1255 1256 rw_enter(&dip->i_rwlock, RW_WRITER); 1257 error = ud_direnter(dip, linkname, DE_CREATE, 1258 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct); 1259 rw_exit(&dip->i_rwlock); 1260 if (error == 0) { 1261 dname = kmem_zalloc(1024, KM_SLEEP); 1262 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1263 1264 pc = (struct path_comp *)uname; 1265 /* 1266 * If the first character in target is "/" 1267 * then skip it and create entry for it 1268 */ 1269 if (*target == '/') { 1270 pc->pc_type = 2; 1271 pc->pc_len = 0; 1272 pc = (struct path_comp *)(((char *)pc) + 4); 1273 while (*target == '/') { 1274 target++; 1275 } 1276 } 1277 1278 while (*target != NULL) { 1279 sp = target; 1280 while ((*target != '/') && (*target != '\0')) { 1281 target ++; 1282 } 1283 /* 1284 * We got the next component of the 1285 * path name. Create path_comp of 1286 * appropriate type 1287 */ 1288 if (((target - sp) == 1) && (*sp == '.')) { 1289 /* 1290 * Dot entry. 1291 */ 1292 pc->pc_type = 4; 1293 pc = (struct path_comp *)(((char *)pc) + 4); 1294 } else if (((target - sp) == 2) && 1295 (*sp == '.') && ((*(sp + 1)) == '.')) { 1296 /* 1297 * DotDot entry. 
1298 */ 1299 pc->pc_type = 3; 1300 pc = (struct path_comp *)(((char *)pc) + 4); 1301 } else { 1302 /* 1303 * convert the user given name 1304 * into appropriate form to be put 1305 * on the media 1306 */ 1307 outlen = 1024; /* set to size of dname */ 1308 if (error = ud_compress(target - sp, &outlen, 1309 (uint8_t *)sp, (uint8_t *)dname)) { 1310 break; 1311 } 1312 pc->pc_type = 5; 1313 /* LINTED */ 1314 pc->pc_len = outlen; 1315 dname[outlen] = '\0'; 1316 (void) strcpy((char *)pc->pc_id, dname); 1317 pc = (struct path_comp *) 1318 (((char *)pc) + 4 + outlen); 1319 } 1320 while (*target == '/') { 1321 target++; 1322 } 1323 if (*target == NULL) { 1324 break; 1325 } 1326 } 1327 1328 rw_enter(&ip->i_contents, RW_WRITER); 1329 if (error == 0) { 1330 ioflag = FWRITE; 1331 if (curthread->t_flag & T_DONTPEND) { 1332 ioflag |= FDSYNC; 1333 } 1334 error = ud_rdwri(UIO_WRITE, ioflag, ip, 1335 uname, ((int8_t *)pc) - uname, 1336 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr); 1337 } 1338 if (error) { 1339 ud_idrop(ip); 1340 rw_exit(&ip->i_contents); 1341 rw_enter(&dip->i_rwlock, RW_WRITER); 1342 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0, 1343 (struct vnode *)0, DR_REMOVE, cr, ct); 1344 rw_exit(&dip->i_rwlock); 1345 goto update_inode; 1346 } 1347 rw_exit(&ip->i_contents); 1348 } 1349 1350 if ((error == 0) || (error == EEXIST)) { 1351 VN_RELE(ITOV(ip)); 1352 } 1353 1354 update_inode: 1355 ITIMES(VTOI(dvp)); 1356 if (uname != NULL) { 1357 kmem_free(uname, PAGESIZE); 1358 } 1359 if (dname != NULL) { 1360 kmem_free(dname, 1024); 1361 } 1362 1363 return (error); 1364 } 1365 1366 /* ARGSUSED */ 1367 static int32_t 1368 udf_readlink( 1369 struct vnode *vp, 1370 struct uio *uiop, 1371 struct cred *cr, 1372 caller_context_t *ct) 1373 { 1374 int32_t error = 0, off, id_len, size, len; 1375 int8_t *dname = NULL, *uname = NULL; 1376 struct ud_inode *ip; 1377 struct fbuf *fbp = NULL; 1378 struct path_comp *pc; 1379 1380 ud_printf("udf_readlink\n"); 1381 1382 if (vp->v_type != VLNK) { 1383 return (EINVAL); 1384 } 1385 1386 ip = VTOI(vp); 1387 size = ip->i_size; 1388 if (size > PAGESIZE) { 1389 return (EIO); 1390 } 1391 1392 if (size == 0) { 1393 return (0); 1394 } 1395 1396 dname = kmem_zalloc(1024, KM_SLEEP); 1397 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1398 1399 rw_enter(&ip->i_contents, RW_READER); 1400 1401 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) { 1402 goto end; 1403 } 1404 1405 off = 0; 1406 1407 while (off < size) { 1408 pc = (struct path_comp *)(fbp->fb_addr + off); 1409 switch (pc->pc_type) { 1410 case 1 : 1411 (void) strcpy(uname, ip->i_udf->udf_fsmnt); 1412 (void) strcat(uname, "/"); 1413 break; 1414 case 2 : 1415 if (pc->pc_len != 0) { 1416 goto end; 1417 } 1418 uname[0] = '/'; 1419 uname[1] = '\0'; 1420 break; 1421 case 3 : 1422 (void) strcat(uname, "../"); 1423 break; 1424 case 4 : 1425 (void) strcat(uname, "./"); 1426 break; 1427 case 5 : 1428 if ((error = ud_uncompress(pc->pc_len, &id_len, 1429 pc->pc_id, (uint8_t *)dname)) != 0) { 1430 break; 1431 } 1432 dname[id_len] = '\0'; 1433 (void) strcat(uname, dname); 1434 (void) strcat(uname, "/"); 1435 break; 1436 default : 1437 error = EINVAL; 1438 goto end; 1439 } 1440 off += 4 + pc->pc_len; 1441 } 1442 len = strlen(uname) - 1; 1443 if (uname[len] == '/') { 1444 if (len == 0) { 1445 /* 1446 * special case link to / 1447 */ 1448 len = 1; 1449 } else { 1450 uname[len] = '\0'; 1451 } 1452 } 1453 1454 error = uiomove(uname, len, UIO_READ, uiop); 1455 1456 ITIMES(ip); 1457 1458 end: 1459 if (fbp != NULL) { 1460 fbrelse(fbp, S_OTHER); 
1461 } 1462 rw_exit(&ip->i_contents); 1463 if (uname != NULL) { 1464 kmem_free(uname, PAGESIZE); 1465 } 1466 if (dname != NULL) { 1467 kmem_free(dname, 1024); 1468 } 1469 return (error); 1470 } 1471 1472 /* ARGSUSED */ 1473 static int32_t 1474 udf_fsync( 1475 struct vnode *vp, 1476 int32_t syncflag, 1477 struct cred *cr, 1478 caller_context_t *ct) 1479 { 1480 int32_t error = 0; 1481 struct ud_inode *ip = VTOI(vp); 1482 1483 ud_printf("udf_fsync\n"); 1484 1485 rw_enter(&ip->i_contents, RW_WRITER); 1486 if (!(IS_SWAPVP(vp))) { 1487 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */ 1488 } 1489 if (error == 0) { 1490 error = ud_sync_indir(ip); 1491 } 1492 ITIMES(ip); /* XXX: is this necessary ??? */ 1493 rw_exit(&ip->i_contents); 1494 1495 return (error); 1496 } 1497 1498 /* ARGSUSED */ 1499 static void 1500 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 1501 { 1502 ud_printf("udf_iinactive\n"); 1503 1504 ud_iinactive(VTOI(vp), cr); 1505 } 1506 1507 /* ARGSUSED */ 1508 static int32_t 1509 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1510 { 1511 struct udf_fid *udfidp; 1512 struct ud_inode *ip = VTOI(vp); 1513 1514 ud_printf("udf_fid\n"); 1515 1516 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) { 1517 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1518 return (ENOSPC); 1519 } 1520 1521 udfidp = (struct udf_fid *)fidp; 1522 bzero((char *)udfidp, sizeof (struct udf_fid)); 1523 rw_enter(&ip->i_contents, RW_READER); 1524 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1525 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff; 1526 udfidp->udfid_prn = ip->i_icb_prn; 1527 udfidp->udfid_icb_lbn = ip->i_icb_block; 1528 rw_exit(&ip->i_contents); 1529 1530 return (0); 1531 } 1532 1533 /* ARGSUSED2 */ 1534 static int 1535 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1536 { 1537 struct ud_inode *ip = VTOI(vp); 1538 1539 ud_printf("udf_rwlock\n"); 1540 1541 if (write_lock) { 1542 rw_enter(&ip->i_rwlock, RW_WRITER); 1543 } else { 1544 rw_enter(&ip->i_rwlock, RW_READER); 1545 } 1546 #ifdef __lock_lint 1547 rw_exit(&ip->i_rwlock); 1548 #endif 1549 return (write_lock); 1550 } 1551 1552 /* ARGSUSED */ 1553 static void 1554 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1555 { 1556 struct ud_inode *ip = VTOI(vp); 1557 1558 ud_printf("udf_rwunlock\n"); 1559 1560 #ifdef __lock_lint 1561 rw_enter(&ip->i_rwlock, RW_WRITER); 1562 #endif 1563 1564 rw_exit(&ip->i_rwlock); 1565 1566 } 1567 1568 /* ARGSUSED */ 1569 static int32_t 1570 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 1571 { 1572 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1573 } 1574 1575 static int32_t 1576 udf_frlock( 1577 struct vnode *vp, 1578 int32_t cmd, 1579 struct flock64 *bfp, 1580 int32_t flag, 1581 offset_t offset, 1582 struct flk_callback *flk_cbp, 1583 cred_t *cr, 1584 caller_context_t *ct) 1585 { 1586 struct ud_inode *ip = VTOI(vp); 1587 1588 ud_printf("udf_frlock\n"); 1589 1590 /* 1591 * If file is being mapped, disallow frlock. 1592 * XXX I am not holding tlock while checking i_mapcnt because the 1593 * current locking strategy drops all locks before calling fs_frlock. 1594 * So, mapcnt could change before we enter fs_frlock making is 1595 * meaningless to have held tlock in the first place. 
1596 */ 1597 if ((ip->i_mapcnt > 0) && 1598 (MANDLOCK(vp, ip->i_char))) { 1599 return (EAGAIN); 1600 } 1601 1602 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 1603 } 1604 1605 /*ARGSUSED6*/ 1606 static int32_t 1607 udf_space( 1608 struct vnode *vp, 1609 int32_t cmd, 1610 struct flock64 *bfp, 1611 int32_t flag, 1612 offset_t offset, 1613 cred_t *cr, 1614 caller_context_t *ct) 1615 { 1616 int32_t error = 0; 1617 1618 ud_printf("udf_space\n"); 1619 1620 if (cmd != F_FREESP) { 1621 error = EINVAL; 1622 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) { 1623 error = ud_freesp(vp, bfp, flag, cr); 1624 } 1625 1626 return (error); 1627 } 1628 1629 /* ARGSUSED */ 1630 static int32_t 1631 udf_getpage( 1632 struct vnode *vp, 1633 offset_t off, 1634 size_t len, 1635 uint32_t *protp, 1636 struct page **plarr, 1637 size_t plsz, 1638 struct seg *seg, 1639 caddr_t addr, 1640 enum seg_rw rw, 1641 struct cred *cr, 1642 caller_context_t *ct) 1643 { 1644 struct ud_inode *ip = VTOI(vp); 1645 int32_t error, has_holes, beyond_eof, seqmode, dolock; 1646 int32_t pgsize = PAGESIZE; 1647 struct udf_vfs *udf_vfsp = ip->i_udf; 1648 page_t **pl; 1649 u_offset_t pgoff, eoff, uoff; 1650 krw_t rwtype; 1651 caddr_t pgaddr; 1652 1653 ud_printf("udf_getpage\n"); 1654 1655 uoff = (u_offset_t)off; /* type conversion */ 1656 if (protp) { 1657 *protp = PROT_ALL; 1658 } 1659 if (vp->v_flag & VNOMAP) { 1660 return (ENOSYS); 1661 } 1662 seqmode = ip->i_nextr == uoff && rw != S_CREATE; 1663 1664 rwtype = RW_READER; 1665 dolock = (rw_owner(&ip->i_contents) != curthread); 1666 retrylock: 1667 #ifdef __lock_lint 1668 rw_enter(&ip->i_contents, rwtype); 1669 #else 1670 if (dolock) { 1671 rw_enter(&ip->i_contents, rwtype); 1672 } 1673 #endif 1674 1675 /* 1676 * We may be getting called as a side effect of a bmap using 1677 * fbread() when the blocks might be being allocated and the 1678 * size has not yet been up'ed. In this case we want to be 1679 * able to return zero pages if we get back UDF_HOLE from 1680 * calling bmap for a non write case here. We also might have 1681 * to read some frags from the disk into a page if we are 1682 * extending the number of frags for a given lbn in bmap(). 1683 */ 1684 beyond_eof = uoff + len > ip->i_size + PAGEOFFSET; 1685 if (beyond_eof && seg != segkmap) { 1686 #ifdef __lock_lint 1687 rw_exit(&ip->i_contents); 1688 #else 1689 if (dolock) { 1690 rw_exit(&ip->i_contents); 1691 } 1692 #endif 1693 return (EFAULT); 1694 } 1695 1696 /* 1697 * Must hold i_contents lock throughout the call to pvn_getpages 1698 * since locked pages are returned from each call to ud_getapage. 1699 * Must *not* return locked pages and then try for contents lock 1700 * due to lock ordering requirements (inode > page) 1701 */ 1702 1703 has_holes = ud_bmap_has_holes(ip); 1704 1705 if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) { 1706 int32_t blk_size, count; 1707 u_offset_t offset; 1708 1709 /* 1710 * We must acquire the RW_WRITER lock in order to 1711 * call bmap_write(). 1712 */ 1713 if (dolock && rwtype == RW_READER) { 1714 rwtype = RW_WRITER; 1715 1716 if (!rw_tryupgrade(&ip->i_contents)) { 1717 1718 rw_exit(&ip->i_contents); 1719 1720 goto retrylock; 1721 } 1722 } 1723 1724 /* 1725 * May be allocating disk blocks for holes here as 1726 * a result of mmap faults. write(2) does the bmap_write 1727 * in rdip/wrip, not here. We are not dealing with frags 1728 * in this case. 
1729 */ 1730 offset = uoff; 1731 while ((offset < uoff + len) && 1732 (offset < ip->i_size)) { 1733 /* 1734 * the variable "bnp" is to simplify the expression for 1735 * the compiler; * just passing in &bn to bmap_write 1736 * causes a compiler "loop" 1737 */ 1738 1739 blk_size = udf_vfsp->udf_lbsize; 1740 if ((offset + blk_size) > ip->i_size) { 1741 count = ip->i_size - offset; 1742 } else { 1743 count = blk_size; 1744 } 1745 error = ud_bmap_write(ip, offset, count, 0, cr); 1746 if (error) { 1747 goto update_inode; 1748 } 1749 offset += count; /* XXX - make this contig */ 1750 } 1751 } 1752 1753 /* 1754 * Can be a reader from now on. 1755 */ 1756 #ifdef __lock_lint 1757 if (rwtype == RW_WRITER) { 1758 rw_downgrade(&ip->i_contents); 1759 } 1760 #else 1761 if (dolock && rwtype == RW_WRITER) { 1762 rw_downgrade(&ip->i_contents); 1763 } 1764 #endif 1765 1766 /* 1767 * We remove PROT_WRITE in cases when the file has UDF holes 1768 * because we don't want to call bmap_read() to check each 1769 * page if it is backed with a disk block. 1770 */ 1771 if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) { 1772 *protp &= ~PROT_WRITE; 1773 } 1774 1775 error = 0; 1776 1777 /* 1778 * The loop looks up pages in the range <off, off + len). 1779 * For each page, we first check if we should initiate an asynchronous 1780 * read ahead before we call page_lookup (we may sleep in page_lookup 1781 * for a previously initiated disk read). 1782 */ 1783 eoff = (uoff + len); 1784 for (pgoff = uoff, pgaddr = addr, pl = plarr; 1785 pgoff < eoff; /* empty */) { 1786 page_t *pp; 1787 u_offset_t nextrio; 1788 se_t se; 1789 1790 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED); 1791 1792 /* 1793 * Handle async getpage (faultahead) 1794 */ 1795 if (plarr == NULL) { 1796 ip->i_nextrio = pgoff; 1797 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1798 pgoff += pgsize; 1799 pgaddr += pgsize; 1800 continue; 1801 } 1802 1803 /* 1804 * Check if we should initiate read ahead of next cluster. 1805 * We call page_exists only when we need to confirm that 1806 * we have the current page before we initiate the read ahead. 1807 */ 1808 nextrio = ip->i_nextrio; 1809 if (seqmode && 1810 pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio && 1811 nextrio < ip->i_size && page_exists(vp, pgoff)) 1812 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1813 1814 if ((pp = page_lookup(vp, pgoff, se)) != NULL) { 1815 1816 /* 1817 * We found the page in the page cache. 1818 */ 1819 *pl++ = pp; 1820 pgoff += pgsize; 1821 pgaddr += pgsize; 1822 len -= pgsize; 1823 plsz -= pgsize; 1824 } else { 1825 1826 /* 1827 * We have to create the page, or read it from disk. 1828 */ 1829 if (error = ud_getpage_miss(vp, pgoff, len, 1830 seg, pgaddr, pl, plsz, rw, seqmode)) { 1831 goto error_out; 1832 } 1833 1834 while (*pl != NULL) { 1835 pl++; 1836 pgoff += pgsize; 1837 pgaddr += pgsize; 1838 len -= pgsize; 1839 plsz -= pgsize; 1840 } 1841 } 1842 } 1843 1844 /* 1845 * Return pages up to plsz if they are in the page cache. 1846 * We cannot return pages if there is a chance that they are 1847 * backed with a UDF hole and rw is S_WRITE or S_CREATE. 
1848 */ 1849 if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) { 1850 1851 ASSERT((protp == NULL) || 1852 !(has_holes && (*protp & PROT_WRITE))); 1853 1854 eoff = pgoff + plsz; 1855 while (pgoff < eoff) { 1856 page_t *pp; 1857 1858 if ((pp = page_lookup_nowait(vp, pgoff, 1859 SE_SHARED)) == NULL) 1860 break; 1861 1862 *pl++ = pp; 1863 pgoff += pgsize; 1864 plsz -= pgsize; 1865 } 1866 } 1867 1868 if (plarr) 1869 *pl = NULL; /* Terminate page list */ 1870 ip->i_nextr = pgoff; 1871 1872 error_out: 1873 if (error && plarr) { 1874 /* 1875 * Release any pages we have locked. 1876 */ 1877 while (pl > &plarr[0]) 1878 page_unlock(*--pl); 1879 1880 plarr[0] = NULL; 1881 } 1882 1883 update_inode: 1884 #ifdef __lock_lint 1885 rw_exit(&ip->i_contents); 1886 #else 1887 if (dolock) { 1888 rw_exit(&ip->i_contents); 1889 } 1890 #endif 1891 1892 /* 1893 * If the inode is not already marked for IACC (in rwip() for read) 1894 * and the inode is not marked for no access time update (in rwip() 1895 * for write) then update the inode access time and mod time now. 1896 */ 1897 mutex_enter(&ip->i_tlock); 1898 if ((ip->i_flag & (IACC | INOACC)) == 0) { 1899 if ((rw != S_OTHER) && (ip->i_type != VDIR)) { 1900 ip->i_flag |= IACC; 1901 } 1902 if (rw == S_WRITE) { 1903 ip->i_flag |= IUPD; 1904 } 1905 ITIMES_NOLOCK(ip); 1906 } 1907 mutex_exit(&ip->i_tlock); 1908 1909 return (error); 1910 } 1911 1912 int32_t ud_delay = 1; 1913 1914 /* ARGSUSED */ 1915 static int32_t 1916 udf_putpage( 1917 struct vnode *vp, 1918 offset_t off, 1919 size_t len, 1920 int32_t flags, 1921 struct cred *cr, 1922 caller_context_t *ct) 1923 { 1924 struct ud_inode *ip; 1925 int32_t error = 0; 1926 1927 ud_printf("udf_putpage\n"); 1928 1929 ip = VTOI(vp); 1930 #ifdef __lock_lint 1931 rw_enter(&ip->i_contents, RW_WRITER); 1932 #endif 1933 1934 if (vp->v_count == 0) { 1935 cmn_err(CE_WARN, "ud_putpage : bad v_count"); 1936 error = EINVAL; 1937 goto out; 1938 } 1939 1940 if (vp->v_flag & VNOMAP) { 1941 error = ENOSYS; 1942 goto out; 1943 } 1944 1945 if (flags & B_ASYNC) { 1946 if (ud_delay && len && 1947 (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) { 1948 mutex_enter(&ip->i_tlock); 1949 1950 /* 1951 * If nobody stalled, start a new cluster. 1952 */ 1953 if (ip->i_delaylen == 0) { 1954 ip->i_delayoff = off; 1955 ip->i_delaylen = len; 1956 mutex_exit(&ip->i_tlock); 1957 goto out; 1958 } 1959 1960 /* 1961 * If we have a full cluster or they are not contig, 1962 * then push last cluster and start over. 1963 */ 1964 if (ip->i_delaylen >= WR_CLUSTSZ(ip) || 1965 ip->i_delayoff + ip->i_delaylen != off) { 1966 u_offset_t doff; 1967 size_t dlen; 1968 1969 doff = ip->i_delayoff; 1970 dlen = ip->i_delaylen; 1971 ip->i_delayoff = off; 1972 ip->i_delaylen = len; 1973 mutex_exit(&ip->i_tlock); 1974 error = ud_putpages(vp, doff, dlen, flags, cr); 1975 /* LMXXX - flags are new val, not old */ 1976 goto out; 1977 } 1978 1979 /* 1980 * There is something there, it's not full, and 1981 * it is contig. 1982 */ 1983 ip->i_delaylen += len; 1984 mutex_exit(&ip->i_tlock); 1985 goto out; 1986 } 1987 1988 /* 1989 * Must have weird flags or we are not clustering. 
1990 */ 1991 } 1992 1993 error = ud_putpages(vp, off, len, flags, cr); 1994 1995 out: 1996 #ifdef __lock_lint 1997 rw_exit(&ip->i_contents); 1998 #endif 1999 return (error); 2000 } 2001 2002 /* ARGSUSED */ 2003 static int32_t 2004 udf_map( 2005 struct vnode *vp, 2006 offset_t off, 2007 struct as *as, 2008 caddr_t *addrp, 2009 size_t len, 2010 uint8_t prot, 2011 uint8_t maxprot, 2012 uint32_t flags, 2013 struct cred *cr, 2014 caller_context_t *ct) 2015 { 2016 struct segvn_crargs vn_a; 2017 int32_t error = 0; 2018 2019 ud_printf("udf_map\n"); 2020 2021 if (vp->v_flag & VNOMAP) { 2022 error = ENOSYS; 2023 goto end; 2024 } 2025 2026 if ((off < (offset_t)0) || 2027 ((off + len) < (offset_t)0)) { 2028 error = EINVAL; 2029 goto end; 2030 } 2031 2032 if (vp->v_type != VREG) { 2033 error = ENODEV; 2034 goto end; 2035 } 2036 2037 /* 2038 * If file is being locked, disallow mapping. 2039 */ 2040 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) { 2041 error = EAGAIN; 2042 goto end; 2043 } 2044 2045 as_rangelock(as); 2046 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 2047 if (error != 0) { 2048 as_rangeunlock(as); 2049 goto end; 2050 } 2051 2052 vn_a.vp = vp; 2053 vn_a.offset = off; 2054 vn_a.type = flags & MAP_TYPE; 2055 vn_a.prot = prot; 2056 vn_a.maxprot = maxprot; 2057 vn_a.cred = cr; 2058 vn_a.amp = NULL; 2059 vn_a.flags = flags & ~MAP_TYPE; 2060 vn_a.szc = 0; 2061 vn_a.lgrp_mem_policy_flags = 0; 2062 2063 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a); 2064 as_rangeunlock(as); 2065 2066 end: 2067 return (error); 2068 } 2069 2070 /* ARGSUSED */ 2071 static int32_t 2072 udf_addmap(struct vnode *vp, 2073 offset_t off, 2074 struct as *as, 2075 caddr_t addr, 2076 size_t len, 2077 uint8_t prot, 2078 uint8_t maxprot, 2079 uint32_t flags, 2080 struct cred *cr, 2081 caller_context_t *ct) 2082 { 2083 struct ud_inode *ip = VTOI(vp); 2084 2085 ud_printf("udf_addmap\n"); 2086 2087 if (vp->v_flag & VNOMAP) { 2088 return (ENOSYS); 2089 } 2090 2091 mutex_enter(&ip->i_tlock); 2092 ip->i_mapcnt += btopr(len); 2093 mutex_exit(&ip->i_tlock); 2094 2095 return (0); 2096 } 2097 2098 /* ARGSUSED */ 2099 static int32_t 2100 udf_delmap( 2101 struct vnode *vp, offset_t off, 2102 struct as *as, 2103 caddr_t addr, 2104 size_t len, 2105 uint32_t prot, 2106 uint32_t maxprot, 2107 uint32_t flags, 2108 struct cred *cr, 2109 caller_context_t *ct) 2110 { 2111 struct ud_inode *ip = VTOI(vp); 2112 2113 ud_printf("udf_delmap\n"); 2114 2115 if (vp->v_flag & VNOMAP) { 2116 return (ENOSYS); 2117 } 2118 2119 mutex_enter(&ip->i_tlock); 2120 ip->i_mapcnt -= btopr(len); /* Count released mappings */ 2121 ASSERT(ip->i_mapcnt >= 0); 2122 mutex_exit(&ip->i_tlock); 2123 2124 return (0); 2125 } 2126 2127 /* ARGSUSED */ 2128 static int32_t 2129 udf_l_pathconf( 2130 struct vnode *vp, 2131 int32_t cmd, 2132 ulong_t *valp, 2133 struct cred *cr, 2134 caller_context_t *ct) 2135 { 2136 int32_t error = 0; 2137 2138 ud_printf("udf_l_pathconf\n"); 2139 2140 if (cmd == _PC_FILESIZEBITS) { 2141 /* 2142 * udf supports 64 bits as file size 2143 * but there are several other restrictions 2144 * it only supports 32-bit block numbers and 2145 * daddr32_t is only and int32_t so taking these 2146 * into account we can stay just as where ufs is 2147 */ 2148 *valp = 41; 2149 } else if (cmd == _PC_TIMESTAMP_RESOLUTION) { 2150 /* nanosecond timestamp resolution */ 2151 *valp = 1L; 2152 } else { 2153 error = fs_pathconf(vp, cmd, valp, cr, ct); 2154 } 2155 2156 return (error); 2157 } 2158 2159 uint32_t ud_pageio_reads = 0, 
ud_pageio_writes = 0; 2160 #ifndef __lint 2161 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads)) 2162 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes)) 2163 #endif 2164 /* 2165 * Assumption is that there will not be a pageio request 2166 * to a enbedded file 2167 */ 2168 /* ARGSUSED */ 2169 static int32_t 2170 udf_pageio( 2171 struct vnode *vp, 2172 struct page *pp, 2173 u_offset_t io_off, 2174 size_t io_len, 2175 int32_t flags, 2176 struct cred *cr, 2177 caller_context_t *ct) 2178 { 2179 daddr_t bn; 2180 struct buf *bp; 2181 struct ud_inode *ip = VTOI(vp); 2182 int32_t dolock, error = 0, contig, multi_io; 2183 size_t done_len = 0, cur_len = 0; 2184 page_t *npp = NULL, *opp = NULL, *cpp = pp; 2185 2186 if (pp == NULL) { 2187 return (EINVAL); 2188 } 2189 2190 dolock = (rw_owner(&ip->i_contents) != curthread); 2191 2192 /* 2193 * We need a better check. Ideally, we would use another 2194 * vnodeops so that hlocked and forcibly unmounted file 2195 * systems would return EIO where appropriate and w/o the 2196 * need for these checks. 2197 */ 2198 if (ip->i_udf == NULL) { 2199 return (EIO); 2200 } 2201 2202 #ifdef __lock_lint 2203 rw_enter(&ip->i_contents, RW_READER); 2204 #else 2205 if (dolock) { 2206 rw_enter(&ip->i_contents, RW_READER); 2207 } 2208 #endif 2209 2210 /* 2211 * Break the io request into chunks, one for each contiguous 2212 * stretch of disk blocks in the target file. 2213 */ 2214 while (done_len < io_len) { 2215 ASSERT(cpp); 2216 bp = NULL; 2217 contig = 0; 2218 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len), 2219 &bn, &contig)) { 2220 break; 2221 } 2222 2223 if (bn == UDF_HOLE) { /* No holey swapfiles */ 2224 cmn_err(CE_WARN, "SWAP file has HOLES"); 2225 error = EINVAL; 2226 break; 2227 } 2228 2229 cur_len = MIN(io_len - done_len, contig); 2230 2231 /* 2232 * Check if more than one I/O is 2233 * required to complete the given 2234 * I/O operation 2235 */ 2236 if (ip->i_udf->udf_lbsize < PAGESIZE) { 2237 if (cur_len >= PAGESIZE) { 2238 multi_io = 0; 2239 cur_len &= PAGEMASK; 2240 } else { 2241 multi_io = 1; 2242 cur_len = MIN(io_len - done_len, PAGESIZE); 2243 } 2244 } 2245 page_list_break(&cpp, &npp, btop(cur_len)); 2246 2247 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags); 2248 ASSERT(bp != NULL); 2249 2250 bp->b_edev = ip->i_dev; 2251 bp->b_dev = cmpdev(ip->i_dev); 2252 bp->b_blkno = bn; 2253 bp->b_un.b_addr = (caddr_t)0; 2254 bp->b_file = vp; 2255 bp->b_offset = (offset_t)(io_off + done_len); 2256 2257 /* 2258 * ub.ub_pageios.value.ul++; 2259 */ 2260 if (multi_io == 0) { 2261 (void) bdev_strategy(bp); 2262 } else { 2263 error = ud_multi_strat(ip, cpp, bp, 2264 (u_offset_t)(io_off + done_len)); 2265 if (error != 0) { 2266 pageio_done(bp); 2267 break; 2268 } 2269 } 2270 if (flags & B_READ) { 2271 ud_pageio_reads++; 2272 } else { 2273 ud_pageio_writes++; 2274 } 2275 2276 /* 2277 * If the request is not B_ASYNC, wait for i/o to complete 2278 * and re-assemble the page list to return to the caller. 2279 * If it is B_ASYNC we leave the page list in pieces and 2280 * cleanup() will dispose of them. 
2281 */ 2282 if ((flags & B_ASYNC) == 0) { 2283 error = biowait(bp); 2284 pageio_done(bp); 2285 if (error) { 2286 break; 2287 } 2288 page_list_concat(&opp, &cpp); 2289 } 2290 cpp = npp; 2291 npp = NULL; 2292 done_len += cur_len; 2293 } 2294 2295 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len)); 2296 if (error) { 2297 if (flags & B_ASYNC) { 2298 /* Cleanup unprocessed parts of list */ 2299 page_list_concat(&cpp, &npp); 2300 if (flags & B_READ) { 2301 pvn_read_done(cpp, B_ERROR); 2302 } else { 2303 pvn_write_done(cpp, B_ERROR); 2304 } 2305 } else { 2306 /* Re-assemble list and let caller clean up */ 2307 page_list_concat(&opp, &cpp); 2308 page_list_concat(&opp, &npp); 2309 } 2310 } 2311 2312 #ifdef __lock_lint 2313 rw_exit(&ip->i_contents); 2314 #else 2315 if (dolock) { 2316 rw_exit(&ip->i_contents); 2317 } 2318 #endif 2319 return (error); 2320 } 2321 2322 2323 2324 2325 /* -------------------- local functions --------------------------- */ 2326 2327 2328 2329 int32_t 2330 ud_rdwri(enum uio_rw rw, int32_t ioflag, 2331 struct ud_inode *ip, caddr_t base, int32_t len, 2332 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr) 2333 { 2334 int32_t error; 2335 struct uio auio; 2336 struct iovec aiov; 2337 2338 ud_printf("ud_rdwri\n"); 2339 2340 bzero((caddr_t)&auio, sizeof (uio_t)); 2341 bzero((caddr_t)&aiov, sizeof (iovec_t)); 2342 2343 aiov.iov_base = base; 2344 aiov.iov_len = len; 2345 auio.uio_iov = &aiov; 2346 auio.uio_iovcnt = 1; 2347 auio.uio_loffset = offset; 2348 auio.uio_segflg = (int16_t)seg; 2349 auio.uio_resid = len; 2350 2351 if (rw == UIO_WRITE) { 2352 auio.uio_fmode = FWRITE; 2353 auio.uio_extflg = UIO_COPY_DEFAULT; 2354 auio.uio_llimit = curproc->p_fsz_ctl; 2355 error = ud_wrip(ip, &auio, ioflag, cr); 2356 } else { 2357 auio.uio_fmode = FREAD; 2358 auio.uio_extflg = UIO_COPY_CACHED; 2359 auio.uio_llimit = MAXOFFSET_T; 2360 error = ud_rdip(ip, &auio, ioflag, cr); 2361 } 2362 2363 if (aresid) { 2364 *aresid = auio.uio_resid; 2365 } else if (auio.uio_resid) { 2366 error = EIO; 2367 } 2368 return (error); 2369 } 2370 2371 /* 2372 * Free behind hacks. The pager is busted. 2373 * XXX - need to pass the information down to writedone() in a flag like B_SEQ 2374 * or B_FREE_IF_TIGHT_ON_MEMORY. 2375 */ 2376 int32_t ud_freebehind = 1; 2377 int32_t ud_smallfile = 32 * 1024; 2378 2379 /* ARGSUSED */ 2380 int32_t 2381 ud_getpage_miss(struct vnode *vp, u_offset_t off, 2382 size_t len, struct seg *seg, caddr_t addr, page_t *pl[], 2383 size_t plsz, enum seg_rw rw, int32_t seq) 2384 { 2385 struct ud_inode *ip = VTOI(vp); 2386 int32_t err = 0; 2387 size_t io_len; 2388 u_offset_t io_off; 2389 u_offset_t pgoff; 2390 page_t *pp; 2391 2392 pl[0] = NULL; 2393 2394 /* 2395 * Figure out whether the page can be created, or must be 2396 * read from the disk 2397 */ 2398 if (rw == S_CREATE) { 2399 if ((pp = page_create_va(vp, off, 2400 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 2401 cmn_err(CE_WARN, "ud_getpage_miss: page_create"); 2402 return (EINVAL); 2403 } 2404 io_len = PAGESIZE; 2405 } else { 2406 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 2407 &io_len, off, PAGESIZE, 0); 2408 2409 /* 2410 * Some other thread has entered the page. 2411 * ud_getpage will retry page_lookup. 2412 */ 2413 if (pp == NULL) { 2414 return (0); 2415 } 2416 2417 /* 2418 * Fill the page with as much data as we can from the file. 2419 */ 2420 err = ud_page_fill(ip, pp, off, B_READ, &pgoff); 2421 if (err) { 2422 pvn_read_done(pp, B_ERROR); 2423 return (err); 2424 } 2425 2426 /* 2427 * XXX ??? 
ufs has io_len instead of pgoff below 2428 */ 2429 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2430 2431 /* 2432 * If the file access is sequential, initiate read ahead 2433 * of the next cluster. 2434 */ 2435 if (seq && ip->i_nextrio < ip->i_size) { 2436 ud_getpage_ra(vp, off, seg, addr); 2437 } 2438 } 2439 2440 outmiss: 2441 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw); 2442 return (err); 2443 } 2444 2445 /* ARGSUSED */ 2446 void 2447 ud_getpage_ra(struct vnode *vp, 2448 u_offset_t off, struct seg *seg, caddr_t addr) 2449 { 2450 page_t *pp; 2451 size_t io_len; 2452 struct ud_inode *ip = VTOI(vp); 2453 u_offset_t io_off = ip->i_nextrio, pgoff; 2454 caddr_t addr2 = addr + (io_off - off); 2455 daddr_t bn; 2456 int32_t contig = 0; 2457 2458 /* 2459 * Is this test needed? 2460 */ 2461 2462 if (addr2 >= seg->s_base + seg->s_size) { 2463 return; 2464 } 2465 2466 contig = 0; 2467 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) { 2468 return; 2469 } 2470 2471 pp = pvn_read_kluster(vp, io_off, seg, addr2, 2472 &io_off, &io_len, io_off, PAGESIZE, 1); 2473 2474 /* 2475 * Some other thread has entered the page. 2476 * So no read ahead is done here (i.e. we will have to wait 2477 * for the read when it is needed). 2478 */ 2479 2480 if (pp == NULL) { 2481 return; 2482 } 2483 2484 (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff); 2485 ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2486 } 2487 2488 int 2489 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off, 2490 uint32_t bflgs, u_offset_t *pg_off) 2491 { 2492 daddr_t bn; 2493 struct buf *bp; 2494 caddr_t kaddr, caddr; 2495 int32_t error = 0, contig = 0, multi_io = 0; 2496 int32_t lbsize = ip->i_udf->udf_lbsize; 2497 int32_t lbmask = ip->i_udf->udf_lbmask; 2498 uint64_t isize; 2499 2500 isize = (ip->i_size + lbmask) & (~lbmask); 2501 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2502 2503 /* 2504 * Embedded file: read the file_entry 2505 * from the buffer cache and copy the required 2506 * portions 2507 */ 2508 bp = ud_bread(ip->i_dev, 2509 ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize); 2510 if ((bp->b_error == 0) && 2511 (bp->b_resid == 0)) { 2512 2513 caddr = bp->b_un.b_addr + ip->i_data_off; 2514 2515 /* 2516 * mapin to kvm 2517 */ 2518 kaddr = (caddr_t)ppmapin(pp, 2519 PROT_READ | PROT_WRITE, (caddr_t)-1); 2520 (void) kcopy(caddr, kaddr, ip->i_size); 2521 2522 /* 2523 * mapout of kvm 2524 */ 2525 ppmapout(kaddr); 2526 } 2527 brelse(bp); 2528 contig = ip->i_size; 2529 } else { 2530 2531 /* 2532 * Get the contiguous size and block number 2533 * at offset "off" 2534 */ 2535 if (error = ud_bmap_read(ip, off, &bn, &contig)) 2536 goto out; 2537 contig = MIN(contig, PAGESIZE); 2538 contig = (contig + lbmask) & (~lbmask); 2539 2540 /* 2541 * Zero part of the page which we are not 2542 * going to read from the disk. 2543 */ 2544 2545 if (bn == UDF_HOLE) { 2546 2547 /* 2548 * This is a HOLE.
Just zero out 2549 * the page 2550 */ 2551 if (((off + contig) == isize) || 2552 (contig == PAGESIZE)) { 2553 pagezero(pp->p_prev, 0, PAGESIZE); 2554 goto out; 2555 } 2556 } 2557 2558 if (contig < PAGESIZE) { 2559 uint64_t count; 2560 2561 count = isize - off; 2562 if (contig != count) { 2563 multi_io = 1; 2564 contig = (int32_t)(MIN(count, PAGESIZE)); 2565 } else { 2566 pagezero(pp->p_prev, contig, PAGESIZE - contig); 2567 } 2568 } 2569 2570 /* 2571 * Get a bp and initialize it 2572 */ 2573 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs); 2574 ASSERT(bp != NULL); 2575 2576 bp->b_edev = ip->i_dev; 2577 bp->b_dev = cmpdev(ip->i_dev); 2578 bp->b_blkno = bn; 2579 bp->b_un.b_addr = 0; 2580 bp->b_file = ip->i_vnode; 2581 2582 /* 2583 * Start I/O 2584 */ 2585 if (multi_io == 0) { 2586 2587 /* 2588 * Single I/O is sufficient for this page 2589 */ 2590 (void) bdev_strategy(bp); 2591 } else { 2592 2593 /* 2594 * We need to do the I/O in 2595 * piece's 2596 */ 2597 error = ud_multi_strat(ip, pp, bp, off); 2598 if (error != 0) { 2599 goto out; 2600 } 2601 } 2602 if ((bflgs & B_ASYNC) == 0) { 2603 2604 /* 2605 * Wait for i/o to complete. 2606 */ 2607 2608 error = biowait(bp); 2609 pageio_done(bp); 2610 if (error) { 2611 goto out; 2612 } 2613 } 2614 } 2615 if ((off + contig) >= ip->i_size) { 2616 contig = ip->i_size - off; 2617 } 2618 2619 out: 2620 *pg_off = contig; 2621 return (error); 2622 } 2623 2624 int32_t 2625 ud_putpages(struct vnode *vp, offset_t off, 2626 size_t len, int32_t flags, struct cred *cr) 2627 { 2628 struct ud_inode *ip; 2629 page_t *pp; 2630 u_offset_t io_off; 2631 size_t io_len; 2632 u_offset_t eoff; 2633 int32_t err = 0; 2634 int32_t dolock; 2635 2636 ud_printf("ud_putpages\n"); 2637 2638 if (vp->v_count == 0) { 2639 cmn_err(CE_WARN, "ud_putpages: bad v_count"); 2640 return (EINVAL); 2641 } 2642 2643 ip = VTOI(vp); 2644 2645 /* 2646 * Acquire the readers/write inode lock before locking 2647 * any pages in this inode. 2648 * The inode lock is held during i/o. 2649 */ 2650 if (len == 0) { 2651 mutex_enter(&ip->i_tlock); 2652 ip->i_delayoff = ip->i_delaylen = 0; 2653 mutex_exit(&ip->i_tlock); 2654 } 2655 #ifdef __lock_lint 2656 rw_enter(&ip->i_contents, RW_READER); 2657 #else 2658 dolock = (rw_owner(&ip->i_contents) != curthread); 2659 if (dolock) { 2660 rw_enter(&ip->i_contents, RW_READER); 2661 } 2662 #endif 2663 2664 if (!vn_has_cached_data(vp)) { 2665 #ifdef __lock_lint 2666 rw_exit(&ip->i_contents); 2667 #else 2668 if (dolock) { 2669 rw_exit(&ip->i_contents); 2670 } 2671 #endif 2672 return (0); 2673 } 2674 2675 if (len == 0) { 2676 /* 2677 * Search the entire vp list for pages >= off. 2678 */ 2679 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage, 2680 flags, cr); 2681 } else { 2682 /* 2683 * Loop over all offsets in the range looking for 2684 * pages to deal with. 2685 */ 2686 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) { 2687 eoff = MIN(off + len, eoff); 2688 } else { 2689 eoff = off + len; 2690 } 2691 2692 for (io_off = off; io_off < eoff; io_off += io_len) { 2693 /* 2694 * If we are not invalidating, synchronously 2695 * freeing or writing pages, use the routine 2696 * page_lookup_nowait() to prevent reclaiming 2697 * them from the free list. 2698 */ 2699 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 2700 pp = page_lookup(vp, io_off, 2701 (flags & (B_INVAL | B_FREE)) ? 2702 SE_EXCL : SE_SHARED); 2703 } else { 2704 pp = page_lookup_nowait(vp, io_off, 2705 (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 2706 } 2707 2708 if (pp == NULL || pvn_getdirty(pp, flags) == 0) { 2709 io_len = PAGESIZE; 2710 } else { 2711 2712 err = ud_putapage(vp, pp, 2713 &io_off, &io_len, flags, cr); 2714 if (err != 0) { 2715 break; 2716 } 2717 /* 2718 * "io_off" and "io_len" are returned as 2719 * the range of pages we actually wrote. 2720 * This allows us to skip ahead more quickly 2721 * since several pages may have been dealt 2722 * with by this iteration of the loop. 2723 */ 2724 } 2725 } 2726 } 2727 if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) { 2728 /* 2729 * We have just sync'ed back all the pages on 2730 * the inode, turn off the IMODTIME flag. 2731 */ 2732 mutex_enter(&ip->i_tlock); 2733 ip->i_flag &= ~IMODTIME; 2734 mutex_exit(&ip->i_tlock); 2735 } 2736 #ifdef __lock_lint 2737 rw_exit(&ip->i_contents); 2738 #else 2739 if (dolock) { 2740 rw_exit(&ip->i_contents); 2741 } 2742 #endif 2743 return (err); 2744 } 2745 2746 /* ARGSUSED */ 2747 int32_t 2748 ud_putapage(struct vnode *vp, 2749 page_t *pp, u_offset_t *offp, 2750 size_t *lenp, int32_t flags, struct cred *cr) 2751 { 2752 daddr_t bn; 2753 size_t io_len; 2754 struct ud_inode *ip; 2755 int32_t error = 0, contig, multi_io = 0; 2756 struct udf_vfs *udf_vfsp; 2757 u_offset_t off, io_off; 2758 caddr_t kaddr, caddr; 2759 struct buf *bp = NULL; 2760 int32_t lbmask; 2761 uint64_t isize; 2762 int32_t crc_len; 2763 struct file_entry *fe; 2764 2765 ud_printf("ud_putapage\n"); 2766 2767 ip = VTOI(vp); 2768 ASSERT(ip); 2769 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2770 lbmask = ip->i_udf->udf_lbmask; 2771 isize = (ip->i_size + lbmask) & (~lbmask); 2772 2773 udf_vfsp = ip->i_udf; 2774 ASSERT(udf_vfsp->udf_flags & UDF_FL_RW); 2775 2776 /* 2777 * If the modified time on the inode has not already been 2778 * set elsewhere (e.g. for write/setattr) we set the time now. 2779 * This gives us approximate modified times for mmap'ed files 2780 * which are modified via stores in the user address space. 2781 */ 2782 if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) { 2783 mutex_enter(&ip->i_tlock); 2784 ip->i_flag |= IUPD; 2785 ITIMES_NOLOCK(ip); 2786 mutex_exit(&ip->i_tlock); 2787 } 2788 2789 2790 /* 2791 * Align the request to a block boundary (for old file systems), 2792 * and go ask bmap() how contiguous things are for this file.
2793 */ 2794 off = pp->p_offset & ~(offset_t)lbmask; 2795 /* block align it */ 2796 2797 2798 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2799 ASSERT(ip->i_size <= ip->i_max_emb); 2800 2801 pp = pvn_write_kluster(vp, pp, &io_off, 2802 &io_len, off, PAGESIZE, flags); 2803 if (io_len == 0) { 2804 io_len = PAGESIZE; 2805 } 2806 2807 bp = ud_bread(ip->i_dev, 2808 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift, 2809 udf_vfsp->udf_lbsize); 2810 fe = (struct file_entry *)bp->b_un.b_addr; 2811 if ((bp->b_flags & B_ERROR) || 2812 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY, 2813 ip->i_icb_block, 2814 1, udf_vfsp->udf_lbsize) != 0)) { 2815 if (pp != NULL) 2816 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2817 if (bp->b_flags & B_ERROR) { 2818 error = EIO; 2819 } else { 2820 error = EINVAL; 2821 } 2822 brelse(bp); 2823 return (error); 2824 } 2825 if ((bp->b_error == 0) && 2826 (bp->b_resid == 0)) { 2827 2828 caddr = bp->b_un.b_addr + ip->i_data_off; 2829 kaddr = (caddr_t)ppmapin(pp, 2830 PROT_READ | PROT_WRITE, (caddr_t)-1); 2831 (void) kcopy(kaddr, caddr, ip->i_size); 2832 ppmapout(kaddr); 2833 } 2834 crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) + 2835 SWAP_32(fe->fe_len_ear); 2836 crc_len += ip->i_size; 2837 ud_make_tag(ip->i_udf, &fe->fe_tag, 2838 UD_FILE_ENTRY, ip->i_icb_block, crc_len); 2839 2840 bwrite(bp); 2841 2842 if (flags & B_ASYNC) { 2843 pvn_write_done(pp, flags); 2844 } 2845 contig = ip->i_size; 2846 } else { 2847 2848 if (error = ud_bmap_read(ip, off, &bn, &contig)) { 2849 goto out; 2850 } 2851 contig = MIN(contig, PAGESIZE); 2852 contig = (contig + lbmask) & (~lbmask); 2853 2854 if (contig < PAGESIZE) { 2855 uint64_t count; 2856 2857 count = isize - off; 2858 if (contig != count) { 2859 multi_io = 1; 2860 contig = (int32_t)(MIN(count, PAGESIZE)); 2861 } 2862 } 2863 2864 if ((off + contig) > isize) { 2865 contig = isize - off; 2866 } 2867 2868 if (contig > PAGESIZE) { 2869 if (contig & PAGEOFFSET) { 2870 contig &= PAGEMASK; 2871 } 2872 } 2873 2874 pp = pvn_write_kluster(vp, pp, &io_off, 2875 &io_len, off, contig, flags); 2876 if (io_len == 0) { 2877 io_len = PAGESIZE; 2878 } 2879 2880 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags); 2881 ASSERT(bp != NULL); 2882 2883 bp->b_edev = ip->i_dev; 2884 bp->b_dev = cmpdev(ip->i_dev); 2885 bp->b_blkno = bn; 2886 bp->b_un.b_addr = 0; 2887 bp->b_file = vp; 2888 bp->b_offset = (offset_t)off; 2889 2890 2891 /* 2892 * write throttle 2893 */ 2894 ASSERT(bp->b_iodone == NULL); 2895 bp->b_iodone = ud_iodone; 2896 mutex_enter(&ip->i_tlock); 2897 ip->i_writes += bp->b_bcount; 2898 mutex_exit(&ip->i_tlock); 2899 2900 if (multi_io == 0) { 2901 2902 (void) bdev_strategy(bp); 2903 } else { 2904 error = ud_multi_strat(ip, pp, bp, off); 2905 if (error != 0) { 2906 goto out; 2907 } 2908 } 2909 2910 if ((flags & B_ASYNC) == 0) { 2911 /* 2912 * Wait for i/o to complete. 2913 */ 2914 error = biowait(bp); 2915 pageio_done(bp); 2916 } 2917 } 2918 2919 if ((flags & B_ASYNC) == 0) { 2920 pvn_write_done(pp, ((error) ? 
B_ERROR : 0) | B_WRITE | flags); 2921 } 2922 2923 pp = NULL; 2924 2925 out: 2926 if (error != 0 && pp != NULL) { 2927 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2928 } 2929 2930 if (offp) { 2931 *offp = io_off; 2932 } 2933 if (lenp) { 2934 *lenp = io_len; 2935 } 2936 2937 return (error); 2938 } 2939 2940 2941 int32_t 2942 ud_iodone(struct buf *bp) 2943 { 2944 struct ud_inode *ip; 2945 2946 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ)); 2947 2948 bp->b_iodone = NULL; 2949 2950 ip = VTOI(bp->b_pages->p_vnode); 2951 2952 mutex_enter(&ip->i_tlock); 2953 if (ip->i_writes >= ud_LW) { 2954 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) { 2955 if (ud_WRITES) { 2956 cv_broadcast(&ip->i_wrcv); /* wake all up */ 2957 } 2958 } 2959 } else { 2960 ip->i_writes -= bp->b_bcount; 2961 } 2962 mutex_exit(&ip->i_tlock); 2963 iodone(bp); 2964 return (0); 2965 } 2966 2967 /* ARGSUSED3 */ 2968 int32_t 2969 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr) 2970 { 2971 struct vnode *vp; 2972 struct udf_vfs *udf_vfsp; 2973 krw_t rwtype; 2974 caddr_t base; 2975 uint32_t flags; 2976 int32_t error, n, on, mapon, dofree; 2977 u_offset_t off; 2978 long oresid = uio->uio_resid; 2979 2980 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2981 if ((ip->i_type != VREG) && 2982 (ip->i_type != VDIR) && 2983 (ip->i_type != VLNK)) { 2984 return (EIO); 2985 } 2986 2987 if (uio->uio_loffset > MAXOFFSET_T) { 2988 return (0); 2989 } 2990 2991 if ((uio->uio_loffset < (offset_t)0) || 2992 ((uio->uio_loffset + uio->uio_resid) < 0)) { 2993 return (EINVAL); 2994 } 2995 if (uio->uio_resid == 0) { 2996 return (0); 2997 } 2998 2999 vp = ITOV(ip); 3000 udf_vfsp = ip->i_udf; 3001 mutex_enter(&ip->i_tlock); 3002 ip->i_flag |= IACC; 3003 mutex_exit(&ip->i_tlock); 3004 3005 rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER); 3006 3007 do { 3008 offset_t diff; 3009 u_offset_t uoff = uio->uio_loffset; 3010 off = uoff & (offset_t)MAXBMASK; 3011 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3012 on = (int)blkoff(udf_vfsp, uoff); 3013 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3014 3015 diff = ip->i_size - uoff; 3016 3017 if (diff <= (offset_t)0) { 3018 error = 0; 3019 goto out; 3020 } 3021 if (diff < (offset_t)n) { 3022 n = (int)diff; 3023 } 3024 dofree = ud_freebehind && 3025 ip->i_nextr == (off & PAGEMASK) && 3026 off > ud_smallfile; 3027 3028 #ifndef __lock_lint 3029 if (rwtype == RW_READER) { 3030 rw_exit(&ip->i_contents); 3031 } 3032 #endif 3033 3034 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3035 (uint32_t)n, 1, S_READ); 3036 error = uiomove(base + mapon, (long)n, UIO_READ, uio); 3037 3038 flags = 0; 3039 if (!error) { 3040 /* 3041 * If read a whole block, or read to eof, 3042 * won't need this buffer again soon. 3043 */ 3044 if (n + on == MAXBSIZE && ud_freebehind && dofree && 3045 freemem < lotsfree + pages_before_pager) { 3046 flags = SM_FREE | SM_DONTNEED |SM_ASYNC; 3047 } 3048 /* 3049 * In POSIX SYNC (FSYNC and FDSYNC) read mode, 3050 * we want to make sure that the page which has 3051 * been read, is written on disk if it is dirty. 3052 * And corresponding indirect blocks should also 3053 * be flushed out. 
3054 */ 3055 if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) { 3056 flags &= ~SM_ASYNC; 3057 flags |= SM_WRITE; 3058 } 3059 error = segmap_release(segkmap, base, flags); 3060 } else { 3061 (void) segmap_release(segkmap, base, flags); 3062 } 3063 3064 #ifndef __lock_lint 3065 if (rwtype == RW_READER) { 3066 rw_enter(&ip->i_contents, rwtype); 3067 } 3068 #endif 3069 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3070 out: 3071 /* 3072 * Inode is updated according to this table if FRSYNC is set. 3073 * 3074 * FSYNC FDSYNC(posix.4) 3075 * -------------------------- 3076 * always IATTCHG|IBDWRITE 3077 */ 3078 if (ioflag & FRSYNC) { 3079 if ((ioflag & FSYNC) || 3080 ((ioflag & FDSYNC) && 3081 (ip->i_flag & (IATTCHG|IBDWRITE)))) { 3082 rw_exit(&ip->i_contents); 3083 rw_enter(&ip->i_contents, RW_WRITER); 3084 ud_iupdat(ip, 1); 3085 } 3086 } 3087 /* 3088 * If we've already done a partial read, terminate 3089 * the read but return no error. 3090 */ 3091 if (oresid != uio->uio_resid) { 3092 error = 0; 3093 } 3094 ITIMES(ip); 3095 3096 return (error); 3097 } 3098 3099 int32_t 3100 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr) 3101 { 3102 caddr_t base; 3103 struct vnode *vp; 3104 struct udf_vfs *udf_vfsp; 3105 uint32_t flags; 3106 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0; 3107 int32_t pagecreate, newpage; 3108 uint64_t old_i_size; 3109 u_offset_t off; 3110 long start_resid = uio->uio_resid, premove_resid; 3111 rlim64_t limit = uio->uio_limit; 3112 3113 3114 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 3115 if ((ip->i_type != VREG) && 3116 (ip->i_type != VDIR) && 3117 (ip->i_type != VLNK)) { 3118 return (EIO); 3119 } 3120 3121 if (uio->uio_loffset >= MAXOFFSET_T) { 3122 return (EFBIG); 3123 } 3124 /* 3125 * see udf_l_pathconf 3126 */ 3127 if (limit > (((uint64_t)1 << 40) - 1)) { 3128 limit = ((uint64_t)1 << 40) - 1; 3129 } 3130 if (uio->uio_loffset >= limit) { 3131 proc_t *p = ttoproc(curthread); 3132 3133 mutex_enter(&p->p_lock); 3134 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 3135 p, RCA_UNSAFE_SIGINFO); 3136 mutex_exit(&p->p_lock); 3137 return (EFBIG); 3138 } 3139 if ((uio->uio_loffset < (offset_t)0) || 3140 ((uio->uio_loffset + uio->uio_resid) < 0)) { 3141 return (EINVAL); 3142 } 3143 if (uio->uio_resid == 0) { 3144 return (0); 3145 } 3146 3147 mutex_enter(&ip->i_tlock); 3148 ip->i_flag |= INOACC; 3149 3150 if (ioflag & (FSYNC | FDSYNC)) { 3151 ip->i_flag |= ISYNC; 3152 iupdat_flag = 1; 3153 } 3154 mutex_exit(&ip->i_tlock); 3155 3156 udf_vfsp = ip->i_udf; 3157 vp = ITOV(ip); 3158 3159 do { 3160 u_offset_t uoff = uio->uio_loffset; 3161 off = uoff & (offset_t)MAXBMASK; 3162 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3163 on = (int)blkoff(udf_vfsp, uoff); 3164 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3165 3166 if (ip->i_type == VREG && uoff + n >= limit) { 3167 if (uoff >= limit) { 3168 error = EFBIG; 3169 goto out; 3170 } 3171 n = (int)(limit - (rlim64_t)uoff); 3172 } 3173 if (uoff + n > ip->i_size) { 3174 /* 3175 * We are extending the length of the file. 3176 * bmap is used so that we are sure that 3177 * if we need to allocate new blocks, that it 3178 * is done here before we up the file size. 
3179 */ 3180 error = ud_bmap_write(ip, uoff, 3181 (int)(on + n), mapon == 0, cr); 3182 if (error) { 3183 break; 3184 } 3185 i_size_changed = 1; 3186 old_i_size = ip->i_size; 3187 ip->i_size = uoff + n; 3188 /* 3189 * If we are writing from the beginning of 3190 * the mapping, we can just create the 3191 * pages without having to read them. 3192 */ 3193 pagecreate = (mapon == 0); 3194 } else if (n == MAXBSIZE) { 3195 /* 3196 * Going to do a whole mappings worth, 3197 * so we can just create the pages w/o 3198 * having to read them in. But before 3199 * we do that, we need to make sure any 3200 * needed blocks are allocated first. 3201 */ 3202 error = ud_bmap_write(ip, uoff, 3203 (int)(on + n), 1, cr); 3204 if (error) { 3205 break; 3206 } 3207 pagecreate = 1; 3208 } else { 3209 pagecreate = 0; 3210 } 3211 3212 rw_exit(&ip->i_contents); 3213 3214 /* 3215 * Touch the page and fault it in if it is not in 3216 * core before segmap_getmapflt can lock it. This 3217 * is to avoid the deadlock if the buffer is mapped 3218 * to the same file through mmap which we want to 3219 * write to. 3220 */ 3221 uio_prefaultpages((long)n, uio); 3222 3223 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3224 (uint32_t)n, !pagecreate, S_WRITE); 3225 3226 /* 3227 * segmap_pagecreate() returns 1 if it calls 3228 * page_create_va() to allocate any pages. 3229 */ 3230 newpage = 0; 3231 if (pagecreate) { 3232 newpage = segmap_pagecreate(segkmap, base, 3233 (size_t)n, 0); 3234 } 3235 3236 premove_resid = uio->uio_resid; 3237 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); 3238 3239 if (pagecreate && 3240 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) { 3241 /* 3242 * We created pages w/o initializing them completely, 3243 * thus we need to zero the part that wasn't set up. 3244 * This happens on most EOF write cases and if 3245 * we had some sort of error during the uiomove. 3246 */ 3247 int nzero, nmoved; 3248 3249 nmoved = (int)(uio->uio_loffset - (off + mapon)); 3250 ASSERT(nmoved >= 0 && nmoved <= n); 3251 nzero = roundup(on + n, PAGESIZE) - nmoved; 3252 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE); 3253 (void) kzero(base + mapon + nmoved, (uint32_t)nzero); 3254 } 3255 3256 /* 3257 * Unlock the pages allocated by page_create_va() 3258 * in segmap_pagecreate() 3259 */ 3260 if (newpage) { 3261 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE); 3262 } 3263 3264 if (error) { 3265 /* 3266 * If we failed on a write, we may have already 3267 * allocated file blocks as well as pages. It's 3268 * hard to undo the block allocation, but we must 3269 * be sure to invalidate any pages that may have 3270 * been allocated. 3271 */ 3272 (void) segmap_release(segkmap, base, SM_INVAL); 3273 } else { 3274 flags = 0; 3275 /* 3276 * Force write back for synchronous write cases. 3277 */ 3278 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) { 3279 /* 3280 * If the sticky bit is set but the 3281 * execute bit is not set, we do a 3282 * synchronous write back and free 3283 * the page when done. We set up swap 3284 * files to be handled this way to 3285 * prevent servers from keeping around 3286 * the client's swap pages too long. 3287 * XXX - there ought to be a better way. 3288 */ 3289 if (IS_SWAPVP(vp)) { 3290 flags = SM_WRITE | SM_FREE | 3291 SM_DONTNEED; 3292 iupdat_flag = 0; 3293 } else { 3294 flags = SM_WRITE; 3295 } 3296 } else if (((mapon + n) == MAXBSIZE) || 3297 IS_SWAPVP(vp)) { 3298 /* 3299 * Have written a whole block. 
3300 * Start an asynchronous write and 3301 * mark the buffer to indicate that 3302 * it won't be needed again soon. 3303 */ 3304 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED; 3305 } 3306 error = segmap_release(segkmap, base, flags); 3307 3308 /* 3309 * If the operation failed and is synchronous, 3310 * then we need to unwind what uiomove() last 3311 * did so we can potentially return an error to 3312 * the caller. If this write operation was 3313 * done in two pieces and the first succeeded, 3314 * then we won't return an error for the second 3315 * piece that failed. However, we only want to 3316 * return a resid value that reflects what was 3317 * really done. 3318 * 3319 * Failures for non-synchronous operations can 3320 * be ignored since the page subsystem will 3321 * retry the operation until it succeeds or the 3322 * file system is unmounted. 3323 */ 3324 if (error) { 3325 if ((ioflag & (FSYNC | FDSYNC)) || 3326 ip->i_type == VDIR) { 3327 uio->uio_resid = premove_resid; 3328 } else { 3329 error = 0; 3330 } 3331 } 3332 } 3333 3334 /* 3335 * Re-acquire contents lock. 3336 */ 3337 rw_enter(&ip->i_contents, RW_WRITER); 3338 /* 3339 * If the uiomove() failed or if a synchronous 3340 * page push failed, fix up i_size. 3341 */ 3342 if (error) { 3343 if (i_size_changed) { 3344 /* 3345 * The uiomove failed, and we 3346 * allocated blocks,so get rid 3347 * of them. 3348 */ 3349 (void) ud_itrunc(ip, old_i_size, 0, cr); 3350 } 3351 } else { 3352 /* 3353 * XXX - Can this be out of the loop? 3354 */ 3355 ip->i_flag |= IUPD | ICHG; 3356 if (i_size_changed) { 3357 ip->i_flag |= IATTCHG; 3358 } 3359 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) | 3360 (IEXEC >> 10))) != 0 && 3361 (ip->i_char & (ISUID | ISGID)) != 0 && 3362 secpolicy_vnode_setid_retain(cr, 3363 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) { 3364 /* 3365 * Clear Set-UID & Set-GID bits on 3366 * successful write if not privileged 3367 * and at least one of the execute bits 3368 * is set. If we always clear Set-GID, 3369 * mandatory file and record locking is 3370 * unuseable. 3371 */ 3372 ip->i_char &= ~(ISUID | ISGID); 3373 } 3374 } 3375 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3376 3377 out: 3378 /* 3379 * Inode is updated according to this table - 3380 * 3381 * FSYNC FDSYNC(posix.4) 3382 * -------------------------- 3383 * always@ IATTCHG|IBDWRITE 3384 * 3385 * @ - If we are doing synchronous write the only time we should 3386 * not be sync'ing the ip here is if we have the stickyhack 3387 * activated, the file is marked with the sticky bit and 3388 * no exec bit, the file length has not been changed and 3389 * no new blocks have been allocated during this write. 3390 */ 3391 if ((ip->i_flag & ISYNC) != 0) { 3392 /* 3393 * we have eliminated nosync 3394 */ 3395 if ((ip->i_flag & (IATTCHG|IBDWRITE)) || 3396 ((ioflag & FSYNC) && iupdat_flag)) { 3397 ud_iupdat(ip, 1); 3398 } 3399 } 3400 3401 /* 3402 * If we've already done a partial-write, terminate 3403 * the write but return no error. 
3404 */ 3405 if (start_resid != uio->uio_resid) { 3406 error = 0; 3407 } 3408 ip->i_flag &= ~(INOACC | ISYNC); 3409 ITIMES_NOLOCK(ip); 3410 3411 return (error); 3412 } 3413 3414 int32_t 3415 ud_multi_strat(struct ud_inode *ip, 3416 page_t *pp, struct buf *bp, u_offset_t start) 3417 { 3418 daddr_t bn; 3419 int32_t error = 0, io_count, contig, alloc_sz, i; 3420 uint32_t io_off; 3421 mio_master_t *mm = NULL; 3422 mio_slave_t *ms = NULL; 3423 struct buf *rbp; 3424 3425 ASSERT(!(start & PAGEOFFSET)); 3426 3427 /* 3428 * Figure out how many buffers to allocate 3429 */ 3430 io_count = 0; 3431 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3432 contig = 0; 3433 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off), 3434 &bn, &contig)) { 3435 goto end; 3436 } 3437 if (contig == 0) { 3438 goto end; 3439 } 3440 contig = MIN(contig, PAGESIZE - io_off); 3441 if (bn != UDF_HOLE) { 3442 io_count ++; 3443 } else { 3444 /* 3445 * HOLE 3446 */ 3447 if (bp->b_flags & B_READ) { 3448 3449 /* 3450 * This is a hole and is read 3451 * it should be filled with 0's 3452 */ 3453 pagezero(pp, io_off, contig); 3454 } 3455 } 3456 } 3457 3458 3459 if (io_count != 0) { 3460 3461 /* 3462 * Allocate memory for all the 3463 * required number of buffers 3464 */ 3465 alloc_sz = sizeof (mio_master_t) + 3466 (sizeof (mio_slave_t) * io_count); 3467 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP); 3468 if (mm == NULL) { 3469 error = ENOMEM; 3470 goto end; 3471 } 3472 3473 /* 3474 * initialize master 3475 */ 3476 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL); 3477 mm->mm_size = alloc_sz; 3478 mm->mm_bp = bp; 3479 mm->mm_resid = 0; 3480 mm->mm_error = 0; 3481 mm->mm_index = master_index++; 3482 3483 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3484 3485 /* 3486 * Initialize buffers 3487 */ 3488 io_count = 0; 3489 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3490 contig = 0; 3491 if (error = ud_bmap_read(ip, 3492 (u_offset_t)(start + io_off), 3493 &bn, &contig)) { 3494 goto end; 3495 } 3496 ASSERT(contig); 3497 if ((io_off + contig) > bp->b_bcount) { 3498 contig = bp->b_bcount - io_off; 3499 } 3500 if (bn != UDF_HOLE) { 3501 /* 3502 * Clone the buffer 3503 * and prepare to start I/O 3504 */ 3505 ms->ms_ptr = mm; 3506 bioinit(&ms->ms_buf); 3507 rbp = bioclone(bp, io_off, (size_t)contig, 3508 bp->b_edev, bn, ud_slave_done, 3509 &ms->ms_buf, KM_NOSLEEP); 3510 ASSERT(rbp == &ms->ms_buf); 3511 mm->mm_resid += contig; 3512 io_count++; 3513 ms ++; 3514 } 3515 } 3516 3517 /* 3518 * Start I/O's 3519 */ 3520 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3521 for (i = 0; i < io_count; i++) { 3522 (void) bdev_strategy(&ms->ms_buf); 3523 ms ++; 3524 } 3525 } 3526 3527 end: 3528 if (error != 0) { 3529 bp->b_flags |= B_ERROR; 3530 bp->b_error = error; 3531 if (mm != NULL) { 3532 mutex_destroy(&mm->mm_mutex); 3533 kmem_free(mm, mm->mm_size); 3534 } 3535 } 3536 return (error); 3537 } 3538 3539 int32_t 3540 ud_slave_done(struct buf *bp) 3541 { 3542 mio_master_t *mm; 3543 int32_t resid; 3544 3545 ASSERT(SEMA_HELD(&bp->b_sem)); 3546 ASSERT((bp->b_flags & B_DONE) == 0); 3547 3548 mm = ((mio_slave_t *)bp)->ms_ptr; 3549 3550 /* 3551 * Propagate error and byte count info from slave struct to 3552 * the master struct 3553 */ 3554 mutex_enter(&mm->mm_mutex); 3555 if (bp->b_flags & B_ERROR) { 3556 3557 /* 3558 * If multiple slave buffers get 3559 * error we forget the old errors 3560 * this is ok because we any way 3561 * cannot return multiple errors 3562 */ 3563 mm->mm_error = 
bp->b_error; 3564 } 3565 mm->mm_resid -= bp->b_bcount; 3566 resid = mm->mm_resid; 3567 mutex_exit(&mm->mm_mutex); 3568 3569 /* 3570 * free up the resources allocated to cloned buffers. 3571 */ 3572 bp_mapout(bp); 3573 biofini(bp); 3574 3575 if (resid == 0) { 3576 3577 /* 3578 * This is the last I/O operation 3579 * clean up and return the original buffer 3580 */ 3581 if (mm->mm_error) { 3582 mm->mm_bp->b_flags |= B_ERROR; 3583 mm->mm_bp->b_error = mm->mm_error; 3584 } 3585 biodone(mm->mm_bp); 3586 mutex_destroy(&mm->mm_mutex); 3587 kmem_free(mm, mm->mm_size); 3588 } 3589 return (0); 3590 } 3591
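/*
 * Descriptive summary of the multi-I/O path above (ud_multi_strat() and
 * ud_slave_done()), derived from the code; the sizes in the example are
 * illustrative only and do not assume a particular PAGESIZE.
 *
 * When the logical block size is smaller than PAGESIZE, a single page may
 * be backed by several non-contiguous extents.  ud_multi_strat() walks the
 * page extent by extent, zero-fills any hole in place for reads, and clones
 * the original buf into one slave buf per allocated extent with bioclone(),
 * accumulating the total outstanding byte count in mm_resid before issuing
 * bdev_strategy() on each slave.  Each slave completes through
 * ud_slave_done(), which, under mm_mutex, records any error (a later error
 * overwrites an earlier one, since only one can be returned), subtracts its
 * b_bcount from mm_resid, and tears the clone down with bp_mapout() and
 * biofini().  The slave that brings mm_resid to zero propagates the saved
 * error to the master buf and calls biodone() on it, then frees the master
 * tracking structure.
 *
 * For example, a read of one 8k page on a 2k-block file system whose
 * extents are 4k at block 100, a 2k hole, and 2k at block 500 produces two
 * slave bufs (4k and 2k); the hole portion is zero-filled directly,
 * mm_resid starts at 6k, and the master buf is biodone()'d when the second
 * slave finishes.
 */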