1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/param.h> 31 #include <sys/time.h> 32 #include <sys/systm.h> 33 #include <sys/sysmacros.h> 34 #include <sys/resource.h> 35 #include <sys/signal.h> 36 #include <sys/cred.h> 37 #include <sys/user.h> 38 #include <sys/buf.h> 39 #include <sys/vfs.h> 40 #include <sys/vfs_opreg.h> 41 #include <sys/stat.h> 42 #include <sys/vnode.h> 43 #include <sys/mode.h> 44 #include <sys/proc.h> 45 #include <sys/disp.h> 46 #include <sys/file.h> 47 #include <sys/fcntl.h> 48 #include <sys/flock.h> 49 #include <sys/kmem.h> 50 #include <sys/uio.h> 51 #include <sys/dnlc.h> 52 #include <sys/conf.h> 53 #include <sys/errno.h> 54 #include <sys/mman.h> 55 #include <sys/fbuf.h> 56 #include <sys/pathname.h> 57 #include <sys/debug.h> 58 #include <sys/vmsystm.h> 59 #include <sys/cmn_err.h> 60 #include <sys/dirent.h> 61 #include <sys/errno.h> 62 #include <sys/modctl.h> 63 #include <sys/statvfs.h> 64 #include <sys/mount.h> 65 #include <sys/sunddi.h> 66 #include <sys/bootconf.h> 67 #include <sys/policy.h> 68 69 #include <vm/hat.h> 70 #include <vm/page.h> 71 #include <vm/pvn.h> 72 #include <vm/as.h> 73 #include <vm/seg.h> 74 #include <vm/seg_map.h> 75 #include <vm/seg_kmem.h> 76 #include <vm/seg_vn.h> 77 #include <vm/rm.h> 78 #include <vm/page.h> 79 #include <sys/swap.h> 80 81 #include <fs/fs_subr.h> 82 83 #include <sys/fs/udf_volume.h> 84 #include <sys/fs/udf_inode.h> 85 86 static int32_t udf_open(struct vnode **, 87 int32_t, struct cred *, caller_context_t *); 88 static int32_t udf_close(struct vnode *, 89 int32_t, int32_t, offset_t, struct cred *, caller_context_t *); 90 static int32_t udf_read(struct vnode *, 91 struct uio *, int32_t, struct cred *, caller_context_t *); 92 static int32_t udf_write(struct vnode *, 93 struct uio *, int32_t, struct cred *, caller_context_t *); 94 static int32_t udf_ioctl(struct vnode *, 95 int32_t, intptr_t, int32_t, struct cred *, int32_t *, 96 caller_context_t *); 97 static int32_t udf_getattr(struct vnode *, 98 struct vattr *, int32_t, struct cred *, caller_context_t *); 99 static int32_t udf_setattr(struct vnode *, 100 struct vattr *, int32_t, struct cred *, caller_context_t *); 101 static int32_t udf_access(struct vnode *, 102 int32_t, int32_t, struct cred *, caller_context_t *); 103 static int32_t udf_lookup(struct vnode *, 104 char *, struct vnode **, struct pathname *, 105 int32_t, struct vnode *, struct cred *, 106 caller_context_t *, int *, pathname_t *); 107 static int32_t udf_create(struct vnode *, 108 char *, struct vattr *, enum vcexcl, 109 int32_t, struct vnode **, struct cred *, int32_t, 110 caller_context_t *, vsecattr_t *); 111 static int32_t udf_remove(struct vnode *, 112 char *, struct cred *, caller_context_t *, int); 113 static int32_t udf_link(struct vnode *, 114 struct vnode *, char *, struct cred *, caller_context_t *, int); 115 static int32_t udf_rename(struct vnode *, 116 char *, struct vnode *, char *, struct cred *, caller_context_t *, int); 117 static int32_t udf_mkdir(struct vnode *, 118 char *, struct vattr *, struct vnode **, struct cred *, 119 caller_context_t *, int, vsecattr_t *); 120 static int32_t udf_rmdir(struct vnode *, 121 char *, struct vnode *, struct cred *, caller_context_t *, int); 122 static int32_t udf_readdir(struct vnode *, 123 struct uio *, struct cred *, int32_t *, caller_context_t *, int); 124 static int32_t udf_symlink(struct vnode *, 125 char *, struct vattr *, char *, struct cred *, caller_context_t *, int); 126 static int32_t udf_readlink(struct vnode *, 127 struct uio *, struct cred *, caller_context_t *); 128 static int32_t udf_fsync(struct vnode *, 129 int32_t, struct cred *, caller_context_t *); 130 static void udf_inactive(struct vnode *, 131 struct cred *, caller_context_t *); 132 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *); 133 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *); 134 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *); 135 static int32_t udf_seek(struct vnode *, offset_t, offset_t *, 136 caller_context_t *); 137 static int32_t udf_frlock(struct vnode *, int32_t, 138 struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *, 139 caller_context_t *); 140 static int32_t udf_space(struct vnode *, int32_t, 141 struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *); 142 static int32_t udf_getpage(struct vnode *, offset_t, 143 size_t, uint32_t *, struct page **, size_t, 144 struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *); 145 static int32_t udf_putpage(struct vnode *, offset_t, 146 size_t, int32_t, struct cred *, caller_context_t *); 147 static int32_t udf_map(struct vnode *, offset_t, struct as *, 148 caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *, 149 caller_context_t *); 150 static int32_t udf_addmap(struct vnode *, offset_t, struct as *, 151 caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *, 152 caller_context_t *); 153 static int32_t udf_delmap(struct vnode *, offset_t, struct as *, 154 caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *, 155 caller_context_t *); 156 static int32_t udf_l_pathconf(struct vnode *, int32_t, 157 ulong_t *, struct cred *, caller_context_t *); 158 static int32_t udf_pageio(struct vnode *, struct page *, 159 u_offset_t, size_t, int32_t, struct cred *, caller_context_t *); 160 161 int32_t ud_getpage_miss(struct vnode *, u_offset_t, 162 size_t, struct seg *, caddr_t, page_t *pl[], 163 size_t, enum seg_rw, int32_t); 164 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t); 165 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *); 166 int32_t ud_page_fill(struct ud_inode *, page_t *, 167 u_offset_t, uint32_t, u_offset_t *); 168 int32_t ud_iodone(struct buf *); 169 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *); 170 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *); 171 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t); 172 int32_t ud_slave_done(struct buf *); 173 174 /* 175 * Structures to control multiple IO operations to get or put pages 176 * that are backed by discontiguous blocks. The master struct is 177 * a dummy that holds the original bp from pageio_setup. The 178 * slave struct holds the working bp's to do the actual IO. Once 179 * all the slave IOs complete. The master is processed as if a single 180 * IO op has completed. 181 */ 182 uint32_t master_index = 0; 183 typedef struct mio_master { 184 kmutex_t mm_mutex; /* protect the fields below */ 185 int32_t mm_size; 186 buf_t *mm_bp; /* original bp */ 187 int32_t mm_resid; /* bytes remaining to transfer */ 188 int32_t mm_error; /* accumulated error from slaves */ 189 int32_t mm_index; /* XXX debugging */ 190 } mio_master_t; 191 192 typedef struct mio_slave { 193 buf_t ms_buf; /* working buffer for this IO chunk */ 194 mio_master_t *ms_ptr; /* pointer to master */ 195 } mio_slave_t; 196 197 struct vnodeops *udf_vnodeops; 198 199 const fs_operation_def_t udf_vnodeops_template[] = { 200 VOPNAME_OPEN, { .vop_open = udf_open }, 201 VOPNAME_CLOSE, { .vop_close = udf_close }, 202 VOPNAME_READ, { .vop_read = udf_read }, 203 VOPNAME_WRITE, { .vop_write = udf_write }, 204 VOPNAME_IOCTL, { .vop_ioctl = udf_ioctl }, 205 VOPNAME_GETATTR, { .vop_getattr = udf_getattr }, 206 VOPNAME_SETATTR, { .vop_setattr = udf_setattr }, 207 VOPNAME_ACCESS, { .vop_access = udf_access }, 208 VOPNAME_LOOKUP, { .vop_lookup = udf_lookup }, 209 VOPNAME_CREATE, { .vop_create = udf_create }, 210 VOPNAME_REMOVE, { .vop_remove = udf_remove }, 211 VOPNAME_LINK, { .vop_link = udf_link }, 212 VOPNAME_RENAME, { .vop_rename = udf_rename }, 213 VOPNAME_MKDIR, { .vop_mkdir = udf_mkdir }, 214 VOPNAME_RMDIR, { .vop_rmdir = udf_rmdir }, 215 VOPNAME_READDIR, { .vop_readdir = udf_readdir }, 216 VOPNAME_SYMLINK, { .vop_symlink = udf_symlink }, 217 VOPNAME_READLINK, { .vop_readlink = udf_readlink }, 218 VOPNAME_FSYNC, { .vop_fsync = udf_fsync }, 219 VOPNAME_INACTIVE, { .vop_inactive = udf_inactive }, 220 VOPNAME_FID, { .vop_fid = udf_fid }, 221 VOPNAME_RWLOCK, { .vop_rwlock = udf_rwlock }, 222 VOPNAME_RWUNLOCK, { .vop_rwunlock = udf_rwunlock }, 223 VOPNAME_SEEK, { .vop_seek = udf_seek }, 224 VOPNAME_FRLOCK, { .vop_frlock = udf_frlock }, 225 VOPNAME_SPACE, { .vop_space = udf_space }, 226 VOPNAME_GETPAGE, { .vop_getpage = udf_getpage }, 227 VOPNAME_PUTPAGE, { .vop_putpage = udf_putpage }, 228 VOPNAME_MAP, { .vop_map = udf_map }, 229 VOPNAME_ADDMAP, { .vop_addmap = udf_addmap }, 230 VOPNAME_DELMAP, { .vop_delmap = udf_delmap }, 231 VOPNAME_PATHCONF, { .vop_pathconf = udf_l_pathconf }, 232 VOPNAME_PAGEIO, { .vop_pageio = udf_pageio }, 233 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 234 NULL, NULL 235 }; 236 237 /* ARGSUSED */ 238 static int32_t 239 udf_open( 240 struct vnode **vpp, 241 int32_t flag, 242 struct cred *cr, 243 caller_context_t *ct) 244 { 245 ud_printf("udf_open\n"); 246 247 return (0); 248 } 249 250 /* ARGSUSED */ 251 static int32_t 252 udf_close( 253 struct vnode *vp, 254 int32_t flag, 255 int32_t count, 256 offset_t offset, 257 struct cred *cr, 258 caller_context_t *ct) 259 { 260 struct ud_inode *ip = VTOI(vp); 261 262 ud_printf("udf_close\n"); 263 264 ITIMES(ip); 265 266 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 267 cleanshares(vp, ttoproc(curthread)->p_pid); 268 269 /* 270 * Push partially filled cluster at last close. 271 * ``last close'' is approximated because the dnlc 272 * may have a hold on the vnode. 273 */ 274 if (vp->v_count <= 2 && vp->v_type != VBAD) { 275 struct ud_inode *ip = VTOI(vp); 276 if (ip->i_delaylen) { 277 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen, 278 B_ASYNC | B_FREE, cr); 279 ip->i_delaylen = 0; 280 } 281 } 282 283 return (0); 284 } 285 286 /* ARGSUSED */ 287 static int32_t 288 udf_read( 289 struct vnode *vp, 290 struct uio *uiop, 291 int32_t ioflag, 292 struct cred *cr, 293 caller_context_t *ct) 294 { 295 struct ud_inode *ip = VTOI(vp); 296 int32_t error; 297 298 ud_printf("udf_read\n"); 299 300 #ifdef __lock_lint 301 rw_enter(&ip->i_rwlock, RW_READER); 302 #endif 303 304 ASSERT(RW_READ_HELD(&ip->i_rwlock)); 305 306 if (MANDLOCK(vp, ip->i_char)) { 307 /* 308 * udf_getattr ends up being called by chklock 309 */ 310 error = chklock(vp, FREAD, uiop->uio_loffset, 311 uiop->uio_resid, uiop->uio_fmode, ct); 312 if (error) { 313 goto end; 314 } 315 } 316 317 rw_enter(&ip->i_contents, RW_READER); 318 error = ud_rdip(ip, uiop, ioflag, cr); 319 rw_exit(&ip->i_contents); 320 321 end: 322 #ifdef __lock_lint 323 rw_exit(&ip->i_rwlock); 324 #endif 325 326 return (error); 327 } 328 329 330 int32_t ud_WRITES = 1; 331 int32_t ud_HW = 96 * 1024; 332 int32_t ud_LW = 64 * 1024; 333 int32_t ud_throttles = 0; 334 335 /* ARGSUSED */ 336 static int32_t 337 udf_write( 338 struct vnode *vp, 339 struct uio *uiop, 340 int32_t ioflag, 341 struct cred *cr, 342 caller_context_t *ct) 343 { 344 struct ud_inode *ip = VTOI(vp); 345 int32_t error = 0; 346 347 ud_printf("udf_write\n"); 348 349 #ifdef __lock_lint 350 rw_enter(&ip->i_rwlock, RW_WRITER); 351 #endif 352 353 ASSERT(RW_WRITE_HELD(&ip->i_rwlock)); 354 355 if (MANDLOCK(vp, ip->i_char)) { 356 /* 357 * ud_getattr ends up being called by chklock 358 */ 359 error = chklock(vp, FWRITE, uiop->uio_loffset, 360 uiop->uio_resid, uiop->uio_fmode, ct); 361 if (error) { 362 goto end; 363 } 364 } 365 /* 366 * Throttle writes. 367 */ 368 mutex_enter(&ip->i_tlock); 369 if (ud_WRITES && (ip->i_writes > ud_HW)) { 370 while (ip->i_writes > ud_HW) { 371 ud_throttles++; 372 cv_wait(&ip->i_wrcv, &ip->i_tlock); 373 } 374 } 375 mutex_exit(&ip->i_tlock); 376 377 /* 378 * Write to the file 379 */ 380 rw_enter(&ip->i_contents, RW_WRITER); 381 if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) { 382 /* 383 * In append mode start at end of file. 384 */ 385 uiop->uio_loffset = ip->i_size; 386 } 387 error = ud_wrip(ip, uiop, ioflag, cr); 388 rw_exit(&ip->i_contents); 389 390 end: 391 #ifdef __lock_lint 392 rw_exit(&ip->i_rwlock); 393 #endif 394 395 return (error); 396 } 397 398 /* ARGSUSED */ 399 static int32_t 400 udf_ioctl( 401 struct vnode *vp, 402 int32_t cmd, 403 intptr_t arg, 404 int32_t flag, 405 struct cred *cr, 406 int32_t *rvalp, 407 caller_context_t *ct) 408 { 409 return (ENOTTY); 410 } 411 412 /* ARGSUSED */ 413 static int32_t 414 udf_getattr( 415 struct vnode *vp, 416 struct vattr *vap, 417 int32_t flags, 418 struct cred *cr, 419 caller_context_t *ct) 420 { 421 struct ud_inode *ip = VTOI(vp); 422 423 ud_printf("udf_getattr\n"); 424 425 if (vap->va_mask == AT_SIZE) { 426 /* 427 * for performance, if only the size is requested don't bother 428 * with anything else. 429 */ 430 vap->va_size = ip->i_size; 431 return (0); 432 } 433 434 rw_enter(&ip->i_contents, RW_READER); 435 436 vap->va_type = vp->v_type; 437 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 438 439 vap->va_uid = ip->i_uid; 440 vap->va_gid = ip->i_gid; 441 vap->va_fsid = ip->i_dev; 442 vap->va_nodeid = ip->i_icb_lbano; 443 vap->va_nlink = ip->i_nlink; 444 vap->va_size = ip->i_size; 445 vap->va_seq = ip->i_seq; 446 if (vp->v_type == VCHR || vp->v_type == VBLK) { 447 vap->va_rdev = ip->i_rdev; 448 } else { 449 vap->va_rdev = 0; 450 } 451 452 mutex_enter(&ip->i_tlock); 453 ITIMES_NOLOCK(ip); /* mark correct time in inode */ 454 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec; 455 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec; 456 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec; 457 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec; 458 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec; 459 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec; 460 mutex_exit(&ip->i_tlock); 461 462 switch (ip->i_type) { 463 case VBLK: 464 vap->va_blksize = MAXBSIZE; 465 break; 466 case VCHR: 467 vap->va_blksize = MAXBSIZE; 468 break; 469 default: 470 vap->va_blksize = ip->i_udf->udf_lbsize; 471 break; 472 } 473 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift; 474 475 rw_exit(&ip->i_contents); 476 477 return (0); 478 } 479 480 static int 481 ud_iaccess_vmode(void *ip, int mode, struct cred *cr) 482 { 483 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr)); 484 } 485 486 /*ARGSUSED4*/ 487 static int32_t 488 udf_setattr( 489 struct vnode *vp, 490 struct vattr *vap, 491 int32_t flags, 492 struct cred *cr, 493 caller_context_t *ct) 494 { 495 int32_t error = 0; 496 uint32_t mask = vap->va_mask; 497 struct ud_inode *ip; 498 timestruc_t now; 499 struct vattr ovap; 500 501 ud_printf("udf_setattr\n"); 502 503 ip = VTOI(vp); 504 505 /* 506 * not updates allowed to 4096 files 507 */ 508 if (ip->i_astrat == STRAT_TYPE4096) { 509 return (EINVAL); 510 } 511 512 /* 513 * Cannot set these attributes 514 */ 515 if (mask & AT_NOSET) { 516 return (EINVAL); 517 } 518 519 rw_enter(&ip->i_rwlock, RW_WRITER); 520 rw_enter(&ip->i_contents, RW_WRITER); 521 522 ovap.va_uid = ip->i_uid; 523 ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 524 error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags, 525 ud_iaccess_vmode, ip); 526 if (error) 527 goto update_inode; 528 529 mask = vap->va_mask; 530 /* 531 * Change file access modes. 532 */ 533 if (mask & AT_MODE) { 534 ip->i_perm = VA2UD_PERM(vap->va_mode); 535 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX); 536 mutex_enter(&ip->i_tlock); 537 ip->i_flag |= ICHG; 538 mutex_exit(&ip->i_tlock); 539 } 540 if (mask & (AT_UID|AT_GID)) { 541 if (mask & AT_UID) { 542 ip->i_uid = vap->va_uid; 543 } 544 if (mask & AT_GID) { 545 ip->i_gid = vap->va_gid; 546 } 547 mutex_enter(&ip->i_tlock); 548 ip->i_flag |= ICHG; 549 mutex_exit(&ip->i_tlock); 550 } 551 /* 552 * Truncate file. Must have write permission and not be a directory. 553 */ 554 if (mask & AT_SIZE) { 555 if (vp->v_type == VDIR) { 556 error = EISDIR; 557 goto update_inode; 558 } 559 if (error = ud_iaccess(ip, IWRITE, cr)) { 560 goto update_inode; 561 } 562 if (vap->va_size > MAXOFFSET_T) { 563 error = EFBIG; 564 goto update_inode; 565 } 566 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) { 567 goto update_inode; 568 } 569 } 570 /* 571 * Change file access or modified times. 572 */ 573 if (mask & (AT_ATIME|AT_MTIME)) { 574 mutex_enter(&ip->i_tlock); 575 if (mask & AT_ATIME) { 576 ip->i_atime.tv_sec = vap->va_atime.tv_sec; 577 ip->i_atime.tv_nsec = vap->va_atime.tv_nsec; 578 ip->i_flag &= ~IACC; 579 } 580 if (mask & AT_MTIME) { 581 ip->i_mtime.tv_sec = vap->va_mtime.tv_sec; 582 ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec; 583 gethrestime(&now); 584 ip->i_ctime.tv_sec = now.tv_sec; 585 ip->i_ctime.tv_nsec = now.tv_nsec; 586 ip->i_flag &= ~(IUPD|ICHG); 587 ip->i_flag |= IMODTIME; 588 } 589 ip->i_flag |= IMOD; 590 mutex_exit(&ip->i_tlock); 591 } 592 593 update_inode: 594 if (curthread->t_flag & T_DONTPEND) { 595 ud_iupdat(ip, 1); 596 } else { 597 ITIMES_NOLOCK(ip); 598 } 599 rw_exit(&ip->i_contents); 600 rw_exit(&ip->i_rwlock); 601 602 return (error); 603 } 604 605 /* ARGSUSED */ 606 static int32_t 607 udf_access( 608 struct vnode *vp, 609 int32_t mode, 610 int32_t flags, 611 struct cred *cr, 612 caller_context_t *ct) 613 { 614 struct ud_inode *ip = VTOI(vp); 615 int32_t error; 616 617 ud_printf("udf_access\n"); 618 619 if (ip->i_udf == NULL) { 620 return (EIO); 621 } 622 623 error = ud_iaccess(ip, UD_UPERM2DPERM(mode), cr); 624 625 return (error); 626 } 627 628 int32_t udfs_stickyhack = 1; 629 630 /* ARGSUSED */ 631 static int32_t 632 udf_lookup( 633 struct vnode *dvp, 634 char *nm, 635 struct vnode **vpp, 636 struct pathname *pnp, 637 int32_t flags, 638 struct vnode *rdir, 639 struct cred *cr, 640 caller_context_t *ct, 641 int *direntflags, 642 pathname_t *realpnp) 643 { 644 int32_t error; 645 struct vnode *vp; 646 struct ud_inode *ip, *xip; 647 648 ud_printf("udf_lookup\n"); 649 /* 650 * Null component name is a synonym for directory being searched. 651 */ 652 if (*nm == '\0') { 653 VN_HOLD(dvp); 654 *vpp = dvp; 655 error = 0; 656 goto out; 657 } 658 659 /* 660 * Fast path: Check the directory name lookup cache. 661 */ 662 ip = VTOI(dvp); 663 if (vp = dnlc_lookup(dvp, nm)) { 664 /* 665 * Check accessibility of directory. 666 */ 667 if ((error = ud_iaccess(ip, IEXEC, cr)) != 0) { 668 VN_RELE(vp); 669 } 670 xip = VTOI(vp); 671 } else { 672 error = ud_dirlook(ip, nm, &xip, cr, 1); 673 ITIMES(ip); 674 } 675 676 if (error == 0) { 677 ip = xip; 678 *vpp = ITOV(ip); 679 if ((ip->i_type != VDIR) && 680 (ip->i_char & ISVTX) && 681 ((ip->i_perm & IEXEC) == 0) && 682 udfs_stickyhack) { 683 mutex_enter(&(*vpp)->v_lock); 684 (*vpp)->v_flag |= VISSWAP; 685 mutex_exit(&(*vpp)->v_lock); 686 } 687 ITIMES(ip); 688 /* 689 * If vnode is a device return special vnode instead. 690 */ 691 if (IS_DEVVP(*vpp)) { 692 struct vnode *newvp; 693 newvp = specvp(*vpp, (*vpp)->v_rdev, 694 (*vpp)->v_type, cr); 695 VN_RELE(*vpp); 696 if (newvp == NULL) { 697 error = ENOSYS; 698 } else { 699 *vpp = newvp; 700 } 701 } 702 } 703 out: 704 return (error); 705 } 706 707 /* ARGSUSED */ 708 static int32_t 709 udf_create( 710 struct vnode *dvp, 711 char *name, 712 struct vattr *vap, 713 enum vcexcl excl, 714 int32_t mode, 715 struct vnode **vpp, 716 struct cred *cr, 717 int32_t flag, 718 caller_context_t *ct, 719 vsecattr_t *vsecp) 720 { 721 int32_t error; 722 struct ud_inode *ip = VTOI(dvp), *xip; 723 724 ud_printf("udf_create\n"); 725 726 if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0) 727 vap->va_mode &= ~VSVTX; 728 729 if (*name == '\0') { 730 /* 731 * Null component name refers to the directory itself. 732 */ 733 VN_HOLD(dvp); 734 ITIMES(ip); 735 error = EEXIST; 736 } else { 737 xip = NULL; 738 rw_enter(&ip->i_rwlock, RW_WRITER); 739 error = ud_direnter(ip, name, DE_CREATE, 740 (struct ud_inode *)0, (struct ud_inode *)0, 741 vap, &xip, cr, ct); 742 rw_exit(&ip->i_rwlock); 743 ITIMES(ip); 744 ip = xip; 745 } 746 #ifdef __lock_lint 747 rw_enter(&ip->i_contents, RW_WRITER); 748 #else 749 if (ip != NULL) { 750 rw_enter(&ip->i_contents, RW_WRITER); 751 } 752 #endif 753 754 /* 755 * If the file already exists and this is a non-exclusive create, 756 * check permissions and allow access for non-directories. 757 * Read-only create of an existing directory is also allowed. 758 * We fail an exclusive create of anything which already exists. 759 */ 760 if (error == EEXIST) { 761 if (excl == NONEXCL) { 762 if ((ip->i_type == VDIR) && (mode & VWRITE)) { 763 error = EISDIR; 764 } else if (mode) { 765 error = ud_iaccess(ip, 766 UD_UPERM2DPERM(mode), cr); 767 } else { 768 error = 0; 769 } 770 } 771 if (error) { 772 rw_exit(&ip->i_contents); 773 VN_RELE(ITOV(ip)); 774 goto out; 775 } else if ((ip->i_type == VREG) && 776 (vap->va_mask & AT_SIZE) && vap->va_size == 0) { 777 /* 778 * Truncate regular files, if requested by caller. 779 * Grab i_rwlock to make sure no one else is 780 * currently writing to the file (we promised 781 * bmap we would do this). 782 * Must get the locks in the correct order. 783 */ 784 if (ip->i_size == 0) { 785 ip->i_flag |= ICHG | IUPD; 786 } else { 787 rw_exit(&ip->i_contents); 788 rw_enter(&ip->i_rwlock, RW_WRITER); 789 rw_enter(&ip->i_contents, RW_WRITER); 790 (void) ud_itrunc(ip, 0, 0, cr); 791 rw_exit(&ip->i_rwlock); 792 } 793 vnevent_create(ITOV(ip), ct); 794 } 795 } 796 797 if (error == 0) { 798 *vpp = ITOV(ip); 799 ITIMES(ip); 800 } 801 #ifdef __lock_lint 802 rw_exit(&ip->i_contents); 803 #else 804 if (ip != NULL) { 805 rw_exit(&ip->i_contents); 806 } 807 #endif 808 if (error) { 809 goto out; 810 } 811 812 /* 813 * If vnode is a device return special vnode instead. 814 */ 815 if (!error && IS_DEVVP(*vpp)) { 816 struct vnode *newvp; 817 818 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 819 VN_RELE(*vpp); 820 if (newvp == NULL) { 821 error = ENOSYS; 822 goto out; 823 } 824 *vpp = newvp; 825 } 826 out: 827 return (error); 828 } 829 830 /* ARGSUSED */ 831 static int32_t 832 udf_remove( 833 struct vnode *vp, 834 char *nm, 835 struct cred *cr, 836 caller_context_t *ct, 837 int flags) 838 { 839 int32_t error; 840 struct ud_inode *ip = VTOI(vp); 841 842 ud_printf("udf_remove\n"); 843 844 rw_enter(&ip->i_rwlock, RW_WRITER); 845 error = ud_dirremove(ip, nm, 846 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct); 847 rw_exit(&ip->i_rwlock); 848 ITIMES(ip); 849 850 return (error); 851 } 852 853 /* ARGSUSED */ 854 static int32_t 855 udf_link( 856 struct vnode *tdvp, 857 struct vnode *svp, 858 char *tnm, 859 struct cred *cr, 860 caller_context_t *ct, 861 int flags) 862 { 863 int32_t error; 864 struct vnode *realvp; 865 struct ud_inode *sip; 866 struct ud_inode *tdp; 867 868 ud_printf("udf_link\n"); 869 if (VOP_REALVP(svp, &realvp, ct) == 0) { 870 svp = realvp; 871 } 872 873 /* 874 * Do not allow links to directories 875 */ 876 if (svp->v_type == VDIR) { 877 return (EPERM); 878 } 879 880 sip = VTOI(svp); 881 882 if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0) 883 return (EPERM); 884 885 tdp = VTOI(tdvp); 886 887 rw_enter(&tdp->i_rwlock, RW_WRITER); 888 error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0, 889 sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct); 890 rw_exit(&tdp->i_rwlock); 891 ITIMES(sip); 892 ITIMES(tdp); 893 894 if (error == 0) { 895 vnevent_link(svp, ct); 896 } 897 898 return (error); 899 } 900 901 /* ARGSUSED */ 902 static int32_t 903 udf_rename( 904 struct vnode *sdvp, 905 char *snm, 906 struct vnode *tdvp, 907 char *tnm, 908 struct cred *cr, 909 caller_context_t *ct, 910 int flags) 911 { 912 int32_t error = 0; 913 struct udf_vfs *udf_vfsp; 914 struct ud_inode *sip; /* source inode */ 915 struct ud_inode *sdp, *tdp; /* source and target parent inode */ 916 struct vnode *realvp; 917 918 ud_printf("udf_rename\n"); 919 920 if (VOP_REALVP(tdvp, &realvp, ct) == 0) { 921 tdvp = realvp; 922 } 923 924 sdp = VTOI(sdvp); 925 tdp = VTOI(tdvp); 926 927 udf_vfsp = sdp->i_udf; 928 929 mutex_enter(&udf_vfsp->udf_rename_lck); 930 /* 931 * Look up inode of file we're supposed to rename. 932 */ 933 if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) { 934 mutex_exit(&udf_vfsp->udf_rename_lck); 935 return (error); 936 } 937 /* 938 * be sure this is not a directory with another file system mounted 939 * over it. If it is just give up the locks, and return with 940 * EBUSY 941 */ 942 if (vn_mountedvfs(ITOV(sip)) != NULL) { 943 error = EBUSY; 944 goto errout; 945 } 946 /* 947 * Make sure we can delete the source entry. This requires 948 * write permission on the containing directory. If that 949 * directory is "sticky" it further requires (except for 950 * privileged users) that the user own the directory or the 951 * source entry, or else have permission to write the source 952 * entry. 953 */ 954 rw_enter(&sdp->i_contents, RW_READER); 955 rw_enter(&sip->i_contents, RW_READER); 956 if ((error = ud_iaccess(sdp, IWRITE, cr)) != 0 || 957 (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) { 958 rw_exit(&sip->i_contents); 959 rw_exit(&sdp->i_contents); 960 ITIMES(sip); 961 goto errout; 962 } 963 964 /* 965 * Check for renaming '.' or '..' or alias of '.' 966 */ 967 if ((strcmp(snm, ".") == 0) || 968 (strcmp(snm, "..") == 0) || 969 (sdp == sip)) { 970 error = EINVAL; 971 rw_exit(&sip->i_contents); 972 rw_exit(&sdp->i_contents); 973 goto errout; 974 } 975 rw_exit(&sip->i_contents); 976 rw_exit(&sdp->i_contents); 977 978 979 /* 980 * Link source to the target. 981 */ 982 rw_enter(&tdp->i_rwlock, RW_WRITER); 983 if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip, 984 (struct vattr *)0, (struct ud_inode **)0, cr, ct)) { 985 /* 986 * ESAME isn't really an error; it indicates that the 987 * operation should not be done because the source and target 988 * are the same file, but that no error should be reported. 989 */ 990 if (error == ESAME) { 991 error = 0; 992 } 993 rw_exit(&tdp->i_rwlock); 994 goto errout; 995 } 996 vnevent_rename_src(ITOV(sip), sdvp, snm, ct); 997 rw_exit(&tdp->i_rwlock); 998 999 rw_enter(&sdp->i_rwlock, RW_WRITER); 1000 /* 1001 * Unlink the source. 1002 * Remove the source entry. ud_dirremove() checks that the entry 1003 * still reflects sip, and returns an error if it doesn't. 1004 * If the entry has changed just forget about it. Release 1005 * the source inode. 1006 */ 1007 if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0, 1008 DR_RENAME, cr, ct)) == ENOENT) { 1009 error = 0; 1010 } 1011 rw_exit(&sdp->i_rwlock); 1012 errout: 1013 ITIMES(sdp); 1014 ITIMES(tdp); 1015 VN_RELE(ITOV(sip)); 1016 mutex_exit(&udf_vfsp->udf_rename_lck); 1017 1018 return (error); 1019 } 1020 1021 /* ARGSUSED */ 1022 static int32_t 1023 udf_mkdir( 1024 struct vnode *dvp, 1025 char *dirname, 1026 struct vattr *vap, 1027 struct vnode **vpp, 1028 struct cred *cr, 1029 caller_context_t *ct, 1030 int flags, 1031 vsecattr_t *vsecp) 1032 { 1033 int32_t error; 1034 struct ud_inode *ip; 1035 struct ud_inode *xip; 1036 1037 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 1038 1039 ud_printf("udf_mkdir\n"); 1040 1041 ip = VTOI(dvp); 1042 rw_enter(&ip->i_rwlock, RW_WRITER); 1043 error = ud_direnter(ip, dirname, DE_MKDIR, 1044 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct); 1045 rw_exit(&ip->i_rwlock); 1046 ITIMES(ip); 1047 if (error == 0) { 1048 ip = xip; 1049 *vpp = ITOV(ip); 1050 ITIMES(ip); 1051 } else if (error == EEXIST) { 1052 ITIMES(xip); 1053 VN_RELE(ITOV(xip)); 1054 } 1055 1056 return (error); 1057 } 1058 1059 /* ARGSUSED */ 1060 static int32_t 1061 udf_rmdir( 1062 struct vnode *vp, 1063 char *nm, 1064 struct vnode *cdir, 1065 struct cred *cr, 1066 caller_context_t *ct, 1067 int flags) 1068 { 1069 int32_t error; 1070 struct ud_inode *ip = VTOI(vp); 1071 1072 ud_printf("udf_rmdir\n"); 1073 1074 rw_enter(&ip->i_rwlock, RW_WRITER); 1075 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR, 1076 cr, ct); 1077 rw_exit(&ip->i_rwlock); 1078 ITIMES(ip); 1079 1080 return (error); 1081 } 1082 1083 /* ARGSUSED */ 1084 static int32_t 1085 udf_readdir( 1086 struct vnode *vp, 1087 struct uio *uiop, 1088 struct cred *cr, 1089 int32_t *eofp, 1090 caller_context_t *ct, 1091 int flags) 1092 { 1093 struct ud_inode *ip; 1094 struct dirent64 *nd; 1095 struct udf_vfs *udf_vfsp; 1096 int32_t error = 0, len, outcount = 0; 1097 uint32_t dirsiz, offset; 1098 uint32_t bufsize, ndlen, dummy; 1099 caddr_t outbuf; 1100 caddr_t outb, end_outb; 1101 struct iovec *iovp; 1102 1103 uint8_t *dname; 1104 int32_t length; 1105 1106 uint8_t *buf = NULL; 1107 1108 struct fbuf *fbp = NULL; 1109 struct file_id *fid; 1110 uint8_t *name; 1111 1112 1113 ud_printf("udf_readdir\n"); 1114 1115 ip = VTOI(vp); 1116 udf_vfsp = ip->i_udf; 1117 1118 dirsiz = ip->i_size; 1119 if ((uiop->uio_offset >= dirsiz) || 1120 (ip->i_nlink <= 0)) { 1121 if (eofp) { 1122 *eofp = 1; 1123 } 1124 return (0); 1125 } 1126 1127 offset = uiop->uio_offset; 1128 iovp = uiop->uio_iov; 1129 bufsize = iovp->iov_len; 1130 1131 outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP); 1132 end_outb = outb + bufsize; 1133 nd = (struct dirent64 *)outbuf; 1134 1135 dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP); 1136 buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP); 1137 1138 if (offset == 0) { 1139 len = DIRENT64_RECLEN(1); 1140 if (((caddr_t)nd + len) >= end_outb) { 1141 error = EINVAL; 1142 goto end; 1143 } 1144 nd->d_ino = ip->i_icb_lbano; 1145 nd->d_reclen = (uint16_t)len; 1146 nd->d_off = 0x10; 1147 nd->d_name[0] = '.'; 1148 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1); 1149 nd = (struct dirent64 *)((char *)nd + nd->d_reclen); 1150 outcount++; 1151 } else if (offset == 0x10) { 1152 offset = 0; 1153 } 1154 1155 while (offset < dirsiz) { 1156 error = ud_get_next_fid(ip, &fbp, 1157 offset, &fid, &name, buf); 1158 if (error != 0) { 1159 break; 1160 } 1161 1162 if ((fid->fid_flags & FID_DELETED) == 0) { 1163 if (fid->fid_flags & FID_PARENT) { 1164 1165 len = DIRENT64_RECLEN(2); 1166 if (((caddr_t)nd + len) >= end_outb) { 1167 error = EINVAL; 1168 break; 1169 } 1170 1171 nd->d_ino = ip->i_icb_lbano; 1172 nd->d_reclen = (uint16_t)len; 1173 nd->d_off = offset + FID_LEN(fid); 1174 nd->d_name[0] = '.'; 1175 nd->d_name[1] = '.'; 1176 bzero(&nd->d_name[2], 1177 DIRENT64_NAMELEN(len) - 2); 1178 nd = (struct dirent64 *) 1179 ((char *)nd + nd->d_reclen); 1180 } else { 1181 if ((error = ud_uncompress(fid->fid_idlen, 1182 &length, name, dname)) != 0) { 1183 break; 1184 } 1185 if (length == 0) { 1186 offset += FID_LEN(fid); 1187 continue; 1188 } 1189 len = DIRENT64_RECLEN(length); 1190 if (((caddr_t)nd + len) >= end_outb) { 1191 if (!outcount) { 1192 error = EINVAL; 1193 } 1194 break; 1195 } 1196 (void) strncpy(nd->d_name, 1197 (caddr_t)dname, length); 1198 bzero(&nd->d_name[length], 1199 DIRENT64_NAMELEN(len) - length); 1200 nd->d_ino = ud_xlate_to_daddr(udf_vfsp, 1201 SWAP_16(fid->fid_icb.lad_ext_prn), 1202 SWAP_32(fid->fid_icb.lad_ext_loc), 1, 1203 &dummy); 1204 nd->d_reclen = (uint16_t)len; 1205 nd->d_off = offset + FID_LEN(fid); 1206 nd = (struct dirent64 *) 1207 ((char *)nd + nd->d_reclen); 1208 } 1209 outcount++; 1210 } 1211 1212 offset += FID_LEN(fid); 1213 } 1214 1215 end: 1216 if (fbp != NULL) { 1217 fbrelse(fbp, S_OTHER); 1218 } 1219 ndlen = ((char *)nd - outbuf); 1220 /* 1221 * In case of error do not call uiomove. 1222 * Return the error to the caller. 1223 */ 1224 if ((error == 0) && (ndlen != 0)) { 1225 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop); 1226 uiop->uio_offset = offset; 1227 } 1228 kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize); 1229 kmem_free((caddr_t)dname, 1024); 1230 kmem_free(outbuf, (uint32_t)bufsize); 1231 if (eofp && error == 0) { 1232 *eofp = (uiop->uio_offset >= dirsiz); 1233 } 1234 return (error); 1235 } 1236 1237 /* ARGSUSED */ 1238 static int32_t 1239 udf_symlink( 1240 struct vnode *dvp, 1241 char *linkname, 1242 struct vattr *vap, 1243 char *target, 1244 struct cred *cr, 1245 caller_context_t *ct, 1246 int flags) 1247 { 1248 int32_t error = 0, outlen; 1249 uint32_t ioflag = 0; 1250 struct ud_inode *ip, *dip = VTOI(dvp); 1251 1252 struct path_comp *pc; 1253 int8_t *dname = NULL, *uname = NULL, *sp; 1254 1255 ud_printf("udf_symlink\n"); 1256 1257 ip = (struct ud_inode *)0; 1258 vap->va_type = VLNK; 1259 vap->va_rdev = 0; 1260 1261 rw_enter(&dip->i_rwlock, RW_WRITER); 1262 error = ud_direnter(dip, linkname, DE_CREATE, 1263 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct); 1264 rw_exit(&dip->i_rwlock); 1265 if (error == 0) { 1266 dname = kmem_zalloc(1024, KM_SLEEP); 1267 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1268 1269 pc = (struct path_comp *)uname; 1270 /* 1271 * If the first character in target is "/" 1272 * then skip it and create entry for it 1273 */ 1274 if (*target == '/') { 1275 pc->pc_type = 2; 1276 pc->pc_len = 0; 1277 pc = (struct path_comp *)(((char *)pc) + 4); 1278 while (*target == '/') { 1279 target++; 1280 } 1281 } 1282 1283 while (*target != NULL) { 1284 sp = target; 1285 while ((*target != '/') && (*target != '\0')) { 1286 target ++; 1287 } 1288 /* 1289 * We got the next component of the 1290 * path name. Create path_comp of 1291 * appropriate type 1292 */ 1293 if (((target - sp) == 1) && (*sp == '.')) { 1294 /* 1295 * Dot entry. 1296 */ 1297 pc->pc_type = 4; 1298 pc = (struct path_comp *)(((char *)pc) + 4); 1299 } else if (((target - sp) == 2) && 1300 (*sp == '.') && ((*(sp + 1)) == '.')) { 1301 /* 1302 * DotDot entry. 1303 */ 1304 pc->pc_type = 3; 1305 pc = (struct path_comp *)(((char *)pc) + 4); 1306 } else { 1307 /* 1308 * convert the user given name 1309 * into appropriate form to be put 1310 * on the media 1311 */ 1312 outlen = 1024; /* set to size of dname */ 1313 if (error = ud_compress(target - sp, &outlen, 1314 (uint8_t *)sp, (uint8_t *)dname)) { 1315 break; 1316 } 1317 pc->pc_type = 5; 1318 /* LINTED */ 1319 pc->pc_len = outlen; 1320 dname[outlen] = '\0'; 1321 (void) strcpy((char *)pc->pc_id, dname); 1322 pc = (struct path_comp *) 1323 (((char *)pc) + 4 + outlen); 1324 } 1325 while (*target == '/') { 1326 target++; 1327 } 1328 if (*target == NULL) { 1329 break; 1330 } 1331 } 1332 1333 rw_enter(&ip->i_contents, RW_WRITER); 1334 if (error == 0) { 1335 ioflag = FWRITE; 1336 if (curthread->t_flag & T_DONTPEND) { 1337 ioflag |= FDSYNC; 1338 } 1339 error = ud_rdwri(UIO_WRITE, ioflag, ip, 1340 uname, ((int8_t *)pc) - uname, 1341 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr); 1342 } 1343 if (error) { 1344 ud_idrop(ip); 1345 rw_exit(&ip->i_contents); 1346 rw_enter(&dip->i_rwlock, RW_WRITER); 1347 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0, 1348 (struct vnode *)0, DR_REMOVE, cr, ct); 1349 rw_exit(&dip->i_rwlock); 1350 goto update_inode; 1351 } 1352 rw_exit(&ip->i_contents); 1353 } 1354 1355 if ((error == 0) || (error == EEXIST)) { 1356 VN_RELE(ITOV(ip)); 1357 } 1358 1359 update_inode: 1360 ITIMES(VTOI(dvp)); 1361 if (uname != NULL) { 1362 kmem_free(uname, PAGESIZE); 1363 } 1364 if (dname != NULL) { 1365 kmem_free(dname, 1024); 1366 } 1367 1368 return (error); 1369 } 1370 1371 /* ARGSUSED */ 1372 static int32_t 1373 udf_readlink( 1374 struct vnode *vp, 1375 struct uio *uiop, 1376 struct cred *cr, 1377 caller_context_t *ct) 1378 { 1379 int32_t error = 0, off, id_len, size, len; 1380 int8_t *dname = NULL, *uname = NULL; 1381 struct ud_inode *ip; 1382 struct fbuf *fbp = NULL; 1383 struct path_comp *pc; 1384 1385 ud_printf("udf_readlink\n"); 1386 1387 if (vp->v_type != VLNK) { 1388 return (EINVAL); 1389 } 1390 1391 ip = VTOI(vp); 1392 size = ip->i_size; 1393 if (size > PAGESIZE) { 1394 return (EIO); 1395 } 1396 1397 if (size == 0) { 1398 return (0); 1399 } 1400 1401 dname = kmem_zalloc(1024, KM_SLEEP); 1402 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1403 1404 rw_enter(&ip->i_contents, RW_READER); 1405 1406 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) { 1407 goto end; 1408 } 1409 1410 off = 0; 1411 1412 while (off < size) { 1413 pc = (struct path_comp *)(fbp->fb_addr + off); 1414 switch (pc->pc_type) { 1415 case 1 : 1416 (void) strcpy(uname, ip->i_udf->udf_fsmnt); 1417 (void) strcat(uname, "/"); 1418 break; 1419 case 2 : 1420 if (pc->pc_len != 0) { 1421 goto end; 1422 } 1423 uname[0] = '/'; 1424 uname[1] = '\0'; 1425 break; 1426 case 3 : 1427 (void) strcat(uname, "../"); 1428 break; 1429 case 4 : 1430 (void) strcat(uname, "./"); 1431 break; 1432 case 5 : 1433 if ((error = ud_uncompress(pc->pc_len, &id_len, 1434 pc->pc_id, (uint8_t *)dname)) != 0) { 1435 break; 1436 } 1437 dname[id_len] = '\0'; 1438 (void) strcat(uname, dname); 1439 (void) strcat(uname, "/"); 1440 break; 1441 default : 1442 error = EINVAL; 1443 goto end; 1444 } 1445 off += 4 + pc->pc_len; 1446 } 1447 len = strlen(uname) - 1; 1448 if (uname[len] == '/') { 1449 if (len == 0) { 1450 /* 1451 * special case link to / 1452 */ 1453 len = 1; 1454 } else { 1455 uname[len] = '\0'; 1456 } 1457 } 1458 1459 error = uiomove(uname, len, UIO_READ, uiop); 1460 1461 ITIMES(ip); 1462 1463 end: 1464 if (fbp != NULL) { 1465 fbrelse(fbp, S_OTHER); 1466 } 1467 rw_exit(&ip->i_contents); 1468 if (uname != NULL) { 1469 kmem_free(uname, PAGESIZE); 1470 } 1471 if (dname != NULL) { 1472 kmem_free(dname, 1024); 1473 } 1474 return (error); 1475 } 1476 1477 /* ARGSUSED */ 1478 static int32_t 1479 udf_fsync( 1480 struct vnode *vp, 1481 int32_t syncflag, 1482 struct cred *cr, 1483 caller_context_t *ct) 1484 { 1485 int32_t error = 0; 1486 struct ud_inode *ip = VTOI(vp); 1487 1488 ud_printf("udf_fsync\n"); 1489 1490 rw_enter(&ip->i_contents, RW_WRITER); 1491 if (!(IS_SWAPVP(vp))) { 1492 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */ 1493 } 1494 if (error == 0) { 1495 error = ud_sync_indir(ip); 1496 } 1497 ITIMES(ip); /* XXX: is this necessary ??? */ 1498 rw_exit(&ip->i_contents); 1499 1500 return (error); 1501 } 1502 1503 /* ARGSUSED */ 1504 static void 1505 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 1506 { 1507 ud_printf("udf_iinactive\n"); 1508 1509 ud_iinactive(VTOI(vp), cr); 1510 } 1511 1512 /* ARGSUSED */ 1513 static int32_t 1514 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1515 { 1516 struct udf_fid *udfidp; 1517 struct ud_inode *ip = VTOI(vp); 1518 1519 ud_printf("udf_fid\n"); 1520 1521 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) { 1522 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1523 return (ENOSPC); 1524 } 1525 1526 udfidp = (struct udf_fid *)fidp; 1527 bzero((char *)udfidp, sizeof (struct udf_fid)); 1528 rw_enter(&ip->i_contents, RW_READER); 1529 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1530 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff; 1531 udfidp->udfid_prn = ip->i_icb_prn; 1532 udfidp->udfid_icb_lbn = ip->i_icb_block; 1533 rw_exit(&ip->i_contents); 1534 1535 return (0); 1536 } 1537 1538 /* ARGSUSED2 */ 1539 static int 1540 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1541 { 1542 struct ud_inode *ip = VTOI(vp); 1543 1544 ud_printf("udf_rwlock\n"); 1545 1546 if (write_lock) { 1547 rw_enter(&ip->i_rwlock, RW_WRITER); 1548 } else { 1549 rw_enter(&ip->i_rwlock, RW_READER); 1550 } 1551 #ifdef __lock_lint 1552 rw_exit(&ip->i_rwlock); 1553 #endif 1554 return (write_lock); 1555 } 1556 1557 /* ARGSUSED */ 1558 static void 1559 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1560 { 1561 struct ud_inode *ip = VTOI(vp); 1562 1563 ud_printf("udf_rwunlock\n"); 1564 1565 #ifdef __lock_lint 1566 rw_enter(&ip->i_rwlock, RW_WRITER); 1567 #endif 1568 1569 rw_exit(&ip->i_rwlock); 1570 1571 } 1572 1573 /* ARGSUSED */ 1574 static int32_t 1575 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 1576 { 1577 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1578 } 1579 1580 static int32_t 1581 udf_frlock( 1582 struct vnode *vp, 1583 int32_t cmd, 1584 struct flock64 *bfp, 1585 int32_t flag, 1586 offset_t offset, 1587 struct flk_callback *flk_cbp, 1588 cred_t *cr, 1589 caller_context_t *ct) 1590 { 1591 struct ud_inode *ip = VTOI(vp); 1592 1593 ud_printf("udf_frlock\n"); 1594 1595 /* 1596 * If file is being mapped, disallow frlock. 1597 * XXX I am not holding tlock while checking i_mapcnt because the 1598 * current locking strategy drops all locks before calling fs_frlock. 1599 * So, mapcnt could change before we enter fs_frlock making is 1600 * meaningless to have held tlock in the first place. 1601 */ 1602 if ((ip->i_mapcnt > 0) && 1603 (MANDLOCK(vp, ip->i_char))) { 1604 return (EAGAIN); 1605 } 1606 1607 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 1608 } 1609 1610 /*ARGSUSED6*/ 1611 static int32_t 1612 udf_space( 1613 struct vnode *vp, 1614 int32_t cmd, 1615 struct flock64 *bfp, 1616 int32_t flag, 1617 offset_t offset, 1618 cred_t *cr, 1619 caller_context_t *ct) 1620 { 1621 int32_t error = 0; 1622 1623 ud_printf("udf_space\n"); 1624 1625 if (cmd != F_FREESP) { 1626 error = EINVAL; 1627 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) { 1628 error = ud_freesp(vp, bfp, flag, cr); 1629 } 1630 1631 return (error); 1632 } 1633 1634 /* ARGSUSED */ 1635 static int32_t 1636 udf_getpage( 1637 struct vnode *vp, 1638 offset_t off, 1639 size_t len, 1640 uint32_t *protp, 1641 struct page **plarr, 1642 size_t plsz, 1643 struct seg *seg, 1644 caddr_t addr, 1645 enum seg_rw rw, 1646 struct cred *cr, 1647 caller_context_t *ct) 1648 { 1649 struct ud_inode *ip = VTOI(vp); 1650 int32_t error, has_holes, beyond_eof, seqmode, dolock; 1651 int32_t pgsize = PAGESIZE; 1652 struct udf_vfs *udf_vfsp = ip->i_udf; 1653 page_t **pl; 1654 u_offset_t pgoff, eoff, uoff; 1655 krw_t rwtype; 1656 caddr_t pgaddr; 1657 1658 ud_printf("udf_getpage\n"); 1659 1660 uoff = (u_offset_t)off; /* type conversion */ 1661 if (protp) { 1662 *protp = PROT_ALL; 1663 } 1664 if (vp->v_flag & VNOMAP) { 1665 return (ENOSYS); 1666 } 1667 seqmode = ip->i_nextr == uoff && rw != S_CREATE; 1668 1669 rwtype = RW_READER; 1670 dolock = (rw_owner(&ip->i_contents) != curthread); 1671 retrylock: 1672 #ifdef __lock_lint 1673 rw_enter(&ip->i_contents, rwtype); 1674 #else 1675 if (dolock) { 1676 rw_enter(&ip->i_contents, rwtype); 1677 } 1678 #endif 1679 1680 /* 1681 * We may be getting called as a side effect of a bmap using 1682 * fbread() when the blocks might be being allocated and the 1683 * size has not yet been up'ed. In this case we want to be 1684 * able to return zero pages if we get back UDF_HOLE from 1685 * calling bmap for a non write case here. We also might have 1686 * to read some frags from the disk into a page if we are 1687 * extending the number of frags for a given lbn in bmap(). 1688 */ 1689 beyond_eof = uoff + len > ip->i_size + PAGEOFFSET; 1690 if (beyond_eof && seg != segkmap) { 1691 #ifdef __lock_lint 1692 rw_exit(&ip->i_contents); 1693 #else 1694 if (dolock) { 1695 rw_exit(&ip->i_contents); 1696 } 1697 #endif 1698 return (EFAULT); 1699 } 1700 1701 /* 1702 * Must hold i_contents lock throughout the call to pvn_getpages 1703 * since locked pages are returned from each call to ud_getapage. 1704 * Must *not* return locked pages and then try for contents lock 1705 * due to lock ordering requirements (inode > page) 1706 */ 1707 1708 has_holes = ud_bmap_has_holes(ip); 1709 1710 if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) { 1711 int32_t blk_size, count; 1712 u_offset_t offset; 1713 1714 /* 1715 * We must acquire the RW_WRITER lock in order to 1716 * call bmap_write(). 1717 */ 1718 if (dolock && rwtype == RW_READER) { 1719 rwtype = RW_WRITER; 1720 1721 if (!rw_tryupgrade(&ip->i_contents)) { 1722 1723 rw_exit(&ip->i_contents); 1724 1725 goto retrylock; 1726 } 1727 } 1728 1729 /* 1730 * May be allocating disk blocks for holes here as 1731 * a result of mmap faults. write(2) does the bmap_write 1732 * in rdip/wrip, not here. We are not dealing with frags 1733 * in this case. 1734 */ 1735 offset = uoff; 1736 while ((offset < uoff + len) && 1737 (offset < ip->i_size)) { 1738 /* 1739 * the variable "bnp" is to simplify the expression for 1740 * the compiler; * just passing in &bn to bmap_write 1741 * causes a compiler "loop" 1742 */ 1743 1744 blk_size = udf_vfsp->udf_lbsize; 1745 if ((offset + blk_size) > ip->i_size) { 1746 count = ip->i_size - offset; 1747 } else { 1748 count = blk_size; 1749 } 1750 error = ud_bmap_write(ip, offset, count, 0, cr); 1751 if (error) { 1752 goto update_inode; 1753 } 1754 offset += count; /* XXX - make this contig */ 1755 } 1756 } 1757 1758 /* 1759 * Can be a reader from now on. 1760 */ 1761 #ifdef __lock_lint 1762 if (rwtype == RW_WRITER) { 1763 rw_downgrade(&ip->i_contents); 1764 } 1765 #else 1766 if (dolock && rwtype == RW_WRITER) { 1767 rw_downgrade(&ip->i_contents); 1768 } 1769 #endif 1770 1771 /* 1772 * We remove PROT_WRITE in cases when the file has UDF holes 1773 * because we don't want to call bmap_read() to check each 1774 * page if it is backed with a disk block. 1775 */ 1776 if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) { 1777 *protp &= ~PROT_WRITE; 1778 } 1779 1780 error = 0; 1781 1782 /* 1783 * The loop looks up pages in the range <off, off + len). 1784 * For each page, we first check if we should initiate an asynchronous 1785 * read ahead before we call page_lookup (we may sleep in page_lookup 1786 * for a previously initiated disk read). 1787 */ 1788 eoff = (uoff + len); 1789 for (pgoff = uoff, pgaddr = addr, pl = plarr; 1790 pgoff < eoff; /* empty */) { 1791 page_t *pp; 1792 u_offset_t nextrio; 1793 se_t se; 1794 1795 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED); 1796 1797 /* 1798 * Handle async getpage (faultahead) 1799 */ 1800 if (plarr == NULL) { 1801 ip->i_nextrio = pgoff; 1802 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1803 pgoff += pgsize; 1804 pgaddr += pgsize; 1805 continue; 1806 } 1807 1808 /* 1809 * Check if we should initiate read ahead of next cluster. 1810 * We call page_exists only when we need to confirm that 1811 * we have the current page before we initiate the read ahead. 1812 */ 1813 nextrio = ip->i_nextrio; 1814 if (seqmode && 1815 pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio && 1816 nextrio < ip->i_size && page_exists(vp, pgoff)) 1817 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1818 1819 if ((pp = page_lookup(vp, pgoff, se)) != NULL) { 1820 1821 /* 1822 * We found the page in the page cache. 1823 */ 1824 *pl++ = pp; 1825 pgoff += pgsize; 1826 pgaddr += pgsize; 1827 len -= pgsize; 1828 plsz -= pgsize; 1829 } else { 1830 1831 /* 1832 * We have to create the page, or read it from disk. 1833 */ 1834 if (error = ud_getpage_miss(vp, pgoff, len, 1835 seg, pgaddr, pl, plsz, rw, seqmode)) { 1836 goto error_out; 1837 } 1838 1839 while (*pl != NULL) { 1840 pl++; 1841 pgoff += pgsize; 1842 pgaddr += pgsize; 1843 len -= pgsize; 1844 plsz -= pgsize; 1845 } 1846 } 1847 } 1848 1849 /* 1850 * Return pages up to plsz if they are in the page cache. 1851 * We cannot return pages if there is a chance that they are 1852 * backed with a UDF hole and rw is S_WRITE or S_CREATE. 1853 */ 1854 if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) { 1855 1856 ASSERT((protp == NULL) || 1857 !(has_holes && (*protp & PROT_WRITE))); 1858 1859 eoff = pgoff + plsz; 1860 while (pgoff < eoff) { 1861 page_t *pp; 1862 1863 if ((pp = page_lookup_nowait(vp, pgoff, 1864 SE_SHARED)) == NULL) 1865 break; 1866 1867 *pl++ = pp; 1868 pgoff += pgsize; 1869 plsz -= pgsize; 1870 } 1871 } 1872 1873 if (plarr) 1874 *pl = NULL; /* Terminate page list */ 1875 ip->i_nextr = pgoff; 1876 1877 error_out: 1878 if (error && plarr) { 1879 /* 1880 * Release any pages we have locked. 1881 */ 1882 while (pl > &plarr[0]) 1883 page_unlock(*--pl); 1884 1885 plarr[0] = NULL; 1886 } 1887 1888 update_inode: 1889 #ifdef __lock_lint 1890 rw_exit(&ip->i_contents); 1891 #else 1892 if (dolock) { 1893 rw_exit(&ip->i_contents); 1894 } 1895 #endif 1896 1897 /* 1898 * If the inode is not already marked for IACC (in rwip() for read) 1899 * and the inode is not marked for no access time update (in rwip() 1900 * for write) then update the inode access time and mod time now. 1901 */ 1902 mutex_enter(&ip->i_tlock); 1903 if ((ip->i_flag & (IACC | INOACC)) == 0) { 1904 if ((rw != S_OTHER) && (ip->i_type != VDIR)) { 1905 ip->i_flag |= IACC; 1906 } 1907 if (rw == S_WRITE) { 1908 ip->i_flag |= IUPD; 1909 } 1910 ITIMES_NOLOCK(ip); 1911 } 1912 mutex_exit(&ip->i_tlock); 1913 1914 return (error); 1915 } 1916 1917 int32_t ud_delay = 1; 1918 1919 /* ARGSUSED */ 1920 static int32_t 1921 udf_putpage( 1922 struct vnode *vp, 1923 offset_t off, 1924 size_t len, 1925 int32_t flags, 1926 struct cred *cr, 1927 caller_context_t *ct) 1928 { 1929 struct ud_inode *ip; 1930 int32_t error = 0; 1931 1932 ud_printf("udf_putpage\n"); 1933 1934 ip = VTOI(vp); 1935 #ifdef __lock_lint 1936 rw_enter(&ip->i_contents, RW_WRITER); 1937 #endif 1938 1939 if (vp->v_count == 0) { 1940 cmn_err(CE_WARN, "ud_putpage : bad v_count"); 1941 error = EINVAL; 1942 goto out; 1943 } 1944 1945 if (vp->v_flag & VNOMAP) { 1946 error = ENOSYS; 1947 goto out; 1948 } 1949 1950 if (flags & B_ASYNC) { 1951 if (ud_delay && len && 1952 (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) { 1953 mutex_enter(&ip->i_tlock); 1954 1955 /* 1956 * If nobody stalled, start a new cluster. 1957 */ 1958 if (ip->i_delaylen == 0) { 1959 ip->i_delayoff = off; 1960 ip->i_delaylen = len; 1961 mutex_exit(&ip->i_tlock); 1962 goto out; 1963 } 1964 1965 /* 1966 * If we have a full cluster or they are not contig, 1967 * then push last cluster and start over. 1968 */ 1969 if (ip->i_delaylen >= WR_CLUSTSZ(ip) || 1970 ip->i_delayoff + ip->i_delaylen != off) { 1971 u_offset_t doff; 1972 size_t dlen; 1973 1974 doff = ip->i_delayoff; 1975 dlen = ip->i_delaylen; 1976 ip->i_delayoff = off; 1977 ip->i_delaylen = len; 1978 mutex_exit(&ip->i_tlock); 1979 error = ud_putpages(vp, doff, dlen, flags, cr); 1980 /* LMXXX - flags are new val, not old */ 1981 goto out; 1982 } 1983 1984 /* 1985 * There is something there, it's not full, and 1986 * it is contig. 1987 */ 1988 ip->i_delaylen += len; 1989 mutex_exit(&ip->i_tlock); 1990 goto out; 1991 } 1992 1993 /* 1994 * Must have weird flags or we are not clustering. 1995 */ 1996 } 1997 1998 error = ud_putpages(vp, off, len, flags, cr); 1999 2000 out: 2001 #ifdef __lock_lint 2002 rw_exit(&ip->i_contents); 2003 #endif 2004 return (error); 2005 } 2006 2007 /* ARGSUSED */ 2008 static int32_t 2009 udf_map( 2010 struct vnode *vp, 2011 offset_t off, 2012 struct as *as, 2013 caddr_t *addrp, 2014 size_t len, 2015 uint8_t prot, 2016 uint8_t maxprot, 2017 uint32_t flags, 2018 struct cred *cr, 2019 caller_context_t *ct) 2020 { 2021 struct segvn_crargs vn_a; 2022 int32_t error = 0; 2023 2024 ud_printf("udf_map\n"); 2025 2026 if (vp->v_flag & VNOMAP) { 2027 error = ENOSYS; 2028 goto end; 2029 } 2030 2031 if ((off < (offset_t)0) || 2032 ((off + len) < (offset_t)0)) { 2033 error = EINVAL; 2034 goto end; 2035 } 2036 2037 if (vp->v_type != VREG) { 2038 error = ENODEV; 2039 goto end; 2040 } 2041 2042 /* 2043 * If file is being locked, disallow mapping. 2044 */ 2045 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) { 2046 error = EAGAIN; 2047 goto end; 2048 } 2049 2050 as_rangelock(as); 2051 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 2052 if (error != 0) { 2053 as_rangeunlock(as); 2054 goto end; 2055 } 2056 2057 vn_a.vp = vp; 2058 vn_a.offset = off; 2059 vn_a.type = flags & MAP_TYPE; 2060 vn_a.prot = prot; 2061 vn_a.maxprot = maxprot; 2062 vn_a.cred = cr; 2063 vn_a.amp = NULL; 2064 vn_a.flags = flags & ~MAP_TYPE; 2065 vn_a.szc = 0; 2066 vn_a.lgrp_mem_policy_flags = 0; 2067 2068 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a); 2069 as_rangeunlock(as); 2070 2071 end: 2072 return (error); 2073 } 2074 2075 /* ARGSUSED */ 2076 static int32_t 2077 udf_addmap(struct vnode *vp, 2078 offset_t off, 2079 struct as *as, 2080 caddr_t addr, 2081 size_t len, 2082 uint8_t prot, 2083 uint8_t maxprot, 2084 uint32_t flags, 2085 struct cred *cr, 2086 caller_context_t *ct) 2087 { 2088 struct ud_inode *ip = VTOI(vp); 2089 2090 ud_printf("udf_addmap\n"); 2091 2092 if (vp->v_flag & VNOMAP) { 2093 return (ENOSYS); 2094 } 2095 2096 mutex_enter(&ip->i_tlock); 2097 ip->i_mapcnt += btopr(len); 2098 mutex_exit(&ip->i_tlock); 2099 2100 return (0); 2101 } 2102 2103 /* ARGSUSED */ 2104 static int32_t 2105 udf_delmap( 2106 struct vnode *vp, offset_t off, 2107 struct as *as, 2108 caddr_t addr, 2109 size_t len, 2110 uint32_t prot, 2111 uint32_t maxprot, 2112 uint32_t flags, 2113 struct cred *cr, 2114 caller_context_t *ct) 2115 { 2116 struct ud_inode *ip = VTOI(vp); 2117 2118 ud_printf("udf_delmap\n"); 2119 2120 if (vp->v_flag & VNOMAP) { 2121 return (ENOSYS); 2122 } 2123 2124 mutex_enter(&ip->i_tlock); 2125 ip->i_mapcnt -= btopr(len); /* Count released mappings */ 2126 ASSERT(ip->i_mapcnt >= 0); 2127 mutex_exit(&ip->i_tlock); 2128 2129 return (0); 2130 } 2131 2132 /* ARGSUSED */ 2133 static int32_t 2134 udf_l_pathconf( 2135 struct vnode *vp, 2136 int32_t cmd, 2137 ulong_t *valp, 2138 struct cred *cr, 2139 caller_context_t *ct) 2140 { 2141 int32_t error = 0; 2142 2143 ud_printf("udf_l_pathconf\n"); 2144 2145 if (cmd == _PC_FILESIZEBITS) { 2146 /* 2147 * udf supports 64 bits as file size 2148 * but there are several other restrictions 2149 * it only supports 32-bit block numbers and 2150 * daddr32_t is only and int32_t so taking these 2151 * into account we can stay just as where ufs is 2152 */ 2153 *valp = 41; 2154 } else { 2155 error = fs_pathconf(vp, cmd, valp, cr, ct); 2156 } 2157 2158 return (error); 2159 } 2160 2161 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0; 2162 #ifndef __lint 2163 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads)) 2164 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes)) 2165 #endif 2166 /* 2167 * Assumption is that there will not be a pageio request 2168 * to a enbedded file 2169 */ 2170 /* ARGSUSED */ 2171 static int32_t 2172 udf_pageio( 2173 struct vnode *vp, 2174 struct page *pp, 2175 u_offset_t io_off, 2176 size_t io_len, 2177 int32_t flags, 2178 struct cred *cr, 2179 caller_context_t *ct) 2180 { 2181 daddr_t bn; 2182 struct buf *bp; 2183 struct ud_inode *ip = VTOI(vp); 2184 int32_t dolock, error = 0, contig, multi_io; 2185 size_t done_len = 0, cur_len = 0; 2186 page_t *npp = NULL, *opp = NULL, *cpp = pp; 2187 2188 if (pp == NULL) { 2189 return (EINVAL); 2190 } 2191 2192 dolock = (rw_owner(&ip->i_contents) != curthread); 2193 2194 /* 2195 * We need a better check. Ideally, we would use another 2196 * vnodeops so that hlocked and forcibly unmounted file 2197 * systems would return EIO where appropriate and w/o the 2198 * need for these checks. 2199 */ 2200 if (ip->i_udf == NULL) { 2201 return (EIO); 2202 } 2203 2204 #ifdef __lock_lint 2205 rw_enter(&ip->i_contents, RW_READER); 2206 #else 2207 if (dolock) { 2208 rw_enter(&ip->i_contents, RW_READER); 2209 } 2210 #endif 2211 2212 /* 2213 * Break the io request into chunks, one for each contiguous 2214 * stretch of disk blocks in the target file. 2215 */ 2216 while (done_len < io_len) { 2217 ASSERT(cpp); 2218 bp = NULL; 2219 contig = 0; 2220 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len), 2221 &bn, &contig)) { 2222 break; 2223 } 2224 2225 if (bn == UDF_HOLE) { /* No holey swapfiles */ 2226 cmn_err(CE_WARN, "SWAP file has HOLES"); 2227 error = EINVAL; 2228 break; 2229 } 2230 2231 cur_len = MIN(io_len - done_len, contig); 2232 2233 /* 2234 * Check if more than one I/O is 2235 * required to complete the given 2236 * I/O operation 2237 */ 2238 if (ip->i_udf->udf_lbsize < PAGESIZE) { 2239 if (cur_len >= PAGESIZE) { 2240 multi_io = 0; 2241 cur_len &= PAGEMASK; 2242 } else { 2243 multi_io = 1; 2244 cur_len = MIN(io_len - done_len, PAGESIZE); 2245 } 2246 } 2247 page_list_break(&cpp, &npp, btop(cur_len)); 2248 2249 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags); 2250 ASSERT(bp != NULL); 2251 2252 bp->b_edev = ip->i_dev; 2253 bp->b_dev = cmpdev(ip->i_dev); 2254 bp->b_blkno = bn; 2255 bp->b_un.b_addr = (caddr_t)0; 2256 bp->b_file = vp; 2257 bp->b_offset = (offset_t)(io_off + done_len); 2258 2259 /* 2260 * ub.ub_pageios.value.ul++; 2261 */ 2262 if (multi_io == 0) { 2263 (void) bdev_strategy(bp); 2264 } else { 2265 error = ud_multi_strat(ip, cpp, bp, 2266 (u_offset_t)(io_off + done_len)); 2267 if (error != 0) { 2268 pageio_done(bp); 2269 break; 2270 } 2271 } 2272 if (flags & B_READ) { 2273 ud_pageio_reads++; 2274 } else { 2275 ud_pageio_writes++; 2276 } 2277 2278 /* 2279 * If the request is not B_ASYNC, wait for i/o to complete 2280 * and re-assemble the page list to return to the caller. 2281 * If it is B_ASYNC we leave the page list in pieces and 2282 * cleanup() will dispose of them. 2283 */ 2284 if ((flags & B_ASYNC) == 0) { 2285 error = biowait(bp); 2286 pageio_done(bp); 2287 if (error) { 2288 break; 2289 } 2290 page_list_concat(&opp, &cpp); 2291 } 2292 cpp = npp; 2293 npp = NULL; 2294 done_len += cur_len; 2295 } 2296 2297 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len)); 2298 if (error) { 2299 if (flags & B_ASYNC) { 2300 /* Cleanup unprocessed parts of list */ 2301 page_list_concat(&cpp, &npp); 2302 if (flags & B_READ) { 2303 pvn_read_done(cpp, B_ERROR); 2304 } else { 2305 pvn_write_done(cpp, B_ERROR); 2306 } 2307 } else { 2308 /* Re-assemble list and let caller clean up */ 2309 page_list_concat(&opp, &cpp); 2310 page_list_concat(&opp, &npp); 2311 } 2312 } 2313 2314 #ifdef __lock_lint 2315 rw_exit(&ip->i_contents); 2316 #else 2317 if (dolock) { 2318 rw_exit(&ip->i_contents); 2319 } 2320 #endif 2321 return (error); 2322 } 2323 2324 2325 2326 2327 /* -------------------- local functions --------------------------- */ 2328 2329 2330 2331 int32_t 2332 ud_rdwri(enum uio_rw rw, int32_t ioflag, 2333 struct ud_inode *ip, caddr_t base, int32_t len, 2334 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr) 2335 { 2336 int32_t error; 2337 struct uio auio; 2338 struct iovec aiov; 2339 2340 ud_printf("ud_rdwri\n"); 2341 2342 bzero((caddr_t)&auio, sizeof (uio_t)); 2343 bzero((caddr_t)&aiov, sizeof (iovec_t)); 2344 2345 aiov.iov_base = base; 2346 aiov.iov_len = len; 2347 auio.uio_iov = &aiov; 2348 auio.uio_iovcnt = 1; 2349 auio.uio_loffset = offset; 2350 auio.uio_segflg = (int16_t)seg; 2351 auio.uio_resid = len; 2352 2353 if (rw == UIO_WRITE) { 2354 auio.uio_fmode = FWRITE; 2355 auio.uio_extflg = UIO_COPY_DEFAULT; 2356 auio.uio_llimit = curproc->p_fsz_ctl; 2357 error = ud_wrip(ip, &auio, ioflag, cr); 2358 } else { 2359 auio.uio_fmode = FREAD; 2360 auio.uio_extflg = UIO_COPY_CACHED; 2361 auio.uio_llimit = MAXOFFSET_T; 2362 error = ud_rdip(ip, &auio, ioflag, cr); 2363 } 2364 2365 if (aresid) { 2366 *aresid = auio.uio_resid; 2367 } else if (auio.uio_resid) { 2368 error = EIO; 2369 } 2370 return (error); 2371 } 2372 2373 /* 2374 * Free behind hacks. The pager is busted. 2375 * XXX - need to pass the information down to writedone() in a flag like B_SEQ 2376 * or B_FREE_IF_TIGHT_ON_MEMORY. 2377 */ 2378 int32_t ud_freebehind = 1; 2379 int32_t ud_smallfile = 32 * 1024; 2380 2381 /* ARGSUSED */ 2382 int32_t 2383 ud_getpage_miss(struct vnode *vp, u_offset_t off, 2384 size_t len, struct seg *seg, caddr_t addr, page_t *pl[], 2385 size_t plsz, enum seg_rw rw, int32_t seq) 2386 { 2387 struct ud_inode *ip = VTOI(vp); 2388 int32_t err = 0; 2389 size_t io_len; 2390 u_offset_t io_off; 2391 u_offset_t pgoff; 2392 page_t *pp; 2393 2394 pl[0] = NULL; 2395 2396 /* 2397 * Figure out whether the page can be created, or must be 2398 * read from the disk 2399 */ 2400 if (rw == S_CREATE) { 2401 if ((pp = page_create_va(vp, off, 2402 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 2403 cmn_err(CE_WARN, "ud_getpage_miss: page_create"); 2404 return (EINVAL); 2405 } 2406 io_len = PAGESIZE; 2407 } else { 2408 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 2409 &io_len, off, PAGESIZE, 0); 2410 2411 /* 2412 * Some other thread has entered the page. 2413 * ud_getpage will retry page_lookup. 2414 */ 2415 if (pp == NULL) { 2416 return (0); 2417 } 2418 2419 /* 2420 * Fill the page with as much data as we can from the file. 2421 */ 2422 err = ud_page_fill(ip, pp, off, B_READ, &pgoff); 2423 if (err) { 2424 pvn_read_done(pp, B_ERROR); 2425 return (err); 2426 } 2427 2428 /* 2429 * XXX ??? ufs has io_len instead of pgoff below 2430 */ 2431 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2432 2433 /* 2434 * If the file access is sequential, initiate read ahead 2435 * of the next cluster. 2436 */ 2437 if (seq && ip->i_nextrio < ip->i_size) { 2438 ud_getpage_ra(vp, off, seg, addr); 2439 } 2440 } 2441 2442 outmiss: 2443 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw); 2444 return (err); 2445 } 2446 2447 /* ARGSUSED */ 2448 void 2449 ud_getpage_ra(struct vnode *vp, 2450 u_offset_t off, struct seg *seg, caddr_t addr) 2451 { 2452 page_t *pp; 2453 size_t io_len; 2454 struct ud_inode *ip = VTOI(vp); 2455 u_offset_t io_off = ip->i_nextrio, pgoff; 2456 caddr_t addr2 = addr + (io_off - off); 2457 daddr_t bn; 2458 int32_t contig = 0; 2459 2460 /* 2461 * Is this test needed? 2462 */ 2463 2464 if (addr2 >= seg->s_base + seg->s_size) { 2465 return; 2466 } 2467 2468 contig = 0; 2469 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) { 2470 return; 2471 } 2472 2473 pp = pvn_read_kluster(vp, io_off, seg, addr2, 2474 &io_off, &io_len, io_off, PAGESIZE, 1); 2475 2476 /* 2477 * Some other thread has entered the page. 2478 * So no read head done here (ie we will have to and wait 2479 * for the read when needed). 2480 */ 2481 2482 if (pp == NULL) { 2483 return; 2484 } 2485 2486 (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff); 2487 ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2488 } 2489 2490 int 2491 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off, 2492 uint32_t bflgs, u_offset_t *pg_off) 2493 { 2494 daddr_t bn; 2495 struct buf *bp; 2496 caddr_t kaddr, caddr; 2497 int32_t error = 0, contig = 0, multi_io = 0; 2498 int32_t lbsize = ip->i_udf->udf_lbsize; 2499 int32_t lbmask = ip->i_udf->udf_lbmask; 2500 uint64_t isize; 2501 2502 isize = (ip->i_size + lbmask) & (~lbmask); 2503 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2504 2505 /* 2506 * Embedded file read file_entry 2507 * from buffer cache and copy the required 2508 * portions 2509 */ 2510 bp = ud_bread(ip->i_dev, 2511 ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize); 2512 if ((bp->b_error == 0) && 2513 (bp->b_resid == 0)) { 2514 2515 caddr = bp->b_un.b_addr + ip->i_data_off; 2516 2517 /* 2518 * mapin to kvm 2519 */ 2520 kaddr = (caddr_t)ppmapin(pp, 2521 PROT_READ | PROT_WRITE, (caddr_t)-1); 2522 (void) kcopy(caddr, kaddr, ip->i_size); 2523 2524 /* 2525 * mapout of kvm 2526 */ 2527 ppmapout(kaddr); 2528 } 2529 brelse(bp); 2530 contig = ip->i_size; 2531 } else { 2532 2533 /* 2534 * Get the continuous size and block number 2535 * at offset "off" 2536 */ 2537 if (error = ud_bmap_read(ip, off, &bn, &contig)) 2538 goto out; 2539 contig = MIN(contig, PAGESIZE); 2540 contig = (contig + lbmask) & (~lbmask); 2541 2542 /* 2543 * Zero part of the page which we are not 2544 * going to read from the disk. 2545 */ 2546 2547 if (bn == UDF_HOLE) { 2548 2549 /* 2550 * This is a HOLE. Just zero out 2551 * the page 2552 */ 2553 if (((off + contig) == isize) || 2554 (contig == PAGESIZE)) { 2555 pagezero(pp->p_prev, 0, PAGESIZE); 2556 goto out; 2557 } 2558 } 2559 2560 if (contig < PAGESIZE) { 2561 uint64_t count; 2562 2563 count = isize - off; 2564 if (contig != count) { 2565 multi_io = 1; 2566 contig = (int32_t)(MIN(count, PAGESIZE)); 2567 } else { 2568 pagezero(pp->p_prev, contig, PAGESIZE - contig); 2569 } 2570 } 2571 2572 /* 2573 * Get a bp and initialize it 2574 */ 2575 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs); 2576 ASSERT(bp != NULL); 2577 2578 bp->b_edev = ip->i_dev; 2579 bp->b_dev = cmpdev(ip->i_dev); 2580 bp->b_blkno = bn; 2581 bp->b_un.b_addr = 0; 2582 bp->b_file = ip->i_vnode; 2583 2584 /* 2585 * Start I/O 2586 */ 2587 if (multi_io == 0) { 2588 2589 /* 2590 * Single I/O is sufficient for this page 2591 */ 2592 (void) bdev_strategy(bp); 2593 } else { 2594 2595 /* 2596 * We need to do the I/O in 2597 * piece's 2598 */ 2599 error = ud_multi_strat(ip, pp, bp, off); 2600 if (error != 0) { 2601 goto out; 2602 } 2603 } 2604 if ((bflgs & B_ASYNC) == 0) { 2605 2606 /* 2607 * Wait for i/o to complete. 2608 */ 2609 2610 error = biowait(bp); 2611 pageio_done(bp); 2612 if (error) { 2613 goto out; 2614 } 2615 } 2616 } 2617 if ((off + contig) >= ip->i_size) { 2618 contig = ip->i_size - off; 2619 } 2620 2621 out: 2622 *pg_off = contig; 2623 return (error); 2624 } 2625 2626 int32_t 2627 ud_putpages(struct vnode *vp, offset_t off, 2628 size_t len, int32_t flags, struct cred *cr) 2629 { 2630 struct ud_inode *ip; 2631 page_t *pp; 2632 u_offset_t io_off; 2633 size_t io_len; 2634 u_offset_t eoff; 2635 int32_t err = 0; 2636 int32_t dolock; 2637 2638 ud_printf("ud_putpages\n"); 2639 2640 if (vp->v_count == 0) { 2641 cmn_err(CE_WARN, "ud_putpages: bad v_count"); 2642 return (EINVAL); 2643 } 2644 2645 ip = VTOI(vp); 2646 2647 /* 2648 * Acquire the readers/write inode lock before locking 2649 * any pages in this inode. 2650 * The inode lock is held during i/o. 2651 */ 2652 if (len == 0) { 2653 mutex_enter(&ip->i_tlock); 2654 ip->i_delayoff = ip->i_delaylen = 0; 2655 mutex_exit(&ip->i_tlock); 2656 } 2657 #ifdef __lock_lint 2658 rw_enter(&ip->i_contents, RW_READER); 2659 #else 2660 dolock = (rw_owner(&ip->i_contents) != curthread); 2661 if (dolock) { 2662 rw_enter(&ip->i_contents, RW_READER); 2663 } 2664 #endif 2665 2666 if (!vn_has_cached_data(vp)) { 2667 #ifdef __lock_lint 2668 rw_exit(&ip->i_contents); 2669 #else 2670 if (dolock) { 2671 rw_exit(&ip->i_contents); 2672 } 2673 #endif 2674 return (0); 2675 } 2676 2677 if (len == 0) { 2678 /* 2679 * Search the entire vp list for pages >= off. 2680 */ 2681 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage, 2682 flags, cr); 2683 } else { 2684 /* 2685 * Loop over all offsets in the range looking for 2686 * pages to deal with. 2687 */ 2688 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) { 2689 eoff = MIN(off + len, eoff); 2690 } else { 2691 eoff = off + len; 2692 } 2693 2694 for (io_off = off; io_off < eoff; io_off += io_len) { 2695 /* 2696 * If we are not invalidating, synchronously 2697 * freeing or writing pages, use the routine 2698 * page_lookup_nowait() to prevent reclaiming 2699 * them from the free list. 2700 */ 2701 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 2702 pp = page_lookup(vp, io_off, 2703 (flags & (B_INVAL | B_FREE)) ? 2704 SE_EXCL : SE_SHARED); 2705 } else { 2706 pp = page_lookup_nowait(vp, io_off, 2707 (flags & B_FREE) ? SE_EXCL : SE_SHARED); 2708 } 2709 2710 if (pp == NULL || pvn_getdirty(pp, flags) == 0) { 2711 io_len = PAGESIZE; 2712 } else { 2713 2714 err = ud_putapage(vp, pp, 2715 &io_off, &io_len, flags, cr); 2716 if (err != 0) { 2717 break; 2718 } 2719 /* 2720 * "io_off" and "io_len" are returned as 2721 * the range of pages we actually wrote. 2722 * This allows us to skip ahead more quickly 2723 * since several pages may've been dealt 2724 * with by this iteration of the loop. 2725 */ 2726 } 2727 } 2728 } 2729 if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) { 2730 /* 2731 * We have just sync'ed back all the pages on 2732 * the inode, turn off the IMODTIME flag. 2733 */ 2734 mutex_enter(&ip->i_tlock); 2735 ip->i_flag &= ~IMODTIME; 2736 mutex_exit(&ip->i_tlock); 2737 } 2738 #ifdef __lock_lint 2739 rw_exit(&ip->i_contents); 2740 #else 2741 if (dolock) { 2742 rw_exit(&ip->i_contents); 2743 } 2744 #endif 2745 return (err); 2746 } 2747 2748 /* ARGSUSED */ 2749 int32_t 2750 ud_putapage(struct vnode *vp, 2751 page_t *pp, u_offset_t *offp, 2752 size_t *lenp, int32_t flags, struct cred *cr) 2753 { 2754 daddr_t bn; 2755 size_t io_len; 2756 struct ud_inode *ip; 2757 int32_t error = 0, contig, multi_io = 0; 2758 struct udf_vfs *udf_vfsp; 2759 u_offset_t off, io_off; 2760 caddr_t kaddr, caddr; 2761 struct buf *bp = NULL; 2762 int32_t lbmask; 2763 uint64_t isize; 2764 int32_t crc_len; 2765 struct file_entry *fe; 2766 2767 ud_printf("ud_putapage\n"); 2768 2769 ip = VTOI(vp); 2770 ASSERT(ip); 2771 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2772 lbmask = ip->i_udf->udf_lbmask; 2773 isize = (ip->i_size + lbmask) & (~lbmask); 2774 2775 udf_vfsp = ip->i_udf; 2776 ASSERT(udf_vfsp->udf_flags & UDF_FL_RW); 2777 2778 /* 2779 * If the modified time on the inode has not already been 2780 * set elsewhere (e.g. for write/setattr) we set the time now. 2781 * This gives us approximate modified times for mmap'ed files 2782 * which are modified via stores in the user address space. 2783 */ 2784 if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) { 2785 mutex_enter(&ip->i_tlock); 2786 ip->i_flag |= IUPD; 2787 ITIMES_NOLOCK(ip); 2788 mutex_exit(&ip->i_tlock); 2789 } 2790 2791 2792 /* 2793 * Align the request to a block boundry (for old file systems), 2794 * and go ask bmap() how contiguous things are for this file. 2795 */ 2796 off = pp->p_offset & ~(offset_t)lbmask; 2797 /* block align it */ 2798 2799 2800 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2801 ASSERT(ip->i_size <= ip->i_max_emb); 2802 2803 pp = pvn_write_kluster(vp, pp, &io_off, 2804 &io_len, off, PAGESIZE, flags); 2805 if (io_len == 0) { 2806 io_len = PAGESIZE; 2807 } 2808 2809 bp = ud_bread(ip->i_dev, 2810 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift, 2811 udf_vfsp->udf_lbsize); 2812 fe = (struct file_entry *)bp->b_un.b_addr; 2813 if ((bp->b_flags & B_ERROR) || 2814 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY, 2815 ip->i_icb_block, 2816 1, udf_vfsp->udf_lbsize) != 0)) { 2817 if (pp != NULL) 2818 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2819 if (bp->b_flags & B_ERROR) { 2820 error = EIO; 2821 } else { 2822 error = EINVAL; 2823 } 2824 brelse(bp); 2825 return (error); 2826 } 2827 if ((bp->b_error == 0) && 2828 (bp->b_resid == 0)) { 2829 2830 caddr = bp->b_un.b_addr + ip->i_data_off; 2831 kaddr = (caddr_t)ppmapin(pp, 2832 PROT_READ | PROT_WRITE, (caddr_t)-1); 2833 (void) kcopy(kaddr, caddr, ip->i_size); 2834 ppmapout(kaddr); 2835 } 2836 crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) + 2837 SWAP_32(fe->fe_len_ear); 2838 crc_len += ip->i_size; 2839 ud_make_tag(ip->i_udf, &fe->fe_tag, 2840 UD_FILE_ENTRY, ip->i_icb_block, crc_len); 2841 2842 bwrite(bp); 2843 2844 if (flags & B_ASYNC) { 2845 pvn_write_done(pp, flags); 2846 } 2847 contig = ip->i_size; 2848 } else { 2849 2850 if (error = ud_bmap_read(ip, off, &bn, &contig)) { 2851 goto out; 2852 } 2853 contig = MIN(contig, PAGESIZE); 2854 contig = (contig + lbmask) & (~lbmask); 2855 2856 if (contig < PAGESIZE) { 2857 uint64_t count; 2858 2859 count = isize - off; 2860 if (contig != count) { 2861 multi_io = 1; 2862 contig = (int32_t)(MIN(count, PAGESIZE)); 2863 } 2864 } 2865 2866 if ((off + contig) > isize) { 2867 contig = isize - off; 2868 } 2869 2870 if (contig > PAGESIZE) { 2871 if (contig & PAGEOFFSET) { 2872 contig &= PAGEMASK; 2873 } 2874 } 2875 2876 pp = pvn_write_kluster(vp, pp, &io_off, 2877 &io_len, off, contig, flags); 2878 if (io_len == 0) { 2879 io_len = PAGESIZE; 2880 } 2881 2882 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags); 2883 ASSERT(bp != NULL); 2884 2885 bp->b_edev = ip->i_dev; 2886 bp->b_dev = cmpdev(ip->i_dev); 2887 bp->b_blkno = bn; 2888 bp->b_un.b_addr = 0; 2889 bp->b_file = vp; 2890 bp->b_offset = (offset_t)off; 2891 2892 2893 /* 2894 * write throttle 2895 */ 2896 ASSERT(bp->b_iodone == NULL); 2897 bp->b_iodone = ud_iodone; 2898 mutex_enter(&ip->i_tlock); 2899 ip->i_writes += bp->b_bcount; 2900 mutex_exit(&ip->i_tlock); 2901 2902 if (multi_io == 0) { 2903 2904 (void) bdev_strategy(bp); 2905 } else { 2906 error = ud_multi_strat(ip, pp, bp, off); 2907 if (error != 0) { 2908 goto out; 2909 } 2910 } 2911 2912 if ((flags & B_ASYNC) == 0) { 2913 /* 2914 * Wait for i/o to complete. 2915 */ 2916 error = biowait(bp); 2917 pageio_done(bp); 2918 } 2919 } 2920 2921 if ((flags & B_ASYNC) == 0) { 2922 pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags); 2923 } 2924 2925 pp = NULL; 2926 2927 out: 2928 if (error != 0 && pp != NULL) { 2929 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2930 } 2931 2932 if (offp) { 2933 *offp = io_off; 2934 } 2935 if (lenp) { 2936 *lenp = io_len; 2937 } 2938 2939 return (error); 2940 } 2941 2942 2943 int32_t 2944 ud_iodone(struct buf *bp) 2945 { 2946 struct ud_inode *ip; 2947 2948 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ)); 2949 2950 bp->b_iodone = NULL; 2951 2952 ip = VTOI(bp->b_pages->p_vnode); 2953 2954 mutex_enter(&ip->i_tlock); 2955 if (ip->i_writes >= ud_LW) { 2956 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) { 2957 if (ud_WRITES) { 2958 cv_broadcast(&ip->i_wrcv); /* wake all up */ 2959 } 2960 } 2961 } else { 2962 ip->i_writes -= bp->b_bcount; 2963 } 2964 mutex_exit(&ip->i_tlock); 2965 iodone(bp); 2966 return (0); 2967 } 2968 2969 /* ARGSUSED3 */ 2970 int32_t 2971 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr) 2972 { 2973 struct vnode *vp; 2974 struct udf_vfs *udf_vfsp; 2975 krw_t rwtype; 2976 caddr_t base; 2977 uint32_t flags; 2978 int32_t error, n, on, mapon, dofree; 2979 u_offset_t off; 2980 long oresid = uio->uio_resid; 2981 2982 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2983 if ((ip->i_type != VREG) && 2984 (ip->i_type != VDIR) && 2985 (ip->i_type != VLNK)) { 2986 return (EIO); 2987 } 2988 2989 if (uio->uio_loffset > MAXOFFSET_T) { 2990 return (0); 2991 } 2992 2993 if ((uio->uio_loffset < (offset_t)0) || 2994 ((uio->uio_loffset + uio->uio_resid) < 0)) { 2995 return (EINVAL); 2996 } 2997 if (uio->uio_resid == 0) { 2998 return (0); 2999 } 3000 3001 vp = ITOV(ip); 3002 udf_vfsp = ip->i_udf; 3003 mutex_enter(&ip->i_tlock); 3004 ip->i_flag |= IACC; 3005 mutex_exit(&ip->i_tlock); 3006 3007 rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER); 3008 3009 do { 3010 offset_t diff; 3011 u_offset_t uoff = uio->uio_loffset; 3012 off = uoff & (offset_t)MAXBMASK; 3013 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3014 on = (int)blkoff(udf_vfsp, uoff); 3015 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3016 3017 diff = ip->i_size - uoff; 3018 3019 if (diff <= (offset_t)0) { 3020 error = 0; 3021 goto out; 3022 } 3023 if (diff < (offset_t)n) { 3024 n = (int)diff; 3025 } 3026 dofree = ud_freebehind && 3027 ip->i_nextr == (off & PAGEMASK) && 3028 off > ud_smallfile; 3029 3030 #ifndef __lock_lint 3031 if (rwtype == RW_READER) { 3032 rw_exit(&ip->i_contents); 3033 } 3034 #endif 3035 3036 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3037 (uint32_t)n, 1, S_READ); 3038 error = uiomove(base + mapon, (long)n, UIO_READ, uio); 3039 3040 flags = 0; 3041 if (!error) { 3042 /* 3043 * If read a whole block, or read to eof, 3044 * won't need this buffer again soon. 3045 */ 3046 if (n + on == MAXBSIZE && ud_freebehind && dofree && 3047 freemem < lotsfree + pages_before_pager) { 3048 flags = SM_FREE | SM_DONTNEED |SM_ASYNC; 3049 } 3050 /* 3051 * In POSIX SYNC (FSYNC and FDSYNC) read mode, 3052 * we want to make sure that the page which has 3053 * been read, is written on disk if it is dirty. 3054 * And corresponding indirect blocks should also 3055 * be flushed out. 3056 */ 3057 if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) { 3058 flags &= ~SM_ASYNC; 3059 flags |= SM_WRITE; 3060 } 3061 error = segmap_release(segkmap, base, flags); 3062 } else { 3063 (void) segmap_release(segkmap, base, flags); 3064 } 3065 3066 #ifndef __lock_lint 3067 if (rwtype == RW_READER) { 3068 rw_enter(&ip->i_contents, rwtype); 3069 } 3070 #endif 3071 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3072 out: 3073 /* 3074 * Inode is updated according to this table if FRSYNC is set. 3075 * 3076 * FSYNC FDSYNC(posix.4) 3077 * -------------------------- 3078 * always IATTCHG|IBDWRITE 3079 */ 3080 if (ioflag & FRSYNC) { 3081 if ((ioflag & FSYNC) || 3082 ((ioflag & FDSYNC) && (ip->i_flag & (IATTCHG|IBDWRITE)))) { 3083 rw_exit(&ip->i_contents); 3084 rw_enter(&ip->i_contents, RW_WRITER); 3085 ud_iupdat(ip, 1); 3086 } 3087 } 3088 /* 3089 * If we've already done a partial read, terminate 3090 * the read but return no error. 3091 */ 3092 if (oresid != uio->uio_resid) { 3093 error = 0; 3094 } 3095 ITIMES(ip); 3096 3097 return (error); 3098 } 3099 3100 int32_t 3101 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr) 3102 { 3103 caddr_t base; 3104 struct vnode *vp; 3105 struct udf_vfs *udf_vfsp; 3106 uint32_t flags; 3107 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0; 3108 int32_t pagecreate, newpage; 3109 uint64_t old_i_size; 3110 u_offset_t off; 3111 long start_resid = uio->uio_resid, premove_resid; 3112 rlim64_t limit = uio->uio_limit; 3113 3114 3115 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 3116 if ((ip->i_type != VREG) && 3117 (ip->i_type != VDIR) && 3118 (ip->i_type != VLNK)) { 3119 return (EIO); 3120 } 3121 3122 if (uio->uio_loffset >= MAXOFFSET_T) { 3123 return (EFBIG); 3124 } 3125 /* 3126 * see udf_l_pathconf 3127 */ 3128 if (limit > (((uint64_t)1 << 40) - 1)) { 3129 limit = ((uint64_t)1 << 40) - 1; 3130 } 3131 if (uio->uio_loffset >= limit) { 3132 proc_t *p = ttoproc(curthread); 3133 3134 mutex_enter(&p->p_lock); 3135 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 3136 p, RCA_UNSAFE_SIGINFO); 3137 mutex_exit(&p->p_lock); 3138 return (EFBIG); 3139 } 3140 if ((uio->uio_loffset < (offset_t)0) || 3141 ((uio->uio_loffset + uio->uio_resid) < 0)) { 3142 return (EINVAL); 3143 } 3144 if (uio->uio_resid == 0) { 3145 return (0); 3146 } 3147 3148 mutex_enter(&ip->i_tlock); 3149 ip->i_flag |= INOACC; 3150 3151 if (ioflag & (FSYNC | FDSYNC)) { 3152 ip->i_flag |= ISYNC; 3153 iupdat_flag = 1; 3154 } 3155 mutex_exit(&ip->i_tlock); 3156 3157 udf_vfsp = ip->i_udf; 3158 vp = ITOV(ip); 3159 3160 do { 3161 u_offset_t uoff = uio->uio_loffset; 3162 off = uoff & (offset_t)MAXBMASK; 3163 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3164 on = (int)blkoff(udf_vfsp, uoff); 3165 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3166 3167 if (ip->i_type == VREG && uoff + n >= limit) { 3168 if (uoff >= limit) { 3169 error = EFBIG; 3170 goto out; 3171 } 3172 n = (int)(limit - (rlim64_t)uoff); 3173 } 3174 if (uoff + n > ip->i_size) { 3175 /* 3176 * We are extending the length of the file. 3177 * bmap is used so that we are sure that 3178 * if we need to allocate new blocks, that it 3179 * is done here before we up the file size. 3180 */ 3181 error = ud_bmap_write(ip, uoff, 3182 (int)(on + n), mapon == 0, cr); 3183 if (error) { 3184 break; 3185 } 3186 i_size_changed = 1; 3187 old_i_size = ip->i_size; 3188 ip->i_size = uoff + n; 3189 /* 3190 * If we are writing from the beginning of 3191 * the mapping, we can just create the 3192 * pages without having to read them. 3193 */ 3194 pagecreate = (mapon == 0); 3195 } else if (n == MAXBSIZE) { 3196 /* 3197 * Going to do a whole mappings worth, 3198 * so we can just create the pages w/o 3199 * having to read them in. But before 3200 * we do that, we need to make sure any 3201 * needed blocks are allocated first. 3202 */ 3203 error = ud_bmap_write(ip, uoff, 3204 (int)(on + n), 1, cr); 3205 if (error) { 3206 break; 3207 } 3208 pagecreate = 1; 3209 } else { 3210 pagecreate = 0; 3211 } 3212 3213 rw_exit(&ip->i_contents); 3214 3215 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3216 (uint32_t)n, !pagecreate, S_WRITE); 3217 3218 /* 3219 * segmap_pagecreate() returns 1 if it calls 3220 * page_create_va() to allocate any pages. 3221 */ 3222 newpage = 0; 3223 if (pagecreate) { 3224 newpage = segmap_pagecreate(segkmap, base, 3225 (size_t)n, 0); 3226 } 3227 3228 premove_resid = uio->uio_resid; 3229 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); 3230 3231 if (pagecreate && 3232 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) { 3233 /* 3234 * We created pages w/o initializing them completely, 3235 * thus we need to zero the part that wasn't set up. 3236 * This happens on most EOF write cases and if 3237 * we had some sort of error during the uiomove. 3238 */ 3239 int nzero, nmoved; 3240 3241 nmoved = (int)(uio->uio_loffset - (off + mapon)); 3242 ASSERT(nmoved >= 0 && nmoved <= n); 3243 nzero = roundup(on + n, PAGESIZE) - nmoved; 3244 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE); 3245 (void) kzero(base + mapon + nmoved, (uint32_t)nzero); 3246 } 3247 3248 /* 3249 * Unlock the pages allocated by page_create_va() 3250 * in segmap_pagecreate() 3251 */ 3252 if (newpage) { 3253 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE); 3254 } 3255 3256 if (error) { 3257 /* 3258 * If we failed on a write, we may have already 3259 * allocated file blocks as well as pages. It's 3260 * hard to undo the block allocation, but we must 3261 * be sure to invalidate any pages that may have 3262 * been allocated. 3263 */ 3264 (void) segmap_release(segkmap, base, SM_INVAL); 3265 } else { 3266 flags = 0; 3267 /* 3268 * Force write back for synchronous write cases. 3269 */ 3270 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) { 3271 /* 3272 * If the sticky bit is set but the 3273 * execute bit is not set, we do a 3274 * synchronous write back and free 3275 * the page when done. We set up swap 3276 * files to be handled this way to 3277 * prevent servers from keeping around 3278 * the client's swap pages too long. 3279 * XXX - there ought to be a better way. 3280 */ 3281 if (IS_SWAPVP(vp)) { 3282 flags = SM_WRITE | SM_FREE | 3283 SM_DONTNEED; 3284 iupdat_flag = 0; 3285 } else { 3286 flags = SM_WRITE; 3287 } 3288 } else if (((mapon + n) == MAXBSIZE) || 3289 IS_SWAPVP(vp)) { 3290 /* 3291 * Have written a whole block. 3292 * Start an asynchronous write and 3293 * mark the buffer to indicate that 3294 * it won't be needed again soon. 3295 */ 3296 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED; 3297 } 3298 error = segmap_release(segkmap, base, flags); 3299 3300 /* 3301 * If the operation failed and is synchronous, 3302 * then we need to unwind what uiomove() last 3303 * did so we can potentially return an error to 3304 * the caller. If this write operation was 3305 * done in two pieces and the first succeeded, 3306 * then we won't return an error for the second 3307 * piece that failed. However, we only want to 3308 * return a resid value that reflects what was 3309 * really done. 3310 * 3311 * Failures for non-synchronous operations can 3312 * be ignored since the page subsystem will 3313 * retry the operation until it succeeds or the 3314 * file system is unmounted. 3315 */ 3316 if (error) { 3317 if ((ioflag & (FSYNC | FDSYNC)) || 3318 ip->i_type == VDIR) { 3319 uio->uio_resid = premove_resid; 3320 } else { 3321 error = 0; 3322 } 3323 } 3324 } 3325 3326 /* 3327 * Re-acquire contents lock. 3328 */ 3329 rw_enter(&ip->i_contents, RW_WRITER); 3330 /* 3331 * If the uiomove() failed or if a synchronous 3332 * page push failed, fix up i_size. 3333 */ 3334 if (error) { 3335 if (i_size_changed) { 3336 /* 3337 * The uiomove failed, and we 3338 * allocated blocks,so get rid 3339 * of them. 3340 */ 3341 (void) ud_itrunc(ip, old_i_size, 0, cr); 3342 } 3343 } else { 3344 /* 3345 * XXX - Can this be out of the loop? 3346 */ 3347 ip->i_flag |= IUPD | ICHG; 3348 if (i_size_changed) { 3349 ip->i_flag |= IATTCHG; 3350 } 3351 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) | 3352 (IEXEC >> 10))) != 0 && 3353 (ip->i_char & (ISUID | ISGID)) != 0 && 3354 secpolicy_vnode_setid_retain(cr, 3355 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) { 3356 /* 3357 * Clear Set-UID & Set-GID bits on 3358 * successful write if not privileged 3359 * and at least one of the execute bits 3360 * is set. If we always clear Set-GID, 3361 * mandatory file and record locking is 3362 * unuseable. 3363 */ 3364 ip->i_char &= ~(ISUID | ISGID); 3365 } 3366 } 3367 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3368 3369 out: 3370 /* 3371 * Inode is updated according to this table - 3372 * 3373 * FSYNC FDSYNC(posix.4) 3374 * -------------------------- 3375 * always@ IATTCHG|IBDWRITE 3376 * 3377 * @ - If we are doing synchronous write the only time we should 3378 * not be sync'ing the ip here is if we have the stickyhack 3379 * activated, the file is marked with the sticky bit and 3380 * no exec bit, the file length has not been changed and 3381 * no new blocks have been allocated during this write. 3382 */ 3383 if ((ip->i_flag & ISYNC) != 0) { 3384 /* 3385 * we have eliminated nosync 3386 */ 3387 if ((ip->i_flag & (IATTCHG|IBDWRITE)) || 3388 ((ioflag & FSYNC) && iupdat_flag)) { 3389 ud_iupdat(ip, 1); 3390 } 3391 } 3392 3393 /* 3394 * If we've already done a partial-write, terminate 3395 * the write but return no error. 3396 */ 3397 if (start_resid != uio->uio_resid) { 3398 error = 0; 3399 } 3400 ip->i_flag &= ~(INOACC | ISYNC); 3401 ITIMES_NOLOCK(ip); 3402 3403 return (error); 3404 } 3405 3406 int32_t 3407 ud_multi_strat(struct ud_inode *ip, 3408 page_t *pp, struct buf *bp, u_offset_t start) 3409 { 3410 daddr_t bn; 3411 int32_t error = 0, io_count, contig, alloc_sz, i; 3412 uint32_t io_off; 3413 mio_master_t *mm = NULL; 3414 mio_slave_t *ms = NULL; 3415 struct buf *rbp; 3416 3417 ASSERT(!(start & PAGEOFFSET)); 3418 3419 /* 3420 * Figure out how many buffers to allocate 3421 */ 3422 io_count = 0; 3423 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3424 contig = 0; 3425 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off), 3426 &bn, &contig)) { 3427 goto end; 3428 } 3429 if (contig == 0) { 3430 goto end; 3431 } 3432 contig = MIN(contig, PAGESIZE - io_off); 3433 if (bn != UDF_HOLE) { 3434 io_count ++; 3435 } else { 3436 /* 3437 * HOLE 3438 */ 3439 if (bp->b_flags & B_READ) { 3440 3441 /* 3442 * This is a hole and is read 3443 * it should be filled with 0's 3444 */ 3445 pagezero(pp, io_off, contig); 3446 } 3447 } 3448 } 3449 3450 3451 if (io_count != 0) { 3452 3453 /* 3454 * Allocate memory for all the 3455 * required number of buffers 3456 */ 3457 alloc_sz = sizeof (mio_master_t) + 3458 (sizeof (mio_slave_t) * io_count); 3459 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP); 3460 if (mm == NULL) { 3461 error = ENOMEM; 3462 goto end; 3463 } 3464 3465 /* 3466 * initialize master 3467 */ 3468 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL); 3469 mm->mm_size = alloc_sz; 3470 mm->mm_bp = bp; 3471 mm->mm_resid = 0; 3472 mm->mm_error = 0; 3473 mm->mm_index = master_index++; 3474 3475 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3476 3477 /* 3478 * Initialize buffers 3479 */ 3480 io_count = 0; 3481 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3482 contig = 0; 3483 if (error = ud_bmap_read(ip, 3484 (u_offset_t)(start + io_off), 3485 &bn, &contig)) { 3486 goto end; 3487 } 3488 ASSERT(contig); 3489 if ((io_off + contig) > bp->b_bcount) { 3490 contig = bp->b_bcount - io_off; 3491 } 3492 if (bn != UDF_HOLE) { 3493 /* 3494 * Clone the buffer 3495 * and prepare to start I/O 3496 */ 3497 ms->ms_ptr = mm; 3498 bioinit(&ms->ms_buf); 3499 rbp = bioclone(bp, io_off, (size_t)contig, 3500 bp->b_edev, bn, ud_slave_done, 3501 &ms->ms_buf, KM_NOSLEEP); 3502 ASSERT(rbp == &ms->ms_buf); 3503 mm->mm_resid += contig; 3504 io_count++; 3505 ms ++; 3506 } 3507 } 3508 3509 /* 3510 * Start I/O's 3511 */ 3512 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3513 for (i = 0; i < io_count; i++) { 3514 (void) bdev_strategy(&ms->ms_buf); 3515 ms ++; 3516 } 3517 } 3518 3519 end: 3520 if (error != 0) { 3521 bp->b_flags |= B_ERROR; 3522 bp->b_error = error; 3523 if (mm != NULL) { 3524 mutex_destroy(&mm->mm_mutex); 3525 kmem_free(mm, mm->mm_size); 3526 } 3527 } 3528 return (error); 3529 } 3530 3531 int32_t 3532 ud_slave_done(struct buf *bp) 3533 { 3534 mio_master_t *mm; 3535 int32_t resid; 3536 3537 ASSERT(SEMA_HELD(&bp->b_sem)); 3538 ASSERT((bp->b_flags & B_DONE) == 0); 3539 3540 mm = ((mio_slave_t *)bp)->ms_ptr; 3541 3542 /* 3543 * Propagate error and byte count info from slave struct to 3544 * the master struct 3545 */ 3546 mutex_enter(&mm->mm_mutex); 3547 if (bp->b_flags & B_ERROR) { 3548 3549 /* 3550 * If multiple slave buffers get 3551 * error we forget the old errors 3552 * this is ok because we any way 3553 * cannot return multiple errors 3554 */ 3555 mm->mm_error = bp->b_error; 3556 } 3557 mm->mm_resid -= bp->b_bcount; 3558 resid = mm->mm_resid; 3559 mutex_exit(&mm->mm_mutex); 3560 3561 /* 3562 * free up the resources allocated to cloned buffers. 3563 */ 3564 bp_mapout(bp); 3565 biofini(bp); 3566 3567 if (resid == 0) { 3568 3569 /* 3570 * This is the last I/O operation 3571 * clean up and return the original buffer 3572 */ 3573 if (mm->mm_error) { 3574 mm->mm_bp->b_flags |= B_ERROR; 3575 mm->mm_bp->b_error = mm->mm_error; 3576 } 3577 biodone(mm->mm_bp); 3578 mutex_destroy(&mm->mm_mutex); 3579 kmem_free(mm, mm->mm_size); 3580 } 3581 return (0); 3582 } 3583