1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/time.h> 34 #include <sys/systm.h> 35 #include <sys/sysmacros.h> 36 #include <sys/resource.h> 37 #include <sys/signal.h> 38 #include <sys/cred.h> 39 #include <sys/user.h> 40 #include <sys/buf.h> 41 #include <sys/vfs.h> 42 #include <sys/vfs_opreg.h> 43 #include <sys/stat.h> 44 #include <sys/vnode.h> 45 #include <sys/mode.h> 46 #include <sys/proc.h> 47 #include <sys/disp.h> 48 #include <sys/file.h> 49 #include <sys/fcntl.h> 50 #include <sys/flock.h> 51 #include <sys/kmem.h> 52 #include <sys/uio.h> 53 #include <sys/dnlc.h> 54 #include <sys/conf.h> 55 #include <sys/errno.h> 56 #include <sys/mman.h> 57 #include <sys/fbuf.h> 58 #include <sys/pathname.h> 59 #include <sys/debug.h> 60 #include <sys/vmsystm.h> 61 #include <sys/cmn_err.h> 62 #include <sys/dirent.h> 63 #include <sys/errno.h> 64 #include <sys/modctl.h> 65 #include <sys/statvfs.h> 66 #include <sys/mount.h> 67 #include <sys/sunddi.h> 68 #include <sys/bootconf.h> 69 #include <sys/policy.h> 70 71 #include <vm/hat.h> 72 #include <vm/page.h> 73 #include <vm/pvn.h> 74 #include <vm/as.h> 75 #include <vm/seg.h> 76 #include <vm/seg_map.h> 77 #include <vm/seg_kmem.h> 78 #include <vm/seg_vn.h> 79 #include <vm/rm.h> 80 #include <vm/page.h> 81 #include <sys/swap.h> 82 83 #include <fs/fs_subr.h> 84 85 #include <sys/fs/udf_volume.h> 86 #include <sys/fs/udf_inode.h> 87 88 static int32_t udf_open(struct vnode **, 89 int32_t, struct cred *, caller_context_t *); 90 static int32_t udf_close(struct vnode *, 91 int32_t, int32_t, offset_t, struct cred *, caller_context_t *); 92 static int32_t udf_read(struct vnode *, 93 struct uio *, int32_t, struct cred *, caller_context_t *); 94 static int32_t udf_write(struct vnode *, 95 struct uio *, int32_t, struct cred *, caller_context_t *); 96 static int32_t udf_ioctl(struct vnode *, 97 int32_t, intptr_t, int32_t, struct cred *, int32_t *, 98 caller_context_t *); 99 static int32_t udf_getattr(struct vnode *, 100 struct vattr *, int32_t, struct cred *, caller_context_t *); 101 static int32_t udf_setattr(struct vnode *, 102 struct vattr *, int32_t, struct cred *, caller_context_t *); 103 static int32_t udf_access(struct vnode *, 104 int32_t, int32_t, struct cred *, caller_context_t *); 105 static int32_t udf_lookup(struct vnode *, 106 char *, struct vnode **, struct pathname *, 107 int32_t, struct vnode *, struct cred *, 108 caller_context_t *, int *, pathname_t *); 109 static int32_t udf_create(struct vnode *, 110 char *, struct vattr *, enum vcexcl, 111 int32_t, struct vnode **, struct cred *, int32_t, 112 caller_context_t *, vsecattr_t *); 113 static int32_t udf_remove(struct vnode *, 114 char *, struct cred *, caller_context_t *, int); 115 static int32_t udf_link(struct vnode *, 116 struct vnode *, char *, struct cred *, caller_context_t *, int); 117 static int32_t udf_rename(struct vnode *, 118 char *, struct vnode *, char *, struct cred *, caller_context_t *, int); 119 static int32_t udf_mkdir(struct vnode *, 120 char *, struct vattr *, struct vnode **, struct cred *, 121 caller_context_t *, int, vsecattr_t *); 122 static int32_t udf_rmdir(struct vnode *, 123 char *, struct vnode *, struct cred *, caller_context_t *, int); 124 static int32_t udf_readdir(struct vnode *, 125 struct uio *, struct cred *, int32_t *, caller_context_t *, int); 126 static int32_t udf_symlink(struct vnode *, 127 char *, struct vattr *, char *, struct cred *, caller_context_t *, int); 128 static int32_t udf_readlink(struct vnode *, 129 struct uio *, struct cred *, caller_context_t *); 130 static int32_t udf_fsync(struct vnode *, 131 int32_t, struct cred *, caller_context_t *); 132 static void udf_inactive(struct vnode *, 133 struct cred *, caller_context_t *); 134 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *); 135 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *); 136 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *); 137 static int32_t udf_seek(struct vnode *, offset_t, offset_t *, 138 caller_context_t *); 139 static int32_t udf_frlock(struct vnode *, int32_t, 140 struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *, 141 caller_context_t *); 142 static int32_t udf_space(struct vnode *, int32_t, 143 struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *); 144 static int32_t udf_getpage(struct vnode *, offset_t, 145 size_t, uint32_t *, struct page **, size_t, 146 struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *); 147 static int32_t udf_putpage(struct vnode *, offset_t, 148 size_t, int32_t, struct cred *, caller_context_t *); 149 static int32_t udf_map(struct vnode *, offset_t, struct as *, 150 caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *, 151 caller_context_t *); 152 static int32_t udf_addmap(struct vnode *, offset_t, struct as *, 153 caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *, 154 caller_context_t *); 155 static int32_t udf_delmap(struct vnode *, offset_t, struct as *, 156 caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *, 157 caller_context_t *); 158 static int32_t udf_l_pathconf(struct vnode *, int32_t, 159 ulong_t *, struct cred *, caller_context_t *); 160 static int32_t udf_pageio(struct vnode *, struct page *, 161 u_offset_t, size_t, int32_t, struct cred *, caller_context_t *); 162 163 int32_t ud_getpage_miss(struct vnode *, u_offset_t, 164 size_t, struct seg *, caddr_t, page_t *pl[], 165 size_t, enum seg_rw, int32_t); 166 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t); 167 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *); 168 int32_t ud_page_fill(struct ud_inode *, page_t *, 169 u_offset_t, uint32_t, u_offset_t *); 170 int32_t ud_iodone(struct buf *); 171 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *); 172 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *); 173 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t); 174 int32_t ud_slave_done(struct buf *); 175 176 /* 177 * Structures to control multiple IO operations to get or put pages 178 * that are backed by discontiguous blocks. The master struct is 179 * a dummy that holds the original bp from pageio_setup. The 180 * slave struct holds the working bp's to do the actual IO. Once 181 * all the slave IOs complete. The master is processed as if a single 182 * IO op has completed. 183 */ 184 uint32_t master_index = 0; 185 typedef struct mio_master { 186 kmutex_t mm_mutex; /* protect the fields below */ 187 int32_t mm_size; 188 buf_t *mm_bp; /* original bp */ 189 int32_t mm_resid; /* bytes remaining to transfer */ 190 int32_t mm_error; /* accumulated error from slaves */ 191 int32_t mm_index; /* XXX debugging */ 192 } mio_master_t; 193 194 typedef struct mio_slave { 195 buf_t ms_buf; /* working buffer for this IO chunk */ 196 mio_master_t *ms_ptr; /* pointer to master */ 197 } mio_slave_t; 198 199 struct vnodeops *udf_vnodeops; 200 201 const fs_operation_def_t udf_vnodeops_template[] = { 202 VOPNAME_OPEN, { .vop_open = udf_open }, 203 VOPNAME_CLOSE, { .vop_close = udf_close }, 204 VOPNAME_READ, { .vop_read = udf_read }, 205 VOPNAME_WRITE, { .vop_write = udf_write }, 206 VOPNAME_IOCTL, { .vop_ioctl = udf_ioctl }, 207 VOPNAME_GETATTR, { .vop_getattr = udf_getattr }, 208 VOPNAME_SETATTR, { .vop_setattr = udf_setattr }, 209 VOPNAME_ACCESS, { .vop_access = udf_access }, 210 VOPNAME_LOOKUP, { .vop_lookup = udf_lookup }, 211 VOPNAME_CREATE, { .vop_create = udf_create }, 212 VOPNAME_REMOVE, { .vop_remove = udf_remove }, 213 VOPNAME_LINK, { .vop_link = udf_link }, 214 VOPNAME_RENAME, { .vop_rename = udf_rename }, 215 VOPNAME_MKDIR, { .vop_mkdir = udf_mkdir }, 216 VOPNAME_RMDIR, { .vop_rmdir = udf_rmdir }, 217 VOPNAME_READDIR, { .vop_readdir = udf_readdir }, 218 VOPNAME_SYMLINK, { .vop_symlink = udf_symlink }, 219 VOPNAME_READLINK, { .vop_readlink = udf_readlink }, 220 VOPNAME_FSYNC, { .vop_fsync = udf_fsync }, 221 VOPNAME_INACTIVE, { .vop_inactive = udf_inactive }, 222 VOPNAME_FID, { .vop_fid = udf_fid }, 223 VOPNAME_RWLOCK, { .vop_rwlock = udf_rwlock }, 224 VOPNAME_RWUNLOCK, { .vop_rwunlock = udf_rwunlock }, 225 VOPNAME_SEEK, { .vop_seek = udf_seek }, 226 VOPNAME_FRLOCK, { .vop_frlock = udf_frlock }, 227 VOPNAME_SPACE, { .vop_space = udf_space }, 228 VOPNAME_GETPAGE, { .vop_getpage = udf_getpage }, 229 VOPNAME_PUTPAGE, { .vop_putpage = udf_putpage }, 230 VOPNAME_MAP, { .vop_map = udf_map }, 231 VOPNAME_ADDMAP, { .vop_addmap = udf_addmap }, 232 VOPNAME_DELMAP, { .vop_delmap = udf_delmap }, 233 VOPNAME_PATHCONF, { .vop_pathconf = udf_l_pathconf }, 234 VOPNAME_PAGEIO, { .vop_pageio = udf_pageio }, 235 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 236 NULL, NULL 237 }; 238 239 /* ARGSUSED */ 240 static int32_t 241 udf_open( 242 struct vnode **vpp, 243 int32_t flag, 244 struct cred *cr, 245 caller_context_t *ct) 246 { 247 ud_printf("udf_open\n"); 248 249 return (0); 250 } 251 252 /* ARGSUSED */ 253 static int32_t 254 udf_close( 255 struct vnode *vp, 256 int32_t flag, 257 int32_t count, 258 offset_t offset, 259 struct cred *cr, 260 caller_context_t *ct) 261 { 262 struct ud_inode *ip = VTOI(vp); 263 264 ud_printf("udf_close\n"); 265 266 ITIMES(ip); 267 268 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 269 cleanshares(vp, ttoproc(curthread)->p_pid); 270 271 /* 272 * Push partially filled cluster at last close. 273 * ``last close'' is approximated because the dnlc 274 * may have a hold on the vnode. 275 */ 276 if (vp->v_count <= 2 && vp->v_type != VBAD) { 277 struct ud_inode *ip = VTOI(vp); 278 if (ip->i_delaylen) { 279 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen, 280 B_ASYNC | B_FREE, cr); 281 ip->i_delaylen = 0; 282 } 283 } 284 285 return (0); 286 } 287 288 /* ARGSUSED */ 289 static int32_t 290 udf_read( 291 struct vnode *vp, 292 struct uio *uiop, 293 int32_t ioflag, 294 struct cred *cr, 295 caller_context_t *ct) 296 { 297 struct ud_inode *ip = VTOI(vp); 298 int32_t error; 299 300 ud_printf("udf_read\n"); 301 302 #ifdef __lock_lint 303 rw_enter(&ip->i_rwlock, RW_READER); 304 #endif 305 306 ASSERT(RW_READ_HELD(&ip->i_rwlock)); 307 308 if (MANDLOCK(vp, ip->i_char)) { 309 /* 310 * udf_getattr ends up being called by chklock 311 */ 312 error = chklock(vp, FREAD, uiop->uio_loffset, 313 uiop->uio_resid, uiop->uio_fmode, ct); 314 if (error) { 315 goto end; 316 } 317 } 318 319 rw_enter(&ip->i_contents, RW_READER); 320 error = ud_rdip(ip, uiop, ioflag, cr); 321 rw_exit(&ip->i_contents); 322 323 end: 324 #ifdef __lock_lint 325 rw_exit(&ip->i_rwlock); 326 #endif 327 328 return (error); 329 } 330 331 332 int32_t ud_WRITES = 1; 333 int32_t ud_HW = 96 * 1024; 334 int32_t ud_LW = 64 * 1024; 335 int32_t ud_throttles = 0; 336 337 /* ARGSUSED */ 338 static int32_t 339 udf_write( 340 struct vnode *vp, 341 struct uio *uiop, 342 int32_t ioflag, 343 struct cred *cr, 344 caller_context_t *ct) 345 { 346 struct ud_inode *ip = VTOI(vp); 347 int32_t error = 0; 348 349 ud_printf("udf_write\n"); 350 351 #ifdef __lock_lint 352 rw_enter(&ip->i_rwlock, RW_WRITER); 353 #endif 354 355 ASSERT(RW_WRITE_HELD(&ip->i_rwlock)); 356 357 if (MANDLOCK(vp, ip->i_char)) { 358 /* 359 * ud_getattr ends up being called by chklock 360 */ 361 error = chklock(vp, FWRITE, uiop->uio_loffset, 362 uiop->uio_resid, uiop->uio_fmode, ct); 363 if (error) { 364 goto end; 365 } 366 } 367 /* 368 * Throttle writes. 369 */ 370 mutex_enter(&ip->i_tlock); 371 if (ud_WRITES && (ip->i_writes > ud_HW)) { 372 while (ip->i_writes > ud_HW) { 373 ud_throttles++; 374 cv_wait(&ip->i_wrcv, &ip->i_tlock); 375 } 376 } 377 mutex_exit(&ip->i_tlock); 378 379 /* 380 * Write to the file 381 */ 382 rw_enter(&ip->i_contents, RW_WRITER); 383 if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) { 384 /* 385 * In append mode start at end of file. 386 */ 387 uiop->uio_loffset = ip->i_size; 388 } 389 error = ud_wrip(ip, uiop, ioflag, cr); 390 rw_exit(&ip->i_contents); 391 392 end: 393 #ifdef __lock_lint 394 rw_exit(&ip->i_rwlock); 395 #endif 396 397 return (error); 398 } 399 400 /* ARGSUSED */ 401 static int32_t 402 udf_ioctl( 403 struct vnode *vp, 404 int32_t cmd, 405 intptr_t arg, 406 int32_t flag, 407 struct cred *cr, 408 int32_t *rvalp, 409 caller_context_t *ct) 410 { 411 return (ENOTTY); 412 } 413 414 /* ARGSUSED */ 415 static int32_t 416 udf_getattr( 417 struct vnode *vp, 418 struct vattr *vap, 419 int32_t flags, 420 struct cred *cr, 421 caller_context_t *ct) 422 { 423 struct ud_inode *ip = VTOI(vp); 424 425 ud_printf("udf_getattr\n"); 426 427 if (vap->va_mask == AT_SIZE) { 428 /* 429 * for performance, if only the size is requested don't bother 430 * with anything else. 431 */ 432 vap->va_size = ip->i_size; 433 return (0); 434 } 435 436 rw_enter(&ip->i_contents, RW_READER); 437 438 vap->va_type = vp->v_type; 439 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 440 441 vap->va_uid = ip->i_uid; 442 vap->va_gid = ip->i_gid; 443 vap->va_fsid = ip->i_dev; 444 vap->va_nodeid = ip->i_icb_lbano; 445 vap->va_nlink = ip->i_nlink; 446 vap->va_size = ip->i_size; 447 vap->va_seq = ip->i_seq; 448 if (vp->v_type == VCHR || vp->v_type == VBLK) { 449 vap->va_rdev = ip->i_rdev; 450 } else { 451 vap->va_rdev = 0; 452 } 453 454 mutex_enter(&ip->i_tlock); 455 ITIMES_NOLOCK(ip); /* mark correct time in inode */ 456 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec; 457 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec; 458 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec; 459 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec; 460 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec; 461 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec; 462 mutex_exit(&ip->i_tlock); 463 464 switch (ip->i_type) { 465 case VBLK: 466 vap->va_blksize = MAXBSIZE; 467 break; 468 case VCHR: 469 vap->va_blksize = MAXBSIZE; 470 break; 471 default: 472 vap->va_blksize = ip->i_udf->udf_lbsize; 473 break; 474 } 475 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift; 476 477 rw_exit(&ip->i_contents); 478 479 return (0); 480 } 481 482 static int 483 ud_iaccess_vmode(void *ip, int mode, struct cred *cr) 484 { 485 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0)); 486 } 487 488 /*ARGSUSED4*/ 489 static int32_t 490 udf_setattr( 491 struct vnode *vp, 492 struct vattr *vap, 493 int32_t flags, 494 struct cred *cr, 495 caller_context_t *ct) 496 { 497 int32_t error = 0; 498 uint32_t mask = vap->va_mask; 499 struct ud_inode *ip; 500 timestruc_t now; 501 struct vattr ovap; 502 503 ud_printf("udf_setattr\n"); 504 505 ip = VTOI(vp); 506 507 /* 508 * not updates allowed to 4096 files 509 */ 510 if (ip->i_astrat == STRAT_TYPE4096) { 511 return (EINVAL); 512 } 513 514 /* 515 * Cannot set these attributes 516 */ 517 if (mask & AT_NOSET) { 518 return (EINVAL); 519 } 520 521 rw_enter(&ip->i_rwlock, RW_WRITER); 522 rw_enter(&ip->i_contents, RW_WRITER); 523 524 ovap.va_uid = ip->i_uid; 525 ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char; 526 error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags, 527 ud_iaccess_vmode, ip); 528 if (error) 529 goto update_inode; 530 531 mask = vap->va_mask; 532 /* 533 * Change file access modes. 534 */ 535 if (mask & AT_MODE) { 536 ip->i_perm = VA2UD_PERM(vap->va_mode); 537 ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX); 538 mutex_enter(&ip->i_tlock); 539 ip->i_flag |= ICHG; 540 mutex_exit(&ip->i_tlock); 541 } 542 if (mask & (AT_UID|AT_GID)) { 543 if (mask & AT_UID) { 544 ip->i_uid = vap->va_uid; 545 } 546 if (mask & AT_GID) { 547 ip->i_gid = vap->va_gid; 548 } 549 mutex_enter(&ip->i_tlock); 550 ip->i_flag |= ICHG; 551 mutex_exit(&ip->i_tlock); 552 } 553 /* 554 * Truncate file. Must have write permission and not be a directory. 555 */ 556 if (mask & AT_SIZE) { 557 if (vp->v_type == VDIR) { 558 error = EISDIR; 559 goto update_inode; 560 } 561 if (error = ud_iaccess(ip, IWRITE, cr, 0)) { 562 goto update_inode; 563 } 564 if (vap->va_size > MAXOFFSET_T) { 565 error = EFBIG; 566 goto update_inode; 567 } 568 if (error = ud_itrunc(ip, vap->va_size, 0, cr)) { 569 goto update_inode; 570 } 571 572 if (vap->va_size == 0) 573 vnevent_truncate(vp, ct); 574 } 575 /* 576 * Change file access or modified times. 577 */ 578 if (mask & (AT_ATIME|AT_MTIME)) { 579 mutex_enter(&ip->i_tlock); 580 if (mask & AT_ATIME) { 581 ip->i_atime.tv_sec = vap->va_atime.tv_sec; 582 ip->i_atime.tv_nsec = vap->va_atime.tv_nsec; 583 ip->i_flag &= ~IACC; 584 } 585 if (mask & AT_MTIME) { 586 ip->i_mtime.tv_sec = vap->va_mtime.tv_sec; 587 ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec; 588 gethrestime(&now); 589 ip->i_ctime.tv_sec = now.tv_sec; 590 ip->i_ctime.tv_nsec = now.tv_nsec; 591 ip->i_flag &= ~(IUPD|ICHG); 592 ip->i_flag |= IMODTIME; 593 } 594 ip->i_flag |= IMOD; 595 mutex_exit(&ip->i_tlock); 596 } 597 598 update_inode: 599 if (curthread->t_flag & T_DONTPEND) { 600 ud_iupdat(ip, 1); 601 } else { 602 ITIMES_NOLOCK(ip); 603 } 604 rw_exit(&ip->i_contents); 605 rw_exit(&ip->i_rwlock); 606 607 return (error); 608 } 609 610 /* ARGSUSED */ 611 static int32_t 612 udf_access( 613 struct vnode *vp, 614 int32_t mode, 615 int32_t flags, 616 struct cred *cr, 617 caller_context_t *ct) 618 { 619 struct ud_inode *ip = VTOI(vp); 620 621 ud_printf("udf_access\n"); 622 623 if (ip->i_udf == NULL) { 624 return (EIO); 625 } 626 627 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1)); 628 } 629 630 int32_t udfs_stickyhack = 1; 631 632 /* ARGSUSED */ 633 static int32_t 634 udf_lookup( 635 struct vnode *dvp, 636 char *nm, 637 struct vnode **vpp, 638 struct pathname *pnp, 639 int32_t flags, 640 struct vnode *rdir, 641 struct cred *cr, 642 caller_context_t *ct, 643 int *direntflags, 644 pathname_t *realpnp) 645 { 646 int32_t error; 647 struct vnode *vp; 648 struct ud_inode *ip, *xip; 649 650 ud_printf("udf_lookup\n"); 651 /* 652 * Null component name is a synonym for directory being searched. 653 */ 654 if (*nm == '\0') { 655 VN_HOLD(dvp); 656 *vpp = dvp; 657 error = 0; 658 goto out; 659 } 660 661 /* 662 * Fast path: Check the directory name lookup cache. 663 */ 664 ip = VTOI(dvp); 665 if (vp = dnlc_lookup(dvp, nm)) { 666 /* 667 * Check accessibility of directory. 668 */ 669 if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) { 670 VN_RELE(vp); 671 } 672 xip = VTOI(vp); 673 } else { 674 error = ud_dirlook(ip, nm, &xip, cr, 1); 675 ITIMES(ip); 676 } 677 678 if (error == 0) { 679 ip = xip; 680 *vpp = ITOV(ip); 681 if ((ip->i_type != VDIR) && 682 (ip->i_char & ISVTX) && 683 ((ip->i_perm & IEXEC) == 0) && 684 udfs_stickyhack) { 685 mutex_enter(&(*vpp)->v_lock); 686 (*vpp)->v_flag |= VISSWAP; 687 mutex_exit(&(*vpp)->v_lock); 688 } 689 ITIMES(ip); 690 /* 691 * If vnode is a device return special vnode instead. 692 */ 693 if (IS_DEVVP(*vpp)) { 694 struct vnode *newvp; 695 newvp = specvp(*vpp, (*vpp)->v_rdev, 696 (*vpp)->v_type, cr); 697 VN_RELE(*vpp); 698 if (newvp == NULL) { 699 error = ENOSYS; 700 } else { 701 *vpp = newvp; 702 } 703 } 704 } 705 out: 706 return (error); 707 } 708 709 /* ARGSUSED */ 710 static int32_t 711 udf_create( 712 struct vnode *dvp, 713 char *name, 714 struct vattr *vap, 715 enum vcexcl excl, 716 int32_t mode, 717 struct vnode **vpp, 718 struct cred *cr, 719 int32_t flag, 720 caller_context_t *ct, 721 vsecattr_t *vsecp) 722 { 723 int32_t error; 724 struct ud_inode *ip = VTOI(dvp), *xip; 725 726 ud_printf("udf_create\n"); 727 728 if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0) 729 vap->va_mode &= ~VSVTX; 730 731 if (*name == '\0') { 732 /* 733 * Null component name refers to the directory itself. 734 */ 735 VN_HOLD(dvp); 736 ITIMES(ip); 737 error = EEXIST; 738 } else { 739 xip = NULL; 740 rw_enter(&ip->i_rwlock, RW_WRITER); 741 error = ud_direnter(ip, name, DE_CREATE, 742 (struct ud_inode *)0, (struct ud_inode *)0, 743 vap, &xip, cr, ct); 744 rw_exit(&ip->i_rwlock); 745 ITIMES(ip); 746 ip = xip; 747 } 748 #ifdef __lock_lint 749 rw_enter(&ip->i_contents, RW_WRITER); 750 #else 751 if (ip != NULL) { 752 rw_enter(&ip->i_contents, RW_WRITER); 753 } 754 #endif 755 756 /* 757 * If the file already exists and this is a non-exclusive create, 758 * check permissions and allow access for non-directories. 759 * Read-only create of an existing directory is also allowed. 760 * We fail an exclusive create of anything which already exists. 761 */ 762 if (error == EEXIST) { 763 if (excl == NONEXCL) { 764 if ((ip->i_type == VDIR) && (mode & VWRITE)) { 765 error = EISDIR; 766 } else if (mode) { 767 error = ud_iaccess(ip, 768 UD_UPERM2DPERM(mode), cr, 0); 769 } else { 770 error = 0; 771 } 772 } 773 if (error) { 774 rw_exit(&ip->i_contents); 775 VN_RELE(ITOV(ip)); 776 goto out; 777 } else if ((ip->i_type == VREG) && 778 (vap->va_mask & AT_SIZE) && vap->va_size == 0) { 779 /* 780 * Truncate regular files, if requested by caller. 781 * Grab i_rwlock to make sure no one else is 782 * currently writing to the file (we promised 783 * bmap we would do this). 784 * Must get the locks in the correct order. 785 */ 786 if (ip->i_size == 0) { 787 ip->i_flag |= ICHG | IUPD; 788 } else { 789 rw_exit(&ip->i_contents); 790 rw_enter(&ip->i_rwlock, RW_WRITER); 791 rw_enter(&ip->i_contents, RW_WRITER); 792 (void) ud_itrunc(ip, 0, 0, cr); 793 rw_exit(&ip->i_rwlock); 794 } 795 vnevent_create(ITOV(ip), ct); 796 } 797 } 798 799 if (error == 0) { 800 *vpp = ITOV(ip); 801 ITIMES(ip); 802 } 803 #ifdef __lock_lint 804 rw_exit(&ip->i_contents); 805 #else 806 if (ip != NULL) { 807 rw_exit(&ip->i_contents); 808 } 809 #endif 810 if (error) { 811 goto out; 812 } 813 814 /* 815 * If vnode is a device return special vnode instead. 816 */ 817 if (!error && IS_DEVVP(*vpp)) { 818 struct vnode *newvp; 819 820 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 821 VN_RELE(*vpp); 822 if (newvp == NULL) { 823 error = ENOSYS; 824 goto out; 825 } 826 *vpp = newvp; 827 } 828 out: 829 return (error); 830 } 831 832 /* ARGSUSED */ 833 static int32_t 834 udf_remove( 835 struct vnode *vp, 836 char *nm, 837 struct cred *cr, 838 caller_context_t *ct, 839 int flags) 840 { 841 int32_t error; 842 struct ud_inode *ip = VTOI(vp); 843 844 ud_printf("udf_remove\n"); 845 846 rw_enter(&ip->i_rwlock, RW_WRITER); 847 error = ud_dirremove(ip, nm, 848 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct); 849 rw_exit(&ip->i_rwlock); 850 ITIMES(ip); 851 852 return (error); 853 } 854 855 /* ARGSUSED */ 856 static int32_t 857 udf_link( 858 struct vnode *tdvp, 859 struct vnode *svp, 860 char *tnm, 861 struct cred *cr, 862 caller_context_t *ct, 863 int flags) 864 { 865 int32_t error; 866 struct vnode *realvp; 867 struct ud_inode *sip; 868 struct ud_inode *tdp; 869 870 ud_printf("udf_link\n"); 871 if (VOP_REALVP(svp, &realvp, ct) == 0) { 872 svp = realvp; 873 } 874 875 /* 876 * Do not allow links to directories 877 */ 878 if (svp->v_type == VDIR) { 879 return (EPERM); 880 } 881 882 sip = VTOI(svp); 883 884 if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0) 885 return (EPERM); 886 887 tdp = VTOI(tdvp); 888 889 rw_enter(&tdp->i_rwlock, RW_WRITER); 890 error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0, 891 sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct); 892 rw_exit(&tdp->i_rwlock); 893 ITIMES(sip); 894 ITIMES(tdp); 895 896 if (error == 0) { 897 vnevent_link(svp, ct); 898 } 899 900 return (error); 901 } 902 903 /* ARGSUSED */ 904 static int32_t 905 udf_rename( 906 struct vnode *sdvp, 907 char *snm, 908 struct vnode *tdvp, 909 char *tnm, 910 struct cred *cr, 911 caller_context_t *ct, 912 int flags) 913 { 914 int32_t error = 0; 915 struct udf_vfs *udf_vfsp; 916 struct ud_inode *sip; /* source inode */ 917 struct ud_inode *sdp, *tdp; /* source and target parent inode */ 918 struct vnode *realvp; 919 920 ud_printf("udf_rename\n"); 921 922 if (VOP_REALVP(tdvp, &realvp, ct) == 0) { 923 tdvp = realvp; 924 } 925 926 sdp = VTOI(sdvp); 927 tdp = VTOI(tdvp); 928 929 udf_vfsp = sdp->i_udf; 930 931 mutex_enter(&udf_vfsp->udf_rename_lck); 932 /* 933 * Look up inode of file we're supposed to rename. 934 */ 935 if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) { 936 mutex_exit(&udf_vfsp->udf_rename_lck); 937 return (error); 938 } 939 /* 940 * be sure this is not a directory with another file system mounted 941 * over it. If it is just give up the locks, and return with 942 * EBUSY 943 */ 944 if (vn_mountedvfs(ITOV(sip)) != NULL) { 945 error = EBUSY; 946 goto errout; 947 } 948 /* 949 * Make sure we can delete the source entry. This requires 950 * write permission on the containing directory. If that 951 * directory is "sticky" it further requires (except for 952 * privileged users) that the user own the directory or the 953 * source entry, or else have permission to write the source 954 * entry. 955 */ 956 rw_enter(&sdp->i_contents, RW_READER); 957 rw_enter(&sip->i_contents, RW_READER); 958 if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 || 959 (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) { 960 rw_exit(&sip->i_contents); 961 rw_exit(&sdp->i_contents); 962 ITIMES(sip); 963 goto errout; 964 } 965 966 /* 967 * Check for renaming '.' or '..' or alias of '.' 968 */ 969 if ((strcmp(snm, ".") == 0) || 970 (strcmp(snm, "..") == 0) || 971 (sdp == sip)) { 972 error = EINVAL; 973 rw_exit(&sip->i_contents); 974 rw_exit(&sdp->i_contents); 975 goto errout; 976 } 977 rw_exit(&sip->i_contents); 978 rw_exit(&sdp->i_contents); 979 980 981 /* 982 * Link source to the target. 983 */ 984 rw_enter(&tdp->i_rwlock, RW_WRITER); 985 if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip, 986 (struct vattr *)0, (struct ud_inode **)0, cr, ct)) { 987 /* 988 * ESAME isn't really an error; it indicates that the 989 * operation should not be done because the source and target 990 * are the same file, but that no error should be reported. 991 */ 992 if (error == ESAME) { 993 error = 0; 994 } 995 rw_exit(&tdp->i_rwlock); 996 goto errout; 997 } 998 vnevent_rename_src(ITOV(sip), sdvp, snm, ct); 999 rw_exit(&tdp->i_rwlock); 1000 1001 rw_enter(&sdp->i_rwlock, RW_WRITER); 1002 /* 1003 * Unlink the source. 1004 * Remove the source entry. ud_dirremove() checks that the entry 1005 * still reflects sip, and returns an error if it doesn't. 1006 * If the entry has changed just forget about it. Release 1007 * the source inode. 1008 */ 1009 if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0, 1010 DR_RENAME, cr, ct)) == ENOENT) { 1011 error = 0; 1012 } 1013 rw_exit(&sdp->i_rwlock); 1014 errout: 1015 ITIMES(sdp); 1016 ITIMES(tdp); 1017 VN_RELE(ITOV(sip)); 1018 mutex_exit(&udf_vfsp->udf_rename_lck); 1019 1020 return (error); 1021 } 1022 1023 /* ARGSUSED */ 1024 static int32_t 1025 udf_mkdir( 1026 struct vnode *dvp, 1027 char *dirname, 1028 struct vattr *vap, 1029 struct vnode **vpp, 1030 struct cred *cr, 1031 caller_context_t *ct, 1032 int flags, 1033 vsecattr_t *vsecp) 1034 { 1035 int32_t error; 1036 struct ud_inode *ip; 1037 struct ud_inode *xip; 1038 1039 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 1040 1041 ud_printf("udf_mkdir\n"); 1042 1043 ip = VTOI(dvp); 1044 rw_enter(&ip->i_rwlock, RW_WRITER); 1045 error = ud_direnter(ip, dirname, DE_MKDIR, 1046 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct); 1047 rw_exit(&ip->i_rwlock); 1048 ITIMES(ip); 1049 if (error == 0) { 1050 ip = xip; 1051 *vpp = ITOV(ip); 1052 ITIMES(ip); 1053 } else if (error == EEXIST) { 1054 ITIMES(xip); 1055 VN_RELE(ITOV(xip)); 1056 } 1057 1058 return (error); 1059 } 1060 1061 /* ARGSUSED */ 1062 static int32_t 1063 udf_rmdir( 1064 struct vnode *vp, 1065 char *nm, 1066 struct vnode *cdir, 1067 struct cred *cr, 1068 caller_context_t *ct, 1069 int flags) 1070 { 1071 int32_t error; 1072 struct ud_inode *ip = VTOI(vp); 1073 1074 ud_printf("udf_rmdir\n"); 1075 1076 rw_enter(&ip->i_rwlock, RW_WRITER); 1077 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR, 1078 cr, ct); 1079 rw_exit(&ip->i_rwlock); 1080 ITIMES(ip); 1081 1082 return (error); 1083 } 1084 1085 /* ARGSUSED */ 1086 static int32_t 1087 udf_readdir( 1088 struct vnode *vp, 1089 struct uio *uiop, 1090 struct cred *cr, 1091 int32_t *eofp, 1092 caller_context_t *ct, 1093 int flags) 1094 { 1095 struct ud_inode *ip; 1096 struct dirent64 *nd; 1097 struct udf_vfs *udf_vfsp; 1098 int32_t error = 0, len, outcount = 0; 1099 uint32_t dirsiz, offset; 1100 uint32_t bufsize, ndlen, dummy; 1101 caddr_t outbuf; 1102 caddr_t outb, end_outb; 1103 struct iovec *iovp; 1104 1105 uint8_t *dname; 1106 int32_t length; 1107 1108 uint8_t *buf = NULL; 1109 1110 struct fbuf *fbp = NULL; 1111 struct file_id *fid; 1112 uint8_t *name; 1113 1114 1115 ud_printf("udf_readdir\n"); 1116 1117 ip = VTOI(vp); 1118 udf_vfsp = ip->i_udf; 1119 1120 dirsiz = ip->i_size; 1121 if ((uiop->uio_offset >= dirsiz) || 1122 (ip->i_nlink <= 0)) { 1123 if (eofp) { 1124 *eofp = 1; 1125 } 1126 return (0); 1127 } 1128 1129 offset = uiop->uio_offset; 1130 iovp = uiop->uio_iov; 1131 bufsize = iovp->iov_len; 1132 1133 outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP); 1134 end_outb = outb + bufsize; 1135 nd = (struct dirent64 *)outbuf; 1136 1137 dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP); 1138 buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP); 1139 1140 if (offset == 0) { 1141 len = DIRENT64_RECLEN(1); 1142 if (((caddr_t)nd + len) >= end_outb) { 1143 error = EINVAL; 1144 goto end; 1145 } 1146 nd->d_ino = ip->i_icb_lbano; 1147 nd->d_reclen = (uint16_t)len; 1148 nd->d_off = 0x10; 1149 nd->d_name[0] = '.'; 1150 bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1); 1151 nd = (struct dirent64 *)((char *)nd + nd->d_reclen); 1152 outcount++; 1153 } else if (offset == 0x10) { 1154 offset = 0; 1155 } 1156 1157 while (offset < dirsiz) { 1158 error = ud_get_next_fid(ip, &fbp, 1159 offset, &fid, &name, buf); 1160 if (error != 0) { 1161 break; 1162 } 1163 1164 if ((fid->fid_flags & FID_DELETED) == 0) { 1165 if (fid->fid_flags & FID_PARENT) { 1166 1167 len = DIRENT64_RECLEN(2); 1168 if (((caddr_t)nd + len) >= end_outb) { 1169 error = EINVAL; 1170 break; 1171 } 1172 1173 nd->d_ino = ip->i_icb_lbano; 1174 nd->d_reclen = (uint16_t)len; 1175 nd->d_off = offset + FID_LEN(fid); 1176 nd->d_name[0] = '.'; 1177 nd->d_name[1] = '.'; 1178 bzero(&nd->d_name[2], 1179 DIRENT64_NAMELEN(len) - 2); 1180 nd = (struct dirent64 *) 1181 ((char *)nd + nd->d_reclen); 1182 } else { 1183 if ((error = ud_uncompress(fid->fid_idlen, 1184 &length, name, dname)) != 0) { 1185 break; 1186 } 1187 if (length == 0) { 1188 offset += FID_LEN(fid); 1189 continue; 1190 } 1191 len = DIRENT64_RECLEN(length); 1192 if (((caddr_t)nd + len) >= end_outb) { 1193 if (!outcount) { 1194 error = EINVAL; 1195 } 1196 break; 1197 } 1198 (void) strncpy(nd->d_name, 1199 (caddr_t)dname, length); 1200 bzero(&nd->d_name[length], 1201 DIRENT64_NAMELEN(len) - length); 1202 nd->d_ino = ud_xlate_to_daddr(udf_vfsp, 1203 SWAP_16(fid->fid_icb.lad_ext_prn), 1204 SWAP_32(fid->fid_icb.lad_ext_loc), 1, 1205 &dummy); 1206 nd->d_reclen = (uint16_t)len; 1207 nd->d_off = offset + FID_LEN(fid); 1208 nd = (struct dirent64 *) 1209 ((char *)nd + nd->d_reclen); 1210 } 1211 outcount++; 1212 } 1213 1214 offset += FID_LEN(fid); 1215 } 1216 1217 end: 1218 if (fbp != NULL) { 1219 fbrelse(fbp, S_OTHER); 1220 } 1221 ndlen = ((char *)nd - outbuf); 1222 /* 1223 * In case of error do not call uiomove. 1224 * Return the error to the caller. 1225 */ 1226 if ((error == 0) && (ndlen != 0)) { 1227 error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop); 1228 uiop->uio_offset = offset; 1229 } 1230 kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize); 1231 kmem_free((caddr_t)dname, 1024); 1232 kmem_free(outbuf, (uint32_t)bufsize); 1233 if (eofp && error == 0) { 1234 *eofp = (uiop->uio_offset >= dirsiz); 1235 } 1236 return (error); 1237 } 1238 1239 /* ARGSUSED */ 1240 static int32_t 1241 udf_symlink( 1242 struct vnode *dvp, 1243 char *linkname, 1244 struct vattr *vap, 1245 char *target, 1246 struct cred *cr, 1247 caller_context_t *ct, 1248 int flags) 1249 { 1250 int32_t error = 0, outlen; 1251 uint32_t ioflag = 0; 1252 struct ud_inode *ip, *dip = VTOI(dvp); 1253 1254 struct path_comp *pc; 1255 int8_t *dname = NULL, *uname = NULL, *sp; 1256 1257 ud_printf("udf_symlink\n"); 1258 1259 ip = (struct ud_inode *)0; 1260 vap->va_type = VLNK; 1261 vap->va_rdev = 0; 1262 1263 rw_enter(&dip->i_rwlock, RW_WRITER); 1264 error = ud_direnter(dip, linkname, DE_CREATE, 1265 (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct); 1266 rw_exit(&dip->i_rwlock); 1267 if (error == 0) { 1268 dname = kmem_zalloc(1024, KM_SLEEP); 1269 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1270 1271 pc = (struct path_comp *)uname; 1272 /* 1273 * If the first character in target is "/" 1274 * then skip it and create entry for it 1275 */ 1276 if (*target == '/') { 1277 pc->pc_type = 2; 1278 pc->pc_len = 0; 1279 pc = (struct path_comp *)(((char *)pc) + 4); 1280 while (*target == '/') { 1281 target++; 1282 } 1283 } 1284 1285 while (*target != NULL) { 1286 sp = target; 1287 while ((*target != '/') && (*target != '\0')) { 1288 target ++; 1289 } 1290 /* 1291 * We got the next component of the 1292 * path name. Create path_comp of 1293 * appropriate type 1294 */ 1295 if (((target - sp) == 1) && (*sp == '.')) { 1296 /* 1297 * Dot entry. 1298 */ 1299 pc->pc_type = 4; 1300 pc = (struct path_comp *)(((char *)pc) + 4); 1301 } else if (((target - sp) == 2) && 1302 (*sp == '.') && ((*(sp + 1)) == '.')) { 1303 /* 1304 * DotDot entry. 1305 */ 1306 pc->pc_type = 3; 1307 pc = (struct path_comp *)(((char *)pc) + 4); 1308 } else { 1309 /* 1310 * convert the user given name 1311 * into appropriate form to be put 1312 * on the media 1313 */ 1314 outlen = 1024; /* set to size of dname */ 1315 if (error = ud_compress(target - sp, &outlen, 1316 (uint8_t *)sp, (uint8_t *)dname)) { 1317 break; 1318 } 1319 pc->pc_type = 5; 1320 /* LINTED */ 1321 pc->pc_len = outlen; 1322 dname[outlen] = '\0'; 1323 (void) strcpy((char *)pc->pc_id, dname); 1324 pc = (struct path_comp *) 1325 (((char *)pc) + 4 + outlen); 1326 } 1327 while (*target == '/') { 1328 target++; 1329 } 1330 if (*target == NULL) { 1331 break; 1332 } 1333 } 1334 1335 rw_enter(&ip->i_contents, RW_WRITER); 1336 if (error == 0) { 1337 ioflag = FWRITE; 1338 if (curthread->t_flag & T_DONTPEND) { 1339 ioflag |= FDSYNC; 1340 } 1341 error = ud_rdwri(UIO_WRITE, ioflag, ip, 1342 uname, ((int8_t *)pc) - uname, 1343 (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr); 1344 } 1345 if (error) { 1346 ud_idrop(ip); 1347 rw_exit(&ip->i_contents); 1348 rw_enter(&dip->i_rwlock, RW_WRITER); 1349 (void) ud_dirremove(dip, linkname, (struct ud_inode *)0, 1350 (struct vnode *)0, DR_REMOVE, cr, ct); 1351 rw_exit(&dip->i_rwlock); 1352 goto update_inode; 1353 } 1354 rw_exit(&ip->i_contents); 1355 } 1356 1357 if ((error == 0) || (error == EEXIST)) { 1358 VN_RELE(ITOV(ip)); 1359 } 1360 1361 update_inode: 1362 ITIMES(VTOI(dvp)); 1363 if (uname != NULL) { 1364 kmem_free(uname, PAGESIZE); 1365 } 1366 if (dname != NULL) { 1367 kmem_free(dname, 1024); 1368 } 1369 1370 return (error); 1371 } 1372 1373 /* ARGSUSED */ 1374 static int32_t 1375 udf_readlink( 1376 struct vnode *vp, 1377 struct uio *uiop, 1378 struct cred *cr, 1379 caller_context_t *ct) 1380 { 1381 int32_t error = 0, off, id_len, size, len; 1382 int8_t *dname = NULL, *uname = NULL; 1383 struct ud_inode *ip; 1384 struct fbuf *fbp = NULL; 1385 struct path_comp *pc; 1386 1387 ud_printf("udf_readlink\n"); 1388 1389 if (vp->v_type != VLNK) { 1390 return (EINVAL); 1391 } 1392 1393 ip = VTOI(vp); 1394 size = ip->i_size; 1395 if (size > PAGESIZE) { 1396 return (EIO); 1397 } 1398 1399 if (size == 0) { 1400 return (0); 1401 } 1402 1403 dname = kmem_zalloc(1024, KM_SLEEP); 1404 uname = kmem_zalloc(PAGESIZE, KM_SLEEP); 1405 1406 rw_enter(&ip->i_contents, RW_READER); 1407 1408 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) { 1409 goto end; 1410 } 1411 1412 off = 0; 1413 1414 while (off < size) { 1415 pc = (struct path_comp *)(fbp->fb_addr + off); 1416 switch (pc->pc_type) { 1417 case 1 : 1418 (void) strcpy(uname, ip->i_udf->udf_fsmnt); 1419 (void) strcat(uname, "/"); 1420 break; 1421 case 2 : 1422 if (pc->pc_len != 0) { 1423 goto end; 1424 } 1425 uname[0] = '/'; 1426 uname[1] = '\0'; 1427 break; 1428 case 3 : 1429 (void) strcat(uname, "../"); 1430 break; 1431 case 4 : 1432 (void) strcat(uname, "./"); 1433 break; 1434 case 5 : 1435 if ((error = ud_uncompress(pc->pc_len, &id_len, 1436 pc->pc_id, (uint8_t *)dname)) != 0) { 1437 break; 1438 } 1439 dname[id_len] = '\0'; 1440 (void) strcat(uname, dname); 1441 (void) strcat(uname, "/"); 1442 break; 1443 default : 1444 error = EINVAL; 1445 goto end; 1446 } 1447 off += 4 + pc->pc_len; 1448 } 1449 len = strlen(uname) - 1; 1450 if (uname[len] == '/') { 1451 if (len == 0) { 1452 /* 1453 * special case link to / 1454 */ 1455 len = 1; 1456 } else { 1457 uname[len] = '\0'; 1458 } 1459 } 1460 1461 error = uiomove(uname, len, UIO_READ, uiop); 1462 1463 ITIMES(ip); 1464 1465 end: 1466 if (fbp != NULL) { 1467 fbrelse(fbp, S_OTHER); 1468 } 1469 rw_exit(&ip->i_contents); 1470 if (uname != NULL) { 1471 kmem_free(uname, PAGESIZE); 1472 } 1473 if (dname != NULL) { 1474 kmem_free(dname, 1024); 1475 } 1476 return (error); 1477 } 1478 1479 /* ARGSUSED */ 1480 static int32_t 1481 udf_fsync( 1482 struct vnode *vp, 1483 int32_t syncflag, 1484 struct cred *cr, 1485 caller_context_t *ct) 1486 { 1487 int32_t error = 0; 1488 struct ud_inode *ip = VTOI(vp); 1489 1490 ud_printf("udf_fsync\n"); 1491 1492 rw_enter(&ip->i_contents, RW_WRITER); 1493 if (!(IS_SWAPVP(vp))) { 1494 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */ 1495 } 1496 if (error == 0) { 1497 error = ud_sync_indir(ip); 1498 } 1499 ITIMES(ip); /* XXX: is this necessary ??? */ 1500 rw_exit(&ip->i_contents); 1501 1502 return (error); 1503 } 1504 1505 /* ARGSUSED */ 1506 static void 1507 udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 1508 { 1509 ud_printf("udf_iinactive\n"); 1510 1511 ud_iinactive(VTOI(vp), cr); 1512 } 1513 1514 /* ARGSUSED */ 1515 static int32_t 1516 udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1517 { 1518 struct udf_fid *udfidp; 1519 struct ud_inode *ip = VTOI(vp); 1520 1521 ud_printf("udf_fid\n"); 1522 1523 if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) { 1524 fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1525 return (ENOSPC); 1526 } 1527 1528 udfidp = (struct udf_fid *)fidp; 1529 bzero((char *)udfidp, sizeof (struct udf_fid)); 1530 rw_enter(&ip->i_contents, RW_READER); 1531 udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t); 1532 udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff; 1533 udfidp->udfid_prn = ip->i_icb_prn; 1534 udfidp->udfid_icb_lbn = ip->i_icb_block; 1535 rw_exit(&ip->i_contents); 1536 1537 return (0); 1538 } 1539 1540 /* ARGSUSED2 */ 1541 static int 1542 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1543 { 1544 struct ud_inode *ip = VTOI(vp); 1545 1546 ud_printf("udf_rwlock\n"); 1547 1548 if (write_lock) { 1549 rw_enter(&ip->i_rwlock, RW_WRITER); 1550 } else { 1551 rw_enter(&ip->i_rwlock, RW_READER); 1552 } 1553 #ifdef __lock_lint 1554 rw_exit(&ip->i_rwlock); 1555 #endif 1556 return (write_lock); 1557 } 1558 1559 /* ARGSUSED */ 1560 static void 1561 udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp) 1562 { 1563 struct ud_inode *ip = VTOI(vp); 1564 1565 ud_printf("udf_rwunlock\n"); 1566 1567 #ifdef __lock_lint 1568 rw_enter(&ip->i_rwlock, RW_WRITER); 1569 #endif 1570 1571 rw_exit(&ip->i_rwlock); 1572 1573 } 1574 1575 /* ARGSUSED */ 1576 static int32_t 1577 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 1578 { 1579 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 1580 } 1581 1582 static int32_t 1583 udf_frlock( 1584 struct vnode *vp, 1585 int32_t cmd, 1586 struct flock64 *bfp, 1587 int32_t flag, 1588 offset_t offset, 1589 struct flk_callback *flk_cbp, 1590 cred_t *cr, 1591 caller_context_t *ct) 1592 { 1593 struct ud_inode *ip = VTOI(vp); 1594 1595 ud_printf("udf_frlock\n"); 1596 1597 /* 1598 * If file is being mapped, disallow frlock. 1599 * XXX I am not holding tlock while checking i_mapcnt because the 1600 * current locking strategy drops all locks before calling fs_frlock. 1601 * So, mapcnt could change before we enter fs_frlock making is 1602 * meaningless to have held tlock in the first place. 1603 */ 1604 if ((ip->i_mapcnt > 0) && 1605 (MANDLOCK(vp, ip->i_char))) { 1606 return (EAGAIN); 1607 } 1608 1609 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 1610 } 1611 1612 /*ARGSUSED6*/ 1613 static int32_t 1614 udf_space( 1615 struct vnode *vp, 1616 int32_t cmd, 1617 struct flock64 *bfp, 1618 int32_t flag, 1619 offset_t offset, 1620 cred_t *cr, 1621 caller_context_t *ct) 1622 { 1623 int32_t error = 0; 1624 1625 ud_printf("udf_space\n"); 1626 1627 if (cmd != F_FREESP) { 1628 error = EINVAL; 1629 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) { 1630 error = ud_freesp(vp, bfp, flag, cr); 1631 1632 if (error == 0 && bfp->l_start == 0) 1633 vnevent_truncate(vp, ct); 1634 } 1635 1636 return (error); 1637 } 1638 1639 /* ARGSUSED */ 1640 static int32_t 1641 udf_getpage( 1642 struct vnode *vp, 1643 offset_t off, 1644 size_t len, 1645 uint32_t *protp, 1646 struct page **plarr, 1647 size_t plsz, 1648 struct seg *seg, 1649 caddr_t addr, 1650 enum seg_rw rw, 1651 struct cred *cr, 1652 caller_context_t *ct) 1653 { 1654 struct ud_inode *ip = VTOI(vp); 1655 int32_t error, has_holes, beyond_eof, seqmode, dolock; 1656 int32_t pgsize = PAGESIZE; 1657 struct udf_vfs *udf_vfsp = ip->i_udf; 1658 page_t **pl; 1659 u_offset_t pgoff, eoff, uoff; 1660 krw_t rwtype; 1661 caddr_t pgaddr; 1662 1663 ud_printf("udf_getpage\n"); 1664 1665 uoff = (u_offset_t)off; /* type conversion */ 1666 if (protp) { 1667 *protp = PROT_ALL; 1668 } 1669 if (vp->v_flag & VNOMAP) { 1670 return (ENOSYS); 1671 } 1672 seqmode = ip->i_nextr == uoff && rw != S_CREATE; 1673 1674 rwtype = RW_READER; 1675 dolock = (rw_owner(&ip->i_contents) != curthread); 1676 retrylock: 1677 #ifdef __lock_lint 1678 rw_enter(&ip->i_contents, rwtype); 1679 #else 1680 if (dolock) { 1681 rw_enter(&ip->i_contents, rwtype); 1682 } 1683 #endif 1684 1685 /* 1686 * We may be getting called as a side effect of a bmap using 1687 * fbread() when the blocks might be being allocated and the 1688 * size has not yet been up'ed. In this case we want to be 1689 * able to return zero pages if we get back UDF_HOLE from 1690 * calling bmap for a non write case here. We also might have 1691 * to read some frags from the disk into a page if we are 1692 * extending the number of frags for a given lbn in bmap(). 1693 */ 1694 beyond_eof = uoff + len > ip->i_size + PAGEOFFSET; 1695 if (beyond_eof && seg != segkmap) { 1696 #ifdef __lock_lint 1697 rw_exit(&ip->i_contents); 1698 #else 1699 if (dolock) { 1700 rw_exit(&ip->i_contents); 1701 } 1702 #endif 1703 return (EFAULT); 1704 } 1705 1706 /* 1707 * Must hold i_contents lock throughout the call to pvn_getpages 1708 * since locked pages are returned from each call to ud_getapage. 1709 * Must *not* return locked pages and then try for contents lock 1710 * due to lock ordering requirements (inode > page) 1711 */ 1712 1713 has_holes = ud_bmap_has_holes(ip); 1714 1715 if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) { 1716 int32_t blk_size, count; 1717 u_offset_t offset; 1718 1719 /* 1720 * We must acquire the RW_WRITER lock in order to 1721 * call bmap_write(). 1722 */ 1723 if (dolock && rwtype == RW_READER) { 1724 rwtype = RW_WRITER; 1725 1726 if (!rw_tryupgrade(&ip->i_contents)) { 1727 1728 rw_exit(&ip->i_contents); 1729 1730 goto retrylock; 1731 } 1732 } 1733 1734 /* 1735 * May be allocating disk blocks for holes here as 1736 * a result of mmap faults. write(2) does the bmap_write 1737 * in rdip/wrip, not here. We are not dealing with frags 1738 * in this case. 1739 */ 1740 offset = uoff; 1741 while ((offset < uoff + len) && 1742 (offset < ip->i_size)) { 1743 /* 1744 * the variable "bnp" is to simplify the expression for 1745 * the compiler; * just passing in &bn to bmap_write 1746 * causes a compiler "loop" 1747 */ 1748 1749 blk_size = udf_vfsp->udf_lbsize; 1750 if ((offset + blk_size) > ip->i_size) { 1751 count = ip->i_size - offset; 1752 } else { 1753 count = blk_size; 1754 } 1755 error = ud_bmap_write(ip, offset, count, 0, cr); 1756 if (error) { 1757 goto update_inode; 1758 } 1759 offset += count; /* XXX - make this contig */ 1760 } 1761 } 1762 1763 /* 1764 * Can be a reader from now on. 1765 */ 1766 #ifdef __lock_lint 1767 if (rwtype == RW_WRITER) { 1768 rw_downgrade(&ip->i_contents); 1769 } 1770 #else 1771 if (dolock && rwtype == RW_WRITER) { 1772 rw_downgrade(&ip->i_contents); 1773 } 1774 #endif 1775 1776 /* 1777 * We remove PROT_WRITE in cases when the file has UDF holes 1778 * because we don't want to call bmap_read() to check each 1779 * page if it is backed with a disk block. 1780 */ 1781 if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) { 1782 *protp &= ~PROT_WRITE; 1783 } 1784 1785 error = 0; 1786 1787 /* 1788 * The loop looks up pages in the range <off, off + len). 1789 * For each page, we first check if we should initiate an asynchronous 1790 * read ahead before we call page_lookup (we may sleep in page_lookup 1791 * for a previously initiated disk read). 1792 */ 1793 eoff = (uoff + len); 1794 for (pgoff = uoff, pgaddr = addr, pl = plarr; 1795 pgoff < eoff; /* empty */) { 1796 page_t *pp; 1797 u_offset_t nextrio; 1798 se_t se; 1799 1800 se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED); 1801 1802 /* 1803 * Handle async getpage (faultahead) 1804 */ 1805 if (plarr == NULL) { 1806 ip->i_nextrio = pgoff; 1807 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1808 pgoff += pgsize; 1809 pgaddr += pgsize; 1810 continue; 1811 } 1812 1813 /* 1814 * Check if we should initiate read ahead of next cluster. 1815 * We call page_exists only when we need to confirm that 1816 * we have the current page before we initiate the read ahead. 1817 */ 1818 nextrio = ip->i_nextrio; 1819 if (seqmode && 1820 pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio && 1821 nextrio < ip->i_size && page_exists(vp, pgoff)) 1822 ud_getpage_ra(vp, pgoff, seg, pgaddr); 1823 1824 if ((pp = page_lookup(vp, pgoff, se)) != NULL) { 1825 1826 /* 1827 * We found the page in the page cache. 1828 */ 1829 *pl++ = pp; 1830 pgoff += pgsize; 1831 pgaddr += pgsize; 1832 len -= pgsize; 1833 plsz -= pgsize; 1834 } else { 1835 1836 /* 1837 * We have to create the page, or read it from disk. 1838 */ 1839 if (error = ud_getpage_miss(vp, pgoff, len, 1840 seg, pgaddr, pl, plsz, rw, seqmode)) { 1841 goto error_out; 1842 } 1843 1844 while (*pl != NULL) { 1845 pl++; 1846 pgoff += pgsize; 1847 pgaddr += pgsize; 1848 len -= pgsize; 1849 plsz -= pgsize; 1850 } 1851 } 1852 } 1853 1854 /* 1855 * Return pages up to plsz if they are in the page cache. 1856 * We cannot return pages if there is a chance that they are 1857 * backed with a UDF hole and rw is S_WRITE or S_CREATE. 1858 */ 1859 if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) { 1860 1861 ASSERT((protp == NULL) || 1862 !(has_holes && (*protp & PROT_WRITE))); 1863 1864 eoff = pgoff + plsz; 1865 while (pgoff < eoff) { 1866 page_t *pp; 1867 1868 if ((pp = page_lookup_nowait(vp, pgoff, 1869 SE_SHARED)) == NULL) 1870 break; 1871 1872 *pl++ = pp; 1873 pgoff += pgsize; 1874 plsz -= pgsize; 1875 } 1876 } 1877 1878 if (plarr) 1879 *pl = NULL; /* Terminate page list */ 1880 ip->i_nextr = pgoff; 1881 1882 error_out: 1883 if (error && plarr) { 1884 /* 1885 * Release any pages we have locked. 1886 */ 1887 while (pl > &plarr[0]) 1888 page_unlock(*--pl); 1889 1890 plarr[0] = NULL; 1891 } 1892 1893 update_inode: 1894 #ifdef __lock_lint 1895 rw_exit(&ip->i_contents); 1896 #else 1897 if (dolock) { 1898 rw_exit(&ip->i_contents); 1899 } 1900 #endif 1901 1902 /* 1903 * If the inode is not already marked for IACC (in rwip() for read) 1904 * and the inode is not marked for no access time update (in rwip() 1905 * for write) then update the inode access time and mod time now. 1906 */ 1907 mutex_enter(&ip->i_tlock); 1908 if ((ip->i_flag & (IACC | INOACC)) == 0) { 1909 if ((rw != S_OTHER) && (ip->i_type != VDIR)) { 1910 ip->i_flag |= IACC; 1911 } 1912 if (rw == S_WRITE) { 1913 ip->i_flag |= IUPD; 1914 } 1915 ITIMES_NOLOCK(ip); 1916 } 1917 mutex_exit(&ip->i_tlock); 1918 1919 return (error); 1920 } 1921 1922 int32_t ud_delay = 1; 1923 1924 /* ARGSUSED */ 1925 static int32_t 1926 udf_putpage( 1927 struct vnode *vp, 1928 offset_t off, 1929 size_t len, 1930 int32_t flags, 1931 struct cred *cr, 1932 caller_context_t *ct) 1933 { 1934 struct ud_inode *ip; 1935 int32_t error = 0; 1936 1937 ud_printf("udf_putpage\n"); 1938 1939 ip = VTOI(vp); 1940 #ifdef __lock_lint 1941 rw_enter(&ip->i_contents, RW_WRITER); 1942 #endif 1943 1944 if (vp->v_count == 0) { 1945 cmn_err(CE_WARN, "ud_putpage : bad v_count"); 1946 error = EINVAL; 1947 goto out; 1948 } 1949 1950 if (vp->v_flag & VNOMAP) { 1951 error = ENOSYS; 1952 goto out; 1953 } 1954 1955 if (flags & B_ASYNC) { 1956 if (ud_delay && len && 1957 (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) { 1958 mutex_enter(&ip->i_tlock); 1959 1960 /* 1961 * If nobody stalled, start a new cluster. 1962 */ 1963 if (ip->i_delaylen == 0) { 1964 ip->i_delayoff = off; 1965 ip->i_delaylen = len; 1966 mutex_exit(&ip->i_tlock); 1967 goto out; 1968 } 1969 1970 /* 1971 * If we have a full cluster or they are not contig, 1972 * then push last cluster and start over. 1973 */ 1974 if (ip->i_delaylen >= WR_CLUSTSZ(ip) || 1975 ip->i_delayoff + ip->i_delaylen != off) { 1976 u_offset_t doff; 1977 size_t dlen; 1978 1979 doff = ip->i_delayoff; 1980 dlen = ip->i_delaylen; 1981 ip->i_delayoff = off; 1982 ip->i_delaylen = len; 1983 mutex_exit(&ip->i_tlock); 1984 error = ud_putpages(vp, doff, dlen, flags, cr); 1985 /* LMXXX - flags are new val, not old */ 1986 goto out; 1987 } 1988 1989 /* 1990 * There is something there, it's not full, and 1991 * it is contig. 1992 */ 1993 ip->i_delaylen += len; 1994 mutex_exit(&ip->i_tlock); 1995 goto out; 1996 } 1997 1998 /* 1999 * Must have weird flags or we are not clustering. 2000 */ 2001 } 2002 2003 error = ud_putpages(vp, off, len, flags, cr); 2004 2005 out: 2006 #ifdef __lock_lint 2007 rw_exit(&ip->i_contents); 2008 #endif 2009 return (error); 2010 } 2011 2012 /* ARGSUSED */ 2013 static int32_t 2014 udf_map( 2015 struct vnode *vp, 2016 offset_t off, 2017 struct as *as, 2018 caddr_t *addrp, 2019 size_t len, 2020 uint8_t prot, 2021 uint8_t maxprot, 2022 uint32_t flags, 2023 struct cred *cr, 2024 caller_context_t *ct) 2025 { 2026 struct segvn_crargs vn_a; 2027 int32_t error = 0; 2028 2029 ud_printf("udf_map\n"); 2030 2031 if (vp->v_flag & VNOMAP) { 2032 error = ENOSYS; 2033 goto end; 2034 } 2035 2036 if ((off < (offset_t)0) || 2037 ((off + len) < (offset_t)0)) { 2038 error = EINVAL; 2039 goto end; 2040 } 2041 2042 if (vp->v_type != VREG) { 2043 error = ENODEV; 2044 goto end; 2045 } 2046 2047 /* 2048 * If file is being locked, disallow mapping. 2049 */ 2050 if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) { 2051 error = EAGAIN; 2052 goto end; 2053 } 2054 2055 as_rangelock(as); 2056 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 2057 if (error != 0) { 2058 as_rangeunlock(as); 2059 goto end; 2060 } 2061 2062 vn_a.vp = vp; 2063 vn_a.offset = off; 2064 vn_a.type = flags & MAP_TYPE; 2065 vn_a.prot = prot; 2066 vn_a.maxprot = maxprot; 2067 vn_a.cred = cr; 2068 vn_a.amp = NULL; 2069 vn_a.flags = flags & ~MAP_TYPE; 2070 vn_a.szc = 0; 2071 vn_a.lgrp_mem_policy_flags = 0; 2072 2073 error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a); 2074 as_rangeunlock(as); 2075 2076 end: 2077 return (error); 2078 } 2079 2080 /* ARGSUSED */ 2081 static int32_t 2082 udf_addmap(struct vnode *vp, 2083 offset_t off, 2084 struct as *as, 2085 caddr_t addr, 2086 size_t len, 2087 uint8_t prot, 2088 uint8_t maxprot, 2089 uint32_t flags, 2090 struct cred *cr, 2091 caller_context_t *ct) 2092 { 2093 struct ud_inode *ip = VTOI(vp); 2094 2095 ud_printf("udf_addmap\n"); 2096 2097 if (vp->v_flag & VNOMAP) { 2098 return (ENOSYS); 2099 } 2100 2101 mutex_enter(&ip->i_tlock); 2102 ip->i_mapcnt += btopr(len); 2103 mutex_exit(&ip->i_tlock); 2104 2105 return (0); 2106 } 2107 2108 /* ARGSUSED */ 2109 static int32_t 2110 udf_delmap( 2111 struct vnode *vp, offset_t off, 2112 struct as *as, 2113 caddr_t addr, 2114 size_t len, 2115 uint32_t prot, 2116 uint32_t maxprot, 2117 uint32_t flags, 2118 struct cred *cr, 2119 caller_context_t *ct) 2120 { 2121 struct ud_inode *ip = VTOI(vp); 2122 2123 ud_printf("udf_delmap\n"); 2124 2125 if (vp->v_flag & VNOMAP) { 2126 return (ENOSYS); 2127 } 2128 2129 mutex_enter(&ip->i_tlock); 2130 ip->i_mapcnt -= btopr(len); /* Count released mappings */ 2131 ASSERT(ip->i_mapcnt >= 0); 2132 mutex_exit(&ip->i_tlock); 2133 2134 return (0); 2135 } 2136 2137 /* ARGSUSED */ 2138 static int32_t 2139 udf_l_pathconf( 2140 struct vnode *vp, 2141 int32_t cmd, 2142 ulong_t *valp, 2143 struct cred *cr, 2144 caller_context_t *ct) 2145 { 2146 int32_t error = 0; 2147 2148 ud_printf("udf_l_pathconf\n"); 2149 2150 if (cmd == _PC_FILESIZEBITS) { 2151 /* 2152 * udf supports 64 bits as file size 2153 * but there are several other restrictions 2154 * it only supports 32-bit block numbers and 2155 * daddr32_t is only and int32_t so taking these 2156 * into account we can stay just as where ufs is 2157 */ 2158 *valp = 41; 2159 } else if (cmd == _PC_TIMESTAMP_RESOLUTION) { 2160 /* nanosecond timestamp resolution */ 2161 *valp = 1L; 2162 } else { 2163 error = fs_pathconf(vp, cmd, valp, cr, ct); 2164 } 2165 2166 return (error); 2167 } 2168 2169 uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0; 2170 #ifndef __lint 2171 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads)) 2172 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes)) 2173 #endif 2174 /* 2175 * Assumption is that there will not be a pageio request 2176 * to a enbedded file 2177 */ 2178 /* ARGSUSED */ 2179 static int32_t 2180 udf_pageio( 2181 struct vnode *vp, 2182 struct page *pp, 2183 u_offset_t io_off, 2184 size_t io_len, 2185 int32_t flags, 2186 struct cred *cr, 2187 caller_context_t *ct) 2188 { 2189 daddr_t bn; 2190 struct buf *bp; 2191 struct ud_inode *ip = VTOI(vp); 2192 int32_t dolock, error = 0, contig, multi_io; 2193 size_t done_len = 0, cur_len = 0; 2194 page_t *npp = NULL, *opp = NULL, *cpp = pp; 2195 2196 if (pp == NULL) { 2197 return (EINVAL); 2198 } 2199 2200 dolock = (rw_owner(&ip->i_contents) != curthread); 2201 2202 /* 2203 * We need a better check. Ideally, we would use another 2204 * vnodeops so that hlocked and forcibly unmounted file 2205 * systems would return EIO where appropriate and w/o the 2206 * need for these checks. 2207 */ 2208 if (ip->i_udf == NULL) { 2209 return (EIO); 2210 } 2211 2212 #ifdef __lock_lint 2213 rw_enter(&ip->i_contents, RW_READER); 2214 #else 2215 if (dolock) { 2216 rw_enter(&ip->i_contents, RW_READER); 2217 } 2218 #endif 2219 2220 /* 2221 * Break the io request into chunks, one for each contiguous 2222 * stretch of disk blocks in the target file. 2223 */ 2224 while (done_len < io_len) { 2225 ASSERT(cpp); 2226 bp = NULL; 2227 contig = 0; 2228 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len), 2229 &bn, &contig)) { 2230 break; 2231 } 2232 2233 if (bn == UDF_HOLE) { /* No holey swapfiles */ 2234 cmn_err(CE_WARN, "SWAP file has HOLES"); 2235 error = EINVAL; 2236 break; 2237 } 2238 2239 cur_len = MIN(io_len - done_len, contig); 2240 2241 /* 2242 * Check if more than one I/O is 2243 * required to complete the given 2244 * I/O operation 2245 */ 2246 if (ip->i_udf->udf_lbsize < PAGESIZE) { 2247 if (cur_len >= PAGESIZE) { 2248 multi_io = 0; 2249 cur_len &= PAGEMASK; 2250 } else { 2251 multi_io = 1; 2252 cur_len = MIN(io_len - done_len, PAGESIZE); 2253 } 2254 } 2255 page_list_break(&cpp, &npp, btop(cur_len)); 2256 2257 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags); 2258 ASSERT(bp != NULL); 2259 2260 bp->b_edev = ip->i_dev; 2261 bp->b_dev = cmpdev(ip->i_dev); 2262 bp->b_blkno = bn; 2263 bp->b_un.b_addr = (caddr_t)0; 2264 bp->b_file = vp; 2265 bp->b_offset = (offset_t)(io_off + done_len); 2266 2267 /* 2268 * ub.ub_pageios.value.ul++; 2269 */ 2270 if (multi_io == 0) { 2271 (void) bdev_strategy(bp); 2272 } else { 2273 error = ud_multi_strat(ip, cpp, bp, 2274 (u_offset_t)(io_off + done_len)); 2275 if (error != 0) { 2276 pageio_done(bp); 2277 break; 2278 } 2279 } 2280 if (flags & B_READ) { 2281 ud_pageio_reads++; 2282 } else { 2283 ud_pageio_writes++; 2284 } 2285 2286 /* 2287 * If the request is not B_ASYNC, wait for i/o to complete 2288 * and re-assemble the page list to return to the caller. 2289 * If it is B_ASYNC we leave the page list in pieces and 2290 * cleanup() will dispose of them. 2291 */ 2292 if ((flags & B_ASYNC) == 0) { 2293 error = biowait(bp); 2294 pageio_done(bp); 2295 if (error) { 2296 break; 2297 } 2298 page_list_concat(&opp, &cpp); 2299 } 2300 cpp = npp; 2301 npp = NULL; 2302 done_len += cur_len; 2303 } 2304 2305 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len)); 2306 if (error) { 2307 if (flags & B_ASYNC) { 2308 /* Cleanup unprocessed parts of list */ 2309 page_list_concat(&cpp, &npp); 2310 if (flags & B_READ) { 2311 pvn_read_done(cpp, B_ERROR); 2312 } else { 2313 pvn_write_done(cpp, B_ERROR); 2314 } 2315 } else { 2316 /* Re-assemble list and let caller clean up */ 2317 page_list_concat(&opp, &cpp); 2318 page_list_concat(&opp, &npp); 2319 } 2320 } 2321 2322 #ifdef __lock_lint 2323 rw_exit(&ip->i_contents); 2324 #else 2325 if (dolock) { 2326 rw_exit(&ip->i_contents); 2327 } 2328 #endif 2329 return (error); 2330 } 2331 2332 2333 2334 2335 /* -------------------- local functions --------------------------- */ 2336 2337 2338 2339 int32_t 2340 ud_rdwri(enum uio_rw rw, int32_t ioflag, 2341 struct ud_inode *ip, caddr_t base, int32_t len, 2342 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr) 2343 { 2344 int32_t error; 2345 struct uio auio; 2346 struct iovec aiov; 2347 2348 ud_printf("ud_rdwri\n"); 2349 2350 bzero((caddr_t)&auio, sizeof (uio_t)); 2351 bzero((caddr_t)&aiov, sizeof (iovec_t)); 2352 2353 aiov.iov_base = base; 2354 aiov.iov_len = len; 2355 auio.uio_iov = &aiov; 2356 auio.uio_iovcnt = 1; 2357 auio.uio_loffset = offset; 2358 auio.uio_segflg = (int16_t)seg; 2359 auio.uio_resid = len; 2360 2361 if (rw == UIO_WRITE) { 2362 auio.uio_fmode = FWRITE; 2363 auio.uio_extflg = UIO_COPY_DEFAULT; 2364 auio.uio_llimit = curproc->p_fsz_ctl; 2365 error = ud_wrip(ip, &auio, ioflag, cr); 2366 } else { 2367 auio.uio_fmode = FREAD; 2368 auio.uio_extflg = UIO_COPY_CACHED; 2369 auio.uio_llimit = MAXOFFSET_T; 2370 error = ud_rdip(ip, &auio, ioflag, cr); 2371 } 2372 2373 if (aresid) { 2374 *aresid = auio.uio_resid; 2375 } else if (auio.uio_resid) { 2376 error = EIO; 2377 } 2378 return (error); 2379 } 2380 2381 /* 2382 * Free behind hacks. The pager is busted. 2383 * XXX - need to pass the information down to writedone() in a flag like B_SEQ 2384 * or B_FREE_IF_TIGHT_ON_MEMORY. 2385 */ 2386 int32_t ud_freebehind = 1; 2387 int32_t ud_smallfile = 32 * 1024; 2388 2389 /* ARGSUSED */ 2390 int32_t 2391 ud_getpage_miss(struct vnode *vp, u_offset_t off, 2392 size_t len, struct seg *seg, caddr_t addr, page_t *pl[], 2393 size_t plsz, enum seg_rw rw, int32_t seq) 2394 { 2395 struct ud_inode *ip = VTOI(vp); 2396 int32_t err = 0; 2397 size_t io_len; 2398 u_offset_t io_off; 2399 u_offset_t pgoff; 2400 page_t *pp; 2401 2402 pl[0] = NULL; 2403 2404 /* 2405 * Figure out whether the page can be created, or must be 2406 * read from the disk 2407 */ 2408 if (rw == S_CREATE) { 2409 if ((pp = page_create_va(vp, off, 2410 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 2411 cmn_err(CE_WARN, "ud_getpage_miss: page_create"); 2412 return (EINVAL); 2413 } 2414 io_len = PAGESIZE; 2415 } else { 2416 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 2417 &io_len, off, PAGESIZE, 0); 2418 2419 /* 2420 * Some other thread has entered the page. 2421 * ud_getpage will retry page_lookup. 2422 */ 2423 if (pp == NULL) { 2424 return (0); 2425 } 2426 2427 /* 2428 * Fill the page with as much data as we can from the file. 2429 */ 2430 err = ud_page_fill(ip, pp, off, B_READ, &pgoff); 2431 if (err) { 2432 pvn_read_done(pp, B_ERROR); 2433 return (err); 2434 } 2435 2436 /* 2437 * XXX ??? ufs has io_len instead of pgoff below 2438 */ 2439 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2440 2441 /* 2442 * If the file access is sequential, initiate read ahead 2443 * of the next cluster. 2444 */ 2445 if (seq && ip->i_nextrio < ip->i_size) { 2446 ud_getpage_ra(vp, off, seg, addr); 2447 } 2448 } 2449 2450 outmiss: 2451 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw); 2452 return (err); 2453 } 2454 2455 /* ARGSUSED */ 2456 void 2457 ud_getpage_ra(struct vnode *vp, 2458 u_offset_t off, struct seg *seg, caddr_t addr) 2459 { 2460 page_t *pp; 2461 size_t io_len; 2462 struct ud_inode *ip = VTOI(vp); 2463 u_offset_t io_off = ip->i_nextrio, pgoff; 2464 caddr_t addr2 = addr + (io_off - off); 2465 daddr_t bn; 2466 int32_t contig = 0; 2467 2468 /* 2469 * Is this test needed? 2470 */ 2471 2472 if (addr2 >= seg->s_base + seg->s_size) { 2473 return; 2474 } 2475 2476 contig = 0; 2477 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) { 2478 return; 2479 } 2480 2481 pp = pvn_read_kluster(vp, io_off, seg, addr2, 2482 &io_off, &io_len, io_off, PAGESIZE, 1); 2483 2484 /* 2485 * Some other thread has entered the page. 2486 * So no read head done here (ie we will have to and wait 2487 * for the read when needed). 2488 */ 2489 2490 if (pp == NULL) { 2491 return; 2492 } 2493 2494 (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff); 2495 ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK); 2496 } 2497 2498 int 2499 ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off, 2500 uint32_t bflgs, u_offset_t *pg_off) 2501 { 2502 daddr_t bn; 2503 struct buf *bp; 2504 caddr_t kaddr, caddr; 2505 int32_t error = 0, contig = 0, multi_io = 0; 2506 int32_t lbsize = ip->i_udf->udf_lbsize; 2507 int32_t lbmask = ip->i_udf->udf_lbmask; 2508 uint64_t isize; 2509 2510 isize = (ip->i_size + lbmask) & (~lbmask); 2511 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2512 2513 /* 2514 * Embedded file read file_entry 2515 * from buffer cache and copy the required 2516 * portions 2517 */ 2518 bp = ud_bread(ip->i_dev, 2519 ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize); 2520 if ((bp->b_error == 0) && 2521 (bp->b_resid == 0)) { 2522 2523 caddr = bp->b_un.b_addr + ip->i_data_off; 2524 2525 /* 2526 * mapin to kvm 2527 */ 2528 kaddr = (caddr_t)ppmapin(pp, 2529 PROT_READ | PROT_WRITE, (caddr_t)-1); 2530 (void) kcopy(caddr, kaddr, ip->i_size); 2531 2532 /* 2533 * mapout of kvm 2534 */ 2535 ppmapout(kaddr); 2536 } 2537 brelse(bp); 2538 contig = ip->i_size; 2539 } else { 2540 2541 /* 2542 * Get the continuous size and block number 2543 * at offset "off" 2544 */ 2545 if (error = ud_bmap_read(ip, off, &bn, &contig)) 2546 goto out; 2547 contig = MIN(contig, PAGESIZE); 2548 contig = (contig + lbmask) & (~lbmask); 2549 2550 /* 2551 * Zero part of the page which we are not 2552 * going to read from the disk. 2553 */ 2554 2555 if (bn == UDF_HOLE) { 2556 2557 /* 2558 * This is a HOLE. Just zero out 2559 * the page 2560 */ 2561 if (((off + contig) == isize) || 2562 (contig == PAGESIZE)) { 2563 pagezero(pp->p_prev, 0, PAGESIZE); 2564 goto out; 2565 } 2566 } 2567 2568 if (contig < PAGESIZE) { 2569 uint64_t count; 2570 2571 count = isize - off; 2572 if (contig != count) { 2573 multi_io = 1; 2574 contig = (int32_t)(MIN(count, PAGESIZE)); 2575 } else { 2576 pagezero(pp->p_prev, contig, PAGESIZE - contig); 2577 } 2578 } 2579 2580 /* 2581 * Get a bp and initialize it 2582 */ 2583 bp = pageio_setup(pp, contig, ip->i_devvp, bflgs); 2584 ASSERT(bp != NULL); 2585 2586 bp->b_edev = ip->i_dev; 2587 bp->b_dev = cmpdev(ip->i_dev); 2588 bp->b_blkno = bn; 2589 bp->b_un.b_addr = 0; 2590 bp->b_file = ip->i_vnode; 2591 2592 /* 2593 * Start I/O 2594 */ 2595 if (multi_io == 0) { 2596 2597 /* 2598 * Single I/O is sufficient for this page 2599 */ 2600 (void) bdev_strategy(bp); 2601 } else { 2602 2603 /* 2604 * We need to do the I/O in 2605 * piece's 2606 */ 2607 error = ud_multi_strat(ip, pp, bp, off); 2608 if (error != 0) { 2609 goto out; 2610 } 2611 } 2612 if ((bflgs & B_ASYNC) == 0) { 2613 2614 /* 2615 * Wait for i/o to complete. 2616 */ 2617 2618 error = biowait(bp); 2619 pageio_done(bp); 2620 if (error) { 2621 goto out; 2622 } 2623 } 2624 } 2625 if ((off + contig) >= ip->i_size) { 2626 contig = ip->i_size - off; 2627 } 2628 2629 out: 2630 *pg_off = contig; 2631 return (error); 2632 } 2633 2634 int32_t 2635 ud_putpages(struct vnode *vp, offset_t off, 2636 size_t len, int32_t flags, struct cred *cr) 2637 { 2638 struct ud_inode *ip; 2639 page_t *pp; 2640 u_offset_t io_off; 2641 size_t io_len; 2642 u_offset_t eoff; 2643 int32_t err = 0; 2644 int32_t dolock; 2645 2646 ud_printf("ud_putpages\n"); 2647 2648 if (vp->v_count == 0) { 2649 cmn_err(CE_WARN, "ud_putpages: bad v_count"); 2650 return (EINVAL); 2651 } 2652 2653 ip = VTOI(vp); 2654 2655 /* 2656 * Acquire the readers/write inode lock before locking 2657 * any pages in this inode. 2658 * The inode lock is held during i/o. 2659 */ 2660 if (len == 0) { 2661 mutex_enter(&ip->i_tlock); 2662 ip->i_delayoff = ip->i_delaylen = 0; 2663 mutex_exit(&ip->i_tlock); 2664 } 2665 #ifdef __lock_lint 2666 rw_enter(&ip->i_contents, RW_READER); 2667 #else 2668 dolock = (rw_owner(&ip->i_contents) != curthread); 2669 if (dolock) { 2670 rw_enter(&ip->i_contents, RW_READER); 2671 } 2672 #endif 2673 2674 if (!vn_has_cached_data(vp)) { 2675 #ifdef __lock_lint 2676 rw_exit(&ip->i_contents); 2677 #else 2678 if (dolock) { 2679 rw_exit(&ip->i_contents); 2680 } 2681 #endif 2682 return (0); 2683 } 2684 2685 if (len == 0) { 2686 /* 2687 * Search the entire vp list for pages >= off. 2688 */ 2689 err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage, 2690 flags, cr); 2691 } else { 2692 /* 2693 * Loop over all offsets in the range looking for 2694 * pages to deal with. 2695 */ 2696 if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) { 2697 eoff = MIN(off + len, eoff); 2698 } else { 2699 eoff = off + len; 2700 } 2701 2702 for (io_off = off; io_off < eoff; io_off += io_len) { 2703 /* 2704 * If we are not invalidating, synchronously 2705 * freeing or writing pages, use the routine 2706 * page_lookup_nowait() to prevent reclaiming 2707 * them from the free list. 2708 */ 2709 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 2710 pp = page_lookup(vp, io_off, 2711 (flags & (B_INVAL | B_FREE)) ? 2712 SE_EXCL : SE_SHARED); 2713 } else { 2714 pp = page_lookup_nowait(vp, io_off, 2715 (flags & B_FREE) ? SE_EXCL : SE_SHARED); 2716 } 2717 2718 if (pp == NULL || pvn_getdirty(pp, flags) == 0) { 2719 io_len = PAGESIZE; 2720 } else { 2721 2722 err = ud_putapage(vp, pp, 2723 &io_off, &io_len, flags, cr); 2724 if (err != 0) { 2725 break; 2726 } 2727 /* 2728 * "io_off" and "io_len" are returned as 2729 * the range of pages we actually wrote. 2730 * This allows us to skip ahead more quickly 2731 * since several pages may've been dealt 2732 * with by this iteration of the loop. 2733 */ 2734 } 2735 } 2736 } 2737 if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) { 2738 /* 2739 * We have just sync'ed back all the pages on 2740 * the inode, turn off the IMODTIME flag. 2741 */ 2742 mutex_enter(&ip->i_tlock); 2743 ip->i_flag &= ~IMODTIME; 2744 mutex_exit(&ip->i_tlock); 2745 } 2746 #ifdef __lock_lint 2747 rw_exit(&ip->i_contents); 2748 #else 2749 if (dolock) { 2750 rw_exit(&ip->i_contents); 2751 } 2752 #endif 2753 return (err); 2754 } 2755 2756 /* ARGSUSED */ 2757 int32_t 2758 ud_putapage(struct vnode *vp, 2759 page_t *pp, u_offset_t *offp, 2760 size_t *lenp, int32_t flags, struct cred *cr) 2761 { 2762 daddr_t bn; 2763 size_t io_len; 2764 struct ud_inode *ip; 2765 int32_t error = 0, contig, multi_io = 0; 2766 struct udf_vfs *udf_vfsp; 2767 u_offset_t off, io_off; 2768 caddr_t kaddr, caddr; 2769 struct buf *bp = NULL; 2770 int32_t lbmask; 2771 uint64_t isize; 2772 uint16_t crc_len; 2773 struct file_entry *fe; 2774 2775 ud_printf("ud_putapage\n"); 2776 2777 ip = VTOI(vp); 2778 ASSERT(ip); 2779 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2780 lbmask = ip->i_udf->udf_lbmask; 2781 isize = (ip->i_size + lbmask) & (~lbmask); 2782 2783 udf_vfsp = ip->i_udf; 2784 ASSERT(udf_vfsp->udf_flags & UDF_FL_RW); 2785 2786 /* 2787 * If the modified time on the inode has not already been 2788 * set elsewhere (e.g. for write/setattr) we set the time now. 2789 * This gives us approximate modified times for mmap'ed files 2790 * which are modified via stores in the user address space. 2791 */ 2792 if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) { 2793 mutex_enter(&ip->i_tlock); 2794 ip->i_flag |= IUPD; 2795 ITIMES_NOLOCK(ip); 2796 mutex_exit(&ip->i_tlock); 2797 } 2798 2799 2800 /* 2801 * Align the request to a block boundry (for old file systems), 2802 * and go ask bmap() how contiguous things are for this file. 2803 */ 2804 off = pp->p_offset & ~(offset_t)lbmask; 2805 /* block align it */ 2806 2807 2808 if (ip->i_desc_type == ICB_FLAG_ONE_AD) { 2809 ASSERT(ip->i_size <= ip->i_max_emb); 2810 2811 pp = pvn_write_kluster(vp, pp, &io_off, 2812 &io_len, off, PAGESIZE, flags); 2813 if (io_len == 0) { 2814 io_len = PAGESIZE; 2815 } 2816 2817 bp = ud_bread(ip->i_dev, 2818 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift, 2819 udf_vfsp->udf_lbsize); 2820 fe = (struct file_entry *)bp->b_un.b_addr; 2821 if ((bp->b_flags & B_ERROR) || 2822 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY, 2823 ip->i_icb_block, 2824 1, udf_vfsp->udf_lbsize) != 0)) { 2825 if (pp != NULL) 2826 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2827 if (bp->b_flags & B_ERROR) { 2828 error = EIO; 2829 } else { 2830 error = EINVAL; 2831 } 2832 brelse(bp); 2833 return (error); 2834 } 2835 if ((bp->b_error == 0) && 2836 (bp->b_resid == 0)) { 2837 2838 caddr = bp->b_un.b_addr + ip->i_data_off; 2839 kaddr = (caddr_t)ppmapin(pp, 2840 PROT_READ | PROT_WRITE, (caddr_t)-1); 2841 (void) kcopy(kaddr, caddr, ip->i_size); 2842 ppmapout(kaddr); 2843 } 2844 crc_len = offsetof(struct file_entry, fe_spec) + 2845 SWAP_32(fe->fe_len_ear); 2846 crc_len += ip->i_size; 2847 ud_make_tag(ip->i_udf, &fe->fe_tag, 2848 UD_FILE_ENTRY, ip->i_icb_block, crc_len); 2849 2850 bwrite(bp); 2851 2852 if (flags & B_ASYNC) { 2853 pvn_write_done(pp, flags); 2854 } 2855 contig = ip->i_size; 2856 } else { 2857 2858 if (error = ud_bmap_read(ip, off, &bn, &contig)) { 2859 goto out; 2860 } 2861 contig = MIN(contig, PAGESIZE); 2862 contig = (contig + lbmask) & (~lbmask); 2863 2864 if (contig < PAGESIZE) { 2865 uint64_t count; 2866 2867 count = isize - off; 2868 if (contig != count) { 2869 multi_io = 1; 2870 contig = (int32_t)(MIN(count, PAGESIZE)); 2871 } 2872 } 2873 2874 if ((off + contig) > isize) { 2875 contig = isize - off; 2876 } 2877 2878 if (contig > PAGESIZE) { 2879 if (contig & PAGEOFFSET) { 2880 contig &= PAGEMASK; 2881 } 2882 } 2883 2884 pp = pvn_write_kluster(vp, pp, &io_off, 2885 &io_len, off, contig, flags); 2886 if (io_len == 0) { 2887 io_len = PAGESIZE; 2888 } 2889 2890 bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags); 2891 ASSERT(bp != NULL); 2892 2893 bp->b_edev = ip->i_dev; 2894 bp->b_dev = cmpdev(ip->i_dev); 2895 bp->b_blkno = bn; 2896 bp->b_un.b_addr = 0; 2897 bp->b_file = vp; 2898 bp->b_offset = (offset_t)off; 2899 2900 2901 /* 2902 * write throttle 2903 */ 2904 ASSERT(bp->b_iodone == NULL); 2905 bp->b_iodone = ud_iodone; 2906 mutex_enter(&ip->i_tlock); 2907 ip->i_writes += bp->b_bcount; 2908 mutex_exit(&ip->i_tlock); 2909 2910 if (multi_io == 0) { 2911 2912 (void) bdev_strategy(bp); 2913 } else { 2914 error = ud_multi_strat(ip, pp, bp, off); 2915 if (error != 0) { 2916 goto out; 2917 } 2918 } 2919 2920 if ((flags & B_ASYNC) == 0) { 2921 /* 2922 * Wait for i/o to complete. 2923 */ 2924 error = biowait(bp); 2925 pageio_done(bp); 2926 } 2927 } 2928 2929 if ((flags & B_ASYNC) == 0) { 2930 pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags); 2931 } 2932 2933 pp = NULL; 2934 2935 out: 2936 if (error != 0 && pp != NULL) { 2937 pvn_write_done(pp, B_ERROR | B_WRITE | flags); 2938 } 2939 2940 if (offp) { 2941 *offp = io_off; 2942 } 2943 if (lenp) { 2944 *lenp = io_len; 2945 } 2946 2947 return (error); 2948 } 2949 2950 2951 int32_t 2952 ud_iodone(struct buf *bp) 2953 { 2954 struct ud_inode *ip; 2955 2956 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ)); 2957 2958 bp->b_iodone = NULL; 2959 2960 ip = VTOI(bp->b_pages->p_vnode); 2961 2962 mutex_enter(&ip->i_tlock); 2963 if (ip->i_writes >= ud_LW) { 2964 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) { 2965 if (ud_WRITES) { 2966 cv_broadcast(&ip->i_wrcv); /* wake all up */ 2967 } 2968 } 2969 } else { 2970 ip->i_writes -= bp->b_bcount; 2971 } 2972 mutex_exit(&ip->i_tlock); 2973 iodone(bp); 2974 return (0); 2975 } 2976 2977 /* ARGSUSED3 */ 2978 int32_t 2979 ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr) 2980 { 2981 struct vnode *vp; 2982 struct udf_vfs *udf_vfsp; 2983 krw_t rwtype; 2984 caddr_t base; 2985 uint32_t flags; 2986 int32_t error, n, on, mapon, dofree; 2987 u_offset_t off; 2988 long oresid = uio->uio_resid; 2989 2990 ASSERT(RW_LOCK_HELD(&ip->i_contents)); 2991 if ((ip->i_type != VREG) && 2992 (ip->i_type != VDIR) && 2993 (ip->i_type != VLNK)) { 2994 return (EIO); 2995 } 2996 2997 if (uio->uio_loffset > MAXOFFSET_T) { 2998 return (0); 2999 } 3000 3001 if ((uio->uio_loffset < (offset_t)0) || 3002 ((uio->uio_loffset + uio->uio_resid) < 0)) { 3003 return (EINVAL); 3004 } 3005 if (uio->uio_resid == 0) { 3006 return (0); 3007 } 3008 3009 vp = ITOV(ip); 3010 udf_vfsp = ip->i_udf; 3011 mutex_enter(&ip->i_tlock); 3012 ip->i_flag |= IACC; 3013 mutex_exit(&ip->i_tlock); 3014 3015 rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER); 3016 3017 do { 3018 offset_t diff; 3019 u_offset_t uoff = uio->uio_loffset; 3020 off = uoff & (offset_t)MAXBMASK; 3021 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3022 on = (int)blkoff(udf_vfsp, uoff); 3023 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3024 3025 diff = ip->i_size - uoff; 3026 3027 if (diff <= (offset_t)0) { 3028 error = 0; 3029 goto out; 3030 } 3031 if (diff < (offset_t)n) { 3032 n = (int)diff; 3033 } 3034 dofree = ud_freebehind && 3035 ip->i_nextr == (off & PAGEMASK) && 3036 off > ud_smallfile; 3037 3038 #ifndef __lock_lint 3039 if (rwtype == RW_READER) { 3040 rw_exit(&ip->i_contents); 3041 } 3042 #endif 3043 3044 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3045 (uint32_t)n, 1, S_READ); 3046 error = uiomove(base + mapon, (long)n, UIO_READ, uio); 3047 3048 flags = 0; 3049 if (!error) { 3050 /* 3051 * If read a whole block, or read to eof, 3052 * won't need this buffer again soon. 3053 */ 3054 if (n + on == MAXBSIZE && ud_freebehind && dofree && 3055 freemem < lotsfree + pages_before_pager) { 3056 flags = SM_FREE | SM_DONTNEED |SM_ASYNC; 3057 } 3058 /* 3059 * In POSIX SYNC (FSYNC and FDSYNC) read mode, 3060 * we want to make sure that the page which has 3061 * been read, is written on disk if it is dirty. 3062 * And corresponding indirect blocks should also 3063 * be flushed out. 3064 */ 3065 if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) { 3066 flags &= ~SM_ASYNC; 3067 flags |= SM_WRITE; 3068 } 3069 error = segmap_release(segkmap, base, flags); 3070 } else { 3071 (void) segmap_release(segkmap, base, flags); 3072 } 3073 3074 #ifndef __lock_lint 3075 if (rwtype == RW_READER) { 3076 rw_enter(&ip->i_contents, rwtype); 3077 } 3078 #endif 3079 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3080 out: 3081 /* 3082 * Inode is updated according to this table if FRSYNC is set. 3083 * 3084 * FSYNC FDSYNC(posix.4) 3085 * -------------------------- 3086 * always IATTCHG|IBDWRITE 3087 */ 3088 if (ioflag & FRSYNC) { 3089 if ((ioflag & FSYNC) || 3090 ((ioflag & FDSYNC) && 3091 (ip->i_flag & (IATTCHG|IBDWRITE)))) { 3092 rw_exit(&ip->i_contents); 3093 rw_enter(&ip->i_contents, RW_WRITER); 3094 ud_iupdat(ip, 1); 3095 } 3096 } 3097 /* 3098 * If we've already done a partial read, terminate 3099 * the read but return no error. 3100 */ 3101 if (oresid != uio->uio_resid) { 3102 error = 0; 3103 } 3104 ITIMES(ip); 3105 3106 return (error); 3107 } 3108 3109 int32_t 3110 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr) 3111 { 3112 caddr_t base; 3113 struct vnode *vp; 3114 struct udf_vfs *udf_vfsp; 3115 uint32_t flags; 3116 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0; 3117 int32_t pagecreate, newpage; 3118 uint64_t old_i_size; 3119 u_offset_t off; 3120 long start_resid = uio->uio_resid, premove_resid; 3121 rlim64_t limit = uio->uio_limit; 3122 3123 3124 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 3125 if ((ip->i_type != VREG) && 3126 (ip->i_type != VDIR) && 3127 (ip->i_type != VLNK)) { 3128 return (EIO); 3129 } 3130 3131 if (uio->uio_loffset >= MAXOFFSET_T) { 3132 return (EFBIG); 3133 } 3134 /* 3135 * see udf_l_pathconf 3136 */ 3137 if (limit > (((uint64_t)1 << 40) - 1)) { 3138 limit = ((uint64_t)1 << 40) - 1; 3139 } 3140 if (uio->uio_loffset >= limit) { 3141 proc_t *p = ttoproc(curthread); 3142 3143 mutex_enter(&p->p_lock); 3144 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 3145 p, RCA_UNSAFE_SIGINFO); 3146 mutex_exit(&p->p_lock); 3147 return (EFBIG); 3148 } 3149 if ((uio->uio_loffset < (offset_t)0) || 3150 ((uio->uio_loffset + uio->uio_resid) < 0)) { 3151 return (EINVAL); 3152 } 3153 if (uio->uio_resid == 0) { 3154 return (0); 3155 } 3156 3157 mutex_enter(&ip->i_tlock); 3158 ip->i_flag |= INOACC; 3159 3160 if (ioflag & (FSYNC | FDSYNC)) { 3161 ip->i_flag |= ISYNC; 3162 iupdat_flag = 1; 3163 } 3164 mutex_exit(&ip->i_tlock); 3165 3166 udf_vfsp = ip->i_udf; 3167 vp = ITOV(ip); 3168 3169 do { 3170 u_offset_t uoff = uio->uio_loffset; 3171 off = uoff & (offset_t)MAXBMASK; 3172 mapon = (int)(uoff & (offset_t)MAXBOFFSET); 3173 on = (int)blkoff(udf_vfsp, uoff); 3174 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid); 3175 3176 if (ip->i_type == VREG && uoff + n >= limit) { 3177 if (uoff >= limit) { 3178 error = EFBIG; 3179 goto out; 3180 } 3181 n = (int)(limit - (rlim64_t)uoff); 3182 } 3183 if (uoff + n > ip->i_size) { 3184 /* 3185 * We are extending the length of the file. 3186 * bmap is used so that we are sure that 3187 * if we need to allocate new blocks, that it 3188 * is done here before we up the file size. 3189 */ 3190 error = ud_bmap_write(ip, uoff, 3191 (int)(on + n), mapon == 0, cr); 3192 if (error) { 3193 break; 3194 } 3195 i_size_changed = 1; 3196 old_i_size = ip->i_size; 3197 ip->i_size = uoff + n; 3198 /* 3199 * If we are writing from the beginning of 3200 * the mapping, we can just create the 3201 * pages without having to read them. 3202 */ 3203 pagecreate = (mapon == 0); 3204 } else if (n == MAXBSIZE) { 3205 /* 3206 * Going to do a whole mappings worth, 3207 * so we can just create the pages w/o 3208 * having to read them in. But before 3209 * we do that, we need to make sure any 3210 * needed blocks are allocated first. 3211 */ 3212 error = ud_bmap_write(ip, uoff, 3213 (int)(on + n), 1, cr); 3214 if (error) { 3215 break; 3216 } 3217 pagecreate = 1; 3218 } else { 3219 pagecreate = 0; 3220 } 3221 3222 rw_exit(&ip->i_contents); 3223 3224 /* 3225 * Touch the page and fault it in if it is not in 3226 * core before segmap_getmapflt can lock it. This 3227 * is to avoid the deadlock if the buffer is mapped 3228 * to the same file through mmap which we want to 3229 * write to. 3230 */ 3231 uio_prefaultpages((long)n, uio); 3232 3233 base = segmap_getmapflt(segkmap, vp, (off + mapon), 3234 (uint32_t)n, !pagecreate, S_WRITE); 3235 3236 /* 3237 * segmap_pagecreate() returns 1 if it calls 3238 * page_create_va() to allocate any pages. 3239 */ 3240 newpage = 0; 3241 if (pagecreate) { 3242 newpage = segmap_pagecreate(segkmap, base, 3243 (size_t)n, 0); 3244 } 3245 3246 premove_resid = uio->uio_resid; 3247 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); 3248 3249 if (pagecreate && 3250 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) { 3251 /* 3252 * We created pages w/o initializing them completely, 3253 * thus we need to zero the part that wasn't set up. 3254 * This happens on most EOF write cases and if 3255 * we had some sort of error during the uiomove. 3256 */ 3257 int nzero, nmoved; 3258 3259 nmoved = (int)(uio->uio_loffset - (off + mapon)); 3260 ASSERT(nmoved >= 0 && nmoved <= n); 3261 nzero = roundup(on + n, PAGESIZE) - nmoved; 3262 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE); 3263 (void) kzero(base + mapon + nmoved, (uint32_t)nzero); 3264 } 3265 3266 /* 3267 * Unlock the pages allocated by page_create_va() 3268 * in segmap_pagecreate() 3269 */ 3270 if (newpage) { 3271 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE); 3272 } 3273 3274 if (error) { 3275 /* 3276 * If we failed on a write, we may have already 3277 * allocated file blocks as well as pages. It's 3278 * hard to undo the block allocation, but we must 3279 * be sure to invalidate any pages that may have 3280 * been allocated. 3281 */ 3282 (void) segmap_release(segkmap, base, SM_INVAL); 3283 } else { 3284 flags = 0; 3285 /* 3286 * Force write back for synchronous write cases. 3287 */ 3288 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) { 3289 /* 3290 * If the sticky bit is set but the 3291 * execute bit is not set, we do a 3292 * synchronous write back and free 3293 * the page when done. We set up swap 3294 * files to be handled this way to 3295 * prevent servers from keeping around 3296 * the client's swap pages too long. 3297 * XXX - there ought to be a better way. 3298 */ 3299 if (IS_SWAPVP(vp)) { 3300 flags = SM_WRITE | SM_FREE | 3301 SM_DONTNEED; 3302 iupdat_flag = 0; 3303 } else { 3304 flags = SM_WRITE; 3305 } 3306 } else if (((mapon + n) == MAXBSIZE) || 3307 IS_SWAPVP(vp)) { 3308 /* 3309 * Have written a whole block. 3310 * Start an asynchronous write and 3311 * mark the buffer to indicate that 3312 * it won't be needed again soon. 3313 */ 3314 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED; 3315 } 3316 error = segmap_release(segkmap, base, flags); 3317 3318 /* 3319 * If the operation failed and is synchronous, 3320 * then we need to unwind what uiomove() last 3321 * did so we can potentially return an error to 3322 * the caller. If this write operation was 3323 * done in two pieces and the first succeeded, 3324 * then we won't return an error for the second 3325 * piece that failed. However, we only want to 3326 * return a resid value that reflects what was 3327 * really done. 3328 * 3329 * Failures for non-synchronous operations can 3330 * be ignored since the page subsystem will 3331 * retry the operation until it succeeds or the 3332 * file system is unmounted. 3333 */ 3334 if (error) { 3335 if ((ioflag & (FSYNC | FDSYNC)) || 3336 ip->i_type == VDIR) { 3337 uio->uio_resid = premove_resid; 3338 } else { 3339 error = 0; 3340 } 3341 } 3342 } 3343 3344 /* 3345 * Re-acquire contents lock. 3346 */ 3347 rw_enter(&ip->i_contents, RW_WRITER); 3348 /* 3349 * If the uiomove() failed or if a synchronous 3350 * page push failed, fix up i_size. 3351 */ 3352 if (error) { 3353 if (i_size_changed) { 3354 /* 3355 * The uiomove failed, and we 3356 * allocated blocks,so get rid 3357 * of them. 3358 */ 3359 (void) ud_itrunc(ip, old_i_size, 0, cr); 3360 } 3361 } else { 3362 /* 3363 * XXX - Can this be out of the loop? 3364 */ 3365 ip->i_flag |= IUPD | ICHG; 3366 if (i_size_changed) { 3367 ip->i_flag |= IATTCHG; 3368 } 3369 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) | 3370 (IEXEC >> 10))) != 0 && 3371 (ip->i_char & (ISUID | ISGID)) != 0 && 3372 secpolicy_vnode_setid_retain(cr, 3373 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) { 3374 /* 3375 * Clear Set-UID & Set-GID bits on 3376 * successful write if not privileged 3377 * and at least one of the execute bits 3378 * is set. If we always clear Set-GID, 3379 * mandatory file and record locking is 3380 * unuseable. 3381 */ 3382 ip->i_char &= ~(ISUID | ISGID); 3383 } 3384 } 3385 } while (error == 0 && uio->uio_resid > 0 && n != 0); 3386 3387 out: 3388 /* 3389 * Inode is updated according to this table - 3390 * 3391 * FSYNC FDSYNC(posix.4) 3392 * -------------------------- 3393 * always@ IATTCHG|IBDWRITE 3394 * 3395 * @ - If we are doing synchronous write the only time we should 3396 * not be sync'ing the ip here is if we have the stickyhack 3397 * activated, the file is marked with the sticky bit and 3398 * no exec bit, the file length has not been changed and 3399 * no new blocks have been allocated during this write. 3400 */ 3401 if ((ip->i_flag & ISYNC) != 0) { 3402 /* 3403 * we have eliminated nosync 3404 */ 3405 if ((ip->i_flag & (IATTCHG|IBDWRITE)) || 3406 ((ioflag & FSYNC) && iupdat_flag)) { 3407 ud_iupdat(ip, 1); 3408 } 3409 } 3410 3411 /* 3412 * If we've already done a partial-write, terminate 3413 * the write but return no error. 3414 */ 3415 if (start_resid != uio->uio_resid) { 3416 error = 0; 3417 } 3418 ip->i_flag &= ~(INOACC | ISYNC); 3419 ITIMES_NOLOCK(ip); 3420 3421 return (error); 3422 } 3423 3424 int32_t 3425 ud_multi_strat(struct ud_inode *ip, 3426 page_t *pp, struct buf *bp, u_offset_t start) 3427 { 3428 daddr_t bn; 3429 int32_t error = 0, io_count, contig, alloc_sz, i; 3430 uint32_t io_off; 3431 mio_master_t *mm = NULL; 3432 mio_slave_t *ms = NULL; 3433 struct buf *rbp; 3434 3435 ASSERT(!(start & PAGEOFFSET)); 3436 3437 /* 3438 * Figure out how many buffers to allocate 3439 */ 3440 io_count = 0; 3441 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3442 contig = 0; 3443 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off), 3444 &bn, &contig)) { 3445 goto end; 3446 } 3447 if (contig == 0) { 3448 goto end; 3449 } 3450 contig = MIN(contig, PAGESIZE - io_off); 3451 if (bn != UDF_HOLE) { 3452 io_count ++; 3453 } else { 3454 /* 3455 * HOLE 3456 */ 3457 if (bp->b_flags & B_READ) { 3458 3459 /* 3460 * This is a hole and is read 3461 * it should be filled with 0's 3462 */ 3463 pagezero(pp, io_off, contig); 3464 } 3465 } 3466 } 3467 3468 3469 if (io_count != 0) { 3470 3471 /* 3472 * Allocate memory for all the 3473 * required number of buffers 3474 */ 3475 alloc_sz = sizeof (mio_master_t) + 3476 (sizeof (mio_slave_t) * io_count); 3477 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP); 3478 if (mm == NULL) { 3479 error = ENOMEM; 3480 goto end; 3481 } 3482 3483 /* 3484 * initialize master 3485 */ 3486 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL); 3487 mm->mm_size = alloc_sz; 3488 mm->mm_bp = bp; 3489 mm->mm_resid = 0; 3490 mm->mm_error = 0; 3491 mm->mm_index = master_index++; 3492 3493 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3494 3495 /* 3496 * Initialize buffers 3497 */ 3498 io_count = 0; 3499 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) { 3500 contig = 0; 3501 if (error = ud_bmap_read(ip, 3502 (u_offset_t)(start + io_off), 3503 &bn, &contig)) { 3504 goto end; 3505 } 3506 ASSERT(contig); 3507 if ((io_off + contig) > bp->b_bcount) { 3508 contig = bp->b_bcount - io_off; 3509 } 3510 if (bn != UDF_HOLE) { 3511 /* 3512 * Clone the buffer 3513 * and prepare to start I/O 3514 */ 3515 ms->ms_ptr = mm; 3516 bioinit(&ms->ms_buf); 3517 rbp = bioclone(bp, io_off, (size_t)contig, 3518 bp->b_edev, bn, ud_slave_done, 3519 &ms->ms_buf, KM_NOSLEEP); 3520 ASSERT(rbp == &ms->ms_buf); 3521 mm->mm_resid += contig; 3522 io_count++; 3523 ms ++; 3524 } 3525 } 3526 3527 /* 3528 * Start I/O's 3529 */ 3530 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t)); 3531 for (i = 0; i < io_count; i++) { 3532 (void) bdev_strategy(&ms->ms_buf); 3533 ms ++; 3534 } 3535 } 3536 3537 end: 3538 if (error != 0) { 3539 bp->b_flags |= B_ERROR; 3540 bp->b_error = error; 3541 if (mm != NULL) { 3542 mutex_destroy(&mm->mm_mutex); 3543 kmem_free(mm, mm->mm_size); 3544 } 3545 } 3546 return (error); 3547 } 3548 3549 int32_t 3550 ud_slave_done(struct buf *bp) 3551 { 3552 mio_master_t *mm; 3553 int32_t resid; 3554 3555 ASSERT(SEMA_HELD(&bp->b_sem)); 3556 ASSERT((bp->b_flags & B_DONE) == 0); 3557 3558 mm = ((mio_slave_t *)bp)->ms_ptr; 3559 3560 /* 3561 * Propagate error and byte count info from slave struct to 3562 * the master struct 3563 */ 3564 mutex_enter(&mm->mm_mutex); 3565 if (bp->b_flags & B_ERROR) { 3566 3567 /* 3568 * If multiple slave buffers get 3569 * error we forget the old errors 3570 * this is ok because we any way 3571 * cannot return multiple errors 3572 */ 3573 mm->mm_error = bp->b_error; 3574 } 3575 mm->mm_resid -= bp->b_bcount; 3576 resid = mm->mm_resid; 3577 mutex_exit(&mm->mm_mutex); 3578 3579 /* 3580 * free up the resources allocated to cloned buffers. 3581 */ 3582 bp_mapout(bp); 3583 biofini(bp); 3584 3585 if (resid == 0) { 3586 3587 /* 3588 * This is the last I/O operation 3589 * clean up and return the original buffer 3590 */ 3591 if (mm->mm_error) { 3592 mm->mm_bp->b_flags |= B_ERROR; 3593 mm->mm_bp->b_error = mm->mm_error; 3594 } 3595 biodone(mm->mm_bp); 3596 mutex_destroy(&mm->mm_mutex); 3597 kmem_free(mm, mm->mm_size); 3598 } 3599 return (0); 3600 } 3601