1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 25 /* 26 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * Generic vnode operations. 34 */ 35 #include <sys/types.h> 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/errno.h> 39 #include <sys/fcntl.h> 40 #include <sys/flock.h> 41 #include <sys/statvfs.h> 42 #include <sys/vfs.h> 43 #include <sys/vnode.h> 44 #include <sys/proc.h> 45 #include <sys/user.h> 46 #include <sys/unistd.h> 47 #include <sys/cred.h> 48 #include <sys/poll.h> 49 #include <sys/debug.h> 50 #include <sys/cmn_err.h> 51 #include <sys/stream.h> 52 #include <fs/fs_subr.h> 53 #include <sys/acl.h> 54 #include <sys/share.h> 55 #include <sys/file.h> 56 #include <sys/kmem.h> 57 #include <sys/file.h> 58 #include <sys/nbmlock.h> 59 #include <acl/acl_common.h> 60 61 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *); 62 63 /* 64 * Tunable to limit the number of retry to recover from STALE error. 65 */ 66 int fs_estale_retry = 5; 67 68 /* 69 * The associated operation is not supported by the file system. 70 */ 71 int 72 fs_nosys() 73 { 74 return (ENOSYS); 75 } 76 77 /* 78 * The associated operation is invalid (on this vnode). 79 */ 80 int 81 fs_inval() 82 { 83 return (EINVAL); 84 } 85 86 /* 87 * The associated operation is valid only for directories. 88 */ 89 int 90 fs_notdir() 91 { 92 return (ENOTDIR); 93 } 94 95 /* 96 * Free the file system specific resources. For the file systems that 97 * do not support the forced unmount, it will be a nop function. 98 */ 99 100 /*ARGSUSED*/ 101 void 102 fs_freevfs(vfs_t *vfsp) 103 { 104 } 105 106 /* ARGSUSED */ 107 int 108 fs_nosys_map(struct vnode *vp, 109 offset_t off, 110 struct as *as, 111 caddr_t *addrp, 112 size_t len, 113 uchar_t prot, 114 uchar_t maxprot, 115 uint_t flags, 116 struct cred *cr) 117 { 118 return (ENOSYS); 119 } 120 121 /* ARGSUSED */ 122 int 123 fs_nosys_addmap(struct vnode *vp, 124 offset_t off, 125 struct as *as, 126 caddr_t addr, 127 size_t len, 128 uchar_t prot, 129 uchar_t maxprot, 130 uint_t flags, 131 struct cred *cr) 132 { 133 return (ENOSYS); 134 } 135 136 /* ARGSUSED */ 137 int 138 fs_nosys_poll(vnode_t *vp, 139 register short events, 140 int anyyet, 141 register short *reventsp, 142 struct pollhead **phpp) 143 { 144 return (ENOSYS); 145 } 146 147 148 /* 149 * The file system has nothing to sync to disk. However, the 150 * VFS_SYNC operation must not fail. 151 */ 152 /* ARGSUSED */ 153 int 154 fs_sync(struct vfs *vfspp, short flag, cred_t *cr) 155 { 156 return (0); 157 } 158 159 /* 160 * Read/write lock/unlock. Does nothing. 161 */ 162 /* ARGSUSED */ 163 int 164 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp) 165 { 166 return (-1); 167 } 168 169 /* ARGSUSED */ 170 void 171 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp) 172 { 173 } 174 175 /* 176 * Compare two vnodes. 177 */ 178 int 179 fs_cmp(vnode_t *vp1, vnode_t *vp2) 180 { 181 return (vp1 == vp2); 182 } 183 184 /* 185 * No-op seek operation. 186 */ 187 /* ARGSUSED */ 188 int 189 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp) 190 { 191 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 192 } 193 194 /* 195 * File and record locking. 196 */ 197 /* ARGSUSED */ 198 int 199 fs_frlock(register vnode_t *vp, int cmd, struct flock64 *bfp, int flag, 200 offset_t offset, flk_callback_t *flk_cbp, cred_t *cr) 201 { 202 int frcmd; 203 int nlmid; 204 int error = 0; 205 flk_callback_t serialize_callback; 206 int serialize = 0; 207 208 switch (cmd) { 209 210 case F_GETLK: 211 case F_O_GETLK: 212 if (flag & F_REMOTELOCK) { 213 frcmd = RCMDLCK; 214 break; 215 } 216 if (flag & F_PXFSLOCK) { 217 frcmd = PCMDLCK; 218 break; 219 } 220 bfp->l_pid = ttoproc(curthread)->p_pid; 221 bfp->l_sysid = 0; 222 frcmd = 0; 223 break; 224 225 case F_SETLK_NBMAND: 226 /* 227 * Are NBMAND locks allowed on this file? 228 */ 229 if (!vp->v_vfsp || 230 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) { 231 error = EINVAL; 232 goto done; 233 } 234 if (vp->v_type != VREG) { 235 error = EINVAL; 236 goto done; 237 } 238 /*FALLTHROUGH*/ 239 240 case F_SETLK: 241 /* 242 * Check whether there is an NBMAND share reservation that 243 * conflicts with the lock request. 244 */ 245 if (nbl_need_check(vp)) { 246 nbl_start_crit(vp, RW_WRITER); 247 serialize = 1; 248 if (share_blocks_lock(vp, bfp)) { 249 error = EAGAIN; 250 goto done; 251 } 252 } 253 if (flag & F_REMOTELOCK) { 254 frcmd = SETFLCK|RCMDLCK; 255 break; 256 } 257 if (flag & F_PXFSLOCK) { 258 frcmd = SETFLCK|PCMDLCK; 259 break; 260 } 261 bfp->l_pid = ttoproc(curthread)->p_pid; 262 bfp->l_sysid = 0; 263 frcmd = SETFLCK; 264 if (cmd == F_SETLK_NBMAND && 265 (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) { 266 /* would check here for conflict with mapped region */ 267 frcmd |= NBMLCK; 268 } 269 break; 270 271 case F_SETLKW: 272 /* 273 * If there is an NBMAND share reservation that conflicts 274 * with the lock request, block until the conflicting share 275 * reservation goes away. 276 */ 277 if (nbl_need_check(vp)) { 278 nbl_start_crit(vp, RW_WRITER); 279 serialize = 1; 280 if (share_blocks_lock(vp, bfp)) { 281 error = wait_for_share(vp, bfp); 282 if (error != 0) 283 goto done; 284 } 285 } 286 if (flag & F_REMOTELOCK) { 287 frcmd = SETFLCK|SLPFLCK|RCMDLCK; 288 break; 289 } 290 if (flag & F_PXFSLOCK) { 291 frcmd = SETFLCK|SLPFLCK|PCMDLCK; 292 break; 293 } 294 bfp->l_pid = ttoproc(curthread)->p_pid; 295 bfp->l_sysid = 0; 296 frcmd = SETFLCK|SLPFLCK; 297 break; 298 299 case F_HASREMOTELOCKS: 300 nlmid = GETNLMID(bfp->l_sysid); 301 if (nlmid != 0) { /* booted as a cluster */ 302 l_has_rmt(bfp) = 303 cl_flk_has_remote_locks_for_nlmid(vp, nlmid); 304 } else { /* not booted as a cluster */ 305 l_has_rmt(bfp) = flk_has_remote_locks(vp); 306 } 307 308 goto done; 309 310 default: 311 error = EINVAL; 312 goto done; 313 } 314 315 /* 316 * If this is a blocking lock request and we're serializing lock 317 * requests, modify the callback list to leave the critical region 318 * while we're waiting for the lock. 319 */ 320 321 if (serialize && (frcmd & SLPFLCK) != 0) { 322 flk_add_callback(&serialize_callback, 323 frlock_serialize_blocked, vp, flk_cbp); 324 flk_cbp = &serialize_callback; 325 } 326 327 error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp); 328 329 done: 330 if (serialize) 331 nbl_end_crit(vp); 332 333 return (error); 334 } 335 336 /* 337 * Callback when a lock request blocks and we are serializing requests. If 338 * before sleeping, leave the critical region. If after wakeup, reenter 339 * the critical region. 340 */ 341 342 static callb_cpr_t * 343 frlock_serialize_blocked(flk_cb_when_t when, void *infop) 344 { 345 vnode_t *vp = (vnode_t *)infop; 346 347 if (when == FLK_BEFORE_SLEEP) 348 nbl_end_crit(vp); 349 else { 350 nbl_start_crit(vp, RW_WRITER); 351 } 352 353 return (NULL); 354 } 355 356 /* 357 * Allow any flags. 358 */ 359 /* ARGSUSED */ 360 int 361 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr) 362 { 363 return (0); 364 } 365 366 /* 367 * Return the answer requested to poll() for non-device files. 368 * Only POLLIN, POLLRDNORM, and POLLOUT are recognized. 369 */ 370 struct pollhead fs_pollhd; 371 372 /* ARGSUSED */ 373 int 374 fs_poll(vnode_t *vp, 375 register short events, 376 int anyyet, 377 register short *reventsp, 378 struct pollhead **phpp) 379 { 380 *reventsp = 0; 381 if (events & POLLIN) 382 *reventsp |= POLLIN; 383 if (events & POLLRDNORM) 384 *reventsp |= POLLRDNORM; 385 if (events & POLLRDBAND) 386 *reventsp |= POLLRDBAND; 387 if (events & POLLOUT) 388 *reventsp |= POLLOUT; 389 if (events & POLLWRBAND) 390 *reventsp |= POLLWRBAND; 391 *phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL; 392 return (0); 393 } 394 395 /* 396 * POSIX pathconf() support. 397 */ 398 /* ARGSUSED */ 399 int 400 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr) 401 { 402 register ulong_t val; 403 register int error = 0; 404 struct statvfs64 vfsbuf; 405 406 switch (cmd) { 407 408 case _PC_LINK_MAX: 409 val = MAXLINK; 410 break; 411 412 case _PC_MAX_CANON: 413 val = MAX_CANON; 414 break; 415 416 case _PC_MAX_INPUT: 417 val = MAX_INPUT; 418 break; 419 420 case _PC_NAME_MAX: 421 bzero(&vfsbuf, sizeof (vfsbuf)); 422 if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf)) 423 break; 424 val = vfsbuf.f_namemax; 425 break; 426 427 case _PC_PATH_MAX: 428 case _PC_SYMLINK_MAX: 429 val = MAXPATHLEN; 430 break; 431 432 case _PC_PIPE_BUF: 433 val = PIPE_BUF; 434 break; 435 436 case _PC_NO_TRUNC: 437 if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC) 438 val = 1; /* NOTRUNC is enabled for vp */ 439 else 440 val = (ulong_t)-1; 441 break; 442 443 case _PC_VDISABLE: 444 val = _POSIX_VDISABLE; 445 break; 446 447 case _PC_CHOWN_RESTRICTED: 448 if (rstchown) 449 val = rstchown; /* chown restricted enabled */ 450 else 451 val = (ulong_t)-1; 452 break; 453 454 case _PC_FILESIZEBITS: 455 456 /* 457 * If ever we come here it means that underlying file system 458 * does not recognise the command and therefore this 459 * configurable limit cannot be determined. We return -1 460 * and don't change errno. 461 */ 462 463 val = (ulong_t)-1; /* large file support */ 464 break; 465 466 case _PC_ACL_ENABLED: 467 val = 0; 468 break; 469 470 default: 471 error = EINVAL; 472 break; 473 } 474 475 if (error == 0) 476 *valp = val; 477 return (error); 478 } 479 480 /* 481 * Dispose of a page. 482 */ 483 /* ARGSUSED */ 484 void 485 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr) 486 { 487 488 ASSERT(fl == B_FREE || fl == B_INVAL); 489 490 if (fl == B_FREE) 491 page_free(pp, dn); 492 else 493 page_destroy(pp, dn); 494 } 495 496 /* ARGSUSED */ 497 void 498 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr) 499 { 500 cmn_err(CE_PANIC, "fs_nodispose invoked"); 501 } 502 503 /* 504 * fabricate acls for file systems that do not support acls. 505 */ 506 /* ARGSUSED */ 507 int 508 fs_fab_acl(vp, vsecattr, flag, cr) 509 vnode_t *vp; 510 vsecattr_t *vsecattr; 511 int flag; 512 cred_t *cr; 513 { 514 aclent_t *aclentp; 515 ace_t *acep; 516 struct vattr vattr; 517 int error; 518 519 vsecattr->vsa_aclcnt = 0; 520 vsecattr->vsa_aclentp = NULL; 521 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */ 522 vsecattr->vsa_dfaclentp = NULL; 523 524 vattr.va_mask = AT_MODE | AT_UID | AT_GID; 525 if (error = VOP_GETATTR(vp, &vattr, 0, cr)) 526 return (error); 527 528 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) { 529 vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */ 530 vsecattr->vsa_aclentp = kmem_zalloc(4 * sizeof (aclent_t), 531 KM_SLEEP); 532 aclentp = vsecattr->vsa_aclentp; 533 534 aclentp->a_type = USER_OBJ; /* Owner */ 535 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6; 536 aclentp->a_id = vattr.va_uid; /* Really undefined */ 537 aclentp++; 538 539 aclentp->a_type = GROUP_OBJ; /* Group */ 540 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3; 541 aclentp->a_id = vattr.va_gid; /* Really undefined */ 542 aclentp++; 543 544 aclentp->a_type = OTHER_OBJ; /* Other */ 545 aclentp->a_perm = vattr.va_mode & 0007; 546 aclentp->a_id = (gid_t)-1; /* Really undefined */ 547 aclentp++; 548 549 aclentp->a_type = CLASS_OBJ; /* Class */ 550 aclentp->a_perm = (ushort_t)(0007); 551 aclentp->a_id = (gid_t)-1; /* Really undefined */ 552 } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) { 553 vsecattr->vsa_aclcnt = 6; 554 vsecattr->vsa_aclentp = kmem_zalloc(6 * sizeof (ace_t), 555 KM_SLEEP); 556 acep = vsecattr->vsa_aclentp; 557 (void) memcpy(acep, trivial_acl, sizeof (ace_t) * 6); 558 adjust_ace_pair(acep, (vattr.va_mode & 0700) >> 6); 559 adjust_ace_pair(acep + 2, (vattr.va_mode & 0070) >> 3); 560 adjust_ace_pair(acep + 4, vattr.va_mode & 0007); 561 } 562 563 return (0); 564 } 565 566 /* 567 * Common code for implementing DOS share reservations 568 */ 569 /* ARGSUSED4 */ 570 int 571 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr) 572 { 573 int error; 574 575 /* 576 * Make sure that the file was opened with permissions appropriate 577 * for the request, and make sure the caller isn't trying to sneak 578 * in an NBMAND request. 579 */ 580 if (cmd == F_SHARE) { 581 if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) || 582 ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0)) 583 return (EBADF); 584 if (shr->s_deny & F_MANDDNY) 585 return (EINVAL); 586 } 587 if (cmd == F_SHARE_NBMAND) { 588 /* must have write permission to deny read access */ 589 if ((shr->s_deny & F_RDDNY) && (flag & FWRITE) == 0) 590 return (EBADF); 591 /* make sure nbmand is allowed on the file */ 592 if (!vp->v_vfsp || 593 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) { 594 return (EINVAL); 595 } 596 if (vp->v_type != VREG) { 597 return (EINVAL); 598 } 599 } 600 601 nbl_start_crit(vp, RW_WRITER); 602 603 switch (cmd) { 604 605 case F_SHARE_NBMAND: 606 shr->s_deny |= F_MANDDNY; 607 /*FALLTHROUGH*/ 608 case F_SHARE: 609 error = add_share(vp, shr); 610 break; 611 612 case F_UNSHARE: 613 error = del_share(vp, shr); 614 break; 615 616 case F_HASREMOTELOCKS: 617 /* 618 * We are overloading this command to refer to remote 619 * shares as well as remote locks, despite its name. 620 */ 621 shr->s_access = shr_has_remote_shares(vp, shr->s_sysid); 622 error = 0; 623 break; 624 625 default: 626 error = EINVAL; 627 break; 628 } 629 630 nbl_end_crit(vp); 631 return (error); 632 } 633 634 /*ARGSUSED1*/ 635 int 636 fs_vnevent_nosupport(vnode_t *vp, vnevent_t vnevent) 637 { 638 ASSERT(vp != NULL); 639 return (ENOTSUP); 640 } 641 642 /*ARGSUSED1*/ 643 int 644 fs_vnevent_support(vnode_t *vp, vnevent_t vnevent) 645 { 646 ASSERT(vp != NULL); 647 return (0); 648 } 649 650 /* 651 * return 1 for non-trivial ACL. 652 * 653 * NB: It is not necessary for the caller to VOP_RWLOCK since 654 * we only issue VOP_GETSECATTR. 655 * 656 * Returns 0 == trivial 657 * 1 == NOT Trivial 658 * <0 could not determine. 659 */ 660 int 661 fs_acl_nontrivial(vnode_t *vp, cred_t *cr) 662 { 663 ulong_t acl_styles; 664 ulong_t acl_flavor; 665 vsecattr_t vsecattr; 666 int error; 667 int isnontrivial; 668 669 /* determine the forms of ACLs maintained */ 670 error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr); 671 672 /* clear bits we don't understand and establish default acl_style */ 673 acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED); 674 if (error || (acl_styles == 0)) 675 acl_styles = _ACL_ACLENT_ENABLED; 676 677 vsecattr.vsa_aclentp = NULL; 678 vsecattr.vsa_dfaclentp = NULL; 679 vsecattr.vsa_aclcnt = 0; 680 vsecattr.vsa_dfaclcnt = 0; 681 682 while (acl_styles) { 683 /* select one of the styles as current flavor */ 684 acl_flavor = 0; 685 if (acl_styles & _ACL_ACLENT_ENABLED) { 686 acl_flavor = _ACL_ACLENT_ENABLED; 687 vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT; 688 } else if (acl_styles & _ACL_ACE_ENABLED) { 689 acl_flavor = _ACL_ACE_ENABLED; 690 vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE; 691 } 692 693 ASSERT(vsecattr.vsa_mask && acl_flavor); 694 error = VOP_GETSECATTR(vp, &vsecattr, 0, cr); 695 if (error == 0) 696 break; 697 698 /* that flavor failed */ 699 acl_styles &= ~acl_flavor; 700 } 701 702 /* if all styles fail then assume trivial */ 703 if (acl_styles == 0) 704 return (0); 705 706 /* process the flavor that worked */ 707 isnontrivial = 0; 708 if (acl_flavor & _ACL_ACLENT_ENABLED) { 709 if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES) 710 isnontrivial = 1; 711 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL) 712 kmem_free(vsecattr.vsa_aclentp, 713 vsecattr.vsa_aclcnt * sizeof (aclent_t)); 714 if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL) 715 kmem_free(vsecattr.vsa_dfaclentp, 716 vsecattr.vsa_dfaclcnt * sizeof (aclent_t)); 717 } 718 if (acl_flavor & _ACL_ACE_ENABLED) { 719 720 isnontrivial = ace_trivial(vsecattr.vsa_aclentp, 721 vsecattr.vsa_aclcnt); 722 723 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL) 724 kmem_free(vsecattr.vsa_aclentp, 725 vsecattr.vsa_aclcnt * sizeof (ace_t)); 726 /* ACE has no vsecattr.vsa_dfaclcnt */ 727 } 728 return (isnontrivial); 729 } 730 731 /* 732 * Check whether we need a retry to recover from STALE error. 733 */ 734 int 735 fs_need_estale_retry(int retry_count) 736 { 737 if (retry_count < fs_estale_retry) 738 return (1); 739 else 740 return (0); 741 } 742