1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 23 /* All Rights Reserved */ 24 25 26 /* 27 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 /* 34 * Generic vnode operations. 35 */ 36 #include <sys/types.h> 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/errno.h> 40 #include <sys/fcntl.h> 41 #include <sys/flock.h> 42 #include <sys/statvfs.h> 43 #include <sys/vfs.h> 44 #include <sys/vnode.h> 45 #include <sys/proc.h> 46 #include <sys/user.h> 47 #include <sys/unistd.h> 48 #include <sys/cred.h> 49 #include <sys/poll.h> 50 #include <sys/debug.h> 51 #include <sys/cmn_err.h> 52 #include <sys/stream.h> 53 #include <fs/fs_subr.h> 54 #include <sys/acl.h> 55 #include <sys/share.h> 56 #include <sys/file.h> 57 #include <sys/kmem.h> 58 #include <sys/file.h> 59 #include <sys/nbmlock.h> 60 #include <acl/acl_common.h> 61 62 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *); 63 64 /* 65 * The associated operation is not supported by the file system. 66 */ 67 int 68 fs_nosys() 69 { 70 return (ENOSYS); 71 } 72 73 /* 74 * The associated operation is invalid (on this vnode). 75 */ 76 int 77 fs_inval() 78 { 79 return (EINVAL); 80 } 81 82 /* 83 * The associated operation is valid only for directories. 84 */ 85 int 86 fs_notdir() 87 { 88 return (ENOTDIR); 89 } 90 91 /* 92 * Free the file system specific resources. For the file systems that 93 * do not support the forced unmount, it will be a nop function. 94 */ 95 96 /*ARGSUSED*/ 97 void 98 fs_freevfs(vfs_t *vfsp) 99 { 100 } 101 102 /* ARGSUSED */ 103 int 104 fs_nosys_map(struct vnode *vp, 105 offset_t off, 106 struct as *as, 107 caddr_t *addrp, 108 size_t len, 109 uchar_t prot, 110 uchar_t maxprot, 111 uint_t flags, 112 struct cred *cr) 113 { 114 return (ENOSYS); 115 } 116 117 /* ARGSUSED */ 118 int 119 fs_nosys_addmap(struct vnode *vp, 120 offset_t off, 121 struct as *as, 122 caddr_t addr, 123 size_t len, 124 uchar_t prot, 125 uchar_t maxprot, 126 uint_t flags, 127 struct cred *cr) 128 { 129 return (ENOSYS); 130 } 131 132 /* ARGSUSED */ 133 int 134 fs_nosys_poll(vnode_t *vp, 135 register short events, 136 int anyyet, 137 register short *reventsp, 138 struct pollhead **phpp) 139 { 140 return (ENOSYS); 141 } 142 143 144 /* 145 * The file system has nothing to sync to disk. However, the 146 * VFS_SYNC operation must not fail. 147 */ 148 /* ARGSUSED */ 149 int 150 fs_sync(struct vfs *vfspp, short flag, cred_t *cr) 151 { 152 return (0); 153 } 154 155 /* 156 * Read/write lock/unlock. Does nothing. 157 */ 158 /* ARGSUSED */ 159 int 160 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp) 161 { 162 return (-1); 163 } 164 165 /* ARGSUSED */ 166 void 167 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp) 168 { 169 } 170 171 /* 172 * Compare two vnodes. 173 */ 174 int 175 fs_cmp(vnode_t *vp1, vnode_t *vp2) 176 { 177 return (vp1 == vp2); 178 } 179 180 /* 181 * No-op seek operation. 182 */ 183 /* ARGSUSED */ 184 int 185 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp) 186 { 187 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 188 } 189 190 /* 191 * File and record locking. 192 */ 193 /* ARGSUSED */ 194 int 195 fs_frlock(register vnode_t *vp, int cmd, struct flock64 *bfp, int flag, 196 offset_t offset, flk_callback_t *flk_cbp, cred_t *cr) 197 { 198 int frcmd; 199 int nlmid; 200 int error = 0; 201 flk_callback_t serialize_callback; 202 int serialize = 0; 203 204 switch (cmd) { 205 206 case F_GETLK: 207 case F_O_GETLK: 208 if (flag & F_REMOTELOCK) { 209 frcmd = RCMDLCK; 210 break; 211 } 212 if (flag & F_PXFSLOCK) { 213 frcmd = PCMDLCK; 214 break; 215 } 216 bfp->l_pid = ttoproc(curthread)->p_pid; 217 bfp->l_sysid = 0; 218 frcmd = 0; 219 break; 220 221 case F_SETLK_NBMAND: 222 /* 223 * Are NBMAND locks allowed on this file? 224 */ 225 if (!vp->v_vfsp || 226 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) { 227 error = EINVAL; 228 goto done; 229 } 230 if (vp->v_type != VREG) { 231 error = EINVAL; 232 goto done; 233 } 234 /*FALLTHROUGH*/ 235 236 case F_SETLK: 237 /* 238 * Check whether there is an NBMAND share reservation that 239 * conflicts with the lock request. 240 */ 241 if (nbl_need_check(vp)) { 242 nbl_start_crit(vp, RW_WRITER); 243 serialize = 1; 244 if (share_blocks_lock(vp, bfp)) { 245 error = EAGAIN; 246 goto done; 247 } 248 } 249 if (flag & F_REMOTELOCK) { 250 frcmd = SETFLCK|RCMDLCK; 251 break; 252 } 253 if (flag & F_PXFSLOCK) { 254 frcmd = SETFLCK|PCMDLCK; 255 break; 256 } 257 bfp->l_pid = ttoproc(curthread)->p_pid; 258 bfp->l_sysid = 0; 259 frcmd = SETFLCK; 260 if (cmd == F_SETLK_NBMAND && 261 (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) { 262 /* would check here for conflict with mapped region */ 263 frcmd |= NBMLCK; 264 } 265 break; 266 267 case F_SETLKW: 268 /* 269 * If there is an NBMAND share reservation that conflicts 270 * with the lock request, block until the conflicting share 271 * reservation goes away. 272 */ 273 if (nbl_need_check(vp)) { 274 nbl_start_crit(vp, RW_WRITER); 275 serialize = 1; 276 if (share_blocks_lock(vp, bfp)) { 277 error = wait_for_share(vp, bfp); 278 if (error != 0) 279 goto done; 280 } 281 } 282 if (flag & F_REMOTELOCK) { 283 frcmd = SETFLCK|SLPFLCK|RCMDLCK; 284 break; 285 } 286 if (flag & F_PXFSLOCK) { 287 frcmd = SETFLCK|SLPFLCK|PCMDLCK; 288 break; 289 } 290 bfp->l_pid = ttoproc(curthread)->p_pid; 291 bfp->l_sysid = 0; 292 frcmd = SETFLCK|SLPFLCK; 293 break; 294 295 case F_HASREMOTELOCKS: 296 nlmid = GETNLMID(bfp->l_sysid); 297 if (nlmid != 0) { /* booted as a cluster */ 298 l_has_rmt(bfp) = 299 cl_flk_has_remote_locks_for_nlmid(vp, nlmid); 300 } else { /* not booted as a cluster */ 301 l_has_rmt(bfp) = flk_has_remote_locks(vp); 302 } 303 304 goto done; 305 306 default: 307 error = EINVAL; 308 goto done; 309 } 310 311 /* 312 * If this is a blocking lock request and we're serializing lock 313 * requests, modify the callback list to leave the critical region 314 * while we're waiting for the lock. 315 */ 316 317 if (serialize && (frcmd & SLPFLCK) != 0) { 318 flk_add_callback(&serialize_callback, 319 frlock_serialize_blocked, vp, flk_cbp); 320 flk_cbp = &serialize_callback; 321 } 322 323 error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp); 324 325 done: 326 if (serialize) 327 nbl_end_crit(vp); 328 329 return (error); 330 } 331 332 /* 333 * Callback when a lock request blocks and we are serializing requests. If 334 * before sleeping, leave the critical region. If after wakeup, reenter 335 * the critical region. 336 */ 337 338 static callb_cpr_t * 339 frlock_serialize_blocked(flk_cb_when_t when, void *infop) 340 { 341 vnode_t *vp = (vnode_t *)infop; 342 343 if (when == FLK_BEFORE_SLEEP) 344 nbl_end_crit(vp); 345 else { 346 nbl_start_crit(vp, RW_WRITER); 347 } 348 349 return (NULL); 350 } 351 352 /* 353 * Allow any flags. 354 */ 355 /* ARGSUSED */ 356 int 357 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr) 358 { 359 return (0); 360 } 361 362 /* 363 * Return the answer requested to poll() for non-device files. 364 * Only POLLIN, POLLRDNORM, and POLLOUT are recognized. 365 */ 366 struct pollhead fs_pollhd; 367 368 /* ARGSUSED */ 369 int 370 fs_poll(vnode_t *vp, 371 register short events, 372 int anyyet, 373 register short *reventsp, 374 struct pollhead **phpp) 375 { 376 *reventsp = 0; 377 if (events & POLLIN) 378 *reventsp |= POLLIN; 379 if (events & POLLRDNORM) 380 *reventsp |= POLLRDNORM; 381 if (events & POLLRDBAND) 382 *reventsp |= POLLRDBAND; 383 if (events & POLLOUT) 384 *reventsp |= POLLOUT; 385 if (events & POLLWRBAND) 386 *reventsp |= POLLWRBAND; 387 *phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL; 388 return (0); 389 } 390 391 /* 392 * POSIX pathconf() support. 393 */ 394 /* ARGSUSED */ 395 int 396 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr) 397 { 398 register ulong_t val; 399 register int error = 0; 400 struct statvfs64 vfsbuf; 401 402 switch (cmd) { 403 404 case _PC_LINK_MAX: 405 val = MAXLINK; 406 break; 407 408 case _PC_MAX_CANON: 409 val = MAX_CANON; 410 break; 411 412 case _PC_MAX_INPUT: 413 val = MAX_INPUT; 414 break; 415 416 case _PC_NAME_MAX: 417 bzero(&vfsbuf, sizeof (vfsbuf)); 418 if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf)) 419 break; 420 val = vfsbuf.f_namemax; 421 break; 422 423 case _PC_PATH_MAX: 424 case _PC_SYMLINK_MAX: 425 val = MAXPATHLEN; 426 break; 427 428 case _PC_PIPE_BUF: 429 val = PIPE_BUF; 430 break; 431 432 case _PC_NO_TRUNC: 433 if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC) 434 val = 1; /* NOTRUNC is enabled for vp */ 435 else 436 val = (ulong_t)-1; 437 break; 438 439 case _PC_VDISABLE: 440 val = _POSIX_VDISABLE; 441 break; 442 443 case _PC_CHOWN_RESTRICTED: 444 if (rstchown) 445 val = rstchown; /* chown restricted enabled */ 446 else 447 val = (ulong_t)-1; 448 break; 449 450 case _PC_FILESIZEBITS: 451 452 /* 453 * If ever we come here it means that underlying file system 454 * does not recognise the command and therefore this 455 * configurable limit cannot be determined. We return -1 456 * and don't change errno. 457 */ 458 459 val = (ulong_t)-1; /* large file support */ 460 break; 461 462 case _PC_ACL_ENABLED: 463 val = 0; 464 break; 465 466 default: 467 error = EINVAL; 468 break; 469 } 470 471 if (error == 0) 472 *valp = val; 473 return (error); 474 } 475 476 /* 477 * Dispose of a page. 478 */ 479 /* ARGSUSED */ 480 void 481 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr) 482 { 483 484 ASSERT(fl == B_FREE || fl == B_INVAL); 485 486 if (fl == B_FREE) 487 page_free(pp, dn); 488 else 489 page_destroy(pp, dn); 490 } 491 492 /* ARGSUSED */ 493 void 494 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr) 495 { 496 cmn_err(CE_PANIC, "fs_nodispose invoked"); 497 } 498 499 /* 500 * fabricate acls for file systems that do not support acls. 501 */ 502 /* ARGSUSED */ 503 int 504 fs_fab_acl(vp, vsecattr, flag, cr) 505 vnode_t *vp; 506 vsecattr_t *vsecattr; 507 int flag; 508 cred_t *cr; 509 { 510 aclent_t *aclentp; 511 struct vattr vattr; 512 int error; 513 514 vsecattr->vsa_aclcnt = 0; 515 vsecattr->vsa_aclentp = NULL; 516 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */ 517 vsecattr->vsa_dfaclentp = NULL; 518 519 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) 520 vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */ 521 522 if (vsecattr->vsa_mask & VSA_ACL) { 523 vsecattr->vsa_aclentp = kmem_zalloc(4 * sizeof (aclent_t), 524 KM_SLEEP); 525 vattr.va_mask = AT_MODE | AT_UID | AT_GID; 526 if (error = VOP_GETATTR(vp, &vattr, 0, CRED())) 527 return (error); 528 aclentp = vsecattr->vsa_aclentp; 529 530 aclentp->a_type = USER_OBJ; /* Owner */ 531 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6; 532 aclentp->a_id = vattr.va_uid; /* Really undefined */ 533 aclentp++; 534 535 aclentp->a_type = GROUP_OBJ; /* Group */ 536 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3; 537 aclentp->a_id = vattr.va_gid; /* Really undefined */ 538 aclentp++; 539 540 aclentp->a_type = OTHER_OBJ; /* Other */ 541 aclentp->a_perm = vattr.va_mode & 0007; 542 aclentp->a_id = -1; /* Really undefined */ 543 aclentp++; 544 545 aclentp->a_type = CLASS_OBJ; /* Class */ 546 aclentp->a_perm = (ushort_t)(0777); 547 aclentp->a_id = -1; /* Really undefined */ 548 } 549 550 return (0); 551 } 552 553 /* 554 * Common code for implementing DOS share reservations 555 */ 556 /* ARGSUSED4 */ 557 int 558 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr) 559 { 560 int error; 561 562 /* 563 * Make sure that the file was opened with permissions appropriate 564 * for the request, and make sure the caller isn't trying to sneak 565 * in an NBMAND request. 566 */ 567 if (cmd == F_SHARE) { 568 if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) || 569 ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0)) 570 return (EBADF); 571 if (shr->s_deny & F_MANDDNY) 572 return (EINVAL); 573 } 574 if (cmd == F_SHARE_NBMAND) { 575 /* must have write permission to deny read access */ 576 if ((shr->s_deny & F_RDDNY) && (flag & FWRITE) == 0) 577 return (EBADF); 578 /* make sure nbmand is allowed on the file */ 579 if (!vp->v_vfsp || 580 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) { 581 return (EINVAL); 582 } 583 if (vp->v_type != VREG) { 584 return (EINVAL); 585 } 586 } 587 588 nbl_start_crit(vp, RW_WRITER); 589 590 switch (cmd) { 591 592 case F_SHARE_NBMAND: 593 shr->s_deny |= F_MANDDNY; 594 /*FALLTHROUGH*/ 595 case F_SHARE: 596 error = add_share(vp, shr); 597 break; 598 599 case F_UNSHARE: 600 error = del_share(vp, shr); 601 break; 602 603 case F_HASREMOTELOCKS: 604 /* 605 * We are overloading this command to refer to remote 606 * shares as well as remote locks, despite its name. 607 */ 608 shr->s_access = shr_has_remote_shares(vp, shr->s_sysid); 609 error = 0; 610 break; 611 612 default: 613 error = EINVAL; 614 break; 615 } 616 617 nbl_end_crit(vp); 618 return (error); 619 } 620 621 /*ARGSUSED1*/ 622 int 623 fs_vnevent_nosupport(vnode_t *vp, vnevent_t vnevent) 624 { 625 ASSERT(vp != NULL); 626 return (ENOTSUP); 627 } 628 629 /*ARGSUSED1*/ 630 int 631 fs_vnevent_support(vnode_t *vp, vnevent_t vnevent) 632 { 633 ASSERT(vp != NULL); 634 return (0); 635 } 636 637 /* 638 * return 1 for non-trivial ACL. 639 * 640 * NB: It is not necessary for the caller to VOP_RWLOCK since 641 * we only issue VOP_GETSECATTR. 642 * 643 * Returns 0 == trivial 644 * 1 == NOT Trivial 645 * <0 could not determine. 646 */ 647 int 648 fs_acl_nontrivial(vnode_t *vp, cred_t *cr) 649 { 650 ulong_t acl_styles; 651 ulong_t acl_flavor; 652 vsecattr_t vsecattr; 653 int error; 654 int isnontrivial; 655 656 /* determine the forms of ACLs maintained */ 657 error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr); 658 659 /* clear bits we don't understand and establish default acl_style */ 660 acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED); 661 if (error || (acl_styles == 0)) 662 acl_styles = _ACL_ACLENT_ENABLED; 663 664 vsecattr.vsa_aclentp = NULL; 665 vsecattr.vsa_dfaclentp = NULL; 666 vsecattr.vsa_aclcnt = 0; 667 vsecattr.vsa_dfaclcnt = 0; 668 669 while (acl_styles) { 670 /* select one of the styles as current flavor */ 671 acl_flavor = 0; 672 if (acl_styles & _ACL_ACLENT_ENABLED) { 673 acl_flavor = _ACL_ACLENT_ENABLED; 674 vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT; 675 } else if (acl_styles & _ACL_ACE_ENABLED) { 676 acl_flavor = _ACL_ACE_ENABLED; 677 vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE; 678 } 679 680 ASSERT(vsecattr.vsa_mask && acl_flavor); 681 error = VOP_GETSECATTR(vp, &vsecattr, 0, cr); 682 if (error == 0) 683 break; 684 685 /* that flavor failed */ 686 acl_styles &= ~acl_flavor; 687 } 688 689 /* if all styles fail then assume trivial */ 690 if (acl_styles == 0) 691 return (0); 692 693 /* process the flavor that worked */ 694 isnontrivial = 0; 695 if (acl_flavor & _ACL_ACLENT_ENABLED) { 696 if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES) 697 isnontrivial = 1; 698 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL) 699 kmem_free(vsecattr.vsa_aclentp, 700 vsecattr.vsa_aclcnt * sizeof (aclent_t)); 701 if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL) 702 kmem_free(vsecattr.vsa_dfaclentp, 703 vsecattr.vsa_dfaclcnt * sizeof (aclent_t)); 704 } 705 if (acl_flavor & _ACL_ACE_ENABLED) { 706 707 isnontrivial = ace_trivial(vsecattr.vsa_aclentp, 708 vsecattr.vsa_aclcnt); 709 710 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL) 711 kmem_free(vsecattr.vsa_aclentp, 712 vsecattr.vsa_aclcnt * sizeof (ace_t)); 713 /* ACE has no vsecattr.vsa_dfaclcnt */ 714 } 715 return (isnontrivial); 716 } 717