1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/fcntl.h> 33 #include <sys/vfs.h> 34 #include <sys/vnode.h> 35 #include <sys/share.h> 36 #include <sys/cmn_err.h> 37 #include <sys/kmem.h> 38 #include <sys/debug.h> 39 #include <sys/t_lock.h> 40 #include <sys/errno.h> 41 #include <sys/nbmlock.h> 42 43 int share_debug = 0; 44 45 #ifdef DEBUG 46 static void print_shares(struct vnode *); 47 static void print_share(struct shrlock *); 48 #endif 49 50 static int isreadonly(struct vnode *); 51 static int lock_blocks_share(struct vnode *, struct shrlock *); 52 53 /* 54 * Add the share reservation shr to vp. 55 */ 56 int 57 add_share(struct vnode *vp, struct shrlock *shr) 58 { 59 struct shrlocklist *shrl; 60 61 /* 62 * An access of zero is not legal, however some older clients 63 * generate it anyways. Allow the request only if it is 64 * coming from a remote system. Be generous in what you 65 * accept and strict in what you send. 66 */ 67 if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) { 68 return (EINVAL); 69 } 70 71 /* 72 * Sanity check to make sure we have valid options. 73 * There is known overlap but it doesn't hurt to be careful. 74 */ 75 if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) { 76 return (EINVAL); 77 } 78 if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT| 79 F_MANDDNY|F_RMDNY)) { 80 return (EINVAL); 81 } 82 83 /* 84 * If the caller wants non-blocking mandatory semantics, make sure 85 * that there isn't already a conflicting lock. 86 */ 87 if (shr->s_deny & F_MANDDNY) { 88 ASSERT(nbl_in_crit(vp)); 89 if (lock_blocks_share(vp, shr)) { 90 return (EAGAIN); 91 } 92 } 93 94 mutex_enter(&vp->v_lock); 95 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 96 /* 97 * If the share owner matches previous request 98 * do special handling. 99 */ 100 if ((shrl->shr->s_sysid == shr->s_sysid) && 101 (shrl->shr->s_pid == shr->s_pid) && 102 (shrl->shr->s_own_len == shr->s_own_len) && 103 bcmp(shrl->shr->s_owner, shr->s_owner, 104 shr->s_own_len) == 0) { 105 106 /* 107 * If the existing request is F_COMPAT and 108 * is the first share then allow any F_COMPAT 109 * from the same process. Trick: If the existing 110 * F_COMPAT is write access then it must have 111 * the same owner as the first. 112 */ 113 if ((shrl->shr->s_deny & F_COMPAT) && 114 (shr->s_deny & F_COMPAT) && 115 ((shrl->next == NULL) || 116 (shrl->shr->s_access & F_WRACC))) 117 break; 118 } 119 120 /* 121 * If a first share has been done in compatibility mode 122 * handle the special cases. 123 */ 124 if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) { 125 126 if (!(shr->s_deny & F_COMPAT)) { 127 /* 128 * If not compat and want write access or 129 * want to deny read or 130 * write exists, fails 131 */ 132 if ((shr->s_access & F_WRACC) || 133 (shr->s_deny & F_RDDNY) || 134 (shrl->shr->s_access & F_WRACC)) { 135 mutex_exit(&vp->v_lock); 136 return (EAGAIN); 137 } 138 /* 139 * If read only file allow, this may allow 140 * a deny write but that is meaningless on 141 * a read only file. 142 */ 143 if (isreadonly(vp)) 144 break; 145 mutex_exit(&vp->v_lock); 146 return (EAGAIN); 147 } 148 /* 149 * This is a compat request and read access 150 * and the first was also read access 151 * we always allow it, otherwise we reject because 152 * we have handled the only valid write case above. 153 */ 154 if ((shr->s_access == F_RDACC) && 155 (shrl->shr->s_access == F_RDACC)) 156 break; 157 mutex_exit(&vp->v_lock); 158 return (EAGAIN); 159 } 160 161 /* 162 * If we are trying to share in compatibility mode 163 * and the current share is compat (and not the first) 164 * we don't know enough. 165 */ 166 if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT)) 167 continue; 168 169 /* 170 * If this is a compat we check for what can't succeed. 171 */ 172 if (shr->s_deny & F_COMPAT) { 173 /* 174 * If we want write access or 175 * if anyone is denying read or 176 * if anyone has write access we fail 177 */ 178 if ((shr->s_access & F_WRACC) || 179 (shrl->shr->s_deny & F_RDDNY) || 180 (shrl->shr->s_access & F_WRACC)) { 181 mutex_exit(&vp->v_lock); 182 return (EAGAIN); 183 } 184 /* 185 * If the first was opened with only read access 186 * and is a read only file we allow. 187 */ 188 if (shrl->next == NULL) { 189 if ((shrl->shr->s_access == F_RDACC) && 190 isreadonly(vp)) { 191 break; 192 } 193 mutex_exit(&vp->v_lock); 194 return (EAGAIN); 195 } 196 /* 197 * We still can't determine our fate so continue 198 */ 199 continue; 200 } 201 202 /* 203 * Simple bitwise test, if we are trying to access what 204 * someone else is denying or we are trying to deny 205 * what someone else is accessing we fail. 206 */ 207 if ((shr->s_access & shrl->shr->s_deny) || 208 (shr->s_deny & shrl->shr->s_access)) { 209 mutex_exit(&vp->v_lock); 210 return (EAGAIN); 211 } 212 } 213 214 shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP); 215 shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP); 216 shrl->shr->s_access = shr->s_access; 217 shrl->shr->s_deny = shr->s_deny; 218 219 /* 220 * Make sure no other deny modes are also set with F_COMPAT 221 */ 222 if (shrl->shr->s_deny & F_COMPAT) 223 shrl->shr->s_deny = F_COMPAT; 224 shrl->shr->s_sysid = shr->s_sysid; /* XXX ref cnt? */ 225 shrl->shr->s_pid = shr->s_pid; 226 shrl->shr->s_own_len = shr->s_own_len; 227 shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP); 228 bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len); 229 shrl->next = vp->v_shrlocks; 230 vp->v_shrlocks = shrl; 231 #ifdef DEBUG 232 if (share_debug) 233 print_shares(vp); 234 #endif 235 236 mutex_exit(&vp->v_lock); 237 238 return (0); 239 } 240 241 /* 242 * nlmid sysid pid 243 * ===== ===== === 244 * !=0 !=0 =0 in cluster; NLM lock 245 * !=0 =0 =0 in cluster; special case for NLM lock 246 * !=0 =0 !=0 in cluster; PXFS local lock 247 * !=0 !=0 !=0 cannot happen 248 * =0 !=0 =0 not in cluster; NLM lock 249 * =0 =0 !=0 not in cluster; local lock 250 * =0 =0 =0 cannot happen 251 * =0 !=0 !=0 cannot happen 252 */ 253 static int 254 is_match_for_del(struct shrlock *shr, struct shrlock *element) 255 { 256 int nlmid1, nlmid2; 257 int result = 0; 258 259 nlmid1 = GETNLMID(shr->s_sysid); 260 nlmid2 = GETNLMID(element->s_sysid); 261 262 if (nlmid1 != 0) { /* in a cluster */ 263 if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) { 264 /* 265 * Lock obtained through nlm server. Just need to 266 * compare whole sysids. pid will always = 0. 267 */ 268 result = shr->s_sysid == element->s_sysid; 269 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) { 270 /* 271 * This is a special case. The NLM server wishes to 272 * delete all share locks obtained through nlmid1. 273 */ 274 result = (nlmid1 == nlmid2); 275 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) { 276 /* 277 * Lock obtained locally through PXFS. Match nlmids 278 * and pids. 279 */ 280 result = (nlmid1 == nlmid2 && 281 shr->s_pid == element->s_pid); 282 } 283 } else { /* not in a cluster */ 284 result = ((shr->s_sysid == 0 && 285 shr->s_pid == element->s_pid) || 286 (shr->s_sysid != 0 && 287 shr->s_sysid == element->s_sysid)); 288 } 289 return (result); 290 } 291 292 /* 293 * Delete the given share reservation. Returns 0 if okay, EINVAL if the 294 * share could not be found. If the share reservation is an NBMAND share 295 * reservation, signal anyone waiting for the share to go away (e.g., 296 * blocking lock requests). 297 */ 298 299 int 300 del_share(struct vnode *vp, struct shrlock *shr) 301 { 302 struct shrlocklist *shrl; 303 struct shrlocklist **shrlp; 304 int found = 0; 305 int is_nbmand = 0; 306 307 mutex_enter(&vp->v_lock); 308 /* 309 * Delete the shares with the matching sysid and owner 310 * But if own_len == 0 and sysid == 0 delete all with matching pid 311 * But if own_len == 0 delete all with matching sysid. 312 */ 313 shrlp = &vp->v_shrlocks; 314 while (*shrlp) { 315 if ((shr->s_own_len == (*shrlp)->shr->s_own_len && 316 (bcmp(shr->s_owner, (*shrlp)->shr->s_owner, 317 shr->s_own_len) == 0)) || 318 319 (shr->s_own_len == 0 && 320 is_match_for_del(shr, (*shrlp)->shr))) { 321 322 shrl = *shrlp; 323 *shrlp = shrl->next; 324 325 if (shrl->shr->s_deny & F_MANDDNY) 326 is_nbmand = 1; 327 328 /* XXX deref sysid */ 329 kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len); 330 kmem_free(shrl->shr, sizeof (struct shrlock)); 331 kmem_free(shrl, sizeof (struct shrlocklist)); 332 found++; 333 continue; 334 } 335 shrlp = &(*shrlp)->next; 336 } 337 338 if (is_nbmand) 339 cv_broadcast(&vp->v_cv); 340 341 mutex_exit(&vp->v_lock); 342 return (found ? 0 : EINVAL); 343 } 344 345 /* 346 * Clean up all local share reservations that the given process has with 347 * the given file. 348 */ 349 void 350 cleanshares(struct vnode *vp, pid_t pid) 351 { 352 struct shrlock shr; 353 354 if (vp->v_shrlocks == NULL) 355 return; 356 357 shr.s_access = 0; 358 shr.s_deny = 0; 359 shr.s_pid = pid; 360 shr.s_sysid = 0; 361 shr.s_own_len = 0; 362 shr.s_owner = NULL; 363 364 (void) del_share(vp, &shr); 365 } 366 367 static int 368 is_match_for_has_remote(int32_t sysid1, int32_t sysid2) 369 { 370 int result = 0; 371 372 if (GETNLMID(sysid1) != 0) { /* in a cluster */ 373 if (GETSYSID(sysid1) != 0) { 374 /* 375 * Lock obtained through nlm server. Just need to 376 * compare whole sysids. 377 */ 378 result = (sysid1 == sysid2); 379 } else if (GETSYSID(sysid1) == 0) { 380 /* 381 * This is a special case. The NLM server identified 382 * by nlmid1 wishes to find out if it has obtained 383 * any share locks on the vnode. 384 */ 385 result = (GETNLMID(sysid1) == GETNLMID(sysid2)); 386 } 387 } else { /* not in a cluster */ 388 result = ((sysid1 != 0 && sysid1 == sysid2) || 389 (sysid1 == 0 && sysid2 != 0)); 390 } 391 return (result); 392 } 393 394 395 /* 396 * Determine whether there are any shares for the given vnode 397 * with a remote sysid. Returns zero if not, non-zero if there are. 398 * If sysid is non-zero then determine if this sysid has a share. 399 * 400 * Note that the return value from this function is potentially invalid 401 * once it has been returned. The caller is responsible for providing its 402 * own synchronization mechanism to ensure that the return value is useful. 403 */ 404 int 405 shr_has_remote_shares(vnode_t *vp, int32_t sysid) 406 { 407 struct shrlocklist *shrl; 408 int result = 0; 409 410 mutex_enter(&vp->v_lock); 411 shrl = vp->v_shrlocks; 412 while (shrl) { 413 if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) { 414 415 result = 1; 416 break; 417 } 418 shrl = shrl->next; 419 } 420 mutex_exit(&vp->v_lock); 421 return (result); 422 } 423 424 static int 425 isreadonly(struct vnode *vp) 426 { 427 return (vp->v_type != VCHR && vp->v_type != VBLK && 428 vp->v_type != VFIFO && vn_is_readonly(vp)); 429 } 430 431 #ifdef DEBUG 432 static void 433 print_shares(struct vnode *vp) 434 { 435 struct shrlocklist *shrl; 436 437 if (vp->v_shrlocks == NULL) { 438 printf("<NULL>\n"); 439 return; 440 } 441 442 shrl = vp->v_shrlocks; 443 while (shrl) { 444 print_share(shrl->shr); 445 shrl = shrl->next; 446 } 447 } 448 449 static void 450 print_share(struct shrlock *shr) 451 { 452 int i; 453 454 if (shr == NULL) { 455 printf("<NULL>\n"); 456 return; 457 } 458 459 printf(" access(%d): ", shr->s_access); 460 if (shr->s_access & F_RDACC) 461 printf("R"); 462 if (shr->s_access & F_WRACC) 463 printf("W"); 464 if ((shr->s_access & (F_RDACC|F_WRACC)) == 0) 465 printf("N"); 466 printf("\n"); 467 printf(" deny: "); 468 if (shr->s_deny & F_COMPAT) 469 printf("C"); 470 if (shr->s_deny & F_RDDNY) 471 printf("R"); 472 if (shr->s_deny & F_WRDNY) 473 printf("W"); 474 if (shr->s_deny == F_NODNY) 475 printf("N"); 476 printf("\n"); 477 printf(" sysid: %d\n", shr->s_sysid); 478 printf(" pid: %d\n", shr->s_pid); 479 printf(" owner: [%d]", shr->s_own_len); 480 printf("'"); 481 for (i = 0; i < shr->s_own_len; i++) 482 printf("%02x", (unsigned)shr->s_owner[i]); 483 printf("'\n"); 484 } 485 #endif 486 487 /* 488 * Return non-zero if the given I/O request conflicts with a registered 489 * share reservation. 490 * 491 * A process is identified by the tuple (sysid, pid). When the caller 492 * context is passed to nbl_share_conflict, the sysid and pid in the 493 * caller context are used. Otherwise the sysid is zero, and the pid is 494 * taken from the current process. 495 * 496 * Conflict Algorithm: 497 * 1. An op request of NBL_READ will fail if a different 498 * process has a mandatory share reservation with deny read. 499 * 500 * 2. An op request of NBL_WRITE will fail if a different 501 * process has a mandatory share reservation with deny write. 502 * 503 * 3. An op request of NBL_READWRITE will fail if a different 504 * process has a mandatory share reservation with deny read 505 * or deny write. 506 * 507 * 4. An op request of NBL_REMOVE will fail if there is 508 * a mandatory share reservation with an access of read, 509 * write, or remove. (Anything other than meta data access). 510 * 511 * 5. An op request of NBL_RENAME will fail if there is 512 * a mandatory share reservation with: 513 * a) access write or access remove 514 * or 515 * b) access read and deny remove 516 * 517 * Otherwise there is no conflict and the op request succeeds. 518 * 519 * This behavior is required for interoperability between 520 * the nfs server, cifs server, and local access. 521 * This behavior can result in non-posix semantics. 522 * 523 * When mandatory share reservations are enabled, a process 524 * should call nbl_share_conflict to determine if the 525 * desired operation would conflict with an existing share 526 * reservation. 527 * 528 * The call to nbl_share_conflict may be skipped if the 529 * process has an existing share reservation and the operation 530 * is being performed in the context of that existing share 531 * reservation. 532 */ 533 int 534 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct) 535 { 536 struct shrlocklist *shrl; 537 int conflict = 0; 538 pid_t pid; 539 int sysid; 540 541 ASSERT(nbl_in_crit(vp)); 542 543 if (ct == NULL) { 544 pid = curproc->p_pid; 545 sysid = 0; 546 } else { 547 pid = ct->cc_pid; 548 sysid = ct->cc_sysid; 549 } 550 551 mutex_enter(&vp->v_lock); 552 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 553 if (!(shrl->shr->s_deny & F_MANDDNY)) 554 continue; 555 /* 556 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to 557 * check if the share reservation being examined 558 * belongs to the current process. 559 * NBL_REMOVE and NBL_RENAME do not. 560 * This behavior is required by the conflict 561 * algorithm described above. 562 */ 563 switch (op) { 564 case NBL_READ: 565 if ((shrl->shr->s_deny & F_RDDNY) && 566 (shrl->shr->s_sysid != sysid || 567 shrl->shr->s_pid != pid)) 568 conflict = 1; 569 break; 570 case NBL_WRITE: 571 if ((shrl->shr->s_deny & F_WRDNY) && 572 (shrl->shr->s_sysid != sysid || 573 shrl->shr->s_pid != pid)) 574 conflict = 1; 575 break; 576 case NBL_READWRITE: 577 if ((shrl->shr->s_deny & F_RWDNY) && 578 (shrl->shr->s_sysid != sysid || 579 shrl->shr->s_pid != pid)) 580 conflict = 1; 581 break; 582 case NBL_REMOVE: 583 if (shrl->shr->s_access & (F_RWACC|F_RMACC)) 584 conflict = 1; 585 break; 586 case NBL_RENAME: 587 if (shrl->shr->s_access & (F_WRACC|F_RMACC)) 588 conflict = 1; 589 590 else if ((shrl->shr->s_access & F_RDACC) && 591 (shrl->shr->s_deny & F_RMDNY)) 592 conflict = 1; 593 break; 594 #ifdef DEBUG 595 default: 596 cmn_err(CE_PANIC, 597 "nbl_share_conflict: bogus op (%d)", 598 op); 599 break; 600 #endif 601 } 602 if (conflict) 603 break; 604 } 605 606 mutex_exit(&vp->v_lock); 607 return (conflict); 608 } 609 610 /* 611 * Return non-zero if the given lock request conflicts with an existing 612 * non-blocking mandatory share reservation. 613 */ 614 615 int 616 share_blocks_lock(vnode_t *vp, flock64_t *flkp) 617 { 618 caller_context_t ct; 619 620 ASSERT(nbl_in_crit(vp)); 621 622 ct.cc_pid = flkp->l_pid; 623 ct.cc_sysid = flkp->l_sysid; 624 ct.cc_caller_id = 0; 625 626 if ((flkp->l_type == F_RDLCK || flkp->l_type == F_WRLCK) && 627 nbl_share_conflict(vp, nbl_lock_to_op(flkp->l_type), &ct)) 628 return (1); 629 else 630 return (0); 631 } 632 633 /* 634 * Wait for all share reservations to go away that block the given lock 635 * request. Returns 0 after successfully waiting, or EINTR. 636 */ 637 638 int 639 wait_for_share(vnode_t *vp, flock64_t *flkp) 640 { 641 int result = 0; 642 643 ASSERT(nbl_in_crit(vp)); 644 645 /* 646 * We have to hold the vnode's lock before leaving the nbmand 647 * critical region, to prevent a race with the thread that deletes 648 * the share that's blocking us. Then we have to drop the lock 649 * before reentering the critical region, to avoid a deadlock. 650 */ 651 while (result == 0 && share_blocks_lock(vp, flkp)) { 652 mutex_enter(&vp->v_lock); 653 nbl_end_crit(vp); 654 if (cv_wait_sig(&vp->v_cv, &vp->v_lock) == 0) 655 result = EINTR; 656 mutex_exit(&vp->v_lock); 657 nbl_start_crit(vp, RW_WRITER); 658 } 659 660 return (result); 661 } 662 663 /* 664 * Determine if the given share reservation conflicts with any existing 665 * locks or mapped regions for the file. This is used to compensate for 666 * the fact that most Unix applications don't get a share reservation, so 667 * we use existing locks as an indication of what files are open. 668 * 669 * XXX needs a better name to reflect that it also looks for mapped file 670 * conflicts. 671 * 672 * Returns non-zero if there is a conflict, zero if okay. 673 */ 674 675 static int 676 lock_blocks_share(vnode_t *vp, struct shrlock *shr) 677 { 678 struct flock64 lck; 679 int error; 680 v_mode_t mode = 0; 681 682 if ((shr->s_deny & (F_RWDNY|F_COMPAT)) == 0) { 683 /* if no deny mode, then there's no conflict */ 684 return (0); 685 } 686 687 /* check for conflict with mapped region */ 688 if ((shr->s_deny & F_RWDNY) == F_WRDNY) { 689 mode = V_WRITE; 690 } else if ((shr->s_deny & F_RWDNY) == F_RDDNY) { 691 mode = V_READ; 692 } else { 693 mode = V_RDORWR; 694 } 695 if (vn_is_mapped(vp, mode)) 696 return (1); 697 698 lck.l_type = ((shr->s_deny & F_RDDNY) ? F_WRLCK : F_RDLCK); 699 lck.l_whence = 0; 700 lck.l_start = 0; 701 lck.l_len = 0; /* to EOF */ 702 703 /* XXX should use non-NULL cred? */ 704 error = VOP_FRLOCK(vp, F_GETLK, &lck, 0, 0, NULL, NULL, NULL); 705 if (error != 0) { 706 cmn_err(CE_WARN, "lock_blocks_share: unexpected error (%d)", 707 error); 708 return (1); 709 } 710 711 return (lck.l_type == F_UNLCK ? 0 : 1); 712 } 713 714 /* 715 * Determine if the given process has a NBMAND share reservation on the 716 * given vnode. Returns 1 if the process has such a share reservation, 717 * returns 0 otherwise. 718 */ 719 int 720 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid) 721 { 722 struct shrlocklist *shrl; 723 724 /* 725 * Any NBMAND share reservation on the vp for this process? 726 */ 727 mutex_enter(&vp->v_lock); 728 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 729 if (shrl->shr->s_sysid == 0 && 730 (shrl->shr->s_deny & F_MANDDNY) && 731 (shrl->shr->s_pid == pid)) { 732 mutex_exit(&vp->v_lock); 733 return (1); 734 } 735 } 736 mutex_exit(&vp->v_lock); 737 738 return (0); 739 } 740