/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1996-1998,2001,2003 Sun Microsystems, Inc.
 * All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/share.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/nbmlock.h>

int share_debug = 0;

#ifdef DEBUG
static void print_shares(struct vnode *);
static void print_share(struct shrlock *);
#endif

static int isreadonly(struct vnode *);
static int lock_blocks_share(struct vnode *, struct shrlock *);

/*
 * Add the share reservation shr to vp.
 */
int
add_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;

	/*
	 * An access of zero is not legal, however some older clients
	 * generate it anyway.  Allow the request only if it is
	 * coming from a remote system.  Be generous in what you
	 * accept and strict in what you send.
	 */
	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
		return (EINVAL);
	}

	/*
	 * Sanity check to make sure we have valid options.
	 * There is known overlap but it doesn't hurt to be careful.
	 */
	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC)) {
		return (EINVAL);
	}
	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
	    F_MANDDNY)) {
		return (EINVAL);
	}

	/*
	 * If the caller wants non-blocking mandatory semantics, make sure
	 * that there isn't already a conflicting lock.
	 */
	if (shr->s_deny & F_MANDDNY) {
		ASSERT(nbl_in_crit(vp));
		if (lock_blocks_share(vp, shr)) {
			return (EAGAIN);
		}
	}

	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		/*
		 * If the share owner matches previous request
		 * do special handling.
		 */
		if ((shrl->shr->s_sysid == shr->s_sysid) &&
		    (shrl->shr->s_pid == shr->s_pid) &&
		    (shrl->shr->s_own_len == shr->s_own_len) &&
		    bcmp(shrl->shr->s_owner, shr->s_owner,
		    shr->s_own_len) == 0) {

			/*
			 * If the existing request is F_COMPAT and
			 * is the first share then allow any F_COMPAT
			 * from the same process.  Trick: If the existing
			 * F_COMPAT is write access then it must have
			 * the same owner as the first.
			 */
			if ((shrl->shr->s_deny & F_COMPAT) &&
			    (shr->s_deny & F_COMPAT) &&
			    ((shrl->next == NULL) ||
			    (shrl->shr->s_access & F_WRACC)))
				break;
		}

		/*
		 * If the first share was done in compatibility mode,
		 * handle the special cases.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {

			if (!(shr->s_deny & F_COMPAT)) {
				/*
				 * If the new request is not compat and
				 * wants write access, or wants to deny
				 * read, or a writer already exists, fail.
				 */
				if ((shr->s_access & F_WRACC) ||
				    (shr->s_deny & F_RDDNY) ||
				    (shrl->shr->s_access & F_WRACC)) {
					mutex_exit(&vp->v_lock);
					return (EAGAIN);
				}
				/*
				 * If this is a read-only file, allow it;
				 * this may allow a deny-write, but that is
				 * meaningless on a read-only file.
				 */
				if (isreadonly(vp))
					break;
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * This is a compat request for read access; if the
			 * first share was also read access we always allow
			 * it.  Otherwise we reject, because the only valid
			 * write case was handled above.
			 */
			if ((shr->s_access == F_RDACC) &&
			    (shrl->shr->s_access == F_RDACC))
				break;
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}

		/*
		 * If we are trying to share in compatibility mode
		 * and the current share is compat (and not the first),
		 * we don't know enough yet.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
			continue;

		/*
		 * If this is a compat request, check for what can't succeed.
		 */
		if (shr->s_deny & F_COMPAT) {
			/*
			 * If we want write access, or anyone is denying
			 * read, or anyone has write access, we fail.
			 */
			if ((shr->s_access & F_WRACC) ||
			    (shrl->shr->s_deny & F_RDDNY) ||
			    (shrl->shr->s_access & F_WRACC)) {
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * If the first share was opened with only read
			 * access and this is a read-only file, we allow it.
			 */
			if (shrl->next == NULL) {
				if ((shrl->shr->s_access == F_RDACC) &&
				    isreadonly(vp)) {
					break;
				}
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * We still can't determine our fate, so continue.
			 */
			continue;
		}

		/*
		 * Simple bitwise test: if we are trying to access what
		 * someone else is denying, or to deny what someone else
		 * is accessing, we fail.
		 */
		if ((shr->s_access & shrl->shr->s_deny) ||
		    (shr->s_deny & shrl->shr->s_access)) {
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}
	}

	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
	shrl->shr->s_access = shr->s_access;
	shrl->shr->s_deny = shr->s_deny;

	/*
	 * Make sure no other deny modes are also set with F_COMPAT.
	 */
	if (shrl->shr->s_deny & F_COMPAT)
		shrl->shr->s_deny = F_COMPAT;
	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
	shrl->shr->s_pid = shr->s_pid;
	shrl->shr->s_own_len = shr->s_own_len;
	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
	shrl->next = vp->v_shrlocks;
	vp->v_shrlocks = shrl;
#ifdef DEBUG
	if (share_debug)
		print_shares(vp);
#endif

	mutex_exit(&vp->v_lock);

	return (0);
}

/*
 *	nlmid	sysid	pid
 *	=====	=====	===
 *	!=0	!=0	=0	in cluster; NLM lock
 *	!=0	=0	=0	in cluster; special case for NLM lock
 *	!=0	=0	!=0	in cluster; PXFS local lock
 *	!=0	!=0	!=0	cannot happen
 *	=0	!=0	=0	not in cluster; NLM lock
 *	=0	=0	!=0	not in cluster; local lock
 *	=0	=0	=0	cannot happen
 *	=0	!=0	!=0	cannot happen
 */
static int
is_match_for_del(struct shrlock *shr, struct shrlock *element)
{
	int nlmid1, nlmid2;
	int result = 0;

	nlmid1 = GETNLMID(shr->s_sysid);
	nlmid2 = GETNLMID(element->s_sysid);

	if (nlmid1 != 0) {		/* in a cluster */
		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
			/*
			 * Lock obtained through nlm server.  Just need to
			 * compare whole sysids.  pid will always = 0.
			 */
			result = shr->s_sysid == element->s_sysid;
		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
			/*
			 * This is a special case.  The NLM server wishes to
			 * delete all share locks obtained through nlmid1.
			 */
			result = (nlmid1 == nlmid2);
		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
			/*
			 * Lock obtained locally through PXFS.  Match nlmids
			 * and pids.
			 */
			result = (nlmid1 == nlmid2 &&
			    shr->s_pid == element->s_pid);
		}
	} else {			/* not in a cluster */
		result = ((shr->s_sysid == 0 &&
		    shr->s_pid == element->s_pid) ||
		    (shr->s_sysid != 0 &&
		    shr->s_sysid == element->s_sysid));
	}
	return (result);
}

/*
 * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
 * share could not be found.  If the share reservation is an NBMAND share
 * reservation, signal anyone waiting for the share to go away (e.g.,
 * blocking lock requests).
 */

int
del_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;
	struct shrlocklist **shrlp;
	int found = 0;
	int is_nbmand = 0;

	mutex_enter(&vp->v_lock);
	/*
	 * Delete the shares with the matching sysid and owner
	 * But if own_len == 0 and sysid == 0 delete all with matching pid
	 * But if own_len == 0 delete all with matching sysid.
	 */
	shrlp = &vp->v_shrlocks;
	while (*shrlp) {
		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
		    shr->s_own_len) == 0)) ||

		    (shr->s_own_len == 0 &&
		    is_match_for_del(shr, (*shrlp)->shr))) {

			shrl = *shrlp;
			*shrlp = shrl->next;

			if (shrl->shr->s_deny & F_MANDDNY)
				is_nbmand = 1;

			/* XXX deref sysid */
			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
			kmem_free(shrl->shr, sizeof (struct shrlock));
			kmem_free(shrl, sizeof (struct shrlocklist));
			found++;
			continue;
		}
		shrlp = &(*shrlp)->next;
	}

	if (is_nbmand)
		cv_broadcast(&vp->v_cv);

	mutex_exit(&vp->v_lock);
	return (found ? 0 : EINVAL);
}

/*
 * Clean up all local share reservations that the given process has with
 * the given file.
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	struct shrlock shr;

	if (vp->v_shrlocks == NULL)
		return;

	shr.s_access = 0;
	shr.s_deny = 0;
	shr.s_pid = pid;
	shr.s_sysid = 0;
	shr.s_own_len = 0;
	shr.s_owner = NULL;

	(void) del_share(vp, &shr);
}

static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	int result = 0;

	if (GETNLMID(sysid1) != 0) {	/* in a cluster */
		if (GETSYSID(sysid1) != 0) {
			/*
			 * Lock obtained through nlm server.  Just need to
			 * compare whole sysids.
			 */
			result = (sysid1 == sysid2);
		} else if (GETSYSID(sysid1) == 0) {
			/*
			 * This is a special case.  The NLM server identified
			 * by nlmid1 wishes to find out if it has obtained
			 * any share locks on the vnode.
			 */
			result = (GETNLMID(sysid1) == GETNLMID(sysid2));
		}
	} else {			/* not in a cluster */
		result = ((sysid1 != 0 && sysid1 == sysid2) ||
		    (sysid1 == 0 && sysid2 != 0));
	}
	return (result);
}

/*
 * Determine whether there are any shares for the given vnode
 * with a remote sysid.  Returns zero if not, non-zero if there are.
 * If sysid is non-zero then determine if this sysid has a share.
 *
 * Note that the return value from this function is potentially invalid
 * once it has been returned.  The caller is responsible for providing its
 * own synchronization mechanism to ensure that the return value is useful.
 */
int
shr_has_remote_shares(vnode_t *vp, int32_t sysid)
{
	struct shrlocklist *shrl;
	int result = 0;

	mutex_enter(&vp->v_lock);
	shrl = vp->v_shrlocks;
	while (shrl) {
		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
			result = 1;
			break;
		}
		shrl = shrl->next;
	}
	mutex_exit(&vp->v_lock);
	return (result);
}

static int
isreadonly(struct vnode *vp)
{
	return (vp->v_type != VCHR && vp->v_type != VBLK &&
	    vp->v_type != VFIFO && vn_is_readonly(vp));
}

#ifdef DEBUG
static void
print_shares(struct vnode *vp)
{
	struct shrlocklist *shrl;

	if (vp->v_shrlocks == NULL) {
		printf("<NULL>\n");
		return;
	}

	shrl = vp->v_shrlocks;
	while (shrl) {
		print_share(shrl->shr);
		shrl = shrl->next;
	}
}

static void
print_share(struct shrlock *shr)
{
	int i;

	if (shr == NULL) {
		printf("<NULL>\n");
		return;
	}

	printf(" access(%d): ", shr->s_access);
	if (shr->s_access & F_RDACC)
		printf("R");
	if (shr->s_access & F_WRACC)
		printf("W");
	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
		printf("N");
	printf("\n");
	printf(" deny: ");
	if (shr->s_deny & F_COMPAT)
		printf("C");
	if (shr->s_deny & F_RDDNY)
		printf("R");
	if (shr->s_deny & F_WRDNY)
		printf("W");
	if (shr->s_deny == F_NODNY)
		printf("N");
	printf("\n");
	printf(" sysid: %d\n", shr->s_sysid);
	printf(" pid: %d\n", shr->s_pid);
	printf(" owner: [%d]", shr->s_own_len);
	printf("'");
	for (i = 0; i < shr->s_own_len; i++)
		printf("%02x", (unsigned)shr->s_owner[i]);
	printf("'\n");
}
#endif

/*
 * Return non-zero if the given I/O request conflicts with a registered
 * share reservation.
 */

int
nbl_share_conflict(vnode_t *vp, nbl_op_t op)
{
	struct shrlocklist *shrl;
	int conflict = 0;

	ASSERT(nbl_in_crit(vp));

	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		if (shrl->shr->s_sysid == 0 &&
		    (shrl->shr->s_deny & F_MANDDNY) &&
		    shrl->shr->s_pid != curproc->p_pid) {
			switch (op) {
			case NBL_READ:
				if (shrl->shr->s_deny & F_RDDNY)
					conflict = 1;
				break;
			case NBL_WRITE:
				if (shrl->shr->s_deny & F_WRDNY)
					conflict = 1;
				break;
			case NBL_READWRITE:
				if (shrl->shr->s_deny & F_RWDNY)
					conflict = 1;
				break;
			case NBL_RENAME:
			case NBL_REMOVE:
				conflict = 1;
				break;
#ifdef DEBUG
			default:
				cmn_err(CE_PANIC,
				    "nbl_share_conflict: bogus op (%d)",
				    op);
				break;
#endif
			}
		}
		if (conflict)
			break;
	}

	mutex_exit(&vp->v_lock);
	return (conflict);
}

/*
 * Return non-zero if the given lock request conflicts with an existing
 * non-blocking mandatory share reservation.
 */

int
share_blocks_lock(vnode_t *vp, flock64_t *flkp)
{
	ASSERT(nbl_in_crit(vp));

	if ((flkp->l_type == F_RDLCK || flkp->l_type == F_WRLCK) &&
	    nbl_share_conflict(vp, nbl_lock_to_op(flkp->l_type)))
		return (1);
	else
		return (0);
}

/*
 * Wait for all share reservations that block the given lock request to
 * go away.  Returns 0 after successfully waiting, or EINTR.
 */

int
wait_for_share(vnode_t *vp, flock64_t *flkp)
{
	int result = 0;

	ASSERT(nbl_in_crit(vp));

	/*
	 * We have to hold the vnode's lock before leaving the nbmand
	 * critical region, to prevent a race with the thread that deletes
	 * the share that's blocking us.  Then we have to drop the lock
	 * before reentering the critical region, to avoid a deadlock.
	 */
	while (result == 0 && share_blocks_lock(vp, flkp)) {
		mutex_enter(&vp->v_lock);
		nbl_end_crit(vp);
		if (cv_wait_sig(&vp->v_cv, &vp->v_lock) == 0)
			result = EINTR;
		mutex_exit(&vp->v_lock);
		nbl_start_crit(vp, RW_WRITER);
	}

	return (result);
}

/*
 * Determine if the given share reservation conflicts with any existing
 * locks or mapped regions for the file.  This is used to compensate for
 * the fact that most Unix applications don't get a share reservation, so
 * we use existing locks as an indication of what files are open.
 *
 * XXX needs a better name to reflect that it also looks for mapped file
 * conflicts.
 *
 * Returns non-zero if there is a conflict, zero if okay.
 */

static int
lock_blocks_share(vnode_t *vp, struct shrlock *shr)
{
	struct flock64 lck;
	int error;

	/*
	 * We don't currently have a good way to match lock
	 * ownership with share ownership for remote requests.
	 * Fortunately, we know that only local processes (in particular,
	 * local CIFS servers) care about conflicts between locks and
	 * share reservations, and we can distinguish local processes from
	 * each other and from remote processes.
	 */
	ASSERT(shr->s_sysid == 0);

	if ((shr->s_deny & (F_RWDNY|F_COMPAT)) == 0) {
		/* if no deny mode, then there's no conflict */
		return (0);
	}

	lck.l_type = ((shr->s_deny & F_RDDNY) ? F_WRLCK : F_RDLCK);
	lck.l_whence = 0;
	lck.l_start = 0;
	lck.l_len = 0;			/* to EOF */

	/* would check here for conflict with mapped region */

	/* XXX should use non-NULL cred? */
	error = VOP_FRLOCK(vp, F_GETLK, &lck, 0, 0, NULL, NULL);
	if (error != 0) {
		cmn_err(CE_WARN, "lock_blocks_share: unexpected error (%d)",
		    error);
		return (1);
	}

	return (lck.l_type == F_UNLCK ? 0 : 1);
}

/*
 * Determine if the given process has an NBMAND share reservation on the
 * given vnode.  Returns 1 if the process has such a share reservation,
 * returns 0 otherwise.
 */
int
proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
{
	struct shrlocklist *shrl;

	/*
	 * Any NBMAND share reservation on the vp for this process?
	 */
	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		if (shrl->shr->s_sysid == 0 &&
		    (shrl->shr->s_deny & F_MANDDNY) &&
		    (shrl->shr->s_pid == pid)) {
			mutex_exit(&vp->v_lock);
			return (1);
		}
	}
	mutex_exit(&vp->v_lock);

	return (0);
}
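
/*
 * Illustrative caller sketch for the NBMAND checks above (a hedged
 * example, not code from this file).  A local consumer that wants
 * non-blocking mandatory semantics is expected to enter the nbmand
 * critical region, test for a conflicting share reservation, and only
 * then perform the I/O.  do_write() and the EACCES error choice are
 * hypothetical; nbl_start_crit(), nbl_share_conflict(), and
 * nbl_end_crit() are the routines referenced elsewhere in this file.
 *
 *	nbl_start_crit(vp, RW_READER);
 *	if (nbl_share_conflict(vp, NBL_WRITE)) {
 *		nbl_end_crit(vp);
 *		return (EACCES);
 *	}
 *	error = do_write(vp, uiop);
 *	nbl_end_crit(vp);
 */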