/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/share.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/nbmlock.h>

int share_debug = 0;

#ifdef DEBUG
static void print_shares(struct vnode *);
static void print_share(struct shrlock *);
#endif

static int isreadonly(struct vnode *);
static void do_cleanshares(struct vnode *, pid_t, int32_t);


/*
 * Add the share reservation shr to vp.
 */
int
add_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;

	/*
	 * An access of zero is not legal; however, some older clients
	 * generate it anyway.  Allow the request only if it is
	 * coming from a remote system.  Be generous in what you
	 * accept and strict in what you send.
	 */
	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
		return (EINVAL);
	}

	/*
	 * Sanity check to make sure we have valid options.
	 * There is known overlap but it doesn't hurt to be careful.
	 */
	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
		return (EINVAL);
	}
	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
	    F_MANDDNY|F_RMDNY)) {
		return (EINVAL);
	}

	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		/*
		 * If the share owner matches a previous request,
		 * do special handling.
		 */
		if ((shrl->shr->s_sysid == shr->s_sysid) &&
		    (shrl->shr->s_pid == shr->s_pid) &&
		    (shrl->shr->s_own_len == shr->s_own_len) &&
		    bcmp(shrl->shr->s_owner, shr->s_owner,
		    shr->s_own_len) == 0) {

			/*
			 * If the existing request is F_COMPAT and
			 * is the first share, then allow any F_COMPAT
			 * from the same process.  Trick: if the existing
			 * F_COMPAT is write access, then it must have
			 * the same owner as the first.
			 */
			if ((shrl->shr->s_deny & F_COMPAT) &&
			    (shr->s_deny & F_COMPAT) &&
			    ((shrl->next == NULL) ||
			    (shrl->shr->s_access & F_WRACC)))
				break;
		}

		/*
		 * If a first share has been done in compatibility mode,
		 * handle the special cases.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {

			if (!(shr->s_deny & F_COMPAT)) {
				/*
				 * If this request is not compat and wants
				 * write access, or wants to deny read, or
				 * write access already exists, it fails.
				 */
				if ((shr->s_access & F_WRACC) ||
				    (shr->s_deny & F_RDDNY) ||
				    (shrl->shr->s_access & F_WRACC)) {
					mutex_exit(&vp->v_lock);
					DTRACE_PROBE1(conflict_shrlock,
					    struct shrlock *, shrl->shr);
					return (EAGAIN);
				}
				/*
				 * If the file is read-only, allow it; this
				 * may allow a deny-write, but that is
				 * meaningless on a read-only file.
				 */
				if (isreadonly(vp))
					break;
				mutex_exit(&vp->v_lock);
				DTRACE_PROBE1(conflict_shrlock,
				    struct shrlock *, shrl->shr);
				return (EAGAIN);
			}
			/*
			 * If this is a compat request for read access and
			 * the first was also read access, we always allow
			 * it; otherwise we reject, because we have handled
			 * the only valid write case above.
			 */
			if ((shr->s_access == F_RDACC) &&
			    (shrl->shr->s_access == F_RDACC))
				break;
			mutex_exit(&vp->v_lock);
			DTRACE_PROBE1(conflict_shrlock,
			    struct shrlock *, shrl->shr);
			return (EAGAIN);
		}

		/*
		 * If we are trying to share in compatibility mode
		 * and the current share is compat (and not the first),
		 * we don't know enough yet.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
			continue;

		/*
		 * If this is a compat request, check for what can't succeed.
		 */
		if (shr->s_deny & F_COMPAT) {
			/*
			 * If we want write access, or
			 * if anyone is denying read, or
			 * if anyone has write access, we fail.
			 */
			if ((shr->s_access & F_WRACC) ||
			    (shrl->shr->s_deny & F_RDDNY) ||
			    (shrl->shr->s_access & F_WRACC)) {
				mutex_exit(&vp->v_lock);
				DTRACE_PROBE1(conflict_shrlock,
				    struct shrlock *, shrl->shr);
				return (EAGAIN);
			}
			/*
			 * If the first was opened with only read access
			 * and the file is read-only, we allow it.
			 */
			if (shrl->next == NULL) {
				if ((shrl->shr->s_access == F_RDACC) &&
				    isreadonly(vp)) {
					break;
				}
				mutex_exit(&vp->v_lock);
				DTRACE_PROBE1(conflict_shrlock,
				    struct shrlock *, shrl->shr);
				return (EAGAIN);
			}
			/*
			 * We still can't determine our fate, so continue.
			 */
			continue;
		}

		/*
		 * Simple bitwise test: if we are trying to access what
		 * someone else is denying, or trying to deny what
		 * someone else is accessing, we fail.
		 */
		if ((shr->s_access & shrl->shr->s_deny) ||
		    (shr->s_deny & shrl->shr->s_access)) {
			mutex_exit(&vp->v_lock);
			DTRACE_PROBE1(conflict_shrlock,
			    struct shrlock *, shrl->shr);
			return (EAGAIN);
		}
	}

	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
	shrl->shr->s_access = shr->s_access;
	shrl->shr->s_deny = shr->s_deny;

	/*
	 * Make sure no other deny modes are also set with F_COMPAT.
	 */
	if (shrl->shr->s_deny & F_COMPAT)
		shrl->shr->s_deny = F_COMPAT;
	shrl->shr->s_sysid = shr->s_sysid;	/* XXX ref cnt? */
	shrl->shr->s_pid = shr->s_pid;
	shrl->shr->s_own_len = shr->s_own_len;
	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
	shrl->next = vp->v_shrlocks;
	vp->v_shrlocks = shrl;
#ifdef DEBUG
	if (share_debug)
		print_shares(vp);
#endif

	mutex_exit(&vp->v_lock);

	return (0);
}

/*
 *	nlmid	sysid	pid
 *	=====	=====	===
 *	!=0	!=0	=0	in cluster; NLM lock
 *	!=0	=0	=0	in cluster; special case for NLM lock
 *	!=0	=0	!=0	in cluster; PXFS local lock
 *	!=0	!=0	!=0	cannot happen
 *	=0	!=0	=0	not in cluster; NLM lock
 *	=0	=0	!=0	not in cluster; local lock
 *	=0	=0	=0	cannot happen
 *	=0	!=0	!=0	cannot happen
 */
static int
is_match_for_del(struct shrlock *shr, struct shrlock *element)
{
	int nlmid1, nlmid2;
	int result = 0;

	nlmid1 = GETNLMID(shr->s_sysid);
	nlmid2 = GETNLMID(element->s_sysid);

	if (nlmid1 != 0) {		/* in a cluster */
		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
			/*
			 * Lock obtained through nlm server.  Just need to
			 * compare whole sysids.  The pid will always be 0.
			 */
			result = shr->s_sysid == element->s_sysid;
		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
			/*
			 * This is a special case.  The NLM server wishes to
			 * delete all share locks obtained through nlmid1.
			 */
			result = (nlmid1 == nlmid2);
		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
			/*
			 * Lock obtained locally through PXFS.  Match nlmids
			 * and pids.
			 */
			result = (nlmid1 == nlmid2 &&
			    shr->s_pid == element->s_pid);
		}
	} else {			/* not in a cluster */
		result = ((shr->s_sysid == 0 &&
		    shr->s_pid == element->s_pid) ||
		    (shr->s_sysid != 0 &&
		    shr->s_sysid == element->s_sysid));
	}
	return (result);
}

/*
 * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
 * share could not be found.  If the share reservation is an NBMAND share
 * reservation, signal anyone waiting for the share to go away (e.g.,
 * blocking lock requests).
 */

int
del_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;
	struct shrlocklist **shrlp;
	int found = 0;
	int is_nbmand = 0;

	mutex_enter(&vp->v_lock);
	/*
	 * Delete the shares with the matching sysid and owner.
	 * But if own_len == 0 and sysid == 0, delete all with a matching
	 * pid; if own_len == 0 and sysid != 0, delete all with a matching
	 * sysid.
	 */
	shrlp = &vp->v_shrlocks;
	while (*shrlp) {
		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
		    shr->s_own_len) == 0)) ||

		    (shr->s_own_len == 0 &&
		    is_match_for_del(shr, (*shrlp)->shr))) {

			shrl = *shrlp;
			*shrlp = shrl->next;

			if (shrl->shr->s_deny & F_MANDDNY)
				is_nbmand = 1;

			/* XXX deref sysid */
			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
			kmem_free(shrl->shr, sizeof (struct shrlock));
			kmem_free(shrl, sizeof (struct shrlocklist));
			found++;
			continue;
		}
		shrlp = &(*shrlp)->next;
	}

	if (is_nbmand)
		cv_broadcast(&vp->v_cv);

	mutex_exit(&vp->v_lock);
	return (found ? 0 : EINVAL);
}
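
/*
 * Illustrative note: because of the wildcard matching above, a caller can
 * drop a whole class of reservations with one synthetic request by passing
 * s_own_len == 0.  For example, to drop everything held by one local
 * process on vp, a caller could do roughly the following (sketch only;
 * "pid" here is just a placeholder):
 *
 *	struct shrlock shr;
 *
 *	bzero(&shr, sizeof (shr));
 *	shr.s_pid = pid;	(s_sysid and s_own_len stay 0)
 *	(void) del_share(vp, &shr);
 *
 * The cleanshares() wrappers below build exactly this kind of request via
 * do_cleanshares().
 */
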
/*
 * Clean up all local share reservations that the given process has with
 * the given file.
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	do_cleanshares(vp, pid, 0);
}

/*
 * Clean up all remote share reservations that were made by the given
 * sysid on the given vnode.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid == 0)
		return;

	do_cleanshares(vp, 0, sysid);
}

/*
 * Clean up share reservations on the given vnode made by either the
 * given pid or the given sysid.  If sysid is 0, remove all shares made
 * by the given pid; otherwise remove all shares made by the given sysid.
 */
static void
do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
{
	struct shrlock shr;

	if (vp->v_shrlocks == NULL)
		return;

	shr.s_access = 0;
	shr.s_deny = 0;
	shr.s_pid = pid;
	shr.s_sysid = sysid;
	shr.s_own_len = 0;
	shr.s_owner = NULL;

	(void) del_share(vp, &shr);
}

static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	int result = 0;

	if (GETNLMID(sysid1) != 0) {	/* in a cluster */
		if (GETSYSID(sysid1) != 0) {
			/*
			 * Lock obtained through nlm server.  Just need to
			 * compare whole sysids.
			 */
			result = (sysid1 == sysid2);
		} else if (GETSYSID(sysid1) == 0) {
			/*
			 * This is a special case.  The NLM server identified
			 * by nlmid1 wishes to find out if it has obtained
			 * any share locks on the vnode.
			 */
			result = (GETNLMID(sysid1) == GETNLMID(sysid2));
		}
	} else {			/* not in a cluster */
		result = ((sysid1 != 0 && sysid1 == sysid2) ||
		    (sysid1 == 0 && sysid2 != 0));
	}
	return (result);
}


/*
 * Determine whether there are any shares for the given vnode
 * with a remote sysid.  Returns zero if not, non-zero if there are.
 * If sysid is non-zero, then determine if this sysid has a share.
 *
 * Note that the return value from this function is potentially invalid
 * once it has been returned.  The caller is responsible for providing its
 * own synchronization mechanism to ensure that the return value is useful.
 */
int
shr_has_remote_shares(vnode_t *vp, int32_t sysid)
{
	struct shrlocklist *shrl;
	int result = 0;

	mutex_enter(&vp->v_lock);
	shrl = vp->v_shrlocks;
	while (shrl) {
		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {

			result = 1;
			break;
		}
		shrl = shrl->next;
	}
	mutex_exit(&vp->v_lock);
	return (result);
}

static int
isreadonly(struct vnode *vp)
{
	return (vp->v_type != VCHR && vp->v_type != VBLK &&
	    vp->v_type != VFIFO && vn_is_readonly(vp));
}

#ifdef DEBUG
static void
print_shares(struct vnode *vp)
{
	struct shrlocklist *shrl;

	if (vp->v_shrlocks == NULL) {
		printf("<NULL>\n");
		return;
	}

	shrl = vp->v_shrlocks;
	while (shrl) {
		print_share(shrl->shr);
		shrl = shrl->next;
	}
}

static void
print_share(struct shrlock *shr)
{
	int i;

	if (shr == NULL) {
		printf("<NULL>\n");
		return;
	}

	printf(" access(%d): ", shr->s_access);
	if (shr->s_access & F_RDACC)
		printf("R");
	if (shr->s_access & F_WRACC)
		printf("W");
	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
		printf("N");
	printf("\n");
	printf(" deny: ");
	if (shr->s_deny & F_COMPAT)
		printf("C");
	if (shr->s_deny & F_RDDNY)
		printf("R");
	if (shr->s_deny & F_WRDNY)
		printf("W");
	if (shr->s_deny == F_NODNY)
		printf("N");
	printf("\n");
	printf(" sysid: %d\n", shr->s_sysid);
	printf(" pid: %d\n", shr->s_pid);
	printf(" owner: [%d]", shr->s_own_len);
	printf("'");
	for (i = 0; i < shr->s_own_len; i++)
		printf("%02x", (unsigned)shr->s_owner[i]);
	printf("'\n");
}
#endif

/*
 * Return non-zero if the given I/O request conflicts with a registered
 * share reservation.  Note: these are Windows-compatible semantics, but
 * Windows would do these checks only when opening a file.  Details in:
 * [MS-FSA] 2.1.5.1.2.2 Algorithm to check sharing access...
 *
 * A process is identified by the tuple (sysid, pid).  When the caller
 * context is passed to nbl_share_conflict, the sysid and pid in the
 * caller context are used.  Otherwise the sysid is zero, and the pid is
 * taken from the current process.
 *
 * Conflict Algorithm:
 *   1. An op request of NBL_READ will fail if a different
 *      process has a mandatory share reservation with deny read.
 *
 *   2. An op request of NBL_WRITE will fail if a different
 *      process has a mandatory share reservation with deny write.
 *
 *   3. An op request of NBL_READWRITE will fail if a different
 *      process has a mandatory share reservation with deny read
 *      or deny write.
 *
 *   4. An op request of NBL_REMOVE will fail if there is
 *      a mandatory share reservation with deny remove.
 *
 *   5. An op request of NBL_RENAME ... (same as NBL_REMOVE)
 *
 * Otherwise there is no conflict and the op request succeeds.
 *
 * This behavior is required for interoperability between
 * the nfs server, cifs server, and local access.
 * This behavior can result in non-POSIX semantics.
 *
 * When mandatory share reservations are enabled, a process
 * should call nbl_share_conflict to determine if the
 * desired operation would conflict with an existing share
 * reservation.
 *
 * The call to nbl_share_conflict may be skipped if the
 * process has an existing share reservation and the operation
 * is being performed in the context of that existing share
 * reservation.
 */
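/*
 * For reference, a typical caller-side nbmand check is sketched below.
 * This is illustrative only ("offset", "count", "error", and the chosen
 * op are placeholders, and the exact pattern varies by caller); see
 * sys/nbmlock.h for the nbl_* interfaces:
 *
 *	if (nbl_need_check(vp)) {
 *		nbl_start_crit(vp, RW_READER);
 *		if (nbl_conflict(vp, NBL_WRITE, offset, count, 0, ct))
 *			error = EACCES;
 *		nbl_end_crit(vp);
 *	}
 *
 * Callers typically reach nbl_share_conflict() via nbl_conflict(), which
 * also checks record locks, while holding the vnode's non-blocking-
 * mandatory critical region (hence the nbl_in_crit() assertion below).
 */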
int
nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
{
	struct shrlocklist *shrl;
	int conflict = 0;
	pid_t pid;
	int sysid;

	ASSERT(nbl_in_crit(vp));

	if (ct == NULL) {
		pid = curproc->p_pid;
		sysid = 0;
	} else {
		pid = ct->cc_pid;
		sysid = ct->cc_sysid;
	}

	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		if (!(shrl->shr->s_deny & F_MANDDNY))
			continue;
		/*
		 * Share deny reservations apply to _subsequent_ opens
		 * and therefore only to I/O on _other_ handles.
		 */
		if (shrl->shr->s_sysid == sysid &&
		    shrl->shr->s_pid == pid)
			continue;

		/*
		 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
		 * check if the share reservation being examined
		 * belongs to the current process.
		 * NBL_REMOVE and NBL_RENAME do not.
		 * This behavior is required by the conflict
		 * algorithm described above.
		 */
		switch (op) {
		case NBL_READ:
			if (shrl->shr->s_deny & F_RDDNY)
				conflict = 1;
			break;
		case NBL_WRITE:
			if (shrl->shr->s_deny & F_WRDNY)
				conflict = 1;
			break;
		case NBL_READWRITE:
			if (shrl->shr->s_deny & F_RWDNY)
				conflict = 1;
			break;
		case NBL_REMOVE:
		case NBL_RENAME:
			if (shrl->shr->s_deny & F_RMDNY)
				conflict = 1;
			break;
#ifdef DEBUG
		default:
			cmn_err(CE_PANIC,
			    "nbl_share_conflict: bogus op (%d)",
			    op);
			break;
#endif
		}
		if (conflict) {
			DTRACE_PROBE1(conflict_shrlock,
			    struct shrlock *, shrl->shr);
			break;
		}
	}

	mutex_exit(&vp->v_lock);
	return (conflict);
}

/*
 * Determine if the given process has an NBMAND share reservation on the
 * given vnode.  Returns 1 if the process has such a share reservation,
 * returns 0 otherwise.
 */
int
proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
{
	struct shrlocklist *shrl;

	/*
	 * Any NBMAND share reservation on the vp for this process?
	 */
	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		if (shrl->shr->s_sysid == 0 &&
		    (shrl->shr->s_deny & F_MANDDNY) &&
		    (shrl->shr->s_pid == pid)) {
			mutex_exit(&vp->v_lock);
			return (1);
		}
	}
	mutex_exit(&vp->v_lock);

	return (0);
}