1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
28 */
29
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/fcntl.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/share.h>
38 #include <sys/cmn_err.h>
39 #include <sys/kmem.h>
40 #include <sys/debug.h>
41 #include <sys/t_lock.h>
42 #include <sys/errno.h>
43 #include <sys/nbmlock.h>
44
45 int share_debug = 0;
46
47 #ifdef DEBUG
48 static void print_shares(struct vnode *);
49 static void print_share(struct shrlock *);
50 #endif
51
52 static int isreadonly(struct vnode *);
53 static void do_cleanshares(struct vnode *, pid_t, int32_t);
54
55
56 /*
57 * Add the share reservation shr to vp.
58 */
59 int
add_share(struct vnode * vp,struct shrlock * shr)60 add_share(struct vnode *vp, struct shrlock *shr)
61 {
62 struct shrlocklist *shrl;
63
64 /*
65 * An access of zero is not legal, however some older clients
66 * generate it anyways. Allow the request only if it is
67 * coming from a remote system. Be generous in what you
68 * accept and strict in what you send.
69 */
70 if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
71 return (EINVAL);
72 }
73
74 /*
75 * Sanity check to make sure we have valid options.
76 * There is known overlap but it doesn't hurt to be careful.
77 */
78 if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
79 return (EINVAL);
80 }
81 if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
82 F_MANDDNY|F_RMDNY)) {
83 return (EINVAL);
84 }
85
86 mutex_enter(&vp->v_lock);
87 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
88 /*
89 * If the share owner matches previous request
90 * do special handling.
91 */
92 if ((shrl->shr->s_sysid == shr->s_sysid) &&
93 (shrl->shr->s_pid == shr->s_pid) &&
94 (shrl->shr->s_own_len == shr->s_own_len) &&
95 bcmp(shrl->shr->s_owner, shr->s_owner,
96 shr->s_own_len) == 0) {
97
98 /*
99 * If the existing request is F_COMPAT and
100 * is the first share then allow any F_COMPAT
101 * from the same process. Trick: If the existing
102 * F_COMPAT is write access then it must have
103 * the same owner as the first.
104 */
105 if ((shrl->shr->s_deny & F_COMPAT) &&
106 (shr->s_deny & F_COMPAT) &&
107 ((shrl->next == NULL) ||
108 (shrl->shr->s_access & F_WRACC)))
109 break;
110 }
111
112 /*
113 * If a first share has been done in compatibility mode
114 * handle the special cases.
115 */
116 if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
117
118 if (!(shr->s_deny & F_COMPAT)) {
119 /*
120 * If not compat and want write access or
121 * want to deny read or
122 * write exists, fails
123 */
124 if ((shr->s_access & F_WRACC) ||
125 (shr->s_deny & F_RDDNY) ||
126 (shrl->shr->s_access & F_WRACC)) {
127 mutex_exit(&vp->v_lock);
128 DTRACE_PROBE1(conflict_shrlock,
129 struct shrlock *, shrl->shr);
130 return (EAGAIN);
131 }
132 /*
133 * If read only file allow, this may allow
134 * a deny write but that is meaningless on
135 * a read only file.
136 */
137 if (isreadonly(vp))
138 break;
139 mutex_exit(&vp->v_lock);
140 DTRACE_PROBE1(conflict_shrlock,
141 struct shrlock *, shrl->shr);
142 return (EAGAIN);
143 }
144 /*
145 * This is a compat request and read access
146 * and the first was also read access
147 * we always allow it, otherwise we reject because
148 * we have handled the only valid write case above.
149 */
150 if ((shr->s_access == F_RDACC) &&
151 (shrl->shr->s_access == F_RDACC))
152 break;
153 mutex_exit(&vp->v_lock);
154 DTRACE_PROBE1(conflict_shrlock,
155 struct shrlock *, shrl->shr);
156 return (EAGAIN);
157 }
158
159 /*
160 * If we are trying to share in compatibility mode
161 * and the current share is compat (and not the first)
162 * we don't know enough.
163 */
164 if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
165 continue;
166
167 /*
168 * If this is a compat we check for what can't succeed.
169 */
170 if (shr->s_deny & F_COMPAT) {
171 /*
172 * If we want write access or
173 * if anyone is denying read or
174 * if anyone has write access we fail
175 */
176 if ((shr->s_access & F_WRACC) ||
177 (shrl->shr->s_deny & F_RDDNY) ||
178 (shrl->shr->s_access & F_WRACC)) {
179 mutex_exit(&vp->v_lock);
180 DTRACE_PROBE1(conflict_shrlock,
181 struct shrlock *, shrl->shr);
182 return (EAGAIN);
183 }
184 /*
185 * If the first was opened with only read access
186 * and is a read only file we allow.
187 */
188 if (shrl->next == NULL) {
189 if ((shrl->shr->s_access == F_RDACC) &&
190 isreadonly(vp)) {
191 break;
192 }
193 mutex_exit(&vp->v_lock);
194 DTRACE_PROBE1(conflict_shrlock,
195 struct shrlock *, shrl->shr);
196 return (EAGAIN);
197 }
198 /*
199 * We still can't determine our fate so continue
200 */
201 continue;
202 }
203
204 /*
205 * Simple bitwise test, if we are trying to access what
206 * someone else is denying or we are trying to deny
207 * what someone else is accessing we fail.
208 */
209 if ((shr->s_access & shrl->shr->s_deny) ||
210 (shr->s_deny & shrl->shr->s_access)) {
211 mutex_exit(&vp->v_lock);
212 DTRACE_PROBE1(conflict_shrlock,
213 struct shrlock *, shrl->shr);
214 return (EAGAIN);
215 }
216 }
217
218 shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
219 shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
220 shrl->shr->s_access = shr->s_access;
221 shrl->shr->s_deny = shr->s_deny;
222
223 /*
224 * Make sure no other deny modes are also set with F_COMPAT
225 */
226 if (shrl->shr->s_deny & F_COMPAT)
227 shrl->shr->s_deny = F_COMPAT;
228 shrl->shr->s_sysid = shr->s_sysid; /* XXX ref cnt? */
229 shrl->shr->s_pid = shr->s_pid;
230 shrl->shr->s_own_len = shr->s_own_len;
231 shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
232 bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
233 shrl->next = vp->v_shrlocks;
234 vp->v_shrlocks = shrl;
235 #ifdef DEBUG
236 if (share_debug)
237 print_shares(vp);
238 #endif
239
240 mutex_exit(&vp->v_lock);
241
242 return (0);
243 }
244
245 /*
246 * nlmid sysid pid
247 * ===== ===== ===
248 * !=0 !=0 =0 in cluster; NLM lock
249 * !=0 =0 =0 in cluster; special case for NLM lock
250 * !=0 =0 !=0 in cluster; PXFS local lock
251 * !=0 !=0 !=0 cannot happen
252 * =0 !=0 =0 not in cluster; NLM lock
253 * =0 =0 !=0 not in cluster; local lock
254 * =0 =0 =0 cannot happen
255 * =0 !=0 !=0 cannot happen
256 */
257 static int
is_match_for_del(struct shrlock * shr,struct shrlock * element)258 is_match_for_del(struct shrlock *shr, struct shrlock *element)
259 {
260 int nlmid1, nlmid2;
261 int result = 0;
262
263 nlmid1 = GETNLMID(shr->s_sysid);
264 nlmid2 = GETNLMID(element->s_sysid);
265
266 if (nlmid1 != 0) { /* in a cluster */
267 if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
268 /*
269 * Lock obtained through nlm server. Just need to
270 * compare whole sysids. pid will always = 0.
271 */
272 result = shr->s_sysid == element->s_sysid;
273 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
274 /*
275 * This is a special case. The NLM server wishes to
276 * delete all share locks obtained through nlmid1.
277 */
278 result = (nlmid1 == nlmid2);
279 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
280 /*
281 * Lock obtained locally through PXFS. Match nlmids
282 * and pids.
283 */
284 result = (nlmid1 == nlmid2 &&
285 shr->s_pid == element->s_pid);
286 }
287 } else { /* not in a cluster */
288 result = ((shr->s_sysid == 0 &&
289 shr->s_pid == element->s_pid) ||
290 (shr->s_sysid != 0 &&
291 shr->s_sysid == element->s_sysid));
292 }
293 return (result);
294 }
295
296 /*
297 * Delete the given share reservation. Returns 0 if okay, EINVAL if the
298 * share could not be found. If the share reservation is an NBMAND share
299 * reservation, signal anyone waiting for the share to go away (e.g.,
300 * blocking lock requests).
301 */
302
303 int
del_share(struct vnode * vp,struct shrlock * shr)304 del_share(struct vnode *vp, struct shrlock *shr)
305 {
306 struct shrlocklist *shrl;
307 struct shrlocklist **shrlp;
308 int found = 0;
309 int is_nbmand = 0;
310
311 mutex_enter(&vp->v_lock);
312 /*
313 * Delete the shares with the matching sysid and owner
314 * But if own_len == 0 and sysid == 0 delete all with matching pid
315 * But if own_len == 0 delete all with matching sysid.
316 */
317 shrlp = &vp->v_shrlocks;
318 while (*shrlp) {
319 if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
320 (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
321 shr->s_own_len) == 0)) ||
322
323 (shr->s_own_len == 0 &&
324 is_match_for_del(shr, (*shrlp)->shr))) {
325
326 shrl = *shrlp;
327 *shrlp = shrl->next;
328
329 if (shrl->shr->s_deny & F_MANDDNY)
330 is_nbmand = 1;
331
332 /* XXX deref sysid */
333 kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
334 kmem_free(shrl->shr, sizeof (struct shrlock));
335 kmem_free(shrl, sizeof (struct shrlocklist));
336 found++;
337 continue;
338 }
339 shrlp = &(*shrlp)->next;
340 }
341
342 if (is_nbmand)
343 cv_broadcast(&vp->v_cv);
344
345 mutex_exit(&vp->v_lock);
346 return (found ? 0 : EINVAL);
347 }
348
/*
 * Drop every local share reservation that process "pid" holds on vp.
 * This is do_cleanshares() with a sysid of zero, which selects by pid.
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	int32_t local_sysid = 0;

	do_cleanshares(vp, pid, local_sysid);
}
358
/*
 * Drop every remote share reservation on vp that was made by "sysid".
 * A sysid of zero is ignored, so this never touches shares selected by
 * pid.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid != 0)
		do_cleanshares(vp, 0, sysid);
}
371
372 /*
373 * Cleanup share reservations on given vnode made
374 * by the either given pid or sysid.
375 * If sysid is 0, remove all shares made by given pid,
376 * otherwise all shares made by the given sysid will
377 * be removed.
378 */
379 static void
do_cleanshares(struct vnode * vp,pid_t pid,int32_t sysid)380 do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
381 {
382 struct shrlock shr;
383
384 if (vp->v_shrlocks == NULL)
385 return;
386
387 shr.s_access = 0;
388 shr.s_deny = 0;
389 shr.s_pid = pid;
390 shr.s_sysid = sysid;
391 shr.s_own_len = 0;
392 shr.s_owner = NULL;
393
394 (void) del_share(vp, &shr);
395 }
396
/*
 * Helper for shr_has_remote_shares(): does a share recorded with sysid2
 * satisfy a query for sysid1?  Returns non-zero if so.
 */
static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	if (GETNLMID(sysid1) == 0) {
		/*
		 * Not in a cluster.  A zero query matches any share with
		 * a non-zero sysid; a non-zero query needs an exact match.
		 */
		if (sysid1 == 0)
			return (sysid2 != 0);
		return (sysid1 == sysid2);
	}

	/* In a cluster. */
	if (GETSYSID(sysid1) != 0) {
		/*
		 * Lock obtained through nlm server.  Just need to
		 * compare whole sysids.
		 */
		return (sysid1 == sysid2);
	}

	/*
	 * This is a special case.  The NLM server identified by the
	 * nlmid of sysid1 wishes to find out if it has obtained any
	 * share locks on the vnode.
	 */
	return (GETNLMID(sysid1) == GETNLMID(sysid2));
}
423
424
425 /*
426 * Determine whether there are any shares for the given vnode
427 * with a remote sysid. Returns zero if not, non-zero if there are.
428 * If sysid is non-zero then determine if this sysid has a share.
429 *
430 * Note that the return value from this function is potentially invalid
431 * once it has been returned. The caller is responsible for providing its
432 * own synchronization mechanism to ensure that the return value is useful.
433 */
434 int
shr_has_remote_shares(vnode_t * vp,int32_t sysid)435 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
436 {
437 struct shrlocklist *shrl;
438 int result = 0;
439
440 mutex_enter(&vp->v_lock);
441 shrl = vp->v_shrlocks;
442 while (shrl) {
443 if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
444
445 result = 1;
446 break;
447 }
448 shrl = shrl->next;
449 }
450 mutex_exit(&vp->v_lock);
451 return (result);
452 }
453
454 static int
isreadonly(struct vnode * vp)455 isreadonly(struct vnode *vp)
456 {
457 return (vp->v_type != VCHR && vp->v_type != VBLK &&
458 vp->v_type != VFIFO && vn_is_readonly(vp));
459 }
460
461 #ifdef DEBUG
462 static void
print_shares(struct vnode * vp)463 print_shares(struct vnode *vp)
464 {
465 struct shrlocklist *shrl;
466
467 if (vp->v_shrlocks == NULL) {
468 printf("<NULL>\n");
469 return;
470 }
471
472 shrl = vp->v_shrlocks;
473 while (shrl) {
474 print_share(shrl->shr);
475 shrl = shrl->next;
476 }
477 }
478
479 static void
print_share(struct shrlock * shr)480 print_share(struct shrlock *shr)
481 {
482 int i;
483
484 if (shr == NULL) {
485 printf("<NULL>\n");
486 return;
487 }
488
489 printf(" access(%d): ", shr->s_access);
490 if (shr->s_access & F_RDACC)
491 printf("R");
492 if (shr->s_access & F_WRACC)
493 printf("W");
494 if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
495 printf("N");
496 printf("\n");
497 printf(" deny: ");
498 if (shr->s_deny & F_COMPAT)
499 printf("C");
500 if (shr->s_deny & F_RDDNY)
501 printf("R");
502 if (shr->s_deny & F_WRDNY)
503 printf("W");
504 if (shr->s_deny == F_NODNY)
505 printf("N");
506 printf("\n");
507 printf(" sysid: %d\n", shr->s_sysid);
508 printf(" pid: %d\n", shr->s_pid);
509 printf(" owner: [%d]", shr->s_own_len);
510 printf("'");
511 for (i = 0; i < shr->s_own_len; i++)
512 printf("%02x", (unsigned)shr->s_owner[i]);
513 printf("'\n");
514 }
515 #endif
516
517 /*
518 * Return non-zero if the given I/O request conflicts with a registered
519 * share reservation. Note: These are Windows-compatible semantics, but
520 * windows would do these checks only when opening a file. Details in:
521 * [MS-FSA] 2.1.5.1.2.2 Algorithm to check sharing access...
522 *
523 * A process is identified by the tuple (sysid, pid). When the caller
524 * context is passed to nbl_share_conflict, the sysid and pid in the
525 * caller context are used. Otherwise the sysid is zero, and the pid is
526 * taken from the current process.
527 *
528 * Conflict Algorithm:
529 * 1. An op request of NBL_READ will fail if a different
530 * process has a mandatory share reservation with deny read.
531 *
532 * 2. An op request of NBL_WRITE will fail if a different
533 * process has a mandatory share reservation with deny write.
534 *
535 * 3. An op request of NBL_READWRITE will fail if a different
536 * process has a mandatory share reservation with deny read
537 * or deny write.
538 *
 * 4. An op request of NBL_REMOVE will fail if a different
 *    process has a mandatory share reservation with deny remove.
541 *
542 * 5. An op request of NBL_RENAME ... (same as NBL_REMOVE)
543 *
544 * Otherwise there is no conflict and the op request succeeds.
545 *
546 * This behavior is required for interoperability between
547 * the nfs server, cifs server, and local access.
548 * This behavior can result in non-posix semantics.
549 *
550 * When mandatory share reservations are enabled, a process
551 * should call nbl_share_conflict to determine if the
552 * desired operation would conflict with an existing share
553 * reservation.
554 *
555 * The call to nbl_share_conflict may be skipped if the
556 * process has an existing share reservation and the operation
557 * is being performed in the context of that existing share
558 * reservation.
559 */
560 int
nbl_share_conflict(vnode_t * vp,nbl_op_t op,caller_context_t * ct)561 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
562 {
563 struct shrlocklist *shrl;
564 int conflict = 0;
565 pid_t pid;
566 int sysid;
567
568 ASSERT(nbl_in_crit(vp));
569
570 if (ct == NULL) {
571 pid = curproc->p_pid;
572 sysid = 0;
573 } else {
574 pid = ct->cc_pid;
575 sysid = ct->cc_sysid;
576 }
577
578 mutex_enter(&vp->v_lock);
579 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
580 if (!(shrl->shr->s_deny & F_MANDDNY))
581 continue;
582 /*
583 * Share deny reservations apply to _subsequent_ opens
584 * and therefore only to I/O on _other_ handles.
585 */
586 if (shrl->shr->s_sysid == sysid &&
587 shrl->shr->s_pid == pid)
588 continue;
589
590 /*
591 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
592 * check if the share reservation being examined
593 * belongs to the current process.
594 * NBL_REMOVE and NBL_RENAME do not.
595 * This behavior is required by the conflict
596 * algorithm described above.
597 */
598 switch (op) {
599 case NBL_READ:
600 if (shrl->shr->s_deny & F_RDDNY)
601 conflict = 1;
602 break;
603 case NBL_WRITE:
604 if (shrl->shr->s_deny & F_WRDNY)
605 conflict = 1;
606 break;
607 case NBL_READWRITE:
608 if (shrl->shr->s_deny & F_RWDNY)
609 conflict = 1;
610 break;
611 case NBL_REMOVE:
612 case NBL_RENAME:
613 if (shrl->shr->s_deny & F_RMDNY)
614 conflict = 1;
615 break;
616 #ifdef DEBUG
617 default:
618 cmn_err(CE_PANIC,
619 "nbl_share_conflict: bogus op (%d)",
620 op);
621 break;
622 #endif
623 }
624 if (conflict) {
625 DTRACE_PROBE1(conflict_shrlock,
626 struct shrlock *, shrl->shr);
627 break;
628 }
629 }
630
631 mutex_exit(&vp->v_lock);
632 return (conflict);
633 }
634
635 /*
636 * Determine if the given process has a NBMAND share reservation on the
637 * given vnode. Returns 1 if the process has such a share reservation,
638 * returns 0 otherwise.
639 */
640 int
proc_has_nbmand_share_on_vp(vnode_t * vp,pid_t pid)641 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
642 {
643 struct shrlocklist *shrl;
644
645 /*
646 * Any NBMAND share reservation on the vp for this process?
647 */
648 mutex_enter(&vp->v_lock);
649 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
650 if (shrl->shr->s_sysid == 0 &&
651 (shrl->shr->s_deny & F_MANDDNY) &&
652 (shrl->shr->s_pid == pid)) {
653 mutex_exit(&vp->v_lock);
654 return (1);
655 }
656 }
657 mutex_exit(&vp->v_lock);
658
659 return (0);
660 }
661