xref: /illumos-gate/usr/src/uts/common/os/share.c (revision dd72704bd9e794056c558153663c739e2012d721)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/fcntl.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/share.h>
38 #include <sys/cmn_err.h>
39 #include <sys/kmem.h>
40 #include <sys/debug.h>
41 #include <sys/t_lock.h>
42 #include <sys/errno.h>
43 #include <sys/nbmlock.h>
44 
/*
 * Debug knob: when non-zero, DEBUG kernels print the vnode's whole share
 * list after add_share() links in a new reservation.
 */
int share_debug = 0;

/* Forward declarations of file-local helpers. */
static int isreadonly(struct vnode *vp);
static void do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid);

#ifdef DEBUG
static void print_shares(struct vnode *vp);
static void print_share(struct shrlock *shr);
#endif
54 
55 
56 /*
57  * Add the share reservation shr to vp.
58  */
59 int
60 add_share(struct vnode *vp, struct shrlock *shr)
61 {
62 	struct shrlocklist *shrl;
63 
64 	/*
65 	 * An access of zero is not legal, however some older clients
66 	 * generate it anyways.  Allow the request only if it is
67 	 * coming from a remote system.  Be generous in what you
68 	 * accept and strict in what you send.
69 	 */
70 	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
71 		return (EINVAL);
72 	}
73 
74 	/*
75 	 * Sanity check to make sure we have valid options.
76 	 * There is known overlap but it doesn't hurt to be careful.
77 	 */
78 	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
79 		return (EINVAL);
80 	}
81 	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
82 	    F_MANDDNY|F_RMDNY)) {
83 		return (EINVAL);
84 	}
85 
86 	mutex_enter(&vp->v_lock);
87 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
88 		/*
89 		 * If the share owner matches previous request
90 		 * do special handling.
91 		 */
92 		if ((shrl->shr->s_sysid == shr->s_sysid) &&
93 		    (shrl->shr->s_pid == shr->s_pid) &&
94 		    (shrl->shr->s_own_len == shr->s_own_len) &&
95 		    bcmp(shrl->shr->s_owner, shr->s_owner,
96 		    shr->s_own_len) == 0) {
97 
98 			/*
99 			 * If the existing request is F_COMPAT and
100 			 * is the first share then allow any F_COMPAT
101 			 * from the same process.  Trick:  If the existing
102 			 * F_COMPAT is write access then it must have
103 			 * the same owner as the first.
104 			 */
105 			if ((shrl->shr->s_deny & F_COMPAT) &&
106 			    (shr->s_deny & F_COMPAT) &&
107 			    ((shrl->next == NULL) ||
108 			    (shrl->shr->s_access & F_WRACC)))
109 				break;
110 		}
111 
112 		/*
113 		 * If a first share has been done in compatibility mode
114 		 * handle the special cases.
115 		 */
116 		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
117 
118 			if (!(shr->s_deny & F_COMPAT)) {
119 				/*
120 				 * If not compat and want write access or
121 				 * want to deny read or
122 				 * write exists, fails
123 				 */
124 				if ((shr->s_access & F_WRACC) ||
125 				    (shr->s_deny & F_RDDNY) ||
126 				    (shrl->shr->s_access & F_WRACC)) {
127 					mutex_exit(&vp->v_lock);
128 					DTRACE_PROBE1(conflict_shrlock,
129 					    struct shrlock *, shrl->shr);
130 					return (EAGAIN);
131 				}
132 				/*
133 				 * If read only file allow, this may allow
134 				 * a deny write but that is meaningless on
135 				 * a read only file.
136 				 */
137 				if (isreadonly(vp))
138 					break;
139 				mutex_exit(&vp->v_lock);
140 				DTRACE_PROBE1(conflict_shrlock,
141 				    struct shrlock *, shrl->shr);
142 				return (EAGAIN);
143 			}
144 			/*
145 			 * This is a compat request and read access
146 			 * and the first was also read access
147 			 * we always allow it, otherwise we reject because
148 			 * we have handled the only valid write case above.
149 			 */
150 			if ((shr->s_access == F_RDACC) &&
151 			    (shrl->shr->s_access == F_RDACC))
152 				break;
153 			mutex_exit(&vp->v_lock);
154 			DTRACE_PROBE1(conflict_shrlock,
155 			    struct shrlock *, shrl->shr);
156 			return (EAGAIN);
157 		}
158 
159 		/*
160 		 * If we are trying to share in compatibility mode
161 		 * and the current share is compat (and not the first)
162 		 * we don't know enough.
163 		 */
164 		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
165 			continue;
166 
167 		/*
168 		 * If this is a compat we check for what can't succeed.
169 		 */
170 		if (shr->s_deny & F_COMPAT) {
171 			/*
172 			 * If we want write access or
173 			 * if anyone is denying read or
174 			 * if anyone has write access we fail
175 			 */
176 			if ((shr->s_access & F_WRACC) ||
177 			    (shrl->shr->s_deny & F_RDDNY) ||
178 			    (shrl->shr->s_access & F_WRACC)) {
179 				mutex_exit(&vp->v_lock);
180 				DTRACE_PROBE1(conflict_shrlock,
181 				    struct shrlock *, shrl->shr);
182 				return (EAGAIN);
183 			}
184 			/*
185 			 * If the first was opened with only read access
186 			 * and is a read only file we allow.
187 			 */
188 			if (shrl->next == NULL) {
189 				if ((shrl->shr->s_access == F_RDACC) &&
190 				    isreadonly(vp)) {
191 					break;
192 				}
193 				mutex_exit(&vp->v_lock);
194 				DTRACE_PROBE1(conflict_shrlock,
195 				    struct shrlock *, shrl->shr);
196 				return (EAGAIN);
197 			}
198 			/*
199 			 * We still can't determine our fate so continue
200 			 */
201 			continue;
202 		}
203 
204 		/*
205 		 * Simple bitwise test, if we are trying to access what
206 		 * someone else is denying or we are trying to deny
207 		 * what someone else is accessing we fail.
208 		 */
209 		if ((shr->s_access & shrl->shr->s_deny) ||
210 		    (shr->s_deny & shrl->shr->s_access)) {
211 			mutex_exit(&vp->v_lock);
212 			DTRACE_PROBE1(conflict_shrlock,
213 			    struct shrlock *, shrl->shr);
214 			return (EAGAIN);
215 		}
216 	}
217 
218 	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
219 	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
220 	shrl->shr->s_access = shr->s_access;
221 	shrl->shr->s_deny = shr->s_deny;
222 
223 	/*
224 	 * Make sure no other deny modes are also set with F_COMPAT
225 	 */
226 	if (shrl->shr->s_deny & F_COMPAT)
227 		shrl->shr->s_deny = F_COMPAT;
228 	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
229 	shrl->shr->s_pid = shr->s_pid;
230 	shrl->shr->s_own_len = shr->s_own_len;
231 	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
232 	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
233 	shrl->next = vp->v_shrlocks;
234 	vp->v_shrlocks = shrl;
235 #ifdef DEBUG
236 	if (share_debug)
237 		print_shares(vp);
238 #endif
239 
240 	mutex_exit(&vp->v_lock);
241 
242 	return (0);
243 }
244 
245 /*
246  *	nlmid	sysid	pid
247  *	=====	=====	===
248  *	!=0	!=0	=0	in cluster; NLM lock
249  *	!=0	=0	=0	in cluster; special case for NLM lock
250  *	!=0	=0	!=0	in cluster; PXFS local lock
251  *	!=0	!=0	!=0	cannot happen
252  *	=0	!=0	=0	not in cluster; NLM lock
253  *	=0	=0	!=0	not in cluster; local lock
254  *	=0	=0	=0	cannot happen
255  *	=0	!=0	!=0	cannot happen
256  */
257 static int
258 is_match_for_del(struct shrlock *shr, struct shrlock *element)
259 {
260 	int nlmid1, nlmid2;
261 	int result = 0;
262 
263 	nlmid1 = GETNLMID(shr->s_sysid);
264 	nlmid2 = GETNLMID(element->s_sysid);
265 
266 	if (nlmid1 != 0) {		/* in a cluster */
267 		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
268 			/*
269 			 * Lock obtained through nlm server.  Just need to
270 			 * compare whole sysids.  pid will always = 0.
271 			 */
272 			result = shr->s_sysid == element->s_sysid;
273 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
274 			/*
275 			 * This is a special case.  The NLM server wishes to
276 			 * delete all share locks obtained through nlmid1.
277 			 */
278 			result = (nlmid1 == nlmid2);
279 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
280 			/*
281 			 * Lock obtained locally through PXFS.  Match nlmids
282 			 * and pids.
283 			 */
284 			result = (nlmid1 == nlmid2 &&
285 			    shr->s_pid == element->s_pid);
286 		}
287 	} else {			/* not in a cluster */
288 		result = ((shr->s_sysid == 0 &&
289 		    shr->s_pid == element->s_pid) ||
290 		    (shr->s_sysid != 0 &&
291 		    shr->s_sysid == element->s_sysid));
292 	}
293 	return (result);
294 }
295 
296 /*
297  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
298  * share could not be found.  If the share reservation is an NBMAND share
299  * reservation, signal anyone waiting for the share to go away (e.g.,
300  * blocking lock requests).
301  */
302 
303 int
304 del_share(struct vnode *vp, struct shrlock *shr)
305 {
306 	struct shrlocklist *shrl;
307 	struct shrlocklist **shrlp;
308 	int found = 0;
309 	int is_nbmand = 0;
310 
311 	mutex_enter(&vp->v_lock);
312 	/*
313 	 * Delete the shares with the matching sysid and owner
314 	 * But if own_len == 0 and sysid == 0 delete all with matching pid
315 	 * But if own_len == 0 delete all with matching sysid.
316 	 */
317 	shrlp = &vp->v_shrlocks;
318 	while (*shrlp) {
319 		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
320 		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
321 		    shr->s_own_len) == 0)) ||
322 
323 		    (shr->s_own_len == 0 &&
324 		    is_match_for_del(shr, (*shrlp)->shr))) {
325 
326 			shrl = *shrlp;
327 			*shrlp = shrl->next;
328 
329 			if (shrl->shr->s_deny & F_MANDDNY)
330 				is_nbmand = 1;
331 
332 			/* XXX deref sysid */
333 			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
334 			kmem_free(shrl->shr, sizeof (struct shrlock));
335 			kmem_free(shrl, sizeof (struct shrlocklist));
336 			found++;
337 			continue;
338 		}
339 		shrlp = &(*shrlp)->next;
340 	}
341 
342 	if (is_nbmand)
343 		cv_broadcast(&vp->v_cv);
344 
345 	mutex_exit(&vp->v_lock);
346 	return (found ? 0 : EINVAL);
347 }
348 
/*
 * Remove the local share reservations held on vp by the process pid.
 * This is do_cleanshares() with a sysid of zero, so the selection is
 * made by pid.
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	const int32_t local_sysid = 0;

	do_cleanshares(vp, pid, local_sysid);
}
358 
/*
 * Remove the remote share reservations that the given sysid holds on vp.
 * A sysid of zero is ignored and nothing is removed.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid != 0)
		do_cleanshares(vp, 0, sysid);
}
371 
372 /*
373  * Cleanup share reservations on given vnode made
374  * by the either given pid or sysid.
375  * If sysid is 0, remove all shares made by given pid,
376  * otherwise all shares made by the given sysid will
377  * be removed.
378  */
379 static void
380 do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
381 {
382 	struct shrlock shr;
383 
384 	if (vp->v_shrlocks == NULL)
385 		return;
386 
387 	shr.s_access = 0;
388 	shr.s_deny = 0;
389 	shr.s_pid = pid;
390 	shr.s_sysid = sysid;
391 	shr.s_own_len = 0;
392 	shr.s_owner = NULL;
393 
394 	(void) del_share(vp, &shr);
395 }
396 
/*
 * Decide whether a reservation recorded with sysid2 counts as a match
 * for the query sysid1.  Returns non-zero on a match, zero otherwise.
 */
static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	if (GETNLMID(sysid1) == 0) {
		/*
		 * Not in a cluster.  A zero query matches any non-zero
		 * sysid; a non-zero query matches only that exact sysid.
		 */
		if (sysid1 == 0)
			return (sysid2 != 0);
		return (sysid1 == sysid2);
	}

	/* In a cluster. */
	if (GETSYSID(sysid1) != 0) {
		/*
		 * Lock obtained through nlm server.  Just need to
		 * compare whole sysids.
		 */
		return (sysid1 == sysid2);
	}

	/*
	 * This is a special case.  The NLM server identified by the
	 * nlmid in sysid1 wishes to find out if it has obtained any
	 * share locks on the vnode.
	 */
	return (GETNLMID(sysid1) == GETNLMID(sysid2));
}
423 
424 
425 /*
426  * Determine whether there are any shares for the given vnode
427  * with a remote sysid. Returns zero if not, non-zero if there are.
428  * If sysid is non-zero then determine if this sysid has a share.
429  *
430  * Note that the return value from this function is potentially invalid
431  * once it has been returned.  The caller is responsible for providing its
432  * own synchronization mechanism to ensure that the return value is useful.
433  */
434 int
435 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
436 {
437 	struct shrlocklist *shrl;
438 	int result = 0;
439 
440 	mutex_enter(&vp->v_lock);
441 	shrl = vp->v_shrlocks;
442 	while (shrl) {
443 		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
444 
445 			result = 1;
446 			break;
447 		}
448 		shrl = shrl->next;
449 	}
450 	mutex_exit(&vp->v_lock);
451 	return (result);
452 }
453 
454 static int
455 isreadonly(struct vnode *vp)
456 {
457 	return (vp->v_type != VCHR && vp->v_type != VBLK &&
458 	    vp->v_type != VFIFO && vn_is_readonly(vp));
459 }
460 
461 #ifdef DEBUG
462 static void
463 print_shares(struct vnode *vp)
464 {
465 	struct shrlocklist *shrl;
466 
467 	if (vp->v_shrlocks == NULL) {
468 		printf("<NULL>\n");
469 		return;
470 	}
471 
472 	shrl = vp->v_shrlocks;
473 	while (shrl) {
474 		print_share(shrl->shr);
475 		shrl = shrl->next;
476 	}
477 }
478 
479 static void
480 print_share(struct shrlock *shr)
481 {
482 	int i;
483 
484 	if (shr == NULL) {
485 		printf("<NULL>\n");
486 		return;
487 	}
488 
489 	printf("    access(%d):	", shr->s_access);
490 	if (shr->s_access & F_RDACC)
491 		printf("R");
492 	if (shr->s_access & F_WRACC)
493 		printf("W");
494 	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
495 		printf("N");
496 	printf("\n");
497 	printf("    deny:	");
498 	if (shr->s_deny & F_COMPAT)
499 		printf("C");
500 	if (shr->s_deny & F_RDDNY)
501 		printf("R");
502 	if (shr->s_deny & F_WRDNY)
503 		printf("W");
504 	if (shr->s_deny == F_NODNY)
505 		printf("N");
506 	printf("\n");
507 	printf("    sysid:	%d\n", shr->s_sysid);
508 	printf("    pid:	%d\n", shr->s_pid);
509 	printf("    owner:	[%d]", shr->s_own_len);
510 	printf("'");
511 	for (i = 0; i < shr->s_own_len; i++)
512 		printf("%02x", (unsigned)shr->s_owner[i]);
513 	printf("'\n");
514 }
515 #endif
516 
517 /*
518  * Return non-zero if the given I/O request conflicts with a registered
519  * share reservation.  Note: These are Windows-compatible semantics, but
520  * windows would do these checks only when opening a file.  Details in:
521  *	[MS-FSA] 2.1.5.1.2.2 Algorithm to check sharing access...
522  *
523  * A process is identified by the tuple (sysid, pid). When the caller
524  * context is passed to nbl_share_conflict, the sysid and pid in the
525  * caller context are used. Otherwise the sysid is zero, and the pid is
526  * taken from the current process.
527  *
528  * Conflict Algorithm:
529  *   1. An op request of NBL_READ will fail if a different
530  *      process has a mandatory share reservation with deny read.
531  *
532  *   2. An op request of NBL_WRITE will fail if a different
533  *      process has a mandatory share reservation with deny write.
534  *
535  *   3. An op request of NBL_READWRITE will fail if a different
536  *      process has a mandatory share reservation with deny read
537  *      or deny write.
538  *
539  *   4. An op request of NBL_REMOVE will fail if there is
540  *      a mandatory share reservation with deny remove.
541  *
542  *   5. An op request of NBL_RENAME ... (same as NBL_REMOVE)
543  *
544  *   Otherwise there is no conflict and the op request succeeds.
545  *
546  * This behavior is required for interoperability between
547  * the nfs server, cifs server, and local access.
548  * This behavior can result in non-posix semantics.
549  *
550  * When mandatory share reservations are enabled, a process
551  * should call nbl_share_conflict to determine if the
552  * desired operation would conflict with an existing share
553  * reservation.
554  *
555  * The call to nbl_share_conflict may be skipped if the
556  * process has an existing share reservation and the operation
557  * is being performed in the context of that existing share
558  * reservation.
559  */
560 int
561 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
562 {
563 	struct shrlocklist *shrl;
564 	int conflict = 0;
565 	pid_t pid;
566 	int sysid;
567 
568 	ASSERT(nbl_in_crit(vp));
569 
570 	if (ct == NULL) {
571 		pid = curproc->p_pid;
572 		sysid = 0;
573 	} else {
574 		pid = ct->cc_pid;
575 		sysid = ct->cc_sysid;
576 	}
577 
578 	mutex_enter(&vp->v_lock);
579 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
580 		if (!(shrl->shr->s_deny & F_MANDDNY))
581 			continue;
582 		/*
583 		 * Share deny reservations apply to _subsequent_ opens
584 		 * and therefore only to I/O on _other_ handles.
585 		 */
586 		if (shrl->shr->s_sysid == sysid &&
587 		    shrl->shr->s_pid == pid)
588 			continue;
589 
590 		/*
591 		 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
592 		 * check if the share reservation being examined
593 		 * belongs to the current process.
594 		 * NBL_REMOVE and NBL_RENAME do not.
595 		 * This behavior is required by the conflict
596 		 * algorithm described above.
597 		 */
598 		switch (op) {
599 		case NBL_READ:
600 			if (shrl->shr->s_deny & F_RDDNY)
601 				conflict = 1;
602 			break;
603 		case NBL_WRITE:
604 			if (shrl->shr->s_deny & F_WRDNY)
605 				conflict = 1;
606 			break;
607 		case NBL_READWRITE:
608 			if (shrl->shr->s_deny & F_RWDNY)
609 				conflict = 1;
610 			break;
611 		case NBL_REMOVE:
612 		case NBL_RENAME:
613 			if (shrl->shr->s_deny & F_RMDNY)
614 				conflict = 1;
615 			break;
616 #ifdef DEBUG
617 		default:
618 			cmn_err(CE_PANIC,
619 			    "nbl_share_conflict: bogus op (%d)",
620 			    op);
621 			break;
622 #endif
623 		}
624 		if (conflict) {
625 			DTRACE_PROBE1(conflict_shrlock,
626 			    struct shrlock *, shrl->shr);
627 			break;
628 		}
629 	}
630 
631 	mutex_exit(&vp->v_lock);
632 	return (conflict);
633 }
634 
635 /*
636  * Determine if the given process has a NBMAND share reservation on the
637  * given vnode. Returns 1 if the process has such a share reservation,
638  * returns 0 otherwise.
639  */
640 int
641 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
642 {
643 	struct shrlocklist *shrl;
644 
645 	/*
646 	 * Any NBMAND share reservation on the vp for this process?
647 	 */
648 	mutex_enter(&vp->v_lock);
649 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
650 		if (shrl->shr->s_sysid == 0 &&
651 		    (shrl->shr->s_deny & F_MANDDNY) &&
652 		    (shrl->shr->s_pid == pid)) {
653 			mutex_exit(&vp->v_lock);
654 			return (1);
655 		}
656 	}
657 	mutex_exit(&vp->v_lock);
658 
659 	return (0);
660 }
661