xref: /illumos-gate/usr/src/uts/common/os/share.c (revision d21cedec4ed074c3a6feb4a8a007a9cb83cca060)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/fcntl.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/share.h>
38 #include <sys/cmn_err.h>
39 #include <sys/kmem.h>
40 #include <sys/debug.h>
41 #include <sys/t_lock.h>
42 #include <sys/errno.h>
43 #include <sys/nbmlock.h>
44 
/*
 * Debug switch.  When non-zero on a DEBUG build, add_share() dumps the
 * vnode's whole reservation list after each successful insertion.
 */
int share_debug = 0;

#ifdef DEBUG
/* Diagnostic printers; only compiled into DEBUG builds. */
static void print_shares(struct vnode *vp);
static void print_share(struct shrlock *shr);
#endif

/* File-local helpers that are used before their definitions. */
static int isreadonly(struct vnode *vp);
static void do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid);
54 
55 
56 /*
57  * Add the share reservation shr to vp.
58  */
59 int
60 add_share(struct vnode *vp, struct shrlock *shr)
61 {
62 	struct shrlocklist *shrl;
63 
64 	/*
65 	 * An access of zero is not legal, however some older clients
66 	 * generate it anyways.  Allow the request only if it is
67 	 * coming from a remote system.  Be generous in what you
68 	 * accept and strict in what you send.
69 	 */
70 	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
71 		return (EINVAL);
72 	}
73 
74 	/*
75 	 * Sanity check to make sure we have valid options.
76 	 * There is known overlap but it doesn't hurt to be careful.
77 	 */
78 	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
79 		return (EINVAL);
80 	}
81 	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
82 	    F_MANDDNY|F_RMDNY)) {
83 		return (EINVAL);
84 	}
85 
86 	mutex_enter(&vp->v_lock);
87 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
88 		/*
89 		 * If the share owner matches previous request
90 		 * do special handling.
91 		 */
92 		if ((shrl->shr->s_sysid == shr->s_sysid) &&
93 		    (shrl->shr->s_pid == shr->s_pid) &&
94 		    (shrl->shr->s_own_len == shr->s_own_len) &&
95 		    bcmp(shrl->shr->s_owner, shr->s_owner,
96 		    shr->s_own_len) == 0) {
97 
98 			/*
99 			 * If the existing request is F_COMPAT and
100 			 * is the first share then allow any F_COMPAT
101 			 * from the same process.  Trick:  If the existing
102 			 * F_COMPAT is write access then it must have
103 			 * the same owner as the first.
104 			 */
105 			if ((shrl->shr->s_deny & F_COMPAT) &&
106 			    (shr->s_deny & F_COMPAT) &&
107 			    ((shrl->next == NULL) ||
108 			    (shrl->shr->s_access & F_WRACC)))
109 				break;
110 		}
111 
112 		/*
113 		 * If a first share has been done in compatibility mode
114 		 * handle the special cases.
115 		 */
116 		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
117 
118 			if (!(shr->s_deny & F_COMPAT)) {
119 				/*
120 				 * If not compat and want write access or
121 				 * want to deny read or
122 				 * write exists, fails
123 				 */
124 				if ((shr->s_access & F_WRACC) ||
125 				    (shr->s_deny & F_RDDNY) ||
126 				    (shrl->shr->s_access & F_WRACC)) {
127 					mutex_exit(&vp->v_lock);
128 					return (EAGAIN);
129 				}
130 				/*
131 				 * If read only file allow, this may allow
132 				 * a deny write but that is meaningless on
133 				 * a read only file.
134 				 */
135 				if (isreadonly(vp))
136 					break;
137 				mutex_exit(&vp->v_lock);
138 				return (EAGAIN);
139 			}
140 			/*
141 			 * This is a compat request and read access
142 			 * and the first was also read access
143 			 * we always allow it, otherwise we reject because
144 			 * we have handled the only valid write case above.
145 			 */
146 			if ((shr->s_access == F_RDACC) &&
147 			    (shrl->shr->s_access == F_RDACC))
148 				break;
149 			mutex_exit(&vp->v_lock);
150 			return (EAGAIN);
151 		}
152 
153 		/*
154 		 * If we are trying to share in compatibility mode
155 		 * and the current share is compat (and not the first)
156 		 * we don't know enough.
157 		 */
158 		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
159 			continue;
160 
161 		/*
162 		 * If this is a compat we check for what can't succeed.
163 		 */
164 		if (shr->s_deny & F_COMPAT) {
165 			/*
166 			 * If we want write access or
167 			 * if anyone is denying read or
168 			 * if anyone has write access we fail
169 			 */
170 			if ((shr->s_access & F_WRACC) ||
171 			    (shrl->shr->s_deny & F_RDDNY) ||
172 			    (shrl->shr->s_access & F_WRACC)) {
173 				mutex_exit(&vp->v_lock);
174 				return (EAGAIN);
175 			}
176 			/*
177 			 * If the first was opened with only read access
178 			 * and is a read only file we allow.
179 			 */
180 			if (shrl->next == NULL) {
181 				if ((shrl->shr->s_access == F_RDACC) &&
182 				    isreadonly(vp)) {
183 					break;
184 				}
185 				mutex_exit(&vp->v_lock);
186 				return (EAGAIN);
187 			}
188 			/*
189 			 * We still can't determine our fate so continue
190 			 */
191 			continue;
192 		}
193 
194 		/*
195 		 * Simple bitwise test, if we are trying to access what
196 		 * someone else is denying or we are trying to deny
197 		 * what someone else is accessing we fail.
198 		 */
199 		if ((shr->s_access & shrl->shr->s_deny) ||
200 		    (shr->s_deny & shrl->shr->s_access)) {
201 			mutex_exit(&vp->v_lock);
202 			return (EAGAIN);
203 		}
204 	}
205 
206 	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
207 	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
208 	shrl->shr->s_access = shr->s_access;
209 	shrl->shr->s_deny = shr->s_deny;
210 
211 	/*
212 	 * Make sure no other deny modes are also set with F_COMPAT
213 	 */
214 	if (shrl->shr->s_deny & F_COMPAT)
215 		shrl->shr->s_deny = F_COMPAT;
216 	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
217 	shrl->shr->s_pid = shr->s_pid;
218 	shrl->shr->s_own_len = shr->s_own_len;
219 	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
220 	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
221 	shrl->next = vp->v_shrlocks;
222 	vp->v_shrlocks = shrl;
223 #ifdef DEBUG
224 	if (share_debug)
225 		print_shares(vp);
226 #endif
227 
228 	mutex_exit(&vp->v_lock);
229 
230 	return (0);
231 }
232 
233 /*
234  *	nlmid	sysid	pid
235  *	=====	=====	===
236  *	!=0	!=0	=0	in cluster; NLM lock
237  *	!=0	=0	=0	in cluster; special case for NLM lock
238  *	!=0	=0	!=0	in cluster; PXFS local lock
239  *	!=0	!=0	!=0	cannot happen
240  *	=0	!=0	=0	not in cluster; NLM lock
241  *	=0	=0	!=0	not in cluster; local lock
242  *	=0	=0	=0	cannot happen
243  *	=0	!=0	!=0	cannot happen
244  */
245 static int
246 is_match_for_del(struct shrlock *shr, struct shrlock *element)
247 {
248 	int nlmid1, nlmid2;
249 	int result = 0;
250 
251 	nlmid1 = GETNLMID(shr->s_sysid);
252 	nlmid2 = GETNLMID(element->s_sysid);
253 
254 	if (nlmid1 != 0) {		/* in a cluster */
255 		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
256 			/*
257 			 * Lock obtained through nlm server.  Just need to
258 			 * compare whole sysids.  pid will always = 0.
259 			 */
260 			result = shr->s_sysid == element->s_sysid;
261 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
262 			/*
263 			 * This is a special case.  The NLM server wishes to
264 			 * delete all share locks obtained through nlmid1.
265 			 */
266 			result = (nlmid1 == nlmid2);
267 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
268 			/*
269 			 * Lock obtained locally through PXFS.  Match nlmids
270 			 * and pids.
271 			 */
272 			result = (nlmid1 == nlmid2 &&
273 			    shr->s_pid == element->s_pid);
274 		}
275 	} else {			/* not in a cluster */
276 		result = ((shr->s_sysid == 0 &&
277 		    shr->s_pid == element->s_pid) ||
278 		    (shr->s_sysid != 0 &&
279 		    shr->s_sysid == element->s_sysid));
280 	}
281 	return (result);
282 }
283 
284 /*
285  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
286  * share could not be found.  If the share reservation is an NBMAND share
287  * reservation, signal anyone waiting for the share to go away (e.g.,
288  * blocking lock requests).
289  */
290 
291 int
292 del_share(struct vnode *vp, struct shrlock *shr)
293 {
294 	struct shrlocklist *shrl;
295 	struct shrlocklist **shrlp;
296 	int found = 0;
297 	int is_nbmand = 0;
298 
299 	mutex_enter(&vp->v_lock);
300 	/*
301 	 * Delete the shares with the matching sysid and owner
302 	 * But if own_len == 0 and sysid == 0 delete all with matching pid
303 	 * But if own_len == 0 delete all with matching sysid.
304 	 */
305 	shrlp = &vp->v_shrlocks;
306 	while (*shrlp) {
307 		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
308 		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
309 		    shr->s_own_len) == 0)) ||
310 
311 		    (shr->s_own_len == 0 &&
312 		    is_match_for_del(shr, (*shrlp)->shr))) {
313 
314 			shrl = *shrlp;
315 			*shrlp = shrl->next;
316 
317 			if (shrl->shr->s_deny & F_MANDDNY)
318 				is_nbmand = 1;
319 
320 			/* XXX deref sysid */
321 			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
322 			kmem_free(shrl->shr, sizeof (struct shrlock));
323 			kmem_free(shrl, sizeof (struct shrlocklist));
324 			found++;
325 			continue;
326 		}
327 		shrlp = &(*shrlp)->next;
328 	}
329 
330 	if (is_nbmand)
331 		cv_broadcast(&vp->v_cv);
332 
333 	mutex_exit(&vp->v_lock);
334 	return (found ? 0 : EINVAL);
335 }
336 
/*
 * Drop all local share reservations held on vp by process pid.
 * This is do_cleanshares() with a sysid of zero, which selects by pid.
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	int32_t local_sysid = 0;

	do_cleanshares(vp, pid, local_sysid);
}
346 
/*
 * Drop all remote share reservations that the given sysid holds on vp.
 * A sysid of zero is ignored: nothing is removed.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid != 0)
		do_cleanshares(vp, 0, sysid);
}
359 
360 /*
361  * Cleanup share reservations on given vnode made
362  * by the either given pid or sysid.
363  * If sysid is 0, remove all shares made by given pid,
364  * otherwise all shares made by the given sysid will
365  * be removed.
366  */
367 static void
368 do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
369 {
370 	struct shrlock shr;
371 
372 	if (vp->v_shrlocks == NULL)
373 		return;
374 
375 	shr.s_access = 0;
376 	shr.s_deny = 0;
377 	shr.s_pid = pid;
378 	shr.s_sysid = sysid;
379 	shr.s_own_len = 0;
380 	shr.s_owner = NULL;
381 
382 	(void) del_share(vp, &shr);
383 }
384 
/*
 * Decide whether a held reservation's sysid (sysid2) satisfies the
 * query sysid (sysid1) for shr_has_remote_shares().
 * Returns 1 on a match and 0 otherwise.
 */
static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	if (GETNLMID(sysid1) == 0) {
		/*
		 * Not in a cluster.  A zero query matches any remote
		 * (non-zero) sysid; a non-zero query must match exactly.
		 */
		if (sysid1 == 0)
			return (sysid2 != 0);
		return (sysid1 == sysid2);
	}

	/*
	 * In a cluster.  With a non-zero sysid part the lock was obtained
	 * through the NLM server, so the whole sysids are compared.
	 */
	if (GETSYSID(sysid1) != 0)
		return (sysid1 == sysid2);

	/*
	 * Special case: the NLM server identified by the query's nlmid
	 * asks whether it holds any share on the vnode at all.
	 */
	return (GETNLMID(sysid1) == GETNLMID(sysid2));
}
411 
412 
413 /*
414  * Determine whether there are any shares for the given vnode
415  * with a remote sysid. Returns zero if not, non-zero if there are.
416  * If sysid is non-zero then determine if this sysid has a share.
417  *
418  * Note that the return value from this function is potentially invalid
419  * once it has been returned.  The caller is responsible for providing its
420  * own synchronization mechanism to ensure that the return value is useful.
421  */
422 int
423 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
424 {
425 	struct shrlocklist *shrl;
426 	int result = 0;
427 
428 	mutex_enter(&vp->v_lock);
429 	shrl = vp->v_shrlocks;
430 	while (shrl) {
431 		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
432 
433 			result = 1;
434 			break;
435 		}
436 		shrl = shrl->next;
437 	}
438 	mutex_exit(&vp->v_lock);
439 	return (result);
440 }
441 
442 static int
443 isreadonly(struct vnode *vp)
444 {
445 	return (vp->v_type != VCHR && vp->v_type != VBLK &&
446 	    vp->v_type != VFIFO && vn_is_readonly(vp));
447 }
448 
449 #ifdef DEBUG
450 static void
451 print_shares(struct vnode *vp)
452 {
453 	struct shrlocklist *shrl;
454 
455 	if (vp->v_shrlocks == NULL) {
456 		printf("<NULL>\n");
457 		return;
458 	}
459 
460 	shrl = vp->v_shrlocks;
461 	while (shrl) {
462 		print_share(shrl->shr);
463 		shrl = shrl->next;
464 	}
465 }
466 
467 static void
468 print_share(struct shrlock *shr)
469 {
470 	int i;
471 
472 	if (shr == NULL) {
473 		printf("<NULL>\n");
474 		return;
475 	}
476 
477 	printf("    access(%d):	", shr->s_access);
478 	if (shr->s_access & F_RDACC)
479 		printf("R");
480 	if (shr->s_access & F_WRACC)
481 		printf("W");
482 	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
483 		printf("N");
484 	printf("\n");
485 	printf("    deny:	");
486 	if (shr->s_deny & F_COMPAT)
487 		printf("C");
488 	if (shr->s_deny & F_RDDNY)
489 		printf("R");
490 	if (shr->s_deny & F_WRDNY)
491 		printf("W");
492 	if (shr->s_deny == F_NODNY)
493 		printf("N");
494 	printf("\n");
495 	printf("    sysid:	%d\n", shr->s_sysid);
496 	printf("    pid:	%d\n", shr->s_pid);
497 	printf("    owner:	[%d]", shr->s_own_len);
498 	printf("'");
499 	for (i = 0; i < shr->s_own_len; i++)
500 		printf("%02x", (unsigned)shr->s_owner[i]);
501 	printf("'\n");
502 }
503 #endif
504 
505 /*
506  * Return non-zero if the given I/O request conflicts with a registered
507  * share reservation.  Note: These are Windows-compatible semantics, but
508  * windows would do these checks only when opening a file.  Details in:
509  *	[MS-FSA] 2.1.5.1.2.2 Algorithm to check sharing access...
510  *
511  * A process is identified by the tuple (sysid, pid). When the caller
512  * context is passed to nbl_share_conflict, the sysid and pid in the
513  * caller context are used. Otherwise the sysid is zero, and the pid is
514  * taken from the current process.
515  *
516  * Conflict Algorithm:
517  *   1. An op request of NBL_READ will fail if a different
518  *      process has a mandatory share reservation with deny read.
519  *
520  *   2. An op request of NBL_WRITE will fail if a different
521  *      process has a mandatory share reservation with deny write.
522  *
523  *   3. An op request of NBL_READWRITE will fail if a different
524  *      process has a mandatory share reservation with deny read
525  *      or deny write.
526  *
527  *   4. An op request of NBL_REMOVE will fail if there is
528  *      a mandatory share reservation with deny remove.
529  *
530  *   5. An op request of NBL_RENAME ... (same as NBL_REMOVE)
531  *
532  *   Otherwise there is no conflict and the op request succeeds.
533  *
534  * This behavior is required for interoperability between
535  * the nfs server, cifs server, and local access.
536  * This behavior can result in non-posix semantics.
537  *
538  * When mandatory share reservations are enabled, a process
539  * should call nbl_share_conflict to determine if the
540  * desired operation would conflict with an existing share
541  * reservation.
542  *
543  * The call to nbl_share_conflict may be skipped if the
544  * process has an existing share reservation and the operation
545  * is being performed in the context of that existing share
546  * reservation.
547  */
548 int
549 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
550 {
551 	struct shrlocklist *shrl;
552 	int conflict = 0;
553 	pid_t pid;
554 	int sysid;
555 
556 	ASSERT(nbl_in_crit(vp));
557 
558 	if (ct == NULL) {
559 		pid = curproc->p_pid;
560 		sysid = 0;
561 	} else {
562 		pid = ct->cc_pid;
563 		sysid = ct->cc_sysid;
564 	}
565 
566 	mutex_enter(&vp->v_lock);
567 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
568 		if (!(shrl->shr->s_deny & F_MANDDNY))
569 			continue;
570 		/*
571 		 * Share deny reservations apply to _subsequent_ opens
572 		 * and therefore only to I/O on _other_ handles.
573 		 */
574 		if (shrl->shr->s_sysid == sysid &&
575 		    shrl->shr->s_pid == pid)
576 			continue;
577 
578 		/*
579 		 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
580 		 * check if the share reservation being examined
581 		 * belongs to the current process.
582 		 * NBL_REMOVE and NBL_RENAME do not.
583 		 * This behavior is required by the conflict
584 		 * algorithm described above.
585 		 */
586 		switch (op) {
587 		case NBL_READ:
588 			if (shrl->shr->s_deny & F_RDDNY)
589 				conflict = 1;
590 			break;
591 		case NBL_WRITE:
592 			if (shrl->shr->s_deny & F_WRDNY)
593 				conflict = 1;
594 			break;
595 		case NBL_READWRITE:
596 			if (shrl->shr->s_deny & F_RWDNY)
597 				conflict = 1;
598 			break;
599 		case NBL_REMOVE:
600 		case NBL_RENAME:
601 			if (shrl->shr->s_deny & F_RMDNY)
602 				conflict = 1;
603 			break;
604 #ifdef DEBUG
605 		default:
606 			cmn_err(CE_PANIC,
607 			    "nbl_share_conflict: bogus op (%d)",
608 			    op);
609 			break;
610 #endif
611 		}
612 		if (conflict)
613 			break;
614 	}
615 
616 	mutex_exit(&vp->v_lock);
617 	return (conflict);
618 }
619 
620 /*
621  * Determine if the given process has a NBMAND share reservation on the
622  * given vnode. Returns 1 if the process has such a share reservation,
623  * returns 0 otherwise.
624  */
625 int
626 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
627 {
628 	struct shrlocklist *shrl;
629 
630 	/*
631 	 * Any NBMAND share reservation on the vp for this process?
632 	 */
633 	mutex_enter(&vp->v_lock);
634 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
635 		if (shrl->shr->s_sysid == 0 &&
636 		    (shrl->shr->s_deny & F_MANDDNY) &&
637 		    (shrl->shr->s_pid == pid)) {
638 			mutex_exit(&vp->v_lock);
639 			return (1);
640 		}
641 	}
642 	mutex_exit(&vp->v_lock);
643 
644 	return (0);
645 }
646