xref: /titanic_50/usr/src/uts/common/os/share.c (revision a9478106a12424322498e53cf7cd75bd8a4d6004)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/fcntl.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/share.h>
38 #include <sys/cmn_err.h>
39 #include <sys/kmem.h>
40 #include <sys/debug.h>
41 #include <sys/t_lock.h>
42 #include <sys/errno.h>
43 #include <sys/nbmlock.h>
44 
/*
 * Debug switch.  On a DEBUG kernel, add_share() dumps the vnode's share
 * list with print_shares() after each successful insertion whenever this
 * is non-zero.
 */
int share_debug = 0;

#ifdef DEBUG
/* Debug-only helpers that dump share reservations with printf(). */
static void print_shares(struct vnode *);
static void print_share(struct shrlock *);
#endif

/* Internal helpers defined later in this file. */
static int isreadonly(struct vnode *);
static void do_cleanshares(struct vnode *, pid_t, int32_t);
54 
55 
/*
 * Add the share reservation shr to vp.
 *
 * vp  - vnode whose v_shrlocks list receives the reservation.
 * shr - the requested reservation.  Its fields, including s_own_len bytes
 *       of s_owner, are copied into newly allocated memory, so the list
 *       never points at the caller's structure.
 *
 * Returns 0 on success, EINVAL if the access/deny bits are not acceptable,
 * or EAGAIN if the request conflicts with a reservation already on vp.
 *
 * New entries are pushed onto the head of vp->v_shrlocks, so the entry
 * whose next pointer is NULL is the oldest ("first") share on the vnode.
 * The list is examined and updated while holding vp->v_lock.
 */
int
add_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;

	/*
	 * An access of zero is not legal, however some older clients
	 * generate it anyways.  Allow the request only if it is
	 * coming from a remote system.  Be generous in what you
	 * accept and strict in what you send.
	 */
	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
		return (EINVAL);
	}

	/*
	 * Sanity check to make sure we have valid options.
	 * There is known overlap but it doesn't hurt to be careful.
	 */
	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
		return (EINVAL);
	}
	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
	    F_MANDDNY|F_RMDNY)) {
		return (EINVAL);
	}

	/*
	 * Walk every existing reservation.  A "break" grants the request
	 * without looking at the remaining entries, a "continue" defers
	 * the decision to the next entry, and each EAGAIN return drops
	 * v_lock before rejecting the request.
	 */
	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		/*
		 * If the share owner matches previous request
		 * do special handling.  The owner is identified by
		 * sysid, pid and the s_own_len bytes of s_owner.
		 */
		if ((shrl->shr->s_sysid == shr->s_sysid) &&
		    (shrl->shr->s_pid == shr->s_pid) &&
		    (shrl->shr->s_own_len == shr->s_own_len) &&
		    bcmp(shrl->shr->s_owner, shr->s_owner,
		    shr->s_own_len) == 0) {

			/*
			 * If the existing request is F_COMPAT and
			 * is the first share then allow any F_COMPAT
			 * from the same process.  Trick:  If the existing
			 * F_COMPAT is write access then it must have
			 * the same owner as the first.
			 */
			if ((shrl->shr->s_deny & F_COMPAT) &&
			    (shr->s_deny & F_COMPAT) &&
			    ((shrl->next == NULL) ||
			    (shrl->shr->s_access & F_WRACC)))
				break;
		}

		/*
		 * If a first share has been done in compatibility mode
		 * handle the special cases.  This entry is the tail of
		 * the list, so every path below either grants or rejects.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {

			if (!(shr->s_deny & F_COMPAT)) {
				/*
				 * The request is not compat: fail if it
				 * wants write access, wants to deny read,
				 * or the first share holds write access.
				 */
				if ((shr->s_access & F_WRACC) ||
				    (shr->s_deny & F_RDDNY) ||
				    (shrl->shr->s_access & F_WRACC)) {
					mutex_exit(&vp->v_lock);
					return (EAGAIN);
				}
				/*
				 * If read only file allow, this may allow
				 * a deny write but that is meaningless on
				 * a read only file.
				 */
				if (isreadonly(vp))
					break;
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * This is a compat request and read access
			 * and the first was also read access
			 * we always allow it, otherwise we reject because
			 * we have handled the only valid write case above.
			 */
			if ((shr->s_access == F_RDACC) &&
			    (shrl->shr->s_access == F_RDACC))
				break;
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}

		/*
		 * If we are trying to share in compatibility mode
		 * and the current share is compat (and not the first)
		 * we don't know enough.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
			continue;

		/*
		 * If this is a compat we check for what can't succeed.
		 */
		if (shr->s_deny & F_COMPAT) {
			/*
			 * If we want write access or
			 * if anyone is denying read or
			 * if anyone has write access we fail
			 */
			if ((shr->s_access & F_WRACC) ||
			    (shrl->shr->s_deny & F_RDDNY) ||
			    (shrl->shr->s_access & F_WRACC)) {
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * If the first was opened with only read access
			 * and is a read only file we allow.
			 */
			if (shrl->next == NULL) {
				if ((shrl->shr->s_access == F_RDACC) &&
				    isreadonly(vp)) {
					break;
				}
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * We still can't determine our fate so continue
			 */
			continue;
		}

		/*
		 * Simple bitwise test, if we are trying to access what
		 * someone else is denying or we are trying to deny
		 * what someone else is accessing we fail.
		 */
		if ((shr->s_access & shrl->shr->s_deny) ||
		    (shr->s_deny & shrl->shr->s_access)) {
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}
	}

	/*
	 * The request is granted: build a private copy of it.  Note that
	 * these KM_SLEEP allocations are made with v_lock still held.
	 */
	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
	shrl->shr->s_access = shr->s_access;
	shrl->shr->s_deny = shr->s_deny;

	/*
	 * Make sure no other deny modes are also set with F_COMPAT
	 */
	if (shrl->shr->s_deny & F_COMPAT)
		shrl->shr->s_deny = F_COMPAT;
	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
	shrl->shr->s_pid = shr->s_pid;
	shrl->shr->s_own_len = shr->s_own_len;
	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
	/* Link the new entry in at the head of the vnode's share list. */
	shrl->next = vp->v_shrlocks;
	vp->v_shrlocks = shrl;
#ifdef DEBUG
	if (share_debug)
		print_shares(vp);
#endif

	mutex_exit(&vp->v_lock);

	return (0);
}
232 
233 /*
234  *	nlmid	sysid	pid
235  *	=====	=====	===
236  *	!=0	!=0	=0	in cluster; NLM lock
237  *	!=0	=0	=0	in cluster; special case for NLM lock
238  *	!=0	=0	!=0	in cluster; PXFS local lock
239  *	!=0	!=0	!=0	cannot happen
240  *	=0	!=0	=0	not in cluster; NLM lock
241  *	=0	=0	!=0	not in cluster; local lock
242  *	=0	=0	=0	cannot happen
243  *	=0	!=0	!=0	cannot happen
244  */
245 static int
246 is_match_for_del(struct shrlock *shr, struct shrlock *element)
247 {
248 	int nlmid1, nlmid2;
249 	int result = 0;
250 
251 	nlmid1 = GETNLMID(shr->s_sysid);
252 	nlmid2 = GETNLMID(element->s_sysid);
253 
254 	if (nlmid1 != 0) {		/* in a cluster */
255 		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
256 			/*
257 			 * Lock obtained through nlm server.  Just need to
258 			 * compare whole sysids.  pid will always = 0.
259 			 */
260 			result = shr->s_sysid == element->s_sysid;
261 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
262 			/*
263 			 * This is a special case.  The NLM server wishes to
264 			 * delete all share locks obtained through nlmid1.
265 			 */
266 			result = (nlmid1 == nlmid2);
267 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
268 			/*
269 			 * Lock obtained locally through PXFS.  Match nlmids
270 			 * and pids.
271 			 */
272 			result = (nlmid1 == nlmid2 &&
273 			    shr->s_pid == element->s_pid);
274 		}
275 	} else {			/* not in a cluster */
276 		result = ((shr->s_sysid == 0 &&
277 		    shr->s_pid == element->s_pid) ||
278 		    (shr->s_sysid != 0 &&
279 		    shr->s_sysid == element->s_sysid));
280 	}
281 	return (result);
282 }
283 
284 /*
285  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
286  * share could not be found.  If the share reservation is an NBMAND share
287  * reservation, signal anyone waiting for the share to go away (e.g.,
288  * blocking lock requests).
289  */
290 
291 int
292 del_share(struct vnode *vp, struct shrlock *shr)
293 {
294 	struct shrlocklist *shrl;
295 	struct shrlocklist **shrlp;
296 	int found = 0;
297 	int is_nbmand = 0;
298 
299 	mutex_enter(&vp->v_lock);
300 	/*
301 	 * Delete the shares with the matching sysid and owner
302 	 * But if own_len == 0 and sysid == 0 delete all with matching pid
303 	 * But if own_len == 0 delete all with matching sysid.
304 	 */
305 	shrlp = &vp->v_shrlocks;
306 	while (*shrlp) {
307 		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
308 		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
309 		    shr->s_own_len) == 0)) ||
310 
311 		    (shr->s_own_len == 0 &&
312 		    is_match_for_del(shr, (*shrlp)->shr))) {
313 
314 			shrl = *shrlp;
315 			*shrlp = shrl->next;
316 
317 			if (shrl->shr->s_deny & F_MANDDNY)
318 				is_nbmand = 1;
319 
320 			/* XXX deref sysid */
321 			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
322 			kmem_free(shrl->shr, sizeof (struct shrlock));
323 			kmem_free(shrl, sizeof (struct shrlocklist));
324 			found++;
325 			continue;
326 		}
327 		shrlp = &(*shrlp)->next;
328 	}
329 
330 	if (is_nbmand)
331 		cv_broadcast(&vp->v_cv);
332 
333 	mutex_exit(&vp->v_lock);
334 	return (found ? 0 : EINVAL);
335 }
336 
/*
 * Remove every local share reservation that process pid holds on vp.
 * "Local" is expressed by handing a sysid of zero to do_cleanshares().
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	const int32_t local_sysid = 0;

	do_cleanshares(vp, pid, local_sysid);
}
346 
/*
 * Remove every remote share reservation that the given sysid made on vp.
 * A sysid of zero does not name a remote system, so it is ignored.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid != 0)
		do_cleanshares(vp, 0, sysid);
}
359 
360 /*
361  * Cleanup share reservations on given vnode made
362  * by the either given pid or sysid.
363  * If sysid is 0, remove all shares made by given pid,
364  * otherwise all shares made by the given sysid will
365  * be removed.
366  */
367 static void
368 do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
369 {
370 	struct shrlock shr;
371 
372 	if (vp->v_shrlocks == NULL)
373 		return;
374 
375 	shr.s_access = 0;
376 	shr.s_deny = 0;
377 	shr.s_pid = pid;
378 	shr.s_sysid = sysid;
379 	shr.s_own_len = 0;
380 	shr.s_owner = NULL;
381 
382 	(void) del_share(vp, &shr);
383 }
384 
/*
 * Decide whether a share owned by sysid2 counts as a hit for the query
 * sysid1.  Returns non-zero on a hit and zero otherwise.
 */
static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	if (GETNLMID(sysid1) == 0) {
		/*
		 * Not in a cluster: a zero query matches any share with a
		 * non-zero sysid, a non-zero query matches only that sysid.
		 */
		if (sysid1 == 0)
			return (sysid2 != 0);
		return (sysid1 == sysid2);
	}

	/* In a cluster from here on. */
	if (GETSYSID(sysid1) != 0) {
		/*
		 * Lock obtained through the NLM server: compare the whole
		 * sysids.
		 */
		return (sysid1 == sysid2);
	}

	/*
	 * Special case: the NLM server identified by the query's nlmid
	 * asks whether it has obtained any share locks on the vnode.
	 */
	return (GETNLMID(sysid1) == GETNLMID(sysid2));
}
411 
412 
413 /*
414  * Determine whether there are any shares for the given vnode
415  * with a remote sysid. Returns zero if not, non-zero if there are.
416  * If sysid is non-zero then determine if this sysid has a share.
417  *
418  * Note that the return value from this function is potentially invalid
419  * once it has been returned.  The caller is responsible for providing its
420  * own synchronization mechanism to ensure that the return value is useful.
421  */
422 int
423 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
424 {
425 	struct shrlocklist *shrl;
426 	int result = 0;
427 
428 	mutex_enter(&vp->v_lock);
429 	shrl = vp->v_shrlocks;
430 	while (shrl) {
431 		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
432 
433 			result = 1;
434 			break;
435 		}
436 		shrl = shrl->next;
437 	}
438 	mutex_exit(&vp->v_lock);
439 	return (result);
440 }
441 
442 static int
443 isreadonly(struct vnode *vp)
444 {
445 	return (vp->v_type != VCHR && vp->v_type != VBLK &&
446 	    vp->v_type != VFIFO && vn_is_readonly(vp));
447 }
448 
449 #ifdef DEBUG
450 static void
451 print_shares(struct vnode *vp)
452 {
453 	struct shrlocklist *shrl;
454 
455 	if (vp->v_shrlocks == NULL) {
456 		printf("<NULL>\n");
457 		return;
458 	}
459 
460 	shrl = vp->v_shrlocks;
461 	while (shrl) {
462 		print_share(shrl->shr);
463 		shrl = shrl->next;
464 	}
465 }
466 
467 static void
468 print_share(struct shrlock *shr)
469 {
470 	int i;
471 
472 	if (shr == NULL) {
473 		printf("<NULL>\n");
474 		return;
475 	}
476 
477 	printf("    access(%d):	", shr->s_access);
478 	if (shr->s_access & F_RDACC)
479 		printf("R");
480 	if (shr->s_access & F_WRACC)
481 		printf("W");
482 	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
483 		printf("N");
484 	printf("\n");
485 	printf("    deny:	");
486 	if (shr->s_deny & F_COMPAT)
487 		printf("C");
488 	if (shr->s_deny & F_RDDNY)
489 		printf("R");
490 	if (shr->s_deny & F_WRDNY)
491 		printf("W");
492 	if (shr->s_deny == F_NODNY)
493 		printf("N");
494 	printf("\n");
495 	printf("    sysid:	%d\n", shr->s_sysid);
496 	printf("    pid:	%d\n", shr->s_pid);
497 	printf("    owner:	[%d]", shr->s_own_len);
498 	printf("'");
499 	for (i = 0; i < shr->s_own_len; i++)
500 		printf("%02x", (unsigned)shr->s_owner[i]);
501 	printf("'\n");
502 }
503 #endif
504 
505 /*
506  * Return non-zero if the given I/O request conflicts with a registered
507  * share reservation.
508  *
509  * A process is identified by the tuple (sysid, pid). When the caller
510  * context is passed to nbl_share_conflict, the sysid and pid in the
511  * caller context are used. Otherwise the sysid is zero, and the pid is
512  * taken from the current process.
513  *
514  * Conflict Algorithm:
515  *   1. An op request of NBL_READ will fail if a different
516  *      process has a mandatory share reservation with deny read.
517  *
518  *   2. An op request of NBL_WRITE will fail if a different
519  *      process has a mandatory share reservation with deny write.
520  *
521  *   3. An op request of NBL_READWRITE will fail if a different
522  *      process has a mandatory share reservation with deny read
523  *      or deny write.
524  *
525  *   4. An op request of NBL_REMOVE will fail if there is
526  *      a mandatory share reservation with an access of read,
527  *      write, or remove. (Anything other than meta data access).
528  *
529  *   5. An op request of NBL_RENAME will fail if there is
530  *      a mandatory share reservation with:
531  *        a) access write or access remove
532  *      or
533  *        b) access read and deny remove
534  *
535  *   Otherwise there is no conflict and the op request succeeds.
536  *
537  * This behavior is required for interoperability between
538  * the nfs server, cifs server, and local access.
539  * This behavior can result in non-posix semantics.
540  *
541  * When mandatory share reservations are enabled, a process
542  * should call nbl_share_conflict to determine if the
543  * desired operation would conflict with an existing share
544  * reservation.
545  *
546  * The call to nbl_share_conflict may be skipped if the
547  * process has an existing share reservation and the operation
548  * is being performed in the context of that existing share
549  * reservation.
550  */
551 int
552 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
553 {
554 	struct shrlocklist *shrl;
555 	int conflict = 0;
556 	pid_t pid;
557 	int sysid;
558 
559 	ASSERT(nbl_in_crit(vp));
560 
561 	if (ct == NULL) {
562 		pid = curproc->p_pid;
563 		sysid = 0;
564 	} else {
565 		pid = ct->cc_pid;
566 		sysid = ct->cc_sysid;
567 	}
568 
569 	mutex_enter(&vp->v_lock);
570 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
571 		if (!(shrl->shr->s_deny & F_MANDDNY))
572 			continue;
573 		/*
574 		 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
575 		 * check if the share reservation being examined
576 		 * belongs to the current process.
577 		 * NBL_REMOVE and NBL_RENAME do not.
578 		 * This behavior is required by the conflict
579 		 * algorithm described above.
580 		 */
581 		switch (op) {
582 		case NBL_READ:
583 			if ((shrl->shr->s_deny & F_RDDNY) &&
584 			    (shrl->shr->s_sysid != sysid ||
585 			    shrl->shr->s_pid != pid))
586 				conflict = 1;
587 			break;
588 		case NBL_WRITE:
589 			if ((shrl->shr->s_deny & F_WRDNY) &&
590 			    (shrl->shr->s_sysid != sysid ||
591 			    shrl->shr->s_pid != pid))
592 				conflict = 1;
593 			break;
594 		case NBL_READWRITE:
595 			if ((shrl->shr->s_deny & F_RWDNY) &&
596 			    (shrl->shr->s_sysid != sysid ||
597 			    shrl->shr->s_pid != pid))
598 				conflict = 1;
599 			break;
600 		case NBL_REMOVE:
601 			if (shrl->shr->s_access & (F_RWACC|F_RMACC))
602 				conflict = 1;
603 			break;
604 		case NBL_RENAME:
605 			if (shrl->shr->s_access & (F_WRACC|F_RMACC))
606 				conflict = 1;
607 
608 			else if ((shrl->shr->s_access & F_RDACC) &&
609 			    (shrl->shr->s_deny & F_RMDNY))
610 				conflict = 1;
611 			break;
612 #ifdef DEBUG
613 		default:
614 			cmn_err(CE_PANIC,
615 			    "nbl_share_conflict: bogus op (%d)",
616 			    op);
617 			break;
618 #endif
619 		}
620 		if (conflict)
621 			break;
622 	}
623 
624 	mutex_exit(&vp->v_lock);
625 	return (conflict);
626 }
627 
628 /*
629  * Determine if the given process has a NBMAND share reservation on the
630  * given vnode. Returns 1 if the process has such a share reservation,
631  * returns 0 otherwise.
632  */
633 int
634 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
635 {
636 	struct shrlocklist *shrl;
637 
638 	/*
639 	 * Any NBMAND share reservation on the vp for this process?
640 	 */
641 	mutex_enter(&vp->v_lock);
642 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
643 		if (shrl->shr->s_sysid == 0 &&
644 		    (shrl->shr->s_deny & F_MANDDNY) &&
645 		    (shrl->shr->s_pid == pid)) {
646 			mutex_exit(&vp->v_lock);
647 			return (1);
648 		}
649 	}
650 	mutex_exit(&vp->v_lock);
651 
652 	return (0);
653 }
654