xref: /titanic_41/usr/src/uts/common/os/share.c (revision 70025d765b044c6d8594bb965a2247a61e991a99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 1996-1998,2001,2003 Sun Microsystems, Inc.
24  * All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/fcntl.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/share.h>
38 #include <sys/cmn_err.h>
39 #include <sys/kmem.h>
40 #include <sys/debug.h>
41 #include <sys/t_lock.h>
42 #include <sys/errno.h>
43 #include <sys/nbmlock.h>
44 
/*
 * Debugging knob.  On a DEBUG build, a non-zero value makes add_share()
 * print the vnode's whole share list after each reservation it inserts.
 * It is not referenced in non-DEBUG builds of this file.
 */
int share_debug = 0;

#ifdef DEBUG
/* Diagnostic dump routines; only compiled into DEBUG builds. */
static void print_shares(struct vnode *);
static void print_share(struct shrlock *);
#endif

/* File-local helpers used by add_share(). */
static int isreadonly(struct vnode *);
static int lock_blocks_share(struct vnode *, struct shrlock *);
54 
/*
 * Add the share reservation shr to vp.
 *
 * The request is validated, checked against existing file locks (only
 * for F_MANDDNY requests), and then checked against every reservation
 * already on vp->v_shrlocks.  If nothing conflicts, a private copy of
 * shr (including its owner handle) is linked onto the head of the list.
 *
 * Because new entries are pushed on the head, the entry whose next
 * pointer is NULL is the oldest one still on the list; that is what the
 * comments below mean by "the first share".
 *
 * Returns:
 *	0	the reservation was added
 *	EINVAL	s_access is zero on a request whose sysid part is zero, or
 *		s_access / s_deny contain bits outside the accepted sets
 *	EAGAIN	the request conflicts with an existing lock (F_MANDDNY
 *		requests only) or with an existing share reservation
 *
 * A caller passing F_MANDDNY must already be inside the nbmand critical
 * region for vp (this is asserted).
 */
int
add_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;

	/*
	 * An access of zero is not legal, however some older clients
	 * generate it anyways.  Allow the request only if it is
	 * coming from a remote system.  Be generous in what you
	 * accept and strict in what you send.
	 */
	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
		return (EINVAL);
	}

	/*
	 * Sanity check to make sure we have valid options.
	 * There is known overlap but it doesn't hurt to be careful.
	 */
	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC)) {
		return (EINVAL);
	}
	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
	    F_MANDDNY)) {
		return (EINVAL);
	}

	/*
	 * If the caller wants non-blocking mandatory semantics, make sure
	 * that there isn't already a conflicting lock.
	 */
	if (shr->s_deny & F_MANDDNY) {
		ASSERT(nbl_in_crit(vp));
		if (lock_blocks_share(vp, shr)) {
			return (EAGAIN);
		}
	}

	/*
	 * Walk the existing reservations.  Inside this loop:
	 *   - "break" stops the walk and grants the request without
	 *     examining the remaining entries;
	 *   - "continue" means this entry neither grants nor refuses the
	 *     request, so move on to the next one;
	 *   - every refusal drops v_lock and returns EAGAIN.
	 * Running off the end of the list also grants the request.
	 */
	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		/*
		 * If the share owner matches previous request
		 * do special handling.
		 */
		if ((shrl->shr->s_sysid == shr->s_sysid) &&
		    (shrl->shr->s_pid == shr->s_pid) &&
		    (shrl->shr->s_own_len == shr->s_own_len) &&
		    bcmp(shrl->shr->s_owner, shr->s_owner,
		    shr->s_own_len) == 0) {

			/*
			 * If the existing request is F_COMPAT and
			 * is the first share then allow any F_COMPAT
			 * from the same process.  Trick:  If the existing
			 * F_COMPAT is write access then it must have
			 * the same owner as the first.
			 */
			if ((shrl->shr->s_deny & F_COMPAT) &&
			    (shr->s_deny & F_COMPAT) &&
			    ((shrl->next == NULL) ||
				(shrl->shr->s_access & F_WRACC)))
				break;
			/*
			 * Same owner but not the special case above: fall
			 * through and judge this entry by the general rules.
			 */
		}

		/*
		 * If a first share has been done in compatibility mode
		 * handle the special cases.  Every path through this
		 * block either grants (break) or refuses (EAGAIN); none
		 * continues the walk.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {

			if (!(shr->s_deny & F_COMPAT)) {
				/*
				 * If not compat and want write access or
				 * want to deny read or
				 * write exists, fails
				 */
				if ((shr->s_access & F_WRACC) ||
				    (shr->s_deny & F_RDDNY) ||
				    (shrl->shr->s_access & F_WRACC)) {
					mutex_exit(&vp->v_lock);
					return (EAGAIN);
				}
				/*
				 * If read only file allow, this may allow
				 * a deny write but that is meaningless on
				 * a read only file.
				 */
				if (isreadonly(vp))
					break;
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * This is a compat request and read access
			 * and the first was also read access
			 * we always allow it, otherwise we reject because
			 * we have handled the only valid write case above.
			 */
			if ((shr->s_access == F_RDACC) &&
			    (shrl->shr->s_access == F_RDACC))
				break;
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}

		/*
		 * If we are trying to share in compatibility mode
		 * and the current share is compat (and not the first)
		 * we don't know enough.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
			continue;

		/*
		 * If this is a compat we check for what can't succeed.
		 */
		if (shr->s_deny & F_COMPAT) {
			/*
			 * If we want write access or
			 * if anyone is denying read or
			 * if anyone has write access we fail
			 */
			if ((shr->s_access & F_WRACC) ||
			    (shrl->shr->s_deny & F_RDDNY) ||
			    (shrl->shr->s_access & F_WRACC)) {
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * If the first was opened with only read access
			 * and is a read only file we allow.
			 */
			if (shrl->next == NULL) {
				if ((shrl->shr->s_access == F_RDACC) &&
				    isreadonly(vp)) {
					break;
				}
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * We still can't determine our fate so continue
			 */
			continue;
		}

		/*
		 * Simple bitwise test, if we are trying to access what
		 * someone else is denying or we are trying to deny
		 * what someone else is accessing we fail.
		 */
		if ((shr->s_access & shrl->shr->s_deny) ||
		    (shr->s_deny & shrl->shr->s_access)) {
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}
	}

	/*
	 * Request granted: build a private copy of the reservation.
	 * NOTE(review): these KM_SLEEP allocations are made with v_lock
	 * still held -- confirm that is acceptable for this lock.
	 */
	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
	shrl->shr->s_access = shr->s_access;
	shrl->shr->s_deny = shr->s_deny;

	/*
	 * Make sure no other deny modes are also set with F_COMPAT
	 */
	if (shrl->shr->s_deny & F_COMPAT)
		shrl->shr->s_deny = F_COMPAT;
	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
	shrl->shr->s_pid = shr->s_pid;
	shrl->shr->s_own_len = shr->s_own_len;
	/* The owner handle is duplicated; del_share() frees the copy. */
	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
	/* Push on the head of the vnode's list. */
	shrl->next = vp->v_shrlocks;
	vp->v_shrlocks = shrl;
#ifdef DEBUG
	if (share_debug)
		print_shares(vp);
#endif

	mutex_exit(&vp->v_lock);

	return (0);
}
242 
243 /*
244  *	nlmid	sysid	pid
245  *	=====	=====	===
246  *	!=0	!=0	=0	in cluster; NLM lock
247  *	!=0	=0	=0	in cluster; special case for NLM lock
248  *	!=0	=0	!=0	in cluster; PXFS local lock
249  *	!=0	!=0	!=0	cannot happen
250  *	=0	!=0	=0	not in cluster; NLM lock
251  *	=0	=0	!=0	not in cluster; local lock
252  *	=0	=0	=0	cannot happen
253  *	=0	!=0	!=0	cannot happen
254  */
255 static int
256 is_match_for_del(struct shrlock *shr, struct shrlock *element)
257 {
258 	int nlmid1, nlmid2;
259 	int result = 0;
260 
261 	nlmid1 = GETNLMID(shr->s_sysid);
262 	nlmid2 = GETNLMID(element->s_sysid);
263 
264 	if (nlmid1 != 0) {		/* in a cluster */
265 		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
266 			/*
267 			 * Lock obtained through nlm server.  Just need to
268 			 * compare whole sysids.  pid will always = 0.
269 			 */
270 			result = shr->s_sysid == element->s_sysid;
271 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
272 			/*
273 			 * This is a special case.  The NLM server wishes to
274 			 * delete all share locks obtained through nlmid1.
275 			 */
276 			result = (nlmid1 == nlmid2);
277 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
278 			/*
279 			 * Lock obtained locally through PXFS.  Match nlmids
280 			 * and pids.
281 			 */
282 			result = (nlmid1 == nlmid2 &&
283 				shr->s_pid == element->s_pid);
284 		}
285 	} else {			/* not in a cluster */
286 		result = ((shr->s_sysid == 0 &&
287 			shr->s_pid == element->s_pid) ||
288 			(shr->s_sysid != 0 &&
289 				shr->s_sysid == element->s_sysid));
290 	}
291 	return (result);
292 }
293 
294 /*
295  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
296  * share could not be found.  If the share reservation is an NBMAND share
297  * reservation, signal anyone waiting for the share to go away (e.g.,
298  * blocking lock requests).
299  */
300 
301 int
302 del_share(struct vnode *vp, struct shrlock *shr)
303 {
304 	struct shrlocklist *shrl;
305 	struct shrlocklist **shrlp;
306 	int found = 0;
307 	int is_nbmand = 0;
308 
309 	mutex_enter(&vp->v_lock);
310 	/*
311 	 * Delete the shares with the matching sysid and owner
312 	 * But if own_len == 0 and sysid == 0 delete all with matching pid
313 	 * But if own_len == 0 delete all with matching sysid.
314 	 */
315 	shrlp = &vp->v_shrlocks;
316 	while (*shrlp) {
317 		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
318 				    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
319 						shr->s_own_len) == 0)) ||
320 
321 			(shr->s_own_len == 0 &&
322 				is_match_for_del(shr, (*shrlp)->shr))) {
323 
324 			shrl = *shrlp;
325 			*shrlp = shrl->next;
326 
327 			if (shrl->shr->s_deny & F_MANDDNY)
328 				is_nbmand = 1;
329 
330 			/* XXX deref sysid */
331 			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
332 			kmem_free(shrl->shr, sizeof (struct shrlock));
333 			kmem_free(shrl, sizeof (struct shrlocklist));
334 			found++;
335 			continue;
336 		}
337 		shrlp = &(*shrlp)->next;
338 	}
339 
340 	if (is_nbmand)
341 		cv_broadcast(&vp->v_cv);
342 
343 	mutex_exit(&vp->v_lock);
344 	return (found ? 0 : EINVAL);
345 }
346 
347 /*
348  * Clean up all local share reservations that the given process has with
349  * the given file.
350  */
351 void
352 cleanshares(struct vnode *vp, pid_t pid)
353 {
354 	struct shrlock shr;
355 
356 	if (vp->v_shrlocks == NULL)
357 		return;
358 
359 	shr.s_access = 0;
360 	shr.s_deny = 0;
361 	shr.s_pid = pid;
362 	shr.s_sysid = 0;
363 	shr.s_own_len = 0;
364 	shr.s_owner = NULL;
365 
366 	(void) del_share(vp, &shr);
367 }
368 
369 static int
370 is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
371 {
372 	int result = 0;
373 
374 	if (GETNLMID(sysid1) != 0) { /* in a cluster */
375 		if (GETSYSID(sysid1) != 0) {
376 			/*
377 			 * Lock obtained through nlm server.  Just need to
378 			 * compare whole sysids.
379 			 */
380 			result = (sysid1 == sysid2);
381 		} else if (GETSYSID(sysid1) == 0) {
382 			/*
383 			 * This is a special case.  The NLM server identified
384 			 * by nlmid1 wishes to find out if it has obtained
385 			 * any share locks on the vnode.
386 			 */
387 			result = (GETNLMID(sysid1) == GETNLMID(sysid2));
388 		}
389 	} else {			/* not in a cluster */
390 		result = ((sysid1 != 0 && sysid1 == sysid2) ||
391 		    (sysid1 == 0 && sysid2 != 0));
392 	}
393 	return (result);
394 }
395 
396 
397 /*
398  * Determine whether there are any shares for the given vnode
399  * with a remote sysid. Returns zero if not, non-zero if there are.
400  * If sysid is non-zero then determine if this sysid has a share.
401  *
402  * Note that the return value from this function is potentially invalid
403  * once it has been returned.  The caller is responsible for providing its
404  * own synchronization mechanism to ensure that the return value is useful.
405  */
406 int
407 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
408 {
409 	struct shrlocklist *shrl;
410 	int result = 0;
411 
412 	mutex_enter(&vp->v_lock);
413 	shrl = vp->v_shrlocks;
414 	while (shrl) {
415 		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
416 
417 			result = 1;
418 			break;
419 		}
420 		shrl = shrl->next;
421 	}
422 	mutex_exit(&vp->v_lock);
423 	return (result);
424 }
425 
426 static int
427 isreadonly(struct vnode *vp)
428 {
429 	return (vp->v_type != VCHR && vp->v_type != VBLK &&
430 		vp->v_type != VFIFO && vn_is_readonly(vp));
431 }
432 
433 #ifdef DEBUG
434 static void
435 print_shares(struct vnode *vp)
436 {
437 	struct shrlocklist *shrl;
438 
439 	if (vp->v_shrlocks == NULL) {
440 		printf("<NULL>\n");
441 		return;
442 	}
443 
444 	shrl = vp->v_shrlocks;
445 	while (shrl) {
446 		print_share(shrl->shr);
447 		shrl = shrl->next;
448 	}
449 }
450 
451 static void
452 print_share(struct shrlock *shr)
453 {
454 	int i;
455 
456 	if (shr == NULL) {
457 		printf("<NULL>\n");
458 		return;
459 	}
460 
461 	printf("    access(%d):	", shr->s_access);
462 	if (shr->s_access & F_RDACC)
463 		printf("R");
464 	if (shr->s_access & F_WRACC)
465 		printf("W");
466 	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
467 		printf("N");
468 	printf("\n");
469 	printf("    deny:	");
470 	if (shr->s_deny & F_COMPAT)
471 		printf("C");
472 	if (shr->s_deny & F_RDDNY)
473 		printf("R");
474 	if (shr->s_deny & F_WRDNY)
475 		printf("W");
476 	if (shr->s_deny == F_NODNY)
477 		printf("N");
478 	printf("\n");
479 	printf("    sysid:	%d\n", shr->s_sysid);
480 	printf("    pid:	%d\n", shr->s_pid);
481 	printf("    owner:	[%d]", shr->s_own_len);
482 	printf("'");
483 	for (i = 0; i < shr->s_own_len; i++)
484 		printf("%02x", (unsigned)shr->s_owner[i]);
485 	printf("'\n");
486 }
487 #endif
488 
489 /*
490  * Return non-zero if the given I/O request conflicts with a registered
491  * share reservation.
492  */
493 
494 int
495 nbl_share_conflict(vnode_t *vp, nbl_op_t op)
496 {
497 	struct shrlocklist *shrl;
498 	int conflict = 0;
499 
500 	ASSERT(nbl_in_crit(vp));
501 
502 	mutex_enter(&vp->v_lock);
503 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
504 		if (shrl->shr->s_sysid == 0 &&
505 		    (shrl->shr->s_deny & F_MANDDNY) &&
506 		    shrl->shr->s_pid != curproc->p_pid) {
507 			switch (op) {
508 			case NBL_READ:
509 				if (shrl->shr->s_deny & F_RDDNY)
510 					conflict = 1;
511 				break;
512 			case NBL_WRITE:
513 				if (shrl->shr->s_deny & F_WRDNY)
514 					conflict = 1;
515 				break;
516 			case NBL_READWRITE:
517 				if (shrl->shr->s_deny & F_RWDNY)
518 					conflict = 1;
519 				break;
520 			case NBL_RENAME:
521 			case NBL_REMOVE:
522 				conflict = 1;
523 				break;
524 #ifdef DEBUG
525 			default:
526 				cmn_err(CE_PANIC,
527 					"nbl_share_conflict: bogus op (%d)",
528 					op);
529 				break;
530 #endif
531 			}
532 		}
533 		if (conflict)
534 			break;
535 	}
536 
537 	mutex_exit(&vp->v_lock);
538 	return (conflict);
539 }
540 
541 /*
542  * Return non-zero if the given lock request conflicts with an existing
543  * non-blocking mandatory share reservation.
544  */
545 
546 int
547 share_blocks_lock(vnode_t *vp, flock64_t *flkp)
548 {
549 	ASSERT(nbl_in_crit(vp));
550 
551 	if ((flkp->l_type == F_RDLCK || flkp->l_type == F_WRLCK) &&
552 	    nbl_share_conflict(vp, nbl_lock_to_op(flkp->l_type)))
553 		return (1);
554 	else
555 		return (0);
556 }
557 
558 /*
559  * Wait for all share reservations to go away that block the given lock
560  * request.  Returns 0 after successfully waiting, or EINTR.
561  */
562 
563 int
564 wait_for_share(vnode_t *vp, flock64_t *flkp)
565 {
566 	int result = 0;
567 
568 	ASSERT(nbl_in_crit(vp));
569 
570 	/*
571 	 * We have to hold the vnode's lock before leaving the nbmand
572 	 * critical region, to prevent a race with the thread that deletes
573 	 * the share that's blocking us.  Then we have to drop the lock
574 	 * before reentering the critical region, to avoid a deadlock.
575 	 */
576 	while (result == 0 && share_blocks_lock(vp, flkp)) {
577 		mutex_enter(&vp->v_lock);
578 		nbl_end_crit(vp);
579 		if (cv_wait_sig(&vp->v_cv, &vp->v_lock) == 0)
580 			result = EINTR;
581 		mutex_exit(&vp->v_lock);
582 		nbl_start_crit(vp, RW_WRITER);
583 	}
584 
585 	return (result);
586 }
587 
588 /*
589  * Determine if the given share reservation conflicts with any existing
590  * locks or mapped regions for the file.  This is used to compensate for
591  * the fact that most Unix applications don't get a share reservation, so
592  * we use existing locks as an indication of what files are open.
593  *
594  * XXX needs a better name to reflect that it also looks for mapped file
595  * conflicts.
596  *
597  * Returns non-zero if there is a conflict, zero if okay.
598  */
599 
600 static int
601 lock_blocks_share(vnode_t *vp, struct shrlock *shr)
602 {
603 	struct flock64 lck;
604 	int error;
605 
606 	/*
607 	 * We don't currently have a good way to match lock
608 	 * ownership with share ownership for remote requests.
609 	 * Fortunately, we know that only local processes (in particular,
610 	 * local CIFS servers) care about conflicts between locks and
611 	 * share reservations, and we can distinguish local processes from
612 	 * each other and from remote processes.
613 	 */
614 	ASSERT(shr->s_sysid == 0);
615 
616 	if ((shr->s_deny & (F_RWDNY|F_COMPAT)) == 0) {
617 		/* if no deny mode, then there's no conflict */
618 		return (0);
619 	}
620 
621 	lck.l_type = ((shr->s_deny & F_RDDNY) ? F_WRLCK : F_RDLCK);
622 
623 	lck.l_whence = 0;
624 	lck.l_start = 0;
625 	lck.l_len = 0;			/* to EOF */
626 
627 	/* would check here for conflict with mapped region */
628 
629 	/* XXX should use non-NULL cred? */
630 	error = VOP_FRLOCK(vp, F_GETLK, &lck, 0, 0, NULL, NULL);
631 	if (error != 0) {
632 		cmn_err(CE_WARN, "lock_blocks_share: unexpected error (%d)",
633 			error);
634 		return (1);
635 	}
636 
637 	return (lck.l_type == F_UNLCK ? 0 : 1);
638 }
639 
640 /*
641  * Determine if the given process has a NBMAND share reservation on the
642  * given vnode. Returns 1 if the process has such a share reservation,
643  * returns 0 otherwise.
644  */
645 int
646 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
647 {
648 	struct shrlocklist *shrl;
649 
650 	/*
651 	 * Any NBMAND share reservation on the vp for this process?
652 	 */
653 	mutex_enter(&vp->v_lock);
654 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
655 		if (shrl->shr->s_sysid == 0 &&
656 		    (shrl->shr->s_deny & F_MANDDNY) &&
657 		    (shrl->shr->s_pid == pid)) {
658 			mutex_exit(&vp->v_lock);
659 			return (1);
660 		}
661 	}
662 	mutex_exit(&vp->v_lock);
663 
664 	return (0);
665 }
666