xref: /titanic_52/usr/src/uts/common/os/share.c (revision 53dbcc5939527e6d5d52d814e51e364b5e8bb532)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/fcntl.h>
31 #include <sys/vfs.h>
32 #include <sys/vnode.h>
33 #include <sys/share.h>
34 #include <sys/cmn_err.h>
35 #include <sys/kmem.h>
36 #include <sys/debug.h>
37 #include <sys/t_lock.h>
38 #include <sys/errno.h>
39 #include <sys/nbmlock.h>
40 
/*
 * Debug knob: when non-zero on a DEBUG kernel, add_share() dumps the
 * vnode's share list after every successful insertion.
 */
int share_debug = 0;

#ifdef DEBUG
/* Dump helpers, compiled only into DEBUG kernels. */
static void print_shares(struct vnode *vp);
static void print_share(struct shrlock *shr);
#endif

/* Non-zero when vp is a read-only, non-device, non-FIFO vnode. */
static int isreadonly(struct vnode *vp);
49 
50 /*
51  * Add the share reservation shr to vp.
52  */
53 int
54 add_share(struct vnode *vp, struct shrlock *shr)
55 {
56 	struct shrlocklist *shrl;
57 
58 	/*
59 	 * An access of zero is not legal, however some older clients
60 	 * generate it anyways.  Allow the request only if it is
61 	 * coming from a remote system.  Be generous in what you
62 	 * accept and strict in what you send.
63 	 */
64 	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
65 		return (EINVAL);
66 	}
67 
68 	/*
69 	 * Sanity check to make sure we have valid options.
70 	 * There is known overlap but it doesn't hurt to be careful.
71 	 */
72 	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
73 		return (EINVAL);
74 	}
75 	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
76 	    F_MANDDNY|F_RMDNY)) {
77 		return (EINVAL);
78 	}
79 
80 	mutex_enter(&vp->v_lock);
81 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
82 		/*
83 		 * If the share owner matches previous request
84 		 * do special handling.
85 		 */
86 		if ((shrl->shr->s_sysid == shr->s_sysid) &&
87 		    (shrl->shr->s_pid == shr->s_pid) &&
88 		    (shrl->shr->s_own_len == shr->s_own_len) &&
89 		    bcmp(shrl->shr->s_owner, shr->s_owner,
90 		    shr->s_own_len) == 0) {
91 
92 			/*
93 			 * If the existing request is F_COMPAT and
94 			 * is the first share then allow any F_COMPAT
95 			 * from the same process.  Trick:  If the existing
96 			 * F_COMPAT is write access then it must have
97 			 * the same owner as the first.
98 			 */
99 			if ((shrl->shr->s_deny & F_COMPAT) &&
100 			    (shr->s_deny & F_COMPAT) &&
101 			    ((shrl->next == NULL) ||
102 			    (shrl->shr->s_access & F_WRACC)))
103 				break;
104 		}
105 
106 		/*
107 		 * If a first share has been done in compatibility mode
108 		 * handle the special cases.
109 		 */
110 		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
111 
112 			if (!(shr->s_deny & F_COMPAT)) {
113 				/*
114 				 * If not compat and want write access or
115 				 * want to deny read or
116 				 * write exists, fails
117 				 */
118 				if ((shr->s_access & F_WRACC) ||
119 				    (shr->s_deny & F_RDDNY) ||
120 				    (shrl->shr->s_access & F_WRACC)) {
121 					mutex_exit(&vp->v_lock);
122 					return (EAGAIN);
123 				}
124 				/*
125 				 * If read only file allow, this may allow
126 				 * a deny write but that is meaningless on
127 				 * a read only file.
128 				 */
129 				if (isreadonly(vp))
130 					break;
131 				mutex_exit(&vp->v_lock);
132 				return (EAGAIN);
133 			}
134 			/*
135 			 * This is a compat request and read access
136 			 * and the first was also read access
137 			 * we always allow it, otherwise we reject because
138 			 * we have handled the only valid write case above.
139 			 */
140 			if ((shr->s_access == F_RDACC) &&
141 			    (shrl->shr->s_access == F_RDACC))
142 				break;
143 			mutex_exit(&vp->v_lock);
144 			return (EAGAIN);
145 		}
146 
147 		/*
148 		 * If we are trying to share in compatibility mode
149 		 * and the current share is compat (and not the first)
150 		 * we don't know enough.
151 		 */
152 		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
153 			continue;
154 
155 		/*
156 		 * If this is a compat we check for what can't succeed.
157 		 */
158 		if (shr->s_deny & F_COMPAT) {
159 			/*
160 			 * If we want write access or
161 			 * if anyone is denying read or
162 			 * if anyone has write access we fail
163 			 */
164 			if ((shr->s_access & F_WRACC) ||
165 			    (shrl->shr->s_deny & F_RDDNY) ||
166 			    (shrl->shr->s_access & F_WRACC)) {
167 				mutex_exit(&vp->v_lock);
168 				return (EAGAIN);
169 			}
170 			/*
171 			 * If the first was opened with only read access
172 			 * and is a read only file we allow.
173 			 */
174 			if (shrl->next == NULL) {
175 				if ((shrl->shr->s_access == F_RDACC) &&
176 				    isreadonly(vp)) {
177 					break;
178 				}
179 				mutex_exit(&vp->v_lock);
180 				return (EAGAIN);
181 			}
182 			/*
183 			 * We still can't determine our fate so continue
184 			 */
185 			continue;
186 		}
187 
188 		/*
189 		 * Simple bitwise test, if we are trying to access what
190 		 * someone else is denying or we are trying to deny
191 		 * what someone else is accessing we fail.
192 		 */
193 		if ((shr->s_access & shrl->shr->s_deny) ||
194 		    (shr->s_deny & shrl->shr->s_access)) {
195 			mutex_exit(&vp->v_lock);
196 			return (EAGAIN);
197 		}
198 	}
199 
200 	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
201 	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
202 	shrl->shr->s_access = shr->s_access;
203 	shrl->shr->s_deny = shr->s_deny;
204 
205 	/*
206 	 * Make sure no other deny modes are also set with F_COMPAT
207 	 */
208 	if (shrl->shr->s_deny & F_COMPAT)
209 		shrl->shr->s_deny = F_COMPAT;
210 	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
211 	shrl->shr->s_pid = shr->s_pid;
212 	shrl->shr->s_own_len = shr->s_own_len;
213 	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
214 	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
215 	shrl->next = vp->v_shrlocks;
216 	vp->v_shrlocks = shrl;
217 #ifdef DEBUG
218 	if (share_debug)
219 		print_shares(vp);
220 #endif
221 
222 	mutex_exit(&vp->v_lock);
223 
224 	return (0);
225 }
226 
227 /*
228  *	nlmid	sysid	pid
229  *	=====	=====	===
230  *	!=0	!=0	=0	in cluster; NLM lock
231  *	!=0	=0	=0	in cluster; special case for NLM lock
232  *	!=0	=0	!=0	in cluster; PXFS local lock
233  *	!=0	!=0	!=0	cannot happen
234  *	=0	!=0	=0	not in cluster; NLM lock
235  *	=0	=0	!=0	not in cluster; local lock
236  *	=0	=0	=0	cannot happen
237  *	=0	!=0	!=0	cannot happen
238  */
239 static int
240 is_match_for_del(struct shrlock *shr, struct shrlock *element)
241 {
242 	int nlmid1, nlmid2;
243 	int result = 0;
244 
245 	nlmid1 = GETNLMID(shr->s_sysid);
246 	nlmid2 = GETNLMID(element->s_sysid);
247 
248 	if (nlmid1 != 0) {		/* in a cluster */
249 		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
250 			/*
251 			 * Lock obtained through nlm server.  Just need to
252 			 * compare whole sysids.  pid will always = 0.
253 			 */
254 			result = shr->s_sysid == element->s_sysid;
255 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
256 			/*
257 			 * This is a special case.  The NLM server wishes to
258 			 * delete all share locks obtained through nlmid1.
259 			 */
260 			result = (nlmid1 == nlmid2);
261 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
262 			/*
263 			 * Lock obtained locally through PXFS.  Match nlmids
264 			 * and pids.
265 			 */
266 			result = (nlmid1 == nlmid2 &&
267 			    shr->s_pid == element->s_pid);
268 		}
269 	} else {			/* not in a cluster */
270 		result = ((shr->s_sysid == 0 &&
271 		    shr->s_pid == element->s_pid) ||
272 		    (shr->s_sysid != 0 &&
273 		    shr->s_sysid == element->s_sysid));
274 	}
275 	return (result);
276 }
277 
278 /*
279  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
280  * share could not be found.  If the share reservation is an NBMAND share
281  * reservation, signal anyone waiting for the share to go away (e.g.,
282  * blocking lock requests).
283  */
284 
285 int
286 del_share(struct vnode *vp, struct shrlock *shr)
287 {
288 	struct shrlocklist *shrl;
289 	struct shrlocklist **shrlp;
290 	int found = 0;
291 	int is_nbmand = 0;
292 
293 	mutex_enter(&vp->v_lock);
294 	/*
295 	 * Delete the shares with the matching sysid and owner
296 	 * But if own_len == 0 and sysid == 0 delete all with matching pid
297 	 * But if own_len == 0 delete all with matching sysid.
298 	 */
299 	shrlp = &vp->v_shrlocks;
300 	while (*shrlp) {
301 		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
302 		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
303 		    shr->s_own_len) == 0)) ||
304 
305 		    (shr->s_own_len == 0 &&
306 		    is_match_for_del(shr, (*shrlp)->shr))) {
307 
308 			shrl = *shrlp;
309 			*shrlp = shrl->next;
310 
311 			if (shrl->shr->s_deny & F_MANDDNY)
312 				is_nbmand = 1;
313 
314 			/* XXX deref sysid */
315 			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
316 			kmem_free(shrl->shr, sizeof (struct shrlock));
317 			kmem_free(shrl, sizeof (struct shrlocklist));
318 			found++;
319 			continue;
320 		}
321 		shrlp = &(*shrlp)->next;
322 	}
323 
324 	if (is_nbmand)
325 		cv_broadcast(&vp->v_cv);
326 
327 	mutex_exit(&vp->v_lock);
328 	return (found ? 0 : EINVAL);
329 }
330 
331 /*
332  * Clean up all local share reservations that the given process has with
333  * the given file.
334  */
335 void
336 cleanshares(struct vnode *vp, pid_t pid)
337 {
338 	struct shrlock shr;
339 
340 	if (vp->v_shrlocks == NULL)
341 		return;
342 
343 	shr.s_access = 0;
344 	shr.s_deny = 0;
345 	shr.s_pid = pid;
346 	shr.s_sysid = 0;
347 	shr.s_own_len = 0;
348 	shr.s_owner = NULL;
349 
350 	(void) del_share(vp, &shr);
351 }
352 
353 static int
354 is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
355 {
356 	int result = 0;
357 
358 	if (GETNLMID(sysid1) != 0) { /* in a cluster */
359 		if (GETSYSID(sysid1) != 0) {
360 			/*
361 			 * Lock obtained through nlm server.  Just need to
362 			 * compare whole sysids.
363 			 */
364 			result = (sysid1 == sysid2);
365 		} else if (GETSYSID(sysid1) == 0) {
366 			/*
367 			 * This is a special case.  The NLM server identified
368 			 * by nlmid1 wishes to find out if it has obtained
369 			 * any share locks on the vnode.
370 			 */
371 			result = (GETNLMID(sysid1) == GETNLMID(sysid2));
372 		}
373 	} else {			/* not in a cluster */
374 		result = ((sysid1 != 0 && sysid1 == sysid2) ||
375 		    (sysid1 == 0 && sysid2 != 0));
376 	}
377 	return (result);
378 }
379 
380 
381 /*
382  * Determine whether there are any shares for the given vnode
383  * with a remote sysid. Returns zero if not, non-zero if there are.
384  * If sysid is non-zero then determine if this sysid has a share.
385  *
386  * Note that the return value from this function is potentially invalid
387  * once it has been returned.  The caller is responsible for providing its
388  * own synchronization mechanism to ensure that the return value is useful.
389  */
390 int
391 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
392 {
393 	struct shrlocklist *shrl;
394 	int result = 0;
395 
396 	mutex_enter(&vp->v_lock);
397 	shrl = vp->v_shrlocks;
398 	while (shrl) {
399 		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
400 
401 			result = 1;
402 			break;
403 		}
404 		shrl = shrl->next;
405 	}
406 	mutex_exit(&vp->v_lock);
407 	return (result);
408 }
409 
410 static int
411 isreadonly(struct vnode *vp)
412 {
413 	return (vp->v_type != VCHR && vp->v_type != VBLK &&
414 	    vp->v_type != VFIFO && vn_is_readonly(vp));
415 }
416 
417 #ifdef DEBUG
418 static void
419 print_shares(struct vnode *vp)
420 {
421 	struct shrlocklist *shrl;
422 
423 	if (vp->v_shrlocks == NULL) {
424 		printf("<NULL>\n");
425 		return;
426 	}
427 
428 	shrl = vp->v_shrlocks;
429 	while (shrl) {
430 		print_share(shrl->shr);
431 		shrl = shrl->next;
432 	}
433 }
434 
435 static void
436 print_share(struct shrlock *shr)
437 {
438 	int i;
439 
440 	if (shr == NULL) {
441 		printf("<NULL>\n");
442 		return;
443 	}
444 
445 	printf("    access(%d):	", shr->s_access);
446 	if (shr->s_access & F_RDACC)
447 		printf("R");
448 	if (shr->s_access & F_WRACC)
449 		printf("W");
450 	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
451 		printf("N");
452 	printf("\n");
453 	printf("    deny:	");
454 	if (shr->s_deny & F_COMPAT)
455 		printf("C");
456 	if (shr->s_deny & F_RDDNY)
457 		printf("R");
458 	if (shr->s_deny & F_WRDNY)
459 		printf("W");
460 	if (shr->s_deny == F_NODNY)
461 		printf("N");
462 	printf("\n");
463 	printf("    sysid:	%d\n", shr->s_sysid);
464 	printf("    pid:	%d\n", shr->s_pid);
465 	printf("    owner:	[%d]", shr->s_own_len);
466 	printf("'");
467 	for (i = 0; i < shr->s_own_len; i++)
468 		printf("%02x", (unsigned)shr->s_owner[i]);
469 	printf("'\n");
470 }
471 #endif
472 
473 /*
474  * Return non-zero if the given I/O request conflicts with a registered
475  * share reservation.
476  *
477  * A process is identified by the tuple (sysid, pid). When the caller
478  * context is passed to nbl_share_conflict, the sysid and pid in the
479  * caller context are used. Otherwise the sysid is zero, and the pid is
480  * taken from the current process.
481  *
482  * Conflict Algorithm:
483  *   1. An op request of NBL_READ will fail if a different
484  *      process has a mandatory share reservation with deny read.
485  *
486  *   2. An op request of NBL_WRITE will fail if a different
487  *      process has a mandatory share reservation with deny write.
488  *
489  *   3. An op request of NBL_READWRITE will fail if a different
490  *      process has a mandatory share reservation with deny read
491  *      or deny write.
492  *
493  *   4. An op request of NBL_REMOVE will fail if there is
494  *      a mandatory share reservation with an access of read,
495  *      write, or remove. (Anything other than meta data access).
496  *
497  *   5. An op request of NBL_RENAME will fail if there is
498  *      a mandatory share reservation with:
499  *        a) access write or access remove
500  *      or
501  *        b) access read and deny remove
502  *
503  *   Otherwise there is no conflict and the op request succeeds.
504  *
505  * This behavior is required for interoperability between
506  * the nfs server, cifs server, and local access.
507  * This behavior can result in non-posix semantics.
508  *
509  * When mandatory share reservations are enabled, a process
510  * should call nbl_share_conflict to determine if the
511  * desired operation would conflict with an existing share
512  * reservation.
513  *
514  * The call to nbl_share_conflict may be skipped if the
515  * process has an existing share reservation and the operation
516  * is being performed in the context of that existing share
517  * reservation.
518  */
519 int
520 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
521 {
522 	struct shrlocklist *shrl;
523 	int conflict = 0;
524 	pid_t pid;
525 	int sysid;
526 
527 	ASSERT(nbl_in_crit(vp));
528 
529 	if (ct == NULL) {
530 		pid = curproc->p_pid;
531 		sysid = 0;
532 	} else {
533 		pid = ct->cc_pid;
534 		sysid = ct->cc_sysid;
535 	}
536 
537 	mutex_enter(&vp->v_lock);
538 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
539 		if (!(shrl->shr->s_deny & F_MANDDNY))
540 			continue;
541 		/*
542 		 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
543 		 * check if the share reservation being examined
544 		 * belongs to the current process.
545 		 * NBL_REMOVE and NBL_RENAME do not.
546 		 * This behavior is required by the conflict
547 		 * algorithm described above.
548 		 */
549 		switch (op) {
550 		case NBL_READ:
551 			if ((shrl->shr->s_deny & F_RDDNY) &&
552 			    (shrl->shr->s_sysid != sysid ||
553 			    shrl->shr->s_pid != pid))
554 				conflict = 1;
555 			break;
556 		case NBL_WRITE:
557 			if ((shrl->shr->s_deny & F_WRDNY) &&
558 			    (shrl->shr->s_sysid != sysid ||
559 			    shrl->shr->s_pid != pid))
560 				conflict = 1;
561 			break;
562 		case NBL_READWRITE:
563 			if ((shrl->shr->s_deny & F_RWDNY) &&
564 			    (shrl->shr->s_sysid != sysid ||
565 			    shrl->shr->s_pid != pid))
566 				conflict = 1;
567 			break;
568 		case NBL_REMOVE:
569 			if (shrl->shr->s_access & (F_RWACC|F_RMACC))
570 				conflict = 1;
571 			break;
572 		case NBL_RENAME:
573 			if (shrl->shr->s_access & (F_WRACC|F_RMACC))
574 				conflict = 1;
575 
576 			else if ((shrl->shr->s_access & F_RDACC) &&
577 			    (shrl->shr->s_deny & F_RMDNY))
578 				conflict = 1;
579 			break;
580 #ifdef DEBUG
581 		default:
582 			cmn_err(CE_PANIC,
583 			    "nbl_share_conflict: bogus op (%d)",
584 			    op);
585 			break;
586 #endif
587 		}
588 		if (conflict)
589 			break;
590 	}
591 
592 	mutex_exit(&vp->v_lock);
593 	return (conflict);
594 }
595 
596 /*
597  * Determine if the given process has a NBMAND share reservation on the
598  * given vnode. Returns 1 if the process has such a share reservation,
599  * returns 0 otherwise.
600  */
601 int
602 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
603 {
604 	struct shrlocklist *shrl;
605 
606 	/*
607 	 * Any NBMAND share reservation on the vp for this process?
608 	 */
609 	mutex_enter(&vp->v_lock);
610 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
611 		if (shrl->shr->s_sysid == 0 &&
612 		    (shrl->shr->s_deny & F_MANDDNY) &&
613 		    (shrl->shr->s_pid == pid)) {
614 			mutex_exit(&vp->v_lock);
615 			return (1);
616 		}
617 	}
618 	mutex_exit(&vp->v_lock);
619 
620 	return (0);
621 }
622