1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
28 */
29
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/fcntl.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/share.h>
38 #include <sys/cmn_err.h>
39 #include <sys/kmem.h>
40 #include <sys/debug.h>
41 #include <sys/t_lock.h>
42 #include <sys/errno.h>
43 #include <sys/nbmlock.h>
44
/*
 * Debug switch: on DEBUG builds, a non-zero value makes add_share() dump
 * the vnode's share list after every successful add.
 */
int share_debug = 0;

#ifdef DEBUG
static void print_shares(struct vnode *vp);
static void print_share(struct shrlock *shr);
#endif

static int isreadonly(struct vnode *vp);
static void do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid);
54
55
56 /*
57 * Add the share reservation shr to vp.
58 */
59 int
add_share(struct vnode * vp,struct shrlock * shr)60 add_share(struct vnode *vp, struct shrlock *shr)
61 {
62 struct shrlocklist *shrl;
63
64 /*
65 * An access of zero is not legal, however some older clients
66 * generate it anyways. Allow the request only if it is
67 * coming from a remote system. Be generous in what you
68 * accept and strict in what you send.
69 */
70 if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
71 return (EINVAL);
72 }
73
74 /*
75 * Sanity check to make sure we have valid options.
76 * There is known overlap but it doesn't hurt to be careful.
77 */
78 if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
79 return (EINVAL);
80 }
81 if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
82 F_MANDDNY|F_RMDNY)) {
83 return (EINVAL);
84 }
85
86 mutex_enter(&vp->v_lock);
87 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
88 /*
89 * If the share owner matches previous request
90 * do special handling.
91 */
92 if ((shrl->shr->s_sysid == shr->s_sysid) &&
93 (shrl->shr->s_pid == shr->s_pid) &&
94 (shrl->shr->s_own_len == shr->s_own_len) &&
95 bcmp(shrl->shr->s_owner, shr->s_owner,
96 shr->s_own_len) == 0) {
97
98 /*
99 * If the existing request is F_COMPAT and
100 * is the first share then allow any F_COMPAT
101 * from the same process. Trick: If the existing
102 * F_COMPAT is write access then it must have
103 * the same owner as the first.
104 */
105 if ((shrl->shr->s_deny & F_COMPAT) &&
106 (shr->s_deny & F_COMPAT) &&
107 ((shrl->next == NULL) ||
108 (shrl->shr->s_access & F_WRACC)))
109 break;
110 }
111
112 /*
113 * If a first share has been done in compatibility mode
114 * handle the special cases.
115 */
116 if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
117
118 if (!(shr->s_deny & F_COMPAT)) {
119 /*
120 * If not compat and want write access or
121 * want to deny read or
122 * write exists, fails
123 */
124 if ((shr->s_access & F_WRACC) ||
125 (shr->s_deny & F_RDDNY) ||
126 (shrl->shr->s_access & F_WRACC)) {
127 mutex_exit(&vp->v_lock);
128 return (EAGAIN);
129 }
130 /*
131 * If read only file allow, this may allow
132 * a deny write but that is meaningless on
133 * a read only file.
134 */
135 if (isreadonly(vp))
136 break;
137 mutex_exit(&vp->v_lock);
138 return (EAGAIN);
139 }
140 /*
141 * This is a compat request and read access
142 * and the first was also read access
143 * we always allow it, otherwise we reject because
144 * we have handled the only valid write case above.
145 */
146 if ((shr->s_access == F_RDACC) &&
147 (shrl->shr->s_access == F_RDACC))
148 break;
149 mutex_exit(&vp->v_lock);
150 return (EAGAIN);
151 }
152
153 /*
154 * If we are trying to share in compatibility mode
155 * and the current share is compat (and not the first)
156 * we don't know enough.
157 */
158 if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
159 continue;
160
161 /*
162 * If this is a compat we check for what can't succeed.
163 */
164 if (shr->s_deny & F_COMPAT) {
165 /*
166 * If we want write access or
167 * if anyone is denying read or
168 * if anyone has write access we fail
169 */
170 if ((shr->s_access & F_WRACC) ||
171 (shrl->shr->s_deny & F_RDDNY) ||
172 (shrl->shr->s_access & F_WRACC)) {
173 mutex_exit(&vp->v_lock);
174 return (EAGAIN);
175 }
176 /*
177 * If the first was opened with only read access
178 * and is a read only file we allow.
179 */
180 if (shrl->next == NULL) {
181 if ((shrl->shr->s_access == F_RDACC) &&
182 isreadonly(vp)) {
183 break;
184 }
185 mutex_exit(&vp->v_lock);
186 return (EAGAIN);
187 }
188 /*
189 * We still can't determine our fate so continue
190 */
191 continue;
192 }
193
194 /*
195 * Simple bitwise test, if we are trying to access what
196 * someone else is denying or we are trying to deny
197 * what someone else is accessing we fail.
198 */
199 if ((shr->s_access & shrl->shr->s_deny) ||
200 (shr->s_deny & shrl->shr->s_access)) {
201 mutex_exit(&vp->v_lock);
202 return (EAGAIN);
203 }
204 }
205
206 shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
207 shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
208 shrl->shr->s_access = shr->s_access;
209 shrl->shr->s_deny = shr->s_deny;
210
211 /*
212 * Make sure no other deny modes are also set with F_COMPAT
213 */
214 if (shrl->shr->s_deny & F_COMPAT)
215 shrl->shr->s_deny = F_COMPAT;
216 shrl->shr->s_sysid = shr->s_sysid; /* XXX ref cnt? */
217 shrl->shr->s_pid = shr->s_pid;
218 shrl->shr->s_own_len = shr->s_own_len;
219 shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
220 bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
221 shrl->next = vp->v_shrlocks;
222 vp->v_shrlocks = shrl;
223 #ifdef DEBUG
224 if (share_debug)
225 print_shares(vp);
226 #endif
227
228 mutex_exit(&vp->v_lock);
229
230 return (0);
231 }
232
233 /*
234 * nlmid sysid pid
235 * ===== ===== ===
236 * !=0 !=0 =0 in cluster; NLM lock
237 * !=0 =0 =0 in cluster; special case for NLM lock
238 * !=0 =0 !=0 in cluster; PXFS local lock
239 * !=0 !=0 !=0 cannot happen
240 * =0 !=0 =0 not in cluster; NLM lock
241 * =0 =0 !=0 not in cluster; local lock
242 * =0 =0 =0 cannot happen
243 * =0 !=0 !=0 cannot happen
244 */
245 static int
is_match_for_del(struct shrlock * shr,struct shrlock * element)246 is_match_for_del(struct shrlock *shr, struct shrlock *element)
247 {
248 int nlmid1, nlmid2;
249 int result = 0;
250
251 nlmid1 = GETNLMID(shr->s_sysid);
252 nlmid2 = GETNLMID(element->s_sysid);
253
254 if (nlmid1 != 0) { /* in a cluster */
255 if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
256 /*
257 * Lock obtained through nlm server. Just need to
258 * compare whole sysids. pid will always = 0.
259 */
260 result = shr->s_sysid == element->s_sysid;
261 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
262 /*
263 * This is a special case. The NLM server wishes to
264 * delete all share locks obtained through nlmid1.
265 */
266 result = (nlmid1 == nlmid2);
267 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
268 /*
269 * Lock obtained locally through PXFS. Match nlmids
270 * and pids.
271 */
272 result = (nlmid1 == nlmid2 &&
273 shr->s_pid == element->s_pid);
274 }
275 } else { /* not in a cluster */
276 result = ((shr->s_sysid == 0 &&
277 shr->s_pid == element->s_pid) ||
278 (shr->s_sysid != 0 &&
279 shr->s_sysid == element->s_sysid));
280 }
281 return (result);
282 }
283
284 /*
285 * Delete the given share reservation. Returns 0 if okay, EINVAL if the
286 * share could not be found. If the share reservation is an NBMAND share
287 * reservation, signal anyone waiting for the share to go away (e.g.,
288 * blocking lock requests).
289 */
290
291 int
del_share(struct vnode * vp,struct shrlock * shr)292 del_share(struct vnode *vp, struct shrlock *shr)
293 {
294 struct shrlocklist *shrl;
295 struct shrlocklist **shrlp;
296 int found = 0;
297 int is_nbmand = 0;
298
299 mutex_enter(&vp->v_lock);
300 /*
301 * Delete the shares with the matching sysid and owner
302 * But if own_len == 0 and sysid == 0 delete all with matching pid
303 * But if own_len == 0 delete all with matching sysid.
304 */
305 shrlp = &vp->v_shrlocks;
306 while (*shrlp) {
307 if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
308 (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
309 shr->s_own_len) == 0)) ||
310
311 (shr->s_own_len == 0 &&
312 is_match_for_del(shr, (*shrlp)->shr))) {
313
314 shrl = *shrlp;
315 *shrlp = shrl->next;
316
317 if (shrl->shr->s_deny & F_MANDDNY)
318 is_nbmand = 1;
319
320 /* XXX deref sysid */
321 kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
322 kmem_free(shrl->shr, sizeof (struct shrlock));
323 kmem_free(shrl, sizeof (struct shrlocklist));
324 found++;
325 continue;
326 }
327 shrlp = &(*shrlp)->next;
328 }
329
330 if (is_nbmand)
331 cv_broadcast(&vp->v_cv);
332
333 mutex_exit(&vp->v_lock);
334 return (found ? 0 : EINVAL);
335 }
336
/*
 * Drop every local share reservation that process `pid' holds on vp.
 * "Local" is expressed by handing do_cleanshares() a sysid of zero.
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	const int32_t local_sysid = 0;

	do_cleanshares(vp, pid, local_sysid);
}
346
/*
 * Drop every remote share reservation on vp that was made by `sysid'.
 * A sysid of zero denotes no remote system, so the call is then a no-op.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid != 0)
		do_cleanshares(vp, 0, sysid);
}
359
360 /*
361 * Cleanup share reservations on given vnode made
362 * by the either given pid or sysid.
363 * If sysid is 0, remove all shares made by given pid,
364 * otherwise all shares made by the given sysid will
365 * be removed.
366 */
367 static void
do_cleanshares(struct vnode * vp,pid_t pid,int32_t sysid)368 do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
369 {
370 struct shrlock shr;
371
372 if (vp->v_shrlocks == NULL)
373 return;
374
375 shr.s_access = 0;
376 shr.s_deny = 0;
377 shr.s_pid = pid;
378 shr.s_sysid = sysid;
379 shr.s_own_len = 0;
380 shr.s_owner = NULL;
381
382 (void) del_share(vp, &shr);
383 }
384
/*
 * Decide whether a stored reservation's sysid (sysid2) satisfies the query
 * sysid (sysid1) used by shr_has_remote_shares().  Returns non-zero on a
 * match.
 */
static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	if (GETNLMID(sysid1) == 0) {
		/*
		 * Not in a cluster: a zero query matches any remote
		 * (non-zero) sysid, otherwise the sysids must be equal.
		 */
		if (sysid1 == 0)
			return (sysid2 != 0);
		return (sysid1 == sysid2);
	}

	/* In a cluster. */
	if (GETSYSID(sysid1) == 0) {
		/*
		 * This is a special case.  The NLM server identified by
		 * the query's nlmid wishes to find out if it has obtained
		 * any share locks on the vnode.
		 */
		return (GETNLMID(sysid1) == GETNLMID(sysid2));
	}

	/*
	 * Lock obtained through the NLM server.  Just need to compare
	 * whole sysids.
	 */
	return (sysid1 == sysid2);
}
411
412
413 /*
414 * Determine whether there are any shares for the given vnode
415 * with a remote sysid. Returns zero if not, non-zero if there are.
416 * If sysid is non-zero then determine if this sysid has a share.
417 *
418 * Note that the return value from this function is potentially invalid
419 * once it has been returned. The caller is responsible for providing its
420 * own synchronization mechanism to ensure that the return value is useful.
421 */
422 int
shr_has_remote_shares(vnode_t * vp,int32_t sysid)423 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
424 {
425 struct shrlocklist *shrl;
426 int result = 0;
427
428 mutex_enter(&vp->v_lock);
429 shrl = vp->v_shrlocks;
430 while (shrl) {
431 if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
432
433 result = 1;
434 break;
435 }
436 shrl = shrl->next;
437 }
438 mutex_exit(&vp->v_lock);
439 return (result);
440 }
441
442 static int
isreadonly(struct vnode * vp)443 isreadonly(struct vnode *vp)
444 {
445 return (vp->v_type != VCHR && vp->v_type != VBLK &&
446 vp->v_type != VFIFO && vn_is_readonly(vp));
447 }
448
449 #ifdef DEBUG
450 static void
print_shares(struct vnode * vp)451 print_shares(struct vnode *vp)
452 {
453 struct shrlocklist *shrl;
454
455 if (vp->v_shrlocks == NULL) {
456 printf("<NULL>\n");
457 return;
458 }
459
460 shrl = vp->v_shrlocks;
461 while (shrl) {
462 print_share(shrl->shr);
463 shrl = shrl->next;
464 }
465 }
466
467 static void
print_share(struct shrlock * shr)468 print_share(struct shrlock *shr)
469 {
470 int i;
471
472 if (shr == NULL) {
473 printf("<NULL>\n");
474 return;
475 }
476
477 printf(" access(%d): ", shr->s_access);
478 if (shr->s_access & F_RDACC)
479 printf("R");
480 if (shr->s_access & F_WRACC)
481 printf("W");
482 if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
483 printf("N");
484 printf("\n");
485 printf(" deny: ");
486 if (shr->s_deny & F_COMPAT)
487 printf("C");
488 if (shr->s_deny & F_RDDNY)
489 printf("R");
490 if (shr->s_deny & F_WRDNY)
491 printf("W");
492 if (shr->s_deny == F_NODNY)
493 printf("N");
494 printf("\n");
495 printf(" sysid: %d\n", shr->s_sysid);
496 printf(" pid: %d\n", shr->s_pid);
497 printf(" owner: [%d]", shr->s_own_len);
498 printf("'");
499 for (i = 0; i < shr->s_own_len; i++)
500 printf("%02x", (unsigned)shr->s_owner[i]);
501 printf("'\n");
502 }
503 #endif
504
505 /*
506 * Return non-zero if the given I/O request conflicts with a registered
507 * share reservation.
508 *
509 * A process is identified by the tuple (sysid, pid). When the caller
510 * context is passed to nbl_share_conflict, the sysid and pid in the
511 * caller context are used. Otherwise the sysid is zero, and the pid is
512 * taken from the current process.
513 *
514 * Conflict Algorithm:
515 * 1. An op request of NBL_READ will fail if a different
516 * process has a mandatory share reservation with deny read.
517 *
518 * 2. An op request of NBL_WRITE will fail if a different
519 * process has a mandatory share reservation with deny write.
520 *
521 * 3. An op request of NBL_READWRITE will fail if a different
522 * process has a mandatory share reservation with deny read
523 * or deny write.
524 *
525 * 4. An op request of NBL_REMOVE will fail if there is
526 * a mandatory share reservation with an access of read,
527 * write, or remove. (Anything other than meta data access).
528 *
529 * 5. An op request of NBL_RENAME will fail if there is
530 * a mandatory share reservation with:
531 * a) access write or access remove
532 * or
533 * b) access read and deny remove
534 *
535 * Otherwise there is no conflict and the op request succeeds.
536 *
537 * This behavior is required for interoperability between
538 * the nfs server, cifs server, and local access.
539 * This behavior can result in non-posix semantics.
540 *
541 * When mandatory share reservations are enabled, a process
542 * should call nbl_share_conflict to determine if the
543 * desired operation would conflict with an existing share
544 * reservation.
545 *
546 * The call to nbl_share_conflict may be skipped if the
547 * process has an existing share reservation and the operation
548 * is being performed in the context of that existing share
549 * reservation.
550 */
551 int
nbl_share_conflict(vnode_t * vp,nbl_op_t op,caller_context_t * ct)552 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
553 {
554 struct shrlocklist *shrl;
555 int conflict = 0;
556 pid_t pid;
557 int sysid;
558
559 ASSERT(nbl_in_crit(vp));
560
561 if (ct == NULL) {
562 pid = curproc->p_pid;
563 sysid = 0;
564 } else {
565 pid = ct->cc_pid;
566 sysid = ct->cc_sysid;
567 }
568
569 mutex_enter(&vp->v_lock);
570 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
571 if (!(shrl->shr->s_deny & F_MANDDNY))
572 continue;
573 /*
574 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
575 * check if the share reservation being examined
576 * belongs to the current process.
577 * NBL_REMOVE and NBL_RENAME do not.
578 * This behavior is required by the conflict
579 * algorithm described above.
580 */
581 switch (op) {
582 case NBL_READ:
583 if ((shrl->shr->s_deny & F_RDDNY) &&
584 (shrl->shr->s_sysid != sysid ||
585 shrl->shr->s_pid != pid))
586 conflict = 1;
587 break;
588 case NBL_WRITE:
589 if ((shrl->shr->s_deny & F_WRDNY) &&
590 (shrl->shr->s_sysid != sysid ||
591 shrl->shr->s_pid != pid))
592 conflict = 1;
593 break;
594 case NBL_READWRITE:
595 if ((shrl->shr->s_deny & F_RWDNY) &&
596 (shrl->shr->s_sysid != sysid ||
597 shrl->shr->s_pid != pid))
598 conflict = 1;
599 break;
600 case NBL_REMOVE:
601 if (shrl->shr->s_access & (F_RWACC|F_RMACC))
602 conflict = 1;
603 break;
604 case NBL_RENAME:
605 if (shrl->shr->s_access & (F_WRACC|F_RMACC))
606 conflict = 1;
607
608 else if ((shrl->shr->s_access & F_RDACC) &&
609 (shrl->shr->s_deny & F_RMDNY))
610 conflict = 1;
611 break;
612 #ifdef DEBUG
613 default:
614 cmn_err(CE_PANIC,
615 "nbl_share_conflict: bogus op (%d)",
616 op);
617 break;
618 #endif
619 }
620 if (conflict)
621 break;
622 }
623
624 mutex_exit(&vp->v_lock);
625 return (conflict);
626 }
627
628 /*
629 * Determine if the given process has a NBMAND share reservation on the
630 * given vnode. Returns 1 if the process has such a share reservation,
631 * returns 0 otherwise.
632 */
633 int
proc_has_nbmand_share_on_vp(vnode_t * vp,pid_t pid)634 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
635 {
636 struct shrlocklist *shrl;
637
638 /*
639 * Any NBMAND share reservation on the vp for this process?
640 */
641 mutex_enter(&vp->v_lock);
642 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
643 if (shrl->shr->s_sysid == 0 &&
644 (shrl->shr->s_deny & F_MANDDNY) &&
645 (shrl->shr->s_pid == pid)) {
646 mutex_exit(&vp->v_lock);
647 return (1);
648 }
649 }
650 mutex_exit(&vp->v_lock);
651
652 return (0);
653 }
654