xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_lock.c (revision 17a5fa85fe0c34b1146222e40a80b42f2aae8500)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 /*
27  * This module provides range lock functionality for CIFS/SMB clients.
28  * Lock range service functions process SMB lock and unlock
29  * requests for a file by applying the lock rules, marking the file
30  * range as locked if the lock is granted, and otherwise returning
31  * the appropriate error code.
32  */
33 
34 #include <smbsrv/smb_kproto.h>
35 #include <smbsrv/smb_fsops.h>
36 #include <sys/nbmlock.h>
37 #include <sys/param.h>
38 
39 extern caller_context_t smb_ct;
40 
41 #ifdef	DEBUG
42 int smb_lock_debug = 0;
43 static void smb_lock_dump1(smb_lock_t *);
44 static void smb_lock_dumplist(smb_llist_t *);
45 static void smb_lock_dumpnode(smb_node_t *);
46 #endif
47 
48 static void smb_lock_posix_unlock(smb_node_t *, smb_lock_t *, cred_t *);
49 static boolean_t smb_is_range_unlocked(uint64_t, uint64_t, uint32_t,
50     smb_llist_t *, uint64_t *);
51 static int smb_lock_range_overlap(smb_lock_t *, uint64_t, uint64_t);
52 static uint32_t smb_lock_range_lckrules(smb_ofile_t *, smb_lock_t *,
53     smb_lock_t **);
54 static uint32_t smb_lock_wait(smb_request_t *, smb_lock_t *, smb_lock_t *);
55 static uint32_t smb_lock_range_ulckrules(smb_ofile_t *,
56     uint64_t, uint64_t, uint32_t, smb_lock_t **);
57 static smb_lock_t *smb_lock_create(smb_request_t *, uint64_t, uint64_t,
58     uint32_t, uint32_t, uint32_t);
59 static void smb_lock_destroy(smb_lock_t *);
60 static void smb_lock_free(smb_lock_t *);
61 
62 /*
63  * Return the number of range locks on the specified ofile.
64  */
65 uint32_t
66 smb_lock_get_lock_count(smb_node_t *node, smb_ofile_t *of)
67 {
68 	smb_lock_t 	*lock;
69 	smb_llist_t	*llist;
70 	uint32_t	count = 0;
71 
72 	SMB_NODE_VALID(node);
73 	SMB_OFILE_VALID(of);
74 
75 	llist = &node->n_lock_list;
76 
77 	smb_llist_enter(llist, RW_READER);
78 	for (lock = smb_llist_head(llist);
79 	    lock != NULL;
80 	    lock = smb_llist_next(llist, lock)) {
81 		if (lock->l_file == of)
82 			++count;
83 	}
84 	smb_llist_exit(llist);
85 
86 	return (count);
87 }
88 
89 /*
90  * smb_unlock_range
91  *
92  * Locates the lock range corresponding to the unlock request and removes it.
93  *
94  * NT_STATUS_SUCCESS - Unlock range performed successfully.
95  * !NT_STATUS_SUCCESS - Error in unlock range operation.
96  */
97 uint32_t
98 smb_unlock_range(
99     smb_request_t	*sr,
100     uint64_t		start,
101     uint64_t		length,
102     uint32_t		pid)
103 {
104 	smb_ofile_t	*file = sr->fid_ofile;
105 	smb_node_t	*node = file->f_node;
106 	smb_lock_t	*lock = NULL;
107 	uint32_t	status;
108 
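	/*
	 * Reject ranges that wrap past the end of the 64-bit offset
	 * space; zero- and one-byte ranges are never rejected here.
	 */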
109 	if (length > 1 &&
110 	    (start + length) < start)
111 		return (NT_STATUS_INVALID_LOCK_RANGE);
112 
113 #ifdef	DEBUG
114 	if (smb_lock_debug) {
115 		cmn_err(CE_CONT, "smb_unlock_range "
116 		    "off=0x%llx, len=0x%llx, f=%p, pid=%d\n",
117 		    (long long)start, (long long)length,
118 		    (void *)sr->fid_ofile, pid);
119 	}
120 #endif
121 
122 	/* Apply unlocking rules */
123 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
124 	status = smb_lock_range_ulckrules(file, start, length, pid, &lock);
125 	if (status != NT_STATUS_SUCCESS) {
126 		/*
127 		 * If no lock in the list matches the requested
128 		 * range, return an error.
129 		 */
130 		ASSERT(lock == NULL);
131 	}
132 	if (lock != NULL) {
133 		smb_llist_remove(&node->n_lock_list, lock);
134 		smb_lock_posix_unlock(node, lock, sr->user_cr);
135 	}
136 
137 #ifdef	DEBUG
138 	if (smb_lock_debug && lock == NULL) {
139 		cmn_err(CE_CONT, "unlock failed, 0x%x\n", status);
140 		smb_lock_dumpnode(node);
141 	}
142 #endif
143 
144 	smb_llist_exit(&node->n_lock_list);
145 
146 	if (lock != NULL)
147 		smb_lock_destroy(lock);
148 
149 	return (status);
150 }
151 
152 /*
153  * smb_lock_range
154  *
155  * Checks the validity of a file lock operation for the given range of
156  * file data. This is done by applying the lock rules against every
157  * element of the node lock list.
158  *
159  * Break shared (levelII) oplocks. If there is an exclusive oplock, it is
160  * owned by this ofile and therefore should not be broken.
161  *
162  * The function returns with the new lock added if the lock request does
163  * not conflict with an existing range lock on the file. Otherwise, if a
164  * timeout was given, the request waits for the conflicting lock to be released.
165  *
166  * NT_STATUS_SUCCESS - Lock range performed successfully.
167  * !NT_STATUS_SUCCESS - Error in lock range operation.
168  */
169 uint32_t
170 smb_lock_range(
171     smb_request_t	*sr,
172     uint64_t		start,
173     uint64_t		length,
174     uint32_t		pid,
175     uint32_t		locktype,
176     uint32_t		timeout)
177 {
178 	smb_ofile_t	*file = sr->fid_ofile;
179 	smb_node_t	*node = file->f_node;
180 	smb_lock_t	*lock;
181 	smb_lock_t	*conflict = NULL;
182 	uint32_t	result;
183 	int		rc;
184 	boolean_t	lock_has_timeout =
185 	    (timeout != 0 && timeout != UINT_MAX);
186 
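	/*
	 * Reject ranges that wrap past the end of the 64-bit offset
	 * space; zero- and one-byte ranges are never rejected here.
	 */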
187 	if (length > 1 &&
188 	    (start + length) < start)
189 		return (NT_STATUS_INVALID_LOCK_RANGE);
190 
191 #ifdef	DEBUG
192 	if (smb_lock_debug) {
193 		cmn_err(CE_CONT, "smb_lock_range "
194 		    "off=0x%llx, len=0x%llx, "
195 		    "f=%p, pid=%d, typ=%d, tmo=%d\n",
196 		    (long long)start, (long long)length,
197 		    (void *)sr->fid_ofile, pid, locktype, timeout);
198 	}
199 #endif
200 
201 	lock = smb_lock_create(sr, start, length, pid, locktype, timeout);
202 
203 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
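	/*
	 * Keep applying the lock rules until the lock is granted, the
	 * wait times out, or the request is cancelled or the file closed.
	 */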
204 	for (;;) {
205 
206 		/* Apply locking rules */
207 		result = smb_lock_range_lckrules(file, lock, &conflict);
208 		switch (result) {
209 		case NT_STATUS_LOCK_NOT_GRANTED: /* conflict! */
210 			/* may need to wait */
211 			break;
212 		case NT_STATUS_SUCCESS:
213 		case NT_STATUS_FILE_CLOSED:
214 			goto break_loop;
215 		default:
216 			cmn_err(CE_CONT, "smb_lock_range1, status 0x%x\n",
217 			    result);
218 			goto break_loop;
219 		}
220 		if (timeout == 0)
221 			goto break_loop;
222 
223 		/*
224 		 * Call smb_lock_wait holding write lock for
225 		 * node lock list.  smb_lock_wait will release
226 		 * the node list lock if it blocks, so after
227 		 * the call, (*conflict) may no longer exist.
228 		 */
229 		result = smb_lock_wait(sr, lock, conflict);
230 		conflict = NULL;
231 		switch (result) {
232 		case NT_STATUS_SUCCESS:
233 			/* conflict gone, try again */
234 			break;
235 		case NT_STATUS_TIMEOUT:
236 			/* try just once more */
237 			timeout = 0;
238 			break;
239 		case NT_STATUS_CANCELLED:
240 		case NT_STATUS_FILE_CLOSED:
241 			goto break_loop;
242 		default:
243 			cmn_err(CE_CONT, "smb_lock_range2, status 0x%x\n",
244 			    result);
245 			goto break_loop;
246 		}
247 	}
248 
249 break_loop:
250 	lock->l_blocked_by = NULL;
251 
252 	if (result != NT_STATUS_SUCCESS) {
253 		if (result == NT_STATUS_FILE_CLOSED)
254 			result = NT_STATUS_RANGE_NOT_LOCKED;
255 
256 		/*
257 		 * Under certain conditions NT_STATUS_FILE_LOCK_CONFLICT
258 		 * should be returned instead of NT_STATUS_LOCK_NOT_GRANTED.
259 		 * All of this appears to be specific to SMB1
260 		 */
261 		if (sr->session->dialect <= NT_LM_0_12 &&
262 		    result == NT_STATUS_LOCK_NOT_GRANTED) {
263 			/*
264 			 * Locks with timeouts always return
265 			 * NT_STATUS_FILE_LOCK_CONFLICT
266 			 */
267 			if (lock_has_timeout)
268 				result = NT_STATUS_FILE_LOCK_CONFLICT;
269 
270 			/*
271 			 * Locks starting higher than 0xef000000 that do not
272 			 * have the MSB set always return
273 			 * NT_STATUS_FILE_LOCK_CONFLICT
274 			 */
275 			if ((lock->l_start >= 0xef000000) &&
276 			    !(lock->l_start & (1ULL << 63))) {
277 				result = NT_STATUS_FILE_LOCK_CONFLICT;
278 			}
279 
280 			/*
281 			 * If the last lock attempt to fail on this file handle
282 			 * started at the same offset as this one then return
283 			 * NT_STATUS_FILE_LOCK_CONFLICT
284 			 */
285 			mutex_enter(&file->f_mutex);
286 			if ((file->f_flags & SMB_OFLAGS_LLF_POS_VALID) &&
287 			    (lock->l_start == file->f_llf_pos)) {
288 				result = NT_STATUS_FILE_LOCK_CONFLICT;
289 			}
290 			mutex_exit(&file->f_mutex);
291 		}
292 
293 		/* Update last lock failed offset */
294 		mutex_enter(&file->f_mutex);
295 		file->f_llf_pos = lock->l_start;
296 		file->f_flags |= SMB_OFLAGS_LLF_POS_VALID;
297 		mutex_exit(&file->f_mutex);
298 
299 		smb_lock_free(lock);
300 	} else {
301 		/*
302 		 * don't insert into the CIFS lock list unless the
303 		 * posix lock worked
304 		 */
305 		rc = smb_fsop_frlock(node, lock, B_FALSE, sr->user_cr);
306 		if (rc != 0) {
307 #ifdef	DEBUG
308 			if (smb_lock_debug)
309 				cmn_err(CE_CONT, "fop_frlock, err=%d\n", rc);
310 #endif
311 			result = NT_STATUS_FILE_LOCK_CONFLICT;
312 		} else {
313 			/*
314 			 * We want unlock to find exclusive locks before
315 			 * shared locks, so insert those at the head.
316 			 */
317 			if (lock->l_type == SMB_LOCK_TYPE_READWRITE)
318 				smb_llist_insert_head(&node->n_lock_list, lock);
319 			else
320 				smb_llist_insert_tail(&node->n_lock_list, lock);
321 		}
322 	}
323 
324 #ifdef	DEBUG
325 	if (smb_lock_debug && result != 0) {
326 		cmn_err(CE_CONT, "lock failed, 0x%x\n", result);
327 		smb_lock_dumpnode(node);
328 	}
329 #endif
330 
331 	smb_llist_exit(&node->n_lock_list);
332 
333 	if (result == NT_STATUS_SUCCESS)
334 		smb_oplock_break_levelII(node);
335 
336 	return (result);
337 }
338 
339 /*
340  * smb_lock_range_access
341  *
342  * Scans the node lock list to check for any overlapping lock.
343  * An overlapping write lock is allowed only when it is held by the
344  * same ofile and client pid; read locks never conflict with reads.
345  *
346  * Return values
347  *	NT_STATUS_SUCCESS		lock access granted.
348  *	NT_STATUS_FILE_LOCK_CONFLICT 	access denied due to lock conflict.
349  */
350 int
351 smb_lock_range_access(
352     smb_request_t	*sr,
353     smb_node_t		*node,
354     uint64_t		start,
355     uint64_t		length,
356     boolean_t		will_write)
357 {
358 	smb_lock_t	*lock;
359 	smb_llist_t	*llist;
360 	uint32_t	lk_pid = 0;
361 	int		status = NT_STATUS_SUCCESS;
362 
363 	if (length == 0)
364 		return (status);
365 
366 	/*
367 	 * What PID to use for lock conflict checks?
368 	 * SMB2 locking ignores PIDs (lk_pid stays 0).
369 	 * SMB1 uses the low 16 bits of sr->smb_pid.
370 	 */
371 	if (sr->session->dialect < SMB_VERS_2_BASE)
372 		lk_pid = sr->smb_pid & 0xFFFF;
373 
374 	llist = &node->n_lock_list;
375 	smb_llist_enter(llist, RW_READER);
376 	/* Search for any applicable lock */
377 	for (lock = smb_llist_head(llist);
378 	    lock != NULL;
379 	    lock = smb_llist_next(llist, lock)) {
380 
381 		if (!smb_lock_range_overlap(lock, start, length))
382 			/* Lock does not overlap */
383 			continue;
384 
385 		if (lock->l_type == SMB_LOCK_TYPE_READONLY && !will_write)
386 			continue;
387 
388 		if (lock->l_type == SMB_LOCK_TYPE_READWRITE &&
389 		    lock->l_file == sr->fid_ofile &&
390 		    lock->l_pid == lk_pid)
391 			continue;
392 
393 #ifdef	DEBUG
394 		if (smb_lock_debug) {
395 			cmn_err(CE_CONT, "smb_lock_range_access conflict: "
396 			    "off=0x%llx, len=0x%llx, "
397 			    "f=%p, pid=%d, typ=%d\n",
398 			    (long long)lock->l_start,
399 			    (long long)lock->l_length,
400 			    (void *)lock->l_file,
401 			    lock->l_pid, lock->l_type);
402 		}
403 #endif
404 		status = NT_STATUS_FILE_LOCK_CONFLICT;
405 		break;
406 	}
407 	smb_llist_exit(llist);
408 	return (status);
409 }
410 
411 /*
412  * The ofile is being closed.  Wake any waiting locks and
413  * clear any granted locks.
414  */
415 void
416 smb_node_destroy_lock_by_ofile(smb_node_t *node, smb_ofile_t *file)
417 {
418 	smb_lock_t	*lock;
419 	smb_lock_t	*nxtl;
420 	list_t		destroy_list;
421 
422 	SMB_NODE_VALID(node);
423 	ASSERT(node->n_refcnt);
424 
425 	/*
426 	 * Cancel any waiting locks for this ofile
427 	 */
428 	smb_llist_enter(&node->n_wlock_list, RW_READER);
429 	for (lock = smb_llist_head(&node->n_wlock_list);
430 	    lock != NULL;
431 	    lock = smb_llist_next(&node->n_wlock_list, lock)) {
432 
433 		if (lock->l_file == file) {
434 			mutex_enter(&lock->l_mutex);
435 			lock->l_blocked_by = NULL;
436 			lock->l_flags |= SMB_LOCK_FLAG_CLOSED;
437 			cv_broadcast(&lock->l_cv);
438 			mutex_exit(&lock->l_mutex);
439 		}
440 	}
441 	smb_llist_exit(&node->n_wlock_list);
442 
443 	/*
444 	 * Move locks matching the specified file from the node->n_lock_list
445 	 * to a temporary list (holding the lock the entire time) then
446 	 * destroy all the matching locks.  We can't call smb_lock_destroy
447 	 * while we are holding the lock for node->n_lock_list because we will
448 	 * deadlock and we can't drop the lock because the list contents might
449 	 * change (for example nxtl might get removed on another thread).
450 	 */
451 	list_create(&destroy_list, sizeof (smb_lock_t),
452 	    offsetof(smb_lock_t, l_lnd));
453 
454 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
455 	lock = smb_llist_head(&node->n_lock_list);
456 	while (lock) {
457 		nxtl = smb_llist_next(&node->n_lock_list, lock);
458 		if (lock->l_file == file) {
459 			smb_llist_remove(&node->n_lock_list, lock);
460 			smb_lock_posix_unlock(node, lock, file->f_user->u_cred);
461 			list_insert_tail(&destroy_list, lock);
462 		}
463 		lock = nxtl;
464 	}
465 	smb_llist_exit(&node->n_lock_list);
466 
467 	lock = list_head(&destroy_list);
468 	while (lock) {
469 		nxtl = list_next(&destroy_list, lock);
470 		list_remove(&destroy_list, lock);
471 		smb_lock_destroy(lock);
472 		lock = nxtl;
473 	}
474 
475 	list_destroy(&destroy_list);
476 }
477 
478 /*
479  * Cause a waiting lock to stop waiting and return an error.
480  * Returns the same status codes as unlock:
481  * NT_STATUS_SUCCESS, NT_STATUS_RANGE_NOT_LOCKED
482  */
483 uint32_t
484 smb_lock_range_cancel(smb_request_t *sr,
485     uint64_t start, uint64_t length, uint32_t pid)
486 {
487 	smb_node_t *node;
488 	smb_lock_t *lock;
489 	uint32_t status = NT_STATUS_RANGE_NOT_LOCKED;
490 	int cnt = 0;
491 
492 	node = sr->fid_ofile->f_node;
493 
494 	smb_llist_enter(&node->n_wlock_list, RW_READER);
495 
496 #ifdef	DEBUG
497 	if (smb_lock_debug) {
498 		cmn_err(CE_CONT, "smb_lock_range_cancel:\n"
499 		    "\tstart=0x%llx, len=0x%llx, of=%p, pid=%d\n",
500 		    (long long)start, (long long)length,
501 		    (void *)sr->fid_ofile, pid);
502 	}
503 #endif
504 
505 	for (lock = smb_llist_head(&node->n_wlock_list);
506 	    lock != NULL;
507 	    lock = smb_llist_next(&node->n_wlock_list, lock)) {
508 
509 		if ((start == lock->l_start) &&
510 		    (length == lock->l_length) &&
511 		    lock->l_file == sr->fid_ofile &&
512 		    lock->l_pid == pid) {
513 
514 			mutex_enter(&lock->l_mutex);
515 			lock->l_blocked_by = NULL;
516 			lock->l_flags |= SMB_LOCK_FLAG_CANCELLED;
517 			cv_broadcast(&lock->l_cv);
518 			mutex_exit(&lock->l_mutex);
519 			status = NT_STATUS_SUCCESS;
520 			cnt++;
521 		}
522 	}
523 
524 #ifdef	DEBUG
525 	if (smb_lock_debug && cnt != 1) {
526 		cmn_err(CE_CONT, "cancel found %d\n", cnt);
527 		smb_lock_dumpnode(node);
528 	}
529 #endif
530 
531 	smb_llist_exit(&node->n_wlock_list);
532 
533 	return (status);
534 }
535 
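/*
 * Set the error information on the SR for a failed lock request.
 * A cancelled lock wait is reported to the client as a lock violation.
 */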
536 void
537 smb_lock_range_error(smb_request_t *sr, uint32_t status32)
538 {
539 	uint16_t errcode;
540 
541 	if (status32 == NT_STATUS_CANCELLED) {
542 		status32 = NT_STATUS_FILE_LOCK_CONFLICT;
543 		errcode = ERROR_LOCK_VIOLATION;
544 	} else {
545 		errcode = ERRlock;
546 	}
547 
548 	smbsr_error(sr, status32, ERRDOS, errcode);
549 }
550 
551 /*
552  * An SMB variant of nbl_conflict().
553  *
554  * SMB prevents remove or rename when conflicting locks exist
555  * (unlike NFS, which is why we can't just use nbl_conflict).
556  *
557  * Returns:
558  *	NT_STATUS_SHARING_VIOLATION - nbl_share_conflict
559  *	NT_STATUS_FILE_LOCK_CONFLICT - nbl_lock_conflict
560  *	NT_STATUS_SUCCESS - operation can proceed
561  *
562  * NB: This function used to also check the list of ofiles,
563  * via smb_lock_range_access(), but we _can't_ do that here
564  * due to lock order constraints between node->n_lock_list
565  * and node->vp->vnbllock (taken via nbl_start_crit).
566  * They must be taken in that order, and in here, we
567  * already hold vp->vnbllock.
568  */
569 DWORD
570 smb_nbl_conflict(smb_node_t *node, uint64_t off, uint64_t len, nbl_op_t op)
571 {
572 	int svmand;
573 
574 	SMB_NODE_VALID(node);
575 	ASSERT(smb_node_in_crit(node));
576 	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE ||
577 	    op == NBL_REMOVE || op == NBL_RENAME);
578 
579 	if (smb_node_is_dir(node))
580 		return (NT_STATUS_SUCCESS);
581 
582 	if (nbl_share_conflict(node->vp, op, &smb_ct))
583 		return (NT_STATUS_SHARING_VIOLATION);
584 
585 	/*
586 	 * When checking for lock conflicts, rename and remove
587 	 * are not allowed, so treat those as read/write.
588 	 */
589 	if (op == NBL_RENAME || op == NBL_REMOVE)
590 		op = NBL_READWRITE;
591 
592 	if (nbl_svmand(node->vp, zone_kcred(), &svmand))
593 		svmand = 1;
594 
595 	if (nbl_lock_conflict(node->vp, op, off, len, svmand, &smb_ct))
596 		return (NT_STATUS_FILE_LOCK_CONFLICT);
597 
598 	return (NT_STATUS_SUCCESS);
599 }
600 
601 /*
602  * smb_lock_posix_unlock
603  *
604  * Checks whether the range being unlocked overlaps other locks and
605  * repeatedly calls smb_is_range_unlocked on a sliding basis to release
606  * only the parts of the range that are not covered by other locks.
607  *
608  */
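/*
 * Illustration (hypothetical offsets): unlocking [100, 200) while another
 * lock on the same ofile covers [150, 160) takes three passes: the first
 * unlocks [100, 150) and slides the start to 150, the second finds
 * [150, 160) still locked and slides the start to 160, and the last
 * unlocks [160, 200).
 */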
609 static void
610 smb_lock_posix_unlock(smb_node_t *node, smb_lock_t *lock, cred_t *cr)
611 {
612 	uint64_t	new_mark;
613 	uint64_t	unlock_start;
614 	uint64_t	unlock_end;
615 	smb_lock_t	new_unlock;
616 	smb_llist_t	*llist;
617 	boolean_t	can_unlock;
618 
619 	new_mark = 0;
620 	unlock_start = lock->l_start;
621 	unlock_end = unlock_start + lock->l_length;
622 	llist = &node->n_lock_list;
623 
624 	for (;;) {
625 		can_unlock = smb_is_range_unlocked(unlock_start, unlock_end,
626 		    lock->l_file->f_uniqid, llist, &new_mark);
627 		if (can_unlock) {
628 			if (new_mark) {
629 				new_unlock = *lock;
630 				new_unlock.l_start = unlock_start;
631 				new_unlock.l_length = new_mark - unlock_start;
632 				(void) smb_fsop_frlock(node, &new_unlock,
633 				    B_TRUE, cr);
634 				unlock_start = new_mark;
635 			} else {
636 				new_unlock = *lock;
637 				new_unlock.l_start = unlock_start;
638 				new_unlock.l_length = unlock_end - unlock_start;
639 				(void) smb_fsop_frlock(node, &new_unlock,
640 				    B_TRUE, cr);
641 				break;
642 			}
643 		} else if (new_mark) {
644 			unlock_start = new_mark;
645 		} else {
646 			break;
647 		}
648 	}
649 }
650 
651 /*
652  * smb_lock_range_overlap
653  *
654  * Checks if lock range(start, length) overlaps range in lock structure.
655  *
656  * Zero-length byte range locks actually affect no single byte of the stream,
657  * meaning they can still be accessed even with such locks in place. However,
658  * they do conflict with other ranges in the following manner:
659  *  conflict will only exist if the positive-length range contains the
660  *  zero-length range's offset but doesn't start at it
661  *
662  * return values:
663  *	0 - Lock range doesn't overlap
664  *	1 - Lock range overlaps.
665  */
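/*
 * For example (hypothetical offsets): a zero-length lock at offset 100
 * conflicts with an existing lock covering [90, 110), but not with one
 * that starts exactly at offset 100.
 */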
666 
667 #define	RANGE_NO_OVERLAP	0
668 #define	RANGE_OVERLAP		1
669 
670 static int
671 smb_lock_range_overlap(struct smb_lock *lock, uint64_t start, uint64_t length)
672 {
673 	if (length == 0) {
674 		if ((lock->l_start < start) &&
675 		    ((lock->l_start + lock->l_length) > start))
676 			return (RANGE_OVERLAP);
677 
678 		return (RANGE_NO_OVERLAP);
679 	}
680 
681 	/* The following test is intended to catch roll over locks. */
682 	if ((start == lock->l_start) && (length == lock->l_length))
683 		return (RANGE_OVERLAP);
684 
685 	if (start < lock->l_start) {
686 		if (start + length > lock->l_start)
687 			return (RANGE_OVERLAP);
688 	} else if (start < lock->l_start + lock->l_length)
689 		return (RANGE_OVERLAP);
690 
691 	return (RANGE_NO_OVERLAP);
692 }
693 
694 /*
695  * smb_lock_range_lckrules
696  *
697  * Lock range rules:
698  *	1. Overlapping read locks are allowed if the current locks
699  *	   in the region are all read locks, irrespective of the pid
700  *	   of the smb client issuing the lock request.
701  *
702  *	2. A read lock in the region overlapped by a write lock is
703  *	   allowed if the existing lock was taken on the same ofile
704  *	   by the same pid.
705  *
706  * return status:
707  *	NT_STATUS_SUCCESS - Input lock range conforms to lock rules.
708  *	NT_STATUS_LOCK_NOT_GRANTED - Input lock conflicts with the lock rules.
709  *	NT_STATUS_FILE_CLOSED
710  */
711 static uint32_t
712 smb_lock_range_lckrules(
713     smb_ofile_t		*file,
714     smb_lock_t		*dlock,		/* desired lock */
715     smb_lock_t		**conflictp)
716 {
717 	smb_node_t	*node = file->f_node;
718 	smb_lock_t	*lock;
719 	uint32_t	status = NT_STATUS_SUCCESS;
720 
721 	/* Check if file is closed */
722 	if (!smb_ofile_is_open(file)) {
723 		return (NT_STATUS_FILE_CLOSED);
724 	}
725 
726 	/* Caller must hold lock for node->n_lock_list */
727 	for (lock = smb_llist_head(&node->n_lock_list);
728 	    lock != NULL;
729 	    lock = smb_llist_next(&node->n_lock_list, lock)) {
730 
731 		if (!smb_lock_range_overlap(lock, dlock->l_start,
732 		    dlock->l_length))
733 			continue;
734 
735 		/*
736 		 * Check whether the lock in the overlapping record is
737 		 * only a read lock. Read locks may overlap each other
738 		 * irrespective of pids.
739 		 */
740 		if ((lock->l_type == SMB_LOCK_TYPE_READONLY) &&
741 		    (dlock->l_type == SMB_LOCK_TYPE_READONLY)) {
742 			continue;
743 		}
744 
745 		/*
746 		 * When a read lock overlaps a write lock, check whether
747 		 * it is allowed.
748 		 */
749 		if ((dlock->l_type == SMB_LOCK_TYPE_READONLY) &&
750 		    !(lock->l_type == SMB_LOCK_TYPE_READONLY)) {
751 			if (lock->l_file == dlock->l_file &&
752 			    lock->l_pid == dlock->l_pid) {
753 				continue;
754 			}
755 		}
756 
757 		/* Conflict in overlapping lock element */
758 		*conflictp = lock;
759 		status = NT_STATUS_LOCK_NOT_GRANTED;
760 		break;
761 	}
762 
763 	return (status);
764 }
765 
766 /*
767  * Cancel method for smb_lock_wait()
768  *
769  * This request is waiting on a lock.  Wake up everything
770  * waiting on the lock so that the relevant thread regains
771  * control and notices that it has been cancelled.  The
772  * other lock request threads waiting on this lock will go
773  * back to sleep when they discover they are still blocked.
774  */
775 static void
776 smb_lock_cancel_sr(smb_request_t *sr)
777 {
778 	smb_lock_t *lock = sr->cancel_arg2;
779 
780 	ASSERT(lock->l_magic == SMB_LOCK_MAGIC);
781 	mutex_enter(&lock->l_mutex);
782 	lock->l_blocked_by = NULL;
783 	lock->l_flags |= SMB_LOCK_FLAG_CANCELLED;
784 	cv_broadcast(&lock->l_cv);
785 	mutex_exit(&lock->l_mutex);
786 }
787 
788 /*
789  * smb_lock_wait
790  *
791  * Wait operation for smb overlapping lock to be released.  Caller must hold
792  * write lock for node->n_lock_list so that the set of active locks can't
793  * change unexpectedly.  The lock for node->n_lock_list will be released
794  * within this function during the sleep after the lock dependency has
795  * been recorded.
796  *
797  * Returns NT_STATUS_SUCCESS when the lock can be granted,
798  * otherwise NT_STATUS_CANCELLED, etc.
799  */
800 static uint32_t
801 smb_lock_wait(smb_request_t *sr, smb_lock_t *lock, smb_lock_t *conflict)
802 {
803 	smb_node_t	*node;
804 	clock_t		rc;
805 	uint32_t	status = NT_STATUS_SUCCESS;
806 
807 	node = lock->l_file->f_node;
808 	ASSERT(node == conflict->l_file->f_node);
809 
810 	/*
811 	 * Let the blocked lock (lock) l_blocked_by point to the
812 	 * conflicting lock (conflict), and increment a count of
813 	 * conflicts with the latter.  When the conflicting lock
814 	 * is destroyed, we'll search the list of waiting locks
815 	 * (on the node) and wake any with l_blocked_by ==
816 	 * the formerly conflicting lock.
817 	 */
818 	mutex_enter(&lock->l_mutex);
819 	lock->l_blocked_by = conflict;
820 	mutex_exit(&lock->l_mutex);
821 
822 	mutex_enter(&conflict->l_mutex);
823 	conflict->l_conflicts++;
824 	mutex_exit(&conflict->l_mutex);
825 
826 	/*
827 	 * Put the blocked lock on the waiting list.
828 	 */
829 	smb_llist_enter(&node->n_wlock_list, RW_WRITER);
830 	smb_llist_insert_tail(&node->n_wlock_list, lock);
831 	smb_llist_exit(&node->n_wlock_list);
832 
833 #ifdef	DEBUG
834 	if (smb_lock_debug) {
835 		cmn_err(CE_CONT, "smb_lock_wait: lock=%p conflict=%p\n",
836 		    (void *)lock, (void *)conflict);
837 		smb_lock_dumpnode(node);
838 	}
839 #endif
840 
841 	/*
842 	 * We come in with n_lock_list already held, and keep
843 	 * that hold until we're done with conflict (are now).
844 	 * Drop that now, and retake later.  Note that the lock
845 	 * (*conflict) may go away once we exit this list.
846 	 */
847 	smb_llist_exit(&node->n_lock_list);
848 	conflict = NULL;
849 
850 	/*
851 	 * Before we actually start waiting, setup the hooks
852 	 * smb_request_cancel uses to unblock this wait.
853 	 */
854 	mutex_enter(&sr->sr_mutex);
855 	if (sr->sr_state == SMB_REQ_STATE_ACTIVE) {
856 		sr->sr_state = SMB_REQ_STATE_WAITING_LOCK;
857 		sr->cancel_method = smb_lock_cancel_sr;
858 		sr->cancel_arg2 = lock;
859 	} else {
860 		status = NT_STATUS_CANCELLED;
861 	}
862 	mutex_exit(&sr->sr_mutex);
863 
864 	/*
865 	 * Now we're ready to actually wait for the conflicting
866 	 * lock to be removed, or for the wait to be ended by
867 	 * an external cancel, or a timeout.
868 	 */
869 	mutex_enter(&lock->l_mutex);
870 	while (status == NT_STATUS_SUCCESS &&
871 	    lock->l_blocked_by != NULL) {
872 		if (lock->l_flags & SMB_LOCK_FLAG_INDEFINITE) {
873 			cv_wait(&lock->l_cv, &lock->l_mutex);
874 		} else {
875 			rc = cv_timedwait(&lock->l_cv,
876 			    &lock->l_mutex, lock->l_end_time);
877 			if (rc < 0)
878 				status = NT_STATUS_TIMEOUT;
879 		}
880 	}
881 	if (status == NT_STATUS_SUCCESS) {
882 		if (lock->l_flags & SMB_LOCK_FLAG_CANCELLED)
883 			status = NT_STATUS_CANCELLED;
884 		if (lock->l_flags & SMB_LOCK_FLAG_CLOSED)
885 			status = NT_STATUS_FILE_CLOSED;
886 	}
887 	mutex_exit(&lock->l_mutex);
888 
889 	/*
890 	 * Done waiting.  Cleanup cancel hooks and
891 	 * finish SR state transitions.
892 	 */
893 	mutex_enter(&sr->sr_mutex);
894 	sr->cancel_method = NULL;
895 	sr->cancel_arg2 = NULL;
896 
897 	switch (sr->sr_state) {
898 	case SMB_REQ_STATE_WAITING_LOCK:
899 		/* Normal wakeup.  Keep status from above. */
900 		sr->sr_state = SMB_REQ_STATE_ACTIVE;
901 		break;
902 
903 	case SMB_REQ_STATE_CANCEL_PENDING:
904 		/* Cancelled via smb_lock_cancel_sr */
905 		sr->sr_state = SMB_REQ_STATE_CANCELLED;
906 		/* FALLTHROUGH */
907 	case SMB_REQ_STATE_CANCELLED:
908 		if (status == NT_STATUS_SUCCESS)
909 			status = NT_STATUS_CANCELLED;
910 		break;
911 
912 	default:
913 		break;
914 	}
915 	mutex_exit(&sr->sr_mutex);
916 
917 	/* Return to the caller with n_lock_list held. */
918 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
919 
920 	smb_llist_enter(&node->n_wlock_list, RW_WRITER);
921 	smb_llist_remove(&node->n_wlock_list, lock);
922 	smb_llist_exit(&node->n_wlock_list);
923 
924 	return (status);
925 }
926 
927 /*
928  * smb_lock_range_ulckrules
929  *
930  *	1. Unlock must exactly match the start and length of an
931  *	   existing lock; since overlapping ranges are allowed, exact
932  *	   matching is the only precise way of locating the lock
933  *	   entity in the node lock list.
934  *
935  *	2. Unlock fails if no corresponding lock exists.
936  *
937  * Return values
938  *
939  *	NT_STATUS_SUCCESS		Unlock request matches the lock record
940  *					returned via the 'foundlock' pointer.
941  *
942  *	NT_STATUS_RANGE_NOT_LOCKED	Unlock request doesn't match any
943  *					lock record in the node lock list, or
944  *					there was an error in unlock processing.
945  */
946 static uint32_t
947 smb_lock_range_ulckrules(
948     smb_ofile_t		*file,
949     uint64_t		start,
950     uint64_t		length,
951     uint32_t		pid,
952     smb_lock_t		**foundlock)
953 {
954 	smb_node_t	*node = file->f_node;
955 	smb_lock_t	*lock;
956 	uint32_t	status = NT_STATUS_RANGE_NOT_LOCKED;
957 
958 	/* Caller must hold lock for node->n_lock_list */
959 	for (lock = smb_llist_head(&node->n_lock_list);
960 	    lock != NULL;
961 	    lock = smb_llist_next(&node->n_lock_list, lock)) {
962 
963 		if ((start == lock->l_start) &&
964 		    (length == lock->l_length) &&
965 		    lock->l_file == file &&
966 		    lock->l_pid == pid) {
967 			*foundlock = lock;
968 			status = NT_STATUS_SUCCESS;
969 			break;
970 		}
971 	}
972 
973 	return (status);
974 }
975 
976 static smb_lock_t *
977 smb_lock_create(
978     smb_request_t *sr,
979     uint64_t start,
980     uint64_t length,
981     uint32_t pid,
982     uint32_t locktype,
983     uint32_t timeout)
984 {
985 	smb_lock_t *lock;
986 
987 	ASSERT(locktype == SMB_LOCK_TYPE_READWRITE ||
988 	    locktype == SMB_LOCK_TYPE_READONLY);
989 
990 	lock = kmem_cache_alloc(smb_cache_lock, KM_SLEEP);
991 	bzero(lock, sizeof (*lock));
992 	lock->l_magic = SMB_LOCK_MAGIC;
993 	lock->l_file = sr->fid_ofile;
994 	/* l_file == fid_ofile implies same connection (see ofile lookup) */
995 	lock->l_pid = pid;
996 	lock->l_type = locktype;
997 	lock->l_start = start;
998 	lock->l_length = length;
999 	/*
1000 	 * Calculate the absolute end time so that we can use it
1001 	 * in cv_timedwait.
1002 	 */
1003 	lock->l_end_time = ddi_get_lbolt() + MSEC_TO_TICK(timeout);
1004 	if (timeout == UINT_MAX)
1005 		lock->l_flags |= SMB_LOCK_FLAG_INDEFINITE;
1006 
1007 	mutex_init(&lock->l_mutex, NULL, MUTEX_DEFAULT, NULL);
1008 	cv_init(&lock->l_cv, NULL, CV_DEFAULT, NULL);
1009 
1010 	return (lock);
1011 }
1012 
1013 static void
1014 smb_lock_free(smb_lock_t *lock)
1015 {
1016 
1017 	lock->l_magic = 0;
1018 	cv_destroy(&lock->l_cv);
1019 	mutex_destroy(&lock->l_mutex);
1020 
1021 	kmem_cache_free(smb_cache_lock, lock);
1022 }
1023 
1024 /*
1025  * smb_lock_destroy
1026  *
1027  * Caller must hold node->n_lock_list
1028  */
1029 static void
1030 smb_lock_destroy(smb_lock_t *lock)
1031 {
1032 	smb_lock_t *tl;
1033 	smb_node_t *node;
1034 	uint32_t ccnt;
1035 
1036 	/*
1037 	 * Wake any waiting locks that were blocked by this.
1038 	 * We want them to wake and continue in FIFO order,
1039 	 * so enter/exit the llist every time...
1040 	 */
1041 	mutex_enter(&lock->l_mutex);
1042 	ccnt = lock->l_conflicts;
1043 	lock->l_conflicts = 0;
1044 	mutex_exit(&lock->l_mutex);
1045 
1046 	node = lock->l_file->f_node;
1047 	while (ccnt) {
1048 
1049 		smb_llist_enter(&node->n_wlock_list, RW_READER);
1050 
1051 		for (tl = smb_llist_head(&node->n_wlock_list);
1052 		    tl != NULL;
1053 		    tl = smb_llist_next(&node->n_wlock_list, tl)) {
1054 			mutex_enter(&tl->l_mutex);
1055 			if (tl->l_blocked_by == lock) {
1056 				tl->l_blocked_by = NULL;
1057 				cv_broadcast(&tl->l_cv);
1058 				mutex_exit(&tl->l_mutex);
1059 				goto woke_one;
1060 			}
1061 			mutex_exit(&tl->l_mutex);
1062 		}
1063 		/* No more in the list blocked by this lock. */
1064 		ccnt = 0;
1065 	woke_one:
1066 		smb_llist_exit(&node->n_wlock_list);
1067 		if (ccnt) {
1068 			/*
1069 			 * Let the thread we woke have a chance to run
1070 			 * before we wake competitors for their lock.
1071 			 */
1072 			delay(MSEC_TO_TICK(1));
1073 		}
1074 	}
1075 
1076 	smb_lock_free(lock);
1077 }
1078 
1079 /*
1080  * smb_is_range_unlocked
1081  *
1082  * Checks if the current unlock byte range request overlaps another lock.
1083  * This function is used to determine where POSIX unlocks should be
1084  * applied.
1085  *
1086  * The return code and the value of new_mark must be interpreted as
1087  * follows:
1088  *
1089  * B_TRUE and (new_mark == 0):
1090  *   This is the last or only lock left to be unlocked
1091  *
1092  * B_TRUE and (new_mark > 0):
1093  *   The range from start to new_mark can be unlocked
1094  *
1095  * B_FALSE and (new_mark == 0):
1096  *   The unlock can't be performed and we are done
1097  *
1098  * B_FALSE and (new_mark > 0):
1099  *   The range from start to new_mark can't be unlocked
1100  *   Start should be reset to new_mark for the next pass
1101  */
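/*
 * For example (hypothetical offsets): with start = 100, end = 200 and an
 * existing lock on the same ofile covering [150, 160), the function
 * returns B_TRUE with new_mark = 150, meaning [100, 150) can be unlocked
 * on this pass.
 */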
1102 
1103 static boolean_t
1104 smb_is_range_unlocked(uint64_t start, uint64_t end, uint32_t uniqid,
1105     smb_llist_t *llist_head, uint64_t *new_mark)
1106 {
1107 	struct smb_lock *lk = NULL;
1108 	uint64_t low_water_mark = MAXOFFSET_T;
1109 	uint64_t lk_start;
1110 	uint64_t lk_end;
1111 
1112 	*new_mark = 0;
1113 	lk = smb_llist_head(llist_head);
1114 	while (lk) {
1115 		if (lk->l_length == 0) {
1116 			lk = smb_llist_next(llist_head, lk);
1117 			continue;
1118 		}
1119 
1120 		if (lk->l_file->f_uniqid != uniqid) {
1121 			lk = smb_llist_next(llist_head, lk);
1122 			continue;
1123 		}
1124 
1125 		lk_end = lk->l_start + lk->l_length - 1;
1126 		lk_start = lk->l_start;
1127 
1128 		/*
1129 		 * There is no overlap in the first two cases;
1130 		 * check the next node.
1131 		 */
1132 		if (lk_end < start) {
1133 			lk = smb_llist_next(llist_head, lk);
1134 			continue;
1135 		}
1136 		if (lk_start > end) {
1137 			lk = smb_llist_next(llist_head, lk);
1138 			continue;
1139 		}
1140 
1141 		/* this range is completely locked */
1142 		if ((lk_start <= start) && (lk_end >= end)) {
1143 			return (B_FALSE);
1144 		}
1145 
1146 		/* the first part of this range is locked */
1147 		if ((start >= lk_start) && (start <= lk_end)) {
1148 			if (end > lk_end)
1149 				*new_mark = lk_end + 1;
1150 			return (B_FALSE);
1151 		}
1152 
1153 		/* this piece is unlocked */
1154 		if ((lk_start >= start) && (lk_start <= end)) {
1155 			if (low_water_mark > lk_start)
1156 				low_water_mark = lk_start;
1157 		}
1158 
1159 		lk = smb_llist_next(llist_head, lk);
1160 	}
1161 
1162 	if (low_water_mark != MAXOFFSET_T) {
1163 		*new_mark = low_water_mark;
1164 		return (B_TRUE);
1165 	}
1166 	/* the range is completely unlocked */
1167 	return (B_TRUE);
1168 }
1169 
1170 #ifdef	DEBUG
1171 static void
1172 smb_lock_dump1(smb_lock_t *lock)
1173 {
1174 	cmn_err(CE_CONT, "\t0x%p: 0x%llx, 0x%llx, %p, %d\n",
1175 	    (void *)lock,
1176 	    (long long)lock->l_start,
1177 	    (long long)lock->l_length,
1178 	    (void *)lock->l_file,
1179 	    lock->l_pid);
1180 
1181 }
1182 
1183 static void
1184 smb_lock_dumplist(smb_llist_t *llist)
1185 {
1186 	smb_lock_t *lock;
1187 
1188 	for (lock = smb_llist_head(llist);
1189 	    lock != NULL;
1190 	    lock = smb_llist_next(llist, lock)) {
1191 		smb_lock_dump1(lock);
1192 	}
1193 }
1194 
1195 static void
1196 smb_lock_dumpnode(smb_node_t *node)
1197 {
1198 	cmn_err(CE_CONT, "Granted Locks on %p (%d)\n",
1199 	    (void *)node, node->n_lock_list.ll_count);
1200 	smb_lock_dumplist(&node->n_lock_list);
1201 
1202 	cmn_err(CE_CONT, "Waiting Locks on %p (%d)\n",
1203 	    (void *)node, node->n_wlock_list.ll_count);
1204 	smb_lock_dumplist(&node->n_wlock_list);
1205 }
1206 
1207 #endif
1208