xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_lock.c (revision c211fc479225fa54805cf480633bf6689ca9a2db)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
/*
 * This module provides range lock functionality for CIFS/SMB clients.
 * The lock range service functions process SMB lock and unlock
 * requests for a file by applying the lock rules.  If a lock request
 * succeeds the file range is marked as locked; otherwise the
 * appropriate error code is returned.
 */
33 
34 #include <smbsrv/smb_incl.h>
35 #include <smbsrv/smb_fsops.h>
36 #include <sys/nbmlock.h>
37 #include <sys/param.h>
38 
39 extern caller_context_t smb_ct;
40 
41 static void smb_lock_posix_unlock(smb_node_t *, smb_lock_t *, cred_t *);
42 static boolean_t smb_is_range_unlocked(uint64_t, uint64_t, uint32_t,
43     smb_llist_t *, uint64_t *);
44 static int smb_lock_range_overlap(smb_lock_t *, uint64_t, uint64_t);
45 static uint32_t smb_lock_range_lckrules(smb_request_t *, smb_ofile_t *,
46     smb_node_t *, smb_lock_t *, smb_lock_t **);
47 static clock_t smb_lock_wait(smb_request_t *, smb_lock_t *, smb_lock_t *);
48 static uint32_t smb_lock_range_ulckrules(smb_request_t *, smb_node_t *,
49     uint64_t, uint64_t, smb_lock_t **nodelock);
50 static smb_lock_t *smb_lock_create(smb_request_t *, uint64_t, uint64_t,
51     uint32_t, uint32_t);
52 static void smb_lock_destroy(smb_lock_t *);
53 static void smb_lock_free(smb_lock_t *);
54 
55 
56 
57 /*
58  * smb_unlock_range
59  *
60  * locates lock range performed for corresponding to unlock request.
61  *
62  * NT_STATUS_SUCCESS - Lock range performed successfully.
63  * !NT_STATUS_SUCCESS - Error in unlock range operation.
64  */
65 uint32_t
66 smb_unlock_range(
67     smb_request_t	*sr,
68     smb_node_t		*node,
69     uint64_t		start,
70     uint64_t		length)
71 {
72 	smb_lock_t	*lock = NULL;
73 	uint32_t	status;
74 
75 	/* Apply unlocking rules */
76 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
77 	status = smb_lock_range_ulckrules(sr, node, start, length, &lock);
78 	if (status != NT_STATUS_SUCCESS) {
79 		/*
80 		 * If lock range is not matching in the list
81 		 * return error.
82 		 */
83 		ASSERT(lock == NULL);
84 		smb_llist_exit(&node->n_lock_list);
85 		return (status);
86 	}
87 
88 	smb_llist_remove(&node->n_lock_list, lock);
89 	smb_lock_posix_unlock(node, lock, sr->user_cr);
90 	smb_llist_exit(&node->n_lock_list);
91 	smb_lock_destroy(lock);
92 
93 	return (status);
94 }
95 
96 /*
97  * smb_lock_range
98  *
99  * checks for integrity of file lock operation for the given range of file data.
100  * This is performed by applying lock rules with all the elements of the node
101  * lock list.
102  *
103  * The function returns with new lock added if lock request is non-conflicting
104  * with existing range lock for the file. Otherwise smb request is filed
105  * without returning.
106  *
107  * NT_STATUS_SUCCESS - Lock range performed successfully.
108  * !NT_STATUS_SUCCESS - Error in lock range operation.
109  */
110 uint32_t
111 smb_lock_range(
112     smb_request_t	*sr,
113     uint64_t		start,
114     uint64_t		length,
115     uint32_t		timeout,
116     uint32_t		locktype)
117 {
118 	smb_ofile_t	*file = sr->fid_ofile;
119 	smb_node_t	*node = file->f_node;
120 	smb_lock_t	*lock;
121 	smb_lock_t	*clock = NULL;
122 	uint32_t	result = NT_STATUS_SUCCESS;
123 	boolean_t	lock_has_timeout = (timeout != 0);
124 
125 	lock = smb_lock_create(sr, start, length, locktype, timeout);
126 
127 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
128 	for (;;) {
129 		clock_t	rc;
130 
131 		/* Apply locking rules */
132 		result = smb_lock_range_lckrules(sr, file, node, lock, &clock);
133 
134 		if ((result == NT_STATUS_CANCELLED) ||
135 		    (result == NT_STATUS_SUCCESS) ||
136 		    (result == NT_STATUS_RANGE_NOT_LOCKED)) {
137 			ASSERT(clock == NULL);
138 			break;
139 		} else if (timeout == 0) {
140 			break;
141 		}
142 
143 		ASSERT(result == NT_STATUS_LOCK_NOT_GRANTED);
144 		ASSERT(clock);
145 		/*
146 		 * Call smb_lock_wait holding write lock for
147 		 * node lock list.  smb_lock_wait will release
148 		 * this lock if it blocks.
149 		 */
150 		ASSERT(node == clock->l_file->f_node);
151 
152 		rc = smb_lock_wait(sr, lock, clock);
153 		if (rc == 0) {
154 			result = NT_STATUS_CANCELLED;
155 			break;
156 		}
157 		if (rc == -1)
158 			timeout = 0;
159 
160 		clock = NULL;
161 	}
162 
163 	lock->l_blocked_by = NULL;
164 
165 	if (result != NT_STATUS_SUCCESS) {
166 		/*
167 		 * Under certain conditions NT_STATUS_FILE_LOCK_CONFLICT
168 		 * should be returned instead of NT_STATUS_LOCK_NOT_GRANTED.
169 		 */
170 		if (result == NT_STATUS_LOCK_NOT_GRANTED) {
171 			/*
172 			 * Locks with timeouts always return
173 			 * NT_STATUS_FILE_LOCK_CONFLICT
174 			 */
175 			if (lock_has_timeout)
176 				result = NT_STATUS_FILE_LOCK_CONFLICT;
177 
178 			/*
179 			 * Locks starting higher than 0xef000000 that do not
180 			 * have the MSB set always return
181 			 * NT_STATUS_FILE_LOCK_CONFLICT
182 			 */
183 			if ((lock->l_start >= 0xef000000) &&
184 			    !(lock->l_start & (1ULL << 63))) {
185 				result = NT_STATUS_FILE_LOCK_CONFLICT;
186 			}
187 
188 			/*
189 			 * If the last lock attempt to fail on this file handle
190 			 * started at the same offset as this one then return
191 			 * NT_STATUS_FILE_LOCK_CONFLICT
192 			 */
193 			mutex_enter(&file->f_mutex);
194 			if ((file->f_flags & SMB_OFLAGS_LLF_POS_VALID) &&
195 			    (lock->l_start == file->f_llf_pos)) {
196 				result = NT_STATUS_FILE_LOCK_CONFLICT;
197 			}
198 			mutex_exit(&file->f_mutex);
199 		}
200 
201 		/* Update last lock failed offset */
202 		mutex_enter(&file->f_mutex);
203 		file->f_llf_pos = lock->l_start;
204 		file->f_flags |= SMB_OFLAGS_LLF_POS_VALID;
205 		mutex_exit(&file->f_mutex);
206 
207 		smb_lock_free(lock);
208 	} else {
209 		/*
210 		 * don't insert into the CIFS lock list unless the
211 		 * posix lock worked
212 		 */
213 		if (smb_fsop_frlock(node, lock, B_FALSE, sr->user_cr))
214 			result = NT_STATUS_FILE_LOCK_CONFLICT;
215 		else
216 			smb_llist_insert_tail(&node->n_lock_list, lock);
217 	}
218 	smb_llist_exit(&node->n_lock_list);
219 
220 	return (result);
221 }
222 
223 
224 /*
225  * smb_lock_range_access
226  *
227  * scans node lock list
228  * to check if there is any overlapping lock. Overlapping
229  * lock is allowed only under same session and client pid.
230  *
231  * Return values
232  *	NT_STATUS_SUCCESS		lock access granted.
233  *	NT_STATUS_FILE_LOCK_CONFLICT 	access denied due to lock conflict.
234  */
235 int
236 smb_lock_range_access(
237     smb_request_t	*sr,
238     smb_node_t		*node,
239     uint64_t		start,
240     uint64_t		length,
241     boolean_t		will_write)
242 {
243 	smb_lock_t	*lock;
244 	smb_llist_t	*llist;
245 	int		status = NT_STATUS_SUCCESS;
246 
247 	llist = &node->n_lock_list;
248 	smb_llist_enter(llist, RW_READER);
249 	/* Search for any applicable lock */
250 	for (lock = smb_llist_head(llist);
251 	    lock != NULL;
252 	    lock = smb_llist_next(llist, lock)) {
253 
254 		if (!smb_lock_range_overlap(lock, start, length))
255 			/* Lock does not overlap */
256 			continue;
257 
258 		if (lock->l_type == SMB_LOCK_TYPE_READONLY && !will_write)
259 			continue;
260 
261 		if (lock->l_type == SMB_LOCK_TYPE_READWRITE &&
262 		    lock->l_session_kid == sr->session->s_kid &&
263 		    lock->l_pid == sr->smb_pid)
264 			continue;
265 
266 		status = NT_STATUS_FILE_LOCK_CONFLICT;
267 		break;
268 	}
269 	smb_llist_exit(llist);
270 	return (status);
271 }
272 
273 void
274 smb_node_destroy_lock_by_ofile(smb_node_t *node, smb_ofile_t *file)
275 {
276 	smb_lock_t	*lock;
277 	smb_lock_t	*nxtl;
278 	list_t		destroy_list;
279 
280 	SMB_NODE_VALID(node);
281 	ASSERT(node->n_refcnt);
282 
283 	/*
284 	 * Move locks matching the specified file from the node->n_lock_list
285 	 * to a temporary list (holding the lock the entire time) then
286 	 * destroy all the matching locks.  We can't call smb_lock_destroy
287 	 * while we are holding the lock for node->n_lock_list because we will
288 	 * deadlock and we can't drop the lock because the list contents might
289 	 * change (for example nxtl might get removed on another thread).
290 	 */
291 	list_create(&destroy_list, sizeof (smb_lock_t),
292 	    offsetof(smb_lock_t, l_lnd));
293 
294 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
295 	lock = smb_llist_head(&node->n_lock_list);
296 	while (lock) {
297 		nxtl = smb_llist_next(&node->n_lock_list, lock);
298 		if (lock->l_file == file) {
299 			smb_llist_remove(&node->n_lock_list, lock);
300 			smb_lock_posix_unlock(node, lock, file->f_user->u_cred);
301 			list_insert_tail(&destroy_list, lock);
302 		}
303 		lock = nxtl;
304 	}
305 	smb_llist_exit(&node->n_lock_list);
306 
307 	lock = list_head(&destroy_list);
308 	while (lock) {
309 		nxtl = list_next(&destroy_list, lock);
310 		list_remove(&destroy_list, lock);
311 		smb_lock_destroy(lock);
312 		lock = nxtl;
313 	}
314 
315 	list_destroy(&destroy_list);
316 }
317 
318 void
319 smb_lock_range_error(smb_request_t *sr, uint32_t status32)
320 {
321 	uint16_t errcode;
322 
323 	if (status32 == NT_STATUS_CANCELLED)
324 		errcode = ERROR_OPERATION_ABORTED;
325 	else
326 		errcode = ERRlock;
327 
328 	smbsr_error(sr, status32, ERRDOS, errcode);
329 }
330 
331 /*
332  * smb_range_check()
333  *
334  * Perform range checking.  First check for internal CIFS range conflicts
335  * and then check for external conflicts, for example, with NFS or local
336  * access.
337  *
338  * If nbmand is enabled, this function must be called from within an nbmand
339  * critical region
340  */
341 
342 DWORD
343 smb_range_check(smb_request_t *sr, smb_node_t *node, uint64_t start,
344     uint64_t length, boolean_t will_write)
345 {
346 	smb_error_t smberr;
347 	int svmand;
348 	int nbl_op;
349 	int rc;
350 
351 	SMB_NODE_VALID(node);
352 
353 	ASSERT(smb_node_in_crit(node));
354 
355 	if (node->attr.sa_vattr.va_type == VDIR)
356 		return (NT_STATUS_SUCCESS);
357 
358 	rc = smb_lock_range_access(sr, node, start, length, will_write);
359 	if (rc)
360 		return (NT_STATUS_FILE_LOCK_CONFLICT);
361 
362 	if ((rc = nbl_svmand(node->vp, kcred, &svmand)) != 0) {
363 		smbsr_map_errno(rc, &smberr);
364 		return (smberr.status);
365 	}
366 
367 	nbl_op = (will_write) ? NBL_WRITE : NBL_READ;
368 
369 	if (nbl_lock_conflict(node->vp, nbl_op, start, length, svmand, &smb_ct))
370 		return (NT_STATUS_FILE_LOCK_CONFLICT);
371 
372 	return (NT_STATUS_SUCCESS);
373 }
374 
375 /*
376  * smb_lock_posix_unlock
377  *
378  * checks if the current unlock request is in another lock and repeatedly calls
379  * smb_is_range_unlocked on a sliding basis to unlock all bits of the lock
380  * that are not in other locks
381  *
382  */
383 static void
384 smb_lock_posix_unlock(smb_node_t *node, smb_lock_t *lock, cred_t *cr)
385 {
386 	uint64_t	new_mark;
387 	uint64_t	unlock_start;
388 	uint64_t	unlock_end;
389 	smb_lock_t	new_unlock;
390 	smb_llist_t	*llist;
391 	boolean_t	can_unlock;
392 
393 	new_mark = 0;
394 	unlock_start = lock->l_start;
395 	unlock_end = unlock_start + lock->l_length;
396 	llist = &node->n_lock_list;
397 
398 	for (;;) {
399 		can_unlock = smb_is_range_unlocked(unlock_start, unlock_end,
400 		    lock->l_file->f_uniqid, llist, &new_mark);
401 		if (can_unlock) {
402 			if (new_mark) {
403 				new_unlock = *lock;
404 				new_unlock.l_start = unlock_start;
405 				new_unlock.l_length = new_mark - unlock_start;
406 				(void) smb_fsop_frlock(node, &new_unlock,
407 				    B_TRUE, cr);
408 				unlock_start = new_mark;
409 			} else {
410 				new_unlock = *lock;
411 				new_unlock.l_start = unlock_start;
412 				new_unlock.l_length = unlock_end - unlock_start;
413 				(void) smb_fsop_frlock(node, &new_unlock,
414 				    B_TRUE, cr);
415 				break;
416 			}
417 		} else if (new_mark) {
418 			unlock_start = new_mark;
419 		} else {
420 			break;
421 		}
422 	}
423 }
424 
425 /*
426  * smb_lock_range_overlap
427  *
428  * Checks if lock range(start, length) overlaps range in lock structure.
429  *
430  * Zero-length byte range locks actually affect no single byte of the stream,
431  * meaning they can still be accessed even with such locks in place. However,
432  * they do conflict with other ranges in the following manner:
433  *  conflict will only exist if the positive-length range contains the
434  *  zero-length range's offset but doesn't start at it
435  *
436  * return values:
437  *	0 - Lock range doesn't overlap
438  *	1 - Lock range overlaps.
439  */
440 
441 #define	RANGE_NO_OVERLAP	0
442 #define	RANGE_OVERLAP		1
443 
444 static int
445 smb_lock_range_overlap(struct smb_lock *lock, uint64_t start, uint64_t length)
446 {
447 	if (length == 0) {
448 		if ((lock->l_start < start) &&
449 		    ((lock->l_start + lock->l_length) > start))
450 			return (RANGE_OVERLAP);
451 
452 		return (RANGE_NO_OVERLAP);
453 	}
454 
455 	/* The following test is intended to catch roll over locks. */
456 	if ((start == lock->l_start) && (length == lock->l_length))
457 		return (RANGE_OVERLAP);
458 
459 	if (start < lock->l_start) {
460 		if (start + length > lock->l_start)
461 			return (RANGE_OVERLAP);
462 	} else if (start < lock->l_start + lock->l_length)
463 		return (RANGE_OVERLAP);
464 
465 	return (RANGE_NO_OVERLAP);
466 }
467 
468 /*
469  * smb_lock_range_lckrules
470  *
471  * Lock range rules:
472  *	1. Overlapping read locks are allowed if the
473  *	   current locks in the region are only read locks
474  *	   irrespective of pid of smb client issuing lock request.
475  *
476  *	2. Read lock in the overlapped region of write lock
477  *	   are allowed if the pervious lock is performed by the
478  *	   same pid and connection.
479  *
480  * return status:
481  *	NT_STATUS_SUCCESS - Input lock range adapts to lock rules.
482  *	NT_STATUS_LOCK_NOT_GRANTED - Input lock conflicts lock rules.
483  *	NT_STATUS_CANCELLED - Error in processing lock rules
484  */
485 static uint32_t
486 smb_lock_range_lckrules(
487     smb_request_t	*sr,
488     smb_ofile_t		*file,
489     smb_node_t		*node,
490     smb_lock_t		*dlock,
491     smb_lock_t		**clockp)
492 {
493 	smb_lock_t	*lock;
494 	uint32_t	status = NT_STATUS_SUCCESS;
495 
496 	/* Check if file is closed */
497 	if (!smb_ofile_is_open(file)) {
498 		return (NT_STATUS_RANGE_NOT_LOCKED);
499 	}
500 
501 	/* Caller must hold lock for node->n_lock_list */
502 	for (lock = smb_llist_head(&node->n_lock_list);
503 	    lock != NULL;
504 	    lock = smb_llist_next(&node->n_lock_list, lock)) {
505 
506 		if (!smb_lock_range_overlap(lock, dlock->l_start,
507 		    dlock->l_length))
508 			continue;
509 
510 		/*
511 		 * Check to see if lock in the overlapping record
512 		 * is only read lock. Current finding is read
513 		 * locks can overlapped irrespective of pids.
514 		 */
515 		if ((lock->l_type == SMB_LOCK_TYPE_READONLY) &&
516 		    (dlock->l_type == SMB_LOCK_TYPE_READONLY)) {
517 			continue;
518 		}
519 
520 		/*
521 		 * When the read lock overlaps write lock, check if
522 		 * allowed.
523 		 */
524 		if ((dlock->l_type == SMB_LOCK_TYPE_READONLY) &&
525 		    !(lock->l_type == SMB_LOCK_TYPE_READONLY)) {
526 			if (lock->l_file == sr->fid_ofile &&
527 			    lock->l_session_kid == sr->session->s_kid &&
528 			    lock->l_pid == sr->smb_pid &&
529 			    lock->l_uid == sr->smb_uid) {
530 				continue;
531 			}
532 		}
533 
534 		/* Conflict in overlapping lock element */
535 		*clockp = lock;
536 		status = NT_STATUS_LOCK_NOT_GRANTED;
537 		break;
538 	}
539 
540 	return (status);
541 }
542 
543 /*
544  * smb_lock_wait
545  *
546  * Wait operation for smb overlapping lock to be released.  Caller must hold
547  * write lock for node->n_lock_list so that the set of active locks can't
548  * change unexpectedly.  The lock for node->n_lock_list  will be released
549  * within this function during the sleep after the lock dependency has
550  * been recorded.
551  *
552  * return value
553  *
554  *	0	The request was canceled.
555  *	-1	The timeout was reached.
556  *	>0	Condition met.
557  */
558 static clock_t
559 smb_lock_wait(smb_request_t *sr, smb_lock_t *b_lock, smb_lock_t *c_lock)
560 {
561 	clock_t		rc;
562 
563 	ASSERT(sr->sr_awaiting == NULL);
564 
565 	mutex_enter(&sr->sr_mutex);
566 
567 	switch (sr->sr_state) {
568 	case SMB_REQ_STATE_ACTIVE:
569 		/*
570 		 * Wait up till the timeout time keeping track of actual
571 		 * time waited for possible retry failure.
572 		 */
573 		sr->sr_state = SMB_REQ_STATE_WAITING_LOCK;
574 		sr->sr_awaiting = c_lock;
575 		mutex_exit(&sr->sr_mutex);
576 
577 		mutex_enter(&c_lock->l_mutex);
578 		/*
579 		 * The conflict list (l_conflict_list) for a lock contains
580 		 * all the locks that are blocked by and in conflict with
581 		 * that lock.  Add the new lock to the conflict list for the
582 		 * active lock.
583 		 *
584 		 * l_conflict_list is currently a fancy way of representing
585 		 * the references/dependencies on a lock.  It could be
586 		 * replaced with a reference count but this approach
587 		 * has the advantage that MDB can display the lock
588 		 * dependencies at any point in time.  In the future
589 		 * we should be able to leverage the list to implement
590 		 * an asynchronous locking model.
591 		 *
592 		 * l_blocked_by is the reverse of the conflict list.  It
593 		 * points to the lock that the new lock conflicts with.
594 		 * As currently implemented this value is purely for
595 		 * debug purposes -- there are windows of time when
596 		 * l_blocked_by may be non-NULL even though there is no
597 		 * conflict list
598 		 */
599 		b_lock->l_blocked_by = c_lock;
600 		smb_slist_insert_tail(&c_lock->l_conflict_list, b_lock);
601 		smb_llist_exit(&c_lock->l_file->f_node->n_lock_list);
602 
603 		/*
604 		 * XXX Hack.. drop s_lock to avoid blocking subsequent SMBs
605 		 * that might affect the state of this lock (i.e.
606 		 * smb_com_close).  We shouldn't sleep while holding
607 		 * locks anyway.
608 		 */
609 		smb_rwx_rwexit(&sr->session->s_lock);
610 
611 		if (SMB_LOCK_INDEFINITE_WAIT(b_lock)) {
612 			cv_wait(&c_lock->l_cv, &c_lock->l_mutex);
613 		} else {
614 			rc = cv_timedwait(&c_lock->l_cv,
615 			    &c_lock->l_mutex, b_lock->l_end_time);
616 		}
617 
618 		/*
619 		 * XXX Hack continued from above... re-acquire s_lock
620 		 * OK to hardcode RW_READER since this is just a hack and
621 		 * we really should yank it out and do something else.
622 		 */
623 		smb_rwx_rwenter(&sr->session->s_lock, RW_READER);
624 
625 		mutex_exit(&c_lock->l_mutex);
626 
627 		smb_llist_enter(&c_lock->l_file->f_node->n_lock_list,
628 		    RW_WRITER);
629 		smb_slist_remove(&c_lock->l_conflict_list, b_lock);
630 
631 		mutex_enter(&sr->sr_mutex);
632 		sr->sr_awaiting = NULL;
633 		if (sr->sr_state == SMB_REQ_STATE_CANCELED) {
634 			rc = 0;
635 		} else {
636 			sr->sr_state = SMB_REQ_STATE_ACTIVE;
637 		}
638 		break;
639 
640 	default:
641 		ASSERT(sr->sr_state == SMB_REQ_STATE_CANCELED);
642 		rc = 0;
643 		break;
644 	}
645 	mutex_exit(&sr->sr_mutex);
646 
647 	return (rc);
648 }
649 
650 /*
651  * smb_lock_range_ulckrules
652  *
653  *	1. Unlock should be performed at exactly matching ends.
654  *	   This has been changed because overlapping ends is
655  *	   allowed and there is no other precise way of locating
656  *	   lock entity in node lock list.
657  *
658  *	2. Unlock is failed if there is no corresponding lock exists.
659  *
660  * Return values
661  *
662  *	NT_STATUS_SUCCESS		Unlock request matches lock record
663  *					pointed by 'nodelock' lock structure.
664  *
665  *	NT_STATUS_RANGE_NOT_LOCKED	Unlock request doen't match any
666  *					of lock record in node lock request or
667  *					error in unlock range processing.
668  */
669 static uint32_t
670 smb_lock_range_ulckrules(
671     smb_request_t	*sr,
672     smb_node_t		*node,
673     uint64_t		start,
674     uint64_t		length,
675     smb_lock_t		**nodelock)
676 {
677 	smb_lock_t	*lock;
678 	uint32_t	status = NT_STATUS_RANGE_NOT_LOCKED;
679 
680 	/* Caller must hold lock for node->n_lock_list */
681 	for (lock = smb_llist_head(&node->n_lock_list);
682 	    lock != NULL;
683 	    lock = smb_llist_next(&node->n_lock_list, lock)) {
684 
685 		if ((start == lock->l_start) &&
686 		    (length == lock->l_length) &&
687 		    lock->l_file == sr->fid_ofile &&
688 		    lock->l_session_kid == sr->session->s_kid &&
689 		    lock->l_pid == sr->smb_pid &&
690 		    lock->l_uid == sr->smb_uid) {
691 			*nodelock = lock;
692 			status = NT_STATUS_SUCCESS;
693 			break;
694 		}
695 	}
696 
697 	return (status);
698 }
699 
700 static smb_lock_t *
701 smb_lock_create(
702     smb_request_t *sr,
703     uint64_t start,
704     uint64_t length,
705     uint32_t locktype,
706     uint32_t timeout)
707 {
708 	smb_lock_t *lock;
709 
710 	ASSERT(locktype == SMB_LOCK_TYPE_READWRITE ||
711 	    locktype == SMB_LOCK_TYPE_READONLY);
712 
713 	lock = kmem_zalloc(sizeof (smb_lock_t), KM_SLEEP);
714 	lock->l_magic = SMB_LOCK_MAGIC;
715 	lock->l_sr = sr; /* Invalid after lock is active */
716 	lock->l_session_kid = sr->session->s_kid;
717 	lock->l_session = sr->session;
718 	lock->l_file = sr->fid_ofile;
719 	lock->l_uid = sr->smb_uid;
720 	lock->l_pid = sr->smb_pid;
721 	lock->l_type = locktype;
722 	lock->l_start = start;
723 	lock->l_length = length;
724 	/*
725 	 * Calculate the absolute end time so that we can use it
726 	 * in cv_timedwait.
727 	 */
728 	lock->l_end_time = lbolt + MSEC_TO_TICK(timeout);
729 	if (timeout == UINT_MAX)
730 		lock->l_flags |= SMB_LOCK_FLAG_INDEFINITE;
731 
732 	mutex_init(&lock->l_mutex, NULL, MUTEX_DEFAULT, NULL);
733 	cv_init(&lock->l_cv, NULL, CV_DEFAULT, NULL);
734 	smb_slist_constructor(&lock->l_conflict_list, sizeof (smb_lock_t),
735 	    offsetof(smb_lock_t, l_conflict_lnd));
736 
737 	return (lock);
738 }
739 
740 static void
741 smb_lock_free(smb_lock_t *lock)
742 {
743 	smb_slist_destructor(&lock->l_conflict_list);
744 	cv_destroy(&lock->l_cv);
745 	mutex_destroy(&lock->l_mutex);
746 
747 	kmem_free(lock, sizeof (smb_lock_t));
748 }
749 
750 /*
751  * smb_lock_destroy
752  *
753  * Caller must hold node->n_lock_list
754  */
755 static void
756 smb_lock_destroy(smb_lock_t *lock)
757 {
758 	/*
759 	 * Caller must hold node->n_lock_list lock.
760 	 */
761 	mutex_enter(&lock->l_mutex);
762 	cv_broadcast(&lock->l_cv);
763 	mutex_exit(&lock->l_mutex);
764 
765 	/*
766 	 * The cv_broadcast above should wake up any locks that previous
767 	 * had conflicts with this lock.  Wait for the locking threads
768 	 * to remove their references to this lock.
769 	 */
770 	smb_slist_wait_for_empty(&lock->l_conflict_list);
771 
772 	smb_lock_free(lock);
773 }
774 
775 /*
776  * smb_is_range_unlocked
777  *
778  * Checks if the current unlock byte range request overlaps another lock
779  * This function is used to determine where POSIX unlocks should be
780  * applied.
781  *
782  * The return code and the value of new_mark must be interpreted as
783  * follows:
784  *
785  * B_TRUE and (new_mark == 0):
786  *   This is the last or only lock left to be unlocked
787  *
788  * B_TRUE and (new_mark > 0):
789  *   The range from start to new_mark can be unlocked
790  *
791  * B_FALSE and (new_mark == 0):
792  *   The unlock can't be performed and we are done
793  *
794  * B_FALSE and (new_mark > 0),
795  *   The range from start to new_mark can't be unlocked
796  *   Start should be reset to new_mark for the next pass
797  */
798 
799 static boolean_t
800 smb_is_range_unlocked(uint64_t start, uint64_t end, uint32_t uniqid,
801     smb_llist_t *llist_head, uint64_t *new_mark)
802 {
803 	struct smb_lock *lk = NULL;
804 	uint64_t low_water_mark = MAXOFFSET_T;
805 	uint64_t lk_start;
806 	uint64_t lk_end;
807 
808 	*new_mark = 0;
809 	lk = smb_llist_head(llist_head);
810 	while (lk) {
811 		if (lk->l_length == 0) {
812 			lk = smb_llist_next(llist_head, lk);
813 			continue;
814 		}
815 
816 		if (lk->l_file->f_uniqid != uniqid) {
817 			lk = smb_llist_next(llist_head, lk);
818 			continue;
819 		}
820 
821 		lk_end = lk->l_start + lk->l_length - 1;
822 		lk_start = lk->l_start;
823 
824 		/*
825 		 * there is no overlap for the first 2 cases
826 		 * check next node
827 		 */
828 		if (lk_end < start) {
829 			lk = smb_llist_next(llist_head, lk);
830 			continue;
831 		}
832 		if (lk_start > end) {
833 			lk = smb_llist_next(llist_head, lk);
834 			continue;
835 		}
836 
837 		/* this range is completely locked */
838 		if ((lk_start <= start) && (lk_end >= end)) {
839 			return (B_FALSE);
840 		}
841 
842 		/* the first part of this range is locked */
843 		if ((start >= lk_start) && (start <= lk_end)) {
844 			if (end > lk_end)
845 				*new_mark = lk_end + 1;
846 			return (B_FALSE);
847 		}
848 
849 		/* this piece is unlocked */
850 		if ((lk_start >= start) && (lk_start <= end)) {
851 			if (low_water_mark > lk_start)
852 				low_water_mark  = lk_start;
853 		}
854 
855 		lk = smb_llist_next(llist_head, lk);
856 	}
857 
858 	if (low_water_mark != MAXOFFSET_T) {
859 		*new_mark = low_water_mark;
860 		return (B_TRUE);
861 	}
862 	/* the range is completely unlocked */
863 	return (B_TRUE);
864 }
865