xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_lock.c (revision 9b9d39d2a32ff806d2431dbcc50968ef1e6d46b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 /*
 * This module provides range lock functionality for CIFS/SMB clients.
 * Lock range service functions process SMB lock and unlock
 * requests for a file by applying lock rules, and mark the file range
 * as locked if the lock is successful; otherwise they return the proper
 * error code.
32  */
33 
34 #include <smbsrv/smb_kproto.h>
35 #include <smbsrv/smb_fsops.h>
36 #include <sys/nbmlock.h>
37 #include <sys/param.h>
38 
/*
 * Caller context defined elsewhere; in this file it is handed to
 * nbl_share_conflict() and nbl_lock_conflict().
 */
extern caller_context_t smb_ct;

#ifdef	DEBUG
/* Non-zero enables the cmn_err() tracing and list dumps in this file. */
int smb_lock_debug = 0;
static void smb_lock_dump1(smb_lock_t *);
static void smb_lock_dumplist(smb_llist_t *);
static void smb_lock_dumpnode(smb_node_t *);
#endif

/* Forward declarations of the file-local helpers defined below. */
static void smb_lock_posix_unlock(smb_node_t *, smb_lock_t *, cred_t *);
static boolean_t smb_is_range_unlocked(uint64_t, uint64_t, uint32_t,
    smb_llist_t *, uint64_t *);
static int smb_lock_range_overlap(smb_lock_t *, uint64_t, uint64_t);
static uint32_t smb_lock_range_lckrules(smb_ofile_t *, smb_lock_t *,
    smb_lock_t **);
static uint32_t smb_lock_wait(smb_request_t *, smb_lock_t *, smb_lock_t *);
static uint32_t smb_lock_range_ulckrules(smb_ofile_t *,
    uint64_t, uint64_t, uint32_t, smb_lock_t **);
static smb_lock_t *smb_lock_create(smb_request_t *, uint64_t, uint64_t,
    uint32_t, uint32_t, uint32_t);
static void smb_lock_destroy(smb_lock_t *);
static void smb_lock_free(smb_lock_t *);
61 
62 /*
63  * Return the number of range locks on the specified ofile.
64  */
65 uint32_t
66 smb_lock_get_lock_count(smb_node_t *node, smb_ofile_t *of)
67 {
68 	smb_lock_t	*lock;
69 	smb_llist_t	*llist;
70 	uint32_t	count = 0;
71 
72 	SMB_NODE_VALID(node);
73 	SMB_OFILE_VALID(of);
74 
75 	llist = &node->n_lock_list;
76 
77 	smb_llist_enter(llist, RW_READER);
78 	for (lock = smb_llist_head(llist);
79 	    lock != NULL;
80 	    lock = smb_llist_next(llist, lock)) {
81 		if (lock->l_file == of)
82 			++count;
83 	}
84 	smb_llist_exit(llist);
85 
86 	return (count);
87 }
88 
89 /*
90  * smb_unlock_range
91  *
92  * locates lock range performed for corresponding to unlock request.
93  *
94  * NT_STATUS_SUCCESS - Lock range performed successfully.
95  * !NT_STATUS_SUCCESS - Error in unlock range operation.
96  */
97 uint32_t
98 smb_unlock_range(
99     smb_request_t	*sr,
100     uint64_t		start,
101     uint64_t		length,
102     uint32_t		pid)
103 {
104 	smb_ofile_t	*file = sr->fid_ofile;
105 	smb_node_t	*node = file->f_node;
106 	smb_lock_t	*lock = NULL;
107 	uint32_t	status;
108 
109 	if (length > 1 &&
110 	    (start + length) < start)
111 		return (NT_STATUS_INVALID_LOCK_RANGE);
112 
113 #ifdef	DEBUG
114 	if (smb_lock_debug) {
115 		cmn_err(CE_CONT, "smb_unlock_range "
116 		    "off=0x%llx, len=0x%llx, f=%p, pid=%d\n",
117 		    (long long)start, (long long)length,
118 		    (void *)sr->fid_ofile, pid);
119 	}
120 #endif
121 
122 	/* Apply unlocking rules */
123 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
124 	status = smb_lock_range_ulckrules(file, start, length, pid, &lock);
125 	if (status != NT_STATUS_SUCCESS) {
126 		/*
127 		 * If lock range is not matching in the list
128 		 * return error.
129 		 */
130 		ASSERT(lock == NULL);
131 	}
132 	if (lock != NULL) {
133 		smb_llist_remove(&node->n_lock_list, lock);
134 		smb_lock_posix_unlock(node, lock, sr->user_cr);
135 	}
136 
137 #ifdef	DEBUG
138 	if (smb_lock_debug && lock == NULL) {
139 		cmn_err(CE_CONT, "unlock failed, 0x%x\n", status);
140 		smb_lock_dumpnode(node);
141 	}
142 #endif
143 
144 	smb_llist_exit(&node->n_lock_list);
145 
146 	if (lock != NULL)
147 		smb_lock_destroy(lock);
148 
149 	return (status);
150 }
151 
152 /*
153  * smb_lock_range
154  *
155  * Checks for integrity of file lock operation for the given range of file data.
156  * This is performed by applying lock rules with all the elements of the node
157  * lock list.
158  *
159  * Break shared (levelII) oplocks. If there is an exclusive oplock, it is
160  * owned by this ofile and therefore should not be broken.
161  *
162  * The function returns with new lock added if lock request is non-conflicting
163  * with existing range lock for the file. Otherwise smb request is filed
164  * without returning.
165  *
166  * NT_STATUS_SUCCESS - Lock range performed successfully.
167  * !NT_STATUS_SUCCESS - Error in lock range operation.
168  */
169 uint32_t
170 smb_lock_range(
171     smb_request_t	*sr,
172     uint64_t		start,
173     uint64_t		length,
174     uint32_t		pid,
175     uint32_t		locktype,
176     uint32_t		timeout)
177 {
178 	smb_ofile_t	*file = sr->fid_ofile;
179 	smb_node_t	*node = file->f_node;
180 	smb_lock_t	*lock;
181 	smb_lock_t	*conflict = NULL;
182 	uint32_t	result;
183 	int		rc;
184 	boolean_t	lock_has_timeout =
185 	    (timeout != 0 && timeout != UINT_MAX);
186 
187 	if (length > 1 &&
188 	    (start + length) < start)
189 		return (NT_STATUS_INVALID_LOCK_RANGE);
190 
191 #ifdef	DEBUG
192 	if (smb_lock_debug) {
193 		cmn_err(CE_CONT, "smb_lock_range "
194 		    "off=0x%llx, len=0x%llx, "
195 		    "f=%p, pid=%d, typ=%d, tmo=%d\n",
196 		    (long long)start, (long long)length,
197 		    (void *)sr->fid_ofile, pid, locktype, timeout);
198 	}
199 #endif
200 
201 	lock = smb_lock_create(sr, start, length, pid, locktype, timeout);
202 
203 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
204 	for (;;) {
205 
206 		/* Apply locking rules */
207 		result = smb_lock_range_lckrules(file, lock, &conflict);
208 		switch (result) {
209 		case NT_STATUS_LOCK_NOT_GRANTED: /* conflict! */
210 			/* may need to wait */
211 			break;
212 		case NT_STATUS_SUCCESS:
213 		case NT_STATUS_FILE_CLOSED:
214 			goto break_loop;
215 		default:
216 			cmn_err(CE_CONT, "smb_lock_range1, status 0x%x\n",
217 			    result);
218 			goto break_loop;
219 		}
220 		if (timeout == 0)
221 			goto break_loop;
222 
223 		/*
224 		 * Call smb_lock_wait holding write lock for
225 		 * node lock list.  smb_lock_wait will release
226 		 * the node list lock if it blocks, so after
227 		 * the call, (*conflict) may no longer exist.
228 		 */
229 		result = smb_lock_wait(sr, lock, conflict);
230 		conflict = NULL;
231 		switch (result) {
232 		case NT_STATUS_SUCCESS:
233 			/* conflict gone, try again */
234 			break;
235 		case NT_STATUS_TIMEOUT:
236 			/* try just once more */
237 			timeout = 0;
238 			break;
239 		case NT_STATUS_CANCELLED:
240 		case NT_STATUS_FILE_CLOSED:
241 			goto break_loop;
242 		default:
243 			cmn_err(CE_CONT, "smb_lock_range2, status 0x%x\n",
244 			    result);
245 			goto break_loop;
246 		}
247 	}
248 
249 break_loop:
250 	lock->l_blocked_by = NULL;
251 
252 	if (result != NT_STATUS_SUCCESS) {
253 		if (result == NT_STATUS_FILE_CLOSED)
254 			result = NT_STATUS_RANGE_NOT_LOCKED;
255 
256 		/*
257 		 * Under certain conditions NT_STATUS_FILE_LOCK_CONFLICT
258 		 * should be returned instead of NT_STATUS_LOCK_NOT_GRANTED.
259 		 * All of this appears to be specific to SMB1
260 		 */
261 		if (sr->session->dialect <= NT_LM_0_12 &&
262 		    result == NT_STATUS_LOCK_NOT_GRANTED) {
263 			/*
264 			 * Locks with timeouts always return
265 			 * NT_STATUS_FILE_LOCK_CONFLICT
266 			 */
267 			if (lock_has_timeout)
268 				result = NT_STATUS_FILE_LOCK_CONFLICT;
269 
270 			/*
271 			 * Locks starting higher than 0xef000000 that do not
272 			 * have the MSB set always return
273 			 * NT_STATUS_FILE_LOCK_CONFLICT
274 			 */
275 			if ((lock->l_start >= 0xef000000) &&
276 			    !(lock->l_start & (1ULL << 63))) {
277 				result = NT_STATUS_FILE_LOCK_CONFLICT;
278 			}
279 
280 			/*
281 			 * If the last lock attempt to fail on this file handle
282 			 * started at the same offset as this one then return
283 			 * NT_STATUS_FILE_LOCK_CONFLICT
284 			 */
285 			mutex_enter(&file->f_mutex);
286 			if ((file->f_flags & SMB_OFLAGS_LLF_POS_VALID) &&
287 			    (lock->l_start == file->f_llf_pos)) {
288 				result = NT_STATUS_FILE_LOCK_CONFLICT;
289 			}
290 			mutex_exit(&file->f_mutex);
291 		}
292 
293 		/* Update last lock failed offset */
294 		mutex_enter(&file->f_mutex);
295 		file->f_llf_pos = lock->l_start;
296 		file->f_flags |= SMB_OFLAGS_LLF_POS_VALID;
297 		mutex_exit(&file->f_mutex);
298 
299 		smb_lock_free(lock);
300 	} else {
301 		/*
302 		 * don't insert into the CIFS lock list unless the
303 		 * posix lock worked
304 		 */
305 		rc = smb_fsop_frlock(node, lock, B_FALSE, sr->user_cr);
306 		if (rc != 0) {
307 #ifdef	DEBUG
308 			if (smb_lock_debug)
309 				cmn_err(CE_CONT, "fop_frlock, err=%d\n", rc);
310 #endif
311 			result = NT_STATUS_FILE_LOCK_CONFLICT;
312 		} else {
313 			/*
314 			 * We want unlock to find exclusive locks before
315 			 * shared locks, so insert those at the head.
316 			 */
317 			if (lock->l_type == SMB_LOCK_TYPE_READWRITE)
318 				smb_llist_insert_head(&node->n_lock_list, lock);
319 			else
320 				smb_llist_insert_tail(&node->n_lock_list, lock);
321 		}
322 	}
323 
324 #ifdef	DEBUG
325 	if (smb_lock_debug && result != 0) {
326 		cmn_err(CE_CONT, "lock failed, 0x%x\n", result);
327 		smb_lock_dumpnode(node);
328 	}
329 #endif
330 
331 	smb_llist_exit(&node->n_lock_list);
332 
333 	if (result == NT_STATUS_SUCCESS) {
334 		/* This revokes read cache delegations. */
335 		(void) smb_oplock_break_WRITE(node, file);
336 	}
337 
338 	return (result);
339 }
340 
341 /*
342  * smb_lock_range_access
343  *
344  * scans node lock list
345  * to check if there is any overlapping lock. Overlapping
346  * lock is allowed only under same session and client pid.
347  *
348  * Return values
349  *	NT_STATUS_SUCCESS		lock access granted.
350  *	NT_STATUS_FILE_LOCK_CONFLICT	access denied due to lock conflict.
351  */
352 int
353 smb_lock_range_access(
354     smb_request_t	*sr,
355     smb_node_t		*node,
356     uint64_t		start,
357     uint64_t		length,
358     boolean_t		will_write)
359 {
360 	smb_lock_t	*lock;
361 	smb_llist_t	*llist;
362 	uint32_t	lk_pid = 0;
363 	int		status = NT_STATUS_SUCCESS;
364 
365 	if (length == 0)
366 		return (status);
367 
368 	/*
369 	 * What PID to use for lock conflict checks?
370 	 * SMB2 locking ignores PIDs (have lk_pid=0)
371 	 * SMB1 uses low 16 bits of sr->smb_pid
372 	 */
373 	if (sr->session->dialect < SMB_VERS_2_BASE)
374 		lk_pid = sr->smb_pid & 0xFFFF;
375 
376 	llist = &node->n_lock_list;
377 	smb_llist_enter(llist, RW_READER);
378 	/* Search for any applicable lock */
379 	for (lock = smb_llist_head(llist);
380 	    lock != NULL;
381 	    lock = smb_llist_next(llist, lock)) {
382 
383 		if (!smb_lock_range_overlap(lock, start, length))
384 			/* Lock does not overlap */
385 			continue;
386 
387 		if (lock->l_type == SMB_LOCK_TYPE_READONLY && !will_write)
388 			continue;
389 
390 		if (lock->l_type == SMB_LOCK_TYPE_READWRITE &&
391 		    lock->l_file == sr->fid_ofile &&
392 		    lock->l_pid == lk_pid)
393 			continue;
394 
395 #ifdef	DEBUG
396 		if (smb_lock_debug) {
397 			cmn_err(CE_CONT, "smb_lock_range_access conflict: "
398 			    "off=0x%llx, len=0x%llx, "
399 			    "f=%p, pid=%d, typ=%d\n",
400 			    (long long)lock->l_start,
401 			    (long long)lock->l_length,
402 			    (void *)lock->l_file,
403 			    lock->l_pid, lock->l_type);
404 		}
405 #endif
406 		status = NT_STATUS_FILE_LOCK_CONFLICT;
407 		break;
408 	}
409 	smb_llist_exit(llist);
410 	return (status);
411 }
412 
413 /*
414  * The ofile is being closed.  Wake any waiting locks and
415  * clear any granted locks.
416  */
417 void
418 smb_node_destroy_lock_by_ofile(smb_node_t *node, smb_ofile_t *file)
419 {
420 	cred_t		*kcr = zone_kcred();
421 	smb_lock_t	*lock;
422 	smb_lock_t	*nxtl;
423 	list_t		destroy_list;
424 
425 	SMB_NODE_VALID(node);
426 	ASSERT(node->n_refcnt);
427 
428 	/*
429 	 * Cancel any waiting locks for this ofile
430 	 */
431 	smb_llist_enter(&node->n_wlock_list, RW_READER);
432 	for (lock = smb_llist_head(&node->n_wlock_list);
433 	    lock != NULL;
434 	    lock = smb_llist_next(&node->n_wlock_list, lock)) {
435 
436 		if (lock->l_file == file) {
437 			mutex_enter(&lock->l_mutex);
438 			lock->l_blocked_by = NULL;
439 			lock->l_flags |= SMB_LOCK_FLAG_CLOSED;
440 			cv_broadcast(&lock->l_cv);
441 			mutex_exit(&lock->l_mutex);
442 		}
443 	}
444 	smb_llist_exit(&node->n_wlock_list);
445 
446 	/*
447 	 * Move locks matching the specified file from the node->n_lock_list
448 	 * to a temporary list (holding the lock the entire time) then
449 	 * destroy all the matching locks.  We can't call smb_lock_destroy
450 	 * while we are holding the lock for node->n_lock_list because we will
451 	 * deadlock and we can't drop the lock because the list contents might
452 	 * change (for example nxtl might get removed on another thread).
453 	 */
454 	list_create(&destroy_list, sizeof (smb_lock_t),
455 	    offsetof(smb_lock_t, l_lnd));
456 
457 	smb_llist_enter(&node->n_lock_list, RW_WRITER);
458 	lock = smb_llist_head(&node->n_lock_list);
459 	while (lock) {
460 		nxtl = smb_llist_next(&node->n_lock_list, lock);
461 		if (lock->l_file == file) {
462 			smb_llist_remove(&node->n_lock_list, lock);
463 			smb_lock_posix_unlock(node, lock, kcr);
464 			list_insert_tail(&destroy_list, lock);
465 		}
466 		lock = nxtl;
467 	}
468 	smb_llist_exit(&node->n_lock_list);
469 
470 	lock = list_head(&destroy_list);
471 	while (lock) {
472 		nxtl = list_next(&destroy_list, lock);
473 		list_remove(&destroy_list, lock);
474 		smb_lock_destroy(lock);
475 		lock = nxtl;
476 	}
477 
478 	list_destroy(&destroy_list);
479 }
480 
481 /*
482  * Cause a waiting lock to stop waiting and return an error.
483  * returns same status codes as unlock:
484  * NT_STATUS_SUCCESS, NT_STATUS_RANGE_NOT_LOCKED
485  */
486 uint32_t
487 smb_lock_range_cancel(smb_request_t *sr,
488     uint64_t start, uint64_t length, uint32_t pid)
489 {
490 	smb_node_t *node;
491 	smb_lock_t *lock;
492 	uint32_t status = NT_STATUS_RANGE_NOT_LOCKED;
493 	int cnt = 0;
494 
495 	node = sr->fid_ofile->f_node;
496 
497 	smb_llist_enter(&node->n_wlock_list, RW_READER);
498 
499 #ifdef	DEBUG
500 	if (smb_lock_debug) {
501 		cmn_err(CE_CONT, "smb_lock_range_cancel:\n"
502 		    "\tstart=0x%llx, len=0x%llx, of=%p, pid=%d\n",
503 		    (long long)start, (long long)length,
504 		    (void *)sr->fid_ofile, pid);
505 	}
506 #endif
507 
508 	for (lock = smb_llist_head(&node->n_wlock_list);
509 	    lock != NULL;
510 	    lock = smb_llist_next(&node->n_wlock_list, lock)) {
511 
512 		if ((start == lock->l_start) &&
513 		    (length == lock->l_length) &&
514 		    lock->l_file == sr->fid_ofile &&
515 		    lock->l_pid == pid) {
516 
517 			mutex_enter(&lock->l_mutex);
518 			lock->l_blocked_by = NULL;
519 			lock->l_flags |= SMB_LOCK_FLAG_CANCELLED;
520 			cv_broadcast(&lock->l_cv);
521 			mutex_exit(&lock->l_mutex);
522 			status = NT_STATUS_SUCCESS;
523 			cnt++;
524 		}
525 	}
526 
527 #ifdef	DEBUG
528 	if (smb_lock_debug && cnt != 1) {
529 		cmn_err(CE_CONT, "cancel found %d\n", cnt);
530 		smb_lock_dumpnode(node);
531 	}
532 #endif
533 
534 	smb_llist_exit(&node->n_wlock_list);
535 
536 	return (status);
537 }
538 
539 void
540 smb_lock_range_error(smb_request_t *sr, uint32_t status32)
541 {
542 	uint16_t errcode;
543 
544 	if (status32 == NT_STATUS_CANCELLED) {
545 		status32 = NT_STATUS_FILE_LOCK_CONFLICT;
546 		errcode = ERROR_LOCK_VIOLATION;
547 	} else {
548 		errcode = ERRlock;
549 	}
550 
551 	smbsr_error(sr, status32, ERRDOS, errcode);
552 }
553 
554 /*
555  * An SMB variant of nbl_conflict().
556  *
557  * SMB prevents remove or rename when conflicting locks exist
558  * (unlike NFS, which is why we can't just use nbl_conflict).
559  *
560  * Returns:
561  *	NT_STATUS_SHARING_VIOLATION - nbl_share_conflict
562  *	NT_STATUS_FILE_LOCK_CONFLICT - nbl_lock_conflict
563  *	NT_STATUS_SUCCESS - operation can proceed
564  *
565  * NB: This function used to also check the list of ofiles,
566  * via: smb_lock_range_access() but we _can't_ do that here
567  * due to lock order constraints between node->n_lock_list
568  * and node->vp->vnbllock (taken via nvl_start_crit).
569  * They must be taken in that order, and in here, we
570  * already hold vp->vnbllock.
571  */
572 DWORD
573 smb_nbl_conflict(smb_node_t *node, uint64_t off, uint64_t len, nbl_op_t op)
574 {
575 	int svmand;
576 
577 	SMB_NODE_VALID(node);
578 	ASSERT(smb_node_in_crit(node));
579 	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE ||
580 	    op == NBL_REMOVE || op == NBL_RENAME);
581 
582 	if (smb_node_is_dir(node))
583 		return (NT_STATUS_SUCCESS);
584 
585 	if (nbl_share_conflict(node->vp, op, &smb_ct))
586 		return (NT_STATUS_SHARING_VIOLATION);
587 
588 	/*
589 	 * When checking for lock conflicts, rename and remove
590 	 * are not allowed, so treat those as read/write.
591 	 */
592 	if (op == NBL_RENAME || op == NBL_REMOVE)
593 		op = NBL_READWRITE;
594 
595 	if (nbl_svmand(node->vp, zone_kcred(), &svmand))
596 		svmand = 1;
597 
598 	if (nbl_lock_conflict(node->vp, op, off, len, svmand, &smb_ct))
599 		return (NT_STATUS_FILE_LOCK_CONFLICT);
600 
601 	return (NT_STATUS_SUCCESS);
602 }
603 
604 /*
605  * smb_lock_posix_unlock
606  *
607  * checks if the current unlock request is in another lock and repeatedly calls
608  * smb_is_range_unlocked on a sliding basis to unlock all bits of the lock
609  * that are not in other locks
610  *
611  */
612 static void
613 smb_lock_posix_unlock(smb_node_t *node, smb_lock_t *lock, cred_t *cr)
614 {
615 	uint64_t	new_mark;
616 	uint64_t	unlock_start;
617 	uint64_t	unlock_end;
618 	smb_lock_t	new_unlock;
619 	smb_llist_t	*llist;
620 	boolean_t	can_unlock;
621 
622 	new_mark = 0;
623 	unlock_start = lock->l_start;
624 	unlock_end = unlock_start + lock->l_length;
625 	llist = &node->n_lock_list;
626 
627 	for (;;) {
628 		can_unlock = smb_is_range_unlocked(unlock_start, unlock_end,
629 		    lock->l_file->f_uniqid, llist, &new_mark);
630 		if (can_unlock) {
631 			if (new_mark) {
632 				new_unlock = *lock;
633 				new_unlock.l_start = unlock_start;
634 				new_unlock.l_length = new_mark - unlock_start;
635 				(void) smb_fsop_frlock(node, &new_unlock,
636 				    B_TRUE, cr);
637 				unlock_start = new_mark;
638 			} else {
639 				new_unlock = *lock;
640 				new_unlock.l_start = unlock_start;
641 				new_unlock.l_length = unlock_end - unlock_start;
642 				(void) smb_fsop_frlock(node, &new_unlock,
643 				    B_TRUE, cr);
644 				break;
645 			}
646 		} else if (new_mark) {
647 			unlock_start = new_mark;
648 		} else {
649 			break;
650 		}
651 	}
652 }
653 
654 /*
655  * smb_lock_range_overlap
656  *
657  * Checks if lock range(start, length) overlaps range in lock structure.
658  *
659  * Zero-length byte range locks actually affect no single byte of the stream,
660  * meaning they can still be accessed even with such locks in place. However,
661  * they do conflict with other ranges in the following manner:
662  *  conflict will only exist if the positive-length range contains the
663  *  zero-length range's offset but doesn't start at it
664  *
665  * return values:
666  *	0 - Lock range doesn't overlap
667  *	1 - Lock range overlaps.
668  */
669 
670 #define	RANGE_NO_OVERLAP	0
671 #define	RANGE_OVERLAP		1
672 
673 static int
674 smb_lock_range_overlap(struct smb_lock *lock, uint64_t start, uint64_t length)
675 {
676 	if (length == 0) {
677 		if ((lock->l_start < start) &&
678 		    ((lock->l_start + lock->l_length) > start))
679 			return (RANGE_OVERLAP);
680 
681 		return (RANGE_NO_OVERLAP);
682 	}
683 
684 	/* The following test is intended to catch roll over locks. */
685 	if ((start == lock->l_start) && (length == lock->l_length))
686 		return (RANGE_OVERLAP);
687 
688 	if (start < lock->l_start) {
689 		if (start + length > lock->l_start)
690 			return (RANGE_OVERLAP);
691 	} else if (start < lock->l_start + lock->l_length)
692 		return (RANGE_OVERLAP);
693 
694 	return (RANGE_NO_OVERLAP);
695 }
696 
697 /*
698  * smb_lock_range_lckrules
699  *
700  * Lock range rules:
701  *	1. Overlapping read locks are allowed if the
702  *	   current locks in the region are only read locks
703  *	   irrespective of pid of smb client issuing lock request.
704  *
705  *	2. Read lock in the overlapped region of write lock
706  *	   are allowed if the previous lock is performed by the
707  *	   same pid and connection.
708  *
709  * return status:
710  *	NT_STATUS_SUCCESS - Input lock range conforms to lock rules.
711  *	NT_STATUS_LOCK_NOT_GRANTED - Input lock conflicts lock rules.
712  *	NT_STATUS_FILE_CLOSED
713  */
714 static uint32_t
715 smb_lock_range_lckrules(
716     smb_ofile_t		*file,
717     smb_lock_t		*dlock,		/* desired lock */
718     smb_lock_t		**conflictp)
719 {
720 	smb_node_t	*node = file->f_node;
721 	smb_lock_t	*lock;
722 	uint32_t	status = NT_STATUS_SUCCESS;
723 
724 	/* Check if file is closed */
725 	if (!smb_ofile_is_open(file)) {
726 		return (NT_STATUS_FILE_CLOSED);
727 	}
728 
729 	/* Caller must hold lock for node->n_lock_list */
730 	for (lock = smb_llist_head(&node->n_lock_list);
731 	    lock != NULL;
732 	    lock = smb_llist_next(&node->n_lock_list, lock)) {
733 
734 		if (!smb_lock_range_overlap(lock, dlock->l_start,
735 		    dlock->l_length))
736 			continue;
737 
738 		/*
739 		 * Check to see if lock in the overlapping record
740 		 * is only read lock. Current finding is read
741 		 * locks can overlapped irrespective of pids.
742 		 */
743 		if ((lock->l_type == SMB_LOCK_TYPE_READONLY) &&
744 		    (dlock->l_type == SMB_LOCK_TYPE_READONLY)) {
745 			continue;
746 		}
747 
748 		/*
749 		 * When the read lock overlaps write lock, check if
750 		 * allowed.
751 		 */
752 		if ((dlock->l_type == SMB_LOCK_TYPE_READONLY) &&
753 		    !(lock->l_type == SMB_LOCK_TYPE_READONLY)) {
754 			if (lock->l_file == dlock->l_file &&
755 			    lock->l_pid == dlock->l_pid) {
756 				continue;
757 			}
758 		}
759 
760 		/* Conflict in overlapping lock element */
761 		*conflictp = lock;
762 		status = NT_STATUS_LOCK_NOT_GRANTED;
763 		break;
764 	}
765 
766 	return (status);
767 }
768 
769 /*
770  * Cancel method for smb_lock_wait()
771  *
772  * This request is waiting on a lock.  Wakeup everything
773  * waiting on the lock so that the relevant thread regains
774  * control and notices that is has been cancelled.  The
775  * other lock request threads waiting on this lock will go
776  * back to sleep when they discover they are still blocked.
777  */
778 static void
779 smb_lock_cancel_sr(smb_request_t *sr)
780 {
781 	smb_lock_t *lock = sr->cancel_arg2;
782 
783 	ASSERT(lock->l_magic == SMB_LOCK_MAGIC);
784 	mutex_enter(&lock->l_mutex);
785 	lock->l_blocked_by = NULL;
786 	lock->l_flags |= SMB_LOCK_FLAG_CANCELLED;
787 	cv_broadcast(&lock->l_cv);
788 	mutex_exit(&lock->l_mutex);
789 }
790 
/*
 * smb_lock_wait
 *
 * Wait for the lock 'conflict' (which blocks the desired lock 'lock')
 * to be released.  Caller must hold the write lock for
 * node->n_lock_list so that the set of active locks can't change
 * unexpectedly.  That list lock is released within this function for
 * the duration of the sleep, after the lock dependency has been
 * recorded, and is re-taken (as writer) before returning.
 *
 * While waiting, 'lock' sits on node->n_wlock_list and the request is
 * in state SMB_REQ_STATE_WAITING_LOCK with smb_lock_cancel_sr as its
 * cancel method.
 *
 * Returns:
 *	NT_STATUS_SUCCESS	the blocker went away; caller should
 *				re-apply the lock rules
 *	NT_STATUS_TIMEOUT	the timed wait expired
 *	NT_STATUS_CANCELLED	the request or the lock was cancelled
 *	NT_STATUS_FILE_CLOSED	the ofile was closed while waiting
 */
static uint32_t
smb_lock_wait(smb_request_t *sr, smb_lock_t *lock, smb_lock_t *conflict)
{
	smb_node_t	*node;
	clock_t		rc;
	uint32_t	status = NT_STATUS_SUCCESS;

	node = lock->l_file->f_node;
	ASSERT(node == conflict->l_file->f_node);

	/*
	 * Let the blocked lock (lock) l_blocked_by point to the
	 * conflicting lock (conflict), and increment a count of
	 * conflicts with the latter.  When the conflicting lock
	 * is destroyed, we'll search the list of waiting locks
	 * (on the node) and wake any with l_blocked_by ==
	 * the formerly conflicting lock.
	 */
	mutex_enter(&lock->l_mutex);
	lock->l_blocked_by = conflict;
	mutex_exit(&lock->l_mutex);

	mutex_enter(&conflict->l_mutex);
	conflict->l_conflicts++;
	mutex_exit(&conflict->l_mutex);

	/*
	 * Put the blocked lock on the waiting list.
	 */
	smb_llist_enter(&node->n_wlock_list, RW_WRITER);
	smb_llist_insert_tail(&node->n_wlock_list, lock);
	smb_llist_exit(&node->n_wlock_list);

#ifdef	DEBUG
	if (smb_lock_debug) {
		cmn_err(CE_CONT, "smb_lock_wait: lock=%p conflict=%p\n",
		    (void *)lock, (void *)conflict);
		smb_lock_dumpnode(node);
	}
#endif

	/*
	 * We come in with n_lock_list already held, and keep
	 * that hold until we're done with conflict (which is now).
	 * Drop it here and retake it later.  Note that the lock
	 * (*conflict) may go away once we exit this list, so the
	 * local pointer is cleared to prevent any further use.
	 */
	smb_llist_exit(&node->n_lock_list);
	conflict = NULL;

	/*
	 * Prepare for cancellable lock wait.
	 *
	 * If cancelled, smb_lock_cancel_sr sets
	 * l_flags |= SMB_LOCK_FLAG_CANCELLED
	 *
	 * If the request is no longer ACTIVE at this point, skip the
	 * wait entirely and report it as cancelled.
	 */
	mutex_enter(&sr->sr_mutex);
	if (sr->sr_state == SMB_REQ_STATE_ACTIVE) {
		sr->sr_state = SMB_REQ_STATE_WAITING_LOCK;
		sr->cancel_method = smb_lock_cancel_sr;
		sr->cancel_arg2 = lock;
	} else {
		status = NT_STATUS_CANCELLED;
	}
	mutex_exit(&sr->sr_mutex);

	/*
	 * Now we're ready to actually wait for the conflicting
	 * lock to be removed, or for the wait to be ended by
	 * an external cancel, or a timeout.  Every waker clears
	 * l_blocked_by before broadcasting, so that is the loop
	 * condition; a negative cv_timedwait return is treated
	 * as the timeout having expired.
	 */
	mutex_enter(&lock->l_mutex);
	while (status == NT_STATUS_SUCCESS &&
	    lock->l_blocked_by != NULL) {
		if (lock->l_flags & SMB_LOCK_FLAG_INDEFINITE) {
			cv_wait(&lock->l_cv, &lock->l_mutex);
		} else {
			rc = cv_timedwait(&lock->l_cv,
			    &lock->l_mutex, lock->l_end_time);
			if (rc < 0)
				status = NT_STATUS_TIMEOUT;
		}
	}
	/*
	 * Woken without a timeout: find out why.  CLOSED is checked
	 * last, so it wins over CANCELLED when both flags are set.
	 */
	if (status == NT_STATUS_SUCCESS) {
		if (lock->l_flags & SMB_LOCK_FLAG_CANCELLED)
			status = NT_STATUS_CANCELLED;
		if (lock->l_flags & SMB_LOCK_FLAG_CLOSED)
			status = NT_STATUS_FILE_CLOSED;
	}
	mutex_exit(&lock->l_mutex);

	/*
	 * Did we get the lock or were we cancelled?
	 */
	mutex_enter(&sr->sr_mutex);
switch_state:
	switch (sr->sr_state) {
	case SMB_REQ_STATE_WAITING_LOCK:
		/* Normal wakeup.  Keep status from above. */
		sr->sr_state = SMB_REQ_STATE_ACTIVE;
		break;
	case SMB_REQ_STATE_CANCEL_PENDING:
		/* cancel_method running. wait, then look at the state again. */
		cv_wait(&sr->sr_st_cv, &sr->sr_mutex);
		goto switch_state;
	case SMB_REQ_STATE_CANCELLED:
		/* Call should return an error. */
		if (status == NT_STATUS_SUCCESS)
			status = NT_STATUS_CANCELLED;
		break;
	default:
		break;
	}
	/* The wait is over; the cancel hook must not see 'lock' again. */
	sr->cancel_method = NULL;
	sr->cancel_arg2 = NULL;
	mutex_exit(&sr->sr_mutex);

	/* Return to the caller with n_lock_list held. */
	smb_llist_enter(&node->n_lock_list, RW_WRITER);

	/* Take the (formerly) blocked lock off the waiting list. */
	smb_llist_enter(&node->n_wlock_list, RW_WRITER);
	smb_llist_remove(&node->n_wlock_list, lock);
	smb_llist_exit(&node->n_wlock_list);

	return (status);
}
929 
930 /*
931  * smb_lock_range_ulckrules
932  *
933  *	1. Unlock should be performed at exactly matching ends.
934  *	   This has been changed because overlapping ends is
935  *	   allowed and there is no other precise way of locating
936  *	   lock entity in node lock list.
937  *
938  *	2. Unlock is failed if there is no corresponding lock exists.
939  *
940  * Return values
941  *
942  *	NT_STATUS_SUCCESS		Unlock request matches lock record
943  *					pointed by 'foundlock' lock structure.
944  *
945  *	NT_STATUS_RANGE_NOT_LOCKED	Unlock request doen't match any
946  *					of lock record in node lock request or
947  *					error in unlock range processing.
948  */
949 static uint32_t
950 smb_lock_range_ulckrules(
951     smb_ofile_t		*file,
952     uint64_t		start,
953     uint64_t		length,
954     uint32_t		pid,
955     smb_lock_t		**foundlock)
956 {
957 	smb_node_t	*node = file->f_node;
958 	smb_lock_t	*lock;
959 	uint32_t	status = NT_STATUS_RANGE_NOT_LOCKED;
960 
961 	/* Caller must hold lock for node->n_lock_list */
962 	for (lock = smb_llist_head(&node->n_lock_list);
963 	    lock != NULL;
964 	    lock = smb_llist_next(&node->n_lock_list, lock)) {
965 
966 		if ((start == lock->l_start) &&
967 		    (length == lock->l_length) &&
968 		    lock->l_file == file &&
969 		    lock->l_pid == pid) {
970 			*foundlock = lock;
971 			status = NT_STATUS_SUCCESS;
972 			break;
973 		}
974 	}
975 
976 	return (status);
977 }
978 
979 static smb_lock_t *
980 smb_lock_create(
981     smb_request_t *sr,
982     uint64_t start,
983     uint64_t length,
984     uint32_t pid,
985     uint32_t locktype,
986     uint32_t timeout)
987 {
988 	smb_lock_t *lock;
989 
990 	ASSERT(locktype == SMB_LOCK_TYPE_READWRITE ||
991 	    locktype == SMB_LOCK_TYPE_READONLY);
992 
993 	lock = kmem_cache_alloc(smb_cache_lock, KM_SLEEP);
994 	bzero(lock, sizeof (*lock));
995 	lock->l_magic = SMB_LOCK_MAGIC;
996 	lock->l_file = sr->fid_ofile;
997 	/* l_file == fid_ofile implies same connection (see ofile lookup) */
998 	lock->l_pid = pid;
999 	lock->l_type = locktype;
1000 	lock->l_start = start;
1001 	lock->l_length = length;
1002 	/*
1003 	 * Calculate the absolute end time so that we can use it
1004 	 * in cv_timedwait.
1005 	 */
1006 	lock->l_end_time = ddi_get_lbolt() + MSEC_TO_TICK(timeout);
1007 	if (timeout == UINT_MAX)
1008 		lock->l_flags |= SMB_LOCK_FLAG_INDEFINITE;
1009 
1010 	mutex_init(&lock->l_mutex, NULL, MUTEX_DEFAULT, NULL);
1011 	cv_init(&lock->l_cv, NULL, CV_DEFAULT, NULL);
1012 
1013 	return (lock);
1014 }
1015 
1016 static void
1017 smb_lock_free(smb_lock_t *lock)
1018 {
1019 
1020 	lock->l_magic = 0;
1021 	cv_destroy(&lock->l_cv);
1022 	mutex_destroy(&lock->l_mutex);
1023 
1024 	kmem_cache_free(smb_cache_lock, lock);
1025 }
1026 
1027 /*
1028  * smb_lock_destroy
1029  *
1030  * Caller must hold node->n_lock_list
1031  */
1032 static void
1033 smb_lock_destroy(smb_lock_t *lock)
1034 {
1035 	smb_lock_t *tl;
1036 	smb_node_t *node;
1037 	uint32_t ccnt;
1038 
1039 	/*
1040 	 * Wake any waiting locks that were blocked by this.
1041 	 * We want them to wake and continue in FIFO order,
1042 	 * so enter/exit the llist every time...
1043 	 */
1044 	mutex_enter(&lock->l_mutex);
1045 	ccnt = lock->l_conflicts;
1046 	lock->l_conflicts = 0;
1047 	mutex_exit(&lock->l_mutex);
1048 
1049 	node = lock->l_file->f_node;
1050 	while (ccnt) {
1051 
1052 		smb_llist_enter(&node->n_wlock_list, RW_READER);
1053 
1054 		for (tl = smb_llist_head(&node->n_wlock_list);
1055 		    tl != NULL;
1056 		    tl = smb_llist_next(&node->n_wlock_list, tl)) {
1057 			mutex_enter(&tl->l_mutex);
1058 			if (tl->l_blocked_by == lock) {
1059 				tl->l_blocked_by = NULL;
1060 				cv_broadcast(&tl->l_cv);
1061 				mutex_exit(&tl->l_mutex);
1062 				goto woke_one;
1063 			}
1064 			mutex_exit(&tl->l_mutex);
1065 		}
1066 		/* No more in the list blocked by this lock. */
1067 		ccnt = 0;
1068 	woke_one:
1069 		smb_llist_exit(&node->n_wlock_list);
1070 		if (ccnt) {
1071 			/*
1072 			 * Let the thread we woke have a chance to run
1073 			 * before we wake competitors for their lock.
1074 			 */
1075 			delay(MSEC_TO_TICK(1));
1076 		}
1077 	}
1078 
1079 	smb_lock_free(lock);
1080 }
1081 
1082 /*
1083  * smb_is_range_unlocked
1084  *
1085  * Checks if the current unlock byte range request overlaps another lock
1086  * This function is used to determine where POSIX unlocks should be
1087  * applied.
1088  *
1089  * The return code and the value of new_mark must be interpreted as
1090  * follows:
1091  *
1092  * B_TRUE and (new_mark == 0):
1093  *   This is the last or only lock left to be unlocked
1094  *
1095  * B_TRUE and (new_mark > 0):
1096  *   The range from start to new_mark can be unlocked
1097  *
1098  * B_FALSE and (new_mark == 0):
1099  *   The unlock can't be performed and we are done
1100  *
1101  * B_FALSE and (new_mark > 0),
1102  *   The range from start to new_mark can't be unlocked
1103  *   Start should be reset to new_mark for the next pass
1104  */
1105 
1106 static boolean_t
1107 smb_is_range_unlocked(uint64_t start, uint64_t end, uint32_t uniqid,
1108     smb_llist_t *llist_head, uint64_t *new_mark)
1109 {
1110 	struct smb_lock *lk = NULL;
1111 	uint64_t low_water_mark = MAXOFFSET_T;
1112 	uint64_t lk_start;
1113 	uint64_t lk_end;
1114 
1115 	*new_mark = 0;
1116 	lk = smb_llist_head(llist_head);
1117 	while (lk) {
1118 		if (lk->l_length == 0) {
1119 			lk = smb_llist_next(llist_head, lk);
1120 			continue;
1121 		}
1122 
1123 		if (lk->l_file->f_uniqid != uniqid) {
1124 			lk = smb_llist_next(llist_head, lk);
1125 			continue;
1126 		}
1127 
1128 		lk_end = lk->l_start + lk->l_length - 1;
1129 		lk_start = lk->l_start;
1130 
1131 		/*
1132 		 * there is no overlap for the first 2 cases
1133 		 * check next node
1134 		 */
1135 		if (lk_end < start) {
1136 			lk = smb_llist_next(llist_head, lk);
1137 			continue;
1138 		}
1139 		if (lk_start > end) {
1140 			lk = smb_llist_next(llist_head, lk);
1141 			continue;
1142 		}
1143 
1144 		/* this range is completely locked */
1145 		if ((lk_start <= start) && (lk_end >= end)) {
1146 			return (B_FALSE);
1147 		}
1148 
1149 		/* the first part of this range is locked */
1150 		if ((start >= lk_start) && (start <= lk_end)) {
1151 			if (end > lk_end)
1152 				*new_mark = lk_end + 1;
1153 			return (B_FALSE);
1154 		}
1155 
1156 		/* this piece is unlocked */
1157 		if ((lk_start >= start) && (lk_start <= end)) {
1158 			if (low_water_mark > lk_start)
1159 				low_water_mark  = lk_start;
1160 		}
1161 
1162 		lk = smb_llist_next(llist_head, lk);
1163 	}
1164 
1165 	if (low_water_mark != MAXOFFSET_T) {
1166 		*new_mark = low_water_mark;
1167 		return (B_TRUE);
1168 	}
1169 	/* the range is completely unlocked */
1170 	return (B_TRUE);
1171 }
1172 
1173 #ifdef	DEBUG
1174 static void
1175 smb_lock_dump1(smb_lock_t *lock)
1176 {
1177 	cmn_err(CE_CONT, "\t0x%p: 0x%llx, 0x%llx, %p, %d\n",
1178 	    (void *)lock,
1179 	    (long long)lock->l_start,
1180 	    (long long)lock->l_length,
1181 	    (void *)lock->l_file,
1182 	    lock->l_pid);
1183 
1184 }
1185 
1186 static void
1187 smb_lock_dumplist(smb_llist_t *llist)
1188 {
1189 	smb_lock_t *lock;
1190 
1191 	for (lock = smb_llist_head(llist);
1192 	    lock != NULL;
1193 	    lock = smb_llist_next(llist, lock)) {
1194 		smb_lock_dump1(lock);
1195 	}
1196 }
1197 
1198 static void
1199 smb_lock_dumpnode(smb_node_t *node)
1200 {
1201 	cmn_err(CE_CONT, "Granted Locks on %p (%d)\n",
1202 	    (void *)node, node->n_lock_list.ll_count);
1203 	smb_lock_dumplist(&node->n_lock_list);
1204 
1205 	cmn_err(CE_CONT, "Waiting Locks on %p (%d)\n",
1206 	    (void *)node, node->n_wlock_list.ll_count);
1207 	smb_lock_dumplist(&node->n_wlock_list);
1208 }
1209 
1210 #endif
1211