xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c (revision dd72704bd9e794056c558153663c739e2012d721)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021 Tintri by DDN, Inc. All rights reserved.
14  * Copyright 2022 RackTop Systems, Inc.
15  */
16 
17 /*
18  * (SMB1/SMB2) Server-level Oplock support.
19  *
20  * Conceptually, this is a separate layer on top of the
21  * file system (FS) layer oplock code in smb_cmn_oplock.c.
22  * If these layers were more distinct, the FS layer would
23  * need to use call-back functions (installed from here)
24  * to "indicate an oplock break to the server" (see below).
25  * As these layers are all in the same kernel module, the
26  * delivery of these break indications just uses a direct
27  * function call to smb_oplock_ind_break() below.
28  *
29  * This layer is responsible for handling the break indication,
30  * which often requires scheduling a taskq job in the server,
31  * and sending an oplock break message to the client using
32  * the appropriate protocol for the open handle affected.
33  *
34  * The details of composing an oplock break message, the
35  * protocol-specific details of requesting an oplock, and
36  * returning that oplock to the client are in the files:
37  *  smb_oplock.c, smb2_oplock.c, smb2_lease.c
38  */
39 
40 #include <smbsrv/smb2_kproto.h>
41 #include <smbsrv/smb_oplock.h>
42 
43 /*
44  * Verify relationship between BREAK_TO_... and CACHE bits,
45  * used when setting the BREAK_TO_... below.
46  */
/* Each BREAK_TO_x bit must equal the matching x bit moved up by BREAK_SHIFT. */
#if (READ_CACHING << BREAK_SHIFT) != BREAK_TO_READ_CACHING
#error "BREAK_TO_READ_CACHING"
#endif
#if (HANDLE_CACHING << BREAK_SHIFT) != BREAK_TO_HANDLE_CACHING
#error "BREAK_TO_HANDLE_CACHING"
#endif
#if (WRITE_CACHING << BREAK_SHIFT) != BREAK_TO_WRITE_CACHING
#error "BREAK_TO_WRITE_CACHING"
#endif

/* Union of the read, handle and write caching bits. */
#define	CACHE_RWH (READ_CACHING | HANDLE_CACHING | WRITE_CACHING)
57 
58 /*
59  * This is the timeout used in the thread that sends an
60  * oplock break and waits for the client to respond
61  * before it breaks the oplock locally.
62  */
int smb_oplock_timeout_ack = 30 * 1000;	/* mSec. */

/*
 * Default wait (in mSec.) for threads that have just finished
 * some kind of oplock request and must now let one or more
 * breaks run to completion.  Keep this at least a couple of
 * seconds LONGER than smb_oplock_timeout_ack, otherwise I/O
 * callers would stop waiting before the local ack timeout
 * has had a chance to fire.
 */
int smb_oplock_timeout_def = 45 * 1000;	/* mSec. */
74 
75 static void smb_oplock_async_break(void *);
76 static void smb_oplock_hdl_update(smb_request_t *sr);
77 static void smb_oplock_hdl_moved(smb_ofile_t *);
78 static void smb_oplock_hdl_closed(smb_ofile_t *);
79 static void smb_oplock_wait_break_cancel(smb_request_t *sr);
80 
81 
82 /*
83  * 2.1.5.17.3 Indicating an Oplock Break to the Server
84  *
85  * The inputs for indicating an oplock break to the server are:
86  *
87  *	BreakingOplockOpen: The Open used to request the oplock
88  *	  that is now breaking.
89  *	 NewOplockLevel: The type of oplock the requested oplock
90  *	  has been broken to.  Valid values are as follows:
91  *		LEVEL_NONE (that is, no oplock)
92  *		LEVEL_TWO
93  *		A combination of one or more of the following flags:
94  *			READ_CACHING
95  *			HANDLE_CACHING
96  *			WRITE_CACHING
97  *	AcknowledgeRequired: A Boolean value; TRUE if the server
98  *	  MUST acknowledge the oplock break, FALSE if not,
99  *	  as specified in section 2.1.5.18.
100  *	OplockCompletionStatus: The NTSTATUS code to return to the server.
101  *
102  * This algorithm simply represents the completion of an oplock request,
103  * as specified in section 2.1.5.17.1 or section 2.1.5.17.2. The server
104  * is expected to associate the return status from this algorithm with
105  * BreakingOplockOpen, which is the Open passed in when it requested
106  * the oplock that is now breaking.
107  *
108  * It is important to note that because several oplocks can be outstanding
109  * in parallel, although this algorithm represents the completion of an
110  * oplock request, it might not result in the completion of the algorithm
111  * that called it. In particular, calling this algorithm will result in
112  * completion of the caller only if BreakingOplockOpen is the same as the
113  * Open with which the calling algorithm was itself called. To mitigate
114  * confusion, each algorithm that refers to this section will specify
115  * whether that algorithm's operation terminates at that point or not.
116  *
117  * The object store MUST return OplockCompletionStatus,
118  * AcknowledgeRequired, and NewOplockLevel to the server (the algorithm is
119  * as specified in section 2.1.5.17.1 and section 2.1.5.17.2).
120  *
121  * Implementation:
122  *
123  * We use two versions of this function:
124  *	smb_oplock_ind_break_in_ack
125  *	smb_oplock_ind_break
126  *
127  * The first is used when we're handling an Oplock Break Ack.
128  * The second is used when other operations cause a break,
129  * generally in one of the smb_oplock_break_... functions.
130  *
131  * Note that these are call-back functions that may be called with the
132  * node ofile list rwlock held and the node oplock mutex entered, so
133  * these should ONLY schedule oplock break work, and MUST NOT attempt
134  * any actions that might require either of those locks.
135  */
136 
137 /*
138  * smb_oplock_ind_break_in_ack
139  *
140  * Variant of smb_oplock_ind_break() for the oplock Ack handler.
141  * When we need to indicate another oplock break from within the
142  * Ack handler (during the Ack. of some previous oplock break)
143  * we need to make sure this new break indication goes out only
144  * AFTER the reply to the current break ack. is sent out.
145  *
146  * In this case, we always have an SR (the break ack) so we can
147  * append the "ind break" work to the current SR and let the
148  * request handler thread do this work after the reply is sent.
149  * Note: this is always an SMB2 or later request, because this
150  * only happens for "granular" oplocks, which are SMB2-only.
151  *
152  * This is mostly the same as smb_oplock_ind_break() except:
153  * - The only CompletionStatus possible is STATUS_CANT_GRANT.
154  * - Instead of taskq_dispatch this appends the new SR to
155  *   the "post work" queue on the current SR (if possible).
156  *
157  * Note called with the node ofile list rwlock held and
158  * the oplock mutex entered.
159  */
160 void
161 smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile,
162     uint32_t NewLevel, boolean_t AckRequired)
163 {
164 	smb_server_t *sv = ofile->f_server;
165 	smb_node_t *node = ofile->f_node;
166 	smb_request_t *sr = NULL;
167 	boolean_t use_postwork = B_TRUE;
168 
169 	ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock));
170 	ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex));
171 
172 	/*
173 	 * This should happen only with SMB2 or later,
174 	 * but in case that ever changes...
175 	 */
176 	if (ack_sr->session->dialect < SMB_VERS_2_BASE) {
177 		smb_oplock_ind_break(ofile, NewLevel,
178 		    AckRequired, STATUS_CANT_GRANT);
179 		return;
180 	}
181 
182 	/*
183 	 * We're going to schedule a request that will have a
184 	 * reference to this ofile. Get the hold first.
185 	 */
186 	if (!smb_ofile_hold_olbrk(ofile)) {
187 		/* It's closing (or whatever).  Nothing to do. */
188 		return;
189 	}
190 
191 	/*
192 	 * When called from Ack processing, we want to use a
193 	 * request on the session doing the ack, so we can
194 	 * append "post work" to that session.  If we can't
195 	 * allocate a request on that session (because it's
196 	 * now disconnecting) use a request from the server
197 	 * session like smb_oplock_ind_break does, and then
198 	 * use taskq_dispatch instead of postwork.
199 	 */
200 	sr = smb_request_alloc(ack_sr->session, 0);
201 	if (sr == NULL) {
202 		use_postwork = B_FALSE;
203 		sr = smb_request_alloc(sv->sv_session, 0);
204 	}
205 	if (sr == NULL) {
206 		/*
207 		 * Server must be shutting down.  We took a
208 		 * hold on the ofile that must be released,
209 		 * but we can't release here because we're
210 		 * called with the node ofile list entered.
211 		 * See smb_ofile_release_LL.
212 		 */
213 		smb_llist_post(&node->n_ofile_list, ofile,
214 		    smb_ofile_release_LL);
215 		return;
216 	}
217 
218 	sr->sr_state = SMB_REQ_STATE_SUBMITTED;
219 	sr->smb2_async = B_TRUE;
220 	sr->user_cr = zone_kcred();
221 	sr->fid_ofile = ofile;
222 	if (ofile->f_tree != NULL) {
223 		sr->tid_tree = ofile->f_tree;
224 		smb_tree_hold_internal(sr->tid_tree);
225 	}
226 	if (ofile->f_user != NULL) {
227 		sr->uid_user = ofile->f_user;
228 		smb_user_hold_internal(sr->uid_user);
229 	}
230 	if (ofile->f_lease != NULL)
231 		NewLevel |= OPLOCK_LEVEL_GRANULAR;
232 
233 	sr->arg.olbrk.NewLevel = NewLevel;
234 	sr->arg.olbrk.AckRequired = AckRequired;
235 
236 	/*
237 	 * Could do this in _hdl_update but this way it's
238 	 * visible in the dtrace fbt entry probe.
239 	 */
240 	sr->arg.olbrk.OldLevel = ofile->f_oplock.og_breakto;
241 
242 	smb_oplock_hdl_update(sr);
243 
244 	if (use_postwork) {
245 		/*
246 		 * Using smb2_cmd_code to indicate what to call.
247 		 * work func. will call smb_oplock_send_brk
248 		 */
249 		sr->smb2_cmd_code = SMB2_OPLOCK_BREAK;
250 		smb2sr_append_postwork(ack_sr, sr);
251 	} else {
252 		/* Will call smb_oplock_send_break */
253 		sr->smb2_status = STATUS_CANT_GRANT;
254 		(void) taskq_dispatch(sv->sv_worker_pool,
255 		    smb_oplock_async_break, sr, TQ_SLEEP);
256 	}
257 }
258 
259 /*
260  * smb_oplock_ind_break
261  *
262  * This is the function described in [MS-FSA] 2.1.5.17.3
263  * which is called many places in the oplock break code.
264  *
265  * Schedule a request & taskq job to do oplock break work
266  * as requested by the FS-level code (smb_cmn_oplock.c).
267  *
268  * See also: smb_oplock_ind_break_in_ack
269  *
270  * Note called with the node ofile list rwlock held and
271  * the oplock mutex entered.
272  */
273 void
274 smb_oplock_ind_break(smb_ofile_t *ofile, uint32_t NewLevel,
275     boolean_t AckRequired, uint32_t CompletionStatus)
276 {
277 	smb_server_t *sv = ofile->f_server;
278 	smb_node_t *node = ofile->f_node;
279 	smb_request_t *sr = NULL;
280 
281 	ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock));
282 	ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex));
283 
284 	/*
285 	 * See notes at smb_oplock_async_break re. CompletionStatus
286 	 * Check for any invalid codes here, so assert happens in
287 	 * the thread passing an unexpected value.
288 	 * The real work happens in a taskq job.
289 	 */
290 	switch (CompletionStatus) {
291 
292 	case NT_STATUS_SUCCESS:
293 	case STATUS_CANT_GRANT:
294 		/* Send break via taskq job. */
295 		break;
296 
297 	case STATUS_NEW_HANDLE:
298 		smb_oplock_hdl_moved(ofile);
299 		return;
300 
301 	case NT_STATUS_OPLOCK_HANDLE_CLOSED:
302 		smb_oplock_hdl_closed(ofile);
303 		return;
304 
305 	default:
306 		ASSERT(0);
307 		return;
308 	}
309 
310 	/*
311 	 * We're going to schedule a request that will have a
312 	 * reference to this ofile. Get the hold first.
313 	 */
314 	if (!smb_ofile_hold_olbrk(ofile)) {
315 		/* It's closing (or whatever).  Nothing to do. */
316 		return;
317 	}
318 
319 	/*
320 	 * We need a request allocated on the session that owns
321 	 * this ofile in order to safely send on that session.
322 	 *
323 	 * Note that while we hold a ref. on the ofile, it's
324 	 * f_session will not change.  An ofile in state
325 	 * _ORPHANED will have f_session == NULL, but the
326 	 * f_session won't _change_ while we have a ref,
327 	 * and won't be torn down under our feet.
328 	 * Same for f_tree and f_user
329 	 *
330 	 * If f_session is NULL, or it's in a state that doesn't
331 	 * allow new requests, use the special "server" session.
332 	 */
333 	if (ofile->f_session != NULL)
334 		sr = smb_request_alloc(ofile->f_session, 0);
335 	if (sr == NULL)
336 		sr = smb_request_alloc(sv->sv_session, 0);
337 	if (sr == NULL) {
338 		/*
339 		 * Server must be shutting down.  We took a
340 		 * hold on the ofile that must be released,
341 		 * but we can't release here because we're
342 		 * called with the node ofile list entered.
343 		 * See smb_ofile_release_LL.
344 		 */
345 		smb_llist_post(&node->n_ofile_list, ofile,
346 		    smb_ofile_release_LL);
347 		return;
348 	}
349 
350 	sr->sr_state = SMB_REQ_STATE_SUBMITTED;
351 	sr->smb2_async = B_TRUE;
352 	sr->user_cr = zone_kcred();
353 	sr->fid_ofile = ofile;
354 	if (ofile->f_tree != NULL) {
355 		sr->tid_tree = ofile->f_tree;
356 		smb_tree_hold_internal(sr->tid_tree);
357 	}
358 	if (ofile->f_user != NULL) {
359 		sr->uid_user = ofile->f_user;
360 		smb_user_hold_internal(sr->uid_user);
361 	}
362 	if (ofile->f_lease != NULL)
363 		NewLevel |= OPLOCK_LEVEL_GRANULAR;
364 
365 	sr->arg.olbrk.NewLevel = NewLevel;
366 	sr->arg.olbrk.AckRequired = AckRequired;
367 	sr->smb2_status = CompletionStatus;
368 
369 	/*
370 	 * Could do this in _hdl_update but this way it's
371 	 * visible in the dtrace fbt entry probe.
372 	 */
373 	sr->arg.olbrk.OldLevel = ofile->f_oplock.og_breakto;
374 
375 	smb_oplock_hdl_update(sr);
376 
377 	/* Will call smb_oplock_send_break */
378 	(void) taskq_dispatch(sv->sv_worker_pool,
379 	    smb_oplock_async_break, sr, TQ_SLEEP);
380 }
381 
382 /*
383  * smb_oplock_async_break
384  *
385  * Called via the taskq to handle an asynchronous oplock break.
386  * We have a hold on the ofile, which will be released in
387  * smb_request_free (via sr->fid_ofile)
388  *
389  * Note we may have: sr->uid_user == NULL, sr->tid_tree == NULL.
390  */
391 static void
392 smb_oplock_async_break(void *arg)
393 {
394 	smb_request_t	*sr = arg;
395 	uint32_t	CompletionStatus;
396 
397 	SMB_REQ_VALID(sr);
398 
399 	CompletionStatus = sr->smb2_status;
400 	sr->smb2_status = NT_STATUS_SUCCESS;
401 
402 	mutex_enter(&sr->sr_mutex);
403 	sr->sr_worker = curthread;
404 	sr->sr_state = SMB_REQ_STATE_ACTIVE;
405 	mutex_exit(&sr->sr_mutex);
406 
407 	/*
408 	 * Note that the CompletionStatus from the FS level
409 	 * (smb_cmn_oplock.c) encodes what kind of action we
410 	 * need to take at the SMB level.
411 	 */
412 	switch (CompletionStatus) {
413 
414 	case STATUS_CANT_GRANT:
415 	case NT_STATUS_SUCCESS:
416 		smb_oplock_send_break(sr);
417 		break;
418 
419 	default:
420 		/* Checked by caller. */
421 		ASSERT(0);
422 		break;
423 	}
424 
425 	if (sr->dh_nvl_dirty) {
426 		sr->dh_nvl_dirty = B_FALSE;
427 		smb2_dh_update_nvfile(sr);
428 	}
429 
430 	sr->sr_state = SMB_REQ_STATE_COMPLETED;
431 	smb_request_free(sr);
432 }
433 
434 /*
435  * Send an oplock (or lease) break to the client.
436  * If we can't, then do a local break.
437  *
438  * This is called either from smb_oplock_async_break via a
439  * taskq job scheduled in smb_oplock_ind_break, or from the
440  * smb2sr_append_postwork() mechanism when we're doing a
441  * "break in ack", via smb_oplock_ind_break_in_ack.
442  *
443  * We don't always have an sr->session here, so
444  * determine the oplock type (lease etc) from
445  * f_lease and f_oplock.og_dialect etc.
446  */
447 void
448 smb_oplock_send_break(smb_request_t *sr)
449 {
450 	smb_ofile_t	*ofile = sr->fid_ofile;
451 
452 	if (ofile->f_lease != NULL)
453 		smb2_lease_send_break(sr);
454 	else if (ofile->f_oplock.og_dialect >= SMB_VERS_2_BASE)
455 		smb2_oplock_send_break(sr);
456 	else
457 		smb1_oplock_send_break(sr);
458 }
459 
460 /*
461  * Called by smb_oplock_ind_break for the case STATUS_NEW_HANDLE,
462  * which is an alias for NT_STATUS_OPLOCK_SWITCHED_TO_NEW_HANDLE.
463  *
464  * The FS-level oplock layer calls this to update the SMB-level state
465  * when the oplock for some lease is about to move to a different
466  * ofile on the lease.
467  *
468  * To avoid later confusion, clear og_state on this ofile now.
469  * Without this, smb_oplock_move() may issue debug complaints
470  * about moving oplock state onto a non-empty oplock.
471  */
472 static const smb_ofile_t invalid_ofile;
473 static void
474 smb_oplock_hdl_moved(smb_ofile_t *ofile)
475 {
476 	smb_lease_t *ls = ofile->f_lease;
477 
478 	ASSERT(ls != NULL);
479 	if (ls != NULL && ls->ls_oplock_ofile == ofile)
480 		ls->ls_oplock_ofile = (smb_ofile_t *)&invalid_ofile;
481 
482 	ofile->f_oplock.og_state = 0;
483 	ofile->f_oplock.og_breakto = 0;
484 	ofile->f_oplock.og_breaking = B_FALSE;
485 }
486 
487 /*
488  * See: NT_STATUS_OPLOCK_HANDLE_CLOSED above and
489  * smb_ofile_close, smb_oplock_break_CLOSE.
490  *
491  * The FS-level oplock layer calls this to update the
492  * SMB-level state when a handle loses its oplock.
493  */
494 static void
495 smb_oplock_hdl_closed(smb_ofile_t *ofile)
496 {
497 	smb_lease_t *lease = ofile->f_lease;
498 
499 	if (lease != NULL) {
500 		if (lease->ls_oplock_ofile == ofile) {
501 			/*
502 			 * smb2_lease_ofile_close should have
503 			 * moved the oplock to another ofile.
504 			 */
505 			ASSERT(0);
506 			lease->ls_oplock_ofile = NULL;
507 		}
508 	}
509 	ofile->f_oplock.og_state = 0;
510 	ofile->f_oplock.og_breakto = 0;
511 	ofile->f_oplock.og_breaking = B_FALSE;
512 }
513 
514 /*
515  * smb_oplock_hdl_update
516  *
517  * Called by smb_oplock_ind_break (and ...in_ack) just before we
518  * schedule smb_oplock_async_break / smb_oplock_send_break taskq job,
519  * so we can make any state changes that should happen immediately.
520  *
521  * Here, keep track of what we will send to the client.
522  * The caller has already saved the old state in arg.olbrk.OldLevel
523  *
524  * Note that because we may be in the midst of processing an
525  * smb_oplock_ack_break call here, the _breaking flag will be
526  * temporarily false, and is set true again if this ack causes
527  * another break.  This makes it tricky to know when to update
528  * the epoch, which is not supposed to increment when there's
529  * already an unacknowledged break out to the client.
530  * We can recognize that by comparing ls_state vs ls_breakto.
531  * If no unacknowledged break, ls_state == ls_breakto.
532  */
533 static void
534 smb_oplock_hdl_update(smb_request_t *sr)
535 {
536 	smb_ofile_t	*ofile = sr->fid_ofile;
537 	smb_lease_t	*lease = ofile->f_lease;
538 	uint32_t	NewLevel = sr->arg.olbrk.NewLevel;
539 	boolean_t	AckReq = sr->arg.olbrk.AckRequired;
540 
541 #ifdef	DEBUG
542 	smb_node_t *node = ofile->f_node;
543 	ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock));
544 	ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex));
545 #endif
546 
547 	/* Caller sets arg.olbrk.OldLevel */
548 	ofile->f_oplock.og_breakto = NewLevel;
549 	ofile->f_oplock.og_breaking = B_TRUE;
550 	if (lease != NULL) {
551 		// If no unacknowledged break, update epoch.
552 		if (lease->ls_breakto == lease->ls_state)
553 			lease->ls_epoch++;
554 
555 		lease->ls_breakto = NewLevel;
556 		lease->ls_breaking = B_TRUE;
557 	}
558 
559 	if (!AckReq) {
560 		/*
561 		 * Not expecting an Ack from the client.
562 		 * Update state immediately.
563 		 */
564 		ofile->f_oplock.og_state = NewLevel;
565 		ofile->f_oplock.og_breaking = B_FALSE;
566 		if (lease != NULL) {
567 			lease->ls_state = NewLevel;
568 			lease->ls_breaking = B_FALSE;
569 		}
570 		if (ofile->dh_persist) {
571 			smb2_dh_update_oplock(sr, ofile);
572 		}
573 	}
574 }
575 
576 /*
577  * Helper for smb_ofile_close
578  *
579  * Note that a client may close an ofile in response to an
580  * oplock break or lease break instead of doing an Ack break,
581  * so this must wake anything that might be waiting on an ack.
582  */
583 void
584 smb_oplock_close(smb_ofile_t *ofile)
585 {
586 	smb_node_t *node = ofile->f_node;
587 
588 	smb_llist_enter(&node->n_ofile_list, RW_READER);
589 	mutex_enter(&node->n_oplock.ol_mutex);
590 
591 	if (ofile->f_oplock_closing == B_FALSE) {
592 		ofile->f_oplock_closing = B_TRUE;
593 
594 		if (ofile->f_lease != NULL)
595 			smb2_lease_ofile_close(ofile);
596 
597 		smb_oplock_break_CLOSE(node, ofile);
598 
599 		ofile->f_oplock.og_state = 0;
600 		ofile->f_oplock.og_breakto = 0;
601 		ofile->f_oplock.og_breaking = B_FALSE;
602 		cv_broadcast(&ofile->f_oplock.og_ack_cv);
603 	}
604 
605 	mutex_exit(&node->n_oplock.ol_mutex);
606 	smb_llist_exit(&node->n_ofile_list);
607 }
608 
609 /*
610  * Called by smb_request_cancel() via sr->cancel_method
611  * Arg (cancel_arg2) is the CV that smb_oplock_wait_ack() is waiting on.
612  */
613 static void
614 smb_oplock_wait_ack_cancel(smb_request_t *sr)
615 {
616 	kcondvar_t	*cvp = sr->cancel_arg2;
617 	smb_ofile_t	*ofile = sr->fid_ofile;
618 	smb_node_t	*node = ofile->f_node;
619 
620 	mutex_enter(&node->n_oplock.ol_mutex);
621 	cv_broadcast(cvp);
622 	mutex_exit(&node->n_oplock.ol_mutex);
623 }
624 
625 /*
626  * Wait for an oplock break ACK to arrive.  This is called after
627  * we've sent an oplock break or lease break to the client where
628  * an "Ack break" is expected back.  If we get an Ack, that will
629  * wake us up via smb2_oplock_break_ack or smb2_lease_break_ack.
630  *
631  * Wait until state reduced to NewLevel (or less).
632  * Note that in multi-break cases, we might wait here for just
633  * one ack when another has become pending, in which case the
634  * og_breakto might be a subset of NewLevel.  Wait until the
635  * state field is no longer a superset of NewLevel.
636  */
637 uint32_t
638 smb_oplock_wait_ack(smb_request_t *sr, uint32_t NewLevel)
639 {
640 	smb_ofile_t	*ofile = sr->fid_ofile;
641 	smb_lease_t	*lease = ofile->f_lease;
642 	smb_node_t	*node = ofile->f_node;
643 	smb_oplock_t	*ol = &node->n_oplock;
644 	uint32_t	*state_p;
645 	kcondvar_t	*cv_p;
646 	clock_t		time, rv;
647 	uint32_t	status = 0;
648 	smb_req_state_t  srstate;
649 	uint32_t	wait_mask;
650 
651 	time = ddi_get_lbolt() +
652 	    MSEC_TO_TICK(smb_oplock_timeout_ack);
653 
654 	/*
655 	 * Wait on either lease state or oplock state
656 	 */
657 	if (lease != NULL) {
658 		state_p = &lease->ls_state;
659 		cv_p = &lease->ls_ack_cv;
660 	} else {
661 		state_p = &ofile->f_oplock.og_state;
662 		cv_p = &ofile->f_oplock.og_ack_cv;
663 	}
664 
665 	/*
666 	 * These are all the bits that we wait to be cleared.
667 	 */
668 	wait_mask = ~NewLevel & (CACHE_RWH |
669 	    LEVEL_TWO | LEVEL_ONE | LEVEL_BATCH);
670 
671 	/*
672 	 * Setup cancellation callback
673 	 */
674 	mutex_enter(&sr->sr_mutex);
675 	if (sr->sr_state != SMB_REQ_STATE_ACTIVE) {
676 		mutex_exit(&sr->sr_mutex);
677 		return (NT_STATUS_CANCELLED);
678 	}
679 	sr->sr_state = SMB_REQ_STATE_WAITING_OLBRK;
680 	sr->cancel_method = smb_oplock_wait_ack_cancel;
681 	sr->cancel_arg2 = cv_p;
682 	mutex_exit(&sr->sr_mutex);
683 
684 	/*
685 	 * Enter the wait loop
686 	 */
687 	mutex_enter(&ol->ol_mutex);
688 
689 	while ((*state_p & wait_mask) != 0) {
690 		rv = cv_timedwait(cv_p, &ol->ol_mutex, time);
691 		if (rv < 0) {
692 			/* cv_timewait timeout */
693 			status = NT_STATUS_CANNOT_BREAK_OPLOCK;
694 			break;
695 		}
696 
697 		/*
698 		 * Check if we were woken by smb_request_cancel,
699 		 * which sets state SMB_REQ_STATE_CANCEL_PENDING
700 		 * and signals the CV.  The mutex enter/exit is
701 		 * just to ensure cache visibility of sr_state
702 		 * that was updated in smb_request_cancel.
703 		 */
704 		mutex_enter(&sr->sr_mutex);
705 		srstate = sr->sr_state;
706 		mutex_exit(&sr->sr_mutex);
707 		if (srstate != SMB_REQ_STATE_WAITING_OLBRK) {
708 			break;
709 		}
710 	}
711 	mutex_exit(&ol->ol_mutex);
712 
713 	/*
714 	 * Clear cancellation callback and see if it fired.
715 	 */
716 	mutex_enter(&sr->sr_mutex);
717 	sr->cancel_method = NULL;
718 	sr->cancel_arg2 = NULL;
719 	switch (sr->sr_state) {
720 	case SMB_REQ_STATE_WAITING_OLBRK:
721 		sr->sr_state = SMB_REQ_STATE_ACTIVE;
722 		/* status from above */
723 		break;
724 	case SMB_REQ_STATE_CANCEL_PENDING:
725 		sr->sr_state = SMB_REQ_STATE_CANCELLED;
726 		status = NT_STATUS_CANCELLED;
727 		break;
728 	default:
729 		status = NT_STATUS_INTERNAL_ERROR;
730 		break;
731 	}
732 	mutex_exit(&sr->sr_mutex);
733 
734 	return (status);
735 }
736 
737 /*
738  * Called by smb_request_cancel() via sr->cancel_method
739  * Arg is the smb_node_t with the breaking oplock.
740  */
741 static void
742 smb_oplock_wait_break_cancel(smb_request_t *sr)
743 {
744 	smb_node_t   *node = sr->cancel_arg2;
745 	smb_oplock_t *ol;
746 
747 	SMB_NODE_VALID(node);
748 	ol = &node->n_oplock;
749 
750 	mutex_enter(&ol->ol_mutex);
751 	cv_broadcast(&ol->WaitingOpenCV);
752 	mutex_exit(&ol->ol_mutex);
753 }
754 
755 /*
756  * Wait up to "timeout" mSec. for the current oplock "breaking" flags
757  * to be cleared (by smb_oplock_ack_break or smb_oplock_break_CLOSE).
758  *
759  * Callers of the above public oplock functions:
760  *	smb_oplock_request()
761  *	smb_oplock_ack_break()
762  *	smb_oplock_break_OPEN() ...
763  * check for return status == NT_STATUS_OPLOCK_BREAK_IN_PROGRESS
764  * and call this function to wait for the break to complete.
765  *
766  * Most callers should use this default timeout, which they get
767  * by passing zero as the timeout arg.  This includes places where
768  * we're about to do something that invalidates some cache.
769  */
770 uint32_t
771 smb_oplock_wait_break(smb_request_t *sr, smb_node_t *node, int timeout)
772 {
773 	smb_oplock_t	*ol;
774 	clock_t		time, rv;
775 	uint32_t	status = 0;
776 	smb_req_state_t  srstate;
777 
778 	SMB_NODE_VALID(node);
779 	ol = &node->n_oplock;
780 
781 	if (timeout == 0)
782 		timeout = smb_oplock_timeout_def;
783 	time = MSEC_TO_TICK(timeout) + ddi_get_lbolt();
784 
785 	mutex_enter(&sr->sr_mutex);
786 	if (sr->sr_state != SMB_REQ_STATE_ACTIVE) {
787 		mutex_exit(&sr->sr_mutex);
788 		return (NT_STATUS_CANCELLED);
789 	}
790 	sr->sr_state = SMB_REQ_STATE_WAITING_OLBRK;
791 	sr->cancel_method = smb_oplock_wait_break_cancel;
792 	sr->cancel_arg2 = node;
793 	mutex_exit(&sr->sr_mutex);
794 
795 	mutex_enter(&ol->ol_mutex);
796 	while ((ol->ol_state & BREAK_ANY) != 0) {
797 		ol->waiters++;
798 		rv = cv_timedwait(&ol->WaitingOpenCV,
799 		    &ol->ol_mutex, time);
800 		ol->waiters--;
801 		if (rv < 0) {
802 			/* cv_timewait timeout */
803 			status = NT_STATUS_CANNOT_BREAK_OPLOCK;
804 			break;
805 		}
806 
807 		/*
808 		 * Check if we were woken by smb_request_cancel,
809 		 * which sets state SMB_REQ_STATE_CANCEL_PENDING
810 		 * and signals the CV.  The mutex enter/exit is
811 		 * just to ensure cache visibility of sr_state
812 		 * that was updated in smb_request_cancel.
813 		 */
814 		mutex_enter(&sr->sr_mutex);
815 		srstate = sr->sr_state;
816 		mutex_exit(&sr->sr_mutex);
817 		if (srstate != SMB_REQ_STATE_WAITING_OLBRK) {
818 			break;
819 		}
820 	}
821 
822 	mutex_exit(&ol->ol_mutex);
823 
824 	mutex_enter(&sr->sr_mutex);
825 	sr->cancel_method = NULL;
826 	sr->cancel_arg2 = NULL;
827 	switch (sr->sr_state) {
828 	case SMB_REQ_STATE_WAITING_OLBRK:
829 		sr->sr_state = SMB_REQ_STATE_ACTIVE;
830 		/* status from above */
831 		break;
832 	case SMB_REQ_STATE_CANCEL_PENDING:
833 		sr->sr_state = SMB_REQ_STATE_CANCELLED;
834 		status = NT_STATUS_CANCELLED;
835 		break;
836 	default:
837 		status = NT_STATUS_INTERNAL_ERROR;
838 		break;
839 	}
840 	mutex_exit(&sr->sr_mutex);
841 
842 	return (status);
843 }
844 
845 /*
846  * Simplified version used in smb_fem.c, like above,
847  * but no smb_request_cancel stuff.
848  */
849 uint32_t
850 smb_oplock_wait_break_fem(smb_node_t *node, int timeout)  /* mSec. */
851 {
852 	smb_oplock_t	*ol;
853 	clock_t		time, rv;
854 	uint32_t	status = 0;
855 
856 	if (timeout == 0)
857 		timeout = smb_oplock_timeout_def;
858 
859 	SMB_NODE_VALID(node);
860 	ol = &node->n_oplock;
861 
862 	mutex_enter(&ol->ol_mutex);
863 	time = MSEC_TO_TICK(timeout) + ddi_get_lbolt();
864 
865 	while ((ol->ol_state & BREAK_ANY) != 0) {
866 		ol->waiters++;
867 		rv = cv_timedwait(&ol->WaitingOpenCV,
868 		    &ol->ol_mutex, time);
869 		ol->waiters--;
870 		if (rv < 0) {
871 			status = NT_STATUS_CANNOT_BREAK_OPLOCK;
872 			break;
873 		}
874 	}
875 
876 	mutex_exit(&ol->ol_mutex);
877 
878 	return (status);
879 }
880