xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c (revision 9788d6deb586816d79d2ee1d4c4215f15cb944f5)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021 Tintri by DDN, Inc. All rights reserved.
14  * Copyright 2022 RackTop Systems, Inc.
15  */
16 
17 /*
18  * (SMB1/SMB2) Server-level Oplock support.
19  *
20  * Conceptually, this is a separate layer on top of the
21  * file system (FS) layer oplock code in smb_cmn_oplock.c.
22  * If these layers were more distinct, the FS layer would
23  * need to use call-back functions (installed from here)
24  * to "indicate an oplock break to the server" (see below).
25  * As these layers are all in the same kernel module, the
26  * delivery of these break indications just uses a direct
27  * function call to smb_oplock_ind_break() below.
28  *
29  * This layer is responsible for handling the break indication,
30  * which often requires scheduling a taskq job in the server,
 * and sending an oplock break message to the client using
32  * the appropriate protocol for the open handle affected.
33  *
34  * The details of composing an oplock break message, the
35  * protocol-specific details of requesting an oplock, and
36  * returning that oplock to the client are in the files:
37  *  smb_oplock.c, smb2_oplock.c, smb2_lease.c
38  */
39 
40 #include <smbsrv/smb2_kproto.h>
41 #include <smbsrv/smb_oplock.h>
42 
43 /*
44  * Verify relationship between BREAK_TO_... and CACHE bits,
45  * used when setting the BREAK_TO_... below.
46  */
47 #if BREAK_TO_READ_CACHING != (READ_CACHING << BREAK_SHIFT)
48 #error "BREAK_TO_READ_CACHING"
49 #endif
50 #if BREAK_TO_HANDLE_CACHING != (HANDLE_CACHING << BREAK_SHIFT)
51 #error "BREAK_TO_HANDLE_CACHING"
52 #endif
53 #if BREAK_TO_WRITE_CACHING != (WRITE_CACHING << BREAK_SHIFT)
54 #error "BREAK_TO_WRITE_CACHING"
55 #endif
56 #define	CACHE_RWH (READ_CACHING | WRITE_CACHING | HANDLE_CACHING)
57 
58 /*
59  * This is the timeout used in the thread that sends an
60  * oplock break and waits for the client to respond
61  * before it breaks the oplock locally.
62  */
63 int smb_oplock_timeout_ack = 30000; /* mSec. */
64 
65 /*
66  * This is the timeout used in threads that have just
67  * finished some sort of oplock request and now must
68  * wait for (possibly multiple) breaks to complete.
69  * This value must be at least a couple seconds LONGER
70  * than the ack timeout above so that I/O callers won't
71  * give up waiting before the local ack timeout.
72  */
73 int smb_oplock_timeout_def = 45000; /* mSec. */
74 
75 static void smb_oplock_async_break(void *);
76 static void smb_oplock_hdl_update(smb_request_t *sr);
77 static void smb_oplock_hdl_moved(smb_ofile_t *);
78 static void smb_oplock_hdl_closed(smb_ofile_t *);
79 static void smb_oplock_wait_break_cancel(smb_request_t *sr);
80 
81 
82 /*
83  * 2.1.5.17.3 Indicating an Oplock Break to the Server
84  *
85  * The inputs for indicating an oplock break to the server are:
86  *
87  *	BreakingOplockOpen: The Open used to request the oplock
88  *	  that is now breaking.
89  *	 NewOplockLevel: The type of oplock the requested oplock
90  *	  has been broken to.  Valid values are as follows:
91  *		LEVEL_NONE (that is, no oplock)
92  *		LEVEL_TWO
93  *		A combination of one or more of the following flags:
94  *			READ_CACHING
95  *			HANDLE_CACHING
96  *			WRITE_CACHING
97  *	AcknowledgeRequired: A Boolean value; TRUE if the server
98  *	  MUST acknowledge the oplock break, FALSE if not,
99  *	  as specified in section 2.1.5.18.
100  *	OplockCompletionStatus: The NTSTATUS code to return to the server.
101  *
102  * This algorithm simply represents the completion of an oplock request,
103  * as specified in section 2.1.5.17.1 or section 2.1.5.17.2. The server
104  * is expected to associate the return status from this algorithm with
105  * BreakingOplockOpen, which is the Open passed in when it requested
106  * the oplock that is now breaking.
107  *
108  * It is important to note that because several oplocks can be outstanding
109  * in parallel, although this algorithm represents the completion of an
110  * oplock request, it might not result in the completion of the algorithm
111  * that called it. In particular, calling this algorithm will result in
112  * completion of the caller only if BreakingOplockOpen is the same as the
113  * Open with which the calling algorithm was itself called. To mitigate
114  * confusion, each algorithm that refers to this section will specify
115  * whether that algorithm's operation terminates at that point or not.
116  *
117  * The object store MUST return OplockCompletionStatus,
118  * AcknowledgeRequired, and NewOplockLevel to the server (the algorithm is
119  * as specified in section 2.1.5.17.1 and section 2.1.5.17.2).
120  *
121  * Implementation:
122  *
123  * We use two versions of this function:
124  *	smb_oplock_ind_break_in_ack
125  *	smb_oplock_ind_break
126  *
127  * The first is used when we're handling an Oplock Break Ack.
128  * The second is used when other operations cause a break,
129  * generally in one of the smb_oplock_break_... functions.
130  *
131  * Note that these are call-back functions that may be called with the
132  * node ofile list rwlock held and the node oplock mutex entered, so
133  * these should ONLY schedule oplock break work, and MUST NOT attempt
134  * any actions that might require either of those locks.
135  */
136 
137 /*
138  * smb_oplock_ind_break_in_ack
139  *
140  * Variant of smb_oplock_ind_break() for the oplock Ack handler.
141  * When we need to indicate another oplock break from within the
142  * Ack handler (during the Ack. of some previous oplock break)
143  * we need to make sure this new break indication goes out only
144  * AFTER the reply to the current break ack. is sent out.
145  *
146  * In this case, we always have an SR (the break ack) so we can
147  * append the "ind break" work to the current SR and let the
 * request handler thread do this work after the reply is sent.
149  * Note: this is always an SMB2 or later request, because this
150  * only happens for "granular" oplocks, which are SMB2-only.
151  *
152  * This is mostly the same as smb_oplock_ind_break() except:
153  * - The only CompletionStatus possible is STATUS_CANT_GRANT.
154  * - Instead of taskq_dispatch this appends the new SR to
155  *   the "post work" queue on the current SR (if possible).
156  *
157  * Note called with the node ofile list rwlock held and
158  * the oplock mutex entered.
159  */
void
smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile,
    uint32_t NewLevel, boolean_t AckRequired)
{
	smb_server_t *sv = ofile->f_server;
	smb_node_t *node = ofile->f_node;
	smb_request_t *sr = NULL;
	taskqid_t tqid;
	boolean_t use_postwork = B_TRUE;

	/* Caller (FS-level oplock code) holds both of these. */
	ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock));
	ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex));

	/*
	 * This should happen only with SMB2 or later,
	 * but in case that ever changes...
	 */
	if (ack_sr->session->dialect < SMB_VERS_2_BASE) {
		smb_oplock_ind_break(ofile, NewLevel,
		    AckRequired, STATUS_CANT_GRANT);
		return;
	}

	/*
	 * We're going to schedule a request that will have a
	 * reference to this ofile. Get the hold first.
	 */
	if (!smb_ofile_hold_olbrk(ofile)) {
		/* It's closing (or whatever).  Nothing to do. */
		return;
	}

	/*
	 * When called from Ack processing, we want to use a
	 * request on the session doing the ack, so we can
	 * append "post work" to that session.  If we can't
	 * allocate a request on that session (because it's
	 * now disconnecting) use a request from the server
	 * session like smb_oplock_ind_break does, and then
	 * use taskq_dispatch instead of postwork.
	 */
	sr = smb_request_alloc(ack_sr->session, 0);
	if (sr == NULL) {
		use_postwork = B_FALSE;
		sr = smb_request_alloc(sv->sv_session, 0);
	}
	if (sr == NULL) {
		/*
		 * Server must be shutting down.  We took a
		 * hold on the ofile that must be released,
		 * but we can't release here because we're
		 * called with the node ofile list entered.
		 * See smb_ofile_release_LL.
		 */
		smb_llist_post(&node->n_ofile_list, ofile,
		    smb_ofile_release_LL);
		return;
	}

	/*
	 * Setup the request for the break work.  The ofile hold
	 * taken above passes to sr->fid_ofile and is released in
	 * smb_request_free.  Holds on the tree and user keep those
	 * objects around while this request exists.
	 */
	sr->sr_state = SMB_REQ_STATE_SUBMITTED;
	sr->smb2_async = B_TRUE;
	sr->user_cr = zone_kcred();
	sr->fid_ofile = ofile;
	if (ofile->f_tree != NULL) {
		sr->tid_tree = ofile->f_tree;
		smb_tree_hold_internal(sr->tid_tree);
	}
	if (ofile->f_user != NULL) {
		sr->uid_user = ofile->f_user;
		smb_user_hold_internal(sr->uid_user);
	}
	/* A lease means the break uses the "granular" (lease) form. */
	if (ofile->f_lease != NULL)
		NewLevel |= OPLOCK_LEVEL_GRANULAR;

	sr->arg.olbrk.NewLevel = NewLevel;
	sr->arg.olbrk.AckRequired = AckRequired;

	/*
	 * Could do this in _hdl_update but this way it's
	 * visible in the dtrace fbt entry probe.
	 */
	sr->arg.olbrk.OldLevel = ofile->f_oplock.og_breakto;

	smb_oplock_hdl_update(sr);

	if (use_postwork) {
		/*
		 * Using smb2_cmd_code to indicate what to call.
		 * work func. will call smb_oplock_send_break
		 */
		sr->smb2_cmd_code = SMB2_OPLOCK_BREAK;
		smb2sr_append_postwork(ack_sr, sr);
		return;
	}

	/* Will call smb_oplock_send_break */
	sr->smb2_status = STATUS_CANT_GRANT;
	tqid = taskq_dispatch(sv->sv_worker_pool,
	    smb_oplock_async_break, sr, TQ_SLEEP);
	VERIFY(tqid != TASKQID_INVALID);
}
261 
262 /*
263  * smb_oplock_ind_break
264  *
265  * This is the function described in [MS-FSA] 2.1.5.17.3
266  * which is called many places in the oplock break code.
267  *
268  * Schedule a request & taskq job to do oplock break work
269  * as requested by the FS-level code (smb_cmn_oplock.c).
270  *
271  * See also: smb_oplock_ind_break_in_ack
272  *
273  * Note called with the node ofile list rwlock held and
274  * the oplock mutex entered.
275  */
void
smb_oplock_ind_break(smb_ofile_t *ofile, uint32_t NewLevel,
    boolean_t AckRequired, uint32_t CompletionStatus)
{
	smb_server_t *sv = ofile->f_server;
	smb_node_t *node = ofile->f_node;
	smb_request_t *sr = NULL;
	taskqid_t tqid;

	/* Caller (FS-level oplock code) holds both of these. */
	ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock));
	ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex));

	/*
	 * See notes at smb_oplock_async_break re. CompletionStatus
	 * Check for any invalid codes here, so assert happens in
	 * the thread passing an unexpected value.
	 * The real work happens in a taskq job.
	 */
	switch (CompletionStatus) {

	case NT_STATUS_SUCCESS:
	case STATUS_CANT_GRANT:
		/* Send break via taskq job. */
		break;

	case STATUS_NEW_HANDLE:
		/* Oplock moved to another handle; just update state. */
		smb_oplock_hdl_moved(ofile);
		return;

	case NT_STATUS_OPLOCK_HANDLE_CLOSED:
		/* Handle lost its oplock; just update state. */
		smb_oplock_hdl_closed(ofile);
		return;

	default:
		ASSERT(0);
		return;
	}

	/*
	 * We're going to schedule a request that will have a
	 * reference to this ofile. Get the hold first.
	 */
	if (!smb_ofile_hold_olbrk(ofile)) {
		/* It's closing (or whatever).  Nothing to do. */
		return;
	}

	/*
	 * We need a request allocated on the session that owns
	 * this ofile in order to safely send on that session.
	 *
	 * Note that while we hold a ref. on the ofile, it's
	 * f_session will not change.  An ofile in state
	 * _ORPHANED will have f_session == NULL, but the
	 * f_session won't _change_ while we have a ref,
	 * and won't be torn down under our feet.
	 * Same for f_tree and f_user
	 *
	 * If f_session is NULL, or it's in a state that doesn't
	 * allow new requests, use the special "server" session.
	 */
	if (ofile->f_session != NULL)
		sr = smb_request_alloc(ofile->f_session, 0);
	if (sr == NULL)
		sr = smb_request_alloc(sv->sv_session, 0);
	if (sr == NULL) {
		/*
		 * Server must be shutting down.  We took a
		 * hold on the ofile that must be released,
		 * but we can't release here because we're
		 * called with the node ofile list entered.
		 * See smb_ofile_release_LL.
		 */
		smb_llist_post(&node->n_ofile_list, ofile,
		    smb_ofile_release_LL);
		return;
	}

	/*
	 * Setup the request for the break work.  The ofile hold
	 * taken above passes to sr->fid_ofile and is released in
	 * smb_request_free.  Holds on the tree and user keep those
	 * objects around while this request exists.
	 */
	sr->sr_state = SMB_REQ_STATE_SUBMITTED;
	sr->smb2_async = B_TRUE;
	sr->user_cr = zone_kcred();
	sr->fid_ofile = ofile;
	if (ofile->f_tree != NULL) {
		sr->tid_tree = ofile->f_tree;
		smb_tree_hold_internal(sr->tid_tree);
	}
	if (ofile->f_user != NULL) {
		sr->uid_user = ofile->f_user;
		smb_user_hold_internal(sr->uid_user);
	}
	/* A lease means the break uses the "granular" (lease) form. */
	if (ofile->f_lease != NULL)
		NewLevel |= OPLOCK_LEVEL_GRANULAR;

	sr->arg.olbrk.NewLevel = NewLevel;
	sr->arg.olbrk.AckRequired = AckRequired;
	sr->smb2_status = CompletionStatus;

	/*
	 * Could do this in _hdl_update but this way it's
	 * visible in the dtrace fbt entry probe.
	 */
	sr->arg.olbrk.OldLevel = ofile->f_oplock.og_breakto;

	smb_oplock_hdl_update(sr);

	/* Will call smb_oplock_send_break */
	tqid = taskq_dispatch(sv->sv_worker_pool,
	    smb_oplock_async_break, sr, TQ_SLEEP);
	VERIFY(tqid != TASKQID_INVALID);
}
386 
387 /*
388  * smb_oplock_async_break
389  *
390  * Called via the taskq to handle an asynchronous oplock break.
391  * We have a hold on the ofile, which will be released in
392  * smb_request_free (via sr->fid_ofile)
393  *
394  * Note we may have: sr->uid_user == NULL, sr->tid_tree == NULL.
395  */
static void
smb_oplock_async_break(void *arg)
{
	smb_request_t	*sr = arg;
	uint32_t	CompletionStatus;

	SMB_REQ_VALID(sr);

	/*
	 * The dispatching code stashed the FS-level CompletionStatus
	 * in sr->smb2_status.  Take it out and reset the field so the
	 * break/reply code starts with a clean status.
	 */
	CompletionStatus = sr->smb2_status;
	sr->smb2_status = NT_STATUS_SUCCESS;

	/* Mark this request active, owned by this taskq thread. */
	mutex_enter(&sr->sr_mutex);
	sr->sr_worker = curthread;
	sr->sr_state = SMB_REQ_STATE_ACTIVE;
	mutex_exit(&sr->sr_mutex);

	/*
	 * Note that the CompletionStatus from the FS level
	 * (smb_cmn_oplock.c) encodes what kind of action we
	 * need to take at the SMB level.
	 */
	switch (CompletionStatus) {

	case STATUS_CANT_GRANT:
	case NT_STATUS_SUCCESS:
		smb_oplock_send_break(sr);
		break;

	default:
		/* Checked by caller. */
		ASSERT(0);
		break;
	}

	/*
	 * If the break left durable-handle state dirty, write it
	 * out now (see smb2_dh_update_nvfile).
	 */
	if (sr->dh_nvl_dirty) {
		sr->dh_nvl_dirty = B_FALSE;
		smb2_dh_update_nvfile(sr);
	}

	/* Frees the request, dropping the ofile hold (sr->fid_ofile). */
	sr->sr_state = SMB_REQ_STATE_COMPLETED;
	smb_request_free(sr);
}
438 
439 /*
440  * Send an oplock (or lease) break to the client.
441  * If we can't, then do a local break.
442  *
443  * This is called either from smb_oplock_async_break via a
444  * taskq job scheduled in smb_oplock_ind_break, or from the
445  * smb2sr_append_postwork() mechanism when we're doing a
446  * "break in ack", via smb_oplock_ind_break_in_ack.
447  *
448  * We don't always have an sr->session here, so
449  * determine the oplock type (lease etc) from
450  * f_lease and f_oplock.og_dialect etc.
451  */
452 void
453 smb_oplock_send_break(smb_request_t *sr)
454 {
455 	smb_ofile_t	*ofile = sr->fid_ofile;
456 
457 	if (ofile->f_lease != NULL)
458 		smb2_lease_send_break(sr);
459 	else if (ofile->f_oplock.og_dialect >= SMB_VERS_2_BASE)
460 		smb2_oplock_send_break(sr);
461 	else
462 		smb1_oplock_send_break(sr);
463 }
464 
465 /*
466  * Called by smb_oplock_ind_break for the case STATUS_NEW_HANDLE,
467  * which is an alias for NT_STATUS_OPLOCK_SWITCHED_TO_NEW_HANDLE.
468  *
469  * The FS-level oplock layer calls this to update the SMB-level state
470  * when the oplock for some lease is about to move to a different
471  * ofile on the lease.
472  *
473  * To avoid later confusion, clear og_state on this ofile now.
474  * Without this, smb_oplock_move() may issue debug complaints
475  * about moving oplock state onto a non-empty oplock.
476  */
477 static const smb_ofile_t invalid_ofile;
478 static void
479 smb_oplock_hdl_moved(smb_ofile_t *ofile)
480 {
481 	smb_lease_t *ls = ofile->f_lease;
482 
483 	ASSERT(ls != NULL);
484 	if (ls != NULL && ls->ls_oplock_ofile == ofile)
485 		ls->ls_oplock_ofile = (smb_ofile_t *)&invalid_ofile;
486 
487 	ofile->f_oplock.og_state = 0;
488 	ofile->f_oplock.og_breakto = 0;
489 	ofile->f_oplock.og_breaking = B_FALSE;
490 }
491 
492 /*
493  * See: NT_STATUS_OPLOCK_HANDLE_CLOSED above and
494  * smb_ofile_close, smb_oplock_break_CLOSE.
495  *
496  * The FS-level oplock layer calls this to update the
497  * SMB-level state when a handle loses its oplock.
498  */
499 static void
500 smb_oplock_hdl_closed(smb_ofile_t *ofile)
501 {
502 	smb_lease_t *lease = ofile->f_lease;
503 
504 	if (lease != NULL) {
505 		if (lease->ls_oplock_ofile == ofile) {
506 			/*
507 			 * smb2_lease_ofile_close should have
508 			 * moved the oplock to another ofile.
509 			 */
510 			ASSERT(0);
511 			lease->ls_oplock_ofile = NULL;
512 		}
513 	}
514 	ofile->f_oplock.og_state = 0;
515 	ofile->f_oplock.og_breakto = 0;
516 	ofile->f_oplock.og_breaking = B_FALSE;
517 }
518 
519 /*
520  * smb_oplock_hdl_update
521  *
522  * Called by smb_oplock_ind_break (and ...in_ack) just before we
 * schedule the smb_oplock_async_break / smb_oplock_send_break taskq job,
524  * so we can make any state changes that should happen immediately.
525  *
526  * Here, keep track of what we will send to the client.
 * Saves old state in arg.olbrk.OldLevel
528  *
529  * Note that because we may be in the midst of processing an
530  * smb_oplock_ack_break call here, the _breaking flag will be
531  * temporarily false, and is set true again if this ack causes
532  * another break.  This makes it tricky to know when to update
533  * the epoch, which is not supposed to increment when there's
534  * already an unacknowledged break out to the client.
535  * We can recognize that by comparing ls_state vs ls_breakto.
536  * If no unacknowledged break, ls_state == ls_breakto.
537  */
static void
smb_oplock_hdl_update(smb_request_t *sr)
{
	smb_ofile_t	*ofile = sr->fid_ofile;
	smb_lease_t	*lease = ofile->f_lease;
	uint32_t	NewLevel = sr->arg.olbrk.NewLevel;
	boolean_t	AckReq = sr->arg.olbrk.AckRequired;

#ifdef	DEBUG
	smb_node_t *node = ofile->f_node;
	ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock));
	ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex));
#endif

	/* Caller sets arg.olbrk.OldLevel */
	/* Record the level we're breaking to; the Ack completes it. */
	ofile->f_oplock.og_breakto = NewLevel;
	ofile->f_oplock.og_breaking = B_TRUE;
	if (lease != NULL) {
		// If no unacknowledged break, update epoch.
		// (See the block comment above re. ls_state vs ls_breakto.)
		if (lease->ls_breakto == lease->ls_state)
			lease->ls_epoch++;

		lease->ls_breakto = NewLevel;
		lease->ls_breaking = B_TRUE;
	}

	if (!AckReq) {
		/*
		 * Not expecting an Ack from the client.
		 * Update state immediately.
		 */
		ofile->f_oplock.og_state = NewLevel;
		ofile->f_oplock.og_breaking = B_FALSE;
		if (lease != NULL) {
			lease->ls_state = NewLevel;
			lease->ls_breaking = B_FALSE;
		}
		/* Persist the new state for durable handles. */
		if (ofile->dh_persist) {
			smb2_dh_update_oplock(sr, ofile);
		}
	}
}
580 
581 /*
582  * Helper for smb_ofile_close
583  *
584  * Note that a client may close an ofile in response to an
 * oplock break or lease break instead of doing an Ack break,
586  * so this must wake anything that might be waiting on an ack.
587  */
void
smb_oplock_close(smb_ofile_t *ofile)
{
	smb_node_t *node = ofile->f_node;

	/* Same lock order as the break callbacks: list rwlock, then mutex. */
	smb_llist_enter(&node->n_ofile_list, RW_READER);
	mutex_enter(&node->n_oplock.ol_mutex);

	/* Do the close-time oplock work only once per ofile. */
	if (ofile->f_oplock_closing == B_FALSE) {
		ofile->f_oplock_closing = B_TRUE;

		/* Let the lease move its oplock to another handle. */
		if (ofile->f_lease != NULL)
			smb2_lease_ofile_close(ofile);

		smb_oplock_break_CLOSE(node, ofile);

		ofile->f_oplock.og_state = 0;
		ofile->f_oplock.og_breakto = 0;
		ofile->f_oplock.og_breaking = B_FALSE;
		/* Wake any threads waiting for an ack on this handle. */
		cv_broadcast(&ofile->f_oplock.og_ack_cv);
	}

	mutex_exit(&node->n_oplock.ol_mutex);
	smb_llist_exit(&node->n_ofile_list);
}
613 
614 /*
615  * Called by smb_request_cancel() via sr->cancel_method
 * Arg (cancel_arg2) is the kcondvar_t that smb_oplock_wait_ack waits on.
617  */
618 static void
619 smb_oplock_wait_ack_cancel(smb_request_t *sr)
620 {
621 	kcondvar_t	*cvp = sr->cancel_arg2;
622 	smb_ofile_t	*ofile = sr->fid_ofile;
623 	smb_node_t	*node = ofile->f_node;
624 
625 	mutex_enter(&node->n_oplock.ol_mutex);
626 	cv_broadcast(cvp);
627 	mutex_exit(&node->n_oplock.ol_mutex);
628 }
629 
630 /*
631  * Wait for an oplock break ACK to arrive.  This is called after
632  * we've sent an oplock break or lease break to the client where
633  * an "Ack break" is expected back.  If we get an Ack, that will
634  * wake us up via smb2_oplock_break_ack or smb2_lease_break_ack.
635  *
636  * Wait until state reduced to NewLevel (or less).
637  * Note that in multi-break cases, we might wait here for just
638  * one ack when another has become pending, in which case the
639  * og_breakto might be a subset of NewLevel.  Wait until the
640  * state field is no longer a superset of NewLevel.
641  */
uint32_t
smb_oplock_wait_ack(smb_request_t *sr, uint32_t NewLevel)
{
	smb_ofile_t	*ofile = sr->fid_ofile;
	smb_lease_t	*lease = ofile->f_lease;
	smb_node_t	*node = ofile->f_node;
	smb_oplock_t	*ol = &node->n_oplock;
	uint32_t	*state_p;
	kcondvar_t	*cv_p;
	clock_t		time, rv;
	uint32_t	status = 0;
	smb_req_state_t  srstate;
	uint32_t	wait_mask;

	/* Absolute deadline for the ack (see smb_oplock_timeout_ack). */
	time = ddi_get_lbolt() +
	    MSEC_TO_TICK(smb_oplock_timeout_ack);

	/*
	 * Wait on either lease state or oplock state
	 */
	if (lease != NULL) {
		state_p = &lease->ls_state;
		cv_p = &lease->ls_ack_cv;
	} else {
		state_p = &ofile->f_oplock.og_state;
		cv_p = &ofile->f_oplock.og_ack_cv;
	}

	/*
	 * These are all the bits that we wait to be cleared.
	 * (The state must stop being a superset of NewLevel.)
	 */
	wait_mask = ~NewLevel & (CACHE_RWH |
	    LEVEL_TWO | LEVEL_ONE | LEVEL_BATCH);

	/*
	 * Setup cancellation callback
	 */
	mutex_enter(&sr->sr_mutex);
	if (sr->sr_state != SMB_REQ_STATE_ACTIVE) {
		mutex_exit(&sr->sr_mutex);
		return (NT_STATUS_CANCELLED);
	}
	sr->sr_state = SMB_REQ_STATE_WAITING_OLBRK;
	sr->cancel_method = smb_oplock_wait_ack_cancel;
	sr->cancel_arg2 = cv_p;
	mutex_exit(&sr->sr_mutex);

	/*
	 * Enter the wait loop
	 */
	mutex_enter(&ol->ol_mutex);

	while ((*state_p & wait_mask) != 0) {
		rv = cv_timedwait(cv_p, &ol->ol_mutex, time);
		if (rv < 0) {
			/* cv_timedwait timeout */
			status = NT_STATUS_CANNOT_BREAK_OPLOCK;
			break;
		}

		/*
		 * Check if we were woken by smb_request_cancel,
		 * which sets state SMB_REQ_STATE_CANCEL_PENDING
		 * and signals the CV.  The mutex enter/exit is
		 * just to ensure cache visibility of sr_state
		 * that was updated in smb_request_cancel.
		 */
		mutex_enter(&sr->sr_mutex);
		srstate = sr->sr_state;
		mutex_exit(&sr->sr_mutex);
		if (srstate != SMB_REQ_STATE_WAITING_OLBRK) {
			break;
		}
	}
	mutex_exit(&ol->ol_mutex);

	/*
	 * Clear cancellation callback and see if it fired.
	 */
	mutex_enter(&sr->sr_mutex);
	sr->cancel_method = NULL;
	sr->cancel_arg2 = NULL;
	switch (sr->sr_state) {
	case SMB_REQ_STATE_WAITING_OLBRK:
		/* Normal wakeup or timeout. */
		sr->sr_state = SMB_REQ_STATE_ACTIVE;
		/* status from above */
		break;
	case SMB_REQ_STATE_CANCEL_PENDING:
		/* Woken by smb_request_cancel. */
		sr->sr_state = SMB_REQ_STATE_CANCELLED;
		status = NT_STATUS_CANCELLED;
		break;
	default:
		status = NT_STATUS_INTERNAL_ERROR;
		break;
	}
	mutex_exit(&sr->sr_mutex);

	return (status);
}
741 
742 /*
743  * Called by smb_request_cancel() via sr->cancel_method
744  * Arg is the smb_node_t with the breaking oplock.
745  */
746 static void
747 smb_oplock_wait_break_cancel(smb_request_t *sr)
748 {
749 	smb_node_t   *node = sr->cancel_arg2;
750 	smb_oplock_t *ol;
751 
752 	SMB_NODE_VALID(node);
753 	ol = &node->n_oplock;
754 
755 	mutex_enter(&ol->ol_mutex);
756 	cv_broadcast(&ol->WaitingOpenCV);
757 	mutex_exit(&ol->ol_mutex);
758 }
759 
760 /*
761  * Wait up to "timeout" mSec. for the current oplock "breaking" flags
762  * to be cleared (by smb_oplock_ack_break or smb_oplock_break_CLOSE).
763  *
764  * Callers of the above public oplock functions:
765  *	smb_oplock_request()
766  *	smb_oplock_ack_break()
767  *	smb_oplock_break_OPEN() ...
768  * check for return status == NT_STATUS_OPLOCK_BREAK_IN_PROGRESS
769  * and call this function to wait for the break to complete.
770  *
771  * Most callers should use this default timeout, which they get
772  * by passing zero as the timeout arg.  This include places where
773  * we're about to do something that invalidates some cache.
774  */
uint32_t
smb_oplock_wait_break(smb_request_t *sr, smb_node_t *node, int timeout)
{
	smb_oplock_t	*ol;
	clock_t		time, rv;
	uint32_t	status = 0;
	smb_req_state_t  srstate;

	SMB_NODE_VALID(node);
	ol = &node->n_oplock;

	/* Zero means "use the default" (see smb_oplock_timeout_def). */
	if (timeout == 0)
		timeout = smb_oplock_timeout_def;
	time = MSEC_TO_TICK(timeout) + ddi_get_lbolt();

	/* Setup cancellation callback (as in smb_oplock_wait_ack). */
	mutex_enter(&sr->sr_mutex);
	if (sr->sr_state != SMB_REQ_STATE_ACTIVE) {
		mutex_exit(&sr->sr_mutex);
		return (NT_STATUS_CANCELLED);
	}
	sr->sr_state = SMB_REQ_STATE_WAITING_OLBRK;
	sr->cancel_method = smb_oplock_wait_break_cancel;
	sr->cancel_arg2 = node;
	mutex_exit(&sr->sr_mutex);

	/* Wait until no BREAK_ANY bits remain, or timeout/cancel. */
	mutex_enter(&ol->ol_mutex);
	while ((ol->ol_state & BREAK_ANY) != 0) {
		ol->waiters++;
		rv = cv_timedwait(&ol->WaitingOpenCV,
		    &ol->ol_mutex, time);
		ol->waiters--;
		if (rv < 0) {
			/* cv_timedwait timeout */
			status = NT_STATUS_CANNOT_BREAK_OPLOCK;
			break;
		}

		/*
		 * Check if we were woken by smb_request_cancel,
		 * which sets state SMB_REQ_STATE_CANCEL_PENDING
		 * and signals the CV.  The mutex enter/exit is
		 * just to ensure cache visibility of sr_state
		 * that was updated in smb_request_cancel.
		 */
		mutex_enter(&sr->sr_mutex);
		srstate = sr->sr_state;
		mutex_exit(&sr->sr_mutex);
		if (srstate != SMB_REQ_STATE_WAITING_OLBRK) {
			break;
		}
	}

	mutex_exit(&ol->ol_mutex);

	/* Clear cancellation callback and see if it fired. */
	mutex_enter(&sr->sr_mutex);
	sr->cancel_method = NULL;
	sr->cancel_arg2 = NULL;
	switch (sr->sr_state) {
	case SMB_REQ_STATE_WAITING_OLBRK:
		/* Normal wakeup or timeout. */
		sr->sr_state = SMB_REQ_STATE_ACTIVE;
		/* status from above */
		break;
	case SMB_REQ_STATE_CANCEL_PENDING:
		/* Woken by smb_request_cancel. */
		sr->sr_state = SMB_REQ_STATE_CANCELLED;
		status = NT_STATUS_CANCELLED;
		break;
	default:
		status = NT_STATUS_INTERNAL_ERROR;
		break;
	}
	mutex_exit(&sr->sr_mutex);

	return (status);
}
849 
850 /*
851  * Simplified version used in smb_fem.c, like above,
852  * but no smb_request_cancel stuff.
853  */
854 uint32_t
855 smb_oplock_wait_break_fem(smb_node_t *node, int timeout)  /* mSec. */
856 {
857 	smb_oplock_t	*ol;
858 	clock_t		time, rv;
859 	uint32_t	status = 0;
860 
861 	if (timeout == 0)
862 		timeout = smb_oplock_timeout_def;
863 
864 	SMB_NODE_VALID(node);
865 	ol = &node->n_oplock;
866 
867 	mutex_enter(&ol->ol_mutex);
868 	time = MSEC_TO_TICK(timeout) + ddi_get_lbolt();
869 
870 	while ((ol->ol_state & BREAK_ANY) != 0) {
871 		ol->waiters++;
872 		rv = cv_timedwait(&ol->WaitingOpenCV,
873 		    &ol->ol_mutex, time);
874 		ol->waiters--;
875 		if (rv < 0) {
876 			status = NT_STATUS_CANNOT_BREAK_OPLOCK;
877 			break;
878 		}
879 	}
880 
881 	mutex_exit(&ol->ol_mutex);
882 
883 	return (status);
884 }
885