xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb_oplock.c (revision 3d393ee6c37fa10ac512ed6d36109ad616dc7c1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"@(#)smb_oplock.c	1.5	08/08/07 SMI"
27 
28 /*
29  * SMB Locking library functions.
30  */
31 
32 #include <smbsrv/smb_incl.h>
33 #include <smbsrv/smb_fsops.h>
34 #include <inet/tcp.h>
35 
36 /*
37  * Oplock functionality enable/disable
38  */
39 
40 /*
41  *	Magic		0xFF 'S' 'M' 'B'
42  *	smb_com 	a byte, the "first" command
43  *	Error		a 4-byte union, ignored in a request
44  *	smb_flg		a one byte set of eight flags
45  *	smb_flg2	a two byte set of 16 flags
46  *	.		twelve reserved bytes, have a role
47  *			in connectionless transports (IPX, UDP?)
48  *	smb_tid		a 16-bit tree ID, a mount point sorta,
 *			0xFFFF if this command does not have
50  *			or require a tree context
51  *	smb_pid		a 16-bit process ID
52  *	smb_uid		a 16-bit user ID, specific to this "session"
53  *			and mapped to a system (bona-fide) UID
54  *	smb_mid		a 16-bit multiplex ID, used to differentiate
55  *			multiple simultaneous requests from the same
56  *			process (pid) (ref RPC "xid")
57  *
58  * SMB_COM_LOCKING_ANDX allows both locking and/or unlocking of file range(s).
59  *
60  *  Client Request                     Description
61  *  ================================== =================================
62  *
63  *  UCHAR WordCount;                   Count of parameter words = 8
64  *  UCHAR AndXCommand;                 Secondary (X) command;  0xFF = none
65  *  UCHAR AndXReserved;                Reserved (must be 0)
66  *  USHORT AndXOffset;                 Offset to next command WordCount
67  *  USHORT Fid;                        File handle
68  *  UCHAR LockType;                    See LockType table below
69  *  UCHAR OplockLevel;                 The new oplock level
70  *  ULONG Timeout;                     Milliseconds to wait for unlock
71  *  USHORT NumberOfUnlocks;            Num. unlock range structs following
72  *  USHORT NumberOfLocks;              Num. lock range structs following
73  *  USHORT ByteCount;                  Count of data bytes
74  *  LOCKING_ANDX_RANGE Unlocks[];      Unlock ranges
75  *  LOCKING_ANDX_RANGE Locks[];        Lock ranges
76  *
77  *  LockType Flag Name            Value Description
78  *  ============================  ===== ================================
79  *
80  *  LOCKING_ANDX_SHARED_LOCK      0x01  Read-only lock
81  *  LOCKING_ANDX_OPLOCK_RELEASE   0x02  Oplock break notification
82  *  LOCKING_ANDX_CHANGE_LOCKTYPE  0x04  Change lock type
83  *  LOCKING_ANDX_CANCEL_LOCK      0x08  Cancel outstanding request
84  *  LOCKING_ANDX_LARGE_FILES      0x10  Large file locking format
85  *
86  *  LOCKING_ANDX_RANGE Format
87  *  =====================================================================
88  *
89  *  USHORT Pid;                        PID of process "owning" lock
90  *  ULONG Offset;                      Offset to bytes to [un]lock
91  *  ULONG Length;                      Number of bytes to [un]lock
92  *
93  *  Large File LOCKING_ANDX_RANGE Format
94  *  =====================================================================
95  *
96  *  USHORT Pid;                        PID of process "owning" lock
97  *  USHORT Pad;                        Pad to DWORD align (mbz)
98  *  ULONG OffsetHigh;                  Offset to bytes to [un]lock
99  *                                      (high)
100  *  ULONG OffsetLow;                   Offset to bytes to [un]lock (low)
101  *  ULONG LengthHigh;                  Number of bytes to [un]lock
102  *                                      (high)
103  *  ULONG LengthLow;                   Number of bytes to [un]lock (low)
104  *
105  *  Server Response                    Description
106  *  ================================== =================================
107  *
108  *  UCHAR WordCount;                   Count of parameter words = 2
109  *  UCHAR AndXCommand;                 Secondary (X) command;  0xFF =
110  *                                      none
111  *  UCHAR AndXReserved;                Reserved (must be 0)
112  *  USHORT AndXOffset;                 Offset to next command WordCount
113  *  USHORT ByteCount;                  Count of data bytes = 0
114  *
115  */
116 
117 /*
118  * smb_oplock_acquire
119  *
120  * Attempt to acquire an oplock. Note that the oplock granted may be
121  * none, i.e. the oplock was not granted.
122  *
123  * Grant an oplock to the requestor if this session is the only one
124  * that has the file open, regardless of the number of instances of
125  * the file opened by this session.
126  *
127  * However, if there is no oplock on this file and there is already
128  * at least one open, we will not grant an oplock, even if the only
129  * existing opens are from the same client.  This is "server discretion."
130  *
131  * An oplock may need to be broken in order for one to be granted, and
132  * depending on what action is taken by the other client (unlock or close),
133  * an oplock may or may not be granted.  (The breaking of an oplock is
134  * done earlier in the calling path.)
135  *
136  * XXX: Node synchronization is not yet implemented.  However, racing
137  * opens are handled thus:
138  *
139  * A racing oplock acquire can happen in the open path between
140  * smb_oplock_break() and smb_fsop_open(), but no later.  (Once
141  * the file is open via smb_fsop_open()/VOP_OPEN,
142  * smb_fsop_oplock_install() will not be able to install an oplock,
143  * which requires an open count of 1.)
144  *
145  * Hence, we can safely break any oplock that came in after the
146  * smb_oplock_break() done previously in the open path, knowing that
147  * no other racing oplock acquisitions should be able to succeed
148  * because we already have the file open (see above).
149  *
150  * The type of oplock being requested is passed in op->my_flags.  The result
151  * is also returned in op->my_flags.
152  *
153  * (Note that exclusive and batch oplocks are treated interchangeably.)
154  *
 * Returns NT status codes:
 *	NT_STATUS_SUCCESS	Always; whether an oplock was granted
 *				is reported in op->my_flags.
158  */
159 DWORD
160 smb_oplock_acquire(
161     smb_request_t	*sr,
162     smb_ofile_t		*of,
163     struct open_param	*op)
164 {
165 	smb_node_t		*node;
166 	unsigned int		level;
167 
168 	ASSERT(sr);
169 	ASSERT(of);
170 	ASSERT(op);
171 	ASSERT(op->fqi.last_attr.sa_vattr.va_type == VREG);
172 
173 	level = op->my_flags & MYF_OPLOCK_MASK;
174 
175 	op->my_flags &= ~MYF_OPLOCK_MASK;
176 
177 	if ((sr->sr_cfg->skc_oplock_enable == 0) ||
178 	    smb_tree_has_feature(of->f_tree, SMB_TREE_NO_OPLOCKS))
179 		return (NT_STATUS_SUCCESS);
180 
181 	if (!((MYF_IS_EXCLUSIVE_OPLOCK(level)) ||
182 	    (MYF_IS_BATCH_OPLOCK(level))))
183 		return (NT_STATUS_SUCCESS);
184 
185 	node = of->f_node;
186 
187 	smb_rwx_rwenter(&node->n_lock, RW_WRITER);
188 
189 	if (EXCLUSIVE_OPLOCK_IN_FORCE(node) ||
190 	    BATCH_OPLOCK_IN_FORCE(node)) {
191 
192 		smb_rwx_rwexit(&node->n_lock);
193 
194 		if (SMB_SAME_SESSION(sr->session,
195 		    node->n_oplock.op_ofile->f_session)) {
196 			op->my_flags |= level;
197 			return (NT_STATUS_SUCCESS);
198 		} else if (SMB_ATTR_ONLY_OPEN(op)) {
199 			ASSERT(!(op->my_flags & MYF_OPLOCK_MASK));
200 			return (NT_STATUS_SUCCESS);
201 		}
202 
203 		smb_oplock_break(node);
204 
205 		smb_rwx_rwenter(&node->n_lock, RW_WRITER);
206 	}
207 
208 	if (smb_fsop_oplock_install(node, of->f_mode) != 0) {
209 		smb_rwx_rwexit(&node->n_lock);
210 		return (NT_STATUS_SUCCESS);
211 	}
212 
213 	node->n_oplock.op_ofile = of;
214 	node->n_oplock.op_ipaddr = sr->session->ipaddr;
215 	node->n_oplock.op_kid = sr->session->s_kid;
216 	node->flags &= ~NODE_OPLOCKS_IN_FORCE;
217 
218 	if (MYF_IS_EXCLUSIVE_OPLOCK(level))
219 		node->flags |= NODE_EXCLUSIVE_OPLOCK;
220 
221 	if (MYF_IS_BATCH_OPLOCK(level))
222 		node->flags |= NODE_BATCH_OPLOCK;
223 
224 	op->my_flags |= level;
225 
226 	smb_rwx_rwexit(&node->n_lock);
227 
228 	return (NT_STATUS_SUCCESS);
229 }
230 
231 /*
232  * smb_oplock_break
233  *
234  * The oplock break may succeed for multiple reasons: file close, oplock
235  * release, holder connection dropped, requesting client disconnect etc.
236  * Whatever the reason, the oplock should be broken when this function
237  * returns. The exceptions are when the client making this request gets
238  * disconnected or when another client is handling the break and it gets
239  * disconnected.
240  *
 * This function returns no status (it is declared void).  On return
 * the oplock has either been released by its holder, been dropped
 * because the holder's session is disconnected or terminated, or
 * been forcibly released after the wait for the holder's response
 * timed out.
246  */
247 
248 void
249 smb_oplock_break(smb_node_t *node)
250 {
251 	smb_session_t		*oplock_session;
252 	smb_ofile_t		*oplock_ofile;
253 	struct mbuf_chain	mbc;
254 	int			retries = 0;
255 	clock_t			elapsed_time;
256 	clock_t			max_time;
257 	boolean_t		flag;
258 
259 	smb_rwx_rwenter(&node->n_lock, RW_WRITER);
260 
261 	if (!OPLOCKS_IN_FORCE(node)) {
262 		smb_rwx_rwexit(&node->n_lock);
263 		return;
264 	}
265 
266 	if (node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) {
267 		elapsed_time = 0;
268 		max_time = MSEC_TO_TICK(smb_oplock_timeout * OPLOCK_RETRIES);
269 		/*
270 		 * Another client is already attempting to break the oplock.
271 		 * We wait for it to finish. If the caller was trying to
272 		 * acquire an oplock, he should retry in case the client's
273 		 * connection was dropped while trying to break the oplock.
274 		 *
275 		 * If the holder's connection has been dropped, we yield to
276 		 * allow the thread handling the break to detect it and set
277 		 * the flags.
278 		 */
279 		while ((node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) &&
280 		    (elapsed_time < max_time)) {
281 			clock_t	timeleft;
282 
283 			timeleft = smb_rwx_rwwait(&node->n_lock, max_time);
284 			if (timeleft == -1) {
285 				elapsed_time = max_time;
286 			} else {
287 				elapsed_time += max_time - timeleft;
288 			}
289 		}
290 		/*
291 		 * If there are no oplocks in force we're done.
292 		 */
293 		if (!OPLOCKS_IN_FORCE(node)) {
294 			smb_rwx_rwexit(&node->n_lock);
295 			return;
296 		} else {
297 			/*
298 			 * This is an anomalous condition.
299 			 * Cancel/release the oplock.
300 			 */
301 			smb_oplock_release(node, B_TRUE);
302 			smb_rwx_rwexit(&node->n_lock);
303 			return;
304 		}
305 	}
306 
307 	oplock_ofile = node->n_oplock.op_ofile;
308 	ASSERT(oplock_ofile);
309 
310 	oplock_session = oplock_ofile->f_session;
311 	ASSERT(oplock_session);
312 
313 	/*
314 	 * Start oplock break.
315 	 */
316 
317 	node->n_oplock.op_flags |= OPLOCK_FLAG_BREAKING;
318 
319 	smb_rwx_rwexit(&node->n_lock);
320 
321 	max_time = MSEC_TO_TICK(smb_oplock_timeout);
322 	do {
323 		MBC_INIT(&mbc, MLEN);
324 		(void) smb_mbc_encodef(&mbc, "Mb19.wwwwbb3.ww10.",
325 		    SMB_COM_LOCKING_ANDX, oplock_ofile->f_tree->t_tid,
326 		    0xffff, 0, 0xffff, 8, 0xff, oplock_ofile->f_fid,
327 		    LOCKING_ANDX_OPLOCK_RELEASE);
328 
329 		flag = B_TRUE;
330 		smb_rwx_rwenter(&oplock_session->s_lock, RW_WRITER);
331 		while (flag) {
332 			switch (oplock_session->s_state) {
333 			case SMB_SESSION_STATE_DISCONNECTED:
334 			case SMB_SESSION_STATE_TERMINATED:
335 				smb_rwx_rwexit(&oplock_session->s_lock);
336 				smb_rwx_rwenter(&node->n_lock, RW_WRITER);
337 
338 				node->flags &= ~NODE_OPLOCKS_IN_FORCE;
339 				node->n_oplock.op_flags &=
340 				    ~OPLOCK_FLAG_BREAKING;
341 				node->n_oplock.op_ofile = NULL;
342 				bzero(&node->n_oplock.op_ipaddr,
343 				    sizeof (node->n_oplock.op_ipaddr));
344 				node->n_oplock.op_kid = 0;
345 
346 				smb_rwx_rwexit(&node->n_lock);
347 
348 				return;
349 
350 			case SMB_SESSION_STATE_OPLOCK_BREAKING:
351 				flag = B_FALSE;
352 				break;
353 
354 			case SMB_SESSION_STATE_NEGOTIATED:
355 				oplock_session->s_state =
356 				    SMB_SESSION_STATE_OPLOCK_BREAKING;
357 				flag = B_FALSE;
358 				break;
359 
360 			default:
361 				(void) smb_rwx_rwwait(&oplock_session->s_lock,
362 				    -1);
363 				break;
364 			}
365 		}
366 		smb_rwx_rwexit(&oplock_session->s_lock);
367 
368 		(void) smb_session_send(oplock_session, 0, &mbc);
369 
370 		elapsed_time = 0;
371 
372 		smb_rwx_rwenter(&node->n_lock, RW_WRITER);
373 		while ((node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) &&
374 		    (elapsed_time < max_time)) {
375 			clock_t	timeleft;
376 
377 			timeleft = smb_rwx_rwwait(&node->n_lock, max_time);
378 			if (timeleft == -1) {
379 				elapsed_time = max_time;
380 			} else {
381 				elapsed_time += max_time - timeleft;
382 			}
383 		}
384 
385 		if (!OPLOCKS_IN_FORCE(node)) {
386 			/*
387 			 * smb_oplock_release() was called
388 			 */
389 			smb_rwx_rwexit(&node->n_lock);
390 			return;
391 		}
392 	} while (++retries < OPLOCK_RETRIES);
393 
394 	/*
395 	 * Retries exhausted and timed out.
396 	 * Cancel the oplock and continue.
397 	 */
398 
399 	smb_oplock_release(node, B_TRUE);
400 
401 	smb_rwx_rwexit(&node->n_lock);
402 }
403 
404 /*
405  * smb_oplock_release
406  *
407  * This function uninstalls the FEM oplock monitors and
408  * clears all flags in relation to an oplock on the
409  * given node.
410  *
411  * The function can be called with the node->n_lock held
412  * or not held.
413  */
414 
415 void /*ARGSUSED*/
416 smb_oplock_release(smb_node_t *node, boolean_t have_rwx)
417 {
418 	if (!have_rwx)
419 		smb_rwx_rwenter(&node->n_lock, RW_WRITER);
420 
421 	if (!OPLOCKS_IN_FORCE(node)) {
422 		if (!have_rwx)
423 			smb_rwx_rwexit(&node->n_lock);
424 		return;
425 	}
426 
427 	smb_fsop_oplock_uninstall(node);
428 
429 	node->flags &= ~NODE_OPLOCKS_IN_FORCE;
430 	node->n_oplock.op_flags &= ~OPLOCK_FLAG_BREAKING;
431 	node->n_oplock.op_ofile = NULL;
432 	bzero(&node->n_oplock.op_ipaddr, sizeof (node->n_oplock.op_ipaddr));
433 	node->n_oplock.op_kid = 0;
434 
435 	if (!have_rwx)
436 		smb_rwx_rwexit(&node->n_lock);
437 }
438 
439 /*
440  * smb_oplock_conflict
441  *
442  * The two checks on "session" and "op" are primarily for the open path.
443  * Other CIFS functions may call smb_oplock_conflict() with a session
444  * pointer so as to do the session check.
445  */
446 
447 boolean_t
448 smb_oplock_conflict(smb_node_t *node, smb_session_t *session,
449     struct open_param *op)
450 {
451 	smb_session_t		*oplock_session;
452 	smb_ofile_t		*oplock_ofile;
453 
454 	smb_rwx_rwenter(&node->n_lock, RW_READER);
455 
456 	if (!OPLOCKS_IN_FORCE(node)) {
457 		smb_rwx_rwexit(&node->n_lock);
458 		return (B_FALSE);
459 	}
460 
461 	oplock_ofile = node->n_oplock.op_ofile;
462 	ASSERT(oplock_ofile);
463 
464 	oplock_session = oplock_ofile->f_session;
465 	ASSERT(oplock_session);
466 
467 	if (SMB_SAME_SESSION(session, oplock_session)) {
468 		smb_rwx_rwexit(&node->n_lock);
469 		return (B_FALSE);
470 	}
471 
472 	if (SMB_ATTR_ONLY_OPEN(op)) {
473 		smb_rwx_rwexit(&node->n_lock);
474 		return (B_FALSE);
475 	}
476 
477 	smb_rwx_rwexit(&node->n_lock);
478 	return (B_TRUE);
479 }
480