xref: /titanic_50/usr/src/uts/common/fs/smbsrv/smb_oplock.c (revision 25351652d920ae27c5a56c199da581033ce763f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"@(#)smb_oplock.c	1.5	08/08/07 SMI"
27 
28 /*
29  * SMB Locking library functions.
30  */
31 
32 #include <smbsrv/smb_incl.h>
33 #include <smbsrv/smb_fsops.h>
34 
35 /*
36  * Oplock functionality enable/disable
37  */
38 
39 /*
40  *	Magic		0xFF 'S' 'M' 'B'
41  *	smb_com 	a byte, the "first" command
42  *	Error		a 4-byte union, ignored in a request
43  *	smb_flg		a one byte set of eight flags
44  *	smb_flg2	a two byte set of 16 flags
45  *	.		twelve reserved bytes, have a role
46  *			in connectionless transports (IPX, UDP?)
47  *	smb_tid		a 16-bit tree ID, a mount point sorta,
48  *			0xFFFF if this command does not have
49  *			or require a tree context
50  *	smb_pid		a 16-bit process ID
51  *	smb_uid		a 16-bit user ID, specific to this "session"
52  *			and mapped to a system (bona-fide) UID
53  *	smb_mid		a 16-bit multiplex ID, used to differentiate
54  *			multiple simultaneous requests from the same
55  *			process (pid) (ref RPC "xid")
56  *
57  * SMB_COM_LOCKING_ANDX allows both locking and/or unlocking of file range(s).
58  *
59  *  Client Request                     Description
60  *  ================================== =================================
61  *
62  *  UCHAR WordCount;                   Count of parameter words = 8
63  *  UCHAR AndXCommand;                 Secondary (X) command;  0xFF = none
64  *  UCHAR AndXReserved;                Reserved (must be 0)
65  *  USHORT AndXOffset;                 Offset to next command WordCount
66  *  USHORT Fid;                        File handle
67  *  UCHAR LockType;                    See LockType table below
68  *  UCHAR OplockLevel;                 The new oplock level
69  *  ULONG Timeout;                     Milliseconds to wait for unlock
70  *  USHORT NumberOfUnlocks;            Num. unlock range structs following
71  *  USHORT NumberOfLocks;              Num. lock range structs following
72  *  USHORT ByteCount;                  Count of data bytes
73  *  LOCKING_ANDX_RANGE Unlocks[];      Unlock ranges
74  *  LOCKING_ANDX_RANGE Locks[];        Lock ranges
75  *
76  *  LockType Flag Name            Value Description
77  *  ============================  ===== ================================
78  *
79  *  LOCKING_ANDX_SHARED_LOCK      0x01  Read-only lock
80  *  LOCKING_ANDX_OPLOCK_RELEASE   0x02  Oplock break notification
81  *  LOCKING_ANDX_CHANGE_LOCKTYPE  0x04  Change lock type
82  *  LOCKING_ANDX_CANCEL_LOCK      0x08  Cancel outstanding request
83  *  LOCKING_ANDX_LARGE_FILES      0x10  Large file locking format
84  *
85  *  LOCKING_ANDX_RANGE Format
86  *  =====================================================================
87  *
88  *  USHORT Pid;                        PID of process "owning" lock
89  *  ULONG Offset;                      Offset to bytes to [un]lock
90  *  ULONG Length;                      Number of bytes to [un]lock
91  *
92  *  Large File LOCKING_ANDX_RANGE Format
93  *  =====================================================================
94  *
95  *  USHORT Pid;                        PID of process "owning" lock
96  *  USHORT Pad;                        Pad to DWORD align (mbz)
97  *  ULONG OffsetHigh;                  Offset to bytes to [un]lock
98  *                                      (high)
99  *  ULONG OffsetLow;                   Offset to bytes to [un]lock (low)
100  *  ULONG LengthHigh;                  Number of bytes to [un]lock
101  *                                      (high)
102  *  ULONG LengthLow;                   Number of bytes to [un]lock (low)
103  *
104  *  Server Response                    Description
105  *  ================================== =================================
106  *
107  *  UCHAR WordCount;                   Count of parameter words = 2
108  *  UCHAR AndXCommand;                 Secondary (X) command;  0xFF =
109  *                                      none
110  *  UCHAR AndXReserved;                Reserved (must be 0)
111  *  USHORT AndXOffset;                 Offset to next command WordCount
112  *  USHORT ByteCount;                  Count of data bytes = 0
113  *
114  */
115 
116 /*
117  * smb_oplock_acquire
118  *
119  * Attempt to acquire an oplock. Note that the oplock granted may be
120  * none, i.e. the oplock was not granted.
121  *
122  * Grant an oplock to the requestor if this session is the only one
123  * that has the file open, regardless of the number of instances of
124  * the file opened by this session.
125  *
126  * However, if there is no oplock on this file and there is already
127  * at least one open, we will not grant an oplock, even if the only
128  * existing opens are from the same client.  This is "server discretion."
129  *
130  * An oplock may need to be broken in order for one to be granted, and
131  * depending on what action is taken by the other client (unlock or close),
132  * an oplock may or may not be granted.  (The breaking of an oplock is
133  * done earlier in the calling path.)
134  *
135  * XXX: Node synchronization is not yet implemented.  However, racing
136  * opens are handled thus:
137  *
138  * A racing oplock acquire can happen in the open path between
139  * smb_oplock_break() and smb_fsop_open(), but no later.  (Once
140  * the file is open via smb_fsop_open()/VOP_OPEN,
141  * smb_fsop_oplock_install() will not be able to install an oplock,
142  * which requires an open count of 1.)
143  *
144  * Hence, we can safely break any oplock that came in after the
145  * smb_oplock_break() done previously in the open path, knowing that
146  * no other racing oplock acquisitions should be able to succeed
147  * because we already have the file open (see above).
148  *
149  * The type of oplock being requested is passed in op->my_flags.  The result
150  * is also returned in op->my_flags.
151  *
152  * (Note that exclusive and batch oplocks are treated interchangeably.)
153  *
154  * The Returns NT status codes:
155  *	NT_STATUS_SUCCESS
156  *	NT_STATUS_CONNECTION_DISCONNECTED
157  */
158 DWORD
159 smb_oplock_acquire(
160     smb_request_t	*sr,
161     smb_ofile_t		*of,
162     struct open_param	*op)
163 {
164 	smb_node_t		*node;
165 	unsigned int		level;
166 
167 	ASSERT(sr);
168 	ASSERT(of);
169 	ASSERT(op);
170 	ASSERT(op->fqi.last_attr.sa_vattr.va_type == VREG);
171 
172 	level = op->my_flags & MYF_OPLOCK_MASK;
173 
174 	op->my_flags &= ~MYF_OPLOCK_MASK;
175 
176 	if ((sr->sr_cfg->skc_oplock_enable == 0) ||
177 	    smb_tree_has_feature(of->f_tree, SMB_TREE_NO_OPLOCKS))
178 		return (NT_STATUS_SUCCESS);
179 
180 	if (!((MYF_IS_EXCLUSIVE_OPLOCK(level)) ||
181 	    (MYF_IS_BATCH_OPLOCK(level))))
182 		return (NT_STATUS_SUCCESS);
183 
184 	node = of->f_node;
185 
186 	smb_rwx_rwenter(&node->n_lock, RW_WRITER);
187 
188 	if (EXCLUSIVE_OPLOCK_IN_FORCE(node) ||
189 	    BATCH_OPLOCK_IN_FORCE(node)) {
190 
191 		smb_rwx_rwexit(&node->n_lock);
192 
193 		if (SMB_SAME_SESSION(sr->session,
194 		    node->n_oplock.op_ofile->f_session)) {
195 			op->my_flags |= level;
196 			return (NT_STATUS_SUCCESS);
197 		} else if (SMB_ATTR_ONLY_OPEN(op)) {
198 			ASSERT(!(op->my_flags & MYF_OPLOCK_MASK));
199 			return (NT_STATUS_SUCCESS);
200 		}
201 
202 		smb_oplock_break(node);
203 
204 		smb_rwx_rwenter(&node->n_lock, RW_WRITER);
205 	}
206 
207 	if (smb_fsop_oplock_install(node, of->f_mode) != 0) {
208 		smb_rwx_rwexit(&node->n_lock);
209 		return (NT_STATUS_SUCCESS);
210 	}
211 
212 	node->n_oplock.op_ofile = of;
213 	node->n_oplock.op_ipaddr = sr->session->ipaddr;
214 	node->n_oplock.op_kid = sr->session->s_kid;
215 	node->flags &= ~NODE_OPLOCKS_IN_FORCE;
216 
217 	if (MYF_IS_EXCLUSIVE_OPLOCK(level))
218 		node->flags |= NODE_EXCLUSIVE_OPLOCK;
219 
220 	if (MYF_IS_BATCH_OPLOCK(level))
221 		node->flags |= NODE_BATCH_OPLOCK;
222 
223 	op->my_flags |= level;
224 
225 	smb_rwx_rwexit(&node->n_lock);
226 
227 	return (NT_STATUS_SUCCESS);
228 }
229 
230 /*
231  * smb_oplock_break
232  *
233  * The oplock break may succeed for multiple reasons: file close, oplock
234  * release, holder connection dropped, requesting client disconnect etc.
235  * Whatever the reason, the oplock should be broken when this function
236  * returns. The exceptions are when the client making this request gets
237  * disconnected or when another client is handling the break and it gets
238  * disconnected.
239  *
240  * Returns NT status codes:
241  *	NT_STATUS_SUCCESS                  No oplock in force, i.e. the
242  *						oplock has been broken.
243  *	NT_STATUS_CONNECTION_DISCONNECTED  Requesting client disconnected.
244  *	NT_STATUS_INTERNAL_ERROR
245  */
246 
247 void
248 smb_oplock_break(smb_node_t *node)
249 {
250 	smb_session_t		*oplock_session;
251 	smb_ofile_t		*oplock_ofile;
252 	struct mbuf_chain	mbc;
253 	int			retries = 0;
254 	clock_t			elapsed_time;
255 	clock_t			max_time;
256 	boolean_t		flag;
257 
258 	smb_rwx_rwenter(&node->n_lock, RW_WRITER);
259 
260 	if (!OPLOCKS_IN_FORCE(node)) {
261 		smb_rwx_rwexit(&node->n_lock);
262 		return;
263 	}
264 
265 	if (node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) {
266 		elapsed_time = 0;
267 		max_time = MSEC_TO_TICK(smb_oplock_timeout * OPLOCK_RETRIES);
268 		/*
269 		 * Another client is already attempting to break the oplock.
270 		 * We wait for it to finish. If the caller was trying to
271 		 * acquire an oplock, he should retry in case the client's
272 		 * connection was dropped while trying to break the oplock.
273 		 *
274 		 * If the holder's connection has been dropped, we yield to
275 		 * allow the thread handling the break to detect it and set
276 		 * the flags.
277 		 */
278 		while ((node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) &&
279 		    (elapsed_time < max_time)) {
280 			clock_t	timeleft;
281 
282 			timeleft = smb_rwx_rwwait(&node->n_lock, max_time);
283 			if (timeleft == -1) {
284 				elapsed_time = max_time;
285 			} else {
286 				elapsed_time += max_time - timeleft;
287 			}
288 		}
289 		/*
290 		 * If there are no oplocks in force we're done.
291 		 */
292 		if (!OPLOCKS_IN_FORCE(node)) {
293 			smb_rwx_rwexit(&node->n_lock);
294 			return;
295 		} else {
296 			/*
297 			 * This is an anomalous condition.
298 			 * Cancel/release the oplock.
299 			 */
300 			smb_oplock_release(node, B_TRUE);
301 			smb_rwx_rwexit(&node->n_lock);
302 			return;
303 		}
304 	}
305 
306 	oplock_ofile = node->n_oplock.op_ofile;
307 	ASSERT(oplock_ofile);
308 
309 	oplock_session = oplock_ofile->f_session;
310 	ASSERT(oplock_session);
311 
312 	/*
313 	 * Start oplock break.
314 	 */
315 
316 	node->n_oplock.op_flags |= OPLOCK_FLAG_BREAKING;
317 
318 	smb_rwx_rwexit(&node->n_lock);
319 
320 	max_time = MSEC_TO_TICK(smb_oplock_timeout);
321 	do {
322 		MBC_INIT(&mbc, MLEN);
323 		(void) smb_mbc_encodef(&mbc, "Mb19.wwwwbb3.ww10.",
324 		    SMB_COM_LOCKING_ANDX, oplock_ofile->f_tree->t_tid,
325 		    0xffff, 0, 0xffff, 8, 0xff, oplock_ofile->f_fid,
326 		    LOCKING_ANDX_OPLOCK_RELEASE);
327 
328 		flag = B_TRUE;
329 		smb_rwx_rwenter(&oplock_session->s_lock, RW_WRITER);
330 		while (flag) {
331 			switch (oplock_session->s_state) {
332 			case SMB_SESSION_STATE_DISCONNECTED:
333 			case SMB_SESSION_STATE_TERMINATED:
334 				smb_rwx_rwexit(&oplock_session->s_lock);
335 				smb_rwx_rwenter(&node->n_lock, RW_WRITER);
336 
337 				node->flags &= ~NODE_OPLOCKS_IN_FORCE;
338 				node->n_oplock.op_flags &=
339 				    ~OPLOCK_FLAG_BREAKING;
340 				node->n_oplock.op_ofile = NULL;
341 				node->n_oplock.op_ipaddr = 0;
342 				node->n_oplock.op_kid = 0;
343 
344 				smb_rwx_rwexit(&node->n_lock);
345 
346 				return;
347 
348 			case SMB_SESSION_STATE_OPLOCK_BREAKING:
349 				flag = B_FALSE;
350 				break;
351 
352 			case SMB_SESSION_STATE_NEGOTIATED:
353 				oplock_session->s_state =
354 				    SMB_SESSION_STATE_OPLOCK_BREAKING;
355 				flag = B_FALSE;
356 				break;
357 
358 			default:
359 				(void) smb_rwx_rwwait(&oplock_session->s_lock,
360 				    -1);
361 				break;
362 			}
363 		}
364 		smb_rwx_rwexit(&oplock_session->s_lock);
365 
366 		(void) smb_session_send(oplock_session, 0, &mbc);
367 
368 		elapsed_time = 0;
369 
370 		smb_rwx_rwenter(&node->n_lock, RW_WRITER);
371 		while ((node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) &&
372 		    (elapsed_time < max_time)) {
373 			clock_t	timeleft;
374 
375 			timeleft = smb_rwx_rwwait(&node->n_lock, max_time);
376 			if (timeleft == -1) {
377 				elapsed_time = max_time;
378 			} else {
379 				elapsed_time += max_time - timeleft;
380 			}
381 		}
382 
383 		if (!OPLOCKS_IN_FORCE(node)) {
384 			/*
385 			 * smb_oplock_release() was called
386 			 */
387 			smb_rwx_rwexit(&node->n_lock);
388 			return;
389 		}
390 	} while (++retries < OPLOCK_RETRIES);
391 
392 	/*
393 	 * Retries exhausted and timed out.
394 	 * Cancel the oplock and continue.
395 	 */
396 
397 	smb_oplock_release(node, B_TRUE);
398 
399 	smb_rwx_rwexit(&node->n_lock);
400 }
401 
402 /*
403  * smb_oplock_release
404  *
405  * This function uninstalls the FEM oplock monitors and
406  * clears all flags in relation to an oplock on the
407  * given node.
408  *
409  * The function can be called with the node->n_lock held
410  * or not held.
411  */
412 
413 void /*ARGSUSED*/
414 smb_oplock_release(smb_node_t *node, boolean_t have_rwx)
415 {
416 	if (!have_rwx)
417 		smb_rwx_rwenter(&node->n_lock, RW_WRITER);
418 
419 	if (!OPLOCKS_IN_FORCE(node)) {
420 		if (!have_rwx)
421 			smb_rwx_rwexit(&node->n_lock);
422 		return;
423 	}
424 
425 	smb_fsop_oplock_uninstall(node);
426 
427 	node->flags &= ~NODE_OPLOCKS_IN_FORCE;
428 	node->n_oplock.op_flags &= ~OPLOCK_FLAG_BREAKING;
429 	node->n_oplock.op_ofile = NULL;
430 	node->n_oplock.op_ipaddr = 0;
431 	node->n_oplock.op_kid = 0;
432 
433 	if (!have_rwx)
434 		smb_rwx_rwexit(&node->n_lock);
435 }
436 
437 /*
438  * smb_oplock_conflict
439  *
440  * The two checks on "session" and "op" are primarily for the open path.
441  * Other CIFS functions may call smb_oplock_conflict() with a session
442  * pointer so as to do the session check.
443  */
444 
445 boolean_t
446 smb_oplock_conflict(smb_node_t *node, smb_session_t *session,
447     struct open_param *op)
448 {
449 	smb_session_t		*oplock_session;
450 	smb_ofile_t		*oplock_ofile;
451 
452 	smb_rwx_rwenter(&node->n_lock, RW_READER);
453 
454 	if (!OPLOCKS_IN_FORCE(node)) {
455 		smb_rwx_rwexit(&node->n_lock);
456 		return (B_FALSE);
457 	}
458 
459 	oplock_ofile = node->n_oplock.op_ofile;
460 	ASSERT(oplock_ofile);
461 
462 	oplock_session = oplock_ofile->f_session;
463 	ASSERT(oplock_session);
464 
465 	if (SMB_SAME_SESSION(session, oplock_session)) {
466 		smb_rwx_rwexit(&node->n_lock);
467 		return (B_FALSE);
468 	}
469 
470 	if (SMB_ATTR_ONLY_OPEN(op)) {
471 		smb_rwx_rwexit(&node->n_lock);
472 		return (B_FALSE);
473 	}
474 
475 	smb_rwx_rwexit(&node->n_lock);
476 	return (B_TRUE);
477 }
478