1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "@(#)smb_oplock.c 1.5 08/08/07 SMI" 27 28 /* 29 * SMB Locking library functions. 30 */ 31 32 #include <smbsrv/smb_incl.h> 33 #include <smbsrv/smb_fsops.h> 34 #include <inet/tcp.h> 35 36 /* 37 * Oplock functionality enable/disable 38 */ 39 40 /* 41 * Magic 0xFF 'S' 'M' 'B' 42 * smb_com a byte, the "first" command 43 * Error a 4-byte union, ignored in a request 44 * smb_flg a one byte set of eight flags 45 * smb_flg2 a two byte set of 16 flags 46 * . twelve reserved bytes, have a role 47 * in connectionless transports (IPX, UDP?) 48 * smb_tid a 16-bit tree ID, a mount point sorta, 49 * 0xFFFF is this command does not have 50 * or require a tree context 51 * smb_pid a 16-bit process ID 52 * smb_uid a 16-bit user ID, specific to this "session" 53 * and mapped to a system (bona-fide) UID 54 * smb_mid a 16-bit multiplex ID, used to differentiate 55 * multiple simultaneous requests from the same 56 * process (pid) (ref RPC "xid") 57 * 58 * SMB_COM_LOCKING_ANDX allows both locking and/or unlocking of file range(s). 59 * 60 * Client Request Description 61 * ================================== ================================= 62 * 63 * UCHAR WordCount; Count of parameter words = 8 64 * UCHAR AndXCommand; Secondary (X) command; 0xFF = none 65 * UCHAR AndXReserved; Reserved (must be 0) 66 * USHORT AndXOffset; Offset to next command WordCount 67 * USHORT Fid; File handle 68 * UCHAR LockType; See LockType table below 69 * UCHAR OplockLevel; The new oplock level 70 * ULONG Timeout; Milliseconds to wait for unlock 71 * USHORT NumberOfUnlocks; Num. unlock range structs following 72 * USHORT NumberOfLocks; Num. lock range structs following 73 * USHORT ByteCount; Count of data bytes 74 * LOCKING_ANDX_RANGE Unlocks[]; Unlock ranges 75 * LOCKING_ANDX_RANGE Locks[]; Lock ranges 76 * 77 * LockType Flag Name Value Description 78 * ============================ ===== ================================ 79 * 80 * LOCKING_ANDX_SHARED_LOCK 0x01 Read-only lock 81 * LOCKING_ANDX_OPLOCK_RELEASE 0x02 Oplock break notification 82 * LOCKING_ANDX_CHANGE_LOCKTYPE 0x04 Change lock type 83 * LOCKING_ANDX_CANCEL_LOCK 0x08 Cancel outstanding request 84 * LOCKING_ANDX_LARGE_FILES 0x10 Large file locking format 85 * 86 * LOCKING_ANDX_RANGE Format 87 * ===================================================================== 88 * 89 * USHORT Pid; PID of process "owning" lock 90 * ULONG Offset; Offset to bytes to [un]lock 91 * ULONG Length; Number of bytes to [un]lock 92 * 93 * Large File LOCKING_ANDX_RANGE Format 94 * ===================================================================== 95 * 96 * USHORT Pid; PID of process "owning" lock 97 * USHORT Pad; Pad to DWORD align (mbz) 98 * ULONG OffsetHigh; Offset to bytes to [un]lock 99 * (high) 100 * ULONG OffsetLow; Offset to bytes to [un]lock (low) 101 * ULONG LengthHigh; Number of bytes to [un]lock 102 * (high) 103 * ULONG LengthLow; Number of bytes to [un]lock (low) 104 * 105 * Server Response Description 106 * ================================== ================================= 107 * 108 * UCHAR WordCount; Count of parameter words = 2 109 * UCHAR AndXCommand; Secondary (X) command; 0xFF = 110 * none 111 * UCHAR AndXReserved; Reserved (must be 0) 112 * USHORT AndXOffset; Offset to next command WordCount 113 * USHORT ByteCount; Count of data bytes = 0 114 * 115 */ 116 117 /* 118 * smb_oplock_acquire 119 * 120 * Attempt to acquire an oplock. Note that the oplock granted may be 121 * none, i.e. the oplock was not granted. 122 * 123 * Grant an oplock to the requestor if this session is the only one 124 * that has the file open, regardless of the number of instances of 125 * the file opened by this session. 126 * 127 * However, if there is no oplock on this file and there is already 128 * at least one open, we will not grant an oplock, even if the only 129 * existing opens are from the same client. This is "server discretion." 130 * 131 * An oplock may need to be broken in order for one to be granted, and 132 * depending on what action is taken by the other client (unlock or close), 133 * an oplock may or may not be granted. (The breaking of an oplock is 134 * done earlier in the calling path.) 135 * 136 * XXX: Node synchronization is not yet implemented. However, racing 137 * opens are handled thus: 138 * 139 * A racing oplock acquire can happen in the open path between 140 * smb_oplock_break() and smb_fsop_open(), but no later. (Once 141 * the file is open via smb_fsop_open()/VOP_OPEN, 142 * smb_fsop_oplock_install() will not be able to install an oplock, 143 * which requires an open count of 1.) 144 * 145 * Hence, we can safely break any oplock that came in after the 146 * smb_oplock_break() done previously in the open path, knowing that 147 * no other racing oplock acquisitions should be able to succeed 148 * because we already have the file open (see above). 149 * 150 * The type of oplock being requested is passed in op->my_flags. The result 151 * is also returned in op->my_flags. 152 * 153 * (Note that exclusive and batch oplocks are treated interchangeably.) 154 * 155 * The Returns NT status codes: 156 * NT_STATUS_SUCCESS 157 * NT_STATUS_CONNECTION_DISCONNECTED 158 */ 159 DWORD 160 smb_oplock_acquire( 161 smb_request_t *sr, 162 smb_ofile_t *of, 163 struct open_param *op) 164 { 165 smb_node_t *node; 166 unsigned int level; 167 168 ASSERT(sr); 169 ASSERT(of); 170 ASSERT(op); 171 ASSERT(op->fqi.last_attr.sa_vattr.va_type == VREG); 172 173 level = op->my_flags & MYF_OPLOCK_MASK; 174 175 op->my_flags &= ~MYF_OPLOCK_MASK; 176 177 if ((sr->sr_cfg->skc_oplock_enable == 0) || 178 smb_tree_has_feature(of->f_tree, SMB_TREE_NO_OPLOCKS)) 179 return (NT_STATUS_SUCCESS); 180 181 if (!((MYF_IS_EXCLUSIVE_OPLOCK(level)) || 182 (MYF_IS_BATCH_OPLOCK(level)))) 183 return (NT_STATUS_SUCCESS); 184 185 node = of->f_node; 186 187 smb_rwx_rwenter(&node->n_lock, RW_WRITER); 188 189 if (EXCLUSIVE_OPLOCK_IN_FORCE(node) || 190 BATCH_OPLOCK_IN_FORCE(node)) { 191 192 smb_rwx_rwexit(&node->n_lock); 193 194 if (SMB_SAME_SESSION(sr->session, 195 node->n_oplock.op_ofile->f_session)) { 196 op->my_flags |= level; 197 return (NT_STATUS_SUCCESS); 198 } else if (SMB_ATTR_ONLY_OPEN(op)) { 199 ASSERT(!(op->my_flags & MYF_OPLOCK_MASK)); 200 return (NT_STATUS_SUCCESS); 201 } 202 203 smb_oplock_break(node); 204 205 smb_rwx_rwenter(&node->n_lock, RW_WRITER); 206 } 207 208 if (smb_fsop_oplock_install(node, of->f_mode) != 0) { 209 smb_rwx_rwexit(&node->n_lock); 210 return (NT_STATUS_SUCCESS); 211 } 212 213 node->n_oplock.op_ofile = of; 214 node->n_oplock.op_ipaddr = sr->session->ipaddr; 215 node->n_oplock.op_kid = sr->session->s_kid; 216 node->flags &= ~NODE_OPLOCKS_IN_FORCE; 217 218 if (MYF_IS_EXCLUSIVE_OPLOCK(level)) 219 node->flags |= NODE_EXCLUSIVE_OPLOCK; 220 221 if (MYF_IS_BATCH_OPLOCK(level)) 222 node->flags |= NODE_BATCH_OPLOCK; 223 224 op->my_flags |= level; 225 226 smb_rwx_rwexit(&node->n_lock); 227 228 return (NT_STATUS_SUCCESS); 229 } 230 231 /* 232 * smb_oplock_break 233 * 234 * The oplock break may succeed for multiple reasons: file close, oplock 235 * release, holder connection dropped, requesting client disconnect etc. 236 * Whatever the reason, the oplock should be broken when this function 237 * returns. The exceptions are when the client making this request gets 238 * disconnected or when another client is handling the break and it gets 239 * disconnected. 240 * 241 * Returns NT status codes: 242 * NT_STATUS_SUCCESS No oplock in force, i.e. the 243 * oplock has been broken. 244 * NT_STATUS_CONNECTION_DISCONNECTED Requesting client disconnected. 245 * NT_STATUS_INTERNAL_ERROR 246 */ 247 248 void 249 smb_oplock_break(smb_node_t *node) 250 { 251 smb_session_t *oplock_session; 252 smb_ofile_t *oplock_ofile; 253 struct mbuf_chain mbc; 254 int retries = 0; 255 clock_t elapsed_time; 256 clock_t max_time; 257 boolean_t flag; 258 259 smb_rwx_rwenter(&node->n_lock, RW_WRITER); 260 261 if (!OPLOCKS_IN_FORCE(node)) { 262 smb_rwx_rwexit(&node->n_lock); 263 return; 264 } 265 266 if (node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) { 267 elapsed_time = 0; 268 max_time = MSEC_TO_TICK(smb_oplock_timeout * OPLOCK_RETRIES); 269 /* 270 * Another client is already attempting to break the oplock. 271 * We wait for it to finish. If the caller was trying to 272 * acquire an oplock, he should retry in case the client's 273 * connection was dropped while trying to break the oplock. 274 * 275 * If the holder's connection has been dropped, we yield to 276 * allow the thread handling the break to detect it and set 277 * the flags. 278 */ 279 while ((node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) && 280 (elapsed_time < max_time)) { 281 clock_t timeleft; 282 283 timeleft = smb_rwx_rwwait(&node->n_lock, max_time); 284 if (timeleft == -1) { 285 elapsed_time = max_time; 286 } else { 287 elapsed_time += max_time - timeleft; 288 } 289 } 290 /* 291 * If there are no oplocks in force we're done. 292 */ 293 if (!OPLOCKS_IN_FORCE(node)) { 294 smb_rwx_rwexit(&node->n_lock); 295 return; 296 } else { 297 /* 298 * This is an anomalous condition. 299 * Cancel/release the oplock. 300 */ 301 smb_oplock_release(node, B_TRUE); 302 smb_rwx_rwexit(&node->n_lock); 303 return; 304 } 305 } 306 307 oplock_ofile = node->n_oplock.op_ofile; 308 ASSERT(oplock_ofile); 309 310 oplock_session = oplock_ofile->f_session; 311 ASSERT(oplock_session); 312 313 /* 314 * Start oplock break. 315 */ 316 317 node->n_oplock.op_flags |= OPLOCK_FLAG_BREAKING; 318 319 smb_rwx_rwexit(&node->n_lock); 320 321 max_time = MSEC_TO_TICK(smb_oplock_timeout); 322 do { 323 MBC_INIT(&mbc, MLEN); 324 (void) smb_mbc_encodef(&mbc, "Mb19.wwwwbb3.ww10.", 325 SMB_COM_LOCKING_ANDX, oplock_ofile->f_tree->t_tid, 326 0xffff, 0, 0xffff, 8, 0xff, oplock_ofile->f_fid, 327 LOCKING_ANDX_OPLOCK_RELEASE); 328 329 flag = B_TRUE; 330 smb_rwx_rwenter(&oplock_session->s_lock, RW_WRITER); 331 while (flag) { 332 switch (oplock_session->s_state) { 333 case SMB_SESSION_STATE_DISCONNECTED: 334 case SMB_SESSION_STATE_TERMINATED: 335 smb_rwx_rwexit(&oplock_session->s_lock); 336 smb_rwx_rwenter(&node->n_lock, RW_WRITER); 337 338 node->flags &= ~NODE_OPLOCKS_IN_FORCE; 339 node->n_oplock.op_flags &= 340 ~OPLOCK_FLAG_BREAKING; 341 node->n_oplock.op_ofile = NULL; 342 bzero(&node->n_oplock.op_ipaddr, 343 sizeof (node->n_oplock.op_ipaddr)); 344 node->n_oplock.op_kid = 0; 345 346 smb_rwx_rwexit(&node->n_lock); 347 348 return; 349 350 case SMB_SESSION_STATE_OPLOCK_BREAKING: 351 flag = B_FALSE; 352 break; 353 354 case SMB_SESSION_STATE_NEGOTIATED: 355 oplock_session->s_state = 356 SMB_SESSION_STATE_OPLOCK_BREAKING; 357 flag = B_FALSE; 358 break; 359 360 default: 361 (void) smb_rwx_rwwait(&oplock_session->s_lock, 362 -1); 363 break; 364 } 365 } 366 smb_rwx_rwexit(&oplock_session->s_lock); 367 368 (void) smb_session_send(oplock_session, 0, &mbc); 369 370 elapsed_time = 0; 371 372 smb_rwx_rwenter(&node->n_lock, RW_WRITER); 373 while ((node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) && 374 (elapsed_time < max_time)) { 375 clock_t timeleft; 376 377 timeleft = smb_rwx_rwwait(&node->n_lock, max_time); 378 if (timeleft == -1) { 379 elapsed_time = max_time; 380 } else { 381 elapsed_time += max_time - timeleft; 382 } 383 } 384 385 if (!OPLOCKS_IN_FORCE(node)) { 386 /* 387 * smb_oplock_release() was called 388 */ 389 smb_rwx_rwexit(&node->n_lock); 390 return; 391 } 392 } while (++retries < OPLOCK_RETRIES); 393 394 /* 395 * Retries exhausted and timed out. 396 * Cancel the oplock and continue. 397 */ 398 399 smb_oplock_release(node, B_TRUE); 400 401 smb_rwx_rwexit(&node->n_lock); 402 } 403 404 /* 405 * smb_oplock_release 406 * 407 * This function uninstalls the FEM oplock monitors and 408 * clears all flags in relation to an oplock on the 409 * given node. 410 * 411 * The function can be called with the node->n_lock held 412 * or not held. 413 */ 414 415 void /*ARGSUSED*/ 416 smb_oplock_release(smb_node_t *node, boolean_t have_rwx) 417 { 418 if (!have_rwx) 419 smb_rwx_rwenter(&node->n_lock, RW_WRITER); 420 421 if (!OPLOCKS_IN_FORCE(node)) { 422 if (!have_rwx) 423 smb_rwx_rwexit(&node->n_lock); 424 return; 425 } 426 427 smb_fsop_oplock_uninstall(node); 428 429 node->flags &= ~NODE_OPLOCKS_IN_FORCE; 430 node->n_oplock.op_flags &= ~OPLOCK_FLAG_BREAKING; 431 node->n_oplock.op_ofile = NULL; 432 bzero(&node->n_oplock.op_ipaddr, sizeof (node->n_oplock.op_ipaddr)); 433 node->n_oplock.op_kid = 0; 434 435 if (!have_rwx) 436 smb_rwx_rwexit(&node->n_lock); 437 } 438 439 /* 440 * smb_oplock_conflict 441 * 442 * The two checks on "session" and "op" are primarily for the open path. 443 * Other CIFS functions may call smb_oplock_conflict() with a session 444 * pointer so as to do the session check. 445 */ 446 447 boolean_t 448 smb_oplock_conflict(smb_node_t *node, smb_session_t *session, 449 struct open_param *op) 450 { 451 smb_session_t *oplock_session; 452 smb_ofile_t *oplock_ofile; 453 454 smb_rwx_rwenter(&node->n_lock, RW_READER); 455 456 if (!OPLOCKS_IN_FORCE(node)) { 457 smb_rwx_rwexit(&node->n_lock); 458 return (B_FALSE); 459 } 460 461 oplock_ofile = node->n_oplock.op_ofile; 462 ASSERT(oplock_ofile); 463 464 oplock_session = oplock_ofile->f_session; 465 ASSERT(oplock_session); 466 467 if (SMB_SAME_SESSION(session, oplock_session)) { 468 smb_rwx_rwexit(&node->n_lock); 469 return (B_FALSE); 470 } 471 472 if (SMB_ATTR_ONLY_OPEN(op)) { 473 smb_rwx_rwexit(&node->n_lock); 474 return (B_FALSE); 475 } 476 477 smb_rwx_rwexit(&node->n_lock); 478 return (B_TRUE); 479 } 480