1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 14 */ 15 16 /* 17 * (SMB1/SMB2) Server-level Oplock support. 18 * 19 * Conceptually, this is a separate layer on top of the 20 * file system (FS) layer oplock code in smb_cmn_oplock.c. 21 * If these layers were more distinct, the FS layer would 22 * need to use call-back functions (installed from here) 23 * to "indicate an oplock break to the server" (see below). 24 * As these layers are all in the same kernel module, the 25 * delivery of these break indications just uses a direct 26 * function call to smb_oplock_ind_break() below. 27 * 28 * This layer is responsible for handling the break indication, 29 * which often requires scheduling a taskq job in the server, 30 * and sending an oplock break mesage to the client using 31 * the appropriate protocol for the open handle affected. 32 * 33 * The details of composing an oplock break message, the 34 * protocol-specific details of requesting an oplock, and 35 * returning that oplock to the client are in the files: 36 * smb_oplock.c, smb2_oplock.c, smb2_lease.c 37 */ 38 39 #include <smbsrv/smb2_kproto.h> 40 #include <smbsrv/smb_oplock.h> 41 42 /* 43 * Verify relationship between BREAK_TO_... and CACHE bits, 44 * used when setting the BREAK_TO_... below. 45 */ 46 #if BREAK_TO_READ_CACHING != (READ_CACHING << BREAK_SHIFT) 47 #error "BREAK_TO_READ_CACHING" 48 #endif 49 #if BREAK_TO_HANDLE_CACHING != (HANDLE_CACHING << BREAK_SHIFT) 50 #error "BREAK_TO_HANDLE_CACHING" 51 #endif 52 #if BREAK_TO_WRITE_CACHING != (WRITE_CACHING << BREAK_SHIFT) 53 #error "BREAK_TO_WRITE_CACHING" 54 #endif 55 #define CACHE_RWH (READ_CACHING | WRITE_CACHING | HANDLE_CACHING) 56 57 /* 58 * This is the timeout used in the thread that sends an 59 * oplock break and waits for the client to respond 60 * before it breaks the oplock locally. 61 */ 62 int smb_oplock_timeout_ack = 30000; /* mSec. */ 63 64 /* 65 * This is the timeout used in threads that have just 66 * finished some sort of oplock request and now must 67 * wait for (possibly multiple) breaks to complete. 68 * This value must be at least a couple seconds LONGER 69 * than the ack timeout above so that I/O callers won't 70 * give up waiting before the local ack timeout. 71 */ 72 int smb_oplock_timeout_def = 45000; /* mSec. */ 73 74 static void smb_oplock_async_break(void *); 75 static void smb_oplock_hdl_clear(smb_ofile_t *); 76 77 78 /* 79 * 2.1.5.17.3 Indicating an Oplock Break to the Server 80 * 81 * The inputs for indicating an oplock break to the server are: 82 * 83 * BreakingOplockOpen: The Open used to request the oplock 84 * that is now breaking. 85 * NewOplockLevel: The type of oplock the requested oplock 86 * has been broken to. Valid values are as follows: 87 * LEVEL_NONE (that is, no oplock) 88 * LEVEL_TWO 89 * A combination of one or more of the following flags: 90 * READ_CACHING 91 * HANDLE_CACHING 92 * WRITE_CACHING 93 * AcknowledgeRequired: A Boolean value; TRUE if the server 94 * MUST acknowledge the oplock break, FALSE if not, 95 * as specified in section 2.1.5.18. 96 * OplockCompletionStatus: The NTSTATUS code to return to the server. 97 * 98 * This algorithm simply represents the completion of an oplock request, 99 * as specified in section 2.1.5.17.1 or section 2.1.5.17.2. The server 100 * is expected to associate the return status from this algorithm with 101 * BreakingOplockOpen, which is the Open passed in when it requested 102 * the oplock that is now breaking. 103 * 104 * It is important to note that because several oplocks can be outstanding 105 * in parallel, although this algorithm represents the completion of an 106 * oplock request, it might not result in the completion of the algorithm 107 * that called it. In particular, calling this algorithm will result in 108 * completion of the caller only if BreakingOplockOpen is the same as the 109 * Open with which the calling algorithm was itself called. To mitigate 110 * confusion, each algorithm that refers to this section will specify 111 * whether that algorithm's operation terminates at that point or not. 112 * 113 * The object store MUST return OplockCompletionStatus, 114 * AcknowledgeRequired, and NewOplockLevel to the server (the algorithm is 115 * as specified in section 2.1.5.17.1 and section 2.1.5.17.2). 116 * 117 * Implementation: 118 * 119 * We use two versions of this function: 120 * smb_oplock_ind_break_in_ack 121 * smb_oplock_ind_break 122 * 123 * The first is used when we're handling an Oplock Break Ack. 124 * The second is used when other operations cause a break, 125 * generally in one of the smb_oplock_break_... functions. 126 * 127 * Note that these are call-back functions that may be called with the 128 * node ofile list rwlock held and the node oplock mutex entered, so 129 * these should ONLY schedule oplock break work, and MUST NOT attempt 130 * any actions that might require either of those locks. 131 */ 132 133 /* 134 * smb_oplock_ind_break_in_ack 135 * 136 * Variant of smb_oplock_ind_break() for the oplock Ack handler. 137 * When we need to indicate another oplock break from within the 138 * Ack handler (during the Ack. of some previous oplock break) 139 * we need to make sure this new break indication goes out only 140 * AFTER the reply to the current break ack. is sent out. 141 * 142 * In this case, we always have an SR (the break ack) so we can 143 * append the "ind break" work to the current SR and let the 144 * request hander thread do this work after the reply is sent. 145 * Note: this is always an SMB2 or later request, because this 146 * only happens for "granular" oplocks, which are SMB2-only. 147 * 148 * This is mostly the same as smb_oplock_ind_break() except: 149 * - The only CompletionStatus possible is STATUS_CANT_GRANT. 150 * - Instead of taskq_dispatch this appends the new SR to 151 * the "post work" queue on the current SR. 152 * 153 * Note called with the node ofile list rwlock held and 154 * the oplock mutex entered. 155 */ 156 void 157 smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile, 158 uint32_t NewLevel, boolean_t AckRequired) 159 { 160 smb_request_t *new_sr; 161 162 /* 163 * This should happen only with SMB2 or later, 164 * but in case that ever changes... 165 */ 166 if (ack_sr->session->dialect < SMB_VERS_2_BASE) { 167 smb_oplock_ind_break(ofile, NewLevel, 168 AckRequired, STATUS_CANT_GRANT); 169 return; 170 } 171 172 /* 173 * We're going to schedule a request that will have a 174 * reference to this ofile. Get the hold first. 175 */ 176 if (!smb_ofile_hold_olbrk(ofile)) { 177 /* It's closing (or whatever). Nothing to do. */ 178 return; 179 } 180 181 /* 182 * When called from Ack processing, we want to use a 183 * request on the session doing the ack. If we can't 184 * allocate a request on that session (because it's 185 * now disconnecting) just fall-back to the normal 186 * oplock break code path which deals with that. 187 * Once we have a request on the ack session, that 188 * session won't go away until the request is done. 189 */ 190 new_sr = smb_request_alloc(ack_sr->session, 0); 191 if (new_sr == NULL) { 192 smb_oplock_ind_break(ofile, NewLevel, 193 AckRequired, STATUS_CANT_GRANT); 194 smb_ofile_release(ofile); 195 return; 196 } 197 198 new_sr->sr_state = SMB_REQ_STATE_SUBMITTED; 199 new_sr->smb2_async = B_TRUE; 200 new_sr->user_cr = zone_kcred(); 201 new_sr->fid_ofile = ofile; 202 /* Leave tid_tree, uid_user NULL. */ 203 new_sr->arg.olbrk.NewLevel = NewLevel; 204 new_sr->arg.olbrk.AckRequired = AckRequired; 205 206 /* 207 * Using smb2_cmd_code to indicate what to call. 208 * work func. will call smb_oplock_send_brk 209 */ 210 new_sr->smb2_cmd_code = SMB2_OPLOCK_BREAK; 211 smb2sr_append_postwork(ack_sr, new_sr); 212 } 213 214 /* 215 * smb_oplock_ind_break 216 * 217 * This is the function described in [MS-FSA] 2.1.5.17.3 218 * which is called many places in the oplock break code. 219 * 220 * Schedule a request & taskq job to do oplock break work 221 * as requested by the FS-level code (smb_cmn_oplock.c). 222 * 223 * Note called with the node ofile list rwlock held and 224 * the oplock mutex entered. 225 */ 226 void 227 smb_oplock_ind_break(smb_ofile_t *ofile, uint32_t NewLevel, 228 boolean_t AckRequired, uint32_t CompletionStatus) 229 { 230 smb_server_t *sv = ofile->f_server; 231 smb_request_t *sr = NULL; 232 233 /* 234 * See notes at smb_oplock_async_break re. CompletionStatus 235 * Check for any invalid codes here, so assert happens in 236 * the thread passing an unexpected value. 237 * The real work happens in a taskq job. 238 */ 239 switch (CompletionStatus) { 240 241 case NT_STATUS_SUCCESS: 242 case STATUS_CANT_GRANT: 243 /* Send break via taskq job. */ 244 break; 245 246 case STATUS_NEW_HANDLE: 247 case NT_STATUS_OPLOCK_HANDLE_CLOSED: 248 smb_oplock_hdl_clear(ofile); 249 return; 250 251 default: 252 ASSERT(0); 253 return; 254 } 255 256 /* 257 * We're going to schedule a request that will have a 258 * reference to this ofile. Get the hold first. 259 */ 260 if (!smb_ofile_hold_olbrk(ofile)) { 261 /* It's closing (or whatever). Nothing to do. */ 262 return; 263 } 264 265 /* 266 * We need a request allocated on the session that owns 267 * this ofile in order to safely send on that session. 268 * 269 * Note that while we hold a ref. on the ofile, it's 270 * f_session will not change. An ofile in state 271 * _ORPHANED will have f_session == NULL, but the 272 * f_session won't _change_ while we have a ref, 273 * and won't be torn down under our feet. 274 * 275 * If f_session is NULL, or it's in a state that doesn't 276 * allow new requests, use the special "server" session. 277 */ 278 if (ofile->f_session != NULL) 279 sr = smb_request_alloc(ofile->f_session, 0); 280 if (sr == NULL) 281 sr = smb_request_alloc(sv->sv_session, 0); 282 283 sr->sr_state = SMB_REQ_STATE_SUBMITTED; 284 sr->smb2_async = B_TRUE; 285 sr->user_cr = zone_kcred(); 286 sr->fid_ofile = ofile; 287 /* Leave tid_tree, uid_user NULL. */ 288 sr->arg.olbrk.NewLevel = NewLevel; 289 sr->arg.olbrk.AckRequired = AckRequired; 290 sr->smb2_status = CompletionStatus; 291 292 (void) taskq_dispatch( 293 sv->sv_worker_pool, 294 smb_oplock_async_break, sr, TQ_SLEEP); 295 } 296 297 /* 298 * smb_oplock_async_break 299 * 300 * Called via the taskq to handle an asynchronous oplock break. 301 * We have a hold on the ofile, which will be released in 302 * smb_request_free (via sr->fid_ofile) 303 * 304 * Note we have: sr->uid_user == NULL, sr->tid_tree == NULL. 305 * Nothing called here needs those. 306 * 307 * Note that NewLevel as provided by the FS up-call does NOT 308 * include the GRANULAR flag. The SMB level is expected to 309 * keep track of how each oplock was acquired (by lease or 310 * traditional oplock request) and put the GRANULAR flag 311 * back into the oplock state when calling down to the 312 * FS-level code. Also note that the lease break message 313 * carries only the cache flags, not the GRANULAR flag. 314 */ 315 static void 316 smb_oplock_async_break(void *arg) 317 { 318 smb_request_t *sr = arg; 319 uint32_t CompletionStatus; 320 321 SMB_REQ_VALID(sr); 322 323 CompletionStatus = sr->smb2_status; 324 sr->smb2_status = NT_STATUS_SUCCESS; 325 326 mutex_enter(&sr->sr_mutex); 327 sr->sr_worker = curthread; 328 sr->sr_state = SMB_REQ_STATE_ACTIVE; 329 mutex_exit(&sr->sr_mutex); 330 331 /* 332 * Note that the CompletionStatus from the FS level 333 * (smb_cmn_oplock.c) encodes what kind of action we 334 * need to take at the SMB level. 335 */ 336 switch (CompletionStatus) { 337 338 case STATUS_CANT_GRANT: 339 case NT_STATUS_SUCCESS: 340 smb_oplock_send_brk(sr); 341 break; 342 343 default: 344 /* Checked by caller. */ 345 ASSERT(0); 346 break; 347 } 348 349 sr->sr_state = SMB_REQ_STATE_COMPLETED; 350 smb_request_free(sr); 351 } 352 353 #ifdef DEBUG 354 int smb_oplock_debug_wait = 0; 355 #endif 356 357 /* 358 * Send an oplock break over the wire, or if we can't, 359 * then process the oplock break locally. 360 * 361 * Note that we have sr->fid_ofile here but all the other 362 * normal sr members are NULL: uid_user, tid_tree. 363 * Also sr->session may or may not be the same session as 364 * the ofile came from (ofile->f_session) depending on 365 * whether this is a "live" open or an orphaned DH, 366 * where ofile->f_session will be NULL. 367 * 368 * Given that we don't always have a session, we determine 369 * the oplock type (lease etc) from f_oplock.og_dialect. 370 */ 371 void 372 smb_oplock_send_brk(smb_request_t *sr) 373 { 374 smb_ofile_t *ofile; 375 smb_lease_t *lease; 376 uint32_t NewLevel; 377 boolean_t AckReq; 378 uint32_t status; 379 int rc; 380 381 ofile = sr->fid_ofile; 382 NewLevel = sr->arg.olbrk.NewLevel; 383 AckReq = sr->arg.olbrk.AckRequired; 384 lease = ofile->f_lease; 385 386 /* 387 * Build the break message in sr->reply. 388 * It's free'd in smb_request_free(). 389 * Also updates the lease and NewLevel. 390 */ 391 sr->reply.max_bytes = MLEN; 392 if (ofile->f_oplock.og_dialect >= SMB_VERS_2_BASE) { 393 if (lease != NULL) { 394 /* 395 * Oplock state has changed, so 396 * update the epoch. 397 */ 398 mutex_enter(&lease->ls_mutex); 399 lease->ls_epoch++; 400 mutex_exit(&lease->ls_mutex); 401 402 /* Note, needs "old" state in og_state */ 403 smb2_lease_break_notification(sr, 404 (NewLevel & CACHE_RWH), AckReq); 405 NewLevel |= OPLOCK_LEVEL_GRANULAR; 406 } else { 407 smb2_oplock_break_notification(sr, NewLevel); 408 } 409 } else { 410 /* 411 * SMB1 clients should only get Level II oplocks if they 412 * set the capability indicating they know about them. 413 */ 414 if (NewLevel == OPLOCK_LEVEL_TWO && 415 ofile->f_oplock.og_dialect < NT_LM_0_12) 416 NewLevel = OPLOCK_LEVEL_NONE; 417 smb1_oplock_break_notification(sr, NewLevel); 418 } 419 420 /* 421 * Keep track of what we last sent to the client, 422 * preserving the GRANULAR flag (if a lease). 423 * If we're expecting an ACK, set og_breaking 424 * (and maybe lease->ls_breaking) so we can 425 * later find the ofile with breaks pending. 426 */ 427 if (AckReq) { 428 uint32_t BreakTo; 429 430 if (lease != NULL) { 431 BreakTo = (NewLevel & CACHE_RWH) << BREAK_SHIFT; 432 if (BreakTo == 0) 433 BreakTo = BREAK_TO_NO_CACHING; 434 lease->ls_breaking = BreakTo; 435 } else { 436 if ((NewLevel & LEVEL_TWO_OPLOCK) != 0) 437 BreakTo = BREAK_TO_TWO; 438 else 439 BreakTo = BREAK_TO_NONE; 440 } 441 /* Will update og_state in ack. */ 442 ofile->f_oplock.og_breaking = BreakTo; 443 } else { 444 if (lease != NULL) 445 lease->ls_state = NewLevel & CACHE_RWH; 446 ofile->f_oplock.og_state = NewLevel; 447 } 448 449 /* 450 * Try to send the break message to the client. 451 * When we get to multi-channel, this is supposed to 452 * try to send on every channel before giving up. 453 */ 454 if (sr->session == ofile->f_session) 455 rc = smb_session_send(sr->session, 0, &sr->reply); 456 else 457 rc = ENOTCONN; 458 459 if (rc == 0) { 460 /* 461 * OK, we were able to send the break message. 462 * If no ack. required, we're done. 463 */ 464 if (!AckReq) 465 return; 466 467 /* 468 * We're expecting an ACK. Wait in this thread 469 * so we can log clients that don't respond. 470 * 471 * If debugging, may want to break after a 472 * short wait to look into why we might be 473 * holding up progress. (i.e. locks?) 474 */ 475 #ifdef DEBUG 476 if (smb_oplock_debug_wait > 0) { 477 status = smb_oplock_wait_break(ofile->f_node, 478 smb_oplock_debug_wait); 479 if (status == 0) 480 return; 481 cmn_err(CE_NOTE, "clnt %s oplock break wait debug", 482 sr->session->ip_addr_str); 483 debug_enter("oplock_wait"); 484 } 485 #endif 486 status = smb_oplock_wait_break(ofile->f_node, 487 smb_oplock_timeout_ack); 488 if (status == 0) 489 return; 490 491 cmn_err(CE_NOTE, "clnt %s oplock break timeout", 492 sr->session->ip_addr_str); 493 DTRACE_PROBE1(break_timeout, smb_ofile_t, ofile); 494 495 /* 496 * Will do local ack below. Note, after timeout, 497 * do a break to none or "no caching" regardless 498 * of what the passed in cache level was. 499 * That means: clear all except GRANULAR. 500 */ 501 NewLevel &= OPLOCK_LEVEL_GRANULAR; 502 } else { 503 /* 504 * We were unable to send the oplock break request. 505 * Generally, that means we have no connection to this 506 * client right now, and this ofile will have state 507 * SMB_OFILE_STATE_ORPHANED. We either close the handle 508 * or break the oplock locally, in which case the client 509 * gets the updated oplock state when they reconnect. 510 * Decide whether to keep or close. 511 * 512 * Relevant [MS-SMB2] sections: 513 * 514 * 3.3.4.6 Object Store Indicates an Oplock Break 515 * If Open.Connection is NULL, Open.IsResilient is FALSE, 516 * Open.IsDurable is FALSE and Open.IsPersistent is FALSE, 517 * the server SHOULD close the Open as specified in... 518 * 519 * 3.3.4.7 Object Store Indicates a Lease Break 520 * If Open.Connection is NULL, the server MUST close the 521 * Open as specified in ... for the following cases: 522 * - Open.IsResilient is FALSE, Open.IsDurable is FALSE, 523 * and Open.IsPersistent is FALSE. 524 * - Lease.BreakToLeaseState does not contain 525 * ...HANDLE_CACHING and Open.IsDurable is TRUE. 526 * If Lease.LeaseOpens is empty, (... local ack to "none"). 527 */ 528 529 /* 530 * See similar logic in smb_dh_should_save 531 */ 532 switch (ofile->dh_vers) { 533 case SMB2_RESILIENT: 534 break; /* keep DH */ 535 536 case SMB2_DURABLE_V2: 537 if (ofile->dh_persist) 538 break; /* keep DH */ 539 /* FALLTHROUGH */ 540 case SMB2_DURABLE_V1: 541 /* IS durable (v1 or v2) */ 542 if ((NewLevel & (OPLOCK_LEVEL_BATCH | 543 OPLOCK_LEVEL_CACHE_HANDLE)) != 0) 544 break; /* keep DH */ 545 /* FALLTHROUGH */ 546 case SMB2_NOT_DURABLE: 547 default: 548 smb_ofile_close(ofile, 0); 549 return; 550 } 551 /* Keep this ofile (durable handle). */ 552 553 if (!AckReq) { 554 /* Nothing more to do. */ 555 return; 556 } 557 } 558 559 /* 560 * We get here after either an oplock break ack timeout, 561 * or a send failure for a durable handle type that we 562 * preserve rather than just close. Do local ack. 563 */ 564 ofile->f_oplock.og_breaking = 0; 565 if (lease != NULL) 566 lease->ls_breaking = 0; 567 568 status = smb_oplock_ack_break(sr, ofile, &NewLevel); 569 if (status == NT_STATUS_OPLOCK_BREAK_IN_PROGRESS) { 570 /* Not expecting this status return. */ 571 cmn_err(CE_NOTE, "clnt local oplock ack wait?"); 572 (void) smb_oplock_wait_break(ofile->f_node, 573 smb_oplock_timeout_ack); 574 status = 0; 575 } 576 if (status != 0) { 577 cmn_err(CE_NOTE, "clnt local oplock ack, " 578 "status=0x%x", status); 579 } 580 581 /* Update og_state as if we heard from the client. */ 582 ofile->f_oplock.og_state = NewLevel; 583 if (lease != NULL) { 584 lease->ls_state = NewLevel & CACHE_RWH; 585 } 586 } 587 588 /* 589 * See: NT_STATUS_OPLOCK_HANDLE_CLOSED above, 590 * and: STATUS_NEW_HANDLE 591 * 592 * The FS-level oplock layer calls this to update the 593 * SMB-level state when a handle loses its oplock. 594 */ 595 static void 596 smb_oplock_hdl_clear(smb_ofile_t *ofile) 597 { 598 smb_lease_t *lease = ofile->f_lease; 599 600 if (lease != NULL) { 601 if (lease->ls_oplock_ofile == ofile) { 602 /* Last close on the lease. */ 603 lease->ls_oplock_ofile = NULL; 604 } 605 } 606 ofile->f_oplock.og_state = 0; 607 ofile->f_oplock.og_breaking = 0; 608 } 609 610 /* 611 * Wait up to "timeout" mSec. for the current oplock "breaking" flags 612 * to be cleared (by smb_oplock_ack_break or smb_oplock_break_CLOSE). 613 * 614 * Callers of the above public oplock functions: 615 * smb_oplock_request() 616 * smb_oplock_ack_break() 617 * smb_oplock_break_OPEN() ... 618 * check for return status == NT_STATUS_OPLOCK_BREAK_IN_PROGRESS 619 * and call this function to wait for the break to complete. 620 * 621 * Most callers should use this default timeout, which they get 622 * by passing zero as the timeout arg. This include places where 623 * we're about to do something that invalidates some cache. 624 */ 625 uint32_t 626 smb_oplock_wait_break(smb_node_t *node, int timeout) /* mSec. */ 627 { 628 smb_oplock_t *ol; 629 clock_t time, rv; 630 uint32_t status = 0; 631 632 if (timeout == 0) 633 timeout = smb_oplock_timeout_def; 634 635 SMB_NODE_VALID(node); 636 ol = &node->n_oplock; 637 638 mutex_enter(&ol->ol_mutex); 639 time = MSEC_TO_TICK(timeout) + ddi_get_lbolt(); 640 641 while ((ol->ol_state & BREAK_ANY) != 0) { 642 ol->waiters++; 643 rv = cv_timedwait(&ol->WaitingOpenCV, 644 &ol->ol_mutex, time); 645 ol->waiters--; 646 if (rv < 0) { 647 status = NT_STATUS_CANNOT_BREAK_OPLOCK; 648 break; 649 } 650 } 651 652 mutex_exit(&ol->ol_mutex); 653 654 return (status); 655 } 656