/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
 */

/*
 * (SMB1/SMB2) Server-level Oplock support.
 *
 * Conceptually, this is a separate layer on top of the
 * file system (FS) layer oplock code in smb_cmn_oplock.c.
 * If these layers were more distinct, the FS layer would
 * need to use call-back functions (installed from here)
 * to "indicate an oplock break to the server" (see below).
 * As these layers are all in the same kernel module, the
 * delivery of these break indications just uses a direct
 * function call to smb_oplock_ind_break() below.
 *
 * This layer is responsible for handling the break indication,
 * which often requires scheduling a taskq job in the server,
 * and sending an oplock break message to the client using
 * the appropriate protocol for the open handle affected.
 *
 * The details of composing an oplock break message, the
 * protocol-specific details of requesting an oplock, and
 * returning that oplock to the client are in the files:
 *  smb_oplock.c, smb2_oplock.c, smb2_lease.c
 */

#include <smbsrv/smb2_kproto.h>
#include <smbsrv/smb_oplock.h>

/*
 * Verify relationship between BREAK_TO_... and CACHE bits,
 * used when setting the BREAK_TO_... below.
 */
#if BREAK_TO_READ_CACHING != (READ_CACHING << BREAK_SHIFT)
#error "BREAK_TO_READ_CACHING"
#endif
#if BREAK_TO_HANDLE_CACHING != (HANDLE_CACHING << BREAK_SHIFT)
#error "BREAK_TO_HANDLE_CACHING"
#endif
#if BREAK_TO_WRITE_CACHING != (WRITE_CACHING << BREAK_SHIFT)
#error "BREAK_TO_WRITE_CACHING"
#endif
/*
 * All three caching bits together.  Used in this file as a mask to
 * extract the cache flags from an oplock level (NewLevel & CACHE_RWH).
 */
#define	CACHE_RWH	(READ_CACHING | WRITE_CACHING | HANDLE_CACHING)

/*
 * This is the timeout used in the thread that sends an
 * oplock break and waits for the client to respond
 * before it breaks the oplock locally.
 */
int smb_oplock_timeout_ack = 30000; /* mSec. */

/*
 * This is the timeout used in threads that have just
 * finished some sort of oplock request and now must
 * wait for (possibly multiple) breaks to complete.
 * This value must be at least a couple seconds LONGER
 * than the ack timeout above so that I/O callers won't
 * give up waiting before the local ack timeout.
 */
int smb_oplock_timeout_def = 45000; /* mSec. */

static void smb_oplock_async_break(void *);
static void smb_oplock_hdl_clear(smb_ofile_t *);


/*
 * 2.1.5.17.3 Indicating an Oplock Break to the Server
 *
 * The inputs for indicating an oplock break to the server are:
 *
 *	BreakingOplockOpen: The Open used to request the oplock
 *	  that is now breaking.
 *	NewOplockLevel: The type of oplock the requested oplock
 *	  has been broken to. Valid values are as follows:
 *		LEVEL_NONE (that is, no oplock)
 *		LEVEL_TWO
 *		A combination of one or more of the following flags:
 *			READ_CACHING
 *			HANDLE_CACHING
 *			WRITE_CACHING
 *	AcknowledgeRequired: A Boolean value; TRUE if the server
 *	  MUST acknowledge the oplock break, FALSE if not,
 *	  as specified in section 2.1.5.18.
 *	OplockCompletionStatus: The NTSTATUS code to return to the server.
 *
 * This algorithm simply represents the completion of an oplock request,
 * as specified in section 2.1.5.17.1 or section 2.1.5.17.2. The server
 * is expected to associate the return status from this algorithm with
 * BreakingOplockOpen, which is the Open passed in when it requested
 * the oplock that is now breaking.
 *
 * It is important to note that because several oplocks can be outstanding
 * in parallel, although this algorithm represents the completion of an
 * oplock request, it might not result in the completion of the algorithm
 * that called it. In particular, calling this algorithm will result in
 * completion of the caller only if BreakingOplockOpen is the same as the
 * Open with which the calling algorithm was itself called. To mitigate
 * confusion, each algorithm that refers to this section will specify
 * whether that algorithm's operation terminates at that point or not.
 *
 * The object store MUST return OplockCompletionStatus,
 * AcknowledgeRequired, and NewOplockLevel to the server (the algorithm is
 * as specified in section 2.1.5.17.1 and section 2.1.5.17.2).
 *
 * Implementation:
 *
 * We use two versions of this function:
 *	smb_oplock_ind_break_in_ack
 *	smb_oplock_ind_break
 *
 * The first is used when we're handling an Oplock Break Ack.
 * The second is used when other operations cause a break,
 * generally in one of the smb_oplock_break_... functions.
 *
 * Note that these are call-back functions that may be called with the
 * node ofile list rwlock held and the node oplock mutex entered, so
 * these should ONLY schedule oplock break work, and MUST NOT attempt
 * any actions that might require either of those locks.
 */

/*
 * smb_oplock_ind_break_in_ack
 *
 * Variant of smb_oplock_ind_break() for the oplock Ack handler.
137 * When we need to indicate another oplock break from within the 138 * Ack handler (during the Ack. of some previous oplock break) 139 * we need to make sure this new break indication goes out only 140 * AFTER the reply to the current break ack. is sent out. 141 * 142 * In this case, we always have an SR (the break ack) so we can 143 * append the "ind break" work to the current SR and let the 144 * request hander thread do this work after the reply is sent. 145 * Note: this is always an SMB2 or later request, because this 146 * only happens for "granular" oplocks, which are SMB2-only. 147 * 148 * This is mostly the same as smb_oplock_ind_break() except: 149 * - The only CompletionStatus possible is STATUS_CANT_GRANT. 150 * - Instead of taskq_dispatch this appends the new SR to 151 * the "post work" queue on the current SR. 152 * 153 * Note called with the node ofile list rwlock held and 154 * the oplock mutex entered. 155 */ 156 void 157 smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile, 158 uint32_t NewLevel, boolean_t AckRequired) 159 { 160 smb_request_t *new_sr; 161 162 /* 163 * This should happen only with SMB2 or later, 164 * but in case that ever changes... 165 */ 166 if (ack_sr->session->dialect < SMB_VERS_2_BASE) { 167 smb_oplock_ind_break(ofile, NewLevel, 168 AckRequired, STATUS_CANT_GRANT); 169 return; 170 } 171 172 /* 173 * We're going to schedule a request that will have a 174 * reference to this ofile. Get the hold first. 175 */ 176 if (!smb_ofile_hold_olbrk(ofile)) { 177 /* It's closing (or whatever). Nothing to do. */ 178 return; 179 } 180 181 /* 182 * When called from Ack processing, we want to use a 183 * request on the session doing the ack. If we can't 184 * allocate a request on that session (because it's 185 * now disconnecting) just fall-back to the normal 186 * oplock break code path which deals with that. 187 * Once we have a request on the ack session, that 188 * session won't go away until the request is done. 
189 */ 190 new_sr = smb_request_alloc(ack_sr->session, 0); 191 if (new_sr == NULL) { 192 smb_oplock_ind_break(ofile, NewLevel, 193 AckRequired, STATUS_CANT_GRANT); 194 smb_ofile_release(ofile); 195 return; 196 } 197 198 new_sr->sr_state = SMB_REQ_STATE_SUBMITTED; 199 new_sr->smb2_async = B_TRUE; 200 new_sr->user_cr = zone_kcred(); 201 new_sr->fid_ofile = ofile; 202 /* Leave tid_tree, uid_user NULL. */ 203 new_sr->arg.olbrk.NewLevel = NewLevel; 204 new_sr->arg.olbrk.AckRequired = AckRequired; 205 206 /* 207 * Using smb2_cmd_code to indicate what to call. 208 * work func. will call smb_oplock_send_brk 209 */ 210 new_sr->smb2_cmd_code = SMB2_OPLOCK_BREAK; 211 smb2sr_append_postwork(ack_sr, new_sr); 212 } 213 214 /* 215 * smb_oplock_ind_break 216 * 217 * This is the function described in [MS-FSA] 2.1.5.17.3 218 * which is called many places in the oplock break code. 219 * 220 * Schedule a request & taskq job to do oplock break work 221 * as requested by the FS-level code (smb_cmn_oplock.c). 222 * 223 * Note called with the node ofile list rwlock held and 224 * the oplock mutex entered. 225 */ 226 void 227 smb_oplock_ind_break(smb_ofile_t *ofile, uint32_t NewLevel, 228 boolean_t AckRequired, uint32_t CompletionStatus) 229 { 230 smb_server_t *sv = ofile->f_server; 231 smb_request_t *sr = NULL; 232 233 /* 234 * See notes at smb_oplock_async_break re. CompletionStatus 235 * Check for any invalid codes here, so assert happens in 236 * the thread passing an unexpected value. 237 * The real work happens in a taskq job. 238 */ 239 switch (CompletionStatus) { 240 241 case NT_STATUS_SUCCESS: 242 case STATUS_CANT_GRANT: 243 /* Send break via taskq job. */ 244 break; 245 246 case STATUS_NEW_HANDLE: 247 case NT_STATUS_OPLOCK_HANDLE_CLOSED: 248 smb_oplock_hdl_clear(ofile); 249 return; 250 251 default: 252 ASSERT(0); 253 return; 254 } 255 256 /* 257 * We're going to schedule a request that will have a 258 * reference to this ofile. Get the hold first. 
259 */ 260 if (!smb_ofile_hold_olbrk(ofile)) { 261 /* It's closing (or whatever). Nothing to do. */ 262 return; 263 } 264 265 /* 266 * We need a request allocated on the session that owns 267 * this ofile in order to safely send on that session. 268 * 269 * Note that while we hold a ref. on the ofile, it's 270 * f_session will not change. An ofile in state 271 * _ORPHANED will have f_session == NULL, but the 272 * f_session won't _change_ while we have a ref, 273 * and won't be torn down under our feet. 274 * 275 * If f_session is NULL, or it's in a state that doesn't 276 * allow new requests, use the special "server" session. 277 */ 278 if (ofile->f_session != NULL) 279 sr = smb_request_alloc(ofile->f_session, 0); 280 if (sr == NULL) 281 sr = smb_request_alloc(sv->sv_session, 0); 282 283 sr->sr_state = SMB_REQ_STATE_SUBMITTED; 284 sr->smb2_async = B_TRUE; 285 sr->user_cr = zone_kcred(); 286 sr->fid_ofile = ofile; 287 /* Leave tid_tree, uid_user NULL. */ 288 sr->arg.olbrk.NewLevel = NewLevel; 289 sr->arg.olbrk.AckRequired = AckRequired; 290 sr->smb2_status = CompletionStatus; 291 292 (void) taskq_dispatch( 293 sv->sv_worker_pool, 294 smb_oplock_async_break, sr, TQ_SLEEP); 295 } 296 297 /* 298 * smb_oplock_async_break 299 * 300 * Called via the taskq to handle an asynchronous oplock break. 301 * We have a hold on the ofile, which will be released in 302 * smb_request_free (via sr->fid_ofile) 303 * 304 * Note we have: sr->uid_user == NULL, sr->tid_tree == NULL. 305 * Nothing called here needs those. 306 * 307 * Note that NewLevel as provided by the FS up-call does NOT 308 * include the GRANULAR flag. The SMB level is expected to 309 * keep track of how each oplock was acquired (by lease or 310 * traditional oplock request) and put the GRANULAR flag 311 * back into the oplock state when calling down to the 312 * FS-level code. Also note that the lease break message 313 * carries only the cache flags, not the GRANULAR flag. 
314 */ 315 static void 316 smb_oplock_async_break(void *arg) 317 { 318 smb_request_t *sr = arg; 319 uint32_t CompletionStatus; 320 321 SMB_REQ_VALID(sr); 322 323 CompletionStatus = sr->smb2_status; 324 sr->smb2_status = NT_STATUS_SUCCESS; 325 326 mutex_enter(&sr->sr_mutex); 327 sr->sr_worker = curthread; 328 sr->sr_state = SMB_REQ_STATE_ACTIVE; 329 mutex_exit(&sr->sr_mutex); 330 331 /* 332 * Note that the CompletionStatus from the FS level 333 * (smb_cmn_oplock.c) encodes what kind of action we 334 * need to take at the SMB level. 335 */ 336 switch (CompletionStatus) { 337 338 case STATUS_CANT_GRANT: 339 case NT_STATUS_SUCCESS: 340 smb_oplock_send_brk(sr); 341 break; 342 343 default: 344 /* Checked by caller. */ 345 ASSERT(0); 346 break; 347 } 348 349 if (sr->dh_nvl_dirty) { 350 sr->dh_nvl_dirty = B_FALSE; 351 smb2_dh_update_nvfile(sr); 352 } 353 354 sr->sr_state = SMB_REQ_STATE_COMPLETED; 355 smb_request_free(sr); 356 } 357 358 #ifdef DEBUG 359 int smb_oplock_debug_wait = 0; 360 #endif 361 362 /* 363 * Send an oplock break over the wire, or if we can't, 364 * then process the oplock break locally. 365 * 366 * Note that we have sr->fid_ofile here but all the other 367 * normal sr members are NULL: uid_user, tid_tree. 368 * Also sr->session may or may not be the same session as 369 * the ofile came from (ofile->f_session) depending on 370 * whether this is a "live" open or an orphaned DH, 371 * where ofile->f_session will be NULL. 372 * 373 * Given that we don't always have a session, we determine 374 * the oplock type (lease etc) from f_oplock.og_dialect. 375 */ 376 void 377 smb_oplock_send_brk(smb_request_t *sr) 378 { 379 smb_ofile_t *ofile; 380 smb_lease_t *lease; 381 uint32_t NewLevel; 382 boolean_t AckReq; 383 uint32_t status; 384 int rc; 385 386 ofile = sr->fid_ofile; 387 NewLevel = sr->arg.olbrk.NewLevel; 388 AckReq = sr->arg.olbrk.AckRequired; 389 lease = ofile->f_lease; 390 391 /* 392 * Build the break message in sr->reply. 
393 * It's free'd in smb_request_free(). 394 * Also updates the lease and NewLevel. 395 */ 396 sr->reply.max_bytes = MLEN; 397 if (ofile->f_oplock.og_dialect >= SMB_VERS_2_BASE) { 398 if (lease != NULL) { 399 /* 400 * Oplock state has changed, so 401 * update the epoch. 402 */ 403 mutex_enter(&lease->ls_mutex); 404 lease->ls_epoch++; 405 mutex_exit(&lease->ls_mutex); 406 407 /* Note, needs "old" state in og_state */ 408 smb2_lease_break_notification(sr, 409 (NewLevel & CACHE_RWH), AckReq); 410 NewLevel |= OPLOCK_LEVEL_GRANULAR; 411 } else { 412 smb2_oplock_break_notification(sr, NewLevel); 413 } 414 } else { 415 /* 416 * SMB1 clients should only get Level II oplocks if they 417 * set the capability indicating they know about them. 418 */ 419 if (NewLevel == OPLOCK_LEVEL_TWO && 420 ofile->f_oplock.og_dialect < NT_LM_0_12) 421 NewLevel = OPLOCK_LEVEL_NONE; 422 smb1_oplock_break_notification(sr, NewLevel); 423 } 424 425 /* 426 * Keep track of what we last sent to the client, 427 * preserving the GRANULAR flag (if a lease). 428 * If we're expecting an ACK, set og_breaking 429 * (and maybe lease->ls_breaking) so we can 430 * later find the ofile with breaks pending. 431 */ 432 if (AckReq) { 433 uint32_t BreakTo; 434 435 if (lease != NULL) { 436 BreakTo = (NewLevel & CACHE_RWH) << BREAK_SHIFT; 437 if (BreakTo == 0) 438 BreakTo = BREAK_TO_NO_CACHING; 439 lease->ls_breaking = BreakTo; 440 } else { 441 if ((NewLevel & LEVEL_TWO_OPLOCK) != 0) 442 BreakTo = BREAK_TO_TWO; 443 else 444 BreakTo = BREAK_TO_NONE; 445 } 446 /* Will update og_state in ack. */ 447 ofile->f_oplock.og_breaking = BreakTo; 448 } else { 449 if (lease != NULL) 450 lease->ls_state = NewLevel & CACHE_RWH; 451 ofile->f_oplock.og_state = NewLevel; 452 453 if (ofile->dh_persist) { 454 smb2_dh_update_oplock(sr, ofile); 455 } 456 } 457 458 /* 459 * Try to send the break message to the client. 460 * When we get to multi-channel, this is supposed to 461 * try to send on every channel before giving up. 
462 */ 463 if (sr->session == ofile->f_session) 464 rc = smb_session_send(sr->session, 0, &sr->reply); 465 else 466 rc = ENOTCONN; 467 468 if (rc == 0) { 469 /* 470 * OK, we were able to send the break message. 471 * If no ack. required, we're done. 472 */ 473 if (!AckReq) 474 return; 475 476 /* 477 * We're expecting an ACK. Wait in this thread 478 * so we can log clients that don't respond. 479 * 480 * If debugging, may want to break after a 481 * short wait to look into why we might be 482 * holding up progress. (i.e. locks?) 483 */ 484 #ifdef DEBUG 485 if (smb_oplock_debug_wait > 0) { 486 status = smb_oplock_wait_break(ofile->f_node, 487 smb_oplock_debug_wait); 488 if (status == 0) 489 return; 490 cmn_err(CE_NOTE, "clnt %s oplock break wait debug", 491 sr->session->ip_addr_str); 492 debug_enter("oplock_wait"); 493 } 494 #endif 495 status = smb_oplock_wait_break(ofile->f_node, 496 smb_oplock_timeout_ack); 497 if (status == 0) 498 return; 499 500 cmn_err(CE_NOTE, "clnt %s oplock break timeout", 501 sr->session->ip_addr_str); 502 DTRACE_PROBE1(break_timeout, smb_ofile_t, ofile); 503 504 /* 505 * Will do local ack below. Note, after timeout, 506 * do a break to none or "no caching" regardless 507 * of what the passed in cache level was. 508 * That means: clear all except GRANULAR. 509 */ 510 NewLevel &= OPLOCK_LEVEL_GRANULAR; 511 } else { 512 /* 513 * We were unable to send the oplock break request. 514 * Generally, that means we have no connection to this 515 * client right now, and this ofile will have state 516 * SMB_OFILE_STATE_ORPHANED. We either close the handle 517 * or break the oplock locally, in which case the client 518 * gets the updated oplock state when they reconnect. 519 * Decide whether to keep or close. 
520 * 521 * Relevant [MS-SMB2] sections: 522 * 523 * 3.3.4.6 Object Store Indicates an Oplock Break 524 * If Open.Connection is NULL, Open.IsResilient is FALSE, 525 * Open.IsDurable is FALSE and Open.IsPersistent is FALSE, 526 * the server SHOULD close the Open as specified in... 527 * 528 * 3.3.4.7 Object Store Indicates a Lease Break 529 * If Open.Connection is NULL, the server MUST close the 530 * Open as specified in ... for the following cases: 531 * - Open.IsResilient is FALSE, Open.IsDurable is FALSE, 532 * and Open.IsPersistent is FALSE. 533 * - Lease.BreakToLeaseState does not contain 534 * ...HANDLE_CACHING and Open.IsDurable is TRUE. 535 * If Lease.LeaseOpens is empty, (... local ack to "none"). 536 */ 537 538 /* 539 * See similar logic in smb_dh_should_save 540 */ 541 switch (ofile->dh_vers) { 542 case SMB2_RESILIENT: 543 break; /* keep DH */ 544 545 case SMB2_DURABLE_V2: 546 if (ofile->dh_persist) 547 break; /* keep DH */ 548 /* FALLTHROUGH */ 549 case SMB2_DURABLE_V1: 550 /* IS durable (v1 or v2) */ 551 if ((NewLevel & (OPLOCK_LEVEL_BATCH | 552 OPLOCK_LEVEL_CACHE_HANDLE)) != 0) 553 break; /* keep DH */ 554 /* FALLTHROUGH */ 555 case SMB2_NOT_DURABLE: 556 default: 557 smb_ofile_close(ofile, 0); 558 return; 559 } 560 /* Keep this ofile (durable handle). */ 561 562 if (!AckReq) { 563 /* Nothing more to do. */ 564 return; 565 } 566 } 567 568 /* 569 * We get here after either an oplock break ack timeout, 570 * or a send failure for a durable handle type that we 571 * preserve rather than just close. Do local ack. 572 */ 573 ofile->f_oplock.og_breaking = 0; 574 if (lease != NULL) 575 lease->ls_breaking = 0; 576 577 status = smb_oplock_ack_break(sr, ofile, &NewLevel); 578 if (status == NT_STATUS_OPLOCK_BREAK_IN_PROGRESS) { 579 /* Not expecting this status return. 
*/ 580 cmn_err(CE_NOTE, "clnt local oplock ack wait?"); 581 (void) smb_oplock_wait_break(ofile->f_node, 582 smb_oplock_timeout_ack); 583 status = 0; 584 } 585 if (status != 0) { 586 cmn_err(CE_NOTE, "clnt local oplock ack, " 587 "status=0x%x", status); 588 } 589 590 /* Update og_state as if we heard from the client. */ 591 ofile->f_oplock.og_state = NewLevel; 592 if (lease != NULL) { 593 lease->ls_state = NewLevel & CACHE_RWH; 594 } 595 596 if (ofile->dh_persist) { 597 smb2_dh_update_oplock(sr, ofile); 598 } 599 } 600 601 /* 602 * See: NT_STATUS_OPLOCK_HANDLE_CLOSED above, 603 * and: STATUS_NEW_HANDLE 604 * 605 * The FS-level oplock layer calls this to update the 606 * SMB-level state when a handle loses its oplock. 607 */ 608 static void 609 smb_oplock_hdl_clear(smb_ofile_t *ofile) 610 { 611 smb_lease_t *lease = ofile->f_lease; 612 613 if (lease != NULL) { 614 if (lease->ls_oplock_ofile == ofile) { 615 /* Last close on the lease. */ 616 lease->ls_oplock_ofile = NULL; 617 } 618 } 619 ofile->f_oplock.og_state = 0; 620 ofile->f_oplock.og_breaking = 0; 621 } 622 623 /* 624 * Wait up to "timeout" mSec. for the current oplock "breaking" flags 625 * to be cleared (by smb_oplock_ack_break or smb_oplock_break_CLOSE). 626 * 627 * Callers of the above public oplock functions: 628 * smb_oplock_request() 629 * smb_oplock_ack_break() 630 * smb_oplock_break_OPEN() ... 631 * check for return status == NT_STATUS_OPLOCK_BREAK_IN_PROGRESS 632 * and call this function to wait for the break to complete. 633 * 634 * Most callers should use this default timeout, which they get 635 * by passing zero as the timeout arg. This include places where 636 * we're about to do something that invalidates some cache. 637 */ 638 uint32_t 639 smb_oplock_wait_break(smb_node_t *node, int timeout) /* mSec. 
*/ 640 { 641 smb_oplock_t *ol; 642 clock_t time, rv; 643 uint32_t status = 0; 644 645 if (timeout == 0) 646 timeout = smb_oplock_timeout_def; 647 648 SMB_NODE_VALID(node); 649 ol = &node->n_oplock; 650 651 mutex_enter(&ol->ol_mutex); 652 time = MSEC_TO_TICK(timeout) + ddi_get_lbolt(); 653 654 while ((ol->ol_state & BREAK_ANY) != 0) { 655 ol->waiters++; 656 rv = cv_timedwait(&ol->WaitingOpenCV, 657 &ol->ol_mutex, time); 658 ol->waiters--; 659 if (rv < 0) { 660 status = NT_STATUS_CANNOT_BREAK_OPLOCK; 661 break; 662 } 663 } 664 665 mutex_exit(&ol->ol_mutex); 666 667 return (status); 668 } 669