1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * 4 * Copyright (C) International Business Machines Corp., 2002,2008 5 * Author(s): Steve French (sfrench@us.ibm.com) 6 * Jeremy Allison (jra@samba.org) 2006. 7 * 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/list.h> 12 #include <linux/gfp.h> 13 #include <linux/wait.h> 14 #include <linux/net.h> 15 #include <linux/delay.h> 16 #include <linux/freezer.h> 17 #include <linux/tcp.h> 18 #include <linux/bvec.h> 19 #include <linux/highmem.h> 20 #include <linux/uaccess.h> 21 #include <linux/processor.h> 22 #include <linux/mempool.h> 23 #include <linux/sched/signal.h> 24 #include <linux/task_io_accounting_ops.h> 25 #include <linux/task_work.h> 26 #include "cifsglob.h" 27 #include "cifsproto.h" 28 #include "cifs_debug.h" 29 #include "smb2proto.h" 30 #include "smbdirect.h" 31 #include "compress.h" 32 33 void 34 cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid) 35 { 36 if (mid->mid_state == MID_RESPONSE_RECEIVED) 37 mid->mid_state = MID_RESPONSE_READY; 38 wake_up_process(mid->callback_data); 39 } 40 41 void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *midEntry) 42 { 43 #ifdef CONFIG_CIFS_STATS2 44 __le16 command = server->vals->lock_cmd; 45 __u16 smb_cmd = le16_to_cpu(midEntry->command); 46 unsigned long now; 47 unsigned long roundtrip_time; 48 #endif 49 50 if (midEntry->resp_buf && (midEntry->wait_cancelled) && 51 (midEntry->mid_state == MID_RESPONSE_RECEIVED || 52 midEntry->mid_state == MID_RESPONSE_READY) && 53 server->ops->handle_cancelled_mid) 54 server->ops->handle_cancelled_mid(midEntry, server); 55 56 midEntry->mid_state = MID_FREE; 57 atomic_dec(&mid_count); 58 if (midEntry->large_buf) 59 cifs_buf_release(midEntry->resp_buf); 60 else 61 cifs_small_buf_release(midEntry->resp_buf); 62 #ifdef CONFIG_CIFS_STATS2 63 now = jiffies; 64 if (now < midEntry->when_alloc) 65 cifs_server_dbg(VFS, "Invalid mid allocation time\n"); 66 roundtrip_time = now - midEntry->when_alloc; 67 68 if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) { 69 if (atomic_read(&server->num_cmds[smb_cmd]) == 0) { 70 server->slowest_cmd[smb_cmd] = roundtrip_time; 71 server->fastest_cmd[smb_cmd] = roundtrip_time; 72 } else { 73 if (server->slowest_cmd[smb_cmd] < roundtrip_time) 74 server->slowest_cmd[smb_cmd] = roundtrip_time; 75 else if (server->fastest_cmd[smb_cmd] > roundtrip_time) 76 server->fastest_cmd[smb_cmd] = roundtrip_time; 77 } 78 cifs_stats_inc(&server->num_cmds[smb_cmd]); 79 server->time_per_cmd[smb_cmd] += roundtrip_time; 80 } 81 /* 82 * commands taking longer than one second (default) can be indications 83 * that something is wrong, unless it is quite a slow link or a very 84 * busy server. Note that this calc is unlikely or impossible to wrap 85 * as long as slow_rsp_threshold is not set way above recommended max 86 * value (32767 ie 9 hours) and is generally harmless even if wrong 87 * since only affects debug counters - so leaving the calc as simple 88 * comparison rather than doing multiple conversions and overflow 89 * checks 90 */ 91 if ((slow_rsp_threshold != 0) && 92 time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) && 93 (midEntry->command != command)) { 94 /* 95 * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command 96 * NB: le16_to_cpu returns unsigned so can not be negative below 97 */ 98 if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) 99 cifs_stats_inc(&server->smb2slowcmd[smb_cmd]); 100 101 trace_smb3_slow_rsp(smb_cmd, midEntry->mid, midEntry->pid, 102 midEntry->when_sent, midEntry->when_received); 103 if (cifsFYI & CIFS_TIMER) { 104 pr_debug("slow rsp: cmd %d mid %llu", 105 midEntry->command, midEntry->mid); 106 cifs_info("A: 0x%lx S: 0x%lx R: 0x%lx\n", 107 now - midEntry->when_alloc, 108 now - midEntry->when_sent, 109 now - midEntry->when_received); 110 } 111 } 112 #endif 113 put_task_struct(midEntry->creator); 114 115 mempool_free(midEntry, &cifs_mid_pool); 116 } 117 118 void 119 delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid) 120 { 121 spin_lock(&server->mid_queue_lock); 122 123 if (!mid->deleted_from_q) { 124 list_del_init(&mid->qhead); 125 mid->deleted_from_q = true; 126 } 127 spin_unlock(&server->mid_queue_lock); 128 129 release_mid(server, mid); 130 } 131 132 /* 133 * smb_send_kvec - send an array of kvecs to the server 134 * @server: Server to send the data to 135 * @smb_msg: Message to send 136 * @sent: amount of data sent on socket is stored here 137 * 138 * Our basic "send data to server" function. Should be called with srv_mutex 139 * held. The caller is responsible for handling the results. 140 */ 141 int 142 smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg, 143 size_t *sent) 144 { 145 int rc = 0; 146 int retries = 0; 147 struct socket *ssocket = server->ssocket; 148 149 *sent = 0; 150 151 if (server->noblocksnd) 152 smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; 153 else 154 smb_msg->msg_flags = MSG_NOSIGNAL; 155 156 while (msg_data_left(smb_msg)) { 157 /* 158 * If blocking send, we try 3 times, since each can block 159 * for 5 seconds. For nonblocking we have to try more 160 * but wait increasing amounts of time allowing time for 161 * socket to clear. The overall time we wait in either 162 * case to send on the socket is about 15 seconds. 163 * Similarly we wait for 15 seconds for a response from 164 * the server in SendReceive[2] for the server to send 165 * a response back for most types of requests (except 166 * SMB Write past end of file which can be slow, and 167 * blocking lock operations). NFS waits slightly longer 168 * than CIFS, but this can make it take longer for 169 * nonresponsive servers to be detected and 15 seconds 170 * is more than enough time for modern networks to 171 * send a packet. In most cases if we fail to send 172 * after the retries we will kill the socket and 173 * reconnect which may clear the network problem. 174 * 175 * Even if regular signals are masked, EINTR might be 176 * propagated from sk_stream_wait_memory() to here when 177 * TIF_NOTIFY_SIGNAL is used for task work. For example, 178 * certain io_uring completions will use that. Treat 179 * having EINTR with pending task work the same as EAGAIN 180 * to avoid unnecessary reconnects. 181 */ 182 rc = sock_sendmsg(ssocket, smb_msg); 183 if (rc == -EAGAIN || unlikely(rc == -EINTR && task_work_pending(current))) { 184 retries++; 185 if (retries >= 14 || 186 (!server->noblocksnd && (retries > 2))) { 187 cifs_server_dbg(VFS, "sends on sock %p stuck for 15 seconds\n", 188 ssocket); 189 return -EAGAIN; 190 } 191 msleep(1 << retries); 192 continue; 193 } 194 195 if (rc < 0) 196 return rc; 197 198 if (rc == 0) { 199 /* should never happen, letting socket clear before 200 retrying is our only obvious option here */ 201 cifs_server_dbg(VFS, "tcp sent no data\n"); 202 msleep(500); 203 continue; 204 } 205 206 /* send was at least partially successful */ 207 *sent += rc; 208 retries = 0; /* in case we get ENOSPC on the next send */ 209 } 210 return 0; 211 } 212 213 unsigned long 214 smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) 215 { 216 unsigned int i; 217 struct kvec *iov; 218 int nvec; 219 unsigned long buflen = 0; 220 221 if (!is_smb1(server) && rqst->rq_nvec >= 2 && 222 rqst->rq_iov[0].iov_len == 4) { 223 iov = &rqst->rq_iov[1]; 224 nvec = rqst->rq_nvec - 1; 225 } else { 226 iov = rqst->rq_iov; 227 nvec = rqst->rq_nvec; 228 } 229 230 /* total up iov array first */ 231 for (i = 0; i < nvec; i++) 232 buflen += iov[i].iov_len; 233 234 buflen += iov_iter_count(&rqst->rq_iter); 235 return buflen; 236 } 237 238 int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, 239 struct smb_rqst *rqst) 240 { 241 int rc; 242 struct kvec *iov; 243 int n_vec; 244 unsigned int send_length = 0; 245 unsigned int i, j; 246 sigset_t mask, oldmask; 247 size_t total_len = 0, sent, size; 248 struct socket *ssocket = server->ssocket; 249 struct msghdr smb_msg = {}; 250 __be32 rfc1002_marker; 251 252 cifs_in_send_inc(server); 253 if (cifs_rdma_enabled(server)) { 254 /* return -EAGAIN when connecting or reconnecting */ 255 rc = -EAGAIN; 256 if (server->smbd_conn) 257 rc = smbd_send(server, num_rqst, rqst); 258 goto smbd_done; 259 } 260 261 rc = -EAGAIN; 262 if (ssocket == NULL) 263 goto out; 264 265 rc = -ERESTARTSYS; 266 if (fatal_signal_pending(current)) { 267 cifs_dbg(FYI, "signal pending before send request\n"); 268 goto out; 269 } 270 271 rc = 0; 272 /* cork the socket */ 273 tcp_sock_set_cork(ssocket->sk, true); 274 275 for (j = 0; j < num_rqst; j++) 276 send_length += smb_rqst_len(server, &rqst[j]); 277 rfc1002_marker = cpu_to_be32(send_length); 278 279 /* 280 * We should not allow signals to interrupt the network send because 281 * any partial send will cause session reconnects thus increasing 282 * latency of system calls and overload a server with unnecessary 283 * requests. 284 */ 285 286 sigfillset(&mask); 287 sigprocmask(SIG_BLOCK, &mask, &oldmask); 288 289 /* Generate a rfc1002 marker */ 290 { 291 struct kvec hiov = { 292 .iov_base = &rfc1002_marker, 293 .iov_len = 4 294 }; 295 iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4); 296 rc = smb_send_kvec(server, &smb_msg, &sent); 297 if (rc < 0) 298 goto unmask; 299 300 total_len += sent; 301 send_length += 4; 302 } 303 304 cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length); 305 306 for (j = 0; j < num_rqst; j++) { 307 iov = rqst[j].rq_iov; 308 n_vec = rqst[j].rq_nvec; 309 310 size = 0; 311 for (i = 0; i < n_vec; i++) { 312 dump_smb(iov[i].iov_base, iov[i].iov_len); 313 size += iov[i].iov_len; 314 } 315 316 iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size); 317 318 rc = smb_send_kvec(server, &smb_msg, &sent); 319 if (rc < 0) 320 goto unmask; 321 322 total_len += sent; 323 324 if (iov_iter_count(&rqst[j].rq_iter) > 0) { 325 smb_msg.msg_iter = rqst[j].rq_iter; 326 rc = smb_send_kvec(server, &smb_msg, &sent); 327 if (rc < 0) 328 break; 329 total_len += sent; 330 } 331 } 332 333 unmask: 334 sigprocmask(SIG_SETMASK, &oldmask, NULL); 335 336 /* 337 * If signal is pending but we have already sent the whole packet to 338 * the server we need to return success status to allow a corresponding 339 * mid entry to be kept in the pending requests queue thus allowing 340 * to handle responses from the server by the client. 341 * 342 * If only part of the packet has been sent there is no need to hide 343 * interrupt because the session will be reconnected anyway, so there 344 * won't be any response from the server to handle. 345 */ 346 347 if (signal_pending(current) && (total_len != send_length)) { 348 cifs_dbg(FYI, "signal is pending after attempt to send\n"); 349 rc = -ERESTARTSYS; 350 } 351 352 /* uncork it */ 353 tcp_sock_set_cork(ssocket->sk, false); 354 355 if ((total_len > 0) && (total_len != send_length)) { 356 cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n", 357 send_length, total_len); 358 /* 359 * If we have only sent part of an SMB then the next SMB could 360 * be taken as the remainder of this one. We need to kill the 361 * socket so the server throws away the partial SMB 362 */ 363 cifs_signal_cifsd_for_reconnect(server, false); 364 trace_smb3_partial_send_reconnect(server->current_mid, 365 server->conn_id, server->hostname); 366 } 367 smbd_done: 368 /* 369 * there's hardly any use for the layers above to know the 370 * actual error code here. All they should do at this point is 371 * to retry the connection and hope it goes away. 372 */ 373 if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { 374 cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", 375 rc); 376 rc = -ECONNABORTED; 377 cifs_signal_cifsd_for_reconnect(server, false); 378 } else if (rc > 0) 379 rc = 0; 380 out: 381 cifs_in_send_dec(server); 382 return rc; 383 } 384 385 static int 386 smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, 387 struct smb_rqst *rqst, int flags) 388 { 389 struct smb2_transform_hdr tr_hdr; 390 struct smb_rqst new_rqst[MAX_COMPOUND] = {}; 391 struct kvec iov = { 392 .iov_base = &tr_hdr, 393 .iov_len = sizeof(tr_hdr), 394 }; 395 int rc; 396 397 if (flags & CIFS_COMPRESS_REQ) 398 return smb_compress(server, &rqst[0], __smb_send_rqst); 399 400 if (!(flags & CIFS_TRANSFORM_REQ)) 401 return __smb_send_rqst(server, num_rqst, rqst); 402 403 if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) 404 return smb_EIO1(smb_eio_trace_tx_max_compound, num_rqst); 405 406 if (!server->ops->init_transform_rq) { 407 cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); 408 return smb_EIO(smb_eio_trace_tx_need_transform); 409 } 410 411 new_rqst[0].rq_iov = &iov; 412 new_rqst[0].rq_nvec = 1; 413 414 rc = server->ops->init_transform_rq(server, num_rqst + 1, 415 new_rqst, rqst); 416 if (!rc) { 417 rc = __smb_send_rqst(server, num_rqst + 1, new_rqst); 418 smb3_free_compound_rqst(num_rqst, &new_rqst[1]); 419 } 420 return rc; 421 } 422 423 static int 424 wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, 425 const int timeout, const int flags, 426 unsigned int *instance) 427 { 428 long rc; 429 int *credits; 430 int optype; 431 long int t; 432 int scredits, in_flight; 433 434 if (timeout < 0) 435 t = MAX_JIFFY_OFFSET; 436 else 437 t = msecs_to_jiffies(timeout); 438 439 optype = flags & CIFS_OP_MASK; 440 441 *instance = 0; 442 443 credits = server->ops->get_credits_field(server, optype); 444 /* Since an echo is already inflight, no need to wait to send another */ 445 if (*credits <= 0 && optype == CIFS_ECHO_OP) 446 return -EAGAIN; 447 448 spin_lock(&server->req_lock); 449 if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) { 450 /* oplock breaks must not be held up */ 451 server->in_flight++; 452 if (server->in_flight > server->max_in_flight) 453 server->max_in_flight = server->in_flight; 454 *credits -= 1; 455 *instance = server->reconnect_instance; 456 scredits = *credits; 457 in_flight = server->in_flight; 458 spin_unlock(&server->req_lock); 459 460 trace_smb3_nblk_credits(server->current_mid, 461 server->conn_id, server->hostname, scredits, -1, in_flight); 462 cifs_dbg(FYI, "%s: remove %u credits total=%d\n", 463 __func__, 1, scredits); 464 465 return 0; 466 } 467 468 while (1) { 469 spin_unlock(&server->req_lock); 470 471 spin_lock(&server->srv_lock); 472 if (server->tcpStatus == CifsExiting) { 473 spin_unlock(&server->srv_lock); 474 return -ENOENT; 475 } 476 spin_unlock(&server->srv_lock); 477 478 spin_lock(&server->req_lock); 479 if (*credits < num_credits) { 480 scredits = *credits; 481 spin_unlock(&server->req_lock); 482 483 cifs_num_waiters_inc(server); 484 rc = wait_event_killable_timeout(server->request_q, 485 has_credits(server, credits, num_credits), t); 486 cifs_num_waiters_dec(server); 487 if (!rc) { 488 spin_lock(&server->req_lock); 489 scredits = *credits; 490 in_flight = server->in_flight; 491 spin_unlock(&server->req_lock); 492 493 trace_smb3_credit_timeout(server->current_mid, 494 server->conn_id, server->hostname, scredits, 495 num_credits, in_flight); 496 cifs_server_dbg(VFS, "wait timed out after %d ms\n", 497 timeout); 498 return -EBUSY; 499 } 500 if (rc == -ERESTARTSYS) 501 return -ERESTARTSYS; 502 spin_lock(&server->req_lock); 503 } else { 504 /* 505 * For normal commands, reserve the last MAX_COMPOUND 506 * credits to compound requests. 507 * Otherwise these compounds could be permanently 508 * starved for credits by single-credit requests. 509 * 510 * To prevent spinning CPU, block this thread until 511 * there are >MAX_COMPOUND credits available. 512 * But only do this is we already have a lot of 513 * credits in flight to avoid triggering this check 514 * for servers that are slow to hand out credits on 515 * new sessions. 516 */ 517 if (!optype && num_credits == 1 && 518 server->in_flight > 2 * MAX_COMPOUND && 519 *credits <= MAX_COMPOUND) { 520 spin_unlock(&server->req_lock); 521 522 cifs_num_waiters_inc(server); 523 rc = wait_event_killable_timeout( 524 server->request_q, 525 has_credits(server, credits, 526 MAX_COMPOUND + 1), 527 t); 528 cifs_num_waiters_dec(server); 529 if (!rc) { 530 spin_lock(&server->req_lock); 531 scredits = *credits; 532 in_flight = server->in_flight; 533 spin_unlock(&server->req_lock); 534 535 trace_smb3_credit_timeout( 536 server->current_mid, 537 server->conn_id, server->hostname, 538 scredits, num_credits, in_flight); 539 cifs_server_dbg(VFS, "wait timed out after %d ms\n", 540 timeout); 541 return -EBUSY; 542 } 543 if (rc == -ERESTARTSYS) 544 return -ERESTARTSYS; 545 spin_lock(&server->req_lock); 546 continue; 547 } 548 549 /* 550 * Can not count locking commands against total 551 * as they are allowed to block on server. 552 */ 553 554 /* update # of requests on the wire to server */ 555 if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) { 556 *credits -= num_credits; 557 server->in_flight += num_credits; 558 if (server->in_flight > server->max_in_flight) 559 server->max_in_flight = server->in_flight; 560 *instance = server->reconnect_instance; 561 } 562 scredits = *credits; 563 in_flight = server->in_flight; 564 spin_unlock(&server->req_lock); 565 566 trace_smb3_waitff_credits(server->current_mid, 567 server->conn_id, server->hostname, scredits, 568 -(num_credits), in_flight); 569 cifs_dbg(FYI, "%s: remove %u credits total=%d\n", 570 __func__, num_credits, scredits); 571 break; 572 } 573 } 574 return 0; 575 } 576 577 int wait_for_free_request(struct TCP_Server_Info *server, const int flags, 578 unsigned int *instance) 579 { 580 return wait_for_free_credits(server, 1, -1, flags, 581 instance); 582 } 583 584 static int 585 wait_for_compound_request(struct TCP_Server_Info *server, int num, 586 const int flags, unsigned int *instance) 587 { 588 int *credits; 589 int scredits, in_flight; 590 591 credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK); 592 593 spin_lock(&server->req_lock); 594 scredits = *credits; 595 in_flight = server->in_flight; 596 597 if (*credits < num) { 598 /* 599 * If the server is tight on resources or just gives us less 600 * credits for other reasons (e.g. requests are coming out of 601 * order and the server delays granting more credits until it 602 * processes a missing mid) and we exhausted most available 603 * credits there may be situations when we try to send 604 * a compound request but we don't have enough credits. At this 605 * point the client needs to decide if it should wait for 606 * additional credits or fail the request. If at least one 607 * request is in flight there is a high probability that the 608 * server will return enough credits to satisfy this compound 609 * request. 610 * 611 * Return immediately if no requests in flight since we will be 612 * stuck on waiting for credits. 613 */ 614 if (server->in_flight == 0) { 615 spin_unlock(&server->req_lock); 616 trace_smb3_insufficient_credits(server->current_mid, 617 server->conn_id, server->hostname, scredits, 618 num, in_flight); 619 cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n", 620 __func__, in_flight, num, scredits); 621 return -EDEADLK; 622 } 623 } 624 spin_unlock(&server->req_lock); 625 626 return wait_for_free_credits(server, num, 60000, flags, 627 instance); 628 } 629 630 int 631 cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, 632 size_t *num, struct cifs_credits *credits) 633 { 634 *num = size; 635 credits->value = 0; 636 credits->instance = server->reconnect_instance; 637 return 0; 638 } 639 640 int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *mid) 641 { 642 unsigned int sleep_state = TASK_KILLABLE; 643 int error; 644 645 if (mid->sr_flags & CIFS_INTERRUPTIBLE_WAIT) 646 sleep_state = TASK_INTERRUPTIBLE; 647 648 error = wait_event_state(server->response_q, 649 mid->mid_state != MID_REQUEST_SUBMITTED && 650 mid->mid_state != MID_RESPONSE_RECEIVED, 651 (sleep_state | TASK_FREEZABLE_UNSAFE)); 652 if (error < 0) 653 return -ERESTARTSYS; 654 655 return 0; 656 } 657 658 /* 659 * Send a SMB request and set the callback function in the mid to handle 660 * the result. Caller is responsible for dealing with timeouts. 661 */ 662 int 663 cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, 664 mid_receive_t receive, mid_callback_t callback, 665 mid_handle_t handle, void *cbdata, const int flags, 666 const struct cifs_credits *exist_credits) 667 { 668 int rc; 669 struct mid_q_entry *mid; 670 struct cifs_credits credits = { .value = 0, .instance = 0 }; 671 unsigned int instance; 672 int optype; 673 674 optype = flags & CIFS_OP_MASK; 675 676 if ((flags & CIFS_HAS_CREDITS) == 0) { 677 rc = wait_for_free_request(server, flags, &instance); 678 if (rc) 679 return rc; 680 credits.value = 1; 681 credits.instance = instance; 682 } else 683 instance = exist_credits->instance; 684 685 cifs_server_lock(server); 686 687 /* 688 * We can't use credits obtained from the previous session to send this 689 * request. Check if there were reconnects after we obtained credits and 690 * return -EAGAIN in such cases to let callers handle it. 691 */ 692 if (instance != server->reconnect_instance) { 693 cifs_server_unlock(server); 694 add_credits_and_wake_if(server, &credits, optype); 695 return -EAGAIN; 696 } 697 698 mid = server->ops->setup_async_request(server, rqst); 699 if (IS_ERR(mid)) { 700 cifs_server_unlock(server); 701 add_credits_and_wake_if(server, &credits, optype); 702 return PTR_ERR(mid); 703 } 704 705 mid->sr_flags = flags; 706 mid->receive = receive; 707 mid->callback = callback; 708 mid->callback_data = cbdata; 709 mid->handle = handle; 710 mid->mid_state = MID_REQUEST_SUBMITTED; 711 712 /* put it on the pending_mid_q */ 713 spin_lock(&server->mid_queue_lock); 714 list_add_tail(&mid->qhead, &server->pending_mid_q); 715 spin_unlock(&server->mid_queue_lock); 716 717 /* 718 * Need to store the time in mid before calling I/O. For call_async, 719 * I/O response may come back and free the mid entry on another thread. 720 */ 721 cifs_save_when_sent(mid); 722 rc = smb_send_rqst(server, 1, rqst, flags); 723 724 if (rc < 0) { 725 revert_current_mid(server, mid->credits); 726 server->sequence_number -= 2; 727 delete_mid(server, mid); 728 } 729 730 cifs_server_unlock(server); 731 732 if (rc == 0) 733 return 0; 734 735 add_credits_and_wake_if(server, &credits, optype); 736 return rc; 737 } 738 739 int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) 740 { 741 int rc = 0; 742 743 cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n", 744 __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state); 745 746 spin_lock(&server->mid_queue_lock); 747 switch (mid->mid_state) { 748 case MID_RESPONSE_READY: 749 spin_unlock(&server->mid_queue_lock); 750 return rc; 751 case MID_RETRY_NEEDED: 752 rc = -EAGAIN; 753 break; 754 case MID_RESPONSE_MALFORMED: 755 rc = smb_EIO(smb_eio_trace_rx_sync_mid_malformed); 756 break; 757 case MID_SHUTDOWN: 758 rc = -EHOSTDOWN; 759 break; 760 case MID_RC: 761 rc = mid->mid_rc; 762 break; 763 default: 764 if (mid->deleted_from_q == false) { 765 list_del_init(&mid->qhead); 766 mid->deleted_from_q = true; 767 } 768 spin_unlock(&server->mid_queue_lock); 769 cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", 770 __func__, mid->mid, mid->mid_state); 771 rc = smb_EIO1(smb_eio_trace_rx_sync_mid_invalid, mid->mid_state); 772 goto sync_mid_done; 773 } 774 spin_unlock(&server->mid_queue_lock); 775 776 sync_mid_done: 777 release_mid(server, mid); 778 return rc; 779 } 780 781 static void 782 cifs_compound_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) 783 { 784 struct cifs_credits credits = { 785 .value = server->ops->get_credits(mid), 786 .instance = server->reconnect_instance, 787 }; 788 789 add_credits(server, &credits, mid->optype); 790 791 if (mid->mid_state == MID_RESPONSE_RECEIVED) 792 mid->mid_state = MID_RESPONSE_READY; 793 } 794 795 static void 796 cifs_compound_last_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) 797 { 798 cifs_compound_callback(server, mid); 799 cifs_wake_up_task(server, mid); 800 } 801 802 static void 803 cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) 804 { 805 cifs_compound_callback(server, mid); 806 release_mid(server, mid); 807 } 808 809 /* 810 * cifs_pick_channel - pick an eligible channel for network operations 811 * 812 * @ses: session reference 813 * 814 * Select an eligible channel (not terminating and not marked as needing 815 * reconnect), preferring the least loaded one. If no eligible channel is 816 * found, fall back to the primary channel (index 0). 817 * 818 * Return: TCP_Server_Info pointer for the chosen channel, or NULL if @ses is 819 * NULL. 820 */ 821 struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) 822 { 823 uint index = 0; 824 unsigned int min_in_flight = UINT_MAX; 825 struct TCP_Server_Info *server = NULL; 826 int i, start, cur; 827 828 if (!ses) 829 return NULL; 830 831 spin_lock(&ses->chan_lock); 832 start = atomic_inc_return(&ses->chan_seq); 833 for (i = 0; i < ses->chan_count; i++) { 834 cur = (start + i) % ses->chan_count; 835 server = ses->chans[cur].server; 836 if (!server || server->terminate) 837 continue; 838 839 if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur)) 840 continue; 841 842 /* 843 * strictly speaking, we should pick up req_lock to read 844 * server->in_flight. But it shouldn't matter much here if we 845 * race while reading this data. The worst that can happen is 846 * that we could use a channel that's not least loaded. Avoiding 847 * taking the lock could help reduce wait time, which is 848 * important for this function 849 */ 850 if (server->in_flight < min_in_flight) { 851 min_in_flight = server->in_flight; 852 index = cur; 853 } 854 } 855 856 server = ses->chans[index].server; 857 spin_unlock(&ses->chan_lock); 858 859 return server; 860 } 861 862 int 863 compound_send_recv(const unsigned int xid, struct cifs_ses *ses, 864 struct TCP_Server_Info *server, 865 const int flags, const int num_rqst, struct smb_rqst *rqst, 866 int *resp_buf_type, struct kvec *resp_iov) 867 { 868 int i, j, optype, rc = 0; 869 struct mid_q_entry *mid[MAX_COMPOUND]; 870 bool cancelled_mid[MAX_COMPOUND] = {false}; 871 struct cifs_credits credits[MAX_COMPOUND] = { 872 { .value = 0, .instance = 0 } 873 }; 874 unsigned int instance; 875 char *buf; 876 877 optype = flags & CIFS_OP_MASK; 878 879 for (i = 0; i < num_rqst; i++) 880 resp_buf_type[i] = CIFS_NO_BUFFER; /* no response buf yet */ 881 882 if (!ses || !ses->server || !server) { 883 cifs_dbg(VFS, "Null session\n"); 884 return smb_EIO(smb_eio_trace_null_pointers); 885 } 886 887 spin_lock(&server->srv_lock); 888 if (server->tcpStatus == CifsExiting) { 889 spin_unlock(&server->srv_lock); 890 return -ENOENT; 891 } 892 spin_unlock(&server->srv_lock); 893 894 /* 895 * Wait for all the requests to become available. 896 * This approach still leaves the possibility to be stuck waiting for 897 * credits if the server doesn't grant credits to the outstanding 898 * requests and if the client is completely idle, not generating any 899 * other requests. 900 * This can be handled by the eventual session reconnect. 901 */ 902 rc = wait_for_compound_request(server, num_rqst, flags, 903 &instance); 904 if (rc) 905 return rc; 906 907 for (i = 0; i < num_rqst; i++) { 908 credits[i].value = 1; 909 credits[i].instance = instance; 910 } 911 912 /* 913 * Make sure that we sign in the same order that we send on this socket 914 * and avoid races inside tcp sendmsg code that could cause corruption 915 * of smb data. 916 */ 917 918 cifs_server_lock(server); 919 920 /* 921 * All the parts of the compound chain belong obtained credits from the 922 * same session. We can not use credits obtained from the previous 923 * session to send this request. Check if there were reconnects after 924 * we obtained credits and return -EAGAIN in such cases to let callers 925 * handle it. 926 */ 927 if (instance != server->reconnect_instance) { 928 cifs_server_unlock(server); 929 for (j = 0; j < num_rqst; j++) 930 add_credits(server, &credits[j], optype); 931 return -EAGAIN; 932 } 933 934 for (i = 0; i < num_rqst; i++) { 935 mid[i] = server->ops->setup_request(ses, server, &rqst[i]); 936 if (IS_ERR(mid[i])) { 937 revert_current_mid(server, i); 938 for (j = 0; j < i; j++) 939 delete_mid(server, mid[j]); 940 cifs_server_unlock(server); 941 942 /* Update # of requests on wire to server */ 943 for (j = 0; j < num_rqst; j++) 944 add_credits(server, &credits[j], optype); 945 return PTR_ERR(mid[i]); 946 } 947 948 mid[i]->sr_flags = flags; 949 mid[i]->mid_state = MID_REQUEST_SUBMITTED; 950 mid[i]->optype = optype; 951 /* 952 * Invoke callback for every part of the compound chain 953 * to calculate credits properly. Wake up this thread only when 954 * the last element is received. 955 */ 956 if (i < num_rqst - 1) 957 mid[i]->callback = cifs_compound_callback; 958 else 959 mid[i]->callback = cifs_compound_last_callback; 960 } 961 rc = smb_send_rqst(server, num_rqst, rqst, flags); 962 963 for (i = 0; i < num_rqst; i++) 964 cifs_save_when_sent(mid[i]); 965 966 if (rc < 0) { 967 revert_current_mid(server, num_rqst); 968 server->sequence_number -= 2; 969 } 970 971 cifs_server_unlock(server); 972 973 /* 974 * If sending failed for some reason or it is an oplock break that we 975 * will not receive a response to - return credits back 976 */ 977 if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) { 978 for (i = 0; i < num_rqst; i++) 979 add_credits(server, &credits[i], optype); 980 goto out; 981 } 982 983 /* 984 * At this point the request is passed to the network stack - we assume 985 * that any credits taken from the server structure on the client have 986 * been spent and we can't return them back. Once we receive responses 987 * we will collect credits granted by the server in the mid callbacks 988 * and add those credits to the server structure. 989 */ 990 991 /* 992 * Compounding is never used during session establish. 993 */ 994 spin_lock(&ses->ses_lock); 995 if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { 996 spin_unlock(&ses->ses_lock); 997 998 if (WARN_ON_ONCE(num_rqst != 1 || !resp_iov)) 999 return -EINVAL; 1000 1001 cifs_server_lock(server); 1002 smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec); 1003 cifs_server_unlock(server); 1004 1005 spin_lock(&ses->ses_lock); 1006 } 1007 spin_unlock(&ses->ses_lock); 1008 1009 for (i = 0; i < num_rqst; i++) { 1010 rc = wait_for_response(server, mid[i]); 1011 if (rc != 0) 1012 break; 1013 } 1014 if (rc != 0) { 1015 for (; i < num_rqst; i++) { 1016 cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", 1017 mid[i]->mid, le16_to_cpu(mid[i]->command)); 1018 send_cancel(ses, server, &rqst[i], mid[i], xid); 1019 spin_lock(&mid[i]->mid_lock); 1020 mid[i]->wait_cancelled = true; 1021 if (mid[i]->mid_state == MID_REQUEST_SUBMITTED || 1022 mid[i]->mid_state == MID_RESPONSE_RECEIVED) { 1023 mid[i]->callback = cifs_cancelled_callback; 1024 cancelled_mid[i] = true; 1025 credits[i].value = 0; 1026 } 1027 spin_unlock(&mid[i]->mid_lock); 1028 } 1029 } 1030 1031 for (i = 0; i < num_rqst; i++) { 1032 if (rc < 0) 1033 goto out; 1034 1035 rc = cifs_sync_mid_result(mid[i], server); 1036 if (rc != 0) { 1037 /* mark this mid as cancelled to not free it below */ 1038 cancelled_mid[i] = true; 1039 goto out; 1040 } 1041 1042 if (!mid[i]->resp_buf || 1043 mid[i]->mid_state != MID_RESPONSE_READY) { 1044 rc = smb_EIO1(smb_eio_trace_rx_mid_unready, mid[i]->mid_state); 1045 cifs_dbg(FYI, "Bad MID state?\n"); 1046 goto out; 1047 } 1048 1049 rc = server->ops->check_receive(mid[i], server, 1050 flags & CIFS_LOG_ERROR); 1051 1052 if (resp_iov) { 1053 buf = (char *)mid[i]->resp_buf; 1054 resp_iov[i].iov_base = buf; 1055 resp_iov[i].iov_len = mid[i]->resp_buf_size; 1056 1057 if (mid[i]->large_buf) 1058 resp_buf_type[i] = CIFS_LARGE_BUFFER; 1059 else 1060 resp_buf_type[i] = CIFS_SMALL_BUFFER; 1061 1062 /* mark it so buf will not be freed by delete_mid */ 1063 if ((flags & CIFS_NO_RSP_BUF) == 0) 1064 mid[i]->resp_buf = NULL; 1065 } 1066 } 1067 1068 /* 1069 * Compounding is never used during session establish. 1070 */ 1071 spin_lock(&ses->ses_lock); 1072 if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { 1073 struct kvec iov = { 1074 .iov_base = resp_iov[0].iov_base, 1075 .iov_len = resp_iov[0].iov_len 1076 }; 1077 spin_unlock(&ses->ses_lock); 1078 cifs_server_lock(server); 1079 smb311_update_preauth_hash(ses, server, &iov, 1); 1080 cifs_server_unlock(server); 1081 spin_lock(&ses->ses_lock); 1082 } 1083 spin_unlock(&ses->ses_lock); 1084 1085 out: 1086 /* 1087 * This will dequeue all mids. After this it is important that the 1088 * demultiplex_thread will not process any of these mids any further. 1089 * This is prevented above by using a noop callback that will not 1090 * wake this thread except for the very last PDU. 1091 */ 1092 for (i = 0; i < num_rqst; i++) { 1093 if (!cancelled_mid[i]) 1094 delete_mid(server, mid[i]); 1095 } 1096 1097 return rc; 1098 } 1099 1100 int 1101 cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, 1102 struct TCP_Server_Info *server, 1103 struct smb_rqst *rqst, int *resp_buf_type, const int flags, 1104 struct kvec *resp_iov) 1105 { 1106 return compound_send_recv(xid, ses, server, flags, 1, 1107 rqst, resp_buf_type, resp_iov); 1108 } 1109 1110 1111 /* 1112 * Discard any remaining data in the current SMB. To do this, we borrow the 1113 * current bigbuf. 1114 */ 1115 int 1116 cifs_discard_remaining_data(struct TCP_Server_Info *server) 1117 { 1118 unsigned int rfclen = server->pdu_size; 1119 size_t remaining = rfclen - server->total_read; 1120 1121 while (remaining > 0) { 1122 ssize_t length; 1123 1124 length = cifs_discard_from_socket(server, 1125 min_t(size_t, remaining, 1126 CIFSMaxBufSize + MAX_HEADER_SIZE(server))); 1127 if (length < 0) 1128 return length; 1129 server->total_read += length; 1130 remaining -= length; 1131 } 1132 1133 return 0; 1134 } 1135 1136 static int 1137 __cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid, 1138 bool malformed) 1139 { 1140 int length; 1141 1142 length = cifs_discard_remaining_data(server); 1143 dequeue_mid(server, mid, malformed); 1144 mid->resp_buf = server->smallbuf; 1145 server->smallbuf = NULL; 1146 return length; 1147 } 1148 1149 static int 1150 cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1151 { 1152 struct cifs_io_subrequest *rdata = mid->callback_data; 1153 1154 return __cifs_readv_discard(server, mid, rdata->result); 1155 } 1156 1157 int 1158 cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1159 { 1160 int length, len; 1161 unsigned int data_offset, data_len; 1162 struct cifs_io_subrequest *rdata = mid->callback_data; 1163 char *buf = server->smallbuf; 1164 unsigned int buflen = server->pdu_size; 1165 bool use_rdma_mr = false; 1166 1167 cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n", 1168 __func__, mid->mid, rdata->subreq.start, rdata->subreq.len); 1169 1170 /* 1171 * read the rest of READ_RSP header (sans Data array), or whatever we 1172 * can if there's not enough data. At this point, we've read down to 1173 * the Mid. 1174 */ 1175 len = min_t(unsigned int, buflen, server->vals->read_rsp_size) - 1176 HEADER_SIZE(server) + 1; 1177 1178 length = cifs_read_from_socket(server, 1179 buf + HEADER_SIZE(server) - 1, len); 1180 if (length < 0) 1181 return length; 1182 server->total_read += length; 1183 1184 if (server->ops->is_session_expired && 1185 server->ops->is_session_expired(buf)) { 1186 cifs_reconnect(server, true); 1187 return -1; 1188 } 1189 1190 if (server->ops->is_status_pending && 1191 server->ops->is_status_pending(buf, server)) { 1192 cifs_discard_remaining_data(server); 1193 return -1; 1194 } 1195 1196 /* set up first two iov for signature check and to get credits */ 1197 rdata->iov[0].iov_base = buf; 1198 rdata->iov[0].iov_len = server->total_read; 1199 cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", 1200 rdata->iov[0].iov_base, rdata->iov[0].iov_len); 1201 1202 /* Was the SMB read successful? */ 1203 rdata->result = server->ops->map_error(buf, false); 1204 if (rdata->result != 0) { 1205 cifs_dbg(FYI, "%s: server returned error %d\n", 1206 __func__, rdata->result); 1207 /* normal error on read response */ 1208 return __cifs_readv_discard(server, mid, false); 1209 } 1210 1211 /* Is there enough to get to the rest of the READ_RSP header? */ 1212 if (server->total_read < server->vals->read_rsp_size) { 1213 cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n", 1214 __func__, server->total_read, 1215 server->vals->read_rsp_size); 1216 rdata->result = smb_EIO2(smb_eio_trace_read_rsp_short, 1217 server->total_read, server->vals->read_rsp_size); 1218 return cifs_readv_discard(server, mid); 1219 } 1220 1221 data_offset = server->ops->read_data_offset(buf); 1222 if (data_offset < server->total_read) { 1223 /* 1224 * win2k8 sometimes sends an offset of 0 when the read 1225 * is beyond the EOF. Treat it as if the data starts just after 1226 * the header. 1227 */ 1228 cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n", 1229 __func__, data_offset); 1230 data_offset = server->total_read; 1231 } else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) { 1232 /* data_offset is beyond the end of smallbuf */ 1233 cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n", 1234 __func__, data_offset); 1235 rdata->result = smb_EIO1(smb_eio_trace_read_overlarge, 1236 data_offset); 1237 return cifs_readv_discard(server, mid); 1238 } 1239 1240 cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n", 1241 __func__, server->total_read, data_offset); 1242 1243 len = data_offset - server->total_read; 1244 if (len > 0) { 1245 /* read any junk before data into the rest of smallbuf */ 1246 length = cifs_read_from_socket(server, 1247 buf + server->total_read, len); 1248 if (length < 0) 1249 return length; 1250 server->total_read += length; 1251 rdata->iov[0].iov_len = server->total_read; 1252 } 1253 1254 /* how much data is in the response? */ 1255 #ifdef CONFIG_CIFS_SMB_DIRECT 1256 use_rdma_mr = rdata->mr; 1257 #endif 1258 data_len = server->ops->read_data_length(buf, use_rdma_mr); 1259 if (!use_rdma_mr && (data_offset + data_len > buflen)) { 1260 /* data_len is corrupt -- discard frame */ 1261 rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed, 1262 data_offset + data_len, buflen); 1263 return cifs_readv_discard(server, mid); 1264 } 1265 1266 #ifdef CONFIG_CIFS_SMB_DIRECT 1267 if (rdata->mr) 1268 length = data_len; /* An RDMA read is already done. */ 1269 else 1270 #endif 1271 length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter, 1272 data_len); 1273 if (length > 0) 1274 rdata->got_bytes += length; 1275 server->total_read += length; 1276 1277 cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n", 1278 server->total_read, buflen, data_len); 1279 1280 /* discard anything left over */ 1281 if (server->total_read < buflen) 1282 return cifs_readv_discard(server, mid); 1283 1284 dequeue_mid(server, mid, false); 1285 mid->resp_buf = server->smallbuf; 1286 server->smallbuf = NULL; 1287 return length; 1288 } 1289