// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2008
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *   Jeremy Allison (jra@samba.org) 2006.
 *
 */

#include <linux/fs.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/wait.h>
#include <linux/net.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/tcp.h>
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/uaccess.h>
#include <linux/processor.h>
#include <linux/mempool.h>
#include <linux/sched/signal.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/task_work.h>
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
#include "smb2proto.h"
#include "smbdirect.h"
#include "compress.h"

/*
 * Mid callback for synchronous requests: promote a freshly received
 * response to "ready" and wake the process stored in mid->callback_data
 * (presumably the task sleeping in wait_for_response() — it waits for the
 * mid to leave the SUBMITTED/RESPONSE_RECEIVED states).
 */
void
cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	if (mid->mid_state == MID_RESPONSE_RECEIVED)
		mid->mid_state = MID_RESPONSE_READY;
	wake_up_process(mid->callback_data);
}

/*
 * Final teardown of a mid entry: run the cancelled-mid hook if the waiter
 * gave up after a response had already arrived, release the response
 * buffer, update the per-command round-trip statistics and slow-response
 * diagnostics (CONFIG_CIFS_STATS2 only), and return the entry to the mid
 * mempool.
 */
void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *midEntry)
{
#ifdef CONFIG_CIFS_STATS2
	/* lock commands are excluded from the slow-response check below */
	__le16 command = server->vals->lock_cmd;
	__u16 smb_cmd = le16_to_cpu(midEntry->command);
	unsigned long now;
	unsigned long roundtrip_time;
#endif

	if (midEntry->resp_buf && (midEntry->wait_cancelled) &&
	    (midEntry->mid_state == MID_RESPONSE_RECEIVED ||
	     midEntry->mid_state == MID_RESPONSE_READY) &&
	    server->ops->handle_cancelled_mid)
		server->ops->handle_cancelled_mid(midEntry, server);

	midEntry->mid_state = MID_FREE;
	atomic_dec(&mid_count);
	if (midEntry->large_buf)
		cifs_buf_release(midEntry->resp_buf);
	else
		cifs_small_buf_release(midEntry->resp_buf);
#ifdef CONFIG_CIFS_STATS2
	now = jiffies;
	if (now < midEntry->when_alloc)
		cifs_server_dbg(VFS, "Invalid mid allocation time\n");
	roundtrip_time = now - midEntry->when_alloc;

	if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) {
		/* first completion of this command seeds both extremes */
		if (atomic_read(&server->num_cmds[smb_cmd]) == 0) {
			server->slowest_cmd[smb_cmd] = roundtrip_time;
			server->fastest_cmd[smb_cmd] = roundtrip_time;
		} else {
			if (server->slowest_cmd[smb_cmd] < roundtrip_time)
				server->slowest_cmd[smb_cmd] = roundtrip_time;
			else if (server->fastest_cmd[smb_cmd] > roundtrip_time)
				server->fastest_cmd[smb_cmd] = roundtrip_time;
		}
		cifs_stats_inc(&server->num_cmds[smb_cmd]);
		server->time_per_cmd[smb_cmd] += roundtrip_time;
	}
	/*
	 * commands taking longer than one second (default) can be indications
	 * that something is wrong, unless it is quite a slow link or a very
	 * busy server. Note that this calc is unlikely or impossible to wrap
	 * as long as slow_rsp_threshold is not set way above recommended max
	 * value (32767 ie 9 hours) and is generally harmless even if wrong
	 * since only affects debug counters - so leaving the calc as simple
	 * comparison rather than doing multiple conversions and overflow
	 * checks
	 */
	if ((slow_rsp_threshold != 0) &&
	    time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
	    (midEntry->command != command)) {
		/*
		 * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command
		 * NB: le16_to_cpu returns unsigned so can not be negative below
		 */
		if (smb_cmd < NUMBER_OF_SMB2_COMMANDS)
			cifs_stats_inc(&server->smb2slowcmd[smb_cmd]);

		trace_smb3_slow_rsp(smb_cmd, midEntry->mid, midEntry->pid,
				    midEntry->when_sent, midEntry->when_received);
		if (cifsFYI & CIFS_TIMER) {
			pr_debug("slow rsp: cmd %d mid %llu",
				 midEntry->command, midEntry->mid);
			cifs_info("A: 0x%lx S: 0x%lx R: 0x%lx\n",
				  now - midEntry->when_alloc,
				  now - midEntry->when_sent,
				  now - midEntry->when_received);
		}
	}
#endif
	put_task_struct(midEntry->creator);

	mempool_free(midEntry, &cifs_mid_pool);
}

/*
 * Unlink a mid from the pending queue (if it is still queued) under
 * mid_queue_lock, then drop the caller's reference via release_mid().
 */
void
delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	spin_lock(&server->mid_queue_lock);

	if (!mid->deleted_from_q) {
		list_del_init(&mid->qhead);
		mid->deleted_from_q = true;
	}
	spin_unlock(&server->mid_queue_lock);

	release_mid(server, mid);
}

/*
 * smb_send_kvec - send an array of kvecs to the server
 * @server:	Server to send the data to
 * @smb_msg:	Message to send
 * @sent:	amount of data sent on socket is stored here
 *
 * Our basic "send data to server" function. Should be called with srv_mutex
 * held. The caller is responsible for handling the results.
 */
int
smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
	      size_t *sent)
{
	int rc = 0;
	int retries = 0;
	struct socket *ssocket = server->ssocket;

	*sent = 0;

	/*
	 * NOTE(review): flags are combined with '+' — works because the bits
	 * are disjoint, but '|' is the conventional spelling.
	 */
	if (server->noblocksnd)
		smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
	else
		smb_msg->msg_flags = MSG_NOSIGNAL;

	while (msg_data_left(smb_msg)) {
		/*
		 * If blocking send, we try 3 times, since each can block
		 * for 5 seconds. For nonblocking  we have to try more
		 * but wait increasing amounts of time allowing time for
		 * socket to clear.  The overall time we wait in either
		 * case to send on the socket is about 15 seconds.
		 * Similarly we wait for 15 seconds for a response from
		 * the server in SendReceive[2] for the server to send
		 * a response back for most types of requests (except
		 * SMB Write past end of file which can be slow, and
		 * blocking lock operations). NFS waits slightly longer
		 * than CIFS, but this can make it take longer for
		 * nonresponsive servers to be detected and 15 seconds
		 * is more than enough time for modern networks to
		 * send a packet.  In most cases if we fail to send
		 * after the retries we will kill the socket and
		 * reconnect which may clear the network problem.
		 *
		 * Even if regular signals are masked, EINTR might be
		 * propagated from sk_stream_wait_memory() to here when
		 * TIF_NOTIFY_SIGNAL is used for task work. For example,
		 * certain io_uring completions will use that. Treat
		 * having EINTR with pending task work the same as EAGAIN
		 * to avoid unnecessary reconnects.
		 */
		rc = sock_sendmsg(ssocket, smb_msg);
		if (rc == -EAGAIN || unlikely(rc == -EINTR && task_work_pending(current))) {
			retries++;
			if (retries >= 14 ||
			    (!server->noblocksnd && (retries > 2))) {
				cifs_server_dbg(VFS, "sends on sock %p stuck for 15 seconds\n",
						ssocket);
				return -EAGAIN;
			}
			/* exponential backoff: 2, 4, 8, ... ms between tries */
			msleep(1 << retries);
			continue;
		}

		if (rc < 0)
			return rc;

		if (rc == 0) {
			/* should never happen, letting socket clear before
			   retrying is our only obvious option here */
			cifs_server_dbg(VFS, "tcp sent no data\n");
			msleep(500);
			continue;
		}

		/* send was at least partially successful */
		*sent += rc;
		retries = 0; /* in case we get ENOSPC on the next send */
	}
	return 0;
}

/*
 * Total number of bytes this request will put on the wire, excluding the
 * 4-byte RFC1002 length field that non-SMB1 requests carry as their first
 * iov (it is regenerated by __smb_send_rqst()).
 */
unsigned long
smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
	unsigned int i;
	struct kvec *iov;
	int nvec;
	unsigned long buflen = 0;

	if (!is_smb1(server) && rqst->rq_nvec >= 2 &&
	    rqst->rq_iov[0].iov_len == 4) {
		iov = &rqst->rq_iov[1];
		nvec = rqst->rq_nvec - 1;
	} else {
		iov = rqst->rq_iov;
		nvec = rqst->rq_nvec;
	}

	/* total up iov array first */
	for (i = 0; i < nvec; i++)
		buflen += iov[i].iov_len;

	buflen += iov_iter_count(&rqst->rq_iter);
	return buflen;
}

/*
 * Push @num_rqst marshalled requests onto the transport: emit the total
 * length as an RFC1002 marker, then send every kvec and the data iterator
 * of each request with all signals blocked, and force a reconnect if only
 * part of the data made it out. Returns 0 on success or a negative errno;
 * hands off entirely to smbd_send() when RDMA is in use.
 */
int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
		    struct smb_rqst *rqst)
{
	int rc;
	struct kvec *iov;
	int n_vec;
	unsigned int send_length = 0;
	unsigned int i, j;
	sigset_t mask, oldmask;
	size_t total_len = 0, sent, size;
	struct socket *ssocket = server->ssocket;
	struct msghdr smb_msg = {};
	__be32 rfc1002_marker;

	cifs_in_send_inc(server);
	if (cifs_rdma_enabled(server)) {
		/* return -EAGAIN when connecting or reconnecting */
		rc = -EAGAIN;
		if (server->smbd_conn)
			rc = smbd_send(server, num_rqst, rqst);
		goto smbd_done;
	}

	rc = -EAGAIN;
	if (ssocket == NULL)
		goto out;

	rc = -ERESTARTSYS;
	if (fatal_signal_pending(current)) {
		cifs_dbg(FYI, "signal pending before send request\n");
		goto out;
	}

	rc = 0;
	/* cork the socket */
	tcp_sock_set_cork(ssocket->sk, true);

	for (j = 0; j < num_rqst; j++)
		send_length += smb_rqst_len(server, &rqst[j]);
	rfc1002_marker = cpu_to_be32(send_length);

	/*
	 * We should not allow signals to interrupt the network send because
	 * any partial send will cause session reconnects thus increasing
	 * latency of system calls and overload a server with unnecessary
	 * requests.
	 */

	sigfillset(&mask);
	sigprocmask(SIG_BLOCK, &mask, &oldmask);

	/* Generate a rfc1002 marker */
	{
		struct kvec hiov = {
			.iov_base = &rfc1002_marker,
			.iov_len = 4
		};
		iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4);
		rc = smb_send_kvec(server, &smb_msg, &sent);
		if (rc < 0)
			goto unmask;

		total_len += sent;
		/* account for the marker so the partial-send check matches */
		send_length += 4;
	}

	cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length);

	for (j = 0; j < num_rqst; j++) {
		iov = rqst[j].rq_iov;
		n_vec = rqst[j].rq_nvec;

		size = 0;
		for (i = 0; i < n_vec; i++) {
			dump_smb(iov[i].iov_base, iov[i].iov_len);
			size += iov[i].iov_len;
		}

		iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size);

		rc = smb_send_kvec(server, &smb_msg, &sent);
		if (rc < 0)
			goto unmask;

		total_len += sent;

		/* any trailing payload (e.g. write data) lives in rq_iter */
		if (iov_iter_count(&rqst[j].rq_iter) > 0) {
			smb_msg.msg_iter = rqst[j].rq_iter;
			rc = smb_send_kvec(server, &smb_msg, &sent);
			if (rc < 0)
				break;
			total_len += sent;
		}
	}

unmask:
	sigprocmask(SIG_SETMASK, &oldmask, NULL);

	/*
	 * If signal is pending but we have already sent the whole packet to
	 * the server we need to return success status to allow a corresponding
	 * mid entry to be kept in the pending requests queue thus allowing
	 * to handle responses from the server by the client.
	 *
	 * If only part of the packet has been sent there is no need to hide
	 * interrupt because the session will be reconnected anyway, so there
	 * won't be any response from the server to handle.
	 */

	if (signal_pending(current) && (total_len != send_length)) {
		cifs_dbg(FYI, "signal is pending after attempt to send\n");
		rc = -ERESTARTSYS;
	}

	/* uncork it */
	tcp_sock_set_cork(ssocket->sk, false);

	if ((total_len > 0) && (total_len != send_length)) {
		cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
			 send_length, total_len);
		/*
		 * If we have only sent part of an SMB then the next SMB could
		 * be taken as the remainder of this one. We need to kill the
		 * socket so the server throws away the partial SMB
		 */
		cifs_signal_cifsd_for_reconnect(server, false);
		trace_smb3_partial_send_reconnect(server->current_mid,
						  server->conn_id, server->hostname);
	}
smbd_done:
	/*
	 * there's hardly any use for the layers above to know the
	 * actual error code here. All they should do at this point is
	 * to retry the connection and hope it goes away.
	 */
	if (rc < 0 && rc != -EINTR && rc != -EAGAIN) {
		cifs_server_dbg(VFS, "Error %d sending data on socket to server\n",
				rc);
		rc = -ECONNABORTED;
		cifs_signal_cifsd_for_reconnect(server, false);
	} else if (rc > 0)
		rc = 0;
out:
	cifs_in_send_dec(server);
	return rc;
}

/*
 * Front end for __smb_send_rqst(): compress or encrypt (transform) the
 * request first when the corresponding CIFS_COMPRESS_REQ/CIFS_TRANSFORM_REQ
 * flag is set; otherwise send as-is. For the transform case an extra
 * leading rqst slot is used for the smb2_transform_hdr.
 */
static int
smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
	      struct smb_rqst *rqst, int flags)
{
	struct smb2_transform_hdr tr_hdr;
	struct smb_rqst new_rqst[MAX_COMPOUND] = {};
	struct kvec iov = {
		.iov_base = &tr_hdr,
		.iov_len = sizeof(tr_hdr),
	};
	int rc;

	if (flags & CIFS_COMPRESS_REQ)
		return smb_compress(server, &rqst[0], __smb_send_rqst);

	if (!(flags & CIFS_TRANSFORM_REQ))
		return __smb_send_rqst(server, num_rqst, rqst);

	/* transform needs one spare slot for the transform header */
	if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1))
		return smb_EIO1(smb_eio_trace_tx_max_compound, num_rqst);

	if (!server->ops->init_transform_rq) {
		cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n");
		return smb_EIO(smb_eio_trace_tx_need_transform);
	}

	new_rqst[0].rq_iov = &iov;
	new_rqst[0].rq_nvec = 1;

	rc = server->ops->init_transform_rq(server, num_rqst + 1,
					    new_rqst, rqst);
	if (!rc) {
		rc = __smb_send_rqst(server, num_rqst + 1, new_rqst);
		smb3_free_compound_rqst(num_rqst, &new_rqst[1]);
	}
	return rc;
}

/*
 * Block until @num_credits credits are available (or @timeout ms elapse,
 * -1 meaning wait forever), then consume them, bump in_flight, and record
 * the current reconnect instance in @instance. Oplock breaks
 * (CIFS_NON_BLOCKING) bypass the wait entirely; echoes fail fast with
 * -EAGAIN when no credits remain. Returns 0, -EAGAIN, -ENOENT (server
 * exiting), -EBUSY (timed out) or -ERESTARTSYS.
 */
static int
wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
		      const int timeout, const int flags,
		      unsigned int *instance)
{
	long rc;
	int *credits;
	int optype;
	long int t;
	int scredits, in_flight;

	if (timeout < 0)
		t = MAX_JIFFY_OFFSET;
	else
		t = msecs_to_jiffies(timeout);

	optype = flags & CIFS_OP_MASK;

	*instance = 0;

	credits = server->ops->get_credits_field(server, optype);
	/* Since an echo is already inflight, no need to wait to send another */
	if (*credits <= 0 && optype == CIFS_ECHO_OP)
		return -EAGAIN;

	spin_lock(&server->req_lock);
	if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) {
		/* oplock breaks must not be held up */
		server->in_flight++;
		if (server->in_flight > server->max_in_flight)
			server->max_in_flight = server->in_flight;
		*credits -= 1;
		*instance = server->reconnect_instance;
		scredits = *credits;
		in_flight = server->in_flight;
		spin_unlock(&server->req_lock);

		trace_smb3_nblk_credits(server->current_mid,
				server->conn_id, server->hostname, scredits, -1, in_flight);
		cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
				__func__, 1, scredits);

		return 0;
	}

	while (1) {
		spin_unlock(&server->req_lock);

		/* bail out if the connection is being torn down */
		spin_lock(&server->srv_lock);
		if (server->tcpStatus == CifsExiting) {
			spin_unlock(&server->srv_lock);
			return -ENOENT;
		}
		spin_unlock(&server->srv_lock);

		spin_lock(&server->req_lock);
		if (*credits < num_credits) {
			scredits = *credits;
			spin_unlock(&server->req_lock);

			cifs_num_waiters_inc(server);
			rc = wait_event_killable_timeout(server->request_q,
				has_credits(server, credits, num_credits), t);
			cifs_num_waiters_dec(server);
			if (!rc) {
				spin_lock(&server->req_lock);
				scredits = *credits;
				in_flight = server->in_flight;
				spin_unlock(&server->req_lock);

				trace_smb3_credit_timeout(server->current_mid,
						server->conn_id, server->hostname, scredits,
						num_credits, in_flight);
				cifs_server_dbg(VFS, "wait timed out after %d ms\n",
						timeout);
				return -EBUSY;
			}
			if (rc == -ERESTARTSYS)
				return -ERESTARTSYS;
			/* re-acquire and re-check from the top of the loop */
			spin_lock(&server->req_lock);
		} else {
			/*
			 * For normal commands, reserve the last MAX_COMPOUND
			 * credits to compound requests.
			 * Otherwise these compounds could be permanently
			 * starved for credits by single-credit requests.
			 *
			 * To prevent spinning CPU, block this thread until
			 * there are >MAX_COMPOUND credits available.
			 * But only do this is we already have a lot of
			 * credits in flight to avoid triggering this check
			 * for servers that are slow to hand out credits on
			 * new sessions.
			 */
			if (!optype && num_credits == 1 &&
			    server->in_flight > 2 * MAX_COMPOUND &&
			    *credits <= MAX_COMPOUND) {
				spin_unlock(&server->req_lock);

				cifs_num_waiters_inc(server);
				rc = wait_event_killable_timeout(
					server->request_q,
					has_credits(server, credits,
						    MAX_COMPOUND + 1),
					t);
				cifs_num_waiters_dec(server);
				if (!rc) {
					spin_lock(&server->req_lock);
					scredits = *credits;
					in_flight = server->in_flight;
					spin_unlock(&server->req_lock);

					trace_smb3_credit_timeout(
							server->current_mid,
							server->conn_id, server->hostname,
							scredits, num_credits, in_flight);
					cifs_server_dbg(VFS, "wait timed out after %d ms\n",
							timeout);
					return -EBUSY;
				}
				if (rc == -ERESTARTSYS)
					return -ERESTARTSYS;
				spin_lock(&server->req_lock);
				continue;
			}

			/*
			 * Can not count locking commands against total
			 * as they are allowed to block on server.
			 */

			/* update # of requests on the wire to server */
			if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
				*credits -= num_credits;
				server->in_flight += num_credits;
				if (server->in_flight > server->max_in_flight)
					server->max_in_flight = server->in_flight;
				*instance = server->reconnect_instance;
			}
			scredits = *credits;
			in_flight = server->in_flight;
			spin_unlock(&server->req_lock);

			trace_smb3_waitff_credits(server->current_mid,
					server->conn_id, server->hostname, scredits,
					-(num_credits), in_flight);
			cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
					__func__, num_credits, scredits);
			break;
		}
	}
	return 0;
}

/* Wait (without timeout) for a single credit for a regular request. */
int wait_for_free_request(struct TCP_Server_Info *server, const int flags,
			  unsigned int *instance)
{
	return wait_for_free_credits(server, 1, -1, flags,
				     instance);
}

/*
 * Wait (60s timeout) for enough credits to send a @num-part compound.
 * Fails fast with -EDEADLK when credits are short and nothing is in
 * flight, since no response could ever replenish them.
 */
static int
wait_for_compound_request(struct TCP_Server_Info *server, int num,
			  const int flags, unsigned int *instance)
{
	int *credits;
	int scredits, in_flight;

	credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);

	spin_lock(&server->req_lock);
	scredits = *credits;
	in_flight = server->in_flight;

	if (*credits < num) {
		/*
		 * If the server is tight on resources or just gives us less
		 * credits for other reasons (e.g. requests are coming out of
		 * order and the server delays granting more credits until it
		 * processes a missing mid) and we exhausted most available
		 * credits there may be situations when we try to send
		 * a compound request but we don't have enough credits. At this
		 * point the client needs to decide if it should wait for
		 * additional credits or fail the request. If at least one
		 * request is in flight there is a high probability that the
		 * server will return enough credits to satisfy this compound
		 * request.
		 *
		 * Return immediately if no requests in flight since we will be
		 * stuck on waiting for credits.
		 */
		if (server->in_flight == 0) {
			spin_unlock(&server->req_lock);
			trace_smb3_insufficient_credits(server->current_mid,
					server->conn_id, server->hostname, scredits,
					num, in_flight);
			cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
					__func__, in_flight, num, scredits);
			return -EDEADLK;
		}
	}
	spin_unlock(&server->req_lock);

	return wait_for_free_credits(server, num, 60000, flags,
				     instance);
}

/*
 * Default MTU-credit hook: no credit accounting, grant the full @size
 * (SMB2+ overrides this — see the ops table; TODO confirm against
 * smb2_wait_mtu_credits).
 */
int
cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size,
		      size_t *num, struct cifs_credits *credits)
{
	*num = size;
	credits->value = 0;
	credits->instance = server->reconnect_instance;
	return 0;
}

/*
 * Sleep until @mid leaves the SUBMITTED/RESPONSE_RECEIVED states (i.e. a
 * response is ready or the mid was retried/shut down/cancelled).
 * Killable by default; fully interruptible if CIFS_INTERRUPTIBLE_WAIT was
 * set on the request. Returns 0 or -ERESTARTSYS.
 */
int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	unsigned int sleep_state = TASK_KILLABLE;
	int error;

	if (mid->sr_flags & CIFS_INTERRUPTIBLE_WAIT)
		sleep_state = TASK_INTERRUPTIBLE;

	error = wait_event_state(server->response_q,
				 mid->mid_state != MID_REQUEST_SUBMITTED &&
				 mid->mid_state != MID_RESPONSE_RECEIVED,
				 (sleep_state | TASK_FREEZABLE_UNSAFE));
	if (error < 0)
		return -ERESTARTSYS;

	return 0;
}

/*
 * Send a SMB request and set the callback function in the mid to handle
 * the result. Caller is responsible for dealing with timeouts.
 */
int
cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
		mid_receive_t receive, mid_callback_t callback,
		mid_handle_t handle, void *cbdata, const int flags,
		const struct cifs_credits *exist_credits)
{
	int rc;
	struct mid_q_entry *mid;
	struct cifs_credits credits = { .value = 0, .instance = 0 };
	unsigned int instance;
	int optype;

	optype = flags & CIFS_OP_MASK;

	/* obtain a credit unless the caller already holds one */
	if ((flags & CIFS_HAS_CREDITS) == 0) {
		rc = wait_for_free_request(server, flags, &instance);
		if (rc)
			return rc;
		credits.value = 1;
		credits.instance = instance;
	} else
		instance = exist_credits->instance;

	cifs_server_lock(server);

	/*
	 * We can't use credits obtained from the previous session to send this
	 * request. Check if there were reconnects after we obtained credits and
	 * return -EAGAIN in such cases to let callers handle it.
	 */
	if (instance != server->reconnect_instance) {
		cifs_server_unlock(server);
		add_credits_and_wake_if(server, &credits, optype);
		return -EAGAIN;
	}

	mid = server->ops->setup_async_request(server, rqst);
	if (IS_ERR(mid)) {
		cifs_server_unlock(server);
		add_credits_and_wake_if(server, &credits, optype);
		return PTR_ERR(mid);
	}

	mid->sr_flags = flags;
	mid->receive = receive;
	mid->callback = callback;
	mid->callback_data = cbdata;
	mid->handle = handle;
	mid->mid_state = MID_REQUEST_SUBMITTED;

	/* put it on the pending_mid_q */
	spin_lock(&server->mid_queue_lock);
	list_add_tail(&mid->qhead, &server->mid_queue_lock == NULL ? NULL : &server->pending_mid_q);
	spin_unlock(&server->mid_queue_lock);

	/*
	 * Need to store the time in mid before calling I/O. For call_async,
	 * I/O response may come back and free the mid entry on another thread.
	 */
	cifs_save_when_sent(mid);
	rc = smb_send_rqst(server, 1, rqst, flags);

	if (rc < 0) {
		/* undo mid-number/signing advances made by setup+send —
		 * NOTE(review): -= 2 presumably matches the two sequence
		 * numbers a signed send consumes; confirm vs. signing code */
		revert_current_mid(server, mid->credits);
		server->sequence_number -= 2;
		delete_mid(server, mid);
	}

	cifs_server_unlock(server);

	if (rc == 0)
		return 0;

	add_credits_and_wake_if(server, &credits, optype);
	return rc;
}

/*
 * Translate the final state of a completed/aborted mid into an errno.
 * Dequeues the mid itself only for unexpected states; always drops the
 * caller's reference via release_mid().
 */
int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
{
	int rc = 0;

	cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n",
		 __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state);

	spin_lock(&server->mid_queue_lock);
	switch (mid->mid_state) {
	case MID_RESPONSE_READY:
		spin_unlock(&server->mid_queue_lock);
		return rc;
	case MID_RETRY_NEEDED:
		rc = -EAGAIN;
		break;
	case MID_RESPONSE_MALFORMED:
		rc = smb_EIO(smb_eio_trace_rx_sync_mid_malformed);
		break;
	case MID_SHUTDOWN:
		rc = -EHOSTDOWN;
		break;
	case MID_RC:
		rc = mid->mid_rc;
		break;
	default:
		if (mid->deleted_from_q == false) {
			list_del_init(&mid->qhead);
			mid->deleted_from_q = true;
		}
		spin_unlock(&server->mid_queue_lock);
		cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
				__func__, mid->mid, mid->mid_state);
		rc = smb_EIO1(smb_eio_trace_rx_sync_mid_invalid, mid->mid_state);
		goto sync_mid_done;
	}
	spin_unlock(&server->mid_queue_lock);

sync_mid_done:
	release_mid(server, mid);
	return rc;
}

/*
 * Per-PDU completion for compound chains: bank the credits granted by this
 * response without waking the sending thread.
 */
static void
cifs_compound_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	struct cifs_credits credits = {
		.value = server->ops->get_credits(mid),
		.instance = server->reconnect_instance,
	};

	add_credits(server, &credits, mid->optype);

	if (mid->mid_state == MID_RESPONSE_RECEIVED)
		mid->mid_state = MID_RESPONSE_READY;
}

/* Completion for the last PDU in a chain: bank credits, then wake sender. */
static void
cifs_compound_last_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	cifs_compound_callback(server, mid);
	cifs_wake_up_task(server, mid);
}

/*
 * Completion for a mid whose waiter already gave up: bank credits and free
 * the mid here, since nobody will call cifs_sync_mid_result() for it.
 */
static void
cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	cifs_compound_callback(server, mid);
	release_mid(server, mid);
}

/*
 * Return a channel (master if none) of @ses that can be used to send
 * regular requests.
 *
 * If we are currently binding a new channel (negprot/sess.setup),
 * return the new incomplete channel.
 */
struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
{
	uint index = 0;
	unsigned int min_in_flight = UINT_MAX, max_in_flight = 0;
	struct TCP_Server_Info *server = NULL;
	int i, start, cur;

	if (!ses)
		return NULL;

	spin_lock(&ses->chan_lock);
	start = atomic_inc_return(&ses->chan_seq);
	for (i = 0; i < ses->chan_count; i++) {
		cur = (start + i) % ses->chan_count;
		server = ses->chans[cur].server;
		if (!server || server->terminate)
			continue;

		if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur))
			continue;

		/*
		 * strictly speaking, we should pick up req_lock to read
		 * server->in_flight. But it shouldn't matter much here if we
		 * race while reading this data. The worst that can happen is
		 * that we could use a channel that's not least loaded. Avoiding
		 * taking the lock could help reduce wait time, which is
		 * important for this function
		 */
		if (server->in_flight < min_in_flight) {
			min_in_flight = server->in_flight;
			index = cur;
		}
		if (server->in_flight > max_in_flight)
			max_in_flight = server->in_flight;
	}

	/* if all channels are equally loaded, fall back to round-robin */
	if (min_in_flight == max_in_flight)
		index = (uint)start % ses->chan_count;

	server = ses->chans[index].server;
	spin_unlock(&ses->chan_lock);

	return server;
}

/*
 * Send a compound chain of @num_rqst requests on @server and wait for all
 * responses. On success each response buffer is handed back through
 * @resp_iov/@resp_buf_type; the caller owns those buffers unless
 * CIFS_NO_RSP_BUF is set. Credits are acquired up front and returned on
 * any path that will not see a server response.
 */
int
compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
		   struct TCP_Server_Info *server,
		   const int flags, const int num_rqst, struct smb_rqst *rqst,
		   int *resp_buf_type, struct kvec *resp_iov)
{
	int i, j, optype, rc = 0;
	struct mid_q_entry *mid[MAX_COMPOUND];
	bool cancelled_mid[MAX_COMPOUND] = {false};
	struct cifs_credits credits[MAX_COMPOUND] = {
		{ .value = 0, .instance = 0 }
	};
	unsigned int instance;
	char *buf;

	optype = flags & CIFS_OP_MASK;

	for (i = 0; i < num_rqst; i++)
		resp_buf_type[i] = CIFS_NO_BUFFER;  /* no response buf yet */

	if (!ses || !ses->server || !server) {
		cifs_dbg(VFS, "Null session\n");
		return smb_EIO(smb_eio_trace_null_pointers);
	}

	spin_lock(&server->srv_lock);
	if (server->tcpStatus == CifsExiting) {
		spin_unlock(&server->srv_lock);
		return -ENOENT;
	}
	spin_unlock(&server->srv_lock);

	/*
	 * Wait for all the requests to become available.
	 * This approach still leaves the possibility to be stuck waiting for
	 * credits if the server doesn't grant credits to the outstanding
	 * requests and if the client is completely idle, not generating any
	 * other requests.
	 * This can be handled by the eventual session reconnect.
	 */
	rc = wait_for_compound_request(server, num_rqst, flags,
				       &instance);
	if (rc)
		return rc;

	for (i = 0; i < num_rqst; i++) {
		credits[i].value = 1;
		credits[i].instance = instance;
	}

	/*
	 * Make sure that we sign in the same order that we send on this socket
	 * and avoid races inside tcp sendmsg code that could cause corruption
	 * of smb data.
	 */

	cifs_server_lock(server);

	/*
	 * All the parts of the compound chain belong obtained credits from the
	 * same session. We can not use credits obtained from the previous
	 * session to send this request. Check if there were reconnects after
	 * we obtained credits and return -EAGAIN in such cases to let callers
	 * handle it.
	 */
	if (instance != server->reconnect_instance) {
		cifs_server_unlock(server);
		for (j = 0; j < num_rqst; j++)
			add_credits(server, &credits[j], optype);
		return -EAGAIN;
	}

	for (i = 0; i < num_rqst; i++) {
		mid[i] = server->ops->setup_request(ses, server, &rqst[i]);
		if (IS_ERR(mid[i])) {
			/* unwind the mids already set up */
			revert_current_mid(server, i);
			for (j = 0; j < i; j++)
				delete_mid(server, mid[j]);
			cifs_server_unlock(server);

			/* Update # of requests on wire to server */
			for (j = 0; j < num_rqst; j++)
				add_credits(server, &credits[j], optype);
			return PTR_ERR(mid[i]);
		}

		mid[i]->sr_flags = flags;
		mid[i]->mid_state = MID_REQUEST_SUBMITTED;
		mid[i]->optype = optype;
		/*
		 * Invoke callback for every part of the compound chain
		 * to calculate credits properly. Wake up this thread only when
		 * the last element is received.
		 */
		if (i < num_rqst - 1)
			mid[i]->callback = cifs_compound_callback;
		else
			mid[i]->callback = cifs_compound_last_callback;
	}
	rc = smb_send_rqst(server, num_rqst, rqst, flags);

	for (i = 0; i < num_rqst; i++)
		cifs_save_when_sent(mid[i]);

	if (rc < 0) {
		revert_current_mid(server, num_rqst);
		server->sequence_number -= 2;
	}

	cifs_server_unlock(server);

	/*
	 * If sending failed for some reason or it is an oplock break that we
	 * will not receive a response to - return credits back
	 */
	if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) {
		for (i = 0; i < num_rqst; i++)
			add_credits(server, &credits[i], optype);
		goto out;
	}

	/*
	 * At this point the request is passed to the network stack - we assume
	 * that any credits taken from the server structure on the client have
	 * been spent and we can't return them back. Once we receive responses
	 * we will collect credits granted by the server in the mid callbacks
	 * and add those credits to the server structure.
	 */

	/*
	 * Compounding is never used during session establish.
	 */
	spin_lock(&ses->ses_lock);
	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
		spin_unlock(&ses->ses_lock);

		/* NOTE(review): returns with mids still queued; relies on
		 * this WARN path being unreachable in practice */
		if (WARN_ON_ONCE(num_rqst != 1 || !resp_iov))
			return -EINVAL;

		cifs_server_lock(server);
		smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
		cifs_server_unlock(server);

		spin_lock(&ses->ses_lock);
	}
	spin_unlock(&ses->ses_lock);

	for (i = 0; i < num_rqst; i++) {
		rc = wait_for_response(server, mid[i]);
		if (rc != 0)
			break;
	}
	if (rc != 0) {
		/* cancel every mid we did not get a response for */
		for (; i < num_rqst; i++) {
			cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
					mid[i]->mid, le16_to_cpu(mid[i]->command));
			send_cancel(ses, server, &rqst[i], mid[i], xid);
			spin_lock(&mid[i]->mid_lock);
			mid[i]->wait_cancelled = true;
			if (mid[i]->mid_state == MID_REQUEST_SUBMITTED ||
			    mid[i]->mid_state == MID_RESPONSE_RECEIVED) {
				mid[i]->callback = cifs_cancelled_callback;
				cancelled_mid[i] = true;
				credits[i].value = 0;
			}
			spin_unlock(&mid[i]->mid_lock);
		}
	}

	for (i = 0; i < num_rqst; i++) {
		if (rc < 0)
			goto out;

		rc = cifs_sync_mid_result(mid[i], server);
		if (rc != 0) {
			/* mark this mid as cancelled to not free it below */
			cancelled_mid[i] = true;
			goto out;
		}

		if (!mid[i]->resp_buf ||
		    mid[i]->mid_state != MID_RESPONSE_READY) {
			rc = smb_EIO1(smb_eio_trace_rx_mid_unready, mid[i]->mid_state);
			cifs_dbg(FYI, "Bad MID state?\n");
			goto out;
		}

		rc = server->ops->check_receive(mid[i], server,
						flags & CIFS_LOG_ERROR);

		if (resp_iov) {
			buf = (char *)mid[i]->resp_buf;
			resp_iov[i].iov_base = buf;
			resp_iov[i].iov_len = mid[i]->resp_buf_size;

			if (mid[i]->large_buf)
				resp_buf_type[i] = CIFS_LARGE_BUFFER;
			else
				resp_buf_type[i] = CIFS_SMALL_BUFFER;

			/* mark it so buf will not be freed by delete_mid */
			if ((flags & CIFS_NO_RSP_BUF) == 0)
				mid[i]->resp_buf = NULL;
		}
	}

	/*
	 * Compounding is never used during session establish.
	 */
	spin_lock(&ses->ses_lock);
	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
		struct kvec iov = {
			.iov_base = resp_iov[0].iov_base,
			.iov_len = resp_iov[0].iov_len
		};
		spin_unlock(&ses->ses_lock);
		cifs_server_lock(server);
		smb311_update_preauth_hash(ses, server, &iov, 1);
		cifs_server_unlock(server);
		spin_lock(&ses->ses_lock);
	}
	spin_unlock(&ses->ses_lock);

out:
	/*
	 * This will dequeue all mids. After this it is important that the
	 * demultiplex_thread will not process any of these mids any further.
	 * This is prevented above by using a noop callback that will not
	 * wake this thread except for the very last PDU.
	 */
	for (i = 0; i < num_rqst; i++) {
		if (!cancelled_mid[i])
			delete_mid(server, mid[i]);
	}

	return rc;
}

/* Convenience wrapper: send a single (non-compound) request and wait. */
int
cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
	       struct TCP_Server_Info *server,
	       struct smb_rqst *rqst, int *resp_buf_type, const int flags,
	       struct kvec *resp_iov)
{
	return compound_send_recv(xid, ses, server, flags, 1,
				  rqst, resp_buf_type, resp_iov);
}


/*
 * Discard any remaining data in the current SMB. To do this, we borrow the
 * current bigbuf.
 */
int
cifs_discard_remaining_data(struct TCP_Server_Info *server)
{
	unsigned int rfclen = server->pdu_size;
	size_t remaining = rfclen - server->total_read;

	/*
	 * Drain the unread tail of the PDU in bounded chunks; each chunk is
	 * capped at one max-sized SMB buffer plus header room.
	 */
	while (remaining > 0) {
		ssize_t length;

		length = cifs_discard_from_socket(server,
				min_t(size_t, remaining,
				      CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
		if (length < 0)
			return length;	/* propagate socket error */
		server->total_read += length;
		remaining -= length;
	}

	return 0;
}

/*
 * Discard the rest of the current PDU, dequeue the mid (optionally marking it
 * malformed), and transfer ownership of server->smallbuf to the mid as its
 * response buffer.
 *
 * Returns 0, or the negative error from cifs_discard_remaining_data().
 */
static int
__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
		     bool malformed)
{
	int length;

	length = cifs_discard_remaining_data(server);
	dequeue_mid(server, mid, malformed);
	/* smallbuf now belongs to the mid; stop the demultiplex thread reusing it */
	mid->resp_buf = server->smallbuf;
	server->smallbuf = NULL;
	return length;
}

/*
 * Discard wrapper for the read path: the mid is flagged malformed iff the
 * read subrequest already recorded a nonzero result.
 */
static int
cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	struct cifs_io_subrequest *rdata = mid->callback_data;

	return __cifs_readv_discard(server, mid, rdata->result);
}

/*
 * Receive handler for an async READ response.  Pulls the remainder of the
 * READ_RSP header off the socket, validates the data offset/length fields,
 * then copies the payload directly into the subrequest's iterator
 * (rdata->subreq.io_iter).  On any validation failure the rest of the frame
 * is discarded via cifs_readv_discard().
 */
int
cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	int length, len;
	unsigned int data_offset, data_len;
	struct cifs_io_subrequest *rdata = mid->callback_data;
	char *buf = server->smallbuf;
	unsigned int buflen = server->pdu_size;
	bool use_rdma_mr = false;

	cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n",
		 __func__, mid->mid, rdata->subreq.start, rdata->subreq.len);

	/*
	 * read the rest of READ_RSP header (sans Data array), or whatever we
	 * can if there's not enough data. At this point, we've read down to
	 * the Mid.
	 */
	len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
							HEADER_SIZE(server) + 1;

	length = cifs_read_from_socket(server,
				       buf + HEADER_SIZE(server) - 1, len);
	if (length < 0)
		return length;
	server->total_read += length;

	if (server->ops->is_session_expired &&
	    server->ops->is_session_expired(buf)) {
		/* session died underneath us; force a reconnect */
		cifs_reconnect(server, true);
		return -1;
	}

	if (server->ops->is_status_pending &&
	    server->ops->is_status_pending(buf, server)) {
		/* interim STATUS_PENDING response: drop it, real one follows */
		cifs_discard_remaining_data(server);
		return -1;
	}

	/* set up first two iov for signature check and to get credits */
	rdata->iov[0].iov_base = buf;
	rdata->iov[0].iov_len = server->total_read;
	cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
		 rdata->iov[0].iov_base, rdata->iov[0].iov_len);

	/* Was the SMB read successful? */
	rdata->result = server->ops->map_error(buf, false);
	if (rdata->result != 0) {
		cifs_dbg(FYI, "%s: server returned error %d\n",
			 __func__, rdata->result);
		/* normal error on read response */
		return __cifs_readv_discard(server, mid, false);
	}

	/* Is there enough to get to the rest of the READ_RSP header? */
	if (server->total_read < server->vals->read_rsp_size) {
		cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n",
			 __func__, server->total_read,
			 server->vals->read_rsp_size);
		rdata->result = smb_EIO2(smb_eio_trace_read_rsp_short,
					 server->total_read,
					 server->vals->read_rsp_size);
		return cifs_readv_discard(server, mid);
	}

	data_offset = server->ops->read_data_offset(buf);
	if (data_offset < server->total_read) {
		/*
		 * win2k8 sometimes sends an offset of 0 when the read
		 * is beyond the EOF. Treat it as if the data starts just after
		 * the header.
		 */
		cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n",
			 __func__, data_offset);
		data_offset = server->total_read;
	} else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
		/* data_offset is beyond the end of smallbuf */
		cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
			 __func__, data_offset);
		rdata->result = smb_EIO1(smb_eio_trace_read_overlarge,
					 data_offset);
		return cifs_readv_discard(server, mid);
	}

	cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n",
		 __func__, server->total_read, data_offset);

	len = data_offset - server->total_read;
	if (len > 0) {
		/* read any junk before data into the rest of smallbuf */
		length = cifs_read_from_socket(server,
					       buf + server->total_read, len);
		if (length < 0)
			return length;
		server->total_read += length;
		/* signature iov must cover the padding we just consumed */
		rdata->iov[0].iov_len = server->total_read;
	}

	/* how much data is in the response? */
#ifdef CONFIG_CIFS_SMB_DIRECT
	use_rdma_mr = rdata->mr;
#endif
	data_len = server->ops->read_data_length(buf, use_rdma_mr);
	if (!use_rdma_mr && (data_offset + data_len > buflen)) {
		/* data_len is corrupt -- discard frame */
		rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed,
					 data_offset + data_len, buflen);
		return cifs_readv_discard(server, mid);
	}

#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr)
		length = data_len; /* An RDMA read is already done. */
	else
#endif
		length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter,
						    data_len);
	if (length > 0)
		rdata->got_bytes += length;
	/*
	 * NOTE(review): a negative 'length' (socket error) is still folded
	 * into total_read here; presumably the subsequent short-frame discard
	 * or reconnect path recovers — confirm against the demultiplex loop.
	 */
	server->total_read += length;

	cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
		 server->total_read, buflen, data_len);

	/* discard anything left over */
	if (server->total_read < buflen)
		return cifs_readv_discard(server, mid);

	/* success: hand smallbuf to the mid as its response buffer */
	dequeue_mid(server, mid, false);
	mid->resp_buf = server->smallbuf;
	server->smallbuf = NULL;
	return length;
}