// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   Copyright (C) International Business Machines Corp., 2002,2008
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org) 2006.
 *
 */

#include <linux/fs.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/wait.h>
#include <linux/net.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/tcp.h>
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/uaccess.h>
#include <linux/processor.h>
#include <linux/mempool.h>
#include <linux/sched/signal.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/task_work.h>
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
#include "smb2proto.h"
#include "smbdirect.h"
#include "compress.h"

void
cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	if (mid->mid_state == MID_RESPONSE_RECEIVED)
		mid->mid_state = MID_RESPONSE_READY;
	wake_up_process(mid->callback_data);
}

void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *midEntry)
{
#ifdef CONFIG_CIFS_STATS2
	__le16 command = server->vals->lock_cmd;
	__u16 smb_cmd = le16_to_cpu(midEntry->command);
	unsigned long now;
	unsigned long roundtrip_time;
#endif

	if (midEntry->resp_buf && (midEntry->wait_cancelled) &&
	    (midEntry->mid_state == MID_RESPONSE_RECEIVED ||
	     midEntry->mid_state == MID_RESPONSE_READY) &&
	    server->ops->handle_cancelled_mid)
		server->ops->handle_cancelled_mid(midEntry, server);

	midEntry->mid_state = MID_FREE;
	atomic_dec(&mid_count);
	if (midEntry->large_buf)
		cifs_buf_release(midEntry->resp_buf);
	else
		cifs_small_buf_release(midEntry->resp_buf);
#ifdef CONFIG_CIFS_STATS2
	now = jiffies;
	if (now < midEntry->when_alloc)
		cifs_server_dbg(VFS, "Invalid mid allocation time\n");
	roundtrip_time = now - midEntry->when_alloc;

	if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) {
		if (atomic_read(&server->num_cmds[smb_cmd]) == 0) {
			server->slowest_cmd[smb_cmd] = roundtrip_time;
			server->fastest_cmd[smb_cmd] = roundtrip_time;
		} else {
			if (server->slowest_cmd[smb_cmd] < roundtrip_time)
				server->slowest_cmd[smb_cmd] = roundtrip_time;
			else if (server->fastest_cmd[smb_cmd] > roundtrip_time)
				server->fastest_cmd[smb_cmd] = roundtrip_time;
		}
		cifs_stats_inc(&server->num_cmds[smb_cmd]);
		server->time_per_cmd[smb_cmd] += roundtrip_time;
	}
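	/*
	 * Note on the counters above (a derived observation, not additional
	 * bookkeeping): num_cmds counts completed commands of each type and
	 * time_per_cmd accumulates their round trip times in jiffies, so an
	 * average round trip per command can be derived as
	 * time_per_cmd / num_cmds alongside the fastest and slowest samples.
	 */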
	/*
	 * commands taking longer than one second (default) can be indications
	 * that something is wrong, unless it is quite a slow link or a very
	 * busy server. Note that this calc is unlikely or impossible to wrap
	 * as long as slow_rsp_threshold is not set way above recommended max
	 * value (32767 ie 9 hours) and is generally harmless even if wrong
	 * since only affects debug counters - so leaving the calc as simple
	 * comparison rather than doing multiple conversions and overflow
	 * checks
	 */
	if ((slow_rsp_threshold != 0) &&
	    time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
	    (midEntry->command != command)) {
		/*
		 * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command
		 * NB: le16_to_cpu returns unsigned so can not be negative below
		 */
		if (smb_cmd < NUMBER_OF_SMB2_COMMANDS)
			cifs_stats_inc(&server->smb2slowcmd[smb_cmd]);

		trace_smb3_slow_rsp(smb_cmd, midEntry->mid, midEntry->pid,
				    midEntry->when_sent, midEntry->when_received);
		if (cifsFYI & CIFS_TIMER) {
			pr_debug("slow rsp: cmd %d mid %llu",
				 midEntry->command, midEntry->mid);
			cifs_info("A: 0x%lx S: 0x%lx R: 0x%lx\n",
				  now - midEntry->when_alloc,
				  now - midEntry->when_sent,
				  now - midEntry->when_received);
		}
	}
#endif
	put_task_struct(midEntry->creator);

	mempool_free(midEntry, &cifs_mid_pool);
}

void
delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	spin_lock(&server->mid_queue_lock);

	if (!mid->deleted_from_q) {
		list_del_init(&mid->qhead);
		mid->deleted_from_q = true;
	}
	spin_unlock(&server->mid_queue_lock);

	release_mid(server, mid);
}

/*
 * smb_send_kvec - send an array of kvecs to the server
 * @server:	Server to send the data to
 * @smb_msg:	Message to send
 * @sent:	amount of data sent on socket is stored here
 *
 * Our basic "send data to server" function. Should be called with srv_mutex
 * held. The caller is responsible for handling the results.
 */
int
smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
	      size_t *sent)
{
	int rc = 0;
	int retries = 0;
	struct socket *ssocket = server->ssocket;

	*sent = 0;

	if (server->noblocksnd)
		smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
	else
		smb_msg->msg_flags = MSG_NOSIGNAL;

	while (msg_data_left(smb_msg)) {
		/*
		 * If blocking send, we try 3 times, since each can block
		 * for 5 seconds. For nonblocking we have to try more
		 * but wait increasing amounts of time allowing time for
		 * socket to clear. The overall time we wait in either
		 * case to send on the socket is about 15 seconds.
		 * Similarly we wait for 15 seconds for a response from
		 * the server in SendReceive[2] for the server to send
		 * a response back for most types of requests (except
		 * SMB Write past end of file which can be slow, and
		 * blocking lock operations). NFS waits slightly longer
		 * than CIFS, but this can make it take longer for
		 * nonresponsive servers to be detected and 15 seconds
		 * is more than enough time for modern networks to
		 * send a packet. In most cases if we fail to send
		 * after the retries we will kill the socket and
		 * reconnect which may clear the network problem.
		 *
		 * Even if regular signals are masked, EINTR might be
		 * propagated from sk_stream_wait_memory() to here when
		 * TIF_NOTIFY_SIGNAL is used for task work. For example,
		 * certain io_uring completions will use that. Treat
		 * having EINTR with pending task work the same as EAGAIN
		 * to avoid unnecessary reconnects.
		 */
		rc = sock_sendmsg(ssocket, smb_msg);
		if (rc == -EAGAIN || unlikely(rc == -EINTR && task_work_pending(current))) {
			retries++;
			if (retries >= 14 ||
			    (!server->noblocksnd && (retries > 2))) {
				cifs_server_dbg(VFS, "sends on sock %p stuck for 15 seconds\n",
						ssocket);
				return -EAGAIN;
			}
			msleep(1 << retries);
			continue;
		}

		if (rc < 0)
			return rc;

		if (rc == 0) {
			/* should never happen, letting socket clear before
			   retrying is our only obvious option here */
			cifs_server_dbg(VFS, "tcp sent no data\n");
			msleep(500);
			continue;
		}

		/* send was at least partially successful */
		*sent += rc;
		retries = 0; /* in case we get ENOSPC on the next send */
	}
	return 0;
}
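
/*
 * A rough worked example of the backoff in smb_send_kvec() (derived from the
 * loop above, not a separate guarantee): on a nonblocking socket each -EAGAIN
 * sleeps 1 << retries milliseconds, i.e. 2 + 4 + ... + 8192 ms across the
 * thirteen sleeps before the fourteenth retry gives up, which is roughly the
 * 15 seconds mentioned in the comment above. On a blocking socket each send
 * can itself stall for about 5 seconds, so three attempts give a similar
 * overall bound.
 */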

unsigned long
smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
	unsigned int i;
	struct kvec *iov;
	int nvec;
	unsigned long buflen = 0;

	if (!is_smb1(server) && rqst->rq_nvec >= 2 &&
	    rqst->rq_iov[0].iov_len == 4) {
		iov = &rqst->rq_iov[1];
		nvec = rqst->rq_nvec - 1;
	} else {
		iov = rqst->rq_iov;
		nvec = rqst->rq_nvec;
	}

	/* total up iov array first */
	for (i = 0; i < nvec; i++)
		buflen += iov[i].iov_len;

	buflen += iov_iter_count(&rqst->rq_iter);
	return buflen;
}

int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
		    struct smb_rqst *rqst)
{
	int rc;
	struct kvec *iov;
	int n_vec;
	unsigned int send_length = 0;
	unsigned int i, j;
	sigset_t mask, oldmask;
	size_t total_len = 0, sent, size;
	struct socket *ssocket = server->ssocket;
	struct msghdr smb_msg = {};
	__be32 rfc1002_marker;

	cifs_in_send_inc(server);
	if (cifs_rdma_enabled(server)) {
		/* return -EAGAIN when connecting or reconnecting */
		rc = -EAGAIN;
		if (server->smbd_conn)
			rc = smbd_send(server, num_rqst, rqst);
		goto smbd_done;
	}

	rc = -EAGAIN;
	if (ssocket == NULL)
		goto out;

	rc = -ERESTARTSYS;
	if (fatal_signal_pending(current)) {
		cifs_dbg(FYI, "signal pending before send request\n");
		goto out;
	}

	rc = 0;
	/* cork the socket */
	tcp_sock_set_cork(ssocket->sk, true);

	for (j = 0; j < num_rqst; j++)
		send_length += smb_rqst_len(server, &rqst[j]);
	rfc1002_marker = cpu_to_be32(send_length);

	/*
	 * We should not allow signals to interrupt the network send because
	 * any partial send will cause session reconnects thus increasing
	 * latency of system calls and overload a server with unnecessary
	 * requests.
284 */ 285 286 sigfillset(&mask); 287 sigprocmask(SIG_BLOCK, &mask, &oldmask); 288 289 /* Generate a rfc1002 marker */ 290 { 291 struct kvec hiov = { 292 .iov_base = &rfc1002_marker, 293 .iov_len = 4 294 }; 295 iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4); 296 rc = smb_send_kvec(server, &smb_msg, &sent); 297 if (rc < 0) 298 goto unmask; 299 300 total_len += sent; 301 send_length += 4; 302 } 303 304 cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length); 305 306 for (j = 0; j < num_rqst; j++) { 307 iov = rqst[j].rq_iov; 308 n_vec = rqst[j].rq_nvec; 309 310 size = 0; 311 for (i = 0; i < n_vec; i++) { 312 dump_smb(iov[i].iov_base, iov[i].iov_len); 313 size += iov[i].iov_len; 314 } 315 316 iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size); 317 318 rc = smb_send_kvec(server, &smb_msg, &sent); 319 if (rc < 0) 320 goto unmask; 321 322 total_len += sent; 323 324 if (iov_iter_count(&rqst[j].rq_iter) > 0) { 325 smb_msg.msg_iter = rqst[j].rq_iter; 326 rc = smb_send_kvec(server, &smb_msg, &sent); 327 if (rc < 0) 328 break; 329 total_len += sent; 330 } 331 } 332 333 unmask: 334 sigprocmask(SIG_SETMASK, &oldmask, NULL); 335 336 /* 337 * If signal is pending but we have already sent the whole packet to 338 * the server we need to return success status to allow a corresponding 339 * mid entry to be kept in the pending requests queue thus allowing 340 * to handle responses from the server by the client. 341 * 342 * If only part of the packet has been sent there is no need to hide 343 * interrupt because the session will be reconnected anyway, so there 344 * won't be any response from the server to handle. 345 */ 346 347 if (signal_pending(current) && (total_len != send_length)) { 348 cifs_dbg(FYI, "signal is pending after attempt to send\n"); 349 rc = -ERESTARTSYS; 350 } 351 352 /* uncork it */ 353 tcp_sock_set_cork(ssocket->sk, false); 354 355 if ((total_len > 0) && (total_len != send_length)) { 356 cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n", 357 send_length, total_len); 358 /* 359 * If we have only sent part of an SMB then the next SMB could 360 * be taken as the remainder of this one. We need to kill the 361 * socket so the server throws away the partial SMB 362 */ 363 cifs_signal_cifsd_for_reconnect(server, false); 364 trace_smb3_partial_send_reconnect(server->current_mid, 365 server->conn_id, server->hostname); 366 } 367 smbd_done: 368 /* 369 * there's hardly any use for the layers above to know the 370 * actual error code here. All they should do at this point is 371 * to retry the connection and hope it goes away. 
372 */ 373 if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { 374 cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", 375 rc); 376 rc = -ECONNABORTED; 377 cifs_signal_cifsd_for_reconnect(server, false); 378 } else if (rc > 0) 379 rc = 0; 380 out: 381 cifs_in_send_dec(server); 382 return rc; 383 } 384 385 static int 386 smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, 387 struct smb_rqst *rqst, int flags) 388 { 389 struct smb2_transform_hdr tr_hdr; 390 struct smb_rqst new_rqst[MAX_COMPOUND] = {}; 391 struct kvec iov = { 392 .iov_base = &tr_hdr, 393 .iov_len = sizeof(tr_hdr), 394 }; 395 int rc; 396 397 if (flags & CIFS_COMPRESS_REQ) 398 return smb_compress(server, &rqst[0], __smb_send_rqst); 399 400 if (!(flags & CIFS_TRANSFORM_REQ)) 401 return __smb_send_rqst(server, num_rqst, rqst); 402 403 if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) 404 return smb_EIO1(smb_eio_trace_tx_max_compound, num_rqst); 405 406 if (!server->ops->init_transform_rq) { 407 cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); 408 return smb_EIO(smb_eio_trace_tx_need_transform); 409 } 410 411 new_rqst[0].rq_iov = &iov; 412 new_rqst[0].rq_nvec = 1; 413 414 rc = server->ops->init_transform_rq(server, num_rqst + 1, 415 new_rqst, rqst); 416 if (!rc) { 417 rc = __smb_send_rqst(server, num_rqst + 1, new_rqst); 418 smb3_free_compound_rqst(num_rqst, &new_rqst[1]); 419 } 420 return rc; 421 } 422 423 static int 424 wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, 425 const int timeout, const int flags, 426 unsigned int *instance) 427 { 428 long rc; 429 int *credits; 430 int optype; 431 long int t; 432 int scredits, in_flight; 433 434 if (timeout < 0) 435 t = MAX_JIFFY_OFFSET; 436 else 437 t = msecs_to_jiffies(timeout); 438 439 optype = flags & CIFS_OP_MASK; 440 441 *instance = 0; 442 443 credits = server->ops->get_credits_field(server, optype); 444 /* Since an echo is already inflight, no need to wait to send another */ 445 if (*credits <= 0 && optype == CIFS_ECHO_OP) 446 return -EAGAIN; 447 448 spin_lock(&server->req_lock); 449 if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) { 450 /* oplock breaks must not be held up */ 451 server->in_flight++; 452 if (server->in_flight > server->max_in_flight) 453 server->max_in_flight = server->in_flight; 454 *credits -= 1; 455 *instance = server->reconnect_instance; 456 scredits = *credits; 457 in_flight = server->in_flight; 458 spin_unlock(&server->req_lock); 459 460 trace_smb3_nblk_credits(server->current_mid, 461 server->conn_id, server->hostname, scredits, -1, in_flight); 462 cifs_dbg(FYI, "%s: remove %u credits total=%d\n", 463 __func__, 1, scredits); 464 465 return 0; 466 } 467 468 while (1) { 469 spin_unlock(&server->req_lock); 470 471 spin_lock(&server->srv_lock); 472 if (server->tcpStatus == CifsExiting) { 473 spin_unlock(&server->srv_lock); 474 return -ENOENT; 475 } 476 spin_unlock(&server->srv_lock); 477 478 spin_lock(&server->req_lock); 479 if (*credits < num_credits) { 480 scredits = *credits; 481 spin_unlock(&server->req_lock); 482 483 cifs_num_waiters_inc(server); 484 rc = wait_event_killable_timeout(server->request_q, 485 has_credits(server, credits, num_credits), t); 486 cifs_num_waiters_dec(server); 487 if (!rc) { 488 spin_lock(&server->req_lock); 489 scredits = *credits; 490 in_flight = server->in_flight; 491 spin_unlock(&server->req_lock); 492 493 trace_smb3_credit_timeout(server->current_mid, 494 server->conn_id, server->hostname, scredits, 495 num_credits, in_flight); 496 

static int
wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
		      const int timeout, const int flags,
		      unsigned int *instance)
{
	long rc;
	int *credits;
	int optype;
	long int t;
	int scredits, in_flight;

	if (timeout < 0)
		t = MAX_JIFFY_OFFSET;
	else
		t = msecs_to_jiffies(timeout);

	optype = flags & CIFS_OP_MASK;

	*instance = 0;

	credits = server->ops->get_credits_field(server, optype);
	/* Since an echo is already inflight, no need to wait to send another */
	if (*credits <= 0 && optype == CIFS_ECHO_OP)
		return -EAGAIN;

	spin_lock(&server->req_lock);
	if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) {
		/* oplock breaks must not be held up */
		server->in_flight++;
		if (server->in_flight > server->max_in_flight)
			server->max_in_flight = server->in_flight;
		*credits -= 1;
		*instance = server->reconnect_instance;
		scredits = *credits;
		in_flight = server->in_flight;
		spin_unlock(&server->req_lock);

		trace_smb3_nblk_credits(server->current_mid,
				server->conn_id, server->hostname, scredits, -1, in_flight);
		cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
			 __func__, 1, scredits);

		return 0;
	}

	while (1) {
		spin_unlock(&server->req_lock);

		spin_lock(&server->srv_lock);
		if (server->tcpStatus == CifsExiting) {
			spin_unlock(&server->srv_lock);
			return -ENOENT;
		}
		spin_unlock(&server->srv_lock);

		spin_lock(&server->req_lock);
		if (*credits < num_credits) {
			scredits = *credits;
			spin_unlock(&server->req_lock);

			cifs_num_waiters_inc(server);
			rc = wait_event_killable_timeout(server->request_q,
				has_credits(server, credits, num_credits), t);
			cifs_num_waiters_dec(server);
			if (!rc) {
				spin_lock(&server->req_lock);
				scredits = *credits;
				in_flight = server->in_flight;
				spin_unlock(&server->req_lock);

				trace_smb3_credit_timeout(server->current_mid,
						server->conn_id, server->hostname,
						scredits, num_credits, in_flight);
				cifs_server_dbg(VFS, "wait timed out after %d ms\n",
						timeout);
				return -EBUSY;
			}
			if (rc == -ERESTARTSYS)
				return -ERESTARTSYS;
			spin_lock(&server->req_lock);
		} else {
			/*
			 * For normal commands, reserve the last MAX_COMPOUND
			 * credits to compound requests.
			 * Otherwise these compounds could be permanently
			 * starved for credits by single-credit requests.
			 *
			 * To prevent spinning CPU, block this thread until
			 * there are >MAX_COMPOUND credits available.
			 * But only do this if we already have a lot of
			 * credits in flight to avoid triggering this check
			 * for servers that are slow to hand out credits on
			 * new sessions.
			 */
			if (!optype && num_credits == 1 &&
			    server->in_flight > 2 * MAX_COMPOUND &&
			    *credits <= MAX_COMPOUND) {
				spin_unlock(&server->req_lock);

				cifs_num_waiters_inc(server);
				rc = wait_event_killable_timeout(
					server->request_q,
					has_credits(server, credits,
						    MAX_COMPOUND + 1),
					t);
				cifs_num_waiters_dec(server);
				if (!rc) {
					spin_lock(&server->req_lock);
					scredits = *credits;
					in_flight = server->in_flight;
					spin_unlock(&server->req_lock);

					trace_smb3_credit_timeout(
							server->current_mid,
							server->conn_id,
							server->hostname, scredits,
							num_credits, in_flight);
					cifs_server_dbg(VFS, "wait timed out after %d ms\n",
							timeout);
					return -EBUSY;
				}
				if (rc == -ERESTARTSYS)
					return -ERESTARTSYS;
				spin_lock(&server->req_lock);
				continue;
			}

			/*
			 * Can not count locking commands against total
			 * as they are allowed to block on server.
			 */

			/* update # of requests on the wire to server */
			if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
				*credits -= num_credits;
				server->in_flight += num_credits;
				if (server->in_flight > server->max_in_flight)
					server->max_in_flight = server->in_flight;
				*instance = server->reconnect_instance;
			}
			scredits = *credits;
			in_flight = server->in_flight;
			spin_unlock(&server->req_lock);

			trace_smb3_waitff_credits(server->current_mid,
					server->conn_id, server->hostname, scredits,
					-(num_credits), in_flight);
			cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
				 __func__, num_credits, scredits);
			break;
		}
	}
	return 0;
}

int wait_for_free_request(struct TCP_Server_Info *server, const int flags,
			  unsigned int *instance)
{
	return wait_for_free_credits(server, 1, -1, flags,
				     instance);
}
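
/*
 * An illustration of the reservation above (assuming MAX_COMPOUND as defined
 * in cifsglob.h, not a new tunable): a plain single-credit request that
 * arrives while more than 2 * MAX_COMPOUND requests are already in flight
 * and at most MAX_COMPOUND credits remain will sleep until more than
 * MAX_COMPOUND credits are free, so that a later compound of up to
 * MAX_COMPOUND requests is not starved by a steady stream of single-credit
 * callers.
 */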

static int
wait_for_compound_request(struct TCP_Server_Info *server, int num,
			  const int flags, unsigned int *instance)
{
	int *credits;
	int scredits, in_flight;

	credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);

	spin_lock(&server->req_lock);
	scredits = *credits;
	in_flight = server->in_flight;

	if (*credits < num) {
		/*
		 * If the server is tight on resources or just gives us fewer
		 * credits for other reasons (e.g. requests are coming out of
		 * order and the server delays granting more credits until it
		 * processes a missing mid) and we exhausted most available
		 * credits there may be situations when we try to send
		 * a compound request but we don't have enough credits. At this
		 * point the client needs to decide if it should wait for
		 * additional credits or fail the request. If at least one
		 * request is in flight there is a high probability that the
		 * server will return enough credits to satisfy this compound
		 * request.
		 *
		 * Return immediately if no requests in flight since we will be
		 * stuck on waiting for credits.
		 */
		if (server->in_flight == 0) {
			spin_unlock(&server->req_lock);
			trace_smb3_insufficient_credits(server->current_mid,
					server->conn_id, server->hostname, scredits,
					num, in_flight);
			cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
				 __func__, in_flight, num, scredits);
			return -EDEADLK;
		}
	}
	spin_unlock(&server->req_lock);

	return wait_for_free_credits(server, num, 60000, flags,
				     instance);
}

int
cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size,
		      size_t *num, struct cifs_credits *credits)
{
	*num = size;
	credits->value = 0;
	credits->instance = server->reconnect_instance;
	return 0;
}

int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	unsigned int sleep_state = TASK_KILLABLE;
	int error;

	if (mid->sr_flags & CIFS_INTERRUPTIBLE_WAIT)
		sleep_state = TASK_INTERRUPTIBLE;

	error = wait_event_state(server->response_q,
				 mid->mid_state != MID_REQUEST_SUBMITTED &&
				 mid->mid_state != MID_RESPONSE_RECEIVED,
				 (sleep_state | TASK_FREEZABLE_UNSAFE));
	if (error < 0)
		return -ERESTARTSYS;

	return 0;
}
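
/*
 * A brief note on the states waited on above (summarizing how they are used
 * in this file, not an exhaustive list): a mid is queued as
 * MID_REQUEST_SUBMITTED, the demultiplex thread moves it to
 * MID_RESPONSE_RECEIVED when a reply arrives (or to a failure state such as
 * MID_RETRY_NEEDED on reconnect), and the callbacks in this file -
 * cifs_wake_up_task() above and cifs_compound_callback() below - advance
 * MID_RESPONSE_RECEIVED to MID_RESPONSE_READY, the state that
 * cifs_sync_mid_result() treats as a completed response.
 */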

/*
 * Send a SMB request and set the callback function in the mid to handle
 * the result. Caller is responsible for dealing with timeouts.
 */
int
cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
		mid_receive_t receive, mid_callback_t callback,
		mid_handle_t handle, void *cbdata, const int flags,
		const struct cifs_credits *exist_credits)
{
	int rc;
	struct mid_q_entry *mid;
	struct cifs_credits credits = { .value = 0, .instance = 0 };
	unsigned int instance;
	int optype;

	optype = flags & CIFS_OP_MASK;

	if ((flags & CIFS_HAS_CREDITS) == 0) {
		rc = wait_for_free_request(server, flags, &instance);
		if (rc)
			return rc;
		credits.value = 1;
		credits.instance = instance;
	} else
		instance = exist_credits->instance;

	cifs_server_lock(server);

	/*
	 * We can't use credits obtained from the previous session to send this
	 * request. Check if there were reconnects after we obtained credits and
	 * return -EAGAIN in such cases to let callers handle it.
	 */
	if (instance != server->reconnect_instance) {
		cifs_server_unlock(server);
		add_credits_and_wake_if(server, &credits, optype);
		return -EAGAIN;
	}

	mid = server->ops->setup_async_request(server, rqst);
	if (IS_ERR(mid)) {
		cifs_server_unlock(server);
		add_credits_and_wake_if(server, &credits, optype);
		return PTR_ERR(mid);
	}

	mid->sr_flags = flags;
	mid->receive = receive;
	mid->callback = callback;
	mid->callback_data = cbdata;
	mid->handle = handle;
	mid->mid_state = MID_REQUEST_SUBMITTED;

	/* put it on the pending_mid_q */
	spin_lock(&server->mid_queue_lock);
	list_add_tail(&mid->qhead, &server->pending_mid_q);
	spin_unlock(&server->mid_queue_lock);

	/*
	 * Need to store the time in mid before calling I/O. For call_async,
	 * I/O response may come back and free the mid entry on another thread.
	 */
	cifs_save_when_sent(mid);
	rc = smb_send_rqst(server, 1, rqst, flags);

	if (rc < 0) {
		revert_current_mid(server, mid->credits);
		server->sequence_number -= 2;
		delete_mid(server, mid);
	}

	cifs_server_unlock(server);

	if (rc == 0)
		return 0;

	add_credits_and_wake_if(server, &credits, optype);
	return rc;
}

int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
{
	int rc = 0;

	cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n",
		 __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state);

	spin_lock(&server->mid_queue_lock);
	switch (mid->mid_state) {
	case MID_RESPONSE_READY:
		spin_unlock(&server->mid_queue_lock);
		return rc;
	case MID_RETRY_NEEDED:
		rc = -EAGAIN;
		break;
	case MID_RESPONSE_MALFORMED:
		rc = smb_EIO(smb_eio_trace_rx_sync_mid_malformed);
		break;
	case MID_SHUTDOWN:
		rc = -EHOSTDOWN;
		break;
	case MID_RC:
		rc = mid->mid_rc;
		break;
	default:
		if (mid->deleted_from_q == false) {
			list_del_init(&mid->qhead);
			mid->deleted_from_q = true;
		}
		spin_unlock(&server->mid_queue_lock);
		cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
				__func__, mid->mid, mid->mid_state);
		rc = smb_EIO1(smb_eio_trace_rx_sync_mid_invalid, mid->mid_state);
		goto sync_mid_done;
	}
	spin_unlock(&server->mid_queue_lock);

sync_mid_done:
	release_mid(server, mid);
	return rc;
}

static void
cifs_compound_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	struct cifs_credits credits = {
		.value = server->ops->get_credits(mid),
		.instance = server->reconnect_instance,
	};

	add_credits(server, &credits, mid->optype);

	if (mid->mid_state == MID_RESPONSE_RECEIVED)
		mid->mid_state = MID_RESPONSE_READY;
}

static void
cifs_compound_last_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	cifs_compound_callback(server, mid);
	cifs_wake_up_task(server, mid);
}

static void
cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	cifs_compound_callback(server, mid);
	release_mid(server, mid);
}

/*
 * Return a channel (master if none) of @ses that can be used to send
 * regular requests.
 *
 * If we are currently binding a new channel (negprot/sess.setup),
 * return the new incomplete channel.
 */
struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
{
	uint index = 0;
	unsigned int min_in_flight = UINT_MAX, max_in_flight = 0;
	struct TCP_Server_Info *server = NULL;
	int i, start, cur;

	if (!ses)
		return NULL;

	spin_lock(&ses->chan_lock);
	start = atomic_inc_return(&ses->chan_seq);
	for (i = 0; i < ses->chan_count; i++) {
		cur = (start + i) % ses->chan_count;
		server = ses->chans[cur].server;
		if (!server || server->terminate)
			continue;

		if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur))
			continue;

		/*
		 * strictly speaking, we should pick up req_lock to read
		 * server->in_flight. But it shouldn't matter much here if we
		 * race while reading this data. The worst that can happen is
		 * that we could use a channel that's not least loaded. Avoiding
		 * taking the lock could help reduce wait time, which is
		 * important for this function
		 */
		if (server->in_flight < min_in_flight) {
			min_in_flight = server->in_flight;
			index = cur;
		}
		if (server->in_flight > max_in_flight)
			max_in_flight = server->in_flight;
	}

	/* if all channels are equally loaded, fall back to round-robin */
	if (min_in_flight == max_in_flight)
		index = (uint)start % ses->chan_count;

	server = ses->chans[index].server;
	spin_unlock(&ses->chan_lock);

	return server;
}
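
/*
 * For example (illustrative numbers only): with two healthy channels where
 * channel 0 has three requests in flight and channel 1 has one, the loop
 * above returns channel 1; when every channel reports the same load, the
 * atomic chan_seq counter makes successive calls rotate through the channels
 * round-robin instead of always picking the first one.
 */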

int
compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
		   struct TCP_Server_Info *server,
		   const int flags, const int num_rqst, struct smb_rqst *rqst,
		   int *resp_buf_type, struct kvec *resp_iov)
{
	int i, j, optype, rc = 0;
	struct mid_q_entry *mid[MAX_COMPOUND];
	bool cancelled_mid[MAX_COMPOUND] = {false};
	struct cifs_credits credits[MAX_COMPOUND] = {
		{ .value = 0, .instance = 0 }
	};
	unsigned int instance;
	char *buf;

	optype = flags & CIFS_OP_MASK;

	for (i = 0; i < num_rqst; i++)
		resp_buf_type[i] = CIFS_NO_BUFFER;  /* no response buf yet */

	if (!ses || !ses->server || !server) {
		cifs_dbg(VFS, "Null session\n");
		return smb_EIO(smb_eio_trace_null_pointers);
	}

	spin_lock(&server->srv_lock);
	if (server->tcpStatus == CifsExiting) {
		spin_unlock(&server->srv_lock);
		return -ENOENT;
	}
	spin_unlock(&server->srv_lock);

	/*
	 * Wait for all the requests to become available.
	 * This approach still leaves the possibility to be stuck waiting for
	 * credits if the server doesn't grant credits to the outstanding
	 * requests and if the client is completely idle, not generating any
	 * other requests.
	 * This can be handled by the eventual session reconnect.
	 */
	rc = wait_for_compound_request(server, num_rqst, flags,
				       &instance);
	if (rc)
		return rc;

	for (i = 0; i < num_rqst; i++) {
		credits[i].value = 1;
		credits[i].instance = instance;
	}

	/*
	 * Make sure that we sign in the same order that we send on this socket
	 * and avoid races inside tcp sendmsg code that could cause corruption
	 * of smb data.
	 */

	cifs_server_lock(server);

	/*
	 * All the parts of the compound chain must use credits obtained from
	 * the same session. We can not use credits obtained from the previous
	 * session to send this request. Check if there were reconnects after
	 * we obtained credits and return -EAGAIN in such cases to let callers
	 * handle it.
	 */
	if (instance != server->reconnect_instance) {
		cifs_server_unlock(server);
		for (j = 0; j < num_rqst; j++)
			add_credits(server, &credits[j], optype);
		return -EAGAIN;
	}

	for (i = 0; i < num_rqst; i++) {
		mid[i] = server->ops->setup_request(ses, server, &rqst[i]);
		if (IS_ERR(mid[i])) {
			revert_current_mid(server, i);
			for (j = 0; j < i; j++)
				delete_mid(server, mid[j]);
			cifs_server_unlock(server);

			/* Update # of requests on wire to server */
			for (j = 0; j < num_rqst; j++)
				add_credits(server, &credits[j], optype);
			return PTR_ERR(mid[i]);
		}

		mid[i]->sr_flags = flags;
		mid[i]->mid_state = MID_REQUEST_SUBMITTED;
		mid[i]->optype = optype;
		/*
		 * Invoke callback for every part of the compound chain
		 * to calculate credits properly. Wake up this thread only when
		 * the last element is received.
		 */
		if (i < num_rqst - 1)
			mid[i]->callback = cifs_compound_callback;
		else
			mid[i]->callback = cifs_compound_last_callback;
	}
	rc = smb_send_rqst(server, num_rqst, rqst, flags);

	for (i = 0; i < num_rqst; i++)
		cifs_save_when_sent(mid[i]);

	if (rc < 0) {
		revert_current_mid(server, num_rqst);
		server->sequence_number -= 2;
	}

	cifs_server_unlock(server);

	/*
	 * If sending failed for some reason or it is an oplock break that we
	 * will not receive a response to - return credits back
	 */
	if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) {
		for (i = 0; i < num_rqst; i++)
			add_credits(server, &credits[i], optype);
		goto out;
	}

	/*
	 * At this point the request is passed to the network stack - we assume
	 * that any credits taken from the server structure on the client have
	 * been spent and we can't return them back. Once we receive responses
	 * we will collect credits granted by the server in the mid callbacks
	 * and add those credits to the server structure.
	 */

	/*
	 * Compounding is never used during session establish.
	 */
	spin_lock(&ses->ses_lock);
	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
		spin_unlock(&ses->ses_lock);

		if (WARN_ON_ONCE(num_rqst != 1 || !resp_iov))
			return -EINVAL;

		cifs_server_lock(server);
		smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
		cifs_server_unlock(server);

		spin_lock(&ses->ses_lock);
	}
	spin_unlock(&ses->ses_lock);

	for (i = 0; i < num_rqst; i++) {
		rc = wait_for_response(server, mid[i]);
		if (rc != 0)
			break;
	}
	if (rc != 0) {
		for (; i < num_rqst; i++) {
			cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
					mid[i]->mid, le16_to_cpu(mid[i]->command));
			send_cancel(ses, server, &rqst[i], mid[i], xid);
			spin_lock(&mid[i]->mid_lock);
			mid[i]->wait_cancelled = true;
			if (mid[i]->mid_state == MID_REQUEST_SUBMITTED ||
			    mid[i]->mid_state == MID_RESPONSE_RECEIVED) {
				mid[i]->callback = cifs_cancelled_callback;
				cancelled_mid[i] = true;
				credits[i].value = 0;
			}
			spin_unlock(&mid[i]->mid_lock);
		}
	}

	for (i = 0; i < num_rqst; i++) {
		if (rc < 0)
			goto out;

		rc = cifs_sync_mid_result(mid[i], server);
		if (rc != 0) {
			/* mark this mid as cancelled to not free it below */
			cancelled_mid[i] = true;
			goto out;
		}

		if (!mid[i]->resp_buf ||
		    mid[i]->mid_state != MID_RESPONSE_READY) {
			rc = smb_EIO1(smb_eio_trace_rx_mid_unready, mid[i]->mid_state);
			cifs_dbg(FYI, "Bad MID state?\n");
			goto out;
		}

		rc = server->ops->check_receive(mid[i], server,
						flags & CIFS_LOG_ERROR);

		if (resp_iov) {
			buf = (char *)mid[i]->resp_buf;
			resp_iov[i].iov_base = buf;
			resp_iov[i].iov_len = mid[i]->resp_buf_size;

			if (mid[i]->large_buf)
				resp_buf_type[i] = CIFS_LARGE_BUFFER;
			else
				resp_buf_type[i] = CIFS_SMALL_BUFFER;

			/* mark it so buf will not be freed by delete_mid */
			if ((flags & CIFS_NO_RSP_BUF) == 0)
				mid[i]->resp_buf = NULL;
		}
	}

	/*
	 * Compounding is never used during session establish.
	 */
	spin_lock(&ses->ses_lock);
	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
		struct kvec iov = {
			.iov_base = resp_iov[0].iov_base,
			.iov_len = resp_iov[0].iov_len
		};
		spin_unlock(&ses->ses_lock);
		cifs_server_lock(server);
		smb311_update_preauth_hash(ses, server, &iov, 1);
		cifs_server_unlock(server);
		spin_lock(&ses->ses_lock);
	}
	spin_unlock(&ses->ses_lock);

out:
	/*
	 * This will dequeue all mids. After this it is important that the
	 * demultiplex_thread will not process any of these mids any further.
	 * This is prevented above by using a noop callback that will not
	 * wake this thread except for the very last PDU.
	 */
	for (i = 0; i < num_rqst; i++) {
		if (!cancelled_mid[i])
			delete_mid(server, mid[i]);
	}

	return rc;
}

int
cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
	       struct TCP_Server_Info *server,
	       struct smb_rqst *rqst, int *resp_buf_type, const int flags,
	       struct kvec *resp_iov)
{
	return compound_send_recv(xid, ses, server, flags, 1,
				  rqst, resp_buf_type, resp_iov);
}

/*
 * Discard any remaining data in the current SMB. To do this, we borrow the
 * current bigbuf.
 */
int
cifs_discard_remaining_data(struct TCP_Server_Info *server)
{
	unsigned int rfclen = server->pdu_size;
	size_t remaining = rfclen - server->total_read;

	while (remaining > 0) {
		ssize_t length;

		length = cifs_discard_from_socket(server,
				min_t(size_t, remaining,
				      CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
		if (length < 0)
			return length;
		server->total_read += length;
		remaining -= length;
	}

	return 0;
}

static int
__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
		     bool malformed)
{
	int length;

	length = cifs_discard_remaining_data(server);
	dequeue_mid(server, mid, malformed);
	mid->resp_buf = server->smallbuf;
	server->smallbuf = NULL;
	return length;
}

static int
cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	struct cifs_io_subrequest *rdata = mid->callback_data;

	return __cifs_readv_discard(server, mid, rdata->result);
}

int
cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	int length, len;
	unsigned int data_offset, data_len;
	struct cifs_io_subrequest *rdata = mid->callback_data;
	char *buf = server->smallbuf;
	unsigned int buflen = server->pdu_size;
	bool use_rdma_mr = false;

	cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n",
		 __func__, mid->mid, rdata->subreq.start, rdata->subreq.len);

	/*
	 * read the rest of READ_RSP header (sans Data array), or whatever we
	 * can if there's not enough data. At this point, we've read down to
	 * the Mid.
	 */
	len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
		HEADER_SIZE(server) + 1;

	length = cifs_read_from_socket(server,
				       buf + HEADER_SIZE(server) - 1, len);
	if (length < 0)
		return length;
	server->total_read += length;

	if (server->ops->is_session_expired &&
	    server->ops->is_session_expired(buf)) {
		cifs_reconnect(server, true);
		return -1;
	}

	if (server->ops->is_status_pending &&
	    server->ops->is_status_pending(buf, server)) {
		cifs_discard_remaining_data(server);
		return -1;
	}

	/* set up first two iov for signature check and to get credits */
	rdata->iov[0].iov_base = buf;
	rdata->iov[0].iov_len = server->total_read;
	cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
		 rdata->iov[0].iov_base, rdata->iov[0].iov_len);

	/* Was the SMB read successful? */
	rdata->result = server->ops->map_error(buf, false);
	if (rdata->result != 0) {
		cifs_dbg(FYI, "%s: server returned error %d\n",
			 __func__, rdata->result);
		/* normal error on read response */
		return __cifs_readv_discard(server, mid, false);
	}

	/* Is there enough to get to the rest of the READ_RSP header? */
	if (server->total_read < server->vals->read_rsp_size) {
		cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n",
			 __func__, server->total_read,
			 server->vals->read_rsp_size);
		rdata->result = smb_EIO2(smb_eio_trace_read_rsp_short,
					 server->total_read,
					 server->vals->read_rsp_size);
		return cifs_readv_discard(server, mid);
	}

	data_offset = server->ops->read_data_offset(buf);
	if (data_offset < server->total_read) {
		/*
		 * win2k8 sometimes sends an offset of 0 when the read
		 * is beyond the EOF. Treat it as if the data starts just after
		 * the header.
		 */
		cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n",
			 __func__, data_offset);
		data_offset = server->total_read;
	} else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
		/* data_offset is beyond the end of smallbuf */
		cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
			 __func__, data_offset);
		rdata->result = smb_EIO1(smb_eio_trace_read_overlarge,
					 data_offset);
		return cifs_readv_discard(server, mid);
	}

	cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n",
		 __func__, server->total_read, data_offset);

	len = data_offset - server->total_read;
	if (len > 0) {
		/* read any junk before data into the rest of smallbuf */
		length = cifs_read_from_socket(server,
					       buf + server->total_read, len);
		if (length < 0)
			return length;
		server->total_read += length;
		rdata->iov[0].iov_len = server->total_read;
	}

	/* how much data is in the response? */
#ifdef CONFIG_CIFS_SMB_DIRECT
	use_rdma_mr = rdata->mr;
#endif
	data_len = server->ops->read_data_length(buf, use_rdma_mr);
	if (!use_rdma_mr && (data_offset + data_len > buflen)) {
		/* data_len is corrupt -- discard frame */
		rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed,
					 data_offset + data_len, buflen);
		return cifs_readv_discard(server, mid);
	}

#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr)
		length = data_len; /* An RDMA read is already done. */
	else
#endif
		length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter,
						    data_len);
	if (length > 0)
		rdata->got_bytes += length;
	server->total_read += length;

	cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
		 server->total_read, buflen, data_len);

	/* discard anything left over */
	if (server->total_read < buflen)
		return cifs_readv_discard(server, mid);

	dequeue_mid(server, mid, false);
	mid->resp_buf = server->smallbuf;
	server->smallbuf = NULL;
	return length;
}