1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3 *
4 * Copyright (C) International Business Machines Corp., 2002,2008
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * Jeremy Allison (jra@samba.org) 2006.
7 *
8 */
9
10 #include <linux/fs.h>
11 #include <linux/list.h>
12 #include <linux/gfp.h>
13 #include <linux/wait.h>
14 #include <linux/net.h>
15 #include <linux/delay.h>
16 #include <linux/freezer.h>
17 #include <linux/tcp.h>
18 #include <linux/bvec.h>
19 #include <linux/highmem.h>
20 #include <linux/uaccess.h>
21 #include <linux/processor.h>
22 #include <linux/mempool.h>
23 #include <linux/sched/signal.h>
24 #include <linux/task_io_accounting_ops.h>
25 #include <linux/task_work.h>
26 #include "cifsglob.h"
27 #include "cifsproto.h"
28 #include "cifs_debug.h"
29 #include "smb2proto.h"
30 #include "smbdirect.h"
31 #include "compress.h"
32
33 void
cifs_wake_up_task(struct TCP_Server_Info * server,struct mid_q_entry * mid)34 cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid)
35 {
36 if (mid->mid_state == MID_RESPONSE_RECEIVED)
37 mid->mid_state = MID_RESPONSE_READY;
38 wake_up_process(mid->callback_data);
39 }
40
__release_mid(struct TCP_Server_Info * server,struct mid_q_entry * midEntry)41 void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *midEntry)
42 {
43 #ifdef CONFIG_CIFS_STATS2
44 __le16 command = server->vals->lock_cmd;
45 __u16 smb_cmd = le16_to_cpu(midEntry->command);
46 unsigned long now;
47 unsigned long roundtrip_time;
48 #endif
49
50 if (midEntry->resp_buf && (midEntry->wait_cancelled) &&
51 (midEntry->mid_state == MID_RESPONSE_RECEIVED ||
52 midEntry->mid_state == MID_RESPONSE_READY) &&
53 server->ops->handle_cancelled_mid)
54 server->ops->handle_cancelled_mid(midEntry, server);
55
56 midEntry->mid_state = MID_FREE;
57 atomic_dec(&mid_count);
58 if (midEntry->large_buf)
59 cifs_buf_release(midEntry->resp_buf);
60 else
61 cifs_small_buf_release(midEntry->resp_buf);
62 #ifdef CONFIG_CIFS_STATS2
63 now = jiffies;
64 if (now < midEntry->when_alloc)
65 cifs_server_dbg(VFS, "Invalid mid allocation time\n");
66 roundtrip_time = now - midEntry->when_alloc;
67
68 if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) {
69 if (atomic_read(&server->num_cmds[smb_cmd]) == 0) {
70 server->slowest_cmd[smb_cmd] = roundtrip_time;
71 server->fastest_cmd[smb_cmd] = roundtrip_time;
72 } else {
73 if (server->slowest_cmd[smb_cmd] < roundtrip_time)
74 server->slowest_cmd[smb_cmd] = roundtrip_time;
75 else if (server->fastest_cmd[smb_cmd] > roundtrip_time)
76 server->fastest_cmd[smb_cmd] = roundtrip_time;
77 }
78 cifs_stats_inc(&server->num_cmds[smb_cmd]);
79 server->time_per_cmd[smb_cmd] += roundtrip_time;
80 }
81 /*
82 * commands taking longer than one second (default) can be indications
83 * that something is wrong, unless it is quite a slow link or a very
84 * busy server. Note that this calc is unlikely or impossible to wrap
85 * as long as slow_rsp_threshold is not set way above recommended max
86 * value (32767 ie 9 hours) and is generally harmless even if wrong
87 * since only affects debug counters - so leaving the calc as simple
88 * comparison rather than doing multiple conversions and overflow
89 * checks
90 */
91 if ((slow_rsp_threshold != 0) &&
92 time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
93 (midEntry->command != command)) {
94 /*
95 * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command
96 * NB: le16_to_cpu returns unsigned so can not be negative below
97 */
98 if (smb_cmd < NUMBER_OF_SMB2_COMMANDS)
99 cifs_stats_inc(&server->smb2slowcmd[smb_cmd]);
100
101 trace_smb3_slow_rsp(smb_cmd, midEntry->mid, midEntry->pid,
102 midEntry->when_sent, midEntry->when_received);
103 if (cifsFYI & CIFS_TIMER) {
104 pr_debug("slow rsp: cmd %d mid %llu",
105 midEntry->command, midEntry->mid);
106 cifs_info("A: 0x%lx S: 0x%lx R: 0x%lx\n",
107 now - midEntry->when_alloc,
108 now - midEntry->when_sent,
109 now - midEntry->when_received);
110 }
111 }
112 #endif
113 put_task_struct(midEntry->creator);
114
115 mempool_free(midEntry, &cifs_mid_pool);
116 }
117
118 void
delete_mid(struct TCP_Server_Info * server,struct mid_q_entry * mid)119 delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid)
120 {
121 spin_lock(&server->mid_queue_lock);
122
123 if (!mid->deleted_from_q) {
124 list_del_init(&mid->qhead);
125 mid->deleted_from_q = true;
126 }
127 spin_unlock(&server->mid_queue_lock);
128
129 release_mid(server, mid);
130 }
131
132 /*
133 * smb_send_kvec - send an array of kvecs to the server
134 * @server: Server to send the data to
135 * @smb_msg: Message to send
136 * @sent: amount of data sent on socket is stored here
137 *
138 * Our basic "send data to server" function. Should be called with srv_mutex
139 * held. The caller is responsible for handling the results.
140 */
141 int
smb_send_kvec(struct TCP_Server_Info * server,struct msghdr * smb_msg,size_t * sent)142 smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
143 size_t *sent)
144 {
145 int rc = 0;
146 int retries = 0;
147 struct socket *ssocket = server->ssocket;
148
149 *sent = 0;
150
151 if (server->noblocksnd)
152 smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
153 else
154 smb_msg->msg_flags = MSG_NOSIGNAL;
155
156 while (msg_data_left(smb_msg)) {
157 /*
158 * If blocking send, we try 3 times, since each can block
159 * for 5 seconds. For nonblocking we have to try more
160 * but wait increasing amounts of time allowing time for
161 * socket to clear. The overall time we wait in either
162 * case to send on the socket is about 15 seconds.
163 * Similarly we wait for 15 seconds for a response from
164 * the server in SendReceive[2] for the server to send
165 * a response back for most types of requests (except
166 * SMB Write past end of file which can be slow, and
167 * blocking lock operations). NFS waits slightly longer
168 * than CIFS, but this can make it take longer for
169 * nonresponsive servers to be detected and 15 seconds
170 * is more than enough time for modern networks to
171 * send a packet. In most cases if we fail to send
172 * after the retries we will kill the socket and
173 * reconnect which may clear the network problem.
174 *
175 * Even if regular signals are masked, EINTR might be
176 * propagated from sk_stream_wait_memory() to here when
177 * TIF_NOTIFY_SIGNAL is used for task work. For example,
178 * certain io_uring completions will use that. Treat
179 * having EINTR with pending task work the same as EAGAIN
180 * to avoid unnecessary reconnects.
181 */
182 rc = sock_sendmsg(ssocket, smb_msg);
183 if (rc == -EAGAIN || unlikely(rc == -EINTR && task_work_pending(current))) {
184 retries++;
185 if (retries >= 14 ||
186 (!server->noblocksnd && (retries > 2))) {
187 cifs_server_dbg(VFS, "sends on sock %p stuck for 15 seconds\n",
188 ssocket);
189 return -EAGAIN;
190 }
191 msleep(1 << retries);
192 continue;
193 }
194
195 if (rc < 0)
196 return rc;
197
198 if (rc == 0) {
199 /* should never happen, letting socket clear before
200 retrying is our only obvious option here */
201 cifs_server_dbg(VFS, "tcp sent no data\n");
202 msleep(500);
203 continue;
204 }
205
206 /* send was at least partially successful */
207 *sent += rc;
208 retries = 0; /* in case we get ENOSPC on the next send */
209 }
210 return 0;
211 }
212
213 unsigned long
smb_rqst_len(struct TCP_Server_Info * server,struct smb_rqst * rqst)214 smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
215 {
216 unsigned int i;
217 struct kvec *iov;
218 int nvec;
219 unsigned long buflen = 0;
220
221 if (!is_smb1(server) && rqst->rq_nvec >= 2 &&
222 rqst->rq_iov[0].iov_len == 4) {
223 iov = &rqst->rq_iov[1];
224 nvec = rqst->rq_nvec - 1;
225 } else {
226 iov = rqst->rq_iov;
227 nvec = rqst->rq_nvec;
228 }
229
230 /* total up iov array first */
231 for (i = 0; i < nvec; i++)
232 buflen += iov[i].iov_len;
233
234 buflen += iov_iter_count(&rqst->rq_iter);
235 return buflen;
236 }
237
__smb_send_rqst(struct TCP_Server_Info * server,int num_rqst,struct smb_rqst * rqst)238 int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
239 struct smb_rqst *rqst)
240 {
241 int rc;
242 struct kvec *iov;
243 int n_vec;
244 unsigned int send_length = 0;
245 unsigned int i, j;
246 sigset_t mask, oldmask;
247 size_t total_len = 0, sent, size;
248 struct socket *ssocket = server->ssocket;
249 struct msghdr smb_msg = {};
250 __be32 rfc1002_marker;
251
252 cifs_in_send_inc(server);
253 if (cifs_rdma_enabled(server)) {
254 /* return -EAGAIN when connecting or reconnecting */
255 rc = -EAGAIN;
256 if (server->smbd_conn)
257 rc = smbd_send(server, num_rqst, rqst);
258 goto smbd_done;
259 }
260
261 rc = -EAGAIN;
262 if (ssocket == NULL)
263 goto out;
264
265 rc = -ERESTARTSYS;
266 if (fatal_signal_pending(current)) {
267 cifs_dbg(FYI, "signal pending before send request\n");
268 goto out;
269 }
270
271 rc = 0;
272 /* cork the socket */
273 tcp_sock_set_cork(ssocket->sk, true);
274
275 for (j = 0; j < num_rqst; j++)
276 send_length += smb_rqst_len(server, &rqst[j]);
277 rfc1002_marker = cpu_to_be32(send_length);
278
279 /*
280 * We should not allow signals to interrupt the network send because
281 * any partial send will cause session reconnects thus increasing
282 * latency of system calls and overload a server with unnecessary
283 * requests.
284 */
285
286 sigfillset(&mask);
287 sigprocmask(SIG_BLOCK, &mask, &oldmask);
288
289 /* Generate a rfc1002 marker */
290 {
291 struct kvec hiov = {
292 .iov_base = &rfc1002_marker,
293 .iov_len = 4
294 };
295 iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4);
296 rc = smb_send_kvec(server, &smb_msg, &sent);
297 if (rc < 0)
298 goto unmask;
299
300 total_len += sent;
301 send_length += 4;
302 }
303
304 cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length);
305
306 for (j = 0; j < num_rqst; j++) {
307 iov = rqst[j].rq_iov;
308 n_vec = rqst[j].rq_nvec;
309
310 size = 0;
311 for (i = 0; i < n_vec; i++) {
312 dump_smb(iov[i].iov_base, iov[i].iov_len);
313 size += iov[i].iov_len;
314 }
315
316 iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size);
317
318 rc = smb_send_kvec(server, &smb_msg, &sent);
319 if (rc < 0)
320 goto unmask;
321
322 total_len += sent;
323
324 if (iov_iter_count(&rqst[j].rq_iter) > 0) {
325 smb_msg.msg_iter = rqst[j].rq_iter;
326 rc = smb_send_kvec(server, &smb_msg, &sent);
327 if (rc < 0)
328 break;
329 total_len += sent;
330 }
331 }
332
333 unmask:
334 sigprocmask(SIG_SETMASK, &oldmask, NULL);
335
336 /*
337 * If signal is pending but we have already sent the whole packet to
338 * the server we need to return success status to allow a corresponding
339 * mid entry to be kept in the pending requests queue thus allowing
340 * to handle responses from the server by the client.
341 *
342 * If only part of the packet has been sent there is no need to hide
343 * interrupt because the session will be reconnected anyway, so there
344 * won't be any response from the server to handle.
345 */
346
347 if (signal_pending(current) && (total_len != send_length)) {
348 cifs_dbg(FYI, "signal is pending after attempt to send\n");
349 rc = -ERESTARTSYS;
350 }
351
352 /* uncork it */
353 tcp_sock_set_cork(ssocket->sk, false);
354
355 if ((total_len > 0) && (total_len != send_length)) {
356 cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
357 send_length, total_len);
358 /*
359 * If we have only sent part of an SMB then the next SMB could
360 * be taken as the remainder of this one. We need to kill the
361 * socket so the server throws away the partial SMB
362 */
363 cifs_signal_cifsd_for_reconnect(server, false);
364 trace_smb3_partial_send_reconnect(server->current_mid,
365 server->conn_id, server->hostname);
366 }
367 smbd_done:
368 /*
369 * there's hardly any use for the layers above to know the
370 * actual error code here. All they should do at this point is
371 * to retry the connection and hope it goes away.
372 */
373 if (rc < 0 && rc != -EINTR && rc != -EAGAIN) {
374 cifs_server_dbg(VFS, "Error %d sending data on socket to server\n",
375 rc);
376 rc = -ECONNABORTED;
377 cifs_signal_cifsd_for_reconnect(server, false);
378 } else if (rc > 0)
379 rc = 0;
380 out:
381 cifs_in_send_dec(server);
382 return rc;
383 }
384
385 static int
smb_send_rqst(struct TCP_Server_Info * server,int num_rqst,struct smb_rqst * rqst,int flags)386 smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
387 struct smb_rqst *rqst, int flags)
388 {
389 struct smb2_transform_hdr tr_hdr;
390 struct smb_rqst new_rqst[MAX_COMPOUND] = {};
391 struct kvec iov = {
392 .iov_base = &tr_hdr,
393 .iov_len = sizeof(tr_hdr),
394 };
395 int rc;
396
397 if (flags & CIFS_COMPRESS_REQ)
398 return smb_compress(server, &rqst[0], __smb_send_rqst);
399
400 if (!(flags & CIFS_TRANSFORM_REQ))
401 return __smb_send_rqst(server, num_rqst, rqst);
402
403 if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1))
404 return smb_EIO1(smb_eio_trace_tx_max_compound, num_rqst);
405
406 if (!server->ops->init_transform_rq) {
407 cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n");
408 return smb_EIO(smb_eio_trace_tx_need_transform);
409 }
410
411 new_rqst[0].rq_iov = &iov;
412 new_rqst[0].rq_nvec = 1;
413
414 rc = server->ops->init_transform_rq(server, num_rqst + 1,
415 new_rqst, rqst);
416 if (!rc) {
417 rc = __smb_send_rqst(server, num_rqst + 1, new_rqst);
418 smb3_free_compound_rqst(num_rqst, &new_rqst[1]);
419 }
420 return rc;
421 }
422
423 static int
wait_for_free_credits(struct TCP_Server_Info * server,const int num_credits,const int timeout,const int flags,unsigned int * instance)424 wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
425 const int timeout, const int flags,
426 unsigned int *instance)
427 {
428 long rc;
429 int *credits;
430 int optype;
431 long int t;
432 int scredits, in_flight;
433
434 if (timeout < 0)
435 t = MAX_JIFFY_OFFSET;
436 else
437 t = msecs_to_jiffies(timeout);
438
439 optype = flags & CIFS_OP_MASK;
440
441 *instance = 0;
442
443 credits = server->ops->get_credits_field(server, optype);
444 /* Since an echo is already inflight, no need to wait to send another */
445 if (*credits <= 0 && optype == CIFS_ECHO_OP)
446 return -EAGAIN;
447
448 spin_lock(&server->req_lock);
449 if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) {
450 /* oplock breaks must not be held up */
451 server->in_flight++;
452 if (server->in_flight > server->max_in_flight)
453 server->max_in_flight = server->in_flight;
454 *credits -= 1;
455 *instance = server->reconnect_instance;
456 scredits = *credits;
457 in_flight = server->in_flight;
458 spin_unlock(&server->req_lock);
459
460 trace_smb3_nblk_credits(server->current_mid,
461 server->conn_id, server->hostname, scredits, -1, in_flight);
462 cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
463 __func__, 1, scredits);
464
465 return 0;
466 }
467
468 while (1) {
469 spin_unlock(&server->req_lock);
470
471 spin_lock(&server->srv_lock);
472 if (server->tcpStatus == CifsExiting) {
473 spin_unlock(&server->srv_lock);
474 return -ENOENT;
475 }
476 spin_unlock(&server->srv_lock);
477
478 spin_lock(&server->req_lock);
479 if (*credits < num_credits) {
480 scredits = *credits;
481 spin_unlock(&server->req_lock);
482
483 cifs_num_waiters_inc(server);
484 rc = wait_event_killable_timeout(server->request_q,
485 has_credits(server, credits, num_credits), t);
486 cifs_num_waiters_dec(server);
487 if (!rc) {
488 spin_lock(&server->req_lock);
489 scredits = *credits;
490 in_flight = server->in_flight;
491 spin_unlock(&server->req_lock);
492
493 trace_smb3_credit_timeout(server->current_mid,
494 server->conn_id, server->hostname, scredits,
495 num_credits, in_flight);
496 cifs_server_dbg(VFS, "wait timed out after %d ms\n",
497 timeout);
498 return -EBUSY;
499 }
500 if (rc == -ERESTARTSYS)
501 return -ERESTARTSYS;
502 spin_lock(&server->req_lock);
503 } else {
504 /*
505 * For normal commands, reserve the last MAX_COMPOUND
506 * credits to compound requests.
507 * Otherwise these compounds could be permanently
508 * starved for credits by single-credit requests.
509 *
510 * To prevent spinning CPU, block this thread until
511 * there are >MAX_COMPOUND credits available.
512 * But only do this is we already have a lot of
513 * credits in flight to avoid triggering this check
514 * for servers that are slow to hand out credits on
515 * new sessions.
516 */
517 if (!optype && num_credits == 1 &&
518 server->in_flight > 2 * MAX_COMPOUND &&
519 *credits <= MAX_COMPOUND) {
520 spin_unlock(&server->req_lock);
521
522 cifs_num_waiters_inc(server);
523 rc = wait_event_killable_timeout(
524 server->request_q,
525 has_credits(server, credits,
526 MAX_COMPOUND + 1),
527 t);
528 cifs_num_waiters_dec(server);
529 if (!rc) {
530 spin_lock(&server->req_lock);
531 scredits = *credits;
532 in_flight = server->in_flight;
533 spin_unlock(&server->req_lock);
534
535 trace_smb3_credit_timeout(
536 server->current_mid,
537 server->conn_id, server->hostname,
538 scredits, num_credits, in_flight);
539 cifs_server_dbg(VFS, "wait timed out after %d ms\n",
540 timeout);
541 return -EBUSY;
542 }
543 if (rc == -ERESTARTSYS)
544 return -ERESTARTSYS;
545 spin_lock(&server->req_lock);
546 continue;
547 }
548
549 /*
550 * Can not count locking commands against total
551 * as they are allowed to block on server.
552 */
553
554 /* update # of requests on the wire to server */
555 if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
556 *credits -= num_credits;
557 server->in_flight += num_credits;
558 if (server->in_flight > server->max_in_flight)
559 server->max_in_flight = server->in_flight;
560 *instance = server->reconnect_instance;
561 }
562 scredits = *credits;
563 in_flight = server->in_flight;
564 spin_unlock(&server->req_lock);
565
566 trace_smb3_waitff_credits(server->current_mid,
567 server->conn_id, server->hostname, scredits,
568 -(num_credits), in_flight);
569 cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
570 __func__, num_credits, scredits);
571 break;
572 }
573 }
574 return 0;
575 }
576
/*
 * Reserve a single credit for one request, waiting without a time limit
 * (a negative timeout is passed to wait_for_free_credits()).
 */
int wait_for_free_request(struct TCP_Server_Info *server, const int flags,
			  unsigned int *instance)
{
	const int one_credit = 1;
	const int no_timeout = -1;

	return wait_for_free_credits(server, one_credit, no_timeout, flags,
				     instance);
}
583
584 static int
wait_for_compound_request(struct TCP_Server_Info * server,int num,const int flags,unsigned int * instance)585 wait_for_compound_request(struct TCP_Server_Info *server, int num,
586 const int flags, unsigned int *instance)
587 {
588 int *credits;
589 int scredits, in_flight;
590
591 credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
592
593 spin_lock(&server->req_lock);
594 scredits = *credits;
595 in_flight = server->in_flight;
596
597 if (*credits < num) {
598 /*
599 * If the server is tight on resources or just gives us less
600 * credits for other reasons (e.g. requests are coming out of
601 * order and the server delays granting more credits until it
602 * processes a missing mid) and we exhausted most available
603 * credits there may be situations when we try to send
604 * a compound request but we don't have enough credits. At this
605 * point the client needs to decide if it should wait for
606 * additional credits or fail the request. If at least one
607 * request is in flight there is a high probability that the
608 * server will return enough credits to satisfy this compound
609 * request.
610 *
611 * Return immediately if no requests in flight since we will be
612 * stuck on waiting for credits.
613 */
614 if (server->in_flight == 0) {
615 spin_unlock(&server->req_lock);
616 trace_smb3_insufficient_credits(server->current_mid,
617 server->conn_id, server->hostname, scredits,
618 num, in_flight);
619 cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
620 __func__, in_flight, num, scredits);
621 return -EDEADLK;
622 }
623 }
624 spin_unlock(&server->req_lock);
625
626 return wait_for_free_credits(server, num, 60000, flags,
627 instance);
628 }
629
630 int
cifs_wait_mtu_credits(struct TCP_Server_Info * server,size_t size,size_t * num,struct cifs_credits * credits)631 cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size,
632 size_t *num, struct cifs_credits *credits)
633 {
634 *num = size;
635 credits->value = 0;
636 credits->instance = server->reconnect_instance;
637 return 0;
638 }
639
wait_for_response(struct TCP_Server_Info * server,struct mid_q_entry * mid)640 int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *mid)
641 {
642 unsigned int sleep_state = TASK_KILLABLE;
643 int error;
644
645 if (mid->sr_flags & CIFS_INTERRUPTIBLE_WAIT)
646 sleep_state = TASK_INTERRUPTIBLE;
647
648 error = wait_event_state(server->response_q,
649 mid->mid_state != MID_REQUEST_SUBMITTED &&
650 mid->mid_state != MID_RESPONSE_RECEIVED,
651 (sleep_state | TASK_FREEZABLE_UNSAFE));
652 if (error < 0)
653 return -ERESTARTSYS;
654
655 return 0;
656 }
657
658 /*
659 * Send a SMB request and set the callback function in the mid to handle
660 * the result. Caller is responsible for dealing with timeouts.
661 */
662 int
cifs_call_async(struct TCP_Server_Info * server,struct smb_rqst * rqst,mid_receive_t receive,mid_callback_t callback,mid_handle_t handle,void * cbdata,const int flags,const struct cifs_credits * exist_credits)663 cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
664 mid_receive_t receive, mid_callback_t callback,
665 mid_handle_t handle, void *cbdata, const int flags,
666 const struct cifs_credits *exist_credits)
667 {
668 int rc;
669 struct mid_q_entry *mid;
670 struct cifs_credits credits = { .value = 0, .instance = 0 };
671 unsigned int instance;
672 int optype;
673
674 optype = flags & CIFS_OP_MASK;
675
676 if ((flags & CIFS_HAS_CREDITS) == 0) {
677 rc = wait_for_free_request(server, flags, &instance);
678 if (rc)
679 return rc;
680 credits.value = 1;
681 credits.instance = instance;
682 } else
683 instance = exist_credits->instance;
684
685 cifs_server_lock(server);
686
687 /*
688 * We can't use credits obtained from the previous session to send this
689 * request. Check if there were reconnects after we obtained credits and
690 * return -EAGAIN in such cases to let callers handle it.
691 */
692 if (instance != server->reconnect_instance) {
693 cifs_server_unlock(server);
694 add_credits_and_wake_if(server, &credits, optype);
695 return -EAGAIN;
696 }
697
698 mid = server->ops->setup_async_request(server, rqst);
699 if (IS_ERR(mid)) {
700 cifs_server_unlock(server);
701 add_credits_and_wake_if(server, &credits, optype);
702 return PTR_ERR(mid);
703 }
704
705 mid->sr_flags = flags;
706 mid->receive = receive;
707 mid->callback = callback;
708 mid->callback_data = cbdata;
709 mid->handle = handle;
710 mid->mid_state = MID_REQUEST_SUBMITTED;
711
712 /* put it on the pending_mid_q */
713 spin_lock(&server->mid_queue_lock);
714 list_add_tail(&mid->qhead, &server->pending_mid_q);
715 spin_unlock(&server->mid_queue_lock);
716
717 /*
718 * Need to store the time in mid before calling I/O. For call_async,
719 * I/O response may come back and free the mid entry on another thread.
720 */
721 cifs_save_when_sent(mid);
722 rc = smb_send_rqst(server, 1, rqst, flags);
723
724 if (rc < 0) {
725 revert_current_mid(server, mid->credits);
726 server->sequence_number -= 2;
727 delete_mid(server, mid);
728 }
729
730 cifs_server_unlock(server);
731
732 if (rc == 0)
733 return 0;
734
735 add_credits_and_wake_if(server, &credits, optype);
736 return rc;
737 }
738
cifs_sync_mid_result(struct mid_q_entry * mid,struct TCP_Server_Info * server)739 int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
740 {
741 int rc = 0;
742
743 cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n",
744 __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state);
745
746 spin_lock(&server->mid_queue_lock);
747 switch (mid->mid_state) {
748 case MID_RESPONSE_READY:
749 spin_unlock(&server->mid_queue_lock);
750 return rc;
751 case MID_RETRY_NEEDED:
752 rc = -EAGAIN;
753 break;
754 case MID_RESPONSE_MALFORMED:
755 rc = smb_EIO(smb_eio_trace_rx_sync_mid_malformed);
756 break;
757 case MID_SHUTDOWN:
758 rc = -EHOSTDOWN;
759 break;
760 case MID_RC:
761 rc = mid->mid_rc;
762 break;
763 default:
764 if (mid->deleted_from_q == false) {
765 list_del_init(&mid->qhead);
766 mid->deleted_from_q = true;
767 }
768 spin_unlock(&server->mid_queue_lock);
769 cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
770 __func__, mid->mid, mid->mid_state);
771 rc = smb_EIO1(smb_eio_trace_rx_sync_mid_invalid, mid->mid_state);
772 goto sync_mid_done;
773 }
774 spin_unlock(&server->mid_queue_lock);
775
776 sync_mid_done:
777 release_mid(server, mid);
778 return rc;
779 }
780
781 static void
cifs_compound_callback(struct TCP_Server_Info * server,struct mid_q_entry * mid)782 cifs_compound_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
783 {
784 struct cifs_credits credits = {
785 .value = server->ops->get_credits(mid),
786 .instance = server->reconnect_instance,
787 };
788
789 add_credits(server, &credits, mid->optype);
790
791 if (mid->mid_state == MID_RESPONSE_RECEIVED)
792 mid->mid_state = MID_RESPONSE_READY;
793 }
794
/*
 * Mid callback that does the cifs_compound_callback() work and then wakes
 * the waiting task through cifs_wake_up_task().
 */
static void
cifs_compound_last_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	/* credits and state first, wake-up last */
	cifs_compound_callback(server, mid);

	cifs_wake_up_task(server, mid);
}
801
/*
 * Mid callback that does the cifs_compound_callback() work and then
 * releases the mid via release_mid() instead of waking a waiter.
 */
static void
cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	/* account for the credits before the mid is released */
	cifs_compound_callback(server, mid);

	release_mid(server, mid);
}
808
809 /*
810 * cifs_pick_channel - pick an eligible channel for network operations
811 *
812 * @ses: session reference
813 *
814 * Select an eligible channel (not terminating and not marked as needing
815 * reconnect), preferring the least loaded one. If no eligible channel is
816 * found, fall back to the primary channel (index 0).
817 *
818 * Return: TCP_Server_Info pointer for the chosen channel, or NULL if @ses is
819 * NULL.
820 */
cifs_pick_channel(struct cifs_ses * ses)821 struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
822 {
823 uint index = 0;
824 unsigned int min_in_flight = UINT_MAX;
825 struct TCP_Server_Info *server = NULL;
826 int i, start, cur;
827
828 if (!ses)
829 return NULL;
830
831 spin_lock(&ses->chan_lock);
832 start = atomic_inc_return(&ses->chan_seq);
833 for (i = 0; i < ses->chan_count; i++) {
834 cur = (start + i) % ses->chan_count;
835 server = ses->chans[cur].server;
836 if (!server || server->terminate)
837 continue;
838
839 if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur))
840 continue;
841
842 /*
843 * strictly speaking, we should pick up req_lock to read
844 * server->in_flight. But it shouldn't matter much here if we
845 * race while reading this data. The worst that can happen is
846 * that we could use a channel that's not least loaded. Avoiding
847 * taking the lock could help reduce wait time, which is
848 * important for this function
849 */
850 if (server->in_flight < min_in_flight) {
851 min_in_flight = server->in_flight;
852 index = cur;
853 }
854 }
855
856 server = ses->chans[index].server;
857 spin_unlock(&ses->chan_lock);
858
859 return server;
860 }
861
862 int
compound_send_recv(const unsigned int xid,struct cifs_ses * ses,struct TCP_Server_Info * server,const int flags,const int num_rqst,struct smb_rqst * rqst,int * resp_buf_type,struct kvec * resp_iov)863 compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
864 struct TCP_Server_Info *server,
865 const int flags, const int num_rqst, struct smb_rqst *rqst,
866 int *resp_buf_type, struct kvec *resp_iov)
867 {
868 int i, j, optype, rc = 0;
869 struct mid_q_entry *mid[MAX_COMPOUND];
870 bool cancelled_mid[MAX_COMPOUND] = {false};
871 struct cifs_credits credits[MAX_COMPOUND] = {
872 { .value = 0, .instance = 0 }
873 };
874 unsigned int instance;
875 char *buf;
876
877 optype = flags & CIFS_OP_MASK;
878
879 for (i = 0; i < num_rqst; i++)
880 resp_buf_type[i] = CIFS_NO_BUFFER; /* no response buf yet */
881
882 if (!ses || !ses->server || !server) {
883 cifs_dbg(VFS, "Null session\n");
884 return smb_EIO(smb_eio_trace_null_pointers);
885 }
886
887 spin_lock(&server->srv_lock);
888 if (server->tcpStatus == CifsExiting) {
889 spin_unlock(&server->srv_lock);
890 return -ENOENT;
891 }
892 spin_unlock(&server->srv_lock);
893
894 /*
895 * Wait for all the requests to become available.
896 * This approach still leaves the possibility to be stuck waiting for
897 * credits if the server doesn't grant credits to the outstanding
898 * requests and if the client is completely idle, not generating any
899 * other requests.
900 * This can be handled by the eventual session reconnect.
901 */
902 rc = wait_for_compound_request(server, num_rqst, flags,
903 &instance);
904 if (rc)
905 return rc;
906
907 for (i = 0; i < num_rqst; i++) {
908 credits[i].value = 1;
909 credits[i].instance = instance;
910 }
911
912 /*
913 * Make sure that we sign in the same order that we send on this socket
914 * and avoid races inside tcp sendmsg code that could cause corruption
915 * of smb data.
916 */
917
918 cifs_server_lock(server);
919
920 /*
921 * All the parts of the compound chain belong obtained credits from the
922 * same session. We can not use credits obtained from the previous
923 * session to send this request. Check if there were reconnects after
924 * we obtained credits and return -EAGAIN in such cases to let callers
925 * handle it.
926 */
927 if (instance != server->reconnect_instance) {
928 cifs_server_unlock(server);
929 for (j = 0; j < num_rqst; j++)
930 add_credits(server, &credits[j], optype);
931 return -EAGAIN;
932 }
933
934 for (i = 0; i < num_rqst; i++) {
935 mid[i] = server->ops->setup_request(ses, server, &rqst[i]);
936 if (IS_ERR(mid[i])) {
937 revert_current_mid(server, i);
938 for (j = 0; j < i; j++)
939 delete_mid(server, mid[j]);
940 cifs_server_unlock(server);
941
942 /* Update # of requests on wire to server */
943 for (j = 0; j < num_rqst; j++)
944 add_credits(server, &credits[j], optype);
945 return PTR_ERR(mid[i]);
946 }
947
948 mid[i]->sr_flags = flags;
949 mid[i]->mid_state = MID_REQUEST_SUBMITTED;
950 mid[i]->optype = optype;
951 /*
952 * Invoke callback for every part of the compound chain
953 * to calculate credits properly. Wake up this thread only when
954 * the last element is received.
955 */
956 if (i < num_rqst - 1)
957 mid[i]->callback = cifs_compound_callback;
958 else
959 mid[i]->callback = cifs_compound_last_callback;
960 }
961 rc = smb_send_rqst(server, num_rqst, rqst, flags);
962
963 for (i = 0; i < num_rqst; i++)
964 cifs_save_when_sent(mid[i]);
965
966 if (rc < 0) {
967 revert_current_mid(server, num_rqst);
968 server->sequence_number -= 2;
969 }
970
971 cifs_server_unlock(server);
972
973 /*
974 * If sending failed for some reason or it is an oplock break that we
975 * will not receive a response to - return credits back
976 */
977 if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) {
978 for (i = 0; i < num_rqst; i++)
979 add_credits(server, &credits[i], optype);
980 goto out;
981 }
982
983 /*
984 * At this point the request is passed to the network stack - we assume
985 * that any credits taken from the server structure on the client have
986 * been spent and we can't return them back. Once we receive responses
987 * we will collect credits granted by the server in the mid callbacks
988 * and add those credits to the server structure.
989 */
990
991 /*
992 * Compounding is never used during session establish.
993 */
994 spin_lock(&ses->ses_lock);
995 if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
996 spin_unlock(&ses->ses_lock);
997
998 if (WARN_ON_ONCE(num_rqst != 1 || !resp_iov))
999 return -EINVAL;
1000
1001 cifs_server_lock(server);
1002 smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
1003 cifs_server_unlock(server);
1004
1005 spin_lock(&ses->ses_lock);
1006 }
1007 spin_unlock(&ses->ses_lock);
1008
1009 for (i = 0; i < num_rqst; i++) {
1010 rc = wait_for_response(server, mid[i]);
1011 if (rc != 0)
1012 break;
1013 }
1014 if (rc != 0) {
1015 for (; i < num_rqst; i++) {
1016 cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
1017 mid[i]->mid, le16_to_cpu(mid[i]->command));
1018 send_cancel(ses, server, &rqst[i], mid[i], xid);
1019 spin_lock(&mid[i]->mid_lock);
1020 mid[i]->wait_cancelled = true;
1021 if (mid[i]->mid_state == MID_REQUEST_SUBMITTED ||
1022 mid[i]->mid_state == MID_RESPONSE_RECEIVED) {
1023 mid[i]->callback = cifs_cancelled_callback;
1024 cancelled_mid[i] = true;
1025 credits[i].value = 0;
1026 }
1027 spin_unlock(&mid[i]->mid_lock);
1028 }
1029 }
1030
1031 for (i = 0; i < num_rqst; i++) {
1032 if (rc < 0)
1033 goto out;
1034
1035 rc = cifs_sync_mid_result(mid[i], server);
1036 if (rc != 0) {
1037 /* mark this mid as cancelled to not free it below */
1038 cancelled_mid[i] = true;
1039 goto out;
1040 }
1041
1042 if (!mid[i]->resp_buf ||
1043 mid[i]->mid_state != MID_RESPONSE_READY) {
1044 rc = smb_EIO1(smb_eio_trace_rx_mid_unready, mid[i]->mid_state);
1045 cifs_dbg(FYI, "Bad MID state?\n");
1046 goto out;
1047 }
1048
1049 rc = server->ops->check_receive(mid[i], server,
1050 flags & CIFS_LOG_ERROR);
1051
1052 if (resp_iov) {
1053 buf = (char *)mid[i]->resp_buf;
1054 resp_iov[i].iov_base = buf;
1055 resp_iov[i].iov_len = mid[i]->resp_buf_size;
1056
1057 if (mid[i]->large_buf)
1058 resp_buf_type[i] = CIFS_LARGE_BUFFER;
1059 else
1060 resp_buf_type[i] = CIFS_SMALL_BUFFER;
1061
1062 /* mark it so buf will not be freed by delete_mid */
1063 if ((flags & CIFS_NO_RSP_BUF) == 0)
1064 mid[i]->resp_buf = NULL;
1065 }
1066 }
1067
1068 /*
1069 * Compounding is never used during session establish.
1070 */
1071 spin_lock(&ses->ses_lock);
1072 if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
1073 struct kvec iov = {
1074 .iov_base = resp_iov[0].iov_base,
1075 .iov_len = resp_iov[0].iov_len
1076 };
1077 spin_unlock(&ses->ses_lock);
1078 cifs_server_lock(server);
1079 smb311_update_preauth_hash(ses, server, &iov, 1);
1080 cifs_server_unlock(server);
1081 spin_lock(&ses->ses_lock);
1082 }
1083 spin_unlock(&ses->ses_lock);
1084
1085 out:
1086 /*
1087 * This will dequeue all mids. After this it is important that the
1088 * demultiplex_thread will not process any of these mids any further.
1089 * This is prevented above by using a noop callback that will not
1090 * wake this thread except for the very last PDU.
1091 */
1092 for (i = 0; i < num_rqst; i++) {
1093 if (!cancelled_mid[i])
1094 delete_mid(server, mid[i]);
1095 }
1096
1097 return rc;
1098 }
1099
/*
 * Send one request and wait for its response. This simply forwards to
 * compound_send_recv() with a chain length of one and hands back whatever
 * that call returns.
 */
int
cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
	       struct TCP_Server_Info *server,
	       struct smb_rqst *rqst, int *resp_buf_type, const int flags,
	       struct kvec *resp_iov)
{
	/* a lone request is passed down as a one-element chain */
	const int num_rqst = 1;
	int rc;

	rc = compound_send_recv(xid, ses, server, flags, num_rqst,
				rqst, resp_buf_type, resp_iov);
	return rc;
}
1109
1110
1111 /*
1112 * Discard any remaining data in the current SMB. To do this, we borrow the
1113 * current bigbuf.
1114 */
1115 int
cifs_discard_remaining_data(struct TCP_Server_Info * server)1116 cifs_discard_remaining_data(struct TCP_Server_Info *server)
1117 {
1118 unsigned int rfclen = server->pdu_size;
1119 size_t remaining = rfclen - server->total_read;
1120
1121 while (remaining > 0) {
1122 ssize_t length;
1123
1124 length = cifs_discard_from_socket(server,
1125 min_t(size_t, remaining,
1126 CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
1127 if (length < 0)
1128 return length;
1129 server->total_read += length;
1130 remaining -= length;
1131 }
1132
1133 return 0;
1134 }
1135
1136 static int
__cifs_readv_discard(struct TCP_Server_Info * server,struct mid_q_entry * mid,bool malformed)1137 __cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
1138 bool malformed)
1139 {
1140 int length;
1141
1142 length = cifs_discard_remaining_data(server);
1143 dequeue_mid(server, mid, malformed);
1144 mid->resp_buf = server->smallbuf;
1145 server->smallbuf = NULL;
1146 return length;
1147 }
1148
1149 static int
cifs_readv_discard(struct TCP_Server_Info * server,struct mid_q_entry * mid)1150 cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1151 {
1152 struct cifs_io_subrequest *rdata = mid->callback_data;
1153
1154 return __cifs_readv_discard(server, mid, rdata->result);
1155 }
1156
1157 int
cifs_readv_receive(struct TCP_Server_Info * server,struct mid_q_entry * mid)1158 cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1159 {
1160 int length, len;
1161 unsigned int data_offset, data_len;
1162 struct cifs_io_subrequest *rdata = mid->callback_data;
1163 char *buf = server->smallbuf;
1164 unsigned int buflen = server->pdu_size;
1165 bool use_rdma_mr = false;
1166
1167 cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n",
1168 __func__, mid->mid, rdata->subreq.start, rdata->subreq.len);
1169
1170 /*
1171 * read the rest of READ_RSP header (sans Data array), or whatever we
1172 * can if there's not enough data. At this point, we've read down to
1173 * the Mid.
1174 */
1175 len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
1176 HEADER_SIZE(server) + 1;
1177
1178 length = cifs_read_from_socket(server,
1179 buf + HEADER_SIZE(server) - 1, len);
1180 if (length < 0)
1181 return length;
1182 server->total_read += length;
1183
1184 if (server->ops->is_session_expired &&
1185 server->ops->is_session_expired(buf)) {
1186 cifs_reconnect(server, true);
1187 return -1;
1188 }
1189
1190 if (server->ops->is_status_pending &&
1191 server->ops->is_status_pending(buf, server)) {
1192 cifs_discard_remaining_data(server);
1193 return -1;
1194 }
1195
1196 /* set up first two iov for signature check and to get credits */
1197 rdata->iov[0].iov_base = buf;
1198 rdata->iov[0].iov_len = server->total_read;
1199 cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
1200 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
1201
1202 /* Was the SMB read successful? */
1203 rdata->result = server->ops->map_error(buf, false);
1204 if (rdata->result != 0) {
1205 cifs_dbg(FYI, "%s: server returned error %d\n",
1206 __func__, rdata->result);
1207 /* normal error on read response */
1208 return __cifs_readv_discard(server, mid, false);
1209 }
1210
1211 /* Is there enough to get to the rest of the READ_RSP header? */
1212 if (server->total_read < server->vals->read_rsp_size) {
1213 cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n",
1214 __func__, server->total_read,
1215 server->vals->read_rsp_size);
1216 rdata->result = smb_EIO2(smb_eio_trace_read_rsp_short,
1217 server->total_read, server->vals->read_rsp_size);
1218 return cifs_readv_discard(server, mid);
1219 }
1220
1221 data_offset = server->ops->read_data_offset(buf);
1222 if (data_offset < server->total_read) {
1223 /*
1224 * win2k8 sometimes sends an offset of 0 when the read
1225 * is beyond the EOF. Treat it as if the data starts just after
1226 * the header.
1227 */
1228 cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n",
1229 __func__, data_offset);
1230 data_offset = server->total_read;
1231 } else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
1232 /* data_offset is beyond the end of smallbuf */
1233 cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
1234 __func__, data_offset);
1235 rdata->result = smb_EIO1(smb_eio_trace_read_overlarge,
1236 data_offset);
1237 return cifs_readv_discard(server, mid);
1238 }
1239
1240 cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n",
1241 __func__, server->total_read, data_offset);
1242
1243 len = data_offset - server->total_read;
1244 if (len > 0) {
1245 /* read any junk before data into the rest of smallbuf */
1246 length = cifs_read_from_socket(server,
1247 buf + server->total_read, len);
1248 if (length < 0)
1249 return length;
1250 server->total_read += length;
1251 rdata->iov[0].iov_len = server->total_read;
1252 }
1253
1254 /* how much data is in the response? */
1255 #ifdef CONFIG_CIFS_SMB_DIRECT
1256 use_rdma_mr = rdata->mr;
1257 #endif
1258 data_len = server->ops->read_data_length(buf, use_rdma_mr);
1259 if (!use_rdma_mr && (data_offset + data_len > buflen)) {
1260 /* data_len is corrupt -- discard frame */
1261 rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed,
1262 data_offset + data_len, buflen);
1263 return cifs_readv_discard(server, mid);
1264 }
1265
1266 #ifdef CONFIG_CIFS_SMB_DIRECT
1267 if (rdata->mr)
1268 length = data_len; /* An RDMA read is already done. */
1269 else
1270 #endif
1271 length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter,
1272 data_len);
1273 if (length > 0)
1274 rdata->got_bytes += length;
1275 server->total_read += length;
1276
1277 cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
1278 server->total_read, buflen, data_len);
1279
1280 /* discard anything left over */
1281 if (server->total_read < buflen)
1282 return cifs_readv_discard(server, mid);
1283
1284 dequeue_mid(server, mid, false);
1285 mid->resp_buf = server->smallbuf;
1286 server->smallbuf = NULL;
1287 return length;
1288 }
1289