xref: /linux/fs/smb/client/transport.c (revision 42eb01783091e49020221a8a7d6c00e154ae7e58)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   Copyright (C) International Business Machines  Corp., 2002,2008
5  *   Author(s): Steve French (sfrench@us.ibm.com)
6  *   Jeremy Allison (jra@samba.org) 2006.
7  *
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/list.h>
12 #include <linux/gfp.h>
13 #include <linux/wait.h>
14 #include <linux/net.h>
15 #include <linux/delay.h>
16 #include <linux/freezer.h>
17 #include <linux/tcp.h>
18 #include <linux/bvec.h>
19 #include <linux/highmem.h>
20 #include <linux/uaccess.h>
21 #include <linux/processor.h>
22 #include <linux/mempool.h>
23 #include <linux/sched/signal.h>
24 #include <linux/task_io_accounting_ops.h>
25 #include <linux/task_work.h>
26 #include "cifsglob.h"
27 #include "cifsproto.h"
28 #include "cifs_debug.h"
29 #include "smb2proto.h"
30 #include "smbdirect.h"
31 #include "compress.h"
32 
33 void
cifs_wake_up_task(struct TCP_Server_Info * server,struct mid_q_entry * mid)34 cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid)
35 {
36 	if (mid->mid_state == MID_RESPONSE_RECEIVED)
37 		mid->mid_state = MID_RESPONSE_READY;
38 	wake_up_process(mid->callback_data);
39 }
40 
__release_mid(struct TCP_Server_Info * server,struct mid_q_entry * midEntry)41 void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *midEntry)
42 {
43 #ifdef CONFIG_CIFS_STATS2
44 	__le16 command = server->vals->lock_cmd;
45 	__u16 smb_cmd = le16_to_cpu(midEntry->command);
46 	unsigned long now;
47 	unsigned long roundtrip_time;
48 #endif
49 
50 	if (midEntry->resp_buf && (midEntry->wait_cancelled) &&
51 	    (midEntry->mid_state == MID_RESPONSE_RECEIVED ||
52 	     midEntry->mid_state == MID_RESPONSE_READY) &&
53 	    server->ops->handle_cancelled_mid)
54 		server->ops->handle_cancelled_mid(midEntry, server);
55 
56 	midEntry->mid_state = MID_FREE;
57 	atomic_dec(&mid_count);
58 	if (midEntry->large_buf)
59 		cifs_buf_release(midEntry->resp_buf);
60 	else
61 		cifs_small_buf_release(midEntry->resp_buf);
62 #ifdef CONFIG_CIFS_STATS2
63 	now = jiffies;
64 	if (now < midEntry->when_alloc)
65 		cifs_server_dbg(VFS, "Invalid mid allocation time\n");
66 	roundtrip_time = now - midEntry->when_alloc;
67 
68 	if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) {
69 		if (atomic_read(&server->num_cmds[smb_cmd]) == 0) {
70 			server->slowest_cmd[smb_cmd] = roundtrip_time;
71 			server->fastest_cmd[smb_cmd] = roundtrip_time;
72 		} else {
73 			if (server->slowest_cmd[smb_cmd] < roundtrip_time)
74 				server->slowest_cmd[smb_cmd] = roundtrip_time;
75 			else if (server->fastest_cmd[smb_cmd] > roundtrip_time)
76 				server->fastest_cmd[smb_cmd] = roundtrip_time;
77 		}
78 		cifs_stats_inc(&server->num_cmds[smb_cmd]);
79 		server->time_per_cmd[smb_cmd] += roundtrip_time;
80 	}
81 	/*
82 	 * commands taking longer than one second (default) can be indications
83 	 * that something is wrong, unless it is quite a slow link or a very
84 	 * busy server. Note that this calc is unlikely or impossible to wrap
85 	 * as long as slow_rsp_threshold is not set way above recommended max
86 	 * value (32767 ie 9 hours) and is generally harmless even if wrong
87 	 * since only affects debug counters - so leaving the calc as simple
88 	 * comparison rather than doing multiple conversions and overflow
89 	 * checks
90 	 */
91 	if ((slow_rsp_threshold != 0) &&
92 	    time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
93 	    (midEntry->command != command)) {
94 		/*
95 		 * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command
96 		 * NB: le16_to_cpu returns unsigned so can not be negative below
97 		 */
98 		if (smb_cmd < NUMBER_OF_SMB2_COMMANDS)
99 			cifs_stats_inc(&server->smb2slowcmd[smb_cmd]);
100 
101 		trace_smb3_slow_rsp(smb_cmd, midEntry->mid, midEntry->pid,
102 			       midEntry->when_sent, midEntry->when_received);
103 		if (cifsFYI & CIFS_TIMER) {
104 			pr_debug("slow rsp: cmd %d mid %llu",
105 				 midEntry->command, midEntry->mid);
106 			cifs_info("A: 0x%lx S: 0x%lx R: 0x%lx\n",
107 				  now - midEntry->when_alloc,
108 				  now - midEntry->when_sent,
109 				  now - midEntry->when_received);
110 		}
111 	}
112 #endif
113 	put_task_struct(midEntry->creator);
114 
115 	mempool_free(midEntry, &cifs_mid_pool);
116 }
117 
118 void
delete_mid(struct TCP_Server_Info * server,struct mid_q_entry * mid)119 delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid)
120 {
121 	spin_lock(&server->mid_queue_lock);
122 
123 	if (!mid->deleted_from_q) {
124 		list_del_init(&mid->qhead);
125 		mid->deleted_from_q = true;
126 	}
127 	spin_unlock(&server->mid_queue_lock);
128 
129 	release_mid(server, mid);
130 }
131 
132 /*
133  * smb_send_kvec - send an array of kvecs to the server
134  * @server:	Server to send the data to
135  * @smb_msg:	Message to send
136  * @sent:	amount of data sent on socket is stored here
137  *
138  * Our basic "send data to server" function. Should be called with srv_mutex
139  * held. The caller is responsible for handling the results.
140  */
141 int
smb_send_kvec(struct TCP_Server_Info * server,struct msghdr * smb_msg,size_t * sent)142 smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
143 	      size_t *sent)
144 {
145 	int rc = 0;
146 	int retries = 0;
147 	struct socket *ssocket = server->ssocket;
148 
149 	*sent = 0;
150 
151 	if (server->noblocksnd)
152 		smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
153 	else
154 		smb_msg->msg_flags = MSG_NOSIGNAL;
155 
156 	while (msg_data_left(smb_msg)) {
157 		/*
158 		 * If blocking send, we try 3 times, since each can block
159 		 * for 5 seconds. For nonblocking  we have to try more
160 		 * but wait increasing amounts of time allowing time for
161 		 * socket to clear.  The overall time we wait in either
162 		 * case to send on the socket is about 15 seconds.
163 		 * Similarly we wait for 15 seconds for a response from
164 		 * the server in SendReceive[2] for the server to send
165 		 * a response back for most types of requests (except
166 		 * SMB Write past end of file which can be slow, and
167 		 * blocking lock operations). NFS waits slightly longer
168 		 * than CIFS, but this can make it take longer for
169 		 * nonresponsive servers to be detected and 15 seconds
170 		 * is more than enough time for modern networks to
171 		 * send a packet.  In most cases if we fail to send
172 		 * after the retries we will kill the socket and
173 		 * reconnect which may clear the network problem.
174 		 *
175 		 * Even if regular signals are masked, EINTR might be
176 		 * propagated from sk_stream_wait_memory() to here when
177 		 * TIF_NOTIFY_SIGNAL is used for task work. For example,
178 		 * certain io_uring completions will use that. Treat
179 		 * having EINTR with pending task work the same as EAGAIN
180 		 * to avoid unnecessary reconnects.
181 		 */
182 		rc = sock_sendmsg(ssocket, smb_msg);
183 		if (rc == -EAGAIN || unlikely(rc == -EINTR && task_work_pending(current))) {
184 			retries++;
185 			if (retries >= 14 ||
186 			    (!server->noblocksnd && (retries > 2))) {
187 				cifs_server_dbg(VFS, "sends on sock %p stuck for 15 seconds\n",
188 					 ssocket);
189 				return -EAGAIN;
190 			}
191 			msleep(1 << retries);
192 			continue;
193 		}
194 
195 		if (rc < 0)
196 			return rc;
197 
198 		if (rc == 0) {
199 			/* should never happen, letting socket clear before
200 			   retrying is our only obvious option here */
201 			cifs_server_dbg(VFS, "tcp sent no data\n");
202 			msleep(500);
203 			continue;
204 		}
205 
206 		/* send was at least partially successful */
207 		*sent += rc;
208 		retries = 0; /* in case we get ENOSPC on the next send */
209 	}
210 	return 0;
211 }
212 
213 unsigned long
smb_rqst_len(struct TCP_Server_Info * server,struct smb_rqst * rqst)214 smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
215 {
216 	unsigned int i;
217 	struct kvec *iov;
218 	int nvec;
219 	unsigned long buflen = 0;
220 
221 	if (!is_smb1(server) && rqst->rq_nvec >= 2 &&
222 	    rqst->rq_iov[0].iov_len == 4) {
223 		iov = &rqst->rq_iov[1];
224 		nvec = rqst->rq_nvec - 1;
225 	} else {
226 		iov = rqst->rq_iov;
227 		nvec = rqst->rq_nvec;
228 	}
229 
230 	/* total up iov array first */
231 	for (i = 0; i < nvec; i++)
232 		buflen += iov[i].iov_len;
233 
234 	buflen += iov_iter_count(&rqst->rq_iter);
235 	return buflen;
236 }
237 
/*
 * __smb_send_rqst - transmit one or more SMB requests as a single frame
 * @server:	connection to send on
 * @num_rqst:	number of entries in @rqst
 * @rqst:	requests that are sent back to back
 *
 * RDMA-enabled connections are handed to smbd_send() (or fail with -EAGAIN
 * while no smbd connection exists). Otherwise the TCP socket is corked, a
 * 4-byte big-endian length marker covering all requests is written, and then
 * each request's kvec array followed by its rq_iter data (if any) is sent.
 * All signals are blocked around the socket writes.
 *
 * If some but not all bytes went out, cifs_signal_cifsd_for_reconnect() is
 * called. Once transmission has been attempted, any error other than -EINTR
 * or -EAGAIN is collapsed to -ECONNABORTED. The early exits (no socket,
 * fatal signal already pending) return -EAGAIN / -ERESTARTSYS unchanged.
 *
 * Return: 0 on success, negative errno otherwise.
 */
__smb_send_rqst(struct TCP_Server_Info * server,int num_rqst,struct smb_rqst * rqst)238 int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
239 		    struct smb_rqst *rqst)
240 {
241 	int rc;
242 	struct kvec *iov;
243 	int n_vec;
244 	unsigned int send_length = 0;
245 	unsigned int i, j;
246 	sigset_t mask, oldmask;
247 	size_t total_len = 0, sent, size;
248 	struct socket *ssocket = server->ssocket;
249 	struct msghdr smb_msg = {};
250 	__be32 rfc1002_marker;
251 
	/* balanced by cifs_in_send_dec() at the "out" label on every path */
252 	cifs_in_send_inc(server);
253 	if (cifs_rdma_enabled(server)) {
254 		/* return -EAGAIN when connecting or reconnecting */
255 		rc = -EAGAIN;
256 		if (server->smbd_conn)
257 			rc = smbd_send(server, num_rqst, rqst);
258 		goto smbd_done;
259 	}
260 
261 	rc = -EAGAIN;
262 	if (ssocket == NULL)
263 		goto out;
264 
265 	rc = -ERESTARTSYS;
266 	if (fatal_signal_pending(current)) {
267 		cifs_dbg(FYI, "signal pending before send request\n");
268 		goto out;
269 	}
270 
271 	rc = 0;
272 	/* cork the socket */
273 	tcp_sock_set_cork(ssocket->sk, true);
274 
	/* the marker carries the combined length of every request in the chain */
275 	for (j = 0; j < num_rqst; j++)
276 		send_length += smb_rqst_len(server, &rqst[j]);
277 	rfc1002_marker = cpu_to_be32(send_length);
278 
279 	/*
280 	 * We should not allow signals to interrupt the network send because
281 	 * any partial send will cause session reconnects thus increasing
282 	 * latency of system calls and overload a server with unnecessary
283 	 * requests.
284 	 */
285 
286 	sigfillset(&mask);
287 	sigprocmask(SIG_BLOCK, &mask, &oldmask);
288 
289 	/* Generate a rfc1002 marker */
290 	{
291 		struct kvec hiov = {
292 			.iov_base = &rfc1002_marker,
293 			.iov_len  = 4
294 		};
295 		iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4);
296 		rc = smb_send_kvec(server, &smb_msg, &sent);
297 		if (rc < 0)
298 			goto unmask;
299 
300 		total_len += sent;
		/*
		 * From here on send_length also counts the 4 marker bytes so
		 * that it can be compared against total_len below.
		 */
301 		send_length += 4;
302 	}
303 
304 	cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length);
305 
306 	for (j = 0; j < num_rqst; j++) {
307 		iov = rqst[j].rq_iov;
308 		n_vec = rqst[j].rq_nvec;
309 
310 		size = 0;
311 		for (i = 0; i < n_vec; i++) {
312 			dump_smb(iov[i].iov_base, iov[i].iov_len);
313 			size += iov[i].iov_len;
314 		}
315 
316 		iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size);
317 
318 		rc = smb_send_kvec(server, &smb_msg, &sent);
319 		if (rc < 0)
320 			goto unmask;
321 
322 		total_len += sent;
323 
		/* send a copy of the request's data iterator, if it holds anything */
324 		if (iov_iter_count(&rqst[j].rq_iter) > 0) {
325 			smb_msg.msg_iter = rqst[j].rq_iter;
326 			rc = smb_send_kvec(server, &smb_msg, &sent);
			/* leaving the loop lands on "unmask", same as the gotos above */
327 			if (rc < 0)
328 				break;
329 			total_len += sent;
330 		}
331 	}
332 
333 unmask:
334 	sigprocmask(SIG_SETMASK, &oldmask, NULL);
335 
336 	/*
337 	 * If signal is pending but we have already sent the whole packet to
338 	 * the server we need to return success status to allow a corresponding
339 	 * mid entry to be kept in the pending requests queue thus allowing
340 	 * to handle responses from the server by the client.
341 	 *
342 	 * If only part of the packet has been sent there is no need to hide
343 	 * interrupt because the session will be reconnected anyway, so there
344 	 * won't be any response from the server to handle.
345 	 */
346 
347 	if (signal_pending(current) && (total_len != send_length)) {
348 		cifs_dbg(FYI, "signal is pending after attempt to send\n");
349 		rc = -ERESTARTSYS;
350 	}
351 
352 	/* uncork it */
353 	tcp_sock_set_cork(ssocket->sk, false);
354 
355 	if ((total_len > 0) && (total_len != send_length)) {
356 		cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
357 			 send_length, total_len);
358 		/*
359 		 * If we have only sent part of an SMB then the next SMB could
360 		 * be taken as the remainder of this one. We need to kill the
361 		 * socket so the server throws away the partial SMB
362 		 */
363 		cifs_signal_cifsd_for_reconnect(server, false);
364 		trace_smb3_partial_send_reconnect(server->current_mid,
365 						  server->conn_id, server->hostname);
366 	}
367 smbd_done:
368 	/*
369 	 * there's hardly any use for the layers above to know the
370 	 * actual error code here. All they should do at this point is
371 	 * to retry the connection and hope it goes away.
372 	 */
373 	if (rc < 0 && rc != -EINTR && rc != -EAGAIN) {
374 		cifs_server_dbg(VFS, "Error %d sending data on socket to server\n",
375 			 rc);
376 		rc = -ECONNABORTED;
377 		cifs_signal_cifsd_for_reconnect(server, false);
378 	} else if (rc > 0)
379 		rc = 0;
380 out:
381 	cifs_in_send_dec(server);
382 	return rc;
383 }
384 
385 static int
smb_send_rqst(struct TCP_Server_Info * server,int num_rqst,struct smb_rqst * rqst,int flags)386 smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
387 	      struct smb_rqst *rqst, int flags)
388 {
389 	struct smb2_transform_hdr tr_hdr;
390 	struct smb_rqst new_rqst[MAX_COMPOUND] = {};
391 	struct kvec iov = {
392 		.iov_base = &tr_hdr,
393 		.iov_len = sizeof(tr_hdr),
394 	};
395 	int rc;
396 
397 	if (flags & CIFS_COMPRESS_REQ)
398 		return smb_compress(server, &rqst[0], __smb_send_rqst);
399 
400 	if (!(flags & CIFS_TRANSFORM_REQ))
401 		return __smb_send_rqst(server, num_rqst, rqst);
402 
403 	if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1))
404 		return smb_EIO1(smb_eio_trace_tx_max_compound, num_rqst);
405 
406 	if (!server->ops->init_transform_rq) {
407 		cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n");
408 		return smb_EIO(smb_eio_trace_tx_need_transform);
409 	}
410 
411 	new_rqst[0].rq_iov = &iov;
412 	new_rqst[0].rq_nvec = 1;
413 
414 	rc = server->ops->init_transform_rq(server, num_rqst + 1,
415 					    new_rqst, rqst);
416 	if (!rc) {
417 		rc = __smb_send_rqst(server, num_rqst + 1, new_rqst);
418 		smb3_free_compound_rqst(num_rqst, &new_rqst[1]);
419 	}
420 	return rc;
421 }
422 
/*
 * wait_for_free_credits - reserve credits for a request, sleeping if needed
 * @server:	connection whose credits are consumed
 * @num_credits: number of credits the caller needs
 * @timeout:	maximum wait in milliseconds; a negative value selects
 *		MAX_JIFFY_OFFSET
 * @flags:	the CIFS_OP_MASK bits pick the credit field, the
 *		CIFS_TIMEOUT_MASK bits pick non-blocking/blocking handling
 * @instance:	reset to 0 on entry and set to server->reconnect_instance
 *		when credits are actually taken
 *
 * Return: 0 on success; -EAGAIN for an echo request when no credits are
 * left; -ENOENT if the connection is CifsExiting; -EBUSY if the wait timed
 * out; -ERESTARTSYS if the killable wait was interrupted.
 */
423 static int
wait_for_free_credits(struct TCP_Server_Info * server,const int num_credits,const int timeout,const int flags,unsigned int * instance)424 wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
425 		      const int timeout, const int flags,
426 		      unsigned int *instance)
427 {
428 	long rc;
429 	int *credits;
430 	int optype;
431 	long int t;
432 	int scredits, in_flight;
433 
434 	if (timeout < 0)
435 		t = MAX_JIFFY_OFFSET;
436 	else
437 		t = msecs_to_jiffies(timeout);
438 
439 	optype = flags & CIFS_OP_MASK;
440 
441 	*instance = 0;
442 
443 	credits = server->ops->get_credits_field(server, optype);
444 	/* Since an echo is already inflight, no need to wait to send another */
445 	if (*credits <= 0 && optype == CIFS_ECHO_OP)
446 		return -EAGAIN;
447 
448 	spin_lock(&server->req_lock);
449 	if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) {
450 		/* oplock breaks must not be held up */
451 		server->in_flight++;
452 		if (server->in_flight > server->max_in_flight)
453 			server->max_in_flight = server->in_flight;
		/* one credit is taken without checking how many are available */
454 		*credits -= 1;
455 		*instance = server->reconnect_instance;
456 		scredits = *credits;
457 		in_flight = server->in_flight;
458 		spin_unlock(&server->req_lock);
459 
460 		trace_smb3_nblk_credits(server->current_mid,
461 				server->conn_id, server->hostname, scredits, -1, in_flight);
462 		cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
463 				__func__, 1, scredits);
464 
465 		return 0;
466 	}
467 
	/*
	 * req_lock is held at the top of every iteration (taken above for the
	 * first pass and re-taken before each "continue"/loop-back); it is
	 * dropped first so that srv_lock can be taken for the status check.
	 */
468 	while (1) {
469 		spin_unlock(&server->req_lock);
470 
471 		spin_lock(&server->srv_lock);
472 		if (server->tcpStatus == CifsExiting) {
473 			spin_unlock(&server->srv_lock);
474 			return -ENOENT;
475 		}
476 		spin_unlock(&server->srv_lock);
477 
478 		spin_lock(&server->req_lock);
479 		if (*credits < num_credits) {
480 			scredits = *credits;
481 			spin_unlock(&server->req_lock);
482 
483 			cifs_num_waiters_inc(server);
484 			rc = wait_event_killable_timeout(server->request_q,
485 				has_credits(server, credits, num_credits), t);
486 			cifs_num_waiters_dec(server);
			/* rc == 0: the timeout elapsed with the condition still false */
487 			if (!rc) {
488 				spin_lock(&server->req_lock);
489 				scredits = *credits;
490 				in_flight = server->in_flight;
491 				spin_unlock(&server->req_lock);
492 
493 				trace_smb3_credit_timeout(server->current_mid,
494 						server->conn_id, server->hostname, scredits,
495 						num_credits, in_flight);
496 				cifs_server_dbg(VFS, "wait timed out after %d ms\n",
497 						timeout);
498 				return -EBUSY;
499 			}
500 			if (rc == -ERESTARTSYS)
501 				return -ERESTARTSYS;
			/* re-take the lock and re-evaluate from the top of the loop */
502 			spin_lock(&server->req_lock);
503 		} else {
504 			/*
505 			 * For normal commands, reserve the last MAX_COMPOUND
506 			 * credits to compound requests.
507 			 * Otherwise these compounds could be permanently
508 			 * starved for credits by single-credit requests.
509 			 *
510 			 * To prevent spinning CPU, block this thread until
511 			 * there are >MAX_COMPOUND credits available.
512 			 * But only do this is we already have a lot of
513 			 * credits in flight to avoid triggering this check
514 			 * for servers that are slow to hand out credits on
515 			 * new sessions.
516 			 */
517 			if (!optype && num_credits == 1 &&
518 			    server->in_flight > 2 * MAX_COMPOUND &&
519 			    *credits <= MAX_COMPOUND) {
520 				spin_unlock(&server->req_lock);
521 
522 				cifs_num_waiters_inc(server);
523 				rc = wait_event_killable_timeout(
524 					server->request_q,
525 					has_credits(server, credits,
526 						    MAX_COMPOUND + 1),
527 					t);
528 				cifs_num_waiters_dec(server);
529 				if (!rc) {
530 					spin_lock(&server->req_lock);
531 					scredits = *credits;
532 					in_flight = server->in_flight;
533 					spin_unlock(&server->req_lock);
534 
535 					trace_smb3_credit_timeout(
536 							server->current_mid,
537 							server->conn_id, server->hostname,
538 							scredits, num_credits, in_flight);
539 					cifs_server_dbg(VFS, "wait timed out after %d ms\n",
540 							timeout);
541 					return -EBUSY;
542 				}
543 				if (rc == -ERESTARTSYS)
544 					return -ERESTARTSYS;
545 				spin_lock(&server->req_lock);
546 				continue;
547 			}
548 
549 			/*
550 			 * Can not count locking commands against total
551 			 * as they are allowed to block on server.
552 			 */
553 
554 			/* update # of requests on the wire to server */
555 			if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
556 				*credits -= num_credits;
557 				server->in_flight += num_credits;
558 				if (server->in_flight > server->max_in_flight)
559 					server->max_in_flight = server->in_flight;
560 				*instance = server->reconnect_instance;
561 			}
562 			scredits = *credits;
563 			in_flight = server->in_flight;
564 			spin_unlock(&server->req_lock);
565 
566 			trace_smb3_waitff_credits(server->current_mid,
567 					server->conn_id, server->hostname, scredits,
568 					-(num_credits), in_flight);
569 			cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
570 					__func__, num_credits, scredits);
571 			break;
572 		}
573 	}
574 	return 0;
575 }
576 
/*
 * wait_for_free_request - obtain a single credit with no time limit
 * @server:	connection to take the credit from
 * @flags:	request flags, passed through unchanged
 * @instance:	receives the reconnect instance the credit belongs to
 *
 * Thin wrapper around wait_for_free_credits() asking for one credit and
 * passing a negative timeout.
 */
int wait_for_free_request(struct TCP_Server_Info *server, const int flags,
			  unsigned int *instance)
{
	const int one_credit = 1;
	const int no_time_limit = -1;

	return wait_for_free_credits(server, one_credit, no_time_limit, flags,
				     instance);
}
583 
584 static int
wait_for_compound_request(struct TCP_Server_Info * server,int num,const int flags,unsigned int * instance)585 wait_for_compound_request(struct TCP_Server_Info *server, int num,
586 			  const int flags, unsigned int *instance)
587 {
588 	int *credits;
589 	int scredits, in_flight;
590 
591 	credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
592 
593 	spin_lock(&server->req_lock);
594 	scredits = *credits;
595 	in_flight = server->in_flight;
596 
597 	if (*credits < num) {
598 		/*
599 		 * If the server is tight on resources or just gives us less
600 		 * credits for other reasons (e.g. requests are coming out of
601 		 * order and the server delays granting more credits until it
602 		 * processes a missing mid) and we exhausted most available
603 		 * credits there may be situations when we try to send
604 		 * a compound request but we don't have enough credits. At this
605 		 * point the client needs to decide if it should wait for
606 		 * additional credits or fail the request. If at least one
607 		 * request is in flight there is a high probability that the
608 		 * server will return enough credits to satisfy this compound
609 		 * request.
610 		 *
611 		 * Return immediately if no requests in flight since we will be
612 		 * stuck on waiting for credits.
613 		 */
614 		if (server->in_flight == 0) {
615 			spin_unlock(&server->req_lock);
616 			trace_smb3_insufficient_credits(server->current_mid,
617 					server->conn_id, server->hostname, scredits,
618 					num, in_flight);
619 			cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
620 					__func__, in_flight, num, scredits);
621 			return -EDEADLK;
622 		}
623 	}
624 	spin_unlock(&server->req_lock);
625 
626 	return wait_for_free_credits(server, num, 60000, flags,
627 				     instance);
628 }
629 
630 int
cifs_wait_mtu_credits(struct TCP_Server_Info * server,size_t size,size_t * num,struct cifs_credits * credits)631 cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size,
632 		      size_t *num, struct cifs_credits *credits)
633 {
634 	*num = size;
635 	credits->value = 0;
636 	credits->instance = server->reconnect_instance;
637 	return 0;
638 }
639 
wait_for_response(struct TCP_Server_Info * server,struct mid_q_entry * mid)640 int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *mid)
641 {
642 	unsigned int sleep_state = TASK_KILLABLE;
643 	int error;
644 
645 	if (mid->sr_flags & CIFS_INTERRUPTIBLE_WAIT)
646 		sleep_state = TASK_INTERRUPTIBLE;
647 
648 	error = wait_event_state(server->response_q,
649 				 mid->mid_state != MID_REQUEST_SUBMITTED &&
650 				 mid->mid_state != MID_RESPONSE_RECEIVED,
651 				 (sleep_state | TASK_FREEZABLE_UNSAFE));
652 	if (error < 0)
653 		return -ERESTARTSYS;
654 
655 	return 0;
656 }
657 
658 /*
659  * Send a SMB request and set the callback function in the mid to handle
660  * the result. Caller is responsible for dealing with timeouts.
661  */
662 int
cifs_call_async(struct TCP_Server_Info * server,struct smb_rqst * rqst,mid_receive_t receive,mid_callback_t callback,mid_handle_t handle,void * cbdata,const int flags,const struct cifs_credits * exist_credits)663 cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
664 		mid_receive_t receive, mid_callback_t callback,
665 		mid_handle_t handle, void *cbdata, const int flags,
666 		const struct cifs_credits *exist_credits)
667 {
668 	int rc;
669 	struct mid_q_entry *mid;
670 	struct cifs_credits credits = { .value = 0, .instance = 0 };
671 	unsigned int instance;
672 	int optype;
673 
674 	optype = flags & CIFS_OP_MASK;
675 
676 	if ((flags & CIFS_HAS_CREDITS) == 0) {
677 		rc = wait_for_free_request(server, flags, &instance);
678 		if (rc)
679 			return rc;
680 		credits.value = 1;
681 		credits.instance = instance;
682 	} else
683 		instance = exist_credits->instance;
684 
685 	cifs_server_lock(server);
686 
687 	/*
688 	 * We can't use credits obtained from the previous session to send this
689 	 * request. Check if there were reconnects after we obtained credits and
690 	 * return -EAGAIN in such cases to let callers handle it.
691 	 */
692 	if (instance != server->reconnect_instance) {
693 		cifs_server_unlock(server);
694 		add_credits_and_wake_if(server, &credits, optype);
695 		return -EAGAIN;
696 	}
697 
698 	mid = server->ops->setup_async_request(server, rqst);
699 	if (IS_ERR(mid)) {
700 		cifs_server_unlock(server);
701 		add_credits_and_wake_if(server, &credits, optype);
702 		return PTR_ERR(mid);
703 	}
704 
705 	mid->sr_flags = flags;
706 	mid->receive = receive;
707 	mid->callback = callback;
708 	mid->callback_data = cbdata;
709 	mid->handle = handle;
710 	mid->mid_state = MID_REQUEST_SUBMITTED;
711 
712 	/* put it on the pending_mid_q */
713 	spin_lock(&server->mid_queue_lock);
714 	list_add_tail(&mid->qhead, &server->pending_mid_q);
715 	spin_unlock(&server->mid_queue_lock);
716 
717 	/*
718 	 * Need to store the time in mid before calling I/O. For call_async,
719 	 * I/O response may come back and free the mid entry on another thread.
720 	 */
721 	cifs_save_when_sent(mid);
722 	rc = smb_send_rqst(server, 1, rqst, flags);
723 
724 	if (rc < 0) {
725 		revert_current_mid(server, mid->credits);
726 		server->sequence_number -= 2;
727 		delete_mid(server, mid);
728 	}
729 
730 	cifs_server_unlock(server);
731 
732 	if (rc == 0)
733 		return 0;
734 
735 	add_credits_and_wake_if(server, &credits, optype);
736 	return rc;
737 }
738 
cifs_sync_mid_result(struct mid_q_entry * mid,struct TCP_Server_Info * server)739 int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
740 {
741 	int rc = 0;
742 
743 	cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n",
744 		 __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state);
745 
746 	spin_lock(&server->mid_queue_lock);
747 	switch (mid->mid_state) {
748 	case MID_RESPONSE_READY:
749 		spin_unlock(&server->mid_queue_lock);
750 		return rc;
751 	case MID_RETRY_NEEDED:
752 		rc = -EAGAIN;
753 		break;
754 	case MID_RESPONSE_MALFORMED:
755 		rc = smb_EIO(smb_eio_trace_rx_sync_mid_malformed);
756 		break;
757 	case MID_SHUTDOWN:
758 		rc = -EHOSTDOWN;
759 		break;
760 	case MID_RC:
761 		rc = mid->mid_rc;
762 		break;
763 	default:
764 		if (mid->deleted_from_q == false) {
765 			list_del_init(&mid->qhead);
766 			mid->deleted_from_q = true;
767 		}
768 		spin_unlock(&server->mid_queue_lock);
769 		cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
770 			 __func__, mid->mid, mid->mid_state);
771 		rc = smb_EIO1(smb_eio_trace_rx_sync_mid_invalid, mid->mid_state);
772 		goto sync_mid_done;
773 	}
774 	spin_unlock(&server->mid_queue_lock);
775 
776 sync_mid_done:
777 	release_mid(server, mid);
778 	return rc;
779 }
780 
781 static void
cifs_compound_callback(struct TCP_Server_Info * server,struct mid_q_entry * mid)782 cifs_compound_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
783 {
784 	struct cifs_credits credits = {
785 		.value = server->ops->get_credits(mid),
786 		.instance = server->reconnect_instance,
787 	};
788 
789 	add_credits(server, &credits, mid->optype);
790 
791 	if (mid->mid_state == MID_RESPONSE_RECEIVED)
792 		mid->mid_state = MID_RESPONSE_READY;
793 }
794 
/*
 * cifs_compound_last_callback - callback for the final element of a chain
 * @server:	connection the response arrived on
 * @mid:	mid that completed
 *
 * Does the same credit accounting as cifs_compound_callback() and then
 * wakes the waiting task through cifs_wake_up_task().
 */
static void cifs_compound_last_callback(struct TCP_Server_Info *server,
					struct mid_q_entry *mid)
{
	cifs_compound_callback(server, mid);

	cifs_wake_up_task(server, mid);
}
801 
/*
 * cifs_cancelled_callback - callback for a mid nobody is waiting on anymore
 * @server:	connection the response arrived on
 * @mid:	mid that completed
 *
 * Does the same credit accounting as cifs_compound_callback() and then
 * passes the mid to release_mid() instead of waking a task.
 */
static void cifs_cancelled_callback(struct TCP_Server_Info *server,
				    struct mid_q_entry *mid)
{
	cifs_compound_callback(server, mid);

	release_mid(server, mid);
}
808 
809 /*
810  * cifs_pick_channel - pick an eligible channel for network operations
811  *
812  * @ses: session reference
813  *
814  * Select an eligible channel (not terminating and not marked as needing
815  * reconnect), preferring the least loaded one. If no eligible channel is
816  * found, fall back to the primary channel (index 0).
817  *
818  * Return: TCP_Server_Info pointer for the chosen channel, or NULL if @ses is
819  * NULL.
820  */
cifs_pick_channel(struct cifs_ses * ses)821 struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
822 {
823 	uint index = 0;
824 	unsigned int min_in_flight = UINT_MAX;
825 	struct TCP_Server_Info *server = NULL;
826 	int i, start, cur;
827 
828 	if (!ses)
829 		return NULL;
830 
831 	spin_lock(&ses->chan_lock);
832 	start = atomic_inc_return(&ses->chan_seq);
833 	for (i = 0; i < ses->chan_count; i++) {
834 		cur = (start + i) % ses->chan_count;
835 		server = ses->chans[cur].server;
836 		if (!server || server->terminate)
837 			continue;
838 
839 		if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur))
840 			continue;
841 
842 		/*
843 		 * strictly speaking, we should pick up req_lock to read
844 		 * server->in_flight. But it shouldn't matter much here if we
845 		 * race while reading this data. The worst that can happen is
846 		 * that we could use a channel that's not least loaded. Avoiding
847 		 * taking the lock could help reduce wait time, which is
848 		 * important for this function
849 		 */
850 		if (server->in_flight < min_in_flight) {
851 			min_in_flight = server->in_flight;
852 			index = cur;
853 		}
854 	}
855 
856 	server = ses->chans[index].server;
857 	spin_unlock(&ses->chan_lock);
858 
859 	return server;
860 }
861 
862 int
compound_send_recv(const unsigned int xid,struct cifs_ses * ses,struct TCP_Server_Info * server,const int flags,const int num_rqst,struct smb_rqst * rqst,int * resp_buf_type,struct kvec * resp_iov)863 compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
864 		   struct TCP_Server_Info *server,
865 		   const int flags, const int num_rqst, struct smb_rqst *rqst,
866 		   int *resp_buf_type, struct kvec *resp_iov)
867 {
868 	int i, j, optype, rc = 0;
869 	struct mid_q_entry *mid[MAX_COMPOUND];
870 	bool cancelled_mid[MAX_COMPOUND] = {false};
871 	struct cifs_credits credits[MAX_COMPOUND] = {
872 		{ .value = 0, .instance = 0 }
873 	};
874 	unsigned int instance;
875 	char *buf;
876 
877 	optype = flags & CIFS_OP_MASK;
878 
879 	for (i = 0; i < num_rqst; i++)
880 		resp_buf_type[i] = CIFS_NO_BUFFER;  /* no response buf yet */
881 
882 	if (!ses || !ses->server || !server) {
883 		cifs_dbg(VFS, "Null session\n");
884 		return smb_EIO(smb_eio_trace_null_pointers);
885 	}
886 
887 	spin_lock(&server->srv_lock);
888 	if (server->tcpStatus == CifsExiting) {
889 		spin_unlock(&server->srv_lock);
890 		return -ENOENT;
891 	}
892 	spin_unlock(&server->srv_lock);
893 
894 	/*
895 	 * Wait for all the requests to become available.
896 	 * This approach still leaves the possibility to be stuck waiting for
897 	 * credits if the server doesn't grant credits to the outstanding
898 	 * requests and if the client is completely idle, not generating any
899 	 * other requests.
900 	 * This can be handled by the eventual session reconnect.
901 	 */
902 	rc = wait_for_compound_request(server, num_rqst, flags,
903 				       &instance);
904 	if (rc)
905 		return rc;
906 
907 	for (i = 0; i < num_rqst; i++) {
908 		credits[i].value = 1;
909 		credits[i].instance = instance;
910 	}
911 
912 	/*
913 	 * Make sure that we sign in the same order that we send on this socket
914 	 * and avoid races inside tcp sendmsg code that could cause corruption
915 	 * of smb data.
916 	 */
917 
918 	cifs_server_lock(server);
919 
920 	/*
921 	 * All the parts of the compound chain belong obtained credits from the
922 	 * same session. We can not use credits obtained from the previous
923 	 * session to send this request. Check if there were reconnects after
924 	 * we obtained credits and return -EAGAIN in such cases to let callers
925 	 * handle it.
926 	 */
927 	if (instance != server->reconnect_instance) {
928 		cifs_server_unlock(server);
929 		for (j = 0; j < num_rqst; j++)
930 			add_credits(server, &credits[j], optype);
931 		return -EAGAIN;
932 	}
933 
934 	for (i = 0; i < num_rqst; i++) {
935 		mid[i] = server->ops->setup_request(ses, server, &rqst[i]);
936 		if (IS_ERR(mid[i])) {
937 			revert_current_mid(server, i);
938 			for (j = 0; j < i; j++)
939 				delete_mid(server, mid[j]);
940 			cifs_server_unlock(server);
941 
942 			/* Update # of requests on wire to server */
943 			for (j = 0; j < num_rqst; j++)
944 				add_credits(server, &credits[j], optype);
945 			return PTR_ERR(mid[i]);
946 		}
947 
948 		mid[i]->sr_flags = flags;
949 		mid[i]->mid_state = MID_REQUEST_SUBMITTED;
950 		mid[i]->optype = optype;
951 		/*
952 		 * Invoke callback for every part of the compound chain
953 		 * to calculate credits properly. Wake up this thread only when
954 		 * the last element is received.
955 		 */
956 		if (i < num_rqst - 1)
957 			mid[i]->callback = cifs_compound_callback;
958 		else
959 			mid[i]->callback = cifs_compound_last_callback;
960 	}
961 	rc = smb_send_rqst(server, num_rqst, rqst, flags);
962 
963 	for (i = 0; i < num_rqst; i++)
964 		cifs_save_when_sent(mid[i]);
965 
966 	if (rc < 0) {
967 		revert_current_mid(server, num_rqst);
968 		server->sequence_number -= 2;
969 	}
970 
971 	cifs_server_unlock(server);
972 
973 	/*
974 	 * If sending failed for some reason or it is an oplock break that we
975 	 * will not receive a response to - return credits back
976 	 */
977 	if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) {
978 		for (i = 0; i < num_rqst; i++)
979 			add_credits(server, &credits[i], optype);
980 		goto out;
981 	}
982 
983 	/*
984 	 * At this point the request is passed to the network stack - we assume
985 	 * that any credits taken from the server structure on the client have
986 	 * been spent and we can't return them back. Once we receive responses
987 	 * we will collect credits granted by the server in the mid callbacks
988 	 * and add those credits to the server structure.
989 	 */
990 
991 	/*
992 	 * Compounding is never used during session establish.
993 	 */
994 	spin_lock(&ses->ses_lock);
995 	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
996 		spin_unlock(&ses->ses_lock);
997 
998 		if (WARN_ON_ONCE(num_rqst != 1 || !resp_iov))
999 			return -EINVAL;
1000 
1001 		cifs_server_lock(server);
1002 		smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
1003 		cifs_server_unlock(server);
1004 
1005 		spin_lock(&ses->ses_lock);
1006 	}
1007 	spin_unlock(&ses->ses_lock);
1008 
1009 	for (i = 0; i < num_rqst; i++) {
1010 		rc = wait_for_response(server, mid[i]);
1011 		if (rc != 0)
1012 			break;
1013 	}
1014 	if (rc != 0) {
1015 		for (; i < num_rqst; i++) {
1016 			cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
1017 				 mid[i]->mid, le16_to_cpu(mid[i]->command));
1018 			send_cancel(ses, server, &rqst[i], mid[i], xid);
1019 			spin_lock(&mid[i]->mid_lock);
1020 			mid[i]->wait_cancelled = true;
1021 			if (mid[i]->mid_state == MID_REQUEST_SUBMITTED ||
1022 			    mid[i]->mid_state == MID_RESPONSE_RECEIVED) {
1023 				mid[i]->callback = cifs_cancelled_callback;
1024 				cancelled_mid[i] = true;
1025 				credits[i].value = 0;
1026 			}
1027 			spin_unlock(&mid[i]->mid_lock);
1028 		}
1029 	}
1030 
1031 	for (i = 0; i < num_rqst; i++) {
1032 		if (rc < 0)
1033 			goto out;
1034 
1035 		rc = cifs_sync_mid_result(mid[i], server);
1036 		if (rc != 0) {
1037 			/* mark this mid as cancelled to not free it below */
1038 			cancelled_mid[i] = true;
1039 			goto out;
1040 		}
1041 
1042 		if (!mid[i]->resp_buf ||
1043 		    mid[i]->mid_state != MID_RESPONSE_READY) {
1044 			rc = smb_EIO1(smb_eio_trace_rx_mid_unready, mid[i]->mid_state);
1045 			cifs_dbg(FYI, "Bad MID state?\n");
1046 			goto out;
1047 		}
1048 
1049 		rc = server->ops->check_receive(mid[i], server,
1050 						flags & CIFS_LOG_ERROR);
1051 
1052 		if (resp_iov) {
1053 			buf = (char *)mid[i]->resp_buf;
1054 			resp_iov[i].iov_base = buf;
1055 			resp_iov[i].iov_len = mid[i]->resp_buf_size;
1056 
1057 			if (mid[i]->large_buf)
1058 				resp_buf_type[i] = CIFS_LARGE_BUFFER;
1059 			else
1060 				resp_buf_type[i] = CIFS_SMALL_BUFFER;
1061 
1062 			/* mark it so buf will not be freed by delete_mid */
1063 			if ((flags & CIFS_NO_RSP_BUF) == 0)
1064 				mid[i]->resp_buf = NULL;
1065 		}
1066 	}
1067 
1068 	/*
1069 	 * Compounding is never used during session establish.
1070 	 */
1071 	spin_lock(&ses->ses_lock);
1072 	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
1073 		struct kvec iov = {
1074 			.iov_base = resp_iov[0].iov_base,
1075 			.iov_len = resp_iov[0].iov_len
1076 		};
1077 		spin_unlock(&ses->ses_lock);
1078 		cifs_server_lock(server);
1079 		smb311_update_preauth_hash(ses, server, &iov, 1);
1080 		cifs_server_unlock(server);
1081 		spin_lock(&ses->ses_lock);
1082 	}
1083 	spin_unlock(&ses->ses_lock);
1084 
1085 out:
1086 	/*
1087 	 * This will dequeue all mids. After this it is important that the
1088 	 * demultiplex_thread will not process any of these mids any further.
1089 	 * This is prevented above by using a noop callback that will not
1090 	 * wake this thread except for the very last PDU.
1091 	 */
1092 	for (i = 0; i < num_rqst; i++) {
1093 		if (!cancelled_mid[i])
1094 			delete_mid(server, mid[i]);
1095 	}
1096 
1097 	return rc;
1098 }
1099 
/*
 * Single-request front end for compound_send_recv().
 *
 * Forwards @rqst as a chain of exactly one request, together with the
 * caller's @xid, @ses, @server, @flags and response bookkeeping
 * (@resp_buf_type, @resp_iov), and returns compound_send_recv()'s result
 * unchanged.
 */
int
cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
	       struct TCP_Server_Info *server,
	       struct smb_rqst *rqst, int *resp_buf_type, const int flags,
	       struct kvec *resp_iov)
{
	/* one request only - this is not a compound chain */
	const int nr_rqst = 1;
	int rc;

	rc = compound_send_recv(xid, ses, server, flags, nr_rqst,
				rqst, resp_buf_type, resp_iov);
	return rc;
}
1109 
1110 
1111 /*
1112  * Discard any remaining data in the current SMB. To do this, we borrow the
1113  * current bigbuf.
1114  */
1115 int
cifs_discard_remaining_data(struct TCP_Server_Info * server)1116 cifs_discard_remaining_data(struct TCP_Server_Info *server)
1117 {
1118 	unsigned int rfclen = server->pdu_size;
1119 	size_t remaining = rfclen - server->total_read;
1120 
1121 	while (remaining > 0) {
1122 		ssize_t length;
1123 
1124 		length = cifs_discard_from_socket(server,
1125 				min_t(size_t, remaining,
1126 				      CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
1127 		if (length < 0)
1128 			return length;
1129 		server->total_read += length;
1130 		remaining -= length;
1131 	}
1132 
1133 	return 0;
1134 }
1135 
1136 static int
__cifs_readv_discard(struct TCP_Server_Info * server,struct mid_q_entry * mid,bool malformed)1137 __cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
1138 		     bool malformed)
1139 {
1140 	int length;
1141 
1142 	length = cifs_discard_remaining_data(server);
1143 	dequeue_mid(server, mid, malformed);
1144 	mid->resp_buf = server->smallbuf;
1145 	server->smallbuf = NULL;
1146 	return length;
1147 }
1148 
1149 static int
cifs_readv_discard(struct TCP_Server_Info * server,struct mid_q_entry * mid)1150 cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1151 {
1152 	struct cifs_io_subrequest *rdata = mid->callback_data;
1153 
1154 	return  __cifs_readv_discard(server, mid, rdata->result);
1155 }
1156 
1157 int
cifs_readv_receive(struct TCP_Server_Info * server,struct mid_q_entry * mid)1158 cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1159 {
1160 	int length, len;
1161 	unsigned int data_offset, data_len;
1162 	struct cifs_io_subrequest *rdata = mid->callback_data;
1163 	char *buf = server->smallbuf;
1164 	unsigned int buflen = server->pdu_size;
1165 	bool use_rdma_mr = false;
1166 
1167 	cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n",
1168 		 __func__, mid->mid, rdata->subreq.start, rdata->subreq.len);
1169 
1170 	/*
1171 	 * read the rest of READ_RSP header (sans Data array), or whatever we
1172 	 * can if there's not enough data. At this point, we've read down to
1173 	 * the Mid.
1174 	 */
1175 	len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
1176 							HEADER_SIZE(server) + 1;
1177 
1178 	length = cifs_read_from_socket(server,
1179 				       buf + HEADER_SIZE(server) - 1, len);
1180 	if (length < 0)
1181 		return length;
1182 	server->total_read += length;
1183 
1184 	if (server->ops->is_session_expired &&
1185 	    server->ops->is_session_expired(buf)) {
1186 		cifs_reconnect(server, true);
1187 		return -1;
1188 	}
1189 
1190 	if (server->ops->is_status_pending &&
1191 	    server->ops->is_status_pending(buf, server)) {
1192 		cifs_discard_remaining_data(server);
1193 		return -1;
1194 	}
1195 
1196 	/* set up first two iov for signature check and to get credits */
1197 	rdata->iov[0].iov_base = buf;
1198 	rdata->iov[0].iov_len = server->total_read;
1199 	cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
1200 		 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
1201 
1202 	/* Was the SMB read successful? */
1203 	rdata->result = server->ops->map_error(buf, false);
1204 	if (rdata->result != 0) {
1205 		cifs_dbg(FYI, "%s: server returned error %d\n",
1206 			 __func__, rdata->result);
1207 		/* normal error on read response */
1208 		return __cifs_readv_discard(server, mid, false);
1209 	}
1210 
1211 	/* Is there enough to get to the rest of the READ_RSP header? */
1212 	if (server->total_read < server->vals->read_rsp_size) {
1213 		cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n",
1214 			 __func__, server->total_read,
1215 			 server->vals->read_rsp_size);
1216 		rdata->result = smb_EIO2(smb_eio_trace_read_rsp_short,
1217 					 server->total_read, server->vals->read_rsp_size);
1218 		return cifs_readv_discard(server, mid);
1219 	}
1220 
1221 	data_offset = server->ops->read_data_offset(buf);
1222 	if (data_offset < server->total_read) {
1223 		/*
1224 		 * win2k8 sometimes sends an offset of 0 when the read
1225 		 * is beyond the EOF. Treat it as if the data starts just after
1226 		 * the header.
1227 		 */
1228 		cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n",
1229 			 __func__, data_offset);
1230 		data_offset = server->total_read;
1231 	} else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
1232 		/* data_offset is beyond the end of smallbuf */
1233 		cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
1234 			 __func__, data_offset);
1235 		rdata->result = smb_EIO1(smb_eio_trace_read_overlarge,
1236 					 data_offset);
1237 		return cifs_readv_discard(server, mid);
1238 	}
1239 
1240 	cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n",
1241 		 __func__, server->total_read, data_offset);
1242 
1243 	len = data_offset - server->total_read;
1244 	if (len > 0) {
1245 		/* read any junk before data into the rest of smallbuf */
1246 		length = cifs_read_from_socket(server,
1247 					       buf + server->total_read, len);
1248 		if (length < 0)
1249 			return length;
1250 		server->total_read += length;
1251 		rdata->iov[0].iov_len = server->total_read;
1252 	}
1253 
1254 	/* how much data is in the response? */
1255 #ifdef CONFIG_CIFS_SMB_DIRECT
1256 	use_rdma_mr = rdata->mr;
1257 #endif
1258 	data_len = server->ops->read_data_length(buf, use_rdma_mr);
1259 	if (!use_rdma_mr && (data_offset + data_len > buflen)) {
1260 		/* data_len is corrupt -- discard frame */
1261 		rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed,
1262 					 data_offset + data_len, buflen);
1263 		return cifs_readv_discard(server, mid);
1264 	}
1265 
1266 #ifdef CONFIG_CIFS_SMB_DIRECT
1267 	if (rdata->mr)
1268 		length = data_len; /* An RDMA read is already done. */
1269 	else
1270 #endif
1271 		length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter,
1272 						    data_len);
1273 	if (length > 0)
1274 		rdata->got_bytes += length;
1275 	server->total_read += length;
1276 
1277 	cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
1278 		 server->total_read, buflen, data_len);
1279 
1280 	/* discard anything left over */
1281 	if (server->total_read < buflen)
1282 		return cifs_readv_discard(server, mid);
1283 
1284 	dequeue_mid(server, mid, false);
1285 	mid->resp_buf = server->smallbuf;
1286 	server->smallbuf = NULL;
1287 	return length;
1288 }
1289