xref: /linux/fs/smb/client/transport.c (revision 3d99347a2e1ae60d9368b1d734290bab1acde0ce)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   Copyright (C) International Business Machines  Corp., 2002,2008
5  *   Author(s): Steve French (sfrench@us.ibm.com)
6  *   Jeremy Allison (jra@samba.org) 2006.
7  *
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/list.h>
12 #include <linux/gfp.h>
13 #include <linux/wait.h>
14 #include <linux/net.h>
15 #include <linux/delay.h>
16 #include <linux/freezer.h>
17 #include <linux/tcp.h>
18 #include <linux/bvec.h>
19 #include <linux/highmem.h>
20 #include <linux/uaccess.h>
21 #include <linux/processor.h>
22 #include <linux/mempool.h>
23 #include <linux/sched/signal.h>
24 #include <linux/task_io_accounting_ops.h>
25 #include <linux/task_work.h>
26 #include "cifspdu.h"
27 #include "cifsglob.h"
28 #include "cifsproto.h"
29 #include "cifs_debug.h"
30 #include "smb2proto.h"
31 #include "smbdirect.h"
32 #include "compress.h"
33 
/*
 * Default mid completion callback for synchronous requests: promote a
 * fully-received response to READY, then wake the task sleeping in
 * wait_for_response(). The state update must precede the wakeup so the
 * waiter observes READY when it runs.
 * NOTE(review): callback_data holding the waiting task_struct is
 * inferred from the wake_up_process() usage — confirm where the mid is
 * set up.
 */
void
cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	if (mid->mid_state == MID_RESPONSE_RECEIVED)
		mid->mid_state = MID_RESPONSE_READY;
	wake_up_process(mid->callback_data);
}
41 
/*
 * Final teardown of a mid_q_entry: run the protocol's cancellation hook
 * if the waiter abandoned the request, free the response buffer, record
 * per-command latency statistics (CONFIG_CIFS_STATS2), drop the creator
 * task reference, and return the entry to the mid mempool.
 */
void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *midEntry)
{
#ifdef CONFIG_CIFS_STATS2
	/* lock_cmd is excluded from slow-response accounting below */
	__le16 command = server->vals->lock_cmd;
	__u16 smb_cmd = le16_to_cpu(midEntry->command);
	unsigned long now;
	unsigned long roundtrip_time;
#endif

	/*
	 * The waiter gave up before the response arrived but a response
	 * did come in: let the protocol hook clean up after the
	 * cancelled mid (e.g. server-side state tied to it).
	 */
	if (midEntry->resp_buf && (midEntry->wait_cancelled) &&
	    (midEntry->mid_state == MID_RESPONSE_RECEIVED ||
	     midEntry->mid_state == MID_RESPONSE_READY) &&
	    server->ops->handle_cancelled_mid)
		server->ops->handle_cancelled_mid(midEntry, server);

	midEntry->mid_state = MID_FREE;
	atomic_dec(&mid_count);
	/* Response buffers come from two pools; release to the right one. */
	if (midEntry->large_buf)
		cifs_buf_release(midEntry->resp_buf);
	else
		cifs_small_buf_release(midEntry->resp_buf);
#ifdef CONFIG_CIFS_STATS2
	now = jiffies;
	if (now < midEntry->when_alloc)
		cifs_server_dbg(VFS, "Invalid mid allocation time\n");
	roundtrip_time = now - midEntry->when_alloc;

	if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) {
		/* First sample for this command primes both extremes. */
		if (atomic_read(&server->num_cmds[smb_cmd]) == 0) {
			server->slowest_cmd[smb_cmd] = roundtrip_time;
			server->fastest_cmd[smb_cmd] = roundtrip_time;
		} else {
			if (server->slowest_cmd[smb_cmd] < roundtrip_time)
				server->slowest_cmd[smb_cmd] = roundtrip_time;
			else if (server->fastest_cmd[smb_cmd] > roundtrip_time)
				server->fastest_cmd[smb_cmd] = roundtrip_time;
		}
		cifs_stats_inc(&server->num_cmds[smb_cmd]);
		server->time_per_cmd[smb_cmd] += roundtrip_time;
	}
	/*
	 * commands taking longer than one second (default) can be indications
	 * that something is wrong, unless it is quite a slow link or a very
	 * busy server. Note that this calc is unlikely or impossible to wrap
	 * as long as slow_rsp_threshold is not set way above recommended max
	 * value (32767 ie 9 hours) and is generally harmless even if wrong
	 * since only affects debug counters - so leaving the calc as simple
	 * comparison rather than doing multiple conversions and overflow
	 * checks
	 */
	if ((slow_rsp_threshold != 0) &&
	    time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
	    (midEntry->command != command)) {
		/*
		 * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command
		 * NB: le16_to_cpu returns unsigned so can not be negative below
		 */
		if (smb_cmd < NUMBER_OF_SMB2_COMMANDS)
			cifs_stats_inc(&server->smb2slowcmd[smb_cmd]);

		trace_smb3_slow_rsp(smb_cmd, midEntry->mid, midEntry->pid,
			       midEntry->when_sent, midEntry->when_received);
		if (cifsFYI & CIFS_TIMER) {
			pr_debug("slow rsp: cmd %d mid %llu",
				 midEntry->command, midEntry->mid);
			cifs_info("A: 0x%lx S: 0x%lx R: 0x%lx\n",
				  now - midEntry->when_alloc,
				  now - midEntry->when_sent,
				  now - midEntry->when_received);
		}
	}
#endif
	put_task_struct(midEntry->creator);

	mempool_free(midEntry, &cifs_mid_pool);
}
118 
119 void
delete_mid(struct TCP_Server_Info * server,struct mid_q_entry * mid)120 delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid)
121 {
122 	spin_lock(&server->mid_queue_lock);
123 
124 	if (!mid->deleted_from_q) {
125 		list_del_init(&mid->qhead);
126 		mid->deleted_from_q = true;
127 	}
128 	spin_unlock(&server->mid_queue_lock);
129 
130 	release_mid(server, mid);
131 }
132 
133 /*
134  * smb_send_kvec - send an array of kvecs to the server
135  * @server:	Server to send the data to
136  * @smb_msg:	Message to send
137  * @sent:	amount of data sent on socket is stored here
138  *
139  * Our basic "send data to server" function. Should be called with srv_mutex
140  * held. The caller is responsible for handling the results.
141  */
142 int
smb_send_kvec(struct TCP_Server_Info * server,struct msghdr * smb_msg,size_t * sent)143 smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
144 	      size_t *sent)
145 {
146 	int rc = 0;
147 	int retries = 0;
148 	struct socket *ssocket = server->ssocket;
149 
150 	*sent = 0;
151 
152 	if (server->noblocksnd)
153 		smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
154 	else
155 		smb_msg->msg_flags = MSG_NOSIGNAL;
156 
157 	while (msg_data_left(smb_msg)) {
158 		/*
159 		 * If blocking send, we try 3 times, since each can block
160 		 * for 5 seconds. For nonblocking  we have to try more
161 		 * but wait increasing amounts of time allowing time for
162 		 * socket to clear.  The overall time we wait in either
163 		 * case to send on the socket is about 15 seconds.
164 		 * Similarly we wait for 15 seconds for a response from
165 		 * the server in SendReceive[2] for the server to send
166 		 * a response back for most types of requests (except
167 		 * SMB Write past end of file which can be slow, and
168 		 * blocking lock operations). NFS waits slightly longer
169 		 * than CIFS, but this can make it take longer for
170 		 * nonresponsive servers to be detected and 15 seconds
171 		 * is more than enough time for modern networks to
172 		 * send a packet.  In most cases if we fail to send
173 		 * after the retries we will kill the socket and
174 		 * reconnect which may clear the network problem.
175 		 *
176 		 * Even if regular signals are masked, EINTR might be
177 		 * propagated from sk_stream_wait_memory() to here when
178 		 * TIF_NOTIFY_SIGNAL is used for task work. For example,
179 		 * certain io_uring completions will use that. Treat
180 		 * having EINTR with pending task work the same as EAGAIN
181 		 * to avoid unnecessary reconnects.
182 		 */
183 		rc = sock_sendmsg(ssocket, smb_msg);
184 		if (rc == -EAGAIN || unlikely(rc == -EINTR && task_work_pending(current))) {
185 			retries++;
186 			if (retries >= 14 ||
187 			    (!server->noblocksnd && (retries > 2))) {
188 				cifs_server_dbg(VFS, "sends on sock %p stuck for 15 seconds\n",
189 					 ssocket);
190 				return -EAGAIN;
191 			}
192 			msleep(1 << retries);
193 			continue;
194 		}
195 
196 		if (rc < 0)
197 			return rc;
198 
199 		if (rc == 0) {
200 			/* should never happen, letting socket clear before
201 			   retrying is our only obvious option here */
202 			cifs_server_dbg(VFS, "tcp sent no data\n");
203 			msleep(500);
204 			continue;
205 		}
206 
207 		/* send was at least partially successful */
208 		*sent += rc;
209 		retries = 0; /* in case we get ENOSPC on the next send */
210 	}
211 	return 0;
212 }
213 
214 unsigned long
smb_rqst_len(struct TCP_Server_Info * server,struct smb_rqst * rqst)215 smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
216 {
217 	unsigned int i;
218 	struct kvec *iov;
219 	int nvec;
220 	unsigned long buflen = 0;
221 
222 	if (!is_smb1(server) && rqst->rq_nvec >= 2 &&
223 	    rqst->rq_iov[0].iov_len == 4) {
224 		iov = &rqst->rq_iov[1];
225 		nvec = rqst->rq_nvec - 1;
226 	} else {
227 		iov = rqst->rq_iov;
228 		nvec = rqst->rq_nvec;
229 	}
230 
231 	/* total up iov array first */
232 	for (i = 0; i < nvec; i++)
233 		buflen += iov[i].iov_len;
234 
235 	buflen += iov_iter_count(&rqst->rq_iter);
236 	return buflen;
237 }
238 
/*
 * __smb_send_rqst - transmit one or more marshalled requests
 * @server:   connection to send on (TCP socket or smbdirect/RDMA)
 * @num_rqst: number of entries in @rqst
 * @rqst:     requests to send; each is a kvec array plus optional iter
 *
 * Prepends a 4-byte RFC1002 length marker, blocks all signals for the
 * duration of the socket sends so an interrupted send cannot leave a
 * partial SMB on the wire, and signals cifsd for reconnect if only part
 * of the data could be transmitted. Should be called with srv_mutex
 * held (see smb_send_kvec's contract).
 *
 * Returns 0 on full send, -EAGAIN/-ERESTARTSYS/-ECONNABORTED on failure.
 */
int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
		    struct smb_rqst *rqst)
{
	int rc;
	struct kvec *iov;
	int n_vec;
	unsigned int send_length = 0;
	unsigned int i, j;
	sigset_t mask, oldmask;
	size_t total_len = 0, sent, size;
	struct socket *ssocket = server->ssocket;
	struct msghdr smb_msg = {};
	__be32 rfc1002_marker;

	cifs_in_send_inc(server);
	/* RDMA transport bypasses the socket path entirely */
	if (cifs_rdma_enabled(server)) {
		/* return -EAGAIN when connecting or reconnecting */
		rc = -EAGAIN;
		if (server->smbd_conn)
			rc = smbd_send(server, num_rqst, rqst);
		goto smbd_done;
	}

	rc = -EAGAIN;
	if (ssocket == NULL)
		goto out;

	/* Bail before touching the wire if the caller is being killed */
	rc = -ERESTARTSYS;
	if (fatal_signal_pending(current)) {
		cifs_dbg(FYI, "signal pending before send request\n");
		goto out;
	}

	rc = 0;
	/* cork the socket */
	tcp_sock_set_cork(ssocket->sk, true);

	for (j = 0; j < num_rqst; j++)
		send_length += smb_rqst_len(server, &rqst[j]);
	rfc1002_marker = cpu_to_be32(send_length);

	/*
	 * We should not allow signals to interrupt the network send because
	 * any partial send will cause session reconnects thus increasing
	 * latency of system calls and overload a server with unnecessary
	 * requests.
	 */

	sigfillset(&mask);
	sigprocmask(SIG_BLOCK, &mask, &oldmask);

	/* Generate a rfc1002 marker */
	{
		struct kvec hiov = {
			.iov_base = &rfc1002_marker,
			.iov_len  = 4
		};
		iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4);
		rc = smb_send_kvec(server, &smb_msg, &sent);
		if (rc < 0)
			goto unmask;

		total_len += sent;
		/* marker bytes count toward the full-send check below */
		send_length += 4;
	}

	cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length);

	for (j = 0; j < num_rqst; j++) {
		iov = rqst[j].rq_iov;
		n_vec = rqst[j].rq_nvec;

		size = 0;
		for (i = 0; i < n_vec; i++) {
			dump_smb(iov[i].iov_base, iov[i].iov_len);
			size += iov[i].iov_len;
		}

		iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size);

		rc = smb_send_kvec(server, &smb_msg, &sent);
		if (rc < 0)
			goto unmask;

		total_len += sent;

		/* then send any trailing data carried by the iterator */
		if (iov_iter_count(&rqst[j].rq_iter) > 0) {
			smb_msg.msg_iter = rqst[j].rq_iter;
			rc = smb_send_kvec(server, &smb_msg, &sent);
			if (rc < 0)
				break;
			total_len += sent;
		}
	}

unmask:
	sigprocmask(SIG_SETMASK, &oldmask, NULL);

	/*
	 * If signal is pending but we have already sent the whole packet to
	 * the server we need to return success status to allow a corresponding
	 * mid entry to be kept in the pending requests queue thus allowing
	 * to handle responses from the server by the client.
	 *
	 * If only part of the packet has been sent there is no need to hide
	 * interrupt because the session will be reconnected anyway, so there
	 * won't be any response from the server to handle.
	 */

	if (signal_pending(current) && (total_len != send_length)) {
		cifs_dbg(FYI, "signal is pending after attempt to send\n");
		rc = -ERESTARTSYS;
	}

	/* uncork it */
	tcp_sock_set_cork(ssocket->sk, false);

	if ((total_len > 0) && (total_len != send_length)) {
		cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
			 send_length, total_len);
		/*
		 * If we have only sent part of an SMB then the next SMB could
		 * be taken as the remainder of this one. We need to kill the
		 * socket so the server throws away the partial SMB
		 */
		cifs_signal_cifsd_for_reconnect(server, false);
		trace_smb3_partial_send_reconnect(server->current_mid,
						  server->conn_id, server->hostname);
	}
smbd_done:
	/*
	 * there's hardly any use for the layers above to know the
	 * actual error code here. All they should do at this point is
	 * to retry the connection and hope it goes away.
	 */
	if (rc < 0 && rc != -EINTR && rc != -EAGAIN) {
		cifs_server_dbg(VFS, "Error %d sending data on socket to server\n",
			 rc);
		rc = -ECONNABORTED;
		cifs_signal_cifsd_for_reconnect(server, false);
	} else if (rc > 0)
		rc = 0;
out:
	cifs_in_send_dec(server);
	return rc;
}
385 
386 static int
smb_send_rqst(struct TCP_Server_Info * server,int num_rqst,struct smb_rqst * rqst,int flags)387 smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
388 	      struct smb_rqst *rqst, int flags)
389 {
390 	struct smb2_transform_hdr tr_hdr;
391 	struct smb_rqst new_rqst[MAX_COMPOUND] = {};
392 	struct kvec iov = {
393 		.iov_base = &tr_hdr,
394 		.iov_len = sizeof(tr_hdr),
395 	};
396 	int rc;
397 
398 	if (flags & CIFS_COMPRESS_REQ)
399 		return smb_compress(server, &rqst[0], __smb_send_rqst);
400 
401 	if (!(flags & CIFS_TRANSFORM_REQ))
402 		return __smb_send_rqst(server, num_rqst, rqst);
403 
404 	if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1))
405 		return smb_EIO1(smb_eio_trace_tx_max_compound, num_rqst);
406 
407 	if (!server->ops->init_transform_rq) {
408 		cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n");
409 		return smb_EIO(smb_eio_trace_tx_need_transform);
410 	}
411 
412 	new_rqst[0].rq_iov = &iov;
413 	new_rqst[0].rq_nvec = 1;
414 
415 	rc = server->ops->init_transform_rq(server, num_rqst + 1,
416 					    new_rqst, rqst);
417 	if (!rc) {
418 		rc = __smb_send_rqst(server, num_rqst + 1, new_rqst);
419 		smb3_free_compound_rqst(num_rqst, &new_rqst[1]);
420 	}
421 	return rc;
422 }
423 
/*
 * wait_for_free_credits - block until @num_credits are available
 * @server:      connection whose credit pool is consumed
 * @num_credits: credits required before proceeding
 * @timeout:     wait limit in ms; negative means wait indefinitely
 * @flags:       CIFS_OP_MASK op type plus CIFS_TIMEOUT_MASK behavior
 * @instance:    set to the reconnect instance the credits belong to
 *
 * Consumes credits and bumps in_flight under req_lock. Non-blocking
 * callers (oplock breaks) always proceed immediately; blocking-op
 * callers (CIFS_BLOCKING_OP) never charge the in-flight counters.
 * Returns 0, -EAGAIN, -ENOENT, -EBUSY (timeout) or -ERESTARTSYS.
 */
static int
wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
		      const int timeout, const int flags,
		      unsigned int *instance)
{
	long rc;
	int *credits;
	int optype;
	long int t;
	int scredits, in_flight;

	if (timeout < 0)
		t = MAX_JIFFY_OFFSET;
	else
		t = msecs_to_jiffies(timeout);

	optype = flags & CIFS_OP_MASK;

	*instance = 0;

	credits = server->ops->get_credits_field(server, optype);
	/* Since an echo is already inflight, no need to wait to send another */
	if (*credits <= 0 && optype == CIFS_ECHO_OP)
		return -EAGAIN;

	spin_lock(&server->req_lock);
	if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) {
		/* oplock breaks must not be held up */
		server->in_flight++;
		if (server->in_flight > server->max_in_flight)
			server->max_in_flight = server->in_flight;
		*credits -= 1;
		*instance = server->reconnect_instance;
		scredits = *credits;
		in_flight = server->in_flight;
		spin_unlock(&server->req_lock);

		trace_smb3_nblk_credits(server->current_mid,
				server->conn_id, server->hostname, scredits, -1, in_flight);
		cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
				__func__, 1, scredits);

		return 0;
	}

	/* Loop invariant: req_lock is held at the top of each iteration. */
	while (1) {
		spin_unlock(&server->req_lock);

		/* Give up immediately if the connection is being torn down. */
		spin_lock(&server->srv_lock);
		if (server->tcpStatus == CifsExiting) {
			spin_unlock(&server->srv_lock);
			return -ENOENT;
		}
		spin_unlock(&server->srv_lock);

		spin_lock(&server->req_lock);
		if (*credits < num_credits) {
			scredits = *credits;
			spin_unlock(&server->req_lock);

			cifs_num_waiters_inc(server);
			rc = wait_event_killable_timeout(server->request_q,
				has_credits(server, credits, num_credits), t);
			cifs_num_waiters_dec(server);
			if (!rc) {
				/* timed out waiting for credits */
				spin_lock(&server->req_lock);
				scredits = *credits;
				in_flight = server->in_flight;
				spin_unlock(&server->req_lock);

				trace_smb3_credit_timeout(server->current_mid,
						server->conn_id, server->hostname, scredits,
						num_credits, in_flight);
				cifs_server_dbg(VFS, "wait timed out after %d ms\n",
						timeout);
				return -EBUSY;
			}
			if (rc == -ERESTARTSYS)
				return -ERESTARTSYS;
			/* re-take the lock and re-check from the top */
			spin_lock(&server->req_lock);
		} else {
			/*
			 * For normal commands, reserve the last MAX_COMPOUND
			 * credits to compound requests.
			 * Otherwise these compounds could be permanently
			 * starved for credits by single-credit requests.
			 *
			 * To prevent spinning CPU, block this thread until
			 * there are >MAX_COMPOUND credits available.
			 * But only do this is we already have a lot of
			 * credits in flight to avoid triggering this check
			 * for servers that are slow to hand out credits on
			 * new sessions.
			 */
			if (!optype && num_credits == 1 &&
			    server->in_flight > 2 * MAX_COMPOUND &&
			    *credits <= MAX_COMPOUND) {
				spin_unlock(&server->req_lock);

				cifs_num_waiters_inc(server);
				rc = wait_event_killable_timeout(
					server->request_q,
					has_credits(server, credits,
						    MAX_COMPOUND + 1),
					t);
				cifs_num_waiters_dec(server);
				if (!rc) {
					spin_lock(&server->req_lock);
					scredits = *credits;
					in_flight = server->in_flight;
					spin_unlock(&server->req_lock);

					trace_smb3_credit_timeout(
							server->current_mid,
							server->conn_id, server->hostname,
							scredits, num_credits, in_flight);
					cifs_server_dbg(VFS, "wait timed out after %d ms\n",
							timeout);
					return -EBUSY;
				}
				if (rc == -ERESTARTSYS)
					return -ERESTARTSYS;
				spin_lock(&server->req_lock);
				continue;
			}

			/*
			 * Can not count locking commands against total
			 * as they are allowed to block on server.
			 */

			/* update # of requests on the wire to server */
			if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
				*credits -= num_credits;
				server->in_flight += num_credits;
				if (server->in_flight > server->max_in_flight)
					server->max_in_flight = server->in_flight;
				*instance = server->reconnect_instance;
			}
			scredits = *credits;
			in_flight = server->in_flight;
			spin_unlock(&server->req_lock);

			trace_smb3_waitff_credits(server->current_mid,
					server->conn_id, server->hostname, scredits,
					-(num_credits), in_flight);
			cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
					__func__, num_credits, scredits);
			break;
		}
	}
	return 0;
}
577 
/*
 * Acquire a single credit for a regular request, waiting without a
 * time limit (a negative timeout means wait indefinitely).
 */
int wait_for_free_request(struct TCP_Server_Info *server, const int flags,
			  unsigned int *instance)
{
	return wait_for_free_credits(server, 1, -1, flags, instance);
}
584 
/*
 * Reserve @num credits for a compound request, waiting up to 60s.
 * Fails fast with -EDEADLK when credits are short and nothing is in
 * flight, because in that case no server response can ever replenish
 * the pool and the wait would never end.
 */
static int
wait_for_compound_request(struct TCP_Server_Info *server, int num,
			  const int flags, unsigned int *instance)
{
	int *credits;
	int scredits, in_flight;

	credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);

	spin_lock(&server->req_lock);
	/* snapshot for the tracepoint/debug output below */
	scredits = *credits;
	in_flight = server->in_flight;

	if (*credits < num) {
		/*
		 * If the server is tight on resources or just gives us less
		 * credits for other reasons (e.g. requests are coming out of
		 * order and the server delays granting more credits until it
		 * processes a missing mid) and we exhausted most available
		 * credits there may be situations when we try to send
		 * a compound request but we don't have enough credits. At this
		 * point the client needs to decide if it should wait for
		 * additional credits or fail the request. If at least one
		 * request is in flight there is a high probability that the
		 * server will return enough credits to satisfy this compound
		 * request.
		 *
		 * Return immediately if no requests in flight since we will be
		 * stuck on waiting for credits.
		 */
		if (server->in_flight == 0) {
			spin_unlock(&server->req_lock);
			trace_smb3_insufficient_credits(server->current_mid,
					server->conn_id, server->hostname, scredits,
					num, in_flight);
			cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
					__func__, in_flight, num, scredits);
			return -EDEADLK;
		}
	}
	spin_unlock(&server->req_lock);

	/* 60 second cap so a stingy server cannot stall us forever */
	return wait_for_free_credits(server, num, 60000, flags,
				     instance);
}
630 
631 int
cifs_wait_mtu_credits(struct TCP_Server_Info * server,size_t size,size_t * num,struct cifs_credits * credits)632 cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size,
633 		      size_t *num, struct cifs_credits *credits)
634 {
635 	*num = size;
636 	credits->value = 0;
637 	credits->instance = server->reconnect_instance;
638 	return 0;
639 }
640 
wait_for_response(struct TCP_Server_Info * server,struct mid_q_entry * mid)641 int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *mid)
642 {
643 	unsigned int sleep_state = TASK_KILLABLE;
644 	int error;
645 
646 	if (mid->sr_flags & CIFS_INTERRUPTIBLE_WAIT)
647 		sleep_state = TASK_INTERRUPTIBLE;
648 
649 	error = wait_event_state(server->response_q,
650 				 mid->mid_state != MID_REQUEST_SUBMITTED &&
651 				 mid->mid_state != MID_RESPONSE_RECEIVED,
652 				 (sleep_state | TASK_FREEZABLE_UNSAFE));
653 	if (error < 0)
654 		return -ERESTARTSYS;
655 
656 	return 0;
657 }
658 
/*
 * Send a SMB request and set the callback function in the mid to handle
 * the result. Caller is responsible for dealing with timeouts.
 *
 * @exist_credits is consulted only when CIFS_HAS_CREDITS is set in
 * @flags; otherwise one credit is obtained here. On any failure the
 * credit is returned to the pool before reporting the error.
 */
int
cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
		mid_receive_t receive, mid_callback_t callback,
		mid_handle_t handle, void *cbdata, const int flags,
		const struct cifs_credits *exist_credits)
{
	int rc;
	struct mid_q_entry *mid;
	struct cifs_credits credits = { .value = 0, .instance = 0 };
	unsigned int instance;
	int optype;

	optype = flags & CIFS_OP_MASK;

	if ((flags & CIFS_HAS_CREDITS) == 0) {
		rc = wait_for_free_request(server, flags, &instance);
		if (rc)
			return rc;
		credits.value = 1;
		credits.instance = instance;
	} else
		instance = exist_credits->instance;

	cifs_server_lock(server);

	/*
	 * We can't use credits obtained from the previous session to send this
	 * request. Check if there were reconnects after we obtained credits and
	 * return -EAGAIN in such cases to let callers handle it.
	 */
	if (instance != server->reconnect_instance) {
		cifs_server_unlock(server);
		add_credits_and_wake_if(server, &credits, optype);
		return -EAGAIN;
	}

	mid = server->ops->setup_async_request(server, rqst);
	if (IS_ERR(mid)) {
		cifs_server_unlock(server);
		add_credits_and_wake_if(server, &credits, optype);
		return PTR_ERR(mid);
	}

	mid->sr_flags = flags;
	mid->receive = receive;
	mid->callback = callback;
	mid->callback_data = cbdata;
	mid->handle = handle;
	mid->mid_state = MID_REQUEST_SUBMITTED;

	/* put it on the pending_mid_q */
	spin_lock(&server->mid_queue_lock);
	list_add_tail(&mid->qhead, &server->pending_mid_q);
	spin_unlock(&server->mid_queue_lock);

	/*
	 * Need to store the time in mid before calling I/O. For call_async,
	 * I/O response may come back and free the mid entry on another thread.
	 */
	cifs_save_when_sent(mid);
	rc = smb_send_rqst(server, 1, rqst, flags);

	if (rc < 0) {
		/* undo the mid/sequence advance done by setup and send */
		revert_current_mid(server, mid->credits);
		server->sequence_number -= 2;
		delete_mid(server, mid);
	}

	cifs_server_unlock(server);

	if (rc == 0)
		return 0;

	add_credits_and_wake_if(server, &credits, optype);
	return rc;
}
739 
/*
 * Translate the final state of a synchronous @mid into an errno and
 * release the mid. On the unexpected-state path the mid is also
 * unlinked from the pending queue, since in that case no callback did
 * it for us. Returns 0 only for MID_RESPONSE_READY.
 */
int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
{
	int rc = 0;

	cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n",
		 __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state);

	spin_lock(&server->mid_queue_lock);
	switch (mid->mid_state) {
	case MID_RESPONSE_READY:
		/* success: caller keeps the mid (and its resp_buf) alive */
		spin_unlock(&server->mid_queue_lock);
		return rc;
	case MID_RETRY_NEEDED:
		rc = -EAGAIN;
		break;
	case MID_RESPONSE_MALFORMED:
		rc = smb_EIO(smb_eio_trace_rx_sync_mid_malformed);
		break;
	case MID_SHUTDOWN:
		rc = -EHOSTDOWN;
		break;
	case MID_RC:
		/* error code was stored on the mid by the demultiplexer */
		rc = mid->mid_rc;
		break;
	default:
		if (mid->deleted_from_q == false) {
			list_del_init(&mid->qhead);
			mid->deleted_from_q = true;
		}
		spin_unlock(&server->mid_queue_lock);
		cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
			 __func__, mid->mid, mid->mid_state);
		rc = smb_EIO1(smb_eio_trace_rx_sync_mid_invalid, mid->mid_state);
		goto sync_mid_done;
	}
	spin_unlock(&server->mid_queue_lock);

sync_mid_done:
	release_mid(server, mid);
	return rc;
}
781 
782 static void
cifs_compound_callback(struct TCP_Server_Info * server,struct mid_q_entry * mid)783 cifs_compound_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
784 {
785 	struct cifs_credits credits = {
786 		.value = server->ops->get_credits(mid),
787 		.instance = server->reconnect_instance,
788 	};
789 
790 	add_credits(server, &credits, mid->optype);
791 
792 	if (mid->mid_state == MID_RESPONSE_RECEIVED)
793 		mid->mid_state = MID_RESPONSE_READY;
794 }
795 
/*
 * Callback for the final mid of a compound chain: account credits like
 * every other part, then wake the thread waiting on the whole chain.
 */
static void
cifs_compound_last_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	cifs_compound_callback(server, mid);
	cifs_wake_up_task(server, mid);
}
802 
/*
 * Callback for a mid whose waiter has given up: account the credits,
 * then drop the mid reference instead of waking anyone — no task is
 * waiting on it anymore.
 */
static void
cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	cifs_compound_callback(server, mid);
	release_mid(server, mid);
}
809 
/*
 * Return a channel (master if none) of @ses that can be used to send
 * regular requests.
 *
 * If we are currently binding a new channel (negprot/sess.setup),
 * return the new incomplete channel.
 *
 * Selection is least-loaded by in_flight count, falling back to
 * round-robin (seeded by chan_seq) when all channels are equally
 * loaded. Terminated channels and channels needing reconnect are
 * skipped during the scan.
 */
struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
{
	uint index = 0;
	unsigned int min_in_flight = UINT_MAX, max_in_flight = 0;
	struct TCP_Server_Info *server = NULL;
	int i, start, cur;

	if (!ses)
		return NULL;

	spin_lock(&ses->chan_lock);
	/* advancing chan_seq rotates the round-robin starting point */
	start = atomic_inc_return(&ses->chan_seq);
	for (i = 0; i < ses->chan_count; i++) {
		cur = (start + i) % ses->chan_count;
		server = ses->chans[cur].server;
		if (!server || server->terminate)
			continue;

		if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur))
			continue;

		/*
		 * strictly speaking, we should pick up req_lock to read
		 * server->in_flight. But it shouldn't matter much here if we
		 * race while reading this data. The worst that can happen is
		 * that we could use a channel that's not least loaded. Avoiding
		 * taking the lock could help reduce wait time, which is
		 * important for this function
		 */
		if (server->in_flight < min_in_flight) {
			min_in_flight = server->in_flight;
			index = cur;
		}
		if (server->in_flight > max_in_flight)
			max_in_flight = server->in_flight;
	}

	/* if all channels are equally loaded, fall back to round-robin */
	if (min_in_flight == max_in_flight)
		index = (uint)start % ses->chan_count;

	server = ses->chans[index].server;
	spin_unlock(&ses->chan_lock);

	return server;
}
863 
864 int
compound_send_recv(const unsigned int xid,struct cifs_ses * ses,struct TCP_Server_Info * server,const int flags,const int num_rqst,struct smb_rqst * rqst,int * resp_buf_type,struct kvec * resp_iov)865 compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
866 		   struct TCP_Server_Info *server,
867 		   const int flags, const int num_rqst, struct smb_rqst *rqst,
868 		   int *resp_buf_type, struct kvec *resp_iov)
869 {
870 	int i, j, optype, rc = 0;
871 	struct mid_q_entry *mid[MAX_COMPOUND];
872 	bool cancelled_mid[MAX_COMPOUND] = {false};
873 	struct cifs_credits credits[MAX_COMPOUND] = {
874 		{ .value = 0, .instance = 0 }
875 	};
876 	unsigned int instance;
877 	char *buf;
878 
879 	optype = flags & CIFS_OP_MASK;
880 
881 	for (i = 0; i < num_rqst; i++)
882 		resp_buf_type[i] = CIFS_NO_BUFFER;  /* no response buf yet */
883 
884 	if (!ses || !ses->server || !server) {
885 		cifs_dbg(VFS, "Null session\n");
886 		return smb_EIO(smb_eio_trace_null_pointers);
887 	}
888 
889 	spin_lock(&server->srv_lock);
890 	if (server->tcpStatus == CifsExiting) {
891 		spin_unlock(&server->srv_lock);
892 		return -ENOENT;
893 	}
894 	spin_unlock(&server->srv_lock);
895 
896 	/*
897 	 * Wait for all the requests to become available.
898 	 * This approach still leaves the possibility to be stuck waiting for
899 	 * credits if the server doesn't grant credits to the outstanding
900 	 * requests and if the client is completely idle, not generating any
901 	 * other requests.
902 	 * This can be handled by the eventual session reconnect.
903 	 */
904 	rc = wait_for_compound_request(server, num_rqst, flags,
905 				       &instance);
906 	if (rc)
907 		return rc;
908 
909 	for (i = 0; i < num_rqst; i++) {
910 		credits[i].value = 1;
911 		credits[i].instance = instance;
912 	}
913 
914 	/*
915 	 * Make sure that we sign in the same order that we send on this socket
916 	 * and avoid races inside tcp sendmsg code that could cause corruption
917 	 * of smb data.
918 	 */
919 
920 	cifs_server_lock(server);
921 
922 	/*
923 	 * All the parts of the compound chain belong obtained credits from the
924 	 * same session. We can not use credits obtained from the previous
925 	 * session to send this request. Check if there were reconnects after
926 	 * we obtained credits and return -EAGAIN in such cases to let callers
927 	 * handle it.
928 	 */
929 	if (instance != server->reconnect_instance) {
930 		cifs_server_unlock(server);
931 		for (j = 0; j < num_rqst; j++)
932 			add_credits(server, &credits[j], optype);
933 		return -EAGAIN;
934 	}
935 
936 	for (i = 0; i < num_rqst; i++) {
937 		mid[i] = server->ops->setup_request(ses, server, &rqst[i]);
938 		if (IS_ERR(mid[i])) {
939 			revert_current_mid(server, i);
940 			for (j = 0; j < i; j++)
941 				delete_mid(server, mid[j]);
942 			cifs_server_unlock(server);
943 
944 			/* Update # of requests on wire to server */
945 			for (j = 0; j < num_rqst; j++)
946 				add_credits(server, &credits[j], optype);
947 			return PTR_ERR(mid[i]);
948 		}
949 
950 		mid[i]->sr_flags = flags;
951 		mid[i]->mid_state = MID_REQUEST_SUBMITTED;
952 		mid[i]->optype = optype;
953 		/*
954 		 * Invoke callback for every part of the compound chain
955 		 * to calculate credits properly. Wake up this thread only when
956 		 * the last element is received.
957 		 */
958 		if (i < num_rqst - 1)
959 			mid[i]->callback = cifs_compound_callback;
960 		else
961 			mid[i]->callback = cifs_compound_last_callback;
962 	}
963 	rc = smb_send_rqst(server, num_rqst, rqst, flags);
964 
965 	for (i = 0; i < num_rqst; i++)
966 		cifs_save_when_sent(mid[i]);
967 
968 	if (rc < 0) {
969 		revert_current_mid(server, num_rqst);
970 		server->sequence_number -= 2;
971 	}
972 
973 	cifs_server_unlock(server);
974 
975 	/*
976 	 * If sending failed for some reason or it is an oplock break that we
977 	 * will not receive a response to - return credits back
978 	 */
979 	if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) {
980 		for (i = 0; i < num_rqst; i++)
981 			add_credits(server, &credits[i], optype);
982 		goto out;
983 	}
984 
985 	/*
986 	 * At this point the request is passed to the network stack - we assume
987 	 * that any credits taken from the server structure on the client have
988 	 * been spent and we can't return them back. Once we receive responses
989 	 * we will collect credits granted by the server in the mid callbacks
990 	 * and add those credits to the server structure.
991 	 */
992 
993 	/*
994 	 * Compounding is never used during session establish.
995 	 */
996 	spin_lock(&ses->ses_lock);
997 	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
998 		spin_unlock(&ses->ses_lock);
999 
1000 		if (WARN_ON_ONCE(num_rqst != 1 || !resp_iov))
1001 			return -EINVAL;
1002 
1003 		cifs_server_lock(server);
1004 		smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
1005 		cifs_server_unlock(server);
1006 
1007 		spin_lock(&ses->ses_lock);
1008 	}
1009 	spin_unlock(&ses->ses_lock);
1010 
1011 	for (i = 0; i < num_rqst; i++) {
1012 		rc = wait_for_response(server, mid[i]);
1013 		if (rc != 0)
1014 			break;
1015 	}
1016 	if (rc != 0) {
1017 		for (; i < num_rqst; i++) {
1018 			cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
1019 				 mid[i]->mid, le16_to_cpu(mid[i]->command));
1020 			send_cancel(ses, server, &rqst[i], mid[i], xid);
1021 			spin_lock(&mid[i]->mid_lock);
1022 			mid[i]->wait_cancelled = true;
1023 			if (mid[i]->mid_state == MID_REQUEST_SUBMITTED ||
1024 			    mid[i]->mid_state == MID_RESPONSE_RECEIVED) {
1025 				mid[i]->callback = cifs_cancelled_callback;
1026 				cancelled_mid[i] = true;
1027 				credits[i].value = 0;
1028 			}
1029 			spin_unlock(&mid[i]->mid_lock);
1030 		}
1031 	}
1032 
1033 	for (i = 0; i < num_rqst; i++) {
1034 		if (rc < 0)
1035 			goto out;
1036 
1037 		rc = cifs_sync_mid_result(mid[i], server);
1038 		if (rc != 0) {
1039 			/* mark this mid as cancelled to not free it below */
1040 			cancelled_mid[i] = true;
1041 			goto out;
1042 		}
1043 
1044 		if (!mid[i]->resp_buf ||
1045 		    mid[i]->mid_state != MID_RESPONSE_READY) {
1046 			rc = smb_EIO1(smb_eio_trace_rx_mid_unready, mid[i]->mid_state);
1047 			cifs_dbg(FYI, "Bad MID state?\n");
1048 			goto out;
1049 		}
1050 
1051 		rc = server->ops->check_receive(mid[i], server,
1052 						flags & CIFS_LOG_ERROR);
1053 
1054 		if (resp_iov) {
1055 			buf = (char *)mid[i]->resp_buf;
1056 			resp_iov[i].iov_base = buf;
1057 			resp_iov[i].iov_len = mid[i]->resp_buf_size;
1058 
1059 			if (mid[i]->large_buf)
1060 				resp_buf_type[i] = CIFS_LARGE_BUFFER;
1061 			else
1062 				resp_buf_type[i] = CIFS_SMALL_BUFFER;
1063 
1064 			/* mark it so buf will not be freed by delete_mid */
1065 			if ((flags & CIFS_NO_RSP_BUF) == 0)
1066 				mid[i]->resp_buf = NULL;
1067 		}
1068 	}
1069 
1070 	/*
1071 	 * Compounding is never used during session establish.
1072 	 */
1073 	spin_lock(&ses->ses_lock);
1074 	if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
1075 		struct kvec iov = {
1076 			.iov_base = resp_iov[0].iov_base,
1077 			.iov_len = resp_iov[0].iov_len
1078 		};
1079 		spin_unlock(&ses->ses_lock);
1080 		cifs_server_lock(server);
1081 		smb311_update_preauth_hash(ses, server, &iov, 1);
1082 		cifs_server_unlock(server);
1083 		spin_lock(&ses->ses_lock);
1084 	}
1085 	spin_unlock(&ses->ses_lock);
1086 
1087 out:
1088 	/*
1089 	 * This will dequeue all mids. After this it is important that the
1090 	 * demultiplex_thread will not process any of these mids any further.
1091 	 * This is prevented above by using a noop callback that will not
1092 	 * wake this thread except for the very last PDU.
1093 	 */
1094 	for (i = 0; i < num_rqst; i++) {
1095 		if (!cancelled_mid[i])
1096 			delete_mid(server, mid[i]);
1097 	}
1098 
1099 	return rc;
1100 }
1101 
/*
 * Send a single SMB request and wait for its response.
 *
 * This is simply the degenerate case of a compound chain: one request,
 * one response slot. All credit accounting, signing and wait logic is
 * handled by compound_send_recv().
 */
int
cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
	       struct TCP_Server_Info *server,
	       struct smb_rqst *rqst, int *resp_buf_type, const int flags,
	       struct kvec *resp_iov)
{
	/* A single request is a compound chain of length one */
	return compound_send_recv(xid, ses, server, flags,
				  1, rqst, resp_buf_type, resp_iov);
}
1111 
1112 
1113 /*
1114  * Discard any remaining data in the current SMB. To do this, we borrow the
1115  * current bigbuf.
1116  */
1117 int
cifs_discard_remaining_data(struct TCP_Server_Info * server)1118 cifs_discard_remaining_data(struct TCP_Server_Info *server)
1119 {
1120 	unsigned int rfclen = server->pdu_size;
1121 	size_t remaining = rfclen - server->total_read;
1122 
1123 	while (remaining > 0) {
1124 		ssize_t length;
1125 
1126 		length = cifs_discard_from_socket(server,
1127 				min_t(size_t, remaining,
1128 				      CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
1129 		if (length < 0)
1130 			return length;
1131 		server->total_read += length;
1132 		remaining -= length;
1133 	}
1134 
1135 	return 0;
1136 }
1137 
1138 static int
__cifs_readv_discard(struct TCP_Server_Info * server,struct mid_q_entry * mid,bool malformed)1139 __cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
1140 		     bool malformed)
1141 {
1142 	int length;
1143 
1144 	length = cifs_discard_remaining_data(server);
1145 	dequeue_mid(server, mid, malformed);
1146 	mid->resp_buf = server->smallbuf;
1147 	server->smallbuf = NULL;
1148 	return length;
1149 }
1150 
1151 static int
cifs_readv_discard(struct TCP_Server_Info * server,struct mid_q_entry * mid)1152 cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1153 {
1154 	struct cifs_io_subrequest *rdata = mid->callback_data;
1155 
1156 	return  __cifs_readv_discard(server, mid, rdata->result);
1157 }
1158 
/*
 * Receive the response to an async read request directly from the socket.
 *
 * Called from the demultiplex thread once the header has been read down to
 * the Mid. Reads the rest of the READ_RSP header into server->smallbuf,
 * validates it, then streams the payload straight into the subrequest's
 * io_iter (or accepts it as already placed by RDMA). On any validation
 * failure the rest of the PDU is discarded and rdata->result records the
 * error. Returns bytes of payload read, or a negative error.
 */
int
cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
	int length, len;
	unsigned int data_offset, data_len;
	struct cifs_io_subrequest *rdata = mid->callback_data;
	char *buf = server->smallbuf;
	unsigned int buflen = server->pdu_size;
	bool use_rdma_mr = false;

	cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n",
		 __func__, mid->mid, rdata->subreq.start, rdata->subreq.len);

	/*
	 * read the rest of READ_RSP header (sans Data array), or whatever we
	 * can if there's not enough data. At this point, we've read down to
	 * the Mid.
	 */
	len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
							HEADER_SIZE(server) + 1;

	length = cifs_read_from_socket(server,
				       buf + HEADER_SIZE(server) - 1, len);
	if (length < 0)
		return length;
	server->total_read += length;

	/* A dead session means the rest of this PDU is not worth parsing */
	if (server->ops->is_session_expired &&
	    server->ops->is_session_expired(buf)) {
		cifs_reconnect(server, true);
		return -1;
	}

	/* Interim STATUS_PENDING response: real answer comes later */
	if (server->ops->is_status_pending &&
	    server->ops->is_status_pending(buf, server)) {
		cifs_discard_remaining_data(server);
		return -1;
	}

	/* set up first two iov for signature check and to get credits */
	rdata->iov[0].iov_base = buf;
	rdata->iov[0].iov_len = server->total_read;
	cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
		 rdata->iov[0].iov_base, rdata->iov[0].iov_len);

	/* Was the SMB read successful? */
	rdata->result = server->ops->map_error(buf, false);
	if (rdata->result != 0) {
		cifs_dbg(FYI, "%s: server returned error %d\n",
			 __func__, rdata->result);
		/* normal error on read response */
		return __cifs_readv_discard(server, mid, false);
	}

	/* Is there enough to get to the rest of the READ_RSP header? */
	if (server->total_read < server->vals->read_rsp_size) {
		cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n",
			 __func__, server->total_read,
			 server->vals->read_rsp_size);
		rdata->result = smb_EIO2(smb_eio_trace_read_rsp_short,
					 server->total_read, server->vals->read_rsp_size);
		return cifs_readv_discard(server, mid);
	}

	data_offset = server->ops->read_data_offset(buf);
	if (data_offset < server->total_read) {
		/*
		 * win2k8 sometimes sends an offset of 0 when the read
		 * is beyond the EOF. Treat it as if the data starts just after
		 * the header.
		 */
		cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n",
			 __func__, data_offset);
		data_offset = server->total_read;
	} else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
		/* data_offset is beyond the end of smallbuf */
		cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
			 __func__, data_offset);
		rdata->result = smb_EIO1(smb_eio_trace_read_overlarge,
					 data_offset);
		return cifs_readv_discard(server, mid);
	}

	cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n",
		 __func__, server->total_read, data_offset);

	/* Bytes of padding between the parsed header and the data array */
	len = data_offset - server->total_read;
	if (len > 0) {
		/* read any junk before data into the rest of smallbuf */
		length = cifs_read_from_socket(server,
					       buf + server->total_read, len);
		if (length < 0)
			return length;
		server->total_read += length;
		rdata->iov[0].iov_len = server->total_read;
	}

	/* how much data is in the response? */
#ifdef CONFIG_CIFS_SMB_DIRECT
	use_rdma_mr = rdata->mr;
#endif
	data_len = server->ops->read_data_length(buf, use_rdma_mr);
	/*
	 * NOTE(review): data_offset + data_len is unsigned arithmetic on
	 * server-supplied values; a data_len near UINT_MAX could wrap —
	 * confirm read_data_length() bounds its result.
	 */
	if (!use_rdma_mr && (data_offset + data_len > buflen)) {
		/* data_len is corrupt -- discard frame */
		rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed,
					 data_offset + data_len, buflen);
		return cifs_readv_discard(server, mid);
	}

#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr)
		length = data_len; /* An RDMA read is already done. */
	else
#endif
		length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter,
						    data_len);
	if (length > 0)
		rdata->got_bytes += length;
	/*
	 * NOTE(review): a negative length is added to total_read here; the
	 * short-read check below then routes to the discard path — verify
	 * this is the intended error handling for a failed socket read.
	 */
	server->total_read += length;

	cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
		 server->total_read, buflen, data_len);

	/* discard anything left over */
	if (server->total_read < buflen)
		return cifs_readv_discard(server, mid);

	/* Success: dequeue and pass smallbuf ownership to the mid */
	dequeue_mid(server, mid, false);
	mid->resp_buf = server->smallbuf;
	server->smallbuf = NULL;
	return length;
}
1291