xref: /freebsd/sys/ofed/drivers/infiniband/ulp/sdp/sdp_zcopy.c (revision 6de6c9569169a6a58ebd4ce5b4478b383eaa87dc)
1fe267a55SPedro F. Giffuni /*-
2fe267a55SPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3fe267a55SPedro F. Giffuni  *
4aa0a1e58SJeff Roberson  * Copyright (c) 2006 Mellanox Technologies Ltd.  All rights reserved.
5aa0a1e58SJeff Roberson  *
6aa0a1e58SJeff Roberson  * This software is available to you under a choice of one of two
7aa0a1e58SJeff Roberson  * licenses.  You may choose to be licensed under the terms of the GNU
8aa0a1e58SJeff Roberson  * General Public License (GPL) Version 2, available from the file
9aa0a1e58SJeff Roberson  * COPYING in the main directory of this source tree, or the
10aa0a1e58SJeff Roberson  * OpenIB.org BSD license below:
11aa0a1e58SJeff Roberson  *
12aa0a1e58SJeff Roberson  *     Redistribution and use in source and binary forms, with or
13aa0a1e58SJeff Roberson  *     without modification, are permitted provided that the following
14aa0a1e58SJeff Roberson  *     conditions are met:
15aa0a1e58SJeff Roberson  *
16aa0a1e58SJeff Roberson  *      - Redistributions of source code must retain the above
17aa0a1e58SJeff Roberson  *        copyright notice, this list of conditions and the following
18aa0a1e58SJeff Roberson  *        disclaimer.
19aa0a1e58SJeff Roberson  *
20aa0a1e58SJeff Roberson  *      - Redistributions in binary form must reproduce the above
21aa0a1e58SJeff Roberson  *        copyright notice, this list of conditions and the following
22aa0a1e58SJeff Roberson  *        disclaimer in the documentation and/or other materials
23aa0a1e58SJeff Roberson  *        provided with the distribution.
24aa0a1e58SJeff Roberson  *
25aa0a1e58SJeff Roberson  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26aa0a1e58SJeff Roberson  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27aa0a1e58SJeff Roberson  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28aa0a1e58SJeff Roberson  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29aa0a1e58SJeff Roberson  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30aa0a1e58SJeff Roberson  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31aa0a1e58SJeff Roberson  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32aa0a1e58SJeff Roberson  * SOFTWARE.
33aa0a1e58SJeff Roberson  */
34aa0a1e58SJeff Roberson #include <linux/tcp.h>
35aa0a1e58SJeff Roberson #include <asm/ioctls.h>
36aa0a1e58SJeff Roberson #include <linux/workqueue.h>
37aa0a1e58SJeff Roberson #include <linux/net.h>
38aa0a1e58SJeff Roberson #include <linux/socket.h>
39aa0a1e58SJeff Roberson #include <net/protocol.h>
40aa0a1e58SJeff Roberson #include <net/inet_common.h>
41aa0a1e58SJeff Roberson #include <rdma/rdma_cm.h>
42aa0a1e58SJeff Roberson #include <rdma/ib_verbs.h>
43aa0a1e58SJeff Roberson #include <rdma/ib_fmr_pool.h>
44aa0a1e58SJeff Roberson #include <rdma/ib_umem.h>
45aa0a1e58SJeff Roberson #include <net/tcp.h> /* for memcpy_toiovec */
46aa0a1e58SJeff Roberson #include <asm/io.h>
47aa0a1e58SJeff Roberson #include <asm/uaccess.h>
48aa0a1e58SJeff Roberson #include <linux/delay.h>
49aa0a1e58SJeff Roberson #include "sdp.h"
50aa0a1e58SJeff Roberson 
/*
 * Post a SrcAvail message advertising a zero-copy send buffer.
 *
 * Builds an SDP SrcAvail mbuf describing the FMR-mapped user memory in
 * tx_sa, inlines the first chunk of payload (combined mode requires
 * payload inside the SrcAvail packet itself), queues the mbuf on the tx
 * path and records tx_sa as the socket's outstanding SrcAvail state.
 *
 * Returns 0 on success, -ENOMEM if the mbuf could not be allocated.
 */
static int sdp_post_srcavail(struct socket *sk, struct tx_srcavail_state *tx_sa)
{
	struct sdp_sock *ssk = sdp_sk(sk);
	struct mbuf *mb;
	int payload_len;
	struct page *payload_pg;
	int off, len;
	struct ib_umem_chunk *chunk;

	/* Only one outstanding SrcAvail per socket is expected. */
	WARN_ON(ssk->tx_sa);

	BUG_ON(!tx_sa);
	BUG_ON(!tx_sa->fmr || !tx_sa->fmr->fmr->lkey);
	BUG_ON(!tx_sa->umem);
	BUG_ON(!tx_sa->umem->chunk_list.next);

	/* The inlined payload is taken from the first page of the first chunk. */
	chunk = list_entry(tx_sa->umem->chunk_list.next, struct ib_umem_chunk, list);
	BUG_ON(!chunk->nmap);

	off = tx_sa->umem->offset;
	len = tx_sa->umem->length;

	tx_sa->bytes_sent = tx_sa->bytes_acked = 0;

	mb = sdp_alloc_mb_srcavail(sk, len, tx_sa->fmr->fmr->lkey, off, 0);
	if (!mb) {
		return -ENOMEM;
	}
	sdp_dbg_data(sk, "sending SrcAvail\n");

	TX_SRCAVAIL_STATE(mb) = tx_sa; /* tx_sa is hanged on the mb
					 * but continue to live after mb is freed */
	ssk->tx_sa = tx_sa;

	/* must have payload inlined in SrcAvail packet in combined mode */
	payload_len = MIN(tx_sa->umem->page_size - off, len);
	payload_len = MIN(payload_len, ssk->xmit_size_goal - sizeof(struct sdp_srcah));
	payload_pg  = sg_page(&chunk->page_list[0]);
	get_page(payload_pg); /* extra ref: the mbuf frag now points into this page */

	sdp_dbg_data(sk, "payload: off: 0x%x, pg: %p, len: 0x%x\n",
		off, payload_pg, payload_len);

	mb_fill_page_desc(mb, mb_shinfo(mb)->nr_frags,
			payload_pg, off, payload_len);

	/* Account the inlined payload on the mbuf itself. */
	mb->len             += payload_len;
	mb->data_len         = payload_len;
	mb->truesize        += payload_len;
//	sk->sk_wmem_queued   += payload_len;
//	sk->sk_forward_alloc -= payload_len;

	mb_entail(sk, ssk, mb);

	ssk->write_seq += payload_len;
	SDP_SKB_CB(mb)->end_seq += payload_len;

	/*
	 * The whole buffer is advertised as sent; only the inlined part
	 * counts as already acknowledged.
	 */
	tx_sa->bytes_sent = tx_sa->umem->length;
	tx_sa->bytes_acked = payload_len;

	/* TODO: pushing the mb into the tx_queue should be enough */

	return 0;
}
115aa0a1e58SJeff Roberson 
sdp_post_srcavail_cancel(struct socket * sk)116aa0a1e58SJeff Roberson static int sdp_post_srcavail_cancel(struct socket *sk)
117aa0a1e58SJeff Roberson {
118aa0a1e58SJeff Roberson 	struct sdp_sock *ssk = sdp_sk(sk);
119aa0a1e58SJeff Roberson 	struct mbuf *mb;
120aa0a1e58SJeff Roberson 
121aa0a1e58SJeff Roberson 	sdp_dbg_data(ssk->socket, "Posting srcavail cancel\n");
122aa0a1e58SJeff Roberson 
123aa0a1e58SJeff Roberson 	mb = sdp_alloc_mb_srcavail_cancel(sk, 0);
124aa0a1e58SJeff Roberson 	mb_entail(sk, ssk, mb);
125aa0a1e58SJeff Roberson 
126aa0a1e58SJeff Roberson 	sdp_post_sends(ssk, 0);
127aa0a1e58SJeff Roberson 
128aa0a1e58SJeff Roberson 	schedule_delayed_work(&ssk->srcavail_cancel_work,
129aa0a1e58SJeff Roberson 			SDP_SRCAVAIL_CANCEL_TIMEOUT);
130aa0a1e58SJeff Roberson 
131aa0a1e58SJeff Roberson 	return 0;
132aa0a1e58SJeff Roberson }
133aa0a1e58SJeff Roberson 
srcavail_cancel_timeout(struct work_struct * work)134aa0a1e58SJeff Roberson void srcavail_cancel_timeout(struct work_struct *work)
135aa0a1e58SJeff Roberson {
136aa0a1e58SJeff Roberson 	struct sdp_sock *ssk =
137aa0a1e58SJeff Roberson 		container_of(work, struct sdp_sock, srcavail_cancel_work.work);
138aa0a1e58SJeff Roberson 	struct socket *sk = ssk->socket;
139aa0a1e58SJeff Roberson 
140aa0a1e58SJeff Roberson 	lock_sock(sk);
141aa0a1e58SJeff Roberson 
142aa0a1e58SJeff Roberson 	sdp_dbg_data(sk, "both SrcAvail and SrcAvailCancel timedout."
143aa0a1e58SJeff Roberson 			" closing connection\n");
144aa0a1e58SJeff Roberson 	sdp_set_error(sk, -ECONNRESET);
145aa0a1e58SJeff Roberson 	wake_up(&ssk->wq);
146aa0a1e58SJeff Roberson 
147aa0a1e58SJeff Roberson 	release_sock(sk);
148aa0a1e58SJeff Roberson }
149aa0a1e58SJeff Roberson 
/*
 * Sleep until the peer acknowledges the outstanding SrcAvail with a
 * RdmaRdCompl (bytes_acked catches up to bytes_sent), or until one of
 * the abort conditions hits.
 *
 * Returns 0 on full acknowledgement;
 *   -ETIME  on timeout or a crossing SrcAvail from the peer,
 *   -EINVAL if the peer acked more than was sent, or the QP died,
 *   -EAGAIN if the peer sent SendSM (SrcAvail aborted),
 *   -EINTR  on a pending signal (unless ignore_signals is set).
 * The matching TX_SA_* flag is set in tx_sa->abort_flags for each case.
 * *timeo_p is decremented by the time actually slept.
 */
static int sdp_wait_rdmardcompl(struct sdp_sock *ssk, long *timeo_p,
		int ignore_signals)
{
	struct socket *sk = ssk->socket;
	int err = 0;
	long vm_wait = 0;
	long current_timeo = *timeo_p;
	struct tx_srcavail_state *tx_sa = ssk->tx_sa;
	DEFINE_WAIT(wait);

	sdp_dbg_data(sk, "sleep till RdmaRdCompl. timeo = %ld.\n", *timeo_p);
	sdp_prf1(sk, NULL, "Going to sleep");
	/* Loop as long as the QP is alive; each pass re-checks abort causes. */
	while (ssk->qp_active) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (unlikely(!*timeo_p)) {
			err = -ETIME;
			tx_sa->abort_flags |= TX_SA_TIMEDOUT;
			sdp_prf1(sk, NULL, "timeout");
			SDPSTATS_COUNTER_INC(zcopy_tx_timeout);
			break;
		}

		/* Peer acked more bytes than we advertised - protocol error. */
		else if (tx_sa->bytes_acked > tx_sa->bytes_sent) {
			err = -EINVAL;
			sdp_dbg_data(sk, "acked bytes > sent bytes\n");
			tx_sa->abort_flags |= TX_SA_ERROR;
			break;
		}

		/* Peer refused the SrcAvail via SendSM. */
		if (tx_sa->abort_flags & TX_SA_SENDSM) {
			sdp_prf1(sk, NULL, "Aborting SrcAvail sending");
			SDPSTATS_COUNTER_INC(zcopy_tx_aborted);
			err = -EAGAIN;
			break ;
		}

		if (!ignore_signals) {
			if (signal_pending(current)) {
				err = -EINTR;
				sdp_prf1(sk, NULL, "signalled");
				tx_sa->abort_flags |= TX_SA_INTRRUPTED;
				break;
			}

			/*
			 * Peer posted its own SrcAvail while ours is still
			 * partially unacked - abort ours to avoid deadlock.
			 */
			if (ssk->rx_sa && (tx_sa->bytes_acked < tx_sa->bytes_sent)) {
				sdp_dbg_data(sk, "Crossing SrcAvail - aborting this\n");
				tx_sa->abort_flags |= TX_SA_CROSS_SEND;
				SDPSTATS_COUNTER_INC(zcopy_cross_send);
				err = -ETIME;
				break ;
			}
		}

		/* Drop the posts handler reference while sleeping. */
		posts_handler_put(ssk);

		/*
		 * NOTE(review): this wait condition is a conjunction that is
		 * always false while vm_wait == 0, so the sleep only ends via
		 * timeout or an explicit wake_up from the rx path - confirm
		 * that this is the intent (an '||' chain would wake eagerly).
		 */
		sk_wait_event(sk, &current_timeo,
				tx_sa->abort_flags &&
				ssk->rx_sa &&
				(tx_sa->bytes_acked < tx_sa->bytes_sent) &&
				vm_wait);
		sdp_dbg_data(ssk->socket, "woke up sleepers\n");

		posts_handler_get(ssk);

		/* Fully acknowledged - done. */
		if (tx_sa->bytes_acked == tx_sa->bytes_sent)
			break;

		if (vm_wait) {
			vm_wait -= current_timeo;
			current_timeo = *timeo_p;
			if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
			    (current_timeo -= vm_wait) < 0)
				current_timeo = 0;
			vm_wait = 0;
		}
		/* Report remaining timeout back to the caller. */
		*timeo_p = current_timeo;
	}

	finish_wait(sk->sk_sleep, &wait);

	sdp_dbg_data(sk, "Finished waiting - RdmaRdCompl: %d/%d bytes, flags: 0x%x\n",
			tx_sa->bytes_acked, tx_sa->bytes_sent, tx_sa->abort_flags);

	/* QP teardown while waiting overrides any other result. */
	if (!ssk->qp_active) {
		sdp_dbg(sk, "QP destroyed while waiting\n");
		return -EINVAL;
	}
	return err;
}
240aa0a1e58SJeff Roberson 
/*
 * Block (uninterruptibly) until the in-flight RDMA read completes,
 * i.e. until the tx CQ handler clears rdma_inflight->busy.
 *
 * Gives up early if the QP is destroyed; warns loudly if the 5 second
 * timeout expires, since a lost RDMA completion indicates a serious
 * HCA/protocol problem.
 */
static void sdp_wait_rdma_wr_finished(struct sdp_sock *ssk)
{
	struct socket *sk = ssk->socket;
	long timeo = HZ * 5; /* Timeout for RDMA read */
	DEFINE_WAIT(wait);

	sdp_dbg_data(sk, "Sleep till RDMA wr finished.\n");
	while (1) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);

		/* CQ handler cleared the busy flag - the read is done. */
		if (!ssk->tx_ring.rdma_inflight->busy) {
			sdp_dbg_data(sk, "got rdma cqe\n");
			break;
		}

		if (!ssk->qp_active) {
			sdp_dbg_data(sk, "QP destroyed\n");
			break;
		}

		/* timeo is consumed by sk_wait_event below. */
		if (!timeo) {
			sdp_warn(sk, "Panic: Timed out waiting for RDMA read\n");
			WARN_ON(1);
			break;
		}

		/* Drop the posts handler reference while sleeping. */
		posts_handler_put(ssk);

		sdp_prf1(sk, NULL, "Going to sleep");
		sk_wait_event(sk, &timeo,
			!ssk->tx_ring.rdma_inflight->busy);
		sdp_prf1(sk, NULL, "Woke up");
		sdp_dbg_data(ssk->socket, "woke up sleepers\n");

		posts_handler_get(ssk);
	}

	finish_wait(sk->sk_sleep, &wait);

	sdp_dbg_data(sk, "Finished waiting\n");
}
282aa0a1e58SJeff Roberson 
sdp_post_rdma_rd_compl(struct sdp_sock * ssk,struct rx_srcavail_state * rx_sa)283aa0a1e58SJeff Roberson int sdp_post_rdma_rd_compl(struct sdp_sock *ssk,
284aa0a1e58SJeff Roberson 		struct rx_srcavail_state *rx_sa)
285aa0a1e58SJeff Roberson {
286aa0a1e58SJeff Roberson 	struct mbuf *mb;
287aa0a1e58SJeff Roberson 	int copied = rx_sa->used - rx_sa->reported;
288aa0a1e58SJeff Roberson 
289aa0a1e58SJeff Roberson 	if (rx_sa->used <= rx_sa->reported)
290aa0a1e58SJeff Roberson 		return 0;
291aa0a1e58SJeff Roberson 
292aa0a1e58SJeff Roberson 	mb = sdp_alloc_mb_rdmardcompl(ssk->socket, copied, 0);
293aa0a1e58SJeff Roberson 
294aa0a1e58SJeff Roberson 	rx_sa->reported += copied;
295aa0a1e58SJeff Roberson 
296aa0a1e58SJeff Roberson 	/* TODO: What if no tx_credits available? */
297aa0a1e58SJeff Roberson 	sdp_post_send(ssk, mb);
298aa0a1e58SJeff Roberson 
299aa0a1e58SJeff Roberson 	return 0;
300aa0a1e58SJeff Roberson }
301aa0a1e58SJeff Roberson 
sdp_post_sendsm(struct socket * sk)302aa0a1e58SJeff Roberson int sdp_post_sendsm(struct socket *sk)
303aa0a1e58SJeff Roberson {
304aa0a1e58SJeff Roberson 	struct mbuf *mb = sdp_alloc_mb_sendsm(sk, 0);
305aa0a1e58SJeff Roberson 
306aa0a1e58SJeff Roberson 	sdp_post_send(sdp_sk(sk), mb);
307aa0a1e58SJeff Roberson 
308aa0a1e58SJeff Roberson 	return 0;
309aa0a1e58SJeff Roberson }
310aa0a1e58SJeff Roberson 
sdp_update_iov_used(struct socket * sk,struct iovec * iov,int len)311aa0a1e58SJeff Roberson static int sdp_update_iov_used(struct socket *sk, struct iovec *iov, int len)
312aa0a1e58SJeff Roberson {
313aa0a1e58SJeff Roberson 	sdp_dbg_data(sk, "updating consumed 0x%x bytes from iov\n", len);
314aa0a1e58SJeff Roberson 	while (len > 0) {
315aa0a1e58SJeff Roberson 		if (iov->iov_len) {
316aa0a1e58SJeff Roberson 			int copy = min_t(unsigned int, iov->iov_len, len);
317aa0a1e58SJeff Roberson 			len -= copy;
318aa0a1e58SJeff Roberson 			iov->iov_len -= copy;
319aa0a1e58SJeff Roberson 			iov->iov_base += copy;
320aa0a1e58SJeff Roberson 		}
321aa0a1e58SJeff Roberson 		iov++;
322aa0a1e58SJeff Roberson 	}
323aa0a1e58SJeff Roberson 
324aa0a1e58SJeff Roberson 	return 0;
325aa0a1e58SJeff Roberson }
326aa0a1e58SJeff Roberson 
sge_bytes(struct ib_sge * sge,int sge_cnt)327aa0a1e58SJeff Roberson static inline int sge_bytes(struct ib_sge *sge, int sge_cnt)
328aa0a1e58SJeff Roberson {
329aa0a1e58SJeff Roberson 	int bytes = 0;
330aa0a1e58SJeff Roberson 
331aa0a1e58SJeff Roberson 	while (sge_cnt > 0) {
332aa0a1e58SJeff Roberson 		bytes += sge->length;
333aa0a1e58SJeff Roberson 		sge++;
334aa0a1e58SJeff Roberson 		sge_cnt--;
335aa0a1e58SJeff Roberson 	}
336aa0a1e58SJeff Roberson 
337aa0a1e58SJeff Roberson 	return bytes;
338aa0a1e58SJeff Roberson }
/*
 * Handle an incoming SendSM from the peer: if it targets the currently
 * outstanding SrcAvail (mseq_ack covers its sequence number), mark the
 * SrcAvail aborted, cancel the pending cancel-timeout work and wake the
 * sender sleeping in sdp_wait_rdmardcompl(). Stale or orphaned SendSMs
 * are logged and ignored. Runs under tx_sa_lock.
 */
void sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack)
{
	struct socket *sk = ssk->socket;
	unsigned long irq_flags;

	spin_lock_irqsave(&ssk->tx_sa_lock, irq_flags);

	if (!ssk->tx_sa) {
		/* No SrcAvail outstanding - nothing to abort. */
		sdp_prf1(sk, NULL, "SendSM for cancelled/finished SrcAvail");
	} else if (ssk->tx_sa->mseq > mseq_ack) {
		/* Stale SendSM referring to an already-replaced SrcAvail. */
		sdp_dbg_data(sk, "SendSM arrived for old SrcAvail. "
			"SendSM mseq_ack: 0x%x, SrcAvail mseq: 0x%x\n",
			mseq_ack, ssk->tx_sa->mseq);
	} else {
		sdp_dbg_data(sk, "Got SendSM - aborting SrcAvail\n");

		ssk->tx_sa->abort_flags |= TX_SA_SENDSM;
		cancel_delayed_work(&ssk->srcavail_cancel_work);

		wake_up(sk->sk_sleep);
		sdp_dbg_data(sk, "woke up sleepers\n");
	}

	spin_unlock_irqrestore(&ssk->tx_sa_lock, irq_flags);
}
369aa0a1e58SJeff Roberson 
/*
 * Handle an incoming RdmaRdCompl from the peer: credit bytes_completed
 * against the outstanding SrcAvail and wake the sender sleeping in
 * sdp_wait_rdmardcompl(). Completions for aborted or stale SrcAvails
 * are logged and dropped. Runs under tx_sa_lock.
 */
void sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack,
		u32 bytes_completed)
{
	struct socket *sk;
	unsigned long flags;

	/* Fix: the original BUG_ON(!ssk) sat after several ssk->
	 * dereferences (debug prints and the spin_lock), so it could never
	 * fire before the crash it was meant to diagnose. Check first. */
	BUG_ON(!ssk);
	sk = ssk->socket;

	sdp_prf1(sk, NULL, "RdmaRdCompl ssk=%p tx_sa=%p", ssk, ssk->tx_sa);
	sdp_dbg_data(sk, "RdmaRdCompl ssk=%p tx_sa=%p\n", ssk, ssk->tx_sa);

	spin_lock_irqsave(&ssk->tx_sa_lock, flags);

	if (!ssk->tx_sa) {
		sdp_dbg_data(sk, "Got RdmaRdCompl for aborted SrcAvail\n");
		goto out;
	}

	/* Stale completion referring to an already-replaced SrcAvail. */
	if (ssk->tx_sa->mseq > mseq_ack) {
		sdp_dbg_data(sk, "RdmaRdCompl arrived for old SrcAvail. "
			"SendSM mseq_ack: 0x%x, SrcAvail mseq: 0x%x\n",
			mseq_ack, ssk->tx_sa->mseq);
		goto out;
	}

	ssk->tx_sa->bytes_acked += bytes_completed;

	wake_up(sk->sk_sleep);
	sdp_dbg_data(sk, "woke up sleepers\n");

out:
	spin_unlock_irqrestore(&ssk->tx_sa_lock, flags);
	return;
}
404aa0a1e58SJeff Roberson 
sdp_get_max_memlockable_bytes(unsigned long offset)405aa0a1e58SJeff Roberson static unsigned long sdp_get_max_memlockable_bytes(unsigned long offset)
406aa0a1e58SJeff Roberson {
407aa0a1e58SJeff Roberson 	unsigned long avail;
408aa0a1e58SJeff Roberson 	unsigned long lock_limit;
409aa0a1e58SJeff Roberson 
410aa0a1e58SJeff Roberson 	if (capable(CAP_IPC_LOCK))
411aa0a1e58SJeff Roberson 		return ULONG_MAX;
412aa0a1e58SJeff Roberson 
413aa0a1e58SJeff Roberson 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
414aa0a1e58SJeff Roberson 	avail = lock_limit - (current->mm->locked_vm << PAGE_SHIFT);
415aa0a1e58SJeff Roberson 
416aa0a1e58SJeff Roberson 	return avail - offset;
417aa0a1e58SJeff Roberson }
418aa0a1e58SJeff Roberson 
/*
 * Pin the user buffer [uaddr, uaddr+len) and map it through the device
 * FMR pool so it can be used as an RDMA target/source.
 *
 * len is silently clamped to SDP_MAX_RDMA_READ_LEN and to what
 * RLIMIT_MEMLOCK still allows; the caller learns the actual mapped
 * length from (*_umem)->length.
 *
 * On success returns 0 and sets *_fmr / *_umem (caller releases them
 * with sdp_free_fmr()). On failure returns a negative errno and leaves
 * the output pointers untouched.
 */
static int sdp_alloc_fmr(struct socket *sk, void *uaddr, size_t len,
	struct ib_pool_fmr **_fmr, struct ib_umem **_umem)
{
	struct ib_pool_fmr *fmr;
	struct ib_umem *umem;
	struct ib_device *dev;
	u64 *pages;
	struct ib_umem_chunk *chunk;
	int n, j, k;
	int rc = 0;
	unsigned long max_lockable_bytes;

	if (unlikely(len > SDP_MAX_RDMA_READ_LEN)) {
		sdp_dbg_data(sk, "len:0x%lx > FMR_SIZE: 0x%lx\n",
			len, SDP_MAX_RDMA_READ_LEN);
		len = SDP_MAX_RDMA_READ_LEN;
	}

	max_lockable_bytes = sdp_get_max_memlockable_bytes((unsigned long)uaddr & ~PAGE_MASK);
	if (unlikely(len > max_lockable_bytes)) {
		sdp_dbg_data(sk, "len:0x%lx > RLIMIT_MEMLOCK available: 0x%lx\n",
			len, max_lockable_bytes);
		len = max_lockable_bytes;
	}

	sdp_dbg_data(sk, "user buf: %p, len:0x%lx max_lockable_bytes: 0x%lx\n",
			uaddr, len, max_lockable_bytes);

	umem = ib_umem_get(&sdp_sk(sk)->context, (unsigned long)uaddr, len,
		IB_ACCESS_REMOTE_WRITE, 0);

	if (IS_ERR(umem)) {
		rc = PTR_ERR(umem);
		sdp_warn(sk, "Error doing umem_get 0x%lx bytes: %d\n", len, rc);
		sdp_warn(sk, "RLIMIT_MEMLOCK: 0x%lx[cur] 0x%lx[max] CAP_IPC_LOCK: %d\n",
				current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur,
				current->signal->rlim[RLIMIT_MEMLOCK].rlim_max,
				capable(CAP_IPC_LOCK));
		goto err_umem_get;
	}

	sdp_dbg_data(sk, "umem->offset = 0x%x, length = 0x%lx\n",
		umem->offset, umem->length);

	/* Scratch page for the DMA address list handed to the FMR pool. */
	pages = (u64 *) __get_free_page(GFP_KERNEL);
	if (!pages) {
		/* Fix: rc used to remain 0 here, reporting success with
		 * *_fmr / *_umem never set. */
		rc = -ENOMEM;
		goto err_pages_alloc;
	}

	n = 0;

	/* Collect the DMA address of every page of the pinned region. */
	dev = sdp_sk(sk)->ib_device;
	list_for_each_entry(chunk, &umem->chunk_list, list) {
		for (j = 0; j < chunk->nmap; ++j) {
			len = ib_sg_dma_len(dev,
					&chunk->page_list[j]) >> PAGE_SHIFT;

			for (k = 0; k < len; ++k) {
				pages[n++] = ib_sg_dma_address(dev,
						&chunk->page_list[j]) +
					umem->page_size * k;

			}
		}
	}

	fmr = ib_fmr_pool_map_phys(sdp_sk(sk)->sdp_dev->fmr_pool, pages, n, 0);
	if (IS_ERR(fmr)) {
		/* Fix: rc used to remain 0 here as well. */
		rc = PTR_ERR(fmr);
		sdp_warn(sk, "Error allocating fmr: %ld\n", PTR_ERR(fmr));
		goto err_fmr_alloc;
	}

	free_page((unsigned long) pages);

	*_umem = umem;
	*_fmr = fmr;

	return 0;

err_fmr_alloc:
	free_page((unsigned long) pages);

err_pages_alloc:
	ib_umem_release(umem);

err_umem_get:

	return rc;
}
507aa0a1e58SJeff Roberson 
/*
 * Undo sdp_alloc_fmr(): unmap the FMR and release the pinned user
 * memory, NULLing both pointers.
 *
 * NOTE(review): when the QP is no longer active this returns without
 * unmapping or releasing anything - presumably the resources are
 * reclaimed as part of QP/connection teardown; confirm, otherwise the
 * fmr and umem leak on this path.
 */
void sdp_free_fmr(struct socket *sk, struct ib_pool_fmr **_fmr, struct ib_umem **_umem)
{
	if (!sdp_sk(sk)->qp_active)
		return;

	ib_fmr_pool_unmap(*_fmr);
	*_fmr = NULL;

	ib_umem_release(*_umem);
	*_umem = NULL;
}
519aa0a1e58SJeff Roberson 
sdp_post_rdma_read(struct socket * sk,struct rx_srcavail_state * rx_sa)520aa0a1e58SJeff Roberson static int sdp_post_rdma_read(struct socket *sk, struct rx_srcavail_state *rx_sa)
521aa0a1e58SJeff Roberson {
522aa0a1e58SJeff Roberson 	struct sdp_sock *ssk = sdp_sk(sk);
523aa0a1e58SJeff Roberson 	struct ib_send_wr *bad_wr;
524aa0a1e58SJeff Roberson 	struct ib_send_wr wr = { NULL };
525aa0a1e58SJeff Roberson 	struct ib_sge sge;
526aa0a1e58SJeff Roberson 
527aa0a1e58SJeff Roberson 	wr.opcode = IB_WR_RDMA_READ;
528aa0a1e58SJeff Roberson 	wr.next = NULL;
529aa0a1e58SJeff Roberson 	wr.wr_id = SDP_OP_RDMA;
530aa0a1e58SJeff Roberson 	wr.wr.rdma.rkey = rx_sa->rkey;
531aa0a1e58SJeff Roberson 	wr.send_flags = 0;
532aa0a1e58SJeff Roberson 
533aa0a1e58SJeff Roberson 	ssk->tx_ring.rdma_inflight = rx_sa;
534aa0a1e58SJeff Roberson 
535aa0a1e58SJeff Roberson 	sge.addr = rx_sa->umem->offset;
536aa0a1e58SJeff Roberson 	sge.length = rx_sa->umem->length;
537aa0a1e58SJeff Roberson 	sge.lkey = rx_sa->fmr->fmr->lkey;
538aa0a1e58SJeff Roberson 
539aa0a1e58SJeff Roberson 	wr.wr.rdma.remote_addr = rx_sa->vaddr + rx_sa->used;
540aa0a1e58SJeff Roberson 	wr.num_sge = 1;
541aa0a1e58SJeff Roberson 	wr.sg_list = &sge;
542aa0a1e58SJeff Roberson 	rx_sa->busy++;
543aa0a1e58SJeff Roberson 
544aa0a1e58SJeff Roberson 	wr.send_flags = IB_SEND_SIGNALED;
545aa0a1e58SJeff Roberson 
546aa0a1e58SJeff Roberson 	return ib_post_send(ssk->qp, &wr, &bad_wr);
547aa0a1e58SJeff Roberson }
548aa0a1e58SJeff Roberson 
/*
 * Zero-copy receive path for a SrcAvail mbuf: pin and FMR-map the
 * user's iovec, RDMA-read the peer's advertised buffer directly into
 * it, wait for the read to complete, then account the copied bytes
 * (iovec advance, rx_sa->used, rcv_nxt).
 *
 * *used carries the requested length in and the actually copied length
 * out. Returns 0 on success or a negative errno; on failure with a
 * still-active QP, rx_sa is flagged RX_SA_ABORTED so the caller falls
 * back (e.g. posts SendSM).
 */
int sdp_rdma_to_iovec(struct socket *sk, struct iovec *iov, struct mbuf *mb,
		unsigned long *used)
{
	struct sdp_sock *ssk = sdp_sk(sk);
	struct rx_srcavail_state *rx_sa = RX_SRCAVAIL_STATE(mb);
	int got_srcavail_cancel;
	int rc = 0;
	int len = *used;
	int copied;

	sdp_dbg_data(ssk->socket, "preparing RDMA read."
		" len: 0x%x. buffer len: 0x%lx\n", len, iov->iov_len);

	/* Hold the socket for the duration of the RDMA read. */
	sock_hold(sk, SOCK_REF_RDMA_RD);

	/* Never read more than the peer advertised. */
	if (len > rx_sa->len) {
		sdp_warn(sk, "len:0x%x > rx_sa->len: 0x%x\n", len, rx_sa->len);
		WARN_ON(1);
		len = rx_sa->len;
	}

	rc = sdp_alloc_fmr(sk, iov->iov_base, len, &rx_sa->fmr, &rx_sa->umem);
	if (rc) {
		sdp_warn(sk, "Error allocating fmr: %d\n", rc);
		goto err_alloc_fmr;
	}

	rc = sdp_post_rdma_read(sk, rx_sa);
	if (unlikely(rc)) {
		sdp_warn(sk, "ib_post_send failed with status %d.\n", rc);
		sdp_set_error(ssk->socket, -ECONNRESET);
		wake_up(&ssk->wq);
		goto err_post_send;
	}

	sdp_prf(sk, mb, "Finished posting(rc=%d), now to wait", rc);

	/* NOTE(review): computed but never used afterwards - dead local. */
	got_srcavail_cancel = ssk->srcavail_cancel_mseq > rx_sa->mseq;

	/* Arm the tx CQ so the read completion wakes us. */
	sdp_arm_tx_cq(sk);

	sdp_wait_rdma_wr_finished(ssk);

	sdp_prf(sk, mb, "Finished waiting(rc=%d)", rc);
	if (!ssk->qp_active) {
		sdp_dbg_data(sk, "QP destroyed during RDMA read\n");
		rc = -EPIPE;
		goto err_post_send;
	}

	/* The mapped length is what was actually transferred. */
	copied = rx_sa->umem->length;

	sdp_update_iov_used(sk, iov, copied);
	rx_sa->used += copied;
	atomic_add(copied, &ssk->rcv_nxt);
	*used = copied;

	ssk->tx_ring.rdma_inflight = NULL;

err_post_send:
	sdp_free_fmr(sk, &rx_sa->fmr, &rx_sa->umem);

err_alloc_fmr:
	if (rc && ssk->qp_active) {
		sdp_warn(sk, "Couldn't do RDMA - post sendsm\n");
		rx_sa->flags |= RX_SA_ABORTED;
	}

	sock_put(sk, SOCK_REF_RDMA_RD);

	return rc;
}
621aa0a1e58SJeff Roberson 
wait_for_sndbuf(struct socket * sk,long * timeo_p)622aa0a1e58SJeff Roberson static inline int wait_for_sndbuf(struct socket *sk, long *timeo_p)
623aa0a1e58SJeff Roberson {
624aa0a1e58SJeff Roberson 	struct sdp_sock *ssk = sdp_sk(sk);
625aa0a1e58SJeff Roberson 	int ret = 0;
626aa0a1e58SJeff Roberson 	int credits_needed = 1;
627aa0a1e58SJeff Roberson 
628aa0a1e58SJeff Roberson 	sdp_dbg_data(sk, "Wait for mem\n");
629aa0a1e58SJeff Roberson 
630aa0a1e58SJeff Roberson 	set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
631aa0a1e58SJeff Roberson 
632aa0a1e58SJeff Roberson 	SDPSTATS_COUNTER_INC(send_wait_for_mem);
633aa0a1e58SJeff Roberson 
634aa0a1e58SJeff Roberson 	sdp_do_posts(ssk);
635aa0a1e58SJeff Roberson 
636aa0a1e58SJeff Roberson 	sdp_xmit_poll(ssk, 1);
637aa0a1e58SJeff Roberson 
638aa0a1e58SJeff Roberson 	ret = sdp_tx_wait_memory(ssk, timeo_p, &credits_needed);
639aa0a1e58SJeff Roberson 
640aa0a1e58SJeff Roberson 	return ret;
641aa0a1e58SJeff Roberson }
642aa0a1e58SJeff Roberson 
/*
 * Advertise one user buffer to the peer for zero-copy transmission.
 *
 * Registers the memory described by @iov (FMR + umem), posts a SrcAvail
 * message, then sleeps until the peer either completes the RDMA read
 * (RdmaRdCompl), refuses it (SendSM), or errors out.  @timeo is the
 * remaining wait budget and is updated in place.
 *
 * On every exit path @iov is advanced by tx_sa->bytes_acked and the
 * FMR/umem are released; the cleanup labels below fall through in
 * reverse order of acquisition.  Returns 0 on success or a negative
 * errno.
 */
static int do_sdp_sendmsg_zcopy(struct socket *sk, struct tx_srcavail_state *tx_sa,
		struct iovec *iov, long *timeo)
{
	struct sdp_sock *ssk = sdp_sk(sk);
	int rc = 0;
	unsigned long lock_flags;

	/* Pin and map the user pages so the peer can RDMA-read them. */
	rc = sdp_alloc_fmr(sk, iov->iov_base, iov->iov_len,
			&tx_sa->fmr, &tx_sa->umem);
	if (rc) {
		sdp_warn(sk, "Error allocating fmr: %d\n", rc);
		goto err_alloc_fmr;
	}

	/* Need at least one free tx slot to post the SrcAvail. */
	if (tx_slots_free(ssk) == 0) {
		rc = wait_for_sndbuf(sk, timeo);
		if (rc) {
			sdp_warn(sk, "Couldn't get send buffer\n");
			goto err_no_tx_slots;
		}
	}

	rc = sdp_post_srcavail(sk, tx_sa);
	if (rc) {
		sdp_dbg(sk, "Error posting SrcAvail\n");
		goto err_abort_send;
	}

	/* Wait for the peer to finish (or abort) the RDMA read. */
	rc = sdp_wait_rdmardcompl(ssk, timeo, 0);
	if (unlikely(rc)) {
		enum tx_sa_flag f = tx_sa->abort_flags;

		if (f & TX_SA_SENDSM) {
			/* Peer declined the SrcAvail; caller falls back to
			 * the plain SEND (bcopy) path. */
			sdp_dbg_data(sk, "Got SendSM. use SEND verb.\n");
		} else if (f & TX_SA_ERROR) {
			/* Fatal completion error - tear the connection down. */
			sdp_dbg_data(sk, "SrcAvail error completion\n");
			sdp_reset(sk);
			SDPSTATS_COUNTER_INC(zcopy_tx_error);
		} else if (ssk->qp_active) {
			/* Timed out with the QP still alive: cancel the
			 * advertisement and give the peer a short grace
			 * period to acknowledge the cancel. */
			sdp_post_srcavail_cancel(sk);

			/* Wait for RdmaRdCompl/SendSM to
			 * finish the transaction */
			*timeo = 2 * HZ;
			sdp_dbg_data(sk, "Waiting for SendSM\n");
			sdp_wait_rdmardcompl(ssk, timeo, 1);
			sdp_dbg_data(sk, "finished waiting\n");

			cancel_delayed_work(&ssk->srcavail_cancel_work);
		} else {
			sdp_dbg_data(sk, "QP was destroyed while waiting\n");
		}
	} else {
		sdp_dbg_data(sk, "got RdmaRdCompl\n");
	}

	/* Transaction settled - detach the tx state under the lock so
	 * concurrent abort paths no longer see it. */
	spin_lock_irqsave(&ssk->tx_sa_lock, lock_flags);
	ssk->tx_sa = NULL;
	spin_unlock_irqrestore(&ssk->tx_sa_lock, lock_flags);

err_abort_send:
	/* Consume whatever the peer actually read, even on partial abort. */
	sdp_update_iov_used(sk, iov, tx_sa->bytes_acked);

err_no_tx_slots:
	sdp_free_fmr(sk, &tx_sa->fmr, &tx_sa->umem);

err_alloc_fmr:
	return rc;
}
712aa0a1e58SJeff Roberson 
/*
 * Zero-copy sendmsg entry point.
 *
 * Repeatedly advertises the user buffer described by @iov to the peer
 * (one SrcAvail per iteration via do_sdp_sendmsg_zcopy) until the iovec
 * is drained, an abort/error occurs, or the remainder drops below
 * sdp_zcopy_thresh - in which case the caller finishes with bcopy.
 *
 * Returns the number of bytes consumed from @iov; hard errors (other
 * than -EAGAIN/-ETIME, which merely trigger the bcopy fallback) are
 * returned as a negative errno instead.
 */
int sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov)
{
	struct sdp_sock *ssk = sdp_sk(sk);
	int rc = 0;
	long timeo;
	struct tx_srcavail_state *tx_sa;
	int offset;
	size_t bytes_to_copy = 0;
	int copied = 0;

	sdp_dbg_data(sk, "Sending iov: %p, iov_len: 0x%lx\n",
			iov->iov_base, iov->iov_len);
	sdp_prf1(sk, NULL, "sdp_sendmsg_zcopy start");
	if (ssk->rx_sa) {
		/* The peer already has a SrcAvail outstanding toward us;
		 * advertising ours now could deadlock both sides. */
		sdp_dbg_data(sk, "Deadlock prevent: crossing SrcAvail\n");
		return 0;
	}

	/* Keep the socket alive across the (possibly long) waits below. */
	sock_hold(ssk->socket, SOCK_REF_ZCOPY);

	SDPSTATS_COUNTER_INC(sendmsg_zcopy_segment);

	timeo = SDP_SRCAVAIL_ADV_TIMEOUT ;

	/* Ok commence sending. */
	/* NOTE(review): offset is computed but never used below - candidate
	 * for removal; left untouched here. */
	offset = (unsigned long)iov->iov_base & (PAGE_SIZE - 1);

	tx_sa = kmalloc(sizeof(struct tx_srcavail_state), GFP_KERNEL);
	if (!tx_sa) {
		sdp_warn(sk, "Error allocating zcopy context\n");
		rc = -EAGAIN; /* Buffer too big - fallback to bcopy */
		goto err_alloc_tx_sa;
	}

	bytes_to_copy = iov->iov_len;
	do {
		/* Fresh abort flags / byte counts for each advertisement. */
		tx_sa_reset(tx_sa);

		rc = do_sdp_sendmsg_zcopy(sk, tx_sa, iov, &timeo);

		/* Remainder too small to be worth another zcopy round. */
		if (iov->iov_len && iov->iov_len < sdp_zcopy_thresh) {
			sdp_dbg_data(sk, "0x%lx bytes left, switching to bcopy\n",
				iov->iov_len);
			break;
		}
	} while (!rc && iov->iov_len > 0 && !tx_sa->abort_flags);

	kfree(tx_sa);
err_alloc_tx_sa:
	/* Progress = how far do_sdp_sendmsg_zcopy advanced the iovec. */
	copied = bytes_to_copy - iov->iov_len;

	sdp_prf1(sk, NULL, "sdp_sendmsg_zcopy end rc: %d copied: %d", rc, copied);

	sock_put(ssk->socket, SOCK_REF_ZCOPY);

	/* -EAGAIN/-ETIME are soft failures: report partial progress so the
	 * caller can continue with the bcopy path. */
	if (rc < 0 && rc != -EAGAIN && rc != -ETIME)
		return rc;

	return copied;
}
773aa0a1e58SJeff Roberson 
sdp_abort_srcavail(struct socket * sk)774aa0a1e58SJeff Roberson void sdp_abort_srcavail(struct socket *sk)
775aa0a1e58SJeff Roberson {
776aa0a1e58SJeff Roberson 	struct sdp_sock *ssk = sdp_sk(sk);
777aa0a1e58SJeff Roberson 	struct tx_srcavail_state *tx_sa = ssk->tx_sa;
778aa0a1e58SJeff Roberson 	unsigned long flags;
779aa0a1e58SJeff Roberson 
780aa0a1e58SJeff Roberson 	if (!tx_sa)
781aa0a1e58SJeff Roberson 		return;
782aa0a1e58SJeff Roberson 
783aa0a1e58SJeff Roberson 	cancel_delayed_work(&ssk->srcavail_cancel_work);
784aa0a1e58SJeff Roberson 	flush_scheduled_work();
785aa0a1e58SJeff Roberson 
786aa0a1e58SJeff Roberson 	spin_lock_irqsave(&ssk->tx_sa_lock, flags);
787aa0a1e58SJeff Roberson 
788aa0a1e58SJeff Roberson 	sdp_free_fmr(sk, &tx_sa->fmr, &tx_sa->umem);
789aa0a1e58SJeff Roberson 
790aa0a1e58SJeff Roberson 	ssk->tx_sa = NULL;
791aa0a1e58SJeff Roberson 
792aa0a1e58SJeff Roberson 	spin_unlock_irqrestore(&ssk->tx_sa_lock, flags);
793aa0a1e58SJeff Roberson }
794aa0a1e58SJeff Roberson 
sdp_abort_rdma_read(struct socket * sk)795aa0a1e58SJeff Roberson void sdp_abort_rdma_read(struct socket *sk)
796aa0a1e58SJeff Roberson {
797aa0a1e58SJeff Roberson 	struct sdp_sock *ssk = sdp_sk(sk);
798aa0a1e58SJeff Roberson 	struct rx_srcavail_state *rx_sa = ssk->rx_sa;
799aa0a1e58SJeff Roberson 
800aa0a1e58SJeff Roberson 	if (!rx_sa)
801aa0a1e58SJeff Roberson 		return;
802aa0a1e58SJeff Roberson 
803aa0a1e58SJeff Roberson 	sdp_free_fmr(sk, &rx_sa->fmr, &rx_sa->umem);
804aa0a1e58SJeff Roberson 
805aa0a1e58SJeff Roberson 	ssk->rx_sa = NULL;
806aa0a1e58SJeff Roberson }
807