xref: /linux/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c (revision 6aac2aa2dfae38b60f22c3dfe4103ceefbe2d761)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2018 Chelsio Communications, Inc.
4  *
5  * Written by: Atul Gupta (atul.gupta@chelsio.com)
6  */
7 
8 #include <linux/module.h>
9 #include <linux/list.h>
10 #include <linux/workqueue.h>
11 #include <linux/skbuff.h>
12 #include <linux/timer.h>
13 #include <linux/notifier.h>
14 #include <linux/inetdevice.h>
15 #include <linux/ip.h>
16 #include <linux/tcp.h>
17 #include <linux/sched/signal.h>
18 #include <net/tcp.h>
19 #include <net/busy_poll.h>
20 #include <crypto/aes.h>
21 
22 #include "chtls.h"
23 #include "chtls_cm.h"
24 
25 static bool is_tls_tx(struct chtls_sock *csk)
26 {
27 	return csk->tlshws.txkey >= 0;
28 }
29 
30 static bool is_tls_rx(struct chtls_sock *csk)
31 {
32 	return csk->tlshws.rxkey >= 0;
33 }
34 
35 static int data_sgl_len(const struct sk_buff *skb)
36 {
37 	unsigned int cnt;
38 
39 	cnt = skb_shinfo(skb)->nr_frags;
40 	return sgl_len(cnt) * 8;
41 }
42 
43 static int nos_ivs(struct sock *sk, unsigned int size)
44 {
45 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
46 
47 	return DIV_ROUND_UP(size, csk->tlshws.mfs);
48 }
49 
50 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
51 {
52 	int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
53 	int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
54 
55 	if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
56 	    MAX_IMM_OFLD_TX_DATA_WR_LEN) {
57 		ULP_SKB_CB(skb)->ulp.tls.iv = 1;
58 		return 1;
59 	}
60 	ULP_SKB_CB(skb)->ulp.tls.iv = 0;
61 	return 0;
62 }
63 
64 static int max_ivs_size(struct sock *sk, int size)
65 {
66 	return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
67 }
68 
69 static int ivs_size(struct sock *sk, const struct sk_buff *skb)
70 {
71 	return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
72 		 CIPHER_BLOCK_SIZE) : 0;
73 }
74 
75 static int flowc_wr_credits(int nparams, int *flowclenp)
76 {
77 	int flowclen16, flowclen;
78 
79 	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
80 	flowclen16 = DIV_ROUND_UP(flowclen, 16);
81 	flowclen = flowclen16 * 16;
82 
83 	if (flowclenp)
84 		*flowclenp = flowclen;
85 
86 	return flowclen16;
87 }
88 
89 static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
90 					   struct fw_flowc_wr *flowc,
91 					   int flowclen)
92 {
93 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
94 	struct sk_buff *skb;
95 
96 	skb = alloc_skb(flowclen, GFP_ATOMIC);
97 	if (!skb)
98 		return NULL;
99 
100 	__skb_put_data(skb, flowc, flowclen);
101 	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
102 
103 	return skb;
104 }
105 
106 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
107 			 int flowclen)
108 {
109 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
110 	struct tcp_sock *tp = tcp_sk(sk);
111 	struct sk_buff *skb;
112 	int flowclen16;
113 	int ret;
114 
115 	flowclen16 = flowclen / 16;
116 
117 	if (csk_flag(sk, CSK_TX_DATA_SENT)) {
118 		skb = create_flowc_wr_skb(sk, flowc, flowclen);
119 		if (!skb)
120 			return -ENOMEM;
121 
122 		skb_entail(sk, skb,
123 			   ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
124 		return 0;
125 	}
126 
127 	ret = cxgb4_immdata_send(csk->egress_dev,
128 				 csk->txq_idx,
129 				 flowc, flowclen);
130 	if (!ret)
131 		return flowclen16;
132 	skb = create_flowc_wr_skb(sk, flowc, flowclen);
133 	if (!skb)
134 		return -ENOMEM;
135 	send_or_defer(sk, tp, skb, 0);
136 	return flowclen16;
137 }
138 
139 static u8 tcp_state_to_flowc_state(u8 state)
140 {
141 	switch (state) {
142 	case TCP_ESTABLISHED:
143 		return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
144 	case TCP_CLOSE_WAIT:
145 		return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
146 	case TCP_FIN_WAIT1:
147 		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
148 	case TCP_CLOSING:
149 		return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
150 	case TCP_LAST_ACK:
151 		return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
152 	case TCP_FIN_WAIT2:
153 		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
154 	}
155 
156 	return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
157 }
158 
/*
 * send_tx_flowc_wr - build and send the FW_FLOWC_WR describing a connection
 * @sk: socket whose chtls state (sk_user_data) supplies the flow parameters
 * @compl: value placed in the work request's completion field
 * @snd_nxt: not read by the body; SNDNXT is taken from tcp_sk(sk)->snd_nxt
 * @rcv_nxt: not read by the body; RCVNXT is taken from tcp_sk(sk)->rcv_nxt
 *
 * Nine parameters are always emitted (PFNVFN, CH, PORT, IQID, SNDNXT, RCVNXT,
 * SNDBUF, MSS, TCPSTATE); RCV_SCALE, ULD_MODE and TXDATAPLEN_MAX are added
 * only when the corresponding socket state is non-zero / TLS.
 *
 * Return: the result of send_flowc_wr().
 */
int send_tx_flowc_wr(struct sock *sk, int compl,
		     u32 snd_nxt, u32 rcv_nxt)
{
	/* work request with room for FW_FLOWC_MNEM_MAX mnemonic/value pairs */
	DEFINE_RAW_FLEX(struct fw_flowc_wr, flowc, mnemval, FW_FLOWC_MNEM_MAX);
	int nparams, paramidx, flowclen16, flowclen;
	struct chtls_sock *csk;
	struct tcp_sock *tp;

	csk = rcu_dereference_sk_user_data(sk);
	tp = tcp_sk(sk);

/* Append one mnemonic/value pair (value stored big-endian) and advance. */
#define FLOWC_PARAM(__m, __v) \
	do { \
		flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
		flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
		paramidx++; \
	} while (0)

	paramidx = 0;

	FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
	/* CH and PORT are both filled from csk->tx_chan */
	FLOWC_PARAM(CH, csk->tx_chan);
	FLOWC_PARAM(PORT, csk->tx_chan);
	FLOWC_PARAM(IQID, csk->rss_qid);
	FLOWC_PARAM(SNDNXT, tp->snd_nxt);
	FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
	FLOWC_PARAM(SNDBUF, csk->sndbuf);
	FLOWC_PARAM(MSS, tp->mss_cache);
	FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));

	/* the RCV_SCALE mnemonic carries SND_WSCALE(tp), only when non-zero */
	if (SND_WSCALE(tp))
		FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));

	if (csk->ulp_mode == ULP_MODE_TLS)
		FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);

	if (csk->tlshws.fcplenmax)
		FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);

	nparams = paramidx;
#undef FLOWC_PARAM

	/* length is padded to a 16-byte multiple; flowclen16 is that / 16 */
	flowclen16 = flowc_wr_credits(nparams, &flowclen);
	flowc->op_to_nparams =
		cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
			    FW_WR_COMPL_V(compl) |
			    FW_FLOWC_WR_NPARAMS_V(nparams));
	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
					  FW_WR_FLOWID_V(csk->tid));

	return send_flowc_wr(sk, flowc, flowclen);
}
211 
212 /* Copy IVs to WR */
213 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
214 
215 {
216 	struct chtls_sock *csk;
217 	unsigned char *iv_loc;
218 	struct chtls_hws *hws;
219 	unsigned char *ivs;
220 	u16 number_of_ivs;
221 	struct page *page;
222 	int err = 0;
223 
224 	csk = rcu_dereference_sk_user_data(sk);
225 	hws = &csk->tlshws;
226 	number_of_ivs = nos_ivs(sk, skb->len);
227 
228 	if (number_of_ivs > MAX_IVS_PAGE) {
229 		pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
230 		return -ENOMEM;
231 	}
232 
233 	/* generate the  IVs */
234 	ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
235 	if (!ivs)
236 		return -ENOMEM;
237 	get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
238 
239 	if (skb_ulp_tls_iv_imm(skb)) {
240 		/* send the IVs as immediate data in the WR */
241 		iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
242 						CIPHER_BLOCK_SIZE);
243 		if (iv_loc)
244 			memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
245 
246 		hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
247 	} else {
248 		/* Send the IVs as sgls */
249 		/* Already accounted IV DSGL for credits */
250 		skb_shinfo(skb)->nr_frags--;
251 		page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
252 		if (!page) {
253 			pr_info("%s : Page allocation for IVs failed\n",
254 				__func__);
255 			err = -ENOMEM;
256 			goto out;
257 		}
258 		memcpy(page_address(page), ivs, number_of_ivs *
259 		       CIPHER_BLOCK_SIZE);
260 		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
261 				   number_of_ivs * CIPHER_BLOCK_SIZE);
262 		hws->ivsize = 0;
263 	}
264 out:
265 	kfree(ivs);
266 	return err;
267 }
268 
269 /* Copy Key to WR */
270 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
271 {
272 	struct ulptx_sc_memrd *sc_memrd;
273 	struct chtls_sock *csk;
274 	struct chtls_dev *cdev;
275 	struct ulptx_idata *sc;
276 	struct chtls_hws *hws;
277 	u32 immdlen;
278 	int kaddr;
279 
280 	csk = rcu_dereference_sk_user_data(sk);
281 	hws = &csk->tlshws;
282 	cdev = csk->cdev;
283 
284 	immdlen = sizeof(*sc) + sizeof(*sc_memrd);
285 	kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
286 	sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
287 	if (sc) {
288 		sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
289 		sc->len = htonl(0);
290 		sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
291 		sc_memrd->cmd_to_len =
292 				htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
293 				ULP_TX_SC_MORE_V(1) |
294 				ULPTX_LEN16_V(hws->keylen >> 4));
295 		sc_memrd->addr = htonl(kaddr);
296 	}
297 }
298 
299 static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
300 {
301 	return hws->tx_seq_no++;
302 }
303 
304 static bool is_sg_request(const struct sk_buff *skb)
305 {
306 	return skb->peeked ||
307 		(skb->len > MAX_IMM_ULPTX_WR_LEN);
308 }
309 
310 /*
311  * Returns true if an sk_buff carries urgent data.
312  */
313 static bool skb_urgent(struct sk_buff *skb)
314 {
315 	return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
316 }
317 
318 /* TLS content type for CPL SFO */
319 static unsigned char tls_content_type(unsigned char content_type)
320 {
321 	switch (content_type) {
322 	case TLS_HDR_TYPE_CCS:
323 		return CPL_TX_TLS_SFO_TYPE_CCS;
324 	case TLS_HDR_TYPE_ALERT:
325 		return CPL_TX_TLS_SFO_TYPE_ALERT;
326 	case TLS_HDR_TYPE_HANDSHAKE:
327 		return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
328 	case TLS_HDR_TYPE_HEARTBEAT:
329 		return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
330 	}
331 	return CPL_TX_TLS_SFO_TYPE_DATA;
332 }
333 
/*
 * tls_tx_data_wr - prepend the FW_TLSTX_DATA_WR and CPL_TX_TLS_SFO headers
 * @sk: socket the record is sent on
 * @skb: buffer both headers are pushed onto
 * @dlen: TLS payload length; capped at hws->mfs for the immediate-length and
 *	  segment-length fields
 * @tls_immd: non-zero when the payload is carried as immediate data
 * @credits: value written to the work request's LEN16 field
 * @expn: expansion bytes; added to @dlen for the work request's plen field
 * @pdus: IV/PDU count written to the NUMIVS fields
 *
 * Also bumps the adapter's tls_pdu_tx counter and consumes one TX sequence
 * number via tlstx_incr_seqnum().
 */
static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			   int dlen, int tls_immd, u32 credits,
			   int expn, int pdus)
{
	struct fw_tlstx_data_wr *req_wr;
	struct cpl_tx_tls_sfo *req_cpl;
	unsigned int wr_ulp_mode_force;
	struct tls_scmd *updated_scmd;
	unsigned char data_type;
	struct chtls_sock *csk;
	struct net_device *dev;
	struct chtls_hws *hws;
	struct tls_scmd *scmd;
	struct adapter *adap;
	unsigned char *req;
	int immd_len;
	int iv_imm;
	int len;

	csk = rcu_dereference_sk_user_data(sk);
	iv_imm = skb_ulp_tls_iv_imm(skb);
	dev = csk->egress_dev;
	adap = netdev2adap(dev);
	hws = &csk->tlshws;
	scmd = &hws->scmd;
	/* total length uses the uncapped dlen; taken before the cap below */
	len = dlen + expn;

	dlen = (dlen < hws->mfs) ? dlen : hws->mfs;
	atomic_inc(&adap->chcr_stats.tls_pdu_tx);

	/*
	 * updated_scmd aliases &hws->scmd, so the edits below modify
	 * hws->scmd in place and the final struct assignment copies it onto
	 * itself.  The low 7 bits are cleared before the IV count is OR-ed in.
	 */
	updated_scmd = scmd;
	updated_scmd->seqno_numivs &= 0xffffff80;
	updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus);
	hws->scmd = *updated_scmd;

	/* CPL is pushed first so the WR pushed next ends up in front of it */
	req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
	req_cpl = (struct cpl_tx_tls_sfo *)req;
	req = (unsigned char *)__skb_push(skb, (sizeof(struct
				fw_tlstx_data_wr)));

	req_wr = (struct fw_tlstx_data_wr *)req;
	immd_len = (tls_immd ? dlen : 0);
	req_wr->op_to_immdlen =
		htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
		FW_TLSTX_DATA_WR_COMPL_V(1) |
		FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
	req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
				     FW_TLSTX_DATA_WR_LEN16_V(credits));
	wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS);

	/* note: skb->len here already includes the headers pushed above */
	if (is_sg_request(skb))
		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
			FW_OFLD_TX_DATA_WR_SHOVE_F);

	/* shove only when no more data is flagged and the TX queue is empty */
	req_wr->lsodisable_to_flags =
			htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
			      TX_URG_V(skb_urgent(skb)) |
			      T6_TX_FORCE_F | wr_ulp_mode_force |
			      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
					 skb_queue_empty(&csk->txq)));

	/* IVDSGL is set when the IVs are NOT carried as immediate data */
	req_wr->ctxloc_to_exp =
			htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
			      FW_TLSTX_DATA_WR_EXP_V(expn) |
			      FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
			      FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
			      FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));

	/* Fill in the length */
	req_wr->plen = htonl(len);
	req_wr->mfs = htons(hws->mfs);
	req_wr->adjustedplen_pkd =
		htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
	req_wr->expinplenmax_pkd =
		htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
	req_wr->pdusinplenmax_pkd =
		FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
	req_wr->r10 = 0;

	data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
	req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
				       CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
				       CPL_TX_TLS_SFO_CPL_LEN_V(2) |
				       CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
	/* payload length excludes the expansion added to len above */
	req_cpl->pld_len = htonl(len - expn);

	/* the TYPE field is non-zero only for heartbeat records */
	req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
		((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
		TLS_HDR_TYPE_HEARTBEAT : 0) |
		CPL_TX_TLS_SFO_PROTOVER_V(0));

	/* create the s-command */
	req_cpl->r1_lo = 0;
	req_cpl->seqno_numivs  = cpu_to_be32(hws->scmd.seqno_numivs);
	req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
	req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
}
432 
433 /*
434  * Calculate the TLS data expansion size
435  */
436 static int chtls_expansion_size(struct sock *sk, int data_len,
437 				int fullpdu,
438 				unsigned short *pducnt)
439 {
440 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
441 	struct chtls_hws *hws = &csk->tlshws;
442 	struct tls_scmd *scmd = &hws->scmd;
443 	int fragsize = hws->mfs;
444 	int expnsize = 0;
445 	int fragleft;
446 	int fragcnt;
447 	int expppdu;
448 
449 	if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
450 	    SCMD_CIPH_MODE_AES_GCM) {
451 		expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
452 			  TLS_HEADER_LENGTH;
453 
454 		if (fullpdu) {
455 			*pducnt = data_len / (expppdu + fragsize);
456 			if (*pducnt > 32)
457 				*pducnt = 32;
458 			else if (!*pducnt)
459 				*pducnt = 1;
460 			expnsize = (*pducnt) * expppdu;
461 			return expnsize;
462 		}
463 		fragcnt = (data_len / fragsize);
464 		expnsize =  fragcnt * expppdu;
465 		fragleft = data_len % fragsize;
466 		if (fragleft > 0)
467 			expnsize += expppdu;
468 	}
469 	return expnsize;
470 }
471 
472 /* WR with IV, KEY and CPL SFO added */
473 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
474 			       int tls_tx_imm, int tls_len, u32 credits)
475 {
476 	unsigned short pdus_per_ulp = 0;
477 	struct chtls_sock *csk;
478 	struct chtls_hws *hws;
479 	int expn_sz;
480 	int pdus;
481 
482 	csk = rcu_dereference_sk_user_data(sk);
483 	hws = &csk->tlshws;
484 	pdus = DIV_ROUND_UP(tls_len, hws->mfs);
485 	expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
486 	if (!hws->compute) {
487 		hws->expansion = chtls_expansion_size(sk,
488 						      hws->fcplenmax,
489 						      1, &pdus_per_ulp);
490 		hws->pdus = pdus_per_ulp;
491 		hws->adjustlen = hws->pdus *
492 			((hws->expansion / hws->pdus) + hws->mfs);
493 		hws->compute = 1;
494 	}
495 	if (tls_copy_ivs(sk, skb))
496 		return;
497 	tls_copy_tx_key(sk, skb);
498 	tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
499 	hws->tx_seq_no += (pdus - 1);
500 }
501 
502 static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
503 			    unsigned int immdlen, int len,
504 			    u32 credits, u32 compl)
505 {
506 	struct fw_ofld_tx_data_wr *req;
507 	unsigned int wr_ulp_mode_force;
508 	struct chtls_sock *csk;
509 	unsigned int opcode;
510 
511 	csk = rcu_dereference_sk_user_data(sk);
512 	opcode = FW_OFLD_TX_DATA_WR;
513 
514 	req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
515 	req->op_to_immdlen = htonl(WR_OP_V(opcode) |
516 				FW_WR_COMPL_V(compl) |
517 				FW_WR_IMMDLEN_V(immdlen));
518 	req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
519 				FW_WR_LEN16_V(credits));
520 
521 	wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode);
522 	if (is_sg_request(skb))
523 		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
524 			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
525 				FW_OFLD_TX_DATA_WR_SHOVE_F);
526 
527 	req->tunnel_to_proxy = htonl(wr_ulp_mode_force |
528 			TX_URG_V(skb_urgent(skb)) |
529 			TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
530 				   skb_queue_empty(&csk->txq)));
531 	req->plen = htonl(len);
532 }
533 
534 static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
535 			 bool size)
536 {
537 	int wr_size;
538 
539 	wr_size = TLS_WR_CPL_LEN;
540 	wr_size += KEY_ON_MEM_SZ;
541 	wr_size += ivs_size(csk->sk, skb);
542 
543 	if (size)
544 		return wr_size;
545 
546 	/* frags counted for IV dsgl */
547 	if (!skb_ulp_tls_iv_imm(skb))
548 		skb_shinfo(skb)->nr_frags++;
549 
550 	return wr_size;
551 }
552 
553 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
554 {
555 	int length = skb->len;
556 
557 	if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
558 		return false;
559 
560 	if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
561 		/* Check TLS header len for Immediate */
562 		if (csk->ulp_mode == ULP_MODE_TLS &&
563 		    skb_ulp_tls_inline(skb))
564 			length += chtls_wr_size(csk, skb, true);
565 		else
566 			length += sizeof(struct fw_ofld_tx_data_wr);
567 
568 		return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
569 	}
570 	return true;
571 }
572 
573 static unsigned int calc_tx_flits(const struct sk_buff *skb,
574 				  unsigned int immdlen)
575 {
576 	unsigned int flits, cnt;
577 
578 	flits = immdlen / 8;   /* headers */
579 	cnt = skb_shinfo(skb)->nr_frags;
580 	if (skb_tail_pointer(skb) != skb_transport_header(skb))
581 		cnt++;
582 	return flits + sgl_len(cnt);
583 }
584 
/*
 * ARP failure handler installed on outgoing skbs by chtls_push_frames():
 * simply frees the skb.  @handle is unused.
 */
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	kfree_skb(skb);
}
589 
/*
 * chtls_push_frames - move queued skbs from the TX queue to the hardware
 * @csk: connection whose txq is drained
 * @comp: non-zero to request a completion when the skb being sent is the
 *	  only one with unacknowledged credits
 *
 * Sends as many skbs from csk->txq as the available work-request credits
 * allow.  Before the first data skb a flow-control WR is sent and charged.
 * Each sent skb gets its WR header built (TLS or plain), is recorded with
 * enqueue_wr() and passed to cxgb4_l2t_send().
 *
 * Nothing is sent while the socket is in SYN_SENT/CLOSE or has
 * CSK_ABORT_SHUTDOWN set.
 *
 * Return: the summed truesize of the skbs sent; the same amount is
 * subtracted from sk->sk_wmem_queued.
 */
int chtls_push_frames(struct chtls_sock *csk, int comp)
{
	struct chtls_hws *hws = &csk->tlshws;
	struct tcp_sock *tp;
	struct sk_buff *skb;
	int total_size = 0;
	struct sock *sk;
	int wr_size;

	/*
	 * NOTE(review): wr_size is initialised once, outside the loop; after
	 * an inline-TLS skb overwrites it, the TLS value is carried into
	 * later iterations — confirm this is intended.
	 */
	wr_size = sizeof(struct fw_ofld_tx_data_wr);
	sk = csk->sk;
	tp = tcp_sk(sk);

	if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
		return 0;

	if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
		return 0;

	/*
	 * Keep going while credits remain and the head skb is either not on
	 * hold or is followed by at least one more skb.
	 */
	while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
	       (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
		skb_queue_len(&csk->txq) > 1)) {
		unsigned int credit_len = skb->len;
		unsigned int credits_needed;
		unsigned int completion = 0;
		int tls_len = skb->len;/* TLS data len before IV/key */
		unsigned int immdlen;
		int len = skb->len;    /* length [ulp bytes] inserted by hw */
		int flowclen16 = 0;
		int tls_tx_imm = 0;

		immdlen = skb->len;
		if (!is_ofld_imm(csk, skb)) {
			/* not immediate: size the WR by headers + SGL flits */
			immdlen = skb_transport_offset(skb);
			if (skb_ulp_tls_inline(skb))
				wr_size = chtls_wr_size(csk, skb, false);
			credit_len = 8 * calc_tx_flits(skb, immdlen);
		} else {
			if (skb_ulp_tls_inline(skb)) {
				wr_size = chtls_wr_size(csk, skb, false);
				tls_tx_imm = 1;
			}
		}
		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
			credit_len += wr_size;
		/* one credit covers 16 bytes */
		credits_needed = DIV_ROUND_UP(credit_len, 16);
		if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
			/* first data on this connection: send the flowc WR */
			flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
						      tp->rcv_nxt);
			if (flowclen16 <= 0)
				break;
			csk->wr_credits -= flowclen16;
			csk->wr_unacked += flowclen16;
			csk->wr_nondata += flowclen16;
			csk_set_flag(csk, CSK_TX_DATA_SENT);
		}

		if (csk->wr_credits < credits_needed) {
			/*
			 * Not sending after all: undo the fragment slot that
			 * chtls_wr_size(..., false) reserved for the IV DSGL.
			 */
			if (skb_ulp_tls_inline(skb) &&
			    !skb_ulp_tls_iv_imm(skb))
				skb_shinfo(skb)->nr_frags--;
			break;
		}

		__skb_unlink(skb, &csk->txq);
		skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
				      CPL_PRIORITY_DATA);
		if (hws->ofld)
			hws->txqid = (skb->queue_mapping >> 1);
		/* skb->csum is reused to remember the credits this WR holds */
		skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
		csk->wr_credits -= credits_needed;
		csk->wr_unacked += credits_needed;
		csk->wr_nondata = 0;
		enqueue_wr(csk, skb);

		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
			/*
			 * Request a completion when asked and this is the
			 * only unacked WR, when the skb demands one, or when
			 * half of the credits are unacknowledged.
			 */
			if ((comp && csk->wr_unacked == credits_needed) ||
			    (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
			    csk->wr_unacked >= csk->wr_max_credits / 2) {
				completion = 1;
				csk->wr_unacked = 0;
			}
			if (skb_ulp_tls_inline(skb))
				make_tlstx_data_wr(sk, skb, tls_tx_imm,
						   tls_len, credits_needed);
			else
				make_tx_data_wr(sk, skb, immdlen, len,
						credits_needed, completion);
			tp->snd_nxt += len;
			tp->lsndtime = tcp_jiffies32;
			if (completion)
				ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
		} else {
			/* skb already carries its own CPL header */
			struct cpl_close_con_req *req = cplhdr(skb);
			unsigned int cmd  = CPL_OPCODE_G(ntohl
					     (OPCODE_TID(req)));

			if (cmd == CPL_CLOSE_CON_REQ)
				csk_set_flag(csk,
					     CSK_CLOSE_CON_REQUESTED);

			if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
			    (csk->wr_unacked >= csk->wr_max_credits / 2)) {
				req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
				csk->wr_unacked = 0;
			}
		}
		total_size += skb->truesize;
		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
			csk_set_flag(csk, CSK_TX_WAIT_IDLE);
		t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
		cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
	}
	sk->sk_wmem_queued -= total_size;
	return total_size;
}
706 
707 static void mark_urg(struct tcp_sock *tp, int flags,
708 		     struct sk_buff *skb)
709 {
710 	if (unlikely(flags & MSG_OOB)) {
711 		tp->snd_up = tp->write_seq;
712 		ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
713 					 ULPCB_FLAG_BARRIER |
714 					 ULPCB_FLAG_NO_APPEND |
715 					 ULPCB_FLAG_NEED_HDR;
716 	}
717 }
718 
719 /*
720  * Returns true if a connection should send more data to TCP engine
721  */
722 static bool should_push(struct sock *sk)
723 {
724 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
725 	struct chtls_dev *cdev = csk->cdev;
726 	struct tcp_sock *tp = tcp_sk(sk);
727 
728 	/*
729 	 * If we've released our offload resources there's nothing to do ...
730 	 */
731 	if (!cdev)
732 		return false;
733 
734 	/*
735 	 * If there aren't any work requests in flight, or there isn't enough
736 	 * data in flight, or Nagle is off then send the current TX_DATA
737 	 * otherwise hold it and wait to accumulate more data.
738 	 */
739 	return csk->wr_credits == csk->wr_max_credits ||
740 		(tp->nonagle & TCP_NAGLE_OFF);
741 }
742 
743 /*
744  * Returns true if a TCP socket is corked.
745  */
746 static bool corked(const struct tcp_sock *tp, int flags)
747 {
748 	return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
749 }
750 
751 /*
752  * Returns true if a send should try to push new data.
753  */
754 static bool send_should_push(struct sock *sk, int flags)
755 {
756 	return should_push(sk) && !corked(tcp_sk(sk), flags);
757 }
758 
759 void chtls_tcp_push(struct sock *sk, int flags)
760 {
761 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
762 	int qlen = skb_queue_len(&csk->txq);
763 
764 	if (likely(qlen)) {
765 		struct sk_buff *skb = skb_peek_tail(&csk->txq);
766 		struct tcp_sock *tp = tcp_sk(sk);
767 
768 		mark_urg(tp, flags, skb);
769 
770 		if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
771 		    corked(tp, flags)) {
772 			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
773 			return;
774 		}
775 
776 		ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
777 		if (qlen == 1 &&
778 		    ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
779 		     should_push(sk)))
780 			chtls_push_frames(csk, 1);
781 	}
782 }
783 
/*
 * Calculate the size for a new send sk_buff.
 *
 * - Payload larger than the maximum head size for @len: return 0, so only
 *   the header lives in the main body and all payload goes to page
 *   fragments.
 * - Send will not be pushed right away: return the maximum head size so
 *   more payload can be accumulated.
 * - Otherwise size the body tightly at @io_len.
 *
 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't
 * arise in normal cases and when it does we are just wasting memory.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
	const int pgbreak = SKB_MAX_HEAD(len);

	if (io_len > pgbreak)
		return 0;

	return send_should_push(sk, flags) ? io_len : pgbreak;
}
812 
813 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
814 {
815 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
816 	struct tcp_sock *tp = tcp_sk(sk);
817 
818 	ULP_SKB_CB(skb)->seq = tp->write_seq;
819 	ULP_SKB_CB(skb)->flags = flags;
820 	__skb_queue_tail(&csk->txq, skb);
821 	sk->sk_wmem_queued += skb->truesize;
822 
823 	if (TCP_PAGE(sk) && TCP_OFF(sk)) {
824 		put_page(TCP_PAGE(sk));
825 		TCP_PAGE(sk) = NULL;
826 		TCP_OFF(sk) = 0;
827 	}
828 }
829 
830 static struct sk_buff *get_tx_skb(struct sock *sk, int size)
831 {
832 	struct sk_buff *skb;
833 
834 	skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
835 	if (likely(skb)) {
836 		skb_reserve(skb, TX_HEADER_LEN);
837 		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
838 		skb_reset_transport_header(skb);
839 	}
840 	return skb;
841 }
842 
843 static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
844 {
845 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
846 	struct sk_buff *skb;
847 
848 	skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
849 			KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
850 			sk->sk_allocation);
851 	if (likely(skb)) {
852 		skb_reserve(skb, (TX_TLSHDR_LEN +
853 			    KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
854 		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
855 		skb_reset_transport_header(skb);
856 		ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
857 		ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;
858 	}
859 	return skb;
860 }
861 
862 static void tx_skb_finalize(struct sk_buff *skb)
863 {
864 	struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
865 
866 	if (!(cb->flags & ULPCB_FLAG_NO_HDR))
867 		cb->flags = ULPCB_FLAG_NEED_HDR;
868 	cb->flags |= ULPCB_FLAG_NO_APPEND;
869 }
870 
871 static void push_frames_if_head(struct sock *sk)
872 {
873 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
874 
875 	if (skb_queue_len(&csk->txq) == 1)
876 		chtls_push_frames(csk, 1);
877 }
878 
879 static int chtls_skb_copy_to_page_nocache(struct sock *sk,
880 					  struct iov_iter *from,
881 					  struct sk_buff *skb,
882 					  struct page *page,
883 					  int off, int copy)
884 {
885 	int err;
886 
887 	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
888 				       off, copy, skb->len);
889 	if (err)
890 		return err;
891 
892 	skb->len             += copy;
893 	skb->data_len        += copy;
894 	skb->truesize        += copy;
895 	sk->sk_wmem_queued   += copy;
896 	return 0;
897 }
898 
899 static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
900 {
901 	return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
902 }
903 
/*
 * csk_wait_memory - sleep until send-buffer space is available
 * @cdev: device providing the max_host_sndbuf limit
 * @sk: socket waiting for space
 * @timeo_p: in/out remaining timeout in jiffies; 0 means non-blocking
 *
 * Return: 0 once csk_mem_free() holds, -EPIPE on socket error, send
 * shutdown or a negative sk_wait_event() result, -EAGAIN when the timeout
 * is (or has become) zero, or sock_intr_errno() when a signal is pending.
 */
static int csk_wait_memory(struct chtls_dev *cdev,
			   struct sock *sk, long *timeo_p)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int ret, err = 0;
	long current_timeo;
	long vm_wait = 0;
	bool noblock;

	current_timeo = *timeo_p;
	noblock = (*timeo_p ? false : true);
	/*
	 * Space is already free on entry, so the caller is presumably here
	 * because an allocation failed: back off for a short random time.
	 */
	if (csk_mem_free(cdev, sk)) {
		current_timeo = get_random_u32_below(HZ / 5) + 2;
		vm_wait = get_random_u32_below(HZ / 5) + 2;
	}

	add_wait_queue(sk_sleep(sk), &wait);
	while (1) {
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
			goto do_error;
		if (!*timeo_p) {
			/* SOCK_NOSPACE is flagged only for callers that
			 * were non-blocking from the start
			 */
			if (noblock)
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			goto do_nonblock;
		}
		if (signal_pending(current))
			goto do_interrupted;
		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		/* done once space is free and no back-off is pending */
		if (csk_mem_free(cdev, sk) && !vm_wait)
			break;

		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		sk->sk_write_pending++;
		ret = sk_wait_event(sk, &current_timeo, sk->sk_err ||
				    (sk->sk_shutdown & SEND_SHUTDOWN) ||
				    (csk_mem_free(cdev, sk) && !vm_wait),
				    &wait);
		sk->sk_write_pending--;
		if (ret < 0)
			goto do_error;

		/*
		 * After the random back-off, charge the time spent against
		 * the caller's timeout (assuming sk_wait_event() leaves the
		 * unslept remainder in current_timeo — TODO confirm) and
		 * clear the back-off so it happens only once.
		 */
		if (vm_wait) {
			vm_wait -= current_timeo;
			current_timeo = *timeo_p;
			if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
				current_timeo -= vm_wait;
				if (current_timeo < 0)
					current_timeo = 0;
			}
			vm_wait = 0;
		}
		*timeo_p = current_timeo;
	}
do_rm_wq:
	/* common exit: always leave the wait queue */
	remove_wait_queue(sk_sleep(sk), &wait);
	return err;
do_error:
	err = -EPIPE;
	goto do_rm_wq;
do_nonblock:
	err = -EAGAIN;
	goto do_rm_wq;
do_interrupted:
	err = sock_intr_errno(*timeo_p);
	goto do_rm_wq;
}
972 
973 static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
974 			       unsigned char *record_type)
975 {
976 	struct cmsghdr *cmsg;
977 	int rc = -EINVAL;
978 
979 	for_each_cmsghdr(cmsg, msg) {
980 		if (!CMSG_OK(msg, cmsg))
981 			return -EINVAL;
982 		if (cmsg->cmsg_level != SOL_TLS)
983 			continue;
984 
985 		switch (cmsg->cmsg_type) {
986 		case TLS_SET_RECORD_TYPE:
987 			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
988 				return -EINVAL;
989 
990 			if (msg->msg_flags & MSG_MORE)
991 				return -EINVAL;
992 
993 			*record_type = *(unsigned char *)CMSG_DATA(cmsg);
994 			rc = 0;
995 			break;
996 		default:
997 			return -EINVAL;
998 		}
999 	}
1000 
1001 	return rc;
1002 }
1003 
1004 int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1005 {
1006 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1007 	struct chtls_dev *cdev = csk->cdev;
1008 	struct tcp_sock *tp = tcp_sk(sk);
1009 	struct sk_buff *skb;
1010 	int mss, flags, err;
1011 	int recordsz = 0;
1012 	int copied = 0;
1013 	long timeo;
1014 
1015 	lock_sock(sk);
1016 	flags = msg->msg_flags;
1017 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1018 
1019 	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
1020 		err = sk_stream_wait_connect(sk, &timeo);
1021 		if (err)
1022 			goto out_err;
1023 	}
1024 
1025 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1026 	err = -EPIPE;
1027 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1028 		goto out_err;
1029 
1030 	mss = csk->mss;
1031 	csk_set_flag(csk, CSK_TX_MORE_DATA);
1032 
1033 	while (msg_data_left(msg)) {
1034 		int copy = 0;
1035 
1036 		skb = skb_peek_tail(&csk->txq);
1037 		if (skb) {
1038 			copy = mss - skb->len;
1039 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1040 		}
1041 		if (!csk_mem_free(cdev, sk))
1042 			goto wait_for_sndbuf;
1043 
1044 		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
1045 			unsigned char record_type = TLS_RECORD_TYPE_DATA;
1046 
1047 			if (unlikely(msg->msg_controllen)) {
1048 				err = chtls_proccess_cmsg(sk, msg,
1049 							  &record_type);
1050 				if (err)
1051 					goto out_err;
1052 
1053 				/* Avoid appending tls handshake, alert to tls data */
1054 				if (skb)
1055 					tx_skb_finalize(skb);
1056 			}
1057 
1058 			recordsz = size;
1059 			csk->tlshws.txleft = recordsz;
1060 			csk->tlshws.type = record_type;
1061 		}
1062 
1063 		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
1064 		    copy <= 0) {
1065 new_buf:
1066 			if (skb) {
1067 				tx_skb_finalize(skb);
1068 				push_frames_if_head(sk);
1069 			}
1070 
1071 			if (is_tls_tx(csk)) {
1072 				skb = get_record_skb(sk,
1073 						     select_size(sk,
1074 								 recordsz,
1075 								 flags,
1076 								 TX_TLSHDR_LEN),
1077 								 false);
1078 			} else {
1079 				skb = get_tx_skb(sk,
1080 						 select_size(sk, size, flags,
1081 							     TX_HEADER_LEN));
1082 			}
1083 			if (unlikely(!skb))
1084 				goto wait_for_memory;
1085 
1086 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1087 			copy = mss;
1088 		}
1089 		if (copy > size)
1090 			copy = size;
1091 
1092 		if (msg->msg_flags & MSG_SPLICE_PAGES) {
1093 			err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
1094 			if (err < 0) {
1095 				if (err == -EMSGSIZE)
1096 					goto new_buf;
1097 				goto do_fault;
1098 			}
1099 			copy = err;
1100 			sk_wmem_queued_add(sk, copy);
1101 		} else if (skb_tailroom(skb) > 0) {
1102 			copy = min(copy, skb_tailroom(skb));
1103 			if (is_tls_tx(csk))
1104 				copy = min_t(int, copy, csk->tlshws.txleft);
1105 			err = skb_add_data_nocache(sk, skb,
1106 						   &msg->msg_iter, copy);
1107 			if (err)
1108 				goto do_fault;
1109 		} else {
1110 			int i = skb_shinfo(skb)->nr_frags;
1111 			struct page *page = TCP_PAGE(sk);
1112 			int pg_size = PAGE_SIZE;
1113 			int off = TCP_OFF(sk);
1114 			bool merge;
1115 
1116 			if (page)
1117 				pg_size = page_size(page);
1118 			if (off < pg_size &&
1119 			    skb_can_coalesce(skb, i, page, off)) {
1120 				merge = true;
1121 				goto copy;
1122 			}
1123 			merge = false;
1124 			if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
1125 			    MAX_SKB_FRAGS))
1126 				goto new_buf;
1127 
1128 			if (page && off == pg_size) {
1129 				put_page(page);
1130 				TCP_PAGE(sk) = page = NULL;
1131 				pg_size = PAGE_SIZE;
1132 			}
1133 
1134 			if (!page) {
1135 				gfp_t gfp = sk->sk_allocation;
1136 				int order = cdev->send_page_order;
1137 
1138 				if (order) {
1139 					page = alloc_pages(gfp | __GFP_COMP |
1140 							   __GFP_NOWARN |
1141 							   __GFP_NORETRY,
1142 							   order);
1143 					if (page)
1144 						pg_size <<= order;
1145 				}
1146 				if (!page) {
1147 					page = alloc_page(gfp);
1148 					pg_size = PAGE_SIZE;
1149 				}
1150 				if (!page)
1151 					goto wait_for_memory;
1152 				off = 0;
1153 			}
1154 copy:
1155 			if (copy > pg_size - off)
1156 				copy = pg_size - off;
1157 			if (is_tls_tx(csk))
1158 				copy = min_t(int, copy, csk->tlshws.txleft);
1159 
1160 			err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
1161 							     skb, page,
1162 							     off, copy);
1163 			if (unlikely(err)) {
1164 				if (!TCP_PAGE(sk)) {
1165 					TCP_PAGE(sk) = page;
1166 					TCP_OFF(sk) = 0;
1167 				}
1168 				goto do_fault;
1169 			}
1170 			/* Update the skb. */
1171 			if (merge) {
1172 				skb_frag_size_add(
1173 						&skb_shinfo(skb)->frags[i - 1],
1174 						copy);
1175 			} else {
1176 				skb_fill_page_desc(skb, i, page, off, copy);
1177 				if (off + copy < pg_size) {
1178 					/* space left keep page */
1179 					get_page(page);
1180 					TCP_PAGE(sk) = page;
1181 				} else {
1182 					TCP_PAGE(sk) = NULL;
1183 				}
1184 			}
1185 			TCP_OFF(sk) = off + copy;
1186 		}
1187 		if (unlikely(skb->len == mss))
1188 			tx_skb_finalize(skb);
1189 		tp->write_seq += copy;
1190 		copied += copy;
1191 		size -= copy;
1192 
1193 		if (is_tls_tx(csk))
1194 			csk->tlshws.txleft -= copy;
1195 
1196 		if (corked(tp, flags) &&
1197 		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
1198 			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
1199 
1200 		if (size == 0)
1201 			goto out;
1202 
1203 		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
1204 			push_frames_if_head(sk);
1205 		continue;
1206 wait_for_sndbuf:
1207 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1208 wait_for_memory:
1209 		err = csk_wait_memory(cdev, sk, &timeo);
1210 		if (err)
1211 			goto do_error;
1212 	}
1213 out:
1214 	csk_reset_flag(csk, CSK_TX_MORE_DATA);
1215 	if (copied)
1216 		chtls_tcp_push(sk, flags);
1217 done:
1218 	release_sock(sk);
1219 	return copied;
1220 do_fault:
1221 	if (!skb->len) {
1222 		__skb_unlink(skb, &csk->txq);
1223 		sk->sk_wmem_queued -= skb->truesize;
1224 		__kfree_skb(skb);
1225 	}
1226 do_error:
1227 	if (copied)
1228 		goto out;
1229 out_err:
1230 	if (csk_conn_inline(csk))
1231 		csk_reset_flag(csk, CSK_TX_MORE_DATA);
1232 	copied = sk_stream_error(sk, flags, err);
1233 	goto done;
1234 }
1235 
1236 void chtls_splice_eof(struct socket *sock)
1237 {
1238 	struct sock *sk = sock->sk;
1239 
1240 	lock_sock(sk);
1241 	chtls_tcp_push(sk, 0);
1242 	release_sock(sk);
1243 }
1244 
1245 static void chtls_select_window(struct sock *sk)
1246 {
1247 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1248 	struct tcp_sock *tp = tcp_sk(sk);
1249 	unsigned int wnd = tp->rcv_wnd;
1250 
1251 	wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
1252 	wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
1253 
1254 	if (wnd > MAX_RCV_WND)
1255 		wnd = MAX_RCV_WND;
1256 
1257 /*
1258  * Check if we need to grow the receive window in response to an increase in
1259  * the socket's receive buffer size.  Some applications increase the buffer
1260  * size dynamically and rely on the window to grow accordingly.
1261  */
1262 
1263 	if (wnd > tp->rcv_wnd) {
1264 		tp->rcv_wup -= wnd - tp->rcv_wnd;
1265 		tp->rcv_wnd = wnd;
1266 		/* Mark the receive window as updated */
1267 		csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
1268 	}
1269 }
1270 
1271 /*
1272  * Send RX credits through an RX_DATA_ACK CPL message.  We are permitted
1273  * to return without sending the message in case we cannot allocate
1274  * an sk_buff.  Returns the number of credits sent.
1275  */
1276 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
1277 {
1278 	struct cpl_rx_data_ack *req;
1279 	struct sk_buff *skb;
1280 
1281 	skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
1282 	if (!skb)
1283 		return 0;
1284 	__skb_put(skb, sizeof(*req));
1285 	req = (struct cpl_rx_data_ack *)skb->head;
1286 
1287 	set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
1288 	INIT_TP_WR(req, csk->tid);
1289 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1290 						    csk->tid));
1291 	req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
1292 				       RX_FORCE_ACK_F);
1293 	cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
1294 	return credits;
1295 }
1296 
1297 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
1298 			     TCPF_FIN_WAIT1 | \
1299 			     TCPF_FIN_WAIT2)
1300 
1301 /*
1302  * Called after some received data has been read.  It returns RX credits
1303  * to the HW for the amount of data processed.
1304  */
1305 static void chtls_cleanup_rbuf(struct sock *sk, int copied)
1306 {
1307 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1308 	struct tcp_sock *tp;
1309 	int must_send;
1310 	u32 credits;
1311 	u32 thres;
1312 
1313 	thres = 15 * 1024;
1314 
1315 	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
1316 		return;
1317 
1318 	chtls_select_window(sk);
1319 	tp = tcp_sk(sk);
1320 	credits = tp->copied_seq - tp->rcv_wup;
1321 	if (unlikely(!credits))
1322 		return;
1323 
1324 /*
1325  * For coalescing to work effectively ensure the receive window has
1326  * at least 16KB left.
1327  */
1328 	must_send = credits + 16384 >= tp->rcv_wnd;
1329 
1330 	if (must_send || credits >= thres)
1331 		tp->rcv_wup += send_rx_credits(csk, credits);
1332 }
1333 
1334 static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1335 			    int flags, int *addr_len)
1336 {
1337 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1338 	struct chtls_hws *hws = &csk->tlshws;
1339 	struct net_device *dev = csk->egress_dev;
1340 	struct adapter *adap = netdev2adap(dev);
1341 	struct tcp_sock *tp = tcp_sk(sk);
1342 	unsigned long avail;
1343 	int buffers_freed;
1344 	int copied = 0;
1345 	int target;
1346 	long timeo;
1347 	int ret;
1348 
1349 	buffers_freed = 0;
1350 
1351 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1352 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1353 
1354 	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1355 		chtls_cleanup_rbuf(sk, copied);
1356 
1357 	do {
1358 		struct sk_buff *skb;
1359 		u32 offset = 0;
1360 
1361 		if (unlikely(tp->urg_data &&
1362 			     tp->urg_seq == tp->copied_seq)) {
1363 			if (copied)
1364 				break;
1365 			if (signal_pending(current)) {
1366 				copied = timeo ? sock_intr_errno(timeo) :
1367 					-EAGAIN;
1368 				break;
1369 			}
1370 		}
1371 		skb = skb_peek(&sk->sk_receive_queue);
1372 		if (skb)
1373 			goto found_ok_skb;
1374 		if (csk->wr_credits &&
1375 		    skb_queue_len(&csk->txq) &&
1376 		    chtls_push_frames(csk, csk->wr_credits ==
1377 				      csk->wr_max_credits))
1378 			sk->sk_write_space(sk);
1379 
1380 		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
1381 			break;
1382 
1383 		if (copied) {
1384 			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1385 			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1386 			    signal_pending(current))
1387 				break;
1388 
1389 			if (!timeo)
1390 				break;
1391 		} else {
1392 			if (sock_flag(sk, SOCK_DONE))
1393 				break;
1394 			if (sk->sk_err) {
1395 				copied = sock_error(sk);
1396 				break;
1397 			}
1398 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1399 				break;
1400 			if (sk->sk_state == TCP_CLOSE) {
1401 				copied = -ENOTCONN;
1402 				break;
1403 			}
1404 			if (!timeo) {
1405 				copied = -EAGAIN;
1406 				break;
1407 			}
1408 			if (signal_pending(current)) {
1409 				copied = sock_intr_errno(timeo);
1410 				break;
1411 			}
1412 		}
1413 		if (READ_ONCE(sk->sk_backlog.tail)) {
1414 			release_sock(sk);
1415 			lock_sock(sk);
1416 			chtls_cleanup_rbuf(sk, copied);
1417 			continue;
1418 		}
1419 
1420 		if (copied >= target)
1421 			break;
1422 		chtls_cleanup_rbuf(sk, copied);
1423 		ret = sk_wait_data(sk, &timeo, NULL);
1424 		if (ret < 0) {
1425 			copied = copied ? : ret;
1426 			goto unlock;
1427 		}
1428 		continue;
1429 found_ok_skb:
1430 		if (!skb->len) {
1431 			skb_dstref_steal(skb);
1432 			__skb_unlink(skb, &sk->sk_receive_queue);
1433 			kfree_skb(skb);
1434 
1435 			if (!copied && !timeo) {
1436 				copied = -EAGAIN;
1437 				break;
1438 			}
1439 
1440 			if (copied < target) {
1441 				release_sock(sk);
1442 				lock_sock(sk);
1443 				continue;
1444 			}
1445 			break;
1446 		}
1447 		offset = hws->copied_seq;
1448 		avail = skb->len - offset;
1449 		if (len < avail)
1450 			avail = len;
1451 
1452 		if (unlikely(tp->urg_data)) {
1453 			u32 urg_offset = tp->urg_seq - tp->copied_seq;
1454 
1455 			if (urg_offset < avail) {
1456 				if (urg_offset) {
1457 					avail = urg_offset;
1458 				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
1459 					/* First byte is urgent, skip */
1460 					tp->copied_seq++;
1461 					offset++;
1462 					avail--;
1463 					if (!avail)
1464 						goto skip_copy;
1465 				}
1466 			}
1467 		}
1468 		/* Set record type if not already done. For a non-data record,
1469 		 * do not proceed if record type could not be copied.
1470 		 */
1471 		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1472 			struct tls_hdr *thdr = (struct tls_hdr *)skb->data;
1473 			int cerr = 0;
1474 
1475 			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
1476 					sizeof(thdr->type), &thdr->type);
1477 
1478 			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
1479 				copied = -EIO;
1480 				break;
1481 			}
1482 			/*  don't send tls header, skip copy */
1483 			goto skip_copy;
1484 		}
1485 
1486 		if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
1487 			if (!copied) {
1488 				copied = -EFAULT;
1489 				break;
1490 			}
1491 		}
1492 
1493 		copied += avail;
1494 		len -= avail;
1495 		hws->copied_seq += avail;
1496 skip_copy:
1497 		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1498 			tp->urg_data = 0;
1499 
1500 		if ((avail + offset) >= skb->len) {
1501 			struct sk_buff *next_skb;
1502 			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1503 				tp->copied_seq += skb->len;
1504 				hws->rcvpld = skb->hdr_len;
1505 			} else {
1506 				atomic_inc(&adap->chcr_stats.tls_pdu_rx);
1507 				tp->copied_seq += hws->rcvpld;
1508 			}
1509 			chtls_free_skb(sk, skb);
1510 			buffers_freed++;
1511 			hws->copied_seq = 0;
1512 			next_skb = skb_peek(&sk->sk_receive_queue);
1513 			if (copied >= target && !next_skb)
1514 				break;
1515 			if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
1516 				break;
1517 		}
1518 	} while (len > 0);
1519 
1520 	if (buffers_freed)
1521 		chtls_cleanup_rbuf(sk, copied);
1522 
1523 unlock:
1524 	release_sock(sk);
1525 	return copied;
1526 }
1527 
1528 /*
1529  * Peek at data in a socket's receive buffer.
1530  */
1531 static int peekmsg(struct sock *sk, struct msghdr *msg,
1532 		   size_t len, int flags)
1533 {
1534 	struct tcp_sock *tp = tcp_sk(sk);
1535 	u32 peek_seq, offset;
1536 	struct sk_buff *skb;
1537 	int copied = 0;
1538 	size_t avail;          /* amount of available data in current skb */
1539 	long timeo;
1540 	int ret;
1541 
1542 	lock_sock(sk);
1543 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1544 	peek_seq = tp->copied_seq;
1545 
1546 	do {
1547 		if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
1548 			if (copied)
1549 				break;
1550 			if (signal_pending(current)) {
1551 				copied = timeo ? sock_intr_errno(timeo) :
1552 				-EAGAIN;
1553 				break;
1554 			}
1555 		}
1556 
1557 		skb_queue_walk(&sk->sk_receive_queue, skb) {
1558 			offset = peek_seq - ULP_SKB_CB(skb)->seq;
1559 			if (offset < skb->len)
1560 				goto found_ok_skb;
1561 		}
1562 
1563 		/* empty receive queue */
1564 		if (copied)
1565 			break;
1566 		if (sock_flag(sk, SOCK_DONE))
1567 			break;
1568 		if (sk->sk_err) {
1569 			copied = sock_error(sk);
1570 			break;
1571 		}
1572 		if (sk->sk_shutdown & RCV_SHUTDOWN)
1573 			break;
1574 		if (sk->sk_state == TCP_CLOSE) {
1575 			copied = -ENOTCONN;
1576 			break;
1577 		}
1578 		if (!timeo) {
1579 			copied = -EAGAIN;
1580 			break;
1581 		}
1582 		if (signal_pending(current)) {
1583 			copied = sock_intr_errno(timeo);
1584 			break;
1585 		}
1586 
1587 		if (READ_ONCE(sk->sk_backlog.tail)) {
1588 			/* Do not sleep, just process backlog. */
1589 			release_sock(sk);
1590 			lock_sock(sk);
1591 		} else {
1592 			ret = sk_wait_data(sk, &timeo, NULL);
1593 			if (ret < 0) {
1594 				/* here 'copied' is 0 due to previous checks */
1595 				copied = ret;
1596 				break;
1597 			}
1598 		}
1599 
1600 		if (unlikely(peek_seq != tp->copied_seq)) {
1601 			if (net_ratelimit())
1602 				pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
1603 					current->comm, current->pid);
1604 			peek_seq = tp->copied_seq;
1605 		}
1606 		continue;
1607 
1608 found_ok_skb:
1609 		avail = skb->len - offset;
1610 		if (len < avail)
1611 			avail = len;
1612 		/*
1613 		 * Do we have urgent data here?  We need to skip over the
1614 		 * urgent byte.
1615 		 */
1616 		if (unlikely(tp->urg_data)) {
1617 			u32 urg_offset = tp->urg_seq - peek_seq;
1618 
1619 			if (urg_offset < avail) {
1620 				/*
1621 				 * The amount of data we are preparing to copy
1622 				 * contains urgent data.
1623 				 */
1624 				if (!urg_offset) { /* First byte is urgent */
1625 					if (!sock_flag(sk, SOCK_URGINLINE)) {
1626 						peek_seq++;
1627 						offset++;
1628 						avail--;
1629 					}
1630 					if (!avail)
1631 						continue;
1632 				} else {
1633 					/* stop short of the urgent data */
1634 					avail = urg_offset;
1635 				}
1636 			}
1637 		}
1638 
1639 		/*
1640 		 * If MSG_TRUNC is specified the data is discarded.
1641 		 */
1642 		if (likely(!(flags & MSG_TRUNC)))
1643 			if (skb_copy_datagram_msg(skb, offset, msg, len)) {
1644 				if (!copied) {
1645 					copied = -EFAULT;
1646 					break;
1647 				}
1648 			}
1649 		peek_seq += avail;
1650 		copied += avail;
1651 		len -= avail;
1652 	} while (len > 0);
1653 
1654 	release_sock(sk);
1655 	return copied;
1656 }
1657 
1658 int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1659 		  int flags, int *addr_len)
1660 {
1661 	struct tcp_sock *tp = tcp_sk(sk);
1662 	struct chtls_sock *csk;
1663 	unsigned long avail;    /* amount of available data in current skb */
1664 	int buffers_freed;
1665 	int copied = 0;
1666 	long timeo;
1667 	int target;             /* Read at least this many bytes */
1668 	int ret;
1669 
1670 	buffers_freed = 0;
1671 
1672 	if (unlikely(flags & MSG_OOB))
1673 		return tcp_prot.recvmsg(sk, msg, len, flags, addr_len);
1674 
1675 	if (unlikely(flags & MSG_PEEK))
1676 		return peekmsg(sk, msg, len, flags);
1677 
1678 	if (sk_can_busy_loop(sk) &&
1679 	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
1680 	    sk->sk_state == TCP_ESTABLISHED)
1681 		sk_busy_loop(sk, flags & MSG_DONTWAIT);
1682 
1683 	lock_sock(sk);
1684 	csk = rcu_dereference_sk_user_data(sk);
1685 
1686 	if (is_tls_rx(csk))
1687 		return chtls_pt_recvmsg(sk, msg, len, flags, addr_len);
1688 
1689 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1690 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1691 
1692 	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1693 		chtls_cleanup_rbuf(sk, copied);
1694 
1695 	do {
1696 		struct sk_buff *skb;
1697 		u32 offset;
1698 
1699 		if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
1700 			if (copied)
1701 				break;
1702 			if (signal_pending(current)) {
1703 				copied = timeo ? sock_intr_errno(timeo) :
1704 					-EAGAIN;
1705 				break;
1706 			}
1707 		}
1708 
1709 		skb = skb_peek(&sk->sk_receive_queue);
1710 		if (skb)
1711 			goto found_ok_skb;
1712 
1713 		if (csk->wr_credits &&
1714 		    skb_queue_len(&csk->txq) &&
1715 		    chtls_push_frames(csk, csk->wr_credits ==
1716 				      csk->wr_max_credits))
1717 			sk->sk_write_space(sk);
1718 
1719 		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
1720 			break;
1721 
1722 		if (copied) {
1723 			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1724 			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1725 			    signal_pending(current))
1726 				break;
1727 		} else {
1728 			if (sock_flag(sk, SOCK_DONE))
1729 				break;
1730 			if (sk->sk_err) {
1731 				copied = sock_error(sk);
1732 				break;
1733 			}
1734 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1735 				break;
1736 			if (sk->sk_state == TCP_CLOSE) {
1737 				copied = -ENOTCONN;
1738 				break;
1739 			}
1740 			if (!timeo) {
1741 				copied = -EAGAIN;
1742 				break;
1743 			}
1744 			if (signal_pending(current)) {
1745 				copied = sock_intr_errno(timeo);
1746 				break;
1747 			}
1748 		}
1749 
1750 		if (READ_ONCE(sk->sk_backlog.tail)) {
1751 			release_sock(sk);
1752 			lock_sock(sk);
1753 			chtls_cleanup_rbuf(sk, copied);
1754 			continue;
1755 		}
1756 
1757 		if (copied >= target)
1758 			break;
1759 		chtls_cleanup_rbuf(sk, copied);
1760 		ret = sk_wait_data(sk, &timeo, NULL);
1761 		if (ret < 0) {
1762 			copied = copied ? : ret;
1763 			goto unlock;
1764 		}
1765 		continue;
1766 
1767 found_ok_skb:
1768 		if (!skb->len) {
1769 			chtls_kfree_skb(sk, skb);
1770 			if (!copied && !timeo) {
1771 				copied = -EAGAIN;
1772 				break;
1773 			}
1774 
1775 			if (copied < target)
1776 				continue;
1777 
1778 			break;
1779 		}
1780 
1781 		offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
1782 		avail = skb->len - offset;
1783 		if (len < avail)
1784 			avail = len;
1785 
1786 		if (unlikely(tp->urg_data)) {
1787 			u32 urg_offset = tp->urg_seq - tp->copied_seq;
1788 
1789 			if (urg_offset < avail) {
1790 				if (urg_offset) {
1791 					avail = urg_offset;
1792 				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
1793 					tp->copied_seq++;
1794 					offset++;
1795 					avail--;
1796 					if (!avail)
1797 						goto skip_copy;
1798 				}
1799 			}
1800 		}
1801 
1802 		if (likely(!(flags & MSG_TRUNC))) {
1803 			if (skb_copy_datagram_msg(skb, offset,
1804 						  msg, avail)) {
1805 				if (!copied) {
1806 					copied = -EFAULT;
1807 					break;
1808 				}
1809 			}
1810 		}
1811 
1812 		tp->copied_seq += avail;
1813 		copied += avail;
1814 		len -= avail;
1815 
1816 skip_copy:
1817 		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1818 			tp->urg_data = 0;
1819 
1820 		if (avail + offset >= skb->len) {
1821 			chtls_free_skb(sk, skb);
1822 			buffers_freed++;
1823 
1824 			if  (copied >= target &&
1825 			     !skb_peek(&sk->sk_receive_queue))
1826 				break;
1827 		}
1828 	} while (len > 0);
1829 
1830 	if (buffers_freed)
1831 		chtls_cleanup_rbuf(sk, copied);
1832 
1833 unlock:
1834 	release_sock(sk);
1835 	return copied;
1836 }
1837