1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2018 Chelsio Communications, Inc.
4 *
5 * Written by: Atul Gupta (atul.gupta@chelsio.com)
6 */
7
8 #include <linux/module.h>
9 #include <linux/list.h>
10 #include <linux/workqueue.h>
11 #include <linux/skbuff.h>
12 #include <linux/timer.h>
13 #include <linux/notifier.h>
14 #include <linux/inetdevice.h>
15 #include <linux/ip.h>
16 #include <linux/tcp.h>
17 #include <linux/sched/signal.h>
18 #include <net/tcp.h>
19 #include <net/busy_poll.h>
20 #include <crypto/aes.h>
21
22 #include "chtls.h"
23 #include "chtls_cm.h"
24
is_tls_tx(struct chtls_sock * csk)25 static bool is_tls_tx(struct chtls_sock *csk)
26 {
27 return csk->tlshws.txkey >= 0;
28 }
29
is_tls_rx(struct chtls_sock * csk)30 static bool is_tls_rx(struct chtls_sock *csk)
31 {
32 return csk->tlshws.rxkey >= 0;
33 }
34
data_sgl_len(const struct sk_buff * skb)35 static int data_sgl_len(const struct sk_buff *skb)
36 {
37 unsigned int cnt;
38
39 cnt = skb_shinfo(skb)->nr_frags;
40 return sgl_len(cnt) * 8;
41 }
42
nos_ivs(struct sock * sk,unsigned int size)43 static int nos_ivs(struct sock *sk, unsigned int size)
44 {
45 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
46
47 return DIV_ROUND_UP(size, csk->tlshws.mfs);
48 }
49
set_ivs_imm(struct sock * sk,const struct sk_buff * skb)50 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
51 {
52 int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
53 int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
54
55 if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
56 MAX_IMM_OFLD_TX_DATA_WR_LEN) {
57 ULP_SKB_CB(skb)->ulp.tls.iv = 1;
58 return 1;
59 }
60 ULP_SKB_CB(skb)->ulp.tls.iv = 0;
61 return 0;
62 }
63
max_ivs_size(struct sock * sk,int size)64 static int max_ivs_size(struct sock *sk, int size)
65 {
66 return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
67 }
68
ivs_size(struct sock * sk,const struct sk_buff * skb)69 static int ivs_size(struct sock *sk, const struct sk_buff *skb)
70 {
71 return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
72 CIPHER_BLOCK_SIZE) : 0;
73 }
74
flowc_wr_credits(int nparams,int * flowclenp)75 static int flowc_wr_credits(int nparams, int *flowclenp)
76 {
77 int flowclen16, flowclen;
78
79 flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
80 flowclen16 = DIV_ROUND_UP(flowclen, 16);
81 flowclen = flowclen16 * 16;
82
83 if (flowclenp)
84 *flowclenp = flowclen;
85
86 return flowclen16;
87 }
88
create_flowc_wr_skb(struct sock * sk,struct fw_flowc_wr * flowc,int flowclen)89 static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
90 struct fw_flowc_wr *flowc,
91 int flowclen)
92 {
93 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
94 struct sk_buff *skb;
95
96 skb = alloc_skb(flowclen, GFP_ATOMIC);
97 if (!skb)
98 return NULL;
99
100 __skb_put_data(skb, flowc, flowclen);
101 skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
102
103 return skb;
104 }
105
send_flowc_wr(struct sock * sk,struct fw_flowc_wr * flowc,int flowclen)106 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
107 int flowclen)
108 {
109 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
110 struct tcp_sock *tp = tcp_sk(sk);
111 struct sk_buff *skb;
112 int flowclen16;
113 int ret;
114
115 flowclen16 = flowclen / 16;
116
117 if (csk_flag(sk, CSK_TX_DATA_SENT)) {
118 skb = create_flowc_wr_skb(sk, flowc, flowclen);
119 if (!skb)
120 return -ENOMEM;
121
122 skb_entail(sk, skb,
123 ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
124 return 0;
125 }
126
127 ret = cxgb4_immdata_send(csk->egress_dev,
128 csk->txq_idx,
129 flowc, flowclen);
130 if (!ret)
131 return flowclen16;
132 skb = create_flowc_wr_skb(sk, flowc, flowclen);
133 if (!skb)
134 return -ENOMEM;
135 send_or_defer(sk, tp, skb, 0);
136 return flowclen16;
137 }
138
/*
 * Translate a TCP socket state into the matching FW_FLOWC_MNEM_TCPSTATE_*
 * value.  Any state without an explicit mapping is reported as ESTABLISHED.
 */
static u8 tcp_state_to_flowc_state(u8 state)
{
	switch (state) {
	case TCP_CLOSE_WAIT:
		return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
	case TCP_FIN_WAIT1:
		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
	case TCP_CLOSING:
		return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
	case TCP_LAST_ACK:
		return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
	case TCP_FIN_WAIT2:
		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
	case TCP_ESTABLISHED:
	default:
		return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
	}
}
158
/*
 * Build a FLOWC work request describing the connection and hand it to
 * send_flowc_wr().
 *
 * @sk:      socket whose connection state is described
 * @compl:   value placed in the WR's completion-request field
 * @snd_nxt: not read by this function; SNDNXT is taken from tcp_sk(sk)
 * @rcv_nxt: not read by this function; RCVNXT is taken from tcp_sk(sk)
 *
 * The WR is assembled on the stack.  RCV_SCALE, ULD_MODE and TXDATAPLEN_MAX
 * are only added when the corresponding value is set.
 *
 * Returns whatever send_flowc_wr() returns.
 */
int send_tx_flowc_wr(struct sock *sk, int compl,
		     u32 snd_nxt, u32 rcv_nxt)
{
	struct flowc_packed {
		struct fw_flowc_wr fc;
		struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX];
	} __packed sflowc;
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct fw_flowc_wr *flowc = &sflowc.fc;
	struct tcp_sock *tp = tcp_sk(sk);
	int paramidx = 0;
	int flowclen16;
	int flowclen;

	memset(&sflowc, 0, sizeof(sflowc));

/* Append one mnemonic/value pair and advance the parameter index. */
#define FLOWC_PARAM(__m, __v) \
	do { \
		flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
		flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
		paramidx++; \
	} while (0)

	FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
	FLOWC_PARAM(CH, csk->tx_chan);
	FLOWC_PARAM(PORT, csk->tx_chan);
	FLOWC_PARAM(IQID, csk->rss_qid);
	FLOWC_PARAM(SNDNXT, tp->snd_nxt);
	FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
	FLOWC_PARAM(SNDBUF, csk->sndbuf);
	FLOWC_PARAM(MSS, tp->mss_cache);
	FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));

	if (SND_WSCALE(tp))
		FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));

	if (csk->ulp_mode == ULP_MODE_TLS)
		FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);

	if (csk->tlshws.fcplenmax)
		FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);

#undef FLOWC_PARAM

	/* paramidx now holds the number of parameters actually written. */
	flowclen16 = flowc_wr_credits(paramidx, &flowclen);
	flowc->op_to_nparams =
		cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
			    FW_WR_COMPL_V(compl) |
			    FW_FLOWC_WR_NPARAMS_V(paramidx));
	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
					  FW_WR_FLOWID_V(csk->tid));

	return send_flowc_wr(sk, flowc, flowclen);
}
217
218 /* Copy IVs to WR */
tls_copy_ivs(struct sock * sk,struct sk_buff * skb)219 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
220
221 {
222 struct chtls_sock *csk;
223 unsigned char *iv_loc;
224 struct chtls_hws *hws;
225 unsigned char *ivs;
226 u16 number_of_ivs;
227 struct page *page;
228 int err = 0;
229
230 csk = rcu_dereference_sk_user_data(sk);
231 hws = &csk->tlshws;
232 number_of_ivs = nos_ivs(sk, skb->len);
233
234 if (number_of_ivs > MAX_IVS_PAGE) {
235 pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
236 return -ENOMEM;
237 }
238
239 /* generate the IVs */
240 ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
241 if (!ivs)
242 return -ENOMEM;
243 get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
244
245 if (skb_ulp_tls_iv_imm(skb)) {
246 /* send the IVs as immediate data in the WR */
247 iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
248 CIPHER_BLOCK_SIZE);
249 if (iv_loc)
250 memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
251
252 hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
253 } else {
254 /* Send the IVs as sgls */
255 /* Already accounted IV DSGL for credits */
256 skb_shinfo(skb)->nr_frags--;
257 page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
258 if (!page) {
259 pr_info("%s : Page allocation for IVs failed\n",
260 __func__);
261 err = -ENOMEM;
262 goto out;
263 }
264 memcpy(page_address(page), ivs, number_of_ivs *
265 CIPHER_BLOCK_SIZE);
266 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
267 number_of_ivs * CIPHER_BLOCK_SIZE);
268 hws->ivsize = 0;
269 }
270 out:
271 kfree(ivs);
272 return err;
273 }
274
275 /* Copy Key to WR */
tls_copy_tx_key(struct sock * sk,struct sk_buff * skb)276 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
277 {
278 struct ulptx_sc_memrd *sc_memrd;
279 struct chtls_sock *csk;
280 struct chtls_dev *cdev;
281 struct ulptx_idata *sc;
282 struct chtls_hws *hws;
283 u32 immdlen;
284 int kaddr;
285
286 csk = rcu_dereference_sk_user_data(sk);
287 hws = &csk->tlshws;
288 cdev = csk->cdev;
289
290 immdlen = sizeof(*sc) + sizeof(*sc_memrd);
291 kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
292 sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
293 if (sc) {
294 sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
295 sc->len = htonl(0);
296 sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
297 sc_memrd->cmd_to_len =
298 htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
299 ULP_TX_SC_MORE_V(1) |
300 ULPTX_LEN16_V(hws->keylen >> 4));
301 sc_memrd->addr = htonl(kaddr);
302 }
303 }
304
tlstx_incr_seqnum(struct chtls_hws * hws)305 static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
306 {
307 return hws->tx_seq_no++;
308 }
309
is_sg_request(const struct sk_buff * skb)310 static bool is_sg_request(const struct sk_buff *skb)
311 {
312 return skb->peeked ||
313 (skb->len > MAX_IMM_ULPTX_WR_LEN);
314 }
315
316 /*
317 * Returns true if an sk_buff carries urgent data.
318 */
skb_urgent(struct sk_buff * skb)319 static bool skb_urgent(struct sk_buff *skb)
320 {
321 return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
322 }
323
324 /* TLS content type for CPL SFO */
tls_content_type(unsigned char content_type)325 static unsigned char tls_content_type(unsigned char content_type)
326 {
327 switch (content_type) {
328 case TLS_HDR_TYPE_CCS:
329 return CPL_TX_TLS_SFO_TYPE_CCS;
330 case TLS_HDR_TYPE_ALERT:
331 return CPL_TX_TLS_SFO_TYPE_ALERT;
332 case TLS_HDR_TYPE_HANDSHAKE:
333 return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
334 case TLS_HDR_TYPE_HEARTBEAT:
335 return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
336 }
337 return CPL_TX_TLS_SFO_TYPE_DATA;
338 }
339
/*
 * Prepend the FW_TLSTX_DATA_WR header and the CPL_TX_TLS_SFO message to @skb.
 *
 * @dlen:     TLS payload length for this WR
 * @tls_immd: non-zero if the payload is sent as immediate data
 * @credits:  WR length in 16-byte units, written to the LEN16 field
 * @expn:     expansion bytes, added to @dlen for the WR's plen field
 * @pdus:     PDU count, written to the NUMIVS fields
 *
 * Also bumps the adapter's tls_pdu_tx counter, refreshes the NUM_IVS bits of
 * the cached s-command, and consumes one TX sequence number.
 */
static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			   int dlen, int tls_immd, u32 credits,
			   int expn, int pdus)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	const int iv_imm = skb_ulp_tls_iv_imm(skb);
	struct adapter *adap = netdev2adap(csk->egress_dev);
	struct chtls_hws *hws = &csk->tlshws;
	const int plen = dlen + expn;
	struct fw_tlstx_data_wr *req_wr;
	struct cpl_tx_tls_sfo *req_cpl;
	unsigned int ulp_flags;
	unsigned char data_type;
	int seg_len = dlen;
	int immd_len;

	/* A single segment never exceeds the maximum fragment size. */
	if (!(seg_len < hws->mfs))
		seg_len = hws->mfs;
	atomic_inc(&adap->chcr_stats.tls_pdu_tx);

	/* Replace the low seven bits of the s-command with the IV count. */
	hws->scmd.seqno_numivs &= 0xffffff80;
	hws->scmd.seqno_numivs |= SCMD_NUM_IVS_V(pdus);

	/* The CPL goes in first so that the WR header ends up in front of it. */
	req_cpl = __skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
	req_wr = __skb_push(skb, sizeof(struct fw_tlstx_data_wr));

	immd_len = tls_immd ? seg_len : 0;
	req_wr->op_to_immdlen =
		htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
		      FW_TLSTX_DATA_WR_COMPL_V(1) |
		      FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
	req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
				     FW_TLSTX_DATA_WR_LEN16_V(credits));

	ulp_flags = TX_ULP_MODE_V(ULP_MODE_TLS);
	if (is_sg_request(skb)) {
		ulp_flags |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F;
		if (!(tcp_sk(sk)->nonagle & TCP_NAGLE_OFF))
			ulp_flags |= FW_OFLD_TX_DATA_WR_SHOVE_F;
	}

	req_wr->lsodisable_to_flags =
		htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
		      TX_URG_V(skb_urgent(skb)) |
		      T6_TX_FORCE_F | ulp_flags |
		      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
				 skb_queue_empty(&csk->txq)));

	req_wr->ctxloc_to_exp =
		htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
		      FW_TLSTX_DATA_WR_EXP_V(expn) |
		      FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
		      FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
		      FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));

	/* Length fields */
	req_wr->plen = htonl(plen);
	req_wr->mfs = htons(hws->mfs);
	req_wr->adjustedplen_pkd =
		htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
	req_wr->expinplenmax_pkd =
		htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
	req_wr->pdusinplenmax_pkd =
		FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
	req_wr->r10 = 0;

	data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
	req_cpl->op_to_seg_len =
		htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
		      CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
		      CPL_TX_TLS_SFO_CPL_LEN_V(2) |
		      CPL_TX_TLS_SFO_SEG_LEN_V(seg_len));
	/* Payload length excludes the expansion bytes. */
	req_cpl->pld_len = htonl(plen - expn);

	req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
		((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
		TLS_HDR_TYPE_HEARTBEAT : 0) |
		CPL_TX_TLS_SFO_PROTOVER_V(0));

	/* s-command */
	req_cpl->r1_lo = 0;
	req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs);
	req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
	req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
}
438
439 /*
440 * Calculate the TLS data expansion size
441 */
chtls_expansion_size(struct sock * sk,int data_len,int fullpdu,unsigned short * pducnt)442 static int chtls_expansion_size(struct sock *sk, int data_len,
443 int fullpdu,
444 unsigned short *pducnt)
445 {
446 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
447 struct chtls_hws *hws = &csk->tlshws;
448 struct tls_scmd *scmd = &hws->scmd;
449 int fragsize = hws->mfs;
450 int expnsize = 0;
451 int fragleft;
452 int fragcnt;
453 int expppdu;
454
455 if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
456 SCMD_CIPH_MODE_AES_GCM) {
457 expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
458 TLS_HEADER_LENGTH;
459
460 if (fullpdu) {
461 *pducnt = data_len / (expppdu + fragsize);
462 if (*pducnt > 32)
463 *pducnt = 32;
464 else if (!*pducnt)
465 *pducnt = 1;
466 expnsize = (*pducnt) * expppdu;
467 return expnsize;
468 }
469 fragcnt = (data_len / fragsize);
470 expnsize = fragcnt * expppdu;
471 fragleft = data_len % fragsize;
472 if (fragleft > 0)
473 expnsize += expppdu;
474 }
475 return expnsize;
476 }
477
478 /* WR with IV, KEY and CPL SFO added */
make_tlstx_data_wr(struct sock * sk,struct sk_buff * skb,int tls_tx_imm,int tls_len,u32 credits)479 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
480 int tls_tx_imm, int tls_len, u32 credits)
481 {
482 unsigned short pdus_per_ulp = 0;
483 struct chtls_sock *csk;
484 struct chtls_hws *hws;
485 int expn_sz;
486 int pdus;
487
488 csk = rcu_dereference_sk_user_data(sk);
489 hws = &csk->tlshws;
490 pdus = DIV_ROUND_UP(tls_len, hws->mfs);
491 expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
492 if (!hws->compute) {
493 hws->expansion = chtls_expansion_size(sk,
494 hws->fcplenmax,
495 1, &pdus_per_ulp);
496 hws->pdus = pdus_per_ulp;
497 hws->adjustlen = hws->pdus *
498 ((hws->expansion / hws->pdus) + hws->mfs);
499 hws->compute = 1;
500 }
501 if (tls_copy_ivs(sk, skb))
502 return;
503 tls_copy_tx_key(sk, skb);
504 tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
505 hws->tx_seq_no += (pdus - 1);
506 }
507
/*
 * Prepend a FW_OFLD_TX_DATA_WR header to @skb.
 *
 * @immdlen: immediate data length written to the WR
 * @len:     payload length written to the WR's plen field
 * @credits: WR length in 16-byte units
 * @compl:   value for the completion-request field
 */
static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			    unsigned int immdlen, int len,
			    u32 credits, u32 compl)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	const unsigned int opcode = FW_OFLD_TX_DATA_WR;
	struct fw_ofld_tx_data_wr *wr;
	unsigned int ulp_flags;

	wr = __skb_push(skb, sizeof(*wr));
	wr->op_to_immdlen = htonl(WR_OP_V(opcode) |
				  FW_WR_COMPL_V(compl) |
				  FW_WR_IMMDLEN_V(immdlen));
	wr->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
				 FW_WR_LEN16_V(credits));

	ulp_flags = TX_ULP_MODE_V(csk->ulp_mode);
	if (is_sg_request(skb)) {
		ulp_flags |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F;
		if (!(tcp_sk(sk)->nonagle & TCP_NAGLE_OFF))
			ulp_flags |= FW_OFLD_TX_DATA_WR_SHOVE_F;
	}

	wr->tunnel_to_proxy =
		htonl(ulp_flags |
		      TX_URG_V(skb_urgent(skb)) |
		      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
				 skb_queue_empty(&csk->txq)));
	wr->plen = htonl(len);
}
539
/*
 * Size of the TLS work-request headers for @skb: WR/CPL header, key area
 * and any immediate IVs.
 *
 * @size: when true only the size is computed.  When false, and the IVs are
 *        not immediate, the skb's fragment count is also incremented to
 *        reserve an SGL slot for the IVs.
 *
 * ivs_size() must run before the immediate-IV check because it refreshes the
 * skb's ulp.tls.iv flag.
 */
static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
			 bool size)
{
	int wr_size = TLS_WR_CPL_LEN;

	wr_size += KEY_ON_MEM_SZ;
	wr_size += ivs_size(csk->sk, skb);

	/* frags counted for IV dsgl */
	if (!size && !skb_ulp_tls_iv_imm(skb))
		skb_shinfo(skb)->nr_frags++;

	return wr_size;
}
558
is_ofld_imm(struct chtls_sock * csk,const struct sk_buff * skb)559 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
560 {
561 int length = skb->len;
562
563 if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
564 return false;
565
566 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
567 /* Check TLS header len for Immediate */
568 if (csk->ulp_mode == ULP_MODE_TLS &&
569 skb_ulp_tls_inline(skb))
570 length += chtls_wr_size(csk, skb, true);
571 else
572 length += sizeof(struct fw_ofld_tx_data_wr);
573
574 return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
575 }
576 return true;
577 }
578
calc_tx_flits(const struct sk_buff * skb,unsigned int immdlen)579 static unsigned int calc_tx_flits(const struct sk_buff *skb,
580 unsigned int immdlen)
581 {
582 unsigned int flits, cnt;
583
584 flits = immdlen / 8; /* headers */
585 cnt = skb_shinfo(skb)->nr_frags;
586 if (skb_tail_pointer(skb) != skb_transport_header(skb))
587 cnt++;
588 return flits + sgl_len(cnt);
589 }
590
/* ARP failure handler: drop the skb.  @handle is not used. */
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	(void)handle;
	kfree_skb(skb);
}
595
chtls_push_frames(struct chtls_sock * csk,int comp)596 int chtls_push_frames(struct chtls_sock *csk, int comp)
597 {
598 struct chtls_hws *hws = &csk->tlshws;
599 struct tcp_sock *tp;
600 struct sk_buff *skb;
601 int total_size = 0;
602 struct sock *sk;
603 int wr_size;
604
605 wr_size = sizeof(struct fw_ofld_tx_data_wr);
606 sk = csk->sk;
607 tp = tcp_sk(sk);
608
609 if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
610 return 0;
611
612 if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
613 return 0;
614
615 while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
616 (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
617 skb_queue_len(&csk->txq) > 1)) {
618 unsigned int credit_len = skb->len;
619 unsigned int credits_needed;
620 unsigned int completion = 0;
621 int tls_len = skb->len;/* TLS data len before IV/key */
622 unsigned int immdlen;
623 int len = skb->len; /* length [ulp bytes] inserted by hw */
624 int flowclen16 = 0;
625 int tls_tx_imm = 0;
626
627 immdlen = skb->len;
628 if (!is_ofld_imm(csk, skb)) {
629 immdlen = skb_transport_offset(skb);
630 if (skb_ulp_tls_inline(skb))
631 wr_size = chtls_wr_size(csk, skb, false);
632 credit_len = 8 * calc_tx_flits(skb, immdlen);
633 } else {
634 if (skb_ulp_tls_inline(skb)) {
635 wr_size = chtls_wr_size(csk, skb, false);
636 tls_tx_imm = 1;
637 }
638 }
639 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
640 credit_len += wr_size;
641 credits_needed = DIV_ROUND_UP(credit_len, 16);
642 if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
643 flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
644 tp->rcv_nxt);
645 if (flowclen16 <= 0)
646 break;
647 csk->wr_credits -= flowclen16;
648 csk->wr_unacked += flowclen16;
649 csk->wr_nondata += flowclen16;
650 csk_set_flag(csk, CSK_TX_DATA_SENT);
651 }
652
653 if (csk->wr_credits < credits_needed) {
654 if (skb_ulp_tls_inline(skb) &&
655 !skb_ulp_tls_iv_imm(skb))
656 skb_shinfo(skb)->nr_frags--;
657 break;
658 }
659
660 __skb_unlink(skb, &csk->txq);
661 skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
662 CPL_PRIORITY_DATA);
663 if (hws->ofld)
664 hws->txqid = (skb->queue_mapping >> 1);
665 skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
666 csk->wr_credits -= credits_needed;
667 csk->wr_unacked += credits_needed;
668 csk->wr_nondata = 0;
669 enqueue_wr(csk, skb);
670
671 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
672 if ((comp && csk->wr_unacked == credits_needed) ||
673 (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
674 csk->wr_unacked >= csk->wr_max_credits / 2) {
675 completion = 1;
676 csk->wr_unacked = 0;
677 }
678 if (skb_ulp_tls_inline(skb))
679 make_tlstx_data_wr(sk, skb, tls_tx_imm,
680 tls_len, credits_needed);
681 else
682 make_tx_data_wr(sk, skb, immdlen, len,
683 credits_needed, completion);
684 tp->snd_nxt += len;
685 tp->lsndtime = tcp_jiffies32;
686 if (completion)
687 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
688 } else {
689 struct cpl_close_con_req *req = cplhdr(skb);
690 unsigned int cmd = CPL_OPCODE_G(ntohl
691 (OPCODE_TID(req)));
692
693 if (cmd == CPL_CLOSE_CON_REQ)
694 csk_set_flag(csk,
695 CSK_CLOSE_CON_REQUESTED);
696
697 if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
698 (csk->wr_unacked >= csk->wr_max_credits / 2)) {
699 req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
700 csk->wr_unacked = 0;
701 }
702 }
703 total_size += skb->truesize;
704 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
705 csk_set_flag(csk, CSK_TX_WAIT_IDLE);
706 t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
707 cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
708 }
709 sk->sk_wmem_queued -= total_size;
710 return total_size;
711 }
712
mark_urg(struct tcp_sock * tp,int flags,struct sk_buff * skb)713 static void mark_urg(struct tcp_sock *tp, int flags,
714 struct sk_buff *skb)
715 {
716 if (unlikely(flags & MSG_OOB)) {
717 tp->snd_up = tp->write_seq;
718 ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
719 ULPCB_FLAG_BARRIER |
720 ULPCB_FLAG_NO_APPEND |
721 ULPCB_FLAG_NEED_HDR;
722 }
723 }
724
725 /*
726 * Returns true if a connection should send more data to TCP engine
727 */
should_push(struct sock * sk)728 static bool should_push(struct sock *sk)
729 {
730 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
731 struct chtls_dev *cdev = csk->cdev;
732 struct tcp_sock *tp = tcp_sk(sk);
733
734 /*
735 * If we've released our offload resources there's nothing to do ...
736 */
737 if (!cdev)
738 return false;
739
740 /*
741 * If there aren't any work requests in flight, or there isn't enough
742 * data in flight, or Nagle is off then send the current TX_DATA
743 * otherwise hold it and wait to accumulate more data.
744 */
745 return csk->wr_credits == csk->wr_max_credits ||
746 (tp->nonagle & TCP_NAGLE_OFF);
747 }
748
749 /*
750 * Returns true if a TCP socket is corked.
751 */
corked(const struct tcp_sock * tp,int flags)752 static bool corked(const struct tcp_sock *tp, int flags)
753 {
754 return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
755 }
756
757 /*
758 * Returns true if a send should try to push new data.
759 */
send_should_push(struct sock * sk,int flags)760 static bool send_should_push(struct sock *sk, int flags)
761 {
762 return should_push(sk) && !corked(tcp_sk(sk), flags);
763 }
764
chtls_tcp_push(struct sock * sk,int flags)765 void chtls_tcp_push(struct sock *sk, int flags)
766 {
767 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
768 int qlen = skb_queue_len(&csk->txq);
769
770 if (likely(qlen)) {
771 struct sk_buff *skb = skb_peek_tail(&csk->txq);
772 struct tcp_sock *tp = tcp_sk(sk);
773
774 mark_urg(tp, flags, skb);
775
776 if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
777 corked(tp, flags)) {
778 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
779 return;
780 }
781
782 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
783 if (qlen == 1 &&
784 ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
785 should_push(sk)))
786 chtls_push_frames(csk, 1);
787 }
788 }
789
/*
 * Calculate the linear-area size for a new send sk_buff.
 *
 * - If @io_len would not fit in the main body (SKB_MAX_HEAD(@len)), return 0
 *   so that only the header lives in the main body and all payload goes to
 *   page fragments.
 * - If the data will be accumulated rather than pushed right away, return
 *   the maximum main-body size so lots of data can be packed in.
 * - Otherwise size it tightly: return @io_len.
 *
 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't
 * arise in normal cases and when it does we are just wasting memory.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
	const int max_head = SKB_MAX_HEAD(len);

	if (io_len > max_head)
		return 0;

	return send_should_push(sk, flags) ? io_len : max_head;
}
818
skb_entail(struct sock * sk,struct sk_buff * skb,int flags)819 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
820 {
821 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
822 struct tcp_sock *tp = tcp_sk(sk);
823
824 ULP_SKB_CB(skb)->seq = tp->write_seq;
825 ULP_SKB_CB(skb)->flags = flags;
826 __skb_queue_tail(&csk->txq, skb);
827 sk->sk_wmem_queued += skb->truesize;
828
829 if (TCP_PAGE(sk) && TCP_OFF(sk)) {
830 put_page(TCP_PAGE(sk));
831 TCP_PAGE(sk) = NULL;
832 TCP_OFF(sk) = 0;
833 }
834 }
835
get_tx_skb(struct sock * sk,int size)836 static struct sk_buff *get_tx_skb(struct sock *sk, int size)
837 {
838 struct sk_buff *skb;
839
840 skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
841 if (likely(skb)) {
842 skb_reserve(skb, TX_HEADER_LEN);
843 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
844 skb_reset_transport_header(skb);
845 }
846 return skb;
847 }
848
/*
 * Allocate a TX skb for a TLS record.
 *
 * Headroom is reserved for the TLS WR header, the key area and the largest
 * possible set of IVs for @size bytes.  Data room of @size bytes is added
 * unless @zcopy is set.  The skb is queued with ULPCB_FLAG_NEED_HDR, marked
 * as TLS-offloaded and tagged with the connection's current record type.
 *
 * Returns the skb, or NULL if allocation fails.
 */
static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct sk_buff *rec_skb;

	rec_skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
			     KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
			    sk->sk_allocation);
	if (unlikely(!rec_skb))
		return rec_skb;

	skb_reserve(rec_skb, (TX_TLSHDR_LEN +
			      KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
	skb_entail(sk, rec_skb, ULPCB_FLAG_NEED_HDR);
	skb_reset_transport_header(rec_skb);
	ULP_SKB_CB(rec_skb)->ulp.tls.ofld = 1;
	ULP_SKB_CB(rec_skb)->ulp.tls.type = csk->tlshws.type;
	return rec_skb;
}
867
tx_skb_finalize(struct sk_buff * skb)868 static void tx_skb_finalize(struct sk_buff *skb)
869 {
870 struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
871
872 if (!(cb->flags & ULPCB_FLAG_NO_HDR))
873 cb->flags = ULPCB_FLAG_NEED_HDR;
874 cb->flags |= ULPCB_FLAG_NO_APPEND;
875 }
876
push_frames_if_head(struct sock * sk)877 static void push_frames_if_head(struct sock *sk)
878 {
879 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
880
881 if (skb_queue_len(&csk->txq) == 1)
882 chtls_push_frames(csk, 1);
883 }
884
chtls_skb_copy_to_page_nocache(struct sock * sk,struct iov_iter * from,struct sk_buff * skb,struct page * page,int off,int copy)885 static int chtls_skb_copy_to_page_nocache(struct sock *sk,
886 struct iov_iter *from,
887 struct sk_buff *skb,
888 struct page *page,
889 int off, int copy)
890 {
891 int err;
892
893 err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
894 off, copy, skb->len);
895 if (err)
896 return err;
897
898 skb->len += copy;
899 skb->data_len += copy;
900 skb->truesize += copy;
901 sk->sk_wmem_queued += copy;
902 return 0;
903 }
904
csk_mem_free(struct chtls_dev * cdev,struct sock * sk)905 static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
906 {
907 return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
908 }
909
csk_wait_memory(struct chtls_dev * cdev,struct sock * sk,long * timeo_p)910 static int csk_wait_memory(struct chtls_dev *cdev,
911 struct sock *sk, long *timeo_p)
912 {
913 DEFINE_WAIT_FUNC(wait, woken_wake_function);
914 int ret, err = 0;
915 long current_timeo;
916 long vm_wait = 0;
917 bool noblock;
918
919 current_timeo = *timeo_p;
920 noblock = (*timeo_p ? false : true);
921 if (csk_mem_free(cdev, sk)) {
922 current_timeo = get_random_u32_below(HZ / 5) + 2;
923 vm_wait = get_random_u32_below(HZ / 5) + 2;
924 }
925
926 add_wait_queue(sk_sleep(sk), &wait);
927 while (1) {
928 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
929
930 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
931 goto do_error;
932 if (!*timeo_p) {
933 if (noblock)
934 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
935 goto do_nonblock;
936 }
937 if (signal_pending(current))
938 goto do_interrupted;
939 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
940 if (csk_mem_free(cdev, sk) && !vm_wait)
941 break;
942
943 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
944 sk->sk_write_pending++;
945 ret = sk_wait_event(sk, ¤t_timeo, sk->sk_err ||
946 (sk->sk_shutdown & SEND_SHUTDOWN) ||
947 (csk_mem_free(cdev, sk) && !vm_wait),
948 &wait);
949 sk->sk_write_pending--;
950 if (ret < 0)
951 goto do_error;
952
953 if (vm_wait) {
954 vm_wait -= current_timeo;
955 current_timeo = *timeo_p;
956 if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
957 current_timeo -= vm_wait;
958 if (current_timeo < 0)
959 current_timeo = 0;
960 }
961 vm_wait = 0;
962 }
963 *timeo_p = current_timeo;
964 }
965 do_rm_wq:
966 remove_wait_queue(sk_sleep(sk), &wait);
967 return err;
968 do_error:
969 err = -EPIPE;
970 goto do_rm_wq;
971 do_nonblock:
972 err = -EAGAIN;
973 goto do_rm_wq;
974 do_interrupted:
975 err = sock_intr_errno(*timeo_p);
976 goto do_rm_wq;
977 }
978
chtls_proccess_cmsg(struct sock * sk,struct msghdr * msg,unsigned char * record_type)979 static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
980 unsigned char *record_type)
981 {
982 struct cmsghdr *cmsg;
983 int rc = -EINVAL;
984
985 for_each_cmsghdr(cmsg, msg) {
986 if (!CMSG_OK(msg, cmsg))
987 return -EINVAL;
988 if (cmsg->cmsg_level != SOL_TLS)
989 continue;
990
991 switch (cmsg->cmsg_type) {
992 case TLS_SET_RECORD_TYPE:
993 if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
994 return -EINVAL;
995
996 if (msg->msg_flags & MSG_MORE)
997 return -EINVAL;
998
999 *record_type = *(unsigned char *)CMSG_DATA(cmsg);
1000 rc = 0;
1001 break;
1002 default:
1003 return -EINVAL;
1004 }
1005 }
1006
1007 return rc;
1008 }
1009
/*
 * chtls_sendmsg - queue the payload of @msg on the connection's TX queue.
 * @sk:   socket being written to
 * @msg:  message; its iterator supplies the payload and its control data may
 *        carry a SOL_TLS record type
 * @size: number of bytes the caller asks to send
 *
 * The socket lock is taken on entry and dropped before returning.  Payload
 * is appended to the skb at the tail of csk->txq while that skb may still
 * grow (not flagged ULPCB_FLAG_NO_APPEND and shorter than the MSS);
 * otherwise a fresh skb is obtained.  Data goes, in order of preference,
 * through skb_splice_from_iter() (MSG_SPLICE_PAGES), into the skb tailroom,
 * or into a page fragment cached in TCP_PAGE(sk)/TCP_OFF(sk).
 *
 * Returns the number of bytes queued.  If nothing was queued, returns the
 * value sk_stream_error() produces for the failure.
 */
int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_dev *cdev = csk->cdev;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int mss, flags, err;
	int recordsz = 0;
	int copied = 0;
	long timeo;

	lock_sock(sk);
	flags = msg->msg_flags;
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for the connection to come up before queueing anything. */
	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
		err = sk_stream_wait_connect(sk, &timeo);
		if (err)
			goto out_err;
	}

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto out_err;

	mss = csk->mss;
	csk_set_flag(csk, CSK_TX_MORE_DATA);

	while (msg_data_left(msg)) {
		/* Room left in the tail skb; recomputed on every pass. */
		int copy = 0;

		skb = skb_peek_tail(&csk->txq);
		if (skb) {
			copy = mss - skb->len;
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		}
		if (!csk_mem_free(cdev, sk))
			goto wait_for_sndbuf;

		/* Start of a new TLS record: latch its type and length. */
		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
			unsigned char record_type = TLS_RECORD_TYPE_DATA;

			if (unlikely(msg->msg_controllen)) {
				err = chtls_proccess_cmsg(sk, msg,
							  &record_type);
				if (err)
					goto out_err;

				/* Avoid appending tls handshake, alert to tls data */
				if (skb)
					tx_skb_finalize(skb);
			}

			recordsz = size;
			csk->tlshws.txleft = recordsz;
			csk->tlshws.type = record_type;
		}

		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
		    copy <= 0) {
new_buf:
			/* Close the current skb before starting a new one. */
			if (skb) {
				tx_skb_finalize(skb);
				push_frames_if_head(sk);
			}

			if (is_tls_tx(csk)) {
				skb = get_record_skb(sk,
						     select_size(sk,
								 recordsz,
								 flags,
								 TX_TLSHDR_LEN),
						     false);
			} else {
				skb = get_tx_skb(sk,
						 select_size(sk, size, flags,
							     TX_HEADER_LEN));
			}
			if (unlikely(!skb))
				goto wait_for_memory;

			skb->ip_summed = CHECKSUM_UNNECESSARY;
			copy = mss;
		}
		if (copy > size)
			copy = size;

		if (msg->msg_flags & MSG_SPLICE_PAGES) {
			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
						   sk->sk_allocation);
			if (err < 0) {
				/* -EMSGSIZE: retry with a fresh skb. */
				if (err == -EMSGSIZE)
					goto new_buf;
				goto do_fault;
			}
			/* On success the return value is the byte count. */
			copy = err;
			sk_wmem_queued_add(sk, copy);
		} else if (skb_tailroom(skb) > 0) {
			copy = min(copy, skb_tailroom(skb));
			if (is_tls_tx(csk))
				copy = min_t(int, copy, csk->tlshws.txleft);
			err = skb_add_data_nocache(sk, skb,
						   &msg->msg_iter, copy);
			if (err)
				goto do_fault;
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page *page = TCP_PAGE(sk);
			int pg_size = PAGE_SIZE;
			int off = TCP_OFF(sk);
			bool merge;

			if (page)
				pg_size = page_size(page);
			/* Extend the last fragment when the cached page allows. */
			if (off < pg_size &&
			    skb_can_coalesce(skb, i, page, off)) {
				merge = true;
				goto copy;
			}
			merge = false;
			/* TLS skbs stop one fragment short of MAX_SKB_FRAGS. */
			if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
			    MAX_SKB_FRAGS))
				goto new_buf;

			/* Cached page is fully used: drop our reference. */
			if (page && off == pg_size) {
				put_page(page);
				TCP_PAGE(sk) = page = NULL;
				pg_size = PAGE_SIZE;
			}

			if (!page) {
				gfp_t gfp = sk->sk_allocation;
				int order = cdev->send_page_order;

				/* Try a compound page first, then a single page. */
				if (order) {
					page = alloc_pages(gfp | __GFP_COMP |
							   __GFP_NOWARN |
							   __GFP_NORETRY,
							   order);
					if (page)
						pg_size <<= order;
				}
				if (!page) {
					page = alloc_page(gfp);
					pg_size = PAGE_SIZE;
				}
				if (!page)
					goto wait_for_memory;
				off = 0;
			}
copy:
			if (copy > pg_size - off)
				copy = pg_size - off;
			if (is_tls_tx(csk))
				copy = min_t(int, copy, csk->tlshws.txleft);

			err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
							     skb, page,
							     off, copy);
			if (unlikely(err)) {
				/* Keep a freshly allocated page cached for reuse. */
				if (!TCP_PAGE(sk)) {
					TCP_PAGE(sk) = page;
					TCP_OFF(sk) = 0;
				}
				goto do_fault;
			}
			/* Update the skb. */
			if (merge) {
				skb_frag_size_add(
						&skb_shinfo(skb)->frags[i - 1],
						copy);
			} else {
				skb_fill_page_desc(skb, i, page, off, copy);
				if (off + copy < pg_size) {
					/* space left keep page */
					get_page(page);
					TCP_PAGE(sk) = page;
				} else {
					TCP_PAGE(sk) = NULL;
				}
			}
			TCP_OFF(sk) = off + copy;
		}
		if (unlikely(skb->len == mss))
			tx_skb_finalize(skb);
		tp->write_seq += copy;
		copied += copy;
		size -= copy;

		if (is_tls_tx(csk))
			csk->tlshws.txleft -= copy;

		if (corked(tp, flags) &&
		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;

		if (size == 0)
			goto out;

		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
			push_frames_if_head(sk);
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		err = csk_wait_memory(cdev, sk, &timeo);
		if (err)
			goto do_error;
	}
out:
	csk_reset_flag(csk, CSK_TX_MORE_DATA);
	if (copied)
		chtls_tcp_push(sk, flags);
done:
	release_sock(sk);
	return copied;
do_fault:
	/* Discard an skb that never received any payload. */
	if (!skb->len) {
		__skb_unlink(skb, &csk->txq);
		/*
		 * Use the same accessor as the increment in the splice path
		 * above so every update of sk_wmem_queued goes through one
		 * helper.
		 */
		sk_wmem_queued_add(sk, -skb->truesize);
		__kfree_skb(skb);
	}
do_error:
	/* Partial progress is reported as success. */
	if (copied)
		goto out;
out_err:
	if (csk_conn_inline(csk))
		csk_reset_flag(csk, CSK_TX_MORE_DATA);
	copied = sk_stream_error(sk, flags, err);
	goto done;
}
1242
chtls_splice_eof(struct socket * sock)1243 void chtls_splice_eof(struct socket *sock)
1244 {
1245 struct sock *sk = sock->sk;
1246
1247 lock_sock(sk);
1248 chtls_tcp_push(sk, 0);
1249 release_sock(sk);
1250 }
1251
chtls_select_window(struct sock * sk)1252 static void chtls_select_window(struct sock *sk)
1253 {
1254 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1255 struct tcp_sock *tp = tcp_sk(sk);
1256 unsigned int wnd = tp->rcv_wnd;
1257
1258 wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
1259 wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
1260
1261 if (wnd > MAX_RCV_WND)
1262 wnd = MAX_RCV_WND;
1263
1264 /*
1265 * Check if we need to grow the receive window in response to an increase in
1266 * the socket's receive buffer size. Some applications increase the buffer
1267 * size dynamically and rely on the window to grow accordingly.
1268 */
1269
1270 if (wnd > tp->rcv_wnd) {
1271 tp->rcv_wup -= wnd - tp->rcv_wnd;
1272 tp->rcv_wnd = wnd;
1273 /* Mark the receive window as updated */
1274 csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
1275 }
1276 }
1277
1278 /*
1279 * Send RX credits through an RX_DATA_ACK CPL message. We are permitted
1280 * to return without sending the message in case we cannot allocate
1281 * an sk_buff. Returns the number of credits sent.
1282 */
send_rx_credits(struct chtls_sock * csk,u32 credits)1283 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
1284 {
1285 struct cpl_rx_data_ack *req;
1286 struct sk_buff *skb;
1287
1288 skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
1289 if (!skb)
1290 return 0;
1291 __skb_put(skb, sizeof(*req));
1292 req = (struct cpl_rx_data_ack *)skb->head;
1293
1294 set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
1295 INIT_TP_WR(req, csk->tid);
1296 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1297 csk->tid));
1298 req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
1299 RX_FORCE_ACK_F);
1300 cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
1301 return credits;
1302 }
1303
1304 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
1305 TCPF_FIN_WAIT1 | \
1306 TCPF_FIN_WAIT2)
1307
1308 /*
1309 * Called after some received data has been read. It returns RX credits
1310 * to the HW for the amount of data processed.
1311 */
chtls_cleanup_rbuf(struct sock * sk,int copied)1312 static void chtls_cleanup_rbuf(struct sock *sk, int copied)
1313 {
1314 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1315 struct tcp_sock *tp;
1316 int must_send;
1317 u32 credits;
1318 u32 thres;
1319
1320 thres = 15 * 1024;
1321
1322 if (!sk_in_state(sk, CREDIT_RETURN_STATE))
1323 return;
1324
1325 chtls_select_window(sk);
1326 tp = tcp_sk(sk);
1327 credits = tp->copied_seq - tp->rcv_wup;
1328 if (unlikely(!credits))
1329 return;
1330
1331 /*
1332 * For coalescing to work effectively ensure the receive window has
1333 * at least 16KB left.
1334 */
1335 must_send = credits + 16384 >= tp->rcv_wnd;
1336
1337 if (must_send || credits >= thres)
1338 tp->rcv_wup += send_rx_credits(csk, credits);
1339 }
1340
/*
 * chtls_pt_recvmsg - receive path used when TLS RX is active on the socket.
 * @sk:       socket to read from
 * @msg:      destination message; also receives the TLS record type cmsg
 * @len:      maximum number of bytes to copy
 * @flags:    MSG_* receive flags
 * @addr_len: not used by this function
 *
 * Must be entered with the socket lock held (chtls_recvmsg() takes it);
 * the lock is released before returning.
 *
 * skbs flagged ULPCB_FLAG_TLS_HDR carry a record header: their type is
 * reported through a TLS_GET_RECORD_TYPE cmsg and their bytes are not
 * copied to the user.  Copying stops when the next queued skb starts a new
 * record.
 *
 * Returns the number of payload bytes copied, or a negative errno when
 * nothing was copied.
 */
static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int flags, int *addr_len)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_hws *hws = &csk->tlshws;
	struct net_device *dev = csk->egress_dev;
	struct adapter *adap = netdev2adap(dev);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long avail;
	int buffers_freed;
	int copied = 0;
	int target;
	long timeo;
	int ret;

	buffers_freed = 0;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset = 0;

		/* Stop at the urgent mark once something has been copied. */
		if (unlikely(tp->urg_data &&
			     tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			goto found_ok_skb;
		/* Receive queue is empty: use the time to push pending TX. */
		if (csk->wr_credits &&
		    skb_queue_len(&csk->txq) &&
		    chtls_push_frames(csk, csk->wr_credits ==
				      csk->wr_max_credits))
			sk->sk_write_space(sk);

		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
			break;

		if (copied) {
			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    signal_pending(current))
				break;

			if (!timeo)
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				copied = -ENOTCONN;
				break;
			}
			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}
		/* Dropping and retaking the lock lets the backlog be processed. */
		if (READ_ONCE(sk->sk_backlog.tail)) {
			release_sock(sk);
			lock_sock(sk);
			chtls_cleanup_rbuf(sk, copied);
			continue;
		}

		if (copied >= target)
			break;
		chtls_cleanup_rbuf(sk, copied);
		ret = sk_wait_data(sk, &timeo, NULL);
		if (ret < 0) {
			copied = copied ? : ret;
			goto unlock;
		}
		continue;
found_ok_skb:
		/* Zero-length skb: discard it and decide whether to go on. */
		if (!skb->len) {
			skb_dst_set(skb, NULL);
			__skb_unlink(skb, &sk->sk_receive_queue);
			kfree_skb(skb);

			if (!copied && !timeo) {
				copied = -EAGAIN;
				break;
			}

			if (copied < target) {
				release_sock(sk);
				lock_sock(sk);
				continue;
			}
			break;
		}
		/* hws->copied_seq is the read offset inside the head skb. */
		offset = hws->copied_seq;
		avail = skb->len - offset;
		if (len < avail)
			avail = len;

		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - tp->copied_seq;

			if (urg_offset < avail) {
				if (urg_offset) {
					avail = urg_offset;
				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
					/* First byte is urgent, skip */
					tp->copied_seq++;
					offset++;
					avail--;
					if (!avail)
						goto skip_copy;
				}
			}
		}
		/* Set record type if not already done. For a non-data record,
		 * do not proceed if record type could not be copied.
		 */
		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
			struct tls_hdr *thdr = (struct tls_hdr *)skb->data;
			int cerr = 0;

			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
					sizeof(thdr->type), &thdr->type);

			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
				copied = -EIO;
				break;
			}
			/*  don't send tls header, skip copy */
			goto skip_copy;
		}

		if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
			if (!copied) {
				copied = -EFAULT;
				break;
			}
		}

		copied += avail;
		len -= avail;
		hws->copied_seq += avail;
skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
			tp->urg_data = 0;

		/* The head skb has been fully consumed: account and free it. */
		if ((avail + offset) >= skb->len) {
			struct sk_buff *next_skb;

			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
				tp->copied_seq += skb->len;
				hws->rcvpld = skb->hdr_len;
			} else {
				atomic_inc(&adap->chcr_stats.tls_pdu_rx);
				tp->copied_seq += hws->rcvpld;
			}
			chtls_free_skb(sk, skb);
			buffers_freed++;
			hws->copied_seq = 0;
			next_skb = skb_peek(&sk->sk_receive_queue);
			if (copied >= target && !next_skb)
				break;
			/*
			 * The queue may be empty while the target is not yet
			 * met (e.g. MSG_WAITALL); only inspect next_skb when
			 * there is one, then loop to wait for more data.
			 */
			if (next_skb &&
			    (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR))
				break;
		}
	} while (len > 0);

	if (buffers_freed)
		chtls_cleanup_rbuf(sk, copied);

unlock:
	release_sock(sk);
	return copied;
}
1534
1535 /*
1536 * Peek at data in a socket's receive buffer.
1537 */
peekmsg(struct sock * sk,struct msghdr * msg,size_t len,int flags)1538 static int peekmsg(struct sock *sk, struct msghdr *msg,
1539 size_t len, int flags)
1540 {
1541 struct tcp_sock *tp = tcp_sk(sk);
1542 u32 peek_seq, offset;
1543 struct sk_buff *skb;
1544 int copied = 0;
1545 size_t avail; /* amount of available data in current skb */
1546 long timeo;
1547 int ret;
1548
1549 lock_sock(sk);
1550 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1551 peek_seq = tp->copied_seq;
1552
1553 do {
1554 if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
1555 if (copied)
1556 break;
1557 if (signal_pending(current)) {
1558 copied = timeo ? sock_intr_errno(timeo) :
1559 -EAGAIN;
1560 break;
1561 }
1562 }
1563
1564 skb_queue_walk(&sk->sk_receive_queue, skb) {
1565 offset = peek_seq - ULP_SKB_CB(skb)->seq;
1566 if (offset < skb->len)
1567 goto found_ok_skb;
1568 }
1569
1570 /* empty receive queue */
1571 if (copied)
1572 break;
1573 if (sock_flag(sk, SOCK_DONE))
1574 break;
1575 if (sk->sk_err) {
1576 copied = sock_error(sk);
1577 break;
1578 }
1579 if (sk->sk_shutdown & RCV_SHUTDOWN)
1580 break;
1581 if (sk->sk_state == TCP_CLOSE) {
1582 copied = -ENOTCONN;
1583 break;
1584 }
1585 if (!timeo) {
1586 copied = -EAGAIN;
1587 break;
1588 }
1589 if (signal_pending(current)) {
1590 copied = sock_intr_errno(timeo);
1591 break;
1592 }
1593
1594 if (READ_ONCE(sk->sk_backlog.tail)) {
1595 /* Do not sleep, just process backlog. */
1596 release_sock(sk);
1597 lock_sock(sk);
1598 } else {
1599 ret = sk_wait_data(sk, &timeo, NULL);
1600 if (ret < 0) {
1601 /* here 'copied' is 0 due to previous checks */
1602 copied = ret;
1603 break;
1604 }
1605 }
1606
1607 if (unlikely(peek_seq != tp->copied_seq)) {
1608 if (net_ratelimit())
1609 pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
1610 current->comm, current->pid);
1611 peek_seq = tp->copied_seq;
1612 }
1613 continue;
1614
1615 found_ok_skb:
1616 avail = skb->len - offset;
1617 if (len < avail)
1618 avail = len;
1619 /*
1620 * Do we have urgent data here? We need to skip over the
1621 * urgent byte.
1622 */
1623 if (unlikely(tp->urg_data)) {
1624 u32 urg_offset = tp->urg_seq - peek_seq;
1625
1626 if (urg_offset < avail) {
1627 /*
1628 * The amount of data we are preparing to copy
1629 * contains urgent data.
1630 */
1631 if (!urg_offset) { /* First byte is urgent */
1632 if (!sock_flag(sk, SOCK_URGINLINE)) {
1633 peek_seq++;
1634 offset++;
1635 avail--;
1636 }
1637 if (!avail)
1638 continue;
1639 } else {
1640 /* stop short of the urgent data */
1641 avail = urg_offset;
1642 }
1643 }
1644 }
1645
1646 /*
1647 * If MSG_TRUNC is specified the data is discarded.
1648 */
1649 if (likely(!(flags & MSG_TRUNC)))
1650 if (skb_copy_datagram_msg(skb, offset, msg, len)) {
1651 if (!copied) {
1652 copied = -EFAULT;
1653 break;
1654 }
1655 }
1656 peek_seq += avail;
1657 copied += avail;
1658 len -= avail;
1659 } while (len > 0);
1660
1661 release_sock(sk);
1662 return copied;
1663 }
1664
/*
 * chtls_recvmsg - receive entry point for chtls sockets.
 * @sk:       socket to read from
 * @msg:      destination message
 * @len:      maximum number of bytes to copy
 * @flags:    MSG_* receive flags
 * @addr_len: passed through to the handlers this function delegates to
 *
 * MSG_OOB is delegated to tcp_prot.recvmsg() and MSG_PEEK to peekmsg().
 * When TLS RX is active the request is handed to chtls_pt_recvmsg(), which
 * releases the socket lock taken here.  Otherwise data is copied straight
 * from sk_receive_queue and consumed skbs are freed.
 *
 * Returns the number of bytes copied, or a negative errno when nothing was
 * copied.
 */
int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		  int flags, int *addr_len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct chtls_sock *csk;
	unsigned long avail;    /* amount of available data in current skb */
	int buffers_freed;
	int copied = 0;
	long timeo;
	int target;             /* Read at least this many bytes */
	int ret;

	buffers_freed = 0;

	if (unlikely(flags & MSG_OOB))
		return tcp_prot.recvmsg(sk, msg, len, flags, addr_len);

	if (unlikely(flags & MSG_PEEK))
		return peekmsg(sk, msg, len, flags);

	if (sk_can_busy_loop(sk) &&
	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
	    sk->sk_state == TCP_ESTABLISHED)
		sk_busy_loop(sk, flags & MSG_DONTWAIT);

	lock_sock(sk);
	csk = rcu_dereference_sk_user_data(sk);

	/* chtls_pt_recvmsg() drops the socket lock before returning. */
	if (is_tls_rx(csk))
		return chtls_pt_recvmsg(sk, msg, len, flags, addr_len);

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset;

		/* Stop at the urgent mark once something has been copied. */
		if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			goto found_ok_skb;

		/* Receive queue is empty: use the time to push pending TX. */
		if (csk->wr_credits &&
		    skb_queue_len(&csk->txq) &&
		    chtls_push_frames(csk, csk->wr_credits ==
				      csk->wr_max_credits))
			sk->sk_write_space(sk);

		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
			break;

		if (copied) {
			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    signal_pending(current))
				break;

			/*
			 * Non-blocking (or timed-out) read with partial data:
			 * return what we have rather than calling
			 * sk_wait_data() with a zero timeout over and over.
			 * chtls_pt_recvmsg() has the same check.
			 */
			if (!timeo)
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				copied = -ENOTCONN;
				break;
			}
			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}

		/* Dropping and retaking the lock lets the backlog be processed. */
		if (READ_ONCE(sk->sk_backlog.tail)) {
			release_sock(sk);
			lock_sock(sk);
			chtls_cleanup_rbuf(sk, copied);
			continue;
		}

		if (copied >= target)
			break;
		chtls_cleanup_rbuf(sk, copied);
		ret = sk_wait_data(sk, &timeo, NULL);
		if (ret < 0) {
			copied = copied ? : ret;
			goto unlock;
		}
		continue;

found_ok_skb:
		/* Zero-length skb: discard it and decide whether to go on. */
		if (!skb->len) {
			chtls_kfree_skb(sk, skb);
			if (!copied && !timeo) {
				copied = -EAGAIN;
				break;
			}

			if (copied < target)
				continue;

			break;
		}

		offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
		avail = skb->len - offset;
		if (len < avail)
			avail = len;

		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - tp->copied_seq;

			if (urg_offset < avail) {
				if (urg_offset) {
					/* stop short of the urgent byte */
					avail = urg_offset;
				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
					/* first byte is urgent, skip it */
					tp->copied_seq++;
					offset++;
					avail--;
					if (!avail)
						goto skip_copy;
				}
			}
		}

		/* With MSG_TRUNC the data is consumed without being copied. */
		if (likely(!(flags & MSG_TRUNC))) {
			if (skb_copy_datagram_msg(skb, offset,
						  msg, avail)) {
				if (!copied) {
					copied = -EFAULT;
					break;
				}
			}
		}

		tp->copied_seq += avail;
		copied += avail;
		len -= avail;

skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
			tp->urg_data = 0;

		/* The head skb has been fully consumed: free it. */
		if (avail + offset >= skb->len) {
			chtls_free_skb(sk, skb);
			buffers_freed++;

			if  (copied >= target &&
			     !skb_peek(&sk->sk_receive_queue))
				break;
		}
	} while (len > 0);

	if (buffers_freed)
		chtls_cleanup_rbuf(sk, copied);

unlock:
	release_sock(sk);
	return copied;
}
1844