// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2018 Chelsio Communications, Inc.
 *
 * Written by: Atul Gupta (atul.gupta@chelsio.com)
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sched/signal.h>
#include <net/tcp.h>
#include <net/busy_poll.h>
#include <crypto/aes.h>

#include "chtls.h"
#include "chtls_cm.h"

static bool is_tls_tx(struct chtls_sock *csk)
{
	return csk->tlshws.txkey >= 0;
}

static bool is_tls_rx(struct chtls_sock *csk)
{
	return csk->tlshws.rxkey >= 0;
}

static int data_sgl_len(const struct sk_buff *skb)
{
	unsigned int cnt;

	cnt = skb_shinfo(skb)->nr_frags;
	return sgl_len(cnt) * 8;
}

static int nos_ivs(struct sock *sk, unsigned int size)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

	return DIV_ROUND_UP(size, csk->tlshws.mfs);
}

static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
{
	int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
	int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);

	if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
	    MAX_IMM_OFLD_TX_DATA_WR_LEN) {
		ULP_SKB_CB(skb)->ulp.tls.iv = 1;
		return 1;
	}
	ULP_SKB_CB(skb)->ulp.tls.iv = 0;
	return 0;
}

static int max_ivs_size(struct sock *sk, int size)
{
	return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
}

static int ivs_size(struct sock *sk, const struct sk_buff *skb)
{
	return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
			CIPHER_BLOCK_SIZE) : 0;
}

static int flowc_wr_credits(int nparams, int *flowclenp)
{
	int flowclen16, flowclen;

	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
	flowclen16 = DIV_ROUND_UP(flowclen, 16);
	flowclen = flowclen16 * 16;

	if (flowclenp)
		*flowclenp = flowclen;

	return flowclen16;
}

static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
					   struct fw_flowc_wr *flowc,
					   int flowclen)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct sk_buff *skb;

	skb = alloc_skb(flowclen, GFP_ATOMIC);
	if (!skb)
		return NULL;

	__skb_put_data(skb, flowc, flowclen);
	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);

	return skb;
}

static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
			 int flowclen)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int flowclen16;
	int ret;

	flowclen16 = flowclen / 16;

	if (csk_flag(sk, CSK_TX_DATA_SENT)) {
		skb = create_flowc_wr_skb(sk, flowc, flowclen);
		if (!skb)
			return -ENOMEM;

		skb_entail(sk, skb,
			   ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
		return 0;
	}

	ret = cxgb4_immdata_send(csk->egress_dev,
				 csk->txq_idx,
				 flowc, flowclen);
	if (!ret)
		return flowclen16;
	skb = create_flowc_wr_skb(sk, flowc, flowclen);
	if (!skb)
		return -ENOMEM;
	send_or_defer(sk, tp, skb, 0);
	return flowclen16;
}

static u8 tcp_state_to_flowc_state(u8 state)
{
	switch (state) {
	case TCP_ESTABLISHED:
		return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
	case TCP_CLOSE_WAIT:
		return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
	case TCP_FIN_WAIT1:
		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
	case TCP_CLOSING:
		return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
	case TCP_LAST_ACK:
		return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
	case TCP_FIN_WAIT2:
		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
	}

	return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
}

int send_tx_flowc_wr(struct sock *sk, int compl,
		     u32 snd_nxt, u32 rcv_nxt)
{
	DEFINE_RAW_FLEX(struct fw_flowc_wr, flowc, mnemval, FW_FLOWC_MNEM_MAX);
	int nparams, paramidx, flowclen16, flowclen;
	struct chtls_sock *csk;
	struct tcp_sock *tp;

	csk = rcu_dereference_sk_user_data(sk);
	tp = tcp_sk(sk);

#define FLOWC_PARAM(__m, __v) \
	do { \
		flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
		flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
		paramidx++; \
	} while (0)

	paramidx = 0;

	FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
	FLOWC_PARAM(CH, csk->tx_chan);
	FLOWC_PARAM(PORT, csk->tx_chan);
	FLOWC_PARAM(IQID, csk->rss_qid);
	FLOWC_PARAM(SNDNXT, tp->snd_nxt);
	FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
	FLOWC_PARAM(SNDBUF, csk->sndbuf);
	FLOWC_PARAM(MSS, tp->mss_cache);
	FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));

	if (SND_WSCALE(tp))
		FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));

	if (csk->ulp_mode == ULP_MODE_TLS)
		FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);

	if (csk->tlshws.fcplenmax)
		FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);

	nparams = paramidx;
#undef FLOWC_PARAM

	flowclen16 = flowc_wr_credits(nparams, &flowclen);
	flowc->op_to_nparams =
		cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
			    FW_WR_COMPL_V(compl) |
			    FW_FLOWC_WR_NPARAMS_V(nparams));
	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
					  FW_WR_FLOWID_V(csk->tid));

	return send_flowc_wr(sk, flowc, flowclen);
}

/* Copy IVs to WR */
static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)

{
	struct chtls_sock *csk;
	unsigned char *iv_loc;
	struct chtls_hws *hws;
	unsigned char *ivs;
	u16 number_of_ivs;
	struct page *page;
	int err = 0;

	csk = rcu_dereference_sk_user_data(sk);
	hws = &csk->tlshws;
	number_of_ivs = nos_ivs(sk, skb->len);

	if (number_of_ivs > MAX_IVS_PAGE) {
		pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
		return -ENOMEM;
	}

	/* generate the IVs */
	ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
	if (!ivs)
		return -ENOMEM;
	get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);

	if (skb_ulp_tls_iv_imm(skb)) {
		/* send the IVs as immediate data in the WR */
		iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
						     CIPHER_BLOCK_SIZE);
		if (iv_loc)
			memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);

		hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
	} else {
		/* Send the IVs as sgls */
		/* Already accounted IV DSGL for credits */
		skb_shinfo(skb)->nr_frags--;
		page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
		if (!page) {
			pr_info("%s : Page allocation for IVs failed\n",
				__func__);
			err = -ENOMEM;
			goto out;
		}
		memcpy(page_address(page), ivs, number_of_ivs *
		       CIPHER_BLOCK_SIZE);
		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
				   number_of_ivs * CIPHER_BLOCK_SIZE);
		hws->ivsize = 0;
	}
out:
	kfree(ivs);
	return err;
}

/* Copy Key to WR */
static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
{
	struct ulptx_sc_memrd *sc_memrd;
	struct chtls_sock *csk;
	struct chtls_dev *cdev;
	struct ulptx_idata *sc;
	struct chtls_hws *hws;
	u32 immdlen;
	int kaddr;

	csk = rcu_dereference_sk_user_data(sk);
	hws = &csk->tlshws;
	cdev = csk->cdev;

	immdlen = sizeof(*sc) + sizeof(*sc_memrd);
	kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
	sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
	if (sc) {
		sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
		sc->len = htonl(0);
		sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
		sc_memrd->cmd_to_len =
				htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
				      ULP_TX_SC_MORE_V(1) |
				      ULPTX_LEN16_V(hws->keylen >> 4));
		sc_memrd->addr = htonl(kaddr);
	}
}

static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
{
	return hws->tx_seq_no++;
}

static bool is_sg_request(const struct sk_buff *skb)
{
	return skb->peeked ||
		(skb->len > MAX_IMM_ULPTX_WR_LEN);
}

/*
 * Returns true if an sk_buff carries urgent data.
 */
static bool skb_urgent(struct sk_buff *skb)
{
	return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
}

/* TLS content type for CPL SFO */
static unsigned char tls_content_type(unsigned char content_type)
{
	switch (content_type) {
	case TLS_HDR_TYPE_CCS:
		return CPL_TX_TLS_SFO_TYPE_CCS;
	case TLS_HDR_TYPE_ALERT:
		return CPL_TX_TLS_SFO_TYPE_ALERT;
	case TLS_HDR_TYPE_HANDSHAKE:
		return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
	case TLS_HDR_TYPE_HEARTBEAT:
		return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
	}
	return CPL_TX_TLS_SFO_TYPE_DATA;
}

static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			   int dlen, int tls_immd, u32 credits,
			   int expn, int pdus)
{
	struct fw_tlstx_data_wr *req_wr;
	struct cpl_tx_tls_sfo *req_cpl;
	unsigned int wr_ulp_mode_force;
	struct tls_scmd *updated_scmd;
	unsigned char data_type;
	struct chtls_sock *csk;
	struct net_device *dev;
	struct chtls_hws *hws;
	struct tls_scmd *scmd;
	struct adapter *adap;
	unsigned char *req;
	int immd_len;
	int iv_imm;
	int len;

	csk = rcu_dereference_sk_user_data(sk);
	iv_imm = skb_ulp_tls_iv_imm(skb);
	dev = csk->egress_dev;
	adap = netdev2adap(dev);
	hws = &csk->tlshws;
	scmd = &hws->scmd;
	len = dlen + expn;

	dlen = (dlen < hws->mfs) ? dlen : hws->mfs;
	atomic_inc(&adap->chcr_stats.tls_pdu_tx);

	updated_scmd = scmd;
	updated_scmd->seqno_numivs &= 0xffffff80;
	updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus);
	hws->scmd = *updated_scmd;

	req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
	req_cpl = (struct cpl_tx_tls_sfo *)req;
	req = (unsigned char *)__skb_push(skb, (sizeof(struct
						fw_tlstx_data_wr)));

	req_wr = (struct fw_tlstx_data_wr *)req;
	immd_len = (tls_immd ? dlen : 0);
	req_wr->op_to_immdlen =
		htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
		      FW_TLSTX_DATA_WR_COMPL_V(1) |
		      FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
	req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
				     FW_TLSTX_DATA_WR_LEN16_V(credits));
	wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS);

	if (is_sg_request(skb))
		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
			 FW_OFLD_TX_DATA_WR_SHOVE_F);

	req_wr->lsodisable_to_flags =
		htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
		      TX_URG_V(skb_urgent(skb)) |
		      T6_TX_FORCE_F | wr_ulp_mode_force |
		      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
				 skb_queue_empty(&csk->txq)));

	req_wr->ctxloc_to_exp =
		htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
		      FW_TLSTX_DATA_WR_EXP_V(expn) |
		      FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
		      FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
		      FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));

	/* Fill in the length */
	req_wr->plen = htonl(len);
	req_wr->mfs = htons(hws->mfs);
	req_wr->adjustedplen_pkd =
		htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
	req_wr->expinplenmax_pkd =
		htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
	req_wr->pdusinplenmax_pkd =
		FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
	req_wr->r10 = 0;

	data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
	req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
				       CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
				       CPL_TX_TLS_SFO_CPL_LEN_V(2) |
				       CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
	req_cpl->pld_len = htonl(len - expn);

	req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
		((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
		TLS_HDR_TYPE_HEARTBEAT : 0) |
		CPL_TX_TLS_SFO_PROTOVER_V(0));

	/* create the s-command */
	req_cpl->r1_lo = 0;
	req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs);
	req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
	req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
}

/*
 * Calculate the TLS data expansion size
 */
static int chtls_expansion_size(struct sock *sk, int data_len,
				int fullpdu,
				unsigned short *pducnt)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_hws *hws = &csk->tlshws;
	struct tls_scmd *scmd = &hws->scmd;
	int fragsize = hws->mfs;
	int expnsize = 0;
	int fragleft;
	int fragcnt;
	int expppdu;

	if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
	    SCMD_CIPH_MODE_AES_GCM) {
		expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
			  TLS_HEADER_LENGTH;

		if (fullpdu) {
			*pducnt = data_len / (expppdu + fragsize);
			if (*pducnt > 32)
				*pducnt = 32;
			else if (!*pducnt)
				*pducnt = 1;
			expnsize = (*pducnt) * expppdu;
			return expnsize;
		}
		fragcnt = (data_len / fragsize);
		expnsize = fragcnt * expppdu;
		fragleft = data_len % fragsize;
		if (fragleft > 0)
			expnsize += expppdu;
	}
	return expnsize;
}

/* WR with IV, KEY and CPL SFO added */
static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
			       int tls_tx_imm, int tls_len, u32 credits)
{
	unsigned short pdus_per_ulp = 0;
	struct chtls_sock *csk;
	struct chtls_hws *hws;
	int expn_sz;
	int pdus;

	csk = rcu_dereference_sk_user_data(sk);
	hws = &csk->tlshws;
	pdus = DIV_ROUND_UP(tls_len, hws->mfs);
	expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
	if (!hws->compute) {
		hws->expansion = chtls_expansion_size(sk,
						      hws->fcplenmax,
						      1, &pdus_per_ulp);
		hws->pdus = pdus_per_ulp;
		hws->adjustlen = hws->pdus *
			((hws->expansion / hws->pdus) + hws->mfs);
		hws->compute = 1;
	}
	if (tls_copy_ivs(sk, skb))
		return;
	tls_copy_tx_key(sk, skb);
	tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
	hws->tx_seq_no += (pdus - 1);
}

static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			    unsigned int immdlen, int len,
			    u32 credits, u32 compl)
{
	struct fw_ofld_tx_data_wr *req;
	unsigned int wr_ulp_mode_force;
	struct chtls_sock *csk;
	unsigned int opcode;

	csk = rcu_dereference_sk_user_data(sk);
	opcode = FW_OFLD_TX_DATA_WR;

	req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
	req->op_to_immdlen = htonl(WR_OP_V(opcode) |
				   FW_WR_COMPL_V(compl) |
				   FW_WR_IMMDLEN_V(immdlen));
	req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
				  FW_WR_LEN16_V(credits));

	wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode);
	if (is_sg_request(skb))
		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
			 FW_OFLD_TX_DATA_WR_SHOVE_F);

	req->tunnel_to_proxy = htonl(wr_ulp_mode_force |
				     TX_URG_V(skb_urgent(skb)) |
				     TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
						skb_queue_empty(&csk->txq)));
	req->plen = htonl(len);
}

static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
			 bool size)
{
	int wr_size;

	wr_size = TLS_WR_CPL_LEN;
	wr_size += KEY_ON_MEM_SZ;
	wr_size += ivs_size(csk->sk, skb);

	if (size)
		return wr_size;

	/* frags counted for IV dsgl */
	if (!skb_ulp_tls_iv_imm(skb))
		skb_shinfo(skb)->nr_frags++;

	return wr_size;
}

static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
{
	int length = skb->len;

	if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
		return false;

	if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
		/* Check TLS header len for Immediate */
		if (csk->ulp_mode == ULP_MODE_TLS &&
		    skb_ulp_tls_inline(skb))
			length += chtls_wr_size(csk, skb, true);
		else
			length += sizeof(struct fw_ofld_tx_data_wr);

		return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
	}
	return true;
}

static unsigned int calc_tx_flits(const struct sk_buff *skb,
				  unsigned int immdlen)
{
	unsigned int flits, cnt;

	flits = immdlen / 8;	/* headers */
	cnt = skb_shinfo(skb)->nr_frags;
	if (skb_tail_pointer(skb) != skb_transport_header(skb))
		cnt++;
	return flits + sgl_len(cnt);
}

static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	kfree_skb(skb);
}

int chtls_push_frames(struct chtls_sock *csk, int comp)
{
	struct chtls_hws *hws = &csk->tlshws;
	struct tcp_sock *tp;
	struct sk_buff *skb;
	int total_size = 0;
	struct sock *sk;
	int wr_size;

	wr_size = sizeof(struct fw_ofld_tx_data_wr);
	sk = csk->sk;
	tp = tcp_sk(sk);

	if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
		return 0;

	if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
		return 0;

	while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
	       (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
		skb_queue_len(&csk->txq) > 1)) {
		unsigned int credit_len = skb->len;
		unsigned int credits_needed;
		unsigned int completion = 0;
		int tls_len = skb->len; /* TLS data len before IV/key */
		unsigned int immdlen;
		int len = skb->len;	/* length [ulp bytes] inserted by hw */
		int flowclen16 = 0;
		int tls_tx_imm = 0;

		immdlen = skb->len;
		if (!is_ofld_imm(csk, skb)) {
			immdlen = skb_transport_offset(skb);
			if (skb_ulp_tls_inline(skb))
				wr_size = chtls_wr_size(csk, skb, false);
			credit_len = 8 * calc_tx_flits(skb, immdlen);
		} else {
			if (skb_ulp_tls_inline(skb)) {
				wr_size = chtls_wr_size(csk, skb, false);
				tls_tx_imm = 1;
			}
		}
		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
			credit_len += wr_size;
		credits_needed = DIV_ROUND_UP(credit_len, 16);
		if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
			flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
						      tp->rcv_nxt);
			if (flowclen16 <= 0)
				break;
			csk->wr_credits -= flowclen16;
			csk->wr_unacked += flowclen16;
			csk->wr_nondata += flowclen16;
			csk_set_flag(csk, CSK_TX_DATA_SENT);
		}

		if (csk->wr_credits < credits_needed) {
			if (skb_ulp_tls_inline(skb) &&
			    !skb_ulp_tls_iv_imm(skb))
				skb_shinfo(skb)->nr_frags--;
			break;
		}

		__skb_unlink(skb, &csk->txq);
		skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
				      CPL_PRIORITY_DATA);
		if (hws->ofld)
			hws->txqid = (skb->queue_mapping >> 1);
		skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
		csk->wr_credits -= credits_needed;
		csk->wr_unacked += credits_needed;
		csk->wr_nondata = 0;
		enqueue_wr(csk, skb);

		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
			if ((comp && csk->wr_unacked == credits_needed) ||
			    (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
			    csk->wr_unacked >= csk->wr_max_credits / 2) {
				completion = 1;
				csk->wr_unacked = 0;
			}
			if (skb_ulp_tls_inline(skb))
				make_tlstx_data_wr(sk, skb, tls_tx_imm,
						   tls_len, credits_needed);
			else
				make_tx_data_wr(sk, skb, immdlen, len,
						credits_needed, completion);
			tp->snd_nxt += len;
			tp->lsndtime = tcp_jiffies32;
			if (completion)
				ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
		} else {
			struct cpl_close_con_req *req = cplhdr(skb);
			unsigned int cmd = CPL_OPCODE_G(ntohl
							(OPCODE_TID(req)));

			if (cmd == CPL_CLOSE_CON_REQ)
				csk_set_flag(csk,
					     CSK_CLOSE_CON_REQUESTED);

			if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
			    (csk->wr_unacked >= csk->wr_max_credits / 2)) {
				req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
				csk->wr_unacked = 0;
			}
		}
		total_size += skb->truesize;
		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
			csk_set_flag(csk, CSK_TX_WAIT_IDLE);
		t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
		cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
	}
	sk->sk_wmem_queued -= total_size;
	return total_size;
}

static void mark_urg(struct tcp_sock *tp, int flags,
		     struct sk_buff *skb)
{
	if (unlikely(flags & MSG_OOB)) {
		tp->snd_up = tp->write_seq;
		ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
					 ULPCB_FLAG_BARRIER |
					 ULPCB_FLAG_NO_APPEND |
					 ULPCB_FLAG_NEED_HDR;
	}
}

/*
 * Returns true if a connection should send more data to TCP engine
 */
static bool should_push(struct sock *sk)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_dev *cdev = csk->cdev;
	struct tcp_sock *tp = tcp_sk(sk);

	/*
	 * If we've released our offload resources there's nothing to do ...
	 */
	if (!cdev)
		return false;

	/*
	 * If there aren't any work requests in flight, or there isn't enough
	 * data in flight, or Nagle is off then send the current TX_DATA
	 * otherwise hold it and wait to accumulate more data.
	 */
	return csk->wr_credits == csk->wr_max_credits ||
		(tp->nonagle & TCP_NAGLE_OFF);
}

/*
 * Returns true if a TCP socket is corked.
 */
static bool corked(const struct tcp_sock *tp, int flags)
{
	return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
}

/*
 * Returns true if a send should try to push new data.
 */
static bool send_should_push(struct sock *sk, int flags)
{
	return should_push(sk) && !corked(tcp_sk(sk), flags);
}

void chtls_tcp_push(struct sock *sk, int flags)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	int qlen = skb_queue_len(&csk->txq);

	if (likely(qlen)) {
		struct sk_buff *skb = skb_peek_tail(&csk->txq);
		struct tcp_sock *tp = tcp_sk(sk);

		mark_urg(tp, flags, skb);

		if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
		    corked(tp, flags)) {
			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
			return;
		}

		ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
		if (qlen == 1 &&
		    ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
		     should_push(sk)))
			chtls_push_frames(csk, 1);
	}
}

/*
 * Calculate the size for a new send sk_buff. It's maximum size so we can
 * pack lots of data into it, unless we plan to send it immediately, in which
 * case we size it more tightly.
 *
 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't
 * arise in normal cases and when it does we are just wasting memory.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
	const int pgbreak = SKB_MAX_HEAD(len);

	/*
	 * If the data wouldn't fit in the main body anyway, put only the
	 * header in the main body so it can use immediate data and place all
	 * the payload in page fragments.
	 */
	if (io_len > pgbreak)
		return 0;

	/*
	 * If we will be accumulating payload get a large main body.
	 */
	if (!send_should_push(sk, flags))
		return pgbreak;

	return io_len;
}

void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	ULP_SKB_CB(skb)->seq = tp->write_seq;
	ULP_SKB_CB(skb)->flags = flags;
	__skb_queue_tail(&csk->txq, skb);
	sk->sk_wmem_queued += skb->truesize;

	if (TCP_PAGE(sk) && TCP_OFF(sk)) {
		put_page(TCP_PAGE(sk));
		TCP_PAGE(sk) = NULL;
		TCP_OFF(sk) = 0;
	}
}

static struct sk_buff *get_tx_skb(struct sock *sk, int size)
{
	struct sk_buff *skb;

	skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
	if (likely(skb)) {
		skb_reserve(skb, TX_HEADER_LEN);
		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
		skb_reset_transport_header(skb);
	}
	return skb;
}

static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct sk_buff *skb;

	skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
			 KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
			sk->sk_allocation);
	if (likely(skb)) {
		skb_reserve(skb, (TX_TLSHDR_LEN +
			    KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
		skb_reset_transport_header(skb);
		ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
		ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;
	}
	return skb;
}

static void tx_skb_finalize(struct sk_buff *skb)
{
	struct ulp_skb_cb *cb = ULP_SKB_CB(skb);

	if (!(cb->flags & ULPCB_FLAG_NO_HDR))
		cb->flags = ULPCB_FLAG_NEED_HDR;
	cb->flags |= ULPCB_FLAG_NO_APPEND;
}

static void push_frames_if_head(struct sock *sk)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

	if (skb_queue_len(&csk->txq) == 1)
		chtls_push_frames(csk, 1);
}

static int chtls_skb_copy_to_page_nocache(struct sock *sk,
					  struct iov_iter *from,
					  struct sk_buff *skb,
					  struct page *page,
					  int off, int copy)
{
	int err;

	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
				       off, copy, skb->len);
	if (err)
		return err;

	skb->len += copy;
	skb->data_len += copy;
	skb->truesize += copy;
	sk->sk_wmem_queued += copy;
	return 0;
}

static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
{
	return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
}

static int csk_wait_memory(struct chtls_dev *cdev,
			   struct sock *sk, long *timeo_p)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int ret, err = 0;
	long current_timeo;
	long vm_wait = 0;
	bool noblock;

	current_timeo = *timeo_p;
	noblock = (*timeo_p ? false : true);
	if (csk_mem_free(cdev, sk)) {
		current_timeo = get_random_u32_below(HZ / 5) + 2;
		vm_wait = get_random_u32_below(HZ / 5) + 2;
	}

	add_wait_queue(sk_sleep(sk), &wait);
	while (1) {
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
			goto do_error;
		if (!*timeo_p) {
			if (noblock)
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			goto do_nonblock;
		}
		if (signal_pending(current))
			goto do_interrupted;
		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		if (csk_mem_free(cdev, sk) && !vm_wait)
			break;

		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		sk->sk_write_pending++;
		ret = sk_wait_event(sk, &current_timeo, sk->sk_err ||
				    (sk->sk_shutdown & SEND_SHUTDOWN) ||
				    (csk_mem_free(cdev, sk) && !vm_wait),
				    &wait);
		sk->sk_write_pending--;
		if (ret < 0)
			goto do_error;

		if (vm_wait) {
			vm_wait -= current_timeo;
			current_timeo = *timeo_p;
			if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
				current_timeo -= vm_wait;
				if (current_timeo < 0)
					current_timeo = 0;
			}
			vm_wait = 0;
		}
		*timeo_p = current_timeo;
	}
do_rm_wq:
	remove_wait_queue(sk_sleep(sk), &wait);
	return err;
do_error:
	err = -EPIPE;
	goto do_rm_wq;
do_nonblock:
	err = -EAGAIN;
	goto do_rm_wq;
do_interrupted:
	err = sock_intr_errno(*timeo_p);
	goto do_rm_wq;
}

static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
			       unsigned char *record_type)
{
	struct cmsghdr *cmsg;
	int rc = -EINVAL;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
		if (cmsg->cmsg_level != SOL_TLS)
			continue;

		switch (cmsg->cmsg_type) {
		case TLS_SET_RECORD_TYPE:
			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
				return -EINVAL;

			if (msg->msg_flags & MSG_MORE)
				return -EINVAL;

			*record_type = *(unsigned char *)CMSG_DATA(cmsg);
			rc = 0;
			break;
		default:
			return -EINVAL;
		}
	}

	return rc;
}

int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_dev *cdev = csk->cdev;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int mss, flags, err;
	int recordsz = 0;
	int copied = 0;
	long timeo;

	lock_sock(sk);
	flags = msg->msg_flags;
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
		err = sk_stream_wait_connect(sk, &timeo);
		if (err)
			goto out_err;
	}

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto out_err;

	mss = csk->mss;
	csk_set_flag(csk, CSK_TX_MORE_DATA);

	while (msg_data_left(msg)) {
		int copy = 0;

		skb = skb_peek_tail(&csk->txq);
		if (skb) {
			copy = mss - skb->len;
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		}
		if (!csk_mem_free(cdev, sk))
			goto wait_for_sndbuf;

		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
			unsigned char record_type = TLS_RECORD_TYPE_DATA;

			if (unlikely(msg->msg_controllen)) {
				err = chtls_proccess_cmsg(sk, msg,
							  &record_type);
				if (err)
					goto out_err;

				/* Avoid appending tls handshake, alert to tls data */
				if (skb)
					tx_skb_finalize(skb);
			}

			recordsz = size;
			csk->tlshws.txleft = recordsz;
			csk->tlshws.type = record_type;
		}

		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
		    copy <= 0) {
new_buf:
			if (skb) {
				tx_skb_finalize(skb);
				push_frames_if_head(sk);
			}

			if (is_tls_tx(csk)) {
				skb = get_record_skb(sk,
						     select_size(sk,
								 recordsz,
								 flags,
								 TX_TLSHDR_LEN),
						     false);
			} else {
				skb = get_tx_skb(sk,
						 select_size(sk, size, flags,
							     TX_HEADER_LEN));
			}
			if (unlikely(!skb))
				goto wait_for_memory;

			skb->ip_summed = CHECKSUM_UNNECESSARY;
			copy = mss;
		}
		if (copy > size)
			copy = size;

		if (msg->msg_flags & MSG_SPLICE_PAGES) {
			err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
			if (err < 0) {
				if (err == -EMSGSIZE)
					goto new_buf;
				goto do_fault;
			}
			copy = err;
			sk_wmem_queued_add(sk, copy);
		} else if (skb_tailroom(skb) > 0) {
			copy = min(copy, skb_tailroom(skb));
			if (is_tls_tx(csk))
				copy = min_t(int, copy, csk->tlshws.txleft);
			err = skb_add_data_nocache(sk, skb,
						   &msg->msg_iter, copy);
			if (err)
				goto do_fault;
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page *page = TCP_PAGE(sk);
			int pg_size = PAGE_SIZE;
			int off = TCP_OFF(sk);
			bool merge;

			if (page)
				pg_size = page_size(page);
			if (off < pg_size &&
			    skb_can_coalesce(skb, i, page, off)) {
				merge = true;
				goto copy;
			}
			merge = false;
			if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
				  MAX_SKB_FRAGS))
				goto new_buf;

			if (page && off == pg_size) {
				put_page(page);
				TCP_PAGE(sk) = page = NULL;
				pg_size = PAGE_SIZE;
			}

			if (!page) {
				gfp_t gfp = sk->sk_allocation;
				int order = cdev->send_page_order;

				if (order) {
					page = alloc_pages(gfp | __GFP_COMP |
							   __GFP_NOWARN |
							   __GFP_NORETRY,
							   order);
					if (page)
						pg_size <<= order;
				}
				if (!page) {
					page = alloc_page(gfp);
					pg_size = PAGE_SIZE;
				}
				if (!page)
					goto wait_for_memory;
				off = 0;
			}
copy:
			if (copy > pg_size - off)
				copy = pg_size - off;
			if (is_tls_tx(csk))
				copy = min_t(int, copy, csk->tlshws.txleft);

			err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
							     skb, page,
							     off, copy);
			if (unlikely(err)) {
				if (!TCP_PAGE(sk)) {
					TCP_PAGE(sk) = page;
					TCP_OFF(sk) = 0;
				}
				goto do_fault;
			}
			/* Update the skb. */
			if (merge) {
				skb_frag_size_add(
						&skb_shinfo(skb)->frags[i - 1],
						copy);
			} else {
				skb_fill_page_desc(skb, i, page, off, copy);
				if (off + copy < pg_size) {
					/* space left keep page */
					get_page(page);
					TCP_PAGE(sk) = page;
				} else {
					TCP_PAGE(sk) = NULL;
				}
			}
			TCP_OFF(sk) = off + copy;
		}
		if (unlikely(skb->len == mss))
			tx_skb_finalize(skb);
		tp->write_seq += copy;
		copied += copy;
		size -= copy;

		if (is_tls_tx(csk))
			csk->tlshws.txleft -= copy;

		if (corked(tp, flags) &&
		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;

		if (size == 0)
			goto out;

		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
			push_frames_if_head(sk);
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		err = csk_wait_memory(cdev, sk, &timeo);
		if (err)
			goto do_error;
	}
out:
	csk_reset_flag(csk, CSK_TX_MORE_DATA);
	if (copied)
		chtls_tcp_push(sk, flags);
done:
	release_sock(sk);
	return copied;
do_fault:
	if (!skb->len) {
		__skb_unlink(skb, &csk->txq);
		sk->sk_wmem_queued -= skb->truesize;
		__kfree_skb(skb);
	}
do_error:
	if (copied)
		goto out;
out_err:
	if (csk_conn_inline(csk))
		csk_reset_flag(csk, CSK_TX_MORE_DATA);
	copied = sk_stream_error(sk, flags, err);
	goto done;
}

void chtls_splice_eof(struct socket *sock)
{
	struct sock *sk = sock->sk;

	lock_sock(sk);
	chtls_tcp_push(sk, 0);
	release_sock(sk);
}

static void chtls_select_window(struct sock *sk)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int wnd = tp->rcv_wnd;

	wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
	wnd = max_t(unsigned int, MIN_RCV_WND, wnd);

	if (wnd > MAX_RCV_WND)
		wnd = MAX_RCV_WND;

	/*
	 * Check if we need to grow the receive window in response to an increase in
	 * the socket's receive buffer size. Some applications increase the buffer
	 * size dynamically and rely on the window to grow accordingly.
	 */

	if (wnd > tp->rcv_wnd) {
		tp->rcv_wup -= wnd - tp->rcv_wnd;
		tp->rcv_wnd = wnd;
		/* Mark the receive window as updated */
		csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
	}
}

/*
 * Send RX credits through an RX_DATA_ACK CPL message. We are permitted
 * to return without sending the message in case we cannot allocate
 * an sk_buff. Returns the number of credits sent.
 */
static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
{
	struct cpl_rx_data_ack *req;
	struct sk_buff *skb;

	skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
	if (!skb)
		return 0;
	__skb_put(skb, sizeof(*req));
	req = (struct cpl_rx_data_ack *)skb->head;

	set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
	INIT_TP_WR(req, csk->tid);
	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
						    csk->tid));
	req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
				       RX_FORCE_ACK_F);
	cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
	return credits;
}

#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
			     TCPF_FIN_WAIT1 | \
			     TCPF_FIN_WAIT2)

/*
 * Called after some received data has been read. It returns RX credits
 * to the HW for the amount of data processed.
 */
static void chtls_cleanup_rbuf(struct sock *sk, int copied)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct tcp_sock *tp;
	int must_send;
	u32 credits;
	u32 thres;

	thres = 15 * 1024;

	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
		return;

	chtls_select_window(sk);
	tp = tcp_sk(sk);
	credits = tp->copied_seq - tp->rcv_wup;
	if (unlikely(!credits))
		return;

	/*
	 * For coalescing to work effectively ensure the receive window has
	 * at least 16KB left.
	 */
	must_send = credits + 16384 >= tp->rcv_wnd;

	if (must_send || credits >= thres)
		tp->rcv_wup += send_rx_credits(csk, credits);
}

static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int flags, int *addr_len)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_hws *hws = &csk->tlshws;
	struct net_device *dev = csk->egress_dev;
	struct adapter *adap = netdev2adap(dev);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long avail;
	int buffers_freed;
	int copied = 0;
	int target;
	long timeo;
	int ret;

	buffers_freed = 0;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset = 0;

		if (unlikely(tp->urg_data &&
			     tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			goto found_ok_skb;
		if (csk->wr_credits &&
		    skb_queue_len(&csk->txq) &&
		    chtls_push_frames(csk, csk->wr_credits ==
				      csk->wr_max_credits))
			sk->sk_write_space(sk);

		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
			break;

		if (copied) {
			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    signal_pending(current))
				break;

			if (!timeo)
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				copied = -ENOTCONN;
				break;
			}
			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}
		if (READ_ONCE(sk->sk_backlog.tail)) {
			release_sock(sk);
			lock_sock(sk);
			chtls_cleanup_rbuf(sk, copied);
			continue;
		}

		if (copied >= target)
			break;
		chtls_cleanup_rbuf(sk, copied);
		ret = sk_wait_data(sk, &timeo, NULL);
		if (ret < 0) {
			copied = copied ? : ret;
			goto unlock;
		}
		continue;
found_ok_skb:
		if (!skb->len) {
			skb_dstref_steal(skb);
			__skb_unlink(skb, &sk->sk_receive_queue);
			kfree_skb(skb);

			if (!copied && !timeo) {
				copied = -EAGAIN;
				break;
			}

			if (copied < target) {
				release_sock(sk);
				lock_sock(sk);
				continue;
			}
			break;
		}
		offset = hws->copied_seq;
		avail = skb->len - offset;
		if (len < avail)
			avail = len;

		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - tp->copied_seq;

			if (urg_offset < avail) {
				if (urg_offset) {
					avail = urg_offset;
				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
					/* First byte is urgent, skip */
					tp->copied_seq++;
					offset++;
					avail--;
					if (!avail)
						goto skip_copy;
				}
			}
		}
		/* Set record type if not already done. For a non-data record,
		 * do not proceed if record type could not be copied.
		 */
		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
			struct tls_hdr *thdr = (struct tls_hdr *)skb->data;
			int cerr = 0;

			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
					sizeof(thdr->type), &thdr->type);

			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
				copied = -EIO;
				break;
			}
			/* don't send tls header, skip copy */
			goto skip_copy;
		}

		if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
			if (!copied) {
				copied = -EFAULT;
				break;
			}
		}

		copied += avail;
		len -= avail;
		hws->copied_seq += avail;
skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
			tp->urg_data = 0;

		if ((avail + offset) >= skb->len) {
			struct sk_buff *next_skb;

			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
				tp->copied_seq += skb->len;
				hws->rcvpld = skb->hdr_len;
			} else {
				atomic_inc(&adap->chcr_stats.tls_pdu_rx);
				tp->copied_seq += hws->rcvpld;
			}
			chtls_free_skb(sk, skb);
			buffers_freed++;
			hws->copied_seq = 0;
			next_skb = skb_peek(&sk->sk_receive_queue);
			if (copied >= target && !next_skb)
				break;
			if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
				break;
		}
	} while (len > 0);

	if (buffers_freed)
		chtls_cleanup_rbuf(sk, copied);

unlock:
	release_sock(sk);
	return copied;
}

/*
 * Peek at data in a socket's receive buffer.
 */
static int peekmsg(struct sock *sk, struct msghdr *msg,
		   size_t len, int flags)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 peek_seq, offset;
	struct sk_buff *skb;
	int copied = 0;
	size_t avail;		/* amount of available data in current skb */
	long timeo;
	int ret;

	lock_sock(sk);
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	peek_seq = tp->copied_seq;

	do {
		if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}

		skb_queue_walk(&sk->sk_receive_queue, skb) {
			offset = peek_seq - ULP_SKB_CB(skb)->seq;
			if (offset < skb->len)
				goto found_ok_skb;
		}

		/* empty receive queue */
		if (copied)
			break;
		if (sock_flag(sk, SOCK_DONE))
			break;
		if (sk->sk_err) {
			copied = sock_error(sk);
			break;
		}
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			break;
		if (sk->sk_state == TCP_CLOSE) {
			copied = -ENOTCONN;
			break;
		}
		if (!timeo) {
			copied = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			copied = sock_intr_errno(timeo);
			break;
		}

		if (READ_ONCE(sk->sk_backlog.tail)) {
			/* Do not sleep, just process backlog. */
			release_sock(sk);
			lock_sock(sk);
		} else {
			ret = sk_wait_data(sk, &timeo, NULL);
			if (ret < 0) {
				/* here 'copied' is 0 due to previous checks */
				copied = ret;
				break;
			}
		}

		if (unlikely(peek_seq != tp->copied_seq)) {
			if (net_ratelimit())
				pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
					current->comm, current->pid);
			peek_seq = tp->copied_seq;
		}
		continue;

found_ok_skb:
		avail = skb->len - offset;
		if (len < avail)
			avail = len;
		/*
		 * Do we have urgent data here? We need to skip over the
		 * urgent byte.
		 */
		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - peek_seq;

			if (urg_offset < avail) {
				/*
				 * The amount of data we are preparing to copy
				 * contains urgent data.
				 */
				if (!urg_offset) { /* First byte is urgent */
					if (!sock_flag(sk, SOCK_URGINLINE)) {
						peek_seq++;
						offset++;
						avail--;
					}
					if (!avail)
						continue;
				} else {
					/* stop short of the urgent data */
					avail = urg_offset;
				}
			}
		}

		/*
		 * If MSG_TRUNC is specified the data is discarded.
		 */
		if (likely(!(flags & MSG_TRUNC)))
			if (skb_copy_datagram_msg(skb, offset, msg, len)) {
				if (!copied) {
					copied = -EFAULT;
					break;
				}
			}
		peek_seq += avail;
		copied += avail;
		len -= avail;
	} while (len > 0);

	release_sock(sk);
	return copied;
}

int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		  int flags, int *addr_len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct chtls_sock *csk;
	unsigned long avail;	/* amount of available data in current skb */
	int buffers_freed;
	int copied = 0;
	long timeo;
	int target;		/* Read at least this many bytes */
	int ret;

	buffers_freed = 0;

	if (unlikely(flags & MSG_OOB))
		return tcp_prot.recvmsg(sk, msg, len, flags, addr_len);

	if (unlikely(flags & MSG_PEEK))
		return peekmsg(sk, msg, len, flags);

	if (sk_can_busy_loop(sk) &&
	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
	    sk->sk_state == TCP_ESTABLISHED)
		sk_busy_loop(sk, flags & MSG_DONTWAIT);

	lock_sock(sk);
	csk = rcu_dereference_sk_user_data(sk);

	if (is_tls_rx(csk))
		return chtls_pt_recvmsg(sk, msg, len, flags, addr_len);

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset;

		if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			goto found_ok_skb;

		if (csk->wr_credits &&
		    skb_queue_len(&csk->txq) &&
		    chtls_push_frames(csk, csk->wr_credits ==
				      csk->wr_max_credits))
			sk->sk_write_space(sk);

		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
			break;

		if (copied) {
			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    signal_pending(current))
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				copied = -ENOTCONN;
				break;
			}
			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}

		if (READ_ONCE(sk->sk_backlog.tail)) {
			release_sock(sk);
			lock_sock(sk);
			chtls_cleanup_rbuf(sk, copied);
			continue;
		}

		if (copied >= target)
			break;
		chtls_cleanup_rbuf(sk, copied);
		ret = sk_wait_data(sk, &timeo, NULL);
		if (ret < 0) {
			copied = copied ? : ret;
			goto unlock;
		}
		continue;

found_ok_skb:
		if (!skb->len) {
			chtls_kfree_skb(sk, skb);
			if (!copied && !timeo) {
				copied = -EAGAIN;
				break;
			}

			if (copied < target)
				continue;

			break;
		}

		offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
		avail = skb->len - offset;
		if (len < avail)
			avail = len;

		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - tp->copied_seq;

			if (urg_offset < avail) {
				if (urg_offset) {
					avail = urg_offset;
				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
					tp->copied_seq++;
					offset++;
					avail--;
					if (!avail)
						goto skip_copy;
				}
			}
		}

		if (likely(!(flags & MSG_TRUNC))) {
			if (skb_copy_datagram_msg(skb, offset,
						  msg, avail)) {
				if (!copied) {
					copied = -EFAULT;
					break;
				}
			}
		}

		tp->copied_seq += avail;
		copied += avail;
		len -= avail;

skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
			tp->urg_data = 0;

		if (avail + offset >= skb->len) {
			chtls_free_skb(sk, skb);
			buffers_freed++;

			if (copied >= target &&
			    !skb_peek(&sk->sk_receive_queue))
				break;
		}
	} while (len > 0);

	if (buffers_freed)
		chtls_cleanup_rbuf(sk, copied);

unlock:
	release_sock(sk);
	return copied;
}