1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2018 Chelsio Communications, Inc. 4 * 5 * Written by: Atul Gupta (atul.gupta@chelsio.com) 6 */ 7 8 #include <linux/module.h> 9 #include <linux/list.h> 10 #include <linux/workqueue.h> 11 #include <linux/skbuff.h> 12 #include <linux/timer.h> 13 #include <linux/notifier.h> 14 #include <linux/inetdevice.h> 15 #include <linux/ip.h> 16 #include <linux/tcp.h> 17 #include <linux/sched/signal.h> 18 #include <net/tcp.h> 19 #include <net/busy_poll.h> 20 #include <crypto/aes.h> 21 22 #include "chtls.h" 23 #include "chtls_cm.h" 24 25 static bool is_tls_tx(struct chtls_sock *csk) 26 { 27 return csk->tlshws.txkey >= 0; 28 } 29 30 static bool is_tls_rx(struct chtls_sock *csk) 31 { 32 return csk->tlshws.rxkey >= 0; 33 } 34 35 static int data_sgl_len(const struct sk_buff *skb) 36 { 37 unsigned int cnt; 38 39 cnt = skb_shinfo(skb)->nr_frags; 40 return sgl_len(cnt) * 8; 41 } 42 43 static int nos_ivs(struct sock *sk, unsigned int size) 44 { 45 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 46 47 return DIV_ROUND_UP(size, csk->tlshws.mfs); 48 } 49 50 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb) 51 { 52 int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE; 53 int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb); 54 55 if ((hlen + KEY_ON_MEM_SZ + ivs_size) < 56 MAX_IMM_OFLD_TX_DATA_WR_LEN) { 57 ULP_SKB_CB(skb)->ulp.tls.iv = 1; 58 return 1; 59 } 60 ULP_SKB_CB(skb)->ulp.tls.iv = 0; 61 return 0; 62 } 63 64 static int max_ivs_size(struct sock *sk, int size) 65 { 66 return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE; 67 } 68 69 static int ivs_size(struct sock *sk, const struct sk_buff *skb) 70 { 71 return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) * 72 CIPHER_BLOCK_SIZE) : 0; 73 } 74 75 static int flowc_wr_credits(int nparams, int *flowclenp) 76 { 77 int flowclen16, flowclen; 78 79 flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); 80 flowclen16 = DIV_ROUND_UP(flowclen, 16); 81 flowclen = flowclen16 * 16; 82 83 if (flowclenp) 84 *flowclenp = flowclen; 85 86 return flowclen16; 87 } 88 89 static struct sk_buff *create_flowc_wr_skb(struct sock *sk, 90 struct fw_flowc_wr *flowc, 91 int flowclen) 92 { 93 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 94 struct sk_buff *skb; 95 96 skb = alloc_skb(flowclen, GFP_ATOMIC); 97 if (!skb) 98 return NULL; 99 100 __skb_put_data(skb, flowc, flowclen); 101 skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); 102 103 return skb; 104 } 105 106 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc, 107 int flowclen) 108 { 109 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 110 struct tcp_sock *tp = tcp_sk(sk); 111 struct sk_buff *skb; 112 int flowclen16; 113 int ret; 114 115 flowclen16 = flowclen / 16; 116 117 if (csk_flag(sk, CSK_TX_DATA_SENT)) { 118 skb = create_flowc_wr_skb(sk, flowc, flowclen); 119 if (!skb) 120 return -ENOMEM; 121 122 skb_entail(sk, skb, 123 ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND); 124 return 0; 125 } 126 127 ret = cxgb4_immdata_send(csk->egress_dev, 128 csk->txq_idx, 129 flowc, flowclen); 130 if (!ret) 131 return flowclen16; 132 skb = create_flowc_wr_skb(sk, flowc, flowclen); 133 if (!skb) 134 return -ENOMEM; 135 send_or_defer(sk, tp, skb, 0); 136 return flowclen16; 137 } 138 139 static u8 tcp_state_to_flowc_state(u8 state) 140 { 141 switch (state) { 142 case TCP_ESTABLISHED: 143 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; 144 case TCP_CLOSE_WAIT: 145 return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT; 146 case TCP_FIN_WAIT1: 147 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1; 148 case TCP_CLOSING: 149 return FW_FLOWC_MNEM_TCPSTATE_CLOSING; 150 case TCP_LAST_ACK: 151 return FW_FLOWC_MNEM_TCPSTATE_LASTACK; 152 case TCP_FIN_WAIT2: 153 return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2; 154 } 155 156 return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; 157 } 158 159 int send_tx_flowc_wr(struct sock *sk, int compl, 160 u32 snd_nxt, u32 rcv_nxt) 161 { 162 struct flowc_packed { 163 struct fw_flowc_wr fc; 164 struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX]; 165 } __packed sflowc; 166 int nparams, paramidx, flowclen16, flowclen; 167 struct fw_flowc_wr *flowc; 168 struct chtls_sock *csk; 169 struct tcp_sock *tp; 170 171 csk = rcu_dereference_sk_user_data(sk); 172 tp = tcp_sk(sk); 173 memset(&sflowc, 0, sizeof(sflowc)); 174 flowc = &sflowc.fc; 175 176 #define FLOWC_PARAM(__m, __v) \ 177 do { \ 178 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ 179 flowc->mnemval[paramidx].val = cpu_to_be32(__v); \ 180 paramidx++; \ 181 } while (0) 182 183 paramidx = 0; 184 185 FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf)); 186 FLOWC_PARAM(CH, csk->tx_chan); 187 FLOWC_PARAM(PORT, csk->tx_chan); 188 FLOWC_PARAM(IQID, csk->rss_qid); 189 FLOWC_PARAM(SNDNXT, tp->snd_nxt); 190 FLOWC_PARAM(RCVNXT, tp->rcv_nxt); 191 FLOWC_PARAM(SNDBUF, csk->sndbuf); 192 FLOWC_PARAM(MSS, tp->mss_cache); 193 FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state)); 194 195 if (SND_WSCALE(tp)) 196 FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp)); 197 198 if (csk->ulp_mode == ULP_MODE_TLS) 199 FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS); 200 201 if (csk->tlshws.fcplenmax) 202 FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax); 203 204 nparams = paramidx; 205 #undef FLOWC_PARAM 206 207 flowclen16 = flowc_wr_credits(nparams, &flowclen); 208 flowc->op_to_nparams = 209 cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | 210 FW_WR_COMPL_V(compl) | 211 FW_FLOWC_WR_NPARAMS_V(nparams)); 212 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | 213 FW_WR_FLOWID_V(csk->tid)); 214 215 return send_flowc_wr(sk, flowc, flowclen); 216 } 217 218 /* Copy IVs to WR */ 219 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb) 220 221 { 222 struct chtls_sock *csk; 223 unsigned char *iv_loc; 224 struct chtls_hws *hws; 225 unsigned char *ivs; 226 u16 number_of_ivs; 227 struct page *page; 228 int err = 0; 229 230 csk = rcu_dereference_sk_user_data(sk); 231 hws = &csk->tlshws; 232 number_of_ivs = nos_ivs(sk, skb->len); 233 234 if (number_of_ivs > MAX_IVS_PAGE) { 235 pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs); 236 return -ENOMEM; 237 } 238 239 /* generate the IVs */ 240 ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC); 241 if (!ivs) 242 return -ENOMEM; 243 get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE); 244 245 if (skb_ulp_tls_iv_imm(skb)) { 246 /* send the IVs as immediate data in the WR */ 247 iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs * 248 CIPHER_BLOCK_SIZE); 249 if (iv_loc) 250 memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE); 251 252 hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE; 253 } else { 254 /* Send the IVs as sgls */ 255 /* Already accounted IV DSGL for credits */ 256 skb_shinfo(skb)->nr_frags--; 257 page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0); 258 if (!page) { 259 pr_info("%s : Page allocation for IVs failed\n", 260 __func__); 261 err = -ENOMEM; 262 goto out; 263 } 264 memcpy(page_address(page), ivs, number_of_ivs * 265 CIPHER_BLOCK_SIZE); 266 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0, 267 number_of_ivs * CIPHER_BLOCK_SIZE); 268 hws->ivsize = 0; 269 } 270 out: 271 kfree(ivs); 272 return err; 273 } 274 275 /* Copy Key to WR */ 276 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb) 277 { 278 struct ulptx_sc_memrd *sc_memrd; 279 struct chtls_sock *csk; 280 struct chtls_dev *cdev; 281 struct ulptx_idata *sc; 282 struct chtls_hws *hws; 283 u32 immdlen; 284 int kaddr; 285 286 csk = rcu_dereference_sk_user_data(sk); 287 hws = &csk->tlshws; 288 cdev = csk->cdev; 289 290 immdlen = sizeof(*sc) + sizeof(*sc_memrd); 291 kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey); 292 sc = (struct ulptx_idata *)__skb_push(skb, immdlen); 293 if (sc) { 294 sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); 295 sc->len = htonl(0); 296 sc_memrd = (struct ulptx_sc_memrd *)(sc + 1); 297 sc_memrd->cmd_to_len = 298 htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) | 299 ULP_TX_SC_MORE_V(1) | 300 ULPTX_LEN16_V(hws->keylen >> 4)); 301 sc_memrd->addr = htonl(kaddr); 302 } 303 } 304 305 static u64 tlstx_incr_seqnum(struct chtls_hws *hws) 306 { 307 return hws->tx_seq_no++; 308 } 309 310 static bool is_sg_request(const struct sk_buff *skb) 311 { 312 return skb->peeked || 313 (skb->len > MAX_IMM_ULPTX_WR_LEN); 314 } 315 316 /* 317 * Returns true if an sk_buff carries urgent data. 318 */ 319 static bool skb_urgent(struct sk_buff *skb) 320 { 321 return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG; 322 } 323 324 /* TLS content type for CPL SFO */ 325 static unsigned char tls_content_type(unsigned char content_type) 326 { 327 switch (content_type) { 328 case TLS_HDR_TYPE_CCS: 329 return CPL_TX_TLS_SFO_TYPE_CCS; 330 case TLS_HDR_TYPE_ALERT: 331 return CPL_TX_TLS_SFO_TYPE_ALERT; 332 case TLS_HDR_TYPE_HANDSHAKE: 333 return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; 334 case TLS_HDR_TYPE_HEARTBEAT: 335 return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; 336 } 337 return CPL_TX_TLS_SFO_TYPE_DATA; 338 } 339 340 static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb, 341 int dlen, int tls_immd, u32 credits, 342 int expn, int pdus) 343 { 344 struct fw_tlstx_data_wr *req_wr; 345 struct cpl_tx_tls_sfo *req_cpl; 346 unsigned int wr_ulp_mode_force; 347 struct tls_scmd *updated_scmd; 348 unsigned char data_type; 349 struct chtls_sock *csk; 350 struct net_device *dev; 351 struct chtls_hws *hws; 352 struct tls_scmd *scmd; 353 struct adapter *adap; 354 unsigned char *req; 355 int immd_len; 356 int iv_imm; 357 int len; 358 359 csk = rcu_dereference_sk_user_data(sk); 360 iv_imm = skb_ulp_tls_iv_imm(skb); 361 dev = csk->egress_dev; 362 adap = netdev2adap(dev); 363 hws = &csk->tlshws; 364 scmd = &hws->scmd; 365 len = dlen + expn; 366 367 dlen = (dlen < hws->mfs) ? dlen : hws->mfs; 368 atomic_inc(&adap->chcr_stats.tls_pdu_tx); 369 370 updated_scmd = scmd; 371 updated_scmd->seqno_numivs &= 0xffffff80; 372 updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus); 373 hws->scmd = *updated_scmd; 374 375 req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo)); 376 req_cpl = (struct cpl_tx_tls_sfo *)req; 377 req = (unsigned char *)__skb_push(skb, (sizeof(struct 378 fw_tlstx_data_wr))); 379 380 req_wr = (struct fw_tlstx_data_wr *)req; 381 immd_len = (tls_immd ? dlen : 0); 382 req_wr->op_to_immdlen = 383 htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) | 384 FW_TLSTX_DATA_WR_COMPL_V(1) | 385 FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len)); 386 req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) | 387 FW_TLSTX_DATA_WR_LEN16_V(credits)); 388 wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS); 389 390 if (is_sg_request(skb)) 391 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | 392 ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : 393 FW_OFLD_TX_DATA_WR_SHOVE_F); 394 395 req_wr->lsodisable_to_flags = 396 htonl(TX_ULP_MODE_V(ULP_MODE_TLS) | 397 TX_URG_V(skb_urgent(skb)) | 398 T6_TX_FORCE_F | wr_ulp_mode_force | 399 TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && 400 skb_queue_empty(&csk->txq))); 401 402 req_wr->ctxloc_to_exp = 403 htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) | 404 FW_TLSTX_DATA_WR_EXP_V(expn) | 405 FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) | 406 FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) | 407 FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4)); 408 409 /* Fill in the length */ 410 req_wr->plen = htonl(len); 411 req_wr->mfs = htons(hws->mfs); 412 req_wr->adjustedplen_pkd = 413 htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen)); 414 req_wr->expinplenmax_pkd = 415 htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion)); 416 req_wr->pdusinplenmax_pkd = 417 FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus); 418 req_wr->r10 = 0; 419 420 data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type); 421 req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) | 422 CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) | 423 CPL_TX_TLS_SFO_CPL_LEN_V(2) | 424 CPL_TX_TLS_SFO_SEG_LEN_V(dlen)); 425 req_cpl->pld_len = htonl(len - expn); 426 427 req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V 428 ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ? 429 TLS_HDR_TYPE_HEARTBEAT : 0) | 430 CPL_TX_TLS_SFO_PROTOVER_V(0)); 431 432 /* create the s-command */ 433 req_cpl->r1_lo = 0; 434 req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs); 435 req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen); 436 req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws)); 437 } 438 439 /* 440 * Calculate the TLS data expansion size 441 */ 442 static int chtls_expansion_size(struct sock *sk, int data_len, 443 int fullpdu, 444 unsigned short *pducnt) 445 { 446 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 447 struct chtls_hws *hws = &csk->tlshws; 448 struct tls_scmd *scmd = &hws->scmd; 449 int fragsize = hws->mfs; 450 int expnsize = 0; 451 int fragleft; 452 int fragcnt; 453 int expppdu; 454 455 if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) == 456 SCMD_CIPH_MODE_AES_GCM) { 457 expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE + 458 TLS_HEADER_LENGTH; 459 460 if (fullpdu) { 461 *pducnt = data_len / (expppdu + fragsize); 462 if (*pducnt > 32) 463 *pducnt = 32; 464 else if (!*pducnt) 465 *pducnt = 1; 466 expnsize = (*pducnt) * expppdu; 467 return expnsize; 468 } 469 fragcnt = (data_len / fragsize); 470 expnsize = fragcnt * expppdu; 471 fragleft = data_len % fragsize; 472 if (fragleft > 0) 473 expnsize += expppdu; 474 } 475 return expnsize; 476 } 477 478 /* WR with IV, KEY and CPL SFO added */ 479 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb, 480 int tls_tx_imm, int tls_len, u32 credits) 481 { 482 unsigned short pdus_per_ulp = 0; 483 struct chtls_sock *csk; 484 struct chtls_hws *hws; 485 int expn_sz; 486 int pdus; 487 488 csk = rcu_dereference_sk_user_data(sk); 489 hws = &csk->tlshws; 490 pdus = DIV_ROUND_UP(tls_len, hws->mfs); 491 expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL); 492 if (!hws->compute) { 493 hws->expansion = chtls_expansion_size(sk, 494 hws->fcplenmax, 495 1, &pdus_per_ulp); 496 hws->pdus = pdus_per_ulp; 497 hws->adjustlen = hws->pdus * 498 ((hws->expansion / hws->pdus) + hws->mfs); 499 hws->compute = 1; 500 } 501 if (tls_copy_ivs(sk, skb)) 502 return; 503 tls_copy_tx_key(sk, skb); 504 tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus); 505 hws->tx_seq_no += (pdus - 1); 506 } 507 508 static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, 509 unsigned int immdlen, int len, 510 u32 credits, u32 compl) 511 { 512 struct fw_ofld_tx_data_wr *req; 513 unsigned int wr_ulp_mode_force; 514 struct chtls_sock *csk; 515 unsigned int opcode; 516 517 csk = rcu_dereference_sk_user_data(sk); 518 opcode = FW_OFLD_TX_DATA_WR; 519 520 req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req)); 521 req->op_to_immdlen = htonl(WR_OP_V(opcode) | 522 FW_WR_COMPL_V(compl) | 523 FW_WR_IMMDLEN_V(immdlen)); 524 req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) | 525 FW_WR_LEN16_V(credits)); 526 527 wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode); 528 if (is_sg_request(skb)) 529 wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | 530 ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : 531 FW_OFLD_TX_DATA_WR_SHOVE_F); 532 533 req->tunnel_to_proxy = htonl(wr_ulp_mode_force | 534 TX_URG_V(skb_urgent(skb)) | 535 TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && 536 skb_queue_empty(&csk->txq))); 537 req->plen = htonl(len); 538 } 539 540 static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb, 541 bool size) 542 { 543 int wr_size; 544 545 wr_size = TLS_WR_CPL_LEN; 546 wr_size += KEY_ON_MEM_SZ; 547 wr_size += ivs_size(csk->sk, skb); 548 549 if (size) 550 return wr_size; 551 552 /* frags counted for IV dsgl */ 553 if (!skb_ulp_tls_iv_imm(skb)) 554 skb_shinfo(skb)->nr_frags++; 555 556 return wr_size; 557 } 558 559 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb) 560 { 561 int length = skb->len; 562 563 if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN) 564 return false; 565 566 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { 567 /* Check TLS header len for Immediate */ 568 if (csk->ulp_mode == ULP_MODE_TLS && 569 skb_ulp_tls_inline(skb)) 570 length += chtls_wr_size(csk, skb, true); 571 else 572 length += sizeof(struct fw_ofld_tx_data_wr); 573 574 return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; 575 } 576 return true; 577 } 578 579 static unsigned int calc_tx_flits(const struct sk_buff *skb, 580 unsigned int immdlen) 581 { 582 unsigned int flits, cnt; 583 584 flits = immdlen / 8; /* headers */ 585 cnt = skb_shinfo(skb)->nr_frags; 586 if (skb_tail_pointer(skb) != skb_transport_header(skb)) 587 cnt++; 588 return flits + sgl_len(cnt); 589 } 590 591 static void arp_failure_discard(void *handle, struct sk_buff *skb) 592 { 593 kfree_skb(skb); 594 } 595 596 int chtls_push_frames(struct chtls_sock *csk, int comp) 597 { 598 struct chtls_hws *hws = &csk->tlshws; 599 struct tcp_sock *tp; 600 struct sk_buff *skb; 601 int total_size = 0; 602 struct sock *sk; 603 int wr_size; 604 605 wr_size = sizeof(struct fw_ofld_tx_data_wr); 606 sk = csk->sk; 607 tp = tcp_sk(sk); 608 609 if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) 610 return 0; 611 612 if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) 613 return 0; 614 615 while (csk->wr_credits && (skb = skb_peek(&csk->txq)) && 616 (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) || 617 skb_queue_len(&csk->txq) > 1)) { 618 unsigned int credit_len = skb->len; 619 unsigned int credits_needed; 620 unsigned int completion = 0; 621 int tls_len = skb->len;/* TLS data len before IV/key */ 622 unsigned int immdlen; 623 int len = skb->len; /* length [ulp bytes] inserted by hw */ 624 int flowclen16 = 0; 625 int tls_tx_imm = 0; 626 627 immdlen = skb->len; 628 if (!is_ofld_imm(csk, skb)) { 629 immdlen = skb_transport_offset(skb); 630 if (skb_ulp_tls_inline(skb)) 631 wr_size = chtls_wr_size(csk, skb, false); 632 credit_len = 8 * calc_tx_flits(skb, immdlen); 633 } else { 634 if (skb_ulp_tls_inline(skb)) { 635 wr_size = chtls_wr_size(csk, skb, false); 636 tls_tx_imm = 1; 637 } 638 } 639 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) 640 credit_len += wr_size; 641 credits_needed = DIV_ROUND_UP(credit_len, 16); 642 if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { 643 flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt, 644 tp->rcv_nxt); 645 if (flowclen16 <= 0) 646 break; 647 csk->wr_credits -= flowclen16; 648 csk->wr_unacked += flowclen16; 649 csk->wr_nondata += flowclen16; 650 csk_set_flag(csk, CSK_TX_DATA_SENT); 651 } 652 653 if (csk->wr_credits < credits_needed) { 654 if (skb_ulp_tls_inline(skb) && 655 !skb_ulp_tls_iv_imm(skb)) 656 skb_shinfo(skb)->nr_frags--; 657 break; 658 } 659 660 __skb_unlink(skb, &csk->txq); 661 skb_set_queue_mapping(skb, (csk->txq_idx << 1) | 662 CPL_PRIORITY_DATA); 663 if (hws->ofld) 664 hws->txqid = (skb->queue_mapping >> 1); 665 skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata); 666 csk->wr_credits -= credits_needed; 667 csk->wr_unacked += credits_needed; 668 csk->wr_nondata = 0; 669 enqueue_wr(csk, skb); 670 671 if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { 672 if ((comp && csk->wr_unacked == credits_needed) || 673 (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) || 674 csk->wr_unacked >= csk->wr_max_credits / 2) { 675 completion = 1; 676 csk->wr_unacked = 0; 677 } 678 if (skb_ulp_tls_inline(skb)) 679 make_tlstx_data_wr(sk, skb, tls_tx_imm, 680 tls_len, credits_needed); 681 else 682 make_tx_data_wr(sk, skb, immdlen, len, 683 credits_needed, completion); 684 tp->snd_nxt += len; 685 tp->lsndtime = tcp_jiffies32; 686 if (completion) 687 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR; 688 } else { 689 struct cpl_close_con_req *req = cplhdr(skb); 690 unsigned int cmd = CPL_OPCODE_G(ntohl 691 (OPCODE_TID(req))); 692 693 if (cmd == CPL_CLOSE_CON_REQ) 694 csk_set_flag(csk, 695 CSK_CLOSE_CON_REQUESTED); 696 697 if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) && 698 (csk->wr_unacked >= csk->wr_max_credits / 2)) { 699 req->wr.wr_hi |= htonl(FW_WR_COMPL_F); 700 csk->wr_unacked = 0; 701 } 702 } 703 total_size += skb->truesize; 704 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER) 705 csk_set_flag(csk, CSK_TX_WAIT_IDLE); 706 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 707 cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); 708 } 709 sk->sk_wmem_queued -= total_size; 710 return total_size; 711 } 712 713 static void mark_urg(struct tcp_sock *tp, int flags, 714 struct sk_buff *skb) 715 { 716 if (unlikely(flags & MSG_OOB)) { 717 tp->snd_up = tp->write_seq; 718 ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG | 719 ULPCB_FLAG_BARRIER | 720 ULPCB_FLAG_NO_APPEND | 721 ULPCB_FLAG_NEED_HDR; 722 } 723 } 724 725 /* 726 * Returns true if a connection should send more data to TCP engine 727 */ 728 static bool should_push(struct sock *sk) 729 { 730 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 731 struct chtls_dev *cdev = csk->cdev; 732 struct tcp_sock *tp = tcp_sk(sk); 733 734 /* 735 * If we've released our offload resources there's nothing to do ... 736 */ 737 if (!cdev) 738 return false; 739 740 /* 741 * If there aren't any work requests in flight, or there isn't enough 742 * data in flight, or Nagle is off then send the current TX_DATA 743 * otherwise hold it and wait to accumulate more data. 744 */ 745 return csk->wr_credits == csk->wr_max_credits || 746 (tp->nonagle & TCP_NAGLE_OFF); 747 } 748 749 /* 750 * Returns true if a TCP socket is corked. 751 */ 752 static bool corked(const struct tcp_sock *tp, int flags) 753 { 754 return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK); 755 } 756 757 /* 758 * Returns true if a send should try to push new data. 759 */ 760 static bool send_should_push(struct sock *sk, int flags) 761 { 762 return should_push(sk) && !corked(tcp_sk(sk), flags); 763 } 764 765 void chtls_tcp_push(struct sock *sk, int flags) 766 { 767 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 768 int qlen = skb_queue_len(&csk->txq); 769 770 if (likely(qlen)) { 771 struct sk_buff *skb = skb_peek_tail(&csk->txq); 772 struct tcp_sock *tp = tcp_sk(sk); 773 774 mark_urg(tp, flags, skb); 775 776 if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) && 777 corked(tp, flags)) { 778 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD; 779 return; 780 } 781 782 ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD; 783 if (qlen == 1 && 784 ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || 785 should_push(sk))) 786 chtls_push_frames(csk, 1); 787 } 788 } 789 790 /* 791 * Calculate the size for a new send sk_buff. It's maximum size so we can 792 * pack lots of data into it, unless we plan to send it immediately, in which 793 * case we size it more tightly. 794 * 795 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't 796 * arise in normal cases and when it does we are just wasting memory. 797 */ 798 static int select_size(struct sock *sk, int io_len, int flags, int len) 799 { 800 const int pgbreak = SKB_MAX_HEAD(len); 801 802 /* 803 * If the data wouldn't fit in the main body anyway, put only the 804 * header in the main body so it can use immediate data and place all 805 * the payload in page fragments. 806 */ 807 if (io_len > pgbreak) 808 return 0; 809 810 /* 811 * If we will be accumulating payload get a large main body. 812 */ 813 if (!send_should_push(sk, flags)) 814 return pgbreak; 815 816 return io_len; 817 } 818 819 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags) 820 { 821 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 822 struct tcp_sock *tp = tcp_sk(sk); 823 824 ULP_SKB_CB(skb)->seq = tp->write_seq; 825 ULP_SKB_CB(skb)->flags = flags; 826 __skb_queue_tail(&csk->txq, skb); 827 sk->sk_wmem_queued += skb->truesize; 828 829 if (TCP_PAGE(sk) && TCP_OFF(sk)) { 830 put_page(TCP_PAGE(sk)); 831 TCP_PAGE(sk) = NULL; 832 TCP_OFF(sk) = 0; 833 } 834 } 835 836 static struct sk_buff *get_tx_skb(struct sock *sk, int size) 837 { 838 struct sk_buff *skb; 839 840 skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation); 841 if (likely(skb)) { 842 skb_reserve(skb, TX_HEADER_LEN); 843 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); 844 skb_reset_transport_header(skb); 845 } 846 return skb; 847 } 848 849 static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy) 850 { 851 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 852 struct sk_buff *skb; 853 854 skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN + 855 KEY_ON_MEM_SZ + max_ivs_size(sk, size)), 856 sk->sk_allocation); 857 if (likely(skb)) { 858 skb_reserve(skb, (TX_TLSHDR_LEN + 859 KEY_ON_MEM_SZ + max_ivs_size(sk, size))); 860 skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); 861 skb_reset_transport_header(skb); 862 ULP_SKB_CB(skb)->ulp.tls.ofld = 1; 863 ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type; 864 } 865 return skb; 866 } 867 868 static void tx_skb_finalize(struct sk_buff *skb) 869 { 870 struct ulp_skb_cb *cb = ULP_SKB_CB(skb); 871 872 if (!(cb->flags & ULPCB_FLAG_NO_HDR)) 873 cb->flags = ULPCB_FLAG_NEED_HDR; 874 cb->flags |= ULPCB_FLAG_NO_APPEND; 875 } 876 877 static void push_frames_if_head(struct sock *sk) 878 { 879 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 880 881 if (skb_queue_len(&csk->txq) == 1) 882 chtls_push_frames(csk, 1); 883 } 884 885 static int chtls_skb_copy_to_page_nocache(struct sock *sk, 886 struct iov_iter *from, 887 struct sk_buff *skb, 888 struct page *page, 889 int off, int copy) 890 { 891 int err; 892 893 err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + 894 off, copy, skb->len); 895 if (err) 896 return err; 897 898 skb->len += copy; 899 skb->data_len += copy; 900 skb->truesize += copy; 901 sk->sk_wmem_queued += copy; 902 return 0; 903 } 904 905 static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk) 906 { 907 return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0); 908 } 909 910 static int csk_wait_memory(struct chtls_dev *cdev, 911 struct sock *sk, long *timeo_p) 912 { 913 DEFINE_WAIT_FUNC(wait, woken_wake_function); 914 int ret, err = 0; 915 long current_timeo; 916 long vm_wait = 0; 917 bool noblock; 918 919 current_timeo = *timeo_p; 920 noblock = (*timeo_p ? false : true); 921 if (csk_mem_free(cdev, sk)) { 922 current_timeo = get_random_u32_below(HZ / 5) + 2; 923 vm_wait = get_random_u32_below(HZ / 5) + 2; 924 } 925 926 add_wait_queue(sk_sleep(sk), &wait); 927 while (1) { 928 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 929 930 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 931 goto do_error; 932 if (!*timeo_p) { 933 if (noblock) 934 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 935 goto do_nonblock; 936 } 937 if (signal_pending(current)) 938 goto do_interrupted; 939 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 940 if (csk_mem_free(cdev, sk) && !vm_wait) 941 break; 942 943 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 944 sk->sk_write_pending++; 945 ret = sk_wait_event(sk, ¤t_timeo, sk->sk_err || 946 (sk->sk_shutdown & SEND_SHUTDOWN) || 947 (csk_mem_free(cdev, sk) && !vm_wait), 948 &wait); 949 sk->sk_write_pending--; 950 if (ret < 0) 951 goto do_error; 952 953 if (vm_wait) { 954 vm_wait -= current_timeo; 955 current_timeo = *timeo_p; 956 if (current_timeo != MAX_SCHEDULE_TIMEOUT) { 957 current_timeo -= vm_wait; 958 if (current_timeo < 0) 959 current_timeo = 0; 960 } 961 vm_wait = 0; 962 } 963 *timeo_p = current_timeo; 964 } 965 do_rm_wq: 966 remove_wait_queue(sk_sleep(sk), &wait); 967 return err; 968 do_error: 969 err = -EPIPE; 970 goto do_rm_wq; 971 do_nonblock: 972 err = -EAGAIN; 973 goto do_rm_wq; 974 do_interrupted: 975 err = sock_intr_errno(*timeo_p); 976 goto do_rm_wq; 977 } 978 979 static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg, 980 unsigned char *record_type) 981 { 982 struct cmsghdr *cmsg; 983 int rc = -EINVAL; 984 985 for_each_cmsghdr(cmsg, msg) { 986 if (!CMSG_OK(msg, cmsg)) 987 return -EINVAL; 988 if (cmsg->cmsg_level != SOL_TLS) 989 continue; 990 991 switch (cmsg->cmsg_type) { 992 case TLS_SET_RECORD_TYPE: 993 if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type))) 994 return -EINVAL; 995 996 if (msg->msg_flags & MSG_MORE) 997 return -EINVAL; 998 999 *record_type = *(unsigned char *)CMSG_DATA(cmsg); 1000 rc = 0; 1001 break; 1002 default: 1003 return -EINVAL; 1004 } 1005 } 1006 1007 return rc; 1008 } 1009 1010 int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) 1011 { 1012 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1013 struct chtls_dev *cdev = csk->cdev; 1014 struct tcp_sock *tp = tcp_sk(sk); 1015 struct sk_buff *skb; 1016 int mss, flags, err; 1017 int recordsz = 0; 1018 int copied = 0; 1019 long timeo; 1020 1021 lock_sock(sk); 1022 flags = msg->msg_flags; 1023 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1024 1025 if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { 1026 err = sk_stream_wait_connect(sk, &timeo); 1027 if (err) 1028 goto out_err; 1029 } 1030 1031 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 1032 err = -EPIPE; 1033 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 1034 goto out_err; 1035 1036 mss = csk->mss; 1037 csk_set_flag(csk, CSK_TX_MORE_DATA); 1038 1039 while (msg_data_left(msg)) { 1040 int copy = 0; 1041 1042 skb = skb_peek_tail(&csk->txq); 1043 if (skb) { 1044 copy = mss - skb->len; 1045 skb->ip_summed = CHECKSUM_UNNECESSARY; 1046 } 1047 if (!csk_mem_free(cdev, sk)) 1048 goto wait_for_sndbuf; 1049 1050 if (is_tls_tx(csk) && !csk->tlshws.txleft) { 1051 unsigned char record_type = TLS_RECORD_TYPE_DATA; 1052 1053 if (unlikely(msg->msg_controllen)) { 1054 err = chtls_proccess_cmsg(sk, msg, 1055 &record_type); 1056 if (err) 1057 goto out_err; 1058 1059 /* Avoid appending tls handshake, alert to tls data */ 1060 if (skb) 1061 tx_skb_finalize(skb); 1062 } 1063 1064 recordsz = size; 1065 csk->tlshws.txleft = recordsz; 1066 csk->tlshws.type = record_type; 1067 } 1068 1069 if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || 1070 copy <= 0) { 1071 new_buf: 1072 if (skb) { 1073 tx_skb_finalize(skb); 1074 push_frames_if_head(sk); 1075 } 1076 1077 if (is_tls_tx(csk)) { 1078 skb = get_record_skb(sk, 1079 select_size(sk, 1080 recordsz, 1081 flags, 1082 TX_TLSHDR_LEN), 1083 false); 1084 } else { 1085 skb = get_tx_skb(sk, 1086 select_size(sk, size, flags, 1087 TX_HEADER_LEN)); 1088 } 1089 if (unlikely(!skb)) 1090 goto wait_for_memory; 1091 1092 skb->ip_summed = CHECKSUM_UNNECESSARY; 1093 copy = mss; 1094 } 1095 if (copy > size) 1096 copy = size; 1097 1098 if (msg->msg_flags & MSG_SPLICE_PAGES) { 1099 err = skb_splice_from_iter(skb, &msg->msg_iter, copy, 1100 sk->sk_allocation); 1101 if (err < 0) { 1102 if (err == -EMSGSIZE) 1103 goto new_buf; 1104 goto do_fault; 1105 } 1106 copy = err; 1107 sk_wmem_queued_add(sk, copy); 1108 } else if (skb_tailroom(skb) > 0) { 1109 copy = min(copy, skb_tailroom(skb)); 1110 if (is_tls_tx(csk)) 1111 copy = min_t(int, copy, csk->tlshws.txleft); 1112 err = skb_add_data_nocache(sk, skb, 1113 &msg->msg_iter, copy); 1114 if (err) 1115 goto do_fault; 1116 } else { 1117 int i = skb_shinfo(skb)->nr_frags; 1118 struct page *page = TCP_PAGE(sk); 1119 int pg_size = PAGE_SIZE; 1120 int off = TCP_OFF(sk); 1121 bool merge; 1122 1123 if (page) 1124 pg_size = page_size(page); 1125 if (off < pg_size && 1126 skb_can_coalesce(skb, i, page, off)) { 1127 merge = true; 1128 goto copy; 1129 } 1130 merge = false; 1131 if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : 1132 MAX_SKB_FRAGS)) 1133 goto new_buf; 1134 1135 if (page && off == pg_size) { 1136 put_page(page); 1137 TCP_PAGE(sk) = page = NULL; 1138 pg_size = PAGE_SIZE; 1139 } 1140 1141 if (!page) { 1142 gfp_t gfp = sk->sk_allocation; 1143 int order = cdev->send_page_order; 1144 1145 if (order) { 1146 page = alloc_pages(gfp | __GFP_COMP | 1147 __GFP_NOWARN | 1148 __GFP_NORETRY, 1149 order); 1150 if (page) 1151 pg_size <<= order; 1152 } 1153 if (!page) { 1154 page = alloc_page(gfp); 1155 pg_size = PAGE_SIZE; 1156 } 1157 if (!page) 1158 goto wait_for_memory; 1159 off = 0; 1160 } 1161 copy: 1162 if (copy > pg_size - off) 1163 copy = pg_size - off; 1164 if (is_tls_tx(csk)) 1165 copy = min_t(int, copy, csk->tlshws.txleft); 1166 1167 err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter, 1168 skb, page, 1169 off, copy); 1170 if (unlikely(err)) { 1171 if (!TCP_PAGE(sk)) { 1172 TCP_PAGE(sk) = page; 1173 TCP_OFF(sk) = 0; 1174 } 1175 goto do_fault; 1176 } 1177 /* Update the skb. */ 1178 if (merge) { 1179 skb_frag_size_add( 1180 &skb_shinfo(skb)->frags[i - 1], 1181 copy); 1182 } else { 1183 skb_fill_page_desc(skb, i, page, off, copy); 1184 if (off + copy < pg_size) { 1185 /* space left keep page */ 1186 get_page(page); 1187 TCP_PAGE(sk) = page; 1188 } else { 1189 TCP_PAGE(sk) = NULL; 1190 } 1191 } 1192 TCP_OFF(sk) = off + copy; 1193 } 1194 if (unlikely(skb->len == mss)) 1195 tx_skb_finalize(skb); 1196 tp->write_seq += copy; 1197 copied += copy; 1198 size -= copy; 1199 1200 if (is_tls_tx(csk)) 1201 csk->tlshws.txleft -= copy; 1202 1203 if (corked(tp, flags) && 1204 (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) 1205 ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; 1206 1207 if (size == 0) 1208 goto out; 1209 1210 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) 1211 push_frames_if_head(sk); 1212 continue; 1213 wait_for_sndbuf: 1214 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1215 wait_for_memory: 1216 err = csk_wait_memory(cdev, sk, &timeo); 1217 if (err) 1218 goto do_error; 1219 } 1220 out: 1221 csk_reset_flag(csk, CSK_TX_MORE_DATA); 1222 if (copied) 1223 chtls_tcp_push(sk, flags); 1224 done: 1225 release_sock(sk); 1226 return copied; 1227 do_fault: 1228 if (!skb->len) { 1229 __skb_unlink(skb, &csk->txq); 1230 sk->sk_wmem_queued -= skb->truesize; 1231 __kfree_skb(skb); 1232 } 1233 do_error: 1234 if (copied) 1235 goto out; 1236 out_err: 1237 if (csk_conn_inline(csk)) 1238 csk_reset_flag(csk, CSK_TX_MORE_DATA); 1239 copied = sk_stream_error(sk, flags, err); 1240 goto done; 1241 } 1242 1243 void chtls_splice_eof(struct socket *sock) 1244 { 1245 struct sock *sk = sock->sk; 1246 1247 lock_sock(sk); 1248 chtls_tcp_push(sk, 0); 1249 release_sock(sk); 1250 } 1251 1252 static void chtls_select_window(struct sock *sk) 1253 { 1254 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1255 struct tcp_sock *tp = tcp_sk(sk); 1256 unsigned int wnd = tp->rcv_wnd; 1257 1258 wnd = max_t(unsigned int, wnd, tcp_full_space(sk)); 1259 wnd = max_t(unsigned int, MIN_RCV_WND, wnd); 1260 1261 if (wnd > MAX_RCV_WND) 1262 wnd = MAX_RCV_WND; 1263 1264 /* 1265 * Check if we need to grow the receive window in response to an increase in 1266 * the socket's receive buffer size. Some applications increase the buffer 1267 * size dynamically and rely on the window to grow accordingly. 1268 */ 1269 1270 if (wnd > tp->rcv_wnd) { 1271 tp->rcv_wup -= wnd - tp->rcv_wnd; 1272 tp->rcv_wnd = wnd; 1273 /* Mark the receive window as updated */ 1274 csk_reset_flag(csk, CSK_UPDATE_RCV_WND); 1275 } 1276 } 1277 1278 /* 1279 * Send RX credits through an RX_DATA_ACK CPL message. We are permitted 1280 * to return without sending the message in case we cannot allocate 1281 * an sk_buff. Returns the number of credits sent. 1282 */ 1283 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits) 1284 { 1285 struct cpl_rx_data_ack *req; 1286 struct sk_buff *skb; 1287 1288 skb = alloc_skb(sizeof(*req), GFP_ATOMIC); 1289 if (!skb) 1290 return 0; 1291 __skb_put(skb, sizeof(*req)); 1292 req = (struct cpl_rx_data_ack *)skb->head; 1293 1294 set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id); 1295 INIT_TP_WR(req, csk->tid); 1296 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, 1297 csk->tid)); 1298 req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) | 1299 RX_FORCE_ACK_F); 1300 cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); 1301 return credits; 1302 } 1303 1304 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \ 1305 TCPF_FIN_WAIT1 | \ 1306 TCPF_FIN_WAIT2) 1307 1308 /* 1309 * Called after some received data has been read. It returns RX credits 1310 * to the HW for the amount of data processed. 1311 */ 1312 static void chtls_cleanup_rbuf(struct sock *sk, int copied) 1313 { 1314 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1315 struct tcp_sock *tp; 1316 int must_send; 1317 u32 credits; 1318 u32 thres; 1319 1320 thres = 15 * 1024; 1321 1322 if (!sk_in_state(sk, CREDIT_RETURN_STATE)) 1323 return; 1324 1325 chtls_select_window(sk); 1326 tp = tcp_sk(sk); 1327 credits = tp->copied_seq - tp->rcv_wup; 1328 if (unlikely(!credits)) 1329 return; 1330 1331 /* 1332 * For coalescing to work effectively ensure the receive window has 1333 * at least 16KB left. 1334 */ 1335 must_send = credits + 16384 >= tp->rcv_wnd; 1336 1337 if (must_send || credits >= thres) 1338 tp->rcv_wup += send_rx_credits(csk, credits); 1339 } 1340 1341 static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 1342 int flags, int *addr_len) 1343 { 1344 struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); 1345 struct chtls_hws *hws = &csk->tlshws; 1346 struct net_device *dev = csk->egress_dev; 1347 struct adapter *adap = netdev2adap(dev); 1348 struct tcp_sock *tp = tcp_sk(sk); 1349 unsigned long avail; 1350 int buffers_freed; 1351 int copied = 0; 1352 int target; 1353 long timeo; 1354 int ret; 1355 1356 buffers_freed = 0; 1357 1358 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1359 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1360 1361 if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) 1362 chtls_cleanup_rbuf(sk, copied); 1363 1364 do { 1365 struct sk_buff *skb; 1366 u32 offset = 0; 1367 1368 if (unlikely(tp->urg_data && 1369 tp->urg_seq == tp->copied_seq)) { 1370 if (copied) 1371 break; 1372 if (signal_pending(current)) { 1373 copied = timeo ? sock_intr_errno(timeo) : 1374 -EAGAIN; 1375 break; 1376 } 1377 } 1378 skb = skb_peek(&sk->sk_receive_queue); 1379 if (skb) 1380 goto found_ok_skb; 1381 if (csk->wr_credits && 1382 skb_queue_len(&csk->txq) && 1383 chtls_push_frames(csk, csk->wr_credits == 1384 csk->wr_max_credits)) 1385 sk->sk_write_space(sk); 1386 1387 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) 1388 break; 1389 1390 if (copied) { 1391 if (sk->sk_err || sk->sk_state == TCP_CLOSE || 1392 (sk->sk_shutdown & RCV_SHUTDOWN) || 1393 signal_pending(current)) 1394 break; 1395 1396 if (!timeo) 1397 break; 1398 } else { 1399 if (sock_flag(sk, SOCK_DONE)) 1400 break; 1401 if (sk->sk_err) { 1402 copied = sock_error(sk); 1403 break; 1404 } 1405 if (sk->sk_shutdown & RCV_SHUTDOWN) 1406 break; 1407 if (sk->sk_state == TCP_CLOSE) { 1408 copied = -ENOTCONN; 1409 break; 1410 } 1411 if (!timeo) { 1412 copied = -EAGAIN; 1413 break; 1414 } 1415 if (signal_pending(current)) { 1416 copied = sock_intr_errno(timeo); 1417 break; 1418 } 1419 } 1420 if (READ_ONCE(sk->sk_backlog.tail)) { 1421 release_sock(sk); 1422 lock_sock(sk); 1423 chtls_cleanup_rbuf(sk, copied); 1424 continue; 1425 } 1426 1427 if (copied >= target) 1428 break; 1429 chtls_cleanup_rbuf(sk, copied); 1430 ret = sk_wait_data(sk, &timeo, NULL); 1431 if (ret < 0) { 1432 copied = copied ? : ret; 1433 goto unlock; 1434 } 1435 continue; 1436 found_ok_skb: 1437 if (!skb->len) { 1438 skb_dst_set(skb, NULL); 1439 __skb_unlink(skb, &sk->sk_receive_queue); 1440 kfree_skb(skb); 1441 1442 if (!copied && !timeo) { 1443 copied = -EAGAIN; 1444 break; 1445 } 1446 1447 if (copied < target) { 1448 release_sock(sk); 1449 lock_sock(sk); 1450 continue; 1451 } 1452 break; 1453 } 1454 offset = hws->copied_seq; 1455 avail = skb->len - offset; 1456 if (len < avail) 1457 avail = len; 1458 1459 if (unlikely(tp->urg_data)) { 1460 u32 urg_offset = tp->urg_seq - tp->copied_seq; 1461 1462 if (urg_offset < avail) { 1463 if (urg_offset) { 1464 avail = urg_offset; 1465 } else if (!sock_flag(sk, SOCK_URGINLINE)) { 1466 /* First byte is urgent, skip */ 1467 tp->copied_seq++; 1468 offset++; 1469 avail--; 1470 if (!avail) 1471 goto skip_copy; 1472 } 1473 } 1474 } 1475 /* Set record type if not already done. For a non-data record, 1476 * do not proceed if record type could not be copied. 1477 */ 1478 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { 1479 struct tls_hdr *thdr = (struct tls_hdr *)skb->data; 1480 int cerr = 0; 1481 1482 cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, 1483 sizeof(thdr->type), &thdr->type); 1484 1485 if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) { 1486 copied = -EIO; 1487 break; 1488 } 1489 /* don't send tls header, skip copy */ 1490 goto skip_copy; 1491 } 1492 1493 if (skb_copy_datagram_msg(skb, offset, msg, avail)) { 1494 if (!copied) { 1495 copied = -EFAULT; 1496 break; 1497 } 1498 } 1499 1500 copied += avail; 1501 len -= avail; 1502 hws->copied_seq += avail; 1503 skip_copy: 1504 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) 1505 tp->urg_data = 0; 1506 1507 if ((avail + offset) >= skb->len) { 1508 struct sk_buff *next_skb; 1509 if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { 1510 tp->copied_seq += skb->len; 1511 hws->rcvpld = skb->hdr_len; 1512 } else { 1513 atomic_inc(&adap->chcr_stats.tls_pdu_rx); 1514 tp->copied_seq += hws->rcvpld; 1515 } 1516 chtls_free_skb(sk, skb); 1517 buffers_freed++; 1518 hws->copied_seq = 0; 1519 next_skb = skb_peek(&sk->sk_receive_queue); 1520 if (copied >= target && !next_skb) 1521 break; 1522 if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR) 1523 break; 1524 } 1525 } while (len > 0); 1526 1527 if (buffers_freed) 1528 chtls_cleanup_rbuf(sk, copied); 1529 1530 unlock: 1531 release_sock(sk); 1532 return copied; 1533 } 1534 1535 /* 1536 * Peek at data in a socket's receive buffer. 1537 */ 1538 static int peekmsg(struct sock *sk, struct msghdr *msg, 1539 size_t len, int flags) 1540 { 1541 struct tcp_sock *tp = tcp_sk(sk); 1542 u32 peek_seq, offset; 1543 struct sk_buff *skb; 1544 int copied = 0; 1545 size_t avail; /* amount of available data in current skb */ 1546 long timeo; 1547 int ret; 1548 1549 lock_sock(sk); 1550 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1551 peek_seq = tp->copied_seq; 1552 1553 do { 1554 if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) { 1555 if (copied) 1556 break; 1557 if (signal_pending(current)) { 1558 copied = timeo ? sock_intr_errno(timeo) : 1559 -EAGAIN; 1560 break; 1561 } 1562 } 1563 1564 skb_queue_walk(&sk->sk_receive_queue, skb) { 1565 offset = peek_seq - ULP_SKB_CB(skb)->seq; 1566 if (offset < skb->len) 1567 goto found_ok_skb; 1568 } 1569 1570 /* empty receive queue */ 1571 if (copied) 1572 break; 1573 if (sock_flag(sk, SOCK_DONE)) 1574 break; 1575 if (sk->sk_err) { 1576 copied = sock_error(sk); 1577 break; 1578 } 1579 if (sk->sk_shutdown & RCV_SHUTDOWN) 1580 break; 1581 if (sk->sk_state == TCP_CLOSE) { 1582 copied = -ENOTCONN; 1583 break; 1584 } 1585 if (!timeo) { 1586 copied = -EAGAIN; 1587 break; 1588 } 1589 if (signal_pending(current)) { 1590 copied = sock_intr_errno(timeo); 1591 break; 1592 } 1593 1594 if (READ_ONCE(sk->sk_backlog.tail)) { 1595 /* Do not sleep, just process backlog. */ 1596 release_sock(sk); 1597 lock_sock(sk); 1598 } else { 1599 ret = sk_wait_data(sk, &timeo, NULL); 1600 if (ret < 0) { 1601 /* here 'copied' is 0 due to previous checks */ 1602 copied = ret; 1603 break; 1604 } 1605 } 1606 1607 if (unlikely(peek_seq != tp->copied_seq)) { 1608 if (net_ratelimit()) 1609 pr_info("TCP(%s:%d), race in MSG_PEEK.\n", 1610 current->comm, current->pid); 1611 peek_seq = tp->copied_seq; 1612 } 1613 continue; 1614 1615 found_ok_skb: 1616 avail = skb->len - offset; 1617 if (len < avail) 1618 avail = len; 1619 /* 1620 * Do we have urgent data here? We need to skip over the 1621 * urgent byte. 1622 */ 1623 if (unlikely(tp->urg_data)) { 1624 u32 urg_offset = tp->urg_seq - peek_seq; 1625 1626 if (urg_offset < avail) { 1627 /* 1628 * The amount of data we are preparing to copy 1629 * contains urgent data. 1630 */ 1631 if (!urg_offset) { /* First byte is urgent */ 1632 if (!sock_flag(sk, SOCK_URGINLINE)) { 1633 peek_seq++; 1634 offset++; 1635 avail--; 1636 } 1637 if (!avail) 1638 continue; 1639 } else { 1640 /* stop short of the urgent data */ 1641 avail = urg_offset; 1642 } 1643 } 1644 } 1645 1646 /* 1647 * If MSG_TRUNC is specified the data is discarded. 1648 */ 1649 if (likely(!(flags & MSG_TRUNC))) 1650 if (skb_copy_datagram_msg(skb, offset, msg, len)) { 1651 if (!copied) { 1652 copied = -EFAULT; 1653 break; 1654 } 1655 } 1656 peek_seq += avail; 1657 copied += avail; 1658 len -= avail; 1659 } while (len > 0); 1660 1661 release_sock(sk); 1662 return copied; 1663 } 1664 1665 int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 1666 int flags, int *addr_len) 1667 { 1668 struct tcp_sock *tp = tcp_sk(sk); 1669 struct chtls_sock *csk; 1670 unsigned long avail; /* amount of available data in current skb */ 1671 int buffers_freed; 1672 int copied = 0; 1673 long timeo; 1674 int target; /* Read at least this many bytes */ 1675 int ret; 1676 1677 buffers_freed = 0; 1678 1679 if (unlikely(flags & MSG_OOB)) 1680 return tcp_prot.recvmsg(sk, msg, len, flags, addr_len); 1681 1682 if (unlikely(flags & MSG_PEEK)) 1683 return peekmsg(sk, msg, len, flags); 1684 1685 if (sk_can_busy_loop(sk) && 1686 skb_queue_empty_lockless(&sk->sk_receive_queue) && 1687 sk->sk_state == TCP_ESTABLISHED) 1688 sk_busy_loop(sk, flags & MSG_DONTWAIT); 1689 1690 lock_sock(sk); 1691 csk = rcu_dereference_sk_user_data(sk); 1692 1693 if (is_tls_rx(csk)) 1694 return chtls_pt_recvmsg(sk, msg, len, flags, addr_len); 1695 1696 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1697 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1698 1699 if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) 1700 chtls_cleanup_rbuf(sk, copied); 1701 1702 do { 1703 struct sk_buff *skb; 1704 u32 offset; 1705 1706 if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) { 1707 if (copied) 1708 break; 1709 if (signal_pending(current)) { 1710 copied = timeo ? sock_intr_errno(timeo) : 1711 -EAGAIN; 1712 break; 1713 } 1714 } 1715 1716 skb = skb_peek(&sk->sk_receive_queue); 1717 if (skb) 1718 goto found_ok_skb; 1719 1720 if (csk->wr_credits && 1721 skb_queue_len(&csk->txq) && 1722 chtls_push_frames(csk, csk->wr_credits == 1723 csk->wr_max_credits)) 1724 sk->sk_write_space(sk); 1725 1726 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) 1727 break; 1728 1729 if (copied) { 1730 if (sk->sk_err || sk->sk_state == TCP_CLOSE || 1731 (sk->sk_shutdown & RCV_SHUTDOWN) || 1732 signal_pending(current)) 1733 break; 1734 } else { 1735 if (sock_flag(sk, SOCK_DONE)) 1736 break; 1737 if (sk->sk_err) { 1738 copied = sock_error(sk); 1739 break; 1740 } 1741 if (sk->sk_shutdown & RCV_SHUTDOWN) 1742 break; 1743 if (sk->sk_state == TCP_CLOSE) { 1744 copied = -ENOTCONN; 1745 break; 1746 } 1747 if (!timeo) { 1748 copied = -EAGAIN; 1749 break; 1750 } 1751 if (signal_pending(current)) { 1752 copied = sock_intr_errno(timeo); 1753 break; 1754 } 1755 } 1756 1757 if (READ_ONCE(sk->sk_backlog.tail)) { 1758 release_sock(sk); 1759 lock_sock(sk); 1760 chtls_cleanup_rbuf(sk, copied); 1761 continue; 1762 } 1763 1764 if (copied >= target) 1765 break; 1766 chtls_cleanup_rbuf(sk, copied); 1767 ret = sk_wait_data(sk, &timeo, NULL); 1768 if (ret < 0) { 1769 copied = copied ? : ret; 1770 goto unlock; 1771 } 1772 continue; 1773 1774 found_ok_skb: 1775 if (!skb->len) { 1776 chtls_kfree_skb(sk, skb); 1777 if (!copied && !timeo) { 1778 copied = -EAGAIN; 1779 break; 1780 } 1781 1782 if (copied < target) 1783 continue; 1784 1785 break; 1786 } 1787 1788 offset = tp->copied_seq - ULP_SKB_CB(skb)->seq; 1789 avail = skb->len - offset; 1790 if (len < avail) 1791 avail = len; 1792 1793 if (unlikely(tp->urg_data)) { 1794 u32 urg_offset = tp->urg_seq - tp->copied_seq; 1795 1796 if (urg_offset < avail) { 1797 if (urg_offset) { 1798 avail = urg_offset; 1799 } else if (!sock_flag(sk, SOCK_URGINLINE)) { 1800 tp->copied_seq++; 1801 offset++; 1802 avail--; 1803 if (!avail) 1804 goto skip_copy; 1805 } 1806 } 1807 } 1808 1809 if (likely(!(flags & MSG_TRUNC))) { 1810 if (skb_copy_datagram_msg(skb, offset, 1811 msg, avail)) { 1812 if (!copied) { 1813 copied = -EFAULT; 1814 break; 1815 } 1816 } 1817 } 1818 1819 tp->copied_seq += avail; 1820 copied += avail; 1821 len -= avail; 1822 1823 skip_copy: 1824 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) 1825 tp->urg_data = 0; 1826 1827 if (avail + offset >= skb->len) { 1828 chtls_free_skb(sk, skb); 1829 buffers_freed++; 1830 1831 if (copied >= target && 1832 !skb_peek(&sk->sk_receive_queue)) 1833 break; 1834 } 1835 } while (len > 0); 1836 1837 if (buffers_freed) 1838 chtls_cleanup_rbuf(sk, copied); 1839 1840 unlock: 1841 release_sock(sk); 1842 return copied; 1843 } 1844