/*-
 * Copyright (c) 2016-2018 Netflix, Inc.
 * Copyright (c) 2016-2021 Mellanox Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/ethernet.h>
#include <net/bpf.h>
#include <net/vnet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/infiniband.h>
#include <net/if_lagg.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
#include <netinet/ip6.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_hpts.h>
#include <netinet/tcp_log_buf.h>

static void
build_ack_entry(struct tcp_ackent *ae, struct tcphdr *th, struct mbuf *m,
    uint32_t *ts_ptr, uint16_t iptos)
{
        /*
         * Given a TCP ACK, summarize it down into the small TCP ACK
         * entry.
         */
        ae->timestamp = m->m_pkthdr.rcv_tstmp;
        ae->flags = 0;
        if (m->m_flags & M_TSTMP_LRO)
                ae->flags |= TSTMP_LRO;
        else if (m->m_flags & M_TSTMP)
                ae->flags |= TSTMP_HDWR;
        ae->seq = th->th_seq;
        ae->ack = th->th_ack;
        ae->flags |= tcp_get_flags(th);
        if (ts_ptr != NULL) {
                ae->ts_value = ntohl(ts_ptr[1]);
                ae->ts_echo = ntohl(ts_ptr[2]);
                ae->flags |= HAS_TSTMP;
        }
        ae->win = th->th_win;
        ae->codepoint = iptos;
}

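/*
 * Illustrative sketch only: each M_ACKCMP mbuf built below carries an
 * array of struct tcp_ackent records, one per summarized ACK, so m_len
 * stays a multiple of sizeof(struct tcp_ackent).  A stack that sets
 * TF2_MBUF_ACKCMP could walk such an mbuf roughly as follows (the
 * process_ack() helper is hypothetical and only shows the layout that
 * build_ack_entry() fills in):
 *
 *	struct tcp_ackent *ae = mtod(m, struct tcp_ackent *);
 *	int i, cnt = m->m_len / sizeof(struct tcp_ackent);
 *
 *	for (i = 0; i < cnt; i++)
 *		process_ack(tp, &ae[i]);
 */
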
static inline bool
tcp_lro_ack_valid(struct mbuf *m, struct tcphdr *th, uint32_t **ppts, bool *other_opts)
{
        /*
         * This function returns two bits of valuable information.
         * a) Is what is present capable of being ack-compressed?
         *    We can ack-compress if there are no options or just
         *    a timestamp option, and of course the th_flags must
         *    be correct as well.
         * b) Are other options present, such as SACK?  This is
         *    used to determine if we want to wake up or not.
         */
        bool ret = true;

        switch (th->th_off << 2) {
        case (sizeof(*th) + TCPOLEN_TSTAMP_APPA):
                *ppts = (uint32_t *)(th + 1);
                /* Check if we have only one timestamp option. */
                if (**ppts == TCP_LRO_TS_OPTION)
                        *other_opts = false;
                else {
                        *other_opts = true;
                        ret = false;
                }
                break;
        case (sizeof(*th)):
                /* No options. */
                *ppts = NULL;
                *other_opts = false;
                break;
        default:
                *ppts = NULL;
                *other_opts = true;
                ret = false;
                break;
        }
        /* For ACKCMP we only accept ACK, PUSH, ECE and CWR. */
        if ((tcp_get_flags(th) & ~(TH_ACK | TH_PUSH | TH_ECE | TH_CWR)) != 0)
                ret = false;
        /* If it has data on it we cannot compress it. */
        if (m->m_pkthdr.lro_tcp_d_len)
                ret = false;

        /* ACK flag must be set. */
        if (!(tcp_get_flags(th) & TH_ACK))
                ret = false;
        return (ret);
}

static bool
tcp_lro_check_wake_status(struct tcpcb *tp)
{

        if (tp->t_fb->tfb_early_wake_check != NULL)
                return ((tp->t_fb->tfb_early_wake_check)(tp));
        return (false);
}

static void
tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc,
    const struct lro_entry *le, const struct mbuf *m,
    int frm, int32_t tcp_data_len, uint32_t th_seq,
    uint32_t th_ack, uint16_t th_win)
{
        if (tcp_bblogging_on(tp)) {
                union tcp_log_stackspecific log;
                struct timeval tv, btv;
                uint32_t cts;

                cts = tcp_get_usecs(&tv);
                memset(&log, 0, sizeof(union tcp_log_stackspecific));
                log.u_bbr.flex8 = frm;
                log.u_bbr.flex1 = tcp_data_len;
                if (m)
                        log.u_bbr.flex2 = m->m_pkthdr.len;
                else
                        log.u_bbr.flex2 = 0;
                if (le->m_head) {
                        log.u_bbr.flex3 = le->m_head->m_pkthdr.lro_nsegs;
                        log.u_bbr.flex4 = le->m_head->m_pkthdr.lro_tcp_d_len;
                        log.u_bbr.flex5 = le->m_head->m_pkthdr.len;
                        log.u_bbr.delRate = le->m_head->m_flags;
                        log.u_bbr.rttProp = le->m_head->m_pkthdr.rcv_tstmp;
                }
                log.u_bbr.inflight = th_seq;
                log.u_bbr.delivered = th_ack;
                log.u_bbr.timeStamp = cts;
                log.u_bbr.epoch = le->next_seq;
                log.u_bbr.lt_epoch = le->ack_seq;
                log.u_bbr.pacing_gain = th_win;
                log.u_bbr.cwnd_gain = le->window;
                log.u_bbr.lost = curcpu;
                log.u_bbr.cur_del_rate = (uintptr_t)m;
                log.u_bbr.bw_inuse = (uintptr_t)le->m_head;
                bintime2timeval(&lc->lro_last_queue_time, &btv);
                log.u_bbr.flex6 = tcp_tv_to_usectick(&btv);
                log.u_bbr.flex7 = le->compressed;
                log.u_bbr.pacing_gain = le->uncompressed;
                if (in_epoch(net_epoch_preempt))
                        log.u_bbr.inhpts = 1;
                else
                        log.u_bbr.inhpts = 0;
                TCP_LOG_EVENTP(tp, NULL, &tptosocket(tp)->so_rcv,
                    &tptosocket(tp)->so_snd,
                    TCP_LOG_LRO, 0, 0, &log, false, &tv);
        }
}

static struct mbuf *
tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le,
    struct tcpcb *tp, int32_t *new_m, bool can_append_old_cmp)
{
        struct mbuf *m;

        /* Look at the last mbuf, if any, in the queue. */
        if (can_append_old_cmp) {
                m = STAILQ_LAST(&tp->t_inqueue, mbuf, m_stailqpkt);
                if (m != NULL && (m->m_flags & M_ACKCMP) != 0) {
                        if (M_TRAILINGSPACE(m) >= sizeof(struct tcp_ackent)) {
                                tcp_lro_log(tp, lc, le, NULL, 23, 0, 0, 0, 0);
                                *new_m = 0;
                                counter_u64_add(tcp_extra_mbuf, 1);
                                return (m);
                        } else {
                                /* Mark that we ran out of space. */
                                tp->t_flags2 |= TF2_MBUF_L_ACKS;
                        }
                }
        }
        /* Decide mbuf size. */
        tcp_lro_log(tp, lc, le, NULL, 21, 0, 0, 0, 0);
        if (tp->t_flags2 & TF2_MBUF_L_ACKS)
                m = m_getcl(M_NOWAIT, MT_DATA, M_ACKCMP | M_PKTHDR);
        else
                m = m_gethdr(M_NOWAIT, MT_DATA);

        if (__predict_false(m == NULL)) {
                counter_u64_add(tcp_would_have_but, 1);
                return (NULL);
        }
        counter_u64_add(tcp_comp_total, 1);
        m->m_pkthdr.rcvif = lc->ifp;
        m->m_flags |= M_ACKCMP;
        *new_m = 1;
        return (m);
}

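/*
 * Sizing note (approximate): a plain pkthdr mbuf from m_gethdr() offers
 * about MHLEN bytes of storage, while m_getcl() attaches an MCLBYTES
 * cluster (2 KB by default), so a cluster-backed mbuf holds on the order
 * of MCLBYTES / sizeof(struct tcp_ackent) compressed entries instead of
 * MHLEN / sizeof(struct tcp_ackent).  Once a connection overflows a small
 * mbuf, TF2_MBUF_L_ACKS is set and its later compressed-ack mbufs are
 * allocated with a cluster.
 */
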
/*
 * Do BPF tap for either ACK_CMP packets or MBUF QUEUE type packets
 * and strip all but the IPv4/IPv6 header.
 */
static bool
do_bpf_strip_and_compress(struct tcpcb *tp, struct lro_ctrl *lc,
    struct lro_entry *le, struct mbuf **pp, struct mbuf **cmp,
    struct mbuf **mv_to, bool *should_wake, bool bpf_req, bool lagg_bpf_req,
    struct ifnet *lagg_ifp, bool can_append_old_cmp)
{
        union {
                void *ptr;
                struct ip *ip4;
                struct ip6_hdr *ip6;
        } l3;
        struct mbuf *m;
        struct mbuf *nm;
        struct tcphdr *th;
        struct tcp_ackent *ack_ent;
        uint32_t *ts_ptr;
        int32_t n_mbuf;
        bool other_opts, can_compress;
        uint8_t lro_type;
        uint16_t iptos;
        int tcp_hdr_offset;
        int idx;

        /* Get current mbuf. */
        m = *pp;

        /* Let the BPF see the packet */
        if (__predict_false(bpf_req))
                ETHER_BPF_MTAP(lc->ifp, m);

        if (__predict_false(lagg_bpf_req))
                ETHER_BPF_MTAP(lagg_ifp, m);

        tcp_hdr_offset = m->m_pkthdr.lro_tcp_h_off;
        lro_type = le->inner.data.lro_type;
        switch (lro_type) {
        case LRO_TYPE_NONE:
                lro_type = le->outer.data.lro_type;
                switch (lro_type) {
                case LRO_TYPE_IPV4_TCP:
                        tcp_hdr_offset -= sizeof(*le->outer.ip4);
                        m->m_pkthdr.lro_etype = ETHERTYPE_IP;
                        IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
                            le->outer.ip4, NULL);
                        break;
                case LRO_TYPE_IPV6_TCP:
                        tcp_hdr_offset -= sizeof(*le->outer.ip6);
                        m->m_pkthdr.lro_etype = ETHERTYPE_IPV6;
                        IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
                            NULL, le->outer.ip6);
                        break;
                default:
                        goto compressed;
                }
                break;
        case LRO_TYPE_IPV4_TCP:
                switch (le->outer.data.lro_type) {
                case LRO_TYPE_IPV4_UDP:
                        IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
                            le->outer.ip4, NULL);
                        UDP_PROBE(receive, NULL, NULL, le->outer.ip4, NULL,
                            le->outer.udp);
                        break;
                case LRO_TYPE_IPV6_UDP:
                        IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
                            NULL, le->outer.ip6);
                        UDP_PROBE(receive, NULL, NULL, le->outer.ip6, NULL,
                            le->outer.udp);
                        break;
                default:
                        __assert_unreachable();
                        break;
                }
                tcp_hdr_offset -= sizeof(*le->outer.ip4);
                m->m_pkthdr.lro_etype = ETHERTYPE_IP;
                IP_PROBE(receive, NULL, NULL, le->inner.ip4, NULL,
                    le->inner.ip4, NULL);
                break;
        case LRO_TYPE_IPV6_TCP:
                switch (le->outer.data.lro_type) {
                case LRO_TYPE_IPV4_UDP:
                        IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
                            le->outer.ip4, NULL);
                        UDP_PROBE(receive, NULL, NULL, le->outer.ip4, NULL,
                            le->outer.udp);
                        break;
                case LRO_TYPE_IPV6_UDP:
                        IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
                            NULL, le->outer.ip6);
                        UDP_PROBE(receive, NULL, NULL, le->outer.ip6, NULL,
                            le->outer.udp);
                        break;
                default:
                        __assert_unreachable();
                        break;
                }
                tcp_hdr_offset -= sizeof(*le->outer.ip6);
                m->m_pkthdr.lro_etype = ETHERTYPE_IPV6;
                IP_PROBE(receive, NULL, NULL, le->inner.ip6, NULL, NULL,
                    le->inner.ip6);
                break;
        default:
                goto compressed;
        }

        MPASS(tcp_hdr_offset >= 0);

        m_adj(m, tcp_hdr_offset);
        m->m_flags |= M_LRO_EHDRSTRP;
        m->m_flags &= ~M_ACKCMP;
        m->m_pkthdr.lro_tcp_h_off -= tcp_hdr_offset;

        th = tcp_lro_get_th(m);

        th->th_sum = 0;         /* TCP checksum is valid. */
        tcp_fields_to_host(th);
        TCP_PROBE5(receive, NULL, tp, m, tp, th);

        /* Check if the ACK can be compressed. */
        can_compress = tcp_lro_ack_valid(m, th, &ts_ptr, &other_opts);

        /* Now let's look at the should-wake states. */
        if ((other_opts == true) &&
            ((tp->t_flags2 & TF2_DONT_SACK_QUEUE) == 0)) {
                /*
                 * If there are other options (SACK?) and the
                 * tcp endpoint has not expressly told us it does
                 * not care about SACKs, then we should wake up.
                 */
                *should_wake = true;
        } else if (*should_wake == false) {
                /* Wakeup override check if we are false here. */
                *should_wake = tcp_lro_check_wake_status(tp);
        }
        /* Is the ACK compressible? */
        if (can_compress == false)
                goto done;
        /* Does the TCP endpoint support ACK compression? */
        if ((tp->t_flags2 & TF2_MBUF_ACKCMP) == 0)
                goto done;

        /* Let's get the TOS/traffic class field. */
        l3.ptr = mtod(m, void *);
        switch (lro_type) {
        case LRO_TYPE_IPV4_TCP:
                iptos = l3.ip4->ip_tos;
                break;
        case LRO_TYPE_IPV6_TCP:
                iptos = IPV6_TRAFFIC_CLASS(l3.ip6);
                break;
        default:
                iptos = 0;      /* Keep compiler happy. */
                break;
        }
        /* Now let's get space if we don't have some already. */
        if (*cmp == NULL) {
new_one:
                nm = tcp_lro_get_last_if_ackcmp(lc, le, tp, &n_mbuf,
                    can_append_old_cmp);
                if (__predict_false(nm == NULL))
                        goto done;
                *cmp = nm;
                if (n_mbuf) {
                        /*
                         * Link in the new cmp ack to our in-order place,
                         * first set our cmp ack's next to where we are.
                         */
                        nm->m_nextpkt = m;
                        (*pp) = nm;
                        /*
                         * Set it up so mv_to is advanced to our
                         * compressed ack. This way the caller can
                         * advance pp to the right place.
                         */
                        *mv_to = nm;
                        /*
                         * Advance it here locally as well.
                         */
                        pp = &nm->m_nextpkt;
                }
        } else {
                /* We have one already we are working on. */
                nm = *cmp;
                if (M_TRAILINGSPACE(nm) < sizeof(struct tcp_ackent)) {
                        /* We ran out of space. */
                        tp->t_flags2 |= TF2_MBUF_L_ACKS;
                        goto new_one;
                }
        }
        MPASS(M_TRAILINGSPACE(nm) >= sizeof(struct tcp_ackent));
        counter_u64_add(tcp_inp_lro_compressed, 1);
        le->compressed++;
        /* We can add in to the one on the tail. */
        ack_ent = mtod(nm, struct tcp_ackent *);
        idx = (nm->m_len / sizeof(struct tcp_ackent));
        build_ack_entry(&ack_ent[idx], th, m, ts_ptr, iptos);

        /* Bump the size of both pkt-hdr and len. */
        nm->m_len += sizeof(struct tcp_ackent);
        nm->m_pkthdr.len += sizeof(struct tcp_ackent);
compressed:
        /* Advance to the next mbuf before freeing. */
        *pp = m->m_nextpkt;
        m->m_nextpkt = NULL;
        m_freem(m);
        return (true);
done:
        counter_u64_add(tcp_uncomp_total, 1);
        le->uncompressed++;
        return (false);
}

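/*
 * Splice the whole LRO entry's mbuf chain onto the tail of the tcpcb's
 * input queue in constant time: a temporary STAILQ head is constructed
 * from le->m_head and the m_stailqpkt linkage of le->m_last_mbuf, and
 * STAILQ_CONCAT() then moves it onto tp->t_inqueue without walking the
 * individual mbufs.
 */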
static void
tcp_queue_pkts(struct tcpcb *tp, struct lro_entry *le)
{

        INP_WLOCK_ASSERT(tptoinpcb(tp));

        STAILQ_HEAD(, mbuf) q = { le->m_head,
            &STAILQ_NEXT(le->m_last_mbuf, m_stailqpkt) };
        STAILQ_CONCAT(&tp->t_inqueue, &q);
        le->m_head = NULL;
        le->m_last_mbuf = NULL;
}

static struct tcpcb *
tcp_lro_lookup(struct ifnet *ifp, struct lro_parser *pa)
{
        struct inpcb *inp;

        CURVNET_ASSERT_SET();
        switch (pa->data.lro_type) {
#ifdef INET6
        case LRO_TYPE_IPV6_TCP:
                inp = in6_pcblookup(&V_tcbinfo,
                    &pa->data.s_addr.v6,
                    pa->data.s_port,
                    &pa->data.d_addr.v6,
                    pa->data.d_port,
                    INPLOOKUP_WLOCKPCB,
                    ifp);
                break;
#endif
#ifdef INET
        case LRO_TYPE_IPV4_TCP:
                inp = in_pcblookup(&V_tcbinfo,
                    pa->data.s_addr.v4,
                    pa->data.s_port,
                    pa->data.d_addr.v4,
                    pa->data.d_port,
                    INPLOOKUP_WLOCKPCB,
                    ifp);
                break;
#endif
        default:
                return (NULL);
        }

        return (intotcpcb(inp));
}

static int
_tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
{
        struct tcpcb *tp;
        struct mbuf **pp, *cmp, *mv_to;
        struct ifnet *lagg_ifp;
        bool bpf_req, lagg_bpf_req, should_wake, can_append_old_cmp;

        /* Check if the packet doesn't belong to our network interface. */
        if ((tcplro_stacks_wanting_mbufq == 0) ||
            (le->outer.data.vlan_id != 0) ||
            (le->inner.data.lro_type != LRO_TYPE_NONE))
                return (TCP_LRO_CANNOT);

#ifdef INET6
        /*
         * Be proactive about unspecified IPv6 address in source. As
         * we use all-zero to indicate unbound/unconnected pcb,
         * unspecified IPv6 address can be used to confuse us.
         *
         * Note that packets with unspecified IPv6 destination are
         * already dropped in ip6_input.
         */
        if (__predict_false(le->outer.data.lro_type == LRO_TYPE_IPV6_TCP &&
            IN6_IS_ADDR_UNSPECIFIED(&le->outer.data.s_addr.v6)))
                return (TCP_LRO_CANNOT);

        if (__predict_false(le->inner.data.lro_type == LRO_TYPE_IPV6_TCP &&
            IN6_IS_ADDR_UNSPECIFIED(&le->inner.data.s_addr.v6)))
                return (TCP_LRO_CANNOT);
#endif

        CURVNET_SET(lc->ifp->if_vnet);
        /*
         * Ensure that there are no packet filter hooks which would normally
         * be triggered in ether_demux(), ip_input(), or ip6_input().
         */
        if (
#ifdef INET
            PFIL_HOOKED_IN(V_inet_pfil_head) ||
#endif
#ifdef INET6
            PFIL_HOOKED_IN(V_inet6_pfil_head) ||
#endif
            PFIL_HOOKED_IN(V_link_pfil_head)) {
                CURVNET_RESTORE();
                return (TCP_LRO_CANNOT);
        }

        /* Lookup inp, if any.  Returns locked TCP inpcb. */
        tp = tcp_lro_lookup(lc->ifp,
            (le->inner.data.lro_type == LRO_TYPE_NONE) ? &le->outer : &le->inner);
        CURVNET_RESTORE();
        if (tp == NULL)
                return (TCP_LRO_CANNOT);

        counter_u64_add(tcp_inp_lro_locks_taken, 1);

        /* Check if the inp is dead, Jim. */
        if (tp->t_state == TCPS_TIME_WAIT) {
                INP_WUNLOCK(tptoinpcb(tp));
                return (TCP_LRO_CANNOT);
        }
        if (tp->t_lro_cpu == HPTS_CPU_NONE && lc->lro_cpu_is_set == 1)
                tp->t_lro_cpu = lc->lro_last_cpu;
        /* Check if the transport doesn't support the needed optimizations. */
        if ((tp->t_flags2 & (TF2_SUPPORTS_MBUFQ | TF2_MBUF_ACKCMP)) == 0) {
                INP_WUNLOCK(tptoinpcb(tp));
                return (TCP_LRO_CANNOT);
        }

        if (tp->t_flags2 & TF2_MBUF_QUEUE_READY)
                should_wake = false;
        else
                should_wake = true;
        /* Check if packets should be tapped to BPF. */
        bpf_req = bpf_peers_present(lc->ifp->if_bpf);
        lagg_bpf_req = false;
        lagg_ifp = NULL;
        if (lc->ifp->if_type == IFT_IEEE8023ADLAG ||
            lc->ifp->if_type == IFT_INFINIBANDLAG) {
                struct lagg_port *lp = lc->ifp->if_lagg;
                struct lagg_softc *sc = lp->lp_softc;

                lagg_ifp = sc->sc_ifp;
                if (lagg_ifp != NULL)
                        lagg_bpf_req = bpf_peers_present(lagg_ifp->if_bpf);
        }

        /* Strip and compress all the incoming packets. */
        can_append_old_cmp = true;
        cmp = NULL;
        for (pp = &le->m_head; *pp != NULL; ) {
                mv_to = NULL;
                if (do_bpf_strip_and_compress(tp, lc, le, pp, &cmp, &mv_to,
                    &should_wake, bpf_req, lagg_bpf_req, lagg_ifp,
                    can_append_old_cmp) == false) {
                        /* Advance to next mbuf. */
                        pp = &(*pp)->m_nextpkt;
                        /*
                         * Once we have appended we can't look in the pending
                         * inbound packets for a compressed ack to append to.
                         */
                        can_append_old_cmp = false;
                        /*
                         * Once we append we also need to stop adding to any
                         * compressed ack we were remembering. A new cmp
                         * ack will be required.
                         */
                        cmp = NULL;
                        tcp_lro_log(tp, lc, le, NULL, 25, 0, 0, 0, 0);
                } else if (mv_to != NULL) {
                        /* We are asked to move pp up. */
                        pp = &mv_to->m_nextpkt;
                        tcp_lro_log(tp, lc, le, NULL, 24, 0, 0, 0, 0);
                } else
                        tcp_lro_log(tp, lc, le, NULL, 26, 0, 0, 0, 0);
        }
        /* Update "m_last_mbuf", if any. */
        if (pp == &le->m_head)
                le->m_last_mbuf = *pp;
        else
                le->m_last_mbuf = __containerof(pp, struct mbuf, m_nextpkt);

        /* Check if any data mbufs are left. */
        if (le->m_head != NULL) {
                counter_u64_add(tcp_inp_lro_direct_queue, 1);
                tcp_lro_log(tp, lc, le, NULL, 22, 1, tp->t_flags2, 0, 1);
                tcp_queue_pkts(tp, le);
        }
        if (should_wake) {
                /* Wakeup */
                counter_u64_add(tcp_inp_lro_wokeup_queue, 1);
                if ((*tp->t_fb->tfb_do_queued_segments)(tp, 0))
                        /* TCP cb gone and unlocked. */
                        return (0);
        }
        INP_WUNLOCK(tptoinpcb(tp));

        return (0);     /* Success. */
}

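/*
 * tcp_lro_flush_tcphpts is a function pointer consulted by the generic
 * LRO flush path in tcp_lro.c; while this code is active it points at
 * _tcp_lro_flush_tcphpts() above.  A return value of 0 there means the
 * entry's packets were handed directly to the connection (queued and/or
 * processed by the stack), while TCP_LRO_CANNOT asks the caller to fall
 * back to the normal ip_input()/ip6_input() delivery for that entry.
 */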
void
tcp_lro_hpts_init(void)
{
        tcp_lro_flush_tcphpts = _tcp_lro_flush_tcphpts;
}

void
tcp_lro_hpts_uninit(void)
{
        atomic_store_ptr(&tcp_lro_flush_tcphpts, NULL);
}