/*-
 * Copyright (c) 2016-2018 Netflix, Inc.
 * Copyright (c) 2016-2021 Mellanox Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/bpf.h>
#include <net/vnet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/infiniband.h>
#include <net/if_lagg.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
#include <netinet/ip6.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_hpts.h>
#include <netinet/tcp_log_buf.h>

static void
build_ack_entry(struct tcp_ackent *ae, struct tcphdr *th, struct mbuf *m,
    uint32_t *ts_ptr, uint16_t iptos)
{
	/*
	 * Given a TCP ACK, summarize it down into the small TCP ACK
	 * entry.
	 */
	ae->timestamp = m->m_pkthdr.rcv_tstmp;
	ae->flags = 0;
	if (m->m_flags & M_TSTMP_LRO)
		ae->flags |= TSTMP_LRO;
	else if (m->m_flags & M_TSTMP)
		ae->flags |= TSTMP_HDWR;
	ae->seq = th->th_seq;
	ae->ack = th->th_ack;
	ae->flags |= tcp_get_flags(th);
	if (ts_ptr != NULL) {
		ae->ts_value = ntohl(ts_ptr[1]);
		ae->ts_echo = ntohl(ts_ptr[2]);
		ae->flags |= HAS_TSTMP;
	}
	ae->win = th->th_win;
	ae->codepoint = iptos;
}

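/*
 * ACK compression summary (descriptive comment): instead of queueing every
 * pure ACK as its own mbuf, eligible ACKs are boiled down by
 * build_ack_entry() above into fixed-size struct tcp_ackent records that
 * are packed back to back into a single mbuf marked M_ACKCMP.  The helpers
 * that follow decide whether a given segment qualifies for this treatment
 * and whether the transport should be woken up.
 */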
static inline bool
tcp_lro_ack_valid(struct mbuf *m, struct tcphdr *th, uint32_t **ppts, bool *other_opts)
{
	/*
	 * This function returns two bits of valuable information.
	 * a) Is what is present capable of being ack-compressed?
	 *    We can ack-compress if there are no options or just
	 *    a timestamp option, and of course the th_flags must
	 *    be correct as well.
	 * b) Are other options, such as SACK, present?  This is
	 *    used to determine if we want to wake up or not.
	 */
	bool ret = true;

	switch (th->th_off << 2) {
	case (sizeof(*th) + TCPOLEN_TSTAMP_APPA):
		*ppts = (uint32_t *)(th + 1);
		/* Check if we have only one timestamp option. */
		if (**ppts == TCP_LRO_TS_OPTION)
			*other_opts = false;
		else {
			*other_opts = true;
			ret = false;
		}
		break;
	case (sizeof(*th)):
		/* No options. */
		*ppts = NULL;
		*other_opts = false;
		break;
	default:
		*ppts = NULL;
		*other_opts = true;
		ret = false;
		break;
	}
	/* For ACKCMP we only accept ACK, PUSH, ECE and CWR. */
	if ((tcp_get_flags(th) & ~(TH_ACK | TH_PUSH | TH_ECE | TH_CWR)) != 0)
		ret = false;
	/* If it has data on it we cannot compress it. */
	if (m->m_pkthdr.lro_tcp_d_len)
		ret = false;

	/* ACK flag must be set. */
	if (!(tcp_get_flags(th) & TH_ACK))
		ret = false;
	return (ret);
}

static bool
tcp_lro_check_wake_status(struct tcpcb *tp)
{

	if (tp->t_fb->tfb_early_wake_check != NULL)
		return ((tp->t_fb->tfb_early_wake_check)(tp));
	return (false);
}

static void
tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc,
    const struct lro_entry *le, const struct mbuf *m,
    int frm, int32_t tcp_data_len, uint32_t th_seq,
    uint32_t th_ack, uint16_t th_win)
{
	if (tcp_bblogging_on(tp)) {
		union tcp_log_stackspecific log;
		struct timeval tv, btv;
		uint32_t cts;

		cts = tcp_get_usecs(&tv);
		memset(&log, 0, sizeof(union tcp_log_stackspecific));
		log.u_bbr.flex8 = frm;
		log.u_bbr.flex1 = tcp_data_len;
		if (m)
			log.u_bbr.flex2 = m->m_pkthdr.len;
		else
			log.u_bbr.flex2 = 0;
		if (le->m_head) {
			log.u_bbr.flex3 = le->m_head->m_pkthdr.lro_nsegs;
			log.u_bbr.flex4 = le->m_head->m_pkthdr.lro_tcp_d_len;
			log.u_bbr.flex5 = le->m_head->m_pkthdr.len;
			log.u_bbr.delRate = le->m_head->m_flags;
			log.u_bbr.rttProp = le->m_head->m_pkthdr.rcv_tstmp;
		}
		log.u_bbr.inflight = th_seq;
		log.u_bbr.delivered = th_ack;
		log.u_bbr.timeStamp = cts;
		log.u_bbr.epoch = le->next_seq;
		log.u_bbr.lt_epoch = le->ack_seq;
		log.u_bbr.pacing_gain = th_win;
		log.u_bbr.cwnd_gain = le->window;
		log.u_bbr.lost = curcpu;
		log.u_bbr.cur_del_rate = (uintptr_t)m;
		log.u_bbr.bw_inuse = (uintptr_t)le->m_head;
		bintime2timeval(&lc->lro_last_queue_time, &btv);
		log.u_bbr.flex6 = tcp_tv_to_usectick(&btv);
		log.u_bbr.flex7 = le->compressed;
		log.u_bbr.pacing_gain = le->uncompressed;
		if (in_epoch(net_epoch_preempt))
			log.u_bbr.inhpts = 1;
		else
			log.u_bbr.inhpts = 0;
		TCP_LOG_EVENTP(tp, NULL, &tptosocket(tp)->so_rcv,
		    &tptosocket(tp)->so_snd,
		    TCP_LOG_LRO, 0, 0, &log, false, &tv);
	}
}

static struct mbuf *
tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le,
    struct tcpcb *tp, int32_t *new_m, bool can_append_old_cmp)
{
	struct mbuf *m;

	/* Look at the last mbuf, if any, in the queue. */
	if (can_append_old_cmp) {
		m = STAILQ_LAST(&tp->t_inqueue, mbuf, m_stailqpkt);
		if (m != NULL && (m->m_flags & M_ACKCMP) != 0) {
			if (M_TRAILINGSPACE(m) >= sizeof(struct tcp_ackent)) {
				tcp_lro_log(tp, lc, le, NULL, 23, 0, 0, 0, 0);
				*new_m = 0;
				counter_u64_add(tcp_extra_mbuf, 1);
				return (m);
			} else {
				/* Mark that we ran out of space. */
				tp->t_flags2 |= TF2_MBUF_L_ACKS;
			}
		}
	}
	/* Decide mbuf size. */
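	/*
	 * Either we may not append to the last queued mbuf or it had no
	 * room left, so allocate a fresh compressed-ACK mbuf.  If we
	 * previously ran out of trailing space (TF2_MBUF_L_ACKS), use a
	 * cluster so more tcp_ackent entries fit per mbuf.
	 */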
	tcp_lro_log(tp, lc, le, NULL, 21, 0, 0, 0, 0);
	if (tp->t_flags2 & TF2_MBUF_L_ACKS)
		m = m_getcl(M_NOWAIT, MT_DATA, M_ACKCMP | M_PKTHDR);
	else
		m = m_gethdr(M_NOWAIT, MT_DATA);

	if (__predict_false(m == NULL)) {
		counter_u64_add(tcp_would_have_but, 1);
		return (NULL);
	}
	counter_u64_add(tcp_comp_total, 1);
	m->m_pkthdr.rcvif = lc->ifp;
	m->m_flags |= M_ACKCMP;
	*new_m = 1;
	return (m);
}

/*
 * Do a BPF tap for either ACK_CMP packets or MBUF QUEUE type packets
 * and strip everything but the IPv4/IPv6 header.
 */
static bool
do_bpf_strip_and_compress(struct tcpcb *tp, struct lro_ctrl *lc,
    struct lro_entry *le, struct mbuf **pp, struct mbuf **cmp,
    struct mbuf **mv_to, bool *should_wake, bool bpf_req, bool lagg_bpf_req,
    struct ifnet *lagg_ifp, bool can_append_old_cmp)
{
	union {
		void *ptr;
		struct ip *ip4;
		struct ip6_hdr *ip6;
	} l3;
	struct mbuf *m;
	struct mbuf *nm;
	struct tcphdr *th;
	struct tcp_ackent *ack_ent;
	uint32_t *ts_ptr;
	int32_t n_mbuf;
	bool other_opts, can_compress;
	uint8_t lro_type;
	uint16_t iptos;
	int tcp_hdr_offset;
	int idx;

	/* Get current mbuf. */
	m = *pp;

	/* Let the BPF see the packet. */
	if (__predict_false(bpf_req))
		ETHER_BPF_MTAP(lc->ifp, m);

	if (__predict_false(lagg_bpf_req))
		ETHER_BPF_MTAP(lagg_ifp, m);

	tcp_hdr_offset = m->m_pkthdr.lro_tcp_h_off;
	lro_type = le->inner.data.lro_type;
	switch (lro_type) {
	case LRO_TYPE_NONE:
		lro_type = le->outer.data.lro_type;
		switch (lro_type) {
		case LRO_TYPE_IPV4_TCP:
			tcp_hdr_offset -= sizeof(*le->outer.ip4);
			m->m_pkthdr.lro_etype = ETHERTYPE_IP;
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			break;
		case LRO_TYPE_IPV6_TCP:
			tcp_hdr_offset -= sizeof(*le->outer.ip6);
			m->m_pkthdr.lro_etype = ETHERTYPE_IPV6;
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			break;
		default:
			goto compressed;
		}
		break;
	case LRO_TYPE_IPV4_TCP:
		switch (le->outer.data.lro_type) {
		case LRO_TYPE_IPV4_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip4, NULL,
			    le->outer.udp);
			break;
		case LRO_TYPE_IPV6_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip6, NULL,
			    le->outer.udp);
			break;
		default:
			__assert_unreachable();
			break;
		}
		tcp_hdr_offset -= sizeof(*le->outer.ip4);
		m->m_pkthdr.lro_etype = ETHERTYPE_IP;
		IP_PROBE(receive, NULL, NULL, le->inner.ip4, NULL,
		    le->inner.ip4, NULL);
		break;
	case LRO_TYPE_IPV6_TCP:
		switch (le->outer.data.lro_type) {
		case LRO_TYPE_IPV4_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip4, NULL,
			    le->outer.udp);
			break;
		case LRO_TYPE_IPV6_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip6, NULL,
			    le->outer.udp);
			break;
		default:
			__assert_unreachable();
			break;
		}
		tcp_hdr_offset -= sizeof(*le->outer.ip6);
		m->m_pkthdr.lro_etype = ETHERTYPE_IPV6;
		IP_PROBE(receive, NULL, NULL, le->inner.ip6, NULL, NULL,
		    le->inner.ip6);
		break;
	default:
		goto compressed;
	}

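	/*
	 * At this point tcp_hdr_offset has been pulled back from the TCP
	 * header to the start of the IPv4/IPv6 header we want to keep;
	 * strip everything in front of it (Ethernet and any encapsulation
	 * headers) and adjust the recorded TCP header offset to match.
	 */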
	MPASS(tcp_hdr_offset >= 0);

	m_adj(m, tcp_hdr_offset);
	m->m_flags |= M_LRO_EHDRSTRP;
	m->m_flags &= ~M_ACKCMP;
	m->m_pkthdr.lro_tcp_h_off -= tcp_hdr_offset;

	th = tcp_lro_get_th(m);

	th->th_sum = 0;		/* TCP checksum is valid. */
	tcp_fields_to_host(th);
	TCP_PROBE5(receive, NULL, tp, m, tp, th);

	/* Check if the ACK can be compressed. */
	can_compress = tcp_lro_ack_valid(m, th, &ts_ptr, &other_opts);

	/* Now let's look at the should-wake states. */
	if ((other_opts == true) &&
	    ((tp->t_flags2 & TF2_DONT_SACK_QUEUE) == 0)) {
		/*
		 * If there are other options (SACK?) and the
		 * tcp endpoint has not expressly told us it does
		 * not care about SACKs, then we should wake up.
		 */
		*should_wake = true;
	} else if (*should_wake == false) {
		/* Wakeup override check if we are false here. */
		*should_wake = tcp_lro_check_wake_status(tp);
	}
	/* Is the ack compressible? */
	if (can_compress == false)
		goto done;
	/* Does the TCP endpoint support ACK compression? */
	if ((tp->t_flags2 & TF2_MBUF_ACKCMP) == 0)
		goto done;

	/* Let's get the TOS/traffic class field. */
	l3.ptr = mtod(m, void *);
	switch (lro_type) {
	case LRO_TYPE_IPV4_TCP:
		iptos = l3.ip4->ip_tos;
		break;
	case LRO_TYPE_IPV6_TCP:
		iptos = IPV6_TRAFFIC_CLASS(l3.ip6);
		break;
	default:
		iptos = 0;	/* Keep compiler happy. */
		break;
	}
	/* Now let's get space if we don't have some already. */
	if (*cmp == NULL) {
new_one:
		nm = tcp_lro_get_last_if_ackcmp(lc, le, tp, &n_mbuf,
		    can_append_old_cmp);
		if (__predict_false(nm == NULL))
			goto done;
		*cmp = nm;
		if (n_mbuf) {
			/*
			 * Link in the new cmp ack to our in-order place,
			 * first set our cmp ack's next to where we are.
			 */
			nm->m_nextpkt = m;
			(*pp) = nm;
			/*
			 * Set it up so mv_to is advanced to our
			 * compressed ack. This way the caller can
			 * advance pp to the right place.
			 */
			*mv_to = nm;
			/*
			 * Advance it here locally as well.
			 */
			pp = &nm->m_nextpkt;
		}
	} else {
		/* We already have one we are working on. */
		nm = *cmp;
		if (M_TRAILINGSPACE(nm) < sizeof(struct tcp_ackent)) {
			/* We ran out of space. */
			tp->t_flags2 |= TF2_MBUF_L_ACKS;
			goto new_one;
		}
	}
	MPASS(M_TRAILINGSPACE(nm) >= sizeof(struct tcp_ackent));
	counter_u64_add(tcp_inp_lro_compressed, 1);
	le->compressed++;
	/* We can add in to the one on the tail. */
	ack_ent = mtod(nm, struct tcp_ackent *);
	idx = (nm->m_len / sizeof(struct tcp_ackent));
	build_ack_entry(&ack_ent[idx], th, m, ts_ptr, iptos);

	/* Bump the size of both pkt-hdr and len. */
	nm->m_len += sizeof(struct tcp_ackent);
	nm->m_pkthdr.len += sizeof(struct tcp_ackent);
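	/*
	 * The ACK has now been summarized into the compressed mbuf; fall
	 * through and free the original segment below.
	 */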
compressed:
	/* Advance to next mbuf before freeing. */
	*pp = m->m_nextpkt;
	m->m_nextpkt = NULL;
	m_freem(m);
	return (true);
done:
	counter_u64_add(tcp_uncomp_total, 1);
	le->uncompressed++;
	return (false);
}

static void
tcp_queue_pkts(struct tcpcb *tp, struct lro_entry *le)
{

	INP_WLOCK_ASSERT(tptoinpcb(tp));

	STAILQ_HEAD(, mbuf) q = { le->m_head,
	    &STAILQ_NEXT(le->m_last_mbuf, m_stailqpkt) };
	STAILQ_CONCAT(&tp->t_inqueue, &q);
	le->m_head = NULL;
	le->m_last_mbuf = NULL;
}

static struct tcpcb *
tcp_lro_lookup(struct ifnet *ifp, struct lro_parser *pa)
{
	struct inpcb *inp;

	CURVNET_ASSERT_SET();
	switch (pa->data.lro_type) {
#ifdef INET6
	case LRO_TYPE_IPV6_TCP:
		inp = in6_pcblookup(&V_tcbinfo,
		    &pa->data.s_addr.v6,
		    pa->data.s_port,
		    &pa->data.d_addr.v6,
		    pa->data.d_port,
		    INPLOOKUP_WLOCKPCB,
		    ifp);
		break;
#endif
#ifdef INET
	case LRO_TYPE_IPV4_TCP:
		inp = in_pcblookup(&V_tcbinfo,
		    pa->data.s_addr.v4,
		    pa->data.s_port,
		    pa->data.d_addr.v4,
		    pa->data.d_port,
		    INPLOOKUP_WLOCKPCB,
		    ifp);
		break;
#endif
	default:
		return (NULL);
	}

	return (intotcpcb(inp));
}

static int
_tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
{
	struct tcpcb *tp;
	struct mbuf **pp, *cmp, *mv_to;
	struct ifnet *lagg_ifp;
	bool bpf_req, lagg_bpf_req, should_wake, can_append_old_cmp;

	/* Check if the packet doesn't belong to our network interface. */
	if ((tcplro_stacks_wanting_mbufq == 0) ||
	    (le->outer.data.vlan_id != 0) ||
	    (le->inner.data.lro_type != LRO_TYPE_NONE))
		return (TCP_LRO_CANNOT);

#ifdef INET6
	/*
	 * Be proactive about unspecified IPv6 addresses in the source.
	 * As we use all-zero to indicate an unbound/unconnected pcb,
	 * an unspecified IPv6 address can be used to confuse us.
	 *
	 * Note that packets with an unspecified IPv6 destination are
	 * already dropped in ip6_input.
	 */
	if (__predict_false(le->outer.data.lro_type == LRO_TYPE_IPV6_TCP &&
	    IN6_IS_ADDR_UNSPECIFIED(&le->outer.data.s_addr.v6)))
		return (TCP_LRO_CANNOT);

	if (__predict_false(le->inner.data.lro_type == LRO_TYPE_IPV6_TCP &&
	    IN6_IS_ADDR_UNSPECIFIED(&le->inner.data.s_addr.v6)))
		return (TCP_LRO_CANNOT);
#endif

	CURVNET_SET(lc->ifp->if_vnet);
	/*
	 * Ensure that there are no packet filter hooks which would normally
	 * be triggered in ether_demux(), ip_input(), or ip6_input().
	 */
	if (
#ifdef INET
	    PFIL_HOOKED_IN(V_inet_pfil_head) ||
#endif
#ifdef INET6
	    PFIL_HOOKED_IN(V_inet6_pfil_head) ||
#endif
	    PFIL_HOOKED_IN(V_link_pfil_head)) {
		CURVNET_RESTORE();
		return (TCP_LRO_CANNOT);
	}

	/* Lookup inp, if any. Returns locked TCP inpcb. */
	tp = tcp_lro_lookup(lc->ifp,
	    (le->inner.data.lro_type == LRO_TYPE_NONE) ? &le->outer : &le->inner);
	CURVNET_RESTORE();
	if (tp == NULL)
		return (TCP_LRO_CANNOT);

	counter_u64_add(tcp_inp_lro_locks_taken, 1);

	/* Check if the inp is dead, Jim. */
	if (tp->t_state == TCPS_TIME_WAIT) {
		INP_WUNLOCK(tptoinpcb(tp));
		return (TCP_LRO_CANNOT);
	}
	if (tp->t_lro_cpu == HPTS_CPU_NONE && lc->lro_cpu_is_set == 1)
		tp->t_lro_cpu = lc->lro_last_cpu;
	/* Check if the transport doesn't support the needed optimizations. */
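	/*
	 * The stack must accept either direct mbuf queueing
	 * (TF2_SUPPORTS_MBUFQ) or compressed ACKs (TF2_MBUF_ACKCMP);
	 * otherwise the packets have to go through the regular input path.
	 */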
	if ((tp->t_flags2 & (TF2_SUPPORTS_MBUFQ | TF2_MBUF_ACKCMP)) == 0) {
		INP_WUNLOCK(tptoinpcb(tp));
		return (TCP_LRO_CANNOT);
	}

	if (tp->t_flags2 & TF2_MBUF_QUEUE_READY)
		should_wake = false;
	else
		should_wake = true;
	/* Check if packets should be tapped to BPF. */
	bpf_req = bpf_peers_present(lc->ifp->if_bpf);
	lagg_bpf_req = false;
	lagg_ifp = NULL;
	if (lc->ifp->if_type == IFT_IEEE8023ADLAG ||
	    lc->ifp->if_type == IFT_INFINIBANDLAG) {
		struct lagg_port *lp = lc->ifp->if_lagg;
		struct lagg_softc *sc = lp->lp_softc;

		lagg_ifp = sc->sc_ifp;
		if (lagg_ifp != NULL)
			lagg_bpf_req = bpf_peers_present(lagg_ifp->if_bpf);
	}

	/* Strip and compress all the incoming packets. */
	can_append_old_cmp = true;
	cmp = NULL;
	for (pp = &le->m_head; *pp != NULL; ) {
		mv_to = NULL;
		if (do_bpf_strip_and_compress(tp, lc, le, pp, &cmp, &mv_to,
		    &should_wake, bpf_req, lagg_bpf_req, lagg_ifp,
		    can_append_old_cmp) == false) {
			/* Advance to next mbuf. */
			pp = &(*pp)->m_nextpkt;
			/*
			 * Once we have appended we can't look in the pending
			 * inbound packets for a compressed ack to append to.
			 */
			can_append_old_cmp = false;
			/*
			 * Once we append we also need to stop adding to any
			 * compressed ack we were remembering. A new cmp
			 * ack will be required.
			 */
			cmp = NULL;
			tcp_lro_log(tp, lc, le, NULL, 25, 0, 0, 0, 0);
		} else if (mv_to != NULL) {
			/* We are asked to move pp up. */
			pp = &mv_to->m_nextpkt;
			tcp_lro_log(tp, lc, le, NULL, 24, 0, 0, 0, 0);
		} else
			tcp_lro_log(tp, lc, le, NULL, 26, 0, 0, 0, 0);
	}
	/* Update "m_last_mbuf", if any. */
	if (pp == &le->m_head)
		le->m_last_mbuf = *pp;
	else
		le->m_last_mbuf = __containerof(pp, struct mbuf, m_nextpkt);

	/* Check if any data mbufs are left. */
	if (le->m_head != NULL) {
		counter_u64_add(tcp_inp_lro_direct_queue, 1);
		tcp_lro_log(tp, lc, le, NULL, 22, 1, tp->t_flags2, 0, 1);
		tcp_queue_pkts(tp, le);
	}
	if (should_wake) {
		/* Wakeup */
		counter_u64_add(tcp_inp_lro_wokeup_queue, 1);
		if ((*tp->t_fb->tfb_do_queued_segments)(tp, 0))
			/* TCP cb gone and unlocked. */
			return (0);
	}
	INP_WUNLOCK(tptoinpcb(tp));

	return (0);	/* Success. */
}

void
tcp_lro_hpts_init(void)
{
	tcp_lro_flush_tcphpts = _tcp_lro_flush_tcphpts;
}

void
tcp_lro_hpts_uninit(void)
{
	atomic_store_ptr(&tcp_lro_flush_tcphpts, NULL);
}