/*-
 * Copyright (c) 2016-2018 Netflix, Inc.
 * Copyright (c) 2016-2021 Mellanox Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/ethernet.h>
#include <net/bpf.h>
#include <net/vnet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/infiniband.h>
#include <net/if_lagg.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
#include <netinet/ip6.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_hpts.h>
#include <netinet/tcp_hpts_internal.h>
#ifdef TCP_BLACKBOX
#include <netinet/tcp_log_buf.h>
#endif

static void
build_ack_entry(struct tcp_ackent *ae, struct tcphdr *th, struct mbuf *m,
    uint32_t *ts_ptr, uint16_t iptos)
{
	/*
	 * Given a TCP ACK, summarize it down into the small TCP ACK
	 * entry.
	 */
	ae->timestamp = m->m_pkthdr.rcv_tstmp;
	ae->flags = 0;
	if (m->m_flags & M_TSTMP_LRO)
		ae->flags |= TSTMP_LRO;
	else if (m->m_flags & M_TSTMP)
		ae->flags |= TSTMP_HDWR;
	ae->seq = th->th_seq;
	ae->ack = th->th_ack;
	ae->flags |= tcp_get_flags(th);
	if (ts_ptr != NULL) {
		ae->ts_value = ntohl(ts_ptr[1]);
		ae->ts_echo = ntohl(ts_ptr[2]);
		ae->flags |= HAS_TSTMP;
	}
	ae->win = th->th_win;
	ae->codepoint = iptos;
}

static inline bool
tcp_lro_ack_valid(struct mbuf *m, struct tcphdr *th, uint32_t **ppts, bool *other_opts)
{
	/*
	 * This function returns two bits of valuable information.
	 * a) Is what is present capable of being ack-compressed?
	 *    We can ack-compress if there are no options, or just
	 *    a timestamp option, and of course the th_flags must
	 *    be correct as well.
	 * b) Are other options present, such as SACK? This is
	 *    used to determine if we want to wake up or not.
	 */
	bool ret = true;

	switch (th->th_off << 2) {
	case (sizeof(*th) + TCPOLEN_TSTAMP_APPA):
		*ppts = (uint32_t *)(th + 1);
		/* Check if we have only one timestamp option. */
		if (**ppts == TCP_LRO_TS_OPTION)
			*other_opts = false;
		else {
			*other_opts = true;
			ret = false;
		}
		break;
	case (sizeof(*th)):
		/* No options. */
		*ppts = NULL;
		*other_opts = false;
		break;
	default:
		*ppts = NULL;
		*other_opts = true;
		ret = false;
		break;
	}
	/* For ACKCMP we only accept ACK, PUSH, ECE and CWR. */
	if ((tcp_get_flags(th) & ~(TH_ACK | TH_PUSH | TH_ECE | TH_CWR)) != 0)
		ret = false;
	/* If it has data on it we cannot compress it. */
	if (m->m_pkthdr.lro_tcp_d_len)
		ret = false;

	/* ACK flag must be set. */
	if (!(tcp_get_flags(th) & TH_ACK))
		ret = false;
	return (ret);
}

static bool
tcp_lro_check_wake_status(struct tcpcb *tp)
{

	if (tp->t_fb->tfb_early_wake_check != NULL)
		return ((tp->t_fb->tfb_early_wake_check)(tp));
	return (false);
}

#ifdef TCP_BLACKBOX
static void
tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc,
    const struct lro_entry *le, const struct mbuf *m,
    int frm, int32_t tcp_data_len, uint32_t th_seq,
    uint32_t th_ack, uint16_t th_win)
{
	if (tcp_bblogging_on(tp)) {
		union tcp_log_stackspecific log;
		struct timeval tv, btv;
		uint32_t cts;

		cts = tcp_get_usecs(&tv);
		memset(&log, 0, sizeof(union tcp_log_stackspecific));
		log.u_bbr.flex8 = frm;
		log.u_bbr.flex1 = tcp_data_len;
		if (m)
			log.u_bbr.flex2 = m->m_pkthdr.len;
		else
			log.u_bbr.flex2 = 0;
		if (le->m_head) {
			log.u_bbr.flex3 = le->m_head->m_pkthdr.lro_nsegs;
			log.u_bbr.flex4 = le->m_head->m_pkthdr.lro_tcp_d_len;
			log.u_bbr.flex5 = le->m_head->m_pkthdr.len;
			log.u_bbr.delRate = le->m_head->m_flags;
			log.u_bbr.rttProp = le->m_head->m_pkthdr.rcv_tstmp;
		}
		log.u_bbr.inflight = th_seq;
		log.u_bbr.delivered = th_ack;
		log.u_bbr.timeStamp = cts;
		log.u_bbr.epoch = le->next_seq;
		log.u_bbr.lt_epoch = le->ack_seq;
		log.u_bbr.pacing_gain = th_win;
		log.u_bbr.cwnd_gain = le->window;
		log.u_bbr.lost = curcpu;
		log.u_bbr.cur_del_rate = (uintptr_t)m;
		log.u_bbr.bw_inuse = (uintptr_t)le->m_head;
		bintime2timeval(&lc->lro_last_queue_time, &btv);
		log.u_bbr.flex6 = tcp_tv_to_usec(&btv);
		log.u_bbr.flex7 = le->compressed;
		log.u_bbr.pacing_gain = le->uncompressed;
		if (in_epoch(net_epoch_preempt))
			log.u_bbr.inhpts = 1;
		else
			log.u_bbr.inhpts = 0;
		TCP_LOG_EVENTP(tp, NULL, &tptosocket(tp)->so_rcv,
		    &tptosocket(tp)->so_snd,
		    TCP_LOG_LRO, 0, 0, &log, false, &tv);
	}
}
#endif

/*
 * Return an mbuf which can hold another compressed ACK entry: reuse the
 * last M_ACKCMP mbuf already on the connection's input queue when
 * possible, otherwise allocate a new one.  *new_m is set to 0 when an
 * existing mbuf is reused and to 1 when a new one was allocated.
 */
static struct mbuf *
tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le,
    struct tcpcb *tp, int32_t *new_m, bool can_append_old_cmp)
{
	struct mbuf *m;

	/* Look at the last mbuf if any in queue */
	if (can_append_old_cmp) {
		m = STAILQ_LAST(&tp->t_inqueue, mbuf, m_stailqpkt);
		if (m != NULL && (m->m_flags & M_ACKCMP) != 0) {
			if (M_TRAILINGSPACE(m) >= sizeof(struct tcp_ackent)) {
#ifdef TCP_BLACKBOX
				tcp_lro_log(tp, lc, le, NULL, 23, 0, 0, 0, 0);
#endif
				*new_m = 0;
				counter_u64_add(tcp_extra_mbuf, 1);
				return (m);
			} else {
				/* Mark we ran out of space */
				tp->t_flags2 |= TF2_MBUF_L_ACKS;
			}
		}
	}
	/* Decide mbuf size. */
#ifdef TCP_BLACKBOX
	tcp_lro_log(tp, lc, le, NULL, 21, 0, 0, 0, 0);
#endif
	if (tp->t_flags2 & TF2_MBUF_L_ACKS)
		m = m_getcl(M_NOWAIT, MT_DATA, M_ACKCMP | M_PKTHDR);
	else
		m = m_gethdr(M_NOWAIT, MT_DATA);

	if (__predict_false(m == NULL)) {
		counter_u64_add(tcp_would_have_but, 1);
		return (NULL);
	}
	counter_u64_add(tcp_comp_total, 1);
	m->m_pkthdr.rcvif = lc->ifp;
	m->m_flags |= M_ACKCMP;
	*new_m = 1;
	return (m);
}

/*
 * Do the BPF tap for either ACK_CMP packets or MBUF QUEUE type packets
 * and strip everything but the IPv4/IPv6 header.
 */
static bool
do_bpf_strip_and_compress(struct tcpcb *tp, struct lro_ctrl *lc,
    struct lro_entry *le, struct mbuf **pp, struct mbuf **cmp,
    struct mbuf **mv_to, bool *should_wake, bool bpf_req, bool lagg_bpf_req,
    struct ifnet *lagg_ifp, bool can_append_old_cmp)
{
	union {
		void *ptr;
		struct ip *ip4;
		struct ip6_hdr *ip6;
	} l3;
	struct mbuf *m;
	struct mbuf *nm;
	struct tcphdr *th;
	struct tcp_ackent *ack_ent;
	uint32_t *ts_ptr;
	int32_t n_mbuf;
	bool other_opts, can_compress;
	uint8_t lro_type;
	uint16_t iptos;
	int tcp_hdr_offset;
	int idx;

	/* Get current mbuf. */
	m = *pp;

	/* Let the BPF see the packet */
	if (__predict_false(bpf_req))
		ETHER_BPF_MTAP(lc->ifp, m);

	if (__predict_false(lagg_bpf_req))
		ETHER_BPF_MTAP(lagg_ifp, m);

	tcp_hdr_offset = m->m_pkthdr.lro_tcp_h_off;
	lro_type = le->inner.data.lro_type;
	switch (lro_type) {
	case LRO_TYPE_NONE:
		lro_type = le->outer.data.lro_type;
		switch (lro_type) {
		case LRO_TYPE_IPV4_TCP:
			tcp_hdr_offset -= sizeof(*le->outer.ip4);
			m->m_pkthdr.lro_etype = ETHERTYPE_IP;
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			break;
		case LRO_TYPE_IPV6_TCP:
			tcp_hdr_offset -= sizeof(*le->outer.ip6);
			m->m_pkthdr.lro_etype = ETHERTYPE_IPV6;
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			break;
		default:
			goto compressed;
		}
		break;
	case LRO_TYPE_IPV4_TCP:
		switch (le->outer.data.lro_type) {
		case LRO_TYPE_IPV4_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip4, NULL,
			    le->outer.udp);
			break;
		case LRO_TYPE_IPV6_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip6, NULL,
			    le->outer.udp);
			break;
		default:
			__assert_unreachable();
			break;
		}
		tcp_hdr_offset -= sizeof(*le->outer.ip4);
		m->m_pkthdr.lro_etype = ETHERTYPE_IP;
		IP_PROBE(receive, NULL, NULL, le->inner.ip4, NULL,
		    le->inner.ip4, NULL);
		break;
	case LRO_TYPE_IPV6_TCP:
		switch (le->outer.data.lro_type) {
		case LRO_TYPE_IPV4_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip4, NULL,
			    le->outer.udp);
			break;
		case LRO_TYPE_IPV6_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip6, NULL,
			    le->outer.udp);
			break;
		default:
			__assert_unreachable();
			break;
		}
		tcp_hdr_offset -= sizeof(*le->outer.ip6);
		m->m_pkthdr.lro_etype = ETHERTYPE_IPV6;
		IP_PROBE(receive, NULL, NULL, le->inner.ip6, NULL, NULL,
		    le->inner.ip6);
		break;
	default:
		goto compressed;
	}

	MPASS(tcp_hdr_offset >= 0);

	m_adj(m, tcp_hdr_offset);
	m->m_flags |= M_LRO_EHDRSTRP;
	m->m_flags &= ~M_ACKCMP;
	m->m_pkthdr.lro_tcp_h_off -= tcp_hdr_offset;

	th = tcp_lro_get_th(m);

	th->th_sum = 0;		/* TCP checksum is valid. */
	tcp_fields_to_host(th);
	TCP_PROBE5(receive, NULL, tp, m, tp, th);

	/* Check if ACK can be compressed */
	can_compress = tcp_lro_ack_valid(m, th, &ts_ptr, &other_opts);

	/* Now let's look at the should-wake state */
	if ((other_opts == true) &&
	    ((tp->t_flags2 & TF2_DONT_SACK_QUEUE) == 0)) {
		/*
		 * If there are other options (SACK?) and the
		 * tcp endpoint has not expressly told us it does
		 * not care about SACKS, then we should wake up.
		 */
		*should_wake = true;
	} else if (*should_wake == false) {
		/* Check for a wakeup override if we are not waking up yet. */
		*should_wake = tcp_lro_check_wake_status(tp);
	}
	/* Is the ack compressible? */
	if (can_compress == false)
		goto done;
	/* Does the TCP endpoint support ACK compression? */
	if ((tp->t_flags2 & TF2_MBUF_ACKCMP) == 0)
		goto done;

	/* Let's get the TOS/traffic class field */
	l3.ptr = mtod(m, void *);
	switch (lro_type) {
	case LRO_TYPE_IPV4_TCP:
		iptos = l3.ip4->ip_tos;
		break;
	case LRO_TYPE_IPV6_TCP:
		iptos = IPV6_TRAFFIC_CLASS(l3.ip6);
		break;
	default:
		iptos = 0;	/* Keep compiler happy. */
		break;
	}
	/* Now let's get space if we don't have some already */
	if (*cmp == NULL) {
new_one:
		nm = tcp_lro_get_last_if_ackcmp(lc, le, tp, &n_mbuf,
		    can_append_old_cmp);
		if (__predict_false(nm == NULL))
			goto done;
		*cmp = nm;
		if (n_mbuf) {
			/*
			 * Link in the new cmp ack to our in-order place;
			 * first set our cmp ack's next to where we are.
			 */
			nm->m_nextpkt = m;
			(*pp) = nm;
			/*
			 * Set it up so mv_to is advanced to our
			 * compressed ack. This way the caller can
			 * advance pp to the right place.
			 */
			*mv_to = nm;
			/*
			 * Advance it here locally as well.
			 */
			pp = &nm->m_nextpkt;
		}
	} else {
		/* We already have one we are working on */
		nm = *cmp;
		if (M_TRAILINGSPACE(nm) < sizeof(struct tcp_ackent)) {
			/* We ran out of space */
			tp->t_flags2 |= TF2_MBUF_L_ACKS;
			goto new_one;
		}
	}
	MPASS(M_TRAILINGSPACE(nm) >= sizeof(struct tcp_ackent));
	counter_u64_add(tcp_inp_lro_compressed, 1);
	le->compressed++;
	/* We can add in to the one on the tail. */
	ack_ent = mtod(nm, struct tcp_ackent *);
	idx = (nm->m_len / sizeof(struct tcp_ackent));
	build_ack_entry(&ack_ent[idx], th, m, ts_ptr, iptos);

	/* Bump the size of both pkt-hdr and len */
	nm->m_len += sizeof(struct tcp_ackent);
	nm->m_pkthdr.len += sizeof(struct tcp_ackent);
compressed:
	/* Advance to next mbuf before freeing. */
	*pp = m->m_nextpkt;
	m->m_nextpkt = NULL;
	m_freem(m);
	return (true);
done:
	counter_u64_add(tcp_uncomp_total, 1);
	le->uncompressed++;
	return (false);
}

/*
 * Append all mbufs collected on the LRO entry to the connection's input
 * queue.  The caller must hold the inpcb write lock.
 */
static void
tcp_queue_pkts(struct tcpcb *tp, struct lro_entry *le)
{

	INP_WLOCK_ASSERT(tptoinpcb(tp));

	STAILQ_HEAD(, mbuf) q = { le->m_head,
	    &STAILQ_NEXT(le->m_last_mbuf, m_stailqpkt) };
	STAILQ_CONCAT(&tp->t_inqueue, &q);
	le->m_head = NULL;
	le->m_last_mbuf = NULL;
}

/*
 * Look up the connection matching the parsed LRO header and return its
 * write-locked tcpcb, or NULL if no matching connection is found.
 */
static struct tcpcb *
tcp_lro_lookup(struct ifnet *ifp, struct lro_parser *pa)
{
	struct inpcb *inp;

	CURVNET_ASSERT_SET();
	switch (pa->data.lro_type) {
#ifdef INET6
	case LRO_TYPE_IPV6_TCP:
		inp = in6_pcblookup(&V_tcbinfo,
		    &pa->data.s_addr.v6,
		    pa->data.s_port,
		    &pa->data.d_addr.v6,
		    pa->data.d_port,
		    INPLOOKUP_WLOCKPCB,
		    ifp);
		break;
#endif
#ifdef INET
	case LRO_TYPE_IPV4_TCP:
		inp = in_pcblookup(&V_tcbinfo,
		    pa->data.s_addr.v4,
		    pa->data.s_port,
		    pa->data.d_addr.v4,
		    pa->data.d_port,
		    INPLOOKUP_WLOCKPCB,
		    ifp);
		break;
#endif
	default:
		return (NULL);
	}

	return (intotcpcb(inp));
}

/*
 * Flush an LRO entry directly to the TCP stack: tap BPF, compress pure
 * ACKs where possible, queue the remaining mbufs on the connection and
 * wake the connection up when needed.  Returns TCP_LRO_CANNOT when the
 * regular input path has to be used instead.
 */
static int
_tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
{
	struct tcpcb *tp;
	struct mbuf **pp, *cmp, *mv_to;
	struct ifnet *lagg_ifp;
	bool bpf_req, lagg_bpf_req, should_wake, can_append_old_cmp;

	/* Check if packet doesn't belong to our network interface. */
	if ((tcplro_stacks_wanting_mbufq == 0) ||
	    (le->outer.data.vlan_id != 0) ||
	    (le->inner.data.lro_type != LRO_TYPE_NONE))
		return (TCP_LRO_CANNOT);

#ifdef INET6
	/*
	 * Be proactive about an unspecified IPv6 source address. As
	 * we use the all-zero address to indicate an unbound/unconnected
	 * pcb, an unspecified IPv6 address can be used to confuse us.
	 *
	 * Note that packets with an unspecified IPv6 destination are
	 * already dropped in ip6_input.
	 */
	if (__predict_false(le->outer.data.lro_type == LRO_TYPE_IPV6_TCP &&
	    IN6_IS_ADDR_UNSPECIFIED(&le->outer.data.s_addr.v6)))
		return (TCP_LRO_CANNOT);

	if (__predict_false(le->inner.data.lro_type == LRO_TYPE_IPV6_TCP &&
	    IN6_IS_ADDR_UNSPECIFIED(&le->inner.data.s_addr.v6)))
		return (TCP_LRO_CANNOT);
#endif

	CURVNET_SET(lc->ifp->if_vnet);
	/*
	 * Ensure that there are no packet filter hooks which would normally
	 * be triggered in ether_demux(), ip_input(), or ip6_input().
	 */
	if (
#ifdef INET
	    PFIL_HOOKED_IN(V_inet_pfil_head) ||
#endif
#ifdef INET6
	    PFIL_HOOKED_IN(V_inet6_pfil_head) ||
#endif
	    PFIL_HOOKED_IN(V_link_pfil_head)) {
		CURVNET_RESTORE();
		return (TCP_LRO_CANNOT);
	}

	/* Lookup inp, if any.  Returns locked TCP inpcb. */
	tp = tcp_lro_lookup(lc->ifp,
	    (le->inner.data.lro_type == LRO_TYPE_NONE) ? &le->outer : &le->inner);
	CURVNET_RESTORE();
	if (tp == NULL)
		return (TCP_LRO_CANNOT);

	counter_u64_add(tcp_inp_lro_locks_taken, 1);

	/* Check if the inp is dead, Jim. */
	if (tp->t_state == TCPS_TIME_WAIT) {
		INP_WUNLOCK(tptoinpcb(tp));
		return (TCP_LRO_CANNOT);
	}
	if (tp->t_lro_cpu == HPTS_CPU_NONE && lc->lro_cpu_is_set == 1)
		tp->t_lro_cpu = lc->lro_last_cpu;
	/* Check if the transport doesn't support the needed optimizations. */
	if ((tp->t_flags2 & (TF2_SUPPORTS_MBUFQ | TF2_MBUF_ACKCMP)) == 0) {
		INP_WUNLOCK(tptoinpcb(tp));
		return (TCP_LRO_CANNOT);
	}

	if (tp->t_flags2 & TF2_MBUF_QUEUE_READY)
		should_wake = false;
	else
		should_wake = true;
	/* Check if packets should be tapped to BPF. */
	bpf_req = bpf_peers_present(lc->ifp->if_bpf);
	lagg_bpf_req = false;
	lagg_ifp = NULL;
	if (lc->ifp->if_type == IFT_IEEE8023ADLAG ||
	    lc->ifp->if_type == IFT_INFINIBANDLAG) {
		struct lagg_port *lp = lc->ifp->if_lagg;
		struct lagg_softc *sc = lp->lp_softc;

		lagg_ifp = sc->sc_ifp;
		if (lagg_ifp != NULL)
			lagg_bpf_req = bpf_peers_present(lagg_ifp->if_bpf);
	}

	/* Strip and compress all the incoming packets. */
	can_append_old_cmp = true;
	cmp = NULL;
	for (pp = &le->m_head; *pp != NULL; ) {
		mv_to = NULL;
		if (do_bpf_strip_and_compress(tp, lc, le, pp, &cmp, &mv_to,
		    &should_wake, bpf_req, lagg_bpf_req, lagg_ifp,
		    can_append_old_cmp) == false) {
			/* Advance to next mbuf. */
			pp = &(*pp)->m_nextpkt;
			/*
			 * Once we have appended we can't look in the pending
			 * inbound packets for a compressed ack to append to.
			 */
			can_append_old_cmp = false;
			/*
			 * Once we append we also need to stop adding to any
			 * compressed ack we were remembering. A new cmp
			 * ack will be required.
			 */
			cmp = NULL;
#ifdef TCP_BLACKBOX
			tcp_lro_log(tp, lc, le, NULL, 25, 0, 0, 0, 0);
#endif
		} else if (mv_to != NULL) {
			/* We are asked to move pp up */
			pp = &mv_to->m_nextpkt;
#ifdef TCP_BLACKBOX
			tcp_lro_log(tp, lc, le, NULL, 24, 0, 0, 0, 0);
		} else
			tcp_lro_log(tp, lc, le, NULL, 26, 0, 0, 0, 0);
#else
		}
#endif
	}
	/* Update "m_last_mbuf", if any. */
	if (pp == &le->m_head)
		le->m_last_mbuf = *pp;
	else
		le->m_last_mbuf = __containerof(pp, struct mbuf, m_nextpkt);

	/* Check if any data mbufs left. */
	if (le->m_head != NULL) {
		counter_u64_add(tcp_inp_lro_direct_queue, 1);
#ifdef TCP_BLACKBOX
		tcp_lro_log(tp, lc, le, NULL, 22, 1, tp->t_flags2, 0, 1);
#endif
		tcp_queue_pkts(tp, le);
	}
	if (should_wake) {
		/* Wakeup */
		counter_u64_add(tcp_inp_lro_wokeup_queue, 1);
		if ((*tp->t_fb->tfb_do_queued_segments)(tp, 0))
			/* TCP cb gone and unlocked. */
			return (0);
	}
	INP_WUNLOCK(tptoinpcb(tp));

	return (0);	/* Success. */
}

void
tcp_lro_hpts_init(void)
{
	tcp_lro_flush_tcphpts = _tcp_lro_flush_tcphpts;
}

void
tcp_lro_hpts_uninit(void)
{
	atomic_store_ptr(&tcp_lro_flush_tcphpts, NULL);
}