1 /* 2 * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote 14 * products derived from this software without specific prior written 15 * permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $FreeBSD$ 30 */ 31 32 /* 33 * ip_fastforward gets its speed from processing the forwarded packet to 34 * completion (if_output on the other side) without any queues or netisr's. 
 * The receiving interface DMAs the packet into memory, the upper half of
 * the driver calls ip_fastforward, we do our routing table lookup and
 * directly send it off to the outgoing interface which DMAs the packet to
 * the network card. The only part of the packet we touch with the CPU is
 * the IP header (unless there are complex firewall rules touching other
 * parts of the packet, but that is up to you). We are essentially limited
 * by bus bandwidth and how fast the network card/driver can set up receives
 * and transmits.
 *
 * We handle basic errors, ip header errors, checksum errors,
 * destination unreachable, fragmentation and fragmentation needed and
 * report them via icmp to the sender.
 *
 * If something is not pure IPv4 unicast forwarding we fall back to
 * the normal ip_input processing path. We should only be called from
 * interfaces connected to the outside world.
 *
 * Firewalling is fully supported including divert, ipfw fwd and ipfilter
 * ipnat and address rewrite.
 *
 * IPSEC is not supported if this host is a tunnel broker. IPSEC is
 * supported for connections to/from local host.
 *
 * We try to do the least expensive (in CPU ops) checks and operations
 * first to catch junk with as little overhead as possible.
 *
 * We take full advantage of hardware support for ip checksum and
 * fragmentation offloading.
 *
 * We don't do ICMP redirect in the fast forwarding path. I have had my own
 * cases where two core routers with Zebra routing suite would send millions
 * of ICMP redirects to connected hosts if the router to dest was not the
 * default gateway. In one case it was filling the routing table of a host
 * with close to 300'000 cloned redirect entries until it ran out of kernel
 * memory. However the networking code proved very robust and it didn't
 * crash or otherwise misbehave.
71 */ 72 73 /* 74 * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which 75 * is being followed here. 76 */ 77 78 #include "opt_ipfw.h" 79 #include "opt_ipdn.h" 80 #include "opt_ipdivert.h" 81 #include "opt_ipfilter.h" 82 #include "opt_ipstealth.h" 83 #include "opt_pfil_hooks.h" 84 85 #include <sys/param.h> 86 #include <sys/systm.h> 87 #include <sys/kernel.h> 88 #include <sys/malloc.h> 89 #include <sys/mbuf.h> 90 #include <sys/protosw.h> 91 #include <sys/socket.h> 92 #include <sys/sysctl.h> 93 94 #include <net/pfil.h> 95 #include <net/if.h> 96 #include <net/if_types.h> 97 #include <net/if_var.h> 98 #include <net/if_dl.h> 99 #include <net/route.h> 100 101 #include <netinet/in.h> 102 #include <netinet/in_systm.h> 103 #include <netinet/in_var.h> 104 #include <netinet/ip.h> 105 #include <netinet/ip_var.h> 106 #include <netinet/ip_icmp.h> 107 108 #include <machine/in_cksum.h> 109 110 #include <netinet/ip_fw.h> 111 #include <netinet/ip_divert.h> 112 #include <netinet/ip_dummynet.h> 113 114 static int ipfastforward_active = 0; 115 SYSCTL_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW, 116 &ipfastforward_active, 0, "Enable fast IP forwarding"); 117 118 static struct sockaddr_in * 119 ip_findroute(struct route *ro, in_addr_t dest, struct mbuf *m) 120 { 121 struct sockaddr_in *dst; 122 struct rtentry *rt; 123 124 /* 125 * Find route to destination. 126 */ 127 bzero(ro, sizeof(*ro)); 128 dst = (struct sockaddr_in *)&ro->ro_dst; 129 dst->sin_family = AF_INET; 130 dst->sin_len = sizeof(*dst); 131 dst->sin_addr.s_addr = dest; 132 rtalloc_ign(ro, RTF_CLONING); 133 134 /* 135 * Route there and interface still up? 
136 */ 137 rt = ro->ro_rt; 138 if (rt && (rt->rt_flags & RTF_UP) && 139 (rt->rt_ifp->if_flags & IFF_UP) && 140 (rt->rt_ifp->if_flags & IFF_RUNNING)) { 141 if (rt->rt_flags & RTF_GATEWAY) 142 dst = (struct sockaddr_in *)rt->rt_gateway; 143 } else { 144 ipstat.ips_noroute++; 145 ipstat.ips_cantforward++; 146 if (rt) 147 RTFREE(rt); 148 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL); 149 return NULL; 150 } 151 return dst; 152 } 153 154 /* 155 * Try to forward a packet based on the destination address. 156 * This is a fast path optimized for the plain forwarding case. 157 * If the packet is handled (and consumed) here then we return 1; 158 * otherwise 0 is returned and the packet should be delivered 159 * to ip_input for full processing. 160 */ 161 int 162 ip_fastforward(struct mbuf *m) 163 { 164 struct ip *ip; 165 struct mbuf *m0 = NULL; 166 #ifdef IPDIVERT 167 struct ip *tip; 168 struct mbuf *clone = NULL; 169 #endif 170 struct route ro; 171 struct sockaddr_in *dst = NULL; 172 struct in_ifaddr *ia = NULL; 173 struct ifaddr *ifa = NULL; 174 struct ifnet *ifp; 175 struct ip_fw_args args; 176 in_addr_t odest, dest; 177 u_short sum, ip_len; 178 int error = 0; 179 int hlen, ipfw, mtu; 180 181 /* 182 * Are we active and forwarding packets? 183 */ 184 if (!ipfastforward_active || !ipforwarding) 185 return 0; 186 187 M_ASSERTVALID(m); 188 M_ASSERTPKTHDR(m); 189 190 ro.ro_rt = NULL; 191 192 /* 193 * Step 1: check for packet drop conditions (and sanity checks) 194 */ 195 196 /* 197 * Is entire packet big enough? 198 */ 199 if (m->m_pkthdr.len < sizeof(struct ip)) { 200 ipstat.ips_tooshort++; 201 goto drop; 202 } 203 204 /* 205 * Is first mbuf large enough for ip header and is header present? 206 */ 207 if (m->m_len < sizeof (struct ip) && 208 (m = m_pullup(m, sizeof (struct ip))) == 0) { 209 ipstat.ips_toosmall++; 210 goto drop; 211 } 212 213 ip = mtod(m, struct ip *); 214 215 /* 216 * Is it IPv4? 
217 */ 218 if (ip->ip_v != IPVERSION) { 219 ipstat.ips_badvers++; 220 goto drop; 221 } 222 223 /* 224 * Is IP header length correct and is it in first mbuf? 225 */ 226 hlen = ip->ip_hl << 2; 227 if (hlen < sizeof(struct ip)) { /* minimum header length */ 228 ipstat.ips_badlen++; 229 goto drop; 230 } 231 if (hlen > m->m_len) { 232 if ((m = m_pullup(m, hlen)) == 0) { 233 ipstat.ips_badhlen++; 234 goto drop; 235 } 236 ip = mtod(m, struct ip *); 237 } 238 239 /* 240 * Checksum correct? 241 */ 242 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) 243 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 244 else { 245 if (hlen == sizeof(struct ip)) 246 sum = in_cksum_hdr(ip); 247 else 248 sum = in_cksum(m, hlen); 249 } 250 if (sum) { 251 ipstat.ips_badsum++; 252 goto drop; 253 } 254 m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); 255 256 ip_len = ntohs(ip->ip_len); 257 258 /* 259 * Is IP length longer than packet we have got? 260 */ 261 if (m->m_pkthdr.len < ip_len) { 262 ipstat.ips_tooshort++; 263 goto drop; 264 } 265 266 /* 267 * Is packet longer than IP header tells us? If yes, truncate packet. 268 */ 269 if (m->m_pkthdr.len > ip_len) { 270 if (m->m_len == m->m_pkthdr.len) { 271 m->m_len = ip_len; 272 m->m_pkthdr.len = ip_len; 273 } else 274 m_adj(m, ip_len - m->m_pkthdr.len); 275 } 276 277 /* 278 * Is packet from or to 127/8? 
279 */ 280 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 281 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 282 ipstat.ips_badaddr++; 283 goto drop; 284 } 285 286 /* 287 * Step 2: fallback conditions to normal ip_input path processing 288 */ 289 290 /* 291 * Only IP packets without options 292 */ 293 if (ip->ip_hl != (sizeof(struct ip) >> 2)) { 294 if (ip_doopts == 1) 295 return 0; 296 else if (ip_doopts == 2) { 297 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB, 298 0, NULL); 299 return 1; 300 } 301 /* else ignore IP options and continue */ 302 } 303 304 /* 305 * Only unicast IP, not from loopback, no L2 or IP broadcast, 306 * no multicast, no INADDR_ANY 307 * 308 * XXX: Probably some of these checks could be direct drop 309 * conditions. However it is not clear whether there are some 310 * hacks or obscure behaviours which make it neccessary to 311 * let ip_input handle it. We play safe here and let ip_input 312 * deal with it until it is proven that we can directly drop it. 313 */ 314 if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || 315 ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST || 316 ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST || 317 IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || 318 IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 319 ip->ip_dst.s_addr == INADDR_ANY ) 320 return 0; 321 322 /* 323 * Is it for a local address on this host? 324 */ 325 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 326 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) 327 return 0; 328 } 329 330 /* 331 * Or is it for a local IP broadcast address on this host? 
332 */ 333 if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 334 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 335 if (ifa->ifa_addr->sa_family != AF_INET) 336 continue; 337 ia = ifatoia(ifa); 338 if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) 339 return 0; 340 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 341 ip->ip_dst.s_addr) 342 return 0; 343 } 344 } 345 ipstat.ips_total++; 346 347 /* 348 * Step 3: incoming packet firewall processing 349 */ 350 351 /* 352 * Convert to host representation 353 */ 354 ip->ip_len = ntohs(ip->ip_len); 355 ip->ip_off = ntohs(ip->ip_off); 356 357 odest = dest = ip->ip_dst.s_addr; 358 #ifdef PFIL_HOOKS 359 /* 360 * Run through list of ipfilter hooks for input packets 361 */ 362 if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN) || 363 m == NULL) 364 return 1; 365 366 M_ASSERTVALID(m); 367 M_ASSERTPKTHDR(m); 368 369 ip = mtod(m, struct ip *); /* m may have changed by pfil hook */ 370 dest = ip->ip_dst.s_addr; 371 #endif 372 373 /* 374 * Run through ipfw for input packets 375 */ 376 if (fw_enable && IPFW_LOADED) { 377 bzero(&args, sizeof(args)); 378 args.m = m; 379 380 ipfw = ip_fw_chk_ptr(&args); 381 m = args.m; 382 383 M_ASSERTVALID(m); 384 M_ASSERTPKTHDR(m); 385 386 /* 387 * Packet denied, drop it 388 */ 389 if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL) 390 goto drop; 391 /* 392 * Send packet to the appropriate pipe 393 */ 394 if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) { 395 ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_IN, &args); 396 return 1; 397 } 398 #ifdef IPDIVERT 399 /* 400 * Divert packet 401 */ 402 if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) { 403 /* 404 * See if this is a fragment 405 */ 406 if (ip->ip_off & (IP_MF | IP_OFFMASK)) 407 goto droptoours; 408 /* 409 * Tee packet 410 */ 411 if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0) 412 clone = divert_clone(m); 413 else 414 clone = m; 415 if (clone == NULL) 416 goto passin; 417 418 /* 419 * Delayed checksums are not 
compatible 420 */ 421 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 422 in_delayed_cksum(m); 423 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 424 } 425 /* 426 * Restore packet header fields to original values 427 */ 428 tip = mtod(m, struct ip *); 429 tip->ip_len = htons(tip->ip_len); 430 tip->ip_off = htons(tip->ip_off); 431 /* 432 * Deliver packet to divert input routine 433 */ 434 divert_packet(m, 0); 435 /* 436 * If this was not tee, we are done 437 */ 438 m = clone; 439 if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) 440 return 1; 441 /* Continue if it was tee */ 442 goto passin; 443 } 444 #endif 445 if (ipfw == 0 && args.next_hop != NULL) { 446 dest = args.next_hop->sin_addr.s_addr; 447 goto passin; 448 } 449 /* 450 * Let through or not? 451 */ 452 if (ipfw != 0) 453 goto drop; 454 } 455 passin: 456 ip = mtod(m, struct ip *); /* if m changed during fw processing */ 457 458 /* 459 * Destination address changed? 460 */ 461 if (odest != dest) { 462 /* 463 * Is it now for a local address on this host? 464 */ 465 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 466 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) 467 goto forwardlocal; 468 } 469 /* 470 * Go on with new destination address 471 */ 472 } 473 474 /* 475 * Step 4: decrement TTL and look up route 476 */ 477 478 /* 479 * Check TTL 480 */ 481 #ifdef IPSTEALTH 482 if (!ipstealth) { 483 #endif 484 if (ip->ip_ttl <= IPTTLDEC) { 485 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, NULL); 486 return 1; 487 } 488 489 /* 490 * Decrement the TTL and incrementally change the checksum. 491 * Don't bother doing this with hw checksum offloading. 492 */ 493 ip->ip_ttl -= IPTTLDEC; 494 if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8)) 495 ip->ip_sum -= ~htons(IPTTLDEC << 8); 496 else 497 ip->ip_sum += htons(IPTTLDEC << 8); 498 #ifdef IPSTEALTH 499 } 500 #endif 501 502 /* 503 * Find route to destination. 
504 */ 505 if ((dst = ip_findroute(&ro, dest, m)) == NULL) 506 return 1; /* icmp unreach already sent */ 507 ifp = ro.ro_rt->rt_ifp; 508 509 /* 510 * Step 5: outgoing firewall packet processing 511 */ 512 513 #ifdef PFIL_HOOKS 514 /* 515 * Run through list of hooks for output packets. 516 */ 517 if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT) || m == NULL) { 518 goto consumed; 519 } 520 521 M_ASSERTVALID(m); 522 M_ASSERTPKTHDR(m); 523 524 ip = mtod(m, struct ip *); 525 dest = ip->ip_dst.s_addr; 526 #endif 527 if (fw_enable && IPFW_LOADED && !args.next_hop) { 528 bzero(&args, sizeof(args)); 529 args.m = m; 530 args.oif = ifp; 531 532 ipfw = ip_fw_chk_ptr(&args); 533 m = args.m; 534 535 M_ASSERTVALID(m); 536 M_ASSERTPKTHDR(m); 537 538 if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL) 539 goto drop; 540 541 if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) { 542 /* 543 * XXX note: if the ifp or rt entry are deleted 544 * while a pkt is in dummynet, we are in trouble! 545 */ 546 args.ro = &ro; /* dummynet does not save it */ 547 args.dst = dst; 548 549 ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_OUT, &args); 550 goto consumed; 551 } 552 #ifdef IPDIVERT 553 if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) { 554 /* 555 * See if this is a fragment 556 */ 557 if (ip->ip_off & (IP_MF | IP_OFFMASK)) 558 goto droptoours; 559 /* 560 * Tee packet 561 */ 562 if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0) 563 clone = divert_clone(m); 564 else 565 clone = m; 566 if (clone == NULL) 567 goto passout; 568 569 /* 570 * Delayed checksums are not compatible with divert 571 */ 572 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 573 in_delayed_cksum(m); 574 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 575 } 576 /* 577 * Restore packet header fields to original values 578 */ 579 tip = mtod(m, struct ip *); 580 tip->ip_len = htons(tip->ip_len); 581 tip->ip_off = htons(tip->ip_off); 582 /* 583 * Deliver packet to divert input routine 584 */ 585 divert_packet(m, 0); 586 /* 587 * If this 
was not tee, we are done 588 */ 589 m = clone; 590 if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) { 591 goto consumed; 592 } 593 /* Continue if it was tee */ 594 goto passout; 595 } 596 #endif 597 if (ipfw == 0 && args.next_hop != NULL) { 598 dest = args.next_hop->sin_addr.s_addr; 599 goto passout; 600 } 601 /* 602 * Let through or not? 603 */ 604 if (ipfw != 0) 605 goto drop; 606 } 607 passout: 608 ip = mtod(m, struct ip *); 609 610 /* 611 * Destination address changed? 612 */ 613 if (odest != dest) { 614 /* 615 * Is it now for a local address on this host? 616 */ 617 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 618 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) { 619 forwardlocal: 620 if (args.next_hop) { 621 struct m_tag *mtag = m_tag_get( 622 PACKET_TAG_IPFORWARD, 623 sizeof(struct sockaddr_in *), 624 M_NOWAIT); 625 if (mtag == NULL) { 626 goto drop; 627 } 628 *(struct sockaddr_in **)(mtag+1) = 629 args.next_hop; 630 m_tag_prepend(m, mtag); 631 } 632 #ifdef IPDIVERT 633 droptoours: /* Used for DIVERT */ 634 #endif 635 /* for ip_input */ 636 m->m_flags |= M_FASTFWD_OURS; 637 638 /* ip still points to the real packet */ 639 ip->ip_len = htons(ip->ip_len); 640 ip->ip_off = htons(ip->ip_off); 641 642 /* 643 * Return packet for processing by ip_input 644 */ 645 if (ro.ro_rt) 646 RTFREE(ro.ro_rt); 647 return 0; 648 } 649 } 650 /* 651 * Redo route lookup with new destination address 652 */ 653 RTFREE(ro.ro_rt); 654 if ((dst = ip_findroute(&ro, dest, m)) == NULL) 655 return 1; /* icmp unreach already sent */ 656 ifp = ro.ro_rt->rt_ifp; 657 } 658 659 /* 660 * Step 6: send off the packet 661 */ 662 663 /* 664 * Check if route is dampned (when ARP is unable to resolve) 665 */ 666 if ((ro.ro_rt->rt_flags & RTF_REJECT) && 667 ro.ro_rt->rt_rmx.rmx_expire >= time_second) { 668 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL); 669 goto consumed; 670 } 671 672 /* 673 * Check if there is enough space in the interface queue 674 */ 675 if ((ifp->if_snd.ifq_len 
+ ip->ip_len / ifp->if_mtu + 1) >= 676 ifp->if_snd.ifq_maxlen) { 677 ipstat.ips_odropped++; 678 /* would send source quench here but that is depreciated */ 679 goto drop; 680 } 681 682 /* 683 * Check if media link state of interface is not down 684 */ 685 if (ifp->if_link_state == LINK_STATE_DOWN) { 686 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, NULL); 687 goto consumed; 688 } 689 690 /* 691 * Check if packet fits MTU or if hardware will fragement for us 692 */ 693 if (ro.ro_rt->rt_rmx.rmx_mtu) 694 mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu); 695 else 696 mtu = ifp->if_mtu; 697 698 if (ip->ip_len <= mtu || 699 (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) { 700 /* 701 * Restore packet header fields to original values 702 */ 703 ip->ip_len = htons(ip->ip_len); 704 ip->ip_off = htons(ip->ip_off); 705 /* 706 * Send off the packet via outgoing interface 707 */ 708 error = (*ifp->if_output)(ifp, m, 709 (struct sockaddr *)dst, ro.ro_rt); 710 } else { 711 /* 712 * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery 713 */ 714 if (ip->ip_off & IP_DF) { 715 ipstat.ips_cantfrag++; 716 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 717 0, ifp); 718 goto consumed; 719 } else { 720 /* 721 * We have to fragement the packet 722 */ 723 m->m_pkthdr.csum_flags |= CSUM_IP; 724 /* 725 * ip_fragment expects ip_len and ip_off in host byte 726 * order but returns all packets in network byte order 727 */ 728 if (ip_fragment(ip, &m, mtu, ifp->if_hwassist, 729 (~ifp->if_hwassist & CSUM_DELAY_IP))) { 730 goto drop; 731 } 732 KASSERT(m != NULL, ("null mbuf and no error")); 733 /* 734 * Send off the fragments via outgoing interface 735 */ 736 error = 0; 737 do { 738 m0 = m->m_nextpkt; 739 m->m_nextpkt = NULL; 740 741 error = (*ifp->if_output)(ifp, m, 742 (struct sockaddr *)dst, ro.ro_rt); 743 if (error) 744 break; 745 } while ((m = m0) != NULL); 746 if (error) { 747 /* Reclaim remaining fragments */ 748 for (; m; m = m0) { 749 m0 = m->m_nextpkt; 
750 m->m_nextpkt = NULL; 751 m_freem(m); 752 } 753 } else 754 ipstat.ips_fragmented++; 755 } 756 } 757 758 if (error != 0) 759 ipstat.ips_odropped++; 760 else { 761 ro.ro_rt->rt_rmx.rmx_pksent++; 762 ipstat.ips_forward++; 763 ipstat.ips_fastforward++; 764 } 765 consumed: 766 RTFREE(ro.ro_rt); 767 return 1; 768 drop: 769 if (m) 770 m_freem(m); 771 if (ro.ro_rt) 772 RTFREE(ro.ro_rt); 773 return 1; 774 } 775