1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_inet.h" 36 #include "opt_ipfw.h" 37 #include "opt_ipsec.h" 38 #include "opt_mbuf_stress_test.h" 39 #include "opt_mpath.h" 40 #include "opt_route.h" 41 #include "opt_sctp.h" 42 #include "opt_rss.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/protosw.h> 52 #include <sys/sdt.h> 53 #include <sys/socket.h> 54 #include <sys/socketvar.h> 55 #include <sys/sysctl.h> 56 #include <sys/ucred.h> 57 58 #include <net/if.h> 59 #include <net/if_var.h> 60 #include <net/if_llatbl.h> 61 #include <net/netisr.h> 62 #include <net/pfil.h> 63 #include <net/route.h> 64 #include <net/flowtable.h> 65 #ifdef RADIX_MPATH 66 #include <net/radix_mpath.h> 67 #endif 68 #include <net/rss_config.h> 69 #include <net/vnet.h> 70 71 #include <netinet/in.h> 72 #include <netinet/in_kdtrace.h> 73 #include <netinet/in_systm.h> 74 #include <netinet/ip.h> 75 #include <netinet/in_pcb.h> 76 #include <netinet/in_rss.h> 77 #include <netinet/in_var.h> 78 #include <netinet/ip_var.h> 79 #include <netinet/ip_options.h> 80 #ifdef SCTP 81 #include <netinet/sctp.h> 82 #include <netinet/sctp_crc32.h> 83 #endif 84 85 #ifdef IPSEC 86 #include <netinet/ip_ipsec.h> 87 #include <netipsec/ipsec.h> 88 #endif /* IPSEC*/ 89 90 #include <machine/in_cksum.h> 91 92 #include <security/mac/mac_framework.h> 93 94 #ifdef MBUF_STRESS_TEST 95 static int mbuf_frag_size = 0; 96 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 97 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 98 #endif 99 100 static void ip_mloopback 101 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 102 103 104 extern int in_mcast_loop; 105 extern struct protosw inetsw[]; 106 107 /* 108 * IP output. The packet in mbuf chain m contains a skeletal IP 109 * header (with len, off, ttl, proto, tos, src, dst). 110 * The mbuf chain containing the packet will be freed. 111 * The mbuf opt, if present, will not be freed. 112 * If route ro is present and has ro_rt initialized, route lookup would be 113 * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, 114 * then result of route lookup is stored in ro->ro_rt. 115 * 116 * In the IP forwarding case, the packet will arrive with options already 117 * inserted, so must have a NULL opt pointer. 118 */ 119 int 120 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 121 struct ip_moptions *imo, struct inpcb *inp) 122 { 123 struct ip *ip; 124 struct ifnet *ifp = NULL; /* keep compiler happy */ 125 struct mbuf *m0; 126 int hlen = sizeof (struct ip); 127 int mtu; 128 int error = 0; 129 struct sockaddr_in *dst; 130 const struct sockaddr_in *gw; 131 struct in_ifaddr *ia; 132 int isbroadcast; 133 uint16_t ip_len, ip_off; 134 struct route iproute; 135 struct rtentry *rte; /* cache for ro->ro_rt */ 136 struct in_addr odst; 137 struct m_tag *fwd_tag = NULL; 138 uint32_t fibnum; 139 int have_ia_ref; 140 int needfiblookup; 141 #ifdef IPSEC 142 int no_route_but_check_spd = 0; 143 #endif 144 M_ASSERTPKTHDR(m); 145 146 if (inp != NULL) { 147 INP_LOCK_ASSERT(inp); 148 M_SETFIB(m, inp->inp_inc.inc_fibnum); 149 if ((flags & IP_NODEFAULTFLOWID) == 0) { 150 m->m_pkthdr.flowid = inp->inp_flowid; 151 M_HASHTYPE_SET(m, inp->inp_flowtype); 152 } 153 } 154 155 if (ro == NULL) { 156 ro = &iproute; 157 bzero(ro, sizeof (*ro)); 158 } 159 160 #ifdef FLOWTABLE 161 if (ro->ro_rt == NULL) 162 (void )flowtable_lookup(AF_INET, m, ro); 163 #endif 164 165 if (opt) { 166 int len = 0; 167 m = ip_insertoptions(m, opt, &len); 168 if (len != 0) 169 hlen = len; /* ip->ip_hl is updated above */ 170 } 171 ip = mtod(m, struct ip *); 172 ip_len = ntohs(ip->ip_len); 173 ip_off = ntohs(ip->ip_off); 174 175 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 176 ip->ip_v = IPVERSION; 177 ip->ip_hl = hlen >> 2; 178 ip_fillid(ip); 179 IPSTAT_INC(ips_localout); 180 } else { 181 /* Header already set, fetch hlen from there */ 182 hlen = ip->ip_hl << 2; 183 } 184 185 /* 186 * dst/gw handling: 187 * 188 * dst can be rewritten but always points to &ro->ro_dst. 189 * gw is readonly but can point either to dst OR rt_gateway, 190 * therefore we need restore gw if we're redoing lookup. 191 */ 192 gw = dst = (struct sockaddr_in *)&ro->ro_dst; 193 fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); 194 again: 195 ia = NULL; 196 have_ia_ref = 0; 197 /* 198 * If there is a cached route, check that it is to the same 199 * destination and is still up. If not, free it and try again. 200 * The address family should also be checked in case of sharing 201 * the cache with IPv6. 202 */ 203 rte = ro->ro_rt; 204 if (rte && ((rte->rt_flags & RTF_UP) == 0 || 205 rte->rt_ifp == NULL || 206 !RT_LINK_IS_UP(rte->rt_ifp) || 207 dst->sin_family != AF_INET || 208 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 209 RO_RTFREE(ro); 210 ro->ro_lle = NULL; 211 rte = NULL; 212 gw = dst; 213 } 214 if (rte == NULL && fwd_tag == NULL) { 215 bzero(dst, sizeof(*dst)); 216 dst->sin_family = AF_INET; 217 dst->sin_len = sizeof(*dst); 218 dst->sin_addr = ip->ip_dst; 219 } 220 /* 221 * If routing to interface only, short circuit routing lookup. 222 * The use of an all-ones broadcast address implies this; an 223 * interface is specified by the broadcast address of an interface, 224 * or the destination address of a ptp interface. 225 */ 226 if (flags & IP_SENDONES) { 227 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst), 228 M_GETFIB(m)))) == NULL && 229 (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), 230 M_GETFIB(m)))) == NULL) { 231 IPSTAT_INC(ips_noroute); 232 error = ENETUNREACH; 233 goto bad; 234 } 235 have_ia_ref = 1; 236 ip->ip_dst.s_addr = INADDR_BROADCAST; 237 dst->sin_addr = ip->ip_dst; 238 ifp = ia->ia_ifp; 239 ip->ip_ttl = 1; 240 isbroadcast = 1; 241 } else if (flags & IP_ROUTETOIF) { 242 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), 243 M_GETFIB(m)))) == NULL && 244 (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0, 245 M_GETFIB(m)))) == NULL) { 246 IPSTAT_INC(ips_noroute); 247 error = ENETUNREACH; 248 goto bad; 249 } 250 have_ia_ref = 1; 251 ifp = ia->ia_ifp; 252 ip->ip_ttl = 1; 253 isbroadcast = in_broadcast(dst->sin_addr, ifp); 254 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 255 imo != NULL && imo->imo_multicast_ifp != NULL) { 256 /* 257 * Bypass the normal routing lookup for multicast 258 * packets if the interface is specified. 259 */ 260 ifp = imo->imo_multicast_ifp; 261 IFP_TO_IA(ifp, ia); 262 if (ia) 263 have_ia_ref = 1; 264 isbroadcast = 0; /* fool gcc */ 265 } else { 266 /* 267 * We want to do any cloning requested by the link layer, 268 * as this is probably required in all cases for correct 269 * operation (as it is for ARP). 270 */ 271 if (rte == NULL) { 272 #ifdef RADIX_MPATH 273 rtalloc_mpath_fib(ro, 274 ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 275 fibnum); 276 #else 277 in_rtalloc_ign(ro, 0, fibnum); 278 #endif 279 rte = ro->ro_rt; 280 } 281 if (rte == NULL || 282 rte->rt_ifp == NULL || 283 !RT_LINK_IS_UP(rte->rt_ifp)) { 284 #ifdef IPSEC 285 /* 286 * There is no route for this packet, but it is 287 * possible that a matching SPD entry exists. 288 */ 289 no_route_but_check_spd = 1; 290 mtu = 0; /* Silence GCC warning. */ 291 goto sendit; 292 #endif 293 IPSTAT_INC(ips_noroute); 294 error = EHOSTUNREACH; 295 goto bad; 296 } 297 ia = ifatoia(rte->rt_ifa); 298 ifp = rte->rt_ifp; 299 counter_u64_add(rte->rt_pksent, 1); 300 if (rte->rt_flags & RTF_GATEWAY) 301 gw = (struct sockaddr_in *)rte->rt_gateway; 302 if (rte->rt_flags & RTF_HOST) 303 isbroadcast = (rte->rt_flags & RTF_BROADCAST); 304 else 305 isbroadcast = in_broadcast(gw->sin_addr, ifp); 306 } 307 /* 308 * Calculate MTU. If we have a route that is up, use that, 309 * otherwise use the interface's MTU. 310 */ 311 if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) 312 mtu = rte->rt_mtu; 313 else 314 mtu = ifp->if_mtu; 315 /* Catch a possible divide by zero later. */ 316 KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", 317 __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); 318 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 319 m->m_flags |= M_MCAST; 320 /* 321 * IP destination address is multicast. Make sure "gw" 322 * still points to the address in "ro". (It may have been 323 * changed to point to a gateway address, above.) 324 */ 325 gw = dst; 326 /* 327 * See if the caller provided any multicast options 328 */ 329 if (imo != NULL) { 330 ip->ip_ttl = imo->imo_multicast_ttl; 331 if (imo->imo_multicast_vif != -1) 332 ip->ip_src.s_addr = 333 ip_mcast_src ? 334 ip_mcast_src(imo->imo_multicast_vif) : 335 INADDR_ANY; 336 } else 337 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 338 /* 339 * Confirm that the outgoing interface supports multicast. 340 */ 341 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 342 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 343 IPSTAT_INC(ips_noroute); 344 error = ENETUNREACH; 345 goto bad; 346 } 347 } 348 /* 349 * If source address not specified yet, use address 350 * of outgoing interface. 351 */ 352 if (ip->ip_src.s_addr == INADDR_ANY) { 353 /* Interface may have no addresses. */ 354 if (ia != NULL) 355 ip->ip_src = IA_SIN(ia)->sin_addr; 356 } 357 358 if ((imo == NULL && in_mcast_loop) || 359 (imo && imo->imo_multicast_loop)) { 360 /* 361 * Loop back multicast datagram if not expressly 362 * forbidden to do so, even if we are not a member 363 * of the group; ip_input() will filter it later, 364 * thus deferring a hash lookup and mutex acquisition 365 * at the expense of a cheap copy using m_copym(). 366 */ 367 ip_mloopback(ifp, m, dst, hlen); 368 } else { 369 /* 370 * If we are acting as a multicast router, perform 371 * multicast forwarding as if the packet had just 372 * arrived on the interface to which we are about 373 * to send. The multicast forwarding function 374 * recursively calls this function, using the 375 * IP_FORWARDING flag to prevent infinite recursion. 376 * 377 * Multicasts that are looped back by ip_mloopback(), 378 * above, will be forwarded by the ip_input() routine, 379 * if necessary. 380 */ 381 if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { 382 /* 383 * If rsvp daemon is not running, do not 384 * set ip_moptions. This ensures that the packet 385 * is multicast and not just sent down one link 386 * as prescribed by rsvpd. 387 */ 388 if (!V_rsvp_on) 389 imo = NULL; 390 if (ip_mforward && 391 ip_mforward(ip, ifp, m, imo) != 0) { 392 m_freem(m); 393 goto done; 394 } 395 } 396 } 397 398 /* 399 * Multicasts with a time-to-live of zero may be looped- 400 * back, above, but must not be transmitted on a network. 401 * Also, multicasts addressed to the loopback interface 402 * are not sent -- the above call to ip_mloopback() will 403 * loop back a copy. ip_input() will drop the copy if 404 * this host does not belong to the destination group on 405 * the loopback interface. 406 */ 407 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 408 m_freem(m); 409 goto done; 410 } 411 412 goto sendit; 413 } 414 415 /* 416 * If the source address is not specified yet, use the address 417 * of the outoing interface. 418 */ 419 if (ip->ip_src.s_addr == INADDR_ANY) { 420 /* Interface may have no addresses. */ 421 if (ia != NULL) { 422 ip->ip_src = IA_SIN(ia)->sin_addr; 423 } 424 } 425 426 /* 427 * Look for broadcast address and 428 * verify user is allowed to send 429 * such a packet. 430 */ 431 if (isbroadcast) { 432 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 433 error = EADDRNOTAVAIL; 434 goto bad; 435 } 436 if ((flags & IP_ALLOWBROADCAST) == 0) { 437 error = EACCES; 438 goto bad; 439 } 440 /* don't allow broadcast messages to be fragmented */ 441 if (ip_len > mtu) { 442 error = EMSGSIZE; 443 goto bad; 444 } 445 m->m_flags |= M_BCAST; 446 } else { 447 m->m_flags &= ~M_BCAST; 448 } 449 450 sendit: 451 #ifdef IPSEC 452 switch(ip_ipsec_output(&m, inp, &error)) { 453 case 1: 454 goto bad; 455 case -1: 456 goto done; 457 case 0: 458 default: 459 break; /* Continue with packet processing. */ 460 } 461 /* 462 * Check if there was a route for this packet; return error if not. 463 */ 464 if (no_route_but_check_spd) { 465 IPSTAT_INC(ips_noroute); 466 error = EHOSTUNREACH; 467 goto bad; 468 } 469 /* Update variables that are affected by ipsec4_output(). */ 470 ip = mtod(m, struct ip *); 471 hlen = ip->ip_hl << 2; 472 #endif /* IPSEC */ 473 474 /* Jump over all PFIL processing if hooks are not active. */ 475 if (!PFIL_HOOKED(&V_inet_pfil_hook)) 476 goto passout; 477 478 /* Run through list of hooks for output packets. */ 479 odst.s_addr = ip->ip_dst.s_addr; 480 error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp); 481 if (error != 0 || m == NULL) 482 goto done; 483 484 ip = mtod(m, struct ip *); 485 needfiblookup = 0; 486 487 /* See if destination IP address was changed by packet filter. */ 488 if (odst.s_addr != ip->ip_dst.s_addr) { 489 m->m_flags |= M_SKIP_FIREWALL; 490 /* If destination is now ourself drop to ip_input(). */ 491 if (in_localip(ip->ip_dst)) { 492 m->m_flags |= M_FASTFWD_OURS; 493 if (m->m_pkthdr.rcvif == NULL) 494 m->m_pkthdr.rcvif = V_loif; 495 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 496 m->m_pkthdr.csum_flags |= 497 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 498 m->m_pkthdr.csum_data = 0xffff; 499 } 500 m->m_pkthdr.csum_flags |= 501 CSUM_IP_CHECKED | CSUM_IP_VALID; 502 #ifdef SCTP 503 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 504 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 505 #endif 506 error = netisr_queue(NETISR_IP, m); 507 goto done; 508 } else { 509 if (have_ia_ref) 510 ifa_free(&ia->ia_ifa); 511 needfiblookup = 1; /* Redo the routing table lookup. */ 512 } 513 } 514 /* See if fib was changed by packet filter. */ 515 if (fibnum != M_GETFIB(m)) { 516 m->m_flags |= M_SKIP_FIREWALL; 517 fibnum = M_GETFIB(m); 518 RO_RTFREE(ro); 519 needfiblookup = 1; 520 } 521 if (needfiblookup) 522 goto again; 523 524 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ 525 if (m->m_flags & M_FASTFWD_OURS) { 526 if (m->m_pkthdr.rcvif == NULL) 527 m->m_pkthdr.rcvif = V_loif; 528 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 529 m->m_pkthdr.csum_flags |= 530 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 531 m->m_pkthdr.csum_data = 0xffff; 532 } 533 #ifdef SCTP 534 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 535 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 536 #endif 537 m->m_pkthdr.csum_flags |= 538 CSUM_IP_CHECKED | CSUM_IP_VALID; 539 540 error = netisr_queue(NETISR_IP, m); 541 goto done; 542 } 543 /* Or forward to some other address? */ 544 if ((m->m_flags & M_IP_NEXTHOP) && 545 (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { 546 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 547 m->m_flags |= M_SKIP_FIREWALL; 548 m->m_flags &= ~M_IP_NEXTHOP; 549 m_tag_delete(m, fwd_tag); 550 if (have_ia_ref) 551 ifa_free(&ia->ia_ifa); 552 goto again; 553 } 554 555 passout: 556 /* 127/8 must not appear on wire - RFC1122. */ 557 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 558 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 559 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 560 IPSTAT_INC(ips_badaddr); 561 error = EADDRNOTAVAIL; 562 goto bad; 563 } 564 } 565 566 m->m_pkthdr.csum_flags |= CSUM_IP; 567 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { 568 in_delayed_cksum(m); 569 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 570 } 571 #ifdef SCTP 572 if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { 573 sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); 574 m->m_pkthdr.csum_flags &= ~CSUM_SCTP; 575 } 576 #endif 577 578 /* 579 * If small enough for interface, or the interface will take 580 * care of the fragmentation for us, we can just send directly. 581 */ 582 if (ip_len <= mtu || 583 (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { 584 ip->ip_sum = 0; 585 if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { 586 ip->ip_sum = in_cksum(m, hlen); 587 m->m_pkthdr.csum_flags &= ~CSUM_IP; 588 } 589 590 /* 591 * Record statistics for this interface address. 592 * With CSUM_TSO the byte/packet count will be slightly 593 * incorrect because we count the IP+TCP headers only 594 * once instead of for every generated packet. 595 */ 596 if (!(flags & IP_FORWARDING) && ia) { 597 if (m->m_pkthdr.csum_flags & CSUM_TSO) 598 counter_u64_add(ia->ia_ifa.ifa_opackets, 599 m->m_pkthdr.len / m->m_pkthdr.tso_segsz); 600 else 601 counter_u64_add(ia->ia_ifa.ifa_opackets, 1); 602 603 counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); 604 } 605 #ifdef MBUF_STRESS_TEST 606 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 607 m = m_fragment(m, M_NOWAIT, mbuf_frag_size); 608 #endif 609 /* 610 * Reset layer specific mbuf flags 611 * to avoid confusing lower layers. 612 */ 613 m_clrprotoflags(m); 614 IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); 615 error = (*ifp->if_output)(ifp, m, 616 (const struct sockaddr *)gw, ro); 617 goto done; 618 } 619 620 /* Balk when DF bit is set or the interface didn't support TSO. */ 621 if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { 622 error = EMSGSIZE; 623 IPSTAT_INC(ips_cantfrag); 624 goto bad; 625 } 626 627 /* 628 * Too large for interface; fragment if possible. If successful, 629 * on return, m will point to a list of packets to be sent. 630 */ 631 error = ip_fragment(ip, &m, mtu, ifp->if_hwassist); 632 if (error) 633 goto bad; 634 for (; m; m = m0) { 635 m0 = m->m_nextpkt; 636 m->m_nextpkt = 0; 637 if (error == 0) { 638 /* Record statistics for this interface address. */ 639 if (ia != NULL) { 640 counter_u64_add(ia->ia_ifa.ifa_opackets, 1); 641 counter_u64_add(ia->ia_ifa.ifa_obytes, 642 m->m_pkthdr.len); 643 } 644 /* 645 * Reset layer specific mbuf flags 646 * to avoid confusing upper layers. 647 */ 648 m_clrprotoflags(m); 649 650 IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); 651 error = (*ifp->if_output)(ifp, m, 652 (const struct sockaddr *)gw, ro); 653 } else 654 m_freem(m); 655 } 656 657 if (error == 0) 658 IPSTAT_INC(ips_fragmented); 659 660 done: 661 if (ro == &iproute) 662 RO_RTFREE(ro); 663 if (have_ia_ref) 664 ifa_free(&ia->ia_ifa); 665 return (error); 666 bad: 667 m_freem(m); 668 goto done; 669 } 670 671 /* 672 * Create a chain of fragments which fit the given mtu. m_frag points to the 673 * mbuf to be fragmented; on return it points to the chain with the fragments. 674 * Return 0 if no error. If error, m_frag may contain a partially built 675 * chain of fragments that should be freed by the caller. 676 * 677 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 678 */ 679 int 680 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 681 u_long if_hwassist_flags) 682 { 683 int error = 0; 684 int hlen = ip->ip_hl << 2; 685 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 686 int off; 687 struct mbuf *m0 = *m_frag; /* the original packet */ 688 int firstlen; 689 struct mbuf **mnext; 690 int nfrags; 691 uint16_t ip_len, ip_off; 692 693 ip_len = ntohs(ip->ip_len); 694 ip_off = ntohs(ip->ip_off); 695 696 if (ip_off & IP_DF) { /* Fragmentation not allowed */ 697 IPSTAT_INC(ips_cantfrag); 698 return EMSGSIZE; 699 } 700 701 /* 702 * Must be able to put at least 8 bytes per fragment. 703 */ 704 if (len < 8) 705 return EMSGSIZE; 706 707 /* 708 * If the interface will not calculate checksums on 709 * fragmented packets, then do it here. 710 */ 711 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 712 in_delayed_cksum(m0); 713 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 714 } 715 #ifdef SCTP 716 if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { 717 sctp_delayed_cksum(m0, hlen); 718 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; 719 } 720 #endif 721 if (len > PAGE_SIZE) { 722 /* 723 * Fragment large datagrams such that each segment 724 * contains a multiple of PAGE_SIZE amount of data, 725 * plus headers. This enables a receiver to perform 726 * page-flipping zero-copy optimizations. 727 * 728 * XXX When does this help given that sender and receiver 729 * could have different page sizes, and also mtu could 730 * be less than the receiver's page size ? 731 */ 732 int newlen; 733 734 off = MIN(mtu, m0->m_pkthdr.len); 735 736 /* 737 * firstlen (off - hlen) must be aligned on an 738 * 8-byte boundary 739 */ 740 if (off < hlen) 741 goto smart_frag_failure; 742 off = ((off - hlen) & ~7) + hlen; 743 newlen = (~PAGE_MASK) & mtu; 744 if ((newlen + sizeof (struct ip)) > mtu) { 745 /* we failed, go back the default */ 746 smart_frag_failure: 747 newlen = len; 748 off = hlen + len; 749 } 750 len = newlen; 751 752 } else { 753 off = hlen + len; 754 } 755 756 firstlen = off - hlen; 757 mnext = &m0->m_nextpkt; /* pointer to next packet */ 758 759 /* 760 * Loop through length of segment after first fragment, 761 * make new header and copy data of each part and link onto chain. 762 * Here, m0 is the original packet, m is the fragment being created. 763 * The fragments are linked off the m_nextpkt of the original 764 * packet, which after processing serves as the first fragment. 765 */ 766 for (nfrags = 1; off < ip_len; off += len, nfrags++) { 767 struct ip *mhip; /* ip header on the fragment */ 768 struct mbuf *m; 769 int mhlen = sizeof (struct ip); 770 771 m = m_gethdr(M_NOWAIT, MT_DATA); 772 if (m == NULL) { 773 error = ENOBUFS; 774 IPSTAT_INC(ips_odropped); 775 goto done; 776 } 777 /* 778 * Make sure the complete packet header gets copied 779 * from the originating mbuf to the newly created 780 * mbuf. This also ensures that existing firewall 781 * classification(s), VLAN tags and so on get copied 782 * to the resulting fragmented packet(s): 783 */ 784 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { 785 m_free(m); 786 error = ENOBUFS; 787 IPSTAT_INC(ips_odropped); 788 goto done; 789 } 790 /* 791 * In the first mbuf, leave room for the link header, then 792 * copy the original IP header including options. The payload 793 * goes into an additional mbuf chain returned by m_copym(). 794 */ 795 m->m_data += max_linkhdr; 796 mhip = mtod(m, struct ip *); 797 *mhip = *ip; 798 if (hlen > sizeof (struct ip)) { 799 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 800 mhip->ip_v = IPVERSION; 801 mhip->ip_hl = mhlen >> 2; 802 } 803 m->m_len = mhlen; 804 /* XXX do we need to add ip_off below ? */ 805 mhip->ip_off = ((off - hlen) >> 3) + ip_off; 806 if (off + len >= ip_len) 807 len = ip_len - off; 808 else 809 mhip->ip_off |= IP_MF; 810 mhip->ip_len = htons((u_short)(len + mhlen)); 811 m->m_next = m_copym(m0, off, len, M_NOWAIT); 812 if (m->m_next == NULL) { /* copy failed */ 813 m_free(m); 814 error = ENOBUFS; /* ??? */ 815 IPSTAT_INC(ips_odropped); 816 goto done; 817 } 818 m->m_pkthdr.len = mhlen + len; 819 #ifdef MAC 820 mac_netinet_fragment(m0, m); 821 #endif 822 mhip->ip_off = htons(mhip->ip_off); 823 mhip->ip_sum = 0; 824 if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 825 mhip->ip_sum = in_cksum(m, mhlen); 826 m->m_pkthdr.csum_flags &= ~CSUM_IP; 827 } 828 *mnext = m; 829 mnext = &m->m_nextpkt; 830 } 831 IPSTAT_ADD(ips_ofragments, nfrags); 832 833 /* 834 * Update first fragment by trimming what's been copied out 835 * and updating header. 836 */ 837 m_adj(m0, hlen + firstlen - ip_len); 838 m0->m_pkthdr.len = hlen + firstlen; 839 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 840 ip->ip_off = htons(ip_off | IP_MF); 841 ip->ip_sum = 0; 842 if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 843 ip->ip_sum = in_cksum(m0, hlen); 844 m0->m_pkthdr.csum_flags &= ~CSUM_IP; 845 } 846 847 done: 848 *m_frag = m0; 849 return error; 850 } 851 852 void 853 in_delayed_cksum(struct mbuf *m) 854 { 855 struct ip *ip; 856 uint16_t csum, offset, ip_len; 857 858 ip = mtod(m, struct ip *); 859 offset = ip->ip_hl << 2 ; 860 ip_len = ntohs(ip->ip_len); 861 csum = in_cksum_skip(m, ip_len, offset); 862 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 863 csum = 0xffff; 864 offset += m->m_pkthdr.csum_data; /* checksum offset */ 865 866 /* find the mbuf in the chain where the checksum starts*/ 867 while ((m != NULL) && (offset >= m->m_len)) { 868 offset -= m->m_len; 869 m = m->m_next; 870 } 871 KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain.")); 872 KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs.")); 873 *(u_short *)(m->m_data + offset) = csum; 874 } 875 876 /* 877 * IP socket option processing. 878 */ 879 int 880 ip_ctloutput(struct socket *so, struct sockopt *sopt) 881 { 882 struct inpcb *inp = sotoinpcb(so); 883 int error, optval; 884 #ifdef RSS 885 uint32_t rss_bucket; 886 int retval; 887 #endif 888 889 error = optval = 0; 890 if (sopt->sopt_level != IPPROTO_IP) { 891 error = EINVAL; 892 893 if (sopt->sopt_level == SOL_SOCKET && 894 sopt->sopt_dir == SOPT_SET) { 895 switch (sopt->sopt_name) { 896 case SO_REUSEADDR: 897 INP_WLOCK(inp); 898 if ((so->so_options & SO_REUSEADDR) != 0) 899 inp->inp_flags2 |= INP_REUSEADDR; 900 else 901 inp->inp_flags2 &= ~INP_REUSEADDR; 902 INP_WUNLOCK(inp); 903 error = 0; 904 break; 905 case SO_REUSEPORT: 906 INP_WLOCK(inp); 907 if ((so->so_options & SO_REUSEPORT) != 0) 908 inp->inp_flags2 |= INP_REUSEPORT; 909 else 910 inp->inp_flags2 &= ~INP_REUSEPORT; 911 INP_WUNLOCK(inp); 912 error = 0; 913 break; 914 case SO_SETFIB: 915 INP_WLOCK(inp); 916 inp->inp_inc.inc_fibnum = so->so_fibnum; 917 INP_WUNLOCK(inp); 918 error = 0; 919 break; 920 default: 921 break; 922 } 923 } 924 return (error); 925 } 926 927 switch (sopt->sopt_dir) { 928 case SOPT_SET: 929 switch (sopt->sopt_name) { 930 case IP_OPTIONS: 931 #ifdef notyet 932 case IP_RETOPTS: 933 #endif 934 { 935 struct mbuf *m; 936 if (sopt->sopt_valsize > MLEN) { 937 error = EMSGSIZE; 938 break; 939 } 940 m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); 941 if (m == NULL) { 942 error = ENOBUFS; 943 break; 944 } 945 m->m_len = sopt->sopt_valsize; 946 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 947 m->m_len); 948 if (error) { 949 m_free(m); 950 break; 951 } 952 INP_WLOCK(inp); 953 error = ip_pcbopts(inp, sopt->sopt_name, m); 954 INP_WUNLOCK(inp); 955 return (error); 956 } 957 958 case IP_BINDANY: 959 if (sopt->sopt_td != NULL) { 960 error = priv_check(sopt->sopt_td, 961 PRIV_NETINET_BINDANY); 962 if (error) 963 break; 964 } 965 /* FALLTHROUGH */ 966 case IP_BINDMULTI: 967 #ifdef RSS 968 case IP_RSS_LISTEN_BUCKET: 969 #endif 970 case IP_TOS: 971 case IP_TTL: 972 case IP_MINTTL: 973 case IP_RECVOPTS: 974 case IP_RECVRETOPTS: 975 case IP_RECVDSTADDR: 976 case IP_RECVTTL: 977 case IP_RECVIF: 978 case IP_ONESBCAST: 979 case IP_DONTFRAG: 980 case IP_RECVTOS: 981 case IP_RECVFLOWID: 982 #ifdef RSS 983 case IP_RECVRSSBUCKETID: 984 #endif 985 error = sooptcopyin(sopt, &optval, sizeof optval, 986 sizeof optval); 987 if (error) 988 break; 989 990 switch (sopt->sopt_name) { 991 case IP_TOS: 992 inp->inp_ip_tos = optval; 993 break; 994 995 case IP_TTL: 996 inp->inp_ip_ttl = optval; 997 break; 998 999 case IP_MINTTL: 1000 if (optval >= 0 && optval <= MAXTTL) 1001 inp->inp_ip_minttl = optval; 1002 else 1003 error = EINVAL; 1004 break; 1005 1006 #define OPTSET(bit) do { \ 1007 INP_WLOCK(inp); \ 1008 if (optval) \ 1009 inp->inp_flags |= bit; \ 1010 else \ 1011 inp->inp_flags &= ~bit; \ 1012 INP_WUNLOCK(inp); \ 1013 } while (0) 1014 1015 #define OPTSET2(bit, val) do { \ 1016 INP_WLOCK(inp); \ 1017 if (val) \ 1018 inp->inp_flags2 |= bit; \ 1019 else \ 1020 inp->inp_flags2 &= ~bit; \ 1021 INP_WUNLOCK(inp); \ 1022 } while (0) 1023 1024 case IP_RECVOPTS: 1025 OPTSET(INP_RECVOPTS); 1026 break; 1027 1028 case IP_RECVRETOPTS: 1029 OPTSET(INP_RECVRETOPTS); 1030 break; 1031 1032 case IP_RECVDSTADDR: 1033 OPTSET(INP_RECVDSTADDR); 1034 break; 1035 1036 case IP_RECVTTL: 1037 OPTSET(INP_RECVTTL); 1038 break; 1039 1040 case IP_RECVIF: 1041 OPTSET(INP_RECVIF); 1042 break; 1043 1044 case IP_ONESBCAST: 1045 OPTSET(INP_ONESBCAST); 1046 break; 1047 case IP_DONTFRAG: 1048 OPTSET(INP_DONTFRAG); 1049 break; 1050 case IP_BINDANY: 1051 OPTSET(INP_BINDANY); 1052 break; 1053 case IP_RECVTOS: 1054 OPTSET(INP_RECVTOS); 1055 break; 1056 case IP_BINDMULTI: 1057 OPTSET2(INP_BINDMULTI, optval); 1058 break; 1059 case IP_RECVFLOWID: 1060 OPTSET2(INP_RECVFLOWID, optval); 1061 break; 1062 #ifdef RSS 1063 case IP_RSS_LISTEN_BUCKET: 1064 if ((optval >= 0) && 1065 (optval < rss_getnumbuckets())) { 1066 inp->inp_rss_listen_bucket = optval; 1067 OPTSET2(INP_RSS_BUCKET_SET, 1); 1068 } else { 1069 error = EINVAL; 1070 } 1071 break; 1072 case IP_RECVRSSBUCKETID: 1073 OPTSET2(INP_RECVRSSBUCKETID, optval); 1074 break; 1075 #endif 1076 } 1077 break; 1078 #undef OPTSET 1079 #undef OPTSET2 1080 1081 /* 1082 * Multicast socket options are processed by the in_mcast 1083 * module. 1084 */ 1085 case IP_MULTICAST_IF: 1086 case IP_MULTICAST_VIF: 1087 case IP_MULTICAST_TTL: 1088 case IP_MULTICAST_LOOP: 1089 case IP_ADD_MEMBERSHIP: 1090 case IP_DROP_MEMBERSHIP: 1091 case IP_ADD_SOURCE_MEMBERSHIP: 1092 case IP_DROP_SOURCE_MEMBERSHIP: 1093 case IP_BLOCK_SOURCE: 1094 case IP_UNBLOCK_SOURCE: 1095 case IP_MSFILTER: 1096 case MCAST_JOIN_GROUP: 1097 case MCAST_LEAVE_GROUP: 1098 case MCAST_JOIN_SOURCE_GROUP: 1099 case MCAST_LEAVE_SOURCE_GROUP: 1100 case MCAST_BLOCK_SOURCE: 1101 case MCAST_UNBLOCK_SOURCE: 1102 error = inp_setmoptions(inp, sopt); 1103 break; 1104 1105 case IP_PORTRANGE: 1106 error = sooptcopyin(sopt, &optval, sizeof optval, 1107 sizeof optval); 1108 if (error) 1109 break; 1110 1111 INP_WLOCK(inp); 1112 switch (optval) { 1113 case IP_PORTRANGE_DEFAULT: 1114 inp->inp_flags &= ~(INP_LOWPORT); 1115 inp->inp_flags &= ~(INP_HIGHPORT); 1116 break; 1117 1118 case IP_PORTRANGE_HIGH: 1119 inp->inp_flags &= ~(INP_LOWPORT); 1120 inp->inp_flags |= INP_HIGHPORT; 1121 break; 1122 1123 case IP_PORTRANGE_LOW: 1124 inp->inp_flags &= ~(INP_HIGHPORT); 1125 inp->inp_flags |= INP_LOWPORT; 1126 break; 1127 1128 default: 1129 error = EINVAL; 1130 break; 1131 } 1132 INP_WUNLOCK(inp); 1133 break; 1134 1135 #ifdef IPSEC 1136 case IP_IPSEC_POLICY: 1137 { 1138 caddr_t req; 1139 struct mbuf *m; 1140 1141 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1142 break; 1143 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1144 break; 1145 req = mtod(m, caddr_t); 1146 error = ipsec_set_policy(inp, sopt->sopt_name, req, 1147 m->m_len, (sopt->sopt_td != NULL) ? 1148 sopt->sopt_td->td_ucred : NULL); 1149 m_freem(m); 1150 break; 1151 } 1152 #endif /* IPSEC */ 1153 1154 default: 1155 error = ENOPROTOOPT; 1156 break; 1157 } 1158 break; 1159 1160 case SOPT_GET: 1161 switch (sopt->sopt_name) { 1162 case IP_OPTIONS: 1163 case IP_RETOPTS: 1164 if (inp->inp_options) 1165 error = sooptcopyout(sopt, 1166 mtod(inp->inp_options, 1167 char *), 1168 inp->inp_options->m_len); 1169 else 1170 sopt->sopt_valsize = 0; 1171 break; 1172 1173 case IP_TOS: 1174 case IP_TTL: 1175 case IP_MINTTL: 1176 case IP_RECVOPTS: 1177 case IP_RECVRETOPTS: 1178 case IP_RECVDSTADDR: 1179 case IP_RECVTTL: 1180 case IP_RECVIF: 1181 case IP_PORTRANGE: 1182 case IP_ONESBCAST: 1183 case IP_DONTFRAG: 1184 case IP_BINDANY: 1185 case IP_RECVTOS: 1186 case IP_BINDMULTI: 1187 case IP_FLOWID: 1188 case IP_FLOWTYPE: 1189 case IP_RECVFLOWID: 1190 #ifdef RSS 1191 case IP_RSSBUCKETID: 1192 case IP_RECVRSSBUCKETID: 1193 #endif 1194 switch (sopt->sopt_name) { 1195 1196 case IP_TOS: 1197 optval = inp->inp_ip_tos; 1198 break; 1199 1200 case IP_TTL: 1201 optval = inp->inp_ip_ttl; 1202 break; 1203 1204 case IP_MINTTL: 1205 optval = inp->inp_ip_minttl; 1206 break; 1207 1208 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1209 #define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0) 1210 1211 case IP_RECVOPTS: 1212 optval = OPTBIT(INP_RECVOPTS); 1213 break; 1214 1215 case IP_RECVRETOPTS: 1216 optval = OPTBIT(INP_RECVRETOPTS); 1217 break; 1218 1219 case IP_RECVDSTADDR: 1220 optval = OPTBIT(INP_RECVDSTADDR); 1221 break; 1222 1223 case IP_RECVTTL: 1224 optval = OPTBIT(INP_RECVTTL); 1225 break; 1226 1227 case IP_RECVIF: 1228 optval = OPTBIT(INP_RECVIF); 1229 break; 1230 1231 case IP_PORTRANGE: 1232 if (inp->inp_flags & INP_HIGHPORT) 1233 optval = IP_PORTRANGE_HIGH; 1234 else if (inp->inp_flags & INP_LOWPORT) 1235 optval = IP_PORTRANGE_LOW; 1236 else 1237 optval = 0; 1238 break; 1239 1240 case IP_ONESBCAST: 1241 optval = OPTBIT(INP_ONESBCAST); 1242 break; 1243 case IP_DONTFRAG: 1244 optval = OPTBIT(INP_DONTFRAG); 1245 break; 1246 case IP_BINDANY: 1247 optval = OPTBIT(INP_BINDANY); 1248 break; 1249 case IP_RECVTOS: 1250 optval = OPTBIT(INP_RECVTOS); 1251 break; 1252 case IP_FLOWID: 1253 optval = inp->inp_flowid; 1254 break; 1255 case IP_FLOWTYPE: 1256 optval = inp->inp_flowtype; 1257 break; 1258 case IP_RECVFLOWID: 1259 optval = OPTBIT2(INP_RECVFLOWID); 1260 break; 1261 #ifdef RSS 1262 case IP_RSSBUCKETID: 1263 retval = rss_hash2bucket(inp->inp_flowid, 1264 inp->inp_flowtype, 1265 &rss_bucket); 1266 if (retval == 0) 1267 optval = rss_bucket; 1268 else 1269 error = EINVAL; 1270 break; 1271 case IP_RECVRSSBUCKETID: 1272 optval = OPTBIT2(INP_RECVRSSBUCKETID); 1273 break; 1274 #endif 1275 case IP_BINDMULTI: 1276 optval = OPTBIT2(INP_BINDMULTI); 1277 break; 1278 } 1279 error = sooptcopyout(sopt, &optval, sizeof optval); 1280 break; 1281 1282 /* 1283 * Multicast socket options are processed by the in_mcast 1284 * module. 1285 */ 1286 case IP_MULTICAST_IF: 1287 case IP_MULTICAST_VIF: 1288 case IP_MULTICAST_TTL: 1289 case IP_MULTICAST_LOOP: 1290 case IP_MSFILTER: 1291 error = inp_getmoptions(inp, sopt); 1292 break; 1293 1294 #ifdef IPSEC 1295 case IP_IPSEC_POLICY: 1296 { 1297 struct mbuf *m = NULL; 1298 caddr_t req = NULL; 1299 size_t len = 0; 1300 1301 if (m != 0) { 1302 req = mtod(m, caddr_t); 1303 len = m->m_len; 1304 } 1305 error = ipsec_get_policy(sotoinpcb(so), req, len, &m); 1306 if (error == 0) 1307 error = soopt_mcopyout(sopt, m); /* XXX */ 1308 if (error == 0) 1309 m_freem(m); 1310 break; 1311 } 1312 #endif /* IPSEC */ 1313 1314 default: 1315 error = ENOPROTOOPT; 1316 break; 1317 } 1318 break; 1319 } 1320 return (error); 1321 } 1322 1323 /* 1324 * Routine called from ip_output() to loop back a copy of an IP multicast 1325 * packet to the input queue of a specified interface. Note that this 1326 * calls the output routine of the loopback "driver", but with an interface 1327 * pointer that might NOT be a loopback interface -- evil, but easier than 1328 * replicating that code here. 1329 */ 1330 static void 1331 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst, 1332 int hlen) 1333 { 1334 register struct ip *ip; 1335 struct mbuf *copym; 1336 1337 /* 1338 * Make a deep copy of the packet because we're going to 1339 * modify the pack in order to generate checksums. 1340 */ 1341 copym = m_dup(m, M_NOWAIT); 1342 if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen)) 1343 copym = m_pullup(copym, hlen); 1344 if (copym != NULL) { 1345 /* If needed, compute the checksum and mark it as valid. */ 1346 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1347 in_delayed_cksum(copym); 1348 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1349 copym->m_pkthdr.csum_flags |= 1350 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1351 copym->m_pkthdr.csum_data = 0xffff; 1352 } 1353 /* 1354 * We don't bother to fragment if the IP length is greater 1355 * than the interface's MTU. Can this possibly matter? 1356 */ 1357 ip = mtod(copym, struct ip *); 1358 ip->ip_sum = 0; 1359 ip->ip_sum = in_cksum(copym, hlen); 1360 #if 1 /* XXX */ 1361 if (dst->sin_family != AF_INET) { 1362 printf("ip_mloopback: bad address family %d\n", 1363 dst->sin_family); 1364 dst->sin_family = AF_INET; 1365 } 1366 #endif 1367 if_simloop(ifp, copym, dst->sin_family, 0); 1368 } 1369 } 1370