1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_inet.h" 36 #include "opt_ratelimit.h" 37 #include "opt_ipsec.h" 38 #include "opt_mbuf_stress_test.h" 39 #include "opt_mpath.h" 40 #include "opt_route.h" 41 #include "opt_sctp.h" 42 #include "opt_rss.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/malloc.h> 49 #include <sys/mbuf.h> 50 #include <sys/priv.h> 51 #include <sys/proc.h> 52 #include <sys/protosw.h> 53 #include <sys/rmlock.h> 54 #include <sys/sdt.h> 55 #include <sys/socket.h> 56 #include <sys/socketvar.h> 57 #include <sys/sysctl.h> 58 #include <sys/ucred.h> 59 60 #include <net/if.h> 61 #include <net/if_var.h> 62 #include <net/if_llatbl.h> 63 #include <net/netisr.h> 64 #include <net/pfil.h> 65 #include <net/route.h> 66 #include <net/flowtable.h> 67 #ifdef RADIX_MPATH 68 #include <net/radix_mpath.h> 69 #endif 70 #include <net/rss_config.h> 71 #include <net/vnet.h> 72 73 #include <netinet/in.h> 74 #include <netinet/in_kdtrace.h> 75 #include <netinet/in_systm.h> 76 #include <netinet/ip.h> 77 #include <netinet/in_pcb.h> 78 #include <netinet/in_rss.h> 79 #include <netinet/in_var.h> 80 #include <netinet/ip_var.h> 81 #include <netinet/ip_options.h> 82 #ifdef SCTP 83 #include <netinet/sctp.h> 84 #include <netinet/sctp_crc32.h> 85 #endif 86 87 #include <netipsec/ipsec_support.h> 88 89 #include <machine/in_cksum.h> 90 91 #include <security/mac/mac_framework.h> 92 93 #ifdef MBUF_STRESS_TEST 94 static int mbuf_frag_size = 0; 95 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 96 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 97 #endif 98 99 static void ip_mloopback(struct ifnet *, const struct mbuf *, int); 100 101 102 extern int in_mcast_loop; 103 extern struct protosw inetsw[]; 104 105 static inline int 106 ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp, 107 struct sockaddr_in *dst, int *fibnum, int *error) 108 { 109 struct m_tag *fwd_tag = NULL; 110 struct mbuf *m; 111 struct in_addr odst; 112 struct ip *ip; 113 114 m = *mp; 115 ip = mtod(m, struct ip *); 116 117 /* Run through list of hooks for output packets. */ 118 odst.s_addr = ip->ip_dst.s_addr; 119 *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, inp); 120 m = *mp; 121 if ((*error) != 0 || m == NULL) 122 return 1; /* Finished */ 123 124 ip = mtod(m, struct ip *); 125 126 /* See if destination IP address was changed by packet filter. */ 127 if (odst.s_addr != ip->ip_dst.s_addr) { 128 m->m_flags |= M_SKIP_FIREWALL; 129 /* If destination is now ourself drop to ip_input(). */ 130 if (in_localip(ip->ip_dst)) { 131 m->m_flags |= M_FASTFWD_OURS; 132 if (m->m_pkthdr.rcvif == NULL) 133 m->m_pkthdr.rcvif = V_loif; 134 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 135 m->m_pkthdr.csum_flags |= 136 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 137 m->m_pkthdr.csum_data = 0xffff; 138 } 139 m->m_pkthdr.csum_flags |= 140 CSUM_IP_CHECKED | CSUM_IP_VALID; 141 #ifdef SCTP 142 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 143 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 144 #endif 145 *error = netisr_queue(NETISR_IP, m); 146 return 1; /* Finished */ 147 } 148 149 bzero(dst, sizeof(*dst)); 150 dst->sin_family = AF_INET; 151 dst->sin_len = sizeof(*dst); 152 dst->sin_addr = ip->ip_dst; 153 154 return -1; /* Reloop */ 155 } 156 /* See if fib was changed by packet filter. */ 157 if ((*fibnum) != M_GETFIB(m)) { 158 m->m_flags |= M_SKIP_FIREWALL; 159 *fibnum = M_GETFIB(m); 160 return -1; /* Reloop for FIB change */ 161 } 162 163 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ 164 if (m->m_flags & M_FASTFWD_OURS) { 165 if (m->m_pkthdr.rcvif == NULL) 166 m->m_pkthdr.rcvif = V_loif; 167 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 168 m->m_pkthdr.csum_flags |= 169 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 170 m->m_pkthdr.csum_data = 0xffff; 171 } 172 #ifdef SCTP 173 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 174 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 175 #endif 176 m->m_pkthdr.csum_flags |= 177 CSUM_IP_CHECKED | CSUM_IP_VALID; 178 179 *error = netisr_queue(NETISR_IP, m); 180 return 1; /* Finished */ 181 } 182 /* Or forward to some other address? */ 183 if ((m->m_flags & M_IP_NEXTHOP) && 184 ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { 185 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 186 m->m_flags |= M_SKIP_FIREWALL; 187 m->m_flags &= ~M_IP_NEXTHOP; 188 m_tag_delete(m, fwd_tag); 189 190 return -1; /* Reloop for CHANGE of dst */ 191 } 192 193 return 0; 194 } 195 196 /* 197 * IP output. The packet in mbuf chain m contains a skeletal IP 198 * header (with len, off, ttl, proto, tos, src, dst). 199 * The mbuf chain containing the packet will be freed. 200 * The mbuf opt, if present, will not be freed. 201 * If route ro is present and has ro_rt initialized, route lookup would be 202 * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, 203 * then result of route lookup is stored in ro->ro_rt. 204 * 205 * In the IP forwarding case, the packet will arrive with options already 206 * inserted, so must have a NULL opt pointer. 207 */ 208 int 209 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 210 struct ip_moptions *imo, struct inpcb *inp) 211 { 212 struct rm_priotracker in_ifa_tracker; 213 struct ip *ip; 214 struct ifnet *ifp = NULL; /* keep compiler happy */ 215 struct mbuf *m0; 216 int hlen = sizeof (struct ip); 217 int mtu; 218 int error = 0; 219 struct sockaddr_in *dst; 220 const struct sockaddr_in *gw; 221 struct in_ifaddr *ia; 222 int isbroadcast; 223 uint16_t ip_len, ip_off; 224 struct route iproute; 225 struct rtentry *rte; /* cache for ro->ro_rt */ 226 uint32_t fibnum; 227 int have_ia_ref; 228 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 229 int no_route_but_check_spd = 0; 230 #endif 231 M_ASSERTPKTHDR(m); 232 233 if (inp != NULL) { 234 INP_LOCK_ASSERT(inp); 235 M_SETFIB(m, inp->inp_inc.inc_fibnum); 236 if ((flags & IP_NODEFAULTFLOWID) == 0) { 237 m->m_pkthdr.flowid = inp->inp_flowid; 238 M_HASHTYPE_SET(m, inp->inp_flowtype); 239 } 240 } 241 242 if (ro == NULL) { 243 ro = &iproute; 244 bzero(ro, sizeof (*ro)); 245 } 246 247 #ifdef FLOWTABLE 248 if (ro->ro_rt == NULL) 249 (void )flowtable_lookup(AF_INET, m, ro); 250 #endif 251 252 if (opt) { 253 int len = 0; 254 m = ip_insertoptions(m, opt, &len); 255 if (len != 0) 256 hlen = len; /* ip->ip_hl is updated above */ 257 } 258 ip = mtod(m, struct ip *); 259 ip_len = ntohs(ip->ip_len); 260 ip_off = ntohs(ip->ip_off); 261 262 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 263 ip->ip_v = IPVERSION; 264 ip->ip_hl = hlen >> 2; 265 ip_fillid(ip); 266 IPSTAT_INC(ips_localout); 267 } else { 268 /* Header already set, fetch hlen from there */ 269 hlen = ip->ip_hl << 2; 270 } 271 272 /* 273 * dst/gw handling: 274 * 275 * dst can be rewritten but always points to &ro->ro_dst. 276 * gw is readonly but can point either to dst OR rt_gateway, 277 * therefore we need restore gw if we're redoing lookup. 278 */ 279 gw = dst = (struct sockaddr_in *)&ro->ro_dst; 280 fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); 281 rte = ro->ro_rt; 282 if (rte == NULL) { 283 bzero(dst, sizeof(*dst)); 284 dst->sin_family = AF_INET; 285 dst->sin_len = sizeof(*dst); 286 dst->sin_addr = ip->ip_dst; 287 } 288 again: 289 /* 290 * Validate route against routing table additions; 291 * a better/more specific route might have been added. 292 */ 293 if (inp) 294 RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); 295 /* 296 * If there is a cached route, 297 * check that it is to the same destination 298 * and is still up. If not, free it and try again. 299 * The address family should also be checked in case of sharing the 300 * cache with IPv6. 301 * Also check whether routing cache needs invalidation. 302 */ 303 rte = ro->ro_rt; 304 if (rte && ((rte->rt_flags & RTF_UP) == 0 || 305 rte->rt_ifp == NULL || 306 !RT_LINK_IS_UP(rte->rt_ifp) || 307 dst->sin_family != AF_INET || 308 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 309 RTFREE(rte); 310 rte = ro->ro_rt = (struct rtentry *)NULL; 311 if (ro->ro_lle) 312 LLE_FREE(ro->ro_lle); /* zeros ro_lle */ 313 ro->ro_lle = (struct llentry *)NULL; 314 } 315 ia = NULL; 316 have_ia_ref = 0; 317 /* 318 * If routing to interface only, short circuit routing lookup. 319 * The use of an all-ones broadcast address implies this; an 320 * interface is specified by the broadcast address of an interface, 321 * or the destination address of a ptp interface. 322 */ 323 if (flags & IP_SENDONES) { 324 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst), 325 M_GETFIB(m)))) == NULL && 326 (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), 327 M_GETFIB(m)))) == NULL) { 328 IPSTAT_INC(ips_noroute); 329 error = ENETUNREACH; 330 goto bad; 331 } 332 have_ia_ref = 1; 333 ip->ip_dst.s_addr = INADDR_BROADCAST; 334 dst->sin_addr = ip->ip_dst; 335 ifp = ia->ia_ifp; 336 ip->ip_ttl = 1; 337 isbroadcast = 1; 338 } else if (flags & IP_ROUTETOIF) { 339 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), 340 M_GETFIB(m)))) == NULL && 341 (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0, 342 M_GETFIB(m)))) == NULL) { 343 IPSTAT_INC(ips_noroute); 344 error = ENETUNREACH; 345 goto bad; 346 } 347 have_ia_ref = 1; 348 ifp = ia->ia_ifp; 349 ip->ip_ttl = 1; 350 isbroadcast = ifp->if_flags & IFF_BROADCAST ? 351 in_ifaddr_broadcast(dst->sin_addr, ia) : 0; 352 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 353 imo != NULL && imo->imo_multicast_ifp != NULL) { 354 /* 355 * Bypass the normal routing lookup for multicast 356 * packets if the interface is specified. 357 */ 358 ifp = imo->imo_multicast_ifp; 359 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 360 if (ia) 361 have_ia_ref = 1; 362 isbroadcast = 0; /* fool gcc */ 363 } else { 364 /* 365 * We want to do any cloning requested by the link layer, 366 * as this is probably required in all cases for correct 367 * operation (as it is for ARP). 368 */ 369 if (rte == NULL) { 370 #ifdef RADIX_MPATH 371 rtalloc_mpath_fib(ro, 372 ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 373 fibnum); 374 #else 375 in_rtalloc_ign(ro, 0, fibnum); 376 #endif 377 rte = ro->ro_rt; 378 } 379 if (rte == NULL || 380 (rte->rt_flags & RTF_UP) == 0 || 381 rte->rt_ifp == NULL || 382 !RT_LINK_IS_UP(rte->rt_ifp)) { 383 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 384 /* 385 * There is no route for this packet, but it is 386 * possible that a matching SPD entry exists. 387 */ 388 no_route_but_check_spd = 1; 389 mtu = 0; /* Silence GCC warning. */ 390 goto sendit; 391 #endif 392 IPSTAT_INC(ips_noroute); 393 error = EHOSTUNREACH; 394 goto bad; 395 } 396 ia = ifatoia(rte->rt_ifa); 397 ifp = rte->rt_ifp; 398 counter_u64_add(rte->rt_pksent, 1); 399 rt_update_ro_flags(ro); 400 if (rte->rt_flags & RTF_GATEWAY) 401 gw = (struct sockaddr_in *)rte->rt_gateway; 402 if (rte->rt_flags & RTF_HOST) 403 isbroadcast = (rte->rt_flags & RTF_BROADCAST); 404 else if (ifp->if_flags & IFF_BROADCAST) 405 isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); 406 else 407 isbroadcast = 0; 408 } 409 410 /* 411 * Calculate MTU. If we have a route that is up, use that, 412 * otherwise use the interface's MTU. 413 */ 414 if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) 415 mtu = rte->rt_mtu; 416 else 417 mtu = ifp->if_mtu; 418 /* Catch a possible divide by zero later. */ 419 KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", 420 __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); 421 422 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 423 m->m_flags |= M_MCAST; 424 /* 425 * IP destination address is multicast. Make sure "gw" 426 * still points to the address in "ro". (It may have been 427 * changed to point to a gateway address, above.) 428 */ 429 gw = dst; 430 /* 431 * See if the caller provided any multicast options 432 */ 433 if (imo != NULL) { 434 ip->ip_ttl = imo->imo_multicast_ttl; 435 if (imo->imo_multicast_vif != -1) 436 ip->ip_src.s_addr = 437 ip_mcast_src ? 438 ip_mcast_src(imo->imo_multicast_vif) : 439 INADDR_ANY; 440 } else 441 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 442 /* 443 * Confirm that the outgoing interface supports multicast. 444 */ 445 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 446 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 447 IPSTAT_INC(ips_noroute); 448 error = ENETUNREACH; 449 goto bad; 450 } 451 } 452 /* 453 * If source address not specified yet, use address 454 * of outgoing interface. 455 */ 456 if (ip->ip_src.s_addr == INADDR_ANY) { 457 /* Interface may have no addresses. */ 458 if (ia != NULL) 459 ip->ip_src = IA_SIN(ia)->sin_addr; 460 } 461 462 if ((imo == NULL && in_mcast_loop) || 463 (imo && imo->imo_multicast_loop)) { 464 /* 465 * Loop back multicast datagram if not expressly 466 * forbidden to do so, even if we are not a member 467 * of the group; ip_input() will filter it later, 468 * thus deferring a hash lookup and mutex acquisition 469 * at the expense of a cheap copy using m_copym(). 470 */ 471 ip_mloopback(ifp, m, hlen); 472 } else { 473 /* 474 * If we are acting as a multicast router, perform 475 * multicast forwarding as if the packet had just 476 * arrived on the interface to which we are about 477 * to send. The multicast forwarding function 478 * recursively calls this function, using the 479 * IP_FORWARDING flag to prevent infinite recursion. 480 * 481 * Multicasts that are looped back by ip_mloopback(), 482 * above, will be forwarded by the ip_input() routine, 483 * if necessary. 484 */ 485 if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { 486 /* 487 * If rsvp daemon is not running, do not 488 * set ip_moptions. This ensures that the packet 489 * is multicast and not just sent down one link 490 * as prescribed by rsvpd. 491 */ 492 if (!V_rsvp_on) 493 imo = NULL; 494 if (ip_mforward && 495 ip_mforward(ip, ifp, m, imo) != 0) { 496 m_freem(m); 497 goto done; 498 } 499 } 500 } 501 502 /* 503 * Multicasts with a time-to-live of zero may be looped- 504 * back, above, but must not be transmitted on a network. 505 * Also, multicasts addressed to the loopback interface 506 * are not sent -- the above call to ip_mloopback() will 507 * loop back a copy. ip_input() will drop the copy if 508 * this host does not belong to the destination group on 509 * the loopback interface. 510 */ 511 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 512 m_freem(m); 513 goto done; 514 } 515 516 goto sendit; 517 } 518 519 /* 520 * If the source address is not specified yet, use the address 521 * of the outoing interface. 522 */ 523 if (ip->ip_src.s_addr == INADDR_ANY) { 524 /* Interface may have no addresses. */ 525 if (ia != NULL) { 526 ip->ip_src = IA_SIN(ia)->sin_addr; 527 } 528 } 529 530 /* 531 * Look for broadcast address and 532 * verify user is allowed to send 533 * such a packet. 534 */ 535 if (isbroadcast) { 536 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 537 error = EADDRNOTAVAIL; 538 goto bad; 539 } 540 if ((flags & IP_ALLOWBROADCAST) == 0) { 541 error = EACCES; 542 goto bad; 543 } 544 /* don't allow broadcast messages to be fragmented */ 545 if (ip_len > mtu) { 546 error = EMSGSIZE; 547 goto bad; 548 } 549 m->m_flags |= M_BCAST; 550 } else { 551 m->m_flags &= ~M_BCAST; 552 } 553 554 sendit: 555 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 556 if (IPSEC_ENABLED(ipv4)) { 557 if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { 558 if (error == EINPROGRESS) 559 error = 0; 560 goto done; 561 } 562 } 563 /* 564 * Check if there was a route for this packet; return error if not. 565 */ 566 if (no_route_but_check_spd) { 567 IPSTAT_INC(ips_noroute); 568 error = EHOSTUNREACH; 569 goto bad; 570 } 571 /* Update variables that are affected by ipsec4_output(). */ 572 ip = mtod(m, struct ip *); 573 hlen = ip->ip_hl << 2; 574 #endif /* IPSEC */ 575 576 /* Jump over all PFIL processing if hooks are not active. */ 577 if (PFIL_HOOKED(&V_inet_pfil_hook)) { 578 switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) { 579 case 1: /* Finished */ 580 goto done; 581 582 case 0: /* Continue normally */ 583 ip = mtod(m, struct ip *); 584 break; 585 586 case -1: /* Need to try again */ 587 /* Reset everything for a new round */ 588 RO_RTFREE(ro); 589 if (have_ia_ref) 590 ifa_free(&ia->ia_ifa); 591 ro->ro_prepend = NULL; 592 rte = NULL; 593 gw = dst; 594 ip = mtod(m, struct ip *); 595 goto again; 596 597 } 598 } 599 600 /* 127/8 must not appear on wire - RFC1122. */ 601 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 602 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 603 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 604 IPSTAT_INC(ips_badaddr); 605 error = EADDRNOTAVAIL; 606 goto bad; 607 } 608 } 609 610 m->m_pkthdr.csum_flags |= CSUM_IP; 611 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { 612 in_delayed_cksum(m); 613 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 614 } 615 #ifdef SCTP 616 if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { 617 sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); 618 m->m_pkthdr.csum_flags &= ~CSUM_SCTP; 619 } 620 #endif 621 622 /* 623 * If small enough for interface, or the interface will take 624 * care of the fragmentation for us, we can just send directly. 625 */ 626 if (ip_len <= mtu || 627 (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { 628 ip->ip_sum = 0; 629 if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { 630 ip->ip_sum = in_cksum(m, hlen); 631 m->m_pkthdr.csum_flags &= ~CSUM_IP; 632 } 633 634 /* 635 * Record statistics for this interface address. 636 * With CSUM_TSO the byte/packet count will be slightly 637 * incorrect because we count the IP+TCP headers only 638 * once instead of for every generated packet. 639 */ 640 if (!(flags & IP_FORWARDING) && ia) { 641 if (m->m_pkthdr.csum_flags & CSUM_TSO) 642 counter_u64_add(ia->ia_ifa.ifa_opackets, 643 m->m_pkthdr.len / m->m_pkthdr.tso_segsz); 644 else 645 counter_u64_add(ia->ia_ifa.ifa_opackets, 1); 646 647 counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); 648 } 649 #ifdef MBUF_STRESS_TEST 650 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 651 m = m_fragment(m, M_NOWAIT, mbuf_frag_size); 652 #endif 653 /* 654 * Reset layer specific mbuf flags 655 * to avoid confusing lower layers. 656 */ 657 m_clrprotoflags(m); 658 IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); 659 #ifdef RATELIMIT 660 if (inp != NULL) { 661 if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) 662 in_pcboutput_txrtlmt(inp, ifp, m); 663 /* stamp send tag on mbuf */ 664 m->m_pkthdr.snd_tag = inp->inp_snd_tag; 665 } else { 666 m->m_pkthdr.snd_tag = NULL; 667 } 668 #endif 669 error = (*ifp->if_output)(ifp, m, 670 (const struct sockaddr *)gw, ro); 671 #ifdef RATELIMIT 672 /* check for route change */ 673 if (error == EAGAIN) 674 in_pcboutput_eagain(inp); 675 #endif 676 goto done; 677 } 678 679 /* Balk when DF bit is set or the interface didn't support TSO. */ 680 if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { 681 error = EMSGSIZE; 682 IPSTAT_INC(ips_cantfrag); 683 goto bad; 684 } 685 686 /* 687 * Too large for interface; fragment if possible. If successful, 688 * on return, m will point to a list of packets to be sent. 689 */ 690 error = ip_fragment(ip, &m, mtu, ifp->if_hwassist); 691 if (error) 692 goto bad; 693 for (; m; m = m0) { 694 m0 = m->m_nextpkt; 695 m->m_nextpkt = 0; 696 if (error == 0) { 697 /* Record statistics for this interface address. */ 698 if (ia != NULL) { 699 counter_u64_add(ia->ia_ifa.ifa_opackets, 1); 700 counter_u64_add(ia->ia_ifa.ifa_obytes, 701 m->m_pkthdr.len); 702 } 703 /* 704 * Reset layer specific mbuf flags 705 * to avoid confusing upper layers. 706 */ 707 m_clrprotoflags(m); 708 709 IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp, 710 mtod(m, struct ip *), NULL); 711 #ifdef RATELIMIT 712 if (inp != NULL) { 713 if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) 714 in_pcboutput_txrtlmt(inp, ifp, m); 715 /* stamp send tag on mbuf */ 716 m->m_pkthdr.snd_tag = inp->inp_snd_tag; 717 } else { 718 m->m_pkthdr.snd_tag = NULL; 719 } 720 #endif 721 error = (*ifp->if_output)(ifp, m, 722 (const struct sockaddr *)gw, ro); 723 #ifdef RATELIMIT 724 /* check for route change */ 725 if (error == EAGAIN) 726 in_pcboutput_eagain(inp); 727 #endif 728 } else 729 m_freem(m); 730 } 731 732 if (error == 0) 733 IPSTAT_INC(ips_fragmented); 734 735 done: 736 if (ro == &iproute) 737 RO_RTFREE(ro); 738 else if (rte == NULL) 739 /* 740 * If the caller supplied a route but somehow the reference 741 * to it has been released need to prevent the caller 742 * calling RTFREE on it again. 743 */ 744 ro->ro_rt = NULL; 745 if (have_ia_ref) 746 ifa_free(&ia->ia_ifa); 747 return (error); 748 bad: 749 m_freem(m); 750 goto done; 751 } 752 753 /* 754 * Create a chain of fragments which fit the given mtu. m_frag points to the 755 * mbuf to be fragmented; on return it points to the chain with the fragments. 756 * Return 0 if no error. If error, m_frag may contain a partially built 757 * chain of fragments that should be freed by the caller. 758 * 759 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 760 */ 761 int 762 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 763 u_long if_hwassist_flags) 764 { 765 int error = 0; 766 int hlen = ip->ip_hl << 2; 767 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 768 int off; 769 struct mbuf *m0 = *m_frag; /* the original packet */ 770 int firstlen; 771 struct mbuf **mnext; 772 int nfrags; 773 uint16_t ip_len, ip_off; 774 775 ip_len = ntohs(ip->ip_len); 776 ip_off = ntohs(ip->ip_off); 777 778 if (ip_off & IP_DF) { /* Fragmentation not allowed */ 779 IPSTAT_INC(ips_cantfrag); 780 return EMSGSIZE; 781 } 782 783 /* 784 * Must be able to put at least 8 bytes per fragment. 785 */ 786 if (len < 8) 787 return EMSGSIZE; 788 789 /* 790 * If the interface will not calculate checksums on 791 * fragmented packets, then do it here. 792 */ 793 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 794 in_delayed_cksum(m0); 795 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 796 } 797 #ifdef SCTP 798 if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { 799 sctp_delayed_cksum(m0, hlen); 800 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; 801 } 802 #endif 803 if (len > PAGE_SIZE) { 804 /* 805 * Fragment large datagrams such that each segment 806 * contains a multiple of PAGE_SIZE amount of data, 807 * plus headers. This enables a receiver to perform 808 * page-flipping zero-copy optimizations. 809 * 810 * XXX When does this help given that sender and receiver 811 * could have different page sizes, and also mtu could 812 * be less than the receiver's page size ? 813 */ 814 int newlen; 815 816 off = MIN(mtu, m0->m_pkthdr.len); 817 818 /* 819 * firstlen (off - hlen) must be aligned on an 820 * 8-byte boundary 821 */ 822 if (off < hlen) 823 goto smart_frag_failure; 824 off = ((off - hlen) & ~7) + hlen; 825 newlen = (~PAGE_MASK) & mtu; 826 if ((newlen + sizeof (struct ip)) > mtu) { 827 /* we failed, go back the default */ 828 smart_frag_failure: 829 newlen = len; 830 off = hlen + len; 831 } 832 len = newlen; 833 834 } else { 835 off = hlen + len; 836 } 837 838 firstlen = off - hlen; 839 mnext = &m0->m_nextpkt; /* pointer to next packet */ 840 841 /* 842 * Loop through length of segment after first fragment, 843 * make new header and copy data of each part and link onto chain. 844 * Here, m0 is the original packet, m is the fragment being created. 845 * The fragments are linked off the m_nextpkt of the original 846 * packet, which after processing serves as the first fragment. 847 */ 848 for (nfrags = 1; off < ip_len; off += len, nfrags++) { 849 struct ip *mhip; /* ip header on the fragment */ 850 struct mbuf *m; 851 int mhlen = sizeof (struct ip); 852 853 m = m_gethdr(M_NOWAIT, MT_DATA); 854 if (m == NULL) { 855 error = ENOBUFS; 856 IPSTAT_INC(ips_odropped); 857 goto done; 858 } 859 /* 860 * Make sure the complete packet header gets copied 861 * from the originating mbuf to the newly created 862 * mbuf. This also ensures that existing firewall 863 * classification(s), VLAN tags and so on get copied 864 * to the resulting fragmented packet(s): 865 */ 866 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { 867 m_free(m); 868 error = ENOBUFS; 869 IPSTAT_INC(ips_odropped); 870 goto done; 871 } 872 /* 873 * In the first mbuf, leave room for the link header, then 874 * copy the original IP header including options. The payload 875 * goes into an additional mbuf chain returned by m_copym(). 876 */ 877 m->m_data += max_linkhdr; 878 mhip = mtod(m, struct ip *); 879 *mhip = *ip; 880 if (hlen > sizeof (struct ip)) { 881 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 882 mhip->ip_v = IPVERSION; 883 mhip->ip_hl = mhlen >> 2; 884 } 885 m->m_len = mhlen; 886 /* XXX do we need to add ip_off below ? */ 887 mhip->ip_off = ((off - hlen) >> 3) + ip_off; 888 if (off + len >= ip_len) 889 len = ip_len - off; 890 else 891 mhip->ip_off |= IP_MF; 892 mhip->ip_len = htons((u_short)(len + mhlen)); 893 m->m_next = m_copym(m0, off, len, M_NOWAIT); 894 if (m->m_next == NULL) { /* copy failed */ 895 m_free(m); 896 error = ENOBUFS; /* ??? */ 897 IPSTAT_INC(ips_odropped); 898 goto done; 899 } 900 m->m_pkthdr.len = mhlen + len; 901 #ifdef MAC 902 mac_netinet_fragment(m0, m); 903 #endif 904 mhip->ip_off = htons(mhip->ip_off); 905 mhip->ip_sum = 0; 906 if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 907 mhip->ip_sum = in_cksum(m, mhlen); 908 m->m_pkthdr.csum_flags &= ~CSUM_IP; 909 } 910 *mnext = m; 911 mnext = &m->m_nextpkt; 912 } 913 IPSTAT_ADD(ips_ofragments, nfrags); 914 915 /* 916 * Update first fragment by trimming what's been copied out 917 * and updating header. 918 */ 919 m_adj(m0, hlen + firstlen - ip_len); 920 m0->m_pkthdr.len = hlen + firstlen; 921 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 922 ip->ip_off = htons(ip_off | IP_MF); 923 ip->ip_sum = 0; 924 if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 925 ip->ip_sum = in_cksum(m0, hlen); 926 m0->m_pkthdr.csum_flags &= ~CSUM_IP; 927 } 928 929 done: 930 *m_frag = m0; 931 return error; 932 } 933 934 void 935 in_delayed_cksum(struct mbuf *m) 936 { 937 struct ip *ip; 938 uint16_t csum, offset, ip_len; 939 940 ip = mtod(m, struct ip *); 941 offset = ip->ip_hl << 2 ; 942 ip_len = ntohs(ip->ip_len); 943 csum = in_cksum_skip(m, ip_len, offset); 944 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 945 csum = 0xffff; 946 offset += m->m_pkthdr.csum_data; /* checksum offset */ 947 948 /* find the mbuf in the chain where the checksum starts*/ 949 while ((m != NULL) && (offset >= m->m_len)) { 950 offset -= m->m_len; 951 m = m->m_next; 952 } 953 KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain.")); 954 KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs.")); 955 *(u_short *)(m->m_data + offset) = csum; 956 } 957 958 /* 959 * IP socket option processing. 960 */ 961 int 962 ip_ctloutput(struct socket *so, struct sockopt *sopt) 963 { 964 struct inpcb *inp = sotoinpcb(so); 965 int error, optval; 966 #ifdef RSS 967 uint32_t rss_bucket; 968 int retval; 969 #endif 970 971 error = optval = 0; 972 if (sopt->sopt_level != IPPROTO_IP) { 973 error = EINVAL; 974 975 if (sopt->sopt_level == SOL_SOCKET && 976 sopt->sopt_dir == SOPT_SET) { 977 switch (sopt->sopt_name) { 978 case SO_REUSEADDR: 979 INP_WLOCK(inp); 980 if ((so->so_options & SO_REUSEADDR) != 0) 981 inp->inp_flags2 |= INP_REUSEADDR; 982 else 983 inp->inp_flags2 &= ~INP_REUSEADDR; 984 INP_WUNLOCK(inp); 985 error = 0; 986 break; 987 case SO_REUSEPORT: 988 INP_WLOCK(inp); 989 if ((so->so_options & SO_REUSEPORT) != 0) 990 inp->inp_flags2 |= INP_REUSEPORT; 991 else 992 inp->inp_flags2 &= ~INP_REUSEPORT; 993 INP_WUNLOCK(inp); 994 error = 0; 995 break; 996 case SO_SETFIB: 997 INP_WLOCK(inp); 998 inp->inp_inc.inc_fibnum = so->so_fibnum; 999 INP_WUNLOCK(inp); 1000 error = 0; 1001 break; 1002 case SO_MAX_PACING_RATE: 1003 #ifdef RATELIMIT 1004 INP_WLOCK(inp); 1005 inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; 1006 INP_WUNLOCK(inp); 1007 error = 0; 1008 #else 1009 error = EOPNOTSUPP; 1010 #endif 1011 break; 1012 default: 1013 break; 1014 } 1015 } 1016 return (error); 1017 } 1018 1019 switch (sopt->sopt_dir) { 1020 case SOPT_SET: 1021 switch (sopt->sopt_name) { 1022 case IP_OPTIONS: 1023 #ifdef notyet 1024 case IP_RETOPTS: 1025 #endif 1026 { 1027 struct mbuf *m; 1028 if (sopt->sopt_valsize > MLEN) { 1029 error = EMSGSIZE; 1030 break; 1031 } 1032 m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); 1033 if (m == NULL) { 1034 error = ENOBUFS; 1035 break; 1036 } 1037 m->m_len = sopt->sopt_valsize; 1038 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1039 m->m_len); 1040 if (error) { 1041 m_free(m); 1042 break; 1043 } 1044 INP_WLOCK(inp); 1045 error = ip_pcbopts(inp, sopt->sopt_name, m); 1046 INP_WUNLOCK(inp); 1047 return (error); 1048 } 1049 1050 case IP_BINDANY: 1051 if (sopt->sopt_td != NULL) { 1052 error = priv_check(sopt->sopt_td, 1053 PRIV_NETINET_BINDANY); 1054 if (error) 1055 break; 1056 } 1057 /* FALLTHROUGH */ 1058 case IP_BINDMULTI: 1059 #ifdef RSS 1060 case IP_RSS_LISTEN_BUCKET: 1061 #endif 1062 case IP_TOS: 1063 case IP_TTL: 1064 case IP_MINTTL: 1065 case IP_RECVOPTS: 1066 case IP_RECVRETOPTS: 1067 case IP_ORIGDSTADDR: 1068 case IP_RECVDSTADDR: 1069 case IP_RECVTTL: 1070 case IP_RECVIF: 1071 case IP_ONESBCAST: 1072 case IP_DONTFRAG: 1073 case IP_RECVTOS: 1074 case IP_RECVFLOWID: 1075 #ifdef RSS 1076 case IP_RECVRSSBUCKETID: 1077 #endif 1078 error = sooptcopyin(sopt, &optval, sizeof optval, 1079 sizeof optval); 1080 if (error) 1081 break; 1082 1083 switch (sopt->sopt_name) { 1084 case IP_TOS: 1085 inp->inp_ip_tos = optval; 1086 break; 1087 1088 case IP_TTL: 1089 inp->inp_ip_ttl = optval; 1090 break; 1091 1092 case IP_MINTTL: 1093 if (optval >= 0 && optval <= MAXTTL) 1094 inp->inp_ip_minttl = optval; 1095 else 1096 error = EINVAL; 1097 break; 1098 1099 #define OPTSET(bit) do { \ 1100 INP_WLOCK(inp); \ 1101 if (optval) \ 1102 inp->inp_flags |= bit; \ 1103 else \ 1104 inp->inp_flags &= ~bit; \ 1105 INP_WUNLOCK(inp); \ 1106 } while (0) 1107 1108 #define OPTSET2(bit, val) do { \ 1109 INP_WLOCK(inp); \ 1110 if (val) \ 1111 inp->inp_flags2 |= bit; \ 1112 else \ 1113 inp->inp_flags2 &= ~bit; \ 1114 INP_WUNLOCK(inp); \ 1115 } while (0) 1116 1117 case IP_RECVOPTS: 1118 OPTSET(INP_RECVOPTS); 1119 break; 1120 1121 case IP_RECVRETOPTS: 1122 OPTSET(INP_RECVRETOPTS); 1123 break; 1124 1125 case IP_RECVDSTADDR: 1126 OPTSET(INP_RECVDSTADDR); 1127 break; 1128 1129 case IP_ORIGDSTADDR: 1130 OPTSET2(INP_ORIGDSTADDR, optval); 1131 break; 1132 1133 case IP_RECVTTL: 1134 OPTSET(INP_RECVTTL); 1135 break; 1136 1137 case IP_RECVIF: 1138 OPTSET(INP_RECVIF); 1139 break; 1140 1141 case IP_ONESBCAST: 1142 OPTSET(INP_ONESBCAST); 1143 break; 1144 case IP_DONTFRAG: 1145 OPTSET(INP_DONTFRAG); 1146 break; 1147 case IP_BINDANY: 1148 OPTSET(INP_BINDANY); 1149 break; 1150 case IP_RECVTOS: 1151 OPTSET(INP_RECVTOS); 1152 break; 1153 case IP_BINDMULTI: 1154 OPTSET2(INP_BINDMULTI, optval); 1155 break; 1156 case IP_RECVFLOWID: 1157 OPTSET2(INP_RECVFLOWID, optval); 1158 break; 1159 #ifdef RSS 1160 case IP_RSS_LISTEN_BUCKET: 1161 if ((optval >= 0) && 1162 (optval < rss_getnumbuckets())) { 1163 inp->inp_rss_listen_bucket = optval; 1164 OPTSET2(INP_RSS_BUCKET_SET, 1); 1165 } else { 1166 error = EINVAL; 1167 } 1168 break; 1169 case IP_RECVRSSBUCKETID: 1170 OPTSET2(INP_RECVRSSBUCKETID, optval); 1171 break; 1172 #endif 1173 } 1174 break; 1175 #undef OPTSET 1176 #undef OPTSET2 1177 1178 /* 1179 * Multicast socket options are processed by the in_mcast 1180 * module. 1181 */ 1182 case IP_MULTICAST_IF: 1183 case IP_MULTICAST_VIF: 1184 case IP_MULTICAST_TTL: 1185 case IP_MULTICAST_LOOP: 1186 case IP_ADD_MEMBERSHIP: 1187 case IP_DROP_MEMBERSHIP: 1188 case IP_ADD_SOURCE_MEMBERSHIP: 1189 case IP_DROP_SOURCE_MEMBERSHIP: 1190 case IP_BLOCK_SOURCE: 1191 case IP_UNBLOCK_SOURCE: 1192 case IP_MSFILTER: 1193 case MCAST_JOIN_GROUP: 1194 case MCAST_LEAVE_GROUP: 1195 case MCAST_JOIN_SOURCE_GROUP: 1196 case MCAST_LEAVE_SOURCE_GROUP: 1197 case MCAST_BLOCK_SOURCE: 1198 case MCAST_UNBLOCK_SOURCE: 1199 error = inp_setmoptions(inp, sopt); 1200 break; 1201 1202 case IP_PORTRANGE: 1203 error = sooptcopyin(sopt, &optval, sizeof optval, 1204 sizeof optval); 1205 if (error) 1206 break; 1207 1208 INP_WLOCK(inp); 1209 switch (optval) { 1210 case IP_PORTRANGE_DEFAULT: 1211 inp->inp_flags &= ~(INP_LOWPORT); 1212 inp->inp_flags &= ~(INP_HIGHPORT); 1213 break; 1214 1215 case IP_PORTRANGE_HIGH: 1216 inp->inp_flags &= ~(INP_LOWPORT); 1217 inp->inp_flags |= INP_HIGHPORT; 1218 break; 1219 1220 case IP_PORTRANGE_LOW: 1221 inp->inp_flags &= ~(INP_HIGHPORT); 1222 inp->inp_flags |= INP_LOWPORT; 1223 break; 1224 1225 default: 1226 error = EINVAL; 1227 break; 1228 } 1229 INP_WUNLOCK(inp); 1230 break; 1231 1232 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1233 case IP_IPSEC_POLICY: 1234 if (IPSEC_ENABLED(ipv4)) { 1235 error = IPSEC_PCBCTL(ipv4, inp, sopt); 1236 break; 1237 } 1238 /* FALLTHROUGH */ 1239 #endif /* IPSEC */ 1240 1241 default: 1242 error = ENOPROTOOPT; 1243 break; 1244 } 1245 break; 1246 1247 case SOPT_GET: 1248 switch (sopt->sopt_name) { 1249 case IP_OPTIONS: 1250 case IP_RETOPTS: 1251 if (inp->inp_options) 1252 error = sooptcopyout(sopt, 1253 mtod(inp->inp_options, 1254 char *), 1255 inp->inp_options->m_len); 1256 else 1257 sopt->sopt_valsize = 0; 1258 break; 1259 1260 case IP_TOS: 1261 case IP_TTL: 1262 case IP_MINTTL: 1263 case IP_RECVOPTS: 1264 case IP_RECVRETOPTS: 1265 case IP_ORIGDSTADDR: 1266 case IP_RECVDSTADDR: 1267 case IP_RECVTTL: 1268 case IP_RECVIF: 1269 case IP_PORTRANGE: 1270 case IP_ONESBCAST: 1271 case IP_DONTFRAG: 1272 case IP_BINDANY: 1273 case IP_RECVTOS: 1274 case IP_BINDMULTI: 1275 case IP_FLOWID: 1276 case IP_FLOWTYPE: 1277 case IP_RECVFLOWID: 1278 #ifdef RSS 1279 case IP_RSSBUCKETID: 1280 case IP_RECVRSSBUCKETID: 1281 #endif 1282 switch (sopt->sopt_name) { 1283 1284 case IP_TOS: 1285 optval = inp->inp_ip_tos; 1286 break; 1287 1288 case IP_TTL: 1289 optval = inp->inp_ip_ttl; 1290 break; 1291 1292 case IP_MINTTL: 1293 optval = inp->inp_ip_minttl; 1294 break; 1295 1296 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1297 #define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0) 1298 1299 case IP_RECVOPTS: 1300 optval = OPTBIT(INP_RECVOPTS); 1301 break; 1302 1303 case IP_RECVRETOPTS: 1304 optval = OPTBIT(INP_RECVRETOPTS); 1305 break; 1306 1307 case IP_RECVDSTADDR: 1308 optval = OPTBIT(INP_RECVDSTADDR); 1309 break; 1310 1311 case IP_ORIGDSTADDR: 1312 optval = OPTBIT2(INP_ORIGDSTADDR); 1313 break; 1314 1315 case IP_RECVTTL: 1316 optval = OPTBIT(INP_RECVTTL); 1317 break; 1318 1319 case IP_RECVIF: 1320 optval = OPTBIT(INP_RECVIF); 1321 break; 1322 1323 case IP_PORTRANGE: 1324 if (inp->inp_flags & INP_HIGHPORT) 1325 optval = IP_PORTRANGE_HIGH; 1326 else if (inp->inp_flags & INP_LOWPORT) 1327 optval = IP_PORTRANGE_LOW; 1328 else 1329 optval = 0; 1330 break; 1331 1332 case IP_ONESBCAST: 1333 optval = OPTBIT(INP_ONESBCAST); 1334 break; 1335 case IP_DONTFRAG: 1336 optval = OPTBIT(INP_DONTFRAG); 1337 break; 1338 case IP_BINDANY: 1339 optval = OPTBIT(INP_BINDANY); 1340 break; 1341 case IP_RECVTOS: 1342 optval = OPTBIT(INP_RECVTOS); 1343 break; 1344 case IP_FLOWID: 1345 optval = inp->inp_flowid; 1346 break; 1347 case IP_FLOWTYPE: 1348 optval = inp->inp_flowtype; 1349 break; 1350 case IP_RECVFLOWID: 1351 optval = OPTBIT2(INP_RECVFLOWID); 1352 break; 1353 #ifdef RSS 1354 case IP_RSSBUCKETID: 1355 retval = rss_hash2bucket(inp->inp_flowid, 1356 inp->inp_flowtype, 1357 &rss_bucket); 1358 if (retval == 0) 1359 optval = rss_bucket; 1360 else 1361 error = EINVAL; 1362 break; 1363 case IP_RECVRSSBUCKETID: 1364 optval = OPTBIT2(INP_RECVRSSBUCKETID); 1365 break; 1366 #endif 1367 case IP_BINDMULTI: 1368 optval = OPTBIT2(INP_BINDMULTI); 1369 break; 1370 } 1371 error = sooptcopyout(sopt, &optval, sizeof optval); 1372 break; 1373 1374 /* 1375 * Multicast socket options are processed by the in_mcast 1376 * module. 1377 */ 1378 case IP_MULTICAST_IF: 1379 case IP_MULTICAST_VIF: 1380 case IP_MULTICAST_TTL: 1381 case IP_MULTICAST_LOOP: 1382 case IP_MSFILTER: 1383 error = inp_getmoptions(inp, sopt); 1384 break; 1385 1386 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1387 case IP_IPSEC_POLICY: 1388 if (IPSEC_ENABLED(ipv4)) { 1389 error = IPSEC_PCBCTL(ipv4, inp, sopt); 1390 break; 1391 } 1392 /* FALLTHROUGH */ 1393 #endif /* IPSEC */ 1394 1395 default: 1396 error = ENOPROTOOPT; 1397 break; 1398 } 1399 break; 1400 } 1401 return (error); 1402 } 1403 1404 /* 1405 * Routine called from ip_output() to loop back a copy of an IP multicast 1406 * packet to the input queue of a specified interface. Note that this 1407 * calls the output routine of the loopback "driver", but with an interface 1408 * pointer that might NOT be a loopback interface -- evil, but easier than 1409 * replicating that code here. 1410 */ 1411 static void 1412 ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen) 1413 { 1414 struct ip *ip; 1415 struct mbuf *copym; 1416 1417 /* 1418 * Make a deep copy of the packet because we're going to 1419 * modify the pack in order to generate checksums. 1420 */ 1421 copym = m_dup(m, M_NOWAIT); 1422 if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen)) 1423 copym = m_pullup(copym, hlen); 1424 if (copym != NULL) { 1425 /* If needed, compute the checksum and mark it as valid. */ 1426 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1427 in_delayed_cksum(copym); 1428 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1429 copym->m_pkthdr.csum_flags |= 1430 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1431 copym->m_pkthdr.csum_data = 0xffff; 1432 } 1433 /* 1434 * We don't bother to fragment if the IP length is greater 1435 * than the interface's MTU. Can this possibly matter? 1436 */ 1437 ip = mtod(copym, struct ip *); 1438 ip->ip_sum = 0; 1439 ip->ip_sum = in_cksum(copym, hlen); 1440 if_simloop(ifp, copym, AF_INET, 0); 1441 } 1442 } 1443