1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_inet.h" 38 #include "opt_ratelimit.h" 39 #include "opt_ipsec.h" 40 #include "opt_mbuf_stress_test.h" 41 #include "opt_mpath.h" 42 #include "opt_route.h" 43 #include "opt_sctp.h" 44 #include "opt_rss.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/mbuf.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/protosw.h> 55 #include <sys/rmlock.h> 56 #include <sys/sdt.h> 57 #include <sys/socket.h> 58 #include <sys/socketvar.h> 59 #include <sys/sysctl.h> 60 #include <sys/ucred.h> 61 62 #include <net/if.h> 63 #include <net/if_var.h> 64 #include <net/if_llatbl.h> 65 #include <net/netisr.h> 66 #include <net/pfil.h> 67 #include <net/route.h> 68 #ifdef RADIX_MPATH 69 #include <net/radix_mpath.h> 70 #endif 71 #include <net/rss_config.h> 72 #include <net/vnet.h> 73 74 #include <netinet/in.h> 75 #include <netinet/in_kdtrace.h> 76 #include <netinet/in_systm.h> 77 #include <netinet/ip.h> 78 #include <netinet/in_pcb.h> 79 #include <netinet/in_rss.h> 80 #include <netinet/in_var.h> 81 #include <netinet/ip_var.h> 82 #include <netinet/ip_options.h> 83 #ifdef SCTP 84 #include <netinet/sctp.h> 85 #include <netinet/sctp_crc32.h> 86 #endif 87 88 #include <netipsec/ipsec_support.h> 89 90 #include <machine/in_cksum.h> 91 92 #include <security/mac/mac_framework.h> 93 94 #ifdef MBUF_STRESS_TEST 95 static int mbuf_frag_size = 0; 96 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 97 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 98 #endif 99 100 static void ip_mloopback(struct ifnet *, const struct mbuf *, int); 101 102 103 extern int in_mcast_loop; 104 extern struct protosw inetsw[]; 105 106 static inline int 107 ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp, 108 struct sockaddr_in *dst, int *fibnum, int *error) 109 { 110 struct m_tag *fwd_tag = NULL; 111 struct mbuf *m; 112 struct in_addr odst; 113 struct ip *ip; 114 115 m = *mp; 116 ip = mtod(m, struct ip *); 117 118 /* Run through list of hooks for output packets. */ 119 odst.s_addr = ip->ip_dst.s_addr; 120 *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, 0, inp); 121 m = *mp; 122 if ((*error) != 0 || m == NULL) 123 return 1; /* Finished */ 124 125 ip = mtod(m, struct ip *); 126 127 /* See if destination IP address was changed by packet filter. */ 128 if (odst.s_addr != ip->ip_dst.s_addr) { 129 m->m_flags |= M_SKIP_FIREWALL; 130 /* If destination is now ourself drop to ip_input(). */ 131 if (in_localip(ip->ip_dst)) { 132 m->m_flags |= M_FASTFWD_OURS; 133 if (m->m_pkthdr.rcvif == NULL) 134 m->m_pkthdr.rcvif = V_loif; 135 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 136 m->m_pkthdr.csum_flags |= 137 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 138 m->m_pkthdr.csum_data = 0xffff; 139 } 140 m->m_pkthdr.csum_flags |= 141 CSUM_IP_CHECKED | CSUM_IP_VALID; 142 #ifdef SCTP 143 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 144 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 145 #endif 146 *error = netisr_queue(NETISR_IP, m); 147 return 1; /* Finished */ 148 } 149 150 bzero(dst, sizeof(*dst)); 151 dst->sin_family = AF_INET; 152 dst->sin_len = sizeof(*dst); 153 dst->sin_addr = ip->ip_dst; 154 155 return -1; /* Reloop */ 156 } 157 /* See if fib was changed by packet filter. */ 158 if ((*fibnum) != M_GETFIB(m)) { 159 m->m_flags |= M_SKIP_FIREWALL; 160 *fibnum = M_GETFIB(m); 161 return -1; /* Reloop for FIB change */ 162 } 163 164 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ 165 if (m->m_flags & M_FASTFWD_OURS) { 166 if (m->m_pkthdr.rcvif == NULL) 167 m->m_pkthdr.rcvif = V_loif; 168 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 169 m->m_pkthdr.csum_flags |= 170 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 171 m->m_pkthdr.csum_data = 0xffff; 172 } 173 #ifdef SCTP 174 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 175 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 176 #endif 177 m->m_pkthdr.csum_flags |= 178 CSUM_IP_CHECKED | CSUM_IP_VALID; 179 180 *error = netisr_queue(NETISR_IP, m); 181 return 1; /* Finished */ 182 } 183 /* Or forward to some other address? */ 184 if ((m->m_flags & M_IP_NEXTHOP) && 185 ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { 186 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 187 m->m_flags |= M_SKIP_FIREWALL; 188 m->m_flags &= ~M_IP_NEXTHOP; 189 m_tag_delete(m, fwd_tag); 190 191 return -1; /* Reloop for CHANGE of dst */ 192 } 193 194 return 0; 195 } 196 197 /* 198 * IP output. The packet in mbuf chain m contains a skeletal IP 199 * header (with len, off, ttl, proto, tos, src, dst). 200 * The mbuf chain containing the packet will be freed. 201 * The mbuf opt, if present, will not be freed. 202 * If route ro is present and has ro_rt initialized, route lookup would be 203 * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, 204 * then result of route lookup is stored in ro->ro_rt. 205 * 206 * In the IP forwarding case, the packet will arrive with options already 207 * inserted, so must have a NULL opt pointer. 208 */ 209 int 210 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 211 struct ip_moptions *imo, struct inpcb *inp) 212 { 213 struct rm_priotracker in_ifa_tracker; 214 struct ip *ip; 215 struct ifnet *ifp = NULL; /* keep compiler happy */ 216 struct mbuf *m0; 217 int hlen = sizeof (struct ip); 218 int mtu; 219 int error = 0; 220 struct sockaddr_in *dst; 221 const struct sockaddr_in *gw; 222 struct in_ifaddr *ia; 223 int isbroadcast; 224 uint16_t ip_len, ip_off; 225 struct route iproute; 226 struct rtentry *rte; /* cache for ro->ro_rt */ 227 uint32_t fibnum; 228 int have_ia_ref; 229 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 230 int no_route_but_check_spd = 0; 231 #endif 232 M_ASSERTPKTHDR(m); 233 234 if (inp != NULL) { 235 INP_LOCK_ASSERT(inp); 236 M_SETFIB(m, inp->inp_inc.inc_fibnum); 237 if ((flags & IP_NODEFAULTFLOWID) == 0) { 238 m->m_pkthdr.flowid = inp->inp_flowid; 239 M_HASHTYPE_SET(m, inp->inp_flowtype); 240 } 241 } 242 243 if (ro == NULL) { 244 ro = &iproute; 245 bzero(ro, sizeof (*ro)); 246 } 247 248 if (opt) { 249 int len = 0; 250 m = ip_insertoptions(m, opt, &len); 251 if (len != 0) 252 hlen = len; /* ip->ip_hl is updated above */ 253 } 254 ip = mtod(m, struct ip *); 255 ip_len = ntohs(ip->ip_len); 256 ip_off = ntohs(ip->ip_off); 257 258 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 259 ip->ip_v = IPVERSION; 260 ip->ip_hl = hlen >> 2; 261 ip_fillid(ip); 262 IPSTAT_INC(ips_localout); 263 } else { 264 /* Header already set, fetch hlen from there */ 265 hlen = ip->ip_hl << 2; 266 } 267 268 /* 269 * dst/gw handling: 270 * 271 * dst can be rewritten but always points to &ro->ro_dst. 272 * gw is readonly but can point either to dst OR rt_gateway, 273 * therefore we need restore gw if we're redoing lookup. 274 */ 275 gw = dst = (struct sockaddr_in *)&ro->ro_dst; 276 fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); 277 rte = ro->ro_rt; 278 if (rte == NULL) { 279 bzero(dst, sizeof(*dst)); 280 dst->sin_family = AF_INET; 281 dst->sin_len = sizeof(*dst); 282 dst->sin_addr = ip->ip_dst; 283 } 284 again: 285 /* 286 * Validate route against routing table additions; 287 * a better/more specific route might have been added. 288 */ 289 if (inp) 290 RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); 291 /* 292 * If there is a cached route, 293 * check that it is to the same destination 294 * and is still up. If not, free it and try again. 295 * The address family should also be checked in case of sharing the 296 * cache with IPv6. 297 * Also check whether routing cache needs invalidation. 298 */ 299 rte = ro->ro_rt; 300 if (rte && ((rte->rt_flags & RTF_UP) == 0 || 301 rte->rt_ifp == NULL || 302 !RT_LINK_IS_UP(rte->rt_ifp) || 303 dst->sin_family != AF_INET || 304 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 305 RO_INVALIDATE_CACHE(ro); 306 rte = NULL; 307 } 308 ia = NULL; 309 have_ia_ref = 0; 310 /* 311 * If routing to interface only, short circuit routing lookup. 312 * The use of an all-ones broadcast address implies this; an 313 * interface is specified by the broadcast address of an interface, 314 * or the destination address of a ptp interface. 315 */ 316 if (flags & IP_SENDONES) { 317 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst), 318 M_GETFIB(m)))) == NULL && 319 (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), 320 M_GETFIB(m)))) == NULL) { 321 IPSTAT_INC(ips_noroute); 322 error = ENETUNREACH; 323 goto bad; 324 } 325 have_ia_ref = 1; 326 ip->ip_dst.s_addr = INADDR_BROADCAST; 327 dst->sin_addr = ip->ip_dst; 328 ifp = ia->ia_ifp; 329 ip->ip_ttl = 1; 330 isbroadcast = 1; 331 } else if (flags & IP_ROUTETOIF) { 332 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), 333 M_GETFIB(m)))) == NULL && 334 (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0, 335 M_GETFIB(m)))) == NULL) { 336 IPSTAT_INC(ips_noroute); 337 error = ENETUNREACH; 338 goto bad; 339 } 340 have_ia_ref = 1; 341 ifp = ia->ia_ifp; 342 ip->ip_ttl = 1; 343 isbroadcast = ifp->if_flags & IFF_BROADCAST ? 344 in_ifaddr_broadcast(dst->sin_addr, ia) : 0; 345 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 346 imo != NULL && imo->imo_multicast_ifp != NULL) { 347 /* 348 * Bypass the normal routing lookup for multicast 349 * packets if the interface is specified. 350 */ 351 ifp = imo->imo_multicast_ifp; 352 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 353 if (ia) 354 have_ia_ref = 1; 355 isbroadcast = 0; /* fool gcc */ 356 } else { 357 /* 358 * We want to do any cloning requested by the link layer, 359 * as this is probably required in all cases for correct 360 * operation (as it is for ARP). 361 */ 362 if (rte == NULL) { 363 #ifdef RADIX_MPATH 364 rtalloc_mpath_fib(ro, 365 ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 366 fibnum); 367 #else 368 in_rtalloc_ign(ro, 0, fibnum); 369 #endif 370 rte = ro->ro_rt; 371 } 372 if (rte == NULL || 373 (rte->rt_flags & RTF_UP) == 0 || 374 rte->rt_ifp == NULL || 375 !RT_LINK_IS_UP(rte->rt_ifp)) { 376 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 377 /* 378 * There is no route for this packet, but it is 379 * possible that a matching SPD entry exists. 380 */ 381 no_route_but_check_spd = 1; 382 mtu = 0; /* Silence GCC warning. */ 383 goto sendit; 384 #endif 385 IPSTAT_INC(ips_noroute); 386 error = EHOSTUNREACH; 387 goto bad; 388 } 389 ia = ifatoia(rte->rt_ifa); 390 ifp = rte->rt_ifp; 391 counter_u64_add(rte->rt_pksent, 1); 392 rt_update_ro_flags(ro); 393 if (rte->rt_flags & RTF_GATEWAY) 394 gw = (struct sockaddr_in *)rte->rt_gateway; 395 if (rte->rt_flags & RTF_HOST) 396 isbroadcast = (rte->rt_flags & RTF_BROADCAST); 397 else if (ifp->if_flags & IFF_BROADCAST) 398 isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); 399 else 400 isbroadcast = 0; 401 } 402 403 /* 404 * Calculate MTU. If we have a route that is up, use that, 405 * otherwise use the interface's MTU. 406 */ 407 if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) 408 mtu = rte->rt_mtu; 409 else 410 mtu = ifp->if_mtu; 411 /* Catch a possible divide by zero later. */ 412 KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", 413 __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); 414 415 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 416 m->m_flags |= M_MCAST; 417 /* 418 * IP destination address is multicast. Make sure "gw" 419 * still points to the address in "ro". (It may have been 420 * changed to point to a gateway address, above.) 421 */ 422 gw = dst; 423 /* 424 * See if the caller provided any multicast options 425 */ 426 if (imo != NULL) { 427 ip->ip_ttl = imo->imo_multicast_ttl; 428 if (imo->imo_multicast_vif != -1) 429 ip->ip_src.s_addr = 430 ip_mcast_src ? 431 ip_mcast_src(imo->imo_multicast_vif) : 432 INADDR_ANY; 433 } else 434 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 435 /* 436 * Confirm that the outgoing interface supports multicast. 437 */ 438 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 439 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 440 IPSTAT_INC(ips_noroute); 441 error = ENETUNREACH; 442 goto bad; 443 } 444 } 445 /* 446 * If source address not specified yet, use address 447 * of outgoing interface. 448 */ 449 if (ip->ip_src.s_addr == INADDR_ANY) { 450 /* Interface may have no addresses. */ 451 if (ia != NULL) 452 ip->ip_src = IA_SIN(ia)->sin_addr; 453 } 454 455 if ((imo == NULL && in_mcast_loop) || 456 (imo && imo->imo_multicast_loop)) { 457 /* 458 * Loop back multicast datagram if not expressly 459 * forbidden to do so, even if we are not a member 460 * of the group; ip_input() will filter it later, 461 * thus deferring a hash lookup and mutex acquisition 462 * at the expense of a cheap copy using m_copym(). 463 */ 464 ip_mloopback(ifp, m, hlen); 465 } else { 466 /* 467 * If we are acting as a multicast router, perform 468 * multicast forwarding as if the packet had just 469 * arrived on the interface to which we are about 470 * to send. The multicast forwarding function 471 * recursively calls this function, using the 472 * IP_FORWARDING flag to prevent infinite recursion. 473 * 474 * Multicasts that are looped back by ip_mloopback(), 475 * above, will be forwarded by the ip_input() routine, 476 * if necessary. 477 */ 478 if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { 479 /* 480 * If rsvp daemon is not running, do not 481 * set ip_moptions. This ensures that the packet 482 * is multicast and not just sent down one link 483 * as prescribed by rsvpd. 484 */ 485 if (!V_rsvp_on) 486 imo = NULL; 487 if (ip_mforward && 488 ip_mforward(ip, ifp, m, imo) != 0) { 489 m_freem(m); 490 goto done; 491 } 492 } 493 } 494 495 /* 496 * Multicasts with a time-to-live of zero may be looped- 497 * back, above, but must not be transmitted on a network. 498 * Also, multicasts addressed to the loopback interface 499 * are not sent -- the above call to ip_mloopback() will 500 * loop back a copy. ip_input() will drop the copy if 501 * this host does not belong to the destination group on 502 * the loopback interface. 503 */ 504 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 505 m_freem(m); 506 goto done; 507 } 508 509 goto sendit; 510 } 511 512 /* 513 * If the source address is not specified yet, use the address 514 * of the outoing interface. 515 */ 516 if (ip->ip_src.s_addr == INADDR_ANY) { 517 /* Interface may have no addresses. */ 518 if (ia != NULL) { 519 ip->ip_src = IA_SIN(ia)->sin_addr; 520 } 521 } 522 523 /* 524 * Look for broadcast address and 525 * verify user is allowed to send 526 * such a packet. 527 */ 528 if (isbroadcast) { 529 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 530 error = EADDRNOTAVAIL; 531 goto bad; 532 } 533 if ((flags & IP_ALLOWBROADCAST) == 0) { 534 error = EACCES; 535 goto bad; 536 } 537 /* don't allow broadcast messages to be fragmented */ 538 if (ip_len > mtu) { 539 error = EMSGSIZE; 540 goto bad; 541 } 542 m->m_flags |= M_BCAST; 543 } else { 544 m->m_flags &= ~M_BCAST; 545 } 546 547 sendit: 548 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 549 if (IPSEC_ENABLED(ipv4)) { 550 if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { 551 if (error == EINPROGRESS) 552 error = 0; 553 goto done; 554 } 555 } 556 /* 557 * Check if there was a route for this packet; return error if not. 558 */ 559 if (no_route_but_check_spd) { 560 IPSTAT_INC(ips_noroute); 561 error = EHOSTUNREACH; 562 goto bad; 563 } 564 /* Update variables that are affected by ipsec4_output(). */ 565 ip = mtod(m, struct ip *); 566 hlen = ip->ip_hl << 2; 567 #endif /* IPSEC */ 568 569 /* Jump over all PFIL processing if hooks are not active. */ 570 if (PFIL_HOOKED(&V_inet_pfil_hook)) { 571 switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) { 572 case 1: /* Finished */ 573 goto done; 574 575 case 0: /* Continue normally */ 576 ip = mtod(m, struct ip *); 577 break; 578 579 case -1: /* Need to try again */ 580 /* Reset everything for a new round */ 581 RO_RTFREE(ro); 582 if (have_ia_ref) 583 ifa_free(&ia->ia_ifa); 584 ro->ro_prepend = NULL; 585 rte = NULL; 586 gw = dst; 587 ip = mtod(m, struct ip *); 588 goto again; 589 590 } 591 } 592 593 /* 127/8 must not appear on wire - RFC1122. */ 594 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 595 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 596 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 597 IPSTAT_INC(ips_badaddr); 598 error = EADDRNOTAVAIL; 599 goto bad; 600 } 601 } 602 603 m->m_pkthdr.csum_flags |= CSUM_IP; 604 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { 605 in_delayed_cksum(m); 606 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 607 } 608 #ifdef SCTP 609 if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { 610 sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); 611 m->m_pkthdr.csum_flags &= ~CSUM_SCTP; 612 } 613 #endif 614 615 /* 616 * If small enough for interface, or the interface will take 617 * care of the fragmentation for us, we can just send directly. 618 */ 619 if (ip_len <= mtu || 620 (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { 621 ip->ip_sum = 0; 622 if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { 623 ip->ip_sum = in_cksum(m, hlen); 624 m->m_pkthdr.csum_flags &= ~CSUM_IP; 625 } 626 627 /* 628 * Record statistics for this interface address. 629 * With CSUM_TSO the byte/packet count will be slightly 630 * incorrect because we count the IP+TCP headers only 631 * once instead of for every generated packet. 632 */ 633 if (!(flags & IP_FORWARDING) && ia) { 634 if (m->m_pkthdr.csum_flags & CSUM_TSO) 635 counter_u64_add(ia->ia_ifa.ifa_opackets, 636 m->m_pkthdr.len / m->m_pkthdr.tso_segsz); 637 else 638 counter_u64_add(ia->ia_ifa.ifa_opackets, 1); 639 640 counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); 641 } 642 #ifdef MBUF_STRESS_TEST 643 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 644 m = m_fragment(m, M_NOWAIT, mbuf_frag_size); 645 #endif 646 /* 647 * Reset layer specific mbuf flags 648 * to avoid confusing lower layers. 649 */ 650 m_clrprotoflags(m); 651 IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); 652 #ifdef RATELIMIT 653 if (inp != NULL) { 654 if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) 655 in_pcboutput_txrtlmt(inp, ifp, m); 656 /* stamp send tag on mbuf */ 657 m->m_pkthdr.snd_tag = inp->inp_snd_tag; 658 } else { 659 m->m_pkthdr.snd_tag = NULL; 660 } 661 #endif 662 error = (*ifp->if_output)(ifp, m, 663 (const struct sockaddr *)gw, ro); 664 #ifdef RATELIMIT 665 /* check for route change */ 666 if (error == EAGAIN) 667 in_pcboutput_eagain(inp); 668 #endif 669 goto done; 670 } 671 672 /* Balk when DF bit is set or the interface didn't support TSO. */ 673 if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { 674 error = EMSGSIZE; 675 IPSTAT_INC(ips_cantfrag); 676 goto bad; 677 } 678 679 /* 680 * Too large for interface; fragment if possible. If successful, 681 * on return, m will point to a list of packets to be sent. 682 */ 683 error = ip_fragment(ip, &m, mtu, ifp->if_hwassist); 684 if (error) 685 goto bad; 686 for (; m; m = m0) { 687 m0 = m->m_nextpkt; 688 m->m_nextpkt = 0; 689 if (error == 0) { 690 /* Record statistics for this interface address. */ 691 if (ia != NULL) { 692 counter_u64_add(ia->ia_ifa.ifa_opackets, 1); 693 counter_u64_add(ia->ia_ifa.ifa_obytes, 694 m->m_pkthdr.len); 695 } 696 /* 697 * Reset layer specific mbuf flags 698 * to avoid confusing upper layers. 699 */ 700 m_clrprotoflags(m); 701 702 IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp, 703 mtod(m, struct ip *), NULL); 704 #ifdef RATELIMIT 705 if (inp != NULL) { 706 if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) 707 in_pcboutput_txrtlmt(inp, ifp, m); 708 /* stamp send tag on mbuf */ 709 m->m_pkthdr.snd_tag = inp->inp_snd_tag; 710 } else { 711 m->m_pkthdr.snd_tag = NULL; 712 } 713 #endif 714 error = (*ifp->if_output)(ifp, m, 715 (const struct sockaddr *)gw, ro); 716 #ifdef RATELIMIT 717 /* check for route change */ 718 if (error == EAGAIN) 719 in_pcboutput_eagain(inp); 720 #endif 721 } else 722 m_freem(m); 723 } 724 725 if (error == 0) 726 IPSTAT_INC(ips_fragmented); 727 728 done: 729 if (ro == &iproute) 730 RO_RTFREE(ro); 731 else if (rte == NULL) 732 /* 733 * If the caller supplied a route but somehow the reference 734 * to it has been released need to prevent the caller 735 * calling RTFREE on it again. 736 */ 737 ro->ro_rt = NULL; 738 if (have_ia_ref) 739 ifa_free(&ia->ia_ifa); 740 return (error); 741 bad: 742 m_freem(m); 743 goto done; 744 } 745 746 /* 747 * Create a chain of fragments which fit the given mtu. m_frag points to the 748 * mbuf to be fragmented; on return it points to the chain with the fragments. 749 * Return 0 if no error. If error, m_frag may contain a partially built 750 * chain of fragments that should be freed by the caller. 751 * 752 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 753 */ 754 int 755 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 756 u_long if_hwassist_flags) 757 { 758 int error = 0; 759 int hlen = ip->ip_hl << 2; 760 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 761 int off; 762 struct mbuf *m0 = *m_frag; /* the original packet */ 763 int firstlen; 764 struct mbuf **mnext; 765 int nfrags; 766 uint16_t ip_len, ip_off; 767 768 ip_len = ntohs(ip->ip_len); 769 ip_off = ntohs(ip->ip_off); 770 771 if (ip_off & IP_DF) { /* Fragmentation not allowed */ 772 IPSTAT_INC(ips_cantfrag); 773 return EMSGSIZE; 774 } 775 776 /* 777 * Must be able to put at least 8 bytes per fragment. 778 */ 779 if (len < 8) 780 return EMSGSIZE; 781 782 /* 783 * If the interface will not calculate checksums on 784 * fragmented packets, then do it here. 785 */ 786 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 787 in_delayed_cksum(m0); 788 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 789 } 790 #ifdef SCTP 791 if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { 792 sctp_delayed_cksum(m0, hlen); 793 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; 794 } 795 #endif 796 if (len > PAGE_SIZE) { 797 /* 798 * Fragment large datagrams such that each segment 799 * contains a multiple of PAGE_SIZE amount of data, 800 * plus headers. This enables a receiver to perform 801 * page-flipping zero-copy optimizations. 802 * 803 * XXX When does this help given that sender and receiver 804 * could have different page sizes, and also mtu could 805 * be less than the receiver's page size ? 806 */ 807 int newlen; 808 809 off = MIN(mtu, m0->m_pkthdr.len); 810 811 /* 812 * firstlen (off - hlen) must be aligned on an 813 * 8-byte boundary 814 */ 815 if (off < hlen) 816 goto smart_frag_failure; 817 off = ((off - hlen) & ~7) + hlen; 818 newlen = (~PAGE_MASK) & mtu; 819 if ((newlen + sizeof (struct ip)) > mtu) { 820 /* we failed, go back the default */ 821 smart_frag_failure: 822 newlen = len; 823 off = hlen + len; 824 } 825 len = newlen; 826 827 } else { 828 off = hlen + len; 829 } 830 831 firstlen = off - hlen; 832 mnext = &m0->m_nextpkt; /* pointer to next packet */ 833 834 /* 835 * Loop through length of segment after first fragment, 836 * make new header and copy data of each part and link onto chain. 837 * Here, m0 is the original packet, m is the fragment being created. 838 * The fragments are linked off the m_nextpkt of the original 839 * packet, which after processing serves as the first fragment. 840 */ 841 for (nfrags = 1; off < ip_len; off += len, nfrags++) { 842 struct ip *mhip; /* ip header on the fragment */ 843 struct mbuf *m; 844 int mhlen = sizeof (struct ip); 845 846 m = m_gethdr(M_NOWAIT, MT_DATA); 847 if (m == NULL) { 848 error = ENOBUFS; 849 IPSTAT_INC(ips_odropped); 850 goto done; 851 } 852 /* 853 * Make sure the complete packet header gets copied 854 * from the originating mbuf to the newly created 855 * mbuf. This also ensures that existing firewall 856 * classification(s), VLAN tags and so on get copied 857 * to the resulting fragmented packet(s): 858 */ 859 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { 860 m_free(m); 861 error = ENOBUFS; 862 IPSTAT_INC(ips_odropped); 863 goto done; 864 } 865 /* 866 * In the first mbuf, leave room for the link header, then 867 * copy the original IP header including options. The payload 868 * goes into an additional mbuf chain returned by m_copym(). 869 */ 870 m->m_data += max_linkhdr; 871 mhip = mtod(m, struct ip *); 872 *mhip = *ip; 873 if (hlen > sizeof (struct ip)) { 874 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 875 mhip->ip_v = IPVERSION; 876 mhip->ip_hl = mhlen >> 2; 877 } 878 m->m_len = mhlen; 879 /* XXX do we need to add ip_off below ? */ 880 mhip->ip_off = ((off - hlen) >> 3) + ip_off; 881 if (off + len >= ip_len) 882 len = ip_len - off; 883 else 884 mhip->ip_off |= IP_MF; 885 mhip->ip_len = htons((u_short)(len + mhlen)); 886 m->m_next = m_copym(m0, off, len, M_NOWAIT); 887 if (m->m_next == NULL) { /* copy failed */ 888 m_free(m); 889 error = ENOBUFS; /* ??? */ 890 IPSTAT_INC(ips_odropped); 891 goto done; 892 } 893 m->m_pkthdr.len = mhlen + len; 894 #ifdef MAC 895 mac_netinet_fragment(m0, m); 896 #endif 897 mhip->ip_off = htons(mhip->ip_off); 898 mhip->ip_sum = 0; 899 if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 900 mhip->ip_sum = in_cksum(m, mhlen); 901 m->m_pkthdr.csum_flags &= ~CSUM_IP; 902 } 903 *mnext = m; 904 mnext = &m->m_nextpkt; 905 } 906 IPSTAT_ADD(ips_ofragments, nfrags); 907 908 /* 909 * Update first fragment by trimming what's been copied out 910 * and updating header. 911 */ 912 m_adj(m0, hlen + firstlen - ip_len); 913 m0->m_pkthdr.len = hlen + firstlen; 914 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 915 ip->ip_off = htons(ip_off | IP_MF); 916 ip->ip_sum = 0; 917 if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 918 ip->ip_sum = in_cksum(m0, hlen); 919 m0->m_pkthdr.csum_flags &= ~CSUM_IP; 920 } 921 922 done: 923 *m_frag = m0; 924 return error; 925 } 926 927 void 928 in_delayed_cksum(struct mbuf *m) 929 { 930 struct ip *ip; 931 uint16_t csum, offset, ip_len; 932 933 ip = mtod(m, struct ip *); 934 offset = ip->ip_hl << 2 ; 935 ip_len = ntohs(ip->ip_len); 936 csum = in_cksum_skip(m, ip_len, offset); 937 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 938 csum = 0xffff; 939 offset += m->m_pkthdr.csum_data; /* checksum offset */ 940 941 /* find the mbuf in the chain where the checksum starts*/ 942 while ((m != NULL) && (offset >= m->m_len)) { 943 offset -= m->m_len; 944 m = m->m_next; 945 } 946 KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain.")); 947 KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs.")); 948 *(u_short *)(m->m_data + offset) = csum; 949 } 950 951 /* 952 * IP socket option processing. 953 */ 954 int 955 ip_ctloutput(struct socket *so, struct sockopt *sopt) 956 { 957 struct inpcb *inp = sotoinpcb(so); 958 int error, optval; 959 #ifdef RSS 960 uint32_t rss_bucket; 961 int retval; 962 #endif 963 964 error = optval = 0; 965 if (sopt->sopt_level != IPPROTO_IP) { 966 error = EINVAL; 967 968 if (sopt->sopt_level == SOL_SOCKET && 969 sopt->sopt_dir == SOPT_SET) { 970 switch (sopt->sopt_name) { 971 case SO_REUSEADDR: 972 INP_WLOCK(inp); 973 if ((so->so_options & SO_REUSEADDR) != 0) 974 inp->inp_flags2 |= INP_REUSEADDR; 975 else 976 inp->inp_flags2 &= ~INP_REUSEADDR; 977 INP_WUNLOCK(inp); 978 error = 0; 979 break; 980 case SO_REUSEPORT: 981 INP_WLOCK(inp); 982 if ((so->so_options & SO_REUSEPORT) != 0) 983 inp->inp_flags2 |= INP_REUSEPORT; 984 else 985 inp->inp_flags2 &= ~INP_REUSEPORT; 986 INP_WUNLOCK(inp); 987 error = 0; 988 break; 989 case SO_SETFIB: 990 INP_WLOCK(inp); 991 inp->inp_inc.inc_fibnum = so->so_fibnum; 992 INP_WUNLOCK(inp); 993 error = 0; 994 break; 995 case SO_MAX_PACING_RATE: 996 #ifdef RATELIMIT 997 INP_WLOCK(inp); 998 inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; 999 INP_WUNLOCK(inp); 1000 error = 0; 1001 #else 1002 error = EOPNOTSUPP; 1003 #endif 1004 break; 1005 default: 1006 break; 1007 } 1008 } 1009 return (error); 1010 } 1011 1012 switch (sopt->sopt_dir) { 1013 case SOPT_SET: 1014 switch (sopt->sopt_name) { 1015 case IP_OPTIONS: 1016 #ifdef notyet 1017 case IP_RETOPTS: 1018 #endif 1019 { 1020 struct mbuf *m; 1021 if (sopt->sopt_valsize > MLEN) { 1022 error = EMSGSIZE; 1023 break; 1024 } 1025 m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); 1026 if (m == NULL) { 1027 error = ENOBUFS; 1028 break; 1029 } 1030 m->m_len = sopt->sopt_valsize; 1031 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1032 m->m_len); 1033 if (error) { 1034 m_free(m); 1035 break; 1036 } 1037 INP_WLOCK(inp); 1038 error = ip_pcbopts(inp, sopt->sopt_name, m); 1039 INP_WUNLOCK(inp); 1040 return (error); 1041 } 1042 1043 case IP_BINDANY: 1044 if (sopt->sopt_td != NULL) { 1045 error = priv_check(sopt->sopt_td, 1046 PRIV_NETINET_BINDANY); 1047 if (error) 1048 break; 1049 } 1050 /* FALLTHROUGH */ 1051 case IP_BINDMULTI: 1052 #ifdef RSS 1053 case IP_RSS_LISTEN_BUCKET: 1054 #endif 1055 case IP_TOS: 1056 case IP_TTL: 1057 case IP_MINTTL: 1058 case IP_RECVOPTS: 1059 case IP_RECVRETOPTS: 1060 case IP_ORIGDSTADDR: 1061 case IP_RECVDSTADDR: 1062 case IP_RECVTTL: 1063 case IP_RECVIF: 1064 case IP_ONESBCAST: 1065 case IP_DONTFRAG: 1066 case IP_RECVTOS: 1067 case IP_RECVFLOWID: 1068 #ifdef RSS 1069 case IP_RECVRSSBUCKETID: 1070 #endif 1071 error = sooptcopyin(sopt, &optval, sizeof optval, 1072 sizeof optval); 1073 if (error) 1074 break; 1075 1076 switch (sopt->sopt_name) { 1077 case IP_TOS: 1078 inp->inp_ip_tos = optval; 1079 break; 1080 1081 case IP_TTL: 1082 inp->inp_ip_ttl = optval; 1083 break; 1084 1085 case IP_MINTTL: 1086 if (optval >= 0 && optval <= MAXTTL) 1087 inp->inp_ip_minttl = optval; 1088 else 1089 error = EINVAL; 1090 break; 1091 1092 #define OPTSET(bit) do { \ 1093 INP_WLOCK(inp); \ 1094 if (optval) \ 1095 inp->inp_flags |= bit; \ 1096 else \ 1097 inp->inp_flags &= ~bit; \ 1098 INP_WUNLOCK(inp); \ 1099 } while (0) 1100 1101 #define OPTSET2(bit, val) do { \ 1102 INP_WLOCK(inp); \ 1103 if (val) \ 1104 inp->inp_flags2 |= bit; \ 1105 else \ 1106 inp->inp_flags2 &= ~bit; \ 1107 INP_WUNLOCK(inp); \ 1108 } while (0) 1109 1110 case IP_RECVOPTS: 1111 OPTSET(INP_RECVOPTS); 1112 break; 1113 1114 case IP_RECVRETOPTS: 1115 OPTSET(INP_RECVRETOPTS); 1116 break; 1117 1118 case IP_RECVDSTADDR: 1119 OPTSET(INP_RECVDSTADDR); 1120 break; 1121 1122 case IP_ORIGDSTADDR: 1123 OPTSET2(INP_ORIGDSTADDR, optval); 1124 break; 1125 1126 case IP_RECVTTL: 1127 OPTSET(INP_RECVTTL); 1128 break; 1129 1130 case IP_RECVIF: 1131 OPTSET(INP_RECVIF); 1132 break; 1133 1134 case IP_ONESBCAST: 1135 OPTSET(INP_ONESBCAST); 1136 break; 1137 case IP_DONTFRAG: 1138 OPTSET(INP_DONTFRAG); 1139 break; 1140 case IP_BINDANY: 1141 OPTSET(INP_BINDANY); 1142 break; 1143 case IP_RECVTOS: 1144 OPTSET(INP_RECVTOS); 1145 break; 1146 case IP_BINDMULTI: 1147 OPTSET2(INP_BINDMULTI, optval); 1148 break; 1149 case IP_RECVFLOWID: 1150 OPTSET2(INP_RECVFLOWID, optval); 1151 break; 1152 #ifdef RSS 1153 case IP_RSS_LISTEN_BUCKET: 1154 if ((optval >= 0) && 1155 (optval < rss_getnumbuckets())) { 1156 inp->inp_rss_listen_bucket = optval; 1157 OPTSET2(INP_RSS_BUCKET_SET, 1); 1158 } else { 1159 error = EINVAL; 1160 } 1161 break; 1162 case IP_RECVRSSBUCKETID: 1163 OPTSET2(INP_RECVRSSBUCKETID, optval); 1164 break; 1165 #endif 1166 } 1167 break; 1168 #undef OPTSET 1169 #undef OPTSET2 1170 1171 /* 1172 * Multicast socket options are processed by the in_mcast 1173 * module. 1174 */ 1175 case IP_MULTICAST_IF: 1176 case IP_MULTICAST_VIF: 1177 case IP_MULTICAST_TTL: 1178 case IP_MULTICAST_LOOP: 1179 case IP_ADD_MEMBERSHIP: 1180 case IP_DROP_MEMBERSHIP: 1181 case IP_ADD_SOURCE_MEMBERSHIP: 1182 case IP_DROP_SOURCE_MEMBERSHIP: 1183 case IP_BLOCK_SOURCE: 1184 case IP_UNBLOCK_SOURCE: 1185 case IP_MSFILTER: 1186 case MCAST_JOIN_GROUP: 1187 case MCAST_LEAVE_GROUP: 1188 case MCAST_JOIN_SOURCE_GROUP: 1189 case MCAST_LEAVE_SOURCE_GROUP: 1190 case MCAST_BLOCK_SOURCE: 1191 case MCAST_UNBLOCK_SOURCE: 1192 error = inp_setmoptions(inp, sopt); 1193 break; 1194 1195 case IP_PORTRANGE: 1196 error = sooptcopyin(sopt, &optval, sizeof optval, 1197 sizeof optval); 1198 if (error) 1199 break; 1200 1201 INP_WLOCK(inp); 1202 switch (optval) { 1203 case IP_PORTRANGE_DEFAULT: 1204 inp->inp_flags &= ~(INP_LOWPORT); 1205 inp->inp_flags &= ~(INP_HIGHPORT); 1206 break; 1207 1208 case IP_PORTRANGE_HIGH: 1209 inp->inp_flags &= ~(INP_LOWPORT); 1210 inp->inp_flags |= INP_HIGHPORT; 1211 break; 1212 1213 case IP_PORTRANGE_LOW: 1214 inp->inp_flags &= ~(INP_HIGHPORT); 1215 inp->inp_flags |= INP_LOWPORT; 1216 break; 1217 1218 default: 1219 error = EINVAL; 1220 break; 1221 } 1222 INP_WUNLOCK(inp); 1223 break; 1224 1225 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1226 case IP_IPSEC_POLICY: 1227 if (IPSEC_ENABLED(ipv4)) { 1228 error = IPSEC_PCBCTL(ipv4, inp, sopt); 1229 break; 1230 } 1231 /* FALLTHROUGH */ 1232 #endif /* IPSEC */ 1233 1234 default: 1235 error = ENOPROTOOPT; 1236 break; 1237 } 1238 break; 1239 1240 case SOPT_GET: 1241 switch (sopt->sopt_name) { 1242 case IP_OPTIONS: 1243 case IP_RETOPTS: 1244 if (inp->inp_options) 1245 error = sooptcopyout(sopt, 1246 mtod(inp->inp_options, 1247 char *), 1248 inp->inp_options->m_len); 1249 else 1250 sopt->sopt_valsize = 0; 1251 break; 1252 1253 case IP_TOS: 1254 case IP_TTL: 1255 case IP_MINTTL: 1256 case IP_RECVOPTS: 1257 case IP_RECVRETOPTS: 1258 case IP_ORIGDSTADDR: 1259 case IP_RECVDSTADDR: 1260 case IP_RECVTTL: 1261 case IP_RECVIF: 1262 case IP_PORTRANGE: 1263 case IP_ONESBCAST: 1264 case IP_DONTFRAG: 1265 case IP_BINDANY: 1266 case IP_RECVTOS: 1267 case IP_BINDMULTI: 1268 case IP_FLOWID: 1269 case IP_FLOWTYPE: 1270 case IP_RECVFLOWID: 1271 #ifdef RSS 1272 case IP_RSSBUCKETID: 1273 case IP_RECVRSSBUCKETID: 1274 #endif 1275 switch (sopt->sopt_name) { 1276 1277 case IP_TOS: 1278 optval = inp->inp_ip_tos; 1279 break; 1280 1281 case IP_TTL: 1282 optval = inp->inp_ip_ttl; 1283 break; 1284 1285 case IP_MINTTL: 1286 optval = inp->inp_ip_minttl; 1287 break; 1288 1289 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1290 #define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0) 1291 1292 case IP_RECVOPTS: 1293 optval = OPTBIT(INP_RECVOPTS); 1294 break; 1295 1296 case IP_RECVRETOPTS: 1297 optval = OPTBIT(INP_RECVRETOPTS); 1298 break; 1299 1300 case IP_RECVDSTADDR: 1301 optval = OPTBIT(INP_RECVDSTADDR); 1302 break; 1303 1304 case IP_ORIGDSTADDR: 1305 optval = OPTBIT2(INP_ORIGDSTADDR); 1306 break; 1307 1308 case IP_RECVTTL: 1309 optval = OPTBIT(INP_RECVTTL); 1310 break; 1311 1312 case IP_RECVIF: 1313 optval = OPTBIT(INP_RECVIF); 1314 break; 1315 1316 case IP_PORTRANGE: 1317 if (inp->inp_flags & INP_HIGHPORT) 1318 optval = IP_PORTRANGE_HIGH; 1319 else if (inp->inp_flags & INP_LOWPORT) 1320 optval = IP_PORTRANGE_LOW; 1321 else 1322 optval = 0; 1323 break; 1324 1325 case IP_ONESBCAST: 1326 optval = OPTBIT(INP_ONESBCAST); 1327 break; 1328 case IP_DONTFRAG: 1329 optval = OPTBIT(INP_DONTFRAG); 1330 break; 1331 case IP_BINDANY: 1332 optval = OPTBIT(INP_BINDANY); 1333 break; 1334 case IP_RECVTOS: 1335 optval = OPTBIT(INP_RECVTOS); 1336 break; 1337 case IP_FLOWID: 1338 optval = inp->inp_flowid; 1339 break; 1340 case IP_FLOWTYPE: 1341 optval = inp->inp_flowtype; 1342 break; 1343 case IP_RECVFLOWID: 1344 optval = OPTBIT2(INP_RECVFLOWID); 1345 break; 1346 #ifdef RSS 1347 case IP_RSSBUCKETID: 1348 retval = rss_hash2bucket(inp->inp_flowid, 1349 inp->inp_flowtype, 1350 &rss_bucket); 1351 if (retval == 0) 1352 optval = rss_bucket; 1353 else 1354 error = EINVAL; 1355 break; 1356 case IP_RECVRSSBUCKETID: 1357 optval = OPTBIT2(INP_RECVRSSBUCKETID); 1358 break; 1359 #endif 1360 case IP_BINDMULTI: 1361 optval = OPTBIT2(INP_BINDMULTI); 1362 break; 1363 } 1364 error = sooptcopyout(sopt, &optval, sizeof optval); 1365 break; 1366 1367 /* 1368 * Multicast socket options are processed by the in_mcast 1369 * module. 1370 */ 1371 case IP_MULTICAST_IF: 1372 case IP_MULTICAST_VIF: 1373 case IP_MULTICAST_TTL: 1374 case IP_MULTICAST_LOOP: 1375 case IP_MSFILTER: 1376 error = inp_getmoptions(inp, sopt); 1377 break; 1378 1379 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1380 case IP_IPSEC_POLICY: 1381 if (IPSEC_ENABLED(ipv4)) { 1382 error = IPSEC_PCBCTL(ipv4, inp, sopt); 1383 break; 1384 } 1385 /* FALLTHROUGH */ 1386 #endif /* IPSEC */ 1387 1388 default: 1389 error = ENOPROTOOPT; 1390 break; 1391 } 1392 break; 1393 } 1394 return (error); 1395 } 1396 1397 /* 1398 * Routine called from ip_output() to loop back a copy of an IP multicast 1399 * packet to the input queue of a specified interface. Note that this 1400 * calls the output routine of the loopback "driver", but with an interface 1401 * pointer that might NOT be a loopback interface -- evil, but easier than 1402 * replicating that code here. 1403 */ 1404 static void 1405 ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen) 1406 { 1407 struct ip *ip; 1408 struct mbuf *copym; 1409 1410 /* 1411 * Make a deep copy of the packet because we're going to 1412 * modify the pack in order to generate checksums. 1413 */ 1414 copym = m_dup(m, M_NOWAIT); 1415 if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen)) 1416 copym = m_pullup(copym, hlen); 1417 if (copym != NULL) { 1418 /* If needed, compute the checksum and mark it as valid. */ 1419 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1420 in_delayed_cksum(copym); 1421 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1422 copym->m_pkthdr.csum_flags |= 1423 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1424 copym->m_pkthdr.csum_data = 0xffff; 1425 } 1426 /* 1427 * We don't bother to fragment if the IP length is greater 1428 * than the interface's MTU. Can this possibly matter? 1429 */ 1430 ip = mtod(copym, struct ip *); 1431 ip->ip_sum = 0; 1432 ip->ip_sum = in_cksum(copym, hlen); 1433 if_simloop(ifp, copym, AF_INET, 0); 1434 } 1435 } 1436