1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_inet.h" 38 #include "opt_ratelimit.h" 39 #include "opt_ipsec.h" 40 #include "opt_mbuf_stress_test.h" 41 #include "opt_mpath.h" 42 #include "opt_route.h" 43 #include "opt_sctp.h" 44 #include "opt_rss.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/mbuf.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/protosw.h> 55 #include <sys/rmlock.h> 56 #include <sys/sdt.h> 57 #include <sys/socket.h> 58 #include <sys/socketvar.h> 59 #include <sys/sysctl.h> 60 #include <sys/ucred.h> 61 62 #include <net/if.h> 63 #include <net/if_var.h> 64 #include <net/if_llatbl.h> 65 #include <net/netisr.h> 66 #include <net/pfil.h> 67 #include <net/route.h> 68 #ifdef RADIX_MPATH 69 #include <net/radix_mpath.h> 70 #endif 71 #include <net/rss_config.h> 72 #include <net/vnet.h> 73 74 #include <netinet/in.h> 75 #include <netinet/in_kdtrace.h> 76 #include <netinet/in_systm.h> 77 #include <netinet/ip.h> 78 #include <netinet/in_pcb.h> 79 #include <netinet/in_rss.h> 80 #include <netinet/in_var.h> 81 #include <netinet/ip_var.h> 82 #include <netinet/ip_options.h> 83 84 #include <netinet/udp.h> 85 #include <netinet/udp_var.h> 86 87 #ifdef SCTP 88 #include <netinet/sctp.h> 89 #include <netinet/sctp_crc32.h> 90 #endif 91 92 #include <netipsec/ipsec_support.h> 93 94 #include <machine/in_cksum.h> 95 96 #include <security/mac/mac_framework.h> 97 98 #ifdef MBUF_STRESS_TEST 99 static int mbuf_frag_size = 0; 100 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 101 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 102 #endif 103 104 static void ip_mloopback(struct ifnet *, const struct mbuf *, int); 105 106 107 extern int in_mcast_loop; 108 extern struct protosw inetsw[]; 109 110 static inline int 111 ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp, 112 struct sockaddr_in *dst, int *fibnum, int *error) 113 { 114 struct m_tag *fwd_tag = NULL; 115 struct mbuf *m; 116 struct in_addr odst; 117 struct ip *ip; 118 119 m = *mp; 120 ip = mtod(m, struct ip *); 121 122 /* Run through list of hooks for output packets. */ 123 odst.s_addr = ip->ip_dst.s_addr; 124 *error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, 0, inp); 125 m = *mp; 126 if ((*error) != 0 || m == NULL) 127 return 1; /* Finished */ 128 129 ip = mtod(m, struct ip *); 130 131 /* See if destination IP address was changed by packet filter. */ 132 if (odst.s_addr != ip->ip_dst.s_addr) { 133 m->m_flags |= M_SKIP_FIREWALL; 134 /* If destination is now ourself drop to ip_input(). */ 135 if (in_localip(ip->ip_dst)) { 136 m->m_flags |= M_FASTFWD_OURS; 137 if (m->m_pkthdr.rcvif == NULL) 138 m->m_pkthdr.rcvif = V_loif; 139 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 140 m->m_pkthdr.csum_flags |= 141 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 142 m->m_pkthdr.csum_data = 0xffff; 143 } 144 m->m_pkthdr.csum_flags |= 145 CSUM_IP_CHECKED | CSUM_IP_VALID; 146 #ifdef SCTP 147 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 148 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 149 #endif 150 *error = netisr_queue(NETISR_IP, m); 151 return 1; /* Finished */ 152 } 153 154 bzero(dst, sizeof(*dst)); 155 dst->sin_family = AF_INET; 156 dst->sin_len = sizeof(*dst); 157 dst->sin_addr = ip->ip_dst; 158 159 return -1; /* Reloop */ 160 } 161 /* See if fib was changed by packet filter. */ 162 if ((*fibnum) != M_GETFIB(m)) { 163 m->m_flags |= M_SKIP_FIREWALL; 164 *fibnum = M_GETFIB(m); 165 return -1; /* Reloop for FIB change */ 166 } 167 168 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ 169 if (m->m_flags & M_FASTFWD_OURS) { 170 if (m->m_pkthdr.rcvif == NULL) 171 m->m_pkthdr.rcvif = V_loif; 172 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 173 m->m_pkthdr.csum_flags |= 174 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 175 m->m_pkthdr.csum_data = 0xffff; 176 } 177 #ifdef SCTP 178 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 179 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 180 #endif 181 m->m_pkthdr.csum_flags |= 182 CSUM_IP_CHECKED | CSUM_IP_VALID; 183 184 *error = netisr_queue(NETISR_IP, m); 185 return 1; /* Finished */ 186 } 187 /* Or forward to some other address? */ 188 if ((m->m_flags & M_IP_NEXTHOP) && 189 ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { 190 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 191 m->m_flags |= M_SKIP_FIREWALL; 192 m->m_flags &= ~M_IP_NEXTHOP; 193 m_tag_delete(m, fwd_tag); 194 195 return -1; /* Reloop for CHANGE of dst */ 196 } 197 198 return 0; 199 } 200 201 /* 202 * IP output. The packet in mbuf chain m contains a skeletal IP 203 * header (with len, off, ttl, proto, tos, src, dst). 204 * The mbuf chain containing the packet will be freed. 205 * The mbuf opt, if present, will not be freed. 206 * If route ro is present and has ro_rt initialized, route lookup would be 207 * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, 208 * then result of route lookup is stored in ro->ro_rt. 209 * 210 * In the IP forwarding case, the packet will arrive with options already 211 * inserted, so must have a NULL opt pointer. 212 */ 213 int 214 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 215 struct ip_moptions *imo, struct inpcb *inp) 216 { 217 struct rm_priotracker in_ifa_tracker; 218 struct ip *ip; 219 struct ifnet *ifp = NULL; /* keep compiler happy */ 220 struct mbuf *m0; 221 int hlen = sizeof (struct ip); 222 int mtu; 223 int error = 0; 224 struct sockaddr_in *dst; 225 const struct sockaddr_in *gw; 226 struct in_ifaddr *ia; 227 int isbroadcast; 228 uint16_t ip_len, ip_off; 229 struct route iproute; 230 struct rtentry *rte; /* cache for ro->ro_rt */ 231 uint32_t fibnum; 232 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 233 int no_route_but_check_spd = 0; 234 #endif 235 M_ASSERTPKTHDR(m); 236 237 if (inp != NULL) { 238 INP_LOCK_ASSERT(inp); 239 M_SETFIB(m, inp->inp_inc.inc_fibnum); 240 if ((flags & IP_NODEFAULTFLOWID) == 0) { 241 m->m_pkthdr.flowid = inp->inp_flowid; 242 M_HASHTYPE_SET(m, inp->inp_flowtype); 243 } 244 } 245 246 if (ro == NULL) { 247 ro = &iproute; 248 bzero(ro, sizeof (*ro)); 249 } 250 251 if (opt) { 252 int len = 0; 253 m = ip_insertoptions(m, opt, &len); 254 if (len != 0) 255 hlen = len; /* ip->ip_hl is updated above */ 256 } 257 ip = mtod(m, struct ip *); 258 ip_len = ntohs(ip->ip_len); 259 ip_off = ntohs(ip->ip_off); 260 261 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 262 ip->ip_v = IPVERSION; 263 ip->ip_hl = hlen >> 2; 264 ip_fillid(ip); 265 } else { 266 /* Header already set, fetch hlen from there */ 267 hlen = ip->ip_hl << 2; 268 } 269 if ((flags & IP_FORWARDING) == 0) 270 IPSTAT_INC(ips_localout); 271 272 /* 273 * dst/gw handling: 274 * 275 * dst can be rewritten but always points to &ro->ro_dst. 276 * gw is readonly but can point either to dst OR rt_gateway, 277 * therefore we need restore gw if we're redoing lookup. 278 */ 279 gw = dst = (struct sockaddr_in *)&ro->ro_dst; 280 fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); 281 rte = ro->ro_rt; 282 if (rte == NULL) { 283 bzero(dst, sizeof(*dst)); 284 dst->sin_family = AF_INET; 285 dst->sin_len = sizeof(*dst); 286 dst->sin_addr = ip->ip_dst; 287 } 288 NET_EPOCH_ENTER(); 289 again: 290 /* 291 * Validate route against routing table additions; 292 * a better/more specific route might have been added. 293 */ 294 if (inp) 295 RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); 296 /* 297 * If there is a cached route, 298 * check that it is to the same destination 299 * and is still up. If not, free it and try again. 300 * The address family should also be checked in case of sharing the 301 * cache with IPv6. 302 * Also check whether routing cache needs invalidation. 303 */ 304 rte = ro->ro_rt; 305 if (rte && ((rte->rt_flags & RTF_UP) == 0 || 306 rte->rt_ifp == NULL || 307 !RT_LINK_IS_UP(rte->rt_ifp) || 308 dst->sin_family != AF_INET || 309 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 310 RO_INVALIDATE_CACHE(ro); 311 rte = NULL; 312 } 313 ia = NULL; 314 /* 315 * If routing to interface only, short circuit routing lookup. 316 * The use of an all-ones broadcast address implies this; an 317 * interface is specified by the broadcast address of an interface, 318 * or the destination address of a ptp interface. 319 */ 320 if (flags & IP_SENDONES) { 321 if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst), 322 M_GETFIB(m)))) == NULL && 323 (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), 324 M_GETFIB(m)))) == NULL) { 325 IPSTAT_INC(ips_noroute); 326 error = ENETUNREACH; 327 goto bad; 328 } 329 ip->ip_dst.s_addr = INADDR_BROADCAST; 330 dst->sin_addr = ip->ip_dst; 331 ifp = ia->ia_ifp; 332 ip->ip_ttl = 1; 333 isbroadcast = 1; 334 } else if (flags & IP_ROUTETOIF) { 335 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), 336 M_GETFIB(m)))) == NULL && 337 (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0, 338 M_GETFIB(m)))) == NULL) { 339 IPSTAT_INC(ips_noroute); 340 error = ENETUNREACH; 341 goto bad; 342 } 343 ifp = ia->ia_ifp; 344 ip->ip_ttl = 1; 345 isbroadcast = ifp->if_flags & IFF_BROADCAST ? 346 in_ifaddr_broadcast(dst->sin_addr, ia) : 0; 347 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 348 imo != NULL && imo->imo_multicast_ifp != NULL) { 349 /* 350 * Bypass the normal routing lookup for multicast 351 * packets if the interface is specified. 352 */ 353 ifp = imo->imo_multicast_ifp; 354 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 355 isbroadcast = 0; /* fool gcc */ 356 } else { 357 /* 358 * We want to do any cloning requested by the link layer, 359 * as this is probably required in all cases for correct 360 * operation (as it is for ARP). 361 */ 362 if (rte == NULL) { 363 #ifdef RADIX_MPATH 364 rtalloc_mpath_fib(ro, 365 ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 366 fibnum); 367 #else 368 in_rtalloc_ign(ro, 0, fibnum); 369 #endif 370 rte = ro->ro_rt; 371 } 372 if (rte == NULL || 373 (rte->rt_flags & RTF_UP) == 0 || 374 rte->rt_ifp == NULL || 375 !RT_LINK_IS_UP(rte->rt_ifp)) { 376 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 377 /* 378 * There is no route for this packet, but it is 379 * possible that a matching SPD entry exists. 380 */ 381 no_route_but_check_spd = 1; 382 mtu = 0; /* Silence GCC warning. */ 383 goto sendit; 384 #endif 385 IPSTAT_INC(ips_noroute); 386 error = EHOSTUNREACH; 387 goto bad; 388 } 389 ia = ifatoia(rte->rt_ifa); 390 ifp = rte->rt_ifp; 391 counter_u64_add(rte->rt_pksent, 1); 392 rt_update_ro_flags(ro); 393 if (rte->rt_flags & RTF_GATEWAY) 394 gw = (struct sockaddr_in *)rte->rt_gateway; 395 if (rte->rt_flags & RTF_HOST) 396 isbroadcast = (rte->rt_flags & RTF_BROADCAST); 397 else if (ifp->if_flags & IFF_BROADCAST) 398 isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); 399 else 400 isbroadcast = 0; 401 } 402 403 /* 404 * Calculate MTU. If we have a route that is up, use that, 405 * otherwise use the interface's MTU. 406 */ 407 if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) 408 mtu = rte->rt_mtu; 409 else 410 mtu = ifp->if_mtu; 411 /* Catch a possible divide by zero later. */ 412 KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", 413 __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); 414 415 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 416 m->m_flags |= M_MCAST; 417 /* 418 * IP destination address is multicast. Make sure "gw" 419 * still points to the address in "ro". (It may have been 420 * changed to point to a gateway address, above.) 421 */ 422 gw = dst; 423 /* 424 * See if the caller provided any multicast options 425 */ 426 if (imo != NULL) { 427 ip->ip_ttl = imo->imo_multicast_ttl; 428 if (imo->imo_multicast_vif != -1) 429 ip->ip_src.s_addr = 430 ip_mcast_src ? 431 ip_mcast_src(imo->imo_multicast_vif) : 432 INADDR_ANY; 433 } else 434 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 435 /* 436 * Confirm that the outgoing interface supports multicast. 437 */ 438 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 439 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 440 IPSTAT_INC(ips_noroute); 441 error = ENETUNREACH; 442 goto bad; 443 } 444 } 445 /* 446 * If source address not specified yet, use address 447 * of outgoing interface. 448 */ 449 if (ip->ip_src.s_addr == INADDR_ANY) { 450 /* Interface may have no addresses. */ 451 if (ia != NULL) 452 ip->ip_src = IA_SIN(ia)->sin_addr; 453 } 454 455 if ((imo == NULL && in_mcast_loop) || 456 (imo && imo->imo_multicast_loop)) { 457 /* 458 * Loop back multicast datagram if not expressly 459 * forbidden to do so, even if we are not a member 460 * of the group; ip_input() will filter it later, 461 * thus deferring a hash lookup and mutex acquisition 462 * at the expense of a cheap copy using m_copym(). 463 */ 464 ip_mloopback(ifp, m, hlen); 465 } else { 466 /* 467 * If we are acting as a multicast router, perform 468 * multicast forwarding as if the packet had just 469 * arrived on the interface to which we are about 470 * to send. The multicast forwarding function 471 * recursively calls this function, using the 472 * IP_FORWARDING flag to prevent infinite recursion. 473 * 474 * Multicasts that are looped back by ip_mloopback(), 475 * above, will be forwarded by the ip_input() routine, 476 * if necessary. 477 */ 478 if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) { 479 /* 480 * If rsvp daemon is not running, do not 481 * set ip_moptions. This ensures that the packet 482 * is multicast and not just sent down one link 483 * as prescribed by rsvpd. 484 */ 485 if (!V_rsvp_on) 486 imo = NULL; 487 if (ip_mforward && 488 ip_mforward(ip, ifp, m, imo) != 0) { 489 m_freem(m); 490 goto done; 491 } 492 } 493 } 494 495 /* 496 * Multicasts with a time-to-live of zero may be looped- 497 * back, above, but must not be transmitted on a network. 498 * Also, multicasts addressed to the loopback interface 499 * are not sent -- the above call to ip_mloopback() will 500 * loop back a copy. ip_input() will drop the copy if 501 * this host does not belong to the destination group on 502 * the loopback interface. 503 */ 504 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 505 m_freem(m); 506 goto done; 507 } 508 509 goto sendit; 510 } 511 512 /* 513 * If the source address is not specified yet, use the address 514 * of the outoing interface. 515 */ 516 if (ip->ip_src.s_addr == INADDR_ANY) { 517 /* Interface may have no addresses. */ 518 if (ia != NULL) { 519 ip->ip_src = IA_SIN(ia)->sin_addr; 520 } 521 } 522 523 /* 524 * Look for broadcast address and 525 * verify user is allowed to send 526 * such a packet. 527 */ 528 if (isbroadcast) { 529 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 530 error = EADDRNOTAVAIL; 531 goto bad; 532 } 533 if ((flags & IP_ALLOWBROADCAST) == 0) { 534 error = EACCES; 535 goto bad; 536 } 537 /* don't allow broadcast messages to be fragmented */ 538 if (ip_len > mtu) { 539 error = EMSGSIZE; 540 goto bad; 541 } 542 m->m_flags |= M_BCAST; 543 } else { 544 m->m_flags &= ~M_BCAST; 545 } 546 547 sendit: 548 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 549 if (IPSEC_ENABLED(ipv4)) { 550 if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { 551 if (error == EINPROGRESS) 552 error = 0; 553 goto done; 554 } 555 } 556 /* 557 * Check if there was a route for this packet; return error if not. 558 */ 559 if (no_route_but_check_spd) { 560 IPSTAT_INC(ips_noroute); 561 error = EHOSTUNREACH; 562 goto bad; 563 } 564 /* Update variables that are affected by ipsec4_output(). */ 565 ip = mtod(m, struct ip *); 566 hlen = ip->ip_hl << 2; 567 #endif /* IPSEC */ 568 569 /* Jump over all PFIL processing if hooks are not active. */ 570 if (PFIL_HOOKED(&V_inet_pfil_hook)) { 571 switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) { 572 case 1: /* Finished */ 573 goto done; 574 575 case 0: /* Continue normally */ 576 ip = mtod(m, struct ip *); 577 break; 578 579 case -1: /* Need to try again */ 580 /* Reset everything for a new round */ 581 RO_RTFREE(ro); 582 ro->ro_prepend = NULL; 583 rte = NULL; 584 gw = dst; 585 ip = mtod(m, struct ip *); 586 goto again; 587 588 } 589 } 590 591 /* 127/8 must not appear on wire - RFC1122. */ 592 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 593 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 594 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 595 IPSTAT_INC(ips_badaddr); 596 error = EADDRNOTAVAIL; 597 goto bad; 598 } 599 } 600 601 m->m_pkthdr.csum_flags |= CSUM_IP; 602 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { 603 in_delayed_cksum(m); 604 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 605 } 606 #ifdef SCTP 607 if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { 608 sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); 609 m->m_pkthdr.csum_flags &= ~CSUM_SCTP; 610 } 611 #endif 612 613 /* 614 * If small enough for interface, or the interface will take 615 * care of the fragmentation for us, we can just send directly. 616 */ 617 if (ip_len <= mtu || 618 (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { 619 ip->ip_sum = 0; 620 if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { 621 ip->ip_sum = in_cksum(m, hlen); 622 m->m_pkthdr.csum_flags &= ~CSUM_IP; 623 } 624 625 /* 626 * Record statistics for this interface address. 627 * With CSUM_TSO the byte/packet count will be slightly 628 * incorrect because we count the IP+TCP headers only 629 * once instead of for every generated packet. 630 */ 631 if (!(flags & IP_FORWARDING) && ia) { 632 if (m->m_pkthdr.csum_flags & CSUM_TSO) 633 counter_u64_add(ia->ia_ifa.ifa_opackets, 634 m->m_pkthdr.len / m->m_pkthdr.tso_segsz); 635 else 636 counter_u64_add(ia->ia_ifa.ifa_opackets, 1); 637 638 counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); 639 } 640 #ifdef MBUF_STRESS_TEST 641 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 642 m = m_fragment(m, M_NOWAIT, mbuf_frag_size); 643 #endif 644 /* 645 * Reset layer specific mbuf flags 646 * to avoid confusing lower layers. 647 */ 648 m_clrprotoflags(m); 649 IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); 650 #ifdef RATELIMIT 651 if (inp != NULL) { 652 if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) 653 in_pcboutput_txrtlmt(inp, ifp, m); 654 /* stamp send tag on mbuf */ 655 m->m_pkthdr.snd_tag = inp->inp_snd_tag; 656 } else { 657 m->m_pkthdr.snd_tag = NULL; 658 } 659 #endif 660 error = (*ifp->if_output)(ifp, m, 661 (const struct sockaddr *)gw, ro); 662 #ifdef RATELIMIT 663 /* check for route change */ 664 if (error == EAGAIN) 665 in_pcboutput_eagain(inp); 666 #endif 667 goto done; 668 } 669 670 /* Balk when DF bit is set or the interface didn't support TSO. */ 671 if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { 672 error = EMSGSIZE; 673 IPSTAT_INC(ips_cantfrag); 674 goto bad; 675 } 676 677 /* 678 * Too large for interface; fragment if possible. If successful, 679 * on return, m will point to a list of packets to be sent. 680 */ 681 error = ip_fragment(ip, &m, mtu, ifp->if_hwassist); 682 if (error) 683 goto bad; 684 for (; m; m = m0) { 685 m0 = m->m_nextpkt; 686 m->m_nextpkt = 0; 687 if (error == 0) { 688 /* Record statistics for this interface address. */ 689 if (ia != NULL) { 690 counter_u64_add(ia->ia_ifa.ifa_opackets, 1); 691 counter_u64_add(ia->ia_ifa.ifa_obytes, 692 m->m_pkthdr.len); 693 } 694 /* 695 * Reset layer specific mbuf flags 696 * to avoid confusing upper layers. 697 */ 698 m_clrprotoflags(m); 699 700 IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp, 701 mtod(m, struct ip *), NULL); 702 #ifdef RATELIMIT 703 if (inp != NULL) { 704 if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) 705 in_pcboutput_txrtlmt(inp, ifp, m); 706 /* stamp send tag on mbuf */ 707 m->m_pkthdr.snd_tag = inp->inp_snd_tag; 708 } else { 709 m->m_pkthdr.snd_tag = NULL; 710 } 711 #endif 712 error = (*ifp->if_output)(ifp, m, 713 (const struct sockaddr *)gw, ro); 714 #ifdef RATELIMIT 715 /* check for route change */ 716 if (error == EAGAIN) 717 in_pcboutput_eagain(inp); 718 #endif 719 } else 720 m_freem(m); 721 } 722 723 if (error == 0) 724 IPSTAT_INC(ips_fragmented); 725 726 done: 727 if (ro == &iproute) 728 RO_RTFREE(ro); 729 else if (rte == NULL) 730 /* 731 * If the caller supplied a route but somehow the reference 732 * to it has been released need to prevent the caller 733 * calling RTFREE on it again. 734 */ 735 ro->ro_rt = NULL; 736 NET_EPOCH_EXIT(); 737 return (error); 738 bad: 739 m_freem(m); 740 goto done; 741 } 742 743 /* 744 * Create a chain of fragments which fit the given mtu. m_frag points to the 745 * mbuf to be fragmented; on return it points to the chain with the fragments. 746 * Return 0 if no error. If error, m_frag may contain a partially built 747 * chain of fragments that should be freed by the caller. 748 * 749 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 750 */ 751 int 752 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 753 u_long if_hwassist_flags) 754 { 755 int error = 0; 756 int hlen = ip->ip_hl << 2; 757 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 758 int off; 759 struct mbuf *m0 = *m_frag; /* the original packet */ 760 int firstlen; 761 struct mbuf **mnext; 762 int nfrags; 763 uint16_t ip_len, ip_off; 764 765 ip_len = ntohs(ip->ip_len); 766 ip_off = ntohs(ip->ip_off); 767 768 if (ip_off & IP_DF) { /* Fragmentation not allowed */ 769 IPSTAT_INC(ips_cantfrag); 770 return EMSGSIZE; 771 } 772 773 /* 774 * Must be able to put at least 8 bytes per fragment. 775 */ 776 if (len < 8) 777 return EMSGSIZE; 778 779 /* 780 * If the interface will not calculate checksums on 781 * fragmented packets, then do it here. 782 */ 783 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 784 in_delayed_cksum(m0); 785 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 786 } 787 #ifdef SCTP 788 if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { 789 sctp_delayed_cksum(m0, hlen); 790 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; 791 } 792 #endif 793 if (len > PAGE_SIZE) { 794 /* 795 * Fragment large datagrams such that each segment 796 * contains a multiple of PAGE_SIZE amount of data, 797 * plus headers. This enables a receiver to perform 798 * page-flipping zero-copy optimizations. 799 * 800 * XXX When does this help given that sender and receiver 801 * could have different page sizes, and also mtu could 802 * be less than the receiver's page size ? 803 */ 804 int newlen; 805 806 off = MIN(mtu, m0->m_pkthdr.len); 807 808 /* 809 * firstlen (off - hlen) must be aligned on an 810 * 8-byte boundary 811 */ 812 if (off < hlen) 813 goto smart_frag_failure; 814 off = ((off - hlen) & ~7) + hlen; 815 newlen = (~PAGE_MASK) & mtu; 816 if ((newlen + sizeof (struct ip)) > mtu) { 817 /* we failed, go back the default */ 818 smart_frag_failure: 819 newlen = len; 820 off = hlen + len; 821 } 822 len = newlen; 823 824 } else { 825 off = hlen + len; 826 } 827 828 firstlen = off - hlen; 829 mnext = &m0->m_nextpkt; /* pointer to next packet */ 830 831 /* 832 * Loop through length of segment after first fragment, 833 * make new header and copy data of each part and link onto chain. 834 * Here, m0 is the original packet, m is the fragment being created. 835 * The fragments are linked off the m_nextpkt of the original 836 * packet, which after processing serves as the first fragment. 837 */ 838 for (nfrags = 1; off < ip_len; off += len, nfrags++) { 839 struct ip *mhip; /* ip header on the fragment */ 840 struct mbuf *m; 841 int mhlen = sizeof (struct ip); 842 843 m = m_gethdr(M_NOWAIT, MT_DATA); 844 if (m == NULL) { 845 error = ENOBUFS; 846 IPSTAT_INC(ips_odropped); 847 goto done; 848 } 849 /* 850 * Make sure the complete packet header gets copied 851 * from the originating mbuf to the newly created 852 * mbuf. This also ensures that existing firewall 853 * classification(s), VLAN tags and so on get copied 854 * to the resulting fragmented packet(s): 855 */ 856 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { 857 m_free(m); 858 error = ENOBUFS; 859 IPSTAT_INC(ips_odropped); 860 goto done; 861 } 862 /* 863 * In the first mbuf, leave room for the link header, then 864 * copy the original IP header including options. The payload 865 * goes into an additional mbuf chain returned by m_copym(). 866 */ 867 m->m_data += max_linkhdr; 868 mhip = mtod(m, struct ip *); 869 *mhip = *ip; 870 if (hlen > sizeof (struct ip)) { 871 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 872 mhip->ip_v = IPVERSION; 873 mhip->ip_hl = mhlen >> 2; 874 } 875 m->m_len = mhlen; 876 /* XXX do we need to add ip_off below ? */ 877 mhip->ip_off = ((off - hlen) >> 3) + ip_off; 878 if (off + len >= ip_len) 879 len = ip_len - off; 880 else 881 mhip->ip_off |= IP_MF; 882 mhip->ip_len = htons((u_short)(len + mhlen)); 883 m->m_next = m_copym(m0, off, len, M_NOWAIT); 884 if (m->m_next == NULL) { /* copy failed */ 885 m_free(m); 886 error = ENOBUFS; /* ??? */ 887 IPSTAT_INC(ips_odropped); 888 goto done; 889 } 890 m->m_pkthdr.len = mhlen + len; 891 #ifdef MAC 892 mac_netinet_fragment(m0, m); 893 #endif 894 mhip->ip_off = htons(mhip->ip_off); 895 mhip->ip_sum = 0; 896 if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 897 mhip->ip_sum = in_cksum(m, mhlen); 898 m->m_pkthdr.csum_flags &= ~CSUM_IP; 899 } 900 *mnext = m; 901 mnext = &m->m_nextpkt; 902 } 903 IPSTAT_ADD(ips_ofragments, nfrags); 904 905 /* 906 * Update first fragment by trimming what's been copied out 907 * and updating header. 908 */ 909 m_adj(m0, hlen + firstlen - ip_len); 910 m0->m_pkthdr.len = hlen + firstlen; 911 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 912 ip->ip_off = htons(ip_off | IP_MF); 913 ip->ip_sum = 0; 914 if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 915 ip->ip_sum = in_cksum(m0, hlen); 916 m0->m_pkthdr.csum_flags &= ~CSUM_IP; 917 } 918 919 done: 920 *m_frag = m0; 921 return error; 922 } 923 924 void 925 in_delayed_cksum(struct mbuf *m) 926 { 927 struct ip *ip; 928 struct udphdr *uh; 929 uint16_t cklen, csum, offset; 930 931 ip = mtod(m, struct ip *); 932 offset = ip->ip_hl << 2 ; 933 934 if (m->m_pkthdr.csum_flags & CSUM_UDP) { 935 /* if udp header is not in the first mbuf copy udplen */ 936 if (offset + sizeof(struct udphdr) > m->m_len) { 937 m_copydata(m, offset + offsetof(struct udphdr, 938 uh_ulen), sizeof(cklen), (caddr_t)&cklen); 939 cklen = ntohs(cklen); 940 } else { 941 uh = (struct udphdr *)mtodo(m, offset); 942 cklen = ntohs(uh->uh_ulen); 943 } 944 csum = in_cksum_skip(m, cklen + offset, offset); 945 if (csum == 0) 946 csum = 0xffff; 947 } else { 948 cklen = ntohs(ip->ip_len); 949 csum = in_cksum_skip(m, cklen, offset); 950 } 951 offset += m->m_pkthdr.csum_data; /* checksum offset */ 952 953 if (offset + sizeof(csum) > m->m_len) 954 m_copyback(m, offset, sizeof(csum), (caddr_t)&csum); 955 else 956 *(u_short *)mtodo(m, offset) = csum; 957 } 958 959 /* 960 * IP socket option processing. 961 */ 962 int 963 ip_ctloutput(struct socket *so, struct sockopt *sopt) 964 { 965 struct inpcb *inp = sotoinpcb(so); 966 int error, optval; 967 #ifdef RSS 968 uint32_t rss_bucket; 969 int retval; 970 #endif 971 972 error = optval = 0; 973 if (sopt->sopt_level != IPPROTO_IP) { 974 error = EINVAL; 975 976 if (sopt->sopt_level == SOL_SOCKET && 977 sopt->sopt_dir == SOPT_SET) { 978 switch (sopt->sopt_name) { 979 case SO_REUSEADDR: 980 INP_WLOCK(inp); 981 if ((so->so_options & SO_REUSEADDR) != 0) 982 inp->inp_flags2 |= INP_REUSEADDR; 983 else 984 inp->inp_flags2 &= ~INP_REUSEADDR; 985 INP_WUNLOCK(inp); 986 error = 0; 987 break; 988 case SO_REUSEPORT: 989 INP_WLOCK(inp); 990 if ((so->so_options & SO_REUSEPORT) != 0) 991 inp->inp_flags2 |= INP_REUSEPORT; 992 else 993 inp->inp_flags2 &= ~INP_REUSEPORT; 994 INP_WUNLOCK(inp); 995 error = 0; 996 break; 997 case SO_REUSEPORT_LB: 998 INP_WLOCK(inp); 999 if ((so->so_options & SO_REUSEPORT_LB) != 0) 1000 inp->inp_flags2 |= INP_REUSEPORT_LB; 1001 else 1002 inp->inp_flags2 &= ~INP_REUSEPORT_LB; 1003 INP_WUNLOCK(inp); 1004 error = 0; 1005 break; 1006 case SO_SETFIB: 1007 INP_WLOCK(inp); 1008 inp->inp_inc.inc_fibnum = so->so_fibnum; 1009 INP_WUNLOCK(inp); 1010 error = 0; 1011 break; 1012 case SO_MAX_PACING_RATE: 1013 #ifdef RATELIMIT 1014 INP_WLOCK(inp); 1015 inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; 1016 INP_WUNLOCK(inp); 1017 error = 0; 1018 #else 1019 error = EOPNOTSUPP; 1020 #endif 1021 break; 1022 default: 1023 break; 1024 } 1025 } 1026 return (error); 1027 } 1028 1029 switch (sopt->sopt_dir) { 1030 case SOPT_SET: 1031 switch (sopt->sopt_name) { 1032 case IP_OPTIONS: 1033 #ifdef notyet 1034 case IP_RETOPTS: 1035 #endif 1036 { 1037 struct mbuf *m; 1038 if (sopt->sopt_valsize > MLEN) { 1039 error = EMSGSIZE; 1040 break; 1041 } 1042 m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); 1043 if (m == NULL) { 1044 error = ENOBUFS; 1045 break; 1046 } 1047 m->m_len = sopt->sopt_valsize; 1048 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1049 m->m_len); 1050 if (error) { 1051 m_free(m); 1052 break; 1053 } 1054 INP_WLOCK(inp); 1055 error = ip_pcbopts(inp, sopt->sopt_name, m); 1056 INP_WUNLOCK(inp); 1057 return (error); 1058 } 1059 1060 case IP_BINDANY: 1061 if (sopt->sopt_td != NULL) { 1062 error = priv_check(sopt->sopt_td, 1063 PRIV_NETINET_BINDANY); 1064 if (error) 1065 break; 1066 } 1067 /* FALLTHROUGH */ 1068 case IP_BINDMULTI: 1069 #ifdef RSS 1070 case IP_RSS_LISTEN_BUCKET: 1071 #endif 1072 case IP_TOS: 1073 case IP_TTL: 1074 case IP_MINTTL: 1075 case IP_RECVOPTS: 1076 case IP_RECVRETOPTS: 1077 case IP_ORIGDSTADDR: 1078 case IP_RECVDSTADDR: 1079 case IP_RECVTTL: 1080 case IP_RECVIF: 1081 case IP_ONESBCAST: 1082 case IP_DONTFRAG: 1083 case IP_RECVTOS: 1084 case IP_RECVFLOWID: 1085 #ifdef RSS 1086 case IP_RECVRSSBUCKETID: 1087 #endif 1088 error = sooptcopyin(sopt, &optval, sizeof optval, 1089 sizeof optval); 1090 if (error) 1091 break; 1092 1093 switch (sopt->sopt_name) { 1094 case IP_TOS: 1095 inp->inp_ip_tos = optval; 1096 break; 1097 1098 case IP_TTL: 1099 inp->inp_ip_ttl = optval; 1100 break; 1101 1102 case IP_MINTTL: 1103 if (optval >= 0 && optval <= MAXTTL) 1104 inp->inp_ip_minttl = optval; 1105 else 1106 error = EINVAL; 1107 break; 1108 1109 #define OPTSET(bit) do { \ 1110 INP_WLOCK(inp); \ 1111 if (optval) \ 1112 inp->inp_flags |= bit; \ 1113 else \ 1114 inp->inp_flags &= ~bit; \ 1115 INP_WUNLOCK(inp); \ 1116 } while (0) 1117 1118 #define OPTSET2(bit, val) do { \ 1119 INP_WLOCK(inp); \ 1120 if (val) \ 1121 inp->inp_flags2 |= bit; \ 1122 else \ 1123 inp->inp_flags2 &= ~bit; \ 1124 INP_WUNLOCK(inp); \ 1125 } while (0) 1126 1127 case IP_RECVOPTS: 1128 OPTSET(INP_RECVOPTS); 1129 break; 1130 1131 case IP_RECVRETOPTS: 1132 OPTSET(INP_RECVRETOPTS); 1133 break; 1134 1135 case IP_RECVDSTADDR: 1136 OPTSET(INP_RECVDSTADDR); 1137 break; 1138 1139 case IP_ORIGDSTADDR: 1140 OPTSET2(INP_ORIGDSTADDR, optval); 1141 break; 1142 1143 case IP_RECVTTL: 1144 OPTSET(INP_RECVTTL); 1145 break; 1146 1147 case IP_RECVIF: 1148 OPTSET(INP_RECVIF); 1149 break; 1150 1151 case IP_ONESBCAST: 1152 OPTSET(INP_ONESBCAST); 1153 break; 1154 case IP_DONTFRAG: 1155 OPTSET(INP_DONTFRAG); 1156 break; 1157 case IP_BINDANY: 1158 OPTSET(INP_BINDANY); 1159 break; 1160 case IP_RECVTOS: 1161 OPTSET(INP_RECVTOS); 1162 break; 1163 case IP_BINDMULTI: 1164 OPTSET2(INP_BINDMULTI, optval); 1165 break; 1166 case IP_RECVFLOWID: 1167 OPTSET2(INP_RECVFLOWID, optval); 1168 break; 1169 #ifdef RSS 1170 case IP_RSS_LISTEN_BUCKET: 1171 if ((optval >= 0) && 1172 (optval < rss_getnumbuckets())) { 1173 inp->inp_rss_listen_bucket = optval; 1174 OPTSET2(INP_RSS_BUCKET_SET, 1); 1175 } else { 1176 error = EINVAL; 1177 } 1178 break; 1179 case IP_RECVRSSBUCKETID: 1180 OPTSET2(INP_RECVRSSBUCKETID, optval); 1181 break; 1182 #endif 1183 } 1184 break; 1185 #undef OPTSET 1186 #undef OPTSET2 1187 1188 /* 1189 * Multicast socket options are processed by the in_mcast 1190 * module. 1191 */ 1192 case IP_MULTICAST_IF: 1193 case IP_MULTICAST_VIF: 1194 case IP_MULTICAST_TTL: 1195 case IP_MULTICAST_LOOP: 1196 case IP_ADD_MEMBERSHIP: 1197 case IP_DROP_MEMBERSHIP: 1198 case IP_ADD_SOURCE_MEMBERSHIP: 1199 case IP_DROP_SOURCE_MEMBERSHIP: 1200 case IP_BLOCK_SOURCE: 1201 case IP_UNBLOCK_SOURCE: 1202 case IP_MSFILTER: 1203 case MCAST_JOIN_GROUP: 1204 case MCAST_LEAVE_GROUP: 1205 case MCAST_JOIN_SOURCE_GROUP: 1206 case MCAST_LEAVE_SOURCE_GROUP: 1207 case MCAST_BLOCK_SOURCE: 1208 case MCAST_UNBLOCK_SOURCE: 1209 error = inp_setmoptions(inp, sopt); 1210 break; 1211 1212 case IP_PORTRANGE: 1213 error = sooptcopyin(sopt, &optval, sizeof optval, 1214 sizeof optval); 1215 if (error) 1216 break; 1217 1218 INP_WLOCK(inp); 1219 switch (optval) { 1220 case IP_PORTRANGE_DEFAULT: 1221 inp->inp_flags &= ~(INP_LOWPORT); 1222 inp->inp_flags &= ~(INP_HIGHPORT); 1223 break; 1224 1225 case IP_PORTRANGE_HIGH: 1226 inp->inp_flags &= ~(INP_LOWPORT); 1227 inp->inp_flags |= INP_HIGHPORT; 1228 break; 1229 1230 case IP_PORTRANGE_LOW: 1231 inp->inp_flags &= ~(INP_HIGHPORT); 1232 inp->inp_flags |= INP_LOWPORT; 1233 break; 1234 1235 default: 1236 error = EINVAL; 1237 break; 1238 } 1239 INP_WUNLOCK(inp); 1240 break; 1241 1242 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1243 case IP_IPSEC_POLICY: 1244 if (IPSEC_ENABLED(ipv4)) { 1245 error = IPSEC_PCBCTL(ipv4, inp, sopt); 1246 break; 1247 } 1248 /* FALLTHROUGH */ 1249 #endif /* IPSEC */ 1250 1251 default: 1252 error = ENOPROTOOPT; 1253 break; 1254 } 1255 break; 1256 1257 case SOPT_GET: 1258 switch (sopt->sopt_name) { 1259 case IP_OPTIONS: 1260 case IP_RETOPTS: 1261 INP_RLOCK(inp); 1262 if (inp->inp_options) { 1263 struct mbuf *options; 1264 1265 options = m_dup(inp->inp_options, M_NOWAIT); 1266 INP_RUNLOCK(inp); 1267 if (options != NULL) { 1268 error = sooptcopyout(sopt, 1269 mtod(options, char *), 1270 options->m_len); 1271 m_freem(options); 1272 } else 1273 error = ENOMEM; 1274 } else { 1275 INP_RUNLOCK(inp); 1276 sopt->sopt_valsize = 0; 1277 } 1278 break; 1279 1280 case IP_TOS: 1281 case IP_TTL: 1282 case IP_MINTTL: 1283 case IP_RECVOPTS: 1284 case IP_RECVRETOPTS: 1285 case IP_ORIGDSTADDR: 1286 case IP_RECVDSTADDR: 1287 case IP_RECVTTL: 1288 case IP_RECVIF: 1289 case IP_PORTRANGE: 1290 case IP_ONESBCAST: 1291 case IP_DONTFRAG: 1292 case IP_BINDANY: 1293 case IP_RECVTOS: 1294 case IP_BINDMULTI: 1295 case IP_FLOWID: 1296 case IP_FLOWTYPE: 1297 case IP_RECVFLOWID: 1298 #ifdef RSS 1299 case IP_RSSBUCKETID: 1300 case IP_RECVRSSBUCKETID: 1301 #endif 1302 switch (sopt->sopt_name) { 1303 1304 case IP_TOS: 1305 optval = inp->inp_ip_tos; 1306 break; 1307 1308 case IP_TTL: 1309 optval = inp->inp_ip_ttl; 1310 break; 1311 1312 case IP_MINTTL: 1313 optval = inp->inp_ip_minttl; 1314 break; 1315 1316 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1317 #define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0) 1318 1319 case IP_RECVOPTS: 1320 optval = OPTBIT(INP_RECVOPTS); 1321 break; 1322 1323 case IP_RECVRETOPTS: 1324 optval = OPTBIT(INP_RECVRETOPTS); 1325 break; 1326 1327 case IP_RECVDSTADDR: 1328 optval = OPTBIT(INP_RECVDSTADDR); 1329 break; 1330 1331 case IP_ORIGDSTADDR: 1332 optval = OPTBIT2(INP_ORIGDSTADDR); 1333 break; 1334 1335 case IP_RECVTTL: 1336 optval = OPTBIT(INP_RECVTTL); 1337 break; 1338 1339 case IP_RECVIF: 1340 optval = OPTBIT(INP_RECVIF); 1341 break; 1342 1343 case IP_PORTRANGE: 1344 if (inp->inp_flags & INP_HIGHPORT) 1345 optval = IP_PORTRANGE_HIGH; 1346 else if (inp->inp_flags & INP_LOWPORT) 1347 optval = IP_PORTRANGE_LOW; 1348 else 1349 optval = 0; 1350 break; 1351 1352 case IP_ONESBCAST: 1353 optval = OPTBIT(INP_ONESBCAST); 1354 break; 1355 case IP_DONTFRAG: 1356 optval = OPTBIT(INP_DONTFRAG); 1357 break; 1358 case IP_BINDANY: 1359 optval = OPTBIT(INP_BINDANY); 1360 break; 1361 case IP_RECVTOS: 1362 optval = OPTBIT(INP_RECVTOS); 1363 break; 1364 case IP_FLOWID: 1365 optval = inp->inp_flowid; 1366 break; 1367 case IP_FLOWTYPE: 1368 optval = inp->inp_flowtype; 1369 break; 1370 case IP_RECVFLOWID: 1371 optval = OPTBIT2(INP_RECVFLOWID); 1372 break; 1373 #ifdef RSS 1374 case IP_RSSBUCKETID: 1375 retval = rss_hash2bucket(inp->inp_flowid, 1376 inp->inp_flowtype, 1377 &rss_bucket); 1378 if (retval == 0) 1379 optval = rss_bucket; 1380 else 1381 error = EINVAL; 1382 break; 1383 case IP_RECVRSSBUCKETID: 1384 optval = OPTBIT2(INP_RECVRSSBUCKETID); 1385 break; 1386 #endif 1387 case IP_BINDMULTI: 1388 optval = OPTBIT2(INP_BINDMULTI); 1389 break; 1390 } 1391 error = sooptcopyout(sopt, &optval, sizeof optval); 1392 break; 1393 1394 /* 1395 * Multicast socket options are processed by the in_mcast 1396 * module. 1397 */ 1398 case IP_MULTICAST_IF: 1399 case IP_MULTICAST_VIF: 1400 case IP_MULTICAST_TTL: 1401 case IP_MULTICAST_LOOP: 1402 case IP_MSFILTER: 1403 error = inp_getmoptions(inp, sopt); 1404 break; 1405 1406 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1407 case IP_IPSEC_POLICY: 1408 if (IPSEC_ENABLED(ipv4)) { 1409 error = IPSEC_PCBCTL(ipv4, inp, sopt); 1410 break; 1411 } 1412 /* FALLTHROUGH */ 1413 #endif /* IPSEC */ 1414 1415 default: 1416 error = ENOPROTOOPT; 1417 break; 1418 } 1419 break; 1420 } 1421 return (error); 1422 } 1423 1424 /* 1425 * Routine called from ip_output() to loop back a copy of an IP multicast 1426 * packet to the input queue of a specified interface. Note that this 1427 * calls the output routine of the loopback "driver", but with an interface 1428 * pointer that might NOT be a loopback interface -- evil, but easier than 1429 * replicating that code here. 1430 */ 1431 static void 1432 ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen) 1433 { 1434 struct ip *ip; 1435 struct mbuf *copym; 1436 1437 /* 1438 * Make a deep copy of the packet because we're going to 1439 * modify the pack in order to generate checksums. 1440 */ 1441 copym = m_dup(m, M_NOWAIT); 1442 if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen)) 1443 copym = m_pullup(copym, hlen); 1444 if (copym != NULL) { 1445 /* If needed, compute the checksum and mark it as valid. */ 1446 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1447 in_delayed_cksum(copym); 1448 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1449 copym->m_pkthdr.csum_flags |= 1450 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1451 copym->m_pkthdr.csum_data = 0xffff; 1452 } 1453 /* 1454 * We don't bother to fragment if the IP length is greater 1455 * than the interface's MTU. Can this possibly matter? 1456 */ 1457 ip = mtod(copym, struct ip *); 1458 ip->ip_sum = 0; 1459 ip->ip_sum = in_cksum(copym, hlen); 1460 if_simloop(ifp, copym, AF_INET, 0); 1461 } 1462 } 1463