1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 * 31 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_inet.h" 38 #include "opt_ratelimit.h" 39 #include "opt_ipsec.h" 40 #include "opt_mbuf_stress_test.h" 41 #include "opt_mpath.h" 42 #include "opt_route.h" 43 #include "opt_sctp.h" 44 #include "opt_rss.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/mbuf.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/protosw.h> 55 #include <sys/rmlock.h> 56 #include <sys/sdt.h> 57 #include <sys/socket.h> 58 #include <sys/socketvar.h> 59 #include <sys/sysctl.h> 60 #include <sys/ucred.h> 61 62 #include <net/if.h> 63 #include <net/if_var.h> 64 #include <net/if_llatbl.h> 65 #include <net/netisr.h> 66 #include <net/pfil.h> 67 #include <net/route.h> 68 #ifdef RADIX_MPATH 69 #include <net/radix_mpath.h> 70 #endif 71 #include <net/rss_config.h> 72 #include <net/vnet.h> 73 74 #include <netinet/in.h> 75 #include <netinet/in_fib.h> 76 #include <netinet/in_kdtrace.h> 77 #include <netinet/in_systm.h> 78 #include <netinet/ip.h> 79 #include <netinet/in_pcb.h> 80 #include <netinet/in_rss.h> 81 #include <netinet/in_var.h> 82 #include <netinet/ip_var.h> 83 #include <netinet/ip_options.h> 84 85 #include <netinet/udp.h> 86 #include <netinet/udp_var.h> 87 88 #ifdef SCTP 89 #include <netinet/sctp.h> 90 #include <netinet/sctp_crc32.h> 91 #endif 92 93 #include <netipsec/ipsec_support.h> 94 95 #include <machine/in_cksum.h> 96 97 #include <security/mac/mac_framework.h> 98 99 #ifdef MBUF_STRESS_TEST 100 static int mbuf_frag_size = 0; 101 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 102 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 103 #endif 104 105 static void ip_mloopback(struct ifnet *, const struct mbuf *, int); 106 107 108 extern int in_mcast_loop; 109 extern struct protosw inetsw[]; 110 111 static inline int 112 
ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp, 113 struct sockaddr_in *dst, int *fibnum, int *error) 114 { 115 struct m_tag *fwd_tag = NULL; 116 struct mbuf *m; 117 struct in_addr odst; 118 struct ip *ip; 119 120 m = *mp; 121 ip = mtod(m, struct ip *); 122 123 /* Run through list of hooks for output packets. */ 124 odst.s_addr = ip->ip_dst.s_addr; 125 switch (pfil_run_hooks(V_inet_pfil_head, mp, ifp, PFIL_OUT, inp)) { 126 case PFIL_DROPPED: 127 *error = EPERM; 128 /* FALLTHROUGH */ 129 case PFIL_CONSUMED: 130 return 1; /* Finished */ 131 case PFIL_PASS: 132 *error = 0; 133 } 134 m = *mp; 135 ip = mtod(m, struct ip *); 136 137 /* See if destination IP address was changed by packet filter. */ 138 if (odst.s_addr != ip->ip_dst.s_addr) { 139 m->m_flags |= M_SKIP_FIREWALL; 140 /* If destination is now ourself drop to ip_input(). */ 141 if (in_localip(ip->ip_dst)) { 142 m->m_flags |= M_FASTFWD_OURS; 143 if (m->m_pkthdr.rcvif == NULL) 144 m->m_pkthdr.rcvif = V_loif; 145 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 146 m->m_pkthdr.csum_flags |= 147 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 148 m->m_pkthdr.csum_data = 0xffff; 149 } 150 m->m_pkthdr.csum_flags |= 151 CSUM_IP_CHECKED | CSUM_IP_VALID; 152 #ifdef SCTP 153 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 154 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 155 #endif 156 *error = netisr_queue(NETISR_IP, m); 157 return 1; /* Finished */ 158 } 159 160 bzero(dst, sizeof(*dst)); 161 dst->sin_family = AF_INET; 162 dst->sin_len = sizeof(*dst); 163 dst->sin_addr = ip->ip_dst; 164 165 return -1; /* Reloop */ 166 } 167 /* See if fib was changed by packet filter. */ 168 if ((*fibnum) != M_GETFIB(m)) { 169 m->m_flags |= M_SKIP_FIREWALL; 170 *fibnum = M_GETFIB(m); 171 return -1; /* Reloop for FIB change */ 172 } 173 174 /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. 
*/ 175 if (m->m_flags & M_FASTFWD_OURS) { 176 if (m->m_pkthdr.rcvif == NULL) 177 m->m_pkthdr.rcvif = V_loif; 178 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 179 m->m_pkthdr.csum_flags |= 180 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 181 m->m_pkthdr.csum_data = 0xffff; 182 } 183 #ifdef SCTP 184 if (m->m_pkthdr.csum_flags & CSUM_SCTP) 185 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 186 #endif 187 m->m_pkthdr.csum_flags |= 188 CSUM_IP_CHECKED | CSUM_IP_VALID; 189 190 *error = netisr_queue(NETISR_IP, m); 191 return 1; /* Finished */ 192 } 193 /* Or forward to some other address? */ 194 if ((m->m_flags & M_IP_NEXTHOP) && 195 ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { 196 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); 197 m->m_flags |= M_SKIP_FIREWALL; 198 m->m_flags &= ~M_IP_NEXTHOP; 199 m_tag_delete(m, fwd_tag); 200 201 return -1; /* Reloop for CHANGE of dst */ 202 } 203 204 return 0; 205 } 206 207 static int 208 ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, 209 const struct sockaddr_in *gw, struct route *ro) 210 { 211 struct m_snd_tag *mst; 212 int error; 213 214 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 215 mst = NULL; 216 217 #ifdef RATELIMIT 218 if (inp != NULL) { 219 if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 || 220 (inp->inp_snd_tag != NULL && 221 inp->inp_snd_tag->ifp != ifp)) 222 in_pcboutput_txrtlmt(inp, ifp, m); 223 224 if (inp->inp_snd_tag != NULL) 225 mst = inp->inp_snd_tag; 226 } 227 #endif 228 if (mst != NULL) { 229 KASSERT(m->m_pkthdr.rcvif == NULL, 230 ("trying to add a send tag to a forwarded packet")); 231 if (mst->ifp != ifp) { 232 error = EAGAIN; 233 goto done; 234 } 235 236 /* stamp send tag on mbuf */ 237 m->m_pkthdr.snd_tag = m_snd_tag_ref(mst); 238 m->m_pkthdr.csum_flags |= CSUM_SND_TAG; 239 } 240 241 error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); 242 243 done: 244 /* Check for route change invalidating send tags. 
 */
#ifdef RATELIMIT
	if (error == EAGAIN)
		in_pcboutput_eagain(inp);
#endif
	return (error);
}

/*
 * IP output. The packet in mbuf chain m contains a skeletal IP
 * header (with len, off, ttl, proto, tos, src, dst).
 * The mbuf chain containing the packet will be freed.
 * The mbuf opt, if present, will not be freed.
 * If route ro is present and has ro_rt initialized, route lookup would be
 * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
 * then result of route lookup is stored in ro->ro_rt.
 *
 * In the IP forwarding case, the packet will arrive with options already
 * inserted, so must have a NULL opt pointer.
 *
 * Returns 0 or an errno value.  Internal labels:
 *   again  - (re)select the outgoing interface; re-entered when the packet
 *            filter changed the destination, FIB or next hop.
 *   sendit - IPsec, packet filter, checksum and transmit/fragment stage.
 *   bad    - free m, then fall into done.
 *   done   - leave the network epoch and return error.
 */
int
ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
    struct ip_moptions *imo, struct inpcb *inp)
{
	struct rm_priotracker in_ifa_tracker;
	struct epoch_tracker et;
	struct ip *ip;
	struct ifnet *ifp = NULL;	/* keep compiler happy */
	struct mbuf *m0;
	int hlen = sizeof (struct ip);
	int mtu;
	int error = 0;
	struct sockaddr_in *dst, sin;
	const struct sockaddr_in *gw;
	struct in_ifaddr *ia;
	struct in_addr src;
	int isbroadcast;
	uint16_t ip_len, ip_off;
	uint32_t fibnum;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
	int no_route_but_check_spd = 0;
#endif
	M_ASSERTPKTHDR(m);

	/* Inherit FIB, flow id and NUMA domain from the owning connection. */
	if (inp != NULL) {
		INP_LOCK_ASSERT(inp);
		M_SETFIB(m, inp->inp_inc.inc_fibnum);
		if ((flags & IP_NODEFAULTFLOWID) == 0) {
			m->m_pkthdr.flowid = inp->inp_flowid;
			M_HASHTYPE_SET(m, inp->inp_flowtype);
		}
#ifdef NUMA
		m->m_pkthdr.numa_domain = inp->inp_numa_domain;
#endif
	}

	if (opt) {
		int len = 0;
		m = ip_insertoptions(m, opt, &len);
		if (len != 0)
			hlen = len; /* ip->ip_hl is updated above */
	}
	ip = mtod(m, struct ip *);
	/* Host-order copies; the header itself stays in network order. */
	ip_len = ntohs(ip->ip_len);
	ip_off = ntohs(ip->ip_off);

	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
		ip->ip_v = IPVERSION;
		ip->ip_hl = hlen >> 2;
		ip_fillid(ip);
	} else {
		/* Header already set, fetch hlen from there */
		hlen = ip->ip_hl << 2;
	}
	if ((flags & IP_FORWARDING) == 0)
		IPSTAT_INC(ips_localout);

	/*
	 * dst/gw handling:
	 *
	 * gw is readonly but can point either to dst OR rt_gateway,
	 * therefore we need restore gw if we're redoing lookup.
	 */
	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
	if (ro != NULL)
		dst = (struct sockaddr_in *)&ro->ro_dst;
	else
		dst = &sin;
	if (ro == NULL || ro->ro_rt == NULL) {
		bzero(dst, sizeof(*dst));
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = ip->ip_dst;
	}
	gw = dst;
	NET_EPOCH_ENTER(et);
again:
	/*
	 * Validate route against routing table additions;
	 * a better/more specific route might have been added.
	 */
	if (inp != NULL && ro != NULL && ro->ro_rt != NULL)
		RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
	/*
	 * If there is a cached route,
	 * check that it is to the same destination
	 * and is still up. If not, free it and try again.
	 * The address family should also be checked in case of sharing the
	 * cache with IPv6.
	 * Also check whether routing cache needs invalidation.
	 */
	if (ro != NULL && ro->ro_rt != NULL &&
	    ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
	    ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
	    dst->sin_family != AF_INET ||
	    dst->sin_addr.s_addr != ip->ip_dst.s_addr))
		RO_INVALIDATE_CACHE(ro);
	ia = NULL;
	/*
	 * If routing to interface only, short circuit routing lookup.
	 * The use of an all-ones broadcast address implies this; an
	 * interface is specified by the broadcast address of an interface,
	 * or the destination address of a ptp interface.
	 */
	if (flags & IP_SENDONES) {
		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
		    M_GETFIB(m)))) == NULL &&
		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
		    M_GETFIB(m)))) == NULL) {
			IPSTAT_INC(ips_noroute);
			error = ENETUNREACH;
			goto bad;
		}
		ip->ip_dst.s_addr = INADDR_BROADCAST;
		dst->sin_addr = ip->ip_dst;
		ifp = ia->ia_ifp;
		mtu = ifp->if_mtu;
		ip->ip_ttl = 1;
		isbroadcast = 1;
		src = IA_SIN(ia)->sin_addr;
	} else if (flags & IP_ROUTETOIF) {
		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
		    M_GETFIB(m)))) == NULL &&
		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
		    M_GETFIB(m)))) == NULL) {
			IPSTAT_INC(ips_noroute);
			error = ENETUNREACH;
			goto bad;
		}
		ifp = ia->ia_ifp;
		mtu = ifp->if_mtu;
		ip->ip_ttl = 1;
		isbroadcast = ifp->if_flags & IFF_BROADCAST ?
		    in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
		src = IA_SIN(ia)->sin_addr;
	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
	    imo != NULL && imo->imo_multicast_ifp != NULL) {
		/*
		 * Bypass the normal routing lookup for multicast
		 * packets if the interface is specified.
		 */
		ifp = imo->imo_multicast_ifp;
		mtu = ifp->if_mtu;
		IFP_TO_IA(ifp, ia, &in_ifa_tracker);
		isbroadcast = 0;	/* fool gcc */
		/* Interface may have no addresses. */
		if (ia != NULL)
			src = IA_SIN(ia)->sin_addr;
		else
			src.s_addr = INADDR_ANY;
	} else if (ro != NULL) {
		if (ro->ro_rt == NULL) {
			/*
			 * We want to do any cloning requested by the link
			 * layer, as this is probably required in all cases
			 * for correct operation (as it is for ARP).
			 */
#ifdef RADIX_MPATH
			rtalloc_mpath_fib(ro,
			    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
			    fibnum);
#else
			in_rtalloc_ign(ro, 0, fibnum);
#endif
			if (ro->ro_rt == NULL ||
			    (ro->ro_rt->rt_flags & RTF_UP) == 0 ||
			    ro->ro_rt->rt_ifp == NULL ||
			    !RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) {
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
				/*
				 * There is no route for this packet, but it is
				 * possible that a matching SPD entry exists.
				 */
				no_route_but_check_spd = 1;
				mtu = 0; /* Silence GCC warning. */
				goto sendit;
#endif
				IPSTAT_INC(ips_noroute);
				error = EHOSTUNREACH;
				goto bad;
			}
		}
		ia = ifatoia(ro->ro_rt->rt_ifa);
		ifp = ro->ro_rt->rt_ifp;
		counter_u64_add(ro->ro_rt->rt_pksent, 1);
		rt_update_ro_flags(ro);
		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
		if (ro->ro_rt->rt_flags & RTF_HOST)
			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
		else if (ifp->if_flags & IFF_BROADCAST)
			isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
		else
			isbroadcast = 0;
		/* Only host routes carry their own MTU here. */
		if (ro->ro_rt->rt_flags & RTF_HOST)
			mtu = ro->ro_rt->rt_mtu;
		else
			mtu = ifp->if_mtu;
		src = IA_SIN(ia)->sin_addr;
	} else {
		struct nhop4_extended nh;

		bzero(&nh, sizeof(nh));
		if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh) !=
		    0) {
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
			/*
			 * There is no route for this packet, but it is
			 * possible that a matching SPD entry exists.
			 */
			no_route_but_check_spd = 1;
			mtu = 0; /* Silence GCC warning. */
			goto sendit;
#endif
			IPSTAT_INC(ips_noroute);
			error = EHOSTUNREACH;
			goto bad;
		}
		ifp = nh.nh_ifp;
		mtu = nh.nh_mtu;
		/*
		 * We are rewriting here dst to be gw actually, contradicting
		 * comment at the beginning of the function. However, in this
		 * case we are always dealing with on stack dst.
		 * In case if pfil(9) sends us back to beginning of the
		 * function, the dst would be rewritten by ip_output_pfil().
		 */
		MPASS(dst == &sin);
		dst->sin_addr = nh.nh_addr;
		ia = nh.nh_ia;
		src = nh.nh_src;
		isbroadcast = (((nh.nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
		    (NHF_HOST | NHF_BROADCAST)) ||
		    ((ifp->if_flags & IFF_BROADCAST) &&
		    in_ifaddr_broadcast(dst->sin_addr, ia)));
	}

	/* Catch a possible divide by zero later. */
	KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p",
	    __func__, mtu, ro,
	    (ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp));

	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
		m->m_flags |= M_MCAST;
		/*
		 * IP destination address is multicast. Make sure "gw"
		 * still points to the address in "ro". (It may have been
		 * changed to point to a gateway address, above.)
		 */
		gw = dst;
		/*
		 * See if the caller provided any multicast options
		 */
		if (imo != NULL) {
			ip->ip_ttl = imo->imo_multicast_ttl;
			if (imo->imo_multicast_vif != -1)
				ip->ip_src.s_addr =
				    ip_mcast_src ?
				    ip_mcast_src(imo->imo_multicast_vif) :
				    INADDR_ANY;
		} else
			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
		/*
		 * Confirm that the outgoing interface supports multicast.
		 */
		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
				IPSTAT_INC(ips_noroute);
				error = ENETUNREACH;
				goto bad;
			}
		}
		/*
		 * If source address not specified yet, use address
		 * of outgoing interface.
		 */
		if (ip->ip_src.s_addr == INADDR_ANY)
			ip->ip_src = src;

		if ((imo == NULL && in_mcast_loop) ||
		    (imo && imo->imo_multicast_loop)) {
			/*
			 * Loop back multicast datagram if not expressly
			 * forbidden to do so, even if we are not a member
			 * of the group; ip_input() will filter it later,
			 * thus deferring a hash lookup and mutex acquisition
			 * at the expense of a cheap copy using m_copym().
			 */
			ip_mloopback(ifp, m, hlen);
		} else {
			/*
			 * If we are acting as a multicast router, perform
			 * multicast forwarding as if the packet had just
			 * arrived on the interface to which we are about
			 * to send. The multicast forwarding function
			 * recursively calls this function, using the
			 * IP_FORWARDING flag to prevent infinite recursion.
			 *
			 * Multicasts that are looped back by ip_mloopback(),
			 * above, will be forwarded by the ip_input() routine,
			 * if necessary.
			 */
			if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
				/*
				 * If rsvp daemon is not running, do not
				 * set ip_moptions. This ensures that the packet
				 * is multicast and not just sent down one link
				 * as prescribed by rsvpd.
				 */
				if (!V_rsvp_on)
					imo = NULL;
				if (ip_mforward &&
				    ip_mforward(ip, ifp, m, imo) != 0) {
					m_freem(m);
					goto done;
				}
			}
		}

		/*
		 * Multicasts with a time-to-live of zero may be looped-
		 * back, above, but must not be transmitted on a network.
		 * Also, multicasts addressed to the loopback interface
		 * are not sent -- the above call to ip_mloopback() will
		 * loop back a copy. ip_input() will drop the copy if
		 * this host does not belong to the destination group on
		 * the loopback interface.
		 */
		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
			m_freem(m);
			goto done;
		}

		goto sendit;
	}

	/*
	 * If the source address is not specified yet, use the address
	 * of the outgoing interface.
	 */
	if (ip->ip_src.s_addr == INADDR_ANY)
		ip->ip_src = src;

	/*
	 * Look for broadcast address and
	 * verify user is allowed to send
	 * such a packet.
	 */
	if (isbroadcast) {
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if ((flags & IP_ALLOWBROADCAST) == 0) {
			error = EACCES;
			goto bad;
		}
		/* don't allow broadcast messages to be fragmented */
		if (ip_len > mtu) {
			error = EMSGSIZE;
			goto bad;
		}
		m->m_flags |= M_BCAST;
	} else {
		m->m_flags &= ~M_BCAST;
	}

sendit:
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
	if (IPSEC_ENABLED(ipv4)) {
		if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) {
			/* EINPROGRESS is not reported to the caller. */
			if (error == EINPROGRESS)
				error = 0;
			goto done;
		}
	}
	/*
	 * Check if there was a route for this packet; return error if not.
	 */
	if (no_route_but_check_spd) {
		IPSTAT_INC(ips_noroute);
		error = EHOSTUNREACH;
		goto bad;
	}
	/* Update variables that are affected by ipsec4_output(). */
	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;
#endif /* IPSEC */

	/* Jump over all PFIL processing if hooks are not active. */
	if (PFIL_HOOKED_OUT(V_inet_pfil_head)) {
		switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) {
		case 1: /* Finished */
			goto done;

		case 0: /* Continue normally */
			ip = mtod(m, struct ip *);
			break;

		case -1: /* Need to try again */
			/* Reset everything for a new round */
			if (ro != NULL) {
				RO_RTFREE(ro);
				ro->ro_prepend = NULL;
			}
			gw = dst;
			ip = mtod(m, struct ip *);
			goto again;

		}
	}

	/* IN_LOOPBACK must not appear on the wire - RFC1122. */
	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
			IPSTAT_INC(ips_badaddr);
			error = EADDRNOTAVAIL;
			goto bad;
		}
	}

	/*
	 * Compute in software whatever checksum work was requested but is
	 * not advertised in the interface's if_hwassist.
	 */
	m->m_pkthdr.csum_flags |= CSUM_IP;
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
		in_delayed_cksum(m);
		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	}
#ifdef SCTP
	if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
	}
#endif

	/*
	 * If small enough for interface, or the interface will take
	 * care of the fragmentation for us, we can just send directly.
	 */
	if (ip_len <= mtu ||
	    (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
		ip->ip_sum = 0;
		if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
			ip->ip_sum = in_cksum(m, hlen);
			m->m_pkthdr.csum_flags &= ~CSUM_IP;
		}

		/*
		 * Record statistics for this interface address.
		 * With CSUM_TSO the byte/packet count will be slightly
		 * incorrect because we count the IP+TCP headers only
		 * once instead of for every generated packet.
		 */
		if (!(flags & IP_FORWARDING) && ia) {
			if (m->m_pkthdr.csum_flags & CSUM_TSO)
				counter_u64_add(ia->ia_ifa.ifa_opackets,
				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
			else
				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);

			counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
		}
#ifdef MBUF_STRESS_TEST
		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
			m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
#endif
		/*
		 * Reset layer specific mbuf flags
		 * to avoid confusing lower layers.
		 */
		m_clrprotoflags(m);
		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
		error = ip_output_send(inp, ifp, m, gw, ro);
		goto done;
	}

	/* Balk when DF bit is set or the interface didn't support TSO. */
	if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
		error = EMSGSIZE;
		IPSTAT_INC(ips_cantfrag);
		goto bad;
	}

	/*
	 * Too large for interface; fragment if possible. If successful,
	 * on return, m will point to a list of packets to be sent.
	 *
	 * NOTE(review): the m_freem() at "bad" releases only m's own mbuf
	 * chain; confirm ip_fragment() leaves nothing linked through
	 * m_nextpkt when it fails.
	 */
	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
	if (error)
		goto bad;
	/*
	 * Send the fragments in order.  After the first send failure the
	 * remaining fragments are freed instead of transmitted.
	 */
	for (; m; m = m0) {
		m0 = m->m_nextpkt;
		m->m_nextpkt = 0;
		if (error == 0) {
			/* Record statistics for this interface address. */
			if (ia != NULL) {
				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
				counter_u64_add(ia->ia_ifa.ifa_obytes,
				    m->m_pkthdr.len);
			}
			/*
			 * Reset layer specific mbuf flags
			 * to avoid confusing lower layers.
			 */
			m_clrprotoflags(m);

			IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
			    mtod(m, struct ip *), NULL);
			error = ip_output_send(inp, ifp, m, gw, ro);
		} else
			m_freem(m);
	}

	if (error == 0)
		IPSTAT_INC(ips_fragmented);

done:
	NET_EPOCH_EXIT(et);
	return (error);
bad:
	m_freem(m);
	goto done;
}

/*
 * Create a chain of fragments which fit the given mtu. m_frag points to the
 * mbuf to be fragmented; on return it points to the chain with the fragments.
 * Return 0 if no error. If error, m_frag may contain a partially built
 * chain of fragments that should be freed by the caller.
792 * 793 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 794 */ 795 int 796 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 797 u_long if_hwassist_flags) 798 { 799 int error = 0; 800 int hlen = ip->ip_hl << 2; 801 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 802 int off; 803 struct mbuf *m0 = *m_frag; /* the original packet */ 804 int firstlen; 805 struct mbuf **mnext; 806 int nfrags; 807 uint16_t ip_len, ip_off; 808 809 ip_len = ntohs(ip->ip_len); 810 ip_off = ntohs(ip->ip_off); 811 812 if (ip_off & IP_DF) { /* Fragmentation not allowed */ 813 IPSTAT_INC(ips_cantfrag); 814 return EMSGSIZE; 815 } 816 817 /* 818 * Must be able to put at least 8 bytes per fragment. 819 */ 820 if (len < 8) 821 return EMSGSIZE; 822 823 /* 824 * If the interface will not calculate checksums on 825 * fragmented packets, then do it here. 826 */ 827 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 828 in_delayed_cksum(m0); 829 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 830 } 831 #ifdef SCTP 832 if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { 833 sctp_delayed_cksum(m0, hlen); 834 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; 835 } 836 #endif 837 if (len > PAGE_SIZE) { 838 /* 839 * Fragment large datagrams such that each segment 840 * contains a multiple of PAGE_SIZE amount of data, 841 * plus headers. This enables a receiver to perform 842 * page-flipping zero-copy optimizations. 843 * 844 * XXX When does this help given that sender and receiver 845 * could have different page sizes, and also mtu could 846 * be less than the receiver's page size ? 
847 */ 848 int newlen; 849 850 off = MIN(mtu, m0->m_pkthdr.len); 851 852 /* 853 * firstlen (off - hlen) must be aligned on an 854 * 8-byte boundary 855 */ 856 if (off < hlen) 857 goto smart_frag_failure; 858 off = ((off - hlen) & ~7) + hlen; 859 newlen = (~PAGE_MASK) & mtu; 860 if ((newlen + sizeof (struct ip)) > mtu) { 861 /* we failed, go back the default */ 862 smart_frag_failure: 863 newlen = len; 864 off = hlen + len; 865 } 866 len = newlen; 867 868 } else { 869 off = hlen + len; 870 } 871 872 firstlen = off - hlen; 873 mnext = &m0->m_nextpkt; /* pointer to next packet */ 874 875 /* 876 * Loop through length of segment after first fragment, 877 * make new header and copy data of each part and link onto chain. 878 * Here, m0 is the original packet, m is the fragment being created. 879 * The fragments are linked off the m_nextpkt of the original 880 * packet, which after processing serves as the first fragment. 881 */ 882 for (nfrags = 1; off < ip_len; off += len, nfrags++) { 883 struct ip *mhip; /* ip header on the fragment */ 884 struct mbuf *m; 885 int mhlen = sizeof (struct ip); 886 887 m = m_gethdr(M_NOWAIT, MT_DATA); 888 if (m == NULL) { 889 error = ENOBUFS; 890 IPSTAT_INC(ips_odropped); 891 goto done; 892 } 893 /* 894 * Make sure the complete packet header gets copied 895 * from the originating mbuf to the newly created 896 * mbuf. This also ensures that existing firewall 897 * classification(s), VLAN tags and so on get copied 898 * to the resulting fragmented packet(s): 899 */ 900 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { 901 m_free(m); 902 error = ENOBUFS; 903 IPSTAT_INC(ips_odropped); 904 goto done; 905 } 906 /* 907 * In the first mbuf, leave room for the link header, then 908 * copy the original IP header including options. The payload 909 * goes into an additional mbuf chain returned by m_copym(). 
910 */ 911 m->m_data += max_linkhdr; 912 mhip = mtod(m, struct ip *); 913 *mhip = *ip; 914 if (hlen > sizeof (struct ip)) { 915 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 916 mhip->ip_v = IPVERSION; 917 mhip->ip_hl = mhlen >> 2; 918 } 919 m->m_len = mhlen; 920 /* XXX do we need to add ip_off below ? */ 921 mhip->ip_off = ((off - hlen) >> 3) + ip_off; 922 if (off + len >= ip_len) 923 len = ip_len - off; 924 else 925 mhip->ip_off |= IP_MF; 926 mhip->ip_len = htons((u_short)(len + mhlen)); 927 m->m_next = m_copym(m0, off, len, M_NOWAIT); 928 if (m->m_next == NULL) { /* copy failed */ 929 m_free(m); 930 error = ENOBUFS; /* ??? */ 931 IPSTAT_INC(ips_odropped); 932 goto done; 933 } 934 m->m_pkthdr.len = mhlen + len; 935 #ifdef MAC 936 mac_netinet_fragment(m0, m); 937 #endif 938 mhip->ip_off = htons(mhip->ip_off); 939 mhip->ip_sum = 0; 940 if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 941 mhip->ip_sum = in_cksum(m, mhlen); 942 m->m_pkthdr.csum_flags &= ~CSUM_IP; 943 } 944 *mnext = m; 945 mnext = &m->m_nextpkt; 946 } 947 IPSTAT_ADD(ips_ofragments, nfrags); 948 949 /* 950 * Update first fragment by trimming what's been copied out 951 * and updating header. 
952 */ 953 m_adj(m0, hlen + firstlen - ip_len); 954 m0->m_pkthdr.len = hlen + firstlen; 955 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 956 ip->ip_off = htons(ip_off | IP_MF); 957 ip->ip_sum = 0; 958 if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) { 959 ip->ip_sum = in_cksum(m0, hlen); 960 m0->m_pkthdr.csum_flags &= ~CSUM_IP; 961 } 962 963 done: 964 *m_frag = m0; 965 return error; 966 } 967 968 void 969 in_delayed_cksum(struct mbuf *m) 970 { 971 struct ip *ip; 972 struct udphdr *uh; 973 uint16_t cklen, csum, offset; 974 975 ip = mtod(m, struct ip *); 976 offset = ip->ip_hl << 2 ; 977 978 if (m->m_pkthdr.csum_flags & CSUM_UDP) { 979 /* if udp header is not in the first mbuf copy udplen */ 980 if (offset + sizeof(struct udphdr) > m->m_len) { 981 m_copydata(m, offset + offsetof(struct udphdr, 982 uh_ulen), sizeof(cklen), (caddr_t)&cklen); 983 cklen = ntohs(cklen); 984 } else { 985 uh = (struct udphdr *)mtodo(m, offset); 986 cklen = ntohs(uh->uh_ulen); 987 } 988 csum = in_cksum_skip(m, cklen + offset, offset); 989 if (csum == 0) 990 csum = 0xffff; 991 } else { 992 cklen = ntohs(ip->ip_len); 993 csum = in_cksum_skip(m, cklen, offset); 994 } 995 offset += m->m_pkthdr.csum_data; /* checksum offset */ 996 997 if (offset + sizeof(csum) > m->m_len) 998 m_copyback(m, offset, sizeof(csum), (caddr_t)&csum); 999 else 1000 *(u_short *)mtodo(m, offset) = csum; 1001 } 1002 1003 /* 1004 * IP socket option processing. 
1005 */ 1006 int 1007 ip_ctloutput(struct socket *so, struct sockopt *sopt) 1008 { 1009 struct inpcb *inp = sotoinpcb(so); 1010 int error, optval; 1011 #ifdef RSS 1012 uint32_t rss_bucket; 1013 int retval; 1014 #endif 1015 1016 error = optval = 0; 1017 if (sopt->sopt_level != IPPROTO_IP) { 1018 error = EINVAL; 1019 1020 if (sopt->sopt_level == SOL_SOCKET && 1021 sopt->sopt_dir == SOPT_SET) { 1022 switch (sopt->sopt_name) { 1023 case SO_REUSEADDR: 1024 INP_WLOCK(inp); 1025 if ((so->so_options & SO_REUSEADDR) != 0) 1026 inp->inp_flags2 |= INP_REUSEADDR; 1027 else 1028 inp->inp_flags2 &= ~INP_REUSEADDR; 1029 INP_WUNLOCK(inp); 1030 error = 0; 1031 break; 1032 case SO_REUSEPORT: 1033 INP_WLOCK(inp); 1034 if ((so->so_options & SO_REUSEPORT) != 0) 1035 inp->inp_flags2 |= INP_REUSEPORT; 1036 else 1037 inp->inp_flags2 &= ~INP_REUSEPORT; 1038 INP_WUNLOCK(inp); 1039 error = 0; 1040 break; 1041 case SO_REUSEPORT_LB: 1042 INP_WLOCK(inp); 1043 if ((so->so_options & SO_REUSEPORT_LB) != 0) 1044 inp->inp_flags2 |= INP_REUSEPORT_LB; 1045 else 1046 inp->inp_flags2 &= ~INP_REUSEPORT_LB; 1047 INP_WUNLOCK(inp); 1048 error = 0; 1049 break; 1050 case SO_SETFIB: 1051 INP_WLOCK(inp); 1052 inp->inp_inc.inc_fibnum = so->so_fibnum; 1053 INP_WUNLOCK(inp); 1054 error = 0; 1055 break; 1056 case SO_MAX_PACING_RATE: 1057 #ifdef RATELIMIT 1058 INP_WLOCK(inp); 1059 inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; 1060 INP_WUNLOCK(inp); 1061 error = 0; 1062 #else 1063 error = EOPNOTSUPP; 1064 #endif 1065 break; 1066 default: 1067 break; 1068 } 1069 } 1070 return (error); 1071 } 1072 1073 switch (sopt->sopt_dir) { 1074 case SOPT_SET: 1075 switch (sopt->sopt_name) { 1076 case IP_OPTIONS: 1077 #ifdef notyet 1078 case IP_RETOPTS: 1079 #endif 1080 { 1081 struct mbuf *m; 1082 if (sopt->sopt_valsize > MLEN) { 1083 error = EMSGSIZE; 1084 break; 1085 } 1086 m = m_get(sopt->sopt_td ? 
M_WAITOK : M_NOWAIT, MT_DATA); 1087 if (m == NULL) { 1088 error = ENOBUFS; 1089 break; 1090 } 1091 m->m_len = sopt->sopt_valsize; 1092 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1093 m->m_len); 1094 if (error) { 1095 m_free(m); 1096 break; 1097 } 1098 INP_WLOCK(inp); 1099 error = ip_pcbopts(inp, sopt->sopt_name, m); 1100 INP_WUNLOCK(inp); 1101 return (error); 1102 } 1103 1104 case IP_BINDANY: 1105 if (sopt->sopt_td != NULL) { 1106 error = priv_check(sopt->sopt_td, 1107 PRIV_NETINET_BINDANY); 1108 if (error) 1109 break; 1110 } 1111 /* FALLTHROUGH */ 1112 case IP_BINDMULTI: 1113 #ifdef RSS 1114 case IP_RSS_LISTEN_BUCKET: 1115 #endif 1116 case IP_TOS: 1117 case IP_TTL: 1118 case IP_MINTTL: 1119 case IP_RECVOPTS: 1120 case IP_RECVRETOPTS: 1121 case IP_ORIGDSTADDR: 1122 case IP_RECVDSTADDR: 1123 case IP_RECVTTL: 1124 case IP_RECVIF: 1125 case IP_ONESBCAST: 1126 case IP_DONTFRAG: 1127 case IP_RECVTOS: 1128 case IP_RECVFLOWID: 1129 #ifdef RSS 1130 case IP_RECVRSSBUCKETID: 1131 #endif 1132 error = sooptcopyin(sopt, &optval, sizeof optval, 1133 sizeof optval); 1134 if (error) 1135 break; 1136 1137 switch (sopt->sopt_name) { 1138 case IP_TOS: 1139 inp->inp_ip_tos = optval; 1140 break; 1141 1142 case IP_TTL: 1143 inp->inp_ip_ttl = optval; 1144 break; 1145 1146 case IP_MINTTL: 1147 if (optval >= 0 && optval <= MAXTTL) 1148 inp->inp_ip_minttl = optval; 1149 else 1150 error = EINVAL; 1151 break; 1152 1153 #define OPTSET(bit) do { \ 1154 INP_WLOCK(inp); \ 1155 if (optval) \ 1156 inp->inp_flags |= bit; \ 1157 else \ 1158 inp->inp_flags &= ~bit; \ 1159 INP_WUNLOCK(inp); \ 1160 } while (0) 1161 1162 #define OPTSET2(bit, val) do { \ 1163 INP_WLOCK(inp); \ 1164 if (val) \ 1165 inp->inp_flags2 |= bit; \ 1166 else \ 1167 inp->inp_flags2 &= ~bit; \ 1168 INP_WUNLOCK(inp); \ 1169 } while (0) 1170 1171 case IP_RECVOPTS: 1172 OPTSET(INP_RECVOPTS); 1173 break; 1174 1175 case IP_RECVRETOPTS: 1176 OPTSET(INP_RECVRETOPTS); 1177 break; 1178 1179 case IP_RECVDSTADDR: 1180 
OPTSET(INP_RECVDSTADDR); 1181 break; 1182 1183 case IP_ORIGDSTADDR: 1184 OPTSET2(INP_ORIGDSTADDR, optval); 1185 break; 1186 1187 case IP_RECVTTL: 1188 OPTSET(INP_RECVTTL); 1189 break; 1190 1191 case IP_RECVIF: 1192 OPTSET(INP_RECVIF); 1193 break; 1194 1195 case IP_ONESBCAST: 1196 OPTSET(INP_ONESBCAST); 1197 break; 1198 case IP_DONTFRAG: 1199 OPTSET(INP_DONTFRAG); 1200 break; 1201 case IP_BINDANY: 1202 OPTSET(INP_BINDANY); 1203 break; 1204 case IP_RECVTOS: 1205 OPTSET(INP_RECVTOS); 1206 break; 1207 case IP_BINDMULTI: 1208 OPTSET2(INP_BINDMULTI, optval); 1209 break; 1210 case IP_RECVFLOWID: 1211 OPTSET2(INP_RECVFLOWID, optval); 1212 break; 1213 #ifdef RSS 1214 case IP_RSS_LISTEN_BUCKET: 1215 if ((optval >= 0) && 1216 (optval < rss_getnumbuckets())) { 1217 inp->inp_rss_listen_bucket = optval; 1218 OPTSET2(INP_RSS_BUCKET_SET, 1); 1219 } else { 1220 error = EINVAL; 1221 } 1222 break; 1223 case IP_RECVRSSBUCKETID: 1224 OPTSET2(INP_RECVRSSBUCKETID, optval); 1225 break; 1226 #endif 1227 } 1228 break; 1229 #undef OPTSET 1230 #undef OPTSET2 1231 1232 /* 1233 * Multicast socket options are processed by the in_mcast 1234 * module. 
1235 */ 1236 case IP_MULTICAST_IF: 1237 case IP_MULTICAST_VIF: 1238 case IP_MULTICAST_TTL: 1239 case IP_MULTICAST_LOOP: 1240 case IP_ADD_MEMBERSHIP: 1241 case IP_DROP_MEMBERSHIP: 1242 case IP_ADD_SOURCE_MEMBERSHIP: 1243 case IP_DROP_SOURCE_MEMBERSHIP: 1244 case IP_BLOCK_SOURCE: 1245 case IP_UNBLOCK_SOURCE: 1246 case IP_MSFILTER: 1247 case MCAST_JOIN_GROUP: 1248 case MCAST_LEAVE_GROUP: 1249 case MCAST_JOIN_SOURCE_GROUP: 1250 case MCAST_LEAVE_SOURCE_GROUP: 1251 case MCAST_BLOCK_SOURCE: 1252 case MCAST_UNBLOCK_SOURCE: 1253 error = inp_setmoptions(inp, sopt); 1254 break; 1255 1256 case IP_PORTRANGE: 1257 error = sooptcopyin(sopt, &optval, sizeof optval, 1258 sizeof optval); 1259 if (error) 1260 break; 1261 1262 INP_WLOCK(inp); 1263 switch (optval) { 1264 case IP_PORTRANGE_DEFAULT: 1265 inp->inp_flags &= ~(INP_LOWPORT); 1266 inp->inp_flags &= ~(INP_HIGHPORT); 1267 break; 1268 1269 case IP_PORTRANGE_HIGH: 1270 inp->inp_flags &= ~(INP_LOWPORT); 1271 inp->inp_flags |= INP_HIGHPORT; 1272 break; 1273 1274 case IP_PORTRANGE_LOW: 1275 inp->inp_flags &= ~(INP_HIGHPORT); 1276 inp->inp_flags |= INP_LOWPORT; 1277 break; 1278 1279 default: 1280 error = EINVAL; 1281 break; 1282 } 1283 INP_WUNLOCK(inp); 1284 break; 1285 1286 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1287 case IP_IPSEC_POLICY: 1288 if (IPSEC_ENABLED(ipv4)) { 1289 error = IPSEC_PCBCTL(ipv4, inp, sopt); 1290 break; 1291 } 1292 /* FALLTHROUGH */ 1293 #endif /* IPSEC */ 1294 1295 default: 1296 error = ENOPROTOOPT; 1297 break; 1298 } 1299 break; 1300 1301 case SOPT_GET: 1302 switch (sopt->sopt_name) { 1303 case IP_OPTIONS: 1304 case IP_RETOPTS: 1305 INP_RLOCK(inp); 1306 if (inp->inp_options) { 1307 struct mbuf *options; 1308 1309 options = m_copym(inp->inp_options, 0, 1310 M_COPYALL, M_NOWAIT); 1311 INP_RUNLOCK(inp); 1312 if (options != NULL) { 1313 error = sooptcopyout(sopt, 1314 mtod(options, char *), 1315 options->m_len); 1316 m_freem(options); 1317 } else 1318 error = ENOMEM; 1319 } else { 1320 INP_RUNLOCK(inp); 
1321 sopt->sopt_valsize = 0; 1322 } 1323 break; 1324 1325 case IP_TOS: 1326 case IP_TTL: 1327 case IP_MINTTL: 1328 case IP_RECVOPTS: 1329 case IP_RECVRETOPTS: 1330 case IP_ORIGDSTADDR: 1331 case IP_RECVDSTADDR: 1332 case IP_RECVTTL: 1333 case IP_RECVIF: 1334 case IP_PORTRANGE: 1335 case IP_ONESBCAST: 1336 case IP_DONTFRAG: 1337 case IP_BINDANY: 1338 case IP_RECVTOS: 1339 case IP_BINDMULTI: 1340 case IP_FLOWID: 1341 case IP_FLOWTYPE: 1342 case IP_RECVFLOWID: 1343 #ifdef RSS 1344 case IP_RSSBUCKETID: 1345 case IP_RECVRSSBUCKETID: 1346 #endif 1347 switch (sopt->sopt_name) { 1348 1349 case IP_TOS: 1350 optval = inp->inp_ip_tos; 1351 break; 1352 1353 case IP_TTL: 1354 optval = inp->inp_ip_ttl; 1355 break; 1356 1357 case IP_MINTTL: 1358 optval = inp->inp_ip_minttl; 1359 break; 1360 1361 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1362 #define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0) 1363 1364 case IP_RECVOPTS: 1365 optval = OPTBIT(INP_RECVOPTS); 1366 break; 1367 1368 case IP_RECVRETOPTS: 1369 optval = OPTBIT(INP_RECVRETOPTS); 1370 break; 1371 1372 case IP_RECVDSTADDR: 1373 optval = OPTBIT(INP_RECVDSTADDR); 1374 break; 1375 1376 case IP_ORIGDSTADDR: 1377 optval = OPTBIT2(INP_ORIGDSTADDR); 1378 break; 1379 1380 case IP_RECVTTL: 1381 optval = OPTBIT(INP_RECVTTL); 1382 break; 1383 1384 case IP_RECVIF: 1385 optval = OPTBIT(INP_RECVIF); 1386 break; 1387 1388 case IP_PORTRANGE: 1389 if (inp->inp_flags & INP_HIGHPORT) 1390 optval = IP_PORTRANGE_HIGH; 1391 else if (inp->inp_flags & INP_LOWPORT) 1392 optval = IP_PORTRANGE_LOW; 1393 else 1394 optval = 0; 1395 break; 1396 1397 case IP_ONESBCAST: 1398 optval = OPTBIT(INP_ONESBCAST); 1399 break; 1400 case IP_DONTFRAG: 1401 optval = OPTBIT(INP_DONTFRAG); 1402 break; 1403 case IP_BINDANY: 1404 optval = OPTBIT(INP_BINDANY); 1405 break; 1406 case IP_RECVTOS: 1407 optval = OPTBIT(INP_RECVTOS); 1408 break; 1409 case IP_FLOWID: 1410 optval = inp->inp_flowid; 1411 break; 1412 case IP_FLOWTYPE: 1413 optval = inp->inp_flowtype; 1414 
break; 1415 case IP_RECVFLOWID: 1416 optval = OPTBIT2(INP_RECVFLOWID); 1417 break; 1418 #ifdef RSS 1419 case IP_RSSBUCKETID: 1420 retval = rss_hash2bucket(inp->inp_flowid, 1421 inp->inp_flowtype, 1422 &rss_bucket); 1423 if (retval == 0) 1424 optval = rss_bucket; 1425 else 1426 error = EINVAL; 1427 break; 1428 case IP_RECVRSSBUCKETID: 1429 optval = OPTBIT2(INP_RECVRSSBUCKETID); 1430 break; 1431 #endif 1432 case IP_BINDMULTI: 1433 optval = OPTBIT2(INP_BINDMULTI); 1434 break; 1435 } 1436 error = sooptcopyout(sopt, &optval, sizeof optval); 1437 break; 1438 1439 /* 1440 * Multicast socket options are processed by the in_mcast 1441 * module. 1442 */ 1443 case IP_MULTICAST_IF: 1444 case IP_MULTICAST_VIF: 1445 case IP_MULTICAST_TTL: 1446 case IP_MULTICAST_LOOP: 1447 case IP_MSFILTER: 1448 error = inp_getmoptions(inp, sopt); 1449 break; 1450 1451 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1452 case IP_IPSEC_POLICY: 1453 if (IPSEC_ENABLED(ipv4)) { 1454 error = IPSEC_PCBCTL(ipv4, inp, sopt); 1455 break; 1456 } 1457 /* FALLTHROUGH */ 1458 #endif /* IPSEC */ 1459 1460 default: 1461 error = ENOPROTOOPT; 1462 break; 1463 } 1464 break; 1465 } 1466 return (error); 1467 } 1468 1469 /* 1470 * Routine called from ip_output() to loop back a copy of an IP multicast 1471 * packet to the input queue of a specified interface. Note that this 1472 * calls the output routine of the loopback "driver", but with an interface 1473 * pointer that might NOT be a loopback interface -- evil, but easier than 1474 * replicating that code here. 1475 */ 1476 static void 1477 ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen) 1478 { 1479 struct ip *ip; 1480 struct mbuf *copym; 1481 1482 /* 1483 * Make a deep copy of the packet because we're going to 1484 * modify the pack in order to generate checksums. 
1485 */ 1486 copym = m_dup(m, M_NOWAIT); 1487 if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen)) 1488 copym = m_pullup(copym, hlen); 1489 if (copym != NULL) { 1490 /* If needed, compute the checksum and mark it as valid. */ 1491 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1492 in_delayed_cksum(copym); 1493 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1494 copym->m_pkthdr.csum_flags |= 1495 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1496 copym->m_pkthdr.csum_data = 0xffff; 1497 } 1498 /* 1499 * We don't bother to fragment if the IP length is greater 1500 * than the interface's MTU. Can this possibly matter? 1501 */ 1502 ip = mtod(copym, struct ip *); 1503 ip->ip_sum = 0; 1504 ip->ip_sum = in_cksum(copym, hlen); 1505 if_simloop(ifp, copym, AF_INET, 0); 1506 } 1507 } 1508