1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 34 * $FreeBSD$ 35 */ 36 37 #define _IP_VHL 38 39 #include "opt_ipfw.h" 40 #include "opt_ipdn.h" 41 #include "opt_ipdivert.h" 42 #include "opt_ipfilter.h" 43 #include "opt_ipsec.h" 44 #include "opt_pfil_hooks.h" 45 #include "opt_random_ip_id.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/malloc.h> 51 #include <sys/mbuf.h> 52 #include <sys/protosw.h> 53 #include <sys/socket.h> 54 #include <sys/socketvar.h> 55 56 #include <net/if.h> 57 #include <net/route.h> 58 59 #include <netinet/in.h> 60 #include <netinet/in_systm.h> 61 #include <netinet/ip.h> 62 #include <netinet/in_pcb.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip_var.h> 65 66 #include <machine/in_cksum.h> 67 68 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 69 70 #ifdef IPSEC 71 #include <netinet6/ipsec.h> 72 #include <netkey/key.h> 73 #ifdef IPSEC_DEBUG 74 #include <netkey/key_debug.h> 75 #else 76 #define KEYDEBUG(lev,arg) 77 #endif 78 #endif /*IPSEC*/ 79 80 #include <netinet/ip_fw.h> 81 #include <netinet/ip_dummynet.h> 82 83 #ifdef IPFIREWALL_FORWARD_DEBUG 84 #define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\ 85 (ntohl(a.s_addr)>>16)&0xFF,\ 86 (ntohl(a.s_addr)>>8)&0xFF,\ 87 (ntohl(a.s_addr))&0xFF); 88 #endif 89 90 u_short ip_id; 91 92 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); 93 static struct ifnet *ip_multicast_if __P((struct in_addr *, int *)); 94 static void ip_mloopback 95 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int)); 96 static int ip_getmoptions 97 __P((struct sockopt *, struct ip_moptions *)); 98 static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *)); 99 static int ip_setmoptions 100 __P((struct sockopt *, struct ip_moptions **)); 101 102 int ip_optcopy __P((struct ip *, struct ip *)); 103 104 105 extern struct protosw inetsw[]; 106 107 /* 108 * IP output. The packet in mbuf chain m contains a skeletal IP 109 * header (with len, off, ttl, proto, tos, src, dst). 110 * The mbuf chain containing the packet will be freed. 111 * The mbuf opt, if present, will not be freed. 112 */ 113 int 114 ip_output(m0, opt, ro, flags, imo) 115 struct mbuf *m0; 116 struct mbuf *opt; 117 struct route *ro; 118 int flags; 119 struct ip_moptions *imo; 120 { 121 struct ip *ip, *mhip; 122 struct ifnet *ifp; 123 struct mbuf *m = m0; 124 int hlen = sizeof (struct ip); 125 int len, off, error = 0; 126 struct route iproute; 127 struct sockaddr_in *dst; 128 struct in_ifaddr *ia; 129 int isbroadcast, sw_csum; 130 struct in_addr pkt_dst; 131 #ifdef IPSEC 132 struct socket *so = NULL; 133 struct secpolicy *sp = NULL; 134 #endif 135 u_int16_t divert_cookie; /* firewall cookie */ 136 #ifdef PFIL_HOOKS 137 struct packet_filter_hook *pfh; 138 struct mbuf *m1; 139 int rv; 140 #endif /* PFIL_HOOKS */ 141 #ifdef IPFIREWALL_FORWARD 142 int fwd_rewrite_src = 0; 143 #endif 144 struct ip_fw *rule = NULL; 145 146 #ifdef IPDIVERT 147 /* Get and reset firewall cookie */ 148 divert_cookie = ip_divert_cookie; 149 ip_divert_cookie = 0; 150 #else 151 divert_cookie = 0; 152 #endif 153 154 /* 155 * dummynet packet are prepended a vestigial mbuf with 156 * m_type = MT_DUMMYNET and m_data pointing to the matching 157 * rule. 158 */ 159 if (m->m_type == MT_DUMMYNET) { 160 /* 161 * the packet was already tagged, so part of the 162 * processing was already done, and we need to go down. 163 * Get parameters from the header. 164 */ 165 rule = (struct ip_fw *)(m->m_data) ; 166 opt = NULL ; 167 ro = & ( ((struct dn_pkt *)m)->ro ) ; 168 imo = NULL ; 169 dst = ((struct dn_pkt *)m)->dn_dst ; 170 ifp = ((struct dn_pkt *)m)->ifp ; 171 flags = ((struct dn_pkt *)m)->flags ; 172 173 m0 = m = m->m_next ; 174 #ifdef IPSEC 175 so = ipsec_getsocket(m); 176 (void)ipsec_setsocket(m, NULL); 177 #endif 178 ip = mtod(m, struct ip *); 179 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; 180 ia = ifatoia(ro->ro_rt->rt_ifa); 181 goto sendit; 182 } else 183 rule = NULL ; 184 #ifdef IPSEC 185 so = ipsec_getsocket(m); 186 (void)ipsec_setsocket(m, NULL); 187 #endif 188 189 #ifdef DIAGNOSTIC 190 if ((m->m_flags & M_PKTHDR) == 0) 191 panic("ip_output no HDR"); 192 #endif 193 if (opt) { 194 m = ip_insertoptions(m, opt, &len); 195 hlen = len; 196 } 197 ip = mtod(m, struct ip *); 198 pkt_dst = ip_fw_fwd_addr == NULL 199 ? ip->ip_dst : ip_fw_fwd_addr->sin_addr; 200 201 /* 202 * Fill in IP header. 203 */ 204 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 205 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); 206 ip->ip_off &= IP_DF; 207 #ifdef RANDOM_IP_ID 208 ip->ip_id = ip_randomid(); 209 #else 210 ip->ip_id = htons(ip_id++); 211 #endif 212 ipstat.ips_localout++; 213 } else { 214 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 215 } 216 217 /* Route packet. */ 218 if (ro == NULL) { 219 ro = &iproute; 220 bzero(ro, sizeof(*ro)); 221 } 222 dst = (struct sockaddr_in *)&ro->ro_dst; 223 /* 224 * If there is a cached route, 225 * check that it is to the same destination 226 * and is still up. If not, free it and try again. 227 * The address family should also be checked in case of sharing the 228 * cache with IPv6. 229 */ 230 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 231 dst->sin_family != AF_INET || 232 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 233 RTFREE(ro->ro_rt); 234 ro->ro_rt = (struct rtentry *)0; 235 } 236 if (ro->ro_rt == 0) { 237 bzero(dst, sizeof(*dst)); 238 dst->sin_family = AF_INET; 239 dst->sin_len = sizeof(*dst); 240 dst->sin_addr = pkt_dst; 241 } 242 /* 243 * If routing to interface only, 244 * short circuit routing lookup. 245 */ 246 if (flags & IP_ROUTETOIF) { 247 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 248 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 249 ipstat.ips_noroute++; 250 error = ENETUNREACH; 251 goto bad; 252 } 253 ifp = ia->ia_ifp; 254 ip->ip_ttl = 1; 255 isbroadcast = in_broadcast(dst->sin_addr, ifp); 256 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 257 imo != NULL && imo->imo_multicast_ifp != NULL) { 258 /* 259 * Bypass the normal routing lookup for multicast 260 * packets if the interface is specified. 261 */ 262 ifp = imo->imo_multicast_ifp; 263 IFP_TO_IA(ifp, ia); 264 isbroadcast = 0; /* fool gcc */ 265 } else { 266 /* 267 * If this is the case, we probably don't want to allocate 268 * a protocol-cloned route since we didn't get one from the 269 * ULP. This lets TCP do its thing, while not burdening 270 * forwarding or ICMP with the overhead of cloning a route. 271 * Of course, we still want to do any cloning requested by 272 * the link layer, as this is probably required in all cases 273 * for correct operation (as it is for ARP). 274 */ 275 if (ro->ro_rt == 0) 276 rtalloc_ign(ro, RTF_PRCLONING); 277 if (ro->ro_rt == 0) { 278 ipstat.ips_noroute++; 279 error = EHOSTUNREACH; 280 goto bad; 281 } 282 ia = ifatoia(ro->ro_rt->rt_ifa); 283 ifp = ro->ro_rt->rt_ifp; 284 ro->ro_rt->rt_use++; 285 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 286 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 287 if (ro->ro_rt->rt_flags & RTF_HOST) 288 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 289 else 290 isbroadcast = in_broadcast(dst->sin_addr, ifp); 291 } 292 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 293 struct in_multi *inm; 294 295 m->m_flags |= M_MCAST; 296 /* 297 * IP destination address is multicast. Make sure "dst" 298 * still points to the address in "ro". (It may have been 299 * changed to point to a gateway address, above.) 300 */ 301 dst = (struct sockaddr_in *)&ro->ro_dst; 302 /* 303 * See if the caller provided any multicast options 304 */ 305 if (imo != NULL) { 306 ip->ip_ttl = imo->imo_multicast_ttl; 307 if (imo->imo_multicast_vif != -1) 308 ip->ip_src.s_addr = 309 ip_mcast_src(imo->imo_multicast_vif); 310 } else 311 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 312 /* 313 * Confirm that the outgoing interface supports multicast. 314 */ 315 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 316 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 317 ipstat.ips_noroute++; 318 error = ENETUNREACH; 319 goto bad; 320 } 321 } 322 /* 323 * If source address not specified yet, use address 324 * of outgoing interface. 325 */ 326 if (ip->ip_src.s_addr == INADDR_ANY) { 327 /* Interface may have no addresses. */ 328 if (ia != NULL) 329 ip->ip_src = IA_SIN(ia)->sin_addr; 330 } 331 332 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 333 /* 334 * XXX 335 * delayed checksums are not currently 336 * compatible with IP multicast routing 337 */ 338 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 339 in_delayed_cksum(m); 340 m->m_pkthdr.csum_flags &= 341 ~CSUM_DELAY_DATA; 342 } 343 } 344 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 345 if (inm != NULL && 346 (imo == NULL || imo->imo_multicast_loop)) { 347 /* 348 * If we belong to the destination multicast group 349 * on the outgoing interface, and the caller did not 350 * forbid loopback, loop back a copy. 351 */ 352 ip_mloopback(ifp, m, dst, hlen); 353 } 354 else { 355 /* 356 * If we are acting as a multicast router, perform 357 * multicast forwarding as if the packet had just 358 * arrived on the interface to which we are about 359 * to send. The multicast forwarding function 360 * recursively calls this function, using the 361 * IP_FORWARDING flag to prevent infinite recursion. 362 * 363 * Multicasts that are looped back by ip_mloopback(), 364 * above, will be forwarded by the ip_input() routine, 365 * if necessary. 366 */ 367 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 368 /* 369 * Check if rsvp daemon is running. If not, don't 370 * set ip_moptions. This ensures that the packet 371 * is multicast and not just sent down one link 372 * as prescribed by rsvpd. 373 */ 374 if (!rsvp_on) 375 imo = NULL; 376 if (ip_mforward(ip, ifp, m, imo) != 0) { 377 m_freem(m); 378 goto done; 379 } 380 } 381 } 382 383 /* 384 * Multicasts with a time-to-live of zero may be looped- 385 * back, above, but must not be transmitted on a network. 386 * Also, multicasts addressed to the loopback interface 387 * are not sent -- the above call to ip_mloopback() will 388 * loop back a copy if this host actually belongs to the 389 * destination group on the loopback interface. 390 */ 391 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 392 m_freem(m); 393 goto done; 394 } 395 396 goto sendit; 397 } 398 #ifndef notdef 399 /* 400 * If source address not specified yet, use address 401 * of outgoing interface. 402 */ 403 if (ip->ip_src.s_addr == INADDR_ANY) { 404 /* Interface may have no addresses. */ 405 if (ia != NULL) { 406 ip->ip_src = IA_SIN(ia)->sin_addr; 407 #ifdef IPFIREWALL_FORWARD 408 /* Keep note that we did this - if the firewall changes 409 * the next-hop, our interface may change, changing the 410 * default source IP. It's a shame so much effort happens 411 * twice. Oh well. 412 */ 413 fwd_rewrite_src++; 414 #endif /* IPFIREWALL_FORWARD */ 415 } 416 } 417 #endif /* notdef */ 418 /* 419 * Verify that we have any chance at all of being able to queue 420 * the packet or packet fragments 421 */ 422 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 423 ifp->if_snd.ifq_maxlen) { 424 error = ENOBUFS; 425 ipstat.ips_odropped++; 426 goto bad; 427 } 428 429 /* 430 * Look for broadcast address and 431 * verify user is allowed to send 432 * such a packet. 433 */ 434 if (isbroadcast) { 435 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 436 error = EADDRNOTAVAIL; 437 goto bad; 438 } 439 if ((flags & IP_ALLOWBROADCAST) == 0) { 440 error = EACCES; 441 goto bad; 442 } 443 /* don't allow broadcast messages to be fragmented */ 444 if ((u_short)ip->ip_len > ifp->if_mtu) { 445 error = EMSGSIZE; 446 goto bad; 447 } 448 m->m_flags |= M_BCAST; 449 } else { 450 m->m_flags &= ~M_BCAST; 451 } 452 453 sendit: 454 #ifdef IPSEC 455 /* get SP for this packet */ 456 if (so == NULL) 457 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 458 else 459 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 460 461 if (sp == NULL) { 462 ipsecstat.out_inval++; 463 goto bad; 464 } 465 466 error = 0; 467 468 /* check policy */ 469 switch (sp->policy) { 470 case IPSEC_POLICY_DISCARD: 471 /* 472 * This packet is just discarded. 473 */ 474 ipsecstat.out_polvio++; 475 goto bad; 476 477 case IPSEC_POLICY_BYPASS: 478 case IPSEC_POLICY_NONE: 479 /* no need to do IPsec. */ 480 goto skip_ipsec; 481 482 case IPSEC_POLICY_IPSEC: 483 if (sp->req == NULL) { 484 /* acquire a policy */ 485 error = key_spdacquire(sp); 486 goto bad; 487 } 488 break; 489 490 case IPSEC_POLICY_ENTRUST: 491 default: 492 printf("ip_output: Invalid policy found. %d\n", sp->policy); 493 } 494 { 495 struct ipsec_output_state state; 496 bzero(&state, sizeof(state)); 497 state.m = m; 498 if (flags & IP_ROUTETOIF) { 499 state.ro = &iproute; 500 bzero(&iproute, sizeof(iproute)); 501 } else 502 state.ro = ro; 503 state.dst = (struct sockaddr *)dst; 504 505 ip->ip_sum = 0; 506 507 /* 508 * XXX 509 * delayed checksums are not currently compatible with IPsec 510 */ 511 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 512 in_delayed_cksum(m); 513 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 514 } 515 516 ip->ip_len = htons(ip->ip_len); 517 ip->ip_off = htons(ip->ip_off); 518 519 error = ipsec4_output(&state, sp, flags); 520 521 m = state.m; 522 if (flags & IP_ROUTETOIF) { 523 /* 524 * if we have tunnel mode SA, we may need to ignore 525 * IP_ROUTETOIF. 526 */ 527 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 528 flags &= ~IP_ROUTETOIF; 529 ro = state.ro; 530 } 531 } else 532 ro = state.ro; 533 dst = (struct sockaddr_in *)state.dst; 534 if (error) { 535 /* mbuf is already reclaimed in ipsec4_output. */ 536 m0 = NULL; 537 switch (error) { 538 case EHOSTUNREACH: 539 case ENETUNREACH: 540 case EMSGSIZE: 541 case ENOBUFS: 542 case ENOMEM: 543 break; 544 default: 545 printf("ip4_output (ipsec): error code %d\n", error); 546 /*fall through*/ 547 case ENOENT: 548 /* don't show these error codes to the user */ 549 error = 0; 550 break; 551 } 552 goto bad; 553 } 554 } 555 556 /* be sure to update variables that are affected by ipsec4_output() */ 557 ip = mtod(m, struct ip *); 558 #ifdef _IP_VHL 559 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 560 #else 561 hlen = ip->ip_hl << 2; 562 #endif 563 if (ro->ro_rt == NULL) { 564 if ((flags & IP_ROUTETOIF) == 0) { 565 printf("ip_output: " 566 "can't update route after IPsec processing\n"); 567 error = EHOSTUNREACH; /*XXX*/ 568 goto bad; 569 } 570 } else { 571 ia = ifatoia(ro->ro_rt->rt_ifa); 572 ifp = ro->ro_rt->rt_ifp; 573 } 574 575 /* make it flipped, again. */ 576 ip->ip_len = ntohs(ip->ip_len); 577 ip->ip_off = ntohs(ip->ip_off); 578 skip_ipsec: 579 #endif /*IPSEC*/ 580 581 /* 582 * IpHack's section. 583 * - Xlate: translate packet's addr/port (NAT). 584 * - Firewall: deny/allow/etc. 585 * - Wrap: fake packet's addr/port <unimpl.> 586 * - Encapsulate: put it in another IP and send out. <unimp.> 587 */ 588 #ifdef PFIL_HOOKS 589 /* 590 * Run through list of hooks for output packets. 591 */ 592 m1 = m; 593 pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); 594 for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link)) 595 if (pfh->pfil_func) { 596 rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1); 597 if (rv) { 598 error = EHOSTUNREACH; 599 goto done; 600 } 601 m = m1; 602 if (m == NULL) 603 goto done; 604 ip = mtod(m, struct ip *); 605 } 606 #endif /* PFIL_HOOKS */ 607 608 /* 609 * Check with the firewall... 610 * but not if we are already being fwd'd from a firewall. 611 */ 612 if (fw_enable && IPFW_LOADED && !ip_fw_fwd_addr) { 613 struct sockaddr_in *old = dst; 614 615 off = ip_fw_chk_ptr(&ip, 616 hlen, ifp, &divert_cookie, &m, &rule, &dst); 617 /* 618 * On return we must do the following: 619 * m == NULL -> drop the pkt (old interface, deprecated) 620 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 621 * 1<=off<= 0xffff -> DIVERT 622 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 623 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 624 * dst != old -> IPFIREWALL_FORWARD 625 * off==0, dst==old -> accept 626 * If some of the above modules are not compiled in, then 627 * we should't have to check the corresponding condition 628 * (because the ipfw control socket should not accept 629 * unsupported rules), but better play safe and drop 630 * packets in case of doubt. 631 */ 632 if (off & IP_FW_PORT_DENY_FLAG) { /* XXX new interface-denied */ 633 if (m) 634 m_freem(m); 635 error = EACCES; 636 goto done; 637 } 638 if (!m) { /* firewall said to reject */ 639 static int __debug=10; 640 641 if (__debug > 0) { 642 printf( 643 "firewall returns NULL, please update!\n"); 644 __debug--; 645 } 646 error = EACCES; 647 goto done; 648 } 649 if (off == 0 && dst == old) /* common case */ 650 goto pass; 651 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 652 /* 653 * pass the pkt to dummynet. Need to include 654 * pipe number, m, ifp, ro, dst because these are 655 * not recomputed in the next pass. 656 * All other parameters have been already used and 657 * so they are not needed anymore. 658 * XXX note: if the ifp or ro entry are deleted 659 * while a pkt is in dummynet, we are in trouble! 660 */ 661 error = ip_dn_io_ptr(off & 0xffff, DN_TO_IP_OUT, m, 662 ifp, ro, dst, rule, flags); 663 goto done; 664 } 665 #ifdef IPDIVERT 666 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 667 struct mbuf *clone = NULL; 668 669 /* Clone packet if we're doing a 'tee' */ 670 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 671 clone = m_dup(m, M_DONTWAIT); 672 673 /* 674 * XXX 675 * delayed checksums are not currently compatible 676 * with divert sockets. 677 */ 678 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 679 in_delayed_cksum(m); 680 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 681 } 682 683 /* Restore packet header fields to original values */ 684 ip->ip_len = htons(ip->ip_len); 685 ip->ip_off = htons(ip->ip_off); 686 687 /* Deliver packet to divert input routine */ 688 ip_divert_cookie = divert_cookie; 689 divert_packet(m, 0, off & 0xffff); 690 691 /* If 'tee', continue with original packet */ 692 if (clone != NULL) { 693 m = clone; 694 ip = mtod(m, struct ip *); 695 goto pass; 696 } 697 goto done; 698 } 699 #endif 700 701 #ifdef IPFIREWALL_FORWARD 702 /* Here we check dst to make sure it's directly reachable on the 703 * interface we previously thought it was. 704 * If it isn't (which may be likely in some situations) we have 705 * to re-route it (ie, find a route for the next-hop and the 706 * associated interface) and set them here. This is nested 707 * forwarding which in most cases is undesirable, except where 708 * such control is nigh impossible. So we do it here. 709 * And I'm babbling. 710 */ 711 if (off == 0 && old != dst) { 712 struct in_ifaddr *ia; 713 714 /* It's changed... */ 715 /* There must be a better way to do this next line... */ 716 static struct route sro_fwd, *ro_fwd = &sro_fwd; 717 #ifdef IPFIREWALL_FORWARD_DEBUG 718 printf("IPFIREWALL_FORWARD: New dst ip: "); 719 print_ip(dst->sin_addr); 720 printf("\n"); 721 #endif 722 /* 723 * We need to figure out if we have been forwarded 724 * to a local socket. If so then we should somehow 725 * "loop back" to ip_input, and get directed to the 726 * PCB as if we had received this packet. This is 727 * because it may be dificult to identify the packets 728 * you want to forward until they are being output 729 * and have selected an interface. (e.g. locally 730 * initiated packets) If we used the loopback inteface, 731 * we would not be able to control what happens 732 * as the packet runs through ip_input() as 733 * it is done through a ISR. 734 */ 735 LIST_FOREACH(ia, 736 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { 737 /* 738 * If the addr to forward to is one 739 * of ours, we pretend to 740 * be the destination for this packet. 741 */ 742 if (IA_SIN(ia)->sin_addr.s_addr == 743 dst->sin_addr.s_addr) 744 break; 745 } 746 if (ia) { 747 /* tell ip_input "dont filter" */ 748 ip_fw_fwd_addr = dst; 749 if (m->m_pkthdr.rcvif == NULL) 750 m->m_pkthdr.rcvif = ifunit("lo0"); 751 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 752 m->m_pkthdr.csum_flags |= 753 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 754 m0->m_pkthdr.csum_data = 0xffff; 755 } 756 m->m_pkthdr.csum_flags |= 757 CSUM_IP_CHECKED | CSUM_IP_VALID; 758 ip->ip_len = htons(ip->ip_len); 759 ip->ip_off = htons(ip->ip_off); 760 ip_input(m); 761 goto done; 762 } 763 /* Some of the logic for this was 764 * nicked from above. 765 * 766 * This rewrites the cached route in a local PCB. 767 * Is this what we want to do? 768 */ 769 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 770 771 ro_fwd->ro_rt = 0; 772 rtalloc_ign(ro_fwd, RTF_PRCLONING); 773 774 if (ro_fwd->ro_rt == 0) { 775 ipstat.ips_noroute++; 776 error = EHOSTUNREACH; 777 goto bad; 778 } 779 780 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 781 ifp = ro_fwd->ro_rt->rt_ifp; 782 ro_fwd->ro_rt->rt_use++; 783 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 784 dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway; 785 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 786 isbroadcast = 787 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 788 else 789 isbroadcast = in_broadcast(dst->sin_addr, ifp); 790 if (ro->ro_rt) 791 RTFREE(ro->ro_rt); 792 ro->ro_rt = ro_fwd->ro_rt; 793 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 794 795 /* 796 * If we added a default src ip earlier, 797 * which would have been gotten from the-then 798 * interface, do it again, from the new one. 799 */ 800 if (fwd_rewrite_src) 801 ip->ip_src = IA_SIN(ia)->sin_addr; 802 goto pass ; 803 } 804 #endif /* IPFIREWALL_FORWARD */ 805 /* 806 * if we get here, none of the above matches, and 807 * we have to drop the pkt 808 */ 809 m_freem(m); 810 error = EACCES; /* not sure this is the right error msg */ 811 goto done; 812 } 813 814 ip_fw_fwd_addr = NULL; 815 pass: 816 /* 127/8 must not appear on wire - RFC1122. */ 817 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 818 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 819 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 820 ipstat.ips_badaddr++; 821 error = EADDRNOTAVAIL; 822 goto bad; 823 } 824 } 825 826 m->m_pkthdr.csum_flags |= CSUM_IP; 827 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 828 if (sw_csum & CSUM_DELAY_DATA) { 829 in_delayed_cksum(m); 830 sw_csum &= ~CSUM_DELAY_DATA; 831 } 832 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 833 834 /* 835 * If small enough for interface, or the interface will take 836 * care of the fragmentation for us, can just send directly. 837 */ 838 if ((u_short)ip->ip_len <= ifp->if_mtu || 839 ifp->if_hwassist & CSUM_FRAGMENT) { 840 ip->ip_len = htons(ip->ip_len); 841 ip->ip_off = htons(ip->ip_off); 842 ip->ip_sum = 0; 843 if (sw_csum & CSUM_DELAY_IP) { 844 if (ip->ip_vhl == IP_VHL_BORING) { 845 ip->ip_sum = in_cksum_hdr(ip); 846 } else { 847 ip->ip_sum = in_cksum(m, hlen); 848 } 849 } 850 851 /* Record statistics for this interface address. */ 852 if (!(flags & IP_FORWARDING) && ia) { 853 ia->ia_ifa.if_opackets++; 854 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 855 } 856 857 #ifdef IPSEC 858 /* clean ipsec history once it goes out of the node */ 859 ipsec_delaux(m); 860 #endif 861 862 error = (*ifp->if_output)(ifp, m, 863 (struct sockaddr *)dst, ro->ro_rt); 864 goto done; 865 } 866 /* 867 * Too large for interface; fragment if possible. 868 * Must be able to put at least 8 bytes per fragment. 869 */ 870 if (ip->ip_off & IP_DF) { 871 error = EMSGSIZE; 872 /* 873 * This case can happen if the user changed the MTU 874 * of an interface after enabling IP on it. Because 875 * most netifs don't keep track of routes pointing to 876 * them, there is no way for one to update all its 877 * routes when the MTU is changed. 878 */ 879 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 880 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 881 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 882 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 883 } 884 ipstat.ips_cantfrag++; 885 goto bad; 886 } 887 len = (ifp->if_mtu - hlen) &~ 7; 888 if (len < 8) { 889 error = EMSGSIZE; 890 goto bad; 891 } 892 893 /* 894 * if the interface will not calculate checksums on 895 * fragmented packets, then do it here. 896 */ 897 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 898 (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { 899 in_delayed_cksum(m); 900 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 901 } 902 903 { 904 int mhlen, firstlen = len; 905 struct mbuf **mnext = &m->m_nextpkt; 906 int nfrags = 1; 907 908 /* 909 * Loop through length of segment after first fragment, 910 * make new header and copy data of each part and link onto chain. 911 */ 912 m0 = m; 913 mhlen = sizeof (struct ip); 914 for (off = hlen + len; off < (u_short)ip->ip_len; off += len) { 915 MGETHDR(m, M_DONTWAIT, MT_HEADER); 916 if (m == 0) { 917 error = ENOBUFS; 918 ipstat.ips_odropped++; 919 goto sendorfree; 920 } 921 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 922 m->m_data += max_linkhdr; 923 mhip = mtod(m, struct ip *); 924 *mhip = *ip; 925 if (hlen > sizeof (struct ip)) { 926 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 927 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); 928 } 929 m->m_len = mhlen; 930 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 931 if (off + len >= (u_short)ip->ip_len) 932 len = (u_short)ip->ip_len - off; 933 else 934 mhip->ip_off |= IP_MF; 935 mhip->ip_len = htons((u_short)(len + mhlen)); 936 m->m_next = m_copy(m0, off, len); 937 if (m->m_next == 0) { 938 (void) m_free(m); 939 error = ENOBUFS; /* ??? */ 940 ipstat.ips_odropped++; 941 goto sendorfree; 942 } 943 m->m_pkthdr.len = mhlen + len; 944 m->m_pkthdr.rcvif = (struct ifnet *)0; 945 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 946 mhip->ip_off = htons(mhip->ip_off); 947 mhip->ip_sum = 0; 948 if (sw_csum & CSUM_DELAY_IP) { 949 if (mhip->ip_vhl == IP_VHL_BORING) { 950 mhip->ip_sum = in_cksum_hdr(mhip); 951 } else { 952 mhip->ip_sum = in_cksum(m, mhlen); 953 } 954 } 955 *mnext = m; 956 mnext = &m->m_nextpkt; 957 nfrags++; 958 } 959 ipstat.ips_ofragments += nfrags; 960 961 /* set first/last markers for fragment chain */ 962 m->m_flags |= M_LASTFRAG; 963 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 964 m0->m_pkthdr.csum_data = nfrags; 965 966 /* 967 * Update first fragment by trimming what's been copied out 968 * and updating header, then send each fragment (in order). 969 */ 970 m = m0; 971 m_adj(m, hlen + firstlen - (u_short)ip->ip_len); 972 m->m_pkthdr.len = hlen + firstlen; 973 ip->ip_len = htons((u_short)m->m_pkthdr.len); 974 ip->ip_off |= IP_MF; 975 ip->ip_off = htons(ip->ip_off); 976 ip->ip_sum = 0; 977 if (sw_csum & CSUM_DELAY_IP) { 978 if (ip->ip_vhl == IP_VHL_BORING) { 979 ip->ip_sum = in_cksum_hdr(ip); 980 } else { 981 ip->ip_sum = in_cksum(m, hlen); 982 } 983 } 984 sendorfree: 985 for (m = m0; m; m = m0) { 986 m0 = m->m_nextpkt; 987 m->m_nextpkt = 0; 988 #ifdef IPSEC 989 /* clean ipsec history once it goes out of the node */ 990 ipsec_delaux(m); 991 #endif 992 if (error == 0) { 993 /* Record statistics for this interface address. */ 994 if (ia != NULL) { 995 ia->ia_ifa.if_opackets++; 996 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 997 } 998 999 error = (*ifp->if_output)(ifp, m, 1000 (struct sockaddr *)dst, ro->ro_rt); 1001 } else 1002 m_freem(m); 1003 } 1004 1005 if (error == 0) 1006 ipstat.ips_fragmented++; 1007 } 1008 done: 1009 #ifdef IPSEC 1010 if (ro == &iproute && ro->ro_rt) { 1011 RTFREE(ro->ro_rt); 1012 ro->ro_rt = NULL; 1013 } 1014 if (sp != NULL) { 1015 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1016 printf("DP ip_output call free SP:%p\n", sp)); 1017 key_freesp(sp); 1018 } 1019 #endif /* IPSEC */ 1020 return (error); 1021 bad: 1022 m_freem(m); 1023 goto done; 1024 } 1025 1026 void 1027 in_delayed_cksum(struct mbuf *m) 1028 { 1029 struct ip *ip; 1030 u_short csum, offset; 1031 1032 ip = mtod(m, struct ip *); 1033 offset = IP_VHL_HL(ip->ip_vhl) << 2 ; 1034 csum = in_cksum_skip(m, ip->ip_len, offset); 1035 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1036 csum = 0xffff; 1037 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1038 1039 if (offset + sizeof(u_short) > m->m_len) { 1040 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1041 m->m_len, offset, ip->ip_p); 1042 /* 1043 * XXX 1044 * this shouldn't happen, but if it does, the 1045 * correct behavior may be to insert the checksum 1046 * in the existing chain instead of rearranging it. 1047 */ 1048 m = m_pullup(m, offset + sizeof(u_short)); 1049 } 1050 *(u_short *)(m->m_data + offset) = csum; 1051 } 1052 1053 /* 1054 * Insert IP options into preformed packet. 1055 * Adjust IP destination as required for IP source routing, 1056 * as indicated by a non-zero in_addr at the start of the options. 1057 * 1058 * XXX This routine assumes that the packet has no options in place. 1059 */ 1060 static struct mbuf * 1061 ip_insertoptions(m, opt, phlen) 1062 register struct mbuf *m; 1063 struct mbuf *opt; 1064 int *phlen; 1065 { 1066 register struct ipoption *p = mtod(opt, struct ipoption *); 1067 struct mbuf *n; 1068 register struct ip *ip = mtod(m, struct ip *); 1069 unsigned optlen; 1070 1071 optlen = opt->m_len - sizeof(p->ipopt_dst); 1072 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) 1073 return (m); /* XXX should fail */ 1074 if (p->ipopt_dst.s_addr) 1075 ip->ip_dst = p->ipopt_dst; 1076 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1077 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1078 if (n == 0) 1079 return (m); 1080 n->m_pkthdr.rcvif = (struct ifnet *)0; 1081 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1082 m->m_len -= sizeof(struct ip); 1083 m->m_data += sizeof(struct ip); 1084 n->m_next = m; 1085 m = n; 1086 m->m_len = optlen + sizeof(struct ip); 1087 m->m_data += max_linkhdr; 1088 (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); 1089 } else { 1090 m->m_data -= optlen; 1091 m->m_len += optlen; 1092 m->m_pkthdr.len += optlen; 1093 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 1094 } 1095 ip = mtod(m, struct ip *); 1096 bcopy(p->ipopt_list, ip + 1, optlen); 1097 *phlen = sizeof(struct ip) + optlen; 1098 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); 1099 ip->ip_len += optlen; 1100 return (m); 1101 } 1102 1103 /* 1104 * Copy options from ip to jp, 1105 * omitting those not copied during fragmentation. 1106 */ 1107 int 1108 ip_optcopy(ip, jp) 1109 struct ip *ip, *jp; 1110 { 1111 register u_char *cp, *dp; 1112 int opt, optlen, cnt; 1113 1114 cp = (u_char *)(ip + 1); 1115 dp = (u_char *)(jp + 1); 1116 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip); 1117 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1118 opt = cp[0]; 1119 if (opt == IPOPT_EOL) 1120 break; 1121 if (opt == IPOPT_NOP) { 1122 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1123 *dp++ = IPOPT_NOP; 1124 optlen = 1; 1125 continue; 1126 } 1127 #ifdef DIAGNOSTIC 1128 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1129 panic("malformed IPv4 option passed to ip_optcopy"); 1130 #endif 1131 optlen = cp[IPOPT_OLEN]; 1132 #ifdef DIAGNOSTIC 1133 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1134 panic("malformed IPv4 option passed to ip_optcopy"); 1135 #endif 1136 /* bogus lengths should have been caught by ip_dooptions */ 1137 if (optlen > cnt) 1138 optlen = cnt; 1139 if (IPOPT_COPIED(opt)) { 1140 bcopy(cp, dp, optlen); 1141 dp += optlen; 1142 } 1143 } 1144 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1145 *dp++ = IPOPT_EOL; 1146 return (optlen); 1147 } 1148 1149 /* 1150 * IP socket option processing. 1151 */ 1152 int 1153 ip_ctloutput(so, sopt) 1154 struct socket *so; 1155 struct sockopt *sopt; 1156 { 1157 struct inpcb *inp = sotoinpcb(so); 1158 int error, optval; 1159 1160 error = optval = 0; 1161 if (sopt->sopt_level != IPPROTO_IP) { 1162 return (EINVAL); 1163 } 1164 1165 switch (sopt->sopt_dir) { 1166 case SOPT_SET: 1167 switch (sopt->sopt_name) { 1168 case IP_OPTIONS: 1169 #ifdef notyet 1170 case IP_RETOPTS: 1171 #endif 1172 { 1173 struct mbuf *m; 1174 if (sopt->sopt_valsize > MLEN) { 1175 error = EMSGSIZE; 1176 break; 1177 } 1178 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); 1179 if (m == 0) { 1180 error = ENOBUFS; 1181 break; 1182 } 1183 m->m_len = sopt->sopt_valsize; 1184 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1185 m->m_len); 1186 1187 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1188 m)); 1189 } 1190 1191 case IP_TOS: 1192 case IP_TTL: 1193 case IP_RECVOPTS: 1194 case IP_RECVRETOPTS: 1195 case IP_RECVDSTADDR: 1196 case IP_RECVIF: 1197 case IP_FAITH: 1198 error = sooptcopyin(sopt, &optval, sizeof optval, 1199 sizeof optval); 1200 if (error) 1201 break; 1202 1203 switch (sopt->sopt_name) { 1204 case IP_TOS: 1205 inp->inp_ip_tos = optval; 1206 break; 1207 1208 case IP_TTL: 1209 inp->inp_ip_ttl = optval; 1210 break; 1211 #define OPTSET(bit) \ 1212 if (optval) \ 1213 inp->inp_flags |= bit; \ 1214 else \ 1215 inp->inp_flags &= ~bit; 1216 1217 case IP_RECVOPTS: 1218 OPTSET(INP_RECVOPTS); 1219 break; 1220 1221 case IP_RECVRETOPTS: 1222 OPTSET(INP_RECVRETOPTS); 1223 break; 1224 1225 case IP_RECVDSTADDR: 1226 OPTSET(INP_RECVDSTADDR); 1227 break; 1228 1229 case IP_RECVIF: 1230 OPTSET(INP_RECVIF); 1231 break; 1232 1233 case IP_FAITH: 1234 OPTSET(INP_FAITH); 1235 break; 1236 } 1237 break; 1238 #undef OPTSET 1239 1240 case IP_MULTICAST_IF: 1241 case IP_MULTICAST_VIF: 1242 case IP_MULTICAST_TTL: 1243 case IP_MULTICAST_LOOP: 1244 case IP_ADD_MEMBERSHIP: 1245 case IP_DROP_MEMBERSHIP: 1246 error = ip_setmoptions(sopt, &inp->inp_moptions); 1247 break; 1248 1249 case IP_PORTRANGE: 1250 error = sooptcopyin(sopt, &optval, sizeof optval, 1251 sizeof optval); 1252 if (error) 1253 break; 1254 1255 switch (optval) { 1256 case IP_PORTRANGE_DEFAULT: 1257 inp->inp_flags &= ~(INP_LOWPORT); 1258 inp->inp_flags &= ~(INP_HIGHPORT); 1259 break; 1260 1261 case IP_PORTRANGE_HIGH: 1262 inp->inp_flags &= ~(INP_LOWPORT); 1263 inp->inp_flags |= INP_HIGHPORT; 1264 break; 1265 1266 case IP_PORTRANGE_LOW: 1267 inp->inp_flags &= ~(INP_HIGHPORT); 1268 inp->inp_flags |= INP_LOWPORT; 1269 break; 1270 1271 default: 1272 error = EINVAL; 1273 break; 1274 } 1275 break; 1276 1277 #ifdef IPSEC 1278 case IP_IPSEC_POLICY: 1279 { 1280 caddr_t req; 1281 size_t len = 0; 1282 int priv; 1283 struct mbuf *m; 1284 int optname; 1285 1286 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1287 break; 1288 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1289 break; 1290 priv = (sopt->sopt_td != NULL && 1291 suser_td(sopt->sopt_td) != 0) ? 0 : 1; 1292 req = mtod(m, caddr_t); 1293 len = m->m_len; 1294 optname = sopt->sopt_name; 1295 error = ipsec4_set_policy(inp, optname, req, len, priv); 1296 m_freem(m); 1297 break; 1298 } 1299 #endif /*IPSEC*/ 1300 1301 default: 1302 error = ENOPROTOOPT; 1303 break; 1304 } 1305 break; 1306 1307 case SOPT_GET: 1308 switch (sopt->sopt_name) { 1309 case IP_OPTIONS: 1310 case IP_RETOPTS: 1311 if (inp->inp_options) 1312 error = sooptcopyout(sopt, 1313 mtod(inp->inp_options, 1314 char *), 1315 inp->inp_options->m_len); 1316 else 1317 sopt->sopt_valsize = 0; 1318 break; 1319 1320 case IP_TOS: 1321 case IP_TTL: 1322 case IP_RECVOPTS: 1323 case IP_RECVRETOPTS: 1324 case IP_RECVDSTADDR: 1325 case IP_RECVIF: 1326 case IP_PORTRANGE: 1327 case IP_FAITH: 1328 switch (sopt->sopt_name) { 1329 1330 case IP_TOS: 1331 optval = inp->inp_ip_tos; 1332 break; 1333 1334 case IP_TTL: 1335 optval = inp->inp_ip_ttl; 1336 break; 1337 1338 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1339 1340 case IP_RECVOPTS: 1341 optval = OPTBIT(INP_RECVOPTS); 1342 break; 1343 1344 case IP_RECVRETOPTS: 1345 optval = OPTBIT(INP_RECVRETOPTS); 1346 break; 1347 1348 case IP_RECVDSTADDR: 1349 optval = OPTBIT(INP_RECVDSTADDR); 1350 break; 1351 1352 case IP_RECVIF: 1353 optval = OPTBIT(INP_RECVIF); 1354 break; 1355 1356 case IP_PORTRANGE: 1357 if (inp->inp_flags & INP_HIGHPORT) 1358 optval = IP_PORTRANGE_HIGH; 1359 else if (inp->inp_flags & INP_LOWPORT) 1360 optval = IP_PORTRANGE_LOW; 1361 else 1362 optval = 0; 1363 break; 1364 1365 case IP_FAITH: 1366 optval = OPTBIT(INP_FAITH); 1367 break; 1368 } 1369 error = sooptcopyout(sopt, &optval, sizeof optval); 1370 break; 1371 1372 case IP_MULTICAST_IF: 1373 case IP_MULTICAST_VIF: 1374 case IP_MULTICAST_TTL: 1375 case IP_MULTICAST_LOOP: 1376 case IP_ADD_MEMBERSHIP: 1377 case IP_DROP_MEMBERSHIP: 1378 error = ip_getmoptions(sopt, inp->inp_moptions); 1379 break; 1380 1381 #ifdef IPSEC 1382 case IP_IPSEC_POLICY: 1383 { 1384 struct mbuf *m = NULL; 1385 caddr_t req = NULL; 1386 size_t len = 0; 1387 1388 if (m != 0) { 1389 req = mtod(m, caddr_t); 1390 len = m->m_len; 1391 } 1392 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1393 if (error == 0) 1394 error = soopt_mcopyout(sopt, m); /* XXX */ 1395 if (error == 0) 1396 m_freem(m); 1397 break; 1398 } 1399 #endif /*IPSEC*/ 1400 1401 default: 1402 error = ENOPROTOOPT; 1403 break; 1404 } 1405 break; 1406 } 1407 return (error); 1408 } 1409 1410 /* 1411 * Set up IP options in pcb for insertion in output packets. 1412 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1413 * with destination address if source routed. 1414 */ 1415 static int 1416 ip_pcbopts(optname, pcbopt, m) 1417 int optname; 1418 struct mbuf **pcbopt; 1419 register struct mbuf *m; 1420 { 1421 register int cnt, optlen; 1422 register u_char *cp; 1423 u_char opt; 1424 1425 /* turn off any old options */ 1426 if (*pcbopt) 1427 (void)m_free(*pcbopt); 1428 *pcbopt = 0; 1429 if (m == (struct mbuf *)0 || m->m_len == 0) { 1430 /* 1431 * Only turning off any previous options. 1432 */ 1433 if (m) 1434 (void)m_free(m); 1435 return (0); 1436 } 1437 1438 if (m->m_len % sizeof(int32_t)) 1439 goto bad; 1440 /* 1441 * IP first-hop destination address will be stored before 1442 * actual options; move other options back 1443 * and clear it when none present. 1444 */ 1445 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1446 goto bad; 1447 cnt = m->m_len; 1448 m->m_len += sizeof(struct in_addr); 1449 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1450 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1451 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1452 1453 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1454 opt = cp[IPOPT_OPTVAL]; 1455 if (opt == IPOPT_EOL) 1456 break; 1457 if (opt == IPOPT_NOP) 1458 optlen = 1; 1459 else { 1460 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1461 goto bad; 1462 optlen = cp[IPOPT_OLEN]; 1463 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1464 goto bad; 1465 } 1466 switch (opt) { 1467 1468 default: 1469 break; 1470 1471 case IPOPT_LSRR: 1472 case IPOPT_SSRR: 1473 /* 1474 * user process specifies route as: 1475 * ->A->B->C->D 1476 * D must be our final destination (but we can't 1477 * check that since we may not have connected yet). 1478 * A is first hop destination, which doesn't appear in 1479 * actual IP option, but is stored before the options. 1480 */ 1481 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1482 goto bad; 1483 m->m_len -= sizeof(struct in_addr); 1484 cnt -= sizeof(struct in_addr); 1485 optlen -= sizeof(struct in_addr); 1486 cp[IPOPT_OLEN] = optlen; 1487 /* 1488 * Move first hop before start of options. 1489 */ 1490 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1491 sizeof(struct in_addr)); 1492 /* 1493 * Then copy rest of options back 1494 * to close up the deleted entry. 1495 */ 1496 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1497 sizeof(struct in_addr)), 1498 (caddr_t)&cp[IPOPT_OFFSET+1], 1499 (unsigned)cnt + sizeof(struct in_addr)); 1500 break; 1501 } 1502 } 1503 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1504 goto bad; 1505 *pcbopt = m; 1506 return (0); 1507 1508 bad: 1509 (void)m_free(m); 1510 return (EINVAL); 1511 } 1512 1513 /* 1514 * XXX 1515 * The whole multicast option thing needs to be re-thought. 1516 * Several of these options are equally applicable to non-multicast 1517 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1518 * standard option (IP_TTL). 1519 */ 1520 1521 /* 1522 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1523 */ 1524 static struct ifnet * 1525 ip_multicast_if(a, ifindexp) 1526 struct in_addr *a; 1527 int *ifindexp; 1528 { 1529 int ifindex; 1530 struct ifnet *ifp; 1531 1532 if (ifindexp) 1533 *ifindexp = 0; 1534 if (ntohl(a->s_addr) >> 24 == 0) { 1535 ifindex = ntohl(a->s_addr) & 0xffffff; 1536 if (ifindex < 0 || if_index < ifindex) 1537 return NULL; 1538 ifp = ifnet_byindex(ifindex); 1539 if (ifindexp) 1540 *ifindexp = ifindex; 1541 } else { 1542 INADDR_TO_IFP(*a, ifp); 1543 } 1544 return ifp; 1545 } 1546 1547 /* 1548 * Set the IP multicast options in response to user setsockopt(). 1549 */ 1550 static int 1551 ip_setmoptions(sopt, imop) 1552 struct sockopt *sopt; 1553 struct ip_moptions **imop; 1554 { 1555 int error = 0; 1556 int i; 1557 struct in_addr addr; 1558 struct ip_mreq mreq; 1559 struct ifnet *ifp; 1560 struct ip_moptions *imo = *imop; 1561 struct route ro; 1562 struct sockaddr_in *dst; 1563 int ifindex; 1564 int s; 1565 1566 if (imo == NULL) { 1567 /* 1568 * No multicast option buffer attached to the pcb; 1569 * allocate one and initialize to default values. 1570 */ 1571 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1572 M_WAITOK); 1573 1574 if (imo == NULL) 1575 return (ENOBUFS); 1576 *imop = imo; 1577 imo->imo_multicast_ifp = NULL; 1578 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1579 imo->imo_multicast_vif = -1; 1580 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1581 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1582 imo->imo_num_memberships = 0; 1583 } 1584 1585 switch (sopt->sopt_name) { 1586 /* store an index number for the vif you wanna use in the send */ 1587 case IP_MULTICAST_VIF: 1588 if (legal_vif_num == 0) { 1589 error = EOPNOTSUPP; 1590 break; 1591 } 1592 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1593 if (error) 1594 break; 1595 if (!legal_vif_num(i) && (i != -1)) { 1596 error = EINVAL; 1597 break; 1598 } 1599 imo->imo_multicast_vif = i; 1600 break; 1601 1602 case IP_MULTICAST_IF: 1603 /* 1604 * Select the interface for outgoing multicast packets. 1605 */ 1606 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1607 if (error) 1608 break; 1609 /* 1610 * INADDR_ANY is used to remove a previous selection. 1611 * When no interface is selected, a default one is 1612 * chosen every time a multicast packet is sent. 1613 */ 1614 if (addr.s_addr == INADDR_ANY) { 1615 imo->imo_multicast_ifp = NULL; 1616 break; 1617 } 1618 /* 1619 * The selected interface is identified by its local 1620 * IP address. Find the interface and confirm that 1621 * it supports multicasting. 1622 */ 1623 s = splimp(); 1624 ifp = ip_multicast_if(&addr, &ifindex); 1625 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1626 splx(s); 1627 error = EADDRNOTAVAIL; 1628 break; 1629 } 1630 imo->imo_multicast_ifp = ifp; 1631 if (ifindex) 1632 imo->imo_multicast_addr = addr; 1633 else 1634 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1635 splx(s); 1636 break; 1637 1638 case IP_MULTICAST_TTL: 1639 /* 1640 * Set the IP time-to-live for outgoing multicast packets. 1641 * The original multicast API required a char argument, 1642 * which is inconsistent with the rest of the socket API. 1643 * We allow either a char or an int. 1644 */ 1645 if (sopt->sopt_valsize == 1) { 1646 u_char ttl; 1647 error = sooptcopyin(sopt, &ttl, 1, 1); 1648 if (error) 1649 break; 1650 imo->imo_multicast_ttl = ttl; 1651 } else { 1652 u_int ttl; 1653 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1654 sizeof ttl); 1655 if (error) 1656 break; 1657 if (ttl > 255) 1658 error = EINVAL; 1659 else 1660 imo->imo_multicast_ttl = ttl; 1661 } 1662 break; 1663 1664 case IP_MULTICAST_LOOP: 1665 /* 1666 * Set the loopback flag for outgoing multicast packets. 1667 * Must be zero or one. The original multicast API required a 1668 * char argument, which is inconsistent with the rest 1669 * of the socket API. We allow either a char or an int. 1670 */ 1671 if (sopt->sopt_valsize == 1) { 1672 u_char loop; 1673 error = sooptcopyin(sopt, &loop, 1, 1); 1674 if (error) 1675 break; 1676 imo->imo_multicast_loop = !!loop; 1677 } else { 1678 u_int loop; 1679 error = sooptcopyin(sopt, &loop, sizeof loop, 1680 sizeof loop); 1681 if (error) 1682 break; 1683 imo->imo_multicast_loop = !!loop; 1684 } 1685 break; 1686 1687 case IP_ADD_MEMBERSHIP: 1688 /* 1689 * Add a multicast group membership. 1690 * Group must be a valid IP multicast address. 1691 */ 1692 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1693 if (error) 1694 break; 1695 1696 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1697 error = EINVAL; 1698 break; 1699 } 1700 s = splimp(); 1701 /* 1702 * If no interface address was provided, use the interface of 1703 * the route to the given multicast address. 1704 */ 1705 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1706 bzero((caddr_t)&ro, sizeof(ro)); 1707 dst = (struct sockaddr_in *)&ro.ro_dst; 1708 dst->sin_len = sizeof(*dst); 1709 dst->sin_family = AF_INET; 1710 dst->sin_addr = mreq.imr_multiaddr; 1711 rtalloc(&ro); 1712 if (ro.ro_rt == NULL) { 1713 error = EADDRNOTAVAIL; 1714 splx(s); 1715 break; 1716 } 1717 ifp = ro.ro_rt->rt_ifp; 1718 rtfree(ro.ro_rt); 1719 } 1720 else { 1721 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1722 } 1723 1724 /* 1725 * See if we found an interface, and confirm that it 1726 * supports multicast. 1727 */ 1728 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1729 error = EADDRNOTAVAIL; 1730 splx(s); 1731 break; 1732 } 1733 /* 1734 * See if the membership already exists or if all the 1735 * membership slots are full. 1736 */ 1737 for (i = 0; i < imo->imo_num_memberships; ++i) { 1738 if (imo->imo_membership[i]->inm_ifp == ifp && 1739 imo->imo_membership[i]->inm_addr.s_addr 1740 == mreq.imr_multiaddr.s_addr) 1741 break; 1742 } 1743 if (i < imo->imo_num_memberships) { 1744 error = EADDRINUSE; 1745 splx(s); 1746 break; 1747 } 1748 if (i == IP_MAX_MEMBERSHIPS) { 1749 error = ETOOMANYREFS; 1750 splx(s); 1751 break; 1752 } 1753 /* 1754 * Everything looks good; add a new record to the multicast 1755 * address list for the given interface. 1756 */ 1757 if ((imo->imo_membership[i] = 1758 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 1759 error = ENOBUFS; 1760 splx(s); 1761 break; 1762 } 1763 ++imo->imo_num_memberships; 1764 splx(s); 1765 break; 1766 1767 case IP_DROP_MEMBERSHIP: 1768 /* 1769 * Drop a multicast group membership. 1770 * Group must be a valid IP multicast address. 1771 */ 1772 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1773 if (error) 1774 break; 1775 1776 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1777 error = EINVAL; 1778 break; 1779 } 1780 1781 s = splimp(); 1782 /* 1783 * If an interface address was specified, get a pointer 1784 * to its ifnet structure. 1785 */ 1786 if (mreq.imr_interface.s_addr == INADDR_ANY) 1787 ifp = NULL; 1788 else { 1789 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1790 if (ifp == NULL) { 1791 error = EADDRNOTAVAIL; 1792 splx(s); 1793 break; 1794 } 1795 } 1796 /* 1797 * Find the membership in the membership array. 1798 */ 1799 for (i = 0; i < imo->imo_num_memberships; ++i) { 1800 if ((ifp == NULL || 1801 imo->imo_membership[i]->inm_ifp == ifp) && 1802 imo->imo_membership[i]->inm_addr.s_addr == 1803 mreq.imr_multiaddr.s_addr) 1804 break; 1805 } 1806 if (i == imo->imo_num_memberships) { 1807 error = EADDRNOTAVAIL; 1808 splx(s); 1809 break; 1810 } 1811 /* 1812 * Give up the multicast address record to which the 1813 * membership points. 1814 */ 1815 in_delmulti(imo->imo_membership[i]); 1816 /* 1817 * Remove the gap in the membership array. 1818 */ 1819 for (++i; i < imo->imo_num_memberships; ++i) 1820 imo->imo_membership[i-1] = imo->imo_membership[i]; 1821 --imo->imo_num_memberships; 1822 splx(s); 1823 break; 1824 1825 default: 1826 error = EOPNOTSUPP; 1827 break; 1828 } 1829 1830 /* 1831 * If all options have default values, no need to keep the mbuf. 1832 */ 1833 if (imo->imo_multicast_ifp == NULL && 1834 imo->imo_multicast_vif == -1 && 1835 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1836 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1837 imo->imo_num_memberships == 0) { 1838 free(*imop, M_IPMOPTS); 1839 *imop = NULL; 1840 } 1841 1842 return (error); 1843 } 1844 1845 /* 1846 * Return the IP multicast options in response to user getsockopt(). 1847 */ 1848 static int 1849 ip_getmoptions(sopt, imo) 1850 struct sockopt *sopt; 1851 register struct ip_moptions *imo; 1852 { 1853 struct in_addr addr; 1854 struct in_ifaddr *ia; 1855 int error, optval; 1856 u_char coptval; 1857 1858 error = 0; 1859 switch (sopt->sopt_name) { 1860 case IP_MULTICAST_VIF: 1861 if (imo != NULL) 1862 optval = imo->imo_multicast_vif; 1863 else 1864 optval = -1; 1865 error = sooptcopyout(sopt, &optval, sizeof optval); 1866 break; 1867 1868 case IP_MULTICAST_IF: 1869 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1870 addr.s_addr = INADDR_ANY; 1871 else if (imo->imo_multicast_addr.s_addr) { 1872 /* return the value user has set */ 1873 addr = imo->imo_multicast_addr; 1874 } else { 1875 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1876 addr.s_addr = (ia == NULL) ? INADDR_ANY 1877 : IA_SIN(ia)->sin_addr.s_addr; 1878 } 1879 error = sooptcopyout(sopt, &addr, sizeof addr); 1880 break; 1881 1882 case IP_MULTICAST_TTL: 1883 if (imo == 0) 1884 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1885 else 1886 optval = coptval = imo->imo_multicast_ttl; 1887 if (sopt->sopt_valsize == 1) 1888 error = sooptcopyout(sopt, &coptval, 1); 1889 else 1890 error = sooptcopyout(sopt, &optval, sizeof optval); 1891 break; 1892 1893 case IP_MULTICAST_LOOP: 1894 if (imo == 0) 1895 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1896 else 1897 optval = coptval = imo->imo_multicast_loop; 1898 if (sopt->sopt_valsize == 1) 1899 error = sooptcopyout(sopt, &coptval, 1); 1900 else 1901 error = sooptcopyout(sopt, &optval, sizeof optval); 1902 break; 1903 1904 default: 1905 error = ENOPROTOOPT; 1906 break; 1907 } 1908 return (error); 1909 } 1910 1911 /* 1912 * Discard the IP multicast options. 1913 */ 1914 void 1915 ip_freemoptions(imo) 1916 register struct ip_moptions *imo; 1917 { 1918 register int i; 1919 1920 if (imo != NULL) { 1921 for (i = 0; i < imo->imo_num_memberships; ++i) 1922 in_delmulti(imo->imo_membership[i]); 1923 free(imo, M_IPMOPTS); 1924 } 1925 } 1926 1927 /* 1928 * Routine called from ip_output() to loop back a copy of an IP multicast 1929 * packet to the input queue of a specified interface. Note that this 1930 * calls the output routine of the loopback "driver", but with an interface 1931 * pointer that might NOT be a loopback interface -- evil, but easier than 1932 * replicating that code here. 1933 */ 1934 static void 1935 ip_mloopback(ifp, m, dst, hlen) 1936 struct ifnet *ifp; 1937 register struct mbuf *m; 1938 register struct sockaddr_in *dst; 1939 int hlen; 1940 { 1941 register struct ip *ip; 1942 struct mbuf *copym; 1943 1944 copym = m_copy(m, 0, M_COPYALL); 1945 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 1946 copym = m_pullup(copym, hlen); 1947 if (copym != NULL) { 1948 /* 1949 * We don't bother to fragment if the IP length is greater 1950 * than the interface's MTU. Can this possibly matter? 1951 */ 1952 ip = mtod(copym, struct ip *); 1953 ip->ip_len = htons(ip->ip_len); 1954 ip->ip_off = htons(ip->ip_off); 1955 ip->ip_sum = 0; 1956 if (ip->ip_vhl == IP_VHL_BORING) { 1957 ip->ip_sum = in_cksum_hdr(ip); 1958 } else { 1959 ip->ip_sum = in_cksum(copym, hlen); 1960 } 1961 /* 1962 * NB: 1963 * It's not clear whether there are any lingering 1964 * reentrancy problems in other areas which might 1965 * be exposed by using ip_input directly (in 1966 * particular, everything which modifies the packet 1967 * in-place). Yet another option is using the 1968 * protosw directly to deliver the looped back 1969 * packet. For the moment, we'll err on the side 1970 * of safety by using if_simloop(). 1971 */ 1972 #if 1 /* XXX */ 1973 if (dst->sin_family != AF_INET) { 1974 printf("ip_mloopback: bad address family %d\n", 1975 dst->sin_family); 1976 dst->sin_family = AF_INET; 1977 } 1978 #endif 1979 1980 #ifdef notdef 1981 copym->m_pkthdr.rcvif = ifp; 1982 ip_input(copym); 1983 #else 1984 /* if the checksum hasn't been computed, mark it as valid */ 1985 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1986 copym->m_pkthdr.csum_flags |= 1987 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1988 copym->m_pkthdr.csum_data = 0xffff; 1989 } 1990 if_simloop(ifp, copym, dst->sin_family, 0); 1991 #endif 1992 } 1993 } 1994