1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 30 * $FreeBSD$ 31 */ 32 33 #include "opt_ipfw.h" 34 #include "opt_ipdn.h" 35 #include "opt_ipdivert.h" 36 #include "opt_ipfilter.h" 37 #include "opt_ipsec.h" 38 #include "opt_mac.h" 39 #include "opt_pfil_hooks.h" 40 #include "opt_mbuf_stress_test.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/mac.h> 46 #include <sys/malloc.h> 47 #include <sys/mbuf.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/sysctl.h> 52 53 #include <net/if.h> 54 #include <net/route.h> 55 56 #include <netinet/in.h> 57 #include <netinet/in_systm.h> 58 #include <netinet/ip.h> 59 #include <netinet/in_pcb.h> 60 #include <netinet/in_var.h> 61 #include <netinet/ip_var.h> 62 63 #ifdef PFIL_HOOKS 64 #include <net/pfil.h> 65 #endif 66 67 #include <machine/in_cksum.h> 68 69 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 70 71 #ifdef IPSEC 72 #include <netinet6/ipsec.h> 73 #include <netkey/key.h> 74 #ifdef IPSEC_DEBUG 75 #include <netkey/key_debug.h> 76 #else 77 #define KEYDEBUG(lev,arg) 78 #endif 79 #endif /*IPSEC*/ 80 81 #ifdef FAST_IPSEC 82 #include <netipsec/ipsec.h> 83 #include <netipsec/xform.h> 84 #include <netipsec/key.h> 85 #endif /*FAST_IPSEC*/ 86 87 #include <netinet/ip_fw.h> 88 #include <netinet/ip_divert.h> 89 #include <netinet/ip_dummynet.h> 90 91 #define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ 92 x, (ntohl(a.s_addr)>>24)&0xFF,\ 93 (ntohl(a.s_addr)>>16)&0xFF,\ 94 (ntohl(a.s_addr)>>8)&0xFF,\ 95 (ntohl(a.s_addr))&0xFF, y); 96 97 u_short ip_id; 98 99 #ifdef MBUF_STRESS_TEST 100 int mbuf_frag_size = 0; 101 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 102 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 103 #endif 104 105 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 106 static struct ifnet *ip_multicast_if(struct in_addr *, int *); 107 static void ip_mloopback 108 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 109 static int ip_getmoptions 110 (struct sockopt *, struct ip_moptions *); 111 static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 112 static int ip_setmoptions 113 (struct sockopt *, struct ip_moptions **); 114 115 int ip_optcopy(struct ip *, struct ip *); 116 117 118 extern struct protosw inetsw[]; 119 120 /* 121 * IP output. The packet in mbuf chain m contains a skeletal IP 122 * header (with len, off, ttl, proto, tos, src, dst). 123 * The mbuf chain containing the packet will be freed. 124 * The mbuf opt, if present, will not be freed. 125 * In the IP forwarding case, the packet will arrive with options already 126 * inserted, so must have a NULL opt pointer. 127 */ 128 int 129 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, 130 int flags, struct ip_moptions *imo, struct inpcb *inp) 131 { 132 struct ip *ip; 133 struct ifnet *ifp = NULL; /* keep compiler happy */ 134 struct mbuf *m0; 135 int hlen = sizeof (struct ip); 136 int len, off, error = 0; 137 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 138 struct in_ifaddr *ia = NULL; 139 int isbroadcast, sw_csum; 140 struct in_addr pkt_dst; 141 struct route iproute; 142 struct m_tag *mtag, *dummytag; 143 #ifdef IPSEC 144 struct secpolicy *sp = NULL; 145 #endif 146 #ifdef FAST_IPSEC 147 struct secpolicy *sp = NULL; 148 struct tdb_ident *tdbi; 149 int s; 150 #endif /* FAST_IPSEC */ 151 struct ip_fw_args args; 152 int src_was_INADDR_ANY = 0; /* as the name says... */ 153 154 args.eh = NULL; 155 args.rule = NULL; 156 157 M_ASSERTPKTHDR(m); 158 159 args.next_hop = m_claim_next(m, PACKET_TAG_IPFORWARD); 160 dummytag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 161 if (dummytag != NULL) { 162 struct dn_pkt_tag *dt = (struct dn_pkt_tag *)(dummytag+1); 163 /* 164 * Prevent lower layers from finding the tag 165 * Cleanup and free is done below 166 */ 167 m_tag_unlink(m, dummytag); 168 /* 169 * the packet was already tagged, so part of the 170 * processing was already done, and we need to go down. 171 * Get parameters from the header. 172 */ 173 args.rule = dt->rule; 174 ro = &(dt->ro); 175 dst = dt->dn_dst; 176 ifp = dt->ifp; 177 } 178 179 if (ro == NULL) { 180 ro = &iproute; 181 bzero(ro, sizeof (*ro)); 182 } 183 184 if (inp != NULL) 185 INP_LOCK_ASSERT(inp); 186 187 if (args.rule != NULL) { /* dummynet already saw us */ 188 ip = mtod(m, struct ip *); 189 hlen = ip->ip_hl << 2 ; 190 if (ro->ro_rt) 191 ia = ifatoia(ro->ro_rt->rt_ifa); 192 goto sendit; 193 } 194 195 if (opt) { 196 len = 0; 197 m = ip_insertoptions(m, opt, &len); 198 if (len != 0) 199 hlen = len; 200 } 201 ip = mtod(m, struct ip *); 202 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; 203 204 /* 205 * Fill in IP header. If we are not allowing fragmentation, 206 * then the ip_id field is meaningless, but we don't set it 207 * to zero. Doing so causes various problems when devices along 208 * the path (routers, load balancers, firewalls, etc.) illegally 209 * disable DF on our packet. Note that a 16-bit counter 210 * will wrap around in less than 10 seconds at 100 Mbit/s on a 211 * medium with MTU 1500. See Steven M. Bellovin, "A Technique 212 * for Counting NATted Hosts", Proc. IMW'02, available at 213 * <http://www.research.att.com/~smb/papers/fnat.pdf>. 214 */ 215 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 216 ip->ip_v = IPVERSION; 217 ip->ip_hl = hlen >> 2; 218 ip->ip_id = ip_newid(); 219 ipstat.ips_localout++; 220 } else { 221 hlen = ip->ip_hl << 2; 222 } 223 224 dst = (struct sockaddr_in *)&ro->ro_dst; 225 /* 226 * If there is a cached route, 227 * check that it is to the same destination 228 * and is still up. If not, free it and try again. 229 * The address family should also be checked in case of sharing the 230 * cache with IPv6. 231 */ 232 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 233 dst->sin_family != AF_INET || 234 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 235 RTFREE(ro->ro_rt); 236 ro->ro_rt = (struct rtentry *)0; 237 } 238 if (ro->ro_rt == NULL) { 239 bzero(dst, sizeof(*dst)); 240 dst->sin_family = AF_INET; 241 dst->sin_len = sizeof(*dst); 242 dst->sin_addr = pkt_dst; 243 } 244 /* 245 * If routing to interface only, 246 * short circuit routing lookup. 247 */ 248 if (flags & IP_ROUTETOIF) { 249 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && 250 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { 251 ipstat.ips_noroute++; 252 error = ENETUNREACH; 253 goto bad; 254 } 255 ifp = ia->ia_ifp; 256 ip->ip_ttl = 1; 257 isbroadcast = in_broadcast(dst->sin_addr, ifp); 258 } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 259 imo != NULL && imo->imo_multicast_ifp != NULL) { 260 /* 261 * Bypass the normal routing lookup for multicast 262 * packets if the interface is specified. 263 */ 264 ifp = imo->imo_multicast_ifp; 265 IFP_TO_IA(ifp, ia); 266 isbroadcast = 0; /* fool gcc */ 267 } else { 268 /* 269 * We want to do any cloning requested by the link layer, 270 * as this is probably required in all cases for correct 271 * operation (as it is for ARP). 272 */ 273 if (ro->ro_rt == NULL) 274 rtalloc_ign(ro, 0); 275 if (ro->ro_rt == NULL) { 276 ipstat.ips_noroute++; 277 error = EHOSTUNREACH; 278 goto bad; 279 } 280 ia = ifatoia(ro->ro_rt->rt_ifa); 281 ifp = ro->ro_rt->rt_ifp; 282 ro->ro_rt->rt_rmx.rmx_pksent++; 283 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 284 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 285 if (ro->ro_rt->rt_flags & RTF_HOST) 286 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 287 else 288 isbroadcast = in_broadcast(dst->sin_addr, ifp); 289 } 290 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 291 struct in_multi *inm; 292 293 m->m_flags |= M_MCAST; 294 /* 295 * IP destination address is multicast. Make sure "dst" 296 * still points to the address in "ro". (It may have been 297 * changed to point to a gateway address, above.) 298 */ 299 dst = (struct sockaddr_in *)&ro->ro_dst; 300 /* 301 * See if the caller provided any multicast options 302 */ 303 if (imo != NULL) { 304 ip->ip_ttl = imo->imo_multicast_ttl; 305 if (imo->imo_multicast_vif != -1) 306 ip->ip_src.s_addr = 307 ip_mcast_src ? 308 ip_mcast_src(imo->imo_multicast_vif) : 309 INADDR_ANY; 310 } else 311 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 312 /* 313 * Confirm that the outgoing interface supports multicast. 314 */ 315 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 316 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 317 ipstat.ips_noroute++; 318 error = ENETUNREACH; 319 goto bad; 320 } 321 } 322 /* 323 * If source address not specified yet, use address 324 * of outgoing interface. 325 */ 326 if (ip->ip_src.s_addr == INADDR_ANY) { 327 /* Interface may have no addresses. */ 328 if (ia != NULL) 329 ip->ip_src = IA_SIN(ia)->sin_addr; 330 } 331 332 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 333 if (inm != NULL && 334 (imo == NULL || imo->imo_multicast_loop)) { 335 /* 336 * If we belong to the destination multicast group 337 * on the outgoing interface, and the caller did not 338 * forbid loopback, loop back a copy. 339 */ 340 ip_mloopback(ifp, m, dst, hlen); 341 } 342 else { 343 /* 344 * If we are acting as a multicast router, perform 345 * multicast forwarding as if the packet had just 346 * arrived on the interface to which we are about 347 * to send. The multicast forwarding function 348 * recursively calls this function, using the 349 * IP_FORWARDING flag to prevent infinite recursion. 350 * 351 * Multicasts that are looped back by ip_mloopback(), 352 * above, will be forwarded by the ip_input() routine, 353 * if necessary. 354 */ 355 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 356 /* 357 * If rsvp daemon is not running, do not 358 * set ip_moptions. This ensures that the packet 359 * is multicast and not just sent down one link 360 * as prescribed by rsvpd. 361 */ 362 if (!rsvp_on) 363 imo = NULL; 364 if (ip_mforward && 365 ip_mforward(ip, ifp, m, imo) != 0) { 366 m_freem(m); 367 goto done; 368 } 369 } 370 } 371 372 /* 373 * Multicasts with a time-to-live of zero may be looped- 374 * back, above, but must not be transmitted on a network. 375 * Also, multicasts addressed to the loopback interface 376 * are not sent -- the above call to ip_mloopback() will 377 * loop back a copy if this host actually belongs to the 378 * destination group on the loopback interface. 379 */ 380 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 381 m_freem(m); 382 goto done; 383 } 384 385 goto sendit; 386 } 387 #ifndef notdef 388 /* 389 * If the source address is not specified yet, use the address 390 * of the outoing interface. In case, keep note we did that, so 391 * if the the firewall changes the next-hop causing the output 392 * interface to change, we can fix that. 393 */ 394 if (ip->ip_src.s_addr == INADDR_ANY) { 395 /* Interface may have no addresses. */ 396 if (ia != NULL) { 397 ip->ip_src = IA_SIN(ia)->sin_addr; 398 src_was_INADDR_ANY = 1; 399 } 400 } 401 #endif /* notdef */ 402 #ifdef ALTQ 403 /* 404 * disable packet drop hack. 405 * packetdrop should be done by queueing. 406 */ 407 #else /* !ALTQ */ 408 /* 409 * Verify that we have any chance at all of being able to queue 410 * the packet or packet fragments 411 */ 412 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 413 ifp->if_snd.ifq_maxlen) { 414 error = ENOBUFS; 415 ipstat.ips_odropped++; 416 goto bad; 417 } 418 #endif /* !ALTQ */ 419 420 /* 421 * Look for broadcast address and 422 * verify user is allowed to send 423 * such a packet. 424 */ 425 if (isbroadcast) { 426 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 427 error = EADDRNOTAVAIL; 428 goto bad; 429 } 430 if ((flags & IP_ALLOWBROADCAST) == 0) { 431 error = EACCES; 432 goto bad; 433 } 434 /* don't allow broadcast messages to be fragmented */ 435 if (ip->ip_len > ifp->if_mtu) { 436 error = EMSGSIZE; 437 goto bad; 438 } 439 if (flags & IP_SENDONES) 440 ip->ip_dst.s_addr = INADDR_BROADCAST; 441 m->m_flags |= M_BCAST; 442 } else { 443 m->m_flags &= ~M_BCAST; 444 } 445 446 sendit: 447 #ifdef IPSEC 448 /* get SP for this packet */ 449 if (inp == NULL) 450 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 451 flags, &error); 452 else 453 sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error); 454 455 if (sp == NULL) { 456 ipsecstat.out_inval++; 457 goto bad; 458 } 459 460 error = 0; 461 462 /* check policy */ 463 switch (sp->policy) { 464 case IPSEC_POLICY_DISCARD: 465 /* 466 * This packet is just discarded. 467 */ 468 ipsecstat.out_polvio++; 469 goto bad; 470 471 case IPSEC_POLICY_BYPASS: 472 case IPSEC_POLICY_NONE: 473 case IPSEC_POLICY_TCP: 474 /* no need to do IPsec. */ 475 goto skip_ipsec; 476 477 case IPSEC_POLICY_IPSEC: 478 if (sp->req == NULL) { 479 /* acquire a policy */ 480 error = key_spdacquire(sp); 481 goto bad; 482 } 483 break; 484 485 case IPSEC_POLICY_ENTRUST: 486 default: 487 printf("ip_output: Invalid policy found. %d\n", sp->policy); 488 } 489 { 490 struct ipsec_output_state state; 491 bzero(&state, sizeof(state)); 492 state.m = m; 493 if (flags & IP_ROUTETOIF) { 494 state.ro = &iproute; 495 bzero(&iproute, sizeof(iproute)); 496 } else 497 state.ro = ro; 498 state.dst = (struct sockaddr *)dst; 499 500 ip->ip_sum = 0; 501 502 /* 503 * XXX 504 * delayed checksums are not currently compatible with IPsec 505 */ 506 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 507 in_delayed_cksum(m); 508 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 509 } 510 511 ip->ip_len = htons(ip->ip_len); 512 ip->ip_off = htons(ip->ip_off); 513 514 error = ipsec4_output(&state, sp, flags); 515 516 m = state.m; 517 if (flags & IP_ROUTETOIF) { 518 /* 519 * if we have tunnel mode SA, we may need to ignore 520 * IP_ROUTETOIF. 521 */ 522 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 523 flags &= ~IP_ROUTETOIF; 524 ro = state.ro; 525 } 526 } else 527 ro = state.ro; 528 dst = (struct sockaddr_in *)state.dst; 529 if (error) { 530 /* mbuf is already reclaimed in ipsec4_output. */ 531 m = NULL; 532 switch (error) { 533 case EHOSTUNREACH: 534 case ENETUNREACH: 535 case EMSGSIZE: 536 case ENOBUFS: 537 case ENOMEM: 538 break; 539 default: 540 printf("ip4_output (ipsec): error code %d\n", error); 541 /*fall through*/ 542 case ENOENT: 543 /* don't show these error codes to the user */ 544 error = 0; 545 break; 546 } 547 goto bad; 548 } 549 550 /* be sure to update variables that are affected by ipsec4_output() */ 551 ip = mtod(m, struct ip *); 552 hlen = ip->ip_hl << 2; 553 if (ro->ro_rt == NULL) { 554 if ((flags & IP_ROUTETOIF) == 0) { 555 printf("ip_output: " 556 "can't update route after IPsec processing\n"); 557 error = EHOSTUNREACH; /*XXX*/ 558 goto bad; 559 } 560 } else { 561 if (state.encap) { 562 ia = ifatoia(ro->ro_rt->rt_ifa); 563 ifp = ro->ro_rt->rt_ifp; 564 } 565 } 566 } 567 568 /* make it flipped, again. */ 569 ip->ip_len = ntohs(ip->ip_len); 570 ip->ip_off = ntohs(ip->ip_off); 571 skip_ipsec: 572 #endif /*IPSEC*/ 573 #ifdef FAST_IPSEC 574 /* 575 * Check the security policy (SP) for the packet and, if 576 * required, do IPsec-related processing. There are two 577 * cases here; the first time a packet is sent through 578 * it will be untagged and handled by ipsec4_checkpolicy. 579 * If the packet is resubmitted to ip_output (e.g. after 580 * AH, ESP, etc. processing), there will be a tag to bypass 581 * the lookup and related policy checking. 582 */ 583 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 584 s = splnet(); 585 if (mtag != NULL) { 586 tdbi = (struct tdb_ident *)(mtag + 1); 587 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); 588 if (sp == NULL) 589 error = -EINVAL; /* force silent drop */ 590 m_tag_delete(m, mtag); 591 } else { 592 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, 593 &error, inp); 594 } 595 /* 596 * There are four return cases: 597 * sp != NULL apply IPsec policy 598 * sp == NULL, error == 0 no IPsec handling needed 599 * sp == NULL, error == -EINVAL discard packet w/o error 600 * sp == NULL, error != 0 discard packet, report error 601 */ 602 if (sp != NULL) { 603 /* Loop detection, check if ipsec processing already done */ 604 KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); 605 for (mtag = m_tag_first(m); mtag != NULL; 606 mtag = m_tag_next(m, mtag)) { 607 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) 608 continue; 609 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 610 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 611 continue; 612 /* 613 * Check if policy has an SA associated with it. 614 * This can happen when an SP has yet to acquire 615 * an SA; e.g. on first reference. If it occurs, 616 * then we let ipsec4_process_packet do its thing. 617 */ 618 if (sp->req->sav == NULL) 619 break; 620 tdbi = (struct tdb_ident *)(mtag + 1); 621 if (tdbi->spi == sp->req->sav->spi && 622 tdbi->proto == sp->req->sav->sah->saidx.proto && 623 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, 624 sizeof (union sockaddr_union)) == 0) { 625 /* 626 * No IPsec processing is needed, free 627 * reference to SP. 628 * 629 * NB: null pointer to avoid free at 630 * done: below. 631 */ 632 KEY_FREESP(&sp), sp = NULL; 633 splx(s); 634 goto spd_done; 635 } 636 } 637 638 /* 639 * Do delayed checksums now because we send before 640 * this is done in the normal processing path. 641 */ 642 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 643 in_delayed_cksum(m); 644 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 645 } 646 647 ip->ip_len = htons(ip->ip_len); 648 ip->ip_off = htons(ip->ip_off); 649 650 /* NB: callee frees mbuf */ 651 error = ipsec4_process_packet(m, sp->req, flags, 0); 652 /* 653 * Preserve KAME behaviour: ENOENT can be returned 654 * when an SA acquire is in progress. Don't propagate 655 * this to user-level; it confuses applications. 656 * 657 * XXX this will go away when the SADB is redone. 658 */ 659 if (error == ENOENT) 660 error = 0; 661 splx(s); 662 goto done; 663 } else { 664 splx(s); 665 666 if (error != 0) { 667 /* 668 * Hack: -EINVAL is used to signal that a packet 669 * should be silently discarded. This is typically 670 * because we asked key management for an SA and 671 * it was delayed (e.g. kicked up to IKE). 672 */ 673 if (error == -EINVAL) 674 error = 0; 675 goto bad; 676 } else { 677 /* No IPsec processing for this packet. */ 678 } 679 #ifdef notyet 680 /* 681 * If deferred crypto processing is needed, check that 682 * the interface supports it. 683 */ 684 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); 685 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) { 686 /* notify IPsec to do its own crypto */ 687 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 688 error = EHOSTUNREACH; 689 goto bad; 690 } 691 #endif 692 } 693 spd_done: 694 #endif /* FAST_IPSEC */ 695 696 /* 697 * IpHack's section. 698 * - Xlate: translate packet's addr/port (NAT). 699 * - Firewall: deny/allow/etc. 700 * - Wrap: fake packet's addr/port <unimpl.> 701 * - Encapsulate: put it in another IP and send out. <unimp.> 702 */ 703 #ifdef PFIL_HOOKS 704 /* 705 * Run through list of hooks for output packets. 706 */ 707 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT); 708 if (error != 0 || m == NULL) 709 goto done; 710 ip = mtod(m, struct ip *); 711 #endif /* PFIL_HOOKS */ 712 713 /* 714 * Check with the firewall... 715 * but not if we are already being fwd'd from a firewall. 716 */ 717 if (fw_enable && IPFW_LOADED && !args.next_hop) { 718 struct sockaddr_in *old = dst; 719 720 args.m = m; 721 args.next_hop = dst; 722 args.oif = ifp; 723 off = ip_fw_chk_ptr(&args); 724 m = args.m; 725 dst = args.next_hop; 726 727 /* 728 * On return we must do the following: 729 * m == NULL -> drop the pkt (old interface, deprecated) 730 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 731 * 1<=off<= 0xffff -> DIVERT 732 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 733 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 734 * dst != old -> IPFIREWALL_FORWARD 735 * off==0, dst==old -> accept 736 * If some of the above modules are not compiled in, then 737 * we should't have to check the corresponding condition 738 * (because the ipfw control socket should not accept 739 * unsupported rules), but better play safe and drop 740 * packets in case of doubt. 741 */ 742 if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { 743 if (m) 744 m_freem(m); 745 error = EACCES; 746 goto done; 747 } 748 ip = mtod(m, struct ip *); 749 if (off == 0 && dst == old) /* common case */ 750 goto pass; 751 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { 752 /* 753 * pass the pkt to dummynet. Need to include 754 * pipe number, m, ifp, ro, dst because these are 755 * not recomputed in the next pass. 756 * All other parameters have been already used and 757 * so they are not needed anymore. 758 * XXX note: if the ifp or ro entry are deleted 759 * while a pkt is in dummynet, we are in trouble! 760 */ 761 args.ro = ro; 762 args.dst = dst; 763 args.flags = flags; 764 765 error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, 766 &args); 767 goto done; 768 } 769 #ifdef IPDIVERT 770 if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { 771 struct mbuf *clone; 772 773 /* Clone packet if we're doing a 'tee' */ 774 if ((off & IP_FW_PORT_TEE_FLAG) != 0) 775 clone = divert_clone(m); 776 else 777 clone = NULL; 778 779 /* Restore packet header fields to original values */ 780 ip->ip_len = htons(ip->ip_len); 781 ip->ip_off = htons(ip->ip_off); 782 783 /* Deliver packet to divert input routine */ 784 divert_packet(m, 0); 785 786 /* If 'tee', continue with original packet */ 787 if (clone != NULL) { 788 m = clone; 789 ip = mtod(m, struct ip *); 790 goto pass; 791 } 792 goto done; 793 } 794 #endif 795 796 /* IPFIREWALL_FORWARD */ 797 /* 798 * Check dst to make sure it is directly reachable on the 799 * interface we previously thought it was. 800 * If it isn't (which may be likely in some situations) we have 801 * to re-route it (ie, find a route for the next-hop and the 802 * associated interface) and set them here. This is nested 803 * forwarding which in most cases is undesirable, except where 804 * such control is nigh impossible. So we do it here. 805 * And I'm babbling. 806 */ 807 if (off == 0 && old != dst) { /* FORWARD, dst has changed */ 808 #if 0 809 /* 810 * XXX To improve readability, this block should be 811 * changed into a function call as below: 812 */ 813 error = ip_ipforward(&m, &dst, &ifp); 814 if (error) 815 goto bad; 816 if (m == NULL) /* ip_input consumed the mbuf */ 817 goto done; 818 #else 819 struct in_ifaddr *ia; 820 821 /* 822 * XXX sro_fwd below is static, and a pointer 823 * to it gets passed to routines downstream. 824 * This could have surprisingly bad results in 825 * practice, because its content is overwritten 826 * by subsequent packets. 827 * XXX: Breaks on SMP and possibly preemption! 828 */ 829 /* There must be a better way to do this next line... */ 830 static struct route sro_fwd; 831 struct route *ro_fwd = &sro_fwd; 832 833 #if 0 834 print_ip("IPFIREWALL_FORWARD: New dst ip: ", 835 dst->sin_addr, "\n"); 836 #endif 837 838 /* 839 * We need to figure out if we have been forwarded 840 * to a local socket. If so, then we should somehow 841 * "loop back" to ip_input, and get directed to the 842 * PCB as if we had received this packet. This is 843 * because it may be dificult to identify the packets 844 * you want to forward until they are being output 845 * and have selected an interface. (e.g. locally 846 * initiated packets) If we used the loopback inteface, 847 * we would not be able to control what happens 848 * as the packet runs through ip_input() as 849 * it is done through an ISR. 850 */ 851 LIST_FOREACH(ia, 852 INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { 853 /* 854 * If the addr to forward to is one 855 * of ours, we pretend to 856 * be the destination for this packet. 857 */ 858 if (IA_SIN(ia)->sin_addr.s_addr == 859 dst->sin_addr.s_addr) 860 break; 861 } 862 if (ia) { /* tell ip_input "dont filter" */ 863 mtag = m_tag_get( 864 PACKET_TAG_IPFORWARD, 865 sizeof(struct sockaddr_in *), M_NOWAIT); 866 if (mtag == NULL) { 867 error = ENOBUFS; 868 goto bad; 869 } 870 *(struct sockaddr_in **)(mtag+1) = 871 args.next_hop; 872 m_tag_prepend(m, mtag); 873 874 if (m->m_pkthdr.rcvif == NULL) 875 m->m_pkthdr.rcvif = ifunit("lo0"); 876 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 877 m->m_pkthdr.csum_flags |= 878 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 879 m->m_pkthdr.csum_data = 0xffff; 880 } 881 m->m_pkthdr.csum_flags |= 882 CSUM_IP_CHECKED | CSUM_IP_VALID; 883 ip->ip_len = htons(ip->ip_len); 884 ip->ip_off = htons(ip->ip_off); 885 ip_input(m); 886 goto done; 887 } 888 /* 889 * Some of the logic for this was 890 * nicked from above. 891 */ 892 bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); 893 894 ro_fwd->ro_rt = 0; 895 rtalloc_ign(ro_fwd, RTF_CLONING); 896 897 if (ro_fwd->ro_rt == NULL) { 898 ipstat.ips_noroute++; 899 error = EHOSTUNREACH; 900 goto bad; 901 } 902 903 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 904 ifp = ro_fwd->ro_rt->rt_ifp; 905 ro_fwd->ro_rt->rt_rmx.rmx_pksent++; 906 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 907 dst = (struct sockaddr_in *) 908 ro_fwd->ro_rt->rt_gateway; 909 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 910 isbroadcast = 911 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 912 else 913 isbroadcast = in_broadcast(dst->sin_addr, ifp); 914 if (ro->ro_rt) 915 RTFREE(ro->ro_rt); 916 ro->ro_rt = ro_fwd->ro_rt; 917 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 918 919 #endif /* ... block to be put into a function */ 920 /* 921 * If we added a default src ip earlier, 922 * which would have been gotten from the-then 923 * interface, do it again, from the new one. 924 */ 925 if (src_was_INADDR_ANY) 926 ip->ip_src = IA_SIN(ia)->sin_addr; 927 goto pass ; 928 } 929 930 /* 931 * if we get here, none of the above matches, and 932 * we have to drop the pkt 933 */ 934 m_freem(m); 935 error = EACCES; /* not sure this is the right error msg */ 936 goto done; 937 } 938 939 pass: 940 /* 127/8 must not appear on wire - RFC1122. */ 941 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 942 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 943 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 944 ipstat.ips_badaddr++; 945 error = EADDRNOTAVAIL; 946 goto bad; 947 } 948 } 949 950 m->m_pkthdr.csum_flags |= CSUM_IP; 951 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 952 if (sw_csum & CSUM_DELAY_DATA) { 953 in_delayed_cksum(m); 954 sw_csum &= ~CSUM_DELAY_DATA; 955 } 956 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 957 958 /* 959 * If small enough for interface, or the interface will take 960 * care of the fragmentation for us, can just send directly. 961 */ 962 if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT && 963 ((ip->ip_off & IP_DF) == 0))) { 964 ip->ip_len = htons(ip->ip_len); 965 ip->ip_off = htons(ip->ip_off); 966 ip->ip_sum = 0; 967 if (sw_csum & CSUM_DELAY_IP) 968 ip->ip_sum = in_cksum(m, hlen); 969 970 /* Record statistics for this interface address. */ 971 if (!(flags & IP_FORWARDING) && ia) { 972 ia->ia_ifa.if_opackets++; 973 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 974 } 975 976 #ifdef IPSEC 977 /* clean ipsec history once it goes out of the node */ 978 ipsec_delaux(m); 979 #endif 980 981 #ifdef MBUF_STRESS_TEST 982 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) 983 m = m_fragment(m, M_DONTWAIT, mbuf_frag_size); 984 #endif 985 error = (*ifp->if_output)(ifp, m, 986 (struct sockaddr *)dst, ro->ro_rt); 987 goto done; 988 } 989 990 if (ip->ip_off & IP_DF) { 991 error = EMSGSIZE; 992 /* 993 * This case can happen if the user changed the MTU 994 * of an interface after enabling IP on it. Because 995 * most netifs don't keep track of routes pointing to 996 * them, there is no way for one to update all its 997 * routes when the MTU is changed. 998 */ 999 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 1000 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 1001 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 1002 } 1003 ipstat.ips_cantfrag++; 1004 goto bad; 1005 } 1006 1007 /* 1008 * Too large for interface; fragment if possible. If successful, 1009 * on return, m will point to a list of packets to be sent. 1010 */ 1011 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); 1012 if (error) 1013 goto bad; 1014 for (; m; m = m0) { 1015 m0 = m->m_nextpkt; 1016 m->m_nextpkt = 0; 1017 #ifdef IPSEC 1018 /* clean ipsec history once it goes out of the node */ 1019 ipsec_delaux(m); 1020 #endif 1021 if (error == 0) { 1022 /* Record statistics for this interface address. */ 1023 if (ia != NULL) { 1024 ia->ia_ifa.if_opackets++; 1025 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1026 } 1027 1028 error = (*ifp->if_output)(ifp, m, 1029 (struct sockaddr *)dst, ro->ro_rt); 1030 } else 1031 m_freem(m); 1032 } 1033 1034 if (error == 0) 1035 ipstat.ips_fragmented++; 1036 1037 done: 1038 if (ro == &iproute && ro->ro_rt) { 1039 RTFREE(ro->ro_rt); 1040 ro->ro_rt = NULL; 1041 } 1042 if (dummytag) { 1043 struct dn_pkt_tag *dt = (struct dn_pkt_tag *)(dummytag+1); 1044 if (dt->ro.ro_rt) 1045 RTFREE(dt->ro.ro_rt); 1046 m_tag_free(dummytag); 1047 } 1048 #ifdef IPSEC 1049 if (sp != NULL) { 1050 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1051 printf("DP ip_output call free SP:%p\n", sp)); 1052 key_freesp(sp); 1053 } 1054 #endif 1055 #ifdef FAST_IPSEC 1056 if (sp != NULL) 1057 KEY_FREESP(&sp); 1058 #endif 1059 return (error); 1060 bad: 1061 m_freem(m); 1062 goto done; 1063 } 1064 1065 /* 1066 * Create a chain of fragments which fit the given mtu. m_frag points to the 1067 * mbuf to be fragmented; on return it points to the chain with the fragments. 1068 * Return 0 if no error. If error, m_frag may contain a partially built 1069 * chain of fragments that should be freed by the caller. 1070 * 1071 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 1072 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 1073 */ 1074 int 1075 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 1076 u_long if_hwassist_flags, int sw_csum) 1077 { 1078 int error = 0; 1079 int hlen = ip->ip_hl << 2; 1080 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 1081 int off; 1082 struct mbuf *m0 = *m_frag; /* the original packet */ 1083 int firstlen; 1084 struct mbuf **mnext; 1085 int nfrags; 1086 1087 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 1088 ipstat.ips_cantfrag++; 1089 return EMSGSIZE; 1090 } 1091 1092 /* 1093 * Must be able to put at least 8 bytes per fragment. 1094 */ 1095 if (len < 8) 1096 return EMSGSIZE; 1097 1098 /* 1099 * If the interface will not calculate checksums on 1100 * fragmented packets, then do it here. 1101 */ 1102 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && 1103 (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { 1104 in_delayed_cksum(m0); 1105 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1106 } 1107 1108 if (len > PAGE_SIZE) { 1109 /* 1110 * Fragment large datagrams such that each segment 1111 * contains a multiple of PAGE_SIZE amount of data, 1112 * plus headers. This enables a receiver to perform 1113 * page-flipping zero-copy optimizations. 1114 * 1115 * XXX When does this help given that sender and receiver 1116 * could have different page sizes, and also mtu could 1117 * be less than the receiver's page size ? 1118 */ 1119 int newlen; 1120 struct mbuf *m; 1121 1122 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 1123 off += m->m_len; 1124 1125 /* 1126 * firstlen (off - hlen) must be aligned on an 1127 * 8-byte boundary 1128 */ 1129 if (off < hlen) 1130 goto smart_frag_failure; 1131 off = ((off - hlen) & ~7) + hlen; 1132 newlen = (~PAGE_MASK) & mtu; 1133 if ((newlen + sizeof (struct ip)) > mtu) { 1134 /* we failed, go back the default */ 1135 smart_frag_failure: 1136 newlen = len; 1137 off = hlen + len; 1138 } 1139 len = newlen; 1140 1141 } else { 1142 off = hlen + len; 1143 } 1144 1145 firstlen = off - hlen; 1146 mnext = &m0->m_nextpkt; /* pointer to next packet */ 1147 1148 /* 1149 * Loop through length of segment after first fragment, 1150 * make new header and copy data of each part and link onto chain. 1151 * Here, m0 is the original packet, m is the fragment being created. 1152 * The fragments are linked off the m_nextpkt of the original 1153 * packet, which after processing serves as the first fragment. 1154 */ 1155 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 1156 struct ip *mhip; /* ip header on the fragment */ 1157 struct mbuf *m; 1158 int mhlen = sizeof (struct ip); 1159 1160 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1161 if (m == NULL) { 1162 error = ENOBUFS; 1163 ipstat.ips_odropped++; 1164 goto done; 1165 } 1166 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 1167 /* 1168 * In the first mbuf, leave room for the link header, then 1169 * copy the original IP header including options. The payload 1170 * goes into an additional mbuf chain returned by m_copy(). 1171 */ 1172 m->m_data += max_linkhdr; 1173 mhip = mtod(m, struct ip *); 1174 *mhip = *ip; 1175 if (hlen > sizeof (struct ip)) { 1176 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 1177 mhip->ip_v = IPVERSION; 1178 mhip->ip_hl = mhlen >> 2; 1179 } 1180 m->m_len = mhlen; 1181 /* XXX do we need to add ip->ip_off below ? */ 1182 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 1183 if (off + len >= ip->ip_len) { /* last fragment */ 1184 len = ip->ip_len - off; 1185 m->m_flags |= M_LASTFRAG; 1186 } else 1187 mhip->ip_off |= IP_MF; 1188 mhip->ip_len = htons((u_short)(len + mhlen)); 1189 m->m_next = m_copy(m0, off, len); 1190 if (m->m_next == NULL) { /* copy failed */ 1191 m_free(m); 1192 error = ENOBUFS; /* ??? */ 1193 ipstat.ips_odropped++; 1194 goto done; 1195 } 1196 m->m_pkthdr.len = mhlen + len; 1197 m->m_pkthdr.rcvif = (struct ifnet *)0; 1198 #ifdef MAC 1199 mac_create_fragment(m0, m); 1200 #endif 1201 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 1202 mhip->ip_off = htons(mhip->ip_off); 1203 mhip->ip_sum = 0; 1204 if (sw_csum & CSUM_DELAY_IP) 1205 mhip->ip_sum = in_cksum(m, mhlen); 1206 *mnext = m; 1207 mnext = &m->m_nextpkt; 1208 } 1209 ipstat.ips_ofragments += nfrags; 1210 1211 /* set first marker for fragment chain */ 1212 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 1213 m0->m_pkthdr.csum_data = nfrags; 1214 1215 /* 1216 * Update first fragment by trimming what's been copied out 1217 * and updating header. 1218 */ 1219 m_adj(m0, hlen + firstlen - ip->ip_len); 1220 m0->m_pkthdr.len = hlen + firstlen; 1221 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 1222 ip->ip_off |= IP_MF; 1223 ip->ip_off = htons(ip->ip_off); 1224 ip->ip_sum = 0; 1225 if (sw_csum & CSUM_DELAY_IP) 1226 ip->ip_sum = in_cksum(m0, hlen); 1227 1228 done: 1229 *m_frag = m0; 1230 return error; 1231 } 1232 1233 void 1234 in_delayed_cksum(struct mbuf *m) 1235 { 1236 struct ip *ip; 1237 u_short csum, offset; 1238 1239 ip = mtod(m, struct ip *); 1240 offset = ip->ip_hl << 2 ; 1241 csum = in_cksum_skip(m, ip->ip_len, offset); 1242 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1243 csum = 0xffff; 1244 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1245 1246 if (offset + sizeof(u_short) > m->m_len) { 1247 printf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1248 m->m_len, offset, ip->ip_p); 1249 /* 1250 * XXX 1251 * this shouldn't happen, but if it does, the 1252 * correct behavior may be to insert the checksum 1253 * in the existing chain instead of rearranging it. 1254 */ 1255 m = m_pullup(m, offset + sizeof(u_short)); 1256 } 1257 *(u_short *)(m->m_data + offset) = csum; 1258 } 1259 1260 /* 1261 * Insert IP options into preformed packet. 1262 * Adjust IP destination as required for IP source routing, 1263 * as indicated by a non-zero in_addr at the start of the options. 1264 * 1265 * XXX This routine assumes that the packet has no options in place. 1266 */ 1267 static struct mbuf * 1268 ip_insertoptions(m, opt, phlen) 1269 register struct mbuf *m; 1270 struct mbuf *opt; 1271 int *phlen; 1272 { 1273 register struct ipoption *p = mtod(opt, struct ipoption *); 1274 struct mbuf *n; 1275 register struct ip *ip = mtod(m, struct ip *); 1276 unsigned optlen; 1277 1278 optlen = opt->m_len - sizeof(p->ipopt_dst); 1279 if (optlen + ip->ip_len > IP_MAXPACKET) { 1280 *phlen = 0; 1281 return (m); /* XXX should fail */ 1282 } 1283 if (p->ipopt_dst.s_addr) 1284 ip->ip_dst = p->ipopt_dst; 1285 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1286 MGETHDR(n, M_DONTWAIT, MT_HEADER); 1287 if (n == NULL) { 1288 *phlen = 0; 1289 return (m); 1290 } 1291 n->m_pkthdr.rcvif = (struct ifnet *)0; 1292 #ifdef MAC 1293 mac_create_mbuf_from_mbuf(m, n); 1294 #endif 1295 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1296 m->m_len -= sizeof(struct ip); 1297 m->m_data += sizeof(struct ip); 1298 n->m_next = m; 1299 m = n; 1300 m->m_len = optlen + sizeof(struct ip); 1301 m->m_data += max_linkhdr; 1302 bcopy(ip, mtod(m, void *), sizeof(struct ip)); 1303 } else { 1304 m->m_data -= optlen; 1305 m->m_len += optlen; 1306 m->m_pkthdr.len += optlen; 1307 bcopy(ip, mtod(m, void *), sizeof(struct ip)); 1308 } 1309 ip = mtod(m, struct ip *); 1310 bcopy(p->ipopt_list, ip + 1, optlen); 1311 *phlen = sizeof(struct ip) + optlen; 1312 ip->ip_v = IPVERSION; 1313 ip->ip_hl = *phlen >> 2; 1314 ip->ip_len += optlen; 1315 return (m); 1316 } 1317 1318 /* 1319 * Copy options from ip to jp, 1320 * omitting those not copied during fragmentation. 1321 */ 1322 int 1323 ip_optcopy(ip, jp) 1324 struct ip *ip, *jp; 1325 { 1326 register u_char *cp, *dp; 1327 int opt, optlen, cnt; 1328 1329 cp = (u_char *)(ip + 1); 1330 dp = (u_char *)(jp + 1); 1331 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 1332 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1333 opt = cp[0]; 1334 if (opt == IPOPT_EOL) 1335 break; 1336 if (opt == IPOPT_NOP) { 1337 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1338 *dp++ = IPOPT_NOP; 1339 optlen = 1; 1340 continue; 1341 } 1342 1343 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp), 1344 ("ip_optcopy: malformed ipv4 option")); 1345 optlen = cp[IPOPT_OLEN]; 1346 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt, 1347 ("ip_optcopy: malformed ipv4 option")); 1348 1349 /* bogus lengths should have been caught by ip_dooptions */ 1350 if (optlen > cnt) 1351 optlen = cnt; 1352 if (IPOPT_COPIED(opt)) { 1353 bcopy(cp, dp, optlen); 1354 dp += optlen; 1355 } 1356 } 1357 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1358 *dp++ = IPOPT_EOL; 1359 return (optlen); 1360 } 1361 1362 /* 1363 * IP socket option processing. 1364 */ 1365 int 1366 ip_ctloutput(so, sopt) 1367 struct socket *so; 1368 struct sockopt *sopt; 1369 { 1370 struct inpcb *inp = sotoinpcb(so); 1371 int error, optval; 1372 1373 error = optval = 0; 1374 if (sopt->sopt_level != IPPROTO_IP) { 1375 return (EINVAL); 1376 } 1377 1378 switch (sopt->sopt_dir) { 1379 case SOPT_SET: 1380 switch (sopt->sopt_name) { 1381 case IP_OPTIONS: 1382 #ifdef notyet 1383 case IP_RETOPTS: 1384 #endif 1385 { 1386 struct mbuf *m; 1387 if (sopt->sopt_valsize > MLEN) { 1388 error = EMSGSIZE; 1389 break; 1390 } 1391 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); 1392 if (m == NULL) { 1393 error = ENOBUFS; 1394 break; 1395 } 1396 m->m_len = sopt->sopt_valsize; 1397 error = sooptcopyin(sopt, mtod(m, char *), m->m_len, 1398 m->m_len); 1399 1400 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1401 m)); 1402 } 1403 1404 case IP_TOS: 1405 case IP_TTL: 1406 case IP_RECVOPTS: 1407 case IP_RECVRETOPTS: 1408 case IP_RECVDSTADDR: 1409 case IP_RECVTTL: 1410 case IP_RECVIF: 1411 case IP_FAITH: 1412 case IP_ONESBCAST: 1413 error = sooptcopyin(sopt, &optval, sizeof optval, 1414 sizeof optval); 1415 if (error) 1416 break; 1417 1418 switch (sopt->sopt_name) { 1419 case IP_TOS: 1420 inp->inp_ip_tos = optval; 1421 break; 1422 1423 case IP_TTL: 1424 inp->inp_ip_ttl = optval; 1425 break; 1426 #define OPTSET(bit) do { \ 1427 INP_LOCK(inp); \ 1428 if (optval) \ 1429 inp->inp_flags |= bit; \ 1430 else \ 1431 inp->inp_flags &= ~bit; \ 1432 INP_UNLOCK(inp); \ 1433 } while (0) 1434 1435 case IP_RECVOPTS: 1436 OPTSET(INP_RECVOPTS); 1437 break; 1438 1439 case IP_RECVRETOPTS: 1440 OPTSET(INP_RECVRETOPTS); 1441 break; 1442 1443 case IP_RECVDSTADDR: 1444 OPTSET(INP_RECVDSTADDR); 1445 break; 1446 1447 case IP_RECVTTL: 1448 OPTSET(INP_RECVTTL); 1449 break; 1450 1451 case IP_RECVIF: 1452 OPTSET(INP_RECVIF); 1453 break; 1454 1455 case IP_FAITH: 1456 OPTSET(INP_FAITH); 1457 break; 1458 1459 case IP_ONESBCAST: 1460 OPTSET(INP_ONESBCAST); 1461 break; 1462 } 1463 break; 1464 #undef OPTSET 1465 1466 case IP_MULTICAST_IF: 1467 case IP_MULTICAST_VIF: 1468 case IP_MULTICAST_TTL: 1469 case IP_MULTICAST_LOOP: 1470 case IP_ADD_MEMBERSHIP: 1471 case IP_DROP_MEMBERSHIP: 1472 error = ip_setmoptions(sopt, &inp->inp_moptions); 1473 break; 1474 1475 case IP_PORTRANGE: 1476 error = sooptcopyin(sopt, &optval, sizeof optval, 1477 sizeof optval); 1478 if (error) 1479 break; 1480 1481 INP_LOCK(inp); 1482 switch (optval) { 1483 case IP_PORTRANGE_DEFAULT: 1484 inp->inp_flags &= ~(INP_LOWPORT); 1485 inp->inp_flags &= ~(INP_HIGHPORT); 1486 break; 1487 1488 case IP_PORTRANGE_HIGH: 1489 inp->inp_flags &= ~(INP_LOWPORT); 1490 inp->inp_flags |= INP_HIGHPORT; 1491 break; 1492 1493 case IP_PORTRANGE_LOW: 1494 inp->inp_flags &= ~(INP_HIGHPORT); 1495 inp->inp_flags |= INP_LOWPORT; 1496 break; 1497 1498 default: 1499 error = EINVAL; 1500 break; 1501 } 1502 INP_UNLOCK(inp); 1503 break; 1504 1505 #if defined(IPSEC) || defined(FAST_IPSEC) 1506 case IP_IPSEC_POLICY: 1507 { 1508 caddr_t req; 1509 size_t len = 0; 1510 int priv; 1511 struct mbuf *m; 1512 int optname; 1513 1514 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1515 break; 1516 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ 1517 break; 1518 priv = (sopt->sopt_td != NULL && 1519 suser(sopt->sopt_td) != 0) ? 0 : 1; 1520 req = mtod(m, caddr_t); 1521 len = m->m_len; 1522 optname = sopt->sopt_name; 1523 error = ipsec4_set_policy(inp, optname, req, len, priv); 1524 m_freem(m); 1525 break; 1526 } 1527 #endif /*IPSEC*/ 1528 1529 default: 1530 error = ENOPROTOOPT; 1531 break; 1532 } 1533 break; 1534 1535 case SOPT_GET: 1536 switch (sopt->sopt_name) { 1537 case IP_OPTIONS: 1538 case IP_RETOPTS: 1539 if (inp->inp_options) 1540 error = sooptcopyout(sopt, 1541 mtod(inp->inp_options, 1542 char *), 1543 inp->inp_options->m_len); 1544 else 1545 sopt->sopt_valsize = 0; 1546 break; 1547 1548 case IP_TOS: 1549 case IP_TTL: 1550 case IP_RECVOPTS: 1551 case IP_RECVRETOPTS: 1552 case IP_RECVDSTADDR: 1553 case IP_RECVTTL: 1554 case IP_RECVIF: 1555 case IP_PORTRANGE: 1556 case IP_FAITH: 1557 case IP_ONESBCAST: 1558 switch (sopt->sopt_name) { 1559 1560 case IP_TOS: 1561 optval = inp->inp_ip_tos; 1562 break; 1563 1564 case IP_TTL: 1565 optval = inp->inp_ip_ttl; 1566 break; 1567 1568 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1569 1570 case IP_RECVOPTS: 1571 optval = OPTBIT(INP_RECVOPTS); 1572 break; 1573 1574 case IP_RECVRETOPTS: 1575 optval = OPTBIT(INP_RECVRETOPTS); 1576 break; 1577 1578 case IP_RECVDSTADDR: 1579 optval = OPTBIT(INP_RECVDSTADDR); 1580 break; 1581 1582 case IP_RECVTTL: 1583 optval = OPTBIT(INP_RECVTTL); 1584 break; 1585 1586 case IP_RECVIF: 1587 optval = OPTBIT(INP_RECVIF); 1588 break; 1589 1590 case IP_PORTRANGE: 1591 if (inp->inp_flags & INP_HIGHPORT) 1592 optval = IP_PORTRANGE_HIGH; 1593 else if (inp->inp_flags & INP_LOWPORT) 1594 optval = IP_PORTRANGE_LOW; 1595 else 1596 optval = 0; 1597 break; 1598 1599 case IP_FAITH: 1600 optval = OPTBIT(INP_FAITH); 1601 break; 1602 1603 case IP_ONESBCAST: 1604 optval = OPTBIT(INP_ONESBCAST); 1605 break; 1606 } 1607 error = sooptcopyout(sopt, &optval, sizeof optval); 1608 break; 1609 1610 case IP_MULTICAST_IF: 1611 case IP_MULTICAST_VIF: 1612 case IP_MULTICAST_TTL: 1613 case IP_MULTICAST_LOOP: 1614 case IP_ADD_MEMBERSHIP: 1615 case IP_DROP_MEMBERSHIP: 1616 error = ip_getmoptions(sopt, inp->inp_moptions); 1617 break; 1618 1619 #if defined(IPSEC) || defined(FAST_IPSEC) 1620 case IP_IPSEC_POLICY: 1621 { 1622 struct mbuf *m = NULL; 1623 caddr_t req = NULL; 1624 size_t len = 0; 1625 1626 if (m != 0) { 1627 req = mtod(m, caddr_t); 1628 len = m->m_len; 1629 } 1630 error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); 1631 if (error == 0) 1632 error = soopt_mcopyout(sopt, m); /* XXX */ 1633 if (error == 0) 1634 m_freem(m); 1635 break; 1636 } 1637 #endif /*IPSEC*/ 1638 1639 default: 1640 error = ENOPROTOOPT; 1641 break; 1642 } 1643 break; 1644 } 1645 return (error); 1646 } 1647 1648 /* 1649 * Set up IP options in pcb for insertion in output packets. 1650 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1651 * with destination address if source routed. 1652 */ 1653 static int 1654 ip_pcbopts(optname, pcbopt, m) 1655 int optname; 1656 struct mbuf **pcbopt; 1657 register struct mbuf *m; 1658 { 1659 register int cnt, optlen; 1660 register u_char *cp; 1661 u_char opt; 1662 1663 /* turn off any old options */ 1664 if (*pcbopt) 1665 (void)m_free(*pcbopt); 1666 *pcbopt = 0; 1667 if (m == (struct mbuf *)0 || m->m_len == 0) { 1668 /* 1669 * Only turning off any previous options. 1670 */ 1671 if (m) 1672 (void)m_free(m); 1673 return (0); 1674 } 1675 1676 if (m->m_len % sizeof(int32_t)) 1677 goto bad; 1678 /* 1679 * IP first-hop destination address will be stored before 1680 * actual options; move other options back 1681 * and clear it when none present. 1682 */ 1683 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1684 goto bad; 1685 cnt = m->m_len; 1686 m->m_len += sizeof(struct in_addr); 1687 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1688 bcopy(mtod(m, void *), cp, (unsigned)cnt); 1689 bzero(mtod(m, void *), sizeof(struct in_addr)); 1690 1691 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1692 opt = cp[IPOPT_OPTVAL]; 1693 if (opt == IPOPT_EOL) 1694 break; 1695 if (opt == IPOPT_NOP) 1696 optlen = 1; 1697 else { 1698 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1699 goto bad; 1700 optlen = cp[IPOPT_OLEN]; 1701 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1702 goto bad; 1703 } 1704 switch (opt) { 1705 1706 default: 1707 break; 1708 1709 case IPOPT_LSRR: 1710 case IPOPT_SSRR: 1711 /* 1712 * user process specifies route as: 1713 * ->A->B->C->D 1714 * D must be our final destination (but we can't 1715 * check that since we may not have connected yet). 1716 * A is first hop destination, which doesn't appear in 1717 * actual IP option, but is stored before the options. 1718 */ 1719 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1720 goto bad; 1721 m->m_len -= sizeof(struct in_addr); 1722 cnt -= sizeof(struct in_addr); 1723 optlen -= sizeof(struct in_addr); 1724 cp[IPOPT_OLEN] = optlen; 1725 /* 1726 * Move first hop before start of options. 1727 */ 1728 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1729 sizeof(struct in_addr)); 1730 /* 1731 * Then copy rest of options back 1732 * to close up the deleted entry. 1733 */ 1734 bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)), 1735 &cp[IPOPT_OFFSET+1], 1736 (unsigned)cnt - (IPOPT_MINOFF - 1)); 1737 break; 1738 } 1739 } 1740 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1741 goto bad; 1742 *pcbopt = m; 1743 return (0); 1744 1745 bad: 1746 (void)m_free(m); 1747 return (EINVAL); 1748 } 1749 1750 /* 1751 * XXX 1752 * The whole multicast option thing needs to be re-thought. 1753 * Several of these options are equally applicable to non-multicast 1754 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1755 * standard option (IP_TTL). 1756 */ 1757 1758 /* 1759 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1760 */ 1761 static struct ifnet * 1762 ip_multicast_if(a, ifindexp) 1763 struct in_addr *a; 1764 int *ifindexp; 1765 { 1766 int ifindex; 1767 struct ifnet *ifp; 1768 1769 if (ifindexp) 1770 *ifindexp = 0; 1771 if (ntohl(a->s_addr) >> 24 == 0) { 1772 ifindex = ntohl(a->s_addr) & 0xffffff; 1773 if (ifindex < 0 || if_index < ifindex) 1774 return NULL; 1775 ifp = ifnet_byindex(ifindex); 1776 if (ifindexp) 1777 *ifindexp = ifindex; 1778 } else { 1779 INADDR_TO_IFP(*a, ifp); 1780 } 1781 return ifp; 1782 } 1783 1784 /* 1785 * Set the IP multicast options in response to user setsockopt(). 1786 */ 1787 static int 1788 ip_setmoptions(sopt, imop) 1789 struct sockopt *sopt; 1790 struct ip_moptions **imop; 1791 { 1792 int error = 0; 1793 int i; 1794 struct in_addr addr; 1795 struct ip_mreq mreq; 1796 struct ifnet *ifp; 1797 struct ip_moptions *imo = *imop; 1798 struct route ro; 1799 struct sockaddr_in *dst; 1800 int ifindex; 1801 int s; 1802 1803 if (imo == NULL) { 1804 /* 1805 * No multicast option buffer attached to the pcb; 1806 * allocate one and initialize to default values. 1807 */ 1808 imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, 1809 M_WAITOK); 1810 1811 if (imo == NULL) 1812 return (ENOBUFS); 1813 *imop = imo; 1814 imo->imo_multicast_ifp = NULL; 1815 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1816 imo->imo_multicast_vif = -1; 1817 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1818 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1819 imo->imo_num_memberships = 0; 1820 } 1821 1822 switch (sopt->sopt_name) { 1823 /* store an index number for the vif you wanna use in the send */ 1824 case IP_MULTICAST_VIF: 1825 if (legal_vif_num == 0) { 1826 error = EOPNOTSUPP; 1827 break; 1828 } 1829 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1830 if (error) 1831 break; 1832 if (!legal_vif_num(i) && (i != -1)) { 1833 error = EINVAL; 1834 break; 1835 } 1836 imo->imo_multicast_vif = i; 1837 break; 1838 1839 case IP_MULTICAST_IF: 1840 /* 1841 * Select the interface for outgoing multicast packets. 1842 */ 1843 error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); 1844 if (error) 1845 break; 1846 /* 1847 * INADDR_ANY is used to remove a previous selection. 1848 * When no interface is selected, a default one is 1849 * chosen every time a multicast packet is sent. 1850 */ 1851 if (addr.s_addr == INADDR_ANY) { 1852 imo->imo_multicast_ifp = NULL; 1853 break; 1854 } 1855 /* 1856 * The selected interface is identified by its local 1857 * IP address. Find the interface and confirm that 1858 * it supports multicasting. 1859 */ 1860 s = splimp(); 1861 ifp = ip_multicast_if(&addr, &ifindex); 1862 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1863 splx(s); 1864 error = EADDRNOTAVAIL; 1865 break; 1866 } 1867 imo->imo_multicast_ifp = ifp; 1868 if (ifindex) 1869 imo->imo_multicast_addr = addr; 1870 else 1871 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1872 splx(s); 1873 break; 1874 1875 case IP_MULTICAST_TTL: 1876 /* 1877 * Set the IP time-to-live for outgoing multicast packets. 1878 * The original multicast API required a char argument, 1879 * which is inconsistent with the rest of the socket API. 1880 * We allow either a char or an int. 1881 */ 1882 if (sopt->sopt_valsize == 1) { 1883 u_char ttl; 1884 error = sooptcopyin(sopt, &ttl, 1, 1); 1885 if (error) 1886 break; 1887 imo->imo_multicast_ttl = ttl; 1888 } else { 1889 u_int ttl; 1890 error = sooptcopyin(sopt, &ttl, sizeof ttl, 1891 sizeof ttl); 1892 if (error) 1893 break; 1894 if (ttl > 255) 1895 error = EINVAL; 1896 else 1897 imo->imo_multicast_ttl = ttl; 1898 } 1899 break; 1900 1901 case IP_MULTICAST_LOOP: 1902 /* 1903 * Set the loopback flag for outgoing multicast packets. 1904 * Must be zero or one. The original multicast API required a 1905 * char argument, which is inconsistent with the rest 1906 * of the socket API. We allow either a char or an int. 1907 */ 1908 if (sopt->sopt_valsize == 1) { 1909 u_char loop; 1910 error = sooptcopyin(sopt, &loop, 1, 1); 1911 if (error) 1912 break; 1913 imo->imo_multicast_loop = !!loop; 1914 } else { 1915 u_int loop; 1916 error = sooptcopyin(sopt, &loop, sizeof loop, 1917 sizeof loop); 1918 if (error) 1919 break; 1920 imo->imo_multicast_loop = !!loop; 1921 } 1922 break; 1923 1924 case IP_ADD_MEMBERSHIP: 1925 /* 1926 * Add a multicast group membership. 1927 * Group must be a valid IP multicast address. 1928 */ 1929 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 1930 if (error) 1931 break; 1932 1933 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1934 error = EINVAL; 1935 break; 1936 } 1937 s = splimp(); 1938 /* 1939 * If no interface address was provided, use the interface of 1940 * the route to the given multicast address. 1941 */ 1942 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1943 bzero((caddr_t)&ro, sizeof(ro)); 1944 dst = (struct sockaddr_in *)&ro.ro_dst; 1945 dst->sin_len = sizeof(*dst); 1946 dst->sin_family = AF_INET; 1947 dst->sin_addr = mreq.imr_multiaddr; 1948 rtalloc_ign(&ro, RTF_CLONING); 1949 if (ro.ro_rt == NULL) { 1950 error = EADDRNOTAVAIL; 1951 splx(s); 1952 break; 1953 } 1954 ifp = ro.ro_rt->rt_ifp; 1955 RTFREE(ro.ro_rt); 1956 } 1957 else { 1958 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1959 } 1960 1961 /* 1962 * See if we found an interface, and confirm that it 1963 * supports multicast. 1964 */ 1965 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1966 error = EADDRNOTAVAIL; 1967 splx(s); 1968 break; 1969 } 1970 /* 1971 * See if the membership already exists or if all the 1972 * membership slots are full. 1973 */ 1974 for (i = 0; i < imo->imo_num_memberships; ++i) { 1975 if (imo->imo_membership[i]->inm_ifp == ifp && 1976 imo->imo_membership[i]->inm_addr.s_addr 1977 == mreq.imr_multiaddr.s_addr) 1978 break; 1979 } 1980 if (i < imo->imo_num_memberships) { 1981 error = EADDRINUSE; 1982 splx(s); 1983 break; 1984 } 1985 if (i == IP_MAX_MEMBERSHIPS) { 1986 error = ETOOMANYREFS; 1987 splx(s); 1988 break; 1989 } 1990 /* 1991 * Everything looks good; add a new record to the multicast 1992 * address list for the given interface. 1993 */ 1994 if ((imo->imo_membership[i] = 1995 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 1996 error = ENOBUFS; 1997 splx(s); 1998 break; 1999 } 2000 ++imo->imo_num_memberships; 2001 splx(s); 2002 break; 2003 2004 case IP_DROP_MEMBERSHIP: 2005 /* 2006 * Drop a multicast group membership. 2007 * Group must be a valid IP multicast address. 2008 */ 2009 error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); 2010 if (error) 2011 break; 2012 2013 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 2014 error = EINVAL; 2015 break; 2016 } 2017 2018 s = splimp(); 2019 /* 2020 * If an interface address was specified, get a pointer 2021 * to its ifnet structure. 2022 */ 2023 if (mreq.imr_interface.s_addr == INADDR_ANY) 2024 ifp = NULL; 2025 else { 2026 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 2027 if (ifp == NULL) { 2028 error = EADDRNOTAVAIL; 2029 splx(s); 2030 break; 2031 } 2032 } 2033 /* 2034 * Find the membership in the membership array. 2035 */ 2036 for (i = 0; i < imo->imo_num_memberships; ++i) { 2037 if ((ifp == NULL || 2038 imo->imo_membership[i]->inm_ifp == ifp) && 2039 imo->imo_membership[i]->inm_addr.s_addr == 2040 mreq.imr_multiaddr.s_addr) 2041 break; 2042 } 2043 if (i == imo->imo_num_memberships) { 2044 error = EADDRNOTAVAIL; 2045 splx(s); 2046 break; 2047 } 2048 /* 2049 * Give up the multicast address record to which the 2050 * membership points. 2051 */ 2052 in_delmulti(imo->imo_membership[i]); 2053 /* 2054 * Remove the gap in the membership array. 2055 */ 2056 for (++i; i < imo->imo_num_memberships; ++i) 2057 imo->imo_membership[i-1] = imo->imo_membership[i]; 2058 --imo->imo_num_memberships; 2059 splx(s); 2060 break; 2061 2062 default: 2063 error = EOPNOTSUPP; 2064 break; 2065 } 2066 2067 /* 2068 * If all options have default values, no need to keep the mbuf. 2069 */ 2070 if (imo->imo_multicast_ifp == NULL && 2071 imo->imo_multicast_vif == -1 && 2072 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 2073 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 2074 imo->imo_num_memberships == 0) { 2075 free(*imop, M_IPMOPTS); 2076 *imop = NULL; 2077 } 2078 2079 return (error); 2080 } 2081 2082 /* 2083 * Return the IP multicast options in response to user getsockopt(). 2084 */ 2085 static int 2086 ip_getmoptions(sopt, imo) 2087 struct sockopt *sopt; 2088 register struct ip_moptions *imo; 2089 { 2090 struct in_addr addr; 2091 struct in_ifaddr *ia; 2092 int error, optval; 2093 u_char coptval; 2094 2095 error = 0; 2096 switch (sopt->sopt_name) { 2097 case IP_MULTICAST_VIF: 2098 if (imo != NULL) 2099 optval = imo->imo_multicast_vif; 2100 else 2101 optval = -1; 2102 error = sooptcopyout(sopt, &optval, sizeof optval); 2103 break; 2104 2105 case IP_MULTICAST_IF: 2106 if (imo == NULL || imo->imo_multicast_ifp == NULL) 2107 addr.s_addr = INADDR_ANY; 2108 else if (imo->imo_multicast_addr.s_addr) { 2109 /* return the value user has set */ 2110 addr = imo->imo_multicast_addr; 2111 } else { 2112 IFP_TO_IA(imo->imo_multicast_ifp, ia); 2113 addr.s_addr = (ia == NULL) ? INADDR_ANY 2114 : IA_SIN(ia)->sin_addr.s_addr; 2115 } 2116 error = sooptcopyout(sopt, &addr, sizeof addr); 2117 break; 2118 2119 case IP_MULTICAST_TTL: 2120 if (imo == 0) 2121 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 2122 else 2123 optval = coptval = imo->imo_multicast_ttl; 2124 if (sopt->sopt_valsize == 1) 2125 error = sooptcopyout(sopt, &coptval, 1); 2126 else 2127 error = sooptcopyout(sopt, &optval, sizeof optval); 2128 break; 2129 2130 case IP_MULTICAST_LOOP: 2131 if (imo == 0) 2132 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 2133 else 2134 optval = coptval = imo->imo_multicast_loop; 2135 if (sopt->sopt_valsize == 1) 2136 error = sooptcopyout(sopt, &coptval, 1); 2137 else 2138 error = sooptcopyout(sopt, &optval, sizeof optval); 2139 break; 2140 2141 default: 2142 error = ENOPROTOOPT; 2143 break; 2144 } 2145 return (error); 2146 } 2147 2148 /* 2149 * Discard the IP multicast options. 2150 */ 2151 void 2152 ip_freemoptions(imo) 2153 register struct ip_moptions *imo; 2154 { 2155 register int i; 2156 2157 if (imo != NULL) { 2158 for (i = 0; i < imo->imo_num_memberships; ++i) 2159 in_delmulti(imo->imo_membership[i]); 2160 free(imo, M_IPMOPTS); 2161 } 2162 } 2163 2164 /* 2165 * Routine called from ip_output() to loop back a copy of an IP multicast 2166 * packet to the input queue of a specified interface. Note that this 2167 * calls the output routine of the loopback "driver", but with an interface 2168 * pointer that might NOT be a loopback interface -- evil, but easier than 2169 * replicating that code here. 2170 */ 2171 static void 2172 ip_mloopback(ifp, m, dst, hlen) 2173 struct ifnet *ifp; 2174 register struct mbuf *m; 2175 register struct sockaddr_in *dst; 2176 int hlen; 2177 { 2178 register struct ip *ip; 2179 struct mbuf *copym; 2180 2181 copym = m_copy(m, 0, M_COPYALL); 2182 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 2183 copym = m_pullup(copym, hlen); 2184 if (copym != NULL) { 2185 /* If needed, compute the checksum and mark it as valid. */ 2186 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2187 in_delayed_cksum(copym); 2188 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 2189 copym->m_pkthdr.csum_flags |= 2190 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 2191 copym->m_pkthdr.csum_data = 0xffff; 2192 } 2193 /* 2194 * We don't bother to fragment if the IP length is greater 2195 * than the interface's MTU. Can this possibly matter? 2196 */ 2197 ip = mtod(copym, struct ip *); 2198 ip->ip_len = htons(ip->ip_len); 2199 ip->ip_off = htons(ip->ip_off); 2200 ip->ip_sum = 0; 2201 ip->ip_sum = in_cksum(copym, hlen); 2202 /* 2203 * NB: 2204 * It's not clear whether there are any lingering 2205 * reentrancy problems in other areas which might 2206 * be exposed by using ip_input directly (in 2207 * particular, everything which modifies the packet 2208 * in-place). Yet another option is using the 2209 * protosw directly to deliver the looped back 2210 * packet. For the moment, we'll err on the side 2211 * of safety by using if_simloop(). 2212 */ 2213 #if 1 /* XXX */ 2214 if (dst->sin_family != AF_INET) { 2215 printf("ip_mloopback: bad address family %d\n", 2216 dst->sin_family); 2217 dst->sin_family = AF_INET; 2218 } 2219 #endif 2220 2221 #ifdef notdef 2222 copym->m_pkthdr.rcvif = ifp; 2223 ip_input(copym); 2224 #else 2225 if_simloop(ifp, copym, dst->sin_family, 0); 2226 #endif 2227 } 2228 } 2229