1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_bootp.h" 36 #include "opt_ipfw.h" 37 #include "opt_ipstealth.h" 38 #include "opt_ipsec.h" 39 #include "opt_route.h" 40 #include "opt_rss.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/mbuf.h> 45 #include <sys/malloc.h> 46 #include <sys/domain.h> 47 #include <sys/protosw.h> 48 #include <sys/socket.h> 49 #include <sys/time.h> 50 #include <sys/kernel.h> 51 #include <sys/lock.h> 52 #include <sys/rmlock.h> 53 #include <sys/rwlock.h> 54 #include <sys/sdt.h> 55 #include <sys/syslog.h> 56 #include <sys/sysctl.h> 57 58 #include <net/pfil.h> 59 #include <net/if.h> 60 #include <net/if_types.h> 61 #include <net/if_var.h> 62 #include <net/if_dl.h> 63 #include <net/route.h> 64 #include <net/netisr.h> 65 #include <net/rss_config.h> 66 #include <net/vnet.h> 67 68 #include <netinet/in.h> 69 #include <netinet/in_kdtrace.h> 70 #include <netinet/in_systm.h> 71 #include <netinet/in_var.h> 72 #include <netinet/ip.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/ip_fw.h> 76 #include <netinet/ip_icmp.h> 77 #include <netinet/ip_options.h> 78 #include <machine/in_cksum.h> 79 #include <netinet/ip_carp.h> 80 #ifdef IPSEC 81 #include <netinet/ip_ipsec.h> 82 #include <netipsec/ipsec.h> 83 #include <netipsec/key.h> 84 #endif /* IPSEC */ 85 #include <netinet/in_rss.h> 86 87 #include <sys/socketvar.h> 88 89 #include <security/mac/mac_framework.h> 90 91 #ifdef CTASSERT 92 CTASSERT(sizeof(struct ip) == 20); 93 #endif 94 95 /* IP reassembly functions are defined in ip_reass.c. 
*/
extern void ipreass_init(void);
extern void ipreass_drain(void);
extern void ipreass_slowtimo(void);
#ifdef VIMAGE
extern void ipreass_destroy(void);
#endif

/* rmlock set up through RM_SYSINIT under the name "in_ifaddr_lock". */
struct rmlock in_ifaddr_lock;
RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");

/*
 * Per-VNET flag.  ip_input() tests V_rsvp_on and, when it is set, delivers
 * every IPPROTO_RSVP packet locally.
 */
VNET_DEFINE(int, rsvp_on);

/* Per-VNET forwarding switch, exported read-write as "forwarding". */
VNET_DEFINE(int, ipforwarding);
SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipforwarding), 0,
    "Enable IP forwarding between interfaces");

/* Per-VNET switch consulted by ip_forward() before building a redirect. */
static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */
#define	V_ipsendredirects	VNET(ipsendredirects)
SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipsendredirects), 0,
    "Enable sending IP redirects");

/*
 * XXX - Setting ip_checkinterface mostly implements the receive side of
 * the Strong ES model described in RFC 1122, but since the routing table
 * and transmit implementation do not implement the Strong ES model,
 * setting this to 1 results in an odd hybrid.
 *
 * XXX - ip_checkinterface currently must be disabled if you use ipnat
 * to translate the destination address to another local interface.
 *
 * XXX - ip_checkinterface must be disabled if you add IP aliases
 * to the loopback interface instead of the interface where the
 * packets for those addresses are received.
 */
static VNET_DEFINE(int, ip_checkinterface);
#define	V_ip_checkinterface	VNET(ip_checkinterface)
SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ip_checkinterface), 0,
    "Verify packet arrives on correct interface");

VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */

/*
 * netisr handler whose callback is ip_input(); registered by ip_init().
 * With RSS the CPU is chosen by rss_soft_m2cpuid_v4, otherwise the
 * flow-based netisr policy is used.
 */
static struct netisr_handler ip_nh = {
	.nh_name = "ip",
	.nh_handler = ip_input,
	.nh_proto = NETISR_IP,
#ifdef	RSS
	.nh_m2cpuid = rss_soft_m2cpuid_v4,
	.nh_policy = NETISR_POLICY_CPU,
	.nh_dispatch = NETISR_DISPATCH_HYBRID,
#else
	.nh_policy = NETISR_POLICY_FLOW,
#endif
};

#ifdef	RSS
/*
 * Directly dispatched frames are currently assumed
 * to have a flowid already calculated.
 *
 * There should probably be an assertion that the mbuf
 * actually carries valid flow details.
 */
static struct netisr_handler ip_direct_nh = {
	.nh_name = "ip_direct",
	.nh_handler = ip_direct_input,
	.nh_proto = NETISR_IP_DIRECT,
	.nh_m2cpuid = rss_soft_m2cpuid_v4,
	.nh_policy = NETISR_POLICY_CPU,
	.nh_dispatch = NETISR_DISPATCH_HYBRID,
};
#endif

extern	struct domain inetdomain;
extern	struct protosw inetsw[];
/*
 * Maps an IP protocol number to an index into inetsw[].  ip_init() fills
 * it in; ipproto_register()/ipproto_unregister() update single slots.
 */
u_char	ip_protox[IPPROTO_MAX];
VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */

#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
    &ip_mtu, 0, "Default MTU");
#endif

#ifdef IPSTEALTH
/* Per-VNET switch; ip_forward() skips the TTL check/decrement when set. */
VNET_DEFINE(int, ipstealth);
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipstealth), 0,
    "IP stealth mode, no TTL decrementation on forwarding");
#endif

/*
 * IP statistics are stored in the "array" of counter(9)s.
192 */ 193 VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 194 VNET_PCPUSTAT_SYSINIT(ipstat); 195 SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 196 "IP statistics (struct ipstat, netinet/ip_var.h)"); 197 198 #ifdef VIMAGE 199 VNET_PCPUSTAT_SYSUNINIT(ipstat); 200 #endif /* VIMAGE */ 201 202 /* 203 * Kernel module interface for updating ipstat. The argument is an index 204 * into ipstat treated as an array. 205 */ 206 void 207 kmod_ipstat_inc(int statnum) 208 { 209 210 counter_u64_add(VNET(ipstat)[statnum], 1); 211 } 212 213 void 214 kmod_ipstat_dec(int statnum) 215 { 216 217 counter_u64_add(VNET(ipstat)[statnum], -1); 218 } 219 220 static int 221 sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 222 { 223 int error, qlimit; 224 225 netisr_getqlimit(&ip_nh, &qlimit); 226 error = sysctl_handle_int(oidp, &qlimit, 0, req); 227 if (error || !req->newptr) 228 return (error); 229 if (qlimit < 1) 230 return (EINVAL); 231 return (netisr_setqlimit(&ip_nh, qlimit)); 232 } 233 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 234 CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", 235 "Maximum size of the IP input queue"); 236 237 static int 238 sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 239 { 240 u_int64_t qdrops_long; 241 int error, qdrops; 242 243 netisr_getqdrops(&ip_nh, &qdrops_long); 244 qdrops = qdrops_long; 245 error = sysctl_handle_int(oidp, &qdrops, 0, req); 246 if (error || !req->newptr) 247 return (error); 248 if (qdrops != 0) 249 return (EINVAL); 250 netisr_clearqdrops(&ip_nh); 251 return (0); 252 } 253 254 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 255 CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", 256 "Number of packets dropped from the IP input queue"); 257 258 #ifdef RSS 259 static int 260 sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 261 { 262 int error, qlimit; 263 264 netisr_getqlimit(&ip_direct_nh, &qlimit); 265 error = 
sysctl_handle_int(oidp, &qlimit, 0, req); 266 if (error || !req->newptr) 267 return (error); 268 if (qlimit < 1) 269 return (EINVAL); 270 return (netisr_setqlimit(&ip_direct_nh, qlimit)); 271 } 272 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, 273 CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", 274 "Maximum size of the IP direct input queue"); 275 276 static int 277 sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 278 { 279 u_int64_t qdrops_long; 280 int error, qdrops; 281 282 netisr_getqdrops(&ip_direct_nh, &qdrops_long); 283 qdrops = qdrops_long; 284 error = sysctl_handle_int(oidp, &qdrops, 0, req); 285 if (error || !req->newptr) 286 return (error); 287 if (qdrops != 0) 288 return (EINVAL); 289 netisr_clearqdrops(&ip_direct_nh); 290 return (0); 291 } 292 293 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, 294 CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", 295 "Number of packets dropped from the IP direct input queue"); 296 #endif /* RSS */ 297 298 /* 299 * IP initialization: fill in IP protocol switch table. 300 * All protocols not implemented in kernel go to raw IP protocol handler. 301 */ 302 void 303 ip_init(void) 304 { 305 struct protosw *pr; 306 int i; 307 308 TAILQ_INIT(&V_in_ifaddrhead); 309 V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 310 311 /* Initialize IP reassembly queue. */ 312 ipreass_init(); 313 314 /* Initialize packet filter hooks. */ 315 V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; 316 V_inet_pfil_hook.ph_af = AF_INET; 317 if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) 318 printf("%s: WARNING: unable to register pfil hook, " 319 "error %d\n", __func__, i); 320 321 /* Skip initialization of globals for non-default instances. 
*/ 322 if (!IS_DEFAULT_VNET(curvnet)) 323 return; 324 325 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 326 if (pr == NULL) 327 panic("ip_init: PF_INET not found"); 328 329 /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 330 for (i = 0; i < IPPROTO_MAX; i++) 331 ip_protox[i] = pr - inetsw; 332 /* 333 * Cycle through IP protocols and put them into the appropriate place 334 * in ip_protox[]. 335 */ 336 for (pr = inetdomain.dom_protosw; 337 pr < inetdomain.dom_protoswNPROTOSW; pr++) 338 if (pr->pr_domain->dom_family == PF_INET && 339 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 340 /* Be careful to only index valid IP protocols. */ 341 if (pr->pr_protocol < IPPROTO_MAX) 342 ip_protox[pr->pr_protocol] = pr - inetsw; 343 } 344 345 netisr_register(&ip_nh); 346 #ifdef RSS 347 netisr_register(&ip_direct_nh); 348 #endif 349 } 350 351 #ifdef VIMAGE 352 void 353 ip_destroy(void) 354 { 355 int i; 356 357 if ((i = pfil_head_unregister(&V_inet_pfil_hook)) != 0) 358 printf("%s: WARNING: unable to unregister pfil hook, " 359 "error %d\n", __func__, i); 360 361 /* Cleanup in_ifaddr hash table; should be empty. */ 362 hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 363 364 /* Destroy IP reassembly queue. */ 365 ipreass_destroy(); 366 } 367 #endif 368 369 #ifdef RSS 370 /* 371 * IP direct input routine. 372 * 373 * This is called when reinjecting completed fragments where 374 * all of the previous checking and book-keeping has been done. 375 */ 376 void 377 ip_direct_input(struct mbuf *m) 378 { 379 struct ip *ip; 380 int hlen; 381 382 ip = mtod(m, struct ip *); 383 hlen = ip->ip_hl << 2; 384 385 IPSTAT_INC(ips_delivered); 386 (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 387 return; 388 } 389 #endif 390 391 /* 392 * Ip input routine. Checksum and byte swap header. If fragmented 393 * try to reassemble. Process options. Pass to next level. 
394 */ 395 void 396 ip_input(struct mbuf *m) 397 { 398 struct ip *ip = NULL; 399 struct in_ifaddr *ia = NULL; 400 struct ifaddr *ifa; 401 struct ifnet *ifp; 402 int checkif, hlen = 0; 403 uint16_t sum, ip_len; 404 int dchg = 0; /* dest changed after fw */ 405 struct in_addr odst; /* original dst address */ 406 407 M_ASSERTPKTHDR(m); 408 409 if (m->m_flags & M_FASTFWD_OURS) { 410 m->m_flags &= ~M_FASTFWD_OURS; 411 /* Set up some basics that will be used later. */ 412 ip = mtod(m, struct ip *); 413 hlen = ip->ip_hl << 2; 414 ip_len = ntohs(ip->ip_len); 415 goto ours; 416 } 417 418 IPSTAT_INC(ips_total); 419 420 if (m->m_pkthdr.len < sizeof(struct ip)) 421 goto tooshort; 422 423 if (m->m_len < sizeof (struct ip) && 424 (m = m_pullup(m, sizeof (struct ip))) == NULL) { 425 IPSTAT_INC(ips_toosmall); 426 return; 427 } 428 ip = mtod(m, struct ip *); 429 430 if (ip->ip_v != IPVERSION) { 431 IPSTAT_INC(ips_badvers); 432 goto bad; 433 } 434 435 hlen = ip->ip_hl << 2; 436 if (hlen < sizeof(struct ip)) { /* minimum header length */ 437 IPSTAT_INC(ips_badhlen); 438 goto bad; 439 } 440 if (hlen > m->m_len) { 441 if ((m = m_pullup(m, hlen)) == NULL) { 442 IPSTAT_INC(ips_badhlen); 443 return; 444 } 445 ip = mtod(m, struct ip *); 446 } 447 448 IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 449 450 /* 127/8 must not appear on wire - RFC1122 */ 451 ifp = m->m_pkthdr.rcvif; 452 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 453 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 454 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 455 IPSTAT_INC(ips_badaddr); 456 goto bad; 457 } 458 } 459 460 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 461 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 462 } else { 463 if (hlen == sizeof(struct ip)) { 464 sum = in_cksum_hdr(ip); 465 } else { 466 sum = in_cksum(m, hlen); 467 } 468 } 469 if (sum) { 470 IPSTAT_INC(ips_badsum); 471 goto bad; 472 } 473 474 #ifdef ALTQ 475 if (altq_input != NULL && 
(*altq_input)(m, AF_INET) == 0) 476 /* packet is dropped by traffic conditioner */ 477 return; 478 #endif 479 480 ip_len = ntohs(ip->ip_len); 481 if (ip_len < hlen) { 482 IPSTAT_INC(ips_badlen); 483 goto bad; 484 } 485 486 /* 487 * Check that the amount of data in the buffers 488 * is as at least much as the IP header would have us expect. 489 * Trim mbufs if longer than we expect. 490 * Drop packet if shorter than we expect. 491 */ 492 if (m->m_pkthdr.len < ip_len) { 493 tooshort: 494 IPSTAT_INC(ips_tooshort); 495 goto bad; 496 } 497 if (m->m_pkthdr.len > ip_len) { 498 if (m->m_len == m->m_pkthdr.len) { 499 m->m_len = ip_len; 500 m->m_pkthdr.len = ip_len; 501 } else 502 m_adj(m, ip_len - m->m_pkthdr.len); 503 } 504 505 /* Try to forward the packet, but if we fail continue */ 506 #ifdef IPSEC 507 /* For now we do not handle IPSEC in tryforward. */ 508 if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) && 509 (V_ipforwarding == 1)) 510 if (ip_tryforward(m) == NULL) 511 return; 512 /* 513 * Bypass packet filtering for packets previously handled by IPsec. 514 */ 515 if (ip_ipsec_filtertunnel(m)) 516 goto passin; 517 #else 518 if (V_ipforwarding == 1) 519 if (ip_tryforward(m) == NULL) 520 return; 521 #endif /* IPSEC */ 522 523 /* 524 * Run through list of hooks for input packets. 525 * 526 * NB: Beware of the destination address changing (e.g. 527 * by NAT rewriting). When this happens, tell 528 * ip_forward to do the right thing. 529 */ 530 531 /* Jump over all PFIL processing if hooks are not active. 
*/ 532 if (!PFIL_HOOKED(&V_inet_pfil_hook)) 533 goto passin; 534 535 odst = ip->ip_dst; 536 if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) 537 return; 538 if (m == NULL) /* consumed by filter */ 539 return; 540 541 ip = mtod(m, struct ip *); 542 dchg = (odst.s_addr != ip->ip_dst.s_addr); 543 ifp = m->m_pkthdr.rcvif; 544 545 if (m->m_flags & M_FASTFWD_OURS) { 546 m->m_flags &= ~M_FASTFWD_OURS; 547 goto ours; 548 } 549 if (m->m_flags & M_IP_NEXTHOP) { 550 dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL); 551 if (dchg != 0) { 552 /* 553 * Directly ship the packet on. This allows 554 * forwarding packets originally destined to us 555 * to some other directly connected host. 556 */ 557 ip_forward(m, 1); 558 return; 559 } 560 } 561 passin: 562 563 /* 564 * Process options and, if not destined for us, 565 * ship it on. ip_dooptions returns 1 when an 566 * error was detected (causing an icmp message 567 * to be sent and the original packet to be freed). 568 */ 569 if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 570 return; 571 572 /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 573 * matter if it is destined to another node, or whether it is 574 * a multicast one, RSVP wants it! and prevents it from being forwarded 575 * anywhere else. Also checks if the rsvp daemon is running before 576 * grabbing the packet. 577 */ 578 if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 579 goto ours; 580 581 /* 582 * Check our list of addresses, to see if the packet is for us. 583 * If we don't have any addresses, assume any unicast packet 584 * we receive might be for us (and let the upper layers deal 585 * with it). 
586 */ 587 if (TAILQ_EMPTY(&V_in_ifaddrhead) && 588 (m->m_flags & (M_MCAST|M_BCAST)) == 0) 589 goto ours; 590 591 /* 592 * Enable a consistency check between the destination address 593 * and the arrival interface for a unicast packet (the RFC 1122 594 * strong ES model) if IP forwarding is disabled and the packet 595 * is not locally generated and the packet is not subject to 596 * 'ipfw fwd'. 597 * 598 * XXX - Checking also should be disabled if the destination 599 * address is ipnat'ed to a different interface. 600 * 601 * XXX - Checking is incompatible with IP aliases added 602 * to the loopback interface instead of the interface where 603 * the packets are received. 604 * 605 * XXX - This is the case for carp vhost IPs as well so we 606 * insert a workaround. If the packet got here, we already 607 * checked with carp_iamatch() and carp_forus(). 608 */ 609 checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 610 ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 611 ifp->if_carp == NULL && (dchg == 0); 612 613 /* 614 * Check for exact addresses in the hash bucket. 615 */ 616 /* IN_IFADDR_RLOCK(); */ 617 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 618 /* 619 * If the address matches, verify that the packet 620 * arrived via the correct interface if checking is 621 * enabled. 622 */ 623 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 624 (!checkif || ia->ia_ifp == ifp)) { 625 counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 626 counter_u64_add(ia->ia_ifa.ifa_ibytes, 627 m->m_pkthdr.len); 628 /* IN_IFADDR_RUNLOCK(); */ 629 goto ours; 630 } 631 } 632 /* IN_IFADDR_RUNLOCK(); */ 633 634 /* 635 * Check for broadcast addresses. 636 * 637 * Only accept broadcast packets that arrive via the matching 638 * interface. Reception of forwarded directed broadcasts would 639 * be handled via ip_forward() and ether_output() with the loopback 640 * into the stack for SIMPLEX interfaces handled by ether_output(). 
641 */ 642 if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 643 IF_ADDR_RLOCK(ifp); 644 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 645 if (ifa->ifa_addr->sa_family != AF_INET) 646 continue; 647 ia = ifatoia(ifa); 648 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 649 ip->ip_dst.s_addr) { 650 counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 651 counter_u64_add(ia->ia_ifa.ifa_ibytes, 652 m->m_pkthdr.len); 653 IF_ADDR_RUNLOCK(ifp); 654 goto ours; 655 } 656 #ifdef BOOTP_COMPAT 657 if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 658 counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 659 counter_u64_add(ia->ia_ifa.ifa_ibytes, 660 m->m_pkthdr.len); 661 IF_ADDR_RUNLOCK(ifp); 662 goto ours; 663 } 664 #endif 665 } 666 IF_ADDR_RUNLOCK(ifp); 667 ia = NULL; 668 } 669 /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 670 if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 671 IPSTAT_INC(ips_cantforward); 672 m_freem(m); 673 return; 674 } 675 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 676 if (V_ip_mrouter) { 677 /* 678 * If we are acting as a multicast router, all 679 * incoming multicast packets are passed to the 680 * kernel-level multicast forwarding function. 681 * The packet is returned (relatively) intact; if 682 * ip_mforward() returns a non-zero value, the packet 683 * must be discarded, else it may be accepted below. 684 */ 685 if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 686 IPSTAT_INC(ips_cantforward); 687 m_freem(m); 688 return; 689 } 690 691 /* 692 * The process-level routing daemon needs to receive 693 * all multicast IGMP packets, whether or not this 694 * host belongs to their destination groups. 695 */ 696 if (ip->ip_p == IPPROTO_IGMP) 697 goto ours; 698 IPSTAT_INC(ips_forward); 699 } 700 /* 701 * Assume the packet is for us, to avoid prematurely taking 702 * a lock on the in_multi hash. Protocols must perform 703 * their own filtering and update statistics accordingly. 
704 */ 705 goto ours; 706 } 707 if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 708 goto ours; 709 if (ip->ip_dst.s_addr == INADDR_ANY) 710 goto ours; 711 712 /* 713 * Not for us; forward if possible and desirable. 714 */ 715 if (V_ipforwarding == 0) { 716 IPSTAT_INC(ips_cantforward); 717 m_freem(m); 718 } else { 719 ip_forward(m, dchg); 720 } 721 return; 722 723 ours: 724 #ifdef IPSTEALTH 725 /* 726 * IPSTEALTH: Process non-routing options only 727 * if the packet is destined for us. 728 */ 729 if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 730 return; 731 #endif /* IPSTEALTH */ 732 733 /* 734 * Attempt reassembly; if it succeeds, proceed. 735 * ip_reass() will return a different mbuf. 736 */ 737 if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 738 /* XXXGL: shouldn't we save & set m_flags? */ 739 m = ip_reass(m); 740 if (m == NULL) 741 return; 742 ip = mtod(m, struct ip *); 743 /* Get the header length of the reassembled packet */ 744 hlen = ip->ip_hl << 2; 745 } 746 747 #ifdef IPSEC 748 /* 749 * enforce IPsec policy checking if we are seeing last header. 750 * note that we do not visit this with protocols with pcb layer 751 * code - like udp/tcp/raw ip. 752 */ 753 if (ip_ipsec_input(m, ip->ip_p) != 0) 754 goto bad; 755 #endif /* IPSEC */ 756 757 /* 758 * Switch out to protocol's input routine. 759 */ 760 IPSTAT_INC(ips_delivered); 761 762 (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 763 return; 764 bad: 765 m_freem(m); 766 } 767 768 /* 769 * IP timer processing; 770 * if a timer expires on a reassembly 771 * queue, discard it. 
*/
void
ip_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	/* Run the reassembly slow timer once in every VNET. */
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		ipreass_slowtimo();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/* Drain the reassembly queue of every VNET. */
void
ip_drain(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		ipreass_drain();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/*
 * Point ip_protox[ipproto] at the protocol's inetsw[] entry.  The
 * protocol must already be present in inetsw[], either statically or
 * through pf_proto_register().
 *
 * Returns 0 on success, EPROTONOSUPPORT for an out-of-range number or a
 * protocol with no inetsw[] entry, EPFNOSUPPORT when the raw IP entry
 * cannot be found, and EEXIST when the slot is already taken.
 */
int
ipproto_register(short ipproto)
{
	struct protosw *rawpr, *cur;

	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	rawpr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (rawpr == NULL)
		return (EPFNOSUPPORT);

	/* A slot is free only while it still refers to the raw IP entry. */
	if (ip_protox[ipproto] != rawpr - inetsw)
		return (EEXIST);

	/* Locate the protocol in inetsw[] and record its index. */
	for (cur = inetdomain.dom_protosw;
	    cur < inetdomain.dom_protoswNPROTOSW; cur++) {
		if (cur->pr_domain->dom_family != PF_INET)
			continue;
		if (cur->pr_protocol == 0 || cur->pr_protocol != ipproto)
			continue;
		ip_protox[cur->pr_protocol] = cur - inetsw;
		return (0);
	}
	return (EPROTONOSUPPORT);
}

/*
 * Undo ipproto_register(): make ip_protox[ipproto] refer to the raw IP
 * entry again.
 *
 * Returns 0 on success, EPROTONOSUPPORT for an out-of-range number,
 * EPFNOSUPPORT when the raw IP entry cannot be found, and ENOENT when
 * the slot was not registered in the first place.
 */
int
ipproto_unregister(short ipproto)
{
	struct protosw *rawpr;

	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	rawpr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (rawpr == NULL)
		return (EPFNOSUPPORT);

	/* Already pointing at raw IP means nothing was registered. */
	if (ip_protox[ipproto] == rawpr - inetsw)
		return (ENOENT);

	ip_protox[ipproto] = rawpr - inetsw;
	return (0);
}

/*
 * Look up the route towards `dst' (a final destination or a next hop) in
 * FIB `fibnum' and return the in_ifaddr attached to it, with a reference
 * taken on behalf of the caller.  Returns NULL when no route is found.
 */
struct in_ifaddr *
ip_rtaddr(struct in_addr dst, u_int fibnum)
{
	struct route ro;
	struct sockaddr_in *dstsin;
	struct in_ifaddr *ifaddr;

	bzero(&ro, sizeof(ro));
	dstsin = (struct sockaddr_in *)&ro.ro_dst;
	dstsin->sin_len = sizeof(*dstsin);
	dstsin->sin_family = AF_INET;
	dstsin->sin_addr = dst;

	in_rtalloc_ign(&ro, 0, fibnum);
	if (ro.ro_rt != NULL) {
		/* Reference the address before the route is released. */
		ifaddr = ifatoia(ro.ro_rt->rt_ifa);
		ifa_ref(&ifaddr->ia_ifa);
		RTFREE(ro.ro_rt);
	} else
		ifaddr = NULL;
	return (ifaddr);
}

/* errno value for each of the PRC_NCMDS protocol control commands. */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		EHOSTUNREACH,	0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
906 */ 907 void 908 ip_forward(struct mbuf *m, int srcrt) 909 { 910 struct ip *ip = mtod(m, struct ip *); 911 struct in_ifaddr *ia; 912 struct mbuf *mcopy; 913 struct sockaddr_in *sin; 914 struct in_addr dest; 915 struct route ro; 916 int error, type = 0, code = 0, mtu = 0; 917 918 if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 919 IPSTAT_INC(ips_cantforward); 920 m_freem(m); 921 return; 922 } 923 #ifdef IPSEC 924 if (ip_ipsec_fwd(m) != 0) { 925 IPSTAT_INC(ips_cantforward); 926 m_freem(m); 927 return; 928 } 929 #endif /* IPSEC */ 930 #ifdef IPSTEALTH 931 if (!V_ipstealth) { 932 #endif 933 if (ip->ip_ttl <= IPTTLDEC) { 934 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 935 0, 0); 936 return; 937 } 938 #ifdef IPSTEALTH 939 } 940 #endif 941 942 bzero(&ro, sizeof(ro)); 943 sin = (struct sockaddr_in *)&ro.ro_dst; 944 sin->sin_family = AF_INET; 945 sin->sin_len = sizeof(*sin); 946 sin->sin_addr = ip->ip_dst; 947 #ifdef RADIX_MPATH 948 rtalloc_mpath_fib(&ro, 949 ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 950 M_GETFIB(m)); 951 #else 952 in_rtalloc_ign(&ro, 0, M_GETFIB(m)); 953 #endif 954 if (ro.ro_rt != NULL) { 955 ia = ifatoia(ro.ro_rt->rt_ifa); 956 ifa_ref(&ia->ia_ifa); 957 } else 958 ia = NULL; 959 #ifndef IPSEC 960 /* 961 * 'ia' may be NULL if there is no route for this destination. 962 * In case of IPsec, Don't discard it just yet, but pass it to 963 * ip_output in case of outgoing IPsec policy. 964 */ 965 if (!srcrt && ia == NULL) { 966 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); 967 RO_RTFREE(&ro); 968 return; 969 } 970 #endif 971 972 /* 973 * Save the IP header and at most 8 bytes of the payload, 974 * in case we need to generate an ICMP message to the src. 975 * 976 * XXX this can be optimized a lot by saving the data in a local 977 * buffer on the stack (72 bytes at most), and only allocating the 978 * mbuf if really necessary. 
The vast majority of the packets 979 * are forwarded without having to send an ICMP back (either 980 * because unnecessary, or because rate limited), so we are 981 * really we are wasting a lot of work here. 982 * 983 * We don't use m_copy() because it might return a reference 984 * to a shared cluster. Both this function and ip_output() 985 * assume exclusive access to the IP header in `m', so any 986 * data in a cluster may change before we reach icmp_error(). 987 */ 988 mcopy = m_gethdr(M_NOWAIT, m->m_type); 989 if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 990 /* 991 * It's probably ok if the pkthdr dup fails (because 992 * the deep copy of the tag chain failed), but for now 993 * be conservative and just discard the copy since 994 * code below may some day want the tags. 995 */ 996 m_free(mcopy); 997 mcopy = NULL; 998 } 999 if (mcopy != NULL) { 1000 mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 1001 mcopy->m_pkthdr.len = mcopy->m_len; 1002 m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1003 } 1004 1005 #ifdef IPSTEALTH 1006 if (!V_ipstealth) { 1007 #endif 1008 ip->ip_ttl -= IPTTLDEC; 1009 #ifdef IPSTEALTH 1010 } 1011 #endif 1012 1013 /* 1014 * If forwarding packet using same interface that it came in on, 1015 * perhaps should send a redirect to sender to shortcut a hop. 1016 * Only send redirect if source is sending directly to us, 1017 * and if packet was not source routed (or has any options). 1018 * Also, don't send redirect if forwarding using a default route 1019 * or a route modified by a redirect. 
1020 */ 1021 dest.s_addr = 0; 1022 if (!srcrt && V_ipsendredirects && 1023 ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 1024 struct rtentry *rt; 1025 1026 rt = ro.ro_rt; 1027 1028 if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && 1029 satosin(rt_key(rt))->sin_addr.s_addr != 0) { 1030 #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) 1031 u_long src = ntohl(ip->ip_src.s_addr); 1032 1033 if (RTA(rt) && 1034 (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { 1035 if (rt->rt_flags & RTF_GATEWAY) 1036 dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; 1037 else 1038 dest.s_addr = ip->ip_dst.s_addr; 1039 /* Router requirements says to only send host redirects */ 1040 type = ICMP_REDIRECT; 1041 code = ICMP_REDIRECT_HOST; 1042 } 1043 } 1044 } 1045 1046 error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1047 1048 if (error == EMSGSIZE && ro.ro_rt) 1049 mtu = ro.ro_rt->rt_mtu; 1050 RO_RTFREE(&ro); 1051 1052 if (error) 1053 IPSTAT_INC(ips_cantforward); 1054 else { 1055 IPSTAT_INC(ips_forward); 1056 if (type) 1057 IPSTAT_INC(ips_redirectsent); 1058 else { 1059 if (mcopy) 1060 m_freem(mcopy); 1061 if (ia != NULL) 1062 ifa_free(&ia->ia_ifa); 1063 return; 1064 } 1065 } 1066 if (mcopy == NULL) { 1067 if (ia != NULL) 1068 ifa_free(&ia->ia_ifa); 1069 return; 1070 } 1071 1072 switch (error) { 1073 1074 case 0: /* forwarded, but need redirect */ 1075 /* type, code set above */ 1076 break; 1077 1078 case ENETUNREACH: 1079 case EHOSTUNREACH: 1080 case ENETDOWN: 1081 case EHOSTDOWN: 1082 default: 1083 type = ICMP_UNREACH; 1084 code = ICMP_UNREACH_HOST; 1085 break; 1086 1087 case EMSGSIZE: 1088 type = ICMP_UNREACH; 1089 code = ICMP_UNREACH_NEEDFRAG; 1090 1091 #ifdef IPSEC 1092 /* 1093 * If IPsec is configured for this path, 1094 * override any possibly mtu value set by ip_output. 1095 */ 1096 mtu = ip_ipsec_mtu(mcopy, mtu); 1097 #endif /* IPSEC */ 1098 /* 1099 * If the MTU was set before make sure we are below the 1100 * interface MTU. 
1101 * If the MTU wasn't set before use the interface mtu or 1102 * fall back to the next smaller mtu step compared to the 1103 * current packet size. 1104 */ 1105 if (mtu != 0) { 1106 if (ia != NULL) 1107 mtu = min(mtu, ia->ia_ifp->if_mtu); 1108 } else { 1109 if (ia != NULL) 1110 mtu = ia->ia_ifp->if_mtu; 1111 else 1112 mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1113 } 1114 IPSTAT_INC(ips_cantfrag); 1115 break; 1116 1117 case ENOBUFS: 1118 case EACCES: /* ipfw denied packet */ 1119 m_freem(mcopy); 1120 if (ia != NULL) 1121 ifa_free(&ia->ia_ifa); 1122 return; 1123 } 1124 if (ia != NULL) 1125 ifa_free(&ia->ia_ifa); 1126 icmp_error(mcopy, type, code, dest.s_addr, mtu); 1127 } 1128 1129 void 1130 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1131 struct mbuf *m) 1132 { 1133 1134 if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { 1135 struct bintime bt; 1136 1137 bintime(&bt); 1138 if (inp->inp_socket->so_options & SO_BINTIME) { 1139 *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1140 SCM_BINTIME, SOL_SOCKET); 1141 if (*mp) 1142 mp = &(*mp)->m_next; 1143 } 1144 if (inp->inp_socket->so_options & SO_TIMESTAMP) { 1145 struct timeval tv; 1146 1147 bintime2timeval(&bt, &tv); 1148 *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 1149 SCM_TIMESTAMP, SOL_SOCKET); 1150 if (*mp) 1151 mp = &(*mp)->m_next; 1152 } 1153 } 1154 if (inp->inp_flags & INP_RECVDSTADDR) { 1155 *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 1156 sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 1157 if (*mp) 1158 mp = &(*mp)->m_next; 1159 } 1160 if (inp->inp_flags & INP_RECVTTL) { 1161 *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 1162 sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 1163 if (*mp) 1164 mp = &(*mp)->m_next; 1165 } 1166 #ifdef notyet 1167 /* XXX 1168 * Moving these out of udp_input() made them even more broken 1169 * than they already were. 
1170 */ 1171 /* options were tossed already */ 1172 if (inp->inp_flags & INP_RECVOPTS) { 1173 *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 1174 sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 1175 if (*mp) 1176 mp = &(*mp)->m_next; 1177 } 1178 /* ip_srcroute doesn't do what we want here, need to fix */ 1179 if (inp->inp_flags & INP_RECVRETOPTS) { 1180 *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 1181 sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 1182 if (*mp) 1183 mp = &(*mp)->m_next; 1184 } 1185 #endif 1186 if (inp->inp_flags & INP_RECVIF) { 1187 struct ifnet *ifp; 1188 struct sdlbuf { 1189 struct sockaddr_dl sdl; 1190 u_char pad[32]; 1191 } sdlbuf; 1192 struct sockaddr_dl *sdp; 1193 struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 1194 1195 if ((ifp = m->m_pkthdr.rcvif) && 1196 ifp->if_index && ifp->if_index <= V_if_index) { 1197 sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1198 /* 1199 * Change our mind and don't try copy. 1200 */ 1201 if (sdp->sdl_family != AF_LINK || 1202 sdp->sdl_len > sizeof(sdlbuf)) { 1203 goto makedummy; 1204 } 1205 bcopy(sdp, sdl2, sdp->sdl_len); 1206 } else { 1207 makedummy: 1208 sdl2->sdl_len = 1209 offsetof(struct sockaddr_dl, sdl_data[0]); 1210 sdl2->sdl_family = AF_LINK; 1211 sdl2->sdl_index = 0; 1212 sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1213 } 1214 *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 1215 IP_RECVIF, IPPROTO_IP); 1216 if (*mp) 1217 mp = &(*mp)->m_next; 1218 } 1219 if (inp->inp_flags & INP_RECVTOS) { 1220 *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 1221 sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 1222 if (*mp) 1223 mp = &(*mp)->m_next; 1224 } 1225 1226 if (inp->inp_flags2 & INP_RECVFLOWID) { 1227 uint32_t flowid, flow_type; 1228 1229 flowid = m->m_pkthdr.flowid; 1230 flow_type = M_HASHTYPE_GET(m); 1231 1232 /* 1233 * XXX should handle the failure of one or the 1234 * other - don't populate both? 
1235 */ 1236 *mp = sbcreatecontrol((caddr_t) &flowid, 1237 sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 1238 if (*mp) 1239 mp = &(*mp)->m_next; 1240 *mp = sbcreatecontrol((caddr_t) &flow_type, 1241 sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 1242 if (*mp) 1243 mp = &(*mp)->m_next; 1244 } 1245 1246 #ifdef RSS 1247 if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 1248 uint32_t flowid, flow_type; 1249 uint32_t rss_bucketid; 1250 1251 flowid = m->m_pkthdr.flowid; 1252 flow_type = M_HASHTYPE_GET(m); 1253 1254 if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 1255 *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 1256 sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 1257 if (*mp) 1258 mp = &(*mp)->m_next; 1259 } 1260 } 1261 #endif 1262 } 1263 1264 /* 1265 * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 1266 * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 1267 * locking. This code remains in ip_input.c as ip_mroute.c is optionally 1268 * compiled. 1269 */ 1270 static VNET_DEFINE(int, ip_rsvp_on); 1271 VNET_DEFINE(struct socket *, ip_rsvpd); 1272 1273 #define V_ip_rsvp_on VNET(ip_rsvp_on) 1274 1275 int 1276 ip_rsvp_init(struct socket *so) 1277 { 1278 1279 if (so->so_type != SOCK_RAW || 1280 so->so_proto->pr_protocol != IPPROTO_RSVP) 1281 return EOPNOTSUPP; 1282 1283 if (V_ip_rsvpd != NULL) 1284 return EADDRINUSE; 1285 1286 V_ip_rsvpd = so; 1287 /* 1288 * This may seem silly, but we need to be sure we don't over-increment 1289 * the RSVP counter, in case something slips up. 1290 */ 1291 if (!V_ip_rsvp_on) { 1292 V_ip_rsvp_on = 1; 1293 V_rsvp_on++; 1294 } 1295 1296 return 0; 1297 } 1298 1299 int 1300 ip_rsvp_done(void) 1301 { 1302 1303 V_ip_rsvpd = NULL; 1304 /* 1305 * This may seem silly, but we need to be sure we don't over-decrement 1306 * the RSVP counter, in case something slips up. 
1307 */ 1308 if (V_ip_rsvp_on) { 1309 V_ip_rsvp_on = 0; 1310 V_rsvp_on--; 1311 } 1312 return 0; 1313 } 1314 1315 int 1316 rsvp_input(struct mbuf **mp, int *offp, int proto) 1317 { 1318 struct mbuf *m; 1319 1320 m = *mp; 1321 *mp = NULL; 1322 1323 if (rsvp_input_p) { /* call the real one if loaded */ 1324 *mp = m; 1325 rsvp_input_p(mp, offp, proto); 1326 return (IPPROTO_DONE); 1327 } 1328 1329 /* Can still get packets with rsvp_on = 0 if there is a local member 1330 * of the group to which the RSVP packet is addressed. But in this 1331 * case we want to throw the packet away. 1332 */ 1333 1334 if (!V_rsvp_on) { 1335 m_freem(m); 1336 return (IPPROTO_DONE); 1337 } 1338 1339 if (V_ip_rsvpd != NULL) { 1340 *mp = m; 1341 rip_input(mp, offp, proto); 1342 return (IPPROTO_DONE); 1343 } 1344 /* Drop the packet */ 1345 m_freem(m); 1346 return (IPPROTO_DONE); 1347 } 1348