1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_bootp.h" 36 #include "opt_ipstealth.h" 37 #include "opt_ipsec.h" 38 #include "opt_route.h" 39 #include "opt_rss.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/hhook.h> 44 #include <sys/mbuf.h> 45 #include <sys/malloc.h> 46 #include <sys/domain.h> 47 #include <sys/protosw.h> 48 #include <sys/socket.h> 49 #include <sys/time.h> 50 #include <sys/kernel.h> 51 #include <sys/lock.h> 52 #include <sys/rmlock.h> 53 #include <sys/rwlock.h> 54 #include <sys/sdt.h> 55 #include <sys/syslog.h> 56 #include <sys/sysctl.h> 57 58 #include <net/pfil.h> 59 #include <net/if.h> 60 #include <net/if_types.h> 61 #include <net/if_var.h> 62 #include <net/if_dl.h> 63 #include <net/route.h> 64 #include <net/netisr.h> 65 #include <net/rss_config.h> 66 #include <net/vnet.h> 67 68 #include <netinet/in.h> 69 #include <netinet/in_kdtrace.h> 70 #include <netinet/in_systm.h> 71 #include <netinet/in_var.h> 72 #include <netinet/ip.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/ip_fw.h> 76 #include <netinet/ip_icmp.h> 77 #include <netinet/ip_options.h> 78 #include <machine/in_cksum.h> 79 #include <netinet/ip_carp.h> 80 #ifdef IPSEC 81 #include <netinet/ip_ipsec.h> 82 #include <netipsec/ipsec.h> 83 #include <netipsec/key.h> 84 #endif /* IPSEC */ 85 #include <netinet/in_rss.h> 86 87 #include <sys/socketvar.h> 88 89 #include <security/mac/mac_framework.h> 90 91 #ifdef CTASSERT 92 CTASSERT(sizeof(struct ip) == 20); 93 #endif 94 95 /* IP reassembly functions are defined in ip_reass.c. 
*/ 96 extern void ipreass_init(void); 97 extern void ipreass_drain(void); 98 extern void ipreass_slowtimo(void); 99 #ifdef VIMAGE 100 extern void ipreass_destroy(void); 101 #endif 102 103 struct rmlock in_ifaddr_lock; 104 RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 105 106 VNET_DEFINE(int, rsvp_on); 107 108 VNET_DEFINE(int, ipforwarding); 109 SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 110 &VNET_NAME(ipforwarding), 0, 111 "Enable IP forwarding between interfaces"); 112 113 static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ 114 #define V_ipsendredirects VNET(ipsendredirects) 115 SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 116 &VNET_NAME(ipsendredirects), 0, 117 "Enable sending IP redirects"); 118 119 /* 120 * XXX - Setting ip_checkinterface mostly implements the receive side of 121 * the Strong ES model described in RFC 1122, but since the routing table 122 * and transmit implementation do not implement the Strong ES model, 123 * setting this to 1 results in an odd hybrid. 124 * 125 * XXX - ip_checkinterface currently must be disabled if you use ipnat 126 * to translate the destination address to another local interface. 127 * 128 * XXX - ip_checkinterface must be disabled if you add IP aliases 129 * to the loopback interface instead of the interface where the 130 * packets for those addresses are received. 
131 */ 132 static VNET_DEFINE(int, ip_checkinterface); 133 #define V_ip_checkinterface VNET(ip_checkinterface) 134 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, 135 &VNET_NAME(ip_checkinterface), 0, 136 "Verify packet arrives on correct interface"); 137 138 VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ 139 140 static struct netisr_handler ip_nh = { 141 .nh_name = "ip", 142 .nh_handler = ip_input, 143 .nh_proto = NETISR_IP, 144 #ifdef RSS 145 .nh_m2cpuid = rss_soft_m2cpuid_v4, 146 .nh_policy = NETISR_POLICY_CPU, 147 .nh_dispatch = NETISR_DISPATCH_HYBRID, 148 #else 149 .nh_policy = NETISR_POLICY_FLOW, 150 #endif 151 }; 152 153 #ifdef RSS 154 /* 155 * Directly dispatched frames are currently assumed 156 * to have a flowid already calculated. 157 * 158 * It should likely have something that assert it 159 * actually has valid flow details. 160 */ 161 static struct netisr_handler ip_direct_nh = { 162 .nh_name = "ip_direct", 163 .nh_handler = ip_direct_input, 164 .nh_proto = NETISR_IP_DIRECT, 165 .nh_m2cpuid = rss_soft_m2cpuid_v4, 166 .nh_policy = NETISR_POLICY_CPU, 167 .nh_dispatch = NETISR_DISPATCH_HYBRID, 168 }; 169 #endif 170 171 extern struct domain inetdomain; 172 extern struct protosw inetsw[]; 173 u_char ip_protox[IPPROTO_MAX]; 174 VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 175 VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 176 VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 177 178 #ifdef IPCTL_DEFMTU 179 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 180 &ip_mtu, 0, "Default MTU"); 181 #endif 182 183 #ifdef IPSTEALTH 184 VNET_DEFINE(int, ipstealth); 185 SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 186 &VNET_NAME(ipstealth), 0, 187 "IP stealth mode, no TTL decrementation on forwarding"); 188 #endif 189 190 /* 191 * IP statistics are stored in the "array" of counter(9)s. 
192 */ 193 VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 194 VNET_PCPUSTAT_SYSINIT(ipstat); 195 SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 196 "IP statistics (struct ipstat, netinet/ip_var.h)"); 197 198 #ifdef VIMAGE 199 VNET_PCPUSTAT_SYSUNINIT(ipstat); 200 #endif /* VIMAGE */ 201 202 /* 203 * Kernel module interface for updating ipstat. The argument is an index 204 * into ipstat treated as an array. 205 */ 206 void 207 kmod_ipstat_inc(int statnum) 208 { 209 210 counter_u64_add(VNET(ipstat)[statnum], 1); 211 } 212 213 void 214 kmod_ipstat_dec(int statnum) 215 { 216 217 counter_u64_add(VNET(ipstat)[statnum], -1); 218 } 219 220 static int 221 sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 222 { 223 int error, qlimit; 224 225 netisr_getqlimit(&ip_nh, &qlimit); 226 error = sysctl_handle_int(oidp, &qlimit, 0, req); 227 if (error || !req->newptr) 228 return (error); 229 if (qlimit < 1) 230 return (EINVAL); 231 return (netisr_setqlimit(&ip_nh, qlimit)); 232 } 233 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 234 CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", 235 "Maximum size of the IP input queue"); 236 237 static int 238 sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 239 { 240 u_int64_t qdrops_long; 241 int error, qdrops; 242 243 netisr_getqdrops(&ip_nh, &qdrops_long); 244 qdrops = qdrops_long; 245 error = sysctl_handle_int(oidp, &qdrops, 0, req); 246 if (error || !req->newptr) 247 return (error); 248 if (qdrops != 0) 249 return (EINVAL); 250 netisr_clearqdrops(&ip_nh); 251 return (0); 252 } 253 254 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 255 CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", 256 "Number of packets dropped from the IP input queue"); 257 258 #ifdef RSS 259 static int 260 sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 261 { 262 int error, qlimit; 263 264 netisr_getqlimit(&ip_direct_nh, &qlimit); 265 error = 
sysctl_handle_int(oidp, &qlimit, 0, req); 266 if (error || !req->newptr) 267 return (error); 268 if (qlimit < 1) 269 return (EINVAL); 270 return (netisr_setqlimit(&ip_direct_nh, qlimit)); 271 } 272 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, 273 CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", 274 "Maximum size of the IP direct input queue"); 275 276 static int 277 sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 278 { 279 u_int64_t qdrops_long; 280 int error, qdrops; 281 282 netisr_getqdrops(&ip_direct_nh, &qdrops_long); 283 qdrops = qdrops_long; 284 error = sysctl_handle_int(oidp, &qdrops, 0, req); 285 if (error || !req->newptr) 286 return (error); 287 if (qdrops != 0) 288 return (EINVAL); 289 netisr_clearqdrops(&ip_direct_nh); 290 return (0); 291 } 292 293 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, 294 CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", 295 "Number of packets dropped from the IP direct input queue"); 296 #endif /* RSS */ 297 298 /* 299 * IP initialization: fill in IP protocol switch table. 300 * All protocols not implemented in kernel go to raw IP protocol handler. 301 */ 302 void 303 ip_init(void) 304 { 305 struct protosw *pr; 306 int i; 307 308 TAILQ_INIT(&V_in_ifaddrhead); 309 V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 310 311 /* Initialize IP reassembly queue. */ 312 ipreass_init(); 313 314 /* Initialize packet filter hooks. 
*/ 315 V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; 316 V_inet_pfil_hook.ph_af = AF_INET; 317 if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) 318 printf("%s: WARNING: unable to register pfil hook, " 319 "error %d\n", __func__, i); 320 321 if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 322 &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 323 HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 324 printf("%s: WARNING: unable to register input helper hook\n", 325 __func__); 326 if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 327 &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 328 HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 329 printf("%s: WARNING: unable to register output helper hook\n", 330 __func__); 331 332 /* Skip initialization of globals for non-default instances. */ 333 #ifdef VIMAGE 334 if (!IS_DEFAULT_VNET(curvnet)) { 335 netisr_register_vnet(&ip_nh); 336 #ifdef RSS 337 netisr_register_vnet(&ip_direct_nh); 338 #endif 339 return; 340 } 341 #endif 342 343 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 344 if (pr == NULL) 345 panic("ip_init: PF_INET not found"); 346 347 /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 348 for (i = 0; i < IPPROTO_MAX; i++) 349 ip_protox[i] = pr - inetsw; 350 /* 351 * Cycle through IP protocols and put them into the appropriate place 352 * in ip_protox[]. 353 */ 354 for (pr = inetdomain.dom_protosw; 355 pr < inetdomain.dom_protoswNPROTOSW; pr++) 356 if (pr->pr_domain->dom_family == PF_INET && 357 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 358 /* Be careful to only index valid IP protocols. 
*/ 359 if (pr->pr_protocol < IPPROTO_MAX) 360 ip_protox[pr->pr_protocol] = pr - inetsw; 361 } 362 363 netisr_register(&ip_nh); 364 #ifdef RSS 365 netisr_register(&ip_direct_nh); 366 #endif 367 } 368 369 #ifdef VIMAGE 370 static void 371 ip_destroy(void *unused __unused) 372 { 373 int error; 374 375 #ifdef RSS 376 netisr_unregister_vnet(&ip_direct_nh); 377 #endif 378 netisr_unregister_vnet(&ip_nh); 379 380 if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0) 381 printf("%s: WARNING: unable to unregister pfil hook, " 382 "error %d\n", __func__, error); 383 384 error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 385 if (error != 0) { 386 printf("%s: WARNING: unable to deregister input helper hook " 387 "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 388 "error %d returned\n", __func__, error); 389 } 390 error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 391 if (error != 0) { 392 printf("%s: WARNING: unable to deregister output helper hook " 393 "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 394 "error %d returned\n", __func__, error); 395 } 396 /* Cleanup in_ifaddr hash table; should be empty. */ 397 hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 398 399 /* Destroy IP reassembly queue. */ 400 ipreass_destroy(); 401 } 402 403 VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 404 #endif 405 406 #ifdef RSS 407 /* 408 * IP direct input routine. 409 * 410 * This is called when reinjecting completed fragments where 411 * all of the previous checking and book-keeping has been done. 412 */ 413 void 414 ip_direct_input(struct mbuf *m) 415 { 416 struct ip *ip; 417 int hlen; 418 419 ip = mtod(m, struct ip *); 420 hlen = ip->ip_hl << 2; 421 422 IPSTAT_INC(ips_delivered); 423 (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 424 return; 425 } 426 #endif 427 428 /* 429 * Ip input routine. Checksum and byte swap header. If fragmented 430 * try to reassemble. Process options. 
Pass to next level. 431 */ 432 void 433 ip_input(struct mbuf *m) 434 { 435 struct ip *ip = NULL; 436 struct in_ifaddr *ia = NULL; 437 struct ifaddr *ifa; 438 struct ifnet *ifp; 439 int checkif, hlen = 0; 440 uint16_t sum, ip_len; 441 int dchg = 0; /* dest changed after fw */ 442 struct in_addr odst; /* original dst address */ 443 444 M_ASSERTPKTHDR(m); 445 446 if (m->m_flags & M_FASTFWD_OURS) { 447 m->m_flags &= ~M_FASTFWD_OURS; 448 /* Set up some basics that will be used later. */ 449 ip = mtod(m, struct ip *); 450 hlen = ip->ip_hl << 2; 451 ip_len = ntohs(ip->ip_len); 452 goto ours; 453 } 454 455 IPSTAT_INC(ips_total); 456 457 if (m->m_pkthdr.len < sizeof(struct ip)) 458 goto tooshort; 459 460 if (m->m_len < sizeof (struct ip) && 461 (m = m_pullup(m, sizeof (struct ip))) == NULL) { 462 IPSTAT_INC(ips_toosmall); 463 return; 464 } 465 ip = mtod(m, struct ip *); 466 467 if (ip->ip_v != IPVERSION) { 468 IPSTAT_INC(ips_badvers); 469 goto bad; 470 } 471 472 hlen = ip->ip_hl << 2; 473 if (hlen < sizeof(struct ip)) { /* minimum header length */ 474 IPSTAT_INC(ips_badhlen); 475 goto bad; 476 } 477 if (hlen > m->m_len) { 478 if ((m = m_pullup(m, hlen)) == NULL) { 479 IPSTAT_INC(ips_badhlen); 480 return; 481 } 482 ip = mtod(m, struct ip *); 483 } 484 485 IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 486 487 /* 127/8 must not appear on wire - RFC1122 */ 488 ifp = m->m_pkthdr.rcvif; 489 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 490 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 491 if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 492 IPSTAT_INC(ips_badaddr); 493 goto bad; 494 } 495 } 496 497 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 498 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 499 } else { 500 if (hlen == sizeof(struct ip)) { 501 sum = in_cksum_hdr(ip); 502 } else { 503 sum = in_cksum(m, hlen); 504 } 505 } 506 if (sum) { 507 IPSTAT_INC(ips_badsum); 508 goto bad; 509 } 510 511 #ifdef ALTQ 512 if 
(altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 513 /* packet is dropped by traffic conditioner */ 514 return; 515 #endif 516 517 ip_len = ntohs(ip->ip_len); 518 if (ip_len < hlen) { 519 IPSTAT_INC(ips_badlen); 520 goto bad; 521 } 522 523 /* 524 * Check that the amount of data in the buffers 525 * is as at least much as the IP header would have us expect. 526 * Trim mbufs if longer than we expect. 527 * Drop packet if shorter than we expect. 528 */ 529 if (m->m_pkthdr.len < ip_len) { 530 tooshort: 531 IPSTAT_INC(ips_tooshort); 532 goto bad; 533 } 534 if (m->m_pkthdr.len > ip_len) { 535 if (m->m_len == m->m_pkthdr.len) { 536 m->m_len = ip_len; 537 m->m_pkthdr.len = ip_len; 538 } else 539 m_adj(m, ip_len - m->m_pkthdr.len); 540 } 541 542 /* Try to forward the packet, but if we fail continue */ 543 #ifdef IPSEC 544 /* For now we do not handle IPSEC in tryforward. */ 545 if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) && 546 (V_ipforwarding == 1)) 547 if (ip_tryforward(m) == NULL) 548 return; 549 /* 550 * Bypass packet filtering for packets previously handled by IPsec. 551 */ 552 if (ip_ipsec_filtertunnel(m)) 553 goto passin; 554 #else 555 if (V_ipforwarding == 1) 556 if (ip_tryforward(m) == NULL) 557 return; 558 #endif /* IPSEC */ 559 560 /* 561 * Run through list of hooks for input packets. 562 * 563 * NB: Beware of the destination address changing (e.g. 564 * by NAT rewriting). When this happens, tell 565 * ip_forward to do the right thing. 566 */ 567 568 /* Jump over all PFIL processing if hooks are not active. 
*/ 569 if (!PFIL_HOOKED(&V_inet_pfil_hook)) 570 goto passin; 571 572 odst = ip->ip_dst; 573 if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) 574 return; 575 if (m == NULL) /* consumed by filter */ 576 return; 577 578 ip = mtod(m, struct ip *); 579 dchg = (odst.s_addr != ip->ip_dst.s_addr); 580 ifp = m->m_pkthdr.rcvif; 581 582 if (m->m_flags & M_FASTFWD_OURS) { 583 m->m_flags &= ~M_FASTFWD_OURS; 584 goto ours; 585 } 586 if (m->m_flags & M_IP_NEXTHOP) { 587 if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 588 /* 589 * Directly ship the packet on. This allows 590 * forwarding packets originally destined to us 591 * to some other directly connected host. 592 */ 593 ip_forward(m, 1); 594 return; 595 } 596 } 597 passin: 598 599 /* 600 * Process options and, if not destined for us, 601 * ship it on. ip_dooptions returns 1 when an 602 * error was detected (causing an icmp message 603 * to be sent and the original packet to be freed). 604 */ 605 if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 606 return; 607 608 /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 609 * matter if it is destined to another node, or whether it is 610 * a multicast one, RSVP wants it! and prevents it from being forwarded 611 * anywhere else. Also checks if the rsvp daemon is running before 612 * grabbing the packet. 613 */ 614 if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 615 goto ours; 616 617 /* 618 * Check our list of addresses, to see if the packet is for us. 619 * If we don't have any addresses, assume any unicast packet 620 * we receive might be for us (and let the upper layers deal 621 * with it). 
622 */ 623 if (TAILQ_EMPTY(&V_in_ifaddrhead) && 624 (m->m_flags & (M_MCAST|M_BCAST)) == 0) 625 goto ours; 626 627 /* 628 * Enable a consistency check between the destination address 629 * and the arrival interface for a unicast packet (the RFC 1122 630 * strong ES model) if IP forwarding is disabled and the packet 631 * is not locally generated and the packet is not subject to 632 * 'ipfw fwd'. 633 * 634 * XXX - Checking also should be disabled if the destination 635 * address is ipnat'ed to a different interface. 636 * 637 * XXX - Checking is incompatible with IP aliases added 638 * to the loopback interface instead of the interface where 639 * the packets are received. 640 * 641 * XXX - This is the case for carp vhost IPs as well so we 642 * insert a workaround. If the packet got here, we already 643 * checked with carp_iamatch() and carp_forus(). 644 */ 645 checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 646 ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 647 ifp->if_carp == NULL && (dchg == 0); 648 649 /* 650 * Check for exact addresses in the hash bucket. 651 */ 652 /* IN_IFADDR_RLOCK(); */ 653 LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 654 /* 655 * If the address matches, verify that the packet 656 * arrived via the correct interface if checking is 657 * enabled. 658 */ 659 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 660 (!checkif || ia->ia_ifp == ifp)) { 661 counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 662 counter_u64_add(ia->ia_ifa.ifa_ibytes, 663 m->m_pkthdr.len); 664 /* IN_IFADDR_RUNLOCK(); */ 665 goto ours; 666 } 667 } 668 /* IN_IFADDR_RUNLOCK(); */ 669 670 /* 671 * Check for broadcast addresses. 672 * 673 * Only accept broadcast packets that arrive via the matching 674 * interface. Reception of forwarded directed broadcasts would 675 * be handled via ip_forward() and ether_output() with the loopback 676 * into the stack for SIMPLEX interfaces handled by ether_output(). 
677 */ 678 if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 679 IF_ADDR_RLOCK(ifp); 680 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 681 if (ifa->ifa_addr->sa_family != AF_INET) 682 continue; 683 ia = ifatoia(ifa); 684 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 685 ip->ip_dst.s_addr) { 686 counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 687 counter_u64_add(ia->ia_ifa.ifa_ibytes, 688 m->m_pkthdr.len); 689 IF_ADDR_RUNLOCK(ifp); 690 goto ours; 691 } 692 #ifdef BOOTP_COMPAT 693 if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 694 counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 695 counter_u64_add(ia->ia_ifa.ifa_ibytes, 696 m->m_pkthdr.len); 697 IF_ADDR_RUNLOCK(ifp); 698 goto ours; 699 } 700 #endif 701 } 702 IF_ADDR_RUNLOCK(ifp); 703 ia = NULL; 704 } 705 /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 706 if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 707 IPSTAT_INC(ips_cantforward); 708 m_freem(m); 709 return; 710 } 711 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 712 if (V_ip_mrouter) { 713 /* 714 * If we are acting as a multicast router, all 715 * incoming multicast packets are passed to the 716 * kernel-level multicast forwarding function. 717 * The packet is returned (relatively) intact; if 718 * ip_mforward() returns a non-zero value, the packet 719 * must be discarded, else it may be accepted below. 720 */ 721 if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 722 IPSTAT_INC(ips_cantforward); 723 m_freem(m); 724 return; 725 } 726 727 /* 728 * The process-level routing daemon needs to receive 729 * all multicast IGMP packets, whether or not this 730 * host belongs to their destination groups. 731 */ 732 if (ip->ip_p == IPPROTO_IGMP) 733 goto ours; 734 IPSTAT_INC(ips_forward); 735 } 736 /* 737 * Assume the packet is for us, to avoid prematurely taking 738 * a lock on the in_multi hash. Protocols must perform 739 * their own filtering and update statistics accordingly. 
740 */ 741 goto ours; 742 } 743 if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 744 goto ours; 745 if (ip->ip_dst.s_addr == INADDR_ANY) 746 goto ours; 747 748 /* 749 * Not for us; forward if possible and desirable. 750 */ 751 if (V_ipforwarding == 0) { 752 IPSTAT_INC(ips_cantforward); 753 m_freem(m); 754 } else { 755 ip_forward(m, dchg); 756 } 757 return; 758 759 ours: 760 #ifdef IPSTEALTH 761 /* 762 * IPSTEALTH: Process non-routing options only 763 * if the packet is destined for us. 764 */ 765 if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 766 return; 767 #endif /* IPSTEALTH */ 768 769 /* 770 * Attempt reassembly; if it succeeds, proceed. 771 * ip_reass() will return a different mbuf. 772 */ 773 if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 774 /* XXXGL: shouldn't we save & set m_flags? */ 775 m = ip_reass(m); 776 if (m == NULL) 777 return; 778 ip = mtod(m, struct ip *); 779 /* Get the header length of the reassembled packet */ 780 hlen = ip->ip_hl << 2; 781 } 782 783 #ifdef IPSEC 784 /* 785 * enforce IPsec policy checking if we are seeing last header. 786 * note that we do not visit this with protocols with pcb layer 787 * code - like udp/tcp/raw ip. 788 */ 789 if (ip_ipsec_input(m, ip->ip_p) != 0) 790 goto bad; 791 #endif /* IPSEC */ 792 793 /* 794 * Switch out to protocol's input routine. 795 */ 796 IPSTAT_INC(ips_delivered); 797 798 (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 799 return; 800 bad: 801 m_freem(m); 802 } 803 804 /* 805 * IP timer processing; 806 * if a timer expires on a reassembly 807 * queue, discard it. 
*/
void
ip_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_cursor);

	/* Run the reassembly slow timer once in the context of every vnet. */
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_cursor) {
		CURVNET_SET(vnet_cursor);
		ipreass_slowtimo();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/*
 * Call ipreass_drain() in the context of every vnet.
 */
void
ip_drain(void)
{
	VNET_ITERATOR_DECL(vnet_cursor);

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_cursor) {
		CURVNET_SET(vnet_cursor);
		ipreass_drain();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/*
 * Point ip_protox[ipproto] at the inetsw[] entry of that protocol.
 *
 * The protocol to be inserted into ip_protox[] must be already registered
 * in inetsw[], either statically or through pf_proto_register().
 *
 * Returns 0 on success, EPROTONOSUPPORT when ipproto is out of range or
 * has no inetsw[] entry, EPFNOSUPPORT when the raw IP entry cannot be
 * found, and EEXIST when the slot is already claimed.
 */
int
ipproto_register(short ipproto)
{
	struct protosw *raw, *cand;

	/* Only 1 .. IPPROTO_MAX - 1 are acceptable. */
	if (!(ipproto > 0 && ipproto < IPPROTO_MAX))
		return (EPROTONOSUPPORT);

	/*
	 * A free slot is recognisable by still holding the index of the
	 * IPPROTO_RAW entry; anything else belongs to another protocol.
	 */
	raw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (raw == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] != raw - inetsw)
		return (EEXIST);

	/* Locate the protocol in inetsw[] and record its index. */
	cand = inetdomain.dom_protosw;
	while (cand < inetdomain.dom_protoswNPROTOSW) {
		if (cand->pr_domain->dom_family == PF_INET &&
		    cand->pr_protocol != 0 && cand->pr_protocol == ipproto) {
			ip_protox[cand->pr_protocol] = cand - inetsw;
			return (0);
		}
		cand++;
	}
	return (EPROTONOSUPPORT);
}

/*
 * Undo ipproto_register(): hand the ip_protox[] slot of ipproto back to
 * the raw IP entry.
 *
 * Returns 0 on success, EPROTONOSUPPORT when ipproto is out of range,
 * EPFNOSUPPORT when the raw IP entry cannot be found, and ENOENT when the
 * slot was not claimed in the first place.
 */
int
ipproto_unregister(short ipproto)
{
	struct protosw *raw;

	/* Only 1 .. IPPROTO_MAX - 1 are acceptable. */
	if (!(ipproto > 0 && ipproto < IPPROTO_MAX))
		return (EPROTONOSUPPORT);

	raw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (raw == NULL)
		return (EPFNOSUPPORT);

	/* A slot that already refers to IPPROTO_RAW was never registered. */
	if (ip_protox[ipproto] == raw - inetsw)
		return (ENOENT);

	/* Give the slot back to IPPROTO_RAW. */
	ip_protox[ipproto] = raw - inetsw;
	return (0);
}

/*
 * Table of PRC_NCMDS errno values, four entries per row; a value of 0
 * means no error is associated with that index.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	/*  0 -  3 */	0,		0,		0,		0,
	/*  4 -  7 */	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	/*  8 - 11 */	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	/* 12 - 15 */	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	/* 16 - 19 */	0,		0,		EHOSTUNREACH,	0,
	/* 20 - 21 */	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
915 */ 916 void 917 ip_forward(struct mbuf *m, int srcrt) 918 { 919 struct ip *ip = mtod(m, struct ip *); 920 struct in_ifaddr *ia; 921 struct mbuf *mcopy; 922 struct sockaddr_in *sin; 923 struct in_addr dest; 924 struct route ro; 925 int error, type = 0, code = 0, mtu = 0; 926 927 if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 928 IPSTAT_INC(ips_cantforward); 929 m_freem(m); 930 return; 931 } 932 #ifdef IPSEC 933 if (ip_ipsec_fwd(m) != 0) { 934 IPSTAT_INC(ips_cantforward); 935 m_freem(m); 936 return; 937 } 938 #endif /* IPSEC */ 939 #ifdef IPSTEALTH 940 if (!V_ipstealth) { 941 #endif 942 if (ip->ip_ttl <= IPTTLDEC) { 943 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 944 0, 0); 945 return; 946 } 947 #ifdef IPSTEALTH 948 } 949 #endif 950 951 bzero(&ro, sizeof(ro)); 952 sin = (struct sockaddr_in *)&ro.ro_dst; 953 sin->sin_family = AF_INET; 954 sin->sin_len = sizeof(*sin); 955 sin->sin_addr = ip->ip_dst; 956 #ifdef RADIX_MPATH 957 rtalloc_mpath_fib(&ro, 958 ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 959 M_GETFIB(m)); 960 #else 961 in_rtalloc_ign(&ro, 0, M_GETFIB(m)); 962 #endif 963 if (ro.ro_rt != NULL) { 964 ia = ifatoia(ro.ro_rt->rt_ifa); 965 ifa_ref(&ia->ia_ifa); 966 } else 967 ia = NULL; 968 #ifndef IPSEC 969 /* 970 * 'ia' may be NULL if there is no route for this destination. 971 * In case of IPsec, Don't discard it just yet, but pass it to 972 * ip_output in case of outgoing IPsec policy. 973 */ 974 if (!srcrt && ia == NULL) { 975 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); 976 RO_RTFREE(&ro); 977 return; 978 } 979 #endif 980 981 /* 982 * Save the IP header and at most 8 bytes of the payload, 983 * in case we need to generate an ICMP message to the src. 984 * 985 * XXX this can be optimized a lot by saving the data in a local 986 * buffer on the stack (72 bytes at most), and only allocating the 987 * mbuf if really necessary. 
The vast majority of the packets 988 * are forwarded without having to send an ICMP back (either 989 * because unnecessary, or because rate limited), so we are 990 * really we are wasting a lot of work here. 991 * 992 * We don't use m_copy() because it might return a reference 993 * to a shared cluster. Both this function and ip_output() 994 * assume exclusive access to the IP header in `m', so any 995 * data in a cluster may change before we reach icmp_error(). 996 */ 997 mcopy = m_gethdr(M_NOWAIT, m->m_type); 998 if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 999 /* 1000 * It's probably ok if the pkthdr dup fails (because 1001 * the deep copy of the tag chain failed), but for now 1002 * be conservative and just discard the copy since 1003 * code below may some day want the tags. 1004 */ 1005 m_free(mcopy); 1006 mcopy = NULL; 1007 } 1008 if (mcopy != NULL) { 1009 mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 1010 mcopy->m_pkthdr.len = mcopy->m_len; 1011 m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1012 } 1013 1014 #ifdef IPSTEALTH 1015 if (!V_ipstealth) { 1016 #endif 1017 ip->ip_ttl -= IPTTLDEC; 1018 #ifdef IPSTEALTH 1019 } 1020 #endif 1021 1022 /* 1023 * If forwarding packet using same interface that it came in on, 1024 * perhaps should send a redirect to sender to shortcut a hop. 1025 * Only send redirect if source is sending directly to us, 1026 * and if packet was not source routed (or has any options). 1027 * Also, don't send redirect if forwarding using a default route 1028 * or a route modified by a redirect. 
1029 */ 1030 dest.s_addr = 0; 1031 if (!srcrt && V_ipsendredirects && 1032 ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 1033 struct rtentry *rt; 1034 1035 rt = ro.ro_rt; 1036 1037 if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && 1038 satosin(rt_key(rt))->sin_addr.s_addr != 0) { 1039 #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) 1040 u_long src = ntohl(ip->ip_src.s_addr); 1041 1042 if (RTA(rt) && 1043 (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { 1044 if (rt->rt_flags & RTF_GATEWAY) 1045 dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; 1046 else 1047 dest.s_addr = ip->ip_dst.s_addr; 1048 /* Router requirements says to only send host redirects */ 1049 type = ICMP_REDIRECT; 1050 code = ICMP_REDIRECT_HOST; 1051 } 1052 } 1053 } 1054 1055 error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1056 1057 if (error == EMSGSIZE && ro.ro_rt) 1058 mtu = ro.ro_rt->rt_mtu; 1059 RO_RTFREE(&ro); 1060 1061 if (error) 1062 IPSTAT_INC(ips_cantforward); 1063 else { 1064 IPSTAT_INC(ips_forward); 1065 if (type) 1066 IPSTAT_INC(ips_redirectsent); 1067 else { 1068 if (mcopy) 1069 m_freem(mcopy); 1070 if (ia != NULL) 1071 ifa_free(&ia->ia_ifa); 1072 return; 1073 } 1074 } 1075 if (mcopy == NULL) { 1076 if (ia != NULL) 1077 ifa_free(&ia->ia_ifa); 1078 return; 1079 } 1080 1081 switch (error) { 1082 1083 case 0: /* forwarded, but need redirect */ 1084 /* type, code set above */ 1085 break; 1086 1087 case ENETUNREACH: 1088 case EHOSTUNREACH: 1089 case ENETDOWN: 1090 case EHOSTDOWN: 1091 default: 1092 type = ICMP_UNREACH; 1093 code = ICMP_UNREACH_HOST; 1094 break; 1095 1096 case EMSGSIZE: 1097 type = ICMP_UNREACH; 1098 code = ICMP_UNREACH_NEEDFRAG; 1099 1100 #ifdef IPSEC 1101 /* 1102 * If IPsec is configured for this path, 1103 * override any possibly mtu value set by ip_output. 1104 */ 1105 mtu = ip_ipsec_mtu(mcopy, mtu); 1106 #endif /* IPSEC */ 1107 /* 1108 * If the MTU was set before make sure we are below the 1109 * interface MTU. 
1110 * If the MTU wasn't set before use the interface mtu or 1111 * fall back to the next smaller mtu step compared to the 1112 * current packet size. 1113 */ 1114 if (mtu != 0) { 1115 if (ia != NULL) 1116 mtu = min(mtu, ia->ia_ifp->if_mtu); 1117 } else { 1118 if (ia != NULL) 1119 mtu = ia->ia_ifp->if_mtu; 1120 else 1121 mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1122 } 1123 IPSTAT_INC(ips_cantfrag); 1124 break; 1125 1126 case ENOBUFS: 1127 case EACCES: /* ipfw denied packet */ 1128 m_freem(mcopy); 1129 if (ia != NULL) 1130 ifa_free(&ia->ia_ifa); 1131 return; 1132 } 1133 if (ia != NULL) 1134 ifa_free(&ia->ia_ifa); 1135 icmp_error(mcopy, type, code, dest.s_addr, mtu); 1136 } 1137 1138 void 1139 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1140 struct mbuf *m) 1141 { 1142 1143 if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { 1144 struct bintime bt; 1145 1146 bintime(&bt); 1147 if (inp->inp_socket->so_options & SO_BINTIME) { 1148 *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1149 SCM_BINTIME, SOL_SOCKET); 1150 if (*mp) 1151 mp = &(*mp)->m_next; 1152 } 1153 if (inp->inp_socket->so_options & SO_TIMESTAMP) { 1154 struct timeval tv; 1155 1156 bintime2timeval(&bt, &tv); 1157 *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 1158 SCM_TIMESTAMP, SOL_SOCKET); 1159 if (*mp) 1160 mp = &(*mp)->m_next; 1161 } 1162 } 1163 if (inp->inp_flags & INP_RECVDSTADDR) { 1164 *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 1165 sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 1166 if (*mp) 1167 mp = &(*mp)->m_next; 1168 } 1169 if (inp->inp_flags & INP_RECVTTL) { 1170 *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 1171 sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 1172 if (*mp) 1173 mp = &(*mp)->m_next; 1174 } 1175 #ifdef notyet 1176 /* XXX 1177 * Moving these out of udp_input() made them even more broken 1178 * than they already were. 
1179 */ 1180 /* options were tossed already */ 1181 if (inp->inp_flags & INP_RECVOPTS) { 1182 *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 1183 sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 1184 if (*mp) 1185 mp = &(*mp)->m_next; 1186 } 1187 /* ip_srcroute doesn't do what we want here, need to fix */ 1188 if (inp->inp_flags & INP_RECVRETOPTS) { 1189 *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 1190 sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 1191 if (*mp) 1192 mp = &(*mp)->m_next; 1193 } 1194 #endif 1195 if (inp->inp_flags & INP_RECVIF) { 1196 struct ifnet *ifp; 1197 struct sdlbuf { 1198 struct sockaddr_dl sdl; 1199 u_char pad[32]; 1200 } sdlbuf; 1201 struct sockaddr_dl *sdp; 1202 struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 1203 1204 if ((ifp = m->m_pkthdr.rcvif) && 1205 ifp->if_index && ifp->if_index <= V_if_index) { 1206 sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1207 /* 1208 * Change our mind and don't try copy. 1209 */ 1210 if (sdp->sdl_family != AF_LINK || 1211 sdp->sdl_len > sizeof(sdlbuf)) { 1212 goto makedummy; 1213 } 1214 bcopy(sdp, sdl2, sdp->sdl_len); 1215 } else { 1216 makedummy: 1217 sdl2->sdl_len = 1218 offsetof(struct sockaddr_dl, sdl_data[0]); 1219 sdl2->sdl_family = AF_LINK; 1220 sdl2->sdl_index = 0; 1221 sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1222 } 1223 *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 1224 IP_RECVIF, IPPROTO_IP); 1225 if (*mp) 1226 mp = &(*mp)->m_next; 1227 } 1228 if (inp->inp_flags & INP_RECVTOS) { 1229 *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 1230 sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 1231 if (*mp) 1232 mp = &(*mp)->m_next; 1233 } 1234 1235 if (inp->inp_flags2 & INP_RECVFLOWID) { 1236 uint32_t flowid, flow_type; 1237 1238 flowid = m->m_pkthdr.flowid; 1239 flow_type = M_HASHTYPE_GET(m); 1240 1241 /* 1242 * XXX should handle the failure of one or the 1243 * other - don't populate both? 
1244 */ 1245 *mp = sbcreatecontrol((caddr_t) &flowid, 1246 sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 1247 if (*mp) 1248 mp = &(*mp)->m_next; 1249 *mp = sbcreatecontrol((caddr_t) &flow_type, 1250 sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 1251 if (*mp) 1252 mp = &(*mp)->m_next; 1253 } 1254 1255 #ifdef RSS 1256 if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 1257 uint32_t flowid, flow_type; 1258 uint32_t rss_bucketid; 1259 1260 flowid = m->m_pkthdr.flowid; 1261 flow_type = M_HASHTYPE_GET(m); 1262 1263 if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 1264 *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 1265 sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 1266 if (*mp) 1267 mp = &(*mp)->m_next; 1268 } 1269 } 1270 #endif 1271 } 1272 1273 /* 1274 * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 1275 * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 1276 * locking. This code remains in ip_input.c as ip_mroute.c is optionally 1277 * compiled. 1278 */ 1279 static VNET_DEFINE(int, ip_rsvp_on); 1280 VNET_DEFINE(struct socket *, ip_rsvpd); 1281 1282 #define V_ip_rsvp_on VNET(ip_rsvp_on) 1283 1284 int 1285 ip_rsvp_init(struct socket *so) 1286 { 1287 1288 if (so->so_type != SOCK_RAW || 1289 so->so_proto->pr_protocol != IPPROTO_RSVP) 1290 return EOPNOTSUPP; 1291 1292 if (V_ip_rsvpd != NULL) 1293 return EADDRINUSE; 1294 1295 V_ip_rsvpd = so; 1296 /* 1297 * This may seem silly, but we need to be sure we don't over-increment 1298 * the RSVP counter, in case something slips up. 1299 */ 1300 if (!V_ip_rsvp_on) { 1301 V_ip_rsvp_on = 1; 1302 V_rsvp_on++; 1303 } 1304 1305 return 0; 1306 } 1307 1308 int 1309 ip_rsvp_done(void) 1310 { 1311 1312 V_ip_rsvpd = NULL; 1313 /* 1314 * This may seem silly, but we need to be sure we don't over-decrement 1315 * the RSVP counter, in case something slips up. 
1316 */ 1317 if (V_ip_rsvp_on) { 1318 V_ip_rsvp_on = 0; 1319 V_rsvp_on--; 1320 } 1321 return 0; 1322 } 1323 1324 int 1325 rsvp_input(struct mbuf **mp, int *offp, int proto) 1326 { 1327 struct mbuf *m; 1328 1329 m = *mp; 1330 *mp = NULL; 1331 1332 if (rsvp_input_p) { /* call the real one if loaded */ 1333 *mp = m; 1334 rsvp_input_p(mp, offp, proto); 1335 return (IPPROTO_DONE); 1336 } 1337 1338 /* Can still get packets with rsvp_on = 0 if there is a local member 1339 * of the group to which the RSVP packet is addressed. But in this 1340 * case we want to throw the packet away. 1341 */ 1342 1343 if (!V_rsvp_on) { 1344 m_freem(m); 1345 return (IPPROTO_DONE); 1346 } 1347 1348 if (V_ip_rsvpd != NULL) { 1349 *mp = m; 1350 rip_input(mp, offp, proto); 1351 return (IPPROTO_DONE); 1352 } 1353 /* Drop the packet */ 1354 m_freem(m); 1355 return (IPPROTO_DONE); 1356 } 1357