1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (C) 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * $KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $ 32 */ 33 34 /*- 35 * Copyright (c) 1989 Stephen Deering 36 * Copyright (c) 1992, 1993 37 * The Regents of the University of California. All rights reserved. 38 * 39 * This code is derived from software contributed to Berkeley by 40 * Stephen Deering of Stanford University. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp 66 */ 67 68 /* 69 * IP multicast forwarding procedures 70 * 71 * Written by David Waitzman, BBN Labs, August 1988. 72 * Modified by Steve Deering, Stanford, February 1989. 73 * Modified by Mark J. Steiglitz, Stanford, May, 1991 74 * Modified by Van Jacobson, LBL, January 1993 75 * Modified by Ajit Thyagarajan, PARC, August 1993 76 * Modified by Bill Fenner, PARC, April 1994 77 * 78 * MROUTING Revision: 3.5.1.2 + PIM-SMv2 (pimd) Support 79 */ 80 81 #include "opt_inet6.h" 82 83 #include <sys/param.h> 84 #include <sys/callout.h> 85 #include <sys/errno.h> 86 #include <sys/eventhandler.h> 87 #include <sys/kernel.h> 88 #include <sys/lock.h> 89 #include <sys/malloc.h> 90 #include <sys/mbuf.h> 91 #include <sys/module.h> 92 #include <sys/domain.h> 93 #include <sys/priv.h> 94 #include <sys/proc.h> 95 #include <sys/protosw.h> 96 #include <sys/sdt.h> 97 #include <sys/signalvar.h> 98 #include <sys/socket.h> 99 #include <sys/socketvar.h> 100 #include <sys/sockio.h> 101 #include <sys/sx.h> 102 #include <sys/sysctl.h> 103 #include <sys/syslog.h> 104 #include <sys/systm.h> 105 #include <sys/time.h> 106 107 #include <net/if.h> 108 #include <net/if_var.h> 109 #include <net/if_private.h> 110 #include <net/if_types.h> 111 #include <net/route.h> 112 #include <net/vnet.h> 113 114 #include <netinet/in.h> 115 #include <netinet/in_var.h> 116 #include <netinet/icmp6.h> 117 #include <netinet/ip_encap.h> 118 119 #include <netinet/ip6.h> 120 #include <netinet/in_kdtrace.h> 121 #include <netinet6/ip6_var.h> 122 #include <netinet6/scope6_var.h> 123 #include <netinet6/nd6.h> 124 #include <netinet6/ip6_mroute.h> 125 #include <netinet6/pim6.h> 126 #include <netinet6/pim6_var.h> 127 128 static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry"); 129 130 struct mf6ctable; 131 132 static int ip6_mdq(struct mf6ctable *, struct mbuf *, struct ifnet *, 133 struct mf6c *); 134 static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); 135 static int register_send(struct mf6ctable *, struct ip6_hdr *, mifi_t, 136 struct mbuf *); 137 static int set_pim6(int *); 138 static int socket_send(struct socket *, struct mbuf *, 139 struct sockaddr_in6 *); 140 141 extern int in6_mcast_loop; 142 extern struct domain inet6domain; 143 144 static const struct encaptab *pim6_encap_cookie; 145 static int pim6_encapcheck(const struct mbuf *, int, int, void *); 146 static int pim6_input(struct mbuf *, int, int, void *); 147 148 static const struct encap_config ipv6_encap_cfg = { 149 .proto = IPPROTO_PIM, 150 .min_length = sizeof(struct ip6_hdr) + PIM_MINLEN, 151 .exact_match = 8, 152 .check = pim6_encapcheck, 153 .input = pim6_input 154 }; 155 156 SYSCTL_DECL(_net_inet6); 157 SYSCTL_DECL(_net_inet6_ip6); 158 static SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, 159 CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 160 "PIM"); 161 162 VNET_DEFINE_STATIC(struct mrt6stat, mrt6stat); 163 #define V_mrt6stat VNET(mrt6stat) 164 SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_VNET | CTLFLAG_RW, 165 &VNET_NAME(mrt6stat), mrt6stat, 166 "Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)"); 167 168 #define MRT6STAT_INC(name) V_mrt6stat.name += 1 169 #define NO_RTE_FOUND 0x1 170 #define RTE_FOUND 0x2 171 172 static struct sx mrouter6_mtx; 173 #define MROUTER6_LOCKPTR() (&mrouter6_mtx) 174 #define MROUTER6_LOCK() sx_xlock(MROUTER6_LOCKPTR()) 175 #define MROUTER6_UNLOCK() sx_xunlock(MROUTER6_LOCKPTR()) 176 #define MROUTER6_LOCK_ASSERT() sx_assert(MROUTER6_LOCKPTR(), SA_XLOCKED 177 #define MROUTER6_LOCK_INIT() sx_init(MROUTER6_LOCKPTR(), "mrouter6") 178 #define MROUTER6_LOCK_DESTROY() sx_destroy(MROUTER6_LOCKPTR()) 179 180 static struct mtx mfc6_mtx; 181 #define MFC6_LOCKPTR() (&mfc6_mtx) 182 #define MFC6_LOCK() mtx_lock(MFC6_LOCKPTR()) 183 #define MFC6_UNLOCK() mtx_unlock(MFC6_LOCKPTR()) 184 #define MFC6_LOCK_ASSERT() mtx_assert(MFC6_LOCKPTR(), MA_OWNED) 185 #define MFC6_LOCK_INIT() mtx_init(MFC6_LOCKPTR(), \ 186 "IPv6 multicast forwarding cache", \ 187 NULL, MTX_DEF) 188 #define MFC6_LOCK_DESTROY() mtx_destroy(MFC6_LOCKPTR()) 189 190 struct mf6ctable { 191 struct socket *router; 192 int router_ver; 193 struct mf6c *mfchashtbl[MF6CTBLSIZ]; 194 u_char nexpire[MF6CTBLSIZ]; 195 int nummifs; 196 struct mif6 miftable[MAXMIFS]; 197 198 /* 199 * 'Interfaces' associated with decapsulator (so we can tell packets 200 * that went through it from ones that get reflected by a broken 201 * gateway). Different from IPv4 register_if, these interfaces are 202 * linked into the system ifnet list, because per-interface IPv6 203 * statistics are maintained in ifp->if_afdata. But it does not have 204 * any routes point to them. I.e., packets can't be sent this way. 205 * They only exist as a placeholder for multicast source verification. 206 */ 207 struct ifnet *register_if; 208 mifi_t register_mif; 209 }; 210 211 VNET_DEFINE_STATIC(struct mf6ctable *, mfctables); 212 #define V_mfctables VNET(mfctables) 213 VNET_DEFINE_STATIC(uint32_t, nmfctables); 214 #define V_nmfctables VNET(nmfctables) 215 216 static eventhandler_tag ifdetach_tag, rtnumfibs_change_tag; 217 218 static int 219 sysctl_mfctable(SYSCTL_HANDLER_ARGS) 220 { 221 int fibnum; 222 223 fibnum = curthread->td_proc->p_fibnum; 224 return (SYSCTL_OUT(req, &V_mfctables[fibnum].mfchashtbl, 225 sizeof(struct mf6c *) * MF6CTBLSIZ)); 226 } 227 SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, mf6ctable, 228 CTLTYPE_OPAQUE | CTLFLAG_RD, 229 NULL, 0, sysctl_mfctable, "S,*mf6c[MF6CTBLSIZ]", 230 "IPv6 Multicast Forwarding Table (struct mf6c *[MF6CTBLSIZ], " 231 "netinet6/ip6_mroute.h)"); 232 233 static int 234 sysctl_mif6table(SYSCTL_HANDLER_ARGS) 235 { 236 struct mif6_sctl *out; 237 struct mf6ctable *mfct; 238 int error; 239 240 mfct = &V_mfctables[curthread->td_proc->p_fibnum]; 241 out = malloc(sizeof(struct mif6_sctl) * MAXMIFS, M_TEMP, 242 M_WAITOK | M_ZERO); 243 for (int i = 0; i < MAXMIFS; i++) { 244 struct mif6_sctl *outp = &out[i]; 245 struct mif6 *mifp = &mfct->miftable[i]; 246 247 outp->m6_flags = mifp->m6_flags; 248 outp->m6_rate_limit = mifp->m6_rate_limit; 249 outp->m6_lcl_addr = mifp->m6_lcl_addr; 250 if (mifp->m6_ifp != NULL) 251 outp->m6_ifp = mifp->m6_ifp->if_index; 252 else 253 outp->m6_ifp = 0; 254 outp->m6_pkt_in = mifp->m6_pkt_in; 255 outp->m6_pkt_out = mifp->m6_pkt_out; 256 outp->m6_bytes_in = mifp->m6_bytes_in; 257 outp->m6_bytes_out = mifp->m6_bytes_out; 258 } 259 error = SYSCTL_OUT(req, out, sizeof(struct mif6_sctl) * MAXMIFS); 260 free(out, M_TEMP); 261 return (error); 262 } 263 SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, mif6table, 264 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 265 NULL, 0, sysctl_mif6table, "S,mif6_sctl[MAXMIFS]", 266 "IPv6 Multicast Interfaces (struct mif6_sctl[MAXMIFS], " 267 "netinet6/ip6_mroute.h)"); 268 269 static struct mtx mif6_mtx; 270 #define MIF6_LOCKPTR() (&mif6_mtx) 271 #define MIF6_LOCK() mtx_lock(MIF6_LOCKPTR()) 272 #define MIF6_UNLOCK() mtx_unlock(MIF6_LOCKPTR()) 273 #define MIF6_LOCK_ASSERT() mtx_assert(MIF6_LOCKPTR(), MA_OWNED) 274 #define MIF6_LOCK_INIT() \ 275 mtx_init(MIF6_LOCKPTR(), "IPv6 multicast interfaces", NULL, MTX_DEF) 276 #define MIF6_LOCK_DESTROY() mtx_destroy(MIF6_LOCKPTR()) 277 278 #ifdef MRT6DEBUG 279 VNET_DEFINE_STATIC(u_int, mrt6debug) = 0; /* debug level */ 280 #define V_mrt6debug VNET(mrt6debug) 281 #define DEBUG_MFC 0x02 282 #define DEBUG_FORWARD 0x04 283 #define DEBUG_EXPIRE 0x08 284 #define DEBUG_XMIT 0x10 285 #define DEBUG_REG 0x20 286 #define DEBUG_PIM 0x40 287 #define DEBUG_ERR 0x80 288 #define DEBUG_ANY 0x7f 289 #define MRT6_DLOG(m, fmt, ...) \ 290 if (V_mrt6debug & (m)) \ 291 log(((m) & DEBUG_ERR) ? LOG_ERR: LOG_DEBUG, \ 292 "%s: " fmt "\n", __func__, ##__VA_ARGS__) 293 #else 294 #define MRT6_DLOG(m, fmt, ...) 295 #endif 296 297 static void expire_upcalls(struct mf6ctable *); 298 static void expire_upcalls_all(void *); 299 #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 300 #define UPCALL_EXPIRE 6 /* number of timeouts */ 301 302 VNET_DEFINE_STATIC(struct pim6stat, pim6stat); 303 #define V_pim6stat VNET(pim6stat) 304 SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_VNET | CTLFLAG_RW, 305 &VNET_NAME(pim6stat), pim6stat, 306 "PIM Statistics (struct pim6stat, netinet6/pim6_var.h)"); 307 308 #define PIM6STAT_INC(name) V_pim6stat.name += 1 309 VNET_DEFINE_STATIC(int, pim6); 310 #define V_pim6 VNET(pim6) 311 312 /* 313 * Hash function for a source, group entry 314 */ 315 #define MF6CHASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \ 316 (a).s6_addr32[2] ^ (a).s6_addr32[3] ^ \ 317 (g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \ 318 (g).s6_addr32[2] ^ (g).s6_addr32[3]) 319 320 /* 321 * Macros to compute elapsed time efficiently 322 * Borrowed from Van Jacobson's scheduling code 323 * XXX: replace with timersub() ? 324 */ 325 #define TV_DELTA(a, b, delta) do { \ 326 int xxs; \ 327 \ 328 delta = (a).tv_usec - (b).tv_usec; \ 329 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 330 switch (xxs) { \ 331 case 2: \ 332 delta += 1000000; \ 333 /* FALLTHROUGH */ \ 334 case 1: \ 335 delta += 1000000; \ 336 break; \ 337 default: \ 338 delta += (1000000 * xxs); \ 339 } \ 340 } \ 341 } while (/*CONSTCOND*/ 0) 342 343 /* XXX: replace with timercmp(a, b, <) ? */ 344 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 345 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 346 347 #ifdef UPCALL_TIMING 348 #define UPCALL_MAX 50 349 static u_long upcall_data[UPCALL_MAX + 1]; 350 static void collate(struct timeval *); 351 #endif /* UPCALL_TIMING */ 352 353 static int ip6_mrouter_init(struct socket *, int, int); 354 static int add_m6fc(struct mf6ctable *, struct mf6cctl *); 355 static int add_m6if(struct mf6ctable *, int, struct mif6ctl *); 356 static int del_m6fc(struct mf6ctable *, struct mf6cctl *); 357 static int del_m6if(struct mf6ctable *, mifi_t); 358 static int del_m6if_locked(struct mf6ctable *, mifi_t); 359 static int get_mif6_cnt(struct mf6ctable *, struct sioc_mif_req6 *); 360 static int get_sg_cnt(struct mf6ctable *, struct sioc_sg_req6 *); 361 362 VNET_DEFINE_STATIC(struct callout, expire_upcalls_ch); 363 #define V_expire_upcalls_ch VNET(expire_upcalls_ch) 364 365 static int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); 366 static void X_ip6_mrouter_done(struct socket *); 367 static int X_ip6_mrouter_set(struct socket *, struct sockopt *); 368 static int X_ip6_mrouter_get(struct socket *, struct sockopt *); 369 static int X_mrt6_ioctl(u_long, caddr_t, int); 370 371 static struct mf6c * 372 mf6c_find(const struct mf6ctable *mfct, const struct in6_addr *origin, 373 const struct in6_addr *group) 374 { 375 MFC6_LOCK_ASSERT(); 376 377 for (struct mf6c *rt = mfct->mfchashtbl[MF6CHASH(*origin, *group)]; 378 rt != NULL; rt = rt->mf6c_next) { 379 if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, origin) && 380 IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, group) && 381 rt->mf6c_stall == NULL) 382 return (rt); 383 } 384 MRT6STAT_INC(mrt6s_mfc_misses); 385 return (NULL); 386 } 387 388 static struct mf6ctable * 389 somfctable(struct socket *so) 390 { 391 int fib; 392 393 fib = atomic_load_int(&so->so_fibnum); 394 KASSERT(fib >= 0 && fib < V_nmfctables, 395 ("%s: so_fibnum %d out of range", __func__, fib)); 396 return (&V_mfctables[fib]); 397 } 398 399 /* 400 * Handle MRT setsockopt commands to modify the multicast routing tables. 401 */ 402 static int 403 X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt) 404 { 405 struct mf6ctable *mfct; 406 int error = 0; 407 int optval; 408 struct mif6ctl mifc; 409 struct mf6cctl mfcc; 410 mifi_t mifi; 411 412 mfct = somfctable(so); 413 if (so != mfct->router && sopt->sopt_name != MRT6_INIT) 414 return (EPERM); 415 416 switch (sopt->sopt_name) { 417 case MRT6_INIT: 418 #ifdef MRT6_OINIT 419 case MRT6_OINIT: 420 #endif 421 error = sooptcopyin(sopt, &optval, sizeof(optval), 422 sizeof(optval)); 423 if (error) 424 break; 425 error = ip6_mrouter_init(so, optval, sopt->sopt_name); 426 break; 427 case MRT6_DONE: 428 X_ip6_mrouter_done(so); 429 break; 430 case MRT6_ADD_MIF: 431 error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc)); 432 if (error) 433 break; 434 error = add_m6if(mfct, so->so_fibnum, &mifc); 435 break; 436 case MRT6_ADD_MFC: 437 error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); 438 if (error) 439 break; 440 error = add_m6fc(mfct, &mfcc); 441 break; 442 case MRT6_DEL_MFC: 443 error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); 444 if (error) 445 break; 446 error = del_m6fc(mfct, &mfcc); 447 break; 448 case MRT6_DEL_MIF: 449 error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi)); 450 if (error) 451 break; 452 error = del_m6if(mfct, mifi); 453 break; 454 case MRT6_PIM: 455 error = sooptcopyin(sopt, &optval, sizeof(optval), 456 sizeof(optval)); 457 if (error) 458 break; 459 error = set_pim6(&optval); 460 break; 461 default: 462 error = EOPNOTSUPP; 463 break; 464 } 465 466 return (error); 467 } 468 469 /* 470 * Handle MRT getsockopt commands 471 */ 472 static int 473 X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt) 474 { 475 struct mf6ctable *mfct; 476 int error = 0; 477 478 mfct = somfctable(so); 479 if (so != mfct->router) 480 return (EACCES); 481 482 switch (sopt->sopt_name) { 483 case MRT6_PIM: 484 error = sooptcopyout(sopt, &V_pim6, sizeof(V_pim6)); 485 break; 486 } 487 return (error); 488 } 489 490 /* 491 * Handle ioctl commands to obtain information from the cache 492 */ 493 static int 494 X_mrt6_ioctl(u_long cmd, caddr_t data, int fibnum) 495 { 496 struct mf6ctable *mfct; 497 int error; 498 499 error = priv_check(curthread, PRIV_NETINET_MROUTE); 500 if (error) 501 return (error); 502 503 mfct = &V_mfctables[fibnum]; 504 switch (cmd) { 505 case SIOCGETSGCNT_IN6: 506 error = get_sg_cnt(mfct, (struct sioc_sg_req6 *)data); 507 break; 508 509 case SIOCGETMIFCNT_IN6: 510 error = get_mif6_cnt(mfct, (struct sioc_mif_req6 *)data); 511 break; 512 513 default: 514 error = EINVAL; 515 break; 516 } 517 518 return (error); 519 } 520 521 /* 522 * returns the packet, byte, rpf-failure count for the source group provided 523 */ 524 static int 525 get_sg_cnt(struct mf6ctable *mfct, struct sioc_sg_req6 *req) 526 { 527 struct mf6c *rt; 528 int ret; 529 530 ret = 0; 531 532 MFC6_LOCK(); 533 rt = mf6c_find(mfct, &req->src.sin6_addr, &req->grp.sin6_addr); 534 if (rt == NULL) { 535 ret = ESRCH; 536 } else { 537 req->pktcnt = rt->mf6c_pkt_cnt; 538 req->bytecnt = rt->mf6c_byte_cnt; 539 req->wrong_if = rt->mf6c_wrong_if; 540 } 541 MFC6_UNLOCK(); 542 543 return (ret); 544 } 545 546 /* 547 * returns the input and output packet and byte counts on the mif provided 548 */ 549 static int 550 get_mif6_cnt(struct mf6ctable *mfct, struct sioc_mif_req6 *req) 551 { 552 mifi_t mifi; 553 int ret; 554 555 ret = 0; 556 mifi = req->mifi; 557 558 MIF6_LOCK(); 559 560 if (mifi >= mfct->nummifs) { 561 ret = EINVAL; 562 } else { 563 struct mif6 *mif = &mfct->miftable[mifi]; 564 565 req->icount = mif->m6_pkt_in; 566 req->ocount = mif->m6_pkt_out; 567 req->ibytes = mif->m6_bytes_in; 568 req->obytes = mif->m6_bytes_out; 569 } 570 571 MIF6_UNLOCK(); 572 573 return (ret); 574 } 575 576 static int 577 set_pim6(int *i) 578 { 579 if ((*i != 1) && (*i != 0)) 580 return (EINVAL); 581 582 /* XXX-MJ */ 583 V_pim6 = *i; 584 585 return (0); 586 } 587 588 /* 589 * Enable multicast routing 590 */ 591 static int 592 ip6_mrouter_init(struct socket *so, int v, int cmd) 593 { 594 struct mf6ctable *mfct; 595 596 MRT6_DLOG(DEBUG_ANY, "%s: socket %p", __func__, so); 597 598 if (v != 1) 599 return (ENOPROTOOPT); 600 601 mfct = somfctable(so); 602 MROUTER6_LOCK(); 603 604 if (mfct->router != NULL) { 605 MROUTER6_UNLOCK(); 606 return (EADDRINUSE); 607 } 608 609 MFC6_LOCK(); 610 V_ip6_mrouting_enabled = true; 611 mfct->router = so; 612 mfct->router_ver = cmd; 613 614 bzero(&mfct->mfchashtbl, sizeof(mfct->mfchashtbl)); 615 bzero(&mfct->nexpire, sizeof(mfct->nexpire)); 616 617 V_pim6 = 0;/* used for stubbing out/in pim stuff */ 618 619 callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls_all, 620 curvnet); 621 622 MFC6_UNLOCK(); 623 MROUTER6_UNLOCK(); 624 625 MRT6_DLOG(DEBUG_ANY, "finished"); 626 627 return (0); 628 } 629 630 /* 631 * Disable IPv6 multicast forwarding. 632 */ 633 static void 634 X_ip6_mrouter_done(struct socket *so) 635 { 636 struct mf6ctable *mfct; 637 mifi_t mifi; 638 u_long i; 639 struct mf6c *rt; 640 struct rtdetq *rte; 641 642 mfct = somfctable(so); 643 MROUTER6_LOCK(); 644 645 if (mfct->router != so) { 646 MROUTER6_UNLOCK(); 647 return; 648 } 649 650 /* 651 * For each phyint in use, disable promiscuous reception of all IPv6 652 * multicasts. 653 */ 654 for (mifi = 0; mifi < mfct->nummifs; mifi++) { 655 struct mif6 *mif = &mfct->miftable[mifi]; 656 657 if (mif->m6_ifp && !(mif->m6_flags & MIFF_REGISTER)) { 658 if_allmulti(mif->m6_ifp, 0); 659 } 660 } 661 MFC6_LOCK(); 662 bzero(mfct->miftable, sizeof(mfct->miftable)); 663 mfct->nummifs = 0; 664 665 V_pim6 = 0; /* used to stub out/in pim specific code */ 666 667 /* 668 * Free all multicast forwarding cache entries. 669 */ 670 for (i = 0; i < MF6CTBLSIZ; i++) { 671 rt = mfct->mfchashtbl[i]; 672 while (rt) { 673 struct mf6c *frt; 674 675 for (rte = rt->mf6c_stall; rte != NULL; ) { 676 struct rtdetq *n = rte->next; 677 678 m_freem(rte->m); 679 free(rte, M_MRTABLE6); 680 rte = n; 681 } 682 frt = rt; 683 rt = rt->mf6c_next; 684 free(frt, M_MRTABLE6); 685 } 686 } 687 mfct->router = NULL; 688 mfct->router_ver = 0; 689 V_ip6_mrouting_enabled = false; 690 691 bzero(mfct->mfchashtbl, sizeof(mfct->mfchashtbl)); 692 MFC6_UNLOCK(); 693 694 /* 695 * Reset register interface 696 */ 697 if (mfct->register_mif != (mifi_t)-1 && mfct->register_if != NULL) { 698 if_detach(mfct->register_if); 699 if_free(mfct->register_if); 700 mfct->register_mif = (mifi_t)-1; 701 mfct->register_if = NULL; 702 } 703 704 MROUTER6_UNLOCK(); 705 MRT6_DLOG(DEBUG_ANY, "finished"); 706 } 707 708 static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 }; 709 710 /* 711 * Add a mif to the mif table 712 */ 713 static int 714 add_m6if(struct mf6ctable *mfct, int fibnum, struct mif6ctl *mifcp) 715 { 716 struct epoch_tracker et; 717 struct mif6 *mifp; 718 struct ifnet *ifp; 719 int error; 720 721 MIF6_LOCK(); 722 723 if (mifcp->mif6c_mifi >= MAXMIFS) { 724 MIF6_UNLOCK(); 725 return (EINVAL); 726 } 727 mifp = &mfct->miftable[mifcp->mif6c_mifi]; 728 if (mifp->m6_ifp != NULL) { 729 MIF6_UNLOCK(); 730 return (EADDRINUSE); /* XXX: is it appropriate? */ 731 } 732 733 NET_EPOCH_ENTER(et); 734 if ((ifp = ifnet_byindex(mifcp->mif6c_pifi)) == NULL) { 735 NET_EPOCH_EXIT(et); 736 MIF6_UNLOCK(); 737 return (ENXIO); 738 } 739 NET_EPOCH_EXIT(et); /* XXXGL: unsafe ifp */ 740 741 if (mifcp->mif6c_flags & MIFF_REGISTER) { 742 if (mfct->register_mif == (mifi_t)-1) { 743 ifp = if_alloc(IFT_OTHER); 744 745 if_initname(ifp, "register_mif", 0); 746 ifp->if_flags |= IFF_LOOPBACK; 747 if_attach(ifp); 748 mfct->register_if = ifp; 749 mfct->register_mif = mifcp->mif6c_mifi; 750 /* 751 * it is impossible to guess the ifindex of the 752 * register interface. So mif6c_pifi is automatically 753 * calculated. 754 */ 755 mifcp->mif6c_pifi = ifp->if_index; 756 } else { 757 ifp = mfct->register_if; 758 } 759 } else { 760 /* Make sure the interface supports multicast */ 761 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 762 MIF6_UNLOCK(); 763 return (EOPNOTSUPP); 764 } 765 if (ifp->if_fib != fibnum) { 766 MIF6_UNLOCK(); 767 return (EADDRNOTAVAIL); 768 } 769 770 error = if_allmulti(ifp, 1); 771 if (error) { 772 MIF6_UNLOCK(); 773 return (error); 774 } 775 } 776 777 mifp->m6_flags = mifcp->mif6c_flags; 778 mifp->m6_ifp = ifp; 779 780 /* initialize per mif pkt counters */ 781 mifp->m6_pkt_in = 0; 782 mifp->m6_pkt_out = 0; 783 mifp->m6_bytes_in = 0; 784 mifp->m6_bytes_out = 0; 785 786 /* Adjust nummifs up if the mifi is higher than nummifs */ 787 if (mfct->nummifs <= mifcp->mif6c_mifi) 788 mfct->nummifs = mifcp->mif6c_mifi + 1; 789 790 MIF6_UNLOCK(); 791 MRT6_DLOG(DEBUG_ANY, "mif #%d, phyint %s", mifcp->mif6c_mifi, 792 if_name(ifp)); 793 794 return (0); 795 } 796 797 static void 798 expire_mf6c(struct mf6c *mfc) 799 { 800 struct rtdetq *rte; 801 802 while ((rte = mfc->mf6c_stall) != NULL) { 803 mfc->mf6c_stall = rte->next; 804 m_freem(rte->m); 805 free(rte, M_MRTABLE6); 806 } 807 808 free(mfc, M_MRTABLE6); 809 } 810 811 /* 812 * Delete a mif from the mif table 813 */ 814 static int 815 del_m6if_locked(struct mf6ctable *mfct, mifi_t mifi) 816 { 817 struct mif6 *mifp; 818 mifi_t tmp; 819 struct ifnet *ifp; 820 821 MIF6_LOCK_ASSERT(); 822 823 if (mifi >= mfct->nummifs) 824 return (EINVAL); 825 mifp = &mfct->miftable[mifi]; 826 if (mifp->m6_ifp == NULL) 827 return (EINVAL); 828 829 if (!(mifp->m6_flags & MIFF_REGISTER)) { 830 ifp = mifp->m6_ifp; 831 if_allmulti(ifp, 0); 832 833 MFC6_LOCK(); 834 for (int i = 0; i < MF6CTBLSIZ; i++) { 835 struct mf6c *mfc, **nmfc; 836 837 nmfc = &mfct->mfchashtbl[i]; 838 while ((mfc = *nmfc) != NULL) { 839 if (mfc->mf6c_parent == mifi) { 840 *nmfc = mfc->mf6c_next; 841 if (mfc->mf6c_expire) 842 mfct->nexpire[i]--; 843 expire_mf6c(mfc); 844 } else { 845 /* Remove this mif from the ifset */ 846 if (IF_ISSET(mifi, &mfc->mf6c_ifset)) 847 IF_CLR(mifi, &mfc->mf6c_ifset); 848 nmfc = &mfc->mf6c_next; 849 } 850 } 851 } 852 MFC6_UNLOCK(); 853 } else { 854 if (mfct->register_mif != (mifi_t)-1 && 855 mfct->register_if != NULL) { 856 if_detach(mfct->register_if); 857 if_free(mfct->register_if); 858 mfct->register_mif = (mifi_t)-1; 859 mfct->register_if = NULL; 860 } 861 } 862 863 bzero(mifp, sizeof(*mifp)); 864 865 /* Adjust nummifs down */ 866 for (tmp = mfct->nummifs; tmp > 0; tmp--) 867 if (mfct->miftable[tmp - 1].m6_ifp != NULL) 868 break; 869 mfct->nummifs = tmp; 870 MRT6_DLOG(DEBUG_ANY, "mif %d, nummifs %d", mifi, mfct->nummifs); 871 872 return (0); 873 } 874 875 static int 876 del_m6if(struct mf6ctable *mfct, mifi_t mifi) 877 { 878 int cc; 879 880 MIF6_LOCK(); 881 cc = del_m6if_locked(mfct, mifi); 882 MIF6_UNLOCK(); 883 884 return (cc); 885 } 886 887 /* 888 * Add an mfc entry 889 */ 890 static int 891 add_m6fc(struct mf6ctable *mfct, struct mf6cctl *mfccp) 892 { 893 struct mf6c *rt; 894 u_long hash; 895 struct rtdetq *rte; 896 u_short nstl; 897 char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; 898 899 MFC6_LOCK(); 900 rt = mf6c_find(mfct, &mfccp->mf6cc_origin.sin6_addr, 901 &mfccp->mf6cc_mcastgrp.sin6_addr); 902 /* If an entry already exists, just update the fields */ 903 if (rt) { 904 MRT6_DLOG(DEBUG_MFC, "no upcall o %s g %s p %x", 905 ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr), 906 ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr), 907 mfccp->mf6cc_parent); 908 909 rt->mf6c_parent = mfccp->mf6cc_parent; 910 rt->mf6c_ifset = mfccp->mf6cc_ifset; 911 912 MFC6_UNLOCK(); 913 return (0); 914 } 915 916 /* 917 * Find the entry for which the upcall was made and update 918 */ 919 hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr, 920 mfccp->mf6cc_mcastgrp.sin6_addr); 921 for (rt = mfct->mfchashtbl[hash], nstl = 0; rt; rt = rt->mf6c_next) { 922 if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, 923 &mfccp->mf6cc_origin.sin6_addr) && 924 IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, 925 &mfccp->mf6cc_mcastgrp.sin6_addr) && 926 (rt->mf6c_stall != NULL)) { 927 if (nstl++) 928 log(LOG_ERR, 929 "add_m6fc: %s o %s g %s p %x dbx %p\n", 930 "multiple kernel entries", 931 ip6_sprintf(ip6bufo, 932 &mfccp->mf6cc_origin.sin6_addr), 933 ip6_sprintf(ip6bufg, 934 &mfccp->mf6cc_mcastgrp.sin6_addr), 935 mfccp->mf6cc_parent, rt->mf6c_stall); 936 937 MRT6_DLOG(DEBUG_MFC, "o %s g %s p %x dbg %p", 938 ip6_sprintf(ip6bufo, 939 &mfccp->mf6cc_origin.sin6_addr), 940 ip6_sprintf(ip6bufg, 941 &mfccp->mf6cc_mcastgrp.sin6_addr), 942 mfccp->mf6cc_parent, rt->mf6c_stall); 943 944 rt->mf6c_origin = mfccp->mf6cc_origin; 945 rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; 946 rt->mf6c_parent = mfccp->mf6cc_parent; 947 rt->mf6c_ifset = mfccp->mf6cc_ifset; 948 /* initialize pkt counters per src-grp */ 949 rt->mf6c_pkt_cnt = 0; 950 rt->mf6c_byte_cnt = 0; 951 rt->mf6c_wrong_if = 0; 952 953 rt->mf6c_expire = 0; /* Don't clean this guy up */ 954 mfct->nexpire[hash]--; 955 956 /* free packets Qed at the end of this entry */ 957 for (rte = rt->mf6c_stall; rte != NULL; ) { 958 struct rtdetq *n = rte->next; 959 ip6_mdq(mfct, rte->m, rte->ifp, rt); 960 m_freem(rte->m); 961 #ifdef UPCALL_TIMING 962 collate(&(rte->t)); 963 #endif /* UPCALL_TIMING */ 964 free(rte, M_MRTABLE6); 965 rte = n; 966 } 967 rt->mf6c_stall = NULL; 968 } 969 } 970 971 /* 972 * It is possible that an entry is being inserted without an upcall 973 */ 974 if (nstl == 0) { 975 MRT6_DLOG(DEBUG_MFC, "no upcall h %lu o %s g %s p %x", hash, 976 ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr), 977 ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr), 978 mfccp->mf6cc_parent); 979 980 for (rt = mfct->mfchashtbl[hash]; rt; rt = rt->mf6c_next) { 981 if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr, 982 &mfccp->mf6cc_origin.sin6_addr)&& 983 IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr, 984 &mfccp->mf6cc_mcastgrp.sin6_addr)) { 985 rt->mf6c_origin = mfccp->mf6cc_origin; 986 rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; 987 rt->mf6c_parent = mfccp->mf6cc_parent; 988 rt->mf6c_ifset = mfccp->mf6cc_ifset; 989 /* initialize pkt counters per src-grp */ 990 rt->mf6c_pkt_cnt = 0; 991 rt->mf6c_byte_cnt = 0; 992 rt->mf6c_wrong_if = 0; 993 994 if (rt->mf6c_expire) 995 mfct->nexpire[hash]--; 996 rt->mf6c_expire = 0; 997 } 998 } 999 if (rt == NULL) { 1000 /* no upcall, so make a new entry */ 1001 rt = malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT); 1002 if (rt == NULL) { 1003 MFC6_UNLOCK(); 1004 return (ENOBUFS); 1005 } 1006 1007 /* insert new entry at head of hash chain */ 1008 rt->mf6c_origin = mfccp->mf6cc_origin; 1009 rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp; 1010 rt->mf6c_parent = mfccp->mf6cc_parent; 1011 rt->mf6c_ifset = mfccp->mf6cc_ifset; 1012 /* initialize pkt counters per src-grp */ 1013 rt->mf6c_pkt_cnt = 0; 1014 rt->mf6c_byte_cnt = 0; 1015 rt->mf6c_wrong_if = 0; 1016 rt->mf6c_expire = 0; 1017 rt->mf6c_stall = NULL; 1018 1019 /* link into table */ 1020 rt->mf6c_next = mfct->mfchashtbl[hash]; 1021 mfct->mfchashtbl[hash] = rt; 1022 } 1023 } 1024 1025 MFC6_UNLOCK(); 1026 return (0); 1027 } 1028 1029 #ifdef UPCALL_TIMING 1030 /* 1031 * collect delay statistics on the upcalls 1032 */ 1033 static void 1034 collate(struct timeval *t) 1035 { 1036 u_long d; 1037 struct timeval tp; 1038 u_long delta; 1039 1040 GET_TIME(tp); 1041 1042 if (TV_LT(*t, tp)) 1043 { 1044 TV_DELTA(tp, *t, delta); 1045 1046 d = delta >> 10; 1047 if (d > UPCALL_MAX) 1048 d = UPCALL_MAX; 1049 1050 ++upcall_data[d]; 1051 } 1052 } 1053 #endif /* UPCALL_TIMING */ 1054 1055 /* 1056 * Delete an mfc entry 1057 */ 1058 static int 1059 del_m6fc(struct mf6ctable *mfct, struct mf6cctl *mfccp) 1060 { 1061 #ifdef MRT6DEBUG 1062 char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; 1063 #endif 1064 struct sockaddr_in6 origin; 1065 struct sockaddr_in6 mcastgrp; 1066 struct mf6c *rt; 1067 struct mf6c **nptr; 1068 u_long hash; 1069 1070 origin = mfccp->mf6cc_origin; 1071 mcastgrp = mfccp->mf6cc_mcastgrp; 1072 hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr); 1073 1074 MRT6_DLOG(DEBUG_MFC, "orig %s mcastgrp %s", 1075 ip6_sprintf(ip6bufo, &origin.sin6_addr), 1076 ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr)); 1077 1078 MFC6_LOCK(); 1079 1080 nptr = &mfct->mfchashtbl[hash]; 1081 while ((rt = *nptr) != NULL) { 1082 if (IN6_ARE_ADDR_EQUAL(&origin.sin6_addr, 1083 &rt->mf6c_origin.sin6_addr) && 1084 IN6_ARE_ADDR_EQUAL(&mcastgrp.sin6_addr, 1085 &rt->mf6c_mcastgrp.sin6_addr) && 1086 rt->mf6c_stall == NULL) 1087 break; 1088 1089 nptr = &rt->mf6c_next; 1090 } 1091 if (rt == NULL) { 1092 MFC6_UNLOCK(); 1093 return (EADDRNOTAVAIL); 1094 } 1095 1096 *nptr = rt->mf6c_next; 1097 free(rt, M_MRTABLE6); 1098 1099 MFC6_UNLOCK(); 1100 1101 return (0); 1102 } 1103 1104 static int 1105 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src) 1106 { 1107 1108 if (s) { 1109 if (sbappendaddr(&s->so_rcv, 1110 (struct sockaddr *)src, 1111 mm, (struct mbuf *)0) != 0) { 1112 sorwakeup(s); 1113 return (0); 1114 } else 1115 soroverflow(s); 1116 } 1117 m_freem(mm); 1118 return (-1); 1119 } 1120 1121 /* 1122 * IPv6 multicast forwarding function. This function assumes that the packet 1123 * pointed to by "ip6" has arrived on (or is about to be sent to) the interface 1124 * pointed to by "ifp", and the packet is to be relayed to other networks 1125 * that have members of the packet's destination IPv6 multicast group. 1126 * 1127 * The packet is returned unscathed to the caller, unless it is 1128 * erroneous, in which case a non-zero return value tells the caller to 1129 * discard it. 1130 * 1131 * NOTE: this implementation assumes that m->m_pkthdr.rcvif is NULL iff 1132 * this function is called in the originating context (i.e., not when 1133 * forwarding a packet from other node). ip6_output(), which is currently the 1134 * only function that calls this function is called in the originating context, 1135 * explicitly ensures this condition. It is caller's responsibility to ensure 1136 * that if this function is called from somewhere else in the originating 1137 * context in the future. 1138 */ 1139 static int 1140 X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) 1141 { 1142 struct mf6ctable *mfct; 1143 struct rtdetq *rte; 1144 struct mbuf *mb0; 1145 struct mf6c *rt; 1146 struct mif6 *mifp; 1147 struct mbuf *mm; 1148 u_long hash; 1149 mifi_t mifi; 1150 char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; 1151 #ifdef UPCALL_TIMING 1152 struct timeval tp; 1153 1154 GET_TIME(tp); 1155 #endif /* UPCALL_TIMING */ 1156 1157 M_ASSERTMAPPED(m); 1158 MRT6_DLOG(DEBUG_FORWARD, "src %s, dst %s, ifindex %d", 1159 ip6_sprintf(ip6bufs, &ip6->ip6_src), 1160 ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp->if_index); 1161 1162 /* 1163 * Don't forward a packet with Hop limit of zero or one, 1164 * or a packet destined to a local-only group. 1165 */ 1166 if (ip6->ip6_hlim <= 1 || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) || 1167 IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) 1168 return (0); 1169 ip6->ip6_hlim--; 1170 1171 /* 1172 * Source address check: do not forward packets with unspecified 1173 * source. It was discussed in July 2000, on ipngwg mailing list. 1174 * This is rather more serious than unicast cases, because some 1175 * MLD packets can be sent with the unspecified source address 1176 * (although such packets must normally set 1 to the hop limit field). 1177 */ 1178 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { 1179 IP6STAT_INC(ip6s_cantforward); 1180 if (V_ip6_log_cannot_forward && ip6_log_ratelimit()) { 1181 log(LOG_DEBUG, 1182 "cannot forward " 1183 "from %s to %s nxt %d received on %s\n", 1184 ip6_sprintf(ip6bufs, &ip6->ip6_src), 1185 ip6_sprintf(ip6bufd, &ip6->ip6_dst), 1186 ip6->ip6_nxt, 1187 if_name(m->m_pkthdr.rcvif)); 1188 } 1189 return (0); 1190 } 1191 1192 mfct = &V_mfctables[M_GETFIB(m)]; 1193 MFC6_LOCK(); 1194 if (__predict_false(mfct->router == NULL)) { 1195 MFC6_UNLOCK(); 1196 return (EADDRNOTAVAIL); 1197 } 1198 1199 /* 1200 * Determine forwarding mifs from the forwarding cache table 1201 */ 1202 rt = mf6c_find(mfct, &ip6->ip6_src, &ip6->ip6_dst); 1203 MRT6STAT_INC(mrt6s_mfc_lookups); 1204 1205 /* Entry exists, so forward if necessary */ 1206 if (rt) { 1207 MFC6_UNLOCK(); 1208 return (ip6_mdq(mfct, m, ifp, rt)); 1209 } 1210 1211 /* 1212 * If we don't have a route for packet's origin, 1213 * Make a copy of the packet & send message to routing daemon. 1214 */ 1215 MRT6STAT_INC(mrt6s_no_route); 1216 MRT6_DLOG(DEBUG_FORWARD | DEBUG_MFC, "no rte s %s g %s", 1217 ip6_sprintf(ip6bufs, &ip6->ip6_src), 1218 ip6_sprintf(ip6bufd, &ip6->ip6_dst)); 1219 1220 /* 1221 * Allocate mbufs early so that we don't do extra work if we 1222 * are just going to fail anyway. 1223 */ 1224 rte = malloc(sizeof(*rte), M_MRTABLE6, M_NOWAIT); 1225 if (rte == NULL) { 1226 MFC6_UNLOCK(); 1227 return (ENOBUFS); 1228 } 1229 mb0 = m_copym(m, 0, M_COPYALL, M_NOWAIT); 1230 /* 1231 * Pullup packet header if needed before storing it, 1232 * as other references may modify it in the meantime. 1233 */ 1234 if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < sizeof(struct ip6_hdr))) 1235 mb0 = m_pullup(mb0, sizeof(struct ip6_hdr)); 1236 if (mb0 == NULL) { 1237 free(rte, M_MRTABLE6); 1238 MFC6_UNLOCK(); 1239 return (ENOBUFS); 1240 } 1241 1242 /* is there an upcall waiting for this packet? */ 1243 hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst); 1244 for (rt = mfct->mfchashtbl[hash]; rt; rt = rt->mf6c_next) { 1245 if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, 1246 &rt->mf6c_origin.sin6_addr) && 1247 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, 1248 &rt->mf6c_mcastgrp.sin6_addr) && (rt->mf6c_stall != NULL)) 1249 break; 1250 } 1251 1252 if (rt == NULL) { 1253 struct mrt6msg *im; 1254 #ifdef MRT6_OINIT 1255 struct omrt6msg *oim; 1256 #endif 1257 /* no upcall, so make a new entry */ 1258 rt = malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT); 1259 if (rt == NULL) { 1260 free(rte, M_MRTABLE6); 1261 m_freem(mb0); 1262 MFC6_UNLOCK(); 1263 return (ENOBUFS); 1264 } 1265 /* 1266 * Make a copy of the header to send to the user 1267 * level process 1268 */ 1269 mm = m_copym(mb0, 0, sizeof(struct ip6_hdr), M_NOWAIT); 1270 if (mm == NULL) { 1271 free(rte, M_MRTABLE6); 1272 m_freem(mb0); 1273 free(rt, M_MRTABLE6); 1274 MFC6_UNLOCK(); 1275 return (ENOBUFS); 1276 } 1277 1278 /* 1279 * Send message to routing daemon 1280 */ 1281 sin6.sin6_addr = ip6->ip6_src; 1282 im = NULL; 1283 #ifdef MRT6_OINIT 1284 oim = NULL; 1285 #endif 1286 switch (mfct->router_ver) { 1287 #ifdef MRT6_OINIT 1288 case MRT6_OINIT: 1289 oim = mtod(mm, struct omrt6msg *); 1290 oim->im6_msgtype = MRT6MSG_NOCACHE; 1291 oim->im6_mbz = 0; 1292 break; 1293 #endif 1294 case MRT6_INIT: 1295 im = mtod(mm, struct mrt6msg *); 1296 im->im6_msgtype = MRT6MSG_NOCACHE; 1297 im->im6_mbz = 0; 1298 break; 1299 default: 1300 free(rte, M_MRTABLE6); 1301 m_freem(mb0); 1302 free(rt, M_MRTABLE6); 1303 MFC6_UNLOCK(); 1304 return (EINVAL); 1305 } 1306 1307 MRT6_DLOG(DEBUG_FORWARD, "getting the iif info in the kernel"); 1308 for (mifp = mfct->miftable, mifi = 0; 1309 mifi < mfct->nummifs && mifp->m6_ifp != ifp; mifp++, mifi++) 1310 ; 1311 1312 switch (mfct->router_ver) { 1313 #ifdef MRT6_OINIT 1314 case MRT6_OINIT: 1315 oim->im6_mif = mifi; 1316 break; 1317 #endif 1318 case MRT6_INIT: 1319 im->im6_mif = mifi; 1320 break; 1321 } 1322 1323 if (socket_send(mfct->router, mm, &sin6) < 0) { 1324 log(LOG_WARNING, "ip6_mforward: ip6_mrouter " 1325 "socket queue full\n"); 1326 MRT6STAT_INC(mrt6s_upq_sockfull); 1327 free(rte, M_MRTABLE6); 1328 m_freem(mb0); 1329 free(rt, M_MRTABLE6); 1330 MFC6_UNLOCK(); 1331 return (ENOBUFS); 1332 } 1333 1334 MRT6STAT_INC(mrt6s_upcalls); 1335 1336 /* insert new entry at head of hash chain */ 1337 bzero(rt, sizeof(*rt)); 1338 rt->mf6c_origin.sin6_family = AF_INET6; 1339 rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6); 1340 rt->mf6c_origin.sin6_addr = ip6->ip6_src; 1341 rt->mf6c_mcastgrp.sin6_family = AF_INET6; 1342 rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6); 1343 rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst; 1344 rt->mf6c_expire = UPCALL_EXPIRE; 1345 mfct->nexpire[hash]++; 1346 rt->mf6c_parent = MF6C_INCOMPLETE_PARENT; 1347 1348 /* link into table */ 1349 rt->mf6c_next = mfct->mfchashtbl[hash]; 1350 mfct->mfchashtbl[hash] = rt; 1351 /* Add this entry to the end of the queue */ 1352 rt->mf6c_stall = rte; 1353 } else { 1354 /* determine if q has overflowed */ 1355 struct rtdetq **p; 1356 int npkts = 0; 1357 1358 for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next) 1359 if (++npkts > MAX_UPQ6) { 1360 MRT6STAT_INC(mrt6s_upq_ovflw); 1361 free(rte, M_MRTABLE6); 1362 m_freem(mb0); 1363 MFC6_UNLOCK(); 1364 return (0); 1365 } 1366 1367 /* Add this entry to the end of the queue */ 1368 *p = rte; 1369 } 1370 1371 rte->next = NULL; 1372 rte->m = mb0; 1373 rte->ifp = ifp; 1374 #ifdef UPCALL_TIMING 1375 rte->t = tp; 1376 #endif /* UPCALL_TIMING */ 1377 1378 MFC6_UNLOCK(); 1379 1380 return (0); 1381 } 1382 1383 /* 1384 * Clean up cache entries if upcalls are not serviced 1385 * Call from the Slow Timeout mechanism, every half second. 1386 */ 1387 static void 1388 expire_upcalls(struct mf6ctable *mfct) 1389 { 1390 #ifdef MRT6DEBUG 1391 char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; 1392 #endif 1393 struct rtdetq *rte; 1394 struct mf6c *mfc, **nptr; 1395 u_long i; 1396 1397 MFC6_LOCK_ASSERT(); 1398 1399 for (i = 0; i < MF6CTBLSIZ; i++) { 1400 if (mfct->nexpire[i] == 0) 1401 continue; 1402 nptr = &mfct->mfchashtbl[i]; 1403 while ((mfc = *nptr) != NULL) { 1404 rte = mfc->mf6c_stall; 1405 /* 1406 * Skip real cache entries 1407 * Make sure it wasn't marked to not expire (shouldn't happen) 1408 * If it expires now 1409 */ 1410 if (rte != NULL && 1411 mfc->mf6c_expire != 0 && 1412 --mfc->mf6c_expire == 0) { 1413 MRT6_DLOG(DEBUG_EXPIRE, "expiring (%s %s)", 1414 ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr), 1415 ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr)); 1416 /* 1417 * drop all the packets 1418 * free the mbuf with the pkt, if, timing info 1419 */ 1420 do { 1421 struct rtdetq *n = rte->next; 1422 m_freem(rte->m); 1423 free(rte, M_MRTABLE6); 1424 rte = n; 1425 } while (rte != NULL); 1426 MRT6STAT_INC(mrt6s_cache_cleanups); 1427 mfct->nexpire[i]--; 1428 1429 *nptr = mfc->mf6c_next; 1430 free(mfc, M_MRTABLE6); 1431 } else { 1432 nptr = &mfc->mf6c_next; 1433 } 1434 } 1435 } 1436 } 1437 1438 /* 1439 * Clean up the cache entry if upcall is not serviced 1440 */ 1441 static void 1442 expire_upcalls_all(void *arg) 1443 { 1444 CURVNET_SET((struct vnet *)arg); 1445 1446 for (int i = 0; i < V_nmfctables; i++) 1447 expire_upcalls(&V_mfctables[i]); 1448 1449 callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls_all, 1450 curvnet); 1451 1452 CURVNET_RESTORE(); 1453 } 1454 1455 /* 1456 * Packet forwarding routine once entry in the cache is made 1457 */ 1458 static int 1459 ip6_mdq(struct mf6ctable *mfct, struct mbuf *m, struct ifnet *ifp, 1460 struct mf6c *rt) 1461 { 1462 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1463 mifi_t mifi, iif; 1464 struct mif6 *mifp; 1465 int plen = m->m_pkthdr.len; 1466 struct in6_addr src0, dst0; /* copies for local work */ 1467 u_int32_t iszone, idzone, oszone, odzone; 1468 int error = 0; 1469 1470 M_ASSERTMAPPED(m); 1471 1472 /* 1473 * Don't forward if it didn't arrive from the parent mif 1474 * for its origin. 1475 */ 1476 mifi = rt->mf6c_parent; 1477 if (mifi >= mfct->nummifs || mfct->miftable[mifi].m6_ifp != ifp) { 1478 MRT6STAT_INC(mrt6s_wrong_if); 1479 rt->mf6c_wrong_if++; 1480 if (mifi >= mfct->nummifs) 1481 return (0); 1482 1483 mifp = &mfct->miftable[mifi]; 1484 MRT6_DLOG(DEBUG_FORWARD, 1485 "wrong if: ifid %d mifi %d mififid %x", ifp->if_index, 1486 mifi, mifp->m6_ifp->if_index); 1487 1488 /* 1489 * If we are doing PIM processing, and we are forwarding 1490 * packets on this interface, send a message to the 1491 * routing daemon. 1492 */ 1493 /* have to make sure this is a valid mif */ 1494 if (mifp->m6_ifp && V_pim6 && (m->m_flags & M_LOOP) == 0) { 1495 /* 1496 * Check the M_LOOP flag to avoid an 1497 * unnecessary PIM assert. 1498 * XXX: M_LOOP is an ad-hoc hack... 1499 */ 1500 static struct sockaddr_in6 sin6 = 1501 { sizeof(sin6), AF_INET6 }; 1502 1503 struct mbuf *mm; 1504 struct mrt6msg *im; 1505 #ifdef MRT6_OINIT 1506 struct omrt6msg *oim; 1507 #endif 1508 1509 mm = m_copym(m, 0, sizeof(struct ip6_hdr), 1510 M_NOWAIT); 1511 if (mm && 1512 (!M_WRITABLE(mm) || 1513 mm->m_len < sizeof(struct ip6_hdr))) 1514 mm = m_pullup(mm, sizeof(struct ip6_hdr)); 1515 if (mm == NULL) 1516 return (ENOBUFS); 1517 1518 #ifdef MRT6_OINIT 1519 oim = NULL; 1520 #endif 1521 im = NULL; 1522 switch (mfct->router_ver) { 1523 #ifdef MRT6_OINIT 1524 case MRT6_OINIT: 1525 oim = mtod(mm, struct omrt6msg *); 1526 oim->im6_msgtype = MRT6MSG_WRONGMIF; 1527 oim->im6_mbz = 0; 1528 break; 1529 #endif 1530 case MRT6_INIT: 1531 im = mtod(mm, struct mrt6msg *); 1532 im->im6_msgtype = MRT6MSG_WRONGMIF; 1533 im->im6_mbz = 0; 1534 break; 1535 default: 1536 m_freem(mm); 1537 return (EINVAL); 1538 } 1539 1540 for (mifp = mfct->miftable, iif = 0; 1541 iif < mfct->nummifs && mifp->m6_ifp != ifp; 1542 mifp++, iif++) 1543 ; 1544 1545 switch (mfct->router_ver) { 1546 #ifdef MRT6_OINIT 1547 case MRT6_OINIT: 1548 oim->im6_mif = iif; 1549 sin6.sin6_addr = oim->im6_src; 1550 break; 1551 #endif 1552 case MRT6_INIT: 1553 im->im6_mif = iif; 1554 sin6.sin6_addr = im->im6_src; 1555 break; 1556 } 1557 1558 MRT6STAT_INC(mrt6s_upcalls); 1559 1560 if (socket_send(mfct->router, mm, &sin6) < 0) { 1561 MRT6_DLOG(DEBUG_ANY, 1562 "ip6_mrouter socket queue full"); 1563 MRT6STAT_INC(mrt6s_upq_sockfull); 1564 return (ENOBUFS); 1565 } 1566 } 1567 return (0); 1568 } 1569 1570 mifp = &mfct->miftable[mifi]; 1571 1572 /* If I sourced this packet, it counts as output, else it was input. */ 1573 if (m->m_pkthdr.rcvif == NULL) { 1574 /* XXX: is rcvif really NULL when output?? */ 1575 mifp->m6_pkt_out++; 1576 mifp->m6_bytes_out += plen; 1577 } else { 1578 mifp->m6_pkt_in++; 1579 mifp->m6_bytes_in += plen; 1580 } 1581 rt->mf6c_pkt_cnt++; 1582 rt->mf6c_byte_cnt += plen; 1583 1584 /* 1585 * For each mif, forward a copy of the packet if there are group 1586 * members downstream on the interface. 1587 */ 1588 src0 = ip6->ip6_src; 1589 dst0 = ip6->ip6_dst; 1590 if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 || 1591 (error = in6_setscope(&dst0, ifp, &idzone)) != 0) { 1592 IP6STAT_INC(ip6s_badscope); 1593 return (error); 1594 } 1595 for (mifp = mfct->miftable, mifi = 0; mifi < mfct->nummifs; 1596 mifp++, mifi++) { 1597 if (IF_ISSET(mifi, &rt->mf6c_ifset)) { 1598 /* 1599 * check if the outgoing packet is going to break 1600 * a scope boundary. 1601 * XXX For packets through PIM register tunnel 1602 * interface, we believe a routing daemon. 1603 */ 1604 if (!(mfct->miftable[rt->mf6c_parent].m6_flags & 1605 MIFF_REGISTER) && 1606 !(mifp->m6_flags & MIFF_REGISTER)) { 1607 if (in6_setscope(&src0, mifp->m6_ifp, 1608 &oszone) || 1609 in6_setscope(&dst0, mifp->m6_ifp, 1610 &odzone) || 1611 iszone != oszone || 1612 idzone != odzone) { 1613 IP6STAT_INC(ip6s_badscope); 1614 continue; 1615 } 1616 } 1617 1618 mifp->m6_pkt_out++; 1619 mifp->m6_bytes_out += plen; 1620 if (mifp->m6_flags & MIFF_REGISTER) 1621 register_send(mfct, ip6, mifi, m); 1622 else 1623 phyint_send(ip6, mifp, m); 1624 } 1625 } 1626 return (0); 1627 } 1628 1629 static void 1630 phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) 1631 { 1632 #ifdef MRT6DEBUG 1633 char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; 1634 #endif 1635 struct mbuf *mb_copy; 1636 struct ifnet *ifp = mifp->m6_ifp; 1637 int error __unused = 0; 1638 u_long linkmtu; 1639 1640 M_ASSERTMAPPED(m); 1641 1642 /* 1643 * Make a new reference to the packet; make sure that 1644 * the IPv6 header is actually copied, not just referenced, 1645 * so that ip6_output() only scribbles on the copy. 1646 */ 1647 mb_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT); 1648 if (mb_copy && 1649 (!M_WRITABLE(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr))) 1650 mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr)); 1651 if (mb_copy == NULL) { 1652 return; 1653 } 1654 /* set MCAST flag to the outgoing packet */ 1655 mb_copy->m_flags |= M_MCAST; 1656 1657 /* 1658 * If we sourced the packet, call ip6_output since we may devide 1659 * the packet into fragments when the packet is too big for the 1660 * outgoing interface. 1661 * Otherwise, we can simply send the packet to the interface 1662 * sending queue. 1663 */ 1664 if (m->m_pkthdr.rcvif == NULL) { 1665 struct ip6_moptions im6o; 1666 struct epoch_tracker et; 1667 1668 im6o.im6o_multicast_ifp = ifp; 1669 /* XXX: ip6_output will override ip6->ip6_hlim */ 1670 im6o.im6o_multicast_hlim = ip6->ip6_hlim; 1671 im6o.im6o_multicast_loop = 1; 1672 NET_EPOCH_ENTER(et); 1673 error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o, 1674 NULL, NULL); 1675 NET_EPOCH_EXIT(et); 1676 1677 MRT6_DLOG(DEBUG_XMIT, "mif %u err %d", 1678 (uint16_t)(mifp - mif6table), error); 1679 return; 1680 } 1681 1682 /* 1683 * If configured to loop back multicasts by default, 1684 * loop back a copy now. 1685 */ 1686 if (in6_mcast_loop) 1687 ip6_mloopback(ifp, m); 1688 1689 /* 1690 * Put the packet into the sending queue of the outgoing interface 1691 * if it would fit in the MTU of the interface. 1692 */ 1693 linkmtu = in6_ifmtu(ifp); 1694 if (mb_copy->m_pkthdr.len <= linkmtu || linkmtu < IPV6_MMTU) { 1695 struct sockaddr_in6 dst6; 1696 1697 bzero(&dst6, sizeof(dst6)); 1698 dst6.sin6_len = sizeof(struct sockaddr_in6); 1699 dst6.sin6_family = AF_INET6; 1700 dst6.sin6_addr = ip6->ip6_dst; 1701 1702 IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6); 1703 /* 1704 * We just call if_output instead of nd6_output here, since 1705 * we need no ND for a multicast forwarded packet...right? 1706 */ 1707 m_clrprotoflags(m); /* Avoid confusing lower layers. */ 1708 error = (*ifp->if_output)(ifp, mb_copy, 1709 (struct sockaddr *)&dst6, NULL); 1710 MRT6_DLOG(DEBUG_XMIT, "mif %u err %d", 1711 (uint16_t)(mifp - mif6table), error); 1712 } else { 1713 /* 1714 * pMTU discovery is intentionally disabled by default, since 1715 * various router may notify pMTU in multicast, which can be 1716 * a DDoS to a router 1717 */ 1718 if (V_ip6_mcast_pmtu) 1719 icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu); 1720 else { 1721 MRT6_DLOG(DEBUG_XMIT, " packet too big on %s o %s " 1722 "g %s size %d (discarded)", if_name(ifp), 1723 ip6_sprintf(ip6bufs, &ip6->ip6_src), 1724 ip6_sprintf(ip6bufd, &ip6->ip6_dst), 1725 mb_copy->m_pkthdr.len); 1726 m_freem(mb_copy); /* simply discard the packet */ 1727 } 1728 } 1729 } 1730 1731 static int 1732 register_send(struct mf6ctable *mfct, struct ip6_hdr *ip6, mifi_t mifi, 1733 struct mbuf *m) 1734 { 1735 #ifdef MRT6DEBUG 1736 char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; 1737 #endif 1738 struct mbuf *mm; 1739 int i, len = m->m_pkthdr.len; 1740 static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 }; 1741 struct mrt6msg *im6; 1742 1743 MRT6_DLOG(DEBUG_ANY, "src %s dst %s", 1744 ip6_sprintf(ip6bufs, &ip6->ip6_src), 1745 ip6_sprintf(ip6bufd, &ip6->ip6_dst)); 1746 PIM6STAT_INC(pim6s_snd_registers); 1747 1748 /* Make a copy of the packet to send to the user level process. */ 1749 mm = m_gethdr(M_NOWAIT, MT_DATA); 1750 if (mm == NULL) 1751 return (ENOBUFS); 1752 mm->m_data += max_linkhdr; 1753 mm->m_len = sizeof(struct ip6_hdr); 1754 1755 if ((mm->m_next = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { 1756 m_freem(mm); 1757 return (ENOBUFS); 1758 } 1759 i = MHLEN - M_LEADINGSPACE(mm); 1760 if (i > len) 1761 i = len; 1762 mm = m_pullup(mm, i); 1763 if (mm == NULL) 1764 return (ENOBUFS); 1765 /* TODO: check it! */ 1766 mm->m_pkthdr.len = len + sizeof(struct ip6_hdr); 1767 1768 /* 1769 * Send message to routing daemon 1770 */ 1771 sin6.sin6_addr = ip6->ip6_src; 1772 1773 im6 = mtod(mm, struct mrt6msg *); 1774 im6->im6_msgtype = MRT6MSG_WHOLEPKT; 1775 im6->im6_mbz = 0; 1776 1777 im6->im6_mif = mifi; 1778 1779 /* iif info is not given for reg. encap.n */ 1780 MRT6STAT_INC(mrt6s_upcalls); 1781 1782 if (socket_send(mfct->router, mm, &sin6) < 0) { 1783 MRT6_DLOG(DEBUG_ANY, "ip6_mrouter socket queue full"); 1784 MRT6STAT_INC(mrt6s_upq_sockfull); 1785 return (ENOBUFS); 1786 } 1787 return (0); 1788 } 1789 1790 /* 1791 * pim6_encapcheck() is called by the encap6_input() path at runtime to 1792 * determine if a packet is for PIM; allowing PIM to be dynamically loaded 1793 * into the kernel. 1794 */ 1795 static int 1796 pim6_encapcheck(const struct mbuf *m __unused, int off __unused, 1797 int proto __unused, void *arg __unused) 1798 { 1799 1800 KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); 1801 return (8); /* claim the datagram. */ 1802 } 1803 1804 /* 1805 * PIM sparse mode hook 1806 * Receives the pim control messages, and passes them up to the listening 1807 * socket, using rip6_input. 1808 * The only message processed is the REGISTER pim message; the pim header 1809 * is stripped off, and the inner packet is passed to register_mforward. 1810 */ 1811 static int 1812 pim6_input(struct mbuf *m, int off, int proto, void *arg __unused) 1813 { 1814 struct mf6ctable *mfct; 1815 struct pim *pim; 1816 struct ip6_hdr *ip6; 1817 int pimlen; 1818 int minlen; 1819 1820 mfct = &V_mfctables[M_GETFIB(m)]; 1821 1822 PIM6STAT_INC(pim6s_rcv_total); 1823 1824 /* 1825 * Validate lengths 1826 */ 1827 pimlen = m->m_pkthdr.len - off; 1828 if (pimlen < PIM_MINLEN) { 1829 PIM6STAT_INC(pim6s_rcv_tooshort); 1830 MRT6_DLOG(DEBUG_PIM, "PIM packet too short"); 1831 m_freem(m); 1832 return (IPPROTO_DONE); 1833 } 1834 1835 /* 1836 * if the packet is at least as big as a REGISTER, go ahead 1837 * and grab the PIM REGISTER header size, to avoid another 1838 * possible m_pullup() later. 1839 * 1840 * PIM_MINLEN == pimhdr + u_int32 == 8 1841 * PIM6_REG_MINLEN == pimhdr + reghdr + eip6hdr == 4 + 4 + 40 1842 */ 1843 minlen = (pimlen >= PIM6_REG_MINLEN) ? PIM6_REG_MINLEN : PIM_MINLEN; 1844 1845 /* 1846 * Make sure that the IP6 and PIM headers in contiguous memory, and 1847 * possibly the PIM REGISTER header 1848 */ 1849 if (m->m_len < off + minlen) { 1850 m = m_pullup(m, off + minlen); 1851 if (m == NULL) { 1852 IP6STAT_INC(ip6s_exthdrtoolong); 1853 return (IPPROTO_DONE); 1854 } 1855 } 1856 ip6 = mtod(m, struct ip6_hdr *); 1857 pim = (struct pim *)((caddr_t)ip6 + off); 1858 1859 #define PIM6_CHECKSUM 1860 #ifdef PIM6_CHECKSUM 1861 { 1862 int cksumlen; 1863 1864 /* 1865 * Validate checksum. 1866 * If PIM REGISTER, exclude the data packet 1867 */ 1868 if (pim->pim_type == PIM_REGISTER) 1869 cksumlen = PIM_MINLEN; 1870 else 1871 cksumlen = pimlen; 1872 1873 if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) { 1874 PIM6STAT_INC(pim6s_rcv_badsum); 1875 MRT6_DLOG(DEBUG_PIM, "invalid checksum"); 1876 m_freem(m); 1877 return (IPPROTO_DONE); 1878 } 1879 } 1880 #endif /* PIM_CHECKSUM */ 1881 1882 /* PIM version check */ 1883 if (pim->pim_ver != PIM_VERSION) { 1884 PIM6STAT_INC(pim6s_rcv_badversion); 1885 MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, 1886 "incorrect version %d, expecting %d", 1887 pim->pim_ver, PIM_VERSION); 1888 m_freem(m); 1889 return (IPPROTO_DONE); 1890 } 1891 1892 if (pim->pim_type == PIM_REGISTER) { 1893 /* 1894 * since this is a REGISTER, we'll make a copy of the register 1895 * headers ip6+pim+u_int32_t+encap_ip6, to be passed up to the 1896 * routing daemon. 1897 */ 1898 static struct sockaddr_in6 dst = { sizeof(dst), AF_INET6 }; 1899 1900 struct mbuf *mcp; 1901 struct ip6_hdr *eip6; 1902 u_int32_t *reghdr; 1903 #ifdef MRT6DEBUG 1904 char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; 1905 #endif 1906 1907 PIM6STAT_INC(pim6s_rcv_registers); 1908 1909 if (mfct->register_mif >= mfct->nummifs || 1910 mfct->register_mif == (mifi_t)-1) { 1911 MRT6_DLOG(DEBUG_PIM, "register mif not set: %d", 1912 mfct->register_mif); 1913 m_freem(m); 1914 return (IPPROTO_DONE); 1915 } 1916 1917 reghdr = (u_int32_t *)(pim + 1); 1918 1919 if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) 1920 goto pim6_input_to_daemon; 1921 1922 /* 1923 * Validate length 1924 */ 1925 if (pimlen < PIM6_REG_MINLEN) { 1926 PIM6STAT_INC(pim6s_rcv_tooshort); 1927 PIM6STAT_INC(pim6s_rcv_badregisters); 1928 MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "register packet " 1929 "size too small %d from %s", 1930 pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src)); 1931 m_freem(m); 1932 return (IPPROTO_DONE); 1933 } 1934 1935 eip6 = (struct ip6_hdr *) (reghdr + 1); 1936 MRT6_DLOG(DEBUG_PIM, "eip6: %s -> %s, eip6 plen %d", 1937 ip6_sprintf(ip6bufs, &eip6->ip6_src), 1938 ip6_sprintf(ip6bufd, &eip6->ip6_dst), 1939 ntohs(eip6->ip6_plen)); 1940 1941 /* verify the version number of the inner packet */ 1942 if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { 1943 PIM6STAT_INC(pim6s_rcv_badregisters); 1944 MRT6_DLOG(DEBUG_ANY, "invalid IP version (%d) " 1945 "of the inner packet", 1946 (eip6->ip6_vfc & IPV6_VERSION)); 1947 m_freem(m); 1948 return (IPPROTO_DONE); 1949 } 1950 1951 /* verify the inner packet is destined to a mcast group */ 1952 if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) { 1953 PIM6STAT_INC(pim6s_rcv_badregisters); 1954 MRT6_DLOG(DEBUG_PIM, "inner packet of register " 1955 "is not multicast %s", 1956 ip6_sprintf(ip6bufd, &eip6->ip6_dst)); 1957 m_freem(m); 1958 return (IPPROTO_DONE); 1959 } 1960 1961 /* 1962 * make a copy of the whole header to pass to the daemon later. 1963 */ 1964 mcp = m_copym(m, 0, off + PIM6_REG_MINLEN, M_NOWAIT); 1965 if (mcp == NULL) { 1966 MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "pim register: " 1967 "could not copy register head"); 1968 m_freem(m); 1969 return (IPPROTO_DONE); 1970 } 1971 1972 /* 1973 * forward the inner ip6 packet; point m_data at the inner ip6. 1974 */ 1975 m_adj(m, off + PIM_MINLEN); 1976 MRT6_DLOG(DEBUG_PIM, "forwarding decapsulated register: " 1977 "src %s, dst %s, mif %d", 1978 ip6_sprintf(ip6bufs, &eip6->ip6_src), 1979 ip6_sprintf(ip6bufd, &eip6->ip6_dst), mfct->register_mif); 1980 1981 if_simloop(mfct->miftable[mfct->register_mif].m6_ifp, m, 1982 dst.sin6_family, 0); 1983 1984 /* prepare the register head to send to the mrouting daemon */ 1985 m = mcp; 1986 } 1987 1988 /* 1989 * Pass the PIM message up to the daemon; if it is a register message 1990 * pass the 'head' only up to the daemon. This includes the 1991 * encapsulator ip6 header, pim header, register header and the 1992 * encapsulated ip6 header. 1993 */ 1994 pim6_input_to_daemon: 1995 return (rip6_input(&m, &off, proto)); 1996 } 1997 1998 static void 1999 ip6_mrouter_ifdetach(void *arg __unused, struct ifnet *ifp) 2000 { 2001 struct mf6ctable *mfct; 2002 2003 if (!V_ip6_mrouting_enabled) 2004 return; 2005 for (int i = 0; i < V_nmfctables; i++) { 2006 mfct = &V_mfctables[i]; 2007 2008 MIF6_LOCK(); 2009 restart: 2010 for (mifi_t mifi = 0; mifi < mfct->nummifs; mifi++) { 2011 int error __diagused; 2012 2013 if (mfct->miftable[mifi].m6_ifp != ifp) 2014 continue; 2015 error = del_m6if_locked(mfct, mifi); 2016 KASSERT(error == 0, 2017 ("del_m6if_locked(%s) %d", ifp->if_xname, error)); 2018 goto restart; 2019 } 2020 MIF6_UNLOCK(); 2021 } 2022 } 2023 2024 static void 2025 ip6_mroute_rtnumfibs_change(void *arg __unused, uint32_t ntables) 2026 { 2027 struct mf6ctable *mfctables, *omfctables; 2028 2029 KASSERT(ntables >= V_nmfctables, 2030 ("%s: ntables %u nmfctables %u", __func__, ntables, V_nmfctables)); 2031 2032 mfctables = mallocarray(ntables, sizeof(*mfctables), M_MRTABLE6, 2033 M_WAITOK | M_ZERO); 2034 omfctables = V_mfctables; 2035 2036 MROUTER6_LOCK(); 2037 MFC6_LOCK(); 2038 for (int i = 0; i < V_nmfctables; i++) 2039 memcpy(&mfctables[i], &omfctables[i], sizeof(*mfctables)); 2040 atomic_store_rel_ptr((uintptr_t *)&V_mfctables, (uintptr_t)mfctables); 2041 MFC6_UNLOCK(); 2042 MROUTER6_UNLOCK(); 2043 2044 NET_EPOCH_WAIT(); 2045 2046 V_nmfctables = ntables; 2047 free(omfctables, M_MRTABLE6); 2048 } 2049 2050 static void 2051 vnet_mroute_init(const void *unused __unused) 2052 { 2053 ip6_mroute_rtnumfibs_change(NULL, V_rt_numfibs); 2054 2055 callout_init_mtx(&V_expire_upcalls_ch, MFC6_LOCKPTR(), 0); 2056 } 2057 VNET_SYSINIT(vnet_mroute6_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mroute_init, 2058 NULL); 2059 2060 static void 2061 vnet_mroute_uninit(const void *unused __unused) 2062 { 2063 callout_drain(&V_expire_upcalls_ch); 2064 free(V_mfctables, M_MRTABLE6); 2065 V_mfctables = NULL; 2066 } 2067 VNET_SYSUNINIT(vnet_mroute6_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, 2068 vnet_mroute_uninit, NULL); 2069 2070 static int 2071 ip6_mroute_modevent(module_t mod, int type, void *unused) 2072 { 2073 2074 switch (type) { 2075 case MOD_LOAD: 2076 MROUTER6_LOCK_INIT(); 2077 MFC6_LOCK_INIT(); 2078 MIF6_LOCK_INIT(); 2079 2080 ifdetach_tag = EVENTHANDLER_REGISTER( 2081 ifnet_departure_event, ip6_mrouter_ifdetach, 2082 NULL, EVENTHANDLER_PRI_ANY); 2083 rtnumfibs_change_tag = EVENTHANDLER_REGISTER( 2084 rtnumfibs_change, ip6_mroute_rtnumfibs_change, 2085 NULL, EVENTHANDLER_PRI_ANY); 2086 2087 pim6_encap_cookie = ip6_encap_attach(&ipv6_encap_cfg, 2088 NULL, M_WAITOK); 2089 if (pim6_encap_cookie == NULL) { 2090 printf("ip6_mroute: unable to attach pim6 encap\n"); 2091 MIF6_LOCK_DESTROY(); 2092 MFC6_LOCK_DESTROY(); 2093 MROUTER6_LOCK_DESTROY(); 2094 return (EINVAL); 2095 } 2096 2097 ip6_mforward = X_ip6_mforward; 2098 ip6_mrouter_done = X_ip6_mrouter_done; 2099 ip6_mrouter_get = X_ip6_mrouter_get; 2100 ip6_mrouter_set = X_ip6_mrouter_set; 2101 mrt6_ioctl = X_mrt6_ioctl; 2102 break; 2103 2104 case MOD_UNLOAD: 2105 if (V_ip6_mrouting_enabled) 2106 return (EBUSY); 2107 2108 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 2109 ifdetach_tag); 2110 EVENTHANDLER_DEREGISTER(rtnumfibs_change, 2111 rtnumfibs_change_tag); 2112 2113 if (pim6_encap_cookie) { 2114 ip6_encap_detach(pim6_encap_cookie); 2115 pim6_encap_cookie = NULL; 2116 } 2117 2118 ip6_mforward = NULL; 2119 ip6_mrouter_done = NULL; 2120 ip6_mrouter_get = NULL; 2121 ip6_mrouter_set = NULL; 2122 mrt6_ioctl = NULL; 2123 2124 MIF6_LOCK_DESTROY(); 2125 MFC6_LOCK_DESTROY(); 2126 MROUTER6_LOCK_DESTROY(); 2127 break; 2128 2129 default: 2130 return (EOPNOTSUPP); 2131 } 2132 2133 return (0); 2134 } 2135 2136 static moduledata_t ip6_mroutemod = { 2137 "ip6_mroute", 2138 ip6_mroute_modevent, 2139 0 2140 }; 2141 2142 DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE); 2143