1 /* 2 * IP multicast forwarding procedures 3 * 4 * Written by David Waitzman, BBN Labs, August 1988. 5 * Modified by Steve Deering, Stanford, February 1989. 6 * Modified by Mark J. Steiglitz, Stanford, May, 1991 7 * Modified by Van Jacobson, LBL, January 1993 8 * Modified by Ajit Thyagarajan, PARC, August 1993 9 * 10 * MROUTING 1.8 11 */ 12 13 14 #include <sys/param.h> 15 #include <sys/systm.h> 16 #include <sys/mbuf.h> 17 #include <sys/socket.h> 18 #include <sys/socketvar.h> 19 #include <sys/protosw.h> 20 #include <sys/errno.h> 21 #include <sys/time.h> 22 #include <sys/ioctl.h> 23 #include <sys/syslog.h> 24 #include <net/if.h> 25 #include <net/route.h> 26 #include <net/raw_cb.h> 27 #include <netinet/in.h> 28 #include <netinet/in_systm.h> 29 #include <netinet/ip.h> 30 #include <netinet/ip_var.h> 31 #include <netinet/in_pcb.h> 32 #include <netinet/in_var.h> 33 #include <netinet/igmp.h> 34 #include <netinet/igmp_var.h> 35 #include <netinet/ip_mroute.h> 36 37 #ifndef NTOHL 38 #if BYTE_ORDER != BIG_ENDIAN 39 #define NTOHL(d) ((d) = ntohl((d))) 40 #define NTOHS(d) ((d) = ntohs((u_short)(d))) 41 #define HTONL(d) ((d) = htonl((d))) 42 #define HTONS(d) ((d) = htons((u_short)(d))) 43 #else 44 #define NTOHL(d) 45 #define NTOHS(d) 46 #define HTONL(d) 47 #define HTONS(d) 48 #endif 49 #endif 50 51 #ifndef MROUTING 52 /* 53 * Dummy routines and globals used when multicast routing is not compiled in. 
54 */ 55 56 u_int ip_mrtproto = 0; 57 struct socket *ip_mrouter = NULL; 58 struct mrtstat mrtstat; 59 60 61 int 62 _ip_mrouter_cmd(cmd, so, m) 63 int cmd; 64 struct socket *so; 65 struct mbuf *m; 66 { 67 return(EOPNOTSUPP); 68 } 69 70 int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd; 71 72 int 73 _ip_mrouter_done() 74 { 75 return(0); 76 } 77 78 int (*ip_mrouter_done)(void) = _ip_mrouter_done; 79 80 int 81 _ip_mforward(ip, ifp, m, imo) 82 struct ip *ip; 83 struct ifnet *ifp; 84 struct mbuf *m; 85 struct ip_moptions *imo; 86 { 87 return(0); 88 } 89 90 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 91 struct ip_moptions *) = _ip_mforward; 92 93 int 94 _mrt_ioctl(int req, caddr_t data, struct proc *p) 95 { 96 return EOPNOTSUPP; 97 } 98 99 int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl; 100 101 void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */ 102 rip_input(m); 103 } 104 105 int (*legal_vif_num)(int) = 0; 106 107 #else 108 109 #define INSIZ sizeof(struct in_addr) 110 #define same(a1, a2) \ 111 (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) 112 113 #define MT_MRTABLE MT_RTABLE /* since nothing else uses it */ 114 115 /* 116 * Globals. All but ip_mrouter and ip_mrtproto could be static, 117 * except for netstat or debugging purposes. 
 */
#ifndef MROUTE_LKM
/* Socket the user-level routing daemon registered via DVMRP_INIT. */
struct socket  *ip_mrouter  = NULL;
struct mrtstat	mrtstat;

int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
#else
extern struct mrtstat mrtstat;
extern int ip_mrtproto;
#endif

#define NO_RTE_FOUND	0x1
#define RTE_FOUND	0x2

/* Forwarding cache: hash chains of mbufs each holding a struct mfc. */
struct mbuf    *mfctable[MFCTBLSIZ];
/* Virtual-interface table, indexed by vifi_t; 0..numvifs-1 in use. */
struct vif	viftable[MAXVIFS];
u_int		mrtdebug = 0;	  /* debug level	*/
u_int		tbfdebug = 0;     /* tbf debug level	*/

u_long timeout_val = 0;			/* count of outstanding upcalls */

/*
 * Define the token bucket filter structures
 * tbftable -> each vif has one of these for storing info
 * qtable -> each interface has an associated queue of pkts
 */

struct tbf tbftable[MAXVIFS];
struct pkt_queue qtable[MAXVIFS][MAXQSIZE];

/*
 * 'Interfaces' associated with decapsulator (so we can tell
 * packets that went through it from ones that get reflected
 * by a broken gateway). These interfaces are never linked into
 * the system ifnet list & no routes point to them. I.e., packets
 * can't be sent this way. They only exist as a placeholder for
 * multicast source verification.
 */
struct ifnet multicast_decap_if[MAXVIFS];

#define ENCAP_TTL 64
#define ENCAP_PROTO 4

/*
 * prototype IP hdr for encapsulated packets
 * NOTE: positional initializer -- field order must match struct ip,
 * including the endian-dependent version/header-length bitfield pair.
 */
struct ip multicast_encap_iphdr = {
#if BYTE_ORDER == LITTLE_ENDIAN
	sizeof(struct ip) >> 2, IPVERSION,
#else
	IPVERSION, sizeof(struct ip) >> 2,
#endif
	0,				/* tos */
	sizeof(struct ip),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, ENCAP_PROTO,
	0,				/* checksum */
};

/*
 * Private variables.
 */
/* One past the highest vif index in use; shrinks in del_vif(). */
static vifi_t	   numvifs    = 0;

/*
 * one-back cache used by multiencap_decap to locate a tunnel's vif
 * given a datagram's src ip address.
 */
static u_long last_encap_src;		/* last tunnel src seen (net order) */
static struct vif *last_encap_vif;	/* vif that src mapped to */

/* Forward declarations for the static helpers defined below. */
static u_long nethash_fc(u_long, u_long);
static struct mfc *mfcfind(u_long, u_long);
int get_sg_cnt(struct sioc_sg_req *);
int get_vif_cnt(struct sioc_vif_req *);
int get_vifs(caddr_t);
static int add_vif(struct vifctl *);
static int del_vif(vifi_t *);
static int add_mfc(struct mfcctl *);
static int del_mfc(struct delmfcctl *);
static void cleanup_cache(void *);
static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
		  struct ip_moptions *);
extern int (*legal_vif_num)(int);
static void phyint_send(struct ip *, struct vif *, struct mbuf *);
static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
static void encap_send(struct ip *, struct vif *, struct mbuf *);
/* Token-bucket-filter (rate limiting) entry points. */
void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
		 struct ip_moptions *);
void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
void tbf_process_q(struct vif *);
void tbf_dequeue(struct vif *, int);
void tbf_reprocess_q(void *);
int tbf_dq_sel(struct vif *, struct ip *);
void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
void tbf_update_tokens(struct vif *);
static int priority(struct vif *, struct ip *);
static int ip_mrouter_init(struct socket *);

/*
 * A simple hash function: returns MFCHASHMOD of the low-order octet of
 * the argument's network or subnet number and the multicast group assoc.
219 */ 220 static u_long 221 nethash_fc(m,n) 222 register u_long m; 223 register u_long n; 224 { 225 struct in_addr in1; 226 struct in_addr in2; 227 228 in1.s_addr = m; 229 m = in_netof(in1); 230 while ((m & 0xff) == 0) m >>= 8; 231 232 in2.s_addr = n; 233 n = in_netof(in2); 234 while ((n & 0xff) == 0) n >>= 8; 235 236 return (MFCHASHMOD(m) ^ MFCHASHMOD(n)); 237 } 238 239 /* 240 * this is a direct-mapped cache used to speed the mapping from a 241 * datagram source address to the associated multicast route. Note 242 * that unlike mrttable, the hash is on IP address, not IP net number. 243 */ 244 #define MFCHASHSIZ 1024 245 #define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 246 ((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1)) 247 struct mfc *mfchash[MFCHASHSIZ]; 248 249 /* 250 * Find a route for a given origin IP address and Multicast group address 251 * Type of service parameter to be added in the future!!! 252 */ 253 #define MFCFIND(o, g, rt) { \ 254 register u_int _mrhasho = o; \ 255 register u_int _mrhashg = g; \ 256 _mrhasho = MFCHASH(_mrhasho, _mrhashg); \ 257 ++mrtstat.mrts_mfc_lookups; \ 258 rt = mfchash[_mrhasho]; \ 259 if ((rt == NULL) || \ 260 ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \ 261 (g != rt->mfc_mcastgrp.s_addr)) \ 262 if ((rt = mfcfind(o, g)) != NULL) \ 263 mfchash[_mrhasho] = rt; \ 264 } 265 266 /* 267 * Find route by examining hash table entries 268 */ 269 static struct mfc * 270 mfcfind(origin, mcastgrp) 271 u_long origin; 272 u_long mcastgrp; 273 { 274 register struct mbuf *mb_rt; 275 register struct mfc *rt; 276 register u_long hash; 277 278 hash = nethash_fc(origin, mcastgrp); 279 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 280 rt = mtod(mb_rt, struct mfc *); 281 if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) && 282 (mcastgrp == rt->mfc_mcastgrp.s_addr) && 283 (mb_rt->m_act == NULL)) 284 return (rt); 285 } 286 mrtstat.mrts_mfc_misses++; 287 return NULL; 288 } 289 290 /* 
 * Macros to compute elapsed time efficiently
 * Borrowed from Van Jacobson's scheduling code
 */
#define TV_DELTA(a, b, delta) { \
	register int xxs; \
\
	delta = (a).tv_usec - (b).tv_usec; \
	if ((xxs = (a).tv_sec - (b).tv_sec)) { \
		switch (xxs) { \
		case 2: \
			delta += 1000000; \
			/* fall through */ \
		case 1: \
			delta += 1000000; \
			break; \
		default: \
			delta += (1000000 * xxs); \
		} \
	} \
}

#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
	(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)

/*
 * Handle DVMRP setsockopt commands to modify the multicast routing tables.
 * Only the registered mrouter socket may issue anything but DVMRP_INIT.
 */
int
X_ip_mrouter_cmd(cmd, so, m)
	int cmd;
	struct socket *so;
	struct mbuf *m;
{
	if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES;

	switch (cmd) {
	case DVMRP_INIT:	return ip_mrouter_init(so);
	case DVMRP_DONE:	return ip_mrouter_done();
	case DVMRP_ADD_VIF:	return add_vif (mtod(m, struct vifctl *));
	case DVMRP_DEL_VIF:	return del_vif (mtod(m, vifi_t *));
	case DVMRP_ADD_MFC:	return add_mfc (mtod(m, struct mfcctl *));
	case DVMRP_DEL_MFC:	return del_mfc (mtod(m, struct delmfcctl *));
	default:		return EOPNOTSUPP;
	}
}

#ifndef MROUTE_LKM
int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd;
#endif

/*
 * Handle ioctl commands to obtain information from the cache
 */
int
X_mrt_ioctl(cmd, data)
	int cmd;
	caddr_t data;
{
	int error = 0;

	switch (cmd) {
	case (SIOCGETVIFINF):		/* Read Virtual Interface (m/cast) */
		return (get_vifs(data));
		break;
	case (SIOCGETVIFCNT):
		return (get_vif_cnt((struct sioc_vif_req *)data));
		break;
	case (SIOCGETSGCNT):
		return (get_sg_cnt((struct sioc_sg_req *)data));
		break;
	default:
		return (EINVAL);
		break;
	}
	return error;
}

#ifndef MROUTE_LKM
/*
 * NOTE(review): X_mrt_ioctl takes two parameters but is assigned to a
 * three-parameter pointer type (the struct proc * is ignored).  This
 * relies on K&R-era calling conventions -- confirm before tightening.
 */
int (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl;
#else
extern int (*mrt_ioctl)(int, caddr_t, struct proc *);
#endif

/*
 * returns the packet count for the source group provided
 * (0xffffffff in req->count signals "no such entry" to the caller).
 */
int
get_sg_cnt(req)
	register struct sioc_sg_req *req;
{
	register struct mfc *rt;
	int s;

	s = splnet();
	MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
	splx(s);
	if (rt != NULL)
		req->count = rt->mfc_pkt_cnt;
	else
		req->count = 0xffffffff;

	return 0;
}

/*
 * returns the input and output packet counts on the interface provided
 * NOTE(review): req->vifi is not range-checked against numvifs here --
 * callers presumably validate it; confirm before exposing more widely.
 */
int
get_vif_cnt(req)
	register struct sioc_vif_req *req;
{
	register vifi_t vifi = req->vifi;

	req->icount = viftable[vifi].v_pkt_in;
	req->ocount = viftable[vifi].v_pkt_out;

	return 0;
}

/*
 * Copy the active entries of viftable out to the user buffer described
 * by the vif_conf header at "data".  Returns ENOSPC when the supplied
 * buffer is too small, or any copyout() error.
 */
int
get_vifs(data)
	char *data;
{
	struct vif_conf *vifc = (struct vif_conf *)data;
	struct vif_req *vifrp, vifr;
	int space, error=0;

	vifi_t vifi;
	int s;

	space = vifc->vifc_len;
	vifrp = vifc->vifc_req;

	s = splnet();
	vifc->vifc_num=numvifs;

	for (vifi = 0; vifi < numvifs; vifi++, vifrp++) {
		if (viftable[vifi].v_lcl_addr.s_addr != 0) {
			vifr.v_flags=viftable[vifi].v_flags;
			vifr.v_threshold=viftable[vifi].v_threshold;
			vifr.v_lcl_addr=viftable[vifi].v_lcl_addr;
			vifr.v_rmt_addr=viftable[vifi].v_rmt_addr;
			/* NOTE(review): strncpy leaves v_if_name unterminated
			 * when if_name is exactly IFNAMSIZ bytes -- confirm
			 * consumers treat it as a fixed-width field. */
			strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ);
			if ((space -= sizeof(vifr)) < 0) {
				splx(s);
				return(ENOSPC);
			}
			error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr));
			if (error) {
				splx(s);
				return(error);
			}
		}
	}
	splx(s);
	return 0;
}
/*
 * Enable multicast routing.  Only a raw IGMP socket may register, and
 * only one mrouter may be active at a time.
 */
static int
ip_mrouter_init(so)
	struct socket *so;
{
	if (so->so_type != SOCK_RAW ||
	    so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;

	if (ip_mrouter != NULL) return EADDRINUSE;

	ip_mrouter = so;

	if (mrtdebug)
		log(LOG_DEBUG, "ip_mrouter_init");

	return 0;
}

/*
 * Disable multicast routing: drop promiscuous multicast reception on
 * every phyint, cancel pending upcall timeouts, and free all vif and
 * forwarding-cache state.
 */
int
X_ip_mrouter_done()
{
	vifi_t vifi;
	int i;
	struct ifnet *ifp;
	struct ifreq ifr;
	struct mbuf *mb_rt;
	struct mbuf *m;
	struct rtdetq *rte;
	int s;

	s = splnet();

	/*
	 * For each phyint in use, disable promiscuous reception of all IP
	 * multicasts.
	 */
	for (vifi = 0; vifi < numvifs; vifi++) {
		if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
		    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
			((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
			((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
								= INADDR_ANY;
			ifp = viftable[vifi].v_ifp;
			(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
		}
	}
	bzero((caddr_t)qtable, sizeof(qtable));
	bzero((caddr_t)tbftable, sizeof(tbftable));
	bzero((caddr_t)viftable, sizeof(viftable));
	numvifs = 0;

	/*
	 * Check if any outstanding timeouts remain
	 * (entries with m_act != NULL have a cleanup_cache timeout armed;
	 * cancel it and drop the queued packets).
	 */
	if (timeout_val != 0)
		for (i = 0; i < MFCTBLSIZ; i++) {
			mb_rt = mfctable[i];
			while (mb_rt) {
				if ( mb_rt->m_act != NULL) {
					untimeout(cleanup_cache, (caddr_t)mb_rt);
					while (mb_rt->m_act) {
						m = mb_rt->m_act;
						mb_rt->m_act = m->m_act;
						rte = mtod(m, struct rtdetq *);
						m_freem(rte->m);
						m_free(m);
					}
					timeout_val--;
				}
				mb_rt = mb_rt->m_next;
			}
			if (timeout_val == 0)
				break;
		}

	/*
	 * Free all multicast forwarding cache entries.
	 */
	for (i = 0; i < MFCTBLSIZ; i++)
		m_freem(mfctable[i]);

	bzero((caddr_t)mfctable, sizeof(mfctable));
	bzero((caddr_t)mfchash, sizeof(mfchash));

	/*
	 * Reset de-encapsulation cache
	 * NOTE(review): last_encap_src is a u_long, so assigning NULL
	 * relies on NULL being a plain 0 here.
	 */
	last_encap_src = NULL;
	last_encap_vif = NULL;

	ip_mrouter = NULL;

	splx(s);

	if (mrtdebug)
		log(LOG_DEBUG, "ip_mrouter_done");

	return 0;
}

#ifndef MROUTE_LKM
int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
#endif

/*
 * Add a vif to the vif table.  For non-tunnel vifs the matching
 * interface must support multicast and is switched to promiscuous
 * multicast reception.  Returns EINVAL, EADDRINUSE, EADDRNOTAVAIL,
 * EOPNOTSUPP or the SIOCADDMULTI error.
 */
static int
add_vif(vifcp)
	register struct vifctl *vifcp;
{
	register struct vif *vifp = viftable + vifcp->vifc_vifi;
	static struct sockaddr_in sin = {sizeof sin, AF_INET};
	struct ifaddr *ifa;
	struct ifnet *ifp;
	struct ifreq ifr;
	int error, s;
	struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;

	if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
	if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;

	/* Find the interface with an address in AF_INET family */
	sin.sin_addr = vifcp->vifc_lcl_addr;
	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
	if (ifa == 0) return EADDRNOTAVAIL;
	ifp = ifa->ifa_ifp;

	if (vifcp->vifc_flags & VIFF_TUNNEL) {
		if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
			/* Encapsulated tunnel: use a placeholder decap
			 * interface so source verification can tell these
			 * packets apart (see multicast_decap_if above). */
			static int inited = 0;
			if(!inited) {
				for (s = 0; s < MAXVIFS; ++s) {
					multicast_decap_if[s].if_name = "mdecap";
					multicast_decap_if[s].if_unit = s;
				}
				inited = 1;
			}
			ifp = &multicast_decap_if[vifcp->vifc_vifi];
		} else {
			/* Source-route tunnel: no ifnet at all. */
			ifp = 0;
		}
	} else {
		/* Make sure the interface supports multicast */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return EOPNOTSUPP;

		/* Enable promiscuous reception of all IP multicasts from the if */
		((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
		((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
		s = splnet();
		error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
		splx(s);
		if (error)
			return error;
	}

	s = splnet();
	/* define parameters for the tbf structure */
	vifp->v_tbf = v_tbf;
	vifp->v_tbf->q_len = 0;
	vifp->v_tbf->n_tok = 0;
	vifp->v_tbf->last_pkt_t = 0;

	vifp->v_flags     = vifcp->vifc_flags;
	vifp->v_threshold = vifcp->vifc_threshold;
	vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
	vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
	vifp->v_ifp       = ifp;
	vifp->v_rate_limit= vifcp->vifc_rate_limit;
	/* initialize per vif pkt counters */
	vifp->v_pkt_in    = 0;
	vifp->v_pkt_out   = 0;
	splx(s);

	/* Adjust numvifs up if the vifi is higher than numvifs */
	if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;

	if (mrtdebug)
		log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
		    vifcp->vifc_vifi,
		    ntohl(vifcp->vifc_lcl_addr.s_addr),
		    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
		    ntohl(vifcp->vifc_rmt_addr.s_addr),
		    vifcp->vifc_threshold,
		    vifcp->vifc_rate_limit);

	return 0;
}

/*
 * Delete a vif from the vif table: undo the promiscuous reception,
 * invalidate the encap one-back cache if it points here, zero the
 * per-vif state and shrink numvifs to the highest remaining vif + 1.
 */
static int
del_vif(vifip)
	vifi_t *vifip;
{
	register struct vif *vifp = viftable + *vifip;
	register vifi_t vifi;
	struct ifnet *ifp;
	struct ifreq ifr;
	int s;

	if (*vifip >= numvifs) return EINVAL;
	if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;

	s = splnet();

	if (!(vifp->v_flags & VIFF_TUNNEL)) {
		((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
		((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
		ifp = vifp->v_ifp;
		(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
	}

	if (vifp == last_encap_vif) {
		last_encap_vif = 0;
		last_encap_src = 0;
	}

	bzero((caddr_t)qtable[*vifip],
	    sizeof(qtable[*vifip]));
	bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
	bzero((caddr_t)vifp, sizeof(*vifp));

	/* Adjust numvifs down */
	for (vifi = numvifs; vifi > 0; vifi--)
		if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
	numvifs = vifi;

	splx(s);

	if (mrtdebug)
		log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);

	return 0;
}

/*
 * Add an mfc entry.  Three cases: update an existing entry, resolve a
 * pending upcall (forwarding and freeing any packets queued on m_act),
 * or insert a brand-new entry at the head of the hash chain.
 */
static int
add_mfc(mfccp)
	struct mfcctl *mfccp;
{
	struct mfc *rt;
	struct mfc *rt1 = 0;
	register struct mbuf *mb_rt;
	struct mbuf *prev_mb_rt;
	u_long hash;
	struct mbuf *mb_ntry;
	struct rtdetq *rte;
	register u_short nstl;
	int s;
	int i;

	rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);

	/* If an entry already exists, just update the fields */
	if (rt) {
		if (mrtdebug)
			log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x",
			    ntohl(mfccp->mfcc_origin.s_addr),
			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
			    ntohl(mfccp->mfcc_originmask.s_addr),
			    mfccp->mfcc_parent);

		s = splnet();
		rt->mfc_parent = mfccp->mfcc_parent;
		for (i = 0; i < numvifs; i++)
			VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
		splx(s);
		return 0;
	}

	/*
	 * Find the entry for which the upcall was made and update
	 */
	s = splnet();
	hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
	for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0;
	     mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {

		rt = mtod(mb_rt, struct mfc *);
		if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
		     == mfccp->mfcc_origin.s_addr) &&
		    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
		    (mb_rt->m_act != NULL)) {

			if (!nstl++) {
				if (mrtdebug)
					log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x",
					    ntohl(mfccp->mfcc_origin.s_addr),
					    ntohl(mfccp->mfcc_mcastgrp.s_addr),
					    ntohl(mfccp->mfcc_originmask.s_addr),
					    mfccp->mfcc_parent, mb_rt->m_act);

				rt->mfc_origin     = mfccp->mfcc_origin;
				rt->mfc_originmask = mfccp->mfcc_originmask;
				rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
				rt->mfc_parent     = mfccp->mfcc_parent;
				for (i = 0; i < numvifs; i++)
					VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
				/* initialize pkt counters per src-grp */
				rt->mfc_pkt_cnt = 0;
				rt1 = rt;
			}

			/* prevent cleanup of cache entry */
			untimeout(cleanup_cache, (caddr_t)mb_rt);
			timeout_val--;

			/* free packets Qed at the end of this entry
			 * (forwarding each through the now-resolved route) */
			while (mb_rt->m_act) {
				mb_ntry = mb_rt->m_act;
				rte = mtod(mb_ntry, struct rtdetq *);
				ip_mdq(rte->m, rte->ifp, rte->tunnel_src,
				    rt1, rte->imo);
				mb_rt->m_act = mb_ntry->m_act;
				m_freem(rte->m);
				m_free(mb_ntry);
			}

			/*
			 * If more than one entry was created for a single upcall
			 * delete that entry
			 */
			if (nstl > 1) {
				MFREE(mb_rt, prev_mb_rt->m_next);
				mb_rt = prev_mb_rt;
			}
		}
	}

	/*
	 * It is possible that an entry is being inserted without an upcall
	 */
	if (nstl == 0) {
		if (mrtdebug)
			log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x",
			    hash, ntohl(mfccp->mfcc_origin.s_addr),
			    ntohl(mfccp->mfcc_mcastgrp.s_addr),
			    ntohl(mfccp->mfcc_originmask.s_addr),
			    mfccp->mfcc_parent);

		for (prev_mb_rt = mb_rt = mfctable[hash];
		     mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {

			rt = mtod(mb_rt, struct mfc *);
			if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
			     == mfccp->mfcc_origin.s_addr) &&
			    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {

				rt->mfc_origin     = mfccp->mfcc_origin;
				rt->mfc_originmask = mfccp->mfcc_originmask;
				rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
				rt->mfc_parent     = mfccp->mfcc_parent;
				for (i = 0; i < numvifs; i++)
					VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
				/* initialize pkt counters per src-grp */
				rt->mfc_pkt_cnt = 0;
			}
		}
		if (mb_rt == NULL) {
			/* no upcall, so make a new entry */
			MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
			if (mb_rt == NULL) {
				splx(s);
				return ENOBUFS;
			}

			rt = mtod(mb_rt, struct mfc *);

			/* insert new entry at head of hash chain */
			rt->mfc_origin     = mfccp->mfcc_origin;
			rt->mfc_originmask = mfccp->mfcc_originmask;
			rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
			rt->mfc_parent     = mfccp->mfcc_parent;
			for (i = 0; i < numvifs; i++)
				VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
			/* initialize pkt counters per src-grp */
			rt->mfc_pkt_cnt = 0;

			/* link into table */
			mb_rt->m_next  = mfctable[hash];
			mfctable[hash] = mb_rt;
			mb_rt->m_act = NULL;
		}
	}
	splx(s);
	return 0;
}

/*
 * Delete an mfc entry: clear any direct-mapped cache slots pointing at
 * it, then unlink and free it.  Returns ESRCH if no resolved entry
 * matches (entries with a pending upcall are skipped).
 */
static int
del_mfc(mfccp)
	struct delmfcctl *mfccp;
{
	struct in_addr	origin;
	struct in_addr	mcastgrp;
	struct mfc	*rt;
	struct mbuf	*mb_rt;
	struct mbuf	*prev_mb_rt;
	u_long		hash;
	struct mfc	**cmfc;
	struct mfc	**cmfcend;
	int s;

	origin = mfccp->mfcc_origin;
	mcastgrp = mfccp->mfcc_mcastgrp;
	hash = nethash_fc(origin.s_addr, mcastgrp.s_addr);

	if (mrtdebug)
		log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x",
		    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));

	for (prev_mb_rt = mb_rt = mfctable[hash]
	     ; mb_rt
	     ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
		rt = mtod(mb_rt, struct mfc *);
		if (origin.s_addr == rt->mfc_origin.s_addr &&
		    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
		    mb_rt->m_act == NULL)
			break;
	}
	if (mb_rt == NULL) {
		return ESRCH;
	}

	s = splnet();

	/* purge any direct-mapped cache references to this route */
	cmfc = mfchash;
	cmfcend = cmfc + MFCHASHSIZ;
	for ( ; cmfc < cmfcend; ++cmfc)
		if (*cmfc == rt)
			*cmfc = 0;

	if (prev_mb_rt != mb_rt) {	/* if moved past head of list */
		MFREE(mb_rt, prev_mb_rt->m_next);
	} else		/* delete head of list, it is in the table */
		mfctable[hash] = m_free(mb_rt);

	splx(s);

	return 0;
}

/*
 * IP multicast forwarding function. This function assumes that the packet
 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
 * pointed to by "ifp", and the packet is to be relayed to other networks
 * that have members of the packet's destination IP multicast group.
 *
 * The packet is returned unscathed to the caller, unless it is tunneled
 * or erroneous, in which case a non-zero return value tells the caller to
 * discard it.
 */

#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
#define TUNNEL_LEN  12	/* # bytes of IP option for tunnel encapsulation  */

int
X_ip_mforward(ip, ifp, m, imo)
	register struct ip *ip;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ip_moptions *imo;
{
	register struct mfc *rt;
	register u_char *ipoptions;
	u_long tunnel_src;
	static struct sockproto	k_igmpproto	= { AF_INET, IPPROTO_IGMP };
	static struct sockaddr_in	k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
	static struct sockaddr_in	k_igmpdst	= { sizeof k_igmpdst, AF_INET };
	register struct mbuf *mm;
	register struct ip *k_data;
	int s;

	if (mrtdebug > 1)
		log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x",
		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);

	if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
	    (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
		/*
		 * Packet arrived via a physical interface.
		 */
		tunnel_src = 0;
	} else {
		/*
		 * Packet arrived through a source-route tunnel.
		 *
		 * A source-route tunneled packet has a single NOP option and a
		 * two-element
		 * loose-source-and-record-route (LSRR) option immediately following
		 * the fixed-size part of the IP header.  At this point in processing,
		 * the IP header should contain the following IP addresses:
		 *
		 *	original source       - in the source address field
		 *	destination group     - in the destination address field
		 *	remote tunnel end-point - in the first element of LSRR
		 *	one of this host's addrs - in the second element of LSRR
		 *
		 * NOTE: RFC-1075 would have the original source and remote tunnel
		 *	 end-point addresses swapped.  However, that could cause
		 *	 delivery of ICMP error messages to innocent applications
		 *	 on intermediate routing hosts!  Therefore, we hereby
		 *	 change the spec.
		 */

		/*
		 * Verify that the tunnel options are well-formed.
		 */
		if (ipoptions[0] != IPOPT_NOP ||
		    ipoptions[2] != 11 ||	/* LSRR option length   */
		    ipoptions[3] != 12 ||	/* LSRR address pointer */
		    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
			mrtstat.mrts_bad_tunnel++;
			if (mrtdebug)
				log(LOG_DEBUG,
				    "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)",
				    ntohl(ip->ip_src.s_addr),
				    ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3],
				    *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8]));
			return 1;
		}

		/*
		 * Delete the tunnel options from the packet.
		 */
		ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
		    (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
		m->m_len -= TUNNEL_LEN;
		ip->ip_len -= TUNNEL_LEN;
		ip->ip_hl -= TUNNEL_LEN >> 2;

		ifp = 0;
	}

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (ip->ip_ttl <= 1 ||
	    ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
		return (int)tunnel_src;

	/*
	 * Determine forwarding vifs from the forwarding cache table
	 */
	s = splnet();
	MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);

	/* Entry exists, so forward if necessary */
	if (rt != NULL) {
		splx(s);
		return (ip_mdq(m, ifp, tunnel_src, rt, imo));
	}

	else {
		/*
		 * If we don't have a route for packet's origin,
		 * Make a copy of the packet &
		 * send message to routing daemon
		 */

		register struct mbuf *mb_rt;
		register struct mbuf *mb_ntry;
		register struct mbuf *mb0;
		register struct rtdetq *rte;
		register struct mbuf *rte_m;
		register u_long hash;

		mrtstat.mrts_no_route++;
		if (mrtdebug)
			log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
			    ntohl(ip->ip_src.s_addr),
			    ntohl(ip->ip_dst.s_addr));

		/* is there an upcall waiting for this packet? */
		hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr);
		for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
			rt = mtod(mb_rt, struct mfc *);
			if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) ==
			     rt->mfc_origin.s_addr) &&
			    (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
			    (mb_rt->m_act != NULL))
				break;
		}

		if (mb_rt == NULL) {
			/* no upcall, so make a new entry */
			MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
			if (mb_rt == NULL) {
				splx(s);
				return ENOBUFS;
			}

			rt = mtod(mb_rt, struct mfc *);

			/* insert new entry at head of hash chain */
			rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
			rt->mfc_originmask.s_addr = (u_long)0xffffffff;
			rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;

			/* link into table */
			hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
			mb_rt->m_next  = mfctable[hash];
			mfctable[hash] = mb_rt;
			mb_rt->m_act = NULL;

		}

		/* determine if q has overflowed */
		for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act)
			hash++;

		if (hash > MAX_UPQ) {
			mrtstat.mrts_upq_ovflw++;
			splx(s);
			return 0;
		}

		/* add this packet and timing, ifp info to m_act */
		MGET(mb_ntry, M_DONTWAIT, MT_DATA);
		if (mb_ntry == NULL) {
			splx(s);
			return ENOBUFS;
		}

		mb_ntry->m_act = NULL;
		rte = mtod(mb_ntry, struct rtdetq *);

		mb0 = m_copy(m, 0, M_COPYALL);
		if (mb0 == NULL) {
			splx(s);
			return ENOBUFS;
		}

		rte->m		= mb0;
		rte->ifp	= ifp;
		rte->tunnel_src	= tunnel_src;
		rte->imo	= imo;

		rte_m->m_act = mb_ntry;

		splx(s);

		if (hash == 0) {
			/*
			 * Send message to routing daemon to install
			 * a route into the kernel table
			 */
			k_igmpsrc.sin_addr = ip->ip_src;
			k_igmpdst.sin_addr = ip->ip_dst;

			mm = m_copy(m, 0, M_COPYALL);
			if (mm == NULL) {
				/* NOTE(review): splx(s) was already called
				 * above -- this second splx looks redundant;
				 * confirm spl nesting semantics. */
				splx(s);
				return ENOBUFS;
			}

			k_data = mtod(mm, struct ip *);
			k_data->ip_p = 0;

			mrtstat.mrts_upcalls++;

			raw_input(mm, &k_igmpproto,
			    (struct sockaddr *)&k_igmpsrc,
			    (struct sockaddr *)&k_igmpdst);

			/* set timer to cleanup entry if upcall is lost */
			timeout(cleanup_cache, (caddr_t)mb_rt, 100);
			timeout_val++;
		}

		return 0;
	}
}

#ifndef MROUTE_LKM
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = X_ip_mforward;
#endif

/*
 * Clean up the cache entry if upcall is not serviced: drop all queued
 * packets and remove the unresolved entry from the hash chain.  Runs
 * from the timeout() armed in X_ip_mforward.
 */
static void
cleanup_cache(xmb_rt)
	void *xmb_rt;
{
	struct mbuf *mb_rt = xmb_rt;
	struct mfc *rt;
	u_long hash;
	struct mbuf *prev_m0;
	struct mbuf *m0;
	struct mbuf *m;
	struct rtdetq *rte;
	int s;

	rt = mtod(mb_rt, struct mfc *);
	hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);

	if (mrtdebug)
		log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x",
		    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
		    ntohl(rt->mfc_mcastgrp.s_addr));

	mrtstat.mrts_cache_cleanups++;

	/*
	 * determine entry to be cleaned up in cache table
	 */
	s = splnet();
	for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
		if (m0 == mb_rt)
			break;

	/*
	 * drop all the packets
	 * free the mbuf with the pkt, if, timing info
	 */
	while (mb_rt->m_act) {
		m = mb_rt->m_act;
		mb_rt->m_act = m->m_act;

		rte = mtod(m, struct rtdetq *);
		m_freem(rte->m);
		m_free(m);
	}

	/*
	 * Delete the entry from the cache
	 */
	if (prev_m0 != m0) {	/* if moved past head of list */
		MFREE(m0, prev_m0->m_next);
	} else			/* delete head of list, it is in the table */
		mfctable[hash] = m_free(m0);

	timeout_val--;
	splx(s);
}

/*
 * Packet forwarding routine once entry in the cache is made
 */
static int
ip_mdq(m, ifp, tunnel_src, rt, imo)
	register struct mbuf *m;
	register struct ifnet *ifp;
	register u_long tunnel_src;
	register struct mfc *rt;
	register struct ip_moptions *imo;
{
	register struct ip *ip = mtod(m, struct ip *);
	register vifi_t vifi;
	register struct vif *vifp;

	/*
	 * Don't forward if it didn't arrive from the parent vif for its origin.
	 * Notes: v_ifp is zero for src route tunnels, multicast_decap_if
	 *	  for encapsulated tunnels and a real ifnet for non-tunnels so
	 *	  the first part of the if catches wrong physical interface or
	 *	  tunnel type; v_rmt_addr is zero for non-tunneled packets so
	 *	  the 2nd part catches both packets that arrive via a tunnel
	 *	  that shouldn't and packets that arrive via the wrong tunnel.
	 */
	vifi = rt->mfc_parent;
	if (viftable[vifi].v_ifp != ifp ||
	    (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
		/* came in the wrong interface */
		if (mrtdebug)
			log(LOG_DEBUG, "wrong if: ifp %x vifi %d",
			    ifp, vifi);
		++mrtstat.mrts_wrong_if;
		return (int)tunnel_src;
	}

	/* increment the interface and s-g counters */
	viftable[vifi].v_pkt_in++;
	rt->mfc_pkt_cnt++;

	/*
	 * For each vif, decide if a copy of the packet should be forwarded.
1240 * Forward if: 1241 * - the ttl exceeds the vif's threshold 1242 * - there are group members downstream on interface 1243 */ 1244 #define MC_SEND(ip,vifp,m) { \ 1245 (vifp)->v_pkt_out++; \ 1246 if ((vifp)->v_flags & VIFF_SRCRT) \ 1247 srcrt_send((ip), (vifp), (m)); \ 1248 else if ((vifp)->v_flags & VIFF_TUNNEL) \ 1249 encap_send((ip), (vifp), (m)); \ 1250 else \ 1251 phyint_send((ip), (vifp), (m)); \ 1252 } 1253 1254 /* If no options or the imo_multicast_vif option is 0, don't do this part 1255 */ 1256 if ((imo != NULL) && 1257 (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) 1258 { 1259 MC_SEND(ip,viftable+vifi,m); 1260 return (1); /* make sure we are done: No more physical sends */ 1261 } 1262 1263 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1264 if ((rt->mfc_ttls[vifi] > 0) && 1265 (ip->ip_ttl > rt->mfc_ttls[vifi])) 1266 MC_SEND(ip, vifp, m); 1267 1268 return 0; 1269 } 1270 1271 /* check if a vif number is legal/ok. This is used by ip_output, to export 1272 * numvifs there, 1273 */ 1274 int 1275 X_legal_vif_num(vif) 1276 int vif; 1277 { if (vif>=0 && vif<=numvifs) 1278 return(1); 1279 else 1280 return(0); 1281 } 1282 1283 #ifndef MROUTE_LKM 1284 int (*legal_vif_num)(int) = X_legal_vif_num; 1285 #endif 1286 1287 static void 1288 phyint_send(ip, vifp, m) 1289 struct ip *ip; 1290 struct vif *vifp; 1291 struct mbuf *m; 1292 { 1293 register struct mbuf *mb_copy; 1294 int hlen = ip->ip_hl << 2; 1295 register struct ip_moptions *imo; 1296 1297 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1298 return; 1299 1300 /* 1301 * Make sure the header isn't in an cluster, because the sharing 1302 * in clusters defeats the whole purpose of making the copy above. 
1303 */ 1304 mb_copy = m_pullup(mb_copy, hlen); 1305 if (mb_copy == NULL) 1306 return; 1307 1308 MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); 1309 if (imo == NULL) { 1310 m_freem(mb_copy); 1311 return; 1312 } 1313 1314 imo->imo_multicast_ifp = vifp->v_ifp; 1315 imo->imo_multicast_ttl = ip->ip_ttl - 1; 1316 imo->imo_multicast_loop = 1; 1317 1318 if (vifp->v_rate_limit <= 0) 1319 tbf_send_packet(vifp, mb_copy, imo); 1320 else 1321 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, 1322 imo); 1323 } 1324 1325 static void 1326 srcrt_send(ip, vifp, m) 1327 struct ip *ip; 1328 struct vif *vifp; 1329 struct mbuf *m; 1330 { 1331 struct mbuf *mb_copy, *mb_opts; 1332 int hlen = ip->ip_hl << 2; 1333 register struct ip *ip_copy; 1334 u_char *cp; 1335 1336 /* 1337 * Make sure that adding the tunnel options won't exceed the 1338 * maximum allowed number of option bytes. 1339 */ 1340 if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { 1341 mrtstat.mrts_cant_tunnel++; 1342 if (mrtdebug) 1343 log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u", 1344 ntohl(ip->ip_src.s_addr)); 1345 return; 1346 } 1347 1348 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1349 return; 1350 1351 MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); 1352 if (mb_opts == NULL) { 1353 m_freem(mb_copy); 1354 return; 1355 } 1356 /* 1357 * 'Delete' the base ip header from the mb_copy chain 1358 */ 1359 mb_copy->m_len -= hlen; 1360 mb_copy->m_data += hlen; 1361 /* 1362 * Make mb_opts be the new head of the packet chain. 
1363 * Any options of the packet were left in the old packet chain head 1364 */ 1365 mb_opts->m_next = mb_copy; 1366 mb_opts->m_len = hlen + TUNNEL_LEN; 1367 mb_opts->m_data += MSIZE - mb_opts->m_len; 1368 mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN; 1369 /* 1370 * Copy the base ip header from the mb_copy chain to the new head mbuf 1371 */ 1372 ip_copy = mtod(mb_opts, struct ip *); 1373 bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), hlen); 1374 ip_copy->ip_ttl--; 1375 ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ 1376 /* 1377 * Adjust the ip header length to account for the tunnel options. 1378 */ 1379 ip_copy->ip_hl += TUNNEL_LEN >> 2; 1380 ip_copy->ip_len += TUNNEL_LEN; 1381 /* 1382 * Add the NOP and LSRR after the base ip header 1383 */ 1384 cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; 1385 *cp++ = IPOPT_NOP; 1386 *cp++ = IPOPT_LSRR; 1387 *cp++ = 11; /* LSRR option length */ 1388 *cp++ = 8; /* LSSR pointer to second element */ 1389 *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ 1390 cp += 4; 1391 *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ 1392 1393 if (vifp->v_rate_limit <= 0) 1394 tbf_send_packet(vifp, mb_opts, 0); 1395 else 1396 tbf_control(vifp, mb_opts, 1397 mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); 1398 } 1399 1400 static void 1401 encap_send(ip, vifp, m) 1402 register struct ip *ip; 1403 register struct vif *vifp; 1404 register struct mbuf *m; 1405 { 1406 register struct mbuf *mb_copy; 1407 register struct ip *ip_copy; 1408 int hlen = ip->ip_hl << 2; 1409 register int i, len = ip->ip_len; 1410 1411 /* 1412 * copy the old packet & pullup it's IP header into the 1413 * new mbuf so we can modify it. Try to fill the new 1414 * mbuf since if we don't the ethernet driver will. 
1415 */ 1416 MGET(mb_copy, M_DONTWAIT, MT_DATA); 1417 if (mb_copy == NULL) 1418 return; 1419 mb_copy->m_data += 16; 1420 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1421 1422 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { 1423 m_freem(mb_copy); 1424 return; 1425 } 1426 i = MHLEN - M_LEADINGSPACE(mb_copy); 1427 if (i > len) 1428 i = len; 1429 mb_copy = m_pullup(mb_copy, i); 1430 if (mb_copy == NULL) 1431 return; 1432 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1433 1434 /* 1435 * fill in the encapsulating IP header. 1436 */ 1437 ip_copy = mtod(mb_copy, struct ip *); 1438 *ip_copy = multicast_encap_iphdr; 1439 ip_copy->ip_id = htons(ip_id++); 1440 ip_copy->ip_len += len; 1441 ip_copy->ip_src = vifp->v_lcl_addr; 1442 ip_copy->ip_dst = vifp->v_rmt_addr; 1443 1444 /* 1445 * turn the encapsulated IP header back into a valid one. 1446 */ 1447 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1448 --ip->ip_ttl; 1449 HTONS(ip->ip_len); 1450 HTONS(ip->ip_off); 1451 ip->ip_sum = 0; 1452 #if defined(LBL) && !defined(ultrix) 1453 ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); 1454 #else 1455 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1456 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1457 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1458 #endif 1459 1460 if (vifp->v_rate_limit <= 0) 1461 tbf_send_packet(vifp, mb_copy, 0); 1462 else 1463 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); 1464 } 1465 1466 /* 1467 * De-encapsulate a packet and feed it back through ip input (this 1468 * routine is called whenever IP gets a packet with proto type 1469 * ENCAP_PROTO and a local destination address). 
1470 */ 1471 void 1472 #ifdef MROUTE_LKM 1473 X_multiencap_decap(m) 1474 #else 1475 multiencap_decap(m) 1476 #endif 1477 register struct mbuf *m; 1478 { 1479 struct ifnet *ifp = m->m_pkthdr.rcvif; 1480 register struct ip *ip = mtod(m, struct ip *); 1481 register int hlen = ip->ip_hl << 2; 1482 register int s; 1483 register struct ifqueue *ifq; 1484 register struct vif *vifp; 1485 1486 if (ip->ip_p != ENCAP_PROTO) { 1487 rip_input(m); 1488 return; 1489 } 1490 /* 1491 * dump the packet if it's not to a multicast destination or if 1492 * we don't have an encapsulating tunnel with the source. 1493 * Note: This code assumes that the remote site IP address 1494 * uniquely identifies the tunnel (i.e., that this site has 1495 * at most one tunnel with the remote site). 1496 */ 1497 if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { 1498 ++mrtstat.mrts_bad_tunnel; 1499 m_freem(m); 1500 return; 1501 } 1502 if (ip->ip_src.s_addr != last_encap_src) { 1503 register struct vif *vife; 1504 1505 vifp = viftable; 1506 vife = vifp + numvifs; 1507 last_encap_src = ip->ip_src.s_addr; 1508 last_encap_vif = 0; 1509 for ( ; vifp < vife; ++vifp) 1510 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 1511 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 1512 == VIFF_TUNNEL) 1513 last_encap_vif = vifp; 1514 break; 1515 } 1516 } 1517 if ((vifp = last_encap_vif) == 0) { 1518 last_encap_src = 0; 1519 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1520 m_freem(m); 1521 if (mrtdebug) 1522 log(LOG_DEBUG, "ip_mforward: no tunnel with %u", 1523 ntohl(ip->ip_src.s_addr)); 1524 return; 1525 } 1526 ifp = vifp->v_ifp; 1527 hlen -= sizeof(struct ifnet *); 1528 m->m_data += hlen; 1529 m->m_len -= hlen; 1530 *(mtod(m, struct ifnet **)) = ifp; 1531 ifq = &ipintrq; 1532 s = splimp(); 1533 if (IF_QFULL(ifq)) { 1534 IF_DROP(ifq); 1535 m_freem(m); 1536 } else { 1537 IF_ENQUEUE(ifq, m); 1538 /* 1539 * normally we would need a "schednetisr(NETISR_IP)" 1540 * here but we were called by ip_input 
and it is going 1541 * to loop back & try to dequeue the packet we just 1542 * queued as soon as we return so we avoid the 1543 * unnecessary software interrrupt. 1544 */ 1545 } 1546 splx(s); 1547 } 1548 1549 /* 1550 * Token bucket filter module 1551 */ 1552 void 1553 tbf_control(vifp, m, ip, p_len, imo) 1554 register struct vif *vifp; 1555 register struct mbuf *m; 1556 register struct ip *ip; 1557 register u_long p_len; 1558 struct ip_moptions *imo; 1559 { 1560 tbf_update_tokens(vifp); 1561 1562 /* if there are enough tokens, 1563 * and the queue is empty, 1564 * send this packet out 1565 */ 1566 1567 if (vifp->v_tbf->q_len == 0) { 1568 if (p_len <= vifp->v_tbf->n_tok) { 1569 vifp->v_tbf->n_tok -= p_len; 1570 tbf_send_packet(vifp, m, imo); 1571 } else if (p_len > MAX_BKT_SIZE) { 1572 /* drop if packet is too large */ 1573 mrtstat.mrts_pkt2large++; 1574 m_freem(m); 1575 return; 1576 } else { 1577 /* queue packet and timeout till later */ 1578 tbf_queue(vifp, m, ip, imo); 1579 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1580 } 1581 } else if (vifp->v_tbf->q_len < MAXQSIZE) { 1582 /* finite queue length, so queue pkts and process queue */ 1583 tbf_queue(vifp, m, ip, imo); 1584 tbf_process_q(vifp); 1585 } else { 1586 /* queue length too much, try to dq and queue and process */ 1587 if (!tbf_dq_sel(vifp, ip)) { 1588 mrtstat.mrts_q_overflow++; 1589 m_freem(m); 1590 return; 1591 } else { 1592 tbf_queue(vifp, m, ip, imo); 1593 tbf_process_q(vifp); 1594 } 1595 } 1596 return; 1597 } 1598 1599 /* 1600 * adds a packet to the queue at the interface 1601 */ 1602 void 1603 tbf_queue(vifp, m, ip, imo) 1604 register struct vif *vifp; 1605 register struct mbuf *m; 1606 register struct ip *ip; 1607 struct ip_moptions *imo; 1608 { 1609 register u_long ql; 1610 register int index = (vifp - viftable); 1611 register int s = splnet(); 1612 1613 ql = vifp->v_tbf->q_len; 1614 1615 qtable[index][ql].pkt_m = m; 1616 qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; 1617 
qtable[index][ql].pkt_ip = ip; 1618 qtable[index][ql].pkt_imo = imo; 1619 1620 vifp->v_tbf->q_len++; 1621 splx(s); 1622 } 1623 1624 1625 /* 1626 * processes the queue at the interface 1627 */ 1628 void 1629 tbf_process_q(vifp) 1630 register struct vif *vifp; 1631 { 1632 register struct pkt_queue pkt_1; 1633 register int index = (vifp - viftable); 1634 register int s = splnet(); 1635 1636 /* loop through the queue at the interface and send as many packets 1637 * as possible 1638 */ 1639 while (vifp->v_tbf->q_len > 0) { 1640 /* locate the first packet */ 1641 pkt_1.pkt_len = ((qtable[index][0]).pkt_len); 1642 pkt_1.pkt_m = (qtable[index][0]).pkt_m; 1643 pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; 1644 pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; 1645 1646 /* determine if the packet can be sent */ 1647 if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { 1648 /* if so, 1649 * reduce no of tokens, dequeue the queue, 1650 * send the packet. 1651 */ 1652 vifp->v_tbf->n_tok -= pkt_1.pkt_len; 1653 1654 tbf_dequeue(vifp, 0); 1655 1656 tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); 1657 1658 } else break; 1659 } 1660 splx(s); 1661 } 1662 1663 /* 1664 * removes the jth packet from the queue at the interface 1665 */ 1666 void 1667 tbf_dequeue(vifp,j) 1668 register struct vif *vifp; 1669 register int j; 1670 { 1671 register u_long index = vifp - viftable; 1672 register int i; 1673 1674 for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { 1675 qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; 1676 qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; 1677 qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; 1678 qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; 1679 } 1680 qtable[index][i-1].pkt_m = NULL; 1681 qtable[index][i-1].pkt_len = NULL; 1682 qtable[index][i-1].pkt_ip = NULL; 1683 qtable[index][i-1].pkt_imo = NULL; 1684 1685 vifp->v_tbf->q_len--; 1686 1687 if (tbfdebug > 1) 1688 log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1); 1689 } 1690 1691 void 1692 
tbf_reprocess_q(xvifp) 1693 void *xvifp; 1694 { 1695 register struct vif *vifp = xvifp; 1696 if (ip_mrouter == NULL) 1697 return; 1698 1699 tbf_update_tokens(vifp); 1700 1701 tbf_process_q(vifp); 1702 1703 if (vifp->v_tbf->q_len) 1704 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1705 } 1706 1707 /* function that will selectively discard a member of the queue 1708 * based on the precedence value and the priority obtained through 1709 * a lookup table - not yet implemented accurately! 1710 */ 1711 int 1712 tbf_dq_sel(vifp, ip) 1713 register struct vif *vifp; 1714 register struct ip *ip; 1715 { 1716 register int i; 1717 register int s = splnet(); 1718 register u_int p; 1719 1720 p = priority(vifp, ip); 1721 1722 for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { 1723 if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { 1724 m_freem(qtable[vifp-viftable][i].pkt_m); 1725 tbf_dequeue(vifp,i); 1726 splx(s); 1727 mrtstat.mrts_drop_sel++; 1728 return(1); 1729 } 1730 } 1731 splx(s); 1732 return(0); 1733 } 1734 1735 void 1736 tbf_send_packet(vifp, m, imo) 1737 register struct vif *vifp; 1738 register struct mbuf *m; 1739 struct ip_moptions *imo; 1740 { 1741 int error; 1742 int s = splnet(); 1743 1744 /* if source route tunnels */ 1745 if (vifp->v_flags & VIFF_SRCRT) { 1746 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1747 IP_FORWARDING, imo); 1748 if (mrtdebug > 1) 1749 log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error); 1750 } else if (vifp->v_flags & VIFF_TUNNEL) { 1751 /* If tunnel options */ 1752 ip_output(m, (struct mbuf *)0, (struct route *)0, 1753 IP_FORWARDING, imo); 1754 } else { 1755 /* if physical interface option, extract the options and then send */ 1756 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1757 IP_FORWARDING, imo); 1758 FREE(imo, M_IPMOPTS); 1759 1760 if (mrtdebug > 1) 1761 log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error); 1762 } 1763 splx(s); 1764 } 1765 1766 /* determine the current time and 
then 1767 * the elapsed time (between the last time and time now) 1768 * in milliseconds & update the no. of tokens in the bucket 1769 */ 1770 void 1771 tbf_update_tokens(vifp) 1772 register struct vif *vifp; 1773 { 1774 struct timeval tp; 1775 register u_long t; 1776 register u_long elapsed; 1777 register int s = splnet(); 1778 1779 GET_TIME(tp); 1780 1781 t = tp.tv_sec*1000 + tp.tv_usec/1000; 1782 1783 elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8; 1784 vifp->v_tbf->n_tok += elapsed; 1785 vifp->v_tbf->last_pkt_t = t; 1786 1787 if (vifp->v_tbf->n_tok > MAX_BKT_SIZE) 1788 vifp->v_tbf->n_tok = MAX_BKT_SIZE; 1789 1790 splx(s); 1791 } 1792 1793 static int 1794 priority(vifp, ip) 1795 register struct vif *vifp; 1796 register struct ip *ip; 1797 { 1798 register u_long graddr; 1799 register int prio; 1800 1801 /* temporary hack; will add general packet classifier some day */ 1802 1803 prio = 50; /* default priority */ 1804 1805 /* check for source route options and add option length to get dst */ 1806 if (vifp->v_flags & VIFF_SRCRT) 1807 graddr = ntohl((ip+8)->ip_dst.s_addr); 1808 else 1809 graddr = ntohl(ip->ip_dst.s_addr); 1810 1811 switch (graddr & 0xf) { 1812 case 0x0: break; 1813 case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */ 1814 break; 1815 case 0x2: break; 1816 case 0x3: break; 1817 case 0x4: break; 1818 case 0x5: break; 1819 case 0x6: break; 1820 case 0x7: break; 1821 case 0x8: break; 1822 case 0x9: break; 1823 case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */ 1824 break; 1825 case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */ 1826 break; 1827 case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */ 1828 break; 1829 case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */ 1830 break; 1831 case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */ 1832 break; 1833 case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */ 1834 break; 1835 } 1836 1837 if 
(tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d", graddr, prio); 1838 1839 return prio; 1840 } 1841 1842 /* 1843 * End of token bucket filter modifications 1844 */ 1845 1846 #ifdef MROUTE_LKM 1847 #include <sys/conf.h> 1848 #include <sys/exec.h> 1849 #include <sys/sysent.h> 1850 #include <sys/lkm.h> 1851 1852 MOD_MISC("ip_mroute_mod") 1853 1854 static int 1855 ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) 1856 { 1857 int i; 1858 struct lkm_misc *args = lkmtp->private.lkm_misc; 1859 int err = 0; 1860 1861 switch(cmd) { 1862 static int (*old_ip_mrouter_cmd)(); 1863 static int (*old_ip_mrouter_done)(); 1864 static int (*old_ip_mforward)(); 1865 static int (*old_mrt_ioctl)(); 1866 static int (*old_proto4_input)(); 1867 static int (*old_legal_vif_num)(); 1868 extern u_char ip_protox[]; 1869 extern struct protosw inetsw[]; 1870 1871 case LKM_E_LOAD: 1872 if(lkmexists(lkmtp) || ip_mrtproto) 1873 return(EEXIST); 1874 old_ip_mrouter_cmd = ip_mrouter_cmd; 1875 ip_mrouter_cmd = X_ip_mrouter_cmd; 1876 old_ip_mrouter_done = ip_mrouter_done; 1877 ip_mrouter_done = X_ip_mrouter_done; 1878 old_ip_mforward = ip_mforward; 1879 ip_mforward = X_ip_mforward; 1880 old_mrt_ioctl = mrt_ioctl; 1881 mrt_ioctl = X_mrt_ioctl; 1882 old_proto4_input = inetsw[ip_protox[IPPROTO_ENCAP]].pr_input; 1883 inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = X_multiencap_decap; 1884 old_legal_vif_num = legal_vif_num; 1885 legal_vif_num = X_legal_vif_num; 1886 ip_mrtproto = IGMP_DVMRP; 1887 1888 printf("\nIP multicast routing loaded\n"); 1889 break; 1890 1891 case LKM_E_UNLOAD: 1892 if (ip_mrouter) 1893 return EINVAL; 1894 1895 ip_mrouter_cmd = old_ip_mrouter_cmd; 1896 ip_mrouter_done = old_ip_mrouter_done; 1897 ip_mforward = old_ip_mforward; 1898 mrt_ioctl = old_mrt_ioctl; 1899 inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = old_proto4_input; 1900 legal_vif_num = old_legal_vif_num; 1901 ip_mrtproto = 0; 1902 break; 1903 1904 default: 1905 err = EINVAL; 1906 break; 1907 } 1908 1909 return(err); 1910 } 1911 
/*
 * LKM entry point for the multicast routing module: forwards the
 * load/unload/stat command to ip_mroute_mod_handle() via DISPATCH
 * (the DISPATCH macro supplies the return statement).
 */
int
ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
		 nosys);
}

#endif /* MROUTE_LKM */
#endif /* MROUTING */
