1 /* 2 * IP multicast forwarding procedures 3 * 4 * Written by David Waitzman, BBN Labs, August 1988. 5 * Modified by Steve Deering, Stanford, February 1989. 6 * Modified by Mark J. Steiglitz, Stanford, May, 1991 7 * Modified by Van Jacobson, LBL, January 1993 8 * Modified by Ajit Thyagarajan, PARC, August 1993 9 * 10 * MROUTING 1.8 11 */ 12 13 14 #include <sys/param.h> 15 #include <sys/systm.h> 16 #include <sys/mbuf.h> 17 #include <sys/socket.h> 18 #include <sys/socketvar.h> 19 #include <sys/protosw.h> 20 #include <sys/errno.h> 21 #include <sys/time.h> 22 #include <sys/ioctl.h> 23 #include <sys/syslog.h> 24 #include <sys/queue.h> 25 #include <net/if.h> 26 #include <net/route.h> 27 #include <netinet/in.h> 28 #include <netinet/in_systm.h> 29 #include <netinet/ip.h> 30 #include <netinet/ip_var.h> 31 #include <netinet/in_pcb.h> 32 #include <netinet/in_var.h> 33 #include <netinet/igmp.h> 34 #include <netinet/igmp_var.h> 35 #include <netinet/ip_mroute.h> 36 37 #ifndef NTOHL 38 #if BYTE_ORDER != BIG_ENDIAN 39 #define NTOHL(d) ((d) = ntohl((d))) 40 #define NTOHS(d) ((d) = ntohs((u_short)(d))) 41 #define HTONL(d) ((d) = htonl((d))) 42 #define HTONS(d) ((d) = htons((u_short)(d))) 43 #else 44 #define NTOHL(d) 45 #define NTOHS(d) 46 #define HTONL(d) 47 #define HTONS(d) 48 #endif 49 #endif 50 51 #ifndef MROUTING 52 /* 53 * Dummy routines and globals used when multicast routing is not compiled in. 
 */

u_int ip_mrtproto = 0;			/* no multicast routing protocol configured */
struct socket *ip_mrouter = NULL;	/* no routing daemon attached */
struct mrtstat mrtstat;			/* zeroed stats, still visible to netstat */


/*
 * Stub: reject every DVMRP setsockopt command when multicast routing
 * is not compiled into the kernel.
 */
int
_ip_mrouter_cmd(cmd, so, m)
	int cmd;
	struct socket *so;
	struct mbuf *m;
{
	return(EOPNOTSUPP);
}

int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;

/*
 * Stub: shutting down the (nonexistent) multicast router always succeeds.
 */
int
_ip_mrouter_done()
{
	return(0);
}

int (*ip_mrouter_done)(void) = _ip_mrouter_done;

/*
 * Stub: no forwarding is performed; returning 0 tells the caller the
 * packet was not consumed.
 */
int
_ip_mforward(ip, ifp, m, imo)
	struct ip *ip;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ip_moptions *imo;
{
	return(0);
}

int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = _ip_mforward;

/*
 * Stub: no multicast routing state to report via ioctl.
 */
int
_mrt_ioctl(int req, caddr_t data, struct proc *p)
{
	return EOPNOTSUPP;
}

int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;

/*
 * Stub decapsulator: hand encapsulated datagrams straight to raw IP input.
 */
void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */
	rip_input(m);
}

/* no vif table exists, so there is no legality check to install */
int (*legal_vif_num)(int) = 0;

#else /* MROUTING */

#define INSIZ		sizeof(struct in_addr)
#define	same(a1, a2) \
	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)

#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */

/*
 * Globals. All but ip_mrouter and ip_mrtproto could be static,
 * except for netstat or debugging purposes.
 */
#ifndef MROUTE_LKM
struct socket *ip_mrouter = NULL;	/* socket of the user-level mrouted */
struct mrtstat mrtstat;			/* multicast routing statistics */

int ip_mrtproto = IGMP_DVMRP;		/* for netstat only */
#else /* MROUTE_LKM */
extern struct mrtstat mrtstat;
extern int ip_mrtproto;
#endif

#define NO_RTE_FOUND	0x1
#define RTE_FOUND	0x2

struct mbuf *mfctable[MFCTBLSIZ];	/* forwarding-cache hash chains (mbuf-backed) */
struct vif viftable[MAXVIFS];		/* virtual interface table */
u_int mrtdebug = 0;			/* debug level */
u_int tbfdebug = 0;			/* tbf debug level */

u_long timeout_val = 0;			/* count of outstanding upcalls */

/*
 * Define the token bucket filter structures
 * tbftable -> each vif has one of these for storing info
 * qtable -> each interface has an associated queue of pkts
 */

struct tbf tbftable[MAXVIFS];
struct pkt_queue qtable[MAXVIFS][MAXQSIZE];

/*
 * 'Interfaces' associated with decapsulator (so we can tell
 * packets that went through it from ones that get reflected
 * by a broken gateway).  These interfaces are never linked into
 * the system ifnet list & no routes point to them.  I.e., packets
 * can't be sent this way.  They only exist as a placeholder for
 * multicast source verification.
 */
struct ifnet multicast_decap_if[MAXVIFS];

#define ENCAP_TTL 64
#define ENCAP_PROTO 4

/* prototype IP hdr for encapsulated packets */
struct ip multicast_encap_iphdr = {
#if BYTE_ORDER == LITTLE_ENDIAN
	sizeof(struct ip) >> 2, IPVERSION,
#else
	IPVERSION, sizeof(struct ip) >> 2,
#endif
	0,				/* tos */
	sizeof(struct ip),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, ENCAP_PROTO,
	0,				/* checksum */
};

/*
 * Private variables.
 */
static vifi_t numvifs = 0;		/* one past the highest vif in use */
static void (*encap_oldrawip)() = 0;	/* saved raw-IP input routine, restored nowhere here */

/*
 * one-back cache used by multiencap_decap to locate a tunnel's vif
 * given a datagram's src ip address.
 */
static u_long last_encap_src;
static struct vif *last_encap_vif;

/* forward declarations for the routines below */
static u_long nethash_fc(u_long, u_long);
static struct mfc *mfcfind(u_long, u_long);
int get_sg_cnt(struct sioc_sg_req *);
int get_vif_cnt(struct sioc_vif_req *);
int get_vifs(caddr_t);
static int add_vif(struct vifctl *);
static int del_vif(vifi_t *);
static int add_mfc(struct mfcctl *);
static int del_mfc(struct delmfcctl *);
static void cleanup_cache(void *);
static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
		  struct ip_moptions *);
static void phyint_send(struct ip *, struct vif *, struct mbuf *);
static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
static void encap_send(struct ip *, struct vif *, struct mbuf *);
void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
		 struct ip_moptions *);
void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
void tbf_process_q(struct vif *);
void tbf_dequeue(struct vif *, int);
void tbf_reprocess_q(void *);
int tbf_dq_sel(struct vif *, struct ip *);
void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
void tbf_update_tokens(struct vif *);
static int priority(struct vif *, struct ip *);
static int ip_mrouter_init(struct socket *);
void multiencap_decap(struct mbuf *m);

/*
 * A simple hash function: returns MFCHASHMOD of the low-order octet of
 * the argument's network or subnet number and the multicast group assoc.
 */
static u_long
nethash_fc(m, n)
	register u_long m;
	register u_long n;
{
	struct in_addr in1;
	struct in_addr in2;

	/* reduce the origin address to its network number, shifted so the
	 * low-order octet is nonzero */
	in1.s_addr = m;
	m = in_netof(in1);
	/*
	 * NOTE(review): if in_netof() ever returns 0 this loop never
	 * terminates; presumably callers never hash a 0.0.0.0 origin --
	 * verify against ip_mforward's callers.
	 */
	while ((m & 0xff) == 0)
		m >>= 8;

	/* same reduction for the multicast group address */
	in2.s_addr = n;
	n = in_netof(in2);
	while ((n & 0xff) == 0)
		n >>= 8;

	return (MFCHASHMOD(m) ^ MFCHASHMOD(n));
}

/*
 * this is a direct-mapped cache used to speed the mapping from a
 * datagram source address to the associated multicast route. Note
 * that unlike mrttable, the hash is on IP address, not IP net number.
 */
#define MFCHASHSIZ 1024
#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
			((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1))
struct mfc *mfchash[MFCHASHSIZ];

/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 *
 * On a direct-map miss, falls back to the full mfcfind() lookup and
 * refreshes the one-entry-per-slot cache with the result.
 */
#define MFCFIND(o, g, rt) { \
	register u_int _mrhasho = o; \
	register u_int _mrhashg = g; \
	_mrhasho = MFCHASH(_mrhasho, _mrhashg); \
	++mrtstat.mrts_mfc_lookups; \
	rt = mfchash[_mrhasho]; \
	if ((rt == NULL) || \
	    ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \
	    (g != rt->mfc_mcastgrp.s_addr)) \
		if ((rt = mfcfind(o, g)) != NULL) \
			mfchash[_mrhasho] = rt; \
}

/*
 * Find route by examining hash table entries
 */
static struct mfc *
mfcfind(origin, mcastgrp)
	u_long origin;
	u_long mcastgrp;
{
	register struct mbuf *mb_rt;
	register struct mfc *rt;
	register u_long hash;

	hash = nethash_fc(origin, mcastgrp);
	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
		rt = mtod(mb_rt, struct mfc *);
		/* m_act == NULL means the entry is complete (no upcall
		 * packets queued on it) and therefore usable */
		if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) &&
		    (mcastgrp == rt->mfc_mcastgrp.s_addr) &&
		    (mb_rt->m_act == NULL))
			return (rt);
	}
	mrtstat.mrts_mfc_misses++;
	return NULL;
}

/*
 * Macros to compute elapsed time efficiently
 * Borrowed from Van Jacobson's scheduling code
 *
 * TV_DELTA leaves (a) - (b) in microseconds in `delta'; the switch
 * avoids a multiply for the common 0/1/2 second cases.
 */
#define TV_DELTA(a, b, delta) { \
	    register int xxs; \
		\
	    delta = (a).tv_usec - (b).tv_usec; \
	    if ((xxs = (a).tv_sec - (b).tv_sec)) { \
	       switch (xxs) { \
		      case 2: \
			  delta += 1000000; \
			      /* fall through */ \
		      case 1: \
			  delta += 1000000; \
			  break; \
		      default: \
			  delta += (1000000 * xxs); \
	       } \
	    } \
}

#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)

/*
 * Handle DVMRP setsockopt commands to modify the multicast routing tables.
 * Only the registered mrouter socket may issue anything but DVMRP_INIT.
 */
int
X_ip_mrouter_cmd(cmd, so, m)
	int cmd;
	struct socket *so;
	struct mbuf *m;
{
	if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES;

	switch (cmd) {
	case DVMRP_INIT:     return ip_mrouter_init(so);
	case DVMRP_DONE:     return ip_mrouter_done();
	case DVMRP_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
	case DVMRP_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
	case DVMRP_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
	case DVMRP_DEL_MFC:  return del_mfc (mtod(m, struct delmfcctl *));
	default:             return EOPNOTSUPP;
	}
}

#ifndef MROUTE_LKM
int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd;
#endif

/*
 * Handle ioctl commands to obtain information from the cache
 *
 * NOTE(review): the mrt_ioctl hook type below takes three arguments
 * (int, caddr_t, struct proc *) but this K&R definition declares only
 * two; the extra argument is silently ignored -- confirm intentional.
 */
int
X_mrt_ioctl(cmd, data)
	int cmd;
	caddr_t data;
{
	int error = 0;

	switch (cmd) {
	case (SIOCGETVIFINF):		/* Read Virtual Interface (m/cast) */
		return (get_vifs(data));
		break;			/* unreachable after return */
	case (SIOCGETVIFCNT):
		return (get_vif_cnt((struct sioc_vif_req *)data));
		break;
	case (SIOCGETSGCNT):
		return (get_sg_cnt((struct sioc_sg_req *)data));
		break;
	default:
		return (EINVAL);
		break;
	}
	return error;
}

#ifndef MROUTE_LKM
int
(*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl;
#endif

/*
 * returns the packet count for the source group provided
 */
int
get_sg_cnt(req)
	register struct sioc_sg_req *req;
{
	register struct mfc *rt;
	int s;

	s = splnet();
	MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
	splx(s);
	if (rt != NULL)
		req->count = rt->mfc_pkt_cnt;
	else
		req->count = 0xffffffff;	/* sentinel: no such (s,g) entry */

	return 0;
}

/*
 * returns the input and output packet counts on the interface provided
 *
 * NOTE(review): req->vifi is not range-checked against numvifs here,
 * unlike del_vif -- confirm callers validate it.
 */
int
get_vif_cnt(req)
	register struct sioc_vif_req *req;
{
	register vifi_t vifi = req->vifi;

	req->icount = viftable[vifi].v_pkt_in;
	req->ocount = viftable[vifi].v_pkt_out;

	return 0;
}

/*
 * Copy the configuration of every active vif out to the user buffer
 * described by the vif_conf header at `data'.  Returns ENOSPC when the
 * user buffer (vifc_len bytes) is too small.
 */
int
get_vifs(data)
	char *data;
{
	struct vif_conf *vifc = (struct vif_conf *)data;
	struct vif_req *vifrp, vifr;
	int space, error=0;

	vifi_t vifi;
	int s;

	space = vifc->vifc_len;
	vifrp = vifc->vifc_req;

	s = splnet();
	vifc->vifc_num=numvifs;

	for (vifi = 0; vifi < numvifs; vifi++, vifrp++) {
		/* a zero local address marks an unused slot */
		if (viftable[vifi].v_lcl_addr.s_addr != 0) {
			vifr.v_flags=viftable[vifi].v_flags;
			vifr.v_threshold=viftable[vifi].v_threshold;
			vifr.v_lcl_addr=viftable[vifi].v_lcl_addr;
			vifr.v_rmt_addr=viftable[vifi].v_rmt_addr;
			/* NOTE(review): strncpy does not guarantee NUL
			 * termination when if_name is IFNAMSIZ long, and
			 * vifr's padding is copied to userland
			 * uninitialized -- verify acceptable here */
			strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ);
			if ((space -= sizeof(vifr)) < 0) {
				splx(s);
				return(ENOSPC);
			}
			error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr));
			if (error) {
				splx(s);
				return(error);
			}
		}
	}
	splx(s);
	return 0;
}
/*
 * Enable multicast routing
 */
static int
ip_mrouter_init(so)
	struct socket *so;
{
	/* only a raw IGMP socket may become the mrouter */
	if (so->so_type != SOCK_RAW ||
	    so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;

	if (ip_mrouter != NULL) return EADDRINUSE;

	ip_mrouter = so;

	if
(mrtdebug) 462 log(LOG_DEBUG, "ip_mrouter_init\n"); 463 464 return 0; 465 } 466 467 /* 468 * Disable multicast routing 469 */ 470 int 471 X_ip_mrouter_done() 472 { 473 vifi_t vifi; 474 int i; 475 struct ifnet *ifp; 476 struct ifreq ifr; 477 struct mbuf *mb_rt; 478 struct mbuf *m; 479 struct rtdetq *rte; 480 int s; 481 482 s = splnet(); 483 484 /* 485 * For each phyint in use, disable promiscuous reception of all IP 486 * multicasts. 487 */ 488 for (vifi = 0; vifi < numvifs; vifi++) { 489 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 490 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 491 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 492 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr 493 = INADDR_ANY; 494 ifp = viftable[vifi].v_ifp; 495 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 496 } 497 } 498 bzero((caddr_t)qtable, sizeof(qtable)); 499 bzero((caddr_t)tbftable, sizeof(tbftable)); 500 bzero((caddr_t)viftable, sizeof(viftable)); 501 numvifs = 0; 502 503 /* 504 * Check if any outstanding timeouts remain 505 */ 506 if (timeout_val != 0) 507 for (i = 0; i < MFCTBLSIZ; i++) { 508 mb_rt = mfctable[i]; 509 while (mb_rt) { 510 if ( mb_rt->m_act != NULL) { 511 untimeout(cleanup_cache, (caddr_t)mb_rt); 512 while (mb_rt->m_act) { 513 m = mb_rt->m_act; 514 mb_rt->m_act = m->m_act; 515 rte = mtod(m, struct rtdetq *); 516 m_freem(rte->m); 517 m_free(m); 518 } 519 timeout_val--; 520 } 521 mb_rt = mb_rt->m_next; 522 } 523 if (timeout_val == 0) 524 break; 525 } 526 527 /* 528 * Free all multicast forwarding cache entries. 
529 */ 530 for (i = 0; i < MFCTBLSIZ; i++) 531 m_freem(mfctable[i]); 532 533 bzero((caddr_t)mfctable, sizeof(mfctable)); 534 bzero((caddr_t)mfchash, sizeof(mfchash)); 535 536 /* 537 * Reset de-encapsulation cache 538 */ 539 last_encap_src = NULL; 540 last_encap_vif = NULL; 541 542 ip_mrouter = NULL; 543 544 splx(s); 545 546 if (mrtdebug) 547 log(LOG_DEBUG, "ip_mrouter_done\n"); 548 549 return 0; 550 } 551 552 #ifndef MROUTE_LKM 553 int (*ip_mrouter_done)(void) = X_ip_mrouter_done; 554 #endif 555 556 /* 557 * Add a vif to the vif table 558 */ 559 static int 560 add_vif(vifcp) 561 register struct vifctl *vifcp; 562 { 563 register struct vif *vifp = viftable + vifcp->vifc_vifi; 564 static struct sockaddr_in sin = {sizeof sin, AF_INET}; 565 struct ifaddr *ifa; 566 struct ifnet *ifp; 567 struct ifreq ifr; 568 int error, s; 569 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 570 571 if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; 572 if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; 573 574 /* Find the interface with an address in AF_INET family */ 575 sin.sin_addr = vifcp->vifc_lcl_addr; 576 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 577 if (ifa == 0) return EADDRNOTAVAIL; 578 ifp = ifa->ifa_ifp; 579 580 if (vifcp->vifc_flags & VIFF_TUNNEL) { 581 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 582 if (encap_oldrawip == 0) { 583 extern struct protosw inetsw[]; 584 register u_char pr = ip_protox[ENCAP_PROTO]; 585 586 encap_oldrawip = inetsw[pr].pr_input; 587 inetsw[pr].pr_input = multiencap_decap; 588 for (s = 0; s < MAXVIFS; ++s) { 589 multicast_decap_if[s].if_name = "mdecap"; 590 multicast_decap_if[s].if_unit = s; 591 } 592 } 593 ifp = &multicast_decap_if[vifcp->vifc_vifi]; 594 } else { 595 ifp = 0; 596 } 597 } else { 598 /* Make sure the interface supports multicast */ 599 if ((ifp->if_flags & IFF_MULTICAST) == 0) 600 return EOPNOTSUPP; 601 602 /* Enable promiscuous reception of all IP multicasts from the if */ 603 ((struct sockaddr_in 
*)&(ifr.ifr_addr))->sin_family = AF_INET; 604 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 605 s = splnet(); 606 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); 607 splx(s); 608 if (error) 609 return error; 610 } 611 612 s = splnet(); 613 /* define parameters for the tbf structure */ 614 vifp->v_tbf = v_tbf; 615 vifp->v_tbf->q_len = 0; 616 vifp->v_tbf->n_tok = 0; 617 vifp->v_tbf->last_pkt_t = 0; 618 619 vifp->v_flags = vifcp->vifc_flags; 620 vifp->v_threshold = vifcp->vifc_threshold; 621 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 622 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 623 vifp->v_ifp = ifp; 624 vifp->v_rate_limit= vifcp->vifc_rate_limit; 625 /* initialize per vif pkt counters */ 626 vifp->v_pkt_in = 0; 627 vifp->v_pkt_out = 0; 628 splx(s); 629 630 /* Adjust numvifs up if the vifi is higher than numvifs */ 631 if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 632 633 if (mrtdebug) 634 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n", 635 vifcp->vifc_vifi, 636 ntohl(vifcp->vifc_lcl_addr.s_addr), 637 (vifcp->vifc_flags & VIFF_TUNNEL) ? 
"rmtaddr" : "mask", 638 ntohl(vifcp->vifc_rmt_addr.s_addr), 639 vifcp->vifc_threshold, 640 vifcp->vifc_rate_limit); 641 642 return 0; 643 } 644 645 /* 646 * Delete a vif from the vif table 647 */ 648 static int 649 del_vif(vifip) 650 vifi_t *vifip; 651 { 652 register struct vif *vifp = viftable + *vifip; 653 register vifi_t vifi; 654 struct ifnet *ifp; 655 struct ifreq ifr; 656 int s; 657 658 if (*vifip >= numvifs) return EINVAL; 659 if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; 660 661 s = splnet(); 662 663 if (!(vifp->v_flags & VIFF_TUNNEL)) { 664 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 665 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 666 ifp = vifp->v_ifp; 667 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 668 } 669 670 if (vifp == last_encap_vif) { 671 last_encap_vif = 0; 672 last_encap_src = 0; 673 } 674 675 bzero((caddr_t)qtable[*vifip], 676 sizeof(qtable[*vifip])); 677 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 678 bzero((caddr_t)vifp, sizeof (*vifp)); 679 680 /* Adjust numvifs down */ 681 for (vifi = numvifs; vifi > 0; vifi--) 682 if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; 683 numvifs = vifi; 684 685 splx(s); 686 687 if (mrtdebug) 688 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs); 689 690 return 0; 691 } 692 693 /* 694 * Add an mfc entry 695 */ 696 static int 697 add_mfc(mfccp) 698 struct mfcctl *mfccp; 699 { 700 struct mfc *rt; 701 struct mfc *rt1 = 0; 702 register struct mbuf *mb_rt; 703 struct mbuf *prev_mb_rt; 704 u_long hash; 705 struct mbuf *mb_ntry; 706 struct rtdetq *rte; 707 register u_short nstl; 708 int s; 709 int i; 710 711 rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 712 713 /* If an entry already exists, just update the fields */ 714 if (rt) { 715 if (mrtdebug) 716 log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x\n", 717 ntohl(mfccp->mfcc_origin.s_addr), 718 ntohl(mfccp->mfcc_mcastgrp.s_addr), 719 
ntohl(mfccp->mfcc_originmask.s_addr), 720 mfccp->mfcc_parent); 721 722 s = splnet(); 723 rt->mfc_parent = mfccp->mfcc_parent; 724 for (i = 0; i < numvifs; i++) 725 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 726 splx(s); 727 return 0; 728 } 729 730 /* 731 * Find the entry for which the upcall was made and update 732 */ 733 s = splnet(); 734 hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 735 for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; 736 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 737 738 rt = mtod(mb_rt, struct mfc *); 739 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 740 == mfccp->mfcc_origin.s_addr) && 741 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 742 (mb_rt->m_act != NULL)) { 743 744 if (!nstl++) { 745 if (mrtdebug) 746 log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x\n", 747 ntohl(mfccp->mfcc_origin.s_addr), 748 ntohl(mfccp->mfcc_mcastgrp.s_addr), 749 ntohl(mfccp->mfcc_originmask.s_addr), 750 mfccp->mfcc_parent, mb_rt->m_act); 751 752 rt->mfc_origin = mfccp->mfcc_origin; 753 rt->mfc_originmask = mfccp->mfcc_originmask; 754 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 755 rt->mfc_parent = mfccp->mfcc_parent; 756 for (i = 0; i < numvifs; i++) 757 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 758 /* initialize pkt counters per src-grp */ 759 rt->mfc_pkt_cnt = 0; 760 rt1 = rt; 761 } 762 763 /* prevent cleanup of cache entry */ 764 untimeout(cleanup_cache, (caddr_t)mb_rt); 765 timeout_val--; 766 767 /* free packets Qed at the end of this entry */ 768 while (mb_rt->m_act) { 769 mb_ntry = mb_rt->m_act; 770 rte = mtod(mb_ntry, struct rtdetq *); 771 ip_mdq(rte->m, rte->ifp, rte->tunnel_src, 772 rt1, rte->imo); 773 mb_rt->m_act = mb_ntry->m_act; 774 m_freem(rte->m); 775 m_free(mb_ntry); 776 } 777 778 /* 779 * If more than one entry was created for a single upcall 780 * delete that entry 781 */ 782 if (nstl > 1) { 783 MFREE(mb_rt, prev_mb_rt->m_next); 784 mb_rt = prev_mb_rt; 785 } 786 } 787 
} 788 789 /* 790 * It is possible that an entry is being inserted without an upcall 791 */ 792 if (nstl == 0) { 793 if (mrtdebug) 794 log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x\n", 795 hash, ntohl(mfccp->mfcc_origin.s_addr), 796 ntohl(mfccp->mfcc_mcastgrp.s_addr), 797 ntohl(mfccp->mfcc_originmask.s_addr), 798 mfccp->mfcc_parent); 799 800 for (prev_mb_rt = mb_rt = mfctable[hash]; 801 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 802 803 rt = mtod(mb_rt, struct mfc *); 804 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 805 == mfccp->mfcc_origin.s_addr) && 806 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 807 808 rt->mfc_origin = mfccp->mfcc_origin; 809 rt->mfc_originmask = mfccp->mfcc_originmask; 810 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 811 rt->mfc_parent = mfccp->mfcc_parent; 812 for (i = 0; i < numvifs; i++) 813 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 814 /* initialize pkt counters per src-grp */ 815 rt->mfc_pkt_cnt = 0; 816 } 817 } 818 if (mb_rt == NULL) { 819 /* no upcall, so make a new entry */ 820 MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 821 if (mb_rt == NULL) { 822 splx(s); 823 return ENOBUFS; 824 } 825 826 rt = mtod(mb_rt, struct mfc *); 827 828 /* insert new entry at head of hash chain */ 829 rt->mfc_origin = mfccp->mfcc_origin; 830 rt->mfc_originmask = mfccp->mfcc_originmask; 831 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 832 rt->mfc_parent = mfccp->mfcc_parent; 833 for (i = 0; i < numvifs; i++) 834 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 835 /* initialize pkt counters per src-grp */ 836 rt->mfc_pkt_cnt = 0; 837 838 /* link into table */ 839 mb_rt->m_next = mfctable[hash]; 840 mfctable[hash] = mb_rt; 841 mb_rt->m_act = NULL; 842 } 843 } 844 splx(s); 845 return 0; 846 } 847 848 /* 849 * Delete an mfc entry 850 */ 851 static int 852 del_mfc(mfccp) 853 struct delmfcctl *mfccp; 854 { 855 struct in_addr origin; 856 struct in_addr mcastgrp; 857 struct mfc *rt; 858 struct mbuf *mb_rt; 859 struct mbuf 
*prev_mb_rt; 860 u_long hash; 861 struct mfc **cmfc; 862 struct mfc **cmfcend; 863 int s; 864 865 origin = mfccp->mfcc_origin; 866 mcastgrp = mfccp->mfcc_mcastgrp; 867 hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); 868 869 if (mrtdebug) 870 log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x\n", 871 ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); 872 873 for (prev_mb_rt = mb_rt = mfctable[hash] 874 ; mb_rt 875 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 876 rt = mtod(mb_rt, struct mfc *); 877 if (origin.s_addr == rt->mfc_origin.s_addr && 878 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 879 mb_rt->m_act == NULL) 880 break; 881 } 882 if (mb_rt == NULL) { 883 return ESRCH; 884 } 885 886 s = splnet(); 887 888 cmfc = mfchash; 889 cmfcend = cmfc + MFCHASHSIZ; 890 for ( ; cmfc < cmfcend; ++cmfc) 891 if (*cmfc == rt) 892 *cmfc = 0; 893 894 if (prev_mb_rt != mb_rt) { /* if moved past head of list */ 895 MFREE(mb_rt, prev_mb_rt->m_next); 896 } else /* delete head of list, it is in the table */ 897 mfctable[hash] = m_free(mb_rt); 898 899 splx(s); 900 901 return 0; 902 } 903 904 /* 905 * IP multicast forwarding function. This function assumes that the packet 906 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 907 * pointed to by "ifp", and the packet is to be relayed to other networks 908 * that have members of the packet's destination IP multicast group. 909 * 910 * The packet is returned unscathed to the caller, unless it is tunneled 911 * or erroneous, in which case a non-zero return value tells the caller to 912 * discard it. 
913 */ 914 915 #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 916 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 917 918 int 919 X_ip_mforward(ip, ifp, m, imo) 920 register struct ip *ip; 921 struct ifnet *ifp; 922 struct mbuf *m; 923 struct ip_moptions *imo; 924 { 925 register struct mfc *rt; 926 register u_char *ipoptions; 927 u_long tunnel_src; 928 static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; 929 static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 930 static struct sockaddr_in k_igmpdst = { sizeof k_igmpdst, AF_INET }; 931 register struct mbuf *mm; 932 register struct ip *k_data; 933 int s; 934 935 if (mrtdebug > 1) 936 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x (%s%d)\n", 937 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp, 938 ifp->if_name, ifp->if_unit); 939 940 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 941 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 942 /* 943 * Packet arrived via a physical interface. 944 */ 945 tunnel_src = 0; 946 } else { 947 /* 948 * Packet arrived through a source-route tunnel. 949 * 950 * A source-route tunneled packet has a single NOP option and a 951 * two-element 952 * loose-source-and-record-route (LSRR) option immediately following 953 * the fixed-size part of the IP header. At this point in processing, 954 * the IP header should contain the following IP addresses: 955 * 956 * original source - in the source address field 957 * destination group - in the destination address field 958 * remote tunnel end-point - in the first element of LSRR 959 * one of this host's addrs - in the second element of LSRR 960 * 961 * NOTE: RFC-1075 would have the original source and remote tunnel 962 * end-point addresses swapped. However, that could cause 963 * delivery of ICMP error messages to innocent applications 964 * on intermediate routing hosts! Therefore, we hereby 965 * change the spec. 
966 */ 967 968 /* 969 * Verify that the tunnel options are well-formed. 970 */ 971 if (ipoptions[0] != IPOPT_NOP || 972 ipoptions[2] != 11 || /* LSRR option length */ 973 ipoptions[3] != 12 || /* LSRR address pointer */ 974 (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { 975 mrtstat.mrts_bad_tunnel++; 976 if (mrtdebug) 977 log(LOG_DEBUG, 978 "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)\n", 979 ntohl(ip->ip_src.s_addr), 980 ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3], 981 *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8])); 982 return 1; 983 } 984 985 /* 986 * Delete the tunnel options from the packet. 987 */ 988 ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, 989 (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); 990 m->m_len -= TUNNEL_LEN; 991 ip->ip_len -= TUNNEL_LEN; 992 ip->ip_hl -= TUNNEL_LEN >> 2; 993 994 ifp = 0; 995 } 996 997 /* 998 * Don't forward a packet with time-to-live of zero or one, 999 * or a packet destined to a local-only group. 
1000 */ 1001 if (ip->ip_ttl <= 1 || 1002 ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 1003 return (int)tunnel_src; 1004 1005 /* 1006 * Determine forwarding vifs from the forwarding cache table 1007 */ 1008 s = splnet(); 1009 MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); 1010 1011 /* Entry exists, so forward if necessary */ 1012 if (rt != NULL) { 1013 splx(s); 1014 return (ip_mdq(m, ifp, tunnel_src, rt, imo)); 1015 } 1016 1017 else { 1018 /* 1019 * If we don't have a route for packet's origin, 1020 * Make a copy of the packet & 1021 * send message to routing daemon 1022 */ 1023 1024 register struct mbuf *mb_rt; 1025 register struct mbuf *mb_ntry; 1026 register struct mbuf *mb0; 1027 register struct rtdetq *rte; 1028 register struct mbuf *rte_m; 1029 register u_long hash; 1030 1031 mrtstat.mrts_no_route++; 1032 if (mrtdebug) 1033 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n", 1034 ntohl(ip->ip_src.s_addr), 1035 ntohl(ip->ip_dst.s_addr)); 1036 1037 /* is there an upcall waiting for this packet? 
*/ 1038 hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1039 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 1040 rt = mtod(mb_rt, struct mfc *); 1041 if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) == 1042 rt->mfc_origin.s_addr) && 1043 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1044 (mb_rt->m_act != NULL)) 1045 break; 1046 } 1047 1048 if (mb_rt == NULL) { 1049 /* no upcall, so make a new entry */ 1050 MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 1051 if (mb_rt == NULL) { 1052 splx(s); 1053 return ENOBUFS; 1054 } 1055 1056 rt = mtod(mb_rt, struct mfc *); 1057 1058 /* insert new entry at head of hash chain */ 1059 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1060 rt->mfc_originmask.s_addr = (u_long)0xffffffff; 1061 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1062 1063 /* link into table */ 1064 hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); 1065 mb_rt->m_next = mfctable[hash]; 1066 mfctable[hash] = mb_rt; 1067 mb_rt->m_act = NULL; 1068 1069 } 1070 1071 /* determine if q has overflowed */ 1072 for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act) 1073 hash++; 1074 1075 if (hash > MAX_UPQ) { 1076 mrtstat.mrts_upq_ovflw++; 1077 splx(s); 1078 return 0; 1079 } 1080 1081 /* add this packet and timing, ifp info to m_act */ 1082 MGET(mb_ntry, M_DONTWAIT, MT_DATA); 1083 if (mb_ntry == NULL) { 1084 splx(s); 1085 return ENOBUFS; 1086 } 1087 1088 mb_ntry->m_act = NULL; 1089 rte = mtod(mb_ntry, struct rtdetq *); 1090 1091 mb0 = m_copy(m, 0, M_COPYALL); 1092 if (mb0 == NULL) { 1093 splx(s); 1094 return ENOBUFS; 1095 } 1096 1097 rte->m = mb0; 1098 rte->ifp = ifp; 1099 rte->tunnel_src = tunnel_src; 1100 rte->imo = imo; 1101 1102 rte_m->m_act = mb_ntry; 1103 1104 splx(s); 1105 1106 if (hash == 0) { 1107 /* 1108 * Send message to routing daemon to install 1109 * a route into the kernel table 1110 */ 1111 k_igmpsrc.sin_addr = ip->ip_src; 1112 k_igmpdst.sin_addr = ip->ip_dst; 1113 1114 mm = m_copy(m, 0, M_COPYALL); 1115 if (mm 
== NULL) {
			splx(s);
			return ENOBUFS;
		    }

		    k_data = mtod(mm, struct ip *);
		    k_data->ip_p = 0;

		    mrtstat.mrts_upcalls++;

		    /* hand the copy to the routing daemon via its raw socket */
		    rip_ip_input(mm, ip_mrouter, (struct sockaddr *)&k_igmpsrc);

		    /* set timer to cleanup entry if upcall is lost */
		    timeout(cleanup_cache, (caddr_t)mb_rt, 100);
		    timeout_val++;
		}

		return 0;
	}
}

#ifndef MROUTE_LKM
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = X_ip_mforward;
#endif

/*
 * Clean up the cache entry if upcall is not serviced
 *
 * Timeout handler armed by ip_mforward: drops every packet queued on
 * the incomplete cache entry (its m_act chain), then unlinks and frees
 * the entry itself from mfctable.
 */
static void
cleanup_cache(xmb_rt)
	void *xmb_rt;
{
	struct mbuf *mb_rt = xmb_rt;
	struct mfc *rt;
	u_long hash;
	struct mbuf *prev_m0;
	struct mbuf *m0;
	struct mbuf *m;
	struct rtdetq *rte;
	int s;

	rt = mtod(mb_rt, struct mfc *);
	hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);

	if (mrtdebug)
		log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x\n",
		    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
		    ntohl(rt->mfc_mcastgrp.s_addr));

	mrtstat.mrts_cache_cleanups++;

	/*
	 * determine entry to be cleaned up in cache table
	 * (walk the chain so prev_m0 ends up pointing at the predecessor)
	 */
	s = splnet();
	for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
		if (m0 == mb_rt)
			break;

	/*
	 * drop all the packets
	 * free the mbuf with the pkt, if, timing info
	 */
	while (mb_rt->m_act) {
		m = mb_rt->m_act;
		mb_rt->m_act = m->m_act;

		rte = mtod(m, struct rtdetq *);
		m_freem(rte->m);
		m_free(m);
	}

	/*
	 * Delete the entry from the cache
	 */
	if (prev_m0 != m0) {	/* if moved past head of list */
		MFREE(m0, prev_m0->m_next);
	} else			/* delete head of list, it is in the table */
		mfctable[hash] = m_free(m0);

	timeout_val--;
	splx(s);
}

/*
Packet forwarding routine once entry in the cache is made 1202 */ 1203 static int 1204 ip_mdq(m, ifp, tunnel_src, rt, imo) 1205 register struct mbuf *m; 1206 register struct ifnet *ifp; 1207 register u_long tunnel_src; 1208 register struct mfc *rt; 1209 register struct ip_moptions *imo; 1210 { 1211 register struct ip *ip = mtod(m, struct ip *); 1212 register vifi_t vifi; 1213 register struct vif *vifp; 1214 1215 /* 1216 * Don't forward if it didn't arrive from the parent vif for its origin. 1217 * Notes: v_ifp is zero for src route tunnels, multicast_decap_if 1218 * for encapsulated tunnels and a real ifnet for non-tunnels so 1219 * the first part of the if catches wrong physical interface or 1220 * tunnel type; v_rmt_addr is zero for non-tunneled packets so 1221 * the 2nd part catches both packets that arrive via a tunnel 1222 * that shouldn't and packets that arrive via the wrong tunnel. 1223 */ 1224 vifi = rt->mfc_parent; 1225 if (viftable[vifi].v_ifp != ifp || 1226 (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { 1227 /* came in the wrong interface */ 1228 if (mrtdebug) 1229 log(LOG_DEBUG, "wrong if: ifp %x vifi %d\n", 1230 ifp, vifi); 1231 ++mrtstat.mrts_wrong_if; 1232 return (int)tunnel_src; 1233 } 1234 1235 /* increment the interface and s-g counters */ 1236 viftable[vifi].v_pkt_in++; 1237 rt->mfc_pkt_cnt++; 1238 1239 /* 1240 * For each vif, decide if a copy of the packet should be forwarded. 
1241 * Forward if: 1242 * - the ttl exceeds the vif's threshold 1243 * - there are group members downstream on interface 1244 */ 1245 #define MC_SEND(ip,vifp,m) { \ 1246 (vifp)->v_pkt_out++; \ 1247 if ((vifp)->v_flags & VIFF_SRCRT) \ 1248 srcrt_send((ip), (vifp), (m)); \ 1249 else if ((vifp)->v_flags & VIFF_TUNNEL) \ 1250 encap_send((ip), (vifp), (m)); \ 1251 else \ 1252 phyint_send((ip), (vifp), (m)); \ 1253 } 1254 1255 /* If no options or the imo_multicast_vif option is 0, don't do this part 1256 */ 1257 if ((imo != NULL) && 1258 (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) 1259 { 1260 MC_SEND(ip,viftable+vifi,m); 1261 return (1); /* make sure we are done: No more physical sends */ 1262 } 1263 1264 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1265 if ((rt->mfc_ttls[vifi] > 0) && 1266 (ip->ip_ttl > rt->mfc_ttls[vifi])) 1267 MC_SEND(ip, vifp, m); 1268 1269 return 0; 1270 } 1271 1272 /* check if a vif number is legal/ok. This is used by ip_output, to export 1273 * numvifs there, 1274 */ 1275 int 1276 X_legal_vif_num(vif) 1277 int vif; 1278 { if (vif>=0 && vif<=numvifs) 1279 return(1); 1280 else 1281 return(0); 1282 } 1283 1284 #ifndef MROUTE_LKM 1285 int (*legal_vif_num)(int) = X_legal_vif_num; 1286 #endif 1287 1288 static void 1289 phyint_send(ip, vifp, m) 1290 struct ip *ip; 1291 struct vif *vifp; 1292 struct mbuf *m; 1293 { 1294 register struct mbuf *mb_copy; 1295 int hlen = ip->ip_hl << 2; 1296 register struct ip_moptions *imo; 1297 1298 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1299 return; 1300 1301 /* 1302 * Make sure the header isn't in an cluster, because the sharing 1303 * in clusters defeats the whole purpose of making the copy above. 
1304 */ 1305 mb_copy = m_pullup(mb_copy, hlen); 1306 if (mb_copy == NULL) 1307 return; 1308 1309 MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); 1310 if (imo == NULL) { 1311 m_freem(mb_copy); 1312 return; 1313 } 1314 1315 imo->imo_multicast_ifp = vifp->v_ifp; 1316 imo->imo_multicast_ttl = ip->ip_ttl - 1; 1317 imo->imo_multicast_loop = 1; 1318 1319 if (vifp->v_rate_limit <= 0) 1320 tbf_send_packet(vifp, mb_copy, imo); 1321 else 1322 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, 1323 imo); 1324 } 1325 1326 static void 1327 srcrt_send(ip, vifp, m) 1328 struct ip *ip; 1329 struct vif *vifp; 1330 struct mbuf *m; 1331 { 1332 struct mbuf *mb_copy, *mb_opts; 1333 int hlen = ip->ip_hl << 2; 1334 register struct ip *ip_copy; 1335 u_char *cp; 1336 1337 /* 1338 * Make sure that adding the tunnel options won't exceed the 1339 * maximum allowed number of option bytes. 1340 */ 1341 if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { 1342 mrtstat.mrts_cant_tunnel++; 1343 if (mrtdebug) 1344 log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u\n", 1345 ntohl(ip->ip_src.s_addr)); 1346 return; 1347 } 1348 1349 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1350 return; 1351 1352 MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); 1353 if (mb_opts == NULL) { 1354 m_freem(mb_copy); 1355 return; 1356 } 1357 /* 1358 * 'Delete' the base ip header from the mb_copy chain 1359 */ 1360 mb_copy->m_len -= hlen; 1361 mb_copy->m_data += hlen; 1362 /* 1363 * Make mb_opts be the new head of the packet chain. 
1364 * Any options of the packet were left in the old packet chain head 1365 */ 1366 mb_opts->m_next = mb_copy; 1367 mb_opts->m_len = hlen + TUNNEL_LEN; 1368 mb_opts->m_data += MSIZE - mb_opts->m_len; 1369 mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN; 1370 /* 1371 * Copy the base ip header from the mb_copy chain to the new head mbuf 1372 */ 1373 ip_copy = mtod(mb_opts, struct ip *); 1374 bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), hlen); 1375 ip_copy->ip_ttl--; 1376 ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ 1377 /* 1378 * Adjust the ip header length to account for the tunnel options. 1379 */ 1380 ip_copy->ip_hl += TUNNEL_LEN >> 2; 1381 ip_copy->ip_len += TUNNEL_LEN; 1382 /* 1383 * Add the NOP and LSRR after the base ip header 1384 */ 1385 cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; 1386 *cp++ = IPOPT_NOP; 1387 *cp++ = IPOPT_LSRR; 1388 *cp++ = 11; /* LSRR option length */ 1389 *cp++ = 8; /* LSSR pointer to second element */ 1390 *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ 1391 cp += 4; 1392 *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ 1393 1394 if (vifp->v_rate_limit <= 0) 1395 tbf_send_packet(vifp, mb_opts, 0); 1396 else 1397 tbf_control(vifp, mb_opts, 1398 mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); 1399 } 1400 1401 static void 1402 encap_send(ip, vifp, m) 1403 register struct ip *ip; 1404 register struct vif *vifp; 1405 register struct mbuf *m; 1406 { 1407 register struct mbuf *mb_copy; 1408 register struct ip *ip_copy; 1409 int hlen = ip->ip_hl << 2; 1410 register int i, len = ip->ip_len; 1411 1412 /* 1413 * copy the old packet & pullup it's IP header into the 1414 * new mbuf so we can modify it. Try to fill the new 1415 * mbuf since if we don't the ethernet driver will. 
1416 */ 1417 MGET(mb_copy, M_DONTWAIT, MT_DATA); 1418 if (mb_copy == NULL) 1419 return; 1420 mb_copy->m_data += 16; 1421 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1422 1423 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { 1424 m_freem(mb_copy); 1425 return; 1426 } 1427 i = MHLEN - M_LEADINGSPACE(mb_copy); 1428 if (i > len) 1429 i = len; 1430 mb_copy = m_pullup(mb_copy, i); 1431 if (mb_copy == NULL) 1432 return; 1433 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1434 1435 /* 1436 * fill in the encapsulating IP header. 1437 */ 1438 ip_copy = mtod(mb_copy, struct ip *); 1439 *ip_copy = multicast_encap_iphdr; 1440 ip_copy->ip_id = htons(ip_id++); 1441 ip_copy->ip_len += len; 1442 ip_copy->ip_src = vifp->v_lcl_addr; 1443 ip_copy->ip_dst = vifp->v_rmt_addr; 1444 1445 /* 1446 * turn the encapsulated IP header back into a valid one. 1447 */ 1448 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1449 --ip->ip_ttl; 1450 HTONS(ip->ip_len); 1451 HTONS(ip->ip_off); 1452 ip->ip_sum = 0; 1453 #if defined(LBL) && !defined(ultrix) 1454 ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); 1455 #else 1456 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1457 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1458 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1459 #endif 1460 1461 if (vifp->v_rate_limit <= 0) 1462 tbf_send_packet(vifp, mb_copy, 0); 1463 else 1464 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); 1465 } 1466 1467 /* 1468 * De-encapsulate a packet and feed it back through ip input (this 1469 * routine is called whenever IP gets a packet with proto type 1470 * ENCAP_PROTO and a local destination address). 
1471 */ 1472 void 1473 #ifdef MROUTE_LKM 1474 X_multiencap_decap(m) 1475 #else 1476 multiencap_decap(m) 1477 #endif 1478 register struct mbuf *m; 1479 { 1480 struct ifnet *ifp = m->m_pkthdr.rcvif; 1481 register struct ip *ip = mtod(m, struct ip *); 1482 register int hlen = ip->ip_hl << 2; 1483 register int s; 1484 register struct ifqueue *ifq; 1485 register struct vif *vifp; 1486 1487 if (ip->ip_p != ENCAP_PROTO) { 1488 rip_input(m); 1489 return; 1490 } 1491 /* 1492 * dump the packet if it's not to a multicast destination or if 1493 * we don't have an encapsulating tunnel with the source. 1494 * Note: This code assumes that the remote site IP address 1495 * uniquely identifies the tunnel (i.e., that this site has 1496 * at most one tunnel with the remote site). 1497 */ 1498 if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { 1499 ++mrtstat.mrts_bad_tunnel; 1500 m_freem(m); 1501 return; 1502 } 1503 if (ip->ip_src.s_addr != last_encap_src) { 1504 register struct vif *vife; 1505 1506 vifp = viftable; 1507 vife = vifp + numvifs; 1508 last_encap_src = ip->ip_src.s_addr; 1509 last_encap_vif = 0; 1510 for ( ; vifp < vife; ++vifp) 1511 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 1512 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 1513 == VIFF_TUNNEL) 1514 last_encap_vif = vifp; 1515 break; 1516 } 1517 } 1518 if ((vifp = last_encap_vif) == 0) { 1519 last_encap_src = 0; 1520 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1521 m_freem(m); 1522 if (mrtdebug) 1523 log(LOG_DEBUG, "ip_mforward: no tunnel with %x\n", 1524 ntohl(ip->ip_src.s_addr)); 1525 return; 1526 } 1527 ifp = vifp->v_ifp; 1528 1529 if (hlen > IP_HDR_LEN) 1530 ip_stripoptions(m, (struct mbuf *) 0); 1531 m->m_data += IP_HDR_LEN; 1532 m->m_len -= IP_HDR_LEN; 1533 m->m_pkthdr.len -= IP_HDR_LEN; 1534 m->m_pkthdr.rcvif = ifp; 1535 1536 ifq = &ipintrq; 1537 s = splimp(); 1538 if (IF_QFULL(ifq)) { 1539 IF_DROP(ifq); 1540 m_freem(m); 1541 } else { 1542 IF_ENQUEUE(ifq, m); 1543 /* 1544 * normally 
we would need a "schednetisr(NETISR_IP)" 1545 * here but we were called by ip_input and it is going 1546 * to loop back & try to dequeue the packet we just 1547 * queued as soon as we return so we avoid the 1548 * unnecessary software interrrupt. 1549 */ 1550 } 1551 splx(s); 1552 } 1553 1554 /* 1555 * Token bucket filter module 1556 */ 1557 void 1558 tbf_control(vifp, m, ip, p_len, imo) 1559 register struct vif *vifp; 1560 register struct mbuf *m; 1561 register struct ip *ip; 1562 register u_long p_len; 1563 struct ip_moptions *imo; 1564 { 1565 tbf_update_tokens(vifp); 1566 1567 /* if there are enough tokens, 1568 * and the queue is empty, 1569 * send this packet out 1570 */ 1571 1572 if (vifp->v_tbf->q_len == 0) { 1573 if (p_len <= vifp->v_tbf->n_tok) { 1574 vifp->v_tbf->n_tok -= p_len; 1575 tbf_send_packet(vifp, m, imo); 1576 } else if (p_len > MAX_BKT_SIZE) { 1577 /* drop if packet is too large */ 1578 mrtstat.mrts_pkt2large++; 1579 m_freem(m); 1580 return; 1581 } else { 1582 /* queue packet and timeout till later */ 1583 tbf_queue(vifp, m, ip, imo); 1584 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1585 } 1586 } else if (vifp->v_tbf->q_len < MAXQSIZE) { 1587 /* finite queue length, so queue pkts and process queue */ 1588 tbf_queue(vifp, m, ip, imo); 1589 tbf_process_q(vifp); 1590 } else { 1591 /* queue length too much, try to dq and queue and process */ 1592 if (!tbf_dq_sel(vifp, ip)) { 1593 mrtstat.mrts_q_overflow++; 1594 m_freem(m); 1595 return; 1596 } else { 1597 tbf_queue(vifp, m, ip, imo); 1598 tbf_process_q(vifp); 1599 } 1600 } 1601 return; 1602 } 1603 1604 /* 1605 * adds a packet to the queue at the interface 1606 */ 1607 void 1608 tbf_queue(vifp, m, ip, imo) 1609 register struct vif *vifp; 1610 register struct mbuf *m; 1611 register struct ip *ip; 1612 struct ip_moptions *imo; 1613 { 1614 register u_long ql; 1615 register int index = (vifp - viftable); 1616 register int s = splnet(); 1617 1618 ql = vifp->v_tbf->q_len; 1619 1620 qtable[index][ql].pkt_m = 
m; 1621 qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; 1622 qtable[index][ql].pkt_ip = ip; 1623 qtable[index][ql].pkt_imo = imo; 1624 1625 vifp->v_tbf->q_len++; 1626 splx(s); 1627 } 1628 1629 1630 /* 1631 * processes the queue at the interface 1632 */ 1633 void 1634 tbf_process_q(vifp) 1635 register struct vif *vifp; 1636 { 1637 register struct pkt_queue pkt_1; 1638 register int index = (vifp - viftable); 1639 register int s = splnet(); 1640 1641 /* loop through the queue at the interface and send as many packets 1642 * as possible 1643 */ 1644 while (vifp->v_tbf->q_len > 0) { 1645 /* locate the first packet */ 1646 pkt_1.pkt_len = ((qtable[index][0]).pkt_len); 1647 pkt_1.pkt_m = (qtable[index][0]).pkt_m; 1648 pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; 1649 pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; 1650 1651 /* determine if the packet can be sent */ 1652 if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { 1653 /* if so, 1654 * reduce no of tokens, dequeue the queue, 1655 * send the packet. 
1656 */ 1657 vifp->v_tbf->n_tok -= pkt_1.pkt_len; 1658 1659 tbf_dequeue(vifp, 0); 1660 1661 tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); 1662 1663 } else break; 1664 } 1665 splx(s); 1666 } 1667 1668 /* 1669 * removes the jth packet from the queue at the interface 1670 */ 1671 void 1672 tbf_dequeue(vifp,j) 1673 register struct vif *vifp; 1674 register int j; 1675 { 1676 register u_long index = vifp - viftable; 1677 register int i; 1678 1679 for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { 1680 qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; 1681 qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; 1682 qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; 1683 qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; 1684 } 1685 qtable[index][i-1].pkt_m = NULL; 1686 qtable[index][i-1].pkt_len = NULL; 1687 qtable[index][i-1].pkt_ip = NULL; 1688 qtable[index][i-1].pkt_imo = NULL; 1689 1690 vifp->v_tbf->q_len--; 1691 1692 if (tbfdebug > 1) 1693 log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d\n",vifp-viftable, i-1); 1694 } 1695 1696 void 1697 tbf_reprocess_q(xvifp) 1698 void *xvifp; 1699 { 1700 register struct vif *vifp = xvifp; 1701 if (ip_mrouter == NULL) 1702 return; 1703 1704 tbf_update_tokens(vifp); 1705 1706 tbf_process_q(vifp); 1707 1708 if (vifp->v_tbf->q_len) 1709 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1710 } 1711 1712 /* function that will selectively discard a member of the queue 1713 * based on the precedence value and the priority obtained through 1714 * a lookup table - not yet implemented accurately! 
 */
int
tbf_dq_sel(vifp, ip)
	register struct vif *vifp;
	register struct ip *ip;
{
	register int i;
	register int s = splnet();
	register u_int p;

	/* priority of the packet that wants to get in */
	p = priority(vifp, ip);

	/*
	 * scan from the tail for the first queued packet with lower
	 * priority than the newcomer; drop it to make room.
	 * Returns 1 if a slot was freed, 0 if nothing could be dropped.
	 */
	for(i=vifp->v_tbf->q_len-1;i >= 0;i--) {
		if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
			m_freem(qtable[vifp-viftable][i].pkt_m);
			tbf_dequeue(vifp,i);
			splx(s);
			mrtstat.mrts_drop_sel++;
			return(1);
		}
	}
	splx(s);
	return(0);
}

/*
 * Actually transmit one packet via ip_output(), dispatching on the vif
 * type.  For the physical-interface case the ip_moptions allocated by
 * phyint_send() is freed here after the send.
 */
void
tbf_send_packet(vifp, m, imo)
	register struct vif *vifp;
	register struct mbuf *m;
	struct ip_moptions *imo;
{
	int error;
	int s = splnet();

	/* if source route tunnels */
	if (vifp->v_flags & VIFF_SRCRT) {
		error = ip_output(m, (struct mbuf *)0, (struct route *)0,
		    IP_FORWARDING, imo);
		if (mrtdebug > 1)
			log(LOG_DEBUG, "srcrt_send on vif %d err %d\n", vifp-viftable, error);
	} else if (vifp->v_flags & VIFF_TUNNEL) {
		/* If tunnel options */
		ip_output(m, (struct mbuf *)0, (struct route *)0,
		    IP_FORWARDING, imo);
	} else {
		/* if physical interface option, extract the options and then send */
		error = ip_output(m, (struct mbuf *)0, (struct route *)0,
		    IP_FORWARDING, imo);
		FREE(imo, M_IPMOPTS);

		if (mrtdebug > 1)
			log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp-viftable, error);
	}
	splx(s);
}

/* determine the current time and then
 * the elapsed time (between the last time and time now)
 * in milliseconds & update the no. of tokens in the bucket
 */
void
tbf_update_tokens(vifp)
	register struct vif *vifp;
{
	struct timeval tp;
	register u_long t;
	register u_long elapsed;
	register int s = splnet();

	GET_TIME(tp);

	/* current time in milliseconds */
	t = tp.tv_sec*1000 + tp.tv_usec/1000;

	/*
	 * tokens accrue at v_rate_limit kbits/sec: ms * (kbit/s) / 8
	 * gives bytes, capped at one full bucket below
	 */
	elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
	vifp->v_tbf->n_tok += elapsed;
	vifp->v_tbf->last_pkt_t = t;

	if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
		vifp->v_tbf->n_tok = MAX_BKT_SIZE;

	splx(s);
}

/*
 * Crude static priority classifier used by tbf_dq_sel(): maps a few
 * well-known MBone/IETF group addresses to priorities, default 50.
 */
static int
priority(vifp, ip)
	register struct vif *vifp;
	register struct ip *ip;
{
	register u_long graddr;
	register int prio;

	/* temporary hack; will add general packet classifier some day */

	prio = 50; /* default priority */

	/*
	 * check for source route options and add option length to get dst
	 * NOTE(review): `ip + 8' advances by 8 * sizeof(struct ip) (160
	 * bytes), which does not look like the LSRR option length -- verify
	 * against the srcrt_send() encapsulation layout.
	 */
	if (vifp->v_flags & VIFF_SRCRT)
		graddr = ntohl((ip+8)->ip_dst.s_addr);
	else
		graddr = ntohl(ip->ip_dst.s_addr);

	/* dispatch on the low nibble, then confirm the full group address */
	switch (graddr & 0xf) {
	case 0x0: break;
	case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
		  break;
	case 0x2: break;
	case 0x3: break;
	case 0x4: break;
	case 0x5: break;
	case 0x6: break;
	case 0x7: break;
	case 0x8: break;
	case 0x9: break;
	case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
		  break;
	case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
		  break;
	case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
		  break;
	case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
		  break;
	case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
		  break;
	case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
		  break;
	}

	if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d\n", graddr, prio);

	return prio;
}

/*
 * End of token bucket filter modifications
 */

#ifdef MROUTE_LKM
#include <sys/conf.h>
#include <sys/exec.h>
#include <sys/sysent.h>
#include <sys/lkm.h>

MOD_MISC("ip_mroute_mod")

/*
 * Loadable-kernel-module glue: on load, save the dummy function pointers
 * and splice in the real X_* implementations (including the ENCAP_PROTO
 * input hook in inetsw[]); on unload, restore the saved pointers.
 */
static int
ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
{
	int i;	/* NOTE(review): unused, as is `args' below */
	struct lkm_misc *args = lkmtp->private.lkm_misc;
	int err = 0;

	switch(cmd) {
		/* saved originals so LKM_E_UNLOAD can restore them */
		static int (*old_ip_mrouter_cmd)();
		static int (*old_ip_mrouter_done)();
		static int (*old_ip_mforward)();
		static int (*old_mrt_ioctl)();
		static void (*old_proto4_input)();
		static int (*old_legal_vif_num)();
		extern struct protosw inetsw[];

	case LKM_E_LOAD:
		/* refuse a second load or a statically configured mrouter */
		if(lkmexists(lkmtp) || ip_mrtproto)
			return(EEXIST);
		old_ip_mrouter_cmd = ip_mrouter_cmd;
		ip_mrouter_cmd = X_ip_mrouter_cmd;
		old_ip_mrouter_done = ip_mrouter_done;
		ip_mrouter_done = X_ip_mrouter_done;
		old_ip_mforward = ip_mforward;
		ip_mforward = X_ip_mforward;
		old_mrt_ioctl = mrt_ioctl;
		mrt_ioctl = X_mrt_ioctl;
		old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input;
		inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_multiencap_decap;
		old_legal_vif_num = legal_vif_num;
		legal_vif_num = X_legal_vif_num;
		ip_mrtproto = IGMP_DVMRP;

		printf("\nIP multicast routing loaded\n");
		break;

	case LKM_E_UNLOAD:
		/* cannot unload while a routing daemon holds the socket */
		if (ip_mrouter)
			return EINVAL;

		ip_mrouter_cmd = old_ip_mrouter_cmd;
		ip_mrouter_done = old_ip_mrouter_done;
		ip_mforward = old_ip_mforward;
		mrt_ioctl = old_mrt_ioctl;
		inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input;
		legal_vif_num = old_legal_vif_num;
		ip_mrtproto = 0;
		break;

	default:
		err = EINVAL;
		break;
	}

	return(err);
}

/*
 * LKM entry point: hand both load and unload to ip_mroute_mod_handle.
 */
int
ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
	    nosys);
}

#endif /* MROUTE_LKM */
#endif /* MROUTING */