xref: /freebsd/sys/netinet/ip_mroute.c (revision 5521ff5a4d1929056e7ffc982fac3341ca54df7c)
1 /*
2  * IP multicast forwarding procedures
3  *
4  * Written by David Waitzman, BBN Labs, August 1988.
5  * Modified by Steve Deering, Stanford, February 1989.
6  * Modified by Mark J. Steiglitz, Stanford, May, 1991
7  * Modified by Van Jacobson, LBL, January 1993
8  * Modified by Ajit Thyagarajan, PARC, August 1993
9  * Modified by Bill Fenner, PARC, April 1995
10  *
11  * MROUTING Revision: 3.5
12  * $FreeBSD$
13  */
14 
15 #include "opt_mrouting.h"
16 #include "opt_random_ip_id.h"
17 
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/malloc.h>
21 #include <sys/mbuf.h>
22 #include <sys/socket.h>
23 #include <sys/socketvar.h>
24 #include <sys/protosw.h>
25 #include <sys/time.h>
26 #include <sys/kernel.h>
27 #include <sys/sockio.h>
28 #include <sys/syslog.h>
29 #include <net/if.h>
30 #include <net/route.h>
31 #include <netinet/in.h>
32 #include <netinet/in_systm.h>
33 #include <netinet/ip.h>
34 #include <netinet/ip_var.h>
35 #include <netinet/in_var.h>
36 #include <netinet/igmp.h>
37 #include <netinet/ip_mroute.h>
38 #include <netinet/udp.h>
39 #include <machine/in_cksum.h>
40 
/*
 * In-place byte-order conversion helpers.  They are supplied here only
 * when the system headers have not already defined NTOHL.
 */
#ifndef NTOHL
#if BYTE_ORDER == BIG_ENDIAN
/* Nothing to convert in this configuration: the helpers expand to nothing. */
#define NTOHL(d)
#define NTOHS(d)
#define HTONL(d)
#define HTONS(d)
#else
/* Rewrite the operand in place with its converted value. */
#define NTOHL(d) ((d) = ntohl((d)))
#define NTOHS(d) ((d) = ntohs((u_short)(d)))
#define HTONL(d) ((d) = htonl((d)))
#define HTONS(d) ((d) = htons((u_short)(d)))
#endif
#endif
54 
55 #ifndef MROUTING
56 extern u_long	_ip_mcast_src __P((int vifi));
57 extern int	_ip_mforward __P((struct ip *ip, struct ifnet *ifp,
58 				  struct mbuf *m, struct ip_moptions *imo));
59 extern int	_ip_mrouter_done __P((void));
60 extern int	_ip_mrouter_get __P((struct socket *so, struct sockopt *sopt));
61 extern int	_ip_mrouter_set __P((struct socket *so, struct sockopt *sopt));
62 extern int	_mrt_ioctl __P((int req, caddr_t data, struct proc *p));
63 
64 /*
65  * Dummy routines and globals used when multicast routing is not compiled in.
66  */
67 
68 struct socket  *ip_mrouter  = NULL;
69 u_int		rsvpdebug = 0;
70 
/*
 * Stub setsockopt handler: always reports that the operation is
 * unsupported.  Neither argument is examined.
 */
int
_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
{
	return (EOPNOTSUPP);
}

/* Hook variable, bound to the stub above in this configuration. */
int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set;
80 
81 
/*
 * Stub getsockopt handler: always reports that the operation is
 * unsupported.  Neither argument is examined.
 */
int
_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
{
	return (EOPNOTSUPP);
}

/* Hook variable, bound to the stub above in this configuration. */
int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get;
91 
/*
 * Stub shutdown routine: there is nothing to tear down, so it simply
 * returns 0.
 */
int
_ip_mrouter_done(void)
{
	return (0);
}

/* Hook variable, bound to the stub above in this configuration. */
int (*ip_mrouter_done)(void) = _ip_mrouter_done;
99 
/*
 * Stub forwarding routine: does nothing with the packet and returns 0.
 * None of the arguments is examined.
 */
int
_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
	     struct ip_moptions *imo)
{
	return (0);
}

/* Hook variable, bound to the stub above in this configuration. */
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = _ip_mforward;
112 
113 int
114 _mrt_ioctl(int req, caddr_t data, struct proc *p)
115 {
116 	return EOPNOTSUPP;
117 }
118 
119 int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
120 
121 void
122 rsvp_input(m, off, proto)		/* XXX must fixup manually */
123 	struct mbuf *m;
124 	int off;
125 	int proto;
126 {
127     /* Can still get packets with rsvp_on = 0 if there is a local member
128      * of the group to which the RSVP packet is addressed.  But in this
129      * case we want to throw the packet away.
130      */
131     if (!rsvp_on) {
132 	m_freem(m);
133 	return;
134     }
135 
136     if (ip_rsvpd != NULL) {
137 	if (rsvpdebug)
138 	    printf("rsvp_input: Sending packet up old-style socket\n");
139 	rip_input(m, off, proto);
140 	return;
141     }
142     /* Drop the packet */
143     m_freem(m);
144 }
145 
/*
 * Without multicast routing there is no decapsulation to perform;
 * the packet is passed straight on to rip_input().
 */
void
ipip_input(struct mbuf *m, int off, int proto)	/* XXX must fixup manually */
{
	rip_input(m, off, proto);
}
149 
/* No vif-number validator is installed in this configuration. */
int (*legal_vif_num)(int) = NULL;
151 
152 /*
153  * This should never be called, since IP_MULTICAST_VIF should fail, but
154  * just in case it does get called, the code a little lower in ip_output
155  * will assign the packet a local address.
156  */
157 u_long
158 _ip_mcast_src(int vifi) { return INADDR_ANY; }
159 u_long (*ip_mcast_src)(int) = _ip_mcast_src;
160 
/*
 * Stub: RSVP vif initialisation is rejected with EINVAL.
 * Neither argument is examined.
 */
int
ip_rsvp_vif_init(struct socket *so, struct sockopt *sopt)
{
	return (EINVAL);
}
168 
/*
 * Stub: RSVP vif teardown is rejected with EINVAL.
 * Neither argument is examined.
 */
int
ip_rsvp_vif_done(struct socket *so, struct sockopt *sopt)
{
	return (EINVAL);
}
176 
/*
 * Stub: nothing to release, so this is a no-op.
 */
void
ip_rsvp_force_done(struct socket *so)
{
}
183 
184 #else /* MROUTING */
185 
186 #define M_HASCL(m)	((m)->m_flags & M_EXT)
187 
188 #define INSIZ		sizeof(struct in_addr)
189 #define	same(a1, a2) \
190 	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
191 
192 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables");
193 
194 /*
195  * Globals.  All but ip_mrouter and ip_mrtproto could be static,
196  * except for netstat or debugging purposes.
197  */
198 #ifndef MROUTE_LKM
199 struct socket  *ip_mrouter  = NULL;
200 static struct mrtstat	mrtstat;
201 #else /* MROUTE_LKM */
202 extern void	X_ipip_input __P((struct mbuf *m, int iphlen));
203 extern struct mrtstat mrtstat;
204 static int ip_mrtproto;
205 #endif
206 
207 #define NO_RTE_FOUND 	0x1
208 #define RTE_FOUND	0x2
209 
210 static struct mfc	*mfctable[MFCTBLSIZ];
211 static u_char		nexpire[MFCTBLSIZ];
212 static struct vif	viftable[MAXVIFS];
213 static u_int	mrtdebug = 0;	  /* debug level 	*/
214 #define		DEBUG_MFC	0x02
215 #define		DEBUG_FORWARD	0x04
216 #define		DEBUG_EXPIRE	0x08
217 #define		DEBUG_XMIT	0x10
218 static u_int  	tbfdebug = 0;     /* tbf debug level 	*/
219 static u_int	rsvpdebug = 0;	  /* rsvp debug level   */
220 
221 static struct callout_handle expire_upcalls_ch;
222 
223 #define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
224 #define		UPCALL_EXPIRE	6		/* number of timeouts	*/
225 
226 /*
227  * Define the token bucket filter structures
228  * tbftable -> each vif has one of these for storing info
229  */
230 
231 static struct tbf tbftable[MAXVIFS];
232 #define		TBF_REPROCESS	(hz / 100)	/* 100x / second */
233 
234 /*
235  * 'Interfaces' associated with decapsulator (so we can tell
236  * packets that went through it from ones that get reflected
237  * by a broken gateway).  These interfaces are never linked into
238  * the system ifnet list & no routes point to them.  I.e., packets
239  * can't be sent this way.  They only exist as a placeholder for
240  * multicast source verification.
241  */
242 static struct ifnet multicast_decap_if[MAXVIFS];
243 
244 #define ENCAP_TTL 64
245 #define ENCAP_PROTO IPPROTO_IPIP	/* 4 */
246 
247 /* prototype IP hdr for encapsulated packets */
248 static struct ip multicast_encap_iphdr = {
249 #if BYTE_ORDER == LITTLE_ENDIAN
250 	sizeof(struct ip) >> 2, IPVERSION,
251 #else
252 	IPVERSION, sizeof(struct ip) >> 2,
253 #endif
254 	0,				/* tos */
255 	sizeof(struct ip),		/* total length */
256 	0,				/* id */
257 	0,				/* frag offset */
258 	ENCAP_TTL, ENCAP_PROTO,
259 	0,				/* checksum */
260 };
261 
262 /*
263  * Private variables.
264  */
265 static vifi_t	   numvifs = 0;
266 static int have_encap_tunnel = 0;
267 
268 /*
269  * one-back cache used by ipip_input to locate a tunnel's vif
270  * given a datagram's src ip address.
271  */
272 static u_long last_encap_src;
273 static struct vif *last_encap_vif;
274 
275 static u_long	X_ip_mcast_src __P((int vifi));
276 static int	X_ip_mforward __P((struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo));
277 static int	X_ip_mrouter_done __P((void));
278 static int	X_ip_mrouter_get __P((struct socket *so, struct sockopt *m));
279 static int	X_ip_mrouter_set __P((struct socket *so, struct sockopt *m));
280 static int	X_legal_vif_num __P((int vif));
281 static int	X_mrt_ioctl __P((int cmd, caddr_t data));
282 
283 static int get_sg_cnt(struct sioc_sg_req *);
284 static int get_vif_cnt(struct sioc_vif_req *);
285 static int ip_mrouter_init(struct socket *, int);
286 static int add_vif(struct vifctl *);
287 static int del_vif(vifi_t);
288 static int add_mfc(struct mfcctl *);
289 static int del_mfc(struct mfcctl *);
290 static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
291 static int set_assert(int);
292 static void expire_upcalls(void *);
293 static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
294 		  vifi_t);
295 static void phyint_send(struct ip *, struct vif *, struct mbuf *);
296 static void encap_send(struct ip *, struct vif *, struct mbuf *);
297 static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long);
298 static void tbf_queue(struct vif *, struct mbuf *);
299 static void tbf_process_q(struct vif *);
300 static void tbf_reprocess_q(void *);
301 static int tbf_dq_sel(struct vif *, struct ip *);
302 static void tbf_send_packet(struct vif *, struct mbuf *);
303 static void tbf_update_tokens(struct vif *);
304 static int priority(struct vif *, struct ip *);
305 void multiencap_decap(struct mbuf *);
306 
/*
 * whether or not special PIM assert processing is enabled.
 * Written by set_assert() and cleared on init and shutdown.
 */
static int pim_assert;

/*
 * Rate limit for assert notification messages, in usec
 */
#define ASSERT_MSG_TIME		3000000
315 
/*
 * Hash function for a source, group entry: the two addresses, each
 * folded with itself shifted right by 10 and by 20 bits, are XORed
 * together and reduced by MFCHASHMOD.
 */
#define MFCHASH(a, g) \
	MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
		   ((g) >> 20) ^ ((g) >> 10) ^ (g))
321 
/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 *
 * On exit rt is the first entry in the hash chain that matches both
 * addresses and has no stalled packets queued (mfc_stall == NULL), or
 * NULL when there is none.  Every call bumps mrts_mfc_lookups; a NULL
 * result also bumps mrts_mfc_misses.
 */

#define MFCFIND(o, g, rt) { \
	register struct mfc *_ent; \
	\
	++mrtstat.mrts_mfc_lookups; \
	for (_ent = mfctable[MFCHASH(o,g)]; _ent != NULL; \
	     _ent = _ent->mfc_next) { \
		if ((_ent->mfc_origin.s_addr == (o)) && \
		    (_ent->mfc_mcastgrp.s_addr == (g)) && \
		    (_ent->mfc_stall == NULL)) \
			break; \
	} \
	rt = _ent; \
	if (_ent == NULL) { \
		++mrtstat.mrts_mfc_misses; \
	} \
}
344 
345 
/*
 * Macros to compute elapsed time efficiently
 * Borrowed from Van Jacobson's scheduling code
 *
 * TV_DELTA stores (a - b) in microseconds into delta.  Differences of
 * one or two whole seconds are handled by addition; anything else falls
 * back to a multiply.
 */
#define TV_DELTA(a, b, delta) { \
	    register int xxs; \
		\
	    delta = (a).tv_usec - (b).tv_usec; \
	    xxs = (a).tv_sec - (b).tv_sec; \
	    switch (xxs) { \
	    case 0: \
		break; \
	    case 2: \
		delta += 1000000; \
		/* fall through */ \
	    case 1: \
		delta += 1000000; \
		break; \
	    default: \
		delta += (1000000 * xxs); \
	    } \
}

/* True when timeval a is strictly earlier than timeval b. */
#define TV_LT(a, b) ((a).tv_sec < (b).tv_sec || \
	      ((a).tv_sec == (b).tv_sec && (a).tv_usec < (b).tv_usec))
370 
#ifdef UPCALL_TIMING
/* Upcall delay histogram filled in by collate(); index 50 is the overflow slot. */
u_long upcall_data[51];
static void collate(struct timeval *);
#endif /* UPCALL_TIMING */
375 
376 
377 /*
378  * Handle MRT setsockopt commands to modify the multicast routing tables.
379  */
380 static int
381 X_ip_mrouter_set(so, sopt)
382 	struct socket *so;
383 	struct sockopt *sopt;
384 {
385 	int	error, optval;
386 	vifi_t	vifi;
387 	struct	vifctl vifc;
388 	struct	mfcctl mfc;
389 
390 	if (so != ip_mrouter && sopt->sopt_name != MRT_INIT)
391 		return (EPERM);
392 
393 	error = 0;
394 	switch (sopt->sopt_name) {
395 	case MRT_INIT:
396 		error = sooptcopyin(sopt, &optval, sizeof optval,
397 				    sizeof optval);
398 		if (error)
399 			break;
400 		error = ip_mrouter_init(so, optval);
401 		break;
402 
403 	case MRT_DONE:
404 		error = ip_mrouter_done();
405 		break;
406 
407 	case MRT_ADD_VIF:
408 		error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
409 		if (error)
410 			break;
411 		error = add_vif(&vifc);
412 		break;
413 
414 	case MRT_DEL_VIF:
415 		error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
416 		if (error)
417 			break;
418 		error = del_vif(vifi);
419 		break;
420 
421 	case MRT_ADD_MFC:
422 	case MRT_DEL_MFC:
423 		error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc);
424 		if (error)
425 			break;
426 		if (sopt->sopt_name == MRT_ADD_MFC)
427 			error = add_mfc(&mfc);
428 		else
429 			error = del_mfc(&mfc);
430 		break;
431 
432 	case MRT_ASSERT:
433 		error = sooptcopyin(sopt, &optval, sizeof optval,
434 				    sizeof optval);
435 		if (error)
436 			break;
437 		set_assert(optval);
438 		break;
439 
440 	default:
441 		error = EOPNOTSUPP;
442 		break;
443 	}
444 	return (error);
445 }
446 
447 #ifndef MROUTE_LKM
448 int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set;
449 #endif
450 
451 /*
452  * Handle MRT getsockopt commands
453  */
454 static int
455 X_ip_mrouter_get(so, sopt)
456 	struct socket *so;
457 	struct sockopt *sopt;
458 {
459 	int error;
460 	static int version = 0x0305; /* !!! why is this here? XXX */
461 
462 	switch (sopt->sopt_name) {
463 	case MRT_VERSION:
464 		error = sooptcopyout(sopt, &version, sizeof version);
465 		break;
466 
467 	case MRT_ASSERT:
468 		error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert);
469 		break;
470 	default:
471 		error = EOPNOTSUPP;
472 		break;
473 	}
474 	return (error);
475 }
476 
477 #ifndef MROUTE_LKM
478 int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get;
479 #endif
480 
481 /*
482  * Handle ioctl commands to obtain information from the cache
483  */
484 static int
485 X_mrt_ioctl(cmd, data)
486     int cmd;
487     caddr_t data;
488 {
489     int error = 0;
490 
491     switch (cmd) {
492 	case (SIOCGETVIFCNT):
493 	    return (get_vif_cnt((struct sioc_vif_req *)data));
494 	    break;
495 	case (SIOCGETSGCNT):
496 	    return (get_sg_cnt((struct sioc_sg_req *)data));
497 	    break;
498 	default:
499 	    return (EINVAL);
500 	    break;
501     }
502     return error;
503 }
504 
505 #ifndef MROUTE_LKM
506 int (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl;
507 #endif
508 
509 /*
510  * returns the packet, byte, rpf-failure count for the source group provided
511  */
512 static int
513 get_sg_cnt(req)
514     register struct sioc_sg_req *req;
515 {
516     register struct mfc *rt;
517     int s;
518 
519     s = splnet();
520     MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
521     splx(s);
522     if (rt != NULL) {
523 	req->pktcnt = rt->mfc_pkt_cnt;
524 	req->bytecnt = rt->mfc_byte_cnt;
525 	req->wrong_if = rt->mfc_wrong_if;
526     } else
527 	req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
528 
529     return 0;
530 }
531 
532 /*
533  * returns the input and output packet and byte counts on the vif provided
534  */
535 static int
536 get_vif_cnt(req)
537     register struct sioc_vif_req *req;
538 {
539     register vifi_t vifi = req->vifi;
540 
541     if (vifi >= numvifs) return EINVAL;
542 
543     req->icount = viftable[vifi].v_pkt_in;
544     req->ocount = viftable[vifi].v_pkt_out;
545     req->ibytes = viftable[vifi].v_bytes_in;
546     req->obytes = viftable[vifi].v_bytes_out;
547 
548     return 0;
549 }
550 
551 /*
552  * Enable multicast routing
553  */
554 static int
555 ip_mrouter_init(so, version)
556 	struct socket *so;
557 	int version;
558 {
559     if (mrtdebug)
560 	log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
561 		so->so_type, so->so_proto->pr_protocol);
562 
563     if (so->so_type != SOCK_RAW ||
564 	so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
565 
566     if (version != 1)
567 	return ENOPROTOOPT;
568 
569     if (ip_mrouter != NULL) return EADDRINUSE;
570 
571     ip_mrouter = so;
572 
573     bzero((caddr_t)mfctable, sizeof(mfctable));
574     bzero((caddr_t)nexpire, sizeof(nexpire));
575 
576     pim_assert = 0;
577 
578     expire_upcalls_ch = timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
579 
580     if (mrtdebug)
581 	log(LOG_DEBUG, "ip_mrouter_init\n");
582 
583     return 0;
584 }
585 
586 /*
587  * Disable multicast routing
588  */
589 static int
590 X_ip_mrouter_done()
591 {
592     vifi_t vifi;
593     int i;
594     struct ifnet *ifp;
595     struct ifreq ifr;
596     struct mfc *rt;
597     struct rtdetq *rte;
598     int s;
599 
600     s = splnet();
601 
602     /*
603      * For each phyint in use, disable promiscuous reception of all IP
604      * multicasts.
605      */
606     for (vifi = 0; vifi < numvifs; vifi++) {
607 	if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
608 	    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
609 	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
610 	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
611 								= INADDR_ANY;
612 	    ifp = viftable[vifi].v_ifp;
613 	    if_allmulti(ifp, 0);
614 	}
615     }
616     bzero((caddr_t)tbftable, sizeof(tbftable));
617     bzero((caddr_t)viftable, sizeof(viftable));
618     numvifs = 0;
619     pim_assert = 0;
620 
621     untimeout(expire_upcalls, (caddr_t)NULL, expire_upcalls_ch);
622 
623     /*
624      * Free all multicast forwarding cache entries.
625      */
626     for (i = 0; i < MFCTBLSIZ; i++) {
627 	for (rt = mfctable[i]; rt != NULL; ) {
628 	    struct mfc *nr = rt->mfc_next;
629 
630 	    for (rte = rt->mfc_stall; rte != NULL; ) {
631 		struct rtdetq *n = rte->next;
632 
633 		m_freem(rte->m);
634 		free(rte, M_MRTABLE);
635 		rte = n;
636 	    }
637 	    free(rt, M_MRTABLE);
638 	    rt = nr;
639 	}
640     }
641 
642     bzero((caddr_t)mfctable, sizeof(mfctable));
643 
644     /*
645      * Reset de-encapsulation cache
646      */
647     last_encap_src = 0;
648     last_encap_vif = NULL;
649     have_encap_tunnel = 0;
650 
651     ip_mrouter = NULL;
652 
653     splx(s);
654 
655     if (mrtdebug)
656 	log(LOG_DEBUG, "ip_mrouter_done\n");
657 
658     return 0;
659 }
660 
661 #ifndef MROUTE_LKM
662 int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
663 #endif
664 
665 /*
666  * Set PIM assert processing global
667  */
668 static int
669 set_assert(i)
670 	int i;
671 {
672     if ((i != 1) && (i != 0))
673 	return EINVAL;
674 
675     pim_assert = i;
676 
677     return 0;
678 }
679 
680 /*
681  * Add a vif to the vif table
682  */
683 static int
684 add_vif(vifcp)
685     register struct vifctl *vifcp;
686 {
687     register struct vif *vifp = viftable + vifcp->vifc_vifi;
688     static struct sockaddr_in sin = {sizeof sin, AF_INET};
689     struct ifaddr *ifa;
690     struct ifnet *ifp;
691     int error, s;
692     struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
693 
694     if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
695     if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
696 
697     /* Find the interface with an address in AF_INET family */
698     sin.sin_addr = vifcp->vifc_lcl_addr;
699     ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
700     if (ifa == 0) return EADDRNOTAVAIL;
701     ifp = ifa->ifa_ifp;
702 
703     if (vifcp->vifc_flags & VIFF_TUNNEL) {
704 	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
705 		/*
706 		 * An encapsulating tunnel is wanted.  Tell ipip_input() to
707 		 * start paying attention to encapsulated packets.
708 		 */
709 		if (have_encap_tunnel == 0) {
710 			have_encap_tunnel = 1;
711 			for (s = 0; s < MAXVIFS; ++s) {
712 				multicast_decap_if[s].if_name = "mdecap";
713 				multicast_decap_if[s].if_unit = s;
714 			}
715 		}
716 		/*
717 		 * Set interface to fake encapsulator interface
718 		 */
719 		ifp = &multicast_decap_if[vifcp->vifc_vifi];
720 		/*
721 		 * Prepare cached route entry
722 		 */
723 		bzero(&vifp->v_route, sizeof(vifp->v_route));
724 	} else {
725 	    log(LOG_ERR, "source routed tunnels not supported\n");
726 	    return EOPNOTSUPP;
727 	}
728     } else {
729 	/* Make sure the interface supports multicast */
730 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
731 	    return EOPNOTSUPP;
732 
733 	/* Enable promiscuous reception of all IP multicasts from the if */
734 	s = splnet();
735 	error = if_allmulti(ifp, 1);
736 	splx(s);
737 	if (error)
738 	    return error;
739     }
740 
741     s = splnet();
742     /* define parameters for the tbf structure */
743     vifp->v_tbf = v_tbf;
744     GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
745     vifp->v_tbf->tbf_n_tok = 0;
746     vifp->v_tbf->tbf_q_len = 0;
747     vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
748     vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
749 
750     vifp->v_flags     = vifcp->vifc_flags;
751     vifp->v_threshold = vifcp->vifc_threshold;
752     vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
753     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
754     vifp->v_ifp       = ifp;
755     /* scaling up here allows division by 1024 in critical code */
756     vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
757     vifp->v_rsvp_on   = 0;
758     vifp->v_rsvpd     = NULL;
759     /* initialize per vif pkt counters */
760     vifp->v_pkt_in    = 0;
761     vifp->v_pkt_out   = 0;
762     vifp->v_bytes_in  = 0;
763     vifp->v_bytes_out = 0;
764     splx(s);
765 
766     /* Adjust numvifs up if the vifi is higher than numvifs */
767     if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
768 
769     if (mrtdebug)
770 	log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n",
771 	    vifcp->vifc_vifi,
772 	    (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr),
773 	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
774 	    (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr),
775 	    vifcp->vifc_threshold,
776 	    vifcp->vifc_rate_limit);
777 
778     return 0;
779 }
780 
781 /*
782  * Delete a vif from the vif table
783  */
784 static int
785 del_vif(vifi)
786 	vifi_t vifi;
787 {
788     register struct vif *vifp = &viftable[vifi];
789     register struct mbuf *m;
790     struct ifnet *ifp;
791     struct ifreq ifr;
792     int s;
793 
794     if (vifi >= numvifs) return EINVAL;
795     if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
796 
797     s = splnet();
798 
799     if (!(vifp->v_flags & VIFF_TUNNEL)) {
800 	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
801 	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
802 	ifp = vifp->v_ifp;
803 	if_allmulti(ifp, 0);
804     }
805 
806     if (vifp == last_encap_vif) {
807 	last_encap_vif = 0;
808 	last_encap_src = 0;
809     }
810 
811     /*
812      * Free packets queued at the interface
813      */
814     while (vifp->v_tbf->tbf_q) {
815 	m = vifp->v_tbf->tbf_q;
816 	vifp->v_tbf->tbf_q = m->m_act;
817 	m_freem(m);
818     }
819 
820     bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
821     bzero((caddr_t)vifp, sizeof (*vifp));
822 
823     if (mrtdebug)
824       log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs);
825 
826     /* Adjust numvifs down */
827     for (vifi = numvifs; vifi > 0; vifi--)
828 	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
829     numvifs = vifi;
830 
831     splx(s);
832 
833     return 0;
834 }
835 
836 /*
837  * Add an mfc entry
838  */
839 static int
840 add_mfc(mfccp)
841     struct mfcctl *mfccp;
842 {
843     struct mfc *rt;
844     u_long hash;
845     struct rtdetq *rte;
846     register u_short nstl;
847     int s;
848     int i;
849 
850     MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
851 
852     /* If an entry already exists, just update the fields */
853     if (rt) {
854 	if (mrtdebug & DEBUG_MFC)
855 	    log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n",
856 		(u_long)ntohl(mfccp->mfcc_origin.s_addr),
857 		(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
858 		mfccp->mfcc_parent);
859 
860 	s = splnet();
861 	rt->mfc_parent = mfccp->mfcc_parent;
862 	for (i = 0; i < numvifs; i++)
863 	    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
864 	splx(s);
865 	return 0;
866     }
867 
868     /*
869      * Find the entry for which the upcall was made and update
870      */
871     s = splnet();
872     hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
873     for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
874 
875 	if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
876 	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
877 	    (rt->mfc_stall != NULL)) {
878 
879 	    if (nstl++)
880 		log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
881 		    "multiple kernel entries",
882 		    (u_long)ntohl(mfccp->mfcc_origin.s_addr),
883 		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
884 		    mfccp->mfcc_parent, (void *)rt->mfc_stall);
885 
886 	    if (mrtdebug & DEBUG_MFC)
887 		log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
888 		    (u_long)ntohl(mfccp->mfcc_origin.s_addr),
889 		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
890 		    mfccp->mfcc_parent, (void *)rt->mfc_stall);
891 
892 	    rt->mfc_origin     = mfccp->mfcc_origin;
893 	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
894 	    rt->mfc_parent     = mfccp->mfcc_parent;
895 	    for (i = 0; i < numvifs; i++)
896 		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
897 	    /* initialize pkt counters per src-grp */
898 	    rt->mfc_pkt_cnt    = 0;
899 	    rt->mfc_byte_cnt   = 0;
900 	    rt->mfc_wrong_if   = 0;
901 	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
902 
903 	    rt->mfc_expire = 0;	/* Don't clean this guy up */
904 	    nexpire[hash]--;
905 
906 	    /* free packets Qed at the end of this entry */
907 	    for (rte = rt->mfc_stall; rte != NULL; ) {
908 		struct rtdetq *n = rte->next;
909 
910 		ip_mdq(rte->m, rte->ifp, rt, -1);
911 		m_freem(rte->m);
912 #ifdef UPCALL_TIMING
913 		collate(&(rte->t));
914 #endif /* UPCALL_TIMING */
915 		free(rte, M_MRTABLE);
916 		rte = n;
917 	    }
918 	    rt->mfc_stall = NULL;
919 	}
920     }
921 
922     /*
923      * It is possible that an entry is being inserted without an upcall
924      */
925     if (nstl == 0) {
926 	if (mrtdebug & DEBUG_MFC)
927 	    log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
928 		hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr),
929 		(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
930 		mfccp->mfcc_parent);
931 
932 	for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
933 
934 	    if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
935 		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
936 
937 		rt->mfc_origin     = mfccp->mfcc_origin;
938 		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
939 		rt->mfc_parent     = mfccp->mfcc_parent;
940 		for (i = 0; i < numvifs; i++)
941 		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
942 		/* initialize pkt counters per src-grp */
943 		rt->mfc_pkt_cnt    = 0;
944 		rt->mfc_byte_cnt   = 0;
945 		rt->mfc_wrong_if   = 0;
946 		rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
947 		if (rt->mfc_expire)
948 		    nexpire[hash]--;
949 		rt->mfc_expire	   = 0;
950 	    }
951 	}
952 	if (rt == NULL) {
953 	    /* no upcall, so make a new entry */
954 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
955 	    if (rt == NULL) {
956 		splx(s);
957 		return ENOBUFS;
958 	    }
959 
960 	    /* insert new entry at head of hash chain */
961 	    rt->mfc_origin     = mfccp->mfcc_origin;
962 	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
963 	    rt->mfc_parent     = mfccp->mfcc_parent;
964 	    for (i = 0; i < numvifs; i++)
965 		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
966 	    /* initialize pkt counters per src-grp */
967 	    rt->mfc_pkt_cnt    = 0;
968 	    rt->mfc_byte_cnt   = 0;
969 	    rt->mfc_wrong_if   = 0;
970 	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
971 	    rt->mfc_expire     = 0;
972 	    rt->mfc_stall      = NULL;
973 
974 	    /* link into table */
975 	    rt->mfc_next = mfctable[hash];
976 	    mfctable[hash] = rt;
977 	}
978     }
979     splx(s);
980     return 0;
981 }
982 
#ifdef UPCALL_TIMING
/*
 * collect delay statistics on the upcalls
 *
 * t is the time stamp being measured against the current time.  When t
 * is earlier than now, the elapsed microseconds are shifted right by 10
 * and the upcall_data[] slot for that value is incremented, with
 * everything above 50 counted in slot 50.  Nothing is recorded when t
 * is not earlier than now.
 */
static void
collate(struct timeval *t)
{
    struct timeval now;
    u_long elapsed;
    u_long slot;

    GET_TIME(now);

    if (!TV_LT(*t, now))
	return;

    TV_DELTA(now, *t, elapsed);

    slot = elapsed >> 10;
    if (slot > 50)
	slot = 50;

    ++upcall_data[slot];
}
#endif /* UPCALL_TIMING */
1008 
1009 /*
1010  * Delete an mfc entry
1011  */
1012 static int
1013 del_mfc(mfccp)
1014     struct mfcctl *mfccp;
1015 {
1016     struct in_addr 	origin;
1017     struct in_addr 	mcastgrp;
1018     struct mfc 		*rt;
1019     struct mfc	 	**nptr;
1020     u_long 		hash;
1021     int s;
1022 
1023     origin = mfccp->mfcc_origin;
1024     mcastgrp = mfccp->mfcc_mcastgrp;
1025     hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
1026 
1027     if (mrtdebug & DEBUG_MFC)
1028 	log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
1029 	    (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
1030 
1031     s = splnet();
1032 
1033     nptr = &mfctable[hash];
1034     while ((rt = *nptr) != NULL) {
1035 	if (origin.s_addr == rt->mfc_origin.s_addr &&
1036 	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
1037 	    rt->mfc_stall == NULL)
1038 	    break;
1039 
1040 	nptr = &rt->mfc_next;
1041     }
1042     if (rt == NULL) {
1043 	splx(s);
1044 	return EADDRNOTAVAIL;
1045     }
1046 
1047     *nptr = rt->mfc_next;
1048     free(rt, M_MRTABLE);
1049 
1050     splx(s);
1051 
1052     return 0;
1053 }
1054 
1055 /*
1056  * Send a message to mrouted on the multicast routing socket
1057  */
1058 static int
1059 socket_send(s, mm, src)
1060 	struct socket *s;
1061 	struct mbuf *mm;
1062 	struct sockaddr_in *src;
1063 {
1064 	if (s) {
1065 		if (sbappendaddr(&s->so_rcv,
1066 				 (struct sockaddr *)src,
1067 				 mm, (struct mbuf *)0) != 0) {
1068 			sorwakeup(s);
1069 			return 0;
1070 		}
1071 	}
1072 	m_freem(mm);
1073 	return -1;
1074 }
1075 
1076 /*
1077  * IP multicast forwarding function. This function assumes that the packet
1078  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
1079  * pointed to by "ifp", and the packet is to be relayed to other networks
1080  * that have members of the packet's destination IP multicast group.
1081  *
1082  * The packet is returned unscathed to the caller, unless it is
1083  * erroneous, in which case a non-zero return value tells the caller to
1084  * discard it.
1085  */
1086 
/* Byte counts used to recognise a source-route tunnel header. */
#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
#define TUNNEL_LEN  12	/* # bytes of IP option for tunnel encapsulation  */
1089 
1090 static int
1091 X_ip_mforward(ip, ifp, m, imo)
1092     register struct ip *ip;
1093     struct ifnet *ifp;
1094     struct mbuf *m;
1095     struct ip_moptions *imo;
1096 {
1097     register struct mfc *rt;
1098     register u_char *ipoptions;
1099     static struct sockaddr_in 	k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
1100     static int srctun = 0;
1101     register struct mbuf *mm;
1102     int s;
1103     vifi_t vifi;
1104     struct vif *vifp;
1105 
1106     if (mrtdebug & DEBUG_FORWARD)
1107 	log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n",
1108 	    (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr),
1109 	    (void *)ifp);
1110 
1111     if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
1112 	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
1113 	/*
1114 	 * Packet arrived via a physical interface or
1115 	 * an encapsulated tunnel.
1116 	 */
1117     } else {
1118 	/*
1119 	 * Packet arrived through a source-route tunnel.
1120 	 * Source-route tunnels are no longer supported.
1121 	 */
1122 	if ((srctun++ % 1000) == 0)
1123 	    log(LOG_ERR,
1124 		"ip_mforward: received source-routed packet from %lx\n",
1125 		(u_long)ntohl(ip->ip_src.s_addr));
1126 
1127 	return 1;
1128     }
1129 
1130     if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
1131 	if (ip->ip_ttl < 255)
1132 		ip->ip_ttl++;	/* compensate for -1 in *_send routines */
1133 	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1134 	    vifp = viftable + vifi;
1135 	    printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s%d)\n",
1136 		ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi,
1137 		(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1138 		vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
1139 	}
1140 	return (ip_mdq(m, ifp, NULL, vifi));
1141     }
1142     if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1143 	printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n",
1144 	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr));
1145 	if(!imo)
1146 		printf("In fact, no options were specified at all\n");
1147     }
1148 
1149     /*
1150      * Don't forward a packet with time-to-live of zero or one,
1151      * or a packet destined to a local-only group.
1152      */
1153     if (ip->ip_ttl <= 1 ||
1154 	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1155 	return 0;
1156 
1157     /*
1158      * Determine forwarding vifs from the forwarding cache table
1159      */
1160     s = splnet();
1161     MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1162 
1163     /* Entry exists, so forward if necessary */
1164     if (rt != NULL) {
1165 	splx(s);
1166 	return (ip_mdq(m, ifp, rt, -1));
1167     } else {
1168 	/*
1169 	 * If we don't have a route for packet's origin,
1170 	 * Make a copy of the packet &
1171 	 * send message to routing daemon
1172 	 */
1173 
1174 	register struct mbuf *mb0;
1175 	register struct rtdetq *rte;
1176 	register u_long hash;
1177 	int hlen = ip->ip_hl << 2;
1178 #ifdef UPCALL_TIMING
1179 	struct timeval tp;
1180 
1181 	GET_TIME(tp);
1182 #endif
1183 
1184 	mrtstat.mrts_no_route++;
1185 	if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1186 	    log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n",
1187 		(u_long)ntohl(ip->ip_src.s_addr),
1188 		(u_long)ntohl(ip->ip_dst.s_addr));
1189 
1190 	/*
1191 	 * Allocate mbufs early so that we don't do extra work if we are
1192 	 * just going to fail anyway.  Make sure to pullup the header so
1193 	 * that other people can't step on it.
1194 	 */
1195 	rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT);
1196 	if (rte == NULL) {
1197 	    splx(s);
1198 	    return ENOBUFS;
1199 	}
1200 	mb0 = m_copy(m, 0, M_COPYALL);
1201 	if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
1202 	    mb0 = m_pullup(mb0, hlen);
1203 	if (mb0 == NULL) {
1204 	    free(rte, M_MRTABLE);
1205 	    splx(s);
1206 	    return ENOBUFS;
1207 	}
1208 
1209 	/* is there an upcall waiting for this packet? */
1210 	hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1211 	for (rt = mfctable[hash]; rt; rt = rt->mfc_next) {
1212 	    if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
1213 		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1214 		(rt->mfc_stall != NULL))
1215 		break;
1216 	}
1217 
1218 	if (rt == NULL) {
1219 	    int i;
1220 	    struct igmpmsg *im;
1221 
1222 	    /* no upcall, so make a new entry */
1223 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1224 	    if (rt == NULL) {
1225 		free(rte, M_MRTABLE);
1226 		m_freem(mb0);
1227 		splx(s);
1228 		return ENOBUFS;
1229 	    }
1230 	    /* Make a copy of the header to send to the user level process */
1231 	    mm = m_copy(mb0, 0, hlen);
1232 	    if (mm == NULL) {
1233 		free(rte, M_MRTABLE);
1234 		m_freem(mb0);
1235 		free(rt, M_MRTABLE);
1236 		splx(s);
1237 		return ENOBUFS;
1238 	    }
1239 
1240 	    /*
1241 	     * Send message to routing daemon to install
1242 	     * a route into the kernel table
1243 	     */
1244 	    k_igmpsrc.sin_addr = ip->ip_src;
1245 
1246 	    im = mtod(mm, struct igmpmsg *);
1247 	    im->im_msgtype	= IGMPMSG_NOCACHE;
1248 	    im->im_mbz		= 0;
1249 
1250 	    mrtstat.mrts_upcalls++;
1251 
1252 	    if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
1253 		log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
1254 		++mrtstat.mrts_upq_sockfull;
1255 		free(rte, M_MRTABLE);
1256 		m_freem(mb0);
1257 		free(rt, M_MRTABLE);
1258 		splx(s);
1259 		return ENOBUFS;
1260 	    }
1261 
1262 	    /* insert new entry at head of hash chain */
1263 	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1264 	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1265 	    rt->mfc_expire	      = UPCALL_EXPIRE;
1266 	    nexpire[hash]++;
1267 	    for (i = 0; i < numvifs; i++)
1268 		rt->mfc_ttls[i] = 0;
1269 	    rt->mfc_parent = -1;
1270 
1271 	    /* link into table */
1272 	    rt->mfc_next   = mfctable[hash];
1273 	    mfctable[hash] = rt;
1274 	    rt->mfc_stall = rte;
1275 
1276 	} else {
1277 	    /* determine if q has overflowed */
1278 	    int npkts = 0;
1279 	    struct rtdetq **p;
1280 
1281 	    for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
1282 		npkts++;
1283 
1284 	    if (npkts > MAX_UPQ) {
1285 		mrtstat.mrts_upq_ovflw++;
1286 		free(rte, M_MRTABLE);
1287 		m_freem(mb0);
1288 		splx(s);
1289 		return 0;
1290 	    }
1291 
1292 	    /* Add this entry to the end of the queue */
1293 	    *p = rte;
1294 	}
1295 
1296 	rte->m 			= mb0;
1297 	rte->ifp 		= ifp;
1298 #ifdef UPCALL_TIMING
1299 	rte->t			= tp;
1300 #endif
1301 	rte->next		= NULL;
1302 
1303 	splx(s);
1304 
1305 	return 0;
1306     }
1307 }
1308 
1309 #ifndef MROUTE_LKM
1310 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
1311 		   struct ip_moptions *) = X_ip_mforward;
1312 #endif
1313 
1314 /*
1315  * Clean up the cache entry if upcall is not serviced
1316  */
1317 static void
1318 expire_upcalls(void *unused)
1319 {
1320     struct rtdetq *rte;
1321     struct mfc *mfc, **nptr;
1322     int i;
1323     int s;
1324 
1325     s = splnet();
1326     for (i = 0; i < MFCTBLSIZ; i++) {
1327 	if (nexpire[i] == 0)
1328 	    continue;
1329 	nptr = &mfctable[i];
1330 	for (mfc = *nptr; mfc != NULL; mfc = *nptr) {
1331 	    /*
1332 	     * Skip real cache entries
1333 	     * Make sure it wasn't marked to not expire (shouldn't happen)
1334 	     * If it expires now
1335 	     */
1336 	    if (mfc->mfc_stall != NULL &&
1337 	        mfc->mfc_expire != 0 &&
1338 		--mfc->mfc_expire == 0) {
1339 		if (mrtdebug & DEBUG_EXPIRE)
1340 		    log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
1341 			(u_long)ntohl(mfc->mfc_origin.s_addr),
1342 			(u_long)ntohl(mfc->mfc_mcastgrp.s_addr));
1343 		/*
1344 		 * drop all the packets
1345 		 * free the mbuf with the pkt, if, timing info
1346 		 */
1347 		for (rte = mfc->mfc_stall; rte; ) {
1348 		    struct rtdetq *n = rte->next;
1349 
1350 		    m_freem(rte->m);
1351 		    free(rte, M_MRTABLE);
1352 		    rte = n;
1353 		}
1354 		++mrtstat.mrts_cache_cleanups;
1355 		nexpire[i]--;
1356 
1357 		*nptr = mfc->mfc_next;
1358 		free(mfc, M_MRTABLE);
1359 	    } else {
1360 		nptr = &mfc->mfc_next;
1361 	    }
1362 	}
1363     }
1364     splx(s);
1365     expire_upcalls_ch = timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
1366 }
1367 
1368 /*
1369  * Packet forwarding routine once entry in the cache is made
1370  */
1371 static int
1372 ip_mdq(m, ifp, rt, xmt_vif)
1373     register struct mbuf *m;
1374     register struct ifnet *ifp;
1375     register struct mfc *rt;
1376     register vifi_t xmt_vif;
1377 {
1378     register struct ip  *ip = mtod(m, struct ip *);
1379     register vifi_t vifi;
1380     register struct vif *vifp;
1381     register int plen = ip->ip_len;
1382 
1383 /*
1384  * Macro to send packet on vif.  Since RSVP packets don't get counted on
1385  * input, they shouldn't get counted on output, so statistics keeping is
1386  * separate.
1387  */
1388 #define MC_SEND(ip,vifp,m) {                             \
1389                 if ((vifp)->v_flags & VIFF_TUNNEL)  	 \
1390                     encap_send((ip), (vifp), (m));       \
1391                 else                                     \
1392                     phyint_send((ip), (vifp), (m));      \
1393 }
1394 
1395     /*
1396      * If xmt_vif is not -1, send on only the requested vif.
1397      *
1398      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
1399      */
1400     if (xmt_vif < numvifs) {
1401 	MC_SEND(ip, viftable + xmt_vif, m);
1402 	return 1;
1403     }
1404 
1405     /*
1406      * Don't forward if it didn't arrive from the parent vif for its origin.
1407      */
1408     vifi = rt->mfc_parent;
1409     if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1410 	/* came in the wrong interface */
1411 	if (mrtdebug & DEBUG_FORWARD)
1412 	    log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1413 		(void *)ifp, vifi, (void *)viftable[vifi].v_ifp);
1414 	++mrtstat.mrts_wrong_if;
1415 	++rt->mfc_wrong_if;
1416 	/*
1417 	 * If we are doing PIM assert processing, and we are forwarding
1418 	 * packets on this interface, and it is a broadcast medium
1419 	 * interface (and not a tunnel), send a message to the routing daemon.
1420 	 */
1421 	if (pim_assert && rt->mfc_ttls[vifi] &&
1422 		(ifp->if_flags & IFF_BROADCAST) &&
1423 		!(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1424 	    struct sockaddr_in k_igmpsrc;
1425 	    struct mbuf *mm;
1426 	    struct igmpmsg *im;
1427 	    int hlen = ip->ip_hl << 2;
1428 	    struct timeval now;
1429 	    register u_long delta;
1430 
1431 	    GET_TIME(now);
1432 
1433 	    TV_DELTA(rt->mfc_last_assert, now, delta);
1434 
1435 	    if (delta > ASSERT_MSG_TIME) {
1436 		mm = m_copy(m, 0, hlen);
1437 		if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1438 		    mm = m_pullup(mm, hlen);
1439 		if (mm == NULL) {
1440 		    return ENOBUFS;
1441 		}
1442 
1443 		rt->mfc_last_assert = now;
1444 
1445 		im = mtod(mm, struct igmpmsg *);
1446 		im->im_msgtype	= IGMPMSG_WRONGVIF;
1447 		im->im_mbz		= 0;
1448 		im->im_vif		= vifi;
1449 
1450 		k_igmpsrc.sin_addr = im->im_src;
1451 
1452 		socket_send(ip_mrouter, mm, &k_igmpsrc);
1453 	    }
1454 	}
1455 	return 0;
1456     }
1457 
1458     /* If I sourced this packet, it counts as output, else it was input. */
1459     if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1460 	viftable[vifi].v_pkt_out++;
1461 	viftable[vifi].v_bytes_out += plen;
1462     } else {
1463 	viftable[vifi].v_pkt_in++;
1464 	viftable[vifi].v_bytes_in += plen;
1465     }
1466     rt->mfc_pkt_cnt++;
1467     rt->mfc_byte_cnt += plen;
1468 
1469     /*
1470      * For each vif, decide if a copy of the packet should be forwarded.
1471      * Forward if:
1472      *		- the ttl exceeds the vif's threshold
1473      *		- there are group members downstream on interface
1474      */
1475     for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1476 	if ((rt->mfc_ttls[vifi] > 0) &&
1477 	    (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1478 	    vifp->v_pkt_out++;
1479 	    vifp->v_bytes_out += plen;
1480 	    MC_SEND(ip, vifp, m);
1481 	}
1482 
1483     return 0;
1484 }
1485 
1486 /*
1487  * check if a vif number is legal/ok. This is used by ip_output, to export
1488  * numvifs there,
1489  */
1490 static int
1491 X_legal_vif_num(vif)
1492     int vif;
1493 {
1494     if (vif >= 0 && vif < numvifs)
1495        return(1);
1496     else
1497        return(0);
1498 }
1499 
1500 #ifndef MROUTE_LKM
1501 int (*legal_vif_num)(int) = X_legal_vif_num;
1502 #endif
1503 
1504 /*
1505  * Return the local address used by this vif
1506  */
1507 static u_long
1508 X_ip_mcast_src(vifi)
1509     int vifi;
1510 {
1511     if (vifi >= 0 && vifi < numvifs)
1512 	return viftable[vifi].v_lcl_addr.s_addr;
1513     else
1514 	return INADDR_ANY;
1515 }
1516 
1517 #ifndef MROUTE_LKM
1518 u_long (*ip_mcast_src)(int) = X_ip_mcast_src;
1519 #endif
1520 
1521 static void
1522 phyint_send(ip, vifp, m)
1523     struct ip *ip;
1524     struct vif *vifp;
1525     struct mbuf *m;
1526 {
1527     register struct mbuf *mb_copy;
1528     register int hlen = ip->ip_hl << 2;
1529 
1530     /*
1531      * Make a new reference to the packet; make sure that
1532      * the IP header is actually copied, not just referenced,
1533      * so that ip_output() only scribbles on the copy.
1534      */
1535     mb_copy = m_copy(m, 0, M_COPYALL);
1536     if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
1537 	mb_copy = m_pullup(mb_copy, hlen);
1538     if (mb_copy == NULL)
1539 	return;
1540 
1541     if (vifp->v_rate_limit == 0)
1542 	tbf_send_packet(vifp, mb_copy);
1543     else
1544 	tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1545 }
1546 
1547 static void
1548 encap_send(ip, vifp, m)
1549     register struct ip *ip;
1550     register struct vif *vifp;
1551     register struct mbuf *m;
1552 {
1553     register struct mbuf *mb_copy;
1554     register struct ip *ip_copy;
1555     register int i, len = ip->ip_len;
1556 
1557     /*
1558      * copy the old packet & pullup its IP header into the
1559      * new mbuf so we can modify it.  Try to fill the new
1560      * mbuf since if we don't the ethernet driver will.
1561      */
1562     MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
1563     if (mb_copy == NULL)
1564 	return;
1565     mb_copy->m_data += max_linkhdr;
1566     mb_copy->m_len = sizeof(multicast_encap_iphdr);
1567 
1568     if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1569 	m_freem(mb_copy);
1570 	return;
1571     }
1572     i = MHLEN - M_LEADINGSPACE(mb_copy);
1573     if (i > len)
1574 	i = len;
1575     mb_copy = m_pullup(mb_copy, i);
1576     if (mb_copy == NULL)
1577 	return;
1578     mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
1579 
1580     /*
1581      * fill in the encapsulating IP header.
1582      */
1583     ip_copy = mtod(mb_copy, struct ip *);
1584     *ip_copy = multicast_encap_iphdr;
1585 #ifdef RANDOM_IP_ID
1586     ip_copy->ip_id = ip_randomid();
1587 #else
1588     ip_copy->ip_id = htons(ip_id++);
1589 #endif
1590     ip_copy->ip_len += len;
1591     ip_copy->ip_src = vifp->v_lcl_addr;
1592     ip_copy->ip_dst = vifp->v_rmt_addr;
1593 
1594     /*
1595      * turn the encapsulated IP header back into a valid one.
1596      */
1597     ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1598     --ip->ip_ttl;
1599     HTONS(ip->ip_len);
1600     HTONS(ip->ip_off);
1601     ip->ip_sum = 0;
1602     mb_copy->m_data += sizeof(multicast_encap_iphdr);
1603     ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1604     mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1605 
1606     if (vifp->v_rate_limit == 0)
1607 	tbf_send_packet(vifp, mb_copy);
1608     else
1609 	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
1610 }
1611 
1612 /*
1613  * De-encapsulate a packet and feed it back through ip input (this
1614  * routine is called whenever IP gets a packet with proto type
1615  * ENCAP_PROTO and a local destination address).
1616  */
1617 void
1618 #ifdef MROUTE_LKM
1619 X_ipip_input(m, off, proto)
1620 #else
1621 ipip_input(m, off, proto)
1622 #endif
1623 	register struct mbuf *m;
1624 	int off;
1625 	int proto;
1626 {
1627     struct ifnet *ifp = m->m_pkthdr.rcvif;
1628     register struct ip *ip = mtod(m, struct ip *);
1629     register int hlen = ip->ip_hl << 2;
1630     register struct vif *vifp;
1631 
1632     if (!have_encap_tunnel) {
1633 	    rip_input(m, off, proto);
1634 	    return;
1635     }
1636     /*
1637      * dump the packet if it's not to a multicast destination or if
1638      * we don't have an encapsulating tunnel with the source.
1639      * Note:  This code assumes that the remote site IP address
1640      * uniquely identifies the tunnel (i.e., that this site has
1641      * at most one tunnel with the remote site).
1642      */
1643     if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
1644 	++mrtstat.mrts_bad_tunnel;
1645 	m_freem(m);
1646 	return;
1647     }
1648     if (ip->ip_src.s_addr != last_encap_src) {
1649 	register struct vif *vife;
1650 
1651 	vifp = viftable;
1652 	vife = vifp + numvifs;
1653 	last_encap_src = ip->ip_src.s_addr;
1654 	last_encap_vif = 0;
1655 	for ( ; vifp < vife; ++vifp)
1656 	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
1657 		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
1658 		    == VIFF_TUNNEL)
1659 		    last_encap_vif = vifp;
1660 		break;
1661 	    }
1662     }
1663     if ((vifp = last_encap_vif) == 0) {
1664 	last_encap_src = 0;
1665 	mrtstat.mrts_cant_tunnel++; /*XXX*/
1666 	m_freem(m);
1667 	if (mrtdebug)
1668 	  log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n",
1669 		(u_long)ntohl(ip->ip_src.s_addr));
1670 	return;
1671     }
1672     ifp = vifp->v_ifp;
1673 
1674     if (hlen > IP_HDR_LEN)
1675       ip_stripoptions(m, (struct mbuf *) 0);
1676     m->m_data += IP_HDR_LEN;
1677     m->m_len -= IP_HDR_LEN;
1678     m->m_pkthdr.len -= IP_HDR_LEN;
1679     m->m_pkthdr.rcvif = ifp;
1680 
1681     (void) IF_HANDOFF(&ipintrq, m, NULL);
1682 	/*
1683 	 * normally we would need a "schednetisr(NETISR_IP)"
1684 	 * here but we were called by ip_input and it is going
1685 	 * to loop back & try to dequeue the packet we just
1686 	 * queued as soon as we return so we avoid the
1687 	 * unnecessary software interrrupt.
1688 	 */
1689 }
1690 
1691 /*
1692  * Token bucket filter module
1693  */
1694 
1695 static void
1696 tbf_control(vifp, m, ip, p_len)
1697 	register struct vif *vifp;
1698 	register struct mbuf *m;
1699 	register struct ip *ip;
1700 	register u_long p_len;
1701 {
1702     register struct tbf *t = vifp->v_tbf;
1703 
1704     if (p_len > MAX_BKT_SIZE) {
1705 	/* drop if packet is too large */
1706 	mrtstat.mrts_pkt2large++;
1707 	m_freem(m);
1708 	return;
1709     }
1710 
1711     tbf_update_tokens(vifp);
1712 
1713     /* if there are enough tokens,
1714      * and the queue is empty,
1715      * send this packet out
1716      */
1717 
1718     if (t->tbf_q_len == 0) {
1719 	/* queue empty, send packet if enough tokens */
1720 	if (p_len <= t->tbf_n_tok) {
1721 	    t->tbf_n_tok -= p_len;
1722 	    tbf_send_packet(vifp, m);
1723 	} else {
1724 	    /* queue packet and timeout till later */
1725 	    tbf_queue(vifp, m);
1726 	    timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1727 	}
1728     } else if (t->tbf_q_len < t->tbf_max_q_len) {
1729 	/* finite queue length, so queue pkts and process queue */
1730 	tbf_queue(vifp, m);
1731 	tbf_process_q(vifp);
1732     } else {
1733 	/* queue length too much, try to dq and queue and process */
1734 	if (!tbf_dq_sel(vifp, ip)) {
1735 	    mrtstat.mrts_q_overflow++;
1736 	    m_freem(m);
1737 	    return;
1738 	} else {
1739 	    tbf_queue(vifp, m);
1740 	    tbf_process_q(vifp);
1741 	}
1742     }
1743     return;
1744 }
1745 
1746 /*
1747  * adds a packet to the queue at the interface
1748  */
1749 static void
1750 tbf_queue(vifp, m)
1751 	register struct vif *vifp;
1752 	register struct mbuf *m;
1753 {
1754     register int s = splnet();
1755     register struct tbf *t = vifp->v_tbf;
1756 
1757     if (t->tbf_t == NULL) {
1758 	/* Queue was empty */
1759 	t->tbf_q = m;
1760     } else {
1761 	/* Insert at tail */
1762 	t->tbf_t->m_act = m;
1763     }
1764 
1765     /* Set new tail pointer */
1766     t->tbf_t = m;
1767 
1768 #ifdef DIAGNOSTIC
1769     /* Make sure we didn't get fed a bogus mbuf */
1770     if (m->m_act)
1771 	panic("tbf_queue: m_act");
1772 #endif
1773     m->m_act = NULL;
1774 
1775     t->tbf_q_len++;
1776 
1777     splx(s);
1778 }
1779 
1780 
1781 /*
1782  * processes the queue at the interface
1783  */
1784 static void
1785 tbf_process_q(vifp)
1786     register struct vif *vifp;
1787 {
1788     register struct mbuf *m;
1789     register int len;
1790     register int s = splnet();
1791     register struct tbf *t = vifp->v_tbf;
1792 
1793     /* loop through the queue at the interface and send as many packets
1794      * as possible
1795      */
1796     while (t->tbf_q_len > 0) {
1797 	m = t->tbf_q;
1798 
1799 	len = mtod(m, struct ip *)->ip_len;
1800 
1801 	/* determine if the packet can be sent */
1802 	if (len <= t->tbf_n_tok) {
1803 	    /* if so,
1804 	     * reduce no of tokens, dequeue the packet,
1805 	     * send the packet.
1806 	     */
1807 	    t->tbf_n_tok -= len;
1808 
1809 	    t->tbf_q = m->m_act;
1810 	    if (--t->tbf_q_len == 0)
1811 		t->tbf_t = NULL;
1812 
1813 	    m->m_act = NULL;
1814 	    tbf_send_packet(vifp, m);
1815 
1816 	} else break;
1817     }
1818     splx(s);
1819 }
1820 
1821 static void
1822 tbf_reprocess_q(xvifp)
1823 	void *xvifp;
1824 {
1825     register struct vif *vifp = xvifp;
1826     if (ip_mrouter == NULL)
1827 	return;
1828 
1829     tbf_update_tokens(vifp);
1830 
1831     tbf_process_q(vifp);
1832 
1833     if (vifp->v_tbf->tbf_q_len)
1834 	timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1835 }
1836 
1837 /* function that will selectively discard a member of the queue
1838  * based on the precedence value and the priority
1839  */
1840 static int
1841 tbf_dq_sel(vifp, ip)
1842     register struct vif *vifp;
1843     register struct ip *ip;
1844 {
1845     register int s = splnet();
1846     register u_int p;
1847     register struct mbuf *m, *last;
1848     register struct mbuf **np;
1849     register struct tbf *t = vifp->v_tbf;
1850 
1851     p = priority(vifp, ip);
1852 
1853     np = &t->tbf_q;
1854     last = NULL;
1855     while ((m = *np) != NULL) {
1856 	if (p > priority(vifp, mtod(m, struct ip *))) {
1857 	    *np = m->m_act;
1858 	    /* If we're removing the last packet, fix the tail pointer */
1859 	    if (m == t->tbf_t)
1860 		t->tbf_t = last;
1861 	    m_freem(m);
1862 	    /* it's impossible for the queue to be empty, but
1863 	     * we check anyway. */
1864 	    if (--t->tbf_q_len == 0)
1865 		t->tbf_t = NULL;
1866 	    splx(s);
1867 	    mrtstat.mrts_drop_sel++;
1868 	    return(1);
1869 	}
1870 	np = &m->m_act;
1871 	last = m;
1872     }
1873     splx(s);
1874     return(0);
1875 }
1876 
1877 static void
1878 tbf_send_packet(vifp, m)
1879     register struct vif *vifp;
1880     register struct mbuf *m;
1881 {
1882     struct ip_moptions imo;
1883     int error;
1884     static struct route ro;
1885     int s = splnet();
1886 
1887     if (vifp->v_flags & VIFF_TUNNEL) {
1888 	/* If tunnel options */
1889 	ip_output(m, (struct mbuf *)0, &vifp->v_route,
1890 		  IP_FORWARDING, (struct ip_moptions *)0);
1891     } else {
1892 	imo.imo_multicast_ifp  = vifp->v_ifp;
1893 	imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
1894 	imo.imo_multicast_loop = 1;
1895 	imo.imo_multicast_vif  = -1;
1896 
1897 	/*
1898 	 * Re-entrancy should not be a problem here, because
1899 	 * the packets that we send out and are looped back at us
1900 	 * should get rejected because they appear to come from
1901 	 * the loopback interface, thus preventing looping.
1902 	 */
1903 	error = ip_output(m, (struct mbuf *)0, &ro,
1904 			  IP_FORWARDING, &imo);
1905 
1906 	if (mrtdebug & DEBUG_XMIT)
1907 	    log(LOG_DEBUG, "phyint_send on vif %d err %d\n",
1908 		vifp - viftable, error);
1909     }
1910     splx(s);
1911 }
1912 
1913 /* determine the current time and then
1914  * the elapsed time (between the last time and time now)
1915  * in milliseconds & update the no. of tokens in the bucket
1916  */
1917 static void
1918 tbf_update_tokens(vifp)
1919     register struct vif *vifp;
1920 {
1921     struct timeval tp;
1922     register u_long tm;
1923     register int s = splnet();
1924     register struct tbf *t = vifp->v_tbf;
1925 
1926     GET_TIME(tp);
1927 
1928     TV_DELTA(tp, t->tbf_last_pkt_t, tm);
1929 
1930     /*
1931      * This formula is actually
1932      * "time in seconds" * "bytes/second".
1933      *
1934      * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1935      *
1936      * The (1000/1024) was introduced in add_vif to optimize
1937      * this divide into a shift.
1938      */
1939     t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8;
1940     t->tbf_last_pkt_t = tp;
1941 
1942     if (t->tbf_n_tok > MAX_BKT_SIZE)
1943 	t->tbf_n_tok = MAX_BKT_SIZE;
1944 
1945     splx(s);
1946 }
1947 
1948 static int
1949 priority(vifp, ip)
1950     register struct vif *vifp;
1951     register struct ip *ip;
1952 {
1953     register int prio;
1954 
1955     /* temporary hack; may add general packet classifier some day */
1956 
1957     /*
1958      * The UDP port space is divided up into four priority ranges:
1959      * [0, 16384)     : unclassified - lowest priority
1960      * [16384, 32768) : audio - highest priority
1961      * [32768, 49152) : whiteboard - medium priority
1962      * [49152, 65536) : video - low priority
1963      */
1964     if (ip->ip_p == IPPROTO_UDP) {
1965 	struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1966 	switch (ntohs(udp->uh_dport) & 0xc000) {
1967 	    case 0x4000:
1968 		prio = 70;
1969 		break;
1970 	    case 0x8000:
1971 		prio = 60;
1972 		break;
1973 	    case 0xc000:
1974 		prio = 55;
1975 		break;
1976 	    default:
1977 		prio = 50;
1978 		break;
1979 	}
1980 	if (tbfdebug > 1)
1981 		log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio);
1982     } else {
1983 	    prio = 50;
1984     }
1985     return prio;
1986 }
1987 
1988 /*
1989  * End of token bucket filter modifications
1990  */
1991 
1992 int
1993 ip_rsvp_vif_init(so, sopt)
1994 	struct socket *so;
1995 	struct sockopt *sopt;
1996 {
1997     int error, i, s;
1998 
1999     if (rsvpdebug)
2000 	printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
2001 	       so->so_type, so->so_proto->pr_protocol);
2002 
2003     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2004 	return EOPNOTSUPP;
2005 
2006     /* Check mbuf. */
2007     error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2008     if (error)
2009 	    return (error);
2010 
2011     if (rsvpdebug)
2012 	printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on);
2013 
2014     s = splnet();
2015 
2016     /* Check vif. */
2017     if (!legal_vif_num(i)) {
2018 	splx(s);
2019 	return EADDRNOTAVAIL;
2020     }
2021 
2022     /* Check if socket is available. */
2023     if (viftable[i].v_rsvpd != NULL) {
2024 	splx(s);
2025 	return EADDRINUSE;
2026     }
2027 
2028     viftable[i].v_rsvpd = so;
2029     /* This may seem silly, but we need to be sure we don't over-increment
2030      * the RSVP counter, in case something slips up.
2031      */
2032     if (!viftable[i].v_rsvp_on) {
2033 	viftable[i].v_rsvp_on = 1;
2034 	rsvp_on++;
2035     }
2036 
2037     splx(s);
2038     return 0;
2039 }
2040 
2041 int
2042 ip_rsvp_vif_done(so, sopt)
2043 	struct socket *so;
2044 	struct sockopt *sopt;
2045 {
2046 	int error, i, s;
2047 
2048 	if (rsvpdebug)
2049 		printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
2050 		       so->so_type, so->so_proto->pr_protocol);
2051 
2052 	if (so->so_type != SOCK_RAW ||
2053 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
2054 		return EOPNOTSUPP;
2055 
2056 	error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
2057 	if (error)
2058 		return (error);
2059 
2060 	s = splnet();
2061 
2062 	/* Check vif. */
2063 	if (!legal_vif_num(i)) {
2064 		splx(s);
2065 		return EADDRNOTAVAIL;
2066 	}
2067 
2068 	if (rsvpdebug)
2069 		printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n",
2070 		       viftable[i].v_rsvpd, so);
2071 
2072 	viftable[i].v_rsvpd = NULL;
2073 	/*
2074 	 * This may seem silly, but we need to be sure we don't over-decrement
2075 	 * the RSVP counter, in case something slips up.
2076 	 */
2077 	if (viftable[i].v_rsvp_on) {
2078 		viftable[i].v_rsvp_on = 0;
2079 		rsvp_on--;
2080 	}
2081 
2082 	splx(s);
2083 	return 0;
2084 }
2085 
2086 void
2087 ip_rsvp_force_done(so)
2088     struct socket *so;
2089 {
2090     int vifi;
2091     register int s;
2092 
2093     /* Don't bother if it is not the right type of socket. */
2094     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2095 	return;
2096 
2097     s = splnet();
2098 
2099     /* The socket may be attached to more than one vif...this
2100      * is perfectly legal.
2101      */
2102     for (vifi = 0; vifi < numvifs; vifi++) {
2103 	if (viftable[vifi].v_rsvpd == so) {
2104 	    viftable[vifi].v_rsvpd = NULL;
2105 	    /* This may seem silly, but we need to be sure we don't
2106 	     * over-decrement the RSVP counter, in case something slips up.
2107 	     */
2108 	    if (viftable[vifi].v_rsvp_on) {
2109 		viftable[vifi].v_rsvp_on = 0;
2110 		rsvp_on--;
2111 	    }
2112 	}
2113     }
2114 
2115     splx(s);
2116     return;
2117 }
2118 
2119 void
2120 rsvp_input(m, off, proto)
2121 	struct mbuf *m;
2122 	int off;
2123 	int proto;
2124 {
2125     int vifi;
2126     register struct ip *ip = mtod(m, struct ip *);
2127     static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
2128     register int s;
2129     struct ifnet *ifp;
2130 
2131     if (rsvpdebug)
2132 	printf("rsvp_input: rsvp_on %d\n",rsvp_on);
2133 
2134     /* Can still get packets with rsvp_on = 0 if there is a local member
2135      * of the group to which the RSVP packet is addressed.  But in this
2136      * case we want to throw the packet away.
2137      */
2138     if (!rsvp_on) {
2139 	m_freem(m);
2140 	return;
2141     }
2142 
2143     s = splnet();
2144 
2145     if (rsvpdebug)
2146 	printf("rsvp_input: check vifs\n");
2147 
2148 #ifdef DIAGNOSTIC
2149     if (!(m->m_flags & M_PKTHDR))
2150 	    panic("rsvp_input no hdr");
2151 #endif
2152 
2153     ifp = m->m_pkthdr.rcvif;
2154     /* Find which vif the packet arrived on. */
2155     for (vifi = 0; vifi < numvifs; vifi++)
2156 	if (viftable[vifi].v_ifp == ifp)
2157 	    break;
2158 
2159     if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) {
2160 	/*
2161 	 * If the old-style non-vif-associated socket is set,
2162 	 * then use it.  Otherwise, drop packet since there
2163 	 * is no specific socket for this vif.
2164 	 */
2165 	if (ip_rsvpd != NULL) {
2166 	    if (rsvpdebug)
2167 		printf("rsvp_input: Sending packet up old-style socket\n");
2168 	    rip_input(m, off, proto);  /* xxx */
2169 	} else {
2170 	    if (rsvpdebug && vifi == numvifs)
2171 		printf("rsvp_input: Can't find vif for packet.\n");
2172 	    else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL)
2173 		printf("rsvp_input: No socket defined for vif %d\n",vifi);
2174 	    m_freem(m);
2175 	}
2176 	splx(s);
2177 	return;
2178     }
2179     rsvp_src.sin_addr = ip->ip_src;
2180 
2181     if (rsvpdebug && m)
2182 	printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n",
2183 	       m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
2184 
2185     if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) {
2186 	if (rsvpdebug)
2187 	    printf("rsvp_input: Failed to append to socket\n");
2188     } else {
2189 	if (rsvpdebug)
2190 	    printf("rsvp_input: send packet up\n");
2191     }
2192 
2193     splx(s);
2194 }
2195 
2196 #ifdef MROUTE_LKM
2197 #include <sys/conf.h>
2198 #include <sys/exec.h>
2199 #include <sys/sysent.h>
2200 #include <sys/lkm.h>
2201 
2202 MOD_MISC("ip_mroute_mod")
2203 
2204 static int
2205 ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
2206 {
2207 	int i;
2208 	struct lkm_misc	*args = lkmtp->private.lkm_misc;
2209 	int err = 0;
2210 
2211 	switch(cmd) {
2212 		static int (*old_ip_mrouter_cmd)();
2213 		static int (*old_ip_mrouter_done)();
2214 		static int (*old_ip_mforward)();
2215 		static int (*old_mrt_ioctl)();
2216 		static void (*old_proto4_input)();
2217 		static int (*old_legal_vif_num)();
2218 		extern struct protosw inetsw[];
2219 
2220 	case LKM_E_LOAD:
2221 		if(lkmexists(lkmtp) || ip_mrtproto)
2222 		  return(EEXIST);
2223 		old_ip_mrouter_cmd = ip_mrouter_cmd;
2224 		ip_mrouter_cmd = X_ip_mrouter_cmd;
2225 		old_ip_mrouter_done = ip_mrouter_done;
2226 		ip_mrouter_done = X_ip_mrouter_done;
2227 		old_ip_mforward = ip_mforward;
2228 		ip_mforward = X_ip_mforward;
2229 		old_mrt_ioctl = mrt_ioctl;
2230 		mrt_ioctl = X_mrt_ioctl;
2231               old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input;
2232               inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input;
2233 		old_legal_vif_num = legal_vif_num;
2234 		legal_vif_num = X_legal_vif_num;
2235 		ip_mrtproto = IGMP_DVMRP;
2236 
2237 		printf("\nIP multicast routing loaded\n");
2238 		break;
2239 
2240 	case LKM_E_UNLOAD:
2241 		if (ip_mrouter)
2242 		  return EINVAL;
2243 
2244 		ip_mrouter_cmd = old_ip_mrouter_cmd;
2245 		ip_mrouter_done = old_ip_mrouter_done;
2246 		ip_mforward = old_ip_mforward;
2247 		mrt_ioctl = old_mrt_ioctl;
2248               inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input;
2249 		legal_vif_num = old_legal_vif_num;
2250 		ip_mrtproto = 0;
2251 		break;
2252 
2253 	default:
2254 		err = EINVAL;
2255 		break;
2256 	}
2257 
2258 	return(err);
2259 }
2260 
2261 int
2262 ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
2263 	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
2264 		 nosys);
2265 }
2266 
2267 #endif /* MROUTE_LKM */
2268 #endif /* MROUTING */
2269