xref: /freebsd/sys/netinet/ip_mroute.h (revision 2e3f49888ec8851bafb22011533217487764fdb0)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989 Stephen Deering.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Stephen Deering of Stanford University.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef _NETINET_IP_MROUTE_H_
37 #define _NETINET_IP_MROUTE_H_
38 
39 /*
40  * Definitions for IP multicast forwarding.
41  *
42  * Written by David Waitzman, BBN Labs, August 1988.
43  * Modified by Steve Deering, Stanford, February 1989.
44  * Modified by Ajit Thyagarajan, PARC, August 1993.
45  * Modified by Ajit Thyagarajan, PARC, August 1994.
46  * Modified by Ahmed Helmy, SGI, June 1996.
47  * Modified by Pavlin Radoslavov, ICSI, October 2002.
48  *
49  * MROUTING Revision: 3.3.1.3
50  * and PIM-SMv2 and PIM-DM support, advanced API support,
51  * bandwidth metering and signaling.
52  */
53 
/*
 * Option names used with setsockopt()/getsockopt() to drive the
 * multicast routing code.  Value 108 is not assigned in this list.
 */
#define	MRT_INIT		100	/* initialize forwarder */
#define	MRT_DONE		101	/* shut down forwarder */
#define	MRT_ADD_VIF		102	/* create virtual interface */
#define	MRT_DEL_VIF		103	/* delete virtual interface */
#define	MRT_ADD_MFC		104	/* insert forwarding cache entry */
#define	MRT_DEL_MFC		105	/* delete forwarding cache entry */
#define	MRT_VERSION		106	/* get kernel version number */
#define	MRT_ASSERT		107	/* enable assert processing */
#define	MRT_PIM			MRT_ASSERT /* enable PIM processing (alias) */
#define	MRT_API_SUPPORT		109	/* supported MRT API */
#define	MRT_API_CONFIG		110	/* config MRT API */
#define	MRT_ADD_BW_UPCALL	111	/* create bandwidth monitor */
#define	MRT_DEL_BW_UPCALL	112	/* delete bandwidth monitor */
70 
/*
 * Types for virtual interface (vif) indices and for bitmaps that hold
 * one bit per vif.  MAXVIFS is the number of vifs such a bitmap covers.
 */
#define	MAXVIFS 32
typedef u_long vifbitmap_t;
typedef u_short vifi_t;		/* type of a vif index */
/*
 * All-ones vif index.  The expansion is fully parenthesized so the cast
 * stays attached to the constant in every context; without the outer
 * parentheses "sizeof ALL_VIFS" parses as "sizeof(vifi_t) - 1".
 */
#define	ALL_VIFS ((vifi_t)-1)
78 
/*
 * Bit operations on a vif bitmap "m"; "n" is a vif index.
 *
 * The shifted constant is unsigned long (the type behind vifbitmap_t),
 * not int.  With MAXVIFS == 32 the index can be 31, and "1 << 31" on a
 * 32-bit int is signed overflow (undefined behavior); where it happens
 * to produce INT_MIN, converting that to a 64-bit u_long sign-extends
 * and sets every bit above bit 31 too.
 *
 * Note that "m" is modified in place by SET/CLR/CLRALL and that
 * arguments should be free of side effects.
 */
#define	VIFM_SET(n, m)		((m) |= (1UL << (n)))
#define	VIFM_CLR(n, m)		((m) &= ~(1UL << (n)))
#define	VIFM_ISSET(n, m)	((m) & (1UL << (n)))
#define	VIFM_CLRALL(m)		((m) = 0x00000000)
#define	VIFM_COPY(mfrom, mto)	((mto) = (mfrom))
#define	VIFM_SAME(m1, m2)	((m1) == (m2))
85 
86 struct mfc;
87 
88 /*
89  * Argument structure for MRT_ADD_VIF.
90  * (MRT_DEL_VIF takes a single vifi_t argument.)
91  */
92 struct vifctl {
93 	vifi_t	vifc_vifi;		/* the index of the vif to be added */
94 	u_char	vifc_flags;		/* VIFF_ flags defined below */
95 	u_char	vifc_threshold;		/* min ttl required to forward on vif */
96 	u_int	vifc_rate_limit;	/* max rate */
97 	struct	in_addr vifc_lcl_addr;	/* local interface address */
98 	struct	in_addr vifc_rmt_addr;	/* remote address (tunnels only) */
99 };
100 
/* Bits for vifc_flags / v_flags. */
#define	VIFF_TUNNEL	0x1	/* no-op; retained for old source */
#define	VIFF_SRCRT	0x2	/* no-op; retained for old source */
#define	VIFF_REGISTER	0x4	/* used for PIM Register encap/decap */
104 
105 /*
106  * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC
107  * XXX if you change this, make sure to change struct mfcctl2 as well.
108  */
109 struct mfcctl {
110     struct in_addr  mfcc_origin;		/* ip origin of mcasts       */
111     struct in_addr  mfcc_mcastgrp;		/* multicast group associated*/
112     vifi_t	    mfcc_parent;		/* incoming vif              */
113     u_char	    mfcc_ttls[MAXVIFS];		/* forwarding ttls on vifs   */
114 };
115 
116 /*
117  * The new argument structure for MRT_ADD_MFC and MRT_DEL_MFC overlays
118  * and extends the old struct mfcctl.
119  */
120 struct mfcctl2 {
121 	/* the mfcctl fields */
122 	struct in_addr	mfcc_origin;		/* ip origin of mcasts	     */
123 	struct in_addr	mfcc_mcastgrp;		/* multicast group associated*/
124 	vifi_t		mfcc_parent;		/* incoming vif		     */
125 	u_char		mfcc_ttls[MAXVIFS];	/* forwarding ttls on vifs   */
126 
127 	/* extension fields */
128 	uint8_t		mfcc_flags[MAXVIFS];	/* the MRT_MFC_FLAGS_* flags */
129 	struct in_addr	mfcc_rp;		/* the RP address            */
130 };
/*
 * Flags of the advanced API.
 *
 * The MRT_MFC_FLAGS_XXX values double as the per-vif flags stored in
 * the mfcc_flags field.
 */
#define	MRT_MFC_FLAGS_DISABLE_WRONGVIF	(1 << 0) /* disable WRONGVIF signals */
#define	MRT_MFC_FLAGS_BORDER_VIF	(1 << 1) /* border vif               */
#define	MRT_MFC_RP			(1 << 8) /* enable RP address        */
#define	MRT_MFC_BW_UPCALL		(1 << 9) /* enable bw upcalls        */

/* Every MRT_MFC_FLAGS_XXX bit. */
#define	MRT_MFC_FLAGS_ALL \
	(MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_FLAGS_BORDER_VIF)

/* Every bit defined above. */
#define	MRT_API_FLAGS_ALL \
	(MRT_MFC_FLAGS_ALL | MRT_MFC_RP | MRT_MFC_BW_UPCALL)
146 
/*
 * Structure for installing or delivering an upcall if the
 * measured bandwidth is above or below a threshold.
 *
 * User programs (e.g. daemons) may need to know when the
 * bandwidth used by some data flow is above or below some threshold.
 * This interface allows userland to specify the threshold (in
 * bytes and/or packets) and the measurement interval. A flow is
 * all packets with the same source and destination IP address.
 * At the moment the code is only used for multicast destinations,
 * but there is nothing that prevents its use for unicast.
 *
 * The measurement interval cannot be shorter than some Tmin (currently, 3s).
 * The threshold is set in packets and/or bytes per interval.
 *
 * Measurement works as follows:
 *
 * For >= measurements:
 * The first packet marks the start of a measurement interval.
 * During an interval we count packets and bytes, and when we
 * pass the threshold we deliver an upcall and we are done.
 * The first packet after the end of the interval resets the
 * count and restarts the measurement.
 *
 * For <= measurements:
 * We start a timer to fire at the end of the interval, and
 * then for each incoming packet we count packets and bytes.
 * When the timer fires, we compare the value with the threshold,
 * schedule an upcall if we are below, and restart the measurement
 * (reschedule timer and zero counters).
 */
178 
/*
 * A time value paired with packet and byte counts.  struct bw_upcall and
 * struct bw_meter each hold two of these: one for the threshold and one
 * for the measured bandwidth.
 */
struct bw_data {
	struct timeval	b_time;		/* time value */
	uint64_t	b_packets;	/* packet count */
	uint64_t	b_bytes;	/* byte count */
};
184 
185 struct bw_upcall {
186 	struct in_addr	bu_src;			/* source address            */
187 	struct in_addr	bu_dst;			/* destination address       */
188 	uint32_t	bu_flags;		/* misc flags (see below)    */
189 #define BW_UPCALL_UNIT_PACKETS   (1 << 0)	/* threshold (in packets)    */
190 #define BW_UPCALL_UNIT_BYTES     (1 << 1)	/* threshold (in bytes)      */
191 #define BW_UPCALL_GEQ            (1 << 2)	/* upcall if bw >= threshold */
192 #define BW_UPCALL_LEQ            (1 << 3)	/* upcall if bw <= threshold */
193 #define BW_UPCALL_DELETE_ALL     (1 << 4)	/* delete all upcalls for s,d*/
194 	struct bw_data	bu_threshold;		/* the bw threshold	     */
195 	struct bw_data	bu_measured;		/* the measured bw	     */
196 };
197 
/* Upper bound on the number of upcalls delivered together. */
#define	BW_UPCALLS_MAX				1024

/*
 * Shortest threshold time interval accepted for a bandwidth measurement,
 * split into its seconds and microseconds parts.
 */
#define	BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC	3
#define	BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC	0
203 
/*
 * Multicast routing statistics kept by the kernel.  Every member is a
 * 64-bit counter.
 */
struct mrtstat {
	uint64_t	mrts_mfc_lookups;	/* # forw. cache hash table hits */
	uint64_t	mrts_mfc_misses;	/* # forw. cache hash table misses */
	uint64_t	mrts_upcalls;		/* # calls to multicast routing daemon */
	uint64_t	mrts_no_route;		/* no route for packet's origin */
	uint64_t	mrts_bad_tunnel;	/* malformed tunnel options */
	uint64_t	mrts_cant_tunnel;	/* no room for tunnel options */
	uint64_t	mrts_wrong_if;		/* arrived on wrong interface */
	uint64_t	mrts_upq_ovflw;		/* upcall Q overflow */
	uint64_t	mrts_cache_cleanups;	/* # entries with no upcalls */
	uint64_t	mrts_drop_sel;		/* pkts dropped selectively */
	uint64_t	mrts_q_overflow;	/* pkts dropped - Q overflow */
	uint64_t	mrts_pkt2large;		/* pkts dropped - size > BKT SIZE */
	uint64_t	mrts_upq_sockfull;	/* upcalls dropped - socket full */
};
222 
#ifdef _KERNEL
/*
 * Kernel-only helpers for updating a struct mrtstat counter by member
 * name: MRTSTAT_ADD hands the member and the increment to
 * VNET_PCPUSTAT_ADD, MRTSTAT_INC adds one.
 */
#define	MRTSTAT_ADD(name, val)						\
	VNET_PCPUSTAT_ADD(struct mrtstat, mrtstat, name, (val))
#define	MRTSTAT_INC(name)						\
	MRTSTAT_ADD(name, 1)
#endif /* _KERNEL */
228 
/*
 * Argument structure mrouted uses to fetch the packet counts of a
 * (source, group) pair.
 */
struct sioc_sg_req {
	struct in_addr	src;		/* source address */
	struct in_addr	grp;		/* group address */
	u_long		pktcnt;		/* packet count */
	u_long		bytecnt;	/* byte count */
	u_long		wrong_if;	/* wrong-interface count */
};
239 
240 /*
241  * Argument structure used by mrouted to get vif pkt counts
242  */
243 struct sioc_vif_req {
244     vifi_t vifi;		/* vif number				*/
245     u_long icount;		/* Input packet count on vif		*/
246     u_long ocount;		/* Output packet count on vif		*/
247     u_long ibytes;		/* Input byte count on vif		*/
248     u_long obytes;		/* Output byte count on vif		*/
249 };
250 
/*
 * Virtual interface as the kernel keeps it.  The members after
 * v_bytes_out exist only when compiling with _KERNEL defined.
 */
struct vif {
	u_char		 v_flags;	/* VIFF_ flags defined above          */
	u_char		 v_threshold;	/* min ttl required to forward on vif */
	struct in_addr	 v_lcl_addr;	/* local interface address            */
	struct in_addr	 v_rmt_addr;	/* remote address (tunnels only)      */
	struct ifnet	*v_ifp;		/* pointer to interface               */
	u_long		 v_pkt_in;	/* # pkts in on interface             */
	u_long		 v_pkt_out;	/* # pkts out on interface            */
	u_long		 v_bytes_in;	/* # bytes in on interface            */
	u_long		 v_bytes_out;	/* # bytes out on interface           */
#ifdef _KERNEL
/* Size of the part of struct vif that precedes the kernel-only members. */
#define	MROUTE_VIF_SYSCTL_LEN	__offsetof(struct vif, v_spin)
	struct mtx	 v_spin;	/* Spin mutex for pkt stats           */
	char		 v_spin_name[32];
#endif /* _KERNEL */
};
270 
#if defined(_KERNEL) || defined (_NETSTAT)
/*
 * Multicast forwarding cache entry as the kernel keeps it.  Only visible
 * when _KERNEL or _NETSTAT is defined.
 */
struct mfc {
	LIST_ENTRY(mfc)	 mfc_hash;
	struct in_addr	 mfc_origin;		/* IP origin of mcasts       */
	struct in_addr	 mfc_mcastgrp;		/* multicast group associated*/
	vifi_t		 mfc_parent;		/* incoming vif              */
	u_char		 mfc_ttls[MAXVIFS];	/* forwarding ttls on vifs   */
	u_long		 mfc_pkt_cnt;		/* pkt count for src-grp     */
	u_long		 mfc_byte_cnt;		/* byte count for src-grp    */
	u_long		 mfc_wrong_if;		/* wrong if for src-grp      */
	int		 mfc_expire;		/* time to clean entry up    */
	struct timeval	 mfc_last_assert;	/* last time I sent an assert*/
	uint8_t		 mfc_flags[MAXVIFS];	/* the MRT_MFC_FLAGS_* flags */
	struct in_addr	 mfc_rp;		/* the RP address            */
	/* list of bandwidth meters for the Lower-or-EQual case */
	struct bw_meter	*mfc_bw_meter_leq;
	/* list of bandwidth meters for the Greater-or-EQual case */
	struct bw_meter	*mfc_bw_meter_geq;
	struct buf_ring	*mfc_stall_ring;	/* ring of awaiting mfc      */
};
#endif /* _KERNEL || _NETSTAT */
295 
/*
 * Message the kernel sends up to the multicast router.
 * Note the convenient similarity to an IP packet.
 */
struct igmpmsg {
	uint32_t	unused1;
	uint32_t	unused2;
	u_char		im_msgtype;	/* one of the IGMPMSG_* types below  */
#define	IGMPMSG_NOCACHE		1	/* no MFC in the kernel              */
#define	IGMPMSG_WRONGVIF	2	/* packet came from wrong interface  */
#define	IGMPMSG_WHOLEPKT	3	/* PIM pkt for user level encap.     */
#define	IGMPMSG_BW_UPCALL	4	/* BW monitoring upcall              */
	u_char		im_mbz;		/* must be zero                      */
	u_char		im_vif;		/* vif rec'd on                      */
	u_char		unused3;
	struct in_addr	im_src;
	struct in_addr	im_dst;
};
313 
#ifdef _KERNEL
/*
 * Per-packet information held while an upcall is made; entries are
 * linked together through rte_link.
 */
struct rtdetq {
	TAILQ_ENTRY(rtdetq)	 rte_link;
	struct mbuf		*m;		/* A copy of the packet            */
	struct ifnet		*ifp;		/* Interface pkt came in on        */
	vifi_t			 xmt_vif;	/* Saved copy of imo_multicast_vif */
};
#define	MAX_UPQ	4		/* max. no of pkts in upcall Q */
#endif /* _KERNEL */
326 
327 /*
328  * Structure for measuring the bandwidth and sending an upcall if the
329  * measured bandwidth is above or below a threshold.
330  */
331 struct bw_meter {
332 	struct bw_meter	*bm_mfc_next;		/* next bw meter (same mfc)  */
333 	struct bw_meter	*bm_time_next;		/* next bw meter (same time) */
334 	struct mfc	*bm_mfc;		/* the corresponding mfc     */
335 	uint32_t	bm_flags;		/* misc flags (see below)    */
336 #define BW_METER_UNIT_PACKETS	(1 << 0)	/* threshold (in packets)    */
337 #define BW_METER_UNIT_BYTES	(1 << 1)	/* threshold (in bytes)      */
338 #define BW_METER_GEQ		(1 << 2)	/* upcall if bw >= threshold */
339 #define BW_METER_LEQ		(1 << 3)	/* upcall if bw <= threshold */
340 #define BW_METER_USER_FLAGS	(BW_METER_UNIT_PACKETS |		\
341 				 BW_METER_UNIT_BYTES |			\
342 				 BW_METER_GEQ |				\
343 				 BW_METER_LEQ)
344 
345 #define BW_METER_UPCALL_DELIVERED (1 << 24)	/* upcall was delivered      */
346 
347 	struct bw_data	bm_threshold;		/* the upcall threshold	     */
348 	struct bw_data	bm_measured;		/* the measured bw	     */
349 	struct timeval	bm_start_time;		/* abs. time		     */
350 #ifdef _KERNEL
351 	struct callout	bm_meter_callout;	/* Periodic callout          */
352 	void*		arg;			/* custom argument           */
353 	struct mtx 	bm_spin;		/* meter spin lock           */
354 	char		bm_spin_name[32];
355 #endif
356 };
357 
#ifdef _KERNEL

struct sockopt;

/*
 * Kernel-only entry points, declared as pointers to functions.  Their
 * definitions are not part of this header.
 */
extern int (*ip_mrouter_set)(struct socket *, struct sockopt *);
extern int (*ip_mrouter_get)(struct socket *, struct sockopt *);
extern int (*ip_mrouter_done)(void);
extern int (*mrt_ioctl)(u_long, caddr_t, int);

#endif /* _KERNEL */
368 
369 #endif /* _NETINET_IP_MROUTE_H_ */
370