xref: /freebsd/sys/net/route/nhop.h (revision aa1207ea4f030c50a91bca6a3df950ca25113d5a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * This header file contains public definitions for the nexthop routing subsystem.
30  */
31 
32 #ifndef	_NET_ROUTE_NHOP_H_
33 #define	_NET_ROUTE_NHOP_H_
34 
35 #include <netinet/in.h>			/* sockaddr_in && sockaddr_in6 */
36 
37 #include <sys/counter.h>
38 
/*
 * Nexthop type identifiers.
 * The explicit values start at 1; no enumerator is assigned the value 0.
 */
enum nhop_type {
	NH_TYPE_IPV4_ETHER_RSLV = 1,	/* IPv4 ethernet without GW */
	NH_TYPE_IPV4_ETHER_NHOP = 2,	/* IPv4 with pre-calculated ethernet encap */
	NH_TYPE_IPV6_ETHER_RSLV = 3,	/* IPv6 ethernet without GW */
	NH_TYPE_IPV6_ETHER_NHOP = 4	/* IPv6 with pre-calculated ethernet encap */
};
45 
46 #ifdef _KERNEL
47 
/*
 * Define a shorter version of the AF_LINK sockaddr.
 *
 * Currently the only use case of an AF_LINK gateway is storing the
 * interface index of the interface of the source IPv6 address.
 * This is used by the IPv6 code for connections over the loopback
 * interface.
 *
 * The structure below copies 'struct sockaddr_dl', reducing the
 * size of the sdl_data buffer, as it is not used. This allows
 * AF_LINK gateways to be stored in the nhop gateway itself,
 * simplifying control plane handling.
 */
struct sockaddr_dl_short {
	u_char	sdl_len;	/* Total length of sockaddr */
	u_char	sdl_family;	/* AF_LINK */
	u_short	sdl_index;	/* if != 0, system given index for interface */
	u_char	sdl_type;	/* interface type */
	u_char	sdl_nlen;	/* interface name length, no trailing 0 reqd. */
	u_char	sdl_alen;	/* link level address length */
	u_char	sdl_slen;	/* link layer selector length */
	char	sdl_data[8];	/* unused */
};
71 
/*
 * Mask of the RTF_ route flags that are grouped as nexthop-related
 * (going by the macro name; the consumers of this mask live outside
 * this header).
 */
#define	NHOP_RELATED_FLAGS	\
	(RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_BLACKHOLE | \
	 RTF_FIXEDMTU | RTF_LOCAL | RTF_BROADCAST | RTF_MULTICAST)
75 
76 struct nh_control;
77 struct nhop_priv;
78 
79 /*
80  * Struct 'nhop_object' field description:
81  *
82  * nh_flags: NHF_ flags used in the dataplane code. NHF_GATEWAY or NHF_BLACKHOLE
83  *   can be examples of such flags.
84  * nh_mtu: ready-to-use nexthop mtu. Already accounts for the link-level header,
85  *   interface MTU and protocol-specific limitations.
86  * nh_prepend_len: link-level prepend length. Currently unused.
87  * nh_ifp: logical transmit interface. The one from which if_transmit() will be
88  *   called. Guaranteed to be non-NULL.
89  * nh_aifp: ifnet of the source address. Same as nh_ifp except IPv6 loopback
90  *   routes. See the example below.
91  * nh_ifa: interface address to use. Guaranteed to be non-NULL.
92  * nh_pksent: counter(9) reflecting the number of packets transmitted.
93  *
94  * gw_: storage suitable to hold AF_INET, AF_INET6 or AF_LINK gateway. More
95  *   details ara available in the examples below.
96  *
97  * Examples:
98  *
99  * Direct routes (routes w/o gateway):
100  *  NHF_GATEWAY is NOT set.
101  *  nh_ifp denotes the logical transmit interface ().
102  *  nh_aifp is the same as nh_ifp
103  *  gw_sa contains AF_LINK sa with nh_aifp ifindex (compat)
104  * Loopback routes:
105  *  NHF_GATEWAY is NOT set.
106  *  nh_ifp points to the loopback interface (lo0).
107  *  nh_aifp points to the interface where the destination address belongs to.
108  *    This is useful in IPv6 link-local-over-loopback communications.
109  *  gw_sa contains AF_LINK sa with nh_aifp ifindex (compat)
110  * GW routes:
111  *  NHF_GATEWAY is set.
112  *  nh_ifp denotes the logical transmit interface.
113  *  nh_aifp is the same as nh_ifp
114  *  gw_sa contains L3 address (either AF_INET or AF_INET6).
115  *
116  *
117  * Note: struct nhop_object fields are ordered in a way that
118  *  supports memcmp-based comparisons.
119  *
120  */
121 #define	NHOP_END_CMP	(__offsetof(struct nhop_object, nh_pksent))
122 
123 struct nhop_object {
124 	uint16_t		nh_flags;	/* nhop flags */
125 	uint16_t		nh_mtu;		/* nexthop mtu */
126 	union {
127 		struct sockaddr_in		gw4_sa;	/* GW accessor as IPv4 */
128 		struct sockaddr_in6		gw6_sa; /* GW accessor as IPv6 */
129 		struct sockaddr			gw_sa;
130 		struct sockaddr_dl_short	gwl_sa; /* AF_LINK gw (compat) */
131 		char				gw_buf[28];
132 	};
133 	struct ifnet		*nh_ifp;	/* Logical egress interface. Always != NULL */
134 	struct ifaddr		*nh_ifa;	/* interface address to use. Always != NULL */
135 	struct ifnet		*nh_aifp;	/* ifnet of the source address. Always != NULL */
136 	counter_u64_t		nh_pksent;	/* packets sent using this nhop */
137 	/* 32 bytes + 4xPTR == 64(amd64) / 48(i386)  */
138 	uint8_t			nh_prepend_len;	/* length of prepend data */
139 	uint8_t			spare[3];
140 	uint32_t		spare1;		/* alignment */
141 	char			nh_prepend[48];	/* L2 prepend */
142 	struct nhop_priv	*nh_priv;	/* control plane data */
143 	/* -- 128 bytes -- */
144 };
145 
/*
 * Nhop validity.
 *
 * Currently we verify whether the link is up or not on every packet, which
 *   can be quite costly.
 * TODO: subscribe to the interface notifications and update the nexthops
 *  with NHF_INVALID flag.
 */

/* Link-state check on the nexthop's transmit interface (nh_ifp). */
#define	NH_IS_VALID(_nh)	RT_LINK_IS_UP((_nh)->nh_ifp)
/* Non-zero when NHF_MULTIPATH is set in nh_flags. */
#define	NH_IS_NHGRP(_nh)	((_nh)->nh_flags & NHF_MULTIPATH)

/*
 * Pass _nh to nhop_free() and then reset the caller's pointer to NULL.
 * _nh must be a modifiable lvalue.  It is expanded twice, so it must not
 * have side effects.
 */
#define	NH_FREE(_nh) do {					\
	nhop_free(_nh);						\
	/* guard against invalid refs */			\
	(_nh) = NULL;						\
} while (0)
163 
/*
 * A nexthop paired with a weight.  nhgrp_alloc() accepts a pointer to
 * these together with an element count.
 */
struct weightened_nhop {
	struct nhop_object	*nh;		/* nexthop */
	uint32_t		weight;		/* weight of the nexthop */
	uint32_t		storage;	/* NOTE(review): usage is not visible in this header */
};
169 
/* Called by the NH_FREE() macro; semantics are defined by the implementation. */
void nhop_free(struct nhop_object *nh);

struct sysctl_req;
struct sockaddr_dl;
struct rib_head;

/* flags that can be set using nhop_set_rtflags() */
#define	RT_SET_RTFLAGS_MASK	(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_STATIC)
/* The "change" mask is currently an alias of the "set" mask. */
#define	RT_CHANGE_RTFLAGS_MASK	RT_SET_RTFLAGS_MASK
179 
/*
 * Nexthop creation and setup API.
 * Only the prototypes live in this header; the precise semantics of each
 * call (ownership, error codes, locking) are defined by the implementation.
 */
struct nhop_object *nhop_alloc(uint32_t fibnum, int family);
void nhop_copy(struct nhop_object *nh, const struct nhop_object *nh_orig);
struct nhop_object *nhop_get_nhop(struct nhop_object *nh, int *perror);
int nhop_get_unlinked(struct nhop_object *nh);

/* Gateway setters. */
void nhop_set_direct_gw(struct nhop_object *nh, struct ifnet *ifp);
bool nhop_set_gw(struct nhop_object *nh, const struct sockaddr *sa, bool is_gw);

/* Attribute setters. */
void nhop_set_mtu(struct nhop_object *nh, uint32_t mtu, bool from_user);
void nhop_set_rtflags(struct nhop_object *nh, int rt_flags);
void nhop_set_pxtype_flag(struct nhop_object *nh, int nh_flag);
void nhop_set_broadcast(struct nhop_object *nh, bool is_broadcast);
void nhop_set_blackhole(struct nhop_object *nh, int blackhole_rt_flag);
void nhop_set_pinned(struct nhop_object *nh, bool is_pinned);
void nhop_set_redirect(struct nhop_object *nh, bool is_redirect);
void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type);
void nhop_set_src(struct nhop_object *nh, struct ifaddr *ifa);
void nhop_set_transmit_ifp(struct nhop_object *nh, struct ifnet *ifp);
199 
/* Nexthop origin codes, exchanged via nhop_set_origin()/nhop_get_origin(). */
#define	NH_ORIGIN_UNSPEC	0 /* not set */
#define	NH_ORIGIN_REDIRECT	1 /* kernel-originated redirect */
#define	NH_ORIGIN_KERNEL	2 /* kernel (interface) routes */
#define	NH_ORIGIN_BOOT		3 /* kernel-originated routes at boot */
#define	NH_ORIGIN_STATIC	4 /* route(8) routes */
void nhop_set_origin(struct nhop_object *nh, uint8_t origin);
uint8_t nhop_get_origin(const struct nhop_object *nh);
207 
/*
 * Nexthop attribute accessors.
 * Getters take a const nexthop; the matching setters take a mutable one.
 * Exact semantics are defined by the implementation, not by this header.
 */
uint32_t nhop_get_idx(const struct nhop_object *nh);
uint32_t nhop_get_uidx(const struct nhop_object *nh);
void nhop_set_uidx(struct nhop_object *nh, uint32_t uidx);
enum nhop_type nhop_get_type(const struct nhop_object *nh);
int nhop_get_rtflags(const struct nhop_object *nh);
struct vnet *nhop_get_vnet(const struct nhop_object *nh);
struct nhop_object *nhop_select_func(struct nhop_object *nh, uint32_t flowid);
int nhop_get_upper_family(const struct nhop_object *nh);
bool nhop_set_upper_family(struct nhop_object *nh, int family);
int nhop_get_neigh_family(const struct nhop_object *nh);
uint32_t nhop_get_fibnum(const struct nhop_object *nh);
void nhop_set_fibnum(struct nhop_object *nh, uint32_t fibnum);
uint32_t nhop_get_expire(const struct nhop_object *nh);
void nhop_set_expire(struct nhop_object *nh, uint32_t expire);
struct rib_head *nhop_get_rh(const struct nhop_object *nh);
223 
/*
 * Nexthop group API.
 * struct nhgrp_object is opaque here; only pointers to it are exchanged.
 * Exact semantics are defined by the implementation, not by this header.
 */
struct nhgrp_object;
struct nhgrp_object *nhgrp_alloc(uint32_t fibnum, int family,
    struct weightened_nhop *wn, int num_nhops, int *perror);
struct nhgrp_object *nhgrp_get_nhgrp(struct nhgrp_object *nhg, int *perror);
void nhgrp_set_uidx(struct nhgrp_object *nhg, uint32_t uidx);
uint32_t nhgrp_get_uidx(const struct nhgrp_object *nhg);
uint8_t nhgrp_get_origin(const struct nhgrp_object *nhg);
void nhgrp_set_origin(struct nhgrp_object *nhg, uint8_t origin);
232 #endif /* _KERNEL */
233 
/* Kernel <> userland structures */

/* Structure usage and layout are described in dump_nhop_entry() */
struct nhop_external {
	uint32_t	nh_len;		/* length of the datastructure */
	uint32_t	nh_idx;		/* Nexthop index */
	uint32_t	nh_fib;		/* Fib nexthop is attached to */
	uint32_t	ifindex;	/* transmit interface ifindex */
	uint32_t	aifindex;	/* address ifindex */
	uint8_t		prepend_len;	/* length of the prepend */
	uint8_t		nh_family;	/* address family */
	uint16_t	nh_type;	/* nexthop type */
	uint16_t	nh_mtu;		/* nexthop mtu */

	uint16_t	nh_flags;	/* nhop flags */
	struct in_addr	nh_addr;	/* GW/DST IPv4 address */
	struct in_addr	nh_src;		/* default source IPv4 address */
	uint64_t	nh_pksent;	/* packets sent counter */
	/* control plane */
	/* lookup key: address, family, type */
	char		nh_prepend[64];	/* L2 prepend */
	uint64_t	nh_refcount;	/* number of references */
};
257 
/* Header carrying the offsets of the gateway and source sockaddrs. */
struct nhop_addrs {
	uint32_t	na_len;		/* length of the datastructure */
	uint16_t	gw_sa_off;	/* offset of gateway SA */
	uint16_t	src_sa_off;	/* offset of src address SA */
};
263 
#define	NHG_C_TYPE_CNHOPS	0x1	/* Control plane nhops list */
#define	NHG_C_TYPE_DNHOPS	0x2	/* Dataplane nhops list */
/* Header preceding a list of items in the exported nexthop group data. */
struct nhgrp_container {
	uint32_t	nhgc_len;	/* container length */
	uint16_t	nhgc_count;	/* number of items */
	uint8_t		nhgc_type;	/* container type (presumably NHG_C_TYPE_*) */
	uint8_t		nhgc_subtype;	/* container subtype */
};
272 
/* One nexthop entry inside an nhgrp_container list. */
struct nhgrp_nhop_external {
	uint32_t	nh_idx;		/* nexthop index */
	uint32_t	nh_weight;	/* nexthop weight */
};
277 
/*
 * Layout:
 * - nhgrp_external
 * - nhgrp_container (control plane nhops list)
 *   - nhgrp_nhop_external
 *   - nhgrp_nhop_external
 *   ..
 * - nhgrp_container (dataplane nhops list)
 *   - nhgrp_nhop_external
 *   - nhgrp_nhop_external
 *   ..
 */
struct nhgrp_external {
	uint32_t	nhg_idx;	/* Nexthop group index */
	uint32_t	nhg_refcount;	/* number of references */
};
293 
294 #endif
295