xref: /freebsd/sys/netlink/route/rt.c (revision 8b04c1cbfc1cb71a1ce53b3a7855f1d45866fcfb)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2021 Ng Peng Nam Sean
5  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include "opt_route.h"
34 #include <sys/types.h>
35 #include <sys/malloc.h>
36 #include <sys/rmlock.h>
37 #include <sys/socket.h>
38 
39 #include <net/if.h>
40 #include <net/route.h>
41 #include <net/route/nhop.h>
42 #include <net/route/route_ctl.h>
43 #include <net/route/route_var.h>
44 #include <netinet6/scope6_var.h>
45 #include <netlink/netlink.h>
46 #include <netlink/netlink_ctl.h>
47 #include <netlink/netlink_route.h>
48 #include <netlink/route/route_var.h>
49 
50 #define	DEBUG_MOD_NAME	nl_route
51 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
52 #include <netlink/netlink_debug.h>
53 _DECLARE_DEBUG(LOG_DEBUG);
54 
55 static unsigned char
56 get_rtm_type(const struct nhop_object *nh)
57 {
58 	int nh_flags = nh->nh_flags;
59 
60 	/* Use the fact that nhg runtime flags are only NHF_MULTIPATH */
61 	if (nh_flags & NHF_BLACKHOLE)
62 		return (RTN_BLACKHOLE);
63 	else if (nh_flags & NHF_REJECT)
64 		return (RTN_PROHIBIT);
65 	return (RTN_UNICAST);
66 }
67 
68 static uint8_t
69 nl_get_rtm_protocol(const struct nhop_object *nh)
70 {
71 #ifdef ROUTE_MPATH
72 	if (NH_IS_NHGRP(nh)) {
73 		const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh;
74 		uint8_t origin = nhgrp_get_origin(nhg);
75 		if (origin != RTPROT_UNSPEC)
76 			return (origin);
77 		nh = nhg->nhops[0];
78 	}
79 #endif
80 	uint8_t origin = nhop_get_origin(nh);
81 	if (origin != RTPROT_UNSPEC)
82 		return (origin);
83 	/* TODO: remove guesswork once all kernel users fill in origin */
84 	int rt_flags = nhop_get_rtflags(nh);
85 	if (rt_flags & RTF_PROTO1)
86 		return (RTPROT_ZEBRA);
87 	if (rt_flags & RTF_STATIC)
88 		return (RTPROT_STATIC);
89 	return (RTPROT_KERNEL);
90 }
91 
92 static int
93 get_rtmsg_type_from_rtsock(int cmd)
94 {
95 	switch (cmd) {
96 	case RTM_ADD:
97 	case RTM_CHANGE:
98 	case RTM_GET:
99 		return NL_RTM_NEWROUTE;
100 	case RTM_DELETE:
101 		return NL_RTM_DELROUTE;
102 	}
103 
104 	return (0);
105 }
106 
107 /*
108  * fibnum heuristics
109  *
110  * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS
111  * msg                rtm_table     RTA_TABLE            result
112  * RTM_GETROUTE/dump          0             -       RT_ALL_FIBS
113  * RTM_GETROUTE/dump          1             -                 1
114  * RTM_GETROUTE/get           0             -                 0
115  *
116  */
117 
118 static struct nhop_object *
119 rc_get_nhop(const struct rib_cmd_info *rc)
120 {
121 	return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new);
122 }
123 
124 static void
125 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh)
126 {
127 #ifdef INET6
128 	int upper_family;
129 #endif
130 
131 	switch (nhop_get_neigh_family(nh)) {
132 	case AF_LINK:
133 		/* onlink prefix, skip */
134 		break;
135 	case AF_INET:
136 		nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr);
137 		break;
138 #ifdef INET6
139 	case AF_INET6:
140 		upper_family = nhop_get_upper_family(nh);
141 		if (upper_family == AF_INET6) {
142 			struct in6_addr gw6 = nh->gw6_sa.sin6_addr;
143 			in6_clearscope(&gw6);
144 
145 			nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6);
146 		} else if (upper_family == AF_INET) {
147 			/* IPv4 over IPv6 */
148 			struct in6_addr gw6 = nh->gw6_sa.sin6_addr;
149 			in6_clearscope(&gw6);
150 
151 			char buf[20];
152 			struct rtvia *via = (struct rtvia *)&buf[0];
153 			via->rtvia_family = AF_INET6;
154 			memcpy(via->rtvia_addr, &gw6, 16);
155 			nlattr_add(nw, NL_RTA_VIA, 17, via);
156 		}
157 		break;
158 #endif
159 	}
160 }
161 
162 static void
163 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh)
164 {
165 	int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t);
166 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
167 
168 	if (nla == NULL)
169 		return;
170 	nla->nla_type = NL_RTA_METRICS;
171 	nla->nla_len = nla_len;
172 	nla++;
173 	nla->nla_type = NL_RTAX_MTU;
174 	nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t);
175 	*((uint32_t *)(nla + 1)) = nh->nh_mtu;
176 }
177 
#ifdef ROUTE_MPATH
/*
 * Writes the attributes describing a nexthop group: optional user index,
 * kernel index, the rtsock flags of the first member and a nested
 * NL_RTA_MULTIPATH attribute holding one struct rtnexthop per member.
 *
 * Each member record carries its interface index, weight, gateway and,
 * when they differ from the first member's, its own rtsock flags.  Stops
 * early (leaving the nested attribute unterminated) if the writer runs
 * out of space.
 */
static void
dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm)
{
	uint32_t user_idx = nhgrp_get_uidx(nhg);
	uint32_t total;
	const struct weightened_nhop *members = nhgrp_get_nhops(nhg, &total);
	uint32_t group_rtflags = nhop_get_rtflags(members[0].nh);
	int mpath_off;

	if (user_idx != 0)
		nlattr_add_u32(nw, NL_RTA_NH_ID, user_idx);
	nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg));
	nlattr_add_u32(nw, NL_RTA_RTFLAGS, group_rtflags);

	mpath_off = nlattr_add_nested(nw, NL_RTA_MULTIPATH);
	if (mpath_off == 0)
		return;

	for (uint32_t idx = 0; idx < total; idx++) {
		const struct nhop_object *member = members[idx].nh;
		int start = nlattr_save_offset(nw);
		struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop);
		uint32_t member_rtflags;

		if (rtnh == NULL)
			return;
		rtnh->rtnh_flags = 0;
		rtnh->rtnh_ifindex = member->nh_ifp->if_index;
		rtnh->rtnh_hops = members[idx].weight;

		dump_rc_nhop_gw(nw, member);
		member_rtflags = nhop_get_rtflags(member);
		if (member_rtflags != group_rtflags)
			nlattr_add_u32(nw, NL_RTA_RTFLAGS, member_rtflags);
		if ((member_rtflags & RTF_FIXEDMTU) != 0)
			dump_rc_nhop_mtu(nw, member);

		/* The buffer may have moved while attributes were added */
		rtnh = nlattr_restore_offset(nw, start, struct rtnexthop);
		/*
		 * nlattr_add() allocates 4-byte aligned storage, no need to
		 * align the length here.
		 */
		rtnh->rtnh_len = nlattr_save_offset(nw) - start;
	}
	nlattr_set_len(nw, mpath_off);
}
#endif
220 
221 static void
222 dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm)
223 {
224 #ifdef ROUTE_MPATH
225 	if (NH_IS_NHGRP(rnd->rnd_nhop)) {
226 		dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm);
227 		return;
228 	}
229 #endif
230 	const struct nhop_object *nh = rnd->rnd_nhop;
231 	uint32_t rtflags = nhop_get_rtflags(nh);
232 
233 	/*
234 	 * IPv4 over IPv6
235 	 *    ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2),
236 	 * IPv4 w/ gw
237 	 *    ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)],
238 	 * Direct route:
239 	 *    ('RTA_OIF', 2)
240 	 */
241 	if (nh->nh_flags & NHF_GATEWAY)
242 		dump_rc_nhop_gw(nw, nh);
243 
244 	uint32_t uidx = nhop_get_uidx(nh);
245 	if (uidx != 0)
246 		nlattr_add_u32(nw, NL_RTA_NH_ID, uidx);
247 	nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh));
248 	nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags);
249 
250 	if (rtflags & RTF_FIXEDMTU)
251 		dump_rc_nhop_mtu(nw, nh);
252 	uint32_t nh_expire = nhop_get_expire(nh);
253 	if (nh_expire > 0)
254 		nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime);
255 
256 	/* In any case, fill outgoing interface */
257 	nlattr_add_u32(nw, NL_RTA_OIF, nh->nh_ifp->if_index);
258 
259 	if (rnd->rnd_weight != RT_DEFAULT_WEIGHT)
260 		nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight);
261 }
262 
263 /*
264  * Dumps output from a rib command into an rtmsg
265  */
266 
267 static int
268 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr,
269     const struct rtentry *rt, struct route_nhop_data *rnd,
270     struct nl_writer *nw)
271 {
272 	struct rtmsg *rtm;
273 	int error = 0;
274 
275 	NET_EPOCH_ASSERT();
276 
277 	if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg)))
278 		goto enomem;
279 
280 	int family = rt_get_family(rt);
281 	int rtm_off = nlattr_save_offset(nw);
282 	rtm = nlmsg_reserve_object(nw, struct rtmsg);
283 	rtm->rtm_family = family;
284 	rtm->rtm_dst_len = 0;
285 	rtm->rtm_src_len = 0;
286 	rtm->rtm_tos = 0;
287 	if (fibnum < 255)
288 		rtm->rtm_table = (unsigned char)fibnum;
289 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
290 	rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop);
291 	rtm->rtm_type = get_rtm_type(rnd->rnd_nhop);
292 
293 	nlattr_add_u32(nw, NL_RTA_TABLE, fibnum);
294 
295 	int plen = 0;
296 #if defined(INET) || defined(INET6)
297 	uint32_t scopeid;
298 #endif
299 	switch (family) {
300 #ifdef INET
301 	case AF_INET:
302 		{
303 			struct in_addr addr;
304 			rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
305 			nlattr_add(nw, NL_RTA_DST, 4, &addr);
306 			break;
307 		}
308 #endif
309 #ifdef INET6
310 	case AF_INET6:
311 		{
312 			struct in6_addr addr;
313 			rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
314 			nlattr_add(nw, NL_RTA_DST, 16, &addr);
315 			break;
316 		}
317 #endif
318 	default:
319 		FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family);
320 		error = EAFNOSUPPORT;
321 		goto flush;
322 	}
323 
324 	rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg);
325 	if (plen > 0)
326 		rtm->rtm_dst_len = plen;
327 	dump_rc_nhop(nw, rnd, rtm);
328 
329 	if (nlmsg_end(nw))
330 		return (0);
331 enomem:
332 	error = ENOMEM;
333 flush:
334 	nlmsg_abort(nw);
335 	return (error);
336 }
337 
338 static int
339 family_to_group(int family)
340 {
341 	switch (family) {
342 	case AF_INET:
343 		return (RTNLGRP_IPV4_ROUTE);
344 	case AF_INET6:
345 		return (RTNLGRP_IPV6_ROUTE);
346 	}
347 	return (0);
348 }
349 
350 
351 static void
352 report_operation(uint32_t fibnum, struct rib_cmd_info *rc,
353     struct nlpcb *nlp, struct nlmsghdr *hdr)
354 {
355 	struct nl_writer nw = {};
356 	uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt));
357 
358 	if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
359 		struct route_nhop_data rnd = {
360 			.rnd_nhop = rc_get_nhop(rc),
361 			.rnd_weight = rc->rc_nh_weight,
362 		};
363 		hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE);
364 		hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND);
365 		switch (rc->rc_cmd) {
366 		case RTM_ADD:
367 			hdr->nlmsg_type = NL_RTM_NEWROUTE;
368 			hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
369 			break;
370 		case RTM_CHANGE:
371 			hdr->nlmsg_type = NL_RTM_NEWROUTE;
372 			hdr->nlmsg_flags |= NLM_F_REPLACE;
373 			break;
374 		case RTM_DELETE:
375 			hdr->nlmsg_type = NL_RTM_DELROUTE;
376 			break;
377 		}
378 		dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw);
379 		nlmsg_flush(&nw);
380 	}
381 
382 	rtsock_callback_p->route_f(fibnum, rc);
383 }
384 
385 struct rta_mpath_nh {
386 	struct sockaddr	*gw;
387 	struct ifnet	*ifp;
388 	uint8_t		rtnh_flags;
389 	uint8_t		rtnh_weight;
390 };
391 
392 #define	_IN(_field)	offsetof(struct rtnexthop, _field)
393 #define	_OUT(_field)	offsetof(struct rta_mpath_nh, _field)
394 const static struct nlattr_parser nla_p_rtnh[] = {
395 	{ .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip },
396 	{ .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia },
397 };
398 const static struct nlfield_parser nlf_p_rtnh[] = {
399 	{ .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 },
400 	{ .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 },
401 	{ .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz },
402 };
403 #undef _IN
404 #undef _OUT
405 NL_DECLARE_PARSER(mpath_parser, struct rtnexthop, nlf_p_rtnh, nla_p_rtnh);
406 
407 struct rta_mpath {
408 	int num_nhops;
409 	struct rta_mpath_nh nhops[0];
410 };
411 
412 static int
413 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
414 {
415 	int data_len = nla->nla_len - sizeof(struct nlattr);
416 	struct rtnexthop *rtnh;
417 
418 	int max_nhops = data_len / sizeof(struct rtnexthop);
419 
420 	struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh));
421 	mp->num_nhops = 0;
422 
423 	for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) {
424 		struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++];
425 
426 		int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser,
427 		    npt, mpnh);
428 		if (error != 0) {
429 			NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed",
430 			    mp->num_nhops - 1);
431 			return (error);
432 		}
433 
434 		int len = NL_ITEM_ALIGN(rtnh->rtnh_len);
435 		data_len -= len;
436 		rtnh = (struct rtnexthop *)((char *)rtnh + len);
437 	}
438 	if (data_len != 0 || mp->num_nhops == 0) {
439 		NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr");
440 		return (EINVAL);
441 	}
442 
443 	*((struct rta_mpath **)target) = mp;
444 	return (0);
445 }
446 
447 
448 struct nl_parsed_route {
449 	struct sockaddr		*rta_dst;
450 	struct sockaddr		*rta_gw;
451 	struct ifnet		*rta_oif;
452 	struct rta_mpath	*rta_multipath;
453 	uint32_t		rta_table;
454 	uint32_t		rta_rtflags;
455 	uint32_t		rta_nh_id;
456 	uint32_t		rta_weight;
457 	uint32_t		rtax_mtu;
458 	uint8_t			rtm_family;
459 	uint8_t			rtm_dst_len;
460 	uint8_t			rtm_protocol;
461 };
462 
463 #define	_IN(_field)	offsetof(struct rtmsg, _field)
464 #define	_OUT(_field)	offsetof(struct nl_parsed_route, _field)
465 static struct nlattr_parser nla_p_rtmetrics[] = {
466 	{ .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 },
467 };
468 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics);
469 
470 static const struct nlattr_parser nla_p_rtmsg[] = {
471 	{ .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip },
472 	{ .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp },
473 	{ .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip },
474 	{ .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested },
475 	{ .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath },
476 	{ .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 },
477 	{ .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 },
478 	{ .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 },
479 	{ .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia },
480 	{ .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 },
481 };
482 
483 static const struct nlfield_parser nlf_p_rtmsg[] = {
484 	{.off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 },
485 	{.off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 },
486 	{.off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 },
487 };
488 #undef _IN
489 #undef _OUT
490 NL_DECLARE_PARSER(rtm_parser, struct rtmsg, nlf_p_rtmsg, nla_p_rtmsg);
491 
492 struct netlink_walkargs {
493 	struct nl_writer *nw;
494 	struct route_nhop_data rnd;
495 	struct nlmsghdr hdr;
496 	struct nlpcb *nlp;
497 	uint32_t fibnum;
498 	int family;
499 	int error;
500 	int count;
501 	int dumped;
502 	int dumped_tables;
503 };
504 
505 static int
506 dump_rtentry(struct rtentry *rt, void *_arg)
507 {
508 	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
509 	int error;
510 
511 	wa->count++;
512 	if (wa->error != 0)
513 		return (0);
514 	wa->dumped++;
515 
516 	rt_get_rnd(rt, &wa->rnd);
517 
518 	error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw);
519 
520 	IF_DEBUG_LEVEL(LOG_DEBUG3) {
521 		char rtbuf[INET6_ADDRSTRLEN + 5];
522 		FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family,
523 		    "Dump %s, offset %u, error %d",
524 		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)),
525 		    wa->nw->offset, error);
526 	}
527 	wa->error = error;
528 
529 	return (0);
530 }
531 
532 static void
533 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family)
534 {
535 	FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump");
536 	wa->count = 0;
537 	wa->dumped = 0;
538 
539 	rib_walk(fibnum, family, false, dump_rtentry, wa);
540 
541 	wa->dumped_tables++;
542 
543 	FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d",
544 	    wa->count, wa->dumped);
545 	NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset);
546 }
547 
548 static int
549 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family)
550 {
551 	wa->fibnum = fibnum;
552 
553 	if (family == AF_UNSPEC) {
554 		for (int i = 0; i < AF_MAX; i++) {
555 			if (rt_tables_get_rnh(fibnum, i) != 0) {
556 				wa->family = i;
557 				dump_rtable_one(wa, fibnum, i);
558 				if (wa->error != 0)
559 					break;
560 			}
561 		}
562 	} else {
563 		if (rt_tables_get_rnh(fibnum, family) != 0) {
564 			wa->family = family;
565 			dump_rtable_one(wa, fibnum, family);
566 		}
567 	}
568 
569 	return (wa->error);
570 }
571 
572 static int
573 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs,
574     struct nlmsghdr *hdr, struct nl_pstate *npt)
575 {
576 	RIB_RLOCK_TRACKER;
577 	struct rib_head *rnh;
578 	struct rtentry *rt;
579 	uint32_t fibnum = attrs->rta_table;
580 	sa_family_t family = attrs->rtm_family;
581 
582 	if (attrs->rta_dst == NULL) {
583 		NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied");
584 			return (EINVAL);
585 	}
586 
587 	FIB_LOG(LOG_DEBUG, fibnum, family, "getroute called");
588 
589 	rnh = rt_tables_get_rnh(fibnum, family);
590 	if (rnh == NULL)
591 		return (EAFNOSUPPORT);
592 
593 	RIB_RLOCK(rnh);
594 
595 	rt = (struct rtentry *)rnh->rnh_matchaddr(attrs->rta_dst, &rnh->head);
596 	if (rt == NULL) {
597 		RIB_RUNLOCK(rnh);
598 		return (ESRCH);
599 	}
600 
601 	struct route_nhop_data rnd;
602 	rt_get_rnd(rt, &rnd);
603 	rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0);
604 
605 	RIB_RUNLOCK(rnh);
606 
607 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
608 		char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused;
609 		FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s",
610 		    nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)),
611 		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)));
612 	}
613 
614 	hdr->nlmsg_type = NL_RTM_NEWROUTE;
615 	dump_px(fibnum, hdr, rt, &rnd, npt->nw);
616 
617 	return (0);
618 }
619 
620 static int
621 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family,
622     struct nlmsghdr *hdr, struct nl_writer *nw)
623 {
624 	struct netlink_walkargs wa = {
625 		.nlp = nlp,
626 		.nw = nw,
627 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
628 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
629 		.hdr.nlmsg_type = NL_RTM_NEWROUTE,
630 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
631 	};
632 
633 	if (fibnum == RT_TABLE_UNSPEC) {
634 		for (int i = 0; i < V_rt_numfibs; i++) {
635 			dump_rtable_fib(&wa, fibnum, family);
636 			if (wa.error != 0)
637 				break;
638 		}
639 	} else
640 		dump_rtable_fib(&wa, fibnum, family);
641 
642 	if (wa.error == 0 && wa.dumped_tables == 0) {
643 		FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family");
644 		wa.error = ESRCH;
645 		// How do we propagate it?
646 	}
647 
648 	if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) {
649                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
650                 return (ENOMEM);
651         }
652 
653 	return (wa.error);
654 }
655 
656 static struct nhop_object *
657 finalize_nhop(struct nhop_object *nh, int *perror)
658 {
659 	/*
660 	 * The following MUST be filled:
661 	 *  nh_ifp, nh_ifa, nh_gw
662 	 */
663 	if (nh->gw_sa.sa_family == 0) {
664 		/*
665 		 * Empty gateway. Can be direct route with RTA_OIF set.
666 		 */
667 		if (nh->nh_ifp != NULL)
668 			nhop_set_direct_gw(nh, nh->nh_ifp);
669 		else {
670 			NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping");
671 			*perror = EINVAL;
672 			return (NULL);
673 		}
674 		/* Both nh_ifp and gateway are set */
675 	} else {
676 		/* Gateway is set up, we can derive ifp if not set */
677 		if (nh->nh_ifp == NULL) {
678 			struct ifaddr *ifa = ifa_ifwithnet(&nh->gw_sa, 1, nhop_get_fibnum(nh));
679 			if (ifa == NULL) {
680 				NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping");
681 				*perror = EINVAL;
682 				return (NULL);
683 			}
684 			nhop_set_transmit_ifp(nh, ifa->ifa_ifp);
685 		}
686 	}
687 	/* Both nh_ifp and gateway are set */
688 	if (nh->nh_ifa == NULL) {
689 		struct ifaddr *ifa = ifaof_ifpforaddr(&nh->gw_sa, nh->nh_ifp);
690 		if (ifa == NULL) {
691 			NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping");
692 			*perror = EINVAL;
693 			return (NULL);
694 		}
695 		nhop_set_src(nh, ifa);
696 	}
697 
698 	return (nhop_get_nhop(nh, perror));
699 }
700 
701 static int
702 get_pxflag(const struct nl_parsed_route *attrs)
703 {
704 	int pxflag = 0;
705 	switch (attrs->rtm_family) {
706 	case AF_INET:
707 		if (attrs->rtm_dst_len == 32)
708 			pxflag = NHF_HOST;
709 		else if (attrs->rtm_dst_len == 0)
710 			pxflag = NHF_DEFAULT;
711 		break;
712 	case AF_INET6:
713 		if (attrs->rtm_dst_len == 32)
714 			pxflag = NHF_HOST;
715 		else if (attrs->rtm_dst_len == 0)
716 			pxflag = NHF_DEFAULT;
717 		break;
718 	}
719 
720 	return (pxflag);
721 }
722 
723 static int
724 get_op_flags(int nlm_flags)
725 {
726 	int op_flags = 0;
727 
728 	op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0;
729 	op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0;
730 	op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0;
731 	op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0;
732 
733 	return (op_flags);
734 }
735 
#ifdef ROUTE_MPATH
/*
 * Builds one member nexthop of a multipath route from a parsed
 * RTA_MULTIPATH record.
 *
 * A gateway is mandatory.  The route-wide rtsock flags and (for protocols
 * above RTPROT_STATIC) the origin are applied to the member.  On success
 * the finalized nexthop is stored in *pnh and 0 is returned; otherwise an
 * errno value is returned.
 */
static int
create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh,
    struct nl_pstate *npt, struct nhop_object **pnh)
{
	struct nhop_object *member;
	int rc;

	if (mpnh->gw == NULL)
		return (EINVAL);

	member = nhop_alloc(attrs->rta_table, attrs->rtm_family);
	if (member == NULL)
		return (ENOMEM);

	rc = nl_set_nexthop_gw(member, mpnh->gw, mpnh->ifp, npt);
	if (rc != 0) {
		nhop_free(member);
		return (rc);
	}

	/* An explicit interface overrides whatever the gateway implied */
	if (mpnh->ifp != NULL)
		nhop_set_transmit_ifp(member, mpnh->ifp);
	nhop_set_rtflags(member, attrs->rta_rtflags);
	if (attrs->rtm_protocol > RTPROT_STATIC)
		nhop_set_origin(member, attrs->rtm_protocol);

	*pnh = finalize_nhop(member, &rc);

	return (rc);
}
#endif
766 
767 static struct nhop_object *
768 create_nexthop_from_attrs(struct nl_parsed_route *attrs,
769     struct nl_pstate *npt, int *perror)
770 {
771 	struct nhop_object *nh = NULL;
772 	int error = 0;
773 
774 	if (attrs->rta_multipath != NULL) {
775 #ifdef ROUTE_MPATH
776 		/* Multipath w/o explicit nexthops */
777 		int num_nhops = attrs->rta_multipath->num_nhops;
778 		struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops);
779 
780 		for (int i = 0; i < num_nhops; i++) {
781 			struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i];
782 
783 			error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh);
784 			if (error != 0) {
785 				for (int j = 0; j < i; j++)
786 					nhop_free(wn[j].nh);
787 				break;
788 			}
789 			wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1;
790 		}
791 		if (error == 0) {
792 			struct rib_head *rh = nhop_get_rh(wn[0].nh);
793 			struct nhgrp_object *nhg;
794 
795 			nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family,
796 			    wn, num_nhops, perror);
797 			if (nhg != NULL) {
798 				if (attrs->rtm_protocol > RTPROT_STATIC)
799 					nhgrp_set_origin(nhg, attrs->rtm_protocol);
800 				nhg = nhgrp_get_nhgrp(nhg, perror);
801 			}
802 			for (int i = 0; i < num_nhops; i++)
803 				nhop_free(wn[i].nh);
804 			if (nhg != NULL)
805 				return ((struct nhop_object *)nhg);
806 			error = *perror;
807 		}
808 #else
809 		error = ENOTSUP;
810 #endif
811 		*perror = error;
812 	} else {
813 		nh = nhop_alloc(attrs->rta_table, attrs->rtm_family);
814 		if (nh == NULL) {
815 			*perror = ENOMEM;
816 			return (NULL);
817 		}
818 		if (attrs->rta_gw != NULL) {
819 			*perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt);
820 			if (*perror != 0) {
821 				nhop_free(nh);
822 				return (NULL);
823 			}
824 		}
825 		if (attrs->rta_oif != NULL)
826 			nhop_set_transmit_ifp(nh, attrs->rta_oif);
827 		if (attrs->rtax_mtu != 0)
828 			nhop_set_mtu(nh, attrs->rtax_mtu, true);
829 		if (attrs->rta_rtflags & RTF_BROADCAST)
830 			nhop_set_broadcast(nh, true);
831 		if (attrs->rta_rtflags & RTF_BLACKHOLE)
832 			nhop_set_blackhole(nh, NHF_BLACKHOLE);
833 		if (attrs->rta_rtflags & RTF_REJECT)
834 			nhop_set_blackhole(nh, NHF_REJECT);
835 		nhop_set_rtflags(nh, attrs->rta_rtflags);
836 		if (attrs->rtm_protocol > RTPROT_STATIC)
837 			nhop_set_origin(nh, attrs->rtm_protocol);
838 		nh = finalize_nhop(nh, perror);
839 	}
840 
841 	return (nh);
842 }
843 
844 static int
845 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
846     struct nl_pstate *npt)
847 {
848 	struct rib_cmd_info rc = {};
849 	struct nhop_object *nh = NULL;
850 	int error;
851 
852 	struct nl_parsed_route attrs = {};
853 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
854 	if (error != 0)
855 		return (error);
856 
857 	/* Check if we have enough data */
858 	if (attrs.rta_dst == NULL) {
859 		NL_LOG(LOG_DEBUG, "missing RTA_DST");
860 		return (EINVAL);
861 	}
862 
863 	if (attrs.rta_table >= V_rt_numfibs) {
864 		NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
865 		return (EINVAL);
866 	}
867 
868 	if (attrs.rta_nh_id != 0) {
869 		/* Referenced uindex */
870 		int pxflag = get_pxflag(&attrs);
871 		nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id,
872 		    pxflag, &error);
873 		if (error != 0)
874 			return (error);
875 	} else {
876 		nh = create_nexthop_from_attrs(&attrs, npt, &error);
877 		if (error != 0) {
878 			NL_LOG(LOG_DEBUG, "Error creating nexthop");
879 			return (error);
880 		}
881 	}
882 
883 	if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0)
884 		attrs.rta_weight = RT_DEFAULT_WEIGHT;
885 	struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight };
886 	int op_flags = get_op_flags(hdr->nlmsg_flags);
887 
888 	error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len,
889 	    &rnd, op_flags, &rc);
890 	if (error == 0)
891 		report_operation(attrs.rta_table, &rc, nlp, hdr);
892 	return (error);
893 }
894 
895 static int
896 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
897 {
898 	struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data;
899 
900 	if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw))
901 		return (0);
902 
903 	if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp))
904 		return (0);
905 
906 	return (1);
907 }
908 
909 static int
910 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
911     struct nl_pstate *npt)
912 {
913 	struct rib_cmd_info rc;
914 	int error;
915 
916 	struct nl_parsed_route attrs = {};
917 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
918 	if (error != 0)
919 		return (error);
920 
921 	if (attrs.rta_dst == NULL) {
922 		NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set");
923 		return (ESRCH);
924 	}
925 
926 	if (attrs.rta_table >= V_rt_numfibs) {
927 		NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
928 		return (EINVAL);
929 	}
930 
931 	error = rib_del_route_px(attrs.rta_table, attrs.rta_dst,
932 	    attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc);
933 	if (error == 0)
934 		report_operation(attrs.rta_table, &rc, nlp, hdr);
935 	return (error);
936 }
937 
938 static int
939 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
940 {
941 	int error;
942 
943 	struct nl_parsed_route attrs = {};
944 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
945 	if (error != 0)
946 		return (error);
947 
948 	if (attrs.rta_table >= V_rt_numfibs) {
949 		NLMSG_REPORT_ERR_MSG(npt, "invalid fib");
950 		return (EINVAL);
951 	}
952 
953 	if (hdr->nlmsg_flags & NLM_F_DUMP)
954 		error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw);
955 	else
956 		error = handle_rtm_getroute(nlp, &attrs, hdr, npt);
957 
958 	return (error);
959 }
960 
961 void
962 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
963 {
964 	struct nl_writer nw = {};
965 	int family, nlm_flags = 0;
966 
967 	family = rt_get_family(rc->rc_rt);
968 
969 	/* XXX: check if there are active listeners first */
970 
971 	/* TODO: consider passing PID/type/seq */
972 	switch (rc->rc_cmd) {
973 	case RTM_ADD:
974 		nlm_flags = NLM_F_EXCL | NLM_F_CREATE;
975 		break;
976 	case RTM_CHANGE:
977 		nlm_flags = NLM_F_REPLACE;
978 		break;
979 	case RTM_DELETE:
980 		nlm_flags = 0;
981 		break;
982 	}
983 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
984 		char rtbuf[NHOP_PRINT_BUFSIZE] __unused;
985 		FIB_LOG(LOG_DEBUG2, fibnum, family,
986 		    "received event %s for %s / nlm_flags=%X",
987 		    rib_print_cmd(rc->rc_cmd),
988 		    rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)),
989 		    nlm_flags);
990 	}
991 
992 	struct nlmsghdr hdr = {
993 		.nlmsg_flags = nlm_flags,
994 		.nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd),
995 	};
996 
997 	struct route_nhop_data rnd = {
998 		.rnd_nhop = rc_get_nhop(rc),
999 		.rnd_weight = rc->rc_nh_weight,
1000 	};
1001 
1002 	uint32_t group_id = family_to_group(family);
1003 	if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
1004 		NL_LOG(LOG_DEBUG, "error allocating event buffer");
1005 		return;
1006 	}
1007 
1008 	dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw);
1009 	nlmsg_flush(&nw);
1010 }
1011 
1012 static const struct rtnl_cmd_handler cmd_handlers[] = {
1013 	{
1014 		.cmd = NL_RTM_GETROUTE,
1015 		.name = "RTM_GETROUTE",
1016 		.cb = &rtnl_handle_getroute,
1017 	},
1018 	{
1019 		.cmd = NL_RTM_DELROUTE,
1020 		.name = "RTM_DELROUTE",
1021 		.cb = &rtnl_handle_delroute,
1022 		.priv = PRIV_NET_ROUTE,
1023 	},
1024 	{
1025 		.cmd = NL_RTM_NEWROUTE,
1026 		.name = "RTM_NEWROUTE",
1027 		.cb = &rtnl_handle_newroute,
1028 		.priv = PRIV_NET_ROUTE,
1029 	}
1030 };
1031 
1032 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser};
1033 
1034 void
1035 rtnl_routes_init(void)
1036 {
1037 	NL_VERIFY_PARSERS(all_parsers);
1038 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
1039 }
1040