xref: /freebsd/sys/netlink/route/rt.c (revision c19fc5cd9b49115604ce2b89279e3434c7f120cc)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2021 Ng Peng Nam Sean
5  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include "opt_route.h"
34 #include <sys/types.h>
35 #include <sys/malloc.h>
36 #include <sys/rmlock.h>
37 #include <sys/socket.h>
38 
39 #include <net/if.h>
40 #include <net/route.h>
41 #include <net/route/nhop.h>
42 #include <net/route/route_ctl.h>
43 #include <net/route/route_var.h>
44 #include <netlink/netlink.h>
45 #include <netlink/netlink_ctl.h>
46 #include <netlink/netlink_route.h>
47 #include <netlink/route/route_var.h>
48 
49 #define	DEBUG_MOD_NAME	nl_route
50 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
51 #include <netlink/netlink_debug.h>
52 _DECLARE_DEBUG(LOG_DEBUG);
53 
54 static unsigned char
55 get_rtm_type(const struct nhop_object *nh)
56 {
57 	int nh_flags = nh->nh_flags;
58 
59 	/* Use the fact that nhg runtime flags are only NHF_MULTIPATH */
60 	if (nh_flags & NHF_BLACKHOLE)
61 		return (RTN_BLACKHOLE);
62 	else if (nh_flags & NHF_REJECT)
63 		return (RTN_PROHIBIT);
64 	return (RTN_UNICAST);
65 }
66 
67 static uint8_t
68 nl_get_rtm_protocol(const struct nhop_object *nh)
69 {
70 #ifdef ROUTE_MPATH
71 	if (NH_IS_NHGRP(nh)) {
72 		const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh;
73 		uint8_t origin = nhgrp_get_origin(nhg);
74 		if (origin != RTPROT_UNSPEC)
75 			return (origin);
76 		nh = nhg->nhops[0];
77 	}
78 #endif
79 	uint8_t origin = nhop_get_origin(nh);
80 	if (origin != RTPROT_UNSPEC)
81 		return (origin);
82 	/* TODO: remove guesswork once all kernel users fill in origin */
83 	int rt_flags = nhop_get_rtflags(nh);
84 	if (rt_flags & RTF_PROTO1)
85 		return (RTPROT_ZEBRA);
86 	if (rt_flags & RTF_STATIC)
87 		return (RTPROT_STATIC);
88 	return (RTPROT_KERNEL);
89 }
90 
91 static int
92 get_rtmsg_type_from_rtsock(int cmd)
93 {
94 	switch (cmd) {
95 	case RTM_ADD:
96 	case RTM_CHANGE:
97 	case RTM_GET:
98 		return NL_RTM_NEWROUTE;
99 	case RTM_DELETE:
100 		return NL_RTM_DELROUTE;
101 	}
102 
103 	return (0);
104 }
105 
/*
 * fibnum heuristics
 *
 * Rule for dump requests:
 *   if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS
 *
 * msg                rtm_table     RTA_TABLE            result
 * RTM_GETROUTE/dump          0             -       RT_ALL_FIBS
 * RTM_GETROUTE/dump          1             -                 1
 * RTM_GETROUTE/get           0             -                 0
 */
116 
117 static struct nhop_object *
118 rc_get_nhop(const struct rib_cmd_info *rc)
119 {
120 	return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new);
121 }
122 
123 static void
124 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh)
125 {
126 	int upper_family;
127 
128 	switch (nhop_get_neigh_family(nh)) {
129 	case AF_LINK:
130 		/* onlink prefix, skip */
131 		break;
132 	case AF_INET:
133 		nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr);
134 		break;
135 	case AF_INET6:
136 		upper_family = nhop_get_upper_family(nh);
137 		if (upper_family == AF_INET6) {
138 			nlattr_add(nw, NL_RTA_GATEWAY, 16, &nh->gw6_sa.sin6_addr);
139 		} else if (upper_family == AF_INET) {
140 			/* IPv4 over IPv6 */
141 			char buf[20];
142 			struct rtvia *via = (struct rtvia *)&buf[0];
143 			via->rtvia_family = AF_INET6;
144 			memcpy(via->rtvia_addr, &nh->gw6_sa.sin6_addr, 16);
145 			nlattr_add(nw, NL_RTA_VIA, 17, via);
146 		}
147 		break;
148 	}
149 }
150 
151 static void
152 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh)
153 {
154 	int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t);
155 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
156 
157 	if (nla == NULL)
158 		return;
159 	nla->nla_type = NL_RTA_METRICS;
160 	nla->nla_len = nla_len;
161 	nla++;
162 	nla->nla_type = NL_RTAX_MTU;
163 	nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t);
164 	*((uint32_t *)(nla + 1)) = nh->nh_mtu;
165 }
166 
#ifdef ROUTE_MPATH
/*
 * Dumps a nexthop group into the message being built in @nw.
 *
 * Emits NL_RTA_NH_ID (only when the group has a non-zero user index),
 * NL_RTA_KNH_ID, NL_RTA_RTFLAGS taken from the first member, and a nested
 * NL_RTA_MULTIPATH attribute with one struct rtnexthop per member.  Each
 * member carries its gateway, its own NL_RTA_RTFLAGS when they differ from
 * the first member's, and the MTU metric when RTF_FIXEDMTU is set.
 *
 * The function returns early, leaving the nested attribute unfinished,
 * when the writer runs out of space.  @rtm is not used.
 */
static void
dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm)
{
	uint32_t group_uidx = nhgrp_get_uidx(nhg);
	uint32_t count;
	const struct weightened_nhop *members = nhgrp_get_nhops(nhg, &count);
	uint32_t common_rtflags = nhop_get_rtflags(members[0].nh);
	int mpath_off;

	if (group_uidx != 0)
		nlattr_add_u32(nw, NL_RTA_NH_ID, group_uidx);
	nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg));

	nlattr_add_u32(nw, NL_RTA_RTFLAGS, common_rtflags);
	mpath_off = nlattr_add_nested(nw, NL_RTA_MULTIPATH);
	if (mpath_off == 0)
		return;

	for (uint32_t idx = 0; idx < count; idx++) {
		const struct nhop_object *member = members[idx].nh;
		int start_off = nlattr_save_offset(nw);
		struct rtnexthop *rtnh;
		uint32_t member_rtflags;

		rtnh = nlmsg_reserve_object(nw, struct rtnexthop);
		if (rtnh == NULL)
			return;
		rtnh->rtnh_flags = 0;
		rtnh->rtnh_ifindex = member->nh_ifp->if_index;
		rtnh->rtnh_hops = members[idx].weight;

		dump_rc_nhop_gw(nw, member);
		member_rtflags = nhop_get_rtflags(member);
		if (member_rtflags != common_rtflags)
			nlattr_add_u32(nw, NL_RTA_RTFLAGS, member_rtflags);
		if ((member_rtflags & RTF_FIXEDMTU) != 0)
			dump_rc_nhop_mtu(nw, member);

		/* Re-derive the header pointer from the saved offset */
		rtnh = nlattr_restore_offset(nw, start_off, struct rtnexthop);
		/*
		 * nlattr_add() allocates 4-byte aligned storage, no need to
		 * align the length here.
		 */
		rtnh->rtnh_len = nlattr_save_offset(nw) - start_off;
	}
	nlattr_set_len(nw, mpath_off);
}
#endif
209 
210 static void
211 dump_rc_nhop(struct nl_writer *nw, const struct nhop_object *nh, struct rtmsg *rtm)
212 {
213 #ifdef ROUTE_MPATH
214 	if (NH_IS_NHGRP(nh)) {
215 		dump_rc_nhg(nw, (const struct nhgrp_object *)nh, rtm);
216 		return;
217 	}
218 #endif
219 	uint32_t rtflags = nhop_get_rtflags(nh);
220 
221 	/*
222 	 * IPv4 over IPv6
223 	 *    ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2),
224 	 * IPv4 w/ gw
225 	 *    ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)],
226 	 * Direct route:
227 	 *    ('RTA_OIF', 2)
228 	 */
229 	if (nh->nh_flags & NHF_GATEWAY)
230 		dump_rc_nhop_gw(nw, nh);
231 
232 	uint32_t uidx = nhop_get_uidx(nh);
233 	if (uidx != 0)
234 		nlattr_add_u32(nw, NL_RTA_NH_ID, uidx);
235 	nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh));
236 	nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags);
237 
238 	if (rtflags & RTF_FIXEDMTU)
239 		dump_rc_nhop_mtu(nw, nh);
240 	uint32_t nh_expire = nhop_get_expire(nh);
241 	if (nh_expire > 0)
242 		nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime);
243 
244 	/* In any case, fill outgoing interface */
245 	nlattr_add_u32(nw, NL_RTA_OIF, nh->nh_ifp->if_index);
246 }
247 
248 /*
249  * Dumps output from a rib command into an rtmsg
250  */
251 
252 static int
253 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr,
254     const struct rtentry *rt, struct route_nhop_data *rnd,
255     struct nl_writer *nw)
256 {
257 	struct rtmsg *rtm;
258 	int error = 0;
259 
260 	NET_EPOCH_ASSERT();
261 
262 	if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg)))
263 		goto enomem;
264 
265 	int family = rt_get_family(rt);
266 	int rtm_off = nlattr_save_offset(nw);
267 	rtm = nlmsg_reserve_object(nw, struct rtmsg);
268 	rtm->rtm_family = family;
269 	rtm->rtm_dst_len = 0;
270 	rtm->rtm_src_len = 0;
271 	rtm->rtm_tos = 0;
272 	if (fibnum < 255)
273 		rtm->rtm_table = (unsigned char)fibnum;
274 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
275 	rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop);
276 	rtm->rtm_type = get_rtm_type(rnd->rnd_nhop);
277 
278 	nlattr_add_u32(nw, NL_RTA_TABLE, fibnum);
279 
280 	int plen = 0;
281 #if defined(INET) || defined(INET6)
282 	uint32_t scopeid;
283 #endif
284 	switch (family) {
285 #ifdef INET
286 	case AF_INET:
287 		{
288 			struct in_addr addr;
289 			rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
290 			nlattr_add(nw, NL_RTA_DST, 4, &addr);
291 			break;
292 		}
293 #endif
294 #ifdef INET6
295 	case AF_INET6:
296 		{
297 			struct in6_addr addr;
298 			rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid);
299 			nlattr_add(nw, NL_RTA_DST, 16, &addr);
300 			break;
301 		}
302 #endif
303 	default:
304 		FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family);
305 		error = EAFNOSUPPORT;
306 		goto flush;
307 	}
308 
309 	rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg);
310 	if (plen > 0)
311 		rtm->rtm_dst_len = plen;
312 	dump_rc_nhop(nw, rnd->rnd_nhop, rtm);
313 
314 	if (nlmsg_end(nw))
315 		return (0);
316 enomem:
317 	error = ENOMEM;
318 flush:
319 	nlmsg_abort(nw);
320 	return (error);
321 }
322 
323 static int
324 family_to_group(int family)
325 {
326 	switch (family) {
327 	case AF_INET:
328 		return (RTNLGRP_IPV4_ROUTE);
329 	case AF_INET6:
330 		return (RTNLGRP_IPV6_ROUTE);
331 	}
332 	return (0);
333 }
334 
335 
336 static void
337 report_operation(uint32_t fibnum, struct rib_cmd_info *rc,
338     struct nlpcb *nlp, struct nlmsghdr *hdr)
339 {
340 	struct nl_writer nw = {};
341 	uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt));
342 
343 	if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
344 		struct route_nhop_data rnd = {
345 			.rnd_nhop = rc_get_nhop(rc),
346 			.rnd_weight = rc->rc_nh_weight,
347 		};
348 		hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE);
349 		hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND);
350 		switch (rc->rc_cmd) {
351 		case RTM_ADD:
352 			hdr->nlmsg_type = NL_RTM_NEWROUTE;
353 			hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
354 			break;
355 		case RTM_CHANGE:
356 			hdr->nlmsg_type = NL_RTM_NEWROUTE;
357 			hdr->nlmsg_flags |= NLM_F_REPLACE;
358 			break;
359 		case RTM_DELETE:
360 			hdr->nlmsg_type = NL_RTM_DELROUTE;
361 			break;
362 		}
363 		dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw);
364 		nlmsg_flush(&nw);
365 	}
366 
367 	rtsock_callback_p->route_f(fibnum, rc);
368 }
369 
370 struct rta_mpath_nh {
371 	struct sockaddr	*gw;
372 	struct ifnet	*ifp;
373 	uint8_t		rtnh_flags;
374 	uint8_t		rtnh_weight;
375 };
376 
377 #define	_IN(_field)	offsetof(struct rtnexthop, _field)
378 #define	_OUT(_field)	offsetof(struct rta_mpath_nh, _field)
379 const static struct nlattr_parser nla_p_rtnh[] = {
380 	{ .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip },
381 	{ .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia },
382 };
383 const static struct nlfield_parser nlf_p_rtnh[] = {
384 	{ .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 },
385 	{ .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 },
386 	{ .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz },
387 };
388 #undef _IN
389 #undef _OUT
390 NL_DECLARE_PARSER(mpath_parser, struct rtnexthop, nlf_p_rtnh, nla_p_rtnh);
391 
392 struct rta_mpath {
393 	int num_nhops;
394 	struct rta_mpath_nh nhops[0];
395 };
396 
397 static int
398 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target)
399 {
400 	int data_len = nla->nla_len - sizeof(struct nlattr);
401 	struct rtnexthop *rtnh;
402 
403 	int max_nhops = data_len / sizeof(struct rtnexthop);
404 
405 	struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh));
406 	mp->num_nhops = 0;
407 
408 	for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) {
409 		struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++];
410 
411 		int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser,
412 		    npt, mpnh);
413 		if (error != 0) {
414 			NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed",
415 			    mp->num_nhops - 1);
416 			return (error);
417 		}
418 
419 		int len = NL_ITEM_ALIGN(rtnh->rtnh_len);
420 		data_len -= len;
421 		rtnh = (struct rtnexthop *)((char *)rtnh + len);
422 	}
423 	if (data_len != 0 || mp->num_nhops == 0) {
424 		NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr");
425 		return (EINVAL);
426 	}
427 
428 	*((struct rta_mpath **)target) = mp;
429 	return (0);
430 }
431 
432 
/*
 * Parsed representation of an rtmsg-based request
 * (RTM_NEWROUTE / RTM_DELROUTE / RTM_GETROUTE), filled in by rtm_parser.
 */
struct nl_parsed_route {
	struct sockaddr		*rta_dst;	/* NL_RTA_DST */
	struct sockaddr		*rta_gw;	/* NL_RTA_GATEWAY or NL_RTA_VIA */
	struct ifnet		*rta_oif;	/* NL_RTA_OIF */
	struct rta_mpath	*rta_multipath;	/* NL_RTA_MULTIPATH */
	uint32_t		rta_table;	/* NL_RTA_TABLE */
	uint32_t		rta_rtflags;	/* NL_RTA_RTFLAGS */
	uint32_t		rta_nh_id;	/* NL_RTA_NH_ID */
	uint32_t		rtax_mtu;	/* NL_RTAX_MTU inside NL_RTA_METRICS */
	uint8_t			rtm_family;	/* rtmsg header: rtm_family */
	uint8_t			rtm_dst_len;	/* rtmsg header: rtm_dst_len */
	uint8_t			rtm_protocol;	/* rtmsg header: rtm_protocol */
};
446 
447 #define	_IN(_field)	offsetof(struct rtmsg, _field)
448 #define	_OUT(_field)	offsetof(struct nl_parsed_route, _field)
449 static struct nlattr_parser nla_p_rtmetrics[] = {
450 	{ .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 },
451 };
452 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics);
453 
454 static const struct nlattr_parser nla_p_rtmsg[] = {
455 	{ .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip },
456 	{ .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp },
457 	{ .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip },
458 	{ .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested },
459 	{ .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath },
460 	{ .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 },
461 	{ .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 },
462 	{ .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia },
463 	{ .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 },
464 };
465 
466 static const struct nlfield_parser nlf_p_rtmsg[] = {
467 	{.off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 },
468 	{.off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 },
469 	{.off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 },
470 };
471 #undef _IN
472 #undef _OUT
473 NL_DECLARE_PARSER(rtm_parser, struct rtmsg, nlf_p_rtmsg, nla_p_rtmsg);
474 
475 struct netlink_walkargs {
476 	struct nl_writer *nw;
477 	struct route_nhop_data rnd;
478 	struct nlmsghdr hdr;
479 	struct nlpcb *nlp;
480 	uint32_t fibnum;
481 	int family;
482 	int error;
483 	int count;
484 	int dumped;
485 	int dumped_tables;
486 };
487 
488 static int
489 dump_rtentry(struct rtentry *rt, void *_arg)
490 {
491 	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
492 	int error;
493 
494 	wa->count++;
495 	if (wa->error != 0)
496 		return (0);
497 	wa->dumped++;
498 
499 	rt_get_rnd(rt, &wa->rnd);
500 
501 	error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw);
502 
503 	IF_DEBUG_LEVEL(LOG_DEBUG3) {
504 		char rtbuf[INET6_ADDRSTRLEN + 5];
505 		FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family,
506 		    "Dump %s, offset %u, error %d",
507 		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)),
508 		    wa->nw->offset, error);
509 	}
510 	wa->error = error;
511 
512 	return (0);
513 }
514 
515 static void
516 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family)
517 {
518 	FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump");
519 	wa->count = 0;
520 	wa->dumped = 0;
521 
522 	rib_walk(fibnum, family, false, dump_rtentry, wa);
523 
524 	wa->dumped_tables++;
525 
526 	FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d",
527 	    wa->count, wa->dumped);
528 	NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset);
529 }
530 
531 static int
532 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family)
533 {
534 	wa->fibnum = fibnum;
535 
536 	if (family == AF_UNSPEC) {
537 		for (int i = 0; i < AF_MAX; i++) {
538 			if (rt_tables_get_rnh(fibnum, i) != 0) {
539 				wa->family = i;
540 				dump_rtable_one(wa, fibnum, i);
541 				if (wa->error != 0)
542 					break;
543 			}
544 		}
545 	} else {
546 		if (rt_tables_get_rnh(fibnum, family) != 0) {
547 			wa->family = family;
548 			dump_rtable_one(wa, fibnum, family);
549 		}
550 	}
551 
552 	return (wa->error);
553 }
554 
555 static int
556 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs,
557     struct nlmsghdr *hdr, struct nl_pstate *npt)
558 {
559 	RIB_RLOCK_TRACKER;
560 	struct rib_head *rnh;
561 	struct rtentry *rt;
562 	uint32_t fibnum = attrs->rta_table;
563 	sa_family_t family = attrs->rtm_family;
564 
565 	if (attrs->rta_dst == NULL) {
566 		NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied");
567 			return (EINVAL);
568 	}
569 
570 	FIB_LOG(LOG_DEBUG, fibnum, family, "getroute called");
571 
572 	rnh = rt_tables_get_rnh(fibnum, family);
573 	if (rnh == NULL)
574 		return (EAFNOSUPPORT);
575 
576 	RIB_RLOCK(rnh);
577 
578 	rt = (struct rtentry *)rnh->rnh_matchaddr(attrs->rta_dst, &rnh->head);
579 	if (rt == NULL) {
580 		RIB_RUNLOCK(rnh);
581 		return (ESRCH);
582 	}
583 
584 	struct route_nhop_data rnd;
585 	rt_get_rnd(rt, &rnd);
586 	rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0);
587 
588 	RIB_RUNLOCK(rnh);
589 
590 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
591 		char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused;
592 		FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s",
593 		    nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)),
594 		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)));
595 	}
596 
597 	hdr->nlmsg_type = NL_RTM_NEWROUTE;
598 	dump_px(fibnum, hdr, rt, &rnd, npt->nw);
599 
600 	return (0);
601 }
602 
603 static int
604 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family,
605     struct nlmsghdr *hdr, struct nl_writer *nw)
606 {
607 	struct netlink_walkargs wa = {
608 		.nlp = nlp,
609 		.nw = nw,
610 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
611 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
612 		.hdr.nlmsg_type = NL_RTM_NEWROUTE,
613 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
614 	};
615 
616 	if (fibnum == RT_TABLE_UNSPEC) {
617 		for (int i = 0; i < V_rt_numfibs; i++) {
618 			dump_rtable_fib(&wa, fibnum, family);
619 			if (wa.error != 0)
620 				break;
621 		}
622 	} else
623 		dump_rtable_fib(&wa, fibnum, family);
624 
625 	if (wa.error == 0 && wa.dumped_tables == 0) {
626 		FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family");
627 		wa.error = ESRCH;
628 		// How do we propagate it?
629 	}
630 
631 	if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) {
632                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
633                 return (ENOMEM);
634         }
635 
636 	return (wa.error);
637 }
638 
639 static struct nhop_object *
640 finalize_nhop(struct nhop_object *nh, int *perror)
641 {
642 	/*
643 	 * The following MUST be filled:
644 	 *  nh_ifp, nh_ifa, nh_gw
645 	 */
646 	if (nh->gw_sa.sa_family == 0) {
647 		/*
648 		 * Empty gateway. Can be direct route with RTA_OIF set.
649 		 */
650 		if (nh->nh_ifp != NULL)
651 			nhop_set_direct_gw(nh, nh->nh_ifp);
652 		else {
653 			NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping");
654 			*perror = EINVAL;
655 			return (NULL);
656 		}
657 		/* Both nh_ifp and gateway are set */
658 	} else {
659 		/* Gateway is set up, we can derive ifp if not set */
660 		if (nh->nh_ifp == NULL) {
661 			struct ifaddr *ifa = ifa_ifwithnet(&nh->gw_sa, 1, nhop_get_fibnum(nh));
662 			if (ifa == NULL) {
663 				NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping");
664 				*perror = EINVAL;
665 				return (NULL);
666 			}
667 			nhop_set_transmit_ifp(nh, ifa->ifa_ifp);
668 		}
669 	}
670 	/* Both nh_ifp and gateway are set */
671 	if (nh->nh_ifa == NULL) {
672 		struct ifaddr *ifa = ifaof_ifpforaddr(&nh->gw_sa, nh->nh_ifp);
673 		if (ifa == NULL) {
674 			NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping");
675 			*perror = EINVAL;
676 			return (NULL);
677 		}
678 		nhop_set_src(nh, ifa);
679 	}
680 
681 	return (nhop_get_nhop(nh, perror));
682 }
683 
684 static int
685 get_pxflag(const struct nl_parsed_route *attrs)
686 {
687 	int pxflag = 0;
688 	switch (attrs->rtm_family) {
689 	case AF_INET:
690 		if (attrs->rtm_dst_len == 32)
691 			pxflag = NHF_HOST;
692 		else if (attrs->rtm_dst_len == 0)
693 			pxflag = NHF_DEFAULT;
694 		break;
695 	case AF_INET6:
696 		if (attrs->rtm_dst_len == 32)
697 			pxflag = NHF_HOST;
698 		else if (attrs->rtm_dst_len == 0)
699 			pxflag = NHF_DEFAULT;
700 		break;
701 	}
702 
703 	return (pxflag);
704 }
705 
706 static int
707 get_op_flags(int nlm_flags)
708 {
709 	int op_flags = 0;
710 
711 	op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0;
712 	op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0;
713 	op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0;
714 	op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0;
715 
716 	return (op_flags);
717 }
718 
#ifdef ROUTE_MPATH
/*
 * Creates one member nexthop of a multipath route.
 *
 * @attrs: parsed request; supplies fib, family, rtflags and protocol
 * @mpnh:  parsed RTA_MULTIPATH entry; supplies gateway and (optionally)
 *         the transmit interface
 * @npt:   parser state (unused)
 * @pnh:   receives the finalized nexthop
 *
 * A gateway is mandatory.  The origin is only recorded for protocols
 * above RTPROT_STATIC.
 *
 * Returns 0 on success, EINVAL for a missing gateway, ENOMEM when the
 * nexthop cannot be allocated, or the error reported while finalizing.
 */
static int
create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh,
    struct nl_pstate *npt, struct nhop_object **pnh)
{
	/*
	 * Start from "no error": the value is only written through the
	 * pointer handed to finalize_nhop(), and returning an indeterminate
	 * value on its success path must not be possible.
	 */
	int error = 0;

	if (mpnh->gw == NULL)
		return (EINVAL);

	struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family);
	if (nh == NULL)
		return (ENOMEM);

	nhop_set_gw(nh, mpnh->gw, true);
	if (mpnh->ifp != NULL)
		nhop_set_transmit_ifp(nh, mpnh->ifp);
	nhop_set_rtflags(nh, attrs->rta_rtflags);
	if (attrs->rtm_protocol > RTPROT_STATIC)
		nhop_set_origin(nh, attrs->rtm_protocol);

	*pnh = finalize_nhop(nh, &error);

	return (error);
}
#endif
745 
746 static struct nhop_object *
747 create_nexthop_from_attrs(struct nl_parsed_route *attrs,
748     struct nl_pstate *npt, int *perror)
749 {
750 	struct nhop_object *nh = NULL;
751 	int error = 0;
752 
753 	if (attrs->rta_multipath != NULL) {
754 #ifdef ROUTE_MPATH
755 		/* Multipath w/o explicit nexthops */
756 		int num_nhops = attrs->rta_multipath->num_nhops;
757 		struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops);
758 
759 		for (int i = 0; i < num_nhops; i++) {
760 			struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i];
761 
762 			error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh);
763 			if (error != 0) {
764 				for (int j = 0; j < i; j++)
765 					nhop_free(wn[j].nh);
766 				break;
767 			}
768 			wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1;
769 		}
770 		if (error == 0) {
771 			struct rib_head *rh = nhop_get_rh(wn[0].nh);
772 			struct nhgrp_object *nhg;
773 
774 			nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family,
775 			    wn, num_nhops, perror);
776 			if (nhg != NULL) {
777 				if (attrs->rtm_protocol > RTPROT_STATIC)
778 					nhgrp_set_origin(nhg, attrs->rtm_protocol);
779 				nhg = nhgrp_get_nhgrp(nhg, perror);
780 			}
781 			for (int i = 0; i < num_nhops; i++)
782 				nhop_free(wn[i].nh);
783 			if (nhg != NULL)
784 				return ((struct nhop_object *)nhg);
785 			error = *perror;
786 		}
787 #else
788 		error = ENOTSUP;
789 #endif
790 		*perror = error;
791 	} else {
792 		nh = nhop_alloc(attrs->rta_table, attrs->rtm_family);
793 		if (nh == NULL) {
794 			*perror = ENOMEM;
795 			return (NULL);
796 		}
797 		if (attrs->rta_gw != NULL)
798 			nhop_set_gw(nh, attrs->rta_gw, true);
799 		if (attrs->rta_oif != NULL)
800 			nhop_set_transmit_ifp(nh, attrs->rta_oif);
801 		if (attrs->rtax_mtu != 0)
802 			nhop_set_mtu(nh, attrs->rtax_mtu, true);
803 		if (attrs->rta_rtflags & RTF_BROADCAST)
804 			nhop_set_broadcast(nh, true);
805 		if (attrs->rta_rtflags & RTF_BLACKHOLE)
806 			nhop_set_blackhole(nh, NHF_BLACKHOLE);
807 		if (attrs->rta_rtflags & RTF_REJECT)
808 			nhop_set_blackhole(nh, NHF_REJECT);
809 		nhop_set_rtflags(nh, attrs->rta_rtflags);
810 		if (attrs->rtm_protocol > RTPROT_STATIC)
811 			nhop_set_origin(nh, attrs->rtm_protocol);
812 		nh = finalize_nhop(nh, perror);
813 	}
814 
815 	return (nh);
816 }
817 
818 static int
819 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
820     struct nl_pstate *npt)
821 {
822 	struct rib_cmd_info rc = {};
823 	struct nhop_object *nh = NULL;
824 	int error;
825 
826 	struct nl_parsed_route attrs = {};
827 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
828 	if (error != 0)
829 		return (error);
830 
831 	/* Check if we have enough data */
832 	if (attrs.rta_dst == NULL) {
833 		NL_LOG(LOG_DEBUG, "missing RTA_DST");
834 		return (EINVAL);
835 	}
836 
837 	if (attrs.rta_nh_id != 0) {
838 		/* Referenced uindex */
839 		int pxflag = get_pxflag(&attrs);
840 		nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id,
841 		    pxflag, &error);
842 		if (error != 0)
843 			return (error);
844 	} else {
845 		nh = create_nexthop_from_attrs(&attrs, npt, &error);
846 		if (error != 0) {
847 			NL_LOG(LOG_DEBUG, "Error creating nexthop");
848 			return (error);
849 		}
850 	}
851 
852 	int weight = NH_IS_NHGRP(nh) ? 0 : RT_DEFAULT_WEIGHT;
853 	struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = weight };
854 	int op_flags = get_op_flags(hdr->nlmsg_flags);
855 
856 	error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len,
857 	    &rnd, op_flags, &rc);
858 	if (error == 0)
859 		report_operation(attrs.rta_table, &rc, nlp, hdr);
860 	return (error);
861 }
862 
863 static int
864 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
865 {
866 	struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data;
867 
868 	if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw))
869 		return (0);
870 
871 	if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp))
872 		return (0);
873 
874 	return (1);
875 }
876 
877 static int
878 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
879     struct nl_pstate *npt)
880 {
881 	struct rib_cmd_info rc;
882 	int error;
883 
884 	struct nl_parsed_route attrs = {};
885 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
886 	if (error != 0)
887 		return (error);
888 
889 	if (attrs.rta_dst == NULL) {
890 		NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set");
891 		return (ESRCH);
892 	}
893 
894 	error = rib_del_route_px(attrs.rta_table, attrs.rta_dst,
895 	    attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc);
896 	if (error == 0)
897 		report_operation(attrs.rta_table, &rc, nlp, hdr);
898 	return (error);
899 }
900 
901 static int
902 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
903 {
904 	int error;
905 
906 	struct nl_parsed_route attrs = {};
907 	error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs);
908 	if (error != 0)
909 		return (error);
910 
911 	if (hdr->nlmsg_flags & NLM_F_DUMP)
912 		error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw);
913 	else
914 		error = handle_rtm_getroute(nlp, &attrs, hdr, npt);
915 
916 	return (error);
917 }
918 
919 void
920 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
921 {
922 	struct nl_writer nw = {};
923 	int family, nlm_flags = 0;
924 
925 	family = rt_get_family(rc->rc_rt);
926 
927 	/* XXX: check if there are active listeners first */
928 
929 	/* TODO: consider passing PID/type/seq */
930 	switch (rc->rc_cmd) {
931 	case RTM_ADD:
932 		nlm_flags = NLM_F_EXCL | NLM_F_CREATE;
933 		break;
934 	case RTM_CHANGE:
935 		nlm_flags = NLM_F_REPLACE;
936 		break;
937 	case RTM_DELETE:
938 		nlm_flags = 0;
939 		break;
940 	}
941 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
942 		char rtbuf[NHOP_PRINT_BUFSIZE] __unused;
943 		FIB_LOG(LOG_DEBUG2, fibnum, family,
944 		    "received event %s for %s / nlm_flags=%X",
945 		    rib_print_cmd(rc->rc_cmd),
946 		    rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)),
947 		    nlm_flags);
948 	}
949 
950 	struct nlmsghdr hdr = {
951 		.nlmsg_flags = nlm_flags,
952 		.nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd),
953 	};
954 
955 	struct route_nhop_data rnd = {
956 		.rnd_nhop = rc_get_nhop(rc),
957 		.rnd_weight = rc->rc_nh_weight,
958 	};
959 
960 	uint32_t group_id = family_to_group(family);
961 	if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) {
962 		NL_LOG(LOG_DEBUG, "error allocating event buffer");
963 		return;
964 	}
965 
966 	dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw);
967 	nlmsg_flush(&nw);
968 }
969 
970 static const struct rtnl_cmd_handler cmd_handlers[] = {
971 	{
972 		.cmd = NL_RTM_GETROUTE,
973 		.name = "RTM_GETROUTE",
974 		.cb = &rtnl_handle_getroute,
975 	},
976 	{
977 		.cmd = NL_RTM_DELROUTE,
978 		.name = "RTM_DELROUTE",
979 		.cb = &rtnl_handle_delroute,
980 		.priv = PRIV_NET_ROUTE,
981 	},
982 	{
983 		.cmd = NL_RTM_NEWROUTE,
984 		.name = "RTM_NEWROUTE",
985 		.cb = &rtnl_handle_newroute,
986 		.priv = PRIV_NET_ROUTE,
987 	}
988 };
989 
990 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser};
991 
992 void
993 rtnl_routes_init(void)
994 {
995 	NL_VERIFY_PARSERS(all_parsers);
996 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
997 }
998