xref: /freebsd/sys/netlink/route/iface.c (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include <sys/types.h>
32 #include <sys/eventhandler.h>
33 #include <sys/kernel.h>
34 #include <sys/jail.h>
35 #include <sys/malloc.h>
36 #include <sys/socket.h>
37 #include <sys/sockio.h>
38 #include <sys/syslog.h>
39 
40 #include <net/if.h>
41 #include <net/if_dl.h>
42 #include <net/if_media.h>
43 #include <net/if_var.h>
44 #include <net/if_clone.h>
45 #include <net/route.h>
46 #include <net/route/nhop.h>
47 #include <net/route/route_ctl.h>
48 #include <netinet/in_var.h>
49 #include <netinet6/in6_var.h>
50 #include <netinet6/scope6_var.h> /* scope deembedding */
51 #include <netlink/netlink.h>
52 #include <netlink/netlink_ctl.h>
53 #include <netlink/netlink_route.h>
54 #include <netlink/route/route_var.h>
55 
56 #define	DEBUG_MOD_NAME	nl_iface
57 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
58 #include <netlink/netlink_debug.h>
59 _DECLARE_DEBUG(LOG_INFO);
60 
/*
 * State handed from a dump request handler to its per-object callback.
 */
struct netlink_walkargs {
	struct nl_writer *nw;	/* writer the reply messages are emitted to */
	struct nlmsghdr hdr;	/* template header passed to the dump helpers */
	struct nlpcb *so;	/* netlink pcb of the requesting socket */
	struct ucred *cred;	/* NOTE(review): presumably requester credentials; not set in the visible code */
	uint32_t fibnum;
	int family;
	int error;
	int count;	/* printed as "iterated" when the link dump ends */
	int dumped;	/* printed as "dumped" when the link dump ends */
};
72 
/*
 * Event handler tags, one per subscribed event.
 * NOTE(review): registration/deregistration is not in this part of the file.
 */
static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;

/* Head of the singly-linked list of nl_cloner entries. */
static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);

/*
 * sx(9) lock initialized through SX_SYSINIT.
 * NOTE(review): presumably protects nl_cloners - confirm against its users.
 */
static struct sx rtnl_cloner_lock;
SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");

/* These are external hooks for CARP. */
extern int	(*carp_get_vhid_p)(struct ifaddr *);
82 
83 /*
84  * RTM_GETLINK request
85  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
86  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
87  *
88  * Reply:
89  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
90 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
91 
92 [
93 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
94 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
95 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
96 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
97 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
98 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
99  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
100 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
101 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
102 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
103 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
104 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
105 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
106 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
107 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
108 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
109 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
110 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
111 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
112  */
113 
/* Link state exported through the IFLA_OPERSTATE and IFLA_CARRIER attributes. */
struct if_state {
	uint8_t		ifla_operstate;	/* one of the IF_OPER_* values */
	uint8_t		ifla_carrier;	/* 1 when carrier is present, 0 otherwise */
};
118 
119 static void
120 get_operstate_ether(if_t ifp, struct if_state *pstate)
121 {
122 	struct ifmediareq ifmr = {};
123 	int error;
124 	error = if_ioctl(ifp, SIOCGIFMEDIA, (void *)&ifmr);
125 
126 	if (error != 0) {
127 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
128 		    if_name(ifp), error);
129 		return;
130 	}
131 
132 	switch (IFM_TYPE(ifmr.ifm_active)) {
133 	case IFM_ETHER:
134 		if (ifmr.ifm_status & IFM_ACTIVE) {
135 			pstate->ifla_carrier = 1;
136 			if (if_getflags(ifp) & IFF_MONITOR)
137 				pstate->ifla_operstate = IF_OPER_DORMANT;
138 			else
139 				pstate->ifla_operstate = IF_OPER_UP;
140 		} else
141 			pstate->ifla_operstate = IF_OPER_DOWN;
142 	}
143 }
144 
145 static bool
146 get_stats(struct nl_writer *nw, if_t ifp)
147 {
148 	struct rtnl_link_stats64 *stats;
149 
150 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
151 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
152 	if (nla == NULL)
153 		return (false);
154 	nla->nla_type = IFLA_STATS64;
155 	nla->nla_len = nla_len;
156 	stats = (struct rtnl_link_stats64 *)(nla + 1);
157 
158 	stats->rx_packets = if_getcounter(ifp, IFCOUNTER_IPACKETS);
159 	stats->tx_packets = if_getcounter(ifp, IFCOUNTER_OPACKETS);
160 	stats->rx_bytes = if_getcounter(ifp, IFCOUNTER_IBYTES);
161 	stats->tx_bytes = if_getcounter(ifp, IFCOUNTER_OBYTES);
162 	stats->rx_errors = if_getcounter(ifp, IFCOUNTER_IERRORS);
163 	stats->tx_errors = if_getcounter(ifp, IFCOUNTER_OERRORS);
164 	stats->rx_dropped = if_getcounter(ifp, IFCOUNTER_IQDROPS);
165 	stats->tx_dropped = if_getcounter(ifp, IFCOUNTER_OQDROPS);
166 	stats->multicast = if_getcounter(ifp, IFCOUNTER_IMCASTS);
167 	stats->rx_nohandler = if_getcounter(ifp, IFCOUNTER_NOPROTO);
168 
169 	return (true);
170 }
171 
172 static void
173 get_operstate(if_t ifp, struct if_state *pstate)
174 {
175 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
176 	pstate->ifla_carrier = 0; /* no carrier */
177 
178 	switch (if_gettype(ifp)) {
179 	case IFT_ETHER:
180 	case IFT_L2VLAN:
181 		get_operstate_ether(ifp, pstate);
182 		break;
183 	default:
184 		/* Map admin state to the operstate */
185 		if (if_getflags(ifp) & IFF_UP) {
186 			pstate->ifla_operstate = IF_OPER_UP;
187 			pstate->ifla_carrier = 1;
188 		} else
189 			pstate->ifla_operstate = IF_OPER_DOWN;
190 		break;
191 	}
192 }
193 
194 static void
195 get_hwaddr(struct nl_writer *nw, if_t ifp)
196 {
197 	struct ifreq ifr = {};
198 
199 	if (if_gethwaddr(ifp, &ifr) == 0) {
200 		nlattr_add(nw, IFLAF_ORIG_HWADDR, if_getaddrlen(ifp),
201 		    ifr.ifr_addr.sa_data);
202 	}
203 }
204 
205 static unsigned
206 ifp_flags_to_netlink(const if_t ifp)
207 {
208         return (if_getflags(ifp) | if_getdrvflags(ifp));
209 }
210 
211 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
212 static bool
213 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
214 {
215         uint32_t addr_len = 0;
216         const void *addr_data = NULL;
217 #ifdef INET6
218         struct in6_addr addr6;
219 #endif
220 
221         if (sa == NULL)
222                 return (true);
223 
224         switch (sa->sa_family) {
225 #ifdef INET
226         case AF_INET:
227                 addr_len = sizeof(struct in_addr);
228                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
229                 break;
230 #endif
231 #ifdef INET6
232         case AF_INET6:
233                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
234                 addr_len = sizeof(struct in6_addr);
235                 addr_data = &addr6;
236                 break;
237 #endif
238         case AF_LINK:
239                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
240                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
241                 break;
242 	case AF_UNSPEC:
243 		/* Ignore empty SAs without warning */
244 		return (true);
245         default:
246                 NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family);
247                 return (true);
248         }
249 
250         return (nlattr_add(nw, attr, addr_len, addr_data));
251 }
252 
253 static bool
254 dump_iface_caps(struct nl_writer *nw, struct ifnet *ifp)
255 {
256 	int off = nlattr_add_nested(nw, IFLAF_CAPS);
257 	uint32_t active_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {};
258 	uint32_t all_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {};
259 
260 	MPASS(sizeof(active_caps) >= 8);
261 	MPASS(sizeof(all_caps) >= 8);
262 
263 	if (off == 0)
264 		return (false);
265 
266 	active_caps[0] = (uint32_t)if_getcapabilities(ifp);
267 	all_caps[0] = (uint32_t)if_getcapenable(ifp);
268 	active_caps[1] = (uint32_t)if_getcapabilities2(ifp);
269 	all_caps[1] = (uint32_t)if_getcapenable2(ifp);
270 
271 	nlattr_add_u32(nw, NLA_BITSET_SIZE, IFCAP_B_SIZE);
272 	nlattr_add(nw, NLA_BITSET_MASK, sizeof(all_caps), all_caps);
273 	nlattr_add(nw, NLA_BITSET_VALUE, sizeof(active_caps), active_caps);
274 
275 	nlattr_set_len(nw, off);
276 
277 	return (true);
278 }
279 
280 /*
281  * Dumps interface state, properties and metrics.
282  * @nw: message writer
283  * @ifp: target interface
284  * @hdr: template header
285  * @if_flags_mask: changed if_[drv]_flags bitmask
286  *
287  * This function is called without epoch and MAY sleep.
288  */
289 static bool
290 dump_iface(struct nl_writer *nw, if_t ifp, const struct nlmsghdr *hdr,
291     int if_flags_mask)
292 {
293 	struct epoch_tracker et;
294         struct ifinfomsg *ifinfo;
295 
296         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
297 
298 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
299 		goto enomem;
300 
301         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
302         ifinfo->ifi_family = AF_UNSPEC;
303         ifinfo->__ifi_pad = 0;
304         ifinfo->ifi_type = if_gettype(ifp);
305         ifinfo->ifi_index = if_getindex(ifp);
306         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
307         ifinfo->ifi_change = if_flags_mask;
308 
309 	struct if_state ifs = {};
310 	get_operstate(ifp, &ifs);
311 
312 	if (ifs.ifla_operstate == IF_OPER_UP)
313 		ifinfo->ifi_flags |= IFF_LOWER_UP;
314 
315         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
316         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
317         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
318 
319 /*
320         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
321         nlattr_add_u8(nw, IFLA_LINKMODE, val);
322 */
323 	if (if_getaddrlen(ifp) != 0) {
324 		struct ifaddr *ifa;
325 
326 		NET_EPOCH_ENTER(et);
327 		ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
328 		if (ifa != NULL)
329 			dump_sa(nw, IFLA_ADDRESS, ifa->ifa_addr);
330 		NET_EPOCH_EXIT(et);
331 	}
332 
333         if ((if_getbroadcastaddr(ifp) != NULL)) {
334 		nlattr_add(nw, IFLA_BROADCAST, if_getaddrlen(ifp),
335 		    if_getbroadcastaddr(ifp));
336         }
337 
338         nlattr_add_u32(nw, IFLA_MTU, if_getmtu(ifp));
339 /*
340         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
341         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
342         nlattr_add_u32(nw, IFLA_GROUP, 0);
343 */
344 
345 	if (if_getdescr(ifp) != NULL)
346 		nlattr_add_string(nw, IFLA_IFALIAS, if_getdescr(ifp));
347 
348 	/* Store FreeBSD-specific attributes */
349 	int off = nlattr_add_nested(nw, IFLA_FREEBSD);
350 	if (off != 0) {
351 		get_hwaddr(nw, ifp);
352 		dump_iface_caps(nw, ifp);
353 
354 		nlattr_set_len(nw, off);
355 	}
356 
357 	get_stats(nw, ifp);
358 
359 	uint32_t val = (if_getflags(ifp) & IFF_PROMISC) != 0;
360         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
361 
362 	ifc_dump_ifp_nl(ifp, nw);
363 
364         if (nlmsg_end(nw))
365 		return (true);
366 
367 enomem:
368         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
369         nlmsg_abort(nw);
370         return (false);
371 }
372 
373 static bool
374 check_ifmsg(void *hdr, struct nl_pstate *npt)
375 {
376 	struct ifinfomsg *ifm = hdr;
377 
378 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
379 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
380 		nlmsg_report_err_msg(npt,
381 		    "strict checking: non-zero values in ifinfomsg header");
382 		return (false);
383 	}
384 
385 	return (true);
386 }
387 
/*
 * Parser description for link (ifinfomsg) requests.
 * _IN() is an offset inside the wire header, _OUT() an offset inside the
 * parsed result, struct nl_parsed_link.
 */
#define	_IN(_field)	offsetof(struct ifinfomsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
/* Fixed header fields copied into struct nl_parsed_link */
static const struct nlfield_parser nlf_p_if[] = {
	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
	{ .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 },
	{ .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 },
};

/* Attributes nested inside IFLA_LINKINFO */
static const struct nlattr_parser nla_p_linfo[] = {
	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
};
NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);

/*
 * Top-level link attributes.
 * IFLA_IFNAME and IFLA_ALT_IFNAME are both stored in ifla_ifname.
 */
static const struct nlattr_parser nla_p_if[] = {
	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
	{ .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 },
	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
	{ .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string },
	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
};
#undef _IN
#undef _OUT
/* check_ifmsg() is the header check used for strict parsing */
NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
415 
416 static bool
417 match_iface(if_t ifp, void *_arg)
418 {
419 	struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg;
420 
421 	if (attrs->ifi_index != 0 && attrs->ifi_index != if_getindex(ifp))
422 		return (false);
423 	if (attrs->ifi_type != 0 && attrs->ifi_index != if_gettype(ifp))
424 		return (false);
425 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
426 		return (false);
427 	/* TODO: add group match */
428 
429 	return (true);
430 }
431 
432 static int
433 dump_cb(if_t ifp, void *_arg)
434 {
435 	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
436 	if (!dump_iface(wa->nw, ifp, &wa->hdr, 0))
437 		return (ENOMEM);
438 	return (0);
439 }
440 
441 /*
442  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
443  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
444  *   [
445  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
446  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
447  *   ]
448  */
449 static int
450 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
451 {
452 	struct epoch_tracker et;
453         if_t ifp;
454 	int error = 0;
455 
456 	struct nl_parsed_link attrs = {};
457 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
458 	if (error != 0)
459 		return (error);
460 
461 	struct netlink_walkargs wa = {
462 		.so = nlp,
463 		.nw = npt->nw,
464 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
465 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
466 		.hdr.nlmsg_flags = hdr->nlmsg_flags,
467 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
468 	};
469 
470 	/* Fast track for an interface w/ explicit name or index match */
471 	if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) {
472 		if (attrs.ifi_index != 0) {
473 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u",
474 			    attrs.ifi_index);
475 			NET_EPOCH_ENTER(et);
476 			ifp = ifnet_byindex_ref(attrs.ifi_index);
477 			NET_EPOCH_EXIT(et);
478 		} else {
479 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s",
480 			    attrs.ifla_ifname);
481 			ifp = ifunit_ref(attrs.ifla_ifname);
482 		}
483 
484 		if (ifp != NULL) {
485 			if (match_iface(ifp, &attrs)) {
486 				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
487 					error = ENOMEM;
488 			} else
489 				error = ENODEV;
490 			if_rele(ifp);
491 		} else
492 			error = ENODEV;
493 		return (error);
494 	}
495 
496 	/* Always treat non-direct-match as a multipart message */
497 	wa.hdr.nlmsg_flags |= NLM_F_MULTI;
498 
499 	/*
500 	 * Fetching some link properties require performing ioctl's that may be blocking.
501 	 * Address it by saving referenced pointers of the matching links,
502 	 * exiting from epoch and going through the list one-by-one.
503 	 */
504 
505 	NL_LOG(LOG_DEBUG2, "Start dump");
506 	if_foreach_sleep(match_iface, &attrs, dump_cb, &wa);
507 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
508 
509 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
510                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
511                 return (ENOMEM);
512         }
513 
514 	return (error);
515 }
516 
517 /*
518  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
519  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
520  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
521  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
522  *   [
523  *    {nla_len=16, nla_type=IFLA_LINKINFO},
524  *     [
525  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
526  *     ]
527  *    ]
528  */
529 
530 static int
531 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
532 {
533 	struct epoch_tracker et;
534         if_t ifp;
535 	int error;
536 
537 	struct nl_parsed_link attrs = {};
538 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
539 	if (error != 0)
540 		return (error);
541 
542 	NET_EPOCH_ENTER(et);
543 	ifp = ifnet_byindex_ref(attrs.ifi_index);
544 	NET_EPOCH_EXIT(et);
545 	if (ifp == NULL) {
546 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
547 		return (ENOENT);
548 	}
549 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
550 
551 	sx_xlock(&ifnet_detach_sxlock);
552 	error = if_clone_destroy(if_name(ifp));
553 	sx_xunlock(&ifnet_detach_sxlock);
554 
555 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
556 
557 	if_rele(ifp);
558 	return (error);
559 }
560 
561 /*
562  * New link:
563  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0},
564  *   {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
565  *    [
566  *     {{nla_len=8, nla_type=IFLA_MTU}, 123},
567  *     {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"},
568  *     {{nla_len=24, nla_type=IFLA_LINKINFO},
569  *      [
570  *       {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...},
571  *       {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]}
572  *
573  * Update link:
574  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0},
575  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0},
576  * {{nla_len=8, nla_type=IFLA_MTU}, 123}}
577  *
578  *
579  * Check command availability:
580  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0},
581  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
582  */
583 
584 
/*
 * Creates a new cloned interface from a RTM_NEWLINK request.
 *
 * Both the interface name (IFLA_IFNAME) and the cloner name
 * (IFLA_INFO_KIND) must be present and non-empty, otherwise EINVAL is
 * returned.  On success the new interface is stored as the reply cookie.
 *
 * Returns the error reported by the cloner (0 on success).
 */
static int
create_link(struct nlmsghdr *hdr, struct nlattr_bmask *bm_unused_placeholder_never_used,
    struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt);
609 
610 static int
611 modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
612     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
613 {
614 	if_t ifp = NULL;
615 	struct epoch_tracker et;
616 
617 	if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) {
618 		/*
619 		 * Applications like ip(8) verify RTM_NEWLINK command
620 		 * existence by calling it with empty arguments. Always
621 		 * return "innocent" error in that case.
622 		 */
623 		NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field");
624 		return (EPERM);
625 	}
626 
627 	if (lattrs->ifi_index != 0) {
628 		NET_EPOCH_ENTER(et);
629 		ifp = ifnet_byindex_ref(lattrs->ifi_index);
630 		NET_EPOCH_EXIT(et);
631 		if (ifp == NULL) {
632 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u",
633 			    lattrs->ifi_index);
634 			return (ENOENT);
635 		}
636 	}
637 
638 	if (ifp == NULL && lattrs->ifla_ifname != NULL) {
639 		ifp = ifunit_ref(lattrs->ifla_ifname);
640 		if (ifp == NULL) {
641 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s",
642 			    lattrs->ifla_ifname);
643 			return (ENOENT);
644 		}
645 	}
646 
647 	MPASS(ifp != NULL);
648 
649 	/*
650 	 * Modification request can address either
651 	 * 1) cloned interface, in which case we call the cloner-specific
652 	 *  modification routine
653 	 * or
654 	 * 2) non-cloned (e.g. "physical") interface, in which case we call
655 	 *  generic modification routine
656 	 */
657 	struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt };
658 	if (!ifc_modify_ifp_nl(ifp, &ifd))
659 		ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt);
660 
661 	if_rele(ifp);
662 
663 	return (ifd.error);
664 }
665 
666 
667 static int
668 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
669 {
670 	struct nlattr_bmask bm;
671 	int error;
672 
673 	struct nl_parsed_link attrs = {};
674 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
675 	if (error != 0)
676 		return (error);
677 	nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm);
678 
679 	if (hdr->nlmsg_flags & NLM_F_CREATE)
680 		return (create_link(hdr, &attrs, &bm, nlp, npt));
681 	else
682 		return (modify_link(hdr, &attrs, &bm, nlp, npt));
683 }
684 
/*
 * For an IPv6 link-local @sa, sets the unicast scope id of the address
 * from @ifindex.  Every other address, including NULL, is left untouched.
 * The function does nothing in kernels built without INET6.
 */
static void
set_scope6(struct sockaddr *sa, uint32_t ifindex)
{
#ifdef INET6
	struct sockaddr_in6 *sin6;

	if (sa == NULL || sa->sa_family != AF_INET6)
		return;

	sin6 = (struct sockaddr_in6 *)sa;
	if (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
		return;

	in6_set_unicast_scopeid(&sin6->sin6_addr, ifindex);
#endif
}
697 
698 static bool
699 check_sa_family(const struct sockaddr *sa, int family, const char *attr_name,
700     struct nl_pstate *npt)
701 {
702 	if (sa == NULL || sa->sa_family == family)
703 		return (true);
704 
705 	nlmsg_report_err_msg(npt, "wrong family for %s attribute: %d != %d",
706 	    attr_name, family, sa->sa_family);
707 	return (false);
708 }
709 
/*
 * Parsed form of an address (ifaddrmsg) request.
 * The first group comes from the fixed header, the rest from attributes.
 */
struct nl_parsed_ifa {
	uint8_t			ifa_family;	/* header: ifa_family */
	uint8_t			ifa_prefixlen;	/* header: ifa_prefixlen */
	uint8_t			ifa_scope;	/* header: ifa_scope */
	uint32_t		ifa_index;	/* header: ifa_index */
	uint32_t		ifa_flags;	/* header ifa_flags (8 bit) and the IFA_FLAGS attribute */
	uint32_t		ifaf_vhid;	/* IFA_FREEBSD / IFAF_VHID */
	uint32_t		ifaf_flags;	/* IFA_FREEBSD / IFAF_FLAGS */
	struct sockaddr		*ifa_address;	/* IFA_ADDRESS */
	struct sockaddr		*ifa_local;	/* IFA_LOCAL */
	struct sockaddr		*ifa_broadcast;	/* IFA_BROADCAST */
	struct ifa_cacheinfo	*ifa_cacheinfo;	/* IFA_CACHEINFO, points into the request */
	struct sockaddr		*f_ifa_addr;	/* NOTE(review): not filled by the parser tables; presumably derived later */
	struct sockaddr		*f_ifa_dst;	/* NOTE(review): not filled by the parser tables; presumably derived later */
};
725 
726 static int
727 nlattr_get_cinfo(struct nlattr *nla, struct nl_pstate *npt,
728     const void *arg __unused, void *target)
729 {
730 	if (__predict_false(NLA_DATA_LEN(nla) != sizeof(struct ifa_cacheinfo))) {
731 		NLMSG_REPORT_ERR_MSG(npt, "nla type %d size(%u) is not ifa_cacheinfo",
732 		    nla->nla_type, NLA_DATA_LEN(nla));
733 		return (EINVAL);
734 	}
735 	*((struct ifa_cacheinfo **)target) = (struct ifa_cacheinfo *)NL_RTA_DATA(nla);
736 	return (0);
737 }
738 
/*
 * Parser description for address (ifaddrmsg) requests.
 * _IN() is an offset inside the wire header, _OUT() an offset inside the
 * parsed result, struct nl_parsed_ifa.
 */
#define	_IN(_field)	offsetof(struct ifaddrmsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_ifa, _field)
/* Fixed header fields; the 8-bit ifa_flags is stored into a 32-bit field */
static const struct nlfield_parser nlf_p_ifa[] = {
	{ .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 },
	{ .off_in = _IN(ifa_index), .off_out = _OUT(ifa_index), .cb = nlf_get_u32 },
};

/* Attributes nested inside IFA_FREEBSD */
static const struct nlattr_parser nla_p_ifa_fbsd[] = {
	{ .type = IFAF_VHID, .off = _OUT(ifaf_vhid), .cb = nlattr_get_uint32 },
	{ .type = IFAF_FLAGS, .off = _OUT(ifaf_flags), .cb = nlattr_get_uint32 },
};
NL_DECLARE_ATTR_PARSER(ifa_fbsd_parser, nla_p_ifa_fbsd);

/*
 * Top-level address attributes.
 * IFA_FLAGS writes to the same ifa_flags field as the header value.
 */
static const struct nlattr_parser nla_p_ifa[] = {
	{ .type = IFA_ADDRESS, .off = _OUT(ifa_address), .cb = nlattr_get_ip },
	{ .type = IFA_LOCAL, .off = _OUT(ifa_local), .cb = nlattr_get_ip },
	{ .type = IFA_BROADCAST, .off = _OUT(ifa_broadcast), .cb = nlattr_get_ip },
	{ .type = IFA_CACHEINFO, .off = _OUT(ifa_cacheinfo), .cb = nlattr_get_cinfo },
	{ .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 },
	{ .type = IFA_FREEBSD, .arg = &ifa_fbsd_parser, .cb = nlattr_get_nested },
};
#undef _IN
#undef _OUT
765 
766 static bool
767 post_p_ifa(void *_attrs, struct nl_pstate *npt)
768 {
769 	struct nl_parsed_ifa *attrs = (struct nl_parsed_ifa *)_attrs;
770 
771 	if (!check_sa_family(attrs->ifa_address, attrs->ifa_family, "IFA_ADDRESS", npt))
772 		return (false);
773 	if (!check_sa_family(attrs->ifa_local, attrs->ifa_family, "IFA_LOCAL", npt))
774 		return (false);
775 	if (!check_sa_family(attrs->ifa_broadcast, attrs->ifa_family, "IFA_BROADADDR", npt))
776 		return (false);
777 
778 	set_scope6(attrs->ifa_address, attrs->ifa_index);
779 	set_scope6(attrs->ifa_local, attrs->ifa_index);
780 
781 	return (true);
782 }
783 
/* Address message parser: no strict header check (NULL), post_p_ifa() as the post-parse hook */
NL_DECLARE_PARSER_EXT(ifa_parser, struct ifaddrmsg, NULL, nlf_p_ifa, nla_p_ifa, post_p_ifa);
785 
786 
787 /*
788 
789 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
790  [
791         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
792         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
793         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
794         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
795         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
796 ---
797 
798 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
799  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
800    [
801     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
802    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
803    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
804 */
805 
806 static uint8_t
807 ifa_get_scope(const struct ifaddr *ifa)
808 {
809         const struct sockaddr *sa;
810         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
811 
812         sa = ifa->ifa_addr;
813         switch (sa->sa_family) {
814 #ifdef INET
815         case AF_INET:
816                 {
817                         struct in_addr addr;
818                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
819                         if (IN_LOOPBACK(addr.s_addr))
820                                 addr_scope = RT_SCOPE_HOST;
821                         else if (IN_LINKLOCAL(addr.s_addr))
822                                 addr_scope = RT_SCOPE_LINK;
823                         break;
824                 }
825 #endif
826 #ifdef INET6
827         case AF_INET6:
828                 {
829                         const struct in6_addr *addr;
830                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
831                         if (IN6_IS_ADDR_LOOPBACK(addr))
832                                 addr_scope = RT_SCOPE_HOST;
833                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
834                                 addr_scope = RT_SCOPE_LINK;
835                         break;
836                 }
837 #endif
838         }
839 
840         return (addr_scope);
841 }
842 
843 #ifdef INET6
844 static uint8_t
845 inet6_get_plen(const struct in6_addr *addr)
846 {
847 
848 	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
849 	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
850 }
851 #endif
852 
853 static uint8_t
854 get_sa_plen(const struct sockaddr *sa)
855 {
856 #ifdef INET
857         const struct in_addr *paddr;
858 #endif
859 #ifdef INET6
860         const struct in6_addr *paddr6;
861 #endif
862 
863         switch (sa->sa_family) {
864 #ifdef INET
865         case AF_INET:
866                 paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
867                 return bitcount32(paddr->s_addr);;
868 #endif
869 #ifdef INET6
870         case AF_INET6:
871                 paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
872                 return inet6_get_plen(paddr6);
873 #endif
874         }
875 
876         return (0);
877 }
878 
879 #ifdef INET6
880 static uint32_t
881 in6_flags_to_nl(uint32_t flags)
882 {
883 	uint32_t nl_flags = 0;
884 
885 	if (flags & IN6_IFF_TEMPORARY)
886 		nl_flags |= IFA_F_TEMPORARY;
887 	if (flags & IN6_IFF_NODAD)
888 		nl_flags |= IFA_F_NODAD;
889 	if (flags & IN6_IFF_DEPRECATED)
890 		nl_flags |= IFA_F_DEPRECATED;
891 	if (flags & IN6_IFF_TENTATIVE)
892 		nl_flags |= IFA_F_TENTATIVE;
893 	if ((flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == 0)
894 		flags |= IFA_F_PERMANENT;
895 	if (flags & IN6_IFF_DUPLICATED)
896 		flags |= IFA_F_DADFAILED;
897 	return (nl_flags);
898 }
899 
900 static uint32_t
901 nl_flags_to_in6(uint32_t flags)
902 {
903 	uint32_t in6_flags = 0;
904 
905 	if (flags & IFA_F_TEMPORARY)
906 		in6_flags |= IN6_IFF_TEMPORARY;
907 	if (flags & IFA_F_NODAD)
908 		in6_flags |= IN6_IFF_NODAD;
909 	if (flags & IFA_F_DEPRECATED)
910 		in6_flags |= IN6_IFF_DEPRECATED;
911 	if (flags & IFA_F_TENTATIVE)
912 		in6_flags |= IN6_IFF_TENTATIVE;
913 	if (flags & IFA_F_DADFAILED)
914 		in6_flags |= IN6_IFF_DUPLICATED;
915 
916 	return (in6_flags);
917 }
918 
919 static void
920 export_cache_info6(struct nl_writer *nw, const struct in6_ifaddr *ia)
921 {
922 	struct ifa_cacheinfo ci = {
923 		.cstamp = ia->ia6_createtime * 1000,
924 		.tstamp = ia->ia6_updatetime * 1000,
925 		.ifa_prefered = ia->ia6_lifetime.ia6t_pltime,
926 		.ifa_valid = ia->ia6_lifetime.ia6t_vltime,
927 	};
928 
929 	nlattr_add(nw, IFA_CACHEINFO, sizeof(ci), &ci);
930 }
931 #endif
932 
/*
 * Emits address cache information for 'ifa' when its family has any.
 * Only AF_INET6 addresses carry it (and only in INET6 kernels); for every
 * other family nothing is written.
 */
static void
export_cache_info(struct nl_writer *nw, struct ifaddr *ifa)
{
#ifdef INET6
	if (ifa->ifa_addr->sa_family == AF_INET6)
		export_cache_info6(nw, (struct in6_ifaddr *)ifa);
#endif
}
944 
945 /*
946  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
947            ('IFA_LOCAL', '12.0.0.1'),
948            ('IFA_LABEL', 'eth10'),
949            ('IFA_FLAGS', 128),
950            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
951  */
952 static bool
953 dump_iface_addr(struct nl_writer *nw, if_t ifp, struct ifaddr *ifa,
954     const struct nlmsghdr *hdr)
955 {
956         struct ifaddrmsg *ifamsg;
957         struct sockaddr *sa = ifa->ifa_addr;
958         struct sockaddr *sa_dst = ifa->ifa_dstaddr;
959 
960         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
961             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
962 
963 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
964 		goto enomem;
965 
966         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
967         ifamsg->ifa_family = sa->sa_family;
968         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
969         ifamsg->ifa_flags = 0; // ifa_flags is useless
970         ifamsg->ifa_scope = ifa_get_scope(ifa);
971         ifamsg->ifa_index = if_getindex(ifp);
972 
973 	if ((if_getflags(ifp) & IFF_POINTOPOINT) && sa_dst != NULL && sa_dst->sa_family != 0) {
974 		/* P2P interface may have IPv6 LL with no dst address */
975 		dump_sa(nw, IFA_ADDRESS, sa_dst);
976 		dump_sa(nw, IFA_LOCAL, sa);
977 	} else {
978 		dump_sa(nw, IFA_ADDRESS, sa);
979 #ifdef INET
980 		/*
981 		 * In most cases, IFA_ADDRESS == IFA_LOCAL
982 		 * Skip IFA_LOCAL for anything except INET
983 		 */
984 		if (sa->sa_family == AF_INET)
985 			dump_sa(nw, IFA_LOCAL, sa);
986 #endif
987 	}
988 	if (if_getflags(ifp) & IFF_BROADCAST)
989 		dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr);
990 
991         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
992 
993         uint32_t nl_ifa_flags = 0;
994 #ifdef INET6
995 	if (sa->sa_family == AF_INET6) {
996 		struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
997 		nl_ifa_flags = in6_flags_to_nl(ia->ia6_flags);
998 	}
999 #endif
1000         nlattr_add_u32(nw, IFA_FLAGS, nl_ifa_flags);
1001 
1002 	export_cache_info(nw, ifa);
1003 
1004 	/* Store FreeBSD-specific attributes */
1005 	int off = nlattr_add_nested(nw, IFA_FREEBSD);
1006 	if (off != 0) {
1007 		if (ifa->ifa_carp != NULL && carp_get_vhid_p != NULL) {
1008 			uint32_t vhid  = (uint32_t)(*carp_get_vhid_p)(ifa);
1009 			nlattr_add_u32(nw, IFAF_VHID, vhid);
1010 		}
1011 #ifdef INET6
1012 		if (sa->sa_family == AF_INET6) {
1013 			uint32_t ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags;
1014 
1015 			nlattr_add_u32(nw, IFAF_FLAGS, ifa_flags);
1016 		}
1017 #endif
1018 
1019 		nlattr_set_len(nw, off);
1020 	}
1021 
1022 	if (nlmsg_end(nw))
1023 		return (true);
1024 enomem:
1025         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
1026             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
1027         nlmsg_abort(nw);
1028         return (false);
1029 }
1030 
1031 static int
1032 dump_iface_addrs(struct netlink_walkargs *wa, if_t ifp)
1033 {
1034         struct ifaddr *ifa;
1035 	struct ifa_iter it;
1036 	int error = 0;
1037 
1038 	for (ifa = ifa_iter_start(ifp, &it); ifa != NULL; ifa = ifa_iter_next(&it)) {
1039 		if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family)
1040 			continue;
1041 		if (ifa->ifa_addr->sa_family == AF_LINK)
1042 			continue;
1043 		if (prison_if(wa->cred, ifa->ifa_addr) != 0)
1044 			continue;
1045 		wa->count++;
1046 		if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr)) {
1047 			error = ENOMEM;
1048 			break;
1049 		}
1050 		wa->dumped++;
1051 	}
1052 	ifa_iter_finish(&it);
1053 
1054 	return (error);
1055 }
1056 
1057 static int
1058 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1059 {
1060         if_t ifp;
1061 	int error = 0;
1062 
1063 	struct nl_parsed_ifa attrs = {};
1064 	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1065 	if (error != 0)
1066 		return (error);
1067 
1068 	struct netlink_walkargs wa = {
1069 		.so = nlp,
1070 		.nw = npt->nw,
1071 		.cred = nlp_get_cred(nlp),
1072 		.family = attrs.ifa_family,
1073 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
1074 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
1075 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
1076 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
1077 	};
1078 
1079 	NL_LOG(LOG_DEBUG2, "Start dump");
1080 
1081 	if (attrs.ifa_index != 0) {
1082 		ifp = ifnet_byindex(attrs.ifa_index);
1083 		if (ifp == NULL)
1084 			error = ENOENT;
1085 		else
1086 			error = dump_iface_addrs(&wa, ifp);
1087 	} else {
1088 		struct if_iter it;
1089 
1090 		for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it)) {
1091 			error = dump_iface_addrs(&wa, ifp);
1092 			if (error != 0)
1093 				break;
1094 		}
1095 		if_iter_finish(&it);
1096 	}
1097 
1098 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
1099 
1100 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
1101                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
1102                 return (ENOMEM);
1103         }
1104 
1105 	return (error);
1106 }
1107 
1108 #ifdef INET
1109 static int
1110 handle_newaddr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1111     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1112 {
1113 	int plen = attrs->ifa_prefixlen;
1114 	int if_flags = if_getflags(ifp);
1115 	struct sockaddr_in *addr, *dst;
1116 
1117 	if (plen > 32) {
1118 		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1119 		return (EINVAL);
1120 	};
1121 
1122 	if (if_flags & IFF_POINTOPOINT) {
1123 		/*
1124 		 * Only P2P IFAs are allowed by the implementation.
1125 		 */
1126 		if (attrs->ifa_address == NULL || attrs->ifa_local == NULL) {
1127 			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1128 			return (EINVAL);
1129 		}
1130 		addr = (struct sockaddr_in *)attrs->ifa_local;
1131 		dst = (struct sockaddr_in *)attrs->ifa_address;
1132 	} else {
1133 		/*
1134 		 * Map the Netlink attributes to FreeBSD ifa layout.
1135 		 * If only IFA_ADDRESS or IFA_LOCAL is set OR
1136 		 * both are set to the same value => ifa is not p2p
1137 		 * and the attribute value contains interface address.
1138 		 *
1139 		 * Otherwise (both IFA_ADDRESS and IFA_LOCAL are set and
1140 		 * different), IFA_LOCAL contains an interface address and
1141 		 * IFA_ADDRESS contains peer address.
1142 		 */
1143 		addr = (struct sockaddr_in *)attrs->ifa_local;
1144 		if (addr == NULL)
1145 			addr = (struct sockaddr_in *)attrs->ifa_address;
1146 
1147 		if (addr == NULL) {
1148 			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1149 			return (EINVAL);
1150 		}
1151 
1152 		/* Generate broadcast address if not set */
1153 		if ((if_flags & IFF_BROADCAST) && attrs->ifa_broadcast == NULL) {
1154 			uint32_t s_baddr;
1155 			struct sockaddr_in *sin_brd;
1156 
1157 			if (plen == 31)
1158 				s_baddr = INADDR_BROADCAST; /* RFC 3021 */
1159 			else {
1160 				uint32_t s_mask;
1161 
1162 				s_mask = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
1163 				s_baddr = addr->sin_addr.s_addr | ~s_mask;
1164 			}
1165 
1166 			sin_brd = (struct sockaddr_in *)npt_alloc(npt, sizeof(*sin_brd));
1167 			if (sin_brd == NULL)
1168 				return (ENOMEM);
1169 			sin_brd->sin_family = AF_INET;
1170 			sin_brd->sin_len = sizeof(*sin_brd);
1171 			sin_brd->sin_addr.s_addr = s_baddr;
1172 			attrs->ifa_broadcast = (struct sockaddr *)sin_brd;
1173 		}
1174 		dst = (struct sockaddr_in *)attrs->ifa_broadcast;
1175 	}
1176 
1177 	struct sockaddr_in mask = {
1178 		.sin_len = sizeof(struct sockaddr_in),
1179 		.sin_family = AF_INET,
1180 		.sin_addr.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0),
1181 	};
1182 	struct in_aliasreq req = {
1183 		.ifra_addr = *addr,
1184 		.ifra_mask = mask,
1185 		.ifra_vhid = attrs->ifaf_vhid,
1186 	};
1187 	if (dst != NULL)
1188 		req.ifra_dstaddr = *dst;
1189 
1190 	return (in_control_ioctl(SIOCAIFADDR, &req, ifp, nlp_get_cred(nlp)));
1191 }
1192 
1193 static int
1194 handle_deladdr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1195     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1196 {
1197 	struct sockaddr_in *addr = (struct sockaddr_in *)attrs->ifa_local;
1198 
1199 	if (addr == NULL)
1200 		addr = (struct sockaddr_in *)attrs->ifa_address;
1201 
1202 	if (addr == NULL) {
1203 		nlmsg_report_err_msg(npt, "empty IFA_ADDRESS/IFA_LOCAL");
1204 		return (EINVAL);
1205 	}
1206 
1207 	struct in_aliasreq req = { .ifra_addr = *addr };
1208 
1209 	return (in_control_ioctl(SIOCDIFADDR, &req, ifp, nlp_get_cred(nlp)));
1210 }
1211 #endif
1212 
1213 #ifdef INET6
1214 static int
1215 handle_newaddr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1216     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1217 {
1218 	struct sockaddr_in6 *addr, *dst;
1219 
1220 	if (attrs->ifa_prefixlen > 128) {
1221 		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1222 		return (EINVAL);
1223 	}
1224 
1225 	/*
1226 	 * In IPv6 implementation, adding non-P2P address to the P2P interface
1227 	 * is allowed.
1228 	 */
1229 	addr = (struct sockaddr_in6 *)(attrs->ifa_local);
1230 	dst = (struct sockaddr_in6 *)(attrs->ifa_address);
1231 
1232 	if (addr == NULL) {
1233 		addr = dst;
1234 		dst = NULL;
1235 	} else if (dst != NULL) {
1236 		if (IN6_ARE_ADDR_EQUAL(&addr->sin6_addr, &dst->sin6_addr)) {
1237 			/*
1238 			 * Sometimes Netlink users fills in both attributes
1239 			 * with the same address. It still means "non-p2p".
1240 			 */
1241 			dst = NULL;
1242 		}
1243 	}
1244 
1245 	if (addr == NULL) {
1246 		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1247 		return (EINVAL);
1248 	}
1249 
1250 	uint32_t flags = nl_flags_to_in6(attrs->ifa_flags) | attrs->ifaf_flags;
1251 
1252 	uint32_t pltime = 0, vltime = 0;
1253 	if (attrs->ifa_cacheinfo != 0) {
1254 		pltime = attrs->ifa_cacheinfo->ifa_prefered;
1255 		vltime = attrs->ifa_cacheinfo->ifa_valid;
1256 	}
1257 
1258 	struct sockaddr_in6 mask = {
1259 		.sin6_len = sizeof(struct sockaddr_in6),
1260 		.sin6_family = AF_INET6,
1261 	};
1262 	ip6_writemask(&mask.sin6_addr, attrs->ifa_prefixlen);
1263 
1264 	struct in6_aliasreq req = {
1265 		.ifra_addr = *addr,
1266 		.ifra_prefixmask = mask,
1267 		.ifra_flags = flags,
1268 		.ifra_lifetime = { .ia6t_vltime = vltime, .ia6t_pltime = pltime },
1269 		.ifra_vhid = attrs->ifaf_vhid,
1270 	};
1271 	if (dst != NULL)
1272 		req.ifra_dstaddr = *dst;
1273 
1274 	return (in6_control_ioctl(SIOCAIFADDR_IN6, &req, ifp, nlp_get_cred(nlp)));
1275 }
1276 
1277 static int
1278 handle_deladdr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1279     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1280 {
1281 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)attrs->ifa_local;
1282 
1283 	if (addr == NULL)
1284 		addr = (struct sockaddr_in6 *)(attrs->ifa_address);
1285 
1286 	if (addr == NULL) {
1287 		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1288 		return (EINVAL);
1289 	}
1290 
1291 	struct in6_aliasreq req = { .ifra_addr = *addr };
1292 
1293 	return (in6_control_ioctl(SIOCDIFADDR_IN6, &req, ifp, nlp_get_cred(nlp)));
1294 }
1295 #endif
1296 
1297 
1298 static int
1299 rtnl_handle_addr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1300 {
1301 	struct epoch_tracker et;
1302 	int error;
1303 
1304 	struct nl_parsed_ifa attrs = {};
1305 	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1306 	if (error != 0)
1307 		return (error);
1308 
1309 	NET_EPOCH_ENTER(et);
1310 	if_t ifp = ifnet_byindex_ref(attrs.ifa_index);
1311 	NET_EPOCH_EXIT(et);
1312 
1313 	if (ifp == NULL) {
1314 		nlmsg_report_err_msg(npt, "Unable to find interface with index %u",
1315 		    attrs.ifa_index);
1316 		return (ENOENT);
1317 	}
1318 	int if_flags = if_getflags(ifp);
1319 
1320 #if defined(INET) || defined(INET6)
1321 	bool new = hdr->nlmsg_type == NL_RTM_NEWADDR;
1322 #endif
1323 
1324 	/*
1325 	 * TODO: Properly handle NLM_F_CREATE / NLM_F_EXCL.
1326 	 * The current ioctl-based KPI always does an implicit create-or-replace.
1327 	 * It is not possible to specify fine-grained options.
1328 	 */
1329 
1330 	switch (attrs.ifa_family) {
1331 #ifdef INET
1332 	case AF_INET:
1333 		if (new)
1334 			error = handle_newaddr_inet(hdr, &attrs, ifp, nlp, npt);
1335 		else
1336 			error = handle_deladdr_inet(hdr, &attrs, ifp, nlp, npt);
1337 		break;
1338 #endif
1339 #ifdef INET6
1340 	case AF_INET6:
1341 		if (new)
1342 			error = handle_newaddr_inet6(hdr, &attrs, ifp, nlp, npt);
1343 		else
1344 			error = handle_deladdr_inet6(hdr, &attrs, ifp, nlp, npt);
1345 		break;
1346 #endif
1347 	default:
1348 		error = EAFNOSUPPORT;
1349 	}
1350 
1351 	if (error == 0 && !(if_flags & IFF_UP) && (if_getflags(ifp) & IFF_UP))
1352 		if_up(ifp);
1353 
1354 	if_rele(ifp);
1355 
1356 	return (error);
1357 }
1358 
1359 
1360 static void
1361 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
1362 {
1363 	struct nlmsghdr hdr = {};
1364 	struct nl_writer nw = {};
1365 	uint32_t group = 0;
1366 
1367 	switch (ifa->ifa_addr->sa_family) {
1368 #ifdef INET
1369 	case AF_INET:
1370 		group = RTNLGRP_IPV4_IFADDR;
1371 		break;
1372 #endif
1373 #ifdef INET6
1374 	case AF_INET6:
1375 		group = RTNLGRP_IPV6_IFADDR;
1376 		break;
1377 #endif
1378 	default:
1379 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
1380 		    ifa->ifa_addr->sa_family);
1381 		return;
1382 	}
1383 
1384 	if (!nl_has_listeners(NETLINK_ROUTE, group))
1385 		return;
1386 
1387 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
1388 		NL_LOG(LOG_DEBUG, "error allocating group writer");
1389 		return;
1390 	}
1391 
1392 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
1393 
1394 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
1395 	nlmsg_flush(&nw);
1396 }
1397 
1398 static void
1399 rtnl_handle_ifevent(if_t ifp, int nlmsg_type, int if_flags_mask)
1400 {
1401 	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
1402 	struct nl_writer nw = {};
1403 
1404 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
1405 		return;
1406 
1407 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
1408 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
1409 		return;
1410 	}
1411 	dump_iface(&nw, ifp, &hdr, if_flags_mask);
1412         nlmsg_flush(&nw);
1413 }
1414 
1415 static void
1416 rtnl_handle_ifattach(void *arg, if_t ifp)
1417 {
1418 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1419 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1420 }
1421 
1422 static void
1423 rtnl_handle_ifdetach(void *arg, if_t ifp)
1424 {
1425 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1426 	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
1427 }
1428 
1429 static void
1430 rtnl_handle_iflink(void *arg, if_t ifp)
1431 {
1432 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1433 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1434 }
1435 
1436 void
1437 rtnl_handle_ifnet_event(if_t ifp, int if_flags_mask)
1438 {
1439 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1440 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
1441 }
1442 
1443 static const struct rtnl_cmd_handler cmd_handlers[] = {
1444 	{
1445 		.cmd = NL_RTM_GETLINK,
1446 		.name = "RTM_GETLINK",
1447 		.cb = &rtnl_handle_getlink,
1448 		.flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
1449 	},
1450 	{
1451 		.cmd = NL_RTM_DELLINK,
1452 		.name = "RTM_DELLINK",
1453 		.cb = &rtnl_handle_dellink,
1454 		.priv = PRIV_NET_IFDESTROY,
1455 		.flags = RTNL_F_NOEPOCH,
1456 	},
1457 	{
1458 		.cmd = NL_RTM_NEWLINK,
1459 		.name = "RTM_NEWLINK",
1460 		.cb = &rtnl_handle_newlink,
1461 		.priv = PRIV_NET_IFCREATE,
1462 		.flags = RTNL_F_NOEPOCH,
1463 	},
1464 	{
1465 		.cmd = NL_RTM_GETADDR,
1466 		.name = "RTM_GETADDR",
1467 		.cb = &rtnl_handle_getaddr,
1468 		.flags = RTNL_F_ALLOW_NONVNET_JAIL,
1469 	},
1470 	{
1471 		.cmd = NL_RTM_NEWADDR,
1472 		.name = "RTM_NEWADDR",
1473 		.cb = &rtnl_handle_addr,
1474 		.priv = PRIV_NET_ADDIFADDR,
1475 		.flags = RTNL_F_NOEPOCH,
1476 	},
1477 	{
1478 		.cmd = NL_RTM_DELADDR,
1479 		.name = "RTM_DELADDR",
1480 		.cb = &rtnl_handle_addr,
1481 		.priv = PRIV_NET_DELIFADDR,
1482 		.flags = RTNL_F_NOEPOCH,
1483 	},
1484 };
1485 
1486 static const struct nlhdr_parser *all_parsers[] = {
1487 	&ifmsg_parser, &ifa_parser, &ifa_fbsd_parser,
1488 };
1489 
1490 void
1491 rtnl_iface_add_cloner(struct nl_cloner *cloner)
1492 {
1493 	sx_xlock(&rtnl_cloner_lock);
1494 	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
1495 	sx_xunlock(&rtnl_cloner_lock);
1496 }
1497 
1498 void
1499 rtnl_iface_del_cloner(struct nl_cloner *cloner)
1500 {
1501 	sx_xlock(&rtnl_cloner_lock);
1502 	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
1503 	sx_xunlock(&rtnl_cloner_lock);
1504 }
1505 
1506 void
1507 rtnl_ifaces_init(void)
1508 {
1509 	ifattach_event = EVENTHANDLER_REGISTER(
1510 	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
1511 	    EVENTHANDLER_PRI_ANY);
1512 	ifdetach_event = EVENTHANDLER_REGISTER(
1513 	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
1514 	    EVENTHANDLER_PRI_ANY);
1515 	ifaddr_event = EVENTHANDLER_REGISTER(
1516 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
1517 	    EVENTHANDLER_PRI_ANY);
1518 	iflink_event = EVENTHANDLER_REGISTER(
1519 	    ifnet_link_event, rtnl_handle_iflink, NULL,
1520 	    EVENTHANDLER_PRI_ANY);
1521 	NL_VERIFY_PARSERS(all_parsers);
1522 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
1523 }
1524 
1525 void
1526 rtnl_ifaces_destroy(void)
1527 {
1528 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
1529 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
1530 	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
1531 	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
1532 }
1533