xref: /freebsd/sys/netlink/route/iface.c (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_netlink.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34 #include <sys/types.h>
35 #include <sys/eventhandler.h>
36 #include <sys/kernel.h>
37 #include <sys/jail.h>
38 #include <sys/malloc.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <sys/syslog.h>
42 
43 #include <net/if.h>
44 #include <net/if_dl.h>
45 #include <net/if_media.h>
46 #include <net/if_var.h>
47 #include <net/if_clone.h>
48 #include <net/route.h>
49 #include <net/route/nhop.h>
50 #include <net/route/route_ctl.h>
51 #include <netinet/in_var.h>
52 #include <netinet6/in6_var.h>
53 #include <netinet6/scope6_var.h> /* scope deembedding */
54 #include <netlink/netlink.h>
55 #include <netlink/netlink_ctl.h>
56 #include <netlink/netlink_route.h>
57 #include <netlink/route/route_var.h>
58 
59 #define	DEBUG_MOD_NAME	nl_iface
60 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
61 #include <netlink/netlink_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63 
/*
 * State handed to the per-object callbacks while a dump request is served.
 */
struct netlink_walkargs {
	struct nl_writer *nw;	/* writer the reply messages are emitted to */
	struct nlmsghdr hdr;	/* template header passed to the dump routines */
	struct nlpcb *so;	/* netlink socket the request arrived on */
	struct ucred *cred;
	uint32_t fibnum;
	int family;
	int error;
	int count;		/* logged as "iterated" when a dump ends */
	int dumped;		/* logged as "dumped" when a dump ends */
};
75 
/* Event handler tags; NOTE(review): presumably registered by init code outside this view. */
static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;

/* Singly-linked list of nl_cloner entries, statically initialised empty. */
static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);

/* sx lock set up through SX_SYSINIT; NOTE(review): presumably protects nl_cloners - confirm. */
static struct sx rtnl_cloner_lock;
SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");

/* These are external hooks for CARP. */
extern int	(*carp_get_vhid_p)(struct ifaddr *);
85 
86 /*
87  * RTM_GETLINK request
88  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
89  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
90  *
91  * Reply:
92  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
93 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
94 
95 [
96 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
97 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
98 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
99 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
100 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
101 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
102  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
103 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
104 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
105 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
106 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
107 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
108 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
109 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
110 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
111 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
112 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
113 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
114 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
115  */
116 
/* Link state emitted as the IFLA_OPERSTATE and IFLA_CARRIER attributes. */
struct if_state {
	uint8_t		ifla_operstate;	/* one of the IF_OPER_* values */
	uint8_t		ifla_carrier;	/* 1 if carrier is present, 0 otherwise */
};
121 
122 static void
123 get_operstate_ether(if_t ifp, struct if_state *pstate)
124 {
125 	struct ifmediareq ifmr = {};
126 	int error;
127 	error = if_ioctl(ifp, SIOCGIFMEDIA, (void *)&ifmr);
128 
129 	if (error != 0) {
130 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
131 		    if_name(ifp), error);
132 		return;
133 	}
134 
135 	switch (IFM_TYPE(ifmr.ifm_active)) {
136 	case IFM_ETHER:
137 		if (ifmr.ifm_status & IFM_ACTIVE) {
138 			pstate->ifla_carrier = 1;
139 			if (if_getflags(ifp) & IFF_MONITOR)
140 				pstate->ifla_operstate = IF_OPER_DORMANT;
141 			else
142 				pstate->ifla_operstate = IF_OPER_UP;
143 		} else
144 			pstate->ifla_operstate = IF_OPER_DOWN;
145 	}
146 }
147 
148 static bool
149 get_stats(struct nl_writer *nw, if_t ifp)
150 {
151 	struct rtnl_link_stats64 *stats;
152 
153 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
154 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
155 	if (nla == NULL)
156 		return (false);
157 	nla->nla_type = IFLA_STATS64;
158 	nla->nla_len = nla_len;
159 	stats = (struct rtnl_link_stats64 *)(nla + 1);
160 
161 	stats->rx_packets = if_getcounter(ifp, IFCOUNTER_IPACKETS);
162 	stats->tx_packets = if_getcounter(ifp, IFCOUNTER_OPACKETS);
163 	stats->rx_bytes = if_getcounter(ifp, IFCOUNTER_IBYTES);
164 	stats->tx_bytes = if_getcounter(ifp, IFCOUNTER_OBYTES);
165 	stats->rx_errors = if_getcounter(ifp, IFCOUNTER_IERRORS);
166 	stats->tx_errors = if_getcounter(ifp, IFCOUNTER_OERRORS);
167 	stats->rx_dropped = if_getcounter(ifp, IFCOUNTER_IQDROPS);
168 	stats->tx_dropped = if_getcounter(ifp, IFCOUNTER_OQDROPS);
169 	stats->multicast = if_getcounter(ifp, IFCOUNTER_IMCASTS);
170 	stats->rx_nohandler = if_getcounter(ifp, IFCOUNTER_NOPROTO);
171 
172 	return (true);
173 }
174 
175 static void
176 get_operstate(if_t ifp, struct if_state *pstate)
177 {
178 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
179 	pstate->ifla_carrier = 0; /* no carrier */
180 
181 	switch (if_gettype(ifp)) {
182 	case IFT_ETHER:
183 	case IFT_L2VLAN:
184 		get_operstate_ether(ifp, pstate);
185 		break;
186 	default:
187 		/* Map admin state to the operstate */
188 		if (if_getflags(ifp) & IFF_UP) {
189 			pstate->ifla_operstate = IF_OPER_UP;
190 			pstate->ifla_carrier = 1;
191 		} else
192 			pstate->ifla_operstate = IF_OPER_DOWN;
193 		break;
194 	}
195 }
196 
197 static void
198 get_hwaddr(struct nl_writer *nw, if_t ifp)
199 {
200 	struct ifreq ifr = {};
201 
202 	if (if_gethwaddr(ifp, &ifr) == 0) {
203 		nlattr_add(nw, IFLAF_ORIG_HWADDR, if_getaddrlen(ifp),
204 		    ifr.ifr_addr.sa_data);
205 	}
206 }
207 
208 static unsigned
209 ifp_flags_to_netlink(const if_t ifp)
210 {
211         return (if_getflags(ifp) | if_getdrvflags(ifp));
212 }
213 
214 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
215 static bool
216 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
217 {
218         uint32_t addr_len = 0;
219         const void *addr_data = NULL;
220 #ifdef INET6
221         struct in6_addr addr6;
222 #endif
223 
224         if (sa == NULL)
225                 return (true);
226 
227         switch (sa->sa_family) {
228 #ifdef INET
229         case AF_INET:
230                 addr_len = sizeof(struct in_addr);
231                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
232                 break;
233 #endif
234 #ifdef INET6
235         case AF_INET6:
236                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
237                 addr_len = sizeof(struct in6_addr);
238                 addr_data = &addr6;
239                 break;
240 #endif
241         case AF_LINK:
242                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
243                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
244                 break;
245 	case AF_UNSPEC:
246 		/* Ignore empty SAs without warning */
247 		return (true);
248         default:
249                 NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family);
250                 return (true);
251         }
252 
253         return (nlattr_add(nw, attr, addr_len, addr_data));
254 }
255 
256 static bool
257 dump_iface_caps(struct nl_writer *nw, struct ifnet *ifp)
258 {
259 	int off = nlattr_add_nested(nw, IFLAF_CAPS);
260 	uint32_t active_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {};
261 	uint32_t all_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {};
262 
263 	MPASS(sizeof(active_caps) >= 8);
264 	MPASS(sizeof(all_caps) >= 8);
265 
266 	if (off == 0)
267 		return (false);
268 
269 	active_caps[0] = (uint32_t)if_getcapabilities(ifp);
270 	all_caps[0] = (uint32_t)if_getcapenable(ifp);
271 	active_caps[1] = (uint32_t)if_getcapabilities2(ifp);
272 	all_caps[1] = (uint32_t)if_getcapenable2(ifp);
273 
274 	nlattr_add_u32(nw, NLA_BITSET_SIZE, IFCAP_B_SIZE);
275 	nlattr_add(nw, NLA_BITSET_MASK, sizeof(all_caps), all_caps);
276 	nlattr_add(nw, NLA_BITSET_VALUE, sizeof(active_caps), active_caps);
277 
278 	nlattr_set_len(nw, off);
279 
280 	return (true);
281 }
282 
283 /*
284  * Dumps interface state, properties and metrics.
285  * @nw: message writer
286  * @ifp: target interface
287  * @hdr: template header
288  * @if_flags_mask: changed if_[drv]_flags bitmask
289  *
290  * This function is called without epoch and MAY sleep.
291  */
292 static bool
293 dump_iface(struct nl_writer *nw, if_t ifp, const struct nlmsghdr *hdr,
294     int if_flags_mask)
295 {
296         struct ifinfomsg *ifinfo;
297 
298         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
299 
300 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
301 		goto enomem;
302 
303         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
304         ifinfo->ifi_family = AF_UNSPEC;
305         ifinfo->__ifi_pad = 0;
306         ifinfo->ifi_type = if_gettype(ifp);
307         ifinfo->ifi_index = if_getindex(ifp);
308         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
309         ifinfo->ifi_change = if_flags_mask;
310 
311 	struct if_state ifs = {};
312 	get_operstate(ifp, &ifs);
313 
314 	if (ifs.ifla_operstate == IF_OPER_UP)
315 		ifinfo->ifi_flags |= IFF_LOWER_UP;
316 
317         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
318         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
319         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
320 
321 /*
322         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
323         nlattr_add_u8(nw, IFLA_LINKMODE, val);
324 */
325         if (if_getaddrlen(ifp) != 0) {
326 		struct ifaddr *ifa = if_getifaddr(ifp);
327 
328                 dump_sa(nw, IFLA_ADDRESS, ifa->ifa_addr);
329         }
330 
331         if ((if_getbroadcastaddr(ifp) != NULL)) {
332 		nlattr_add(nw, IFLA_BROADCAST, if_getaddrlen(ifp),
333 		    if_getbroadcastaddr(ifp));
334         }
335 
336         nlattr_add_u32(nw, IFLA_MTU, if_getmtu(ifp));
337 /*
338         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
339         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
340         nlattr_add_u32(nw, IFLA_GROUP, 0);
341 */
342 
343 	if (if_getdescr(ifp) != NULL)
344 		nlattr_add_string(nw, IFLA_IFALIAS, if_getdescr(ifp));
345 
346 	/* Store FreeBSD-specific attributes */
347 	int off = nlattr_add_nested(nw, IFLA_FREEBSD);
348 	if (off != 0) {
349 		get_hwaddr(nw, ifp);
350 		dump_iface_caps(nw, ifp);
351 
352 		nlattr_set_len(nw, off);
353 	}
354 
355 	get_stats(nw, ifp);
356 
357 	uint32_t val = (if_getflags(ifp) & IFF_PROMISC) != 0;
358         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
359 
360 	ifc_dump_ifp_nl(ifp, nw);
361 
362         if (nlmsg_end(nw))
363 		return (true);
364 
365 enomem:
366         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
367         nlmsg_abort(nw);
368         return (false);
369 }
370 
371 static bool
372 check_ifmsg(void *hdr, struct nl_pstate *npt)
373 {
374 	struct ifinfomsg *ifm = hdr;
375 
376 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
377 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
378 		nlmsg_report_err_msg(npt,
379 		    "strict checking: non-zero values in ifinfomsg header");
380 		return (false);
381 	}
382 
383 	return (true);
384 }
385 
/* Offset helpers: _IN() indexes the ifinfomsg header, _OUT() the parsed result. */
#define	_IN(_field)	offsetof(struct ifinfomsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
/* Fixed ifinfomsg header fields copied into struct nl_parsed_link. */
static const struct nlfield_parser nlf_p_if[] = {
	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
	{ .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 },
	{ .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 },
};

/* Attributes accepted inside the nested IFLA_LINKINFO attribute. */
static const struct nlattr_parser nla_p_linfo[] = {
	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
};
NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);

/*
 * Top-level link attributes.
 * IFLA_IFNAME and IFLA_ALT_IFNAME both store into ifla_ifname.
 * NOTE(review): the parser may depend on the order of these entries -
 * confirm before reordering or inserting.
 */
static const struct nlattr_parser nla_p_if[] = {
	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
	{ .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 },
	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
	{ .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string },
	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
};
#undef _IN
#undef _OUT
/* Link message parser; check_ifmsg is supplied as the strict header check. */
NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
413 
414 static bool
415 match_iface(if_t ifp, void *_arg)
416 {
417 	struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg;
418 
419 	if (attrs->ifi_index != 0 && attrs->ifi_index != if_getindex(ifp))
420 		return (false);
421 	if (attrs->ifi_type != 0 && attrs->ifi_index != if_gettype(ifp))
422 		return (false);
423 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
424 		return (false);
425 	/* TODO: add group match */
426 
427 	return (true);
428 }
429 
430 static int
431 dump_cb(if_t ifp, void *_arg)
432 {
433 	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
434 	if (!dump_iface(wa->nw, ifp, &wa->hdr, 0))
435 		return (ENOMEM);
436 	return (0);
437 }
438 
439 /*
440  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
441  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
442  *   [
443  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
444  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
445  *   ]
446  */
447 static int
448 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
449 {
450 	struct epoch_tracker et;
451         if_t ifp;
452 	int error = 0;
453 
454 	struct nl_parsed_link attrs = {};
455 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
456 	if (error != 0)
457 		return (error);
458 
459 	struct netlink_walkargs wa = {
460 		.so = nlp,
461 		.nw = npt->nw,
462 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
463 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
464 		.hdr.nlmsg_flags = hdr->nlmsg_flags,
465 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
466 	};
467 
468 	/* Fast track for an interface w/ explicit name or index match */
469 	if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) {
470 		if (attrs.ifi_index != 0) {
471 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u",
472 			    attrs.ifi_index);
473 			NET_EPOCH_ENTER(et);
474 			ifp = ifnet_byindex_ref(attrs.ifi_index);
475 			NET_EPOCH_EXIT(et);
476 		} else {
477 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s",
478 			    attrs.ifla_ifname);
479 			ifp = ifunit_ref(attrs.ifla_ifname);
480 		}
481 
482 		if (ifp != NULL) {
483 			if (match_iface(ifp, &attrs)) {
484 				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
485 					error = ENOMEM;
486 			} else
487 				error = ENODEV;
488 			if_rele(ifp);
489 		} else
490 			error = ENODEV;
491 		return (error);
492 	}
493 
494 	/* Always treat non-direct-match as a multipart message */
495 	wa.hdr.nlmsg_flags |= NLM_F_MULTI;
496 
497 	/*
498 	 * Fetching some link properties require performing ioctl's that may be blocking.
499 	 * Address it by saving referenced pointers of the matching links,
500 	 * exiting from epoch and going through the list one-by-one.
501 	 */
502 
503 	NL_LOG(LOG_DEBUG2, "Start dump");
504 	if_foreach_sleep(match_iface, &attrs, dump_cb, &wa);
505 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
506 
507 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
508                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
509                 return (ENOMEM);
510         }
511 
512 	return (error);
513 }
514 
515 /*
516  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
517  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
518  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
519  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
520  *   [
521  *    {nla_len=16, nla_type=IFLA_LINKINFO},
522  *     [
523  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
524  *     ]
525  *    ]
526  */
527 
528 static int
529 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
530 {
531 	struct epoch_tracker et;
532         if_t ifp;
533 	int error;
534 
535 	struct nl_parsed_link attrs = {};
536 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
537 	if (error != 0)
538 		return (error);
539 
540 	NET_EPOCH_ENTER(et);
541 	ifp = ifnet_byindex_ref(attrs.ifi_index);
542 	NET_EPOCH_EXIT(et);
543 	if (ifp == NULL) {
544 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
545 		return (ENOENT);
546 	}
547 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
548 
549 	sx_xlock(&ifnet_detach_sxlock);
550 	error = if_clone_destroy(if_name(ifp));
551 	sx_xunlock(&ifnet_detach_sxlock);
552 
553 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
554 
555 	if_rele(ifp);
556 	return (error);
557 }
558 
559 /*
560  * New link:
561  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0},
562  *   {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
563  *    [
564  *     {{nla_len=8, nla_type=IFLA_MTU}, 123},
565  *     {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"},
566  *     {{nla_len=24, nla_type=IFLA_LINKINFO},
567  *      [
568  *       {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...},
569  *       {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]}
570  *
571  * Update link:
572  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0},
573  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0},
574  * {{nla_len=8, nla_type=IFLA_MTU}, 123}}
575  *
576  *
577  * Check command availability:
578  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0},
579  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
580  */
581 
582 
583 static int
584 create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
585     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
586 {
587 	if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) {
588 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
589 		return (EINVAL);
590 	}
591 	if (lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) {
592 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
593 		return (EINVAL);
594 	}
595 
596 	struct ifc_data_nl ifd = {
597 		.flags = IFC_F_CREATE,
598 		.lattrs = lattrs,
599 		.bm = bm,
600 		.npt = npt,
601 	};
602 	if (ifc_create_ifp_nl(lattrs->ifla_ifname, &ifd) && ifd.error == 0)
603 		nl_store_ifp_cookie(npt, ifd.ifp);
604 
605 	return (ifd.error);
606 }
607 
608 static int
609 modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
610     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
611 {
612 	if_t ifp = NULL;
613 	struct epoch_tracker et;
614 
615 	if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) {
616 		/*
617 		 * Applications like ip(8) verify RTM_NEWLINK command
618 		 * existence by calling it with empty arguments. Always
619 		 * return "innocent" error in that case.
620 		 */
621 		NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field");
622 		return (EPERM);
623 	}
624 
625 	if (lattrs->ifi_index != 0) {
626 		NET_EPOCH_ENTER(et);
627 		ifp = ifnet_byindex_ref(lattrs->ifi_index);
628 		NET_EPOCH_EXIT(et);
629 		if (ifp == NULL) {
630 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u",
631 			    lattrs->ifi_index);
632 			return (ENOENT);
633 		}
634 	}
635 
636 	if (ifp == NULL && lattrs->ifla_ifname != NULL) {
637 		ifp = ifunit_ref(lattrs->ifla_ifname);
638 		if (ifp == NULL) {
639 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s",
640 			    lattrs->ifla_ifname);
641 			return (ENOENT);
642 		}
643 	}
644 
645 	MPASS(ifp != NULL);
646 
647 	/*
648 	 * Modification request can address either
649 	 * 1) cloned interface, in which case we call the cloner-specific
650 	 *  modification routine
651 	 * or
652 	 * 2) non-cloned (e.g. "physical") interface, in which case we call
653 	 *  generic modification routine
654 	 */
655 	struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt };
656 	if (!ifc_modify_ifp_nl(ifp, &ifd))
657 		ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt);
658 
659 	if_rele(ifp);
660 
661 	return (ifd.error);
662 }
663 
664 
665 static int
666 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
667 {
668 	struct nlattr_bmask bm;
669 	int error;
670 
671 	struct nl_parsed_link attrs = {};
672 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
673 	if (error != 0)
674 		return (error);
675 	nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm);
676 
677 	if (hdr->nlmsg_flags & NLM_F_CREATE)
678 		return (create_link(hdr, &attrs, &bm, nlp, npt));
679 	else
680 		return (modify_link(hdr, &attrs, &bm, nlp, npt));
681 }
682 
683 static void
684 set_scope6(struct sockaddr *sa, uint32_t ifindex)
685 {
686 #ifdef INET6
687 	if (sa != NULL && sa->sa_family == AF_INET6) {
688 		struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa;
689 
690 		if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr))
691 			in6_set_unicast_scopeid(&sa6->sin6_addr, ifindex);
692 	}
693 #endif
694 }
695 
696 static bool
697 check_sa_family(const struct sockaddr *sa, int family, const char *attr_name,
698     struct nl_pstate *npt)
699 {
700 	if (sa == NULL || sa->sa_family == family)
701 		return (true);
702 
703 	nlmsg_report_err_msg(npt, "wrong family for %s attribute: %d != %d",
704 	    attr_name, family, sa->sa_family);
705 	return (false);
706 }
707 
/*
 * Parsed representation of an address message: ifaddrmsg header fields
 * followed by the attributes filled in by the ifa parsers.
 */
struct nl_parsed_ifa {
	uint8_t			ifa_family;	/* ifaddrmsg header */
	uint8_t			ifa_prefixlen;	/* ifaddrmsg header */
	uint8_t			ifa_scope;	/* ifaddrmsg header */
	uint32_t		ifa_index;	/* ifaddrmsg header */
	uint32_t		ifa_flags;	/* header flags (u8) or IFA_FLAGS attribute */
	uint32_t		ifaf_vhid;	/* IFAF_VHID, nested in IFA_FREEBSD */
	uint32_t		ifaf_flags;	/* IFAF_FLAGS, nested in IFA_FREEBSD */
	struct sockaddr		*ifa_address;	/* IFA_ADDRESS */
	struct sockaddr		*ifa_local;	/* IFA_LOCAL */
	struct sockaddr		*ifa_broadcast;	/* IFA_BROADCAST */
	struct ifa_cacheinfo	*ifa_cacheinfo;	/* IFA_CACHEINFO, points into the message */
	struct sockaddr		*f_ifa_addr;
	struct sockaddr		*f_ifa_dst;
};
723 
724 static int
725 nlattr_get_cinfo(struct nlattr *nla, struct nl_pstate *npt,
726     const void *arg __unused, void *target)
727 {
728 	if (__predict_false(NLA_DATA_LEN(nla) != sizeof(struct ifa_cacheinfo))) {
729 		NLMSG_REPORT_ERR_MSG(npt, "nla type %d size(%u) is not ifa_cacheinfo",
730 		    nla->nla_type, NLA_DATA_LEN(nla));
731 		return (EINVAL);
732 	}
733 	*((struct ifa_cacheinfo **)target) = (struct ifa_cacheinfo *)NL_RTA_DATA(nla);
734 	return (0);
735 }
736 
/* Offset helpers: _IN() indexes the ifaddrmsg header, _OUT() the parsed result. */
#define	_IN(_field)	offsetof(struct ifaddrmsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_ifa, _field)
/* Fixed ifaddrmsg header fields copied into struct nl_parsed_ifa. */
static const struct nlfield_parser nlf_p_ifa[] = {
	{ .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 },
	{ .off_in = _IN(ifa_index), .off_out = _OUT(ifa_index), .cb = nlf_get_u32 },
};

/* FreeBSD-specific attributes nested inside IFA_FREEBSD. */
static const struct nlattr_parser nla_p_ifa_fbsd[] = {
	{ .type = IFAF_VHID, .off = _OUT(ifaf_vhid), .cb = nlattr_get_uint32 },
	{ .type = IFAF_FLAGS, .off = _OUT(ifaf_flags), .cb = nlattr_get_uint32 },
};
NL_DECLARE_ATTR_PARSER(ifa_fbsd_parser, nla_p_ifa_fbsd);

/*
 * Top-level address attributes.
 * IFA_FLAGS stores into the same ifa_flags field as the header flags.
 * NOTE(review): the parser may depend on the order of these entries -
 * confirm before reordering or inserting.
 */
static const struct nlattr_parser nla_p_ifa[] = {
	{ .type = IFA_ADDRESS, .off = _OUT(ifa_address), .cb = nlattr_get_ip },
	{ .type = IFA_LOCAL, .off = _OUT(ifa_local), .cb = nlattr_get_ip },
	{ .type = IFA_BROADCAST, .off = _OUT(ifa_broadcast), .cb = nlattr_get_ip },
	{ .type = IFA_CACHEINFO, .off = _OUT(ifa_cacheinfo), .cb = nlattr_get_cinfo },
	{ .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 },
	{ .type = IFA_FREEBSD, .arg = &ifa_fbsd_parser, .cb = nlattr_get_nested },
};
#undef _IN
#undef _OUT
763 
764 static bool
765 post_p_ifa(void *_attrs, struct nl_pstate *npt)
766 {
767 	struct nl_parsed_ifa *attrs = (struct nl_parsed_ifa *)_attrs;
768 
769 	if (!check_sa_family(attrs->ifa_address, attrs->ifa_family, "IFA_ADDRESS", npt))
770 		return (false);
771 	if (!check_sa_family(attrs->ifa_local, attrs->ifa_family, "IFA_LOCAL", npt))
772 		return (false);
773 	if (!check_sa_family(attrs->ifa_broadcast, attrs->ifa_family, "IFA_BROADADDR", npt))
774 		return (false);
775 
776 	set_scope6(attrs->ifa_address, attrs->ifa_index);
777 	set_scope6(attrs->ifa_local, attrs->ifa_index);
778 
779 	return (true);
780 }
781 
782 NL_DECLARE_PARSER_EXT(ifa_parser, struct ifaddrmsg, NULL, nlf_p_ifa, nla_p_ifa, post_p_ifa);
783 
784 
785 /*
786 
787 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
788  [
789         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
790         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
791         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
792         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
793         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
794 ---
795 
796 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
797  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
798    [
799     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
800    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
801    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
802 */
803 
804 static uint8_t
805 ifa_get_scope(const struct ifaddr *ifa)
806 {
807         const struct sockaddr *sa;
808         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
809 
810         sa = ifa->ifa_addr;
811         switch (sa->sa_family) {
812 #ifdef INET
813         case AF_INET:
814                 {
815                         struct in_addr addr;
816                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
817                         if (IN_LOOPBACK(addr.s_addr))
818                                 addr_scope = RT_SCOPE_HOST;
819                         else if (IN_LINKLOCAL(addr.s_addr))
820                                 addr_scope = RT_SCOPE_LINK;
821                         break;
822                 }
823 #endif
824 #ifdef INET6
825         case AF_INET6:
826                 {
827                         const struct in6_addr *addr;
828                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
829                         if (IN6_IS_ADDR_LOOPBACK(addr))
830                                 addr_scope = RT_SCOPE_HOST;
831                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
832                                 addr_scope = RT_SCOPE_LINK;
833                         break;
834                 }
835 #endif
836         }
837 
838         return (addr_scope);
839 }
840 
#ifdef INET6
/*
 * Returns the number of bits set in the IPv6 mask @addr, summed over
 * its four 32-bit words.
 */
static uint8_t
inet6_get_plen(const struct in6_addr *addr)
{
	int bits = 0;

	for (int i = 0; i < 4; i++)
		bits += bitcount32(addr->s6_addr32[i]);

	return (bits);
}
#endif
850 
851 static uint8_t
852 get_sa_plen(const struct sockaddr *sa)
853 {
854 #ifdef INET
855         const struct in_addr *paddr;
856 #endif
857 #ifdef INET6
858         const struct in6_addr *paddr6;
859 #endif
860 
861         switch (sa->sa_family) {
862 #ifdef INET
863         case AF_INET:
864                 paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
865                 return bitcount32(paddr->s_addr);;
866 #endif
867 #ifdef INET6
868         case AF_INET6:
869                 paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
870                 return inet6_get_plen(paddr6);
871 #endif
872         }
873 
874         return (0);
875 }
876 
877 #ifdef INET6
878 static uint32_t
879 in6_flags_to_nl(uint32_t flags)
880 {
881 	uint32_t nl_flags = 0;
882 
883 	if (flags & IN6_IFF_TEMPORARY)
884 		nl_flags |= IFA_F_TEMPORARY;
885 	if (flags & IN6_IFF_NODAD)
886 		nl_flags |= IFA_F_NODAD;
887 	if (flags & IN6_IFF_DEPRECATED)
888 		nl_flags |= IFA_F_DEPRECATED;
889 	if (flags & IN6_IFF_TENTATIVE)
890 		nl_flags |= IFA_F_TENTATIVE;
891 	if ((flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == 0)
892 		flags |= IFA_F_PERMANENT;
893 	if (flags & IN6_IFF_DUPLICATED)
894 		flags |= IFA_F_DADFAILED;
895 	return (nl_flags);
896 }
897 
898 static uint32_t
899 nl_flags_to_in6(uint32_t flags)
900 {
901 	uint32_t in6_flags = 0;
902 
903 	if (flags & IFA_F_TEMPORARY)
904 		in6_flags |= IN6_IFF_TEMPORARY;
905 	if (flags & IFA_F_NODAD)
906 		in6_flags |= IN6_IFF_NODAD;
907 	if (flags & IFA_F_DEPRECATED)
908 		in6_flags |= IN6_IFF_DEPRECATED;
909 	if (flags & IFA_F_TENTATIVE)
910 		in6_flags |= IN6_IFF_TENTATIVE;
911 	if (flags & IFA_F_DADFAILED)
912 		in6_flags |= IN6_IFF_DUPLICATED;
913 
914 	return (in6_flags);
915 }
916 
917 static void
918 export_cache_info6(struct nl_writer *nw, const struct in6_ifaddr *ia)
919 {
920 	struct ifa_cacheinfo ci = {
921 		.cstamp = ia->ia6_createtime * 1000,
922 		.tstamp = ia->ia6_updatetime * 1000,
923 		.ifa_prefered = ia->ia6_lifetime.ia6t_pltime,
924 		.ifa_valid = ia->ia6_lifetime.ia6t_vltime,
925 	};
926 
927 	nlattr_add(nw, IFA_CACHEINFO, sizeof(ci), &ci);
928 }
929 #endif
930 
/*
 * Appends the address cache information attribute for @ifa to @nw.
 * Only IPv6 addresses carry such information; for every other address
 * family nothing is written.
 */
static void
export_cache_info(struct nl_writer *nw, struct ifaddr *ifa)
{
#ifdef INET6
	if (ifa->ifa_addr->sa_family == AF_INET6)
		export_cache_info6(nw, (struct in6_ifaddr *)ifa);
#endif
}
942 
943 /*
944  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
945            ('IFA_LOCAL', '12.0.0.1'),
946            ('IFA_LABEL', 'eth10'),
947            ('IFA_FLAGS', 128),
948            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
949  */
950 static bool
951 dump_iface_addr(struct nl_writer *nw, if_t ifp, struct ifaddr *ifa,
952     const struct nlmsghdr *hdr)
953 {
954         struct ifaddrmsg *ifamsg;
955         struct sockaddr *sa = ifa->ifa_addr;
956         struct sockaddr *sa_dst = ifa->ifa_dstaddr;
957 
958         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
959             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
960 
961 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
962 		goto enomem;
963 
964         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
965         ifamsg->ifa_family = sa->sa_family;
966         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
967         ifamsg->ifa_flags = 0; // ifa_flags is useless
968         ifamsg->ifa_scope = ifa_get_scope(ifa);
969         ifamsg->ifa_index = if_getindex(ifp);
970 
971 	if ((if_getflags(ifp) & IFF_POINTOPOINT) && sa_dst != NULL && sa_dst->sa_family != 0) {
972 		/* P2P interface may have IPv6 LL with no dst address */
973 		dump_sa(nw, IFA_ADDRESS, sa_dst);
974 		dump_sa(nw, IFA_LOCAL, sa);
975 	} else {
976 		dump_sa(nw, IFA_ADDRESS, sa);
977 #ifdef INET
978 		/*
979 		 * In most cases, IFA_ADDRESS == IFA_LOCAL
980 		 * Skip IFA_LOCAL for anything except INET
981 		 */
982 		if (sa->sa_family == AF_INET)
983 			dump_sa(nw, IFA_LOCAL, sa);
984 #endif
985 	}
986 	if (if_getflags(ifp) & IFF_BROADCAST)
987 		dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr);
988 
989         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
990 
991         uint32_t nl_ifa_flags = 0;
992 #ifdef INET6
993 	if (sa->sa_family == AF_INET6) {
994 		struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
995 		nl_ifa_flags = in6_flags_to_nl(ia->ia6_flags);
996 	}
997 #endif
998         nlattr_add_u32(nw, IFA_FLAGS, nl_ifa_flags);
999 
1000 	export_cache_info(nw, ifa);
1001 
1002 	/* Store FreeBSD-specific attributes */
1003 	int off = nlattr_add_nested(nw, IFA_FREEBSD);
1004 	if (off != 0) {
1005 		if (ifa->ifa_carp != NULL && carp_get_vhid_p != NULL) {
1006 			uint32_t vhid  = (uint32_t)(*carp_get_vhid_p)(ifa);
1007 			nlattr_add_u32(nw, IFAF_VHID, vhid);
1008 		}
1009 #ifdef INET6
1010 		if (sa->sa_family == AF_INET6) {
1011 			uint32_t ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags;
1012 
1013 			nlattr_add_u32(nw, IFAF_FLAGS, ifa_flags);
1014 		}
1015 #endif
1016 
1017 		nlattr_set_len(nw, off);
1018 	}
1019 
1020 	if (nlmsg_end(nw))
1021 		return (true);
1022 enomem:
1023         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
1024             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
1025         nlmsg_abort(nw);
1026         return (false);
1027 }
1028 
1029 static int
1030 dump_iface_addrs(struct netlink_walkargs *wa, if_t ifp)
1031 {
1032         struct ifaddr *ifa;
1033 	struct ifa_iter it;
1034 	int error = 0;
1035 
1036 	for (ifa = ifa_iter_start(ifp, &it); ifa != NULL; ifa = ifa_iter_next(&it)) {
1037 		if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family)
1038 			continue;
1039 		if (ifa->ifa_addr->sa_family == AF_LINK)
1040 			continue;
1041 		if (prison_if(wa->cred, ifa->ifa_addr) != 0)
1042 			continue;
1043 		wa->count++;
1044 		if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr)) {
1045 			error = ENOMEM;
1046 			break;
1047 		}
1048 		wa->dumped++;
1049 	}
1050 	ifa_iter_finish(&it);
1051 
1052 	return (error);
1053 }
1054 
1055 static int
1056 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1057 {
1058         if_t ifp;
1059 	int error = 0;
1060 
1061 	struct nl_parsed_ifa attrs = {};
1062 	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1063 	if (error != 0)
1064 		return (error);
1065 
1066 	struct netlink_walkargs wa = {
1067 		.so = nlp,
1068 		.nw = npt->nw,
1069 		.cred = nlp_get_cred(nlp),
1070 		.family = attrs.ifa_family,
1071 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
1072 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
1073 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
1074 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
1075 	};
1076 
1077 	NL_LOG(LOG_DEBUG2, "Start dump");
1078 
1079 	if (attrs.ifa_index != 0) {
1080 		ifp = ifnet_byindex(attrs.ifa_index);
1081 		if (ifp == NULL)
1082 			error = ENOENT;
1083 		else
1084 			error = dump_iface_addrs(&wa, ifp);
1085 	} else {
1086 		struct if_iter it;
1087 
1088 		for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it)) {
1089 			error = dump_iface_addrs(&wa, ifp);
1090 			if (error != 0)
1091 				break;
1092 		}
1093 		if_iter_finish(&it);
1094 	}
1095 
1096 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
1097 
1098 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
1099                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
1100                 return (ENOMEM);
1101         }
1102 
1103 	return (error);
1104 }
1105 
1106 #ifdef INET
1107 static int
1108 handle_newaddr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1109     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1110 {
1111 	int plen = attrs->ifa_prefixlen;
1112 	int if_flags = if_getflags(ifp);
1113 	struct sockaddr_in *addr, *dst;
1114 
1115 	if (plen > 32) {
1116 		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1117 		return (EINVAL);
1118 	};
1119 
1120 	if (if_flags & IFF_POINTOPOINT) {
1121 		/*
1122 		 * Only P2P IFAs are allowed by the implementation.
1123 		 */
1124 		if (attrs->ifa_address == NULL || attrs->ifa_local == NULL) {
1125 			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1126 			return (EINVAL);
1127 		}
1128 		addr = (struct sockaddr_in *)attrs->ifa_local;
1129 		dst = (struct sockaddr_in *)attrs->ifa_address;
1130 	} else {
1131 		/*
1132 		 * Map the Netlink attributes to FreeBSD ifa layout.
1133 		 * If only IFA_ADDRESS or IFA_LOCAL is set OR
1134 		 * both are set to the same value => ifa is not p2p
1135 		 * and the attribute value contains interface address.
1136 		 *
1137 		 * Otherwise (both IFA_ADDRESS and IFA_LOCAL are set and
1138 		 * different), IFA_LOCAL contains an interface address and
1139 		 * IFA_ADDRESS contains peer address.
1140 		 */
1141 		addr = (struct sockaddr_in *)attrs->ifa_local;
1142 		if (addr == NULL)
1143 			addr = (struct sockaddr_in *)attrs->ifa_address;
1144 
1145 		if (addr == NULL) {
1146 			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1147 			return (EINVAL);
1148 		}
1149 
1150 		/* Generate broadcast address if not set */
1151 		if ((if_flags & IFF_BROADCAST) && attrs->ifa_broadcast == NULL) {
1152 			uint32_t s_baddr;
1153 			struct sockaddr_in *sin_brd;
1154 
1155 			if (plen == 31)
1156 				s_baddr = INADDR_BROADCAST; /* RFC 3021 */
1157 			else {
1158 				uint32_t s_mask;
1159 
1160 				s_mask = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
1161 				s_baddr = addr->sin_addr.s_addr | ~s_mask;
1162 			}
1163 
1164 			sin_brd = (struct sockaddr_in *)npt_alloc(npt, sizeof(*sin_brd));
1165 			if (sin_brd == NULL)
1166 				return (ENOMEM);
1167 			sin_brd->sin_family = AF_INET;
1168 			sin_brd->sin_len = sizeof(*sin_brd);
1169 			sin_brd->sin_addr.s_addr = s_baddr;
1170 			attrs->ifa_broadcast = (struct sockaddr *)sin_brd;
1171 		}
1172 		dst = (struct sockaddr_in *)attrs->ifa_broadcast;
1173 	}
1174 
1175 	struct sockaddr_in mask = {
1176 		.sin_len = sizeof(struct sockaddr_in),
1177 		.sin_family = AF_INET,
1178 		.sin_addr.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0),
1179 	};
1180 	struct in_aliasreq req = {
1181 		.ifra_addr = *addr,
1182 		.ifra_mask = mask,
1183 		.ifra_vhid = attrs->ifaf_vhid,
1184 	};
1185 	if (dst != NULL)
1186 		req.ifra_dstaddr = *dst;
1187 
1188 	return (in_control_ioctl(SIOCAIFADDR, &req, ifp, nlp_get_cred(nlp)));
1189 }
1190 
1191 static int
1192 handle_deladdr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1193     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1194 {
1195 	struct sockaddr_in *addr = (struct sockaddr_in *)attrs->ifa_local;
1196 
1197 	if (addr == NULL)
1198 		addr = (struct sockaddr_in *)attrs->ifa_address;
1199 
1200 	if (addr == NULL) {
1201 		nlmsg_report_err_msg(npt, "empty IFA_ADDRESS/IFA_LOCAL");
1202 		return (EINVAL);
1203 	}
1204 
1205 	struct in_aliasreq req = { .ifra_addr = *addr };
1206 
1207 	return (in_control_ioctl(SIOCDIFADDR, &req, ifp, nlp_get_cred(nlp)));
1208 }
1209 #endif
1210 
1211 #ifdef INET6
1212 static int
1213 handle_newaddr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1214     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1215 {
1216 	struct sockaddr_in6 *addr, *dst;
1217 
1218 	if (attrs->ifa_prefixlen > 128) {
1219 		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1220 		return (EINVAL);
1221 	}
1222 
1223 	/*
1224 	 * In IPv6 implementation, adding non-P2P address to the P2P interface
1225 	 * is allowed.
1226 	 */
1227 	addr = (struct sockaddr_in6 *)(attrs->ifa_local);
1228 	dst = (struct sockaddr_in6 *)(attrs->ifa_address);
1229 
1230 	if (addr == NULL) {
1231 		addr = dst;
1232 		dst = NULL;
1233 	} else if (dst != NULL) {
1234 		if (IN6_ARE_ADDR_EQUAL(&addr->sin6_addr, &dst->sin6_addr)) {
1235 			/*
1236 			 * Sometimes Netlink users fills in both attributes
1237 			 * with the same address. It still means "non-p2p".
1238 			 */
1239 			dst = NULL;
1240 		}
1241 	}
1242 
1243 	if (addr == NULL) {
1244 		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1245 		return (EINVAL);
1246 	}
1247 
1248 	uint32_t flags = nl_flags_to_in6(attrs->ifa_flags) | attrs->ifaf_flags;
1249 
1250 	uint32_t pltime = 0, vltime = 0;
1251 	if (attrs->ifa_cacheinfo != 0) {
1252 		pltime = attrs->ifa_cacheinfo->ifa_prefered;
1253 		vltime = attrs->ifa_cacheinfo->ifa_valid;
1254 	}
1255 
1256 	struct sockaddr_in6 mask = {
1257 		.sin6_len = sizeof(struct sockaddr_in6),
1258 		.sin6_family = AF_INET6,
1259 	};
1260 	ip6_writemask(&mask.sin6_addr, attrs->ifa_prefixlen);
1261 
1262 	struct in6_aliasreq req = {
1263 		.ifra_addr = *addr,
1264 		.ifra_prefixmask = mask,
1265 		.ifra_flags = flags,
1266 		.ifra_lifetime = { .ia6t_vltime = vltime, .ia6t_pltime = pltime },
1267 		.ifra_vhid = attrs->ifaf_vhid,
1268 	};
1269 	if (dst != NULL)
1270 		req.ifra_dstaddr = *dst;
1271 
1272 	return (in6_control_ioctl(SIOCAIFADDR_IN6, &req, ifp, nlp_get_cred(nlp)));
1273 }
1274 
1275 static int
1276 handle_deladdr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1277     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1278 {
1279 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)attrs->ifa_local;
1280 
1281 	if (addr == NULL)
1282 		addr = (struct sockaddr_in6 *)(attrs->ifa_address);
1283 
1284 	if (addr == NULL) {
1285 		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1286 		return (EINVAL);
1287 	}
1288 
1289 	struct in6_aliasreq req = { .ifra_addr = *addr };
1290 
1291 	return (in6_control_ioctl(SIOCDIFADDR_IN6, &req, ifp, nlp_get_cred(nlp)));
1292 }
1293 #endif
1294 
1295 
1296 static int
1297 rtnl_handle_addr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1298 {
1299 	struct epoch_tracker et;
1300 	int error;
1301 
1302 	struct nl_parsed_ifa attrs = {};
1303 	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1304 	if (error != 0)
1305 		return (error);
1306 
1307 	NET_EPOCH_ENTER(et);
1308 	if_t ifp = ifnet_byindex_ref(attrs.ifa_index);
1309 	NET_EPOCH_EXIT(et);
1310 
1311 	if (ifp == NULL) {
1312 		nlmsg_report_err_msg(npt, "Unable to find interface with index %u",
1313 		    attrs.ifa_index);
1314 		return (ENOENT);
1315 	}
1316 	int if_flags = if_getflags(ifp);
1317 
1318 #if defined(INET) || defined(INET6)
1319 	bool new = hdr->nlmsg_type == NL_RTM_NEWADDR;
1320 #endif
1321 
1322 	/*
1323 	 * TODO: Properly handle NLM_F_CREATE / NLM_F_EXCL.
1324 	 * The current ioctl-based KPI always does an implicit create-or-replace.
1325 	 * It is not possible to specify fine-grained options.
1326 	 */
1327 
1328 	switch (attrs.ifa_family) {
1329 #ifdef INET
1330 	case AF_INET:
1331 		if (new)
1332 			error = handle_newaddr_inet(hdr, &attrs, ifp, nlp, npt);
1333 		else
1334 			error = handle_deladdr_inet(hdr, &attrs, ifp, nlp, npt);
1335 		break;
1336 #endif
1337 #ifdef INET6
1338 	case AF_INET6:
1339 		if (new)
1340 			error = handle_newaddr_inet6(hdr, &attrs, ifp, nlp, npt);
1341 		else
1342 			error = handle_deladdr_inet6(hdr, &attrs, ifp, nlp, npt);
1343 		break;
1344 #endif
1345 	default:
1346 		error = EAFNOSUPPORT;
1347 	}
1348 
1349 	if (error == 0 && !(if_flags & IFF_UP) && (if_getflags(ifp) & IFF_UP))
1350 		if_up(ifp);
1351 
1352 	if_rele(ifp);
1353 
1354 	return (error);
1355 }
1356 
1357 
1358 static void
1359 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
1360 {
1361 	struct nlmsghdr hdr = {};
1362 	struct nl_writer nw = {};
1363 	uint32_t group = 0;
1364 
1365 	switch (ifa->ifa_addr->sa_family) {
1366 #ifdef INET
1367 	case AF_INET:
1368 		group = RTNLGRP_IPV4_IFADDR;
1369 		break;
1370 #endif
1371 #ifdef INET6
1372 	case AF_INET6:
1373 		group = RTNLGRP_IPV6_IFADDR;
1374 		break;
1375 #endif
1376 	default:
1377 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
1378 		    ifa->ifa_addr->sa_family);
1379 		return;
1380 	}
1381 
1382 	if (!nl_has_listeners(NETLINK_ROUTE, group))
1383 		return;
1384 
1385 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
1386 		NL_LOG(LOG_DEBUG, "error allocating group writer");
1387 		return;
1388 	}
1389 
1390 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
1391 
1392 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
1393 	nlmsg_flush(&nw);
1394 }
1395 
1396 static void
1397 rtnl_handle_ifevent(if_t ifp, int nlmsg_type, int if_flags_mask)
1398 {
1399 	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
1400 	struct nl_writer nw = {};
1401 
1402 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
1403 		return;
1404 
1405 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
1406 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
1407 		return;
1408 	}
1409 	dump_iface(&nw, ifp, &hdr, if_flags_mask);
1410         nlmsg_flush(&nw);
1411 }
1412 
1413 static void
1414 rtnl_handle_ifattach(void *arg, if_t ifp)
1415 {
1416 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1417 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1418 }
1419 
1420 static void
1421 rtnl_handle_ifdetach(void *arg, if_t ifp)
1422 {
1423 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1424 	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
1425 }
1426 
1427 static void
1428 rtnl_handle_iflink(void *arg, if_t ifp)
1429 {
1430 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1431 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1432 }
1433 
1434 void
1435 rtnl_handle_ifnet_event(if_t ifp, int if_flags_mask)
1436 {
1437 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1438 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
1439 }
1440 
1441 static const struct rtnl_cmd_handler cmd_handlers[] = {
1442 	{
1443 		.cmd = NL_RTM_GETLINK,
1444 		.name = "RTM_GETLINK",
1445 		.cb = &rtnl_handle_getlink,
1446 		.flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
1447 	},
1448 	{
1449 		.cmd = NL_RTM_DELLINK,
1450 		.name = "RTM_DELLINK",
1451 		.cb = &rtnl_handle_dellink,
1452 		.priv = PRIV_NET_IFDESTROY,
1453 		.flags = RTNL_F_NOEPOCH,
1454 	},
1455 	{
1456 		.cmd = NL_RTM_NEWLINK,
1457 		.name = "RTM_NEWLINK",
1458 		.cb = &rtnl_handle_newlink,
1459 		.priv = PRIV_NET_IFCREATE,
1460 		.flags = RTNL_F_NOEPOCH,
1461 	},
1462 	{
1463 		.cmd = NL_RTM_GETADDR,
1464 		.name = "RTM_GETADDR",
1465 		.cb = &rtnl_handle_getaddr,
1466 		.flags = RTNL_F_ALLOW_NONVNET_JAIL,
1467 	},
1468 	{
1469 		.cmd = NL_RTM_NEWADDR,
1470 		.name = "RTM_NEWADDR",
1471 		.cb = &rtnl_handle_addr,
1472 		.priv = PRIV_NET_ADDIFADDR,
1473 		.flags = RTNL_F_NOEPOCH,
1474 	},
1475 	{
1476 		.cmd = NL_RTM_DELADDR,
1477 		.name = "RTM_DELADDR",
1478 		.cb = &rtnl_handle_addr,
1479 		.priv = PRIV_NET_DELIFADDR,
1480 		.flags = RTNL_F_NOEPOCH,
1481 	},
1482 };
1483 
1484 static const struct nlhdr_parser *all_parsers[] = {
1485 	&ifmsg_parser, &ifa_parser, &ifa_fbsd_parser,
1486 };
1487 
1488 void
1489 rtnl_iface_add_cloner(struct nl_cloner *cloner)
1490 {
1491 	sx_xlock(&rtnl_cloner_lock);
1492 	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
1493 	sx_xunlock(&rtnl_cloner_lock);
1494 }
1495 
1496 void
1497 rtnl_iface_del_cloner(struct nl_cloner *cloner)
1498 {
1499 	sx_xlock(&rtnl_cloner_lock);
1500 	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
1501 	sx_xunlock(&rtnl_cloner_lock);
1502 }
1503 
1504 void
1505 rtnl_ifaces_init(void)
1506 {
1507 	ifattach_event = EVENTHANDLER_REGISTER(
1508 	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
1509 	    EVENTHANDLER_PRI_ANY);
1510 	ifdetach_event = EVENTHANDLER_REGISTER(
1511 	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
1512 	    EVENTHANDLER_PRI_ANY);
1513 	ifaddr_event = EVENTHANDLER_REGISTER(
1514 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
1515 	    EVENTHANDLER_PRI_ANY);
1516 	iflink_event = EVENTHANDLER_REGISTER(
1517 	    ifnet_link_event, rtnl_handle_iflink, NULL,
1518 	    EVENTHANDLER_PRI_ANY);
1519 	NL_VERIFY_PARSERS(all_parsers);
1520 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
1521 }
1522 
1523 void
1524 rtnl_ifaces_destroy(void)
1525 {
1526 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
1527 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
1528 	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
1529 	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
1530 }
1531