xref: /freebsd/sys/netlink/route/iface.c (revision 1843dfb05ed80149f5a412180af882e3cb8f451b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_netlink.h"
29 
30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include <sys/types.h>
34 #include <sys/eventhandler.h>
35 #include <sys/kernel.h>
36 #include <sys/jail.h>
37 #include <sys/malloc.h>
38 #include <sys/socket.h>
39 #include <sys/sockio.h>
40 #include <sys/syslog.h>
41 
42 #include <net/if.h>
43 #include <net/if_dl.h>
44 #include <net/if_media.h>
45 #include <net/if_var.h>
46 #include <net/if_clone.h>
47 #include <net/route.h>
48 #include <net/route/nhop.h>
49 #include <net/route/route_ctl.h>
50 #include <netinet/in_var.h>
51 #include <netinet6/in6_var.h>
52 #include <netinet6/scope6_var.h> /* scope deembedding */
53 #include <netlink/netlink.h>
54 #include <netlink/netlink_ctl.h>
55 #include <netlink/netlink_route.h>
56 #include <netlink/route/route_var.h>
57 
58 #define	DEBUG_MOD_NAME	nl_iface
59 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
60 #include <netlink/netlink_debug.h>
61 _DECLARE_DEBUG(LOG_INFO);
62 
63 struct netlink_walkargs {
64 	struct nl_writer *nw;
65 	struct nlmsghdr hdr;
66 	struct nlpcb *so;
67 	struct ucred *cred;
68 	uint32_t fibnum;
69 	int family;
70 	int error;
71 	int count;
72 	int dumped;
73 };
74 
75 static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;
76 
77 static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);
78 
79 static struct sx rtnl_cloner_lock;
80 SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");
81 
82 /* These are external hooks for CARP. */
83 extern int	(*carp_get_vhid_p)(struct ifaddr *);
84 
85 /*
86  * RTM_GETLINK request
87  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
88  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
89  *
90  * Reply:
91  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
92 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
93 
94 [
95 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
96 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
97 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
98 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
99 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
100 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
101  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
102 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
103 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
104 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
105 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
106 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
107 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
108 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
109 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
110 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
111 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
112 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
113 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
114  */
115 
116 struct if_state {
117 	uint8_t		ifla_operstate;
118 	uint8_t		ifla_carrier;
119 };
120 
121 static void
122 get_operstate_ether(if_t ifp, struct if_state *pstate)
123 {
124 	struct ifmediareq ifmr = {};
125 	int error;
126 	error = if_ioctl(ifp, SIOCGIFMEDIA, (void *)&ifmr);
127 
128 	if (error != 0) {
129 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
130 		    if_name(ifp), error);
131 		return;
132 	}
133 
134 	switch (IFM_TYPE(ifmr.ifm_active)) {
135 	case IFM_ETHER:
136 		if (ifmr.ifm_status & IFM_ACTIVE) {
137 			pstate->ifla_carrier = 1;
138 			if (if_getflags(ifp) & IFF_MONITOR)
139 				pstate->ifla_operstate = IF_OPER_DORMANT;
140 			else
141 				pstate->ifla_operstate = IF_OPER_UP;
142 		} else
143 			pstate->ifla_operstate = IF_OPER_DOWN;
144 	}
145 }
146 
147 static bool
148 get_stats(struct nl_writer *nw, if_t ifp)
149 {
150 	struct rtnl_link_stats64 *stats;
151 
152 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
153 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
154 	if (nla == NULL)
155 		return (false);
156 	nla->nla_type = IFLA_STATS64;
157 	nla->nla_len = nla_len;
158 	stats = (struct rtnl_link_stats64 *)(nla + 1);
159 
160 	stats->rx_packets = if_getcounter(ifp, IFCOUNTER_IPACKETS);
161 	stats->tx_packets = if_getcounter(ifp, IFCOUNTER_OPACKETS);
162 	stats->rx_bytes = if_getcounter(ifp, IFCOUNTER_IBYTES);
163 	stats->tx_bytes = if_getcounter(ifp, IFCOUNTER_OBYTES);
164 	stats->rx_errors = if_getcounter(ifp, IFCOUNTER_IERRORS);
165 	stats->tx_errors = if_getcounter(ifp, IFCOUNTER_OERRORS);
166 	stats->rx_dropped = if_getcounter(ifp, IFCOUNTER_IQDROPS);
167 	stats->tx_dropped = if_getcounter(ifp, IFCOUNTER_OQDROPS);
168 	stats->multicast = if_getcounter(ifp, IFCOUNTER_IMCASTS);
169 	stats->rx_nohandler = if_getcounter(ifp, IFCOUNTER_NOPROTO);
170 
171 	return (true);
172 }
173 
174 static void
175 get_operstate(if_t ifp, struct if_state *pstate)
176 {
177 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
178 	pstate->ifla_carrier = 0; /* no carrier */
179 
180 	switch (if_gettype(ifp)) {
181 	case IFT_ETHER:
182 	case IFT_L2VLAN:
183 		get_operstate_ether(ifp, pstate);
184 		break;
185 	default:
186 		/* Map admin state to the operstate */
187 		if (if_getflags(ifp) & IFF_UP) {
188 			pstate->ifla_operstate = IF_OPER_UP;
189 			pstate->ifla_carrier = 1;
190 		} else
191 			pstate->ifla_operstate = IF_OPER_DOWN;
192 		break;
193 	}
194 }
195 
196 static void
197 get_hwaddr(struct nl_writer *nw, if_t ifp)
198 {
199 	struct ifreq ifr = {};
200 
201 	if (if_gethwaddr(ifp, &ifr) == 0) {
202 		nlattr_add(nw, IFLAF_ORIG_HWADDR, if_getaddrlen(ifp),
203 		    ifr.ifr_addr.sa_data);
204 	}
205 }
206 
207 static unsigned
208 ifp_flags_to_netlink(const if_t ifp)
209 {
210         return (if_getflags(ifp) | if_getdrvflags(ifp));
211 }
212 
213 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
214 static bool
215 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
216 {
217         uint32_t addr_len = 0;
218         const void *addr_data = NULL;
219 #ifdef INET6
220         struct in6_addr addr6;
221 #endif
222 
223         if (sa == NULL)
224                 return (true);
225 
226         switch (sa->sa_family) {
227 #ifdef INET
228         case AF_INET:
229                 addr_len = sizeof(struct in_addr);
230                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
231                 break;
232 #endif
233 #ifdef INET6
234         case AF_INET6:
235                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
236                 addr_len = sizeof(struct in6_addr);
237                 addr_data = &addr6;
238                 break;
239 #endif
240         case AF_LINK:
241                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
242                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
243                 break;
244 	case AF_UNSPEC:
245 		/* Ignore empty SAs without warning */
246 		return (true);
247         default:
248                 NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family);
249                 return (true);
250         }
251 
252         return (nlattr_add(nw, attr, addr_len, addr_data));
253 }
254 
255 static bool
256 dump_iface_caps(struct nl_writer *nw, struct ifnet *ifp)
257 {
258 	int off = nlattr_add_nested(nw, IFLAF_CAPS);
259 	uint32_t active_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {};
260 	uint32_t all_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {};
261 
262 	MPASS(sizeof(active_caps) >= 8);
263 	MPASS(sizeof(all_caps) >= 8);
264 
265 	if (off == 0)
266 		return (false);
267 
268 	active_caps[0] = (uint32_t)if_getcapabilities(ifp);
269 	all_caps[0] = (uint32_t)if_getcapenable(ifp);
270 	active_caps[1] = (uint32_t)if_getcapabilities2(ifp);
271 	all_caps[1] = (uint32_t)if_getcapenable2(ifp);
272 
273 	nlattr_add_u32(nw, NLA_BITSET_SIZE, IFCAP_B_SIZE);
274 	nlattr_add(nw, NLA_BITSET_MASK, sizeof(all_caps), all_caps);
275 	nlattr_add(nw, NLA_BITSET_VALUE, sizeof(active_caps), active_caps);
276 
277 	nlattr_set_len(nw, off);
278 
279 	return (true);
280 }
281 
282 /*
283  * Dumps interface state, properties and metrics.
284  * @nw: message writer
285  * @ifp: target interface
286  * @hdr: template header
287  * @if_flags_mask: changed if_[drv]_flags bitmask
288  *
289  * This function is called without epoch and MAY sleep.
290  */
291 static bool
292 dump_iface(struct nl_writer *nw, if_t ifp, const struct nlmsghdr *hdr,
293     int if_flags_mask)
294 {
295         struct ifinfomsg *ifinfo;
296 
297         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
298 
299 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
300 		goto enomem;
301 
302         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
303         ifinfo->ifi_family = AF_UNSPEC;
304         ifinfo->__ifi_pad = 0;
305         ifinfo->ifi_type = if_gettype(ifp);
306         ifinfo->ifi_index = if_getindex(ifp);
307         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
308         ifinfo->ifi_change = if_flags_mask;
309 
310 	struct if_state ifs = {};
311 	get_operstate(ifp, &ifs);
312 
313 	if (ifs.ifla_operstate == IF_OPER_UP)
314 		ifinfo->ifi_flags |= IFF_LOWER_UP;
315 
316         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
317         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
318         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
319 
320 /*
321         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
322         nlattr_add_u8(nw, IFLA_LINKMODE, val);
323 */
324         if (if_getaddrlen(ifp) != 0) {
325 		struct ifaddr *ifa = if_getifaddr(ifp);
326 
327                 dump_sa(nw, IFLA_ADDRESS, ifa->ifa_addr);
328         }
329 
330         if ((if_getbroadcastaddr(ifp) != NULL)) {
331 		nlattr_add(nw, IFLA_BROADCAST, if_getaddrlen(ifp),
332 		    if_getbroadcastaddr(ifp));
333         }
334 
335         nlattr_add_u32(nw, IFLA_MTU, if_getmtu(ifp));
336 /*
337         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
338         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
339         nlattr_add_u32(nw, IFLA_GROUP, 0);
340 */
341 
342 	if (if_getdescr(ifp) != NULL)
343 		nlattr_add_string(nw, IFLA_IFALIAS, if_getdescr(ifp));
344 
345 	/* Store FreeBSD-specific attributes */
346 	int off = nlattr_add_nested(nw, IFLA_FREEBSD);
347 	if (off != 0) {
348 		get_hwaddr(nw, ifp);
349 		dump_iface_caps(nw, ifp);
350 
351 		nlattr_set_len(nw, off);
352 	}
353 
354 	get_stats(nw, ifp);
355 
356 	uint32_t val = (if_getflags(ifp) & IFF_PROMISC) != 0;
357         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
358 
359 	ifc_dump_ifp_nl(ifp, nw);
360 
361         if (nlmsg_end(nw))
362 		return (true);
363 
364 enomem:
365         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
366         nlmsg_abort(nw);
367         return (false);
368 }
369 
370 static bool
371 check_ifmsg(void *hdr, struct nl_pstate *npt)
372 {
373 	struct ifinfomsg *ifm = hdr;
374 
375 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
376 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
377 		nlmsg_report_err_msg(npt,
378 		    "strict checking: non-zero values in ifinfomsg header");
379 		return (false);
380 	}
381 
382 	return (true);
383 }
384 
385 #define	_IN(_field)	offsetof(struct ifinfomsg, _field)
386 #define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
387 static const struct nlfield_parser nlf_p_if[] = {
388 	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
389 	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
390 	{ .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 },
391 	{ .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 },
392 };
393 
394 static const struct nlattr_parser nla_p_linfo[] = {
395 	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
396 	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
397 };
398 NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);
399 
400 static const struct nlattr_parser nla_p_if[] = {
401 	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
402 	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
403 	{ .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 },
404 	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
405 	{ .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string },
406 	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
407 	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
408 };
409 #undef _IN
410 #undef _OUT
411 NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
412 
413 static bool
414 match_iface(if_t ifp, void *_arg)
415 {
416 	struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg;
417 
418 	if (attrs->ifi_index != 0 && attrs->ifi_index != if_getindex(ifp))
419 		return (false);
420 	if (attrs->ifi_type != 0 && attrs->ifi_index != if_gettype(ifp))
421 		return (false);
422 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
423 		return (false);
424 	/* TODO: add group match */
425 
426 	return (true);
427 }
428 
429 static int
430 dump_cb(if_t ifp, void *_arg)
431 {
432 	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
433 	if (!dump_iface(wa->nw, ifp, &wa->hdr, 0))
434 		return (ENOMEM);
435 	return (0);
436 }
437 
438 /*
439  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
440  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
441  *   [
442  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
443  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
444  *   ]
445  */
446 static int
447 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
448 {
449 	struct epoch_tracker et;
450         if_t ifp;
451 	int error = 0;
452 
453 	struct nl_parsed_link attrs = {};
454 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
455 	if (error != 0)
456 		return (error);
457 
458 	struct netlink_walkargs wa = {
459 		.so = nlp,
460 		.nw = npt->nw,
461 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
462 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
463 		.hdr.nlmsg_flags = hdr->nlmsg_flags,
464 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
465 	};
466 
467 	/* Fast track for an interface w/ explicit name or index match */
468 	if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) {
469 		if (attrs.ifi_index != 0) {
470 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u",
471 			    attrs.ifi_index);
472 			NET_EPOCH_ENTER(et);
473 			ifp = ifnet_byindex_ref(attrs.ifi_index);
474 			NET_EPOCH_EXIT(et);
475 		} else {
476 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s",
477 			    attrs.ifla_ifname);
478 			ifp = ifunit_ref(attrs.ifla_ifname);
479 		}
480 
481 		if (ifp != NULL) {
482 			if (match_iface(ifp, &attrs)) {
483 				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
484 					error = ENOMEM;
485 			} else
486 				error = ENODEV;
487 			if_rele(ifp);
488 		} else
489 			error = ENODEV;
490 		return (error);
491 	}
492 
493 	/* Always treat non-direct-match as a multipart message */
494 	wa.hdr.nlmsg_flags |= NLM_F_MULTI;
495 
496 	/*
497 	 * Fetching some link properties require performing ioctl's that may be blocking.
498 	 * Address it by saving referenced pointers of the matching links,
499 	 * exiting from epoch and going through the list one-by-one.
500 	 */
501 
502 	NL_LOG(LOG_DEBUG2, "Start dump");
503 	if_foreach_sleep(match_iface, &attrs, dump_cb, &wa);
504 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
505 
506 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
507                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
508                 return (ENOMEM);
509         }
510 
511 	return (error);
512 }
513 
514 /*
515  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
516  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
517  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
518  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
519  *   [
520  *    {nla_len=16, nla_type=IFLA_LINKINFO},
521  *     [
522  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
523  *     ]
524  *    ]
525  */
526 
527 static int
528 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
529 {
530 	struct epoch_tracker et;
531         if_t ifp;
532 	int error;
533 
534 	struct nl_parsed_link attrs = {};
535 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
536 	if (error != 0)
537 		return (error);
538 
539 	NET_EPOCH_ENTER(et);
540 	ifp = ifnet_byindex_ref(attrs.ifi_index);
541 	NET_EPOCH_EXIT(et);
542 	if (ifp == NULL) {
543 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
544 		return (ENOENT);
545 	}
546 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
547 
548 	sx_xlock(&ifnet_detach_sxlock);
549 	error = if_clone_destroy(if_name(ifp));
550 	sx_xunlock(&ifnet_detach_sxlock);
551 
552 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
553 
554 	if_rele(ifp);
555 	return (error);
556 }
557 
558 /*
559  * New link:
560  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0},
561  *   {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
562  *    [
563  *     {{nla_len=8, nla_type=IFLA_MTU}, 123},
564  *     {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"},
565  *     {{nla_len=24, nla_type=IFLA_LINKINFO},
566  *      [
567  *       {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...},
568  *       {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]}
569  *
570  * Update link:
571  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0},
572  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0},
573  * {{nla_len=8, nla_type=IFLA_MTU}, 123}}
574  *
575  *
576  * Check command availability:
577  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0},
578  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
579  */
580 
581 
582 static int
583 create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
584     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
585 {
586 	if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) {
587 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
588 		return (EINVAL);
589 	}
590 	if (lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) {
591 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
592 		return (EINVAL);
593 	}
594 
595 	struct ifc_data_nl ifd = {
596 		.flags = IFC_F_CREATE,
597 		.lattrs = lattrs,
598 		.bm = bm,
599 		.npt = npt,
600 	};
601 	if (ifc_create_ifp_nl(lattrs->ifla_ifname, &ifd) && ifd.error == 0)
602 		nl_store_ifp_cookie(npt, ifd.ifp);
603 
604 	return (ifd.error);
605 }
606 
607 static int
608 modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
609     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
610 {
611 	if_t ifp = NULL;
612 	struct epoch_tracker et;
613 
614 	if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) {
615 		/*
616 		 * Applications like ip(8) verify RTM_NEWLINK command
617 		 * existence by calling it with empty arguments. Always
618 		 * return "innocent" error in that case.
619 		 */
620 		NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field");
621 		return (EPERM);
622 	}
623 
624 	if (lattrs->ifi_index != 0) {
625 		NET_EPOCH_ENTER(et);
626 		ifp = ifnet_byindex_ref(lattrs->ifi_index);
627 		NET_EPOCH_EXIT(et);
628 		if (ifp == NULL) {
629 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u",
630 			    lattrs->ifi_index);
631 			return (ENOENT);
632 		}
633 	}
634 
635 	if (ifp == NULL && lattrs->ifla_ifname != NULL) {
636 		ifp = ifunit_ref(lattrs->ifla_ifname);
637 		if (ifp == NULL) {
638 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s",
639 			    lattrs->ifla_ifname);
640 			return (ENOENT);
641 		}
642 	}
643 
644 	MPASS(ifp != NULL);
645 
646 	/*
647 	 * Modification request can address either
648 	 * 1) cloned interface, in which case we call the cloner-specific
649 	 *  modification routine
650 	 * or
651 	 * 2) non-cloned (e.g. "physical") interface, in which case we call
652 	 *  generic modification routine
653 	 */
654 	struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt };
655 	if (!ifc_modify_ifp_nl(ifp, &ifd))
656 		ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt);
657 
658 	if_rele(ifp);
659 
660 	return (ifd.error);
661 }
662 
663 
664 static int
665 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
666 {
667 	struct nlattr_bmask bm;
668 	int error;
669 
670 	struct nl_parsed_link attrs = {};
671 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
672 	if (error != 0)
673 		return (error);
674 	nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm);
675 
676 	if (hdr->nlmsg_flags & NLM_F_CREATE)
677 		return (create_link(hdr, &attrs, &bm, nlp, npt));
678 	else
679 		return (modify_link(hdr, &attrs, &bm, nlp, npt));
680 }
681 
682 static void
683 set_scope6(struct sockaddr *sa, uint32_t ifindex)
684 {
685 #ifdef INET6
686 	if (sa != NULL && sa->sa_family == AF_INET6) {
687 		struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa;
688 
689 		if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr))
690 			in6_set_unicast_scopeid(&sa6->sin6_addr, ifindex);
691 	}
692 #endif
693 }
694 
695 static bool
696 check_sa_family(const struct sockaddr *sa, int family, const char *attr_name,
697     struct nl_pstate *npt)
698 {
699 	if (sa == NULL || sa->sa_family == family)
700 		return (true);
701 
702 	nlmsg_report_err_msg(npt, "wrong family for %s attribute: %d != %d",
703 	    attr_name, family, sa->sa_family);
704 	return (false);
705 }
706 
707 struct nl_parsed_ifa {
708 	uint8_t			ifa_family;
709 	uint8_t			ifa_prefixlen;
710 	uint8_t			ifa_scope;
711 	uint32_t		ifa_index;
712 	uint32_t		ifa_flags;
713 	uint32_t		ifaf_vhid;
714 	uint32_t		ifaf_flags;
715 	struct sockaddr		*ifa_address;
716 	struct sockaddr		*ifa_local;
717 	struct sockaddr		*ifa_broadcast;
718 	struct ifa_cacheinfo	*ifa_cacheinfo;
719 	struct sockaddr		*f_ifa_addr;
720 	struct sockaddr		*f_ifa_dst;
721 };
722 
723 static int
724 nlattr_get_cinfo(struct nlattr *nla, struct nl_pstate *npt,
725     const void *arg __unused, void *target)
726 {
727 	if (__predict_false(NLA_DATA_LEN(nla) != sizeof(struct ifa_cacheinfo))) {
728 		NLMSG_REPORT_ERR_MSG(npt, "nla type %d size(%u) is not ifa_cacheinfo",
729 		    nla->nla_type, NLA_DATA_LEN(nla));
730 		return (EINVAL);
731 	}
732 	*((struct ifa_cacheinfo **)target) = (struct ifa_cacheinfo *)NL_RTA_DATA(nla);
733 	return (0);
734 }
735 
736 #define	_IN(_field)	offsetof(struct ifaddrmsg, _field)
737 #define	_OUT(_field)	offsetof(struct nl_parsed_ifa, _field)
738 static const struct nlfield_parser nlf_p_ifa[] = {
739 	{ .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 },
740 	{ .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 },
741 	{ .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 },
742 	{ .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 },
743 	{ .off_in = _IN(ifa_index), .off_out = _OUT(ifa_index), .cb = nlf_get_u32 },
744 };
745 
746 static const struct nlattr_parser nla_p_ifa_fbsd[] = {
747 	{ .type = IFAF_VHID, .off = _OUT(ifaf_vhid), .cb = nlattr_get_uint32 },
748 	{ .type = IFAF_FLAGS, .off = _OUT(ifaf_flags), .cb = nlattr_get_uint32 },
749 };
750 NL_DECLARE_ATTR_PARSER(ifa_fbsd_parser, nla_p_ifa_fbsd);
751 
752 static const struct nlattr_parser nla_p_ifa[] = {
753 	{ .type = IFA_ADDRESS, .off = _OUT(ifa_address), .cb = nlattr_get_ip },
754 	{ .type = IFA_LOCAL, .off = _OUT(ifa_local), .cb = nlattr_get_ip },
755 	{ .type = IFA_BROADCAST, .off = _OUT(ifa_broadcast), .cb = nlattr_get_ip },
756 	{ .type = IFA_CACHEINFO, .off = _OUT(ifa_cacheinfo), .cb = nlattr_get_cinfo },
757 	{ .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 },
758 	{ .type = IFA_FREEBSD, .arg = &ifa_fbsd_parser, .cb = nlattr_get_nested },
759 };
760 #undef _IN
761 #undef _OUT
762 
763 static bool
764 post_p_ifa(void *_attrs, struct nl_pstate *npt)
765 {
766 	struct nl_parsed_ifa *attrs = (struct nl_parsed_ifa *)_attrs;
767 
768 	if (!check_sa_family(attrs->ifa_address, attrs->ifa_family, "IFA_ADDRESS", npt))
769 		return (false);
770 	if (!check_sa_family(attrs->ifa_local, attrs->ifa_family, "IFA_LOCAL", npt))
771 		return (false);
772 	if (!check_sa_family(attrs->ifa_broadcast, attrs->ifa_family, "IFA_BROADADDR", npt))
773 		return (false);
774 
775 	set_scope6(attrs->ifa_address, attrs->ifa_index);
776 	set_scope6(attrs->ifa_local, attrs->ifa_index);
777 
778 	return (true);
779 }
780 
781 NL_DECLARE_PARSER_EXT(ifa_parser, struct ifaddrmsg, NULL, nlf_p_ifa, nla_p_ifa, post_p_ifa);
782 
783 
784 /*
785 
786 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
787  [
788         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
789         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
790         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
791         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
792         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
793 ---
794 
795 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
796  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
797    [
798     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
799    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
800    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
801 */
802 
803 static uint8_t
804 ifa_get_scope(const struct ifaddr *ifa)
805 {
806         const struct sockaddr *sa;
807         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
808 
809         sa = ifa->ifa_addr;
810         switch (sa->sa_family) {
811 #ifdef INET
812         case AF_INET:
813                 {
814                         struct in_addr addr;
815                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
816                         if (IN_LOOPBACK(addr.s_addr))
817                                 addr_scope = RT_SCOPE_HOST;
818                         else if (IN_LINKLOCAL(addr.s_addr))
819                                 addr_scope = RT_SCOPE_LINK;
820                         break;
821                 }
822 #endif
823 #ifdef INET6
824         case AF_INET6:
825                 {
826                         const struct in6_addr *addr;
827                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
828                         if (IN6_IS_ADDR_LOOPBACK(addr))
829                                 addr_scope = RT_SCOPE_HOST;
830                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
831                                 addr_scope = RT_SCOPE_LINK;
832                         break;
833                 }
834 #endif
835         }
836 
837         return (addr_scope);
838 }
839 
840 #ifdef INET6
841 static uint8_t
842 inet6_get_plen(const struct in6_addr *addr)
843 {
844 
845 	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
846 	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
847 }
848 #endif
849 
850 static uint8_t
851 get_sa_plen(const struct sockaddr *sa)
852 {
853 #ifdef INET
854         const struct in_addr *paddr;
855 #endif
856 #ifdef INET6
857         const struct in6_addr *paddr6;
858 #endif
859 
860         switch (sa->sa_family) {
861 #ifdef INET
862         case AF_INET:
863                 paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
864                 return bitcount32(paddr->s_addr);;
865 #endif
866 #ifdef INET6
867         case AF_INET6:
868                 paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
869                 return inet6_get_plen(paddr6);
870 #endif
871         }
872 
873         return (0);
874 }
875 
876 #ifdef INET6
877 static uint32_t
878 in6_flags_to_nl(uint32_t flags)
879 {
880 	uint32_t nl_flags = 0;
881 
882 	if (flags & IN6_IFF_TEMPORARY)
883 		nl_flags |= IFA_F_TEMPORARY;
884 	if (flags & IN6_IFF_NODAD)
885 		nl_flags |= IFA_F_NODAD;
886 	if (flags & IN6_IFF_DEPRECATED)
887 		nl_flags |= IFA_F_DEPRECATED;
888 	if (flags & IN6_IFF_TENTATIVE)
889 		nl_flags |= IFA_F_TENTATIVE;
890 	if ((flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == 0)
891 		flags |= IFA_F_PERMANENT;
892 	if (flags & IN6_IFF_DUPLICATED)
893 		flags |= IFA_F_DADFAILED;
894 	return (nl_flags);
895 }
896 
897 static uint32_t
898 nl_flags_to_in6(uint32_t flags)
899 {
900 	uint32_t in6_flags = 0;
901 
902 	if (flags & IFA_F_TEMPORARY)
903 		in6_flags |= IN6_IFF_TEMPORARY;
904 	if (flags & IFA_F_NODAD)
905 		in6_flags |= IN6_IFF_NODAD;
906 	if (flags & IFA_F_DEPRECATED)
907 		in6_flags |= IN6_IFF_DEPRECATED;
908 	if (flags & IFA_F_TENTATIVE)
909 		in6_flags |= IN6_IFF_TENTATIVE;
910 	if (flags & IFA_F_DADFAILED)
911 		in6_flags |= IN6_IFF_DUPLICATED;
912 
913 	return (in6_flags);
914 }
915 
916 static void
917 export_cache_info6(struct nl_writer *nw, const struct in6_ifaddr *ia)
918 {
919 	struct ifa_cacheinfo ci = {
920 		.cstamp = ia->ia6_createtime * 1000,
921 		.tstamp = ia->ia6_updatetime * 1000,
922 		.ifa_prefered = ia->ia6_lifetime.ia6t_pltime,
923 		.ifa_valid = ia->ia6_lifetime.ia6t_vltime,
924 	};
925 
926 	nlattr_add(nw, IFA_CACHEINFO, sizeof(ci), &ci);
927 }
928 #endif
929 
930 static void
931 export_cache_info(struct nl_writer *nw, struct ifaddr *ifa)
932 {
933 	switch (ifa->ifa_addr->sa_family) {
934 #ifdef INET6
935 	case AF_INET6:
936 		export_cache_info6(nw, (struct in6_ifaddr *)ifa);
937 		break;
938 #endif
939 	}
940 }
941 
942 /*
943  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
944            ('IFA_LOCAL', '12.0.0.1'),
945            ('IFA_LABEL', 'eth10'),
946            ('IFA_FLAGS', 128),
947            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
948  */
949 static bool
950 dump_iface_addr(struct nl_writer *nw, if_t ifp, struct ifaddr *ifa,
951     const struct nlmsghdr *hdr)
952 {
953         struct ifaddrmsg *ifamsg;
954         struct sockaddr *sa = ifa->ifa_addr;
955         struct sockaddr *sa_dst = ifa->ifa_dstaddr;
956 
957         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
958             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
959 
960 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
961 		goto enomem;
962 
963         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
964         ifamsg->ifa_family = sa->sa_family;
965         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
966         ifamsg->ifa_flags = 0; // ifa_flags is useless
967         ifamsg->ifa_scope = ifa_get_scope(ifa);
968         ifamsg->ifa_index = if_getindex(ifp);
969 
970 	if ((if_getflags(ifp) & IFF_POINTOPOINT) && sa_dst != NULL && sa_dst->sa_family != 0) {
971 		/* P2P interface may have IPv6 LL with no dst address */
972 		dump_sa(nw, IFA_ADDRESS, sa_dst);
973 		dump_sa(nw, IFA_LOCAL, sa);
974 	} else {
975 		dump_sa(nw, IFA_ADDRESS, sa);
976 #ifdef INET
977 		/*
978 		 * In most cases, IFA_ADDRESS == IFA_LOCAL
979 		 * Skip IFA_LOCAL for anything except INET
980 		 */
981 		if (sa->sa_family == AF_INET)
982 			dump_sa(nw, IFA_LOCAL, sa);
983 #endif
984 	}
985 	if (if_getflags(ifp) & IFF_BROADCAST)
986 		dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr);
987 
988         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
989 
990         uint32_t nl_ifa_flags = 0;
991 #ifdef INET6
992 	if (sa->sa_family == AF_INET6) {
993 		struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
994 		nl_ifa_flags = in6_flags_to_nl(ia->ia6_flags);
995 	}
996 #endif
997         nlattr_add_u32(nw, IFA_FLAGS, nl_ifa_flags);
998 
999 	export_cache_info(nw, ifa);
1000 
1001 	/* Store FreeBSD-specific attributes */
1002 	int off = nlattr_add_nested(nw, IFA_FREEBSD);
1003 	if (off != 0) {
1004 		if (ifa->ifa_carp != NULL && carp_get_vhid_p != NULL) {
1005 			uint32_t vhid  = (uint32_t)(*carp_get_vhid_p)(ifa);
1006 			nlattr_add_u32(nw, IFAF_VHID, vhid);
1007 		}
1008 #ifdef INET6
1009 		if (sa->sa_family == AF_INET6) {
1010 			uint32_t ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags;
1011 
1012 			nlattr_add_u32(nw, IFAF_FLAGS, ifa_flags);
1013 		}
1014 #endif
1015 
1016 		nlattr_set_len(nw, off);
1017 	}
1018 
1019 	if (nlmsg_end(nw))
1020 		return (true);
1021 enomem:
1022         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
1023             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
1024         nlmsg_abort(nw);
1025         return (false);
1026 }
1027 
1028 static int
1029 dump_iface_addrs(struct netlink_walkargs *wa, if_t ifp)
1030 {
1031         struct ifaddr *ifa;
1032 	struct ifa_iter it;
1033 	int error = 0;
1034 
1035 	for (ifa = ifa_iter_start(ifp, &it); ifa != NULL; ifa = ifa_iter_next(&it)) {
1036 		if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family)
1037 			continue;
1038 		if (ifa->ifa_addr->sa_family == AF_LINK)
1039 			continue;
1040 		if (prison_if(wa->cred, ifa->ifa_addr) != 0)
1041 			continue;
1042 		wa->count++;
1043 		if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr)) {
1044 			error = ENOMEM;
1045 			break;
1046 		}
1047 		wa->dumped++;
1048 	}
1049 	ifa_iter_finish(&it);
1050 
1051 	return (error);
1052 }
1053 
1054 static int
1055 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1056 {
1057         if_t ifp;
1058 	int error = 0;
1059 
1060 	struct nl_parsed_ifa attrs = {};
1061 	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1062 	if (error != 0)
1063 		return (error);
1064 
1065 	struct netlink_walkargs wa = {
1066 		.so = nlp,
1067 		.nw = npt->nw,
1068 		.cred = nlp_get_cred(nlp),
1069 		.family = attrs.ifa_family,
1070 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
1071 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
1072 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
1073 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
1074 	};
1075 
1076 	NL_LOG(LOG_DEBUG2, "Start dump");
1077 
1078 	if (attrs.ifa_index != 0) {
1079 		ifp = ifnet_byindex(attrs.ifa_index);
1080 		if (ifp == NULL)
1081 			error = ENOENT;
1082 		else
1083 			error = dump_iface_addrs(&wa, ifp);
1084 	} else {
1085 		struct if_iter it;
1086 
1087 		for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it)) {
1088 			error = dump_iface_addrs(&wa, ifp);
1089 			if (error != 0)
1090 				break;
1091 		}
1092 		if_iter_finish(&it);
1093 	}
1094 
1095 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
1096 
1097 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
1098                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
1099                 return (ENOMEM);
1100         }
1101 
1102 	return (error);
1103 }
1104 
1105 #ifdef INET
1106 static int
1107 handle_newaddr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1108     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1109 {
1110 	int plen = attrs->ifa_prefixlen;
1111 	int if_flags = if_getflags(ifp);
1112 	struct sockaddr_in *addr, *dst;
1113 
1114 	if (plen > 32) {
1115 		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1116 		return (EINVAL);
1117 	};
1118 
1119 	if (if_flags & IFF_POINTOPOINT) {
1120 		/*
1121 		 * Only P2P IFAs are allowed by the implementation.
1122 		 */
1123 		if (attrs->ifa_address == NULL || attrs->ifa_local == NULL) {
1124 			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1125 			return (EINVAL);
1126 		}
1127 		addr = (struct sockaddr_in *)attrs->ifa_local;
1128 		dst = (struct sockaddr_in *)attrs->ifa_address;
1129 	} else {
1130 		/*
1131 		 * Map the Netlink attributes to FreeBSD ifa layout.
1132 		 * If only IFA_ADDRESS or IFA_LOCAL is set OR
1133 		 * both are set to the same value => ifa is not p2p
1134 		 * and the attribute value contains interface address.
1135 		 *
1136 		 * Otherwise (both IFA_ADDRESS and IFA_LOCAL are set and
1137 		 * different), IFA_LOCAL contains an interface address and
1138 		 * IFA_ADDRESS contains peer address.
1139 		 */
1140 		addr = (struct sockaddr_in *)attrs->ifa_local;
1141 		if (addr == NULL)
1142 			addr = (struct sockaddr_in *)attrs->ifa_address;
1143 
1144 		if (addr == NULL) {
1145 			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1146 			return (EINVAL);
1147 		}
1148 
1149 		/* Generate broadcast address if not set */
1150 		if ((if_flags & IFF_BROADCAST) && attrs->ifa_broadcast == NULL) {
1151 			uint32_t s_baddr;
1152 			struct sockaddr_in *sin_brd;
1153 
1154 			if (plen == 31)
1155 				s_baddr = INADDR_BROADCAST; /* RFC 3021 */
1156 			else {
1157 				uint32_t s_mask;
1158 
1159 				s_mask = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
1160 				s_baddr = addr->sin_addr.s_addr | ~s_mask;
1161 			}
1162 
1163 			sin_brd = (struct sockaddr_in *)npt_alloc(npt, sizeof(*sin_brd));
1164 			if (sin_brd == NULL)
1165 				return (ENOMEM);
1166 			sin_brd->sin_family = AF_INET;
1167 			sin_brd->sin_len = sizeof(*sin_brd);
1168 			sin_brd->sin_addr.s_addr = s_baddr;
1169 			attrs->ifa_broadcast = (struct sockaddr *)sin_brd;
1170 		}
1171 		dst = (struct sockaddr_in *)attrs->ifa_broadcast;
1172 	}
1173 
1174 	struct sockaddr_in mask = {
1175 		.sin_len = sizeof(struct sockaddr_in),
1176 		.sin_family = AF_INET,
1177 		.sin_addr.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0),
1178 	};
1179 	struct in_aliasreq req = {
1180 		.ifra_addr = *addr,
1181 		.ifra_mask = mask,
1182 		.ifra_vhid = attrs->ifaf_vhid,
1183 	};
1184 	if (dst != NULL)
1185 		req.ifra_dstaddr = *dst;
1186 
1187 	return (in_control_ioctl(SIOCAIFADDR, &req, ifp, nlp_get_cred(nlp)));
1188 }
1189 
1190 static int
1191 handle_deladdr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1192     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1193 {
1194 	struct sockaddr_in *addr = (struct sockaddr_in *)attrs->ifa_local;
1195 
1196 	if (addr == NULL)
1197 		addr = (struct sockaddr_in *)attrs->ifa_address;
1198 
1199 	if (addr == NULL) {
1200 		nlmsg_report_err_msg(npt, "empty IFA_ADDRESS/IFA_LOCAL");
1201 		return (EINVAL);
1202 	}
1203 
1204 	struct in_aliasreq req = { .ifra_addr = *addr };
1205 
1206 	return (in_control_ioctl(SIOCDIFADDR, &req, ifp, nlp_get_cred(nlp)));
1207 }
1208 #endif
1209 
1210 #ifdef INET6
1211 static int
1212 handle_newaddr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1213     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1214 {
1215 	struct sockaddr_in6 *addr, *dst;
1216 
1217 	if (attrs->ifa_prefixlen > 128) {
1218 		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1219 		return (EINVAL);
1220 	}
1221 
1222 	/*
1223 	 * In IPv6 implementation, adding non-P2P address to the P2P interface
1224 	 * is allowed.
1225 	 */
1226 	addr = (struct sockaddr_in6 *)(attrs->ifa_local);
1227 	dst = (struct sockaddr_in6 *)(attrs->ifa_address);
1228 
1229 	if (addr == NULL) {
1230 		addr = dst;
1231 		dst = NULL;
1232 	} else if (dst != NULL) {
1233 		if (IN6_ARE_ADDR_EQUAL(&addr->sin6_addr, &dst->sin6_addr)) {
1234 			/*
1235 			 * Sometimes Netlink users fills in both attributes
1236 			 * with the same address. It still means "non-p2p".
1237 			 */
1238 			dst = NULL;
1239 		}
1240 	}
1241 
1242 	if (addr == NULL) {
1243 		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1244 		return (EINVAL);
1245 	}
1246 
1247 	uint32_t flags = nl_flags_to_in6(attrs->ifa_flags) | attrs->ifaf_flags;
1248 
1249 	uint32_t pltime = 0, vltime = 0;
1250 	if (attrs->ifa_cacheinfo != 0) {
1251 		pltime = attrs->ifa_cacheinfo->ifa_prefered;
1252 		vltime = attrs->ifa_cacheinfo->ifa_valid;
1253 	}
1254 
1255 	struct sockaddr_in6 mask = {
1256 		.sin6_len = sizeof(struct sockaddr_in6),
1257 		.sin6_family = AF_INET6,
1258 	};
1259 	ip6_writemask(&mask.sin6_addr, attrs->ifa_prefixlen);
1260 
1261 	struct in6_aliasreq req = {
1262 		.ifra_addr = *addr,
1263 		.ifra_prefixmask = mask,
1264 		.ifra_flags = flags,
1265 		.ifra_lifetime = { .ia6t_vltime = vltime, .ia6t_pltime = pltime },
1266 		.ifra_vhid = attrs->ifaf_vhid,
1267 	};
1268 	if (dst != NULL)
1269 		req.ifra_dstaddr = *dst;
1270 
1271 	return (in6_control_ioctl(SIOCAIFADDR_IN6, &req, ifp, nlp_get_cred(nlp)));
1272 }
1273 
1274 static int
1275 handle_deladdr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1276     if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1277 {
1278 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)attrs->ifa_local;
1279 
1280 	if (addr == NULL)
1281 		addr = (struct sockaddr_in6 *)(attrs->ifa_address);
1282 
1283 	if (addr == NULL) {
1284 		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1285 		return (EINVAL);
1286 	}
1287 
1288 	struct in6_aliasreq req = { .ifra_addr = *addr };
1289 
1290 	return (in6_control_ioctl(SIOCDIFADDR_IN6, &req, ifp, nlp_get_cred(nlp)));
1291 }
1292 #endif
1293 
1294 
1295 static int
1296 rtnl_handle_addr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1297 {
1298 	struct epoch_tracker et;
1299 	int error;
1300 
1301 	struct nl_parsed_ifa attrs = {};
1302 	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1303 	if (error != 0)
1304 		return (error);
1305 
1306 	NET_EPOCH_ENTER(et);
1307 	if_t ifp = ifnet_byindex_ref(attrs.ifa_index);
1308 	NET_EPOCH_EXIT(et);
1309 
1310 	if (ifp == NULL) {
1311 		nlmsg_report_err_msg(npt, "Unable to find interface with index %u",
1312 		    attrs.ifa_index);
1313 		return (ENOENT);
1314 	}
1315 	int if_flags = if_getflags(ifp);
1316 
1317 #if defined(INET) || defined(INET6)
1318 	bool new = hdr->nlmsg_type == NL_RTM_NEWADDR;
1319 #endif
1320 
1321 	/*
1322 	 * TODO: Properly handle NLM_F_CREATE / NLM_F_EXCL.
1323 	 * The current ioctl-based KPI always does an implicit create-or-replace.
1324 	 * It is not possible to specify fine-grained options.
1325 	 */
1326 
1327 	switch (attrs.ifa_family) {
1328 #ifdef INET
1329 	case AF_INET:
1330 		if (new)
1331 			error = handle_newaddr_inet(hdr, &attrs, ifp, nlp, npt);
1332 		else
1333 			error = handle_deladdr_inet(hdr, &attrs, ifp, nlp, npt);
1334 		break;
1335 #endif
1336 #ifdef INET6
1337 	case AF_INET6:
1338 		if (new)
1339 			error = handle_newaddr_inet6(hdr, &attrs, ifp, nlp, npt);
1340 		else
1341 			error = handle_deladdr_inet6(hdr, &attrs, ifp, nlp, npt);
1342 		break;
1343 #endif
1344 	default:
1345 		error = EAFNOSUPPORT;
1346 	}
1347 
1348 	if (error == 0 && !(if_flags & IFF_UP) && (if_getflags(ifp) & IFF_UP))
1349 		if_up(ifp);
1350 
1351 	if_rele(ifp);
1352 
1353 	return (error);
1354 }
1355 
1356 
1357 static void
1358 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
1359 {
1360 	struct nlmsghdr hdr = {};
1361 	struct nl_writer nw = {};
1362 	uint32_t group = 0;
1363 
1364 	switch (ifa->ifa_addr->sa_family) {
1365 #ifdef INET
1366 	case AF_INET:
1367 		group = RTNLGRP_IPV4_IFADDR;
1368 		break;
1369 #endif
1370 #ifdef INET6
1371 	case AF_INET6:
1372 		group = RTNLGRP_IPV6_IFADDR;
1373 		break;
1374 #endif
1375 	default:
1376 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
1377 		    ifa->ifa_addr->sa_family);
1378 		return;
1379 	}
1380 
1381 	if (!nl_has_listeners(NETLINK_ROUTE, group))
1382 		return;
1383 
1384 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
1385 		NL_LOG(LOG_DEBUG, "error allocating group writer");
1386 		return;
1387 	}
1388 
1389 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
1390 
1391 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
1392 	nlmsg_flush(&nw);
1393 }
1394 
1395 static void
1396 rtnl_handle_ifevent(if_t ifp, int nlmsg_type, int if_flags_mask)
1397 {
1398 	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
1399 	struct nl_writer nw = {};
1400 
1401 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
1402 		return;
1403 
1404 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
1405 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
1406 		return;
1407 	}
1408 	dump_iface(&nw, ifp, &hdr, if_flags_mask);
1409         nlmsg_flush(&nw);
1410 }
1411 
1412 static void
1413 rtnl_handle_ifattach(void *arg, if_t ifp)
1414 {
1415 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1416 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1417 }
1418 
1419 static void
1420 rtnl_handle_ifdetach(void *arg, if_t ifp)
1421 {
1422 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1423 	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
1424 }
1425 
1426 static void
1427 rtnl_handle_iflink(void *arg, if_t ifp)
1428 {
1429 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1430 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1431 }
1432 
1433 void
1434 rtnl_handle_ifnet_event(if_t ifp, int if_flags_mask)
1435 {
1436 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1437 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
1438 }
1439 
1440 static const struct rtnl_cmd_handler cmd_handlers[] = {
1441 	{
1442 		.cmd = NL_RTM_GETLINK,
1443 		.name = "RTM_GETLINK",
1444 		.cb = &rtnl_handle_getlink,
1445 		.flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
1446 	},
1447 	{
1448 		.cmd = NL_RTM_DELLINK,
1449 		.name = "RTM_DELLINK",
1450 		.cb = &rtnl_handle_dellink,
1451 		.priv = PRIV_NET_IFDESTROY,
1452 		.flags = RTNL_F_NOEPOCH,
1453 	},
1454 	{
1455 		.cmd = NL_RTM_NEWLINK,
1456 		.name = "RTM_NEWLINK",
1457 		.cb = &rtnl_handle_newlink,
1458 		.priv = PRIV_NET_IFCREATE,
1459 		.flags = RTNL_F_NOEPOCH,
1460 	},
1461 	{
1462 		.cmd = NL_RTM_GETADDR,
1463 		.name = "RTM_GETADDR",
1464 		.cb = &rtnl_handle_getaddr,
1465 		.flags = RTNL_F_ALLOW_NONVNET_JAIL,
1466 	},
1467 	{
1468 		.cmd = NL_RTM_NEWADDR,
1469 		.name = "RTM_NEWADDR",
1470 		.cb = &rtnl_handle_addr,
1471 		.priv = PRIV_NET_ADDIFADDR,
1472 		.flags = RTNL_F_NOEPOCH,
1473 	},
1474 	{
1475 		.cmd = NL_RTM_DELADDR,
1476 		.name = "RTM_DELADDR",
1477 		.cb = &rtnl_handle_addr,
1478 		.priv = PRIV_NET_DELIFADDR,
1479 		.flags = RTNL_F_NOEPOCH,
1480 	},
1481 };
1482 
1483 static const struct nlhdr_parser *all_parsers[] = {
1484 	&ifmsg_parser, &ifa_parser, &ifa_fbsd_parser,
1485 };
1486 
1487 void
1488 rtnl_iface_add_cloner(struct nl_cloner *cloner)
1489 {
1490 	sx_xlock(&rtnl_cloner_lock);
1491 	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
1492 	sx_xunlock(&rtnl_cloner_lock);
1493 }
1494 
1495 void
1496 rtnl_iface_del_cloner(struct nl_cloner *cloner)
1497 {
1498 	sx_xlock(&rtnl_cloner_lock);
1499 	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
1500 	sx_xunlock(&rtnl_cloner_lock);
1501 }
1502 
1503 void
1504 rtnl_ifaces_init(void)
1505 {
1506 	ifattach_event = EVENTHANDLER_REGISTER(
1507 	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
1508 	    EVENTHANDLER_PRI_ANY);
1509 	ifdetach_event = EVENTHANDLER_REGISTER(
1510 	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
1511 	    EVENTHANDLER_PRI_ANY);
1512 	ifaddr_event = EVENTHANDLER_REGISTER(
1513 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
1514 	    EVENTHANDLER_PRI_ANY);
1515 	iflink_event = EVENTHANDLER_REGISTER(
1516 	    ifnet_link_event, rtnl_handle_iflink, NULL,
1517 	    EVENTHANDLER_PRI_ANY);
1518 	NL_VERIFY_PARSERS(all_parsers);
1519 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
1520 }
1521 
1522 void
1523 rtnl_ifaces_destroy(void)
1524 {
1525 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
1526 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
1527 	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
1528 	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
1529 }
1530