xref: /freebsd/sys/netlink/route/iface.c (revision 7ee6b0f125a092ed99d327bb8d608dd2ff77b7aa)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_netlink.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34 #include <sys/types.h>
35 #include <sys/eventhandler.h>
36 #include <sys/kernel.h>
37 #include <sys/jail.h>
38 #include <sys/malloc.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <sys/syslog.h>
42 
43 #include <net/if.h>
44 #include <net/if_dl.h>
45 #include <net/if_media.h>
46 #include <net/if_var.h>
47 #include <net/if_clone.h>
48 #include <net/route.h>
49 #include <net/route/nhop.h>
50 #include <net/route/route_ctl.h>
51 #include <netinet/in_var.h>
52 #include <netinet6/in6_var.h>
53 #include <netinet6/scope6_var.h> /* scope deembedding */
54 #include <netlink/netlink.h>
55 #include <netlink/netlink_ctl.h>
56 #include <netlink/netlink_route.h>
57 #include <netlink/route/route_var.h>
58 
59 #define	DEBUG_MOD_NAME	nl_iface
60 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
61 #include <netlink/netlink_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63 
/*
 * State shared between a dump request handler and the per-object
 * callbacks it invokes.
 */
struct netlink_walkargs {
	struct nl_writer *nw;	/* writer the reply messages are emitted to */
	struct nlmsghdr hdr;	/* template header passed to the dump routines */
	struct nlpcb *so;	/* netlink socket that issued the request */
	struct ucred *cred;
	uint32_t fibnum;
	int family;
	int error;
	int count;		/* reported as "iterated" in the end-of-dump log */
	int dumped;		/* reported as "dumped" in the end-of-dump log */
};
75 
/* Tags of the interface-related event handlers used by this module. */
static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;

/* List of nl_cloner entries. */
static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);

/* sx lock, initialized at boot via SX_SYSINIT. */
static struct sx rtnl_cloner_lock;
SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");

/* These are external hooks for CARP. */
extern int	(*carp_get_vhid_p)(struct ifaddr *);
85 
86 /*
87  * RTM_GETLINK request
88  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
89  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
90  *
91  * Reply:
92  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
93 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
94 
95 [
96 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
97 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
98 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
99 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
100 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
101 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
102  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
103 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
104 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
105 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
106 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
107 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
108 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
109 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
110 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
111 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
112 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
113 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
114 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
115  */
116 
/*
 * Link state of an interface as exported in the IFLA_OPERSTATE and
 * IFLA_CARRIER attributes.
 */
struct if_state {
	uint8_t		ifla_operstate;	/* one of the IF_OPER_* values */
	uint8_t		ifla_carrier;	/* 1 if carrier is present, 0 otherwise */
};
121 
122 static void
123 get_operstate_ether(struct ifnet *ifp, struct if_state *pstate)
124 {
125 	struct ifmediareq ifmr = {};
126 	int error;
127 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (void *)&ifmr);
128 
129 	if (error != 0) {
130 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
131 		    if_name(ifp), error);
132 		return;
133 	}
134 
135 	switch (IFM_TYPE(ifmr.ifm_active)) {
136 	case IFM_ETHER:
137 		if (ifmr.ifm_status & IFM_ACTIVE) {
138 			pstate->ifla_carrier = 1;
139 			if (ifp->if_flags & IFF_MONITOR)
140 				pstate->ifla_operstate = IF_OPER_DORMANT;
141 			else
142 				pstate->ifla_operstate = IF_OPER_UP;
143 		} else
144 			pstate->ifla_operstate = IF_OPER_DOWN;
145 	}
146 }
147 
148 static bool
149 get_stats(struct nl_writer *nw, struct ifnet *ifp)
150 {
151 	struct rtnl_link_stats64 *stats;
152 
153 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
154 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
155 	if (nla == NULL)
156 		return (false);
157 	nla->nla_type = IFLA_STATS64;
158 	nla->nla_len = nla_len;
159 	stats = (struct rtnl_link_stats64 *)(nla + 1);
160 
161 	stats->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
162 	stats->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
163 	stats->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
164 	stats->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
165 	stats->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
166 	stats->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
167 	stats->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
168 	stats->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
169 	stats->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
170 	stats->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
171 
172 	return (true);
173 }
174 
175 static void
176 get_operstate(struct ifnet *ifp, struct if_state *pstate)
177 {
178 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
179 	pstate->ifla_carrier = 0; /* no carrier */
180 
181 	switch (ifp->if_type) {
182 	case IFT_ETHER:
183 	case IFT_L2VLAN:
184 		get_operstate_ether(ifp, pstate);
185 		break;
186 	default:
187 		/* Map admin state to the operstate */
188 		if (ifp->if_flags & IFF_UP) {
189 			pstate->ifla_operstate = IF_OPER_UP;
190 			pstate->ifla_carrier = 1;
191 		} else
192 			pstate->ifla_operstate = IF_OPER_DOWN;
193 		break;
194 	}
195 }
196 
197 static void
198 get_hwaddr(struct nl_writer *nw, struct ifnet *ifp)
199 {
200 	struct ifreq ifr = {};
201 
202 	if (if_gethwaddr(ifp, &ifr) == 0) {
203 		nlattr_add(nw, IFLAF_ORIG_HWADDR, if_getaddrlen(ifp),
204 		    ifr.ifr_addr.sa_data);
205 	}
206 }
207 
208 static unsigned
209 ifp_flags_to_netlink(const struct ifnet *ifp)
210 {
211         return (ifp->if_flags | ifp->if_drv_flags);
212 }
213 
214 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
215 static bool
216 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
217 {
218         uint32_t addr_len = 0;
219         const void *addr_data = NULL;
220 #ifdef INET6
221         struct in6_addr addr6;
222 #endif
223 
224         if (sa == NULL)
225                 return (true);
226 
227         switch (sa->sa_family) {
228 #ifdef INET
229         case AF_INET:
230                 addr_len = sizeof(struct in_addr);
231                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
232                 break;
233 #endif
234 #ifdef INET6
235         case AF_INET6:
236                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
237                 addr_len = sizeof(struct in6_addr);
238                 addr_data = &addr6;
239                 break;
240 #endif
241         case AF_LINK:
242                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
243                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
244                 break;
245 	case AF_UNSPEC:
246 		/* Ignore empty SAs without warning */
247 		return (true);
248         default:
249                 NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family);
250                 return (true);
251         }
252 
253         return (nlattr_add(nw, attr, addr_len, addr_data));
254 }
255 
256 /*
257  * Dumps interface state, properties and metrics.
258  * @nw: message writer
259  * @ifp: target interface
260  * @hdr: template header
261  * @if_flags_mask: changed if_[drv]_flags bitmask
262  *
263  * This function is called without epoch and MAY sleep.
264  */
265 static bool
266 dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr,
267     int if_flags_mask)
268 {
269         struct ifinfomsg *ifinfo;
270 
271         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
272 
273 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
274 		goto enomem;
275 
276         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
277         ifinfo->ifi_family = AF_UNSPEC;
278         ifinfo->__ifi_pad = 0;
279         ifinfo->ifi_type = ifp->if_type;
280         ifinfo->ifi_index = ifp->if_index;
281         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
282         ifinfo->ifi_change = if_flags_mask;
283 
284 	struct if_state ifs = {};
285 	get_operstate(ifp, &ifs);
286 
287 	if (ifs.ifla_operstate == IF_OPER_UP)
288 		ifinfo->ifi_flags |= IFF_LOWER_UP;
289 
290         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
291         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
292         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
293 
294 /*
295         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
296         nlattr_add_u8(nw, IFLA_LINKMODE, val);
297 */
298         if (if_getaddrlen(ifp) != 0) {
299 		struct ifaddr *ifa = if_getifaddr(ifp);
300 
301                 dump_sa(nw, IFLA_ADDRESS, ifa->ifa_addr);
302         }
303 
304         if ((ifp->if_broadcastaddr != NULL)) {
305 		nlattr_add(nw, IFLA_BROADCAST, ifp->if_addrlen,
306 		    ifp->if_broadcastaddr);
307         }
308 
309         nlattr_add_u32(nw, IFLA_MTU, ifp->if_mtu);
310 /*
311         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
312         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
313         nlattr_add_u32(nw, IFLA_GROUP, 0);
314 */
315 
316 	if (ifp->if_description != NULL)
317 		nlattr_add_string(nw, IFLA_IFALIAS, ifp->if_description);
318 
319 	/* Store FreeBSD-specific attributes */
320 	int off = nlattr_add_nested(nw, IFLA_FREEBSD);
321 	if (off != 0) {
322 		get_hwaddr(nw, ifp);
323 
324 		nlattr_set_len(nw, off);
325 	}
326 
327 	get_stats(nw, ifp);
328 
329 	uint32_t val = (ifp->if_flags & IFF_PROMISC) != 0;
330         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
331 
332 	ifc_dump_ifp_nl(ifp, nw);
333 
334         if (nlmsg_end(nw))
335 		return (true);
336 
337 enomem:
338         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
339         nlmsg_abort(nw);
340         return (false);
341 }
342 
343 static bool
344 check_ifmsg(void *hdr, struct nl_pstate *npt)
345 {
346 	struct ifinfomsg *ifm = hdr;
347 
348 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
349 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
350 		nlmsg_report_err_msg(npt,
351 		    "strict checking: non-zero values in ifinfomsg header");
352 		return (false);
353 	}
354 
355 	return (true);
356 }
357 
/*
 * Parser description for link messages: maps the struct ifinfomsg header
 * fields and the IFLA_* attributes to struct nl_parsed_link fields.
 */
#define	_IN(_field)	offsetof(struct ifinfomsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
/* Fixed header fields */
static const struct nlfield_parser nlf_p_if[] = {
	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
	{ .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 },
	{ .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 },
};

/* Attributes nested inside IFLA_LINKINFO */
static const struct nlattr_parser nla_p_linfo[] = {
	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
};
NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);

/*
 * Top-level attributes.
 * Both IFLA_IFNAME and IFLA_ALT_IFNAME are stored in ifla_ifname.
 * NOTE(review): entries appear to be kept in ascending attribute type
 * order - confirm whether the parser requires it before reordering.
 */
static const struct nlattr_parser nla_p_if[] = {
	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
	{ .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 },
	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
	{ .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string },
	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
};
#undef _IN
#undef _OUT
/* Strict parser: check_ifmsg validates the header in strict mode */
NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
385 
386 static bool
387 match_iface(struct ifnet *ifp, void *_arg)
388 {
389 	struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg;
390 
391 	if (attrs->ifi_index != 0 && attrs->ifi_index != ifp->if_index)
392 		return (false);
393 	if (attrs->ifi_type != 0 && attrs->ifi_index != ifp->if_type)
394 		return (false);
395 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
396 		return (false);
397 	/* TODO: add group match */
398 
399 	return (true);
400 }
401 
402 static int
403 dump_cb(struct ifnet *ifp, void *_arg)
404 {
405 	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
406 	if (!dump_iface(wa->nw, ifp, &wa->hdr, 0))
407 		return (ENOMEM);
408 	return (0);
409 }
410 
411 /*
412  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
413  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
414  *   [
415  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
416  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
417  *   ]
418  */
419 static int
420 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
421 {
422 	struct epoch_tracker et;
423         struct ifnet *ifp;
424 	int error = 0;
425 
426 	struct nl_parsed_link attrs = {};
427 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
428 	if (error != 0)
429 		return (error);
430 
431 	struct netlink_walkargs wa = {
432 		.so = nlp,
433 		.nw = npt->nw,
434 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
435 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
436 		.hdr.nlmsg_flags = hdr->nlmsg_flags,
437 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
438 	};
439 
440 	/* Fast track for an interface w/ explicit name or index match */
441 	if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) {
442 		if (attrs.ifi_index != 0) {
443 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u",
444 			    attrs.ifi_index);
445 			NET_EPOCH_ENTER(et);
446 			ifp = ifnet_byindex_ref(attrs.ifi_index);
447 			NET_EPOCH_EXIT(et);
448 		} else {
449 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s",
450 			    attrs.ifla_ifname);
451 			ifp = ifunit_ref(attrs.ifla_ifname);
452 		}
453 
454 		if (ifp != NULL) {
455 			if (match_iface(ifp, &attrs)) {
456 				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
457 					error = ENOMEM;
458 			} else
459 				error = ENODEV;
460 			if_rele(ifp);
461 		} else
462 			error = ENODEV;
463 		return (error);
464 	}
465 
466 	/* Always treat non-direct-match as a multipart message */
467 	wa.hdr.nlmsg_flags |= NLM_F_MULTI;
468 
469 	/*
470 	 * Fetching some link properties require performing ioctl's that may be blocking.
471 	 * Address it by saving referenced pointers of the matching links,
472 	 * exiting from epoch and going through the list one-by-one.
473 	 */
474 
475 	NL_LOG(LOG_DEBUG2, "Start dump");
476 	if_foreach_sleep(match_iface, &attrs, dump_cb, &wa);
477 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
478 
479 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
480                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
481                 return (ENOMEM);
482         }
483 
484 	return (error);
485 }
486 
487 /*
488  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
489  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
490  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
491  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
492  *   [
493  *    {nla_len=16, nla_type=IFLA_LINKINFO},
494  *     [
495  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
496  *     ]
497  *    ]
498  */
499 
500 static int
501 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
502 {
503 	struct epoch_tracker et;
504         struct ifnet *ifp;
505 	int error;
506 
507 	struct nl_parsed_link attrs = {};
508 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
509 	if (error != 0)
510 		return (error);
511 
512 	NET_EPOCH_ENTER(et);
513 	ifp = ifnet_byindex_ref(attrs.ifi_index);
514 	NET_EPOCH_EXIT(et);
515 	if (ifp == NULL) {
516 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
517 		return (ENOENT);
518 	}
519 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
520 
521 	sx_xlock(&ifnet_detach_sxlock);
522 	error = if_clone_destroy(if_name(ifp));
523 	sx_xunlock(&ifnet_detach_sxlock);
524 
525 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
526 
527 	if_rele(ifp);
528 	return (error);
529 }
530 
531 /*
532  * New link:
533  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0},
534  *   {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
535  *    [
536  *     {{nla_len=8, nla_type=IFLA_MTU}, 123},
537  *     {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"},
538  *     {{nla_len=24, nla_type=IFLA_LINKINFO},
539  *      [
540  *       {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...},
541  *       {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]}
542  *
543  * Update link:
544  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0},
545  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0},
546  * {{nla_len=8, nla_type=IFLA_MTU}, 123}}
547  *
548  *
549  * Check command availability:
550  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0},
551  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
552  */
553 
554 
555 static int
556 create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
557     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
558 {
559 	if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) {
560 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
561 		return (EINVAL);
562 	}
563 	if (lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) {
564 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
565 		return (EINVAL);
566 	}
567 
568 	struct ifc_data_nl ifd = {
569 		.flags = IFC_F_CREATE,
570 		.lattrs = lattrs,
571 		.bm = bm,
572 		.npt = npt,
573 	};
574 	if (ifc_create_ifp_nl(lattrs->ifla_ifname, &ifd) && ifd.error == 0)
575 		nl_store_ifp_cookie(npt, ifd.ifp);
576 
577 	return (ifd.error);
578 }
579 
580 static int
581 modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
582     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
583 {
584 	struct ifnet *ifp = NULL;
585 	struct epoch_tracker et;
586 
587 	if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) {
588 		/*
589 		 * Applications like ip(8) verify RTM_NEWLINK command
590 		 * existence by calling it with empty arguments. Always
591 		 * return "innocent" error in that case.
592 		 */
593 		NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field");
594 		return (EPERM);
595 	}
596 
597 	if (lattrs->ifi_index != 0) {
598 		NET_EPOCH_ENTER(et);
599 		ifp = ifnet_byindex_ref(lattrs->ifi_index);
600 		NET_EPOCH_EXIT(et);
601 		if (ifp == NULL) {
602 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u",
603 			    lattrs->ifi_index);
604 			return (ENOENT);
605 		}
606 	}
607 
608 	if (ifp == NULL && lattrs->ifla_ifname != NULL) {
609 		ifp = ifunit_ref(lattrs->ifla_ifname);
610 		if (ifp == NULL) {
611 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s",
612 			    lattrs->ifla_ifname);
613 			return (ENOENT);
614 		}
615 	}
616 
617 	MPASS(ifp != NULL);
618 
619 	/*
620 	 * Modification request can address either
621 	 * 1) cloned interface, in which case we call the cloner-specific
622 	 *  modification routine
623 	 * or
624 	 * 2) non-cloned (e.g. "physical") interface, in which case we call
625 	 *  generic modification routine
626 	 */
627 	struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt };
628 	if (!ifc_modify_ifp_nl(ifp, &ifd))
629 		ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt);
630 
631 	if_rele(ifp);
632 
633 	return (ifd.error);
634 }
635 
636 
637 static int
638 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
639 {
640 	struct nlattr_bmask bm;
641 	int error;
642 
643 	struct nl_parsed_link attrs = {};
644 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
645 	if (error != 0)
646 		return (error);
647 	nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm);
648 
649 	if (hdr->nlmsg_flags & NLM_F_CREATE)
650 		return (create_link(hdr, &attrs, &bm, nlp, npt));
651 	else
652 		return (modify_link(hdr, &attrs, &bm, nlp, npt));
653 }
654 
/*
 * Sets the scope of an IPv6 link-local address in @sa to @ifindex.
 * NULL pointers, other families and non-link-local addresses are left
 * untouched. No-op in kernels built without INET6.
 */
static void
set_scope6(struct sockaddr *sa, uint32_t ifindex)
{
#ifdef INET6
	struct sockaddr_in6 *sin6;

	if (sa == NULL || sa->sa_family != AF_INET6)
		return;

	sin6 = (struct sockaddr_in6 *)sa;
	if (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
		return;

	in6_set_unicast_scopeid(&sin6->sin6_addr, ifindex);
#endif
}
667 
668 static bool
669 check_sa_family(const struct sockaddr *sa, int family, const char *attr_name,
670     struct nl_pstate *npt)
671 {
672 	if (sa == NULL || sa->sa_family == family)
673 		return (true);
674 
675 	nlmsg_report_err_msg(npt, "wrong family for %s attribute: %d != %d",
676 	    attr_name, family, sa->sa_family);
677 	return (false);
678 }
679 
/*
 * Parsed representation of an interface address request: struct ifaddrmsg
 * header fields followed by the values of the IFA_* attributes.
 */
struct nl_parsed_ifa {
	uint8_t			ifa_family;	/* header: address family */
	uint8_t			ifa_prefixlen;	/* header: prefix length */
	uint8_t			ifa_scope;	/* header: address scope */
	uint32_t		ifa_index;	/* header: interface index */
	uint32_t		ifa_flags;	/* header flags, or IFA_FLAGS if present */
	uint32_t		ifaf_vhid;	/* IFA_FREEBSD / IFAF_VHID */
	uint32_t		ifaf_flags;	/* IFA_FREEBSD / IFAF_FLAGS */
	/*
	 * ifa_addr/ifa_dst are parsed from IFA_ADDRESS/IFA_LOCAL and then
	 * rearranged by post_p_ifa(): ifa_addr ends up holding the interface
	 * address and ifa_dst the peer address (or NULL).
	 */
	struct sockaddr		*ifa_addr;
	struct sockaddr		*ifa_dst;
	struct sockaddr		*ifa_broadcast;	/* IFA_BROADCAST */
	struct ifa_cacheinfo	*ifa_cacheinfo;	/* IFA_CACHEINFO, points into the message */
	struct sockaddr		*f_ifa_addr;
	struct sockaddr		*f_ifa_dst;
};
695 
696 static int
697 nlattr_get_cinfo(struct nlattr *nla, struct nl_pstate *npt,
698     const void *arg __unused, void *target)
699 {
700 	if (__predict_false(NLA_DATA_LEN(nla) != sizeof(struct ifa_cacheinfo))) {
701 		NLMSG_REPORT_ERR_MSG(npt, "nla type %d size(%u) is not ifa_cacheinfo",
702 		    nla->nla_type, NLA_DATA_LEN(nla));
703 		return (EINVAL);
704 	}
705 	*((struct ifa_cacheinfo **)target) = (struct ifa_cacheinfo *)NL_RTA_DATA(nla);
706 	return (0);
707 }
708 
/*
 * Parser description for address messages: maps the struct ifaddrmsg
 * header fields and the IFA_* attributes to struct nl_parsed_ifa fields.
 */
#define	_IN(_field)	offsetof(struct ifaddrmsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_ifa, _field)
/* Fixed header fields; the 8-bit header flags are widened to 32 bits */
static const struct nlfield_parser nlf_p_ifa[] = {
	{ .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 },
	{ .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 },
	{ .off_in = _IN(ifa_index), .off_out = _OUT(ifa_index), .cb = nlf_get_u32 },
};

/* Attributes nested inside IFA_FREEBSD */
static const struct nlattr_parser nla_p_ifa_fbsd[] = {
	{ .type = IFAF_VHID, .off = _OUT(ifaf_vhid), .cb = nlattr_get_uint32 },
	{ .type = IFAF_FLAGS, .off = _OUT(ifaf_flags), .cb = nlattr_get_uint32 },
};
NL_DECLARE_ATTR_PARSER(ifa_fbsd_parser, nla_p_ifa_fbsd);

/*
 * Top-level attributes.
 * IFA_LOCAL is stored in ifa_dst at this stage. IFA_FLAGS is stored in
 * the same ifa_flags field as the header flags.
 * NOTE(review): entries appear to be kept in ascending attribute type
 * order - confirm whether the parser requires it before reordering.
 */
static const struct nlattr_parser nla_p_ifa[] = {
	{ .type = IFA_ADDRESS, .off = _OUT(ifa_addr), .cb = nlattr_get_ip },
	{ .type = IFA_LOCAL, .off = _OUT(ifa_dst), .cb = nlattr_get_ip },
	{ .type = IFA_BROADCAST, .off = _OUT(ifa_broadcast), .cb = nlattr_get_ip },
	{ .type = IFA_CACHEINFO, .off = _OUT(ifa_cacheinfo), .cb = nlattr_get_cinfo },
	{ .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 },
	{ .type = IFA_FREEBSD, .arg = &ifa_fbsd_parser, .cb = nlattr_get_nested },
};
#undef _IN
#undef _OUT
735 
736 static bool
737 post_p_ifa(void *_attrs, struct nl_pstate *npt)
738 {
739 	struct nl_parsed_ifa *attrs = (struct nl_parsed_ifa *)_attrs;
740 
741 	if (!check_sa_family(attrs->ifa_addr, attrs->ifa_family, "IFA_ADDRESS", npt))
742 		return (false);
743 	if (!check_sa_family(attrs->ifa_dst, attrs->ifa_family, "IFA_LOCAL", npt))
744 		return (false);
745 	if (!check_sa_family(attrs->ifa_broadcast, attrs->ifa_family, "IFA_BROADADDR", npt))
746 		return (false);
747 
748 	set_scope6(attrs->ifa_addr, attrs->ifa_index);
749 	set_scope6(attrs->ifa_dst, attrs->ifa_index);
750 
751 	/*
752 	 * Map the Netlink attributes to FreeBSD ifa layout.
753 	 * If only IFA_ADDRESS or IFA_LOCAL is set OR
754 	 * both are set to the same value => ifa is not broadcast
755 	 * and the attribute value contains interface address.
756 	 *
757 	 * Otherwise (both IFA_ADDRESS and IFA_LOCAL are set and
758 	 * different), IFA_LOCAL contains an interface address and
759 	 * IFA_ADDRESS contains peer address.
760 	 */
761 	struct sockaddr *addr, *dst;
762 
763 	addr = attrs->ifa_addr;
764 	if ((dst = attrs->ifa_dst) != NULL) {
765 		if (addr != NULL && !sa_equal(addr, dst)) {
766 			/* Ptp address */
767 			attrs->ifa_addr = dst;
768 			attrs->ifa_dst = addr;
769 		} else {
770 			attrs->ifa_addr = dst;
771 			attrs->ifa_dst = NULL;
772 		}
773 	}
774 
775 	return (true);
776 }
777 
778 NL_DECLARE_PARSER_EXT(ifa_parser, struct ifaddrmsg, NULL, nlf_p_ifa, nla_p_ifa, post_p_ifa);
779 
780 
781 /*
782 
783 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
784  [
785         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
786         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
787         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
788         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
789         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
790 ---
791 
792 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
793  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
794    [
795     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
796    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
797    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
798 */
799 
800 static uint8_t
801 ifa_get_scope(const struct ifaddr *ifa)
802 {
803         const struct sockaddr *sa;
804         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
805 
806         sa = ifa->ifa_addr;
807         switch (sa->sa_family) {
808 #ifdef INET
809         case AF_INET:
810                 {
811                         struct in_addr addr;
812                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
813                         if (IN_LOOPBACK(addr.s_addr))
814                                 addr_scope = RT_SCOPE_HOST;
815                         else if (IN_LINKLOCAL(addr.s_addr))
816                                 addr_scope = RT_SCOPE_LINK;
817                         break;
818                 }
819 #endif
820 #ifdef INET6
821         case AF_INET6:
822                 {
823                         const struct in6_addr *addr;
824                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
825                         if (IN6_IS_ADDR_LOOPBACK(addr))
826                                 addr_scope = RT_SCOPE_HOST;
827                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
828                                 addr_scope = RT_SCOPE_LINK;
829                         break;
830                 }
831 #endif
832         }
833 
834         return (addr_scope);
835 }
836 
837 #ifdef INET6
838 static uint8_t
839 inet6_get_plen(const struct in6_addr *addr)
840 {
841 
842 	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
843 	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
844 }
845 #endif
846 
847 static uint8_t
848 get_sa_plen(const struct sockaddr *sa)
849 {
850 #ifdef INET
851         const struct in_addr *paddr;
852 #endif
853 #ifdef INET6
854         const struct in6_addr *paddr6;
855 #endif
856 
857         switch (sa->sa_family) {
858 #ifdef INET
859         case AF_INET:
860                 paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
861                 return bitcount32(paddr->s_addr);;
862 #endif
863 #ifdef INET6
864         case AF_INET6:
865                 paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
866                 return inet6_get_plen(paddr6);
867 #endif
868         }
869 
870         return (0);
871 }
872 
873 #ifdef INET6
874 static uint32_t
875 in6_flags_to_nl(uint32_t flags)
876 {
877 	uint32_t nl_flags = 0;
878 
879 	if (flags & IN6_IFF_TEMPORARY)
880 		nl_flags |= IFA_F_TEMPORARY;
881 	if (flags & IN6_IFF_NODAD)
882 		nl_flags |= IFA_F_NODAD;
883 	if (flags & IN6_IFF_DEPRECATED)
884 		nl_flags |= IFA_F_DEPRECATED;
885 	if (flags & IN6_IFF_TENTATIVE)
886 		nl_flags |= IFA_F_TENTATIVE;
887 	if ((flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == 0)
888 		flags |= IFA_F_PERMANENT;
889 	if (flags & IN6_IFF_DUPLICATED)
890 		flags |= IFA_F_DADFAILED;
891 	return (nl_flags);
892 }
893 
894 static uint32_t
895 nl_flags_to_in6(uint32_t flags)
896 {
897 	uint32_t in6_flags = 0;
898 
899 	if (flags & IFA_F_TEMPORARY)
900 		in6_flags |= IN6_IFF_TEMPORARY;
901 	if (flags & IFA_F_NODAD)
902 		in6_flags |= IN6_IFF_NODAD;
903 	if (flags & IFA_F_DEPRECATED)
904 		in6_flags |= IN6_IFF_DEPRECATED;
905 	if (flags & IFA_F_TENTATIVE)
906 		in6_flags |= IN6_IFF_TENTATIVE;
907 	if (flags & IFA_F_DADFAILED)
908 		in6_flags |= IN6_IFF_DUPLICATED;
909 
910 	return (in6_flags);
911 }
912 
913 static void
914 export_cache_info6(struct nl_writer *nw, const struct in6_ifaddr *ia)
915 {
916 	struct ifa_cacheinfo ci = {
917 		.cstamp = ia->ia6_createtime * 1000,
918 		.tstamp = ia->ia6_updatetime * 1000,
919 		.ifa_prefered = ia->ia6_lifetime.ia6t_pltime,
920 		.ifa_valid = ia->ia6_lifetime.ia6t_vltime,
921 	};
922 
923 	nlattr_add(nw, IFA_CACHEINFO, sizeof(ci), &ci);
924 }
925 #endif
926 
/*
 * Appends address cache information (IFA_CACHEINFO) for @ifa.
 * Only IPv6 addresses carry such data; other families add nothing.
 */
static void
export_cache_info(struct nl_writer *nw, struct ifaddr *ifa)
{
#ifdef INET6
	if (ifa->ifa_addr->sa_family == AF_INET6)
		export_cache_info6(nw, (struct in6_ifaddr *)ifa);
#endif
}
938 
939 /*
940  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
941            ('IFA_LOCAL', '12.0.0.1'),
942            ('IFA_LABEL', 'eth10'),
943            ('IFA_FLAGS', 128),
944            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
945  */
946 static bool
947 dump_iface_addr(struct nl_writer *nw, struct ifnet *ifp, struct ifaddr *ifa,
948     const struct nlmsghdr *hdr)
949 {
950         struct ifaddrmsg *ifamsg;
951         struct sockaddr *sa = ifa->ifa_addr;
952         struct sockaddr *sa_dst = ifa->ifa_dstaddr;
953 
954         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
955             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
956 
957 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
958 		goto enomem;
959 
960         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
961         ifamsg->ifa_family = sa->sa_family;
962         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
963         ifamsg->ifa_flags = 0; // ifa_flags is useless
964         ifamsg->ifa_scope = ifa_get_scope(ifa);
965         ifamsg->ifa_index = ifp->if_index;
966 
967 	if ((ifp->if_flags & IFF_POINTOPOINT) && sa_dst != NULL && sa_dst->sa_family != 0) {
968 		/* P2P interface may have IPv6 LL with no dst address */
969 		dump_sa(nw, IFA_ADDRESS, sa_dst);
970 		dump_sa(nw, IFA_LOCAL, sa);
971 	} else {
972 		dump_sa(nw, IFA_ADDRESS, sa);
973 #ifdef INET
974 		/*
975 		 * In most cases, IFA_ADDRESS == IFA_LOCAL
976 		 * Skip IFA_LOCAL for anything except INET
977 		 */
978 		if (sa->sa_family == AF_INET)
979 			dump_sa(nw, IFA_LOCAL, sa);
980 #endif
981 	}
982 	if (ifp->if_flags & IFF_BROADCAST)
983 		dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr);
984 
985         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
986 
987         uint32_t nl_ifa_flags = 0;
988 #ifdef INET6
989 	if (sa->sa_family == AF_INET6) {
990 		struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
991 		nl_ifa_flags = in6_flags_to_nl(ia->ia6_flags);
992 	}
993 #endif
994         nlattr_add_u32(nw, IFA_FLAGS, nl_ifa_flags);
995 
996 	export_cache_info(nw, ifa);
997 
998 	/* Store FreeBSD-specific attributes */
999 	int off = nlattr_add_nested(nw, IFA_FREEBSD);
1000 	if (off != 0) {
1001 		if (ifa->ifa_carp != NULL && carp_get_vhid_p != NULL) {
1002 			uint32_t vhid  = (uint32_t)(*carp_get_vhid_p)(ifa);
1003 			nlattr_add_u32(nw, IFAF_VHID, vhid);
1004 		}
1005 #ifdef INET6
1006 		if (sa->sa_family == AF_INET6) {
1007 			uint32_t ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags;
1008 
1009 			nlattr_add_u32(nw, IFAF_FLAGS, ifa_flags);
1010 		}
1011 #endif
1012 
1013 		nlattr_set_len(nw, off);
1014 	}
1015 
1016 	if (nlmsg_end(nw))
1017 		return (true);
1018 enomem:
1019         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
1020             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
1021         nlmsg_abort(nw);
1022         return (false);
1023 }
1024 
1025 static int
1026 dump_iface_addrs(struct netlink_walkargs *wa, struct ifnet *ifp)
1027 {
1028         struct ifaddr *ifa;
1029 
1030 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1031 		if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family)
1032 			continue;
1033 		if (ifa->ifa_addr->sa_family == AF_LINK)
1034 			continue;
1035 		if (prison_if(wa->cred, ifa->ifa_addr) != 0)
1036 			continue;
1037 		wa->count++;
1038 		if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr))
1039 			return (ENOMEM);
1040 		wa->dumped++;
1041 	}
1042 
1043 	return (0);
1044 }
1045 
1046 static int
1047 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1048 {
1049         struct ifnet *ifp;
1050 	int error = 0;
1051 
1052 	struct nl_parsed_ifa attrs = {};
1053 	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1054 	if (error != 0)
1055 		return (error);
1056 
1057 	struct netlink_walkargs wa = {
1058 		.so = nlp,
1059 		.nw = npt->nw,
1060 		.cred = nlp_get_cred(nlp),
1061 		.family = attrs.ifa_family,
1062 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
1063 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
1064 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
1065 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
1066 	};
1067 
1068 	NL_LOG(LOG_DEBUG2, "Start dump");
1069 
1070 	if (attrs.ifa_index != 0) {
1071 		ifp = ifnet_byindex(attrs.ifa_index);
1072 		if (ifp == NULL)
1073 			error = ENOENT;
1074 		else
1075 			error = dump_iface_addrs(&wa, ifp);
1076 	} else {
1077 		CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1078 			error = dump_iface_addrs(&wa, ifp);
1079 			if (error != 0)
1080 				break;
1081 		}
1082 	}
1083 
1084 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
1085 
1086 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
1087                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
1088                 return (ENOMEM);
1089         }
1090 
1091 	return (error);
1092 }
1093 
1094 #ifdef INET
1095 static int
1096 handle_newaddr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1097     struct ifnet *ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1098 {
1099 	int plen = attrs->ifa_prefixlen;
1100 	int if_flags = if_getflags(ifp);
1101 
1102 	if (plen > 32) {
1103 		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1104 		return (EINVAL);
1105 	};
1106 
1107 	if (if_flags & IFF_POINTOPOINT) {
1108 		if (attrs->ifa_addr == NULL || attrs->ifa_dst == NULL) {
1109 			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1110 			return (EINVAL);
1111 		}
1112 	} else {
1113 		if (attrs->ifa_addr == NULL) {
1114 			nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1115 			return (EINVAL);
1116 		}
1117 		attrs->ifa_dst = attrs->ifa_broadcast;
1118 
1119 		/* Generate broadcast address if not set */
1120 		if ((if_flags & IFF_BROADCAST) && attrs->ifa_dst == NULL) {
1121 			uint32_t s_baddr;
1122 			struct sockaddr_in *sin_brd;
1123 
1124 			if (plen == 31)
1125 				s_baddr = INADDR_BROADCAST; /* RFC 3021 */
1126 			else {
1127 				struct sockaddr_in *addr;
1128 				uint32_t s_mask;
1129 
1130 				addr = (struct sockaddr_in *)attrs->ifa_addr;
1131 				s_mask = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
1132 				s_baddr = addr->sin_addr.s_addr | ~s_mask;
1133 			}
1134 
1135 			sin_brd = (struct sockaddr_in *)npt_alloc(npt, sizeof(*sin_brd));
1136 			if (sin_brd == NULL)
1137 				return (ENOMEM);
1138 			sin_brd->sin_family = AF_INET;
1139 			sin_brd->sin_len = sizeof(*sin_brd);
1140 			sin_brd->sin_addr.s_addr = s_baddr;
1141 			attrs->ifa_dst = (struct sockaddr *)sin_brd;
1142 		}
1143 	}
1144 
1145 	struct sockaddr_in mask = {
1146 		.sin_len = sizeof(struct sockaddr_in),
1147 		.sin_family = AF_INET,
1148 		.sin_addr.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0),
1149 	};
1150 	struct in_aliasreq req = {
1151 		.ifra_addr = *((struct sockaddr_in *)attrs->ifa_addr),
1152 		.ifra_mask = mask,
1153 		.ifra_vhid = attrs->ifaf_vhid,
1154 	};
1155 	if (attrs->ifa_dst != NULL)
1156 		req.ifra_dstaddr = *((struct sockaddr_in *)attrs->ifa_dst);
1157 
1158 	return (in_control(NULL, SIOCAIFADDR, &req, ifp, curthread));
1159 }
1160 
1161 static int
1162 handle_deladdr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1163     struct ifnet *ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1164 {
1165 	if (attrs->ifa_addr == NULL) {
1166 		nlmsg_report_err_msg(npt, "empty IFA_ADDRESS/IFA_LOCAL");
1167 		return (EINVAL);
1168 	}
1169 
1170 	struct in_aliasreq req = {
1171 		.ifra_addr = *((struct sockaddr_in *)attrs->ifa_addr),
1172 	};
1173 
1174 	return (in_control(NULL, SIOCDIFADDR, &req, ifp, curthread));
1175 }
1176 #endif
1177 
1178 #ifdef INET6
1179 static int
1180 handle_newaddr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1181     struct ifnet *ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1182 {
1183 	if (attrs->ifa_prefixlen > 128) {
1184 		nlmsg_report_err_msg(npt, "invalid ifa_prefixlen");
1185 		return (EINVAL);
1186 	}
1187 
1188 	if (attrs->ifa_addr == NULL) {
1189 		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1190 		return (EINVAL);
1191 	}
1192 
1193 	/* TODO: Clarify addition of prefixes on p2p interfaces w/o ifa_dst */
1194 
1195 	uint32_t flags = nl_flags_to_in6(attrs->ifa_flags) | attrs->ifaf_flags;
1196 
1197 	uint32_t pltime = 0, vltime = 0;
1198 	if (attrs->ifa_cacheinfo != 0) {
1199 		pltime = attrs->ifa_cacheinfo->ifa_prefered;
1200 		vltime = attrs->ifa_cacheinfo->ifa_valid;
1201 	}
1202 
1203 	struct sockaddr_in6 mask = {
1204 		.sin6_len = sizeof(struct sockaddr_in6),
1205 		.sin6_family = AF_INET6,
1206 	};
1207 	ip6_writemask(&mask.sin6_addr, attrs->ifa_prefixlen);
1208 
1209 	struct in6_aliasreq req = {
1210 		.ifra_addr = *((struct sockaddr_in6 *)attrs->ifa_addr),
1211 		.ifra_prefixmask = mask,
1212 		.ifra_flags = flags,
1213 		.ifra_lifetime = { .ia6t_vltime = vltime, .ia6t_pltime = pltime },
1214 		.ifra_vhid = attrs->ifaf_vhid,
1215 	};
1216 	if (attrs->ifa_dst != NULL)
1217 		req.ifra_dstaddr = *((struct sockaddr_in6 *)attrs->ifa_dst);
1218 
1219 	return (in6_control(NULL, SIOCAIFADDR_IN6, &req, ifp, curthread));
1220 }
1221 
1222 static int
1223 handle_deladdr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs,
1224     struct ifnet *ifp, struct nlpcb *nlp, struct nl_pstate *npt)
1225 {
1226 	if (attrs->ifa_addr == NULL) {
1227 		nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS");
1228 		return (EINVAL);
1229 	}
1230 
1231 	struct in6_aliasreq req = {
1232 		.ifra_addr = *((struct sockaddr_in6 *)attrs->ifa_addr),
1233 	};
1234 
1235 	return (in6_control(NULL, SIOCDIFADDR_IN6, &req, ifp, curthread));
1236 }
1237 #endif
1238 
1239 
1240 static int
1241 rtnl_handle_addr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
1242 {
1243 	struct epoch_tracker et;
1244 	int error;
1245 
1246 	struct nl_parsed_ifa attrs = {};
1247 	error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs);
1248 	if (error != 0)
1249 		return (error);
1250 
1251 	NET_EPOCH_ENTER(et);
1252 	struct ifnet *ifp = ifnet_byindex_ref(attrs.ifa_index);
1253 	NET_EPOCH_EXIT(et);
1254 
1255 	if (ifp == NULL) {
1256 		nlmsg_report_err_msg(npt, "Unable to find interface with index %u",
1257 		    attrs.ifa_index);
1258 		return (ENOENT);
1259 	}
1260 #ifdef INET6
1261 	int if_flags = if_getflags(ifp);
1262 #endif
1263 
1264 #if defined(INET) || defined(INET6)
1265 	bool new = hdr->nlmsg_type == NL_RTM_NEWADDR;
1266 #endif
1267 
1268 	/*
1269 	 * TODO: Properly handle NLM_F_CREATE / NLM_F_EXCL.
1270 	 * The current ioctl-based KPI always does an implicit create-or-replace.
1271 	 * It is not possible to specify fine-grained options.
1272 	 */
1273 
1274 	switch (attrs.ifa_family) {
1275 #ifdef INET
1276 	case AF_INET:
1277 		if (new)
1278 			error = handle_newaddr_inet(hdr, &attrs, ifp, nlp, npt);
1279 		else
1280 			error = handle_deladdr_inet(hdr, &attrs, ifp, nlp, npt);
1281 		break;
1282 #endif
1283 #ifdef INET6
1284 	case AF_INET6:
1285 		if (new)
1286 			error = handle_newaddr_inet6(hdr, &attrs, ifp, nlp, npt);
1287 		else
1288 			error = handle_deladdr_inet6(hdr, &attrs, ifp, nlp, npt);
1289 		break;
1290 #endif
1291 	default:
1292 		error = EAFNOSUPPORT;
1293 	}
1294 
1295 #ifdef INET6
1296 	if (error == 0 && !(if_flags & IFF_UP) && (if_getflags(ifp) & IFF_UP))
1297 		in6_if_up(ifp);
1298 #endif
1299 
1300 	if_rele(ifp);
1301 
1302 	return (error);
1303 }
1304 
1305 
1306 static void
1307 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
1308 {
1309 	struct nlmsghdr hdr = {};
1310 	struct nl_writer nw = {};
1311 	uint32_t group = 0;
1312 
1313 	switch (ifa->ifa_addr->sa_family) {
1314 #ifdef INET
1315 	case AF_INET:
1316 		group = RTNLGRP_IPV4_IFADDR;
1317 		break;
1318 #endif
1319 #ifdef INET6
1320 	case AF_INET6:
1321 		group = RTNLGRP_IPV6_IFADDR;
1322 		break;
1323 #endif
1324 	default:
1325 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
1326 		    ifa->ifa_addr->sa_family);
1327 		return;
1328 	}
1329 
1330 	if (!nl_has_listeners(NETLINK_ROUTE, group))
1331 		return;
1332 
1333 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
1334 		NL_LOG(LOG_DEBUG, "error allocating group writer");
1335 		return;
1336 	}
1337 
1338 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
1339 
1340 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
1341 	nlmsg_flush(&nw);
1342 }
1343 
1344 static void
1345 rtnl_handle_ifevent(struct ifnet *ifp, int nlmsg_type, int if_flags_mask)
1346 {
1347 	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
1348 	struct nl_writer nw = {};
1349 
1350 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
1351 		return;
1352 
1353 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
1354 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
1355 		return;
1356 	}
1357 	dump_iface(&nw, ifp, &hdr, if_flags_mask);
1358         nlmsg_flush(&nw);
1359 }
1360 
1361 static void
1362 rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
1363 {
1364 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1365 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1366 }
1367 
1368 static void
1369 rtnl_handle_ifdetach(void *arg, struct ifnet *ifp)
1370 {
1371 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1372 	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
1373 }
1374 
1375 static void
1376 rtnl_handle_iflink(void *arg, struct ifnet *ifp)
1377 {
1378 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1379 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
1380 }
1381 
1382 void
1383 rtnl_handle_ifnet_event(struct ifnet *ifp, int if_flags_mask)
1384 {
1385 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
1386 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
1387 }
1388 
1389 static const struct rtnl_cmd_handler cmd_handlers[] = {
1390 	{
1391 		.cmd = NL_RTM_GETLINK,
1392 		.name = "RTM_GETLINK",
1393 		.cb = &rtnl_handle_getlink,
1394 		.flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
1395 	},
1396 	{
1397 		.cmd = NL_RTM_DELLINK,
1398 		.name = "RTM_DELLINK",
1399 		.cb = &rtnl_handle_dellink,
1400 		.priv = PRIV_NET_IFDESTROY,
1401 		.flags = RTNL_F_NOEPOCH,
1402 	},
1403 	{
1404 		.cmd = NL_RTM_NEWLINK,
1405 		.name = "RTM_NEWLINK",
1406 		.cb = &rtnl_handle_newlink,
1407 		.priv = PRIV_NET_IFCREATE,
1408 		.flags = RTNL_F_NOEPOCH,
1409 	},
1410 	{
1411 		.cmd = NL_RTM_GETADDR,
1412 		.name = "RTM_GETADDR",
1413 		.cb = &rtnl_handle_getaddr,
1414 		.flags = RTNL_F_ALLOW_NONVNET_JAIL,
1415 	},
1416 	{
1417 		.cmd = NL_RTM_NEWADDR,
1418 		.name = "RTM_NEWADDR",
1419 		.cb = &rtnl_handle_addr,
1420 		.priv = PRIV_NET_ADDIFADDR,
1421 		.flags = RTNL_F_NOEPOCH,
1422 	},
1423 	{
1424 		.cmd = NL_RTM_DELADDR,
1425 		.name = "RTM_DELADDR",
1426 		.cb = &rtnl_handle_addr,
1427 		.priv = PRIV_NET_DELIFADDR,
1428 		.flags = RTNL_F_NOEPOCH,
1429 	},
1430 };
1431 
1432 static const struct nlhdr_parser *all_parsers[] = {
1433 	&ifmsg_parser, &ifa_parser, &ifa_fbsd_parser,
1434 };
1435 
1436 void
1437 rtnl_iface_add_cloner(struct nl_cloner *cloner)
1438 {
1439 	sx_xlock(&rtnl_cloner_lock);
1440 	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
1441 	sx_xunlock(&rtnl_cloner_lock);
1442 }
1443 
1444 void
1445 rtnl_iface_del_cloner(struct nl_cloner *cloner)
1446 {
1447 	sx_xlock(&rtnl_cloner_lock);
1448 	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
1449 	sx_xunlock(&rtnl_cloner_lock);
1450 }
1451 
1452 void
1453 rtnl_ifaces_init(void)
1454 {
1455 	ifattach_event = EVENTHANDLER_REGISTER(
1456 	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
1457 	    EVENTHANDLER_PRI_ANY);
1458 	ifdetach_event = EVENTHANDLER_REGISTER(
1459 	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
1460 	    EVENTHANDLER_PRI_ANY);
1461 	ifaddr_event = EVENTHANDLER_REGISTER(
1462 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
1463 	    EVENTHANDLER_PRI_ANY);
1464 	iflink_event = EVENTHANDLER_REGISTER(
1465 	    ifnet_link_event, rtnl_handle_iflink, NULL,
1466 	    EVENTHANDLER_PRI_ANY);
1467 	NL_VERIFY_PARSERS(all_parsers);
1468 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
1469 }
1470 
1471 void
1472 rtnl_ifaces_destroy(void)
1473 {
1474 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
1475 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
1476 	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
1477 	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
1478 }
1479