xref: /freebsd/sys/netlink/route/iface.c (revision 090e9752d7291db0c251a5576892e3bbbaea2479)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_netlink.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34 #include <sys/types.h>
35 #include <sys/eventhandler.h>
36 #include <sys/kernel.h>
37 #include <sys/jail.h>
38 #include <sys/malloc.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <sys/syslog.h>
42 
43 #include <net/if.h>
44 #include <net/if_dl.h>
45 #include <net/if_media.h>
46 #include <net/if_var.h>
47 #include <net/if_clone.h>
48 #include <net/route.h>
49 #include <net/route/nhop.h>
50 #include <net/route/route_ctl.h>
51 #include <netlink/netlink.h>
52 #include <netlink/netlink_ctl.h>
53 #include <netlink/netlink_route.h>
54 #include <netlink/route/route_var.h>
55 
56 #include <netinet6/scope6_var.h> /* scope deembedding */
57 
58 #define	DEBUG_MOD_NAME	nl_iface
59 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
60 #include <netlink/netlink_debug.h>
61 _DECLARE_DEBUG(LOG_DEBUG);
62 
63 struct netlink_walkargs {
64 	struct nl_writer *nw;
65 	struct nlmsghdr hdr;
66 	struct nlpcb *so;
67 	struct ucred *cred;
68 	uint32_t fibnum;
69 	int family;
70 	int error;
71 	int count;
72 	int dumped;
73 };
74 
75 static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;
76 
77 static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);
78 
79 static struct sx rtnl_cloner_lock;
80 SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");
81 
82 /*
83  * RTM_GETLINK request
84  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
85  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
86  *
87  * Reply:
88  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
89 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
90 
91 [
92 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
93 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
94 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
95 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
96 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
97 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
98  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
99 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
100 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
101 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
102 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
103 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
104 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
105 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
106 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
107 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
108 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
109 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
110 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
111  */
112 
113 struct if_state {
114 	uint8_t		ifla_operstate;
115 	uint8_t		ifla_carrier;
116 };
117 
118 static void
119 get_operstate_ether(struct ifnet *ifp, struct if_state *pstate)
120 {
121 	struct ifmediareq ifmr = {};
122 	int error;
123 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (void *)&ifmr);
124 
125 	if (error != 0) {
126 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
127 		    if_name(ifp), error);
128 		return;
129 	}
130 
131 	switch (IFM_TYPE(ifmr.ifm_active)) {
132 	case IFM_ETHER:
133 		if (ifmr.ifm_status & IFM_ACTIVE) {
134 			pstate->ifla_carrier = 1;
135 			if (ifp->if_flags & IFF_MONITOR)
136 				pstate->ifla_operstate = IF_OPER_DORMANT;
137 			else
138 				pstate->ifla_operstate = IF_OPER_UP;
139 		} else
140 			pstate->ifla_operstate = IF_OPER_DOWN;
141 	}
142 }
143 
144 static bool
145 get_stats(struct nl_writer *nw, struct ifnet *ifp)
146 {
147 	struct rtnl_link_stats64 *stats;
148 
149 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
150 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
151 	if (nla == NULL)
152 		return (false);
153 	nla->nla_type = IFLA_STATS64;
154 	nla->nla_len = nla_len;
155 	stats = (struct rtnl_link_stats64 *)(nla + 1);
156 
157 	stats->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
158 	stats->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
159 	stats->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
160 	stats->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
161 	stats->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
162 	stats->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
163 	stats->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
164 	stats->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
165 	stats->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
166 	stats->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
167 
168 	return (true);
169 }
170 
171 static void
172 get_operstate(struct ifnet *ifp, struct if_state *pstate)
173 {
174 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
175 	pstate->ifla_carrier = 0; /* no carrier */
176 
177 	switch (ifp->if_type) {
178 	case IFT_ETHER:
179 	case IFT_L2VLAN:
180 		get_operstate_ether(ifp, pstate);
181 		break;
182 	default:
183 		/* Map admin state to the operstate */
184 		if (ifp->if_flags & IFF_UP) {
185 			pstate->ifla_operstate = IF_OPER_UP;
186 			pstate->ifla_carrier = 1;
187 		} else
188 			pstate->ifla_operstate = IF_OPER_DOWN;
189 		break;
190 	}
191 }
192 
193 static unsigned
194 ifp_flags_to_netlink(const struct ifnet *ifp)
195 {
196         return (ifp->if_flags | ifp->if_drv_flags);
197 }
198 
199 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
200 static bool
201 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
202 {
203         uint32_t addr_len = 0;
204         const void *addr_data = NULL;
205 #ifdef INET6
206         struct in6_addr addr6;
207 #endif
208 
209         if (sa == NULL)
210                 return (true);
211 
212         switch (sa->sa_family) {
213 #ifdef INET
214         case AF_INET:
215                 addr_len = sizeof(struct in_addr);
216                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
217                 break;
218 #endif
219 #ifdef INET6
220         case AF_INET6:
221                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
222                 addr_len = sizeof(struct in6_addr);
223                 addr_data = &addr6;
224                 break;
225 #endif
226         case AF_LINK:
227                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
228                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
229                 break;
230         default:
231                 NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family);
232                 return (true);
233         }
234 
235         return (nlattr_add(nw, attr, addr_len, addr_data));
236 }
237 
238 /*
239  * Dumps interface state, properties and metrics.
240  * @nw: message writer
241  * @ifp: target interface
242  * @hdr: template header
243  * @if_flags_mask: changed if_[drv]_flags bitmask
244  *
245  * This function is called without epoch and MAY sleep.
246  */
247 static bool
248 dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr,
249     int if_flags_mask)
250 {
251         struct ifinfomsg *ifinfo;
252 
253         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
254 
255 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
256 		goto enomem;
257 
258         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
259         ifinfo->ifi_family = AF_UNSPEC;
260         ifinfo->__ifi_pad = 0;
261         ifinfo->ifi_type = ifp->if_type;
262         ifinfo->ifi_index = ifp->if_index;
263         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
264         ifinfo->ifi_change = if_flags_mask;
265 
266 	struct if_state ifs = {};
267 	get_operstate(ifp, &ifs);
268 
269 	if (ifs.ifla_operstate == IF_OPER_UP)
270 		ifinfo->ifi_flags |= IFF_LOWER_UP;
271 
272         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
273         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
274         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
275 
276 /*
277         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
278         nlattr_add_u8(nw, IFLA_LINKMODE, val);
279 */
280         if ((ifp->if_addr != NULL)) {
281                 dump_sa(nw, IFLA_ADDRESS, ifp->if_addr->ifa_addr);
282         }
283 
284         if ((ifp->if_broadcastaddr != NULL)) {
285 		nlattr_add(nw, IFLA_BROADCAST, ifp->if_addrlen,
286 		    ifp->if_broadcastaddr);
287         }
288 
289         nlattr_add_u32(nw, IFLA_MTU, ifp->if_mtu);
290 /*
291         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
292         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
293         nlattr_add_u32(nw, IFLA_GROUP, 0);
294 */
295 
296 	if (ifp->if_description != NULL)
297 		nlattr_add_string(nw, IFLA_IFALIAS, ifp->if_description);
298 
299 	get_stats(nw, ifp);
300 
301 	uint32_t val = (ifp->if_flags & IFF_PROMISC) != 0;
302         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
303 
304 	ifc_dump_ifp_nl(ifp, nw);
305 
306         if (nlmsg_end(nw))
307 		return (true);
308 
309 enomem:
310         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
311         nlmsg_abort(nw);
312         return (false);
313 }
314 
315 static bool
316 check_ifmsg(void *hdr, struct nl_pstate *npt)
317 {
318 	struct ifinfomsg *ifm = hdr;
319 
320 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
321 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
322 		nlmsg_report_err_msg(npt,
323 		    "strict checking: non-zero values in ifinfomsg header");
324 		return (false);
325 	}
326 
327 	return (true);
328 }
329 
330 #define	_IN(_field)	offsetof(struct ifinfomsg, _field)
331 #define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
332 static const struct nlfield_parser nlf_p_if[] = {
333 	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
334 	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
335 	{ .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 },
336 	{ .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 },
337 };
338 
339 static const struct nlattr_parser nla_p_linfo[] = {
340 	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
341 	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
342 };
343 NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);
344 
345 static const struct nlattr_parser nla_p_if[] = {
346 	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
347 	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
348 	{ .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 },
349 	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
350 	{ .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string },
351 	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
352 	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
353 };
354 #undef _IN
355 #undef _OUT
356 NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
357 
358 static bool
359 match_iface(struct ifnet *ifp, void *_arg)
360 {
361 	struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg;
362 
363 	if (attrs->ifi_index != 0 && attrs->ifi_index != ifp->if_index)
364 		return (false);
365 	if (attrs->ifi_type != 0 && attrs->ifi_index != ifp->if_type)
366 		return (false);
367 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
368 		return (false);
369 	/* TODO: add group match */
370 
371 	return (true);
372 }
373 
374 static int
375 dump_cb(struct ifnet *ifp, void *_arg)
376 {
377 	struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg;
378 	if (!dump_iface(wa->nw, ifp, &wa->hdr, 0))
379 		return (ENOMEM);
380 	return (0);
381 }
382 
383 /*
384  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
385  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
386  *   [
387  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
388  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
389  *   ]
390  */
391 static int
392 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
393 {
394 	struct epoch_tracker et;
395         struct ifnet *ifp;
396 	int error = 0;
397 
398 	struct nl_parsed_link attrs = {};
399 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
400 	if (error != 0)
401 		return (error);
402 
403 	struct netlink_walkargs wa = {
404 		.so = nlp,
405 		.nw = npt->nw,
406 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
407 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
408 		.hdr.nlmsg_flags = hdr->nlmsg_flags,
409 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
410 	};
411 
412 	/* Fast track for an interface w/ explicit name or index match */
413 	if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) {
414 		if (attrs.ifi_index != 0) {
415 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u",
416 			    attrs.ifi_index);
417 			NET_EPOCH_ENTER(et);
418 			ifp = ifnet_byindex_ref(attrs.ifi_index);
419 			NET_EPOCH_EXIT(et);
420 		} else {
421 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s",
422 			    attrs.ifla_ifname);
423 			ifp = ifunit_ref(attrs.ifla_ifname);
424 		}
425 
426 		if (ifp != NULL) {
427 			if (match_iface(ifp, &attrs)) {
428 				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
429 					error = ENOMEM;
430 			} else
431 				error = ENODEV;
432 			if_rele(ifp);
433 		} else
434 			error = ENODEV;
435 		return (error);
436 	}
437 
438 	/* Always treat non-direct-match as a multipart message */
439 	wa.hdr.nlmsg_flags |= NLM_F_MULTI;
440 
441 	/*
442 	 * Fetching some link properties require performing ioctl's that may be blocking.
443 	 * Address it by saving referenced pointers of the matching links,
444 	 * exiting from epoch and going through the list one-by-one.
445 	 */
446 
447 	NL_LOG(LOG_DEBUG2, "Start dump");
448 	if_foreach_sleep(match_iface, &attrs, dump_cb, &wa);
449 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
450 
451 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
452                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
453                 return (ENOMEM);
454         }
455 
456 	return (error);
457 }
458 
459 /*
460  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
461  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
462  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
463  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
464  *   [
465  *    {nla_len=16, nla_type=IFLA_LINKINFO},
466  *     [
467  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
468  *     ]
469  *    ]
470  */
471 
472 static int
473 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
474 {
475 	struct epoch_tracker et;
476         struct ifnet *ifp;
477 	int error;
478 
479 	struct nl_parsed_link attrs = {};
480 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
481 	if (error != 0)
482 		return (error);
483 
484 	NET_EPOCH_ENTER(et);
485 	ifp = ifnet_byindex_ref(attrs.ifi_index);
486 	NET_EPOCH_EXIT(et);
487 	if (ifp == NULL) {
488 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
489 		return (ENOENT);
490 	}
491 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
492 
493 	sx_xlock(&ifnet_detach_sxlock);
494 	error = if_clone_destroy(if_name(ifp));
495 	sx_xunlock(&ifnet_detach_sxlock);
496 
497 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
498 
499 	if_rele(ifp);
500 	return (error);
501 }
502 
503 /*
504  * New link:
505  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0},
506  *   {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
507  *    [
508  *     {{nla_len=8, nla_type=IFLA_MTU}, 123},
509  *     {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"},
510  *     {{nla_len=24, nla_type=IFLA_LINKINFO},
511  *      [
512  *       {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...},
513  *       {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]}
514  *
515  * Update link:
516  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0},
517  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0},
518  * {{nla_len=8, nla_type=IFLA_MTU}, 123}}
519  *
520  *
521  * Check command availability:
522  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0},
523  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
524  */
525 
526 
527 static int
528 create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
529     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
530 {
531 	if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) {
532 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
533 		return (EINVAL);
534 	}
535 	if (lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) {
536 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
537 		return (EINVAL);
538 	}
539 
540 	struct ifc_data_nl ifd = {
541 		.flags = IFC_F_CREATE,
542 		.lattrs = lattrs,
543 		.bm = bm,
544 		.npt = npt,
545 	};
546 	if (ifc_create_ifp_nl(lattrs->ifla_ifname, &ifd) && ifd.error == 0)
547 		nl_store_ifp_cookie(npt, ifd.ifp);
548 
549 	return (ifd.error);
550 }
551 
552 static int
553 modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
554     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
555 {
556 	struct ifnet *ifp = NULL;
557 	struct epoch_tracker et;
558 
559 	if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) {
560 		/*
561 		 * Applications like ip(8) verify RTM_NEWLINK command
562 		 * existence by calling it with empty arguments. Always
563 		 * return "innocent" error in that case.
564 		 */
565 		NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field");
566 		return (EPERM);
567 	}
568 
569 	if (lattrs->ifi_index != 0) {
570 		NET_EPOCH_ENTER(et);
571 		ifp = ifnet_byindex_ref(lattrs->ifi_index);
572 		NET_EPOCH_EXIT(et);
573 		if (ifp == NULL) {
574 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u",
575 			    lattrs->ifi_index);
576 			return (ENOENT);
577 		}
578 	}
579 
580 	if (ifp == NULL && lattrs->ifla_ifname != NULL) {
581 		ifp = ifunit_ref(lattrs->ifla_ifname);
582 		if (ifp == NULL) {
583 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s",
584 			    lattrs->ifla_ifname);
585 			return (ENOENT);
586 		}
587 	}
588 
589 	MPASS(ifp != NULL);
590 
591 	/*
592 	 * Modification request can address either
593 	 * 1) cloned interface, in which case we call the cloner-specific
594 	 *  modification routine
595 	 * or
596 	 * 2) non-cloned (e.g. "physical") interface, in which case we call
597 	 *  generic modification routine
598 	 */
599 	struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt };
600 	if (!ifc_modify_ifp_nl(ifp, &ifd))
601 		ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt);
602 
603 	if_rele(ifp);
604 
605 	return (ifd.error);
606 }
607 
608 
609 static int
610 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
611 {
612 	struct nlattr_bmask bm;
613 	int error;
614 
615 	struct nl_parsed_link attrs = {};
616 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
617 	if (error != 0)
618 		return (error);
619 	nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm);
620 
621 	if (hdr->nlmsg_flags & NLM_F_CREATE)
622 		return (create_link(hdr, &attrs, &bm, nlp, npt));
623 	else
624 		return (modify_link(hdr, &attrs, &bm, nlp, npt));
625 }
626 
627 struct nl_parsed_ifa {
628 	uint8_t		ifa_family;
629 	uint8_t		ifa_prefixlen;
630 	uint8_t		ifa_scope;
631 	uint32_t	ifa_index;
632 	uint32_t	ifa_flags;
633 	struct sockaddr	*ifa_address;
634 	struct sockaddr	*ifa_local;
635 };
636 
637 #define	_IN(_field)	offsetof(struct ifaddrmsg, _field)
638 #define	_OUT(_field)	offsetof(struct nl_parsed_ifa, _field)
639 static const struct nlfield_parser nlf_p_ifa[] = {
640 	{ .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 },
641 	{ .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 },
642 	{ .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 },
643 	{ .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 },
644 	{ .off_in = _IN(ifa_index), .off_out = _OUT(ifa_index), .cb = nlf_get_u32 },
645 };
646 
647 static const struct nlattr_parser nla_p_ifa[] = {
648 	{ .type = IFA_ADDRESS, .off = _OUT(ifa_address), .cb = nlattr_get_ip },
649 	{ .type = IFA_LOCAL, .off = _OUT(ifa_local), .cb = nlattr_get_ip },
650 	{ .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 },
651 };
652 #undef _IN
653 #undef _OUT
654 NL_DECLARE_PARSER(ifaddrmsg_parser, struct ifaddrmsg, nlf_p_ifa, nla_p_ifa);
655 
656 
657 /*
658 
659 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
660  [
661         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
662         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
663         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
664         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
665         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
666 ---
667 
668 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
669  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
670    [
671     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
672    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
673    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
674 */
675 
676 static uint8_t
677 ifa_get_scope(const struct ifaddr *ifa)
678 {
679         const struct sockaddr *sa;
680         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
681 
682         sa = ifa->ifa_addr;
683         switch (sa->sa_family) {
684 #ifdef INET
685         case AF_INET:
686                 {
687                         struct in_addr addr;
688                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
689                         if (IN_LOOPBACK(addr.s_addr))
690                                 addr_scope = RT_SCOPE_HOST;
691                         else if (IN_LINKLOCAL(addr.s_addr))
692                                 addr_scope = RT_SCOPE_LINK;
693                         break;
694                 }
695 #endif
696 #ifdef INET6
697         case AF_INET6:
698                 {
699                         const struct in6_addr *addr;
700                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
701                         if (IN6_IS_ADDR_LOOPBACK(addr))
702                                 addr_scope = RT_SCOPE_HOST;
703                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
704                                 addr_scope = RT_SCOPE_LINK;
705                         break;
706                 }
707 #endif
708         }
709 
710         return (addr_scope);
711 }
712 
#ifdef INET6
/*
 * Returns the number of bits set in an IPv6 netmask, which equals the
 * prefix length for a contiguous mask.
 */
static uint8_t
inet6_get_plen(const struct in6_addr *addr)
{
	unsigned int nbits = 0;

	for (int word = 0; word < 4; word++)
		nbits += bitcount32(addr->s6_addr32[word]);

	return (nbits);
}
#endif
722 
723 static uint8_t
724 get_sa_plen(const struct sockaddr *sa)
725 {
726 #ifdef INET
727         const struct in_addr *paddr;
728 #endif
729 #ifdef INET6
730         const struct in6_addr *paddr6;
731 #endif
732 
733         switch (sa->sa_family) {
734 #ifdef INET
735         case AF_INET:
736                 paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
737                 return bitcount32(paddr->s_addr);;
738 #endif
739 #ifdef INET6
740         case AF_INET6:
741                 paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
742                 return inet6_get_plen(paddr6);
743 #endif
744         }
745 
746         return (0);
747 }
748 
749 
750 /*
751  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
752            ('IFA_LOCAL', '12.0.0.1'),
753            ('IFA_LABEL', 'eth10'),
754            ('IFA_FLAGS', 128),
755            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
756  */
757 static bool
758 dump_iface_addr(struct nl_writer *nw, struct ifnet *ifp, struct ifaddr *ifa,
759     const struct nlmsghdr *hdr)
760 {
761         struct ifaddrmsg *ifamsg;
762         struct sockaddr *sa = ifa->ifa_addr;
763 
764         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
765             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
766 
767 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
768 		goto enomem;
769 
770         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
771         ifamsg->ifa_family = sa->sa_family;
772         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
773         ifamsg->ifa_flags = 0; // ifa_flags is useless
774         ifamsg->ifa_scope = ifa_get_scope(ifa);
775         ifamsg->ifa_index = ifp->if_index;
776 
777 	if (ifp->if_flags & IFF_POINTOPOINT) {
778 		dump_sa(nw, IFA_ADDRESS, ifa->ifa_dstaddr);
779 		dump_sa(nw, IFA_LOCAL, sa);
780 	} else {
781 		dump_sa(nw, IFA_ADDRESS, sa);
782 #ifdef INET
783 		/*
784 		 * In most cases, IFA_ADDRESS == IFA_LOCAL
785 		 * Skip IFA_LOCAL for anything except INET
786 		 */
787 		if (sa->sa_family == AF_INET)
788 			dump_sa(nw, IFA_LOCAL, sa);
789 #endif
790 	}
791 	if (ifp->if_flags & IFF_BROADCAST)
792 		dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr);
793 
794         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
795 
796         uint32_t val = 0; // ifa->ifa_flags;
797         nlattr_add_u32(nw, IFA_FLAGS, val);
798 
799 	if (nlmsg_end(nw))
800 		return (true);
801 enomem:
802         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
803             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
804         nlmsg_abort(nw);
805         return (false);
806 }
807 
808 static int
809 dump_iface_addrs(struct netlink_walkargs *wa, struct ifnet *ifp)
810 {
811         struct ifaddr *ifa;
812 
813 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
814 		if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family)
815 			continue;
816 		if (ifa->ifa_addr->sa_family == AF_LINK)
817 			continue;
818 		if (prison_if(wa->cred, ifa->ifa_addr) != 0)
819 			continue;
820 		wa->count++;
821 		if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr))
822 			return (ENOMEM);
823 		wa->dumped++;
824 	}
825 
826 	return (0);
827 }
828 
829 static int
830 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
831 {
832         struct ifnet *ifp;
833 	int error = 0;
834 
835 	struct nl_parsed_ifa attrs = {};
836 	error = nl_parse_nlmsg(hdr, &ifaddrmsg_parser, npt, &attrs);
837 	if (error != 0)
838 		return (error);
839 
840 	struct netlink_walkargs wa = {
841 		.so = nlp,
842 		.nw = npt->nw,
843 		.cred = nlp_get_cred(nlp),
844 		.family = attrs.ifa_family,
845 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
846 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
847 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
848 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
849 	};
850 
851 	NL_LOG(LOG_DEBUG2, "Start dump");
852 
853 	if (attrs.ifa_index != 0) {
854 		ifp = ifnet_byindex(attrs.ifa_index);
855 		if (ifp == NULL)
856 			error = ENOENT;
857 		else
858 			error = dump_iface_addrs(&wa, ifp);
859 	} else {
860 		CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
861 			error = dump_iface_addrs(&wa, ifp);
862 			if (error != 0)
863 				break;
864 		}
865 	}
866 
867 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
868 
869 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
870                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
871                 return (ENOMEM);
872         }
873 
874 	return (error);
875 }
876 
877 static void
878 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
879 {
880 	struct nlmsghdr hdr = {};
881 	struct nl_writer nw = {};
882 	uint32_t group = 0;
883 
884 	switch (ifa->ifa_addr->sa_family) {
885 #ifdef INET
886 	case AF_INET:
887 		group = RTNLGRP_IPV4_IFADDR;
888 		break;
889 #endif
890 #ifdef INET6
891 	case AF_INET6:
892 		group = RTNLGRP_IPV6_IFADDR;
893 		break;
894 #endif
895 	default:
896 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
897 		    ifa->ifa_addr->sa_family);
898 		return;
899 	}
900 
901 	if (!nl_has_listeners(NETLINK_ROUTE, group))
902 		return;
903 
904 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
905 		NL_LOG(LOG_DEBUG, "error allocating group writer");
906 		return;
907 	}
908 
909 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
910 
911 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
912 	nlmsg_flush(&nw);
913 }
914 
915 static void
916 rtnl_handle_ifevent(struct ifnet *ifp, int nlmsg_type, int if_flags_mask)
917 {
918 	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
919 	struct nl_writer nw = {};
920 
921 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
922 		return;
923 
924 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
925 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
926 		return;
927 	}
928 	dump_iface(&nw, ifp, &hdr, if_flags_mask);
929         nlmsg_flush(&nw);
930 }
931 
932 static void
933 rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
934 {
935 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
936 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
937 }
938 
939 static void
940 rtnl_handle_ifdetach(void *arg, struct ifnet *ifp)
941 {
942 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
943 	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
944 }
945 
946 static void
947 rtnl_handle_iflink(void *arg, struct ifnet *ifp)
948 {
949 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
950 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
951 }
952 
953 void
954 rtnl_handle_ifnet_event(struct ifnet *ifp, int if_flags_mask)
955 {
956 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
957 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
958 }
959 
960 static const struct rtnl_cmd_handler cmd_handlers[] = {
961 	{
962 		.cmd = NL_RTM_GETLINK,
963 		.name = "RTM_GETLINK",
964 		.cb = &rtnl_handle_getlink,
965 		.flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
966 	},
967 	{
968 		.cmd = NL_RTM_DELLINK,
969 		.name = "RTM_DELLINK",
970 		.cb = &rtnl_handle_dellink,
971 		.priv = PRIV_NET_IFDESTROY,
972 		.flags = RTNL_F_NOEPOCH,
973 	},
974 	{
975 		.cmd = NL_RTM_NEWLINK,
976 		.name = "RTM_NEWLINK",
977 		.cb = &rtnl_handle_newlink,
978 		.priv = PRIV_NET_IFCREATE,
979 		.flags = RTNL_F_NOEPOCH,
980 	},
981 	{
982 		.cmd = NL_RTM_GETADDR,
983 		.name = "RTM_GETADDR",
984 		.cb = &rtnl_handle_getaddr,
985 		.flags = RTNL_F_ALLOW_NONVNET_JAIL,
986 	},
987 	{
988 		.cmd = NL_RTM_NEWADDR,
989 		.name = "RTM_NEWADDR",
990 		.cb = &rtnl_handle_getaddr,
991 	},
992 	{
993 		.cmd = NL_RTM_DELADDR,
994 		.name = "RTM_DELADDR",
995 		.cb = &rtnl_handle_getaddr,
996 	},
997 };
998 
999 static const struct nlhdr_parser *all_parsers[] = { &ifmsg_parser, &ifaddrmsg_parser };
1000 
1001 void
1002 rtnl_iface_add_cloner(struct nl_cloner *cloner)
1003 {
1004 	sx_xlock(&rtnl_cloner_lock);
1005 	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
1006 	sx_xunlock(&rtnl_cloner_lock);
1007 }
1008 
1009 void
1010 rtnl_iface_del_cloner(struct nl_cloner *cloner)
1011 {
1012 	sx_xlock(&rtnl_cloner_lock);
1013 	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
1014 	sx_xunlock(&rtnl_cloner_lock);
1015 }
1016 
1017 void
1018 rtnl_ifaces_init(void)
1019 {
1020 	ifattach_event = EVENTHANDLER_REGISTER(
1021 	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
1022 	    EVENTHANDLER_PRI_ANY);
1023 	ifdetach_event = EVENTHANDLER_REGISTER(
1024 	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
1025 	    EVENTHANDLER_PRI_ANY);
1026 	ifaddr_event = EVENTHANDLER_REGISTER(
1027 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
1028 	    EVENTHANDLER_PRI_ANY);
1029 	iflink_event = EVENTHANDLER_REGISTER(
1030 	    ifnet_link_event, rtnl_handle_iflink, NULL,
1031 	    EVENTHANDLER_PRI_ANY);
1032 	NL_VERIFY_PARSERS(all_parsers);
1033 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
1034 }
1035 
1036 void
1037 rtnl_ifaces_destroy(void)
1038 {
1039 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
1040 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
1041 	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
1042 	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
1043 }
1044