xref: /freebsd/sys/netlink/route/iface.c (revision fa4d25f5b4573a54eebeb7f254b52153b8d3811e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include <sys/types.h>
33 #include <sys/eventhandler.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/socket.h>
37 #include <sys/sockio.h>
38 #include <sys/syslog.h>
39 
40 #include <net/if.h>
41 #include <net/if_dl.h>
42 #include <net/if_media.h>
43 #include <net/if_var.h>
44 #include <net/if_clone.h>
45 #include <net/route.h>
46 #include <net/route/nhop.h>
47 #include <net/route/route_ctl.h>
48 #include <netlink/netlink.h>
49 #include <netlink/netlink_ctl.h>
50 #include <netlink/netlink_route.h>
51 #include <netlink/route/route_var.h>
52 
53 #include <netinet6/scope6_var.h> /* scope deembedding */
54 
55 #define	DEBUG_MOD_NAME	nl_iface
56 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
57 #include <netlink/netlink_debug.h>
58 _DECLARE_DEBUG(LOG_DEBUG);
59 
/*
 * State shared by the dump handlers below while they walk interfaces
 * or interface addresses.
 */
struct netlink_walkargs {
	struct nl_writer *nw;	/* writer receiving the reply messages */
	struct nlmsghdr hdr;	/* template header used for every reply */
	struct nlpcb *so;	/* netlink control block passed to the handler */
	uint32_t fibnum;	/* not referenced by the handlers visible here */
	int family;		/* address family filter; 0 matches any family */
	int error;		/* not referenced by the handlers visible here */
	int count;		/* number of objects iterated over */
	int dumped;		/* number of objects successfully dumped */
};
70 
71 static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;
72 
73 static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);
74 
75 static struct sx rtnl_cloner_lock;
76 SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");
77 
78 /*
79  * RTM_GETLINK request
80  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
81  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
82  *
83  * Reply:
84  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
85 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
86 
87 [
88 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
89 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
90 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
91 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
92 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
93 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
94  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
95 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
96 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
97 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
98 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
99 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
100 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
101 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
102 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
103 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
104 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
105 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
106 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
107  */
108 
/* Link state reported through the IFLA_OPERSTATE and IFLA_CARRIER attributes. */
struct if_state {
	uint8_t		ifla_operstate;	/* one of the IF_OPER_* values */
	uint8_t		ifla_carrier;	/* 1 when carrier is present, 0 otherwise */
};
113 
114 static void
115 get_operstate_ether(struct ifnet *ifp, struct if_state *pstate)
116 {
117 	struct ifmediareq ifmr = {};
118 	int error;
119 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (void *)&ifmr);
120 
121 	if (error != 0) {
122 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
123 		    if_name(ifp), error);
124 		return;
125 	}
126 
127 	switch (IFM_TYPE(ifmr.ifm_active)) {
128 	case IFM_ETHER:
129 		if (ifmr.ifm_status & IFM_ACTIVE) {
130 			pstate->ifla_carrier = 1;
131 			if (ifp->if_flags & IFF_MONITOR)
132 				pstate->ifla_operstate = IF_OPER_DORMANT;
133 			else
134 				pstate->ifla_operstate = IF_OPER_UP;
135 		} else
136 			pstate->ifla_operstate = IF_OPER_DOWN;
137 	}
138 }
139 
140 static bool
141 get_stats(struct nl_writer *nw, struct ifnet *ifp)
142 {
143 	struct rtnl_link_stats64 *stats;
144 
145 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
146 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
147 	if (nla == NULL)
148 		return (false);
149 	nla->nla_type = IFLA_STATS64;
150 	nla->nla_len = nla_len;
151 	stats = (struct rtnl_link_stats64 *)(nla + 1);
152 
153 	stats->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
154 	stats->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
155 	stats->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
156 	stats->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
157 	stats->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
158 	stats->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
159 	stats->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
160 	stats->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
161 	stats->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
162 	stats->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
163 
164 	return (true);
165 }
166 
167 static void
168 get_operstate(struct ifnet *ifp, struct if_state *pstate)
169 {
170 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
171 	pstate->ifla_carrier = 0; /* no carrier */
172 
173 	switch (ifp->if_type) {
174 	case IFT_ETHER:
175 		get_operstate_ether(ifp, pstate);
176 		break;
177 	case IFT_LOOP:
178 		if (ifp->if_flags & IFF_UP) {
179 			pstate->ifla_operstate = IF_OPER_UP;
180 			pstate->ifla_carrier = 1;
181 		} else
182 			pstate->ifla_operstate = IF_OPER_DOWN;
183 		break;
184 	}
185 }
186 
187 static unsigned
188 ifp_flags_to_netlink(const struct ifnet *ifp)
189 {
190         return (ifp->if_flags | ifp->if_drv_flags);
191 }
192 
193 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
194 static bool
195 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
196 {
197         uint32_t addr_len = 0;
198         const void *addr_data = NULL;
199 #ifdef INET6
200         struct in6_addr addr6;
201 #endif
202 
203         if (sa == NULL)
204                 return (true);
205 
206         switch (sa->sa_family) {
207 #ifdef INET
208         case AF_INET:
209                 addr_len = sizeof(struct in_addr);
210                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
211                 break;
212 #endif
213 #ifdef INET6
214         case AF_INET6:
215                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
216                 addr_len = sizeof(struct in6_addr);
217                 addr_data = &addr6;
218                 break;
219 #endif
220         case AF_LINK:
221                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
222                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
223                 break;
224         default:
225                 NL_LOG(LOG_DEBUG, "unsupported family: %d, skipping", sa->sa_family);
226                 return (true);
227         }
228 
229         return (nlattr_add(nw, attr, addr_len, addr_data));
230 }
231 
232 /*
233  * Dumps interface state, properties and metrics.
234  * @nw: message writer
235  * @ifp: target interface
236  * @hdr: template header
237  * @if_flags_mask: changed if_[drv]_flags bitmask
238  *
239  * This function is called without epoch and MAY sleep.
240  */
241 static bool
242 dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr,
243     int if_flags_mask)
244 {
245         struct ifinfomsg *ifinfo;
246 
247         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
248 
249 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
250 		goto enomem;
251 
252         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
253         ifinfo->ifi_family = AF_UNSPEC;
254         ifinfo->__ifi_pad = 0;
255         ifinfo->ifi_type = ifp->if_type;
256         ifinfo->ifi_index = ifp->if_index;
257         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
258         ifinfo->ifi_change = if_flags_mask;
259 
260 	struct if_state ifs = {};
261 	get_operstate(ifp, &ifs);
262 
263 	if (ifs.ifla_operstate == IF_OPER_UP)
264 		ifinfo->ifi_flags |= IFF_LOWER_UP;
265 
266         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
267         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
268         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
269 
270 /*
271         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
272         nlattr_add_u8(nw, IFLA_LINKMODE, val);
273 */
274         if ((ifp->if_addr != NULL)) {
275                 dump_sa(nw, IFLA_ADDRESS, ifp->if_addr->ifa_addr);
276         }
277 
278         if ((ifp->if_broadcastaddr != NULL)) {
279 		nlattr_add(nw, IFLA_BROADCAST, ifp->if_addrlen,
280 		    ifp->if_broadcastaddr);
281         }
282 
283         nlattr_add_u32(nw, IFLA_MTU, ifp->if_mtu);
284 /*
285         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
286         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
287         nlattr_add_u32(nw, IFLA_GROUP, 0);
288 */
289 	get_stats(nw, ifp);
290 
291 	uint32_t val = (ifp->if_flags & IFF_PROMISC) != 0;
292         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
293 
294         if (nlmsg_end(nw))
295 		return (true);
296 
297 enomem:
298         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
299         nlmsg_abort(nw);
300         return (false);
301 }
302 
303 static bool
304 check_ifmsg(void *hdr, struct nl_pstate *npt)
305 {
306 	struct ifinfomsg *ifm = hdr;
307 
308 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
309 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
310 		nlmsg_report_err_msg(npt,
311 		    "strict checking: non-zero values in ifinfomsg header");
312 		return (false);
313 	}
314 
315 	return (true);
316 }
317 
318 #define	_IN(_field)	offsetof(struct ifinfomsg, _field)
319 #define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
320 static const struct nlfield_parser nlf_p_if[] = {
321 	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
322 	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
323 };
324 
325 static const struct nlattr_parser nla_p_linfo[] = {
326 	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
327 	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
328 };
329 NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);
330 
331 static const struct nlattr_parser nla_p_if[] = {
332 	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
333 	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
334 	{ .type = IFLA_LINK, .off = _OUT(ifi_index), .cb = nlattr_get_uint32 },
335 	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
336 	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
337 	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
338 };
339 #undef _IN
340 #undef _OUT
341 NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
342 
343 static bool
344 match_iface(struct nl_parsed_link *attrs, struct ifnet *ifp)
345 {
346 	if (attrs->ifi_index != 0 && attrs->ifi_index != ifp->if_index)
347 		return (false);
348 	if (attrs->ifi_type != 0 && attrs->ifi_index != ifp->if_type)
349 		return (false);
350 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
351 		return (false);
352 	/* TODO: add group match */
353 
354 	return (true);
355 }
356 
357 /*
358  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
359  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
360  *   [
361  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
362  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
363  *   ]
364  */
365 static int
366 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
367 {
368 	struct epoch_tracker et;
369         struct ifnet *ifp;
370 	int error = 0;
371 
372 	struct nl_parsed_link attrs = {};
373 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
374 	if (error != 0)
375 		return (error);
376 
377 	struct netlink_walkargs wa = {
378 		.so = nlp,
379 		.nw = npt->nw,
380 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
381 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
382 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
383 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
384 	};
385 
386 	/* Fast track for an interface w/ explicit index match */
387 	if (attrs.ifi_index != 0) {
388 		NET_EPOCH_ENTER(et);
389 		ifp = ifnet_byindex_ref(attrs.ifi_index);
390 		NET_EPOCH_EXIT(et);
391 		NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u", attrs.ifi_index);
392 		if (ifp != NULL) {
393 			if (match_iface(&attrs, ifp)) {
394 				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
395 					error = ENOMEM;
396 			} else
397 				error = ESRCH;
398 			if_rele(ifp);
399 		} else
400 			error = ESRCH;
401 		return (error);
402 	}
403 
404 	/*
405 	 * Fetching some link properties require performing ioctl's that may be blocking.
406 	 * Address it by saving referenced pointers of the matching links,
407 	 * exiting from epoch and going through the list one-by-one.
408 	 */
409 
410 	NL_LOG(LOG_DEBUG2, "Start dump");
411 
412 	struct ifnet **match_array;
413 	int offset = 0, base_count = 16; /* start with 128 bytes */
414 	match_array = malloc(base_count * sizeof(void *), M_TEMP, M_NOWAIT);
415 
416 	NLP_LOG(LOG_DEBUG3, nlp, "MATCHING: index=%u type=%d name=%s",
417 	    attrs.ifi_index, attrs.ifi_type, attrs.ifla_ifname);
418 	NET_EPOCH_ENTER(et);
419         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
420 		wa.count++;
421 		if (match_iface(&attrs, ifp)) {
422 			if (offset < base_count) {
423 				if (!if_try_ref(ifp))
424 					continue;
425 				match_array[offset++] = ifp;
426 				continue;
427 			}
428 			/* Too many matches, need to reallocate */
429 			struct ifnet **new_array;
430 			int sz = base_count * sizeof(void *);
431 			base_count *= 2;
432 			new_array = malloc(sz * 2, M_TEMP, M_NOWAIT);
433 			if (new_array == NULL) {
434 				error = ENOMEM;
435 				break;
436 			}
437 			memcpy(new_array, match_array, sz);
438 			free(match_array, M_TEMP);
439 			match_array = new_array;
440                 }
441         }
442 	NET_EPOCH_EXIT(et);
443 
444 	NL_LOG(LOG_DEBUG2, "Matched %d interface(s), dumping", offset);
445 	for (int i = 0; error == 0 && i < offset; i++) {
446 		if (!dump_iface(wa.nw, match_array[i], &wa.hdr, 0))
447 			error = ENOMEM;
448 	}
449 	for (int i = 0; i < offset; i++)
450 		if_rele(match_array[i]);
451 	free(match_array, M_TEMP);
452 
453 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
454 
455 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
456                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
457                 return (ENOMEM);
458         }
459 
460 	return (error);
461 }
462 
/*
 * Example RTM_NEWLINK request, as shown by strace:
464  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
465  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
466  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
467  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
468  *   [
469  *    {nla_len=16, nla_type=IFLA_LINKINFO},
470  *     [
471  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
472  *     ]
473  *    ]
474  */
475 
476 static int
477 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
478 {
479 	struct epoch_tracker et;
480         struct ifnet *ifp;
481 	int error;
482 
483 	struct nl_parsed_link attrs = {};
484 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
485 	if (error != 0)
486 		return (error);
487 
488 	NET_EPOCH_ENTER(et);
489 	ifp = ifnet_byindex_ref(attrs.ifi_index);
490 	NET_EPOCH_EXIT(et);
491 	if (ifp == NULL) {
492 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
493 		return (ENOENT);
494 	}
495 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
496 
497 	sx_xlock(&ifnet_detach_sxlock);
498 	error = if_clone_destroy(if_name(ifp));
499 	sx_xunlock(&ifnet_detach_sxlock);
500 
501 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
502 
503 	if_rele(ifp);
504 	return (error);
505 }
506 
507 static int
508 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
509 {
510 	struct nl_cloner *cloner;
511 	int error;
512 
513 	struct nl_parsed_link attrs = {};
514 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
515 	if (error != 0)
516 		return (error);
517 
518 	if (attrs.ifla_ifname == NULL || strlen(attrs.ifla_ifname) == 0) {
519 		/* Applications like ip(8) verify RTM_NEWLINK existance
520 		 * by calling it with empty arguments. Always return "innocent"
521 		 * error.
522 		 */
523 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
524 		return (EPERM);
525 	}
526 
527 	if (attrs.ifla_cloner == NULL || strlen(attrs.ifla_cloner) == 0) {
528 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
529 		return (EINVAL);
530 	}
531 
532 	sx_slock(&rtnl_cloner_lock);
533 	SLIST_FOREACH(cloner, &nl_cloners, next) {
534 		if (!strcmp(attrs.ifla_cloner, cloner->name)) {
535 			error = cloner->create_f(&attrs, nlp, npt);
536 			sx_sunlock(&rtnl_cloner_lock);
537 			return (error);
538 		}
539 	}
540 	sx_sunlock(&rtnl_cloner_lock);
541 
542 	/* TODO: load cloner module if not exists & privilege permits */
543 	NLMSG_REPORT_ERR_MSG(npt, "interface type %s not supported", attrs.ifla_cloner);
544 	return (ENOTSUP);
545 
546 	return (error);
547 }
548 
549 /*
550 
551 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
552  [
553         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
554         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
555         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
556         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
557         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
558 ---
559 
560 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
561  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
562    [
563     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
564    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
565    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
566 */
567 
568 static uint8_t
569 ifa_get_scope(const struct ifaddr *ifa)
570 {
571         const struct sockaddr *sa;
572         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
573 
574         sa = ifa->ifa_addr;
575         switch (sa->sa_family) {
576 #ifdef INET
577         case AF_INET:
578                 {
579                         struct in_addr addr;
580                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
581                         if (IN_LOOPBACK(addr.s_addr))
582                                 addr_scope = RT_SCOPE_HOST;
583                         else if (IN_LINKLOCAL(addr.s_addr))
584                                 addr_scope = RT_SCOPE_LINK;
585                         break;
586                 }
587 #endif
588 #ifdef INET6
589         case AF_INET6:
590                 {
591                         const struct in6_addr *addr;
592                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
593                         if (IN6_IS_ADDR_LOOPBACK(addr))
594                                 addr_scope = RT_SCOPE_HOST;
595                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
596                                 addr_scope = RT_SCOPE_LINK;
597                         break;
598                 }
599 #endif
600         }
601 
602         return (addr_scope);
603 }
604 
605 static uint8_t
606 inet6_get_plen(const struct in6_addr *addr)
607 {
608 
609 	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
610 	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
611 }
612 
613 static uint8_t
614 get_sa_plen(const struct sockaddr *sa)
615 {
616 #ifdef INET
617         const struct in_addr *paddr;
618 #endif
619 #ifdef INET6
620         const struct in6_addr *paddr6;
621 #endif
622 
623         switch (sa->sa_family) {
624 #ifdef INET
625         case AF_INET:
626                 if (sa == NULL)
627                         return (32);
628                 paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
629                 return bitcount32(paddr->s_addr);;
630 #endif
631 #ifdef INET6
632         case AF_INET6:
633                 if (sa == NULL)
634                         return (128);
635                 paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
636                 return inet6_get_plen(paddr6);
637 #endif
638         }
639 
640         return (0);
641 }
642 
643 
644 /*
645  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
646            ('IFA_LOCAL', '12.0.0.1'),
647            ('IFA_LABEL', 'eth10'),
648            ('IFA_FLAGS', 128),
649            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
650  */
651 static bool
652 dump_iface_addr(struct nl_writer *nw, struct ifnet *ifp, struct ifaddr *ifa,
653     const struct nlmsghdr *hdr)
654 {
655         struct ifaddrmsg *ifamsg;
656         struct sockaddr *sa = ifa->ifa_addr;
657 
658         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
659             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
660 
661 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
662 		goto enomem;
663 
664         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
665         ifamsg->ifa_family = sa->sa_family;
666         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
667         ifamsg->ifa_flags = 0; // ifa_flags is useless
668         ifamsg->ifa_scope = ifa_get_scope(ifa);
669         ifamsg->ifa_index = ifp->if_index;
670 
671         struct sockaddr *dst_sa = ifa->ifa_dstaddr;
672         if ((dst_sa == NULL) || (dst_sa->sa_family != sa->sa_family))
673                 dst_sa = sa;
674         dump_sa(nw, IFA_ADDRESS, dst_sa);
675         dump_sa(nw, IFA_LOCAL, sa);
676         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
677 
678         uint32_t val = 0; // ifa->ifa_flags;
679         nlattr_add_u32(nw, IFA_FLAGS, val);
680 
681 	if (nlmsg_end(nw))
682 		return (true);
683 enomem:
684         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
685             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
686         nlmsg_abort(nw);
687         return (false);
688 }
689 
690 static int
691 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
692 {
693         struct ifaddr *ifa;
694         struct ifnet *ifp;
695 	int error = 0;
696 
697 	struct netlink_walkargs wa = {
698 		.so = nlp,
699 		.nw = npt->nw,
700 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
701 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
702 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
703 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
704 	};
705 
706 	NL_LOG(LOG_DEBUG2, "Start dump");
707 
708         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
709                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
710                         if (wa.family != 0 && wa.family != ifa->ifa_addr->sa_family)
711                                 continue;
712                         if (ifa->ifa_addr->sa_family == AF_LINK)
713                                 continue;
714 			wa.count++;
715                         if (!dump_iface_addr(wa.nw, ifp, ifa, &wa.hdr)) {
716                                 error = ENOMEM;
717                                 break;
718                         }
719 			wa.dumped++;
720                 }
721                 if (error != 0)
722                         break;
723         }
724 
725 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
726 
727 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
728                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
729                 return (ENOMEM);
730         }
731 
732 	return (error);
733 }
734 
735 static void
736 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
737 {
738 	struct nlmsghdr hdr = {};
739 	struct nl_writer nw = {};
740 	uint32_t group = 0;
741 
742 	switch (ifa->ifa_addr->sa_family) {
743 #ifdef INET
744 	case AF_INET:
745 		group = RTNLGRP_IPV4_IFADDR;
746 		break;
747 #endif
748 #ifdef INET6
749 	case AF_INET6:
750 		group = RTNLGRP_IPV6_IFADDR;
751 		break;
752 #endif
753 	default:
754 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
755 		    ifa->ifa_addr->sa_family);
756 		return;
757 	}
758 
759 	if (!nl_has_listeners(NETLINK_ROUTE, group))
760 		return;
761 
762 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
763 		NL_LOG(LOG_DEBUG, "error allocating group writer");
764 		return;
765 	}
766 
767 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
768 
769 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
770 	nlmsg_flush(&nw);
771 }
772 
773 static void
774 rtnl_handle_ifevent(struct ifnet *ifp, int nlmsg_type, int if_flags_mask)
775 {
776 	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
777 	struct nl_writer nw = {};
778 
779 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
780 		return;
781 
782 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
783 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
784 		return;
785 	}
786 	dump_iface(&nw, ifp, &hdr, if_flags_mask);
787         nlmsg_flush(&nw);
788 }
789 
790 static void
791 rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
792 {
793 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
794 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
795 }
796 
797 static void
798 rtnl_handle_ifdetach(void *arg, struct ifnet *ifp)
799 {
800 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
801 	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
802 }
803 
804 static void
805 rtnl_handle_iflink(void *arg, struct ifnet *ifp)
806 {
807 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
808 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
809 }
810 
811 void
812 rtnl_handle_ifnet_event(struct ifnet *ifp, int if_flags_mask)
813 {
814 	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
815 	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
816 }
817 
/*
 * Message handlers registered by rtnl_ifaces_init().
 *
 * The link handlers carry RTNL_F_NOEPOCH; dump_iface() documents that it
 * is called without epoch and may sleep.
 *
 * NOTE(review): RTM_NEWADDR and RTM_DELADDR are routed to the dump
 * handler rtnl_handle_getaddr() and have no .priv set - this looks like
 * a placeholder until dedicated handlers exist; confirm.
 */
static const struct rtnl_cmd_handler cmd_handlers[] = {
	{
		.cmd = NL_RTM_GETLINK,
		.name = "RTM_GETLINK",
		.cb = &rtnl_handle_getlink,
		.flags = RTNL_F_NOEPOCH,
	},
	{
		.cmd = NL_RTM_DELLINK,
		.name = "RTM_DELLINK",
		.cb = &rtnl_handle_dellink,
		.priv = PRIV_NET_IFDESTROY,
		.flags = RTNL_F_NOEPOCH,
	},
	{
		.cmd = NL_RTM_NEWLINK,
		.name = "RTM_NEWLINK",
		.cb = &rtnl_handle_newlink,
		.priv = PRIV_NET_IFCREATE,
		.flags = RTNL_F_NOEPOCH,
	},
	{
		.cmd = NL_RTM_GETADDR,
		.name = "RTM_GETADDR",
		.cb = &rtnl_handle_getaddr,
	},
	{
		.cmd = NL_RTM_NEWADDR,
		.name = "RTM_NEWADDR",
		.cb = &rtnl_handle_getaddr,
	},
	{
		.cmd = NL_RTM_DELADDR,
		.name = "RTM_DELADDR",
		.cb = &rtnl_handle_getaddr,
	},
};
855 
/* Parsers handed to NL_VERIFY_PARSERS() in rtnl_ifaces_init(). */
static const struct nlhdr_parser *all_parsers[] = { &ifmsg_parser };
857 
/*
 * Registers @cloner so that RTM_NEWLINK requests naming it can be served.
 * The cloner is linked into the list itself, not copied.
 */
void
rtnl_iface_add_cloner(struct nl_cloner *cloner)
{
	sx_xlock(&rtnl_cloner_lock);
	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
	sx_xunlock(&rtnl_cloner_lock);
}
865 
866 void rtnl_iface_del_cloner(struct nl_cloner *cloner)
867 {
868 	sx_xlock(&rtnl_cloner_lock);
869 	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
870 	sx_xunlock(&rtnl_cloner_lock);
871 }
872 
873 void
874 rtnl_ifaces_init(void)
875 {
876 	ifattach_event = EVENTHANDLER_REGISTER(
877 	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
878 	    EVENTHANDLER_PRI_ANY);
879 	ifdetach_event = EVENTHANDLER_REGISTER(
880 	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
881 	    EVENTHANDLER_PRI_ANY);
882 	ifaddr_event = EVENTHANDLER_REGISTER(
883 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
884 	    EVENTHANDLER_PRI_ANY);
885 	iflink_event = EVENTHANDLER_REGISTER(
886 	    ifnet_link_event, rtnl_handle_iflink, NULL,
887 	    EVENTHANDLER_PRI_ANY);
888 	NL_VERIFY_PARSERS(all_parsers);
889 	rtnl_iface_drivers_register();
890 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
891 }
892 
/*
 * Removes the event subscriptions made by rtnl_ifaces_init().
 *
 * NOTE(review): only the event handlers are undone here; the message
 * handler and driver registrations performed by rtnl_ifaces_init() are
 * not reverted in this function - confirm they are handled elsewhere.
 */
void
rtnl_ifaces_destroy(void)
{
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
}
901