xref: /freebsd/sys/netlink/route/iface.c (revision 38a52bd3b5cac3da6f7f6eef3dd050e6aa08ebb3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include <sys/types.h>
33 #include <sys/eventhandler.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/socket.h>
37 #include <sys/sockio.h>
38 #include <sys/syslog.h>
39 
40 #include <net/if.h>
41 #include <net/if_dl.h>
42 #include <net/if_media.h>
43 #include <net/if_var.h>
44 #include <net/if_clone.h>
45 #include <net/route.h>
46 #include <net/route/nhop.h>
47 #include <net/route/route_ctl.h>
48 #include <netlink/netlink.h>
49 #include <netlink/netlink_ctl.h>
50 #include <netlink/netlink_route.h>
51 #include <netlink/route/route_var.h>
52 
53 #include <netinet6/scope6_var.h> /* scope deembedding */
54 
55 #define	DEBUG_MOD_NAME	nl_iface
56 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
57 #include <netlink/netlink_debug.h>
58 _DECLARE_DEBUG(LOG_DEBUG);
59 
60 struct netlink_walkargs {
61 	struct nl_writer *nw;
62 	struct nlmsghdr hdr;
63 	struct nlpcb *so;
64 	uint32_t fibnum;
65 	int family;
66 	int error;
67 	int count;
68 	int dumped;
69 };
70 
71 static eventhandler_tag ifdetach_event, ifattach_event, ifaddr_event;
72 
73 static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);
74 
75 static struct sx rtnl_cloner_lock;
76 SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");
77 
78 /*
79  * RTM_GETLINK request
80  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
81  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
82  *
83  * Reply:
84  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
85 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
86 
87 [
88 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
89 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
90 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
91 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
92 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
93 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
94  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
95 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
96 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
97 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
98 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
99 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
100 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
101 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
102 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
103 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
104 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
105 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
106 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
107  */
108 
/*
 * Link state values reported via IFLA_OPERSTATE / IFLA_CARRIER.
 */
struct if_state {
	uint8_t		ifla_operstate;	/* one of the IF_OPER_* values */
	uint8_t		ifla_carrier;	/* 1 when carrier is present, 0 otherwise */
};
113 
114 static void
115 get_operstate_ether(struct ifnet *ifp, struct if_state *pstate)
116 {
117 	struct ifmediareq ifmr = {};
118 	int error;
119 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (void *)&ifmr);
120 
121 	if (error != 0) {
122 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
123 		    if_name(ifp), error);
124 		return;
125 	}
126 
127 	switch (IFM_TYPE(ifmr.ifm_active)) {
128 	case IFM_ETHER:
129 		if (ifmr.ifm_status & IFM_ACTIVE) {
130 			pstate->ifla_carrier = 1;
131 			if (ifp->if_flags & IFF_MONITOR)
132 				pstate->ifla_operstate = IF_OPER_DORMANT;
133 			else
134 				pstate->ifla_operstate = IF_OPER_UP;
135 		} else
136 			pstate->ifla_operstate = IF_OPER_DOWN;
137 	}
138 }
139 
140 static bool
141 get_stats(struct nl_writer *nw, struct ifnet *ifp)
142 {
143 	struct rtnl_link_stats64 *stats;
144 
145 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
146 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
147 	if (nla == NULL)
148 		return (false);
149 	nla->nla_type = IFLA_STATS64;
150 	nla->nla_len = nla_len;
151 	stats = (struct rtnl_link_stats64 *)(nla + 1);
152 
153 	stats->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
154 	stats->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
155 	stats->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
156 	stats->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
157 	stats->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
158 	stats->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
159 	stats->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
160 	stats->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
161 	stats->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
162 	stats->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
163 
164 	return (true);
165 }
166 
167 static void
168 get_operstate(struct ifnet *ifp, struct if_state *pstate)
169 {
170 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
171 	pstate->ifla_carrier = 0; /* no carrier */
172 
173 	switch (ifp->if_type) {
174 	case IFT_ETHER:
175 		get_operstate_ether(ifp, pstate);
176 		break;
177 	case IFT_LOOP:
178 		if (ifp->if_flags & IFF_UP) {
179 			pstate->ifla_operstate = IF_OPER_UP;
180 			pstate->ifla_carrier = 1;
181 		} else
182 			pstate->ifla_operstate = IF_OPER_DOWN;
183 		break;
184 	}
185 }
186 
187 static unsigned
188 ifp_flags_to_netlink(const struct ifnet *ifp)
189 {
190         return (ifp->if_flags | ifp->if_drv_flags);
191 }
192 
193 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
194 static bool
195 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
196 {
197         uint32_t addr_len = 0;
198         const void *addr_data = NULL;
199 #ifdef INET6
200         struct in6_addr addr6;
201 #endif
202 
203         if (sa == NULL)
204                 return (true);
205 
206         switch (sa->sa_family) {
207 #ifdef INET
208         case AF_INET:
209                 addr_len = sizeof(struct in_addr);
210                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
211                 break;
212 #endif
213 #ifdef INET6
214         case AF_INET6:
215                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
216                 addr_len = sizeof(struct in6_addr);
217                 addr_data = &addr6;
218                 break;
219 #endif
220         case AF_LINK:
221                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
222                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
223                 break;
224         default:
225                 NL_LOG(LOG_DEBUG, "unsupported family: %d, skipping", sa->sa_family);
226                 return (true);
227         }
228 
229         return (nlattr_add(nw, attr, addr_len, addr_data));
230 }
231 
232 /*
233  * Dumps interface state, properties and metrics.
234  * @nw: message writer
235  * @ifp: target interface
236  * @hdr: template header
237  *
238  * This function is called without epoch and MAY sleep.
239  */
240 static bool
241 dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr)
242 {
243         struct ifinfomsg *ifinfo;
244 
245         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
246 
247 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
248 		goto enomem;
249 
250         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
251         ifinfo->ifi_family = AF_UNSPEC;
252         ifinfo->__ifi_pad = 0;
253         ifinfo->ifi_type = ifp->if_type;
254         ifinfo->ifi_index = ifp->if_index;
255         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
256         ifinfo->ifi_change = 0;
257 
258         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
259 
260 	struct if_state ifs = {};
261 	get_operstate(ifp, &ifs);
262 
263         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
264         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
265 
266 /*
267         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
268         nlattr_add_u8(nw, IFLA_LINKMODE, val);
269 */
270         if ((ifp->if_addr != NULL)) {
271                 dump_sa(nw, IFLA_ADDRESS, ifp->if_addr->ifa_addr);
272         }
273 
274         if ((ifp->if_broadcastaddr != NULL)) {
275 		nlattr_add(nw, IFLA_BROADCAST, ifp->if_addrlen,
276 		    ifp->if_broadcastaddr);
277         }
278 
279         nlattr_add_u32(nw, IFLA_MTU, ifp->if_mtu);
280 /*
281         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
282         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
283         nlattr_add_u32(nw, IFLA_GROUP, 0);
284 */
285 	get_stats(nw, ifp);
286 
287 	uint32_t val = (ifp->if_flags & IFF_PROMISC) != 0;
288         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
289 
290         if (nlmsg_end(nw))
291 		return (true);
292 
293 enomem:
294         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
295         nlmsg_abort(nw);
296         return (false);
297 }
298 
299 static bool
300 check_ifmsg(void *hdr, struct nl_pstate *npt)
301 {
302 	struct ifinfomsg *ifm = hdr;
303 
304 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
305 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
306 		nlmsg_report_err_msg(npt,
307 		    "strict checking: non-zero values in ifinfomsg header");
308 		return (false);
309 	}
310 
311 	return (true);
312 }
313 
314 #define	_IN(_field)	offsetof(struct ifinfomsg, _field)
315 #define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
316 static const struct nlfield_parser nlf_p_if[] = {
317 	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
318 	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
319 };
320 
321 static const struct nlattr_parser nla_p_linfo[] = {
322 	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
323 	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
324 };
325 NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);
326 
327 static const struct nlattr_parser nla_p_if[] = {
328 	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
329 	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
330 	{ .type = IFLA_LINK, .off = _OUT(ifi_index), .cb = nlattr_get_uint32 },
331 	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
332 	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
333 	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
334 };
335 #undef _IN
336 #undef _OUT
337 NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
338 
339 static bool
340 match_iface(struct nl_parsed_link *attrs, struct ifnet *ifp)
341 {
342 	if (attrs->ifi_index != 0 && attrs->ifi_index != ifp->if_index)
343 		return (false);
344 	if (attrs->ifi_type != 0 && attrs->ifi_index != ifp->if_type)
345 		return (false);
346 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
347 		return (false);
348 	/* TODO: add group match */
349 
350 	return (true);
351 }
352 
353 /*
354  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
355  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
356  *   [
357  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
358  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
359  *   ]
360  */
361 static int
362 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
363 {
364 	struct epoch_tracker et;
365         struct ifnet *ifp;
366 	int error = 0;
367 
368 	struct nl_parsed_link attrs = {};
369 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
370 	if (error != 0)
371 		return (error);
372 
373 	struct netlink_walkargs wa = {
374 		.so = nlp,
375 		.nw = npt->nw,
376 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
377 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
378 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
379 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
380 	};
381 
382 	/* Fast track for an interface w/ explicit index match */
383 	if (attrs.ifi_index != 0) {
384 		NET_EPOCH_ENTER(et);
385 		ifp = ifnet_byindex_ref(attrs.ifi_index);
386 		NET_EPOCH_EXIT(et);
387 		NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u", attrs.ifi_index);
388 		if (ifp != NULL) {
389 			if (match_iface(&attrs, ifp)) {
390 				if (!dump_iface(wa.nw, ifp, &wa.hdr))
391 					error = ENOMEM;
392 			} else
393 				error = ESRCH;
394 			if_rele(ifp);
395 		} else
396 			error = ESRCH;
397 		return (error);
398 	}
399 
400 	/*
401 	 * Fetching some link properties require performing ioctl's that may be blocking.
402 	 * Address it by saving referenced pointers of the matching links,
403 	 * exiting from epoch and going through the list one-by-one.
404 	 */
405 
406 	NL_LOG(LOG_DEBUG2, "Start dump");
407 
408 	struct ifnet **match_array;
409 	int offset = 0, base_count = 16; /* start with 128 bytes */
410 	match_array = malloc(base_count * sizeof(void *), M_TEMP, M_NOWAIT);
411 
412 	NLP_LOG(LOG_DEBUG3, nlp, "MATCHING: index=%u type=%d name=%s",
413 	    attrs.ifi_index, attrs.ifi_type, attrs.ifla_ifname);
414 	NET_EPOCH_ENTER(et);
415         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
416 		wa.count++;
417 		if (match_iface(&attrs, ifp)) {
418 			if (offset < base_count) {
419 				if (!if_try_ref(ifp))
420 					continue;
421 				match_array[offset++] = ifp;
422 				continue;
423 			}
424 			/* Too many matches, need to reallocate */
425 			struct ifnet **new_array;
426 			int sz = base_count * sizeof(void *);
427 			base_count *= 2;
428 			new_array = malloc(sz * 2, M_TEMP, M_NOWAIT);
429 			if (new_array == NULL) {
430 				error = ENOMEM;
431 				break;
432 			}
433 			memcpy(new_array, match_array, sz);
434 			free(match_array, M_TEMP);
435 			match_array = new_array;
436                 }
437         }
438 	NET_EPOCH_EXIT(et);
439 
440 	NL_LOG(LOG_DEBUG2, "Matched %d interface(s), dumping", offset);
441 	for (int i = 0; error == 0 && i < offset; i++) {
442 		if (!dump_iface(wa.nw, match_array[i], &wa.hdr))
443 			error = ENOMEM;
444 	}
445 	for (int i = 0; i < offset; i++)
446 		if_rele(match_array[i]);
447 	free(match_array, M_TEMP);
448 
449 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
450 
451 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
452                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
453                 return (ENOMEM);
454         }
455 
456 	return (error);
457 }
458 
459 /*
460  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
461  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
462  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
463  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
464  *   [
465  *    {nla_len=16, nla_type=IFLA_LINKINFO},
466  *     [
467  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
468  *     ]
469  *    ]
470  */
471 
472 static int
473 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
474 {
475 	struct epoch_tracker et;
476         struct ifnet *ifp;
477 	int error;
478 
479 	struct nl_parsed_link attrs = {};
480 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
481 	if (error != 0)
482 		return (error);
483 
484 	NET_EPOCH_ENTER(et);
485 	ifp = ifnet_byindex_ref(attrs.ifi_index);
486 	NET_EPOCH_EXIT(et);
487 	if (ifp == NULL) {
488 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
489 		return (ENOENT);
490 	}
491 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
492 
493 	sx_xlock(&ifnet_detach_sxlock);
494 	error = if_clone_destroy(if_name(ifp));
495 	sx_xunlock(&ifnet_detach_sxlock);
496 
497 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
498 
499 	if_rele(ifp);
500 	return (error);
501 }
502 
503 static int
504 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
505 {
506 	struct nl_cloner *cloner;
507 	int error;
508 
509 	struct nl_parsed_link attrs = {};
510 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
511 	if (error != 0)
512 		return (error);
513 
514 	if (attrs.ifla_ifname == NULL || strlen(attrs.ifla_ifname) == 0) {
515 		/* Applications like ip(8) verify RTM_NEWLINK existance
516 		 * by calling it with empty arguments. Always return "innocent"
517 		 * error.
518 		 */
519 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
520 		return (EPERM);
521 	}
522 
523 	if (attrs.ifla_cloner == NULL || strlen(attrs.ifla_cloner) == 0) {
524 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
525 		return (EINVAL);
526 	}
527 
528 	sx_slock(&rtnl_cloner_lock);
529 	SLIST_FOREACH(cloner, &nl_cloners, next) {
530 		if (!strcmp(attrs.ifla_cloner, cloner->name)) {
531 			error = cloner->create_f(&attrs, nlp, npt);
532 			sx_sunlock(&rtnl_cloner_lock);
533 			return (error);
534 		}
535 	}
536 	sx_sunlock(&rtnl_cloner_lock);
537 
538 	/* TODO: load cloner module if not exists & privilege permits */
539 	NLMSG_REPORT_ERR_MSG(npt, "interface type %s not supported", attrs.ifla_cloner);
540 	return (ENOTSUP);
541 
542 	return (error);
543 }
544 
545 /*
546 
547 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
548  [
549         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
550         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
551         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
552         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
553         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
554 ---
555 
556 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
557  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
558    [
559     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
560    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
561    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
562 */
563 
564 static uint8_t
565 ifa_get_scope(const struct ifaddr *ifa)
566 {
567         const struct sockaddr *sa;
568         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
569 
570         sa = ifa->ifa_addr;
571         switch (sa->sa_family) {
572 #ifdef INET
573         case AF_INET:
574                 {
575                         struct in_addr addr;
576                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
577                         if (IN_LOOPBACK(addr.s_addr))
578                                 addr_scope = RT_SCOPE_HOST;
579                         else if (IN_LINKLOCAL(addr.s_addr))
580                                 addr_scope = RT_SCOPE_LINK;
581                         break;
582                 }
583 #endif
584 #ifdef INET6
585         case AF_INET6:
586                 {
587                         const struct in6_addr *addr;
588                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
589                         if (IN6_IS_ADDR_LOOPBACK(addr))
590                                 addr_scope = RT_SCOPE_HOST;
591                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
592                                 addr_scope = RT_SCOPE_LINK;
593                         break;
594                 }
595 #endif
596         }
597 
598         return (addr_scope);
599 }
600 
601 static uint8_t
602 inet6_get_plen(const struct in6_addr *addr)
603 {
604 
605 	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
606 	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
607 }
608 
/*
 * Returns the prefix length encoded by the netmask @sa, computed as the
 * number of bits set in the mask.
 *
 * Returns 0 when @sa is NULL (the address family is unknown in that case)
 * or when the family is neither AF_INET nor AF_INET6.
 */
static uint8_t
get_sa_plen(const struct sockaddr *sa)
{
#ifdef INET
	const struct in_addr *paddr;
#endif
#ifdef INET6
	const struct in6_addr *paddr6;
#endif

	/* Must be checked before sa_family is read. */
	if (sa == NULL)
		return (0);

	switch (sa->sa_family) {
#ifdef INET
	case AF_INET:
		paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
		return (bitcount32(paddr->s_addr));
#endif
#ifdef INET6
	case AF_INET6:
		paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
		return (inet6_get_plen(paddr6));
#endif
	default:
		break;
	}

	return (0);
}
638 
639 
640 /*
641  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
642            ('IFA_LOCAL', '12.0.0.1'),
643            ('IFA_LABEL', 'eth10'),
644            ('IFA_FLAGS', 128),
645            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
646  */
647 static bool
648 dump_iface_addr(struct nl_writer *nw, struct ifnet *ifp, struct ifaddr *ifa,
649     const struct nlmsghdr *hdr)
650 {
651         struct ifaddrmsg *ifamsg;
652         struct sockaddr *sa = ifa->ifa_addr;
653 
654         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
655             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
656 
657 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
658 		goto enomem;
659 
660         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
661         ifamsg->ifa_family = sa->sa_family;
662         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
663         ifamsg->ifa_flags = 0; // ifa_flags is useless
664         ifamsg->ifa_scope = ifa_get_scope(ifa);
665         ifamsg->ifa_index = ifp->if_index;
666 
667         struct sockaddr *dst_sa = ifa->ifa_dstaddr;
668         if ((dst_sa == NULL) || (dst_sa->sa_family != sa->sa_family))
669                 dst_sa = sa;
670         dump_sa(nw, IFA_ADDRESS, dst_sa);
671         dump_sa(nw, IFA_LOCAL, sa);
672         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
673 
674         uint32_t val = 0; // ifa->ifa_flags;
675         nlattr_add_u32(nw, IFA_FLAGS, val);
676 
677 	if (nlmsg_end(nw))
678 		return (true);
679 enomem:
680         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
681             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
682         nlmsg_abort(nw);
683         return (false);
684 }
685 
686 static int
687 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
688 {
689         struct ifaddr *ifa;
690         struct ifnet *ifp;
691 	int error = 0;
692 
693 	struct netlink_walkargs wa = {
694 		.so = nlp,
695 		.nw = npt->nw,
696 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
697 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
698 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
699 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
700 	};
701 
702 	NL_LOG(LOG_DEBUG2, "Start dump");
703 
704         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
705                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
706                         if (wa.family != 0 && wa.family != ifa->ifa_addr->sa_family)
707                                 continue;
708                         if (ifa->ifa_addr->sa_family == AF_LINK)
709                                 continue;
710 			wa.count++;
711                         if (!dump_iface_addr(wa.nw, ifp, ifa, &wa.hdr)) {
712                                 error = ENOMEM;
713                                 break;
714                         }
715 			wa.dumped++;
716                 }
717                 if (error != 0)
718                         break;
719         }
720 
721 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
722 
723 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
724                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
725                 return (ENOMEM);
726         }
727 
728 	return (error);
729 }
730 
731 static void
732 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
733 {
734 	struct nlmsghdr hdr = {};
735 	struct nl_writer nw = {};
736 	uint32_t group = 0;
737 
738 	switch (ifa->ifa_addr->sa_family) {
739 #ifdef INET
740 	case AF_INET:
741 		group = RTNLGRP_IPV4_IFADDR;
742 		break;
743 #endif
744 #ifdef INET6
745 	case AF_INET6:
746 		group = RTNLGRP_IPV6_IFADDR;
747 		break;
748 #endif
749 	default:
750 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
751 		    ifa->ifa_addr->sa_family);
752 		return;
753 	}
754 
755 	if (!nl_has_listeners(NETLINK_ROUTE, group))
756 		return;
757 
758 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
759 		NL_LOG(LOG_DEBUG, "error allocating group writer");
760 		return;
761 	}
762 
763 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
764 
765 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
766 	nlmsg_flush(&nw);
767 }
768 
769 static void
770 rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
771 {
772 	struct nlmsghdr hdr = { .nlmsg_type = NL_RTM_NEWLINK };
773 	struct nl_writer nw = {};
774 
775 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
776 		return;
777 
778 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
779 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
780 		return;
781 	}
782 	dump_iface(&nw, ifp, &hdr);
783         nlmsg_flush(&nw);
784 }
785 
786 static void
787 rtnl_handle_ifdetach(void *arg, struct ifnet *ifp)
788 {
789 	struct nlmsghdr hdr = { .nlmsg_type = NL_RTM_DELLINK };
790 	struct nl_writer nw = {};
791 
792 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
793 		return;
794 
795 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
796 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
797 		return;
798 	}
799 	dump_iface(&nw, ifp, &hdr);
800         nlmsg_flush(&nw);
801 }
802 
803 static const struct rtnl_cmd_handler cmd_handlers[] = {
804 	{
805 		.cmd = NL_RTM_GETLINK,
806 		.name = "RTM_GETLINK",
807 		.cb = &rtnl_handle_getlink,
808 		.flags = RTNL_F_NOEPOCH,
809 	},
810 	{
811 		.cmd = NL_RTM_DELLINK,
812 		.name = "RTM_DELLINK",
813 		.cb = &rtnl_handle_dellink,
814 		.priv = PRIV_NET_IFDESTROY,
815 		.flags = RTNL_F_NOEPOCH,
816 	},
817 	{
818 		.cmd = NL_RTM_NEWLINK,
819 		.name = "RTM_NEWLINK",
820 		.cb = &rtnl_handle_newlink,
821 		.priv = PRIV_NET_IFCREATE,
822 		.flags = RTNL_F_NOEPOCH,
823 	},
824 	{
825 		.cmd = NL_RTM_GETADDR,
826 		.name = "RTM_GETADDR",
827 		.cb = &rtnl_handle_getaddr,
828 	},
829 	{
830 		.cmd = NL_RTM_NEWADDR,
831 		.name = "RTM_NEWADDR",
832 		.cb = &rtnl_handle_getaddr,
833 	},
834 	{
835 		.cmd = NL_RTM_DELADDR,
836 		.name = "RTM_DELADDR",
837 		.cb = &rtnl_handle_getaddr,
838 	},
839 };
840 
841 static const struct nlhdr_parser *all_parsers[] = { &ifmsg_parser };
842 
843 void
844 rtnl_iface_add_cloner(struct nl_cloner *cloner)
845 {
846 	sx_xlock(&rtnl_cloner_lock);
847 	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
848 	sx_xunlock(&rtnl_cloner_lock);
849 }
850 
851 void rtnl_iface_del_cloner(struct nl_cloner *cloner)
852 {
853 	sx_xlock(&rtnl_cloner_lock);
854 	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
855 	sx_xunlock(&rtnl_cloner_lock);
856 }
857 
858 void
859 rtnl_ifaces_init(void)
860 {
861 	ifattach_event = EVENTHANDLER_REGISTER(
862 	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
863 	    EVENTHANDLER_PRI_ANY);
864 	ifdetach_event = EVENTHANDLER_REGISTER(
865 	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
866 	    EVENTHANDLER_PRI_ANY);
867 	ifaddr_event = EVENTHANDLER_REGISTER(
868 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
869 	    EVENTHANDLER_PRI_ANY);
870 	NL_VERIFY_PARSERS(all_parsers);
871 	rtnl_iface_drivers_register();
872 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
873 }
874 
875 void
876 rtnl_ifaces_destroy(void)
877 {
878 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
879 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
880 	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
881 }
882