xref: /freebsd/sys/netlink/route/iface.c (revision c7a063741720ef81d4caa4613242579d12f1d605)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include <sys/types.h>
33 #include <sys/eventhandler.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/socket.h>
37 #include <sys/sockio.h>
38 #include <sys/syslog.h>
39 
40 #include <net/if.h>
41 #include <net/if_dl.h>
42 #include <net/if_media.h>
43 #include <net/if_var.h>
44 #include <net/if_clone.h>
45 #include <net/route.h>
46 #include <net/route/nhop.h>
47 #include <net/route/route_ctl.h>
48 #include <netlink/netlink.h>
49 #include <netlink/netlink_ctl.h>
50 #include <netlink/netlink_route.h>
51 #include <netlink/route/route_var.h>
52 
53 #include <netinet6/scope6_var.h> /* scope deembedding */
54 
55 #define	DEBUG_MOD_NAME	nl_iface
56 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
57 #include <netlink/netlink_debug.h>
58 _DECLARE_DEBUG(LOG_DEBUG);
59 
/*
 * State shared by the dump handlers in this file while they iterate
 * over interfaces or addresses and emit one reply message per object.
 */
struct netlink_walkargs {
	struct nl_writer *nw;	/* writer the reply messages are emitted to */
	struct nlmsghdr hdr;	/* template header used for every reply */
	struct nlpcb *so;	/* netlink socket that issued the request */
	uint32_t fibnum;	/* not referenced in this file */
	int family;		/* address family filter; 0 matches any family */
	int error;		/* not referenced in this file */
	int count;		/* number of objects iterated over */
	int dumped;		/* number of objects successfully written */
};
70 
/* Tags returned by EVENTHANDLER_REGISTER(); needed to deregister later. */
static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;

/* List of registered netlink cloners; accessed under rtnl_cloner_lock. */
static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);

/* Shared for lookups, exclusive for list insertion/removal. */
static struct sx rtnl_cloner_lock;
SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock");

static struct nl_cloner *rtnl_iface_find_cloner_locked(const char *name);
79 
80 /*
81  * RTM_GETLINK request
82  * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
83  *  {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
84  *
85  * Reply:
86  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
87 {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
88 
89 [
90 {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
91 {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
92 {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
93 {{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
94 {{nla_len=8, nla_type=IFLA_MTU}, 1500},
95 {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
96  {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
97 {{nla_len=8, nla_type=IFLA_GROUP}, 0},
98 {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
99 {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
100 {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
101 {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
102 {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
103 {{nla_len=5, nla_type=IFLA_CARRIER}, 1},
104 {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
105 {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
106 {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
107 {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
108 {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
109  */
110 
/* Link state values reported via IFLA_OPERSTATE and IFLA_CARRIER. */
struct if_state {
	uint8_t		ifla_operstate;	/* one of the IF_OPER_* values */
	uint8_t		ifla_carrier;	/* 1 if carrier is present, 0 otherwise */
};
115 
116 static void
117 get_operstate_ether(struct ifnet *ifp, struct if_state *pstate)
118 {
119 	struct ifmediareq ifmr = {};
120 	int error;
121 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (void *)&ifmr);
122 
123 	if (error != 0) {
124 		NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d",
125 		    if_name(ifp), error);
126 		return;
127 	}
128 
129 	switch (IFM_TYPE(ifmr.ifm_active)) {
130 	case IFM_ETHER:
131 		if (ifmr.ifm_status & IFM_ACTIVE) {
132 			pstate->ifla_carrier = 1;
133 			if (ifp->if_flags & IFF_MONITOR)
134 				pstate->ifla_operstate = IF_OPER_DORMANT;
135 			else
136 				pstate->ifla_operstate = IF_OPER_UP;
137 		} else
138 			pstate->ifla_operstate = IF_OPER_DOWN;
139 	}
140 }
141 
142 static bool
143 get_stats(struct nl_writer *nw, struct ifnet *ifp)
144 {
145 	struct rtnl_link_stats64 *stats;
146 
147 	int nla_len = sizeof(struct nlattr) + sizeof(*stats);
148 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr);
149 	if (nla == NULL)
150 		return (false);
151 	nla->nla_type = IFLA_STATS64;
152 	nla->nla_len = nla_len;
153 	stats = (struct rtnl_link_stats64 *)(nla + 1);
154 
155 	stats->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
156 	stats->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
157 	stats->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
158 	stats->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
159 	stats->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
160 	stats->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
161 	stats->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
162 	stats->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
163 	stats->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
164 	stats->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
165 
166 	return (true);
167 }
168 
169 static void
170 get_operstate(struct ifnet *ifp, struct if_state *pstate)
171 {
172 	pstate->ifla_operstate = IF_OPER_UNKNOWN;
173 	pstate->ifla_carrier = 0; /* no carrier */
174 
175 	switch (ifp->if_type) {
176 	case IFT_ETHER:
177 		get_operstate_ether(ifp, pstate);
178 		break;
179 	case IFT_LOOP:
180 		if (ifp->if_flags & IFF_UP) {
181 			pstate->ifla_operstate = IF_OPER_UP;
182 			pstate->ifla_carrier = 1;
183 		} else
184 			pstate->ifla_operstate = IF_OPER_DOWN;
185 		break;
186 	}
187 }
188 
189 static unsigned
190 ifp_flags_to_netlink(const struct ifnet *ifp)
191 {
192         return (ifp->if_flags | ifp->if_drv_flags);
193 }
194 
195 #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
196 static bool
197 dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
198 {
199         uint32_t addr_len = 0;
200         const void *addr_data = NULL;
201 #ifdef INET6
202         struct in6_addr addr6;
203 #endif
204 
205         if (sa == NULL)
206                 return (true);
207 
208         switch (sa->sa_family) {
209 #ifdef INET
210         case AF_INET:
211                 addr_len = sizeof(struct in_addr);
212                 addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
213                 break;
214 #endif
215 #ifdef INET6
216         case AF_INET6:
217                 in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
218                 addr_len = sizeof(struct in6_addr);
219                 addr_data = &addr6;
220                 break;
221 #endif
222         case AF_LINK:
223                 addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
224                 addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
225                 break;
226         default:
227                 NL_LOG(LOG_DEBUG, "unsupported family: %d, skipping", sa->sa_family);
228                 return (true);
229         }
230 
231         return (nlattr_add(nw, attr, addr_len, addr_data));
232 }
233 
234 /*
235  * Dumps interface state, properties and metrics.
236  * @nw: message writer
237  * @ifp: target interface
238  * @hdr: template header
239  * @if_flags_mask: changed if_[drv]_flags bitmask
240  *
241  * This function is called without epoch and MAY sleep.
242  */
243 static bool
244 dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr,
245     int if_flags_mask)
246 {
247         struct ifinfomsg *ifinfo;
248 
249         NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));
250 
251 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
252 		goto enomem;
253 
254         ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
255         ifinfo->ifi_family = AF_UNSPEC;
256         ifinfo->__ifi_pad = 0;
257         ifinfo->ifi_type = ifp->if_type;
258         ifinfo->ifi_index = ifp->if_index;
259         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
260         ifinfo->ifi_change = if_flags_mask;
261 
262 	struct if_state ifs = {};
263 	get_operstate(ifp, &ifs);
264 
265 	if (ifs.ifla_operstate == IF_OPER_UP)
266 		ifinfo->ifi_flags |= IFF_LOWER_UP;
267 
268         nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
269         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
270         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
271 
272 /*
273         nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
274         nlattr_add_u8(nw, IFLA_LINKMODE, val);
275 */
276         if ((ifp->if_addr != NULL)) {
277                 dump_sa(nw, IFLA_ADDRESS, ifp->if_addr->ifa_addr);
278         }
279 
280         if ((ifp->if_broadcastaddr != NULL)) {
281 		nlattr_add(nw, IFLA_BROADCAST, ifp->if_addrlen,
282 		    ifp->if_broadcastaddr);
283         }
284 
285         nlattr_add_u32(nw, IFLA_MTU, ifp->if_mtu);
286 /*
287         nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
288         nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
289         nlattr_add_u32(nw, IFLA_GROUP, 0);
290 */
291 
292 	if (ifp->if_description != NULL)
293 		nlattr_add_string(nw, IFLA_IFALIAS, ifp->if_description);
294 
295 	get_stats(nw, ifp);
296 
297 	uint32_t val = (ifp->if_flags & IFF_PROMISC) != 0;
298         nlattr_add_u32(nw, IFLA_PROMISCUITY, val);
299 
300 	sx_slock(&rtnl_cloner_lock);
301 	struct nl_cloner *cloner = rtnl_iface_find_cloner_locked(ifp->if_dname);
302 	if (cloner != NULL && cloner->dump_f != NULL) {
303 		/* Ignore any dump error */
304 		cloner->dump_f(ifp, nw);
305 	}
306 	sx_sunlock(&rtnl_cloner_lock);
307 
308         if (nlmsg_end(nw))
309 		return (true);
310 
311 enomem:
312         NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
313         nlmsg_abort(nw);
314         return (false);
315 }
316 
317 static bool
318 check_ifmsg(void *hdr, struct nl_pstate *npt)
319 {
320 	struct ifinfomsg *ifm = hdr;
321 
322 	if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 ||
323 	    ifm->ifi_flags != 0 || ifm->ifi_change != 0) {
324 		nlmsg_report_err_msg(npt,
325 		    "strict checking: non-zero values in ifinfomsg header");
326 		return (false);
327 	}
328 
329 	return (true);
330 }
331 
/*
 * Parser description for link (ifinfomsg-based) requests.  Parsed values
 * land in struct nl_parsed_link.
 */
#define	_IN(_field)	offsetof(struct ifinfomsg, _field)
#define	_OUT(_field)	offsetof(struct nl_parsed_link, _field)
/* Fixed header fields copied from ifinfomsg into nl_parsed_link */
static const struct nlfield_parser nlf_p_if[] = {
	{ .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 },
	{ .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 },
	{ .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 },
	{ .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 },
};

/* Attributes nested inside IFLA_LINKINFO: cloner name and its opaque data */
static const struct nlattr_parser nla_p_linfo[] = {
	{ .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn },
	{ .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla },
};
NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo);

/*
 * Top-level link attributes.  Note that IFLA_LINK is stored into the same
 * ifi_index field as the header index, and that IFLA_IFNAME and
 * IFLA_ALT_IFNAME both store into ifla_ifname.
 *
 * NOTE(review): keep the existing entry order unless the parser framework
 * is confirmed not to depend on it.
 */
static const struct nlattr_parser nla_p_if[] = {
	{ .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
	{ .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 },
	{ .type = IFLA_LINK, .off = _OUT(ifi_index), .cb = nlattr_get_uint32 },
	{ .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested },
	{ .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string },
	{ .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string },
	{ .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string },
};
#undef _IN
#undef _OUT
/* Strict parser: check_ifmsg() validates the header in strict mode */
NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if);
359 
360 static bool
361 match_iface(struct nl_parsed_link *attrs, struct ifnet *ifp)
362 {
363 	if (attrs->ifi_index != 0 && attrs->ifi_index != ifp->if_index)
364 		return (false);
365 	if (attrs->ifi_type != 0 && attrs->ifi_index != ifp->if_type)
366 		return (false);
367 	if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp)))
368 		return (false);
369 	/* TODO: add group match */
370 
371 	return (true);
372 }
373 
374 /*
375  * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0},
376  *  {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
377  *   [
378  *    [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"],
379  *    [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF]
380  *   ]
381  */
382 static int
383 rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
384 {
385 	struct epoch_tracker et;
386         struct ifnet *ifp;
387 	int error = 0;
388 
389 	struct nl_parsed_link attrs = {};
390 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
391 	if (error != 0)
392 		return (error);
393 
394 	struct netlink_walkargs wa = {
395 		.so = nlp,
396 		.nw = npt->nw,
397 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
398 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
399 		.hdr.nlmsg_flags = hdr->nlmsg_flags,
400 		.hdr.nlmsg_type = NL_RTM_NEWLINK,
401 	};
402 
403 	/* Fast track for an interface w/ explicit name or index match */
404 	if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) {
405 		if (attrs.ifi_index != 0) {
406 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u",
407 			    attrs.ifi_index);
408 			NET_EPOCH_ENTER(et);
409 			ifp = ifnet_byindex_ref(attrs.ifi_index);
410 			NET_EPOCH_EXIT(et);
411 		} else {
412 			NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s",
413 			    attrs.ifla_ifname);
414 			ifp = ifunit_ref(attrs.ifla_ifname);
415 		}
416 
417 		if (ifp != NULL) {
418 			if (match_iface(&attrs, ifp)) {
419 				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
420 					error = ENOMEM;
421 			} else
422 				error = ENODEV;
423 			if_rele(ifp);
424 		} else
425 			error = ENODEV;
426 		return (error);
427 	}
428 
429 	/* Always treat non-direct-match as a multipart message */
430 	wa.hdr.nlmsg_flags |= NLM_F_MULTI;
431 
432 	/*
433 	 * Fetching some link properties require performing ioctl's that may be blocking.
434 	 * Address it by saving referenced pointers of the matching links,
435 	 * exiting from epoch and going through the list one-by-one.
436 	 */
437 
438 	NL_LOG(LOG_DEBUG2, "Start dump");
439 
440 	struct ifnet **match_array;
441 	int offset = 0, base_count = 16; /* start with 128 bytes */
442 	match_array = malloc(base_count * sizeof(void *), M_TEMP, M_NOWAIT);
443 
444 	NLP_LOG(LOG_DEBUG3, nlp, "MATCHING: index=%u type=%d name=%s",
445 	    attrs.ifi_index, attrs.ifi_type, attrs.ifla_ifname);
446 	NET_EPOCH_ENTER(et);
447         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
448 		wa.count++;
449 		if (match_iface(&attrs, ifp)) {
450 			if (offset < base_count) {
451 				if (!if_try_ref(ifp))
452 					continue;
453 				match_array[offset++] = ifp;
454 				continue;
455 			}
456 			/* Too many matches, need to reallocate */
457 			struct ifnet **new_array;
458 			int sz = base_count * sizeof(void *);
459 			base_count *= 2;
460 			new_array = malloc(sz * 2, M_TEMP, M_NOWAIT);
461 			if (new_array == NULL) {
462 				error = ENOMEM;
463 				break;
464 			}
465 			memcpy(new_array, match_array, sz);
466 			free(match_array, M_TEMP);
467 			match_array = new_array;
468                 }
469         }
470 	NET_EPOCH_EXIT(et);
471 
472 	NL_LOG(LOG_DEBUG2, "Matched %d interface(s), dumping", offset);
473 	for (int i = 0; error == 0 && i < offset; i++) {
474 		if (!dump_iface(wa.nw, match_array[i], &wa.hdr, 0))
475 			error = ENOMEM;
476 	}
477 	for (int i = 0; i < offset; i++)
478 		if_rele(match_array[i]);
479 	free(match_array, M_TEMP);
480 
481 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
482 
483 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
484                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
485                 return (ENOMEM);
486         }
487 
488 	return (error);
489 }
490 
491 /*
492  * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[
493  * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0},
494  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
495  *   {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"],
496  *   [
497  *    {nla_len=16, nla_type=IFLA_LINKINFO},
498  *     [
499  *      {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
500  *     ]
501  *    ]
502  */
503 
504 static int
505 rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
506 {
507 	struct epoch_tracker et;
508         struct ifnet *ifp;
509 	int error;
510 
511 	struct nl_parsed_link attrs = {};
512 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
513 	if (error != 0)
514 		return (error);
515 
516 	NET_EPOCH_ENTER(et);
517 	ifp = ifnet_byindex_ref(attrs.ifi_index);
518 	NET_EPOCH_EXIT(et);
519 	if (ifp == NULL) {
520 		NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index);
521 		return (ENOENT);
522 	}
523 	NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp));
524 
525 	sx_xlock(&ifnet_detach_sxlock);
526 	error = if_clone_destroy(if_name(ifp));
527 	sx_xunlock(&ifnet_detach_sxlock);
528 
529 	NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error);
530 
531 	if_rele(ifp);
532 	return (error);
533 }
534 
535 /*
536  * New link:
537  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0},
538  *   {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
539  *    [
540  *     {{nla_len=8, nla_type=IFLA_MTU}, 123},
541  *     {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"},
542  *     {{nla_len=24, nla_type=IFLA_LINKINFO},
543  *      [
544  *       {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...},
545  *       {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]}
546  *
547  * Update link:
548  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0},
549  * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0},
550  * {{nla_len=8, nla_type=IFLA_MTU}, 123}}
551  *
552  *
553  * Check command availability:
554  * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0},
555  *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}
556  */
557 
558 
559 static int
560 create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
561     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
562 {
563 	if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) {
564 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute");
565 		return (EINVAL);
566 	}
567 	if (lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) {
568 		NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute");
569 		return (EINVAL);
570 	}
571 
572 	bool found = false;
573 	int error = 0;
574 
575 	sx_slock(&rtnl_cloner_lock);
576 	struct nl_cloner *cloner = rtnl_iface_find_cloner_locked(lattrs->ifla_cloner);
577 	if (cloner != NULL) {
578 		found = true;
579 		error = cloner->create_f(lattrs, bm, nlp, npt);
580 	}
581 	sx_sunlock(&rtnl_cloner_lock);
582 
583 	if (!found)
584 		error = generic_cloner.create_f(lattrs, bm, nlp, npt);
585 
586 	return (error);
587 }
588 
589 static int
590 modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs,
591     struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
592 {
593 	struct ifnet *ifp = NULL;
594 	struct epoch_tracker et;
595 
596 	if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) {
597 		/*
598 		 * Applications like ip(8) verify RTM_NEWLINK command
599 		 * existence by calling it with empty arguments. Always
600 		 * return "innocent" error in that case.
601 		 */
602 		NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field");
603 		return (EPERM);
604 	}
605 
606 	if (lattrs->ifi_index != 0) {
607 		NET_EPOCH_ENTER(et);
608 		ifp = ifnet_byindex_ref(lattrs->ifi_index);
609 		NET_EPOCH_EXIT(et);
610 		if (ifp == NULL) {
611 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u",
612 			    lattrs->ifi_index);
613 			return (ENOENT);
614 		}
615 	}
616 
617 	if (ifp == NULL && lattrs->ifla_ifname != NULL) {
618 		ifp = ifunit_ref(lattrs->ifla_ifname);
619 		if (ifp == NULL) {
620 			NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s",
621 			    lattrs->ifla_ifname);
622 			return (ENOENT);
623 		}
624 	}
625 
626 	MPASS(ifp != NULL);
627 
628 	/*
629 	 * There can be multiple kinds of interfaces:
630 	 * 1) cloned, with additional options
631 	 * 2) cloned, but w/o additional options
632 	 * 3) non-cloned (e.g. "physical).
633 	 *
634 	 * Thus, try to find cloner-specific callback and fallback to the
635 	 * "default" handler if not found.
636 	 */
637 	bool found = false;
638 	int error = 0;
639 
640 	sx_slock(&rtnl_cloner_lock);
641 	struct nl_cloner *cloner = rtnl_iface_find_cloner_locked(ifp->if_dname);
642 	if (cloner != NULL) {
643 		found = true;
644 		error = cloner->modify_f(ifp, lattrs, bm, nlp, npt);
645 	}
646 	sx_sunlock(&rtnl_cloner_lock);
647 
648 	if (!found)
649 		error = generic_cloner.modify_f(ifp, lattrs, bm, nlp, npt);
650 
651 	if_rele(ifp);
652 
653 	return (error);
654 }
655 
656 
657 static int
658 rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
659 {
660 	struct nlattr_bmask bm;
661 	int error;
662 
663 	struct nl_parsed_link attrs = {};
664 	error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs);
665 	if (error != 0)
666 		return (error);
667 	nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm);
668 
669 	if (hdr->nlmsg_flags & NLM_F_CREATE)
670 		return (create_link(hdr, &attrs, &bm, nlp, npt));
671 	else
672 		return (modify_link(hdr, &attrs, &bm, nlp, npt));
673 }
674 
675 /*
676 
677 {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
678  [
679         {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
680         {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
681         {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
682         {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
683         {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
684 ---
685 
686 {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
687  {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
688    [
689     {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
690    {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
691    {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
692 */
693 
694 static uint8_t
695 ifa_get_scope(const struct ifaddr *ifa)
696 {
697         const struct sockaddr *sa;
698         uint8_t addr_scope = RT_SCOPE_UNIVERSE;
699 
700         sa = ifa->ifa_addr;
701         switch (sa->sa_family) {
702 #ifdef INET
703         case AF_INET:
704                 {
705                         struct in_addr addr;
706                         addr = ((const struct sockaddr_in *)sa)->sin_addr;
707                         if (IN_LOOPBACK(addr.s_addr))
708                                 addr_scope = RT_SCOPE_HOST;
709                         else if (IN_LINKLOCAL(addr.s_addr))
710                                 addr_scope = RT_SCOPE_LINK;
711                         break;
712                 }
713 #endif
714 #ifdef INET6
715         case AF_INET6:
716                 {
717                         const struct in6_addr *addr;
718                         addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
719                         if (IN6_IS_ADDR_LOOPBACK(addr))
720                                 addr_scope = RT_SCOPE_HOST;
721                         else if (IN6_IS_ADDR_LINKLOCAL(addr))
722                                 addr_scope = RT_SCOPE_LINK;
723                         break;
724                 }
725 #endif
726         }
727 
728         return (addr_scope);
729 }
730 
731 static uint8_t
732 inet6_get_plen(const struct in6_addr *addr)
733 {
734 
735 	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
736 	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
737 }
738 
739 static uint8_t
740 get_sa_plen(const struct sockaddr *sa)
741 {
742 #ifdef INET
743         const struct in_addr *paddr;
744 #endif
745 #ifdef INET6
746         const struct in6_addr *paddr6;
747 #endif
748 
749         switch (sa->sa_family) {
750 #ifdef INET
751         case AF_INET:
752                 if (sa == NULL)
753                         return (32);
754                 paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
755                 return bitcount32(paddr->s_addr);;
756 #endif
757 #ifdef INET6
758         case AF_INET6:
759                 if (sa == NULL)
760                         return (128);
761                 paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
762                 return inet6_get_plen(paddr6);
763 #endif
764         }
765 
766         return (0);
767 }
768 
769 
770 /*
771  * {'attrs': [('IFA_ADDRESS', '12.0.0.1'),
772            ('IFA_LOCAL', '12.0.0.1'),
773            ('IFA_LABEL', 'eth10'),
774            ('IFA_FLAGS', 128),
775            ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})],
776  */
777 static bool
778 dump_iface_addr(struct nl_writer *nw, struct ifnet *ifp, struct ifaddr *ifa,
779     const struct nlmsghdr *hdr)
780 {
781         struct ifaddrmsg *ifamsg;
782         struct sockaddr *sa = ifa->ifa_addr;
783 
784         NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
785             ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
786 
787 	if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg)))
788 		goto enomem;
789 
790         ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg);
791         ifamsg->ifa_family = sa->sa_family;
792         ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
793         ifamsg->ifa_flags = 0; // ifa_flags is useless
794         ifamsg->ifa_scope = ifa_get_scope(ifa);
795         ifamsg->ifa_index = ifp->if_index;
796 
797         struct sockaddr *dst_sa = ifa->ifa_dstaddr;
798         if ((dst_sa == NULL) || (dst_sa->sa_family != sa->sa_family))
799                 dst_sa = sa;
800         dump_sa(nw, IFA_ADDRESS, dst_sa);
801         dump_sa(nw, IFA_LOCAL, sa);
802         nlattr_add_string(nw, IFA_LABEL, if_name(ifp));
803 
804         uint32_t val = 0; // ifa->ifa_flags;
805         nlattr_add_u32(nw, IFA_FLAGS, val);
806 
807 	if (nlmsg_end(nw))
808 		return (true);
809 enomem:
810         NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
811             rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
812         nlmsg_abort(nw);
813         return (false);
814 }
815 
816 static int
817 rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
818 {
819         struct ifaddr *ifa;
820         struct ifnet *ifp;
821 	int error = 0;
822 
823 	struct netlink_walkargs wa = {
824 		.so = nlp,
825 		.nw = npt->nw,
826 		.hdr.nlmsg_pid = hdr->nlmsg_pid,
827 		.hdr.nlmsg_seq = hdr->nlmsg_seq,
828 		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
829 		.hdr.nlmsg_type = NL_RTM_NEWADDR,
830 	};
831 
832 	NL_LOG(LOG_DEBUG2, "Start dump");
833 
834         CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
835                 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
836                         if (wa.family != 0 && wa.family != ifa->ifa_addr->sa_family)
837                                 continue;
838                         if (ifa->ifa_addr->sa_family == AF_LINK)
839                                 continue;
840 			wa.count++;
841                         if (!dump_iface_addr(wa.nw, ifp, ifa, &wa.hdr)) {
842                                 error = ENOMEM;
843                                 break;
844                         }
845 			wa.dumped++;
846                 }
847                 if (error != 0)
848                         break;
849         }
850 
851 	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
852 
853 	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
854                 NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
855                 return (ENOMEM);
856         }
857 
858 	return (error);
859 }
860 
861 static void
862 rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
863 {
864 	struct nlmsghdr hdr = {};
865 	struct nl_writer nw = {};
866 	uint32_t group = 0;
867 
868 	switch (ifa->ifa_addr->sa_family) {
869 #ifdef INET
870 	case AF_INET:
871 		group = RTNLGRP_IPV4_IFADDR;
872 		break;
873 #endif
874 #ifdef INET6
875 	case AF_INET6:
876 		group = RTNLGRP_IPV6_IFADDR;
877 		break;
878 #endif
879 	default:
880 		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
881 		    ifa->ifa_addr->sa_family);
882 		return;
883 	}
884 
885 	if (!nl_has_listeners(NETLINK_ROUTE, group))
886 		return;
887 
888 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
889 		NL_LOG(LOG_DEBUG, "error allocating group writer");
890 		return;
891 	}
892 
893 	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;
894 
895 	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
896 	nlmsg_flush(&nw);
897 }
898 
899 static void
900 rtnl_handle_ifevent(struct ifnet *ifp, int nlmsg_type, int if_flags_mask)
901 {
902 	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
903 	struct nl_writer nw = {};
904 
905 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
906 		return;
907 
908 	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
909 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
910 		return;
911 	}
912 	dump_iface(&nw, ifp, &hdr, if_flags_mask);
913         nlmsg_flush(&nw);
914 }
915 
/*
 * ifnet_arrival_event handler: announces the interface with an
 * RTM_NEWLINK message.  @arg is unused.
 */
static void
rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
{
	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
}
922 
/*
 * ifnet_departure_event handler: announces the interface removal with an
 * RTM_DELLINK message.  @arg is unused.
 */
static void
rtnl_handle_ifdetach(void *arg, struct ifnet *ifp)
{
	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
}
929 
/*
 * ifnet_link_event handler: re-announces the interface with an
 * RTM_NEWLINK message.  @arg is unused.
 */
static void
rtnl_handle_iflink(void *arg, struct ifnet *ifp)
{
	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
}
936 
/*
 * Non-static entry point: announces @ifp with an RTM_NEWLINK message,
 * passing @if_flags_mask on as the changed-flags bitmask (it ends up in
 * ifinfomsg.ifi_change).
 */
void
rtnl_handle_ifnet_event(struct ifnet *ifp, int if_flags_mask)
{
	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
}
943 
/*
 * Message handlers registered by rtnl_ifaces_init().
 *
 * The link handlers are flagged RTNL_F_NOEPOCH and enter the net epoch
 * themselves around interface lookups.
 *
 * NOTE(review): RTM_NEWADDR and RTM_DELADDR are routed to
 * rtnl_handle_getaddr(), which only dumps addresses - this looks like a
 * placeholder for missing add/delete handlers; confirm.
 */
static const struct rtnl_cmd_handler cmd_handlers[] = {
	{
		.cmd = NL_RTM_GETLINK,
		.name = "RTM_GETLINK",
		.cb = &rtnl_handle_getlink,
		.flags = RTNL_F_NOEPOCH,
	},
	{
		.cmd = NL_RTM_DELLINK,
		.name = "RTM_DELLINK",
		.cb = &rtnl_handle_dellink,
		.priv = PRIV_NET_IFDESTROY,
		.flags = RTNL_F_NOEPOCH,
	},
	{
		.cmd = NL_RTM_NEWLINK,
		.name = "RTM_NEWLINK",
		.cb = &rtnl_handle_newlink,
		.priv = PRIV_NET_IFCREATE,
		.flags = RTNL_F_NOEPOCH,
	},
	{
		.cmd = NL_RTM_GETADDR,
		.name = "RTM_GETADDR",
		.cb = &rtnl_handle_getaddr,
	},
	{
		.cmd = NL_RTM_NEWADDR,
		.name = "RTM_NEWADDR",
		.cb = &rtnl_handle_getaddr,
	},
	{
		.cmd = NL_RTM_DELADDR,
		.name = "RTM_DELADDR",
		.cb = &rtnl_handle_getaddr,
	},
};
981 
/* Parsers checked by NL_VERIFY_PARSERS() in rtnl_ifaces_init() */
static const struct nlhdr_parser *all_parsers[] = { &ifmsg_parser };
983 
/*
 * Registers @cloner by inserting it at the head of the cloner list,
 * under the exclusive cloner lock.
 */
void
rtnl_iface_add_cloner(struct nl_cloner *cloner)
{
	sx_xlock(&rtnl_cloner_lock);
	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
	sx_xunlock(&rtnl_cloner_lock);
}
991 
/*
 * Unregisters @cloner by removing it from the cloner list, under the
 * exclusive cloner lock.
 */
void
rtnl_iface_del_cloner(struct nl_cloner *cloner)
{
	sx_xlock(&rtnl_cloner_lock);
	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
	sx_xunlock(&rtnl_cloner_lock);
}
999 
1000 static struct nl_cloner *
1001 rtnl_iface_find_cloner_locked(const char *name)
1002 {
1003 	struct nl_cloner *cloner;
1004 
1005 	SLIST_FOREACH(cloner, &nl_cloners, next) {
1006 		if (!strcmp(name, cloner->name))
1007 			return (cloner);
1008 	}
1009 
1010 	return (NULL);
1011 }
1012 
/*
 * Module initialization: subscribes to the interface arrival, departure,
 * address and link events, verifies the parsers, registers the interface
 * drivers and finally registers the message handlers of this file.
 */
void
rtnl_ifaces_init(void)
{
	ifattach_event = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
	    EVENTHANDLER_PRI_ANY);
	ifdetach_event = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
	    EVENTHANDLER_PRI_ANY);
	ifaddr_event = EVENTHANDLER_REGISTER(
	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
	    EVENTHANDLER_PRI_ANY);
	iflink_event = EVENTHANDLER_REGISTER(
	    ifnet_link_event, rtnl_handle_iflink, NULL,
	    EVENTHANDLER_PRI_ANY);
	NL_VERIFY_PARSERS(all_parsers);
	rtnl_iface_drivers_register();
	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
}
1032 
/*
 * Module teardown: deregisters the four event handlers installed by
 * rtnl_ifaces_init().
 *
 * NOTE(review): message handlers and interface drivers are not
 * unregistered here; presumably handled elsewhere - confirm.
 */
void
rtnl_ifaces_destroy(void)
{
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
}
1041