xref: /freebsd/sys/compat/linux/linux_netlink.c (revision 942815c54820783d3d4f7f6faa71ab7919b5f0e5)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/types.h>
35 #include <sys/ck.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/rmlock.h>
39 #include <sys/socket.h>
40 #include <sys/vnode.h>
41 
42 #include <net/if.h>
43 #include <net/if_dl.h>
44 #include <net/route.h>
45 #include <net/route/nhop.h>
46 #include <net/route/route_ctl.h>
47 #include <netlink/netlink.h>
48 #include <netlink/netlink_ctl.h>
49 #include <netlink/netlink_linux.h>
50 #include <netlink/netlink_route.h>
51 
52 #include <compat/linux/linux.h>
53 #include <compat/linux/linux_common.h>
54 #include <compat/linux/linux_util.h>
55 
56 #define	DEBUG_MOD_NAME	nl_linux
57 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
58 #include <netlink/netlink_debug.h>
59 _DECLARE_DEBUG(LOG_DEBUG);
60 
/*
 * Report whether the payload length of the attribute, as computed by
 * NL_RTA_DATA_LEN(), is exactly sz.
 */
static bool
valid_rta_size(const struct rtattr *rta, int sz)
{
	const bool size_matches = (NL_RTA_DATA_LEN(rta) == sz);

	return (size_matches);
}
66 
/*
 * Report whether the attribute payload has exactly the size of a uint32_t.
 */
static bool
valid_rta_u32(const struct rtattr *rta)
{
	const int expected_len = sizeof(uint32_t);

	return (valid_rta_size(rta, expected_len));
}
72 
/*
 * Read the attribute payload as a uint32_t.
 * No size validation is done here; callers check with valid_rta_u32().
 */
static uint32_t
_rta_get_uint32(const struct rtattr *rta)
{
	const uint32_t *valuep = (const uint32_t *)NL_RTA_DATA_CONST(rta);

	return (*valuep);
}
78 
79 static struct nlmsghdr *
80 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
81 {
82 	struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
83 
84 	if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
85 		ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family);
86 
87 	return (hdr);
88 }
89 
90 static struct nlmsghdr *
91 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
92 {
93 	struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
94 
95 	if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg))
96 		ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family);
97 
98 	return (hdr);
99 }
100 
101 static struct nlmsghdr *
102 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
103 {
104 	/* Tweak address families and default fib only */
105 	struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
106 	struct nlattr *nla, *nla_head;
107 	int attrs_len;
108 
109 	rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family);
110 
111 	if (rtm->rtm_table == 254)
112 		rtm->rtm_table = 0;
113 
114 	attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
115 	attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
116 	nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
117 
118 	NLA_FOREACH(nla, nla_head, attrs_len) {
119 		RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
120 		    nla->nla_type, nla->nla_len, attrs_len);
121 		struct rtattr *rta = (struct rtattr *)nla;
122 		if (rta->rta_len < sizeof(struct rtattr)) {
123 			break;
124 		}
125 		switch (rta->rta_type) {
126 		case NL_RTA_TABLE:
127 			if (!valid_rta_u32(rta))
128 				goto done;
129 			rtm->rtm_table = 0;
130 			uint32_t fibnum = _rta_get_uint32(rta);
131 			RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
132 			if (fibnum == 254) {
133 				*((uint32_t *)NL_RTA_DATA(rta)) = 0;
134 			}
135 			break;
136 		}
137 	}
138 
139 done:
140 	return (hdr);
141 }
142 
143 static struct nlmsghdr *
144 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
145 {
146 	switch (hdr->nlmsg_type) {
147 	case NL_RTM_GETROUTE:
148 	case NL_RTM_NEWROUTE:
149 	case NL_RTM_DELROUTE:
150 		return (rtnl_route_from_linux(hdr, npt));
151 	case NL_RTM_GETNEIGH:
152 		return (rtnl_neigh_from_linux(hdr, npt));
153 	case NL_RTM_GETADDR:
154 		return (rtnl_ifaddr_from_linux(hdr, npt));
155 	/* Silence warning for the messages where no translation is required */
156 	case NL_RTM_NEWLINK:
157 	case NL_RTM_DELLINK:
158 	case NL_RTM_GETLINK:
159 		break;
160 	default:
161 		RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
162 		    hdr->nlmsg_type);
163 	}
164 
165 	return (hdr);
166 }
167 
168 static struct nlmsghdr *
169 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr,
170     struct nl_pstate *npt)
171 {
172 	switch (netlink_family) {
173 	case NETLINK_ROUTE:
174 		return (rtnl_from_linux(hdr, npt));
175 	}
176 
177 	return (hdr);
178 }
179 
180 
181 /************************************************************
182  * Kernel -> Linux
183  ************************************************************/
184 
185 static bool
186 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
187 {
188 	char *out_hdr;
189 	out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
190 
191 	if (out_hdr != NULL) {
192 		memcpy(out_hdr, hdr, hdr->nlmsg_len);
193 		return (true);
194 	}
195 	return (false);
196 }
197 
198 static bool
199 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
200 {
201 	return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
202 	    hdr->nlmsg_flags, 0));
203 }
204 
205 static void *
206 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
207 {
208 	void *next_hdr = nlmsg_reserve_data(nw, sz, void);
209 	memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
210 
211 	return (next_hdr);
212 }
213 #define	nlmsg_copy_next_header(_hdr, _ns, _t)	\
214 	((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
215 
216 static bool
217 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
218 {
219 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
220 	if (nla != NULL) {
221 		memcpy(nla, nla_orig, nla_orig->nla_len);
222 		return (true);
223 	}
224 	return (false);
225 }
226 
227 static bool
228 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
229 {
230 	struct nlattr *nla;
231 
232 	int hdrlen = NETLINK_ALIGN(raw_hdrlen);
233 	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
234 	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
235 
236 	NLA_FOREACH(nla, nla_head, attrs_len) {
237 		RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
238 		if (nla->nla_len < sizeof(struct nlattr)) {
239 			return (false);
240 		}
241 		if (!nlmsg_copy_nla(nla, nw))
242 			return (false);
243 	}
244 	return (true);
245 }
246 
247 static unsigned int
248 rtnl_if_flags_to_linux(unsigned int if_flags)
249 {
250 	unsigned int result = 0;
251 
252 	for (int i = 0; i < 31; i++) {
253 		unsigned int flag = 1 << i;
254 		if (!(flag & if_flags))
255 			continue;
256 		switch (flag) {
257 		case IFF_UP:
258 		case IFF_BROADCAST:
259 		case IFF_DEBUG:
260 		case IFF_LOOPBACK:
261 		case IFF_POINTOPOINT:
262 		case IFF_DRV_RUNNING:
263 		case IFF_NOARP:
264 		case IFF_PROMISC:
265 		case IFF_ALLMULTI:
266 			result |= flag;
267 			break;
268 		case IFF_KNOWSEPOCH:
269 		case IFF_DRV_OACTIVE:
270 		case IFF_SIMPLEX:
271 		case IFF_LINK0:
272 		case IFF_LINK1:
273 		case IFF_LINK2:
274 		case IFF_CANTCONFIG:
275 		case IFF_PPROMISC:
276 		case IFF_MONITOR:
277 		case IFF_STATICARP:
278 		case IFF_STICKYARP:
279 		case IFF_DYING:
280 		case IFF_RENAMING:
281 		case IFF_NOGROUP:
282 			/* No Linux analogue */
283 			break;
284 		case IFF_MULTICAST:
285 			result |= 1 << 12;
286 		}
287 	}
288 	return (result);
289 }
290 
291 static bool
292 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
293     struct nl_writer *nw)
294 {
295 	if (!nlmsg_copy_header(hdr, nw))
296 		return (false);
297 
298 	struct ifinfomsg *ifinfo;
299 	ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
300 
301 	ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
302 	/* Convert interface type */
303 	switch (ifinfo->ifi_type) {
304 	case IFT_ETHER:
305 		ifinfo->ifi_type = 1; // ARPHRD_ETHER
306 		break;
307 	}
308 	ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
309 
310 	/* Copy attributes unchanged */
311 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
312 		return (false);
313 
314 	/* make ip(8) happy */
315 	if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
316 		return (false);
317 
318 	if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
319 		return (false);
320 
321 	nlmsg_end(nw);
322 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
323 	return (true);
324 }
325 
326 static bool
327 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
328     struct nl_writer *nw)
329 {
330 	if (!nlmsg_copy_header(hdr, nw))
331 		return (false);
332 
333 	struct ifaddrmsg *ifamsg;
334 	ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
335 
336 	ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
337 	/* XXX: fake ifa_flags? */
338 
339 	/* Copy attributes unchanged */
340 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
341 		return (false);
342 
343 	nlmsg_end(nw);
344 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
345 	return (true);
346 }
347 
348 static bool
349 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
350     struct nl_writer *nw)
351 {
352 	if (!nlmsg_copy_header(hdr, nw))
353 		return (false);
354 
355 	struct ndmsg *ndm;
356 	ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
357 
358 	ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
359 
360 	/* Copy attributes unchanged */
361 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
362 		return (false);
363 
364 	nlmsg_end(nw);
365 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
366 	return (true);
367 }
368 
369 static bool
370 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
371     struct nl_writer *nw)
372 {
373 	if (!nlmsg_copy_header(hdr, nw))
374 		return (false);
375 
376 	struct rtmsg *rtm;
377 	rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
378 	rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
379 
380 	struct nlattr *nla;
381 
382 	int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
383 	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
384 	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
385 
386 	NLA_FOREACH(nla, nla_head, attrs_len) {
387 		struct rtattr *rta = (struct rtattr *)nla;
388 		//RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len);
389 		if (rta->rta_len < sizeof(struct rtattr)) {
390 			break;
391 		}
392 
393 		switch (rta->rta_type) {
394 		case NL_RTA_TABLE:
395 			{
396 				uint32_t fibnum;
397 				fibnum = _rta_get_uint32(rta);
398 				if (fibnum == 0)
399 					fibnum = 254;
400 				RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
401 				if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
402 					return (false);
403 			}
404 			break;
405 		default:
406 			if (!nlmsg_copy_nla(nla, nw))
407 				return (false);
408 			break;
409 		}
410 	}
411 
412 	nlmsg_end(nw);
413 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
414 	return (true);
415 }
416 
417 static bool
418 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
419 {
420 	RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
421 
422 	switch (hdr->nlmsg_type) {
423 	case NL_RTM_NEWLINK:
424 	case NL_RTM_DELLINK:
425 	case NL_RTM_GETLINK:
426 		return (rtnl_newlink_to_linux(hdr, nlp, nw));
427 	case NL_RTM_NEWADDR:
428 	case NL_RTM_DELADDR:
429 		return (rtnl_newaddr_to_linux(hdr, nlp, nw));
430 	case NL_RTM_NEWROUTE:
431 	case NL_RTM_DELROUTE:
432 		return (rtnl_newroute_to_linux(hdr, nlp, nw));
433 	case NL_RTM_NEWNEIGH:
434 	case NL_RTM_DELNEIGH:
435 	case NL_RTM_GETNEIGH:
436 		return (rtnl_newneigh_to_linux(hdr, nlp, nw));
437 	default:
438 		RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
439 		    hdr->nlmsg_type);
440 		return (handle_default_out(hdr, nw));
441 	}
442 }
443 
444 static bool
445 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
446 {
447 	if (!nlmsg_copy_header(hdr, nw))
448 		return (false);
449 
450 	struct nlmsgerr *nlerr;
451 	nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
452 	nlerr->error = bsd_to_linux_errno(nlerr->error);
453 
454 	int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
455 	if (hdr->nlmsg_len == copied_len) {
456 		nlmsg_end(nw);
457 		return (true);
458 	}
459 
460 	/*
461 	 * CAP_ACK was not set. Original request needs to be translated.
462 	 * XXX: implement translation of the original message
463 	 */
464 	RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
465 	    nlerr->msg.nlmsg_type);
466 	char *dst_payload, *src_payload;
467 	int copy_len = hdr->nlmsg_len - copied_len;
468 	dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
469 
470 	src_payload = (char *)hdr + copied_len;
471 
472 	memcpy(dst_payload, src_payload, copy_len);
473 	nlmsg_end(nw);
474 
475 	return (true);
476 }
477 
478 static bool
479 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
480     struct nl_writer *nw)
481 {
482 	if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
483 		switch (hdr->nlmsg_type) {
484 		case NLMSG_ERROR:
485 			return (nlmsg_error_to_linux(hdr, nlp, nw));
486 		case NLMSG_NOOP:
487 		case NLMSG_DONE:
488 		case NLMSG_OVERRUN:
489 			return (handle_default_out(hdr, nw));
490 		default:
491 			RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
492 			    hdr->nlmsg_type);
493 			return (handle_default_out(hdr, nw));
494 		}
495 	}
496 
497 	switch (netlink_family) {
498 	case NETLINK_ROUTE:
499 		return (rtnl_to_linux(hdr, nlp, nw));
500 	default:
501 		return (handle_default_out(hdr, nw));
502 	}
503 }
504 
505 static struct mbuf *
506 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp)
507 {
508 	RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length);
509 	struct nl_writer nw = {};
510 
511 	struct mbuf *m = NULL;
512 	if (!nlmsg_get_chain_writer(&nw, data_length, &m)) {
513 		RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
514 		    data_length);
515 		return (NULL);
516 	}
517 
518 	/* Assume correct headers. Buffer IS mutable */
519 	int count = 0;
520 	for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
521 		struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
522 		int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
523 		count++;
524 
525 		if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) {
526 			RT_LOG(LOG_DEBUG, "failed to process msg type %d",
527 			    hdr->nlmsg_type);
528 			m_freem(m);
529 			return (NULL);
530 		}
531 		offset += msglen;
532 	}
533 	nlmsg_flush(&nw);
534 	RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count,
535 	    m ? m_length(m, NULL) : 0);
536 
537 	return (m);
538 }
539 
540 static struct mbuf *
541 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp)
542 {
543 	/* XXX: easiest solution, not optimized for performance */
544 	int data_length = m_length(m, NULL);
545 	char *buf = malloc(data_length, M_LINUX, M_NOWAIT);
546 	if (buf == NULL) {
547 		RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message",
548 		    data_length);
549 		m_freem(m);
550 		return (NULL);
551 	}
552 	m_copydata(m, 0, data_length, buf);
553 	m_freem(m);
554 
555 	m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp);
556 	free(buf, M_LINUX);
557 
558 	return (m);
559 }
560 
561 static struct linux_netlink_provider linux_netlink_v1 = {
562 	.mbufs_to_linux = mbufs_to_linux,
563 	.msgs_to_linux = nlmsgs_to_linux,
564 	.msg_from_linux = nlmsg_from_linux,
565 };
566 
567 void
568 linux_netlink_register(void)
569 {
570 	linux_netlink_p = &linux_netlink_v1;
571 }
572 
573 void
574 linux_netlink_deregister(void)
575 {
576 	linux_netlink_p = NULL;
577 }
578