xref: /freebsd/sys/compat/linux/linux_netlink.c (revision ec965063070e5753c166cf592c9336444b74720a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include <sys/types.h>
33 #include <sys/malloc.h>
34 #include <sys/rmlock.h>
35 #include <sys/socket.h>
36 #include <sys/ck.h>
37 
38 #include <net/if.h>
39 #include <net/if_dl.h>
40 #include <net/route.h>
41 #include <net/route/nhop.h>
42 #include <net/route/route_ctl.h>
43 #include <netlink/netlink.h>
44 #include <netlink/netlink_ctl.h>
45 #include <netlink/netlink_linux.h>
46 #include <netlink/netlink_route.h>
47 
48 #include <compat/linux/linux.h>
49 #include <compat/linux/linux_common.h>
50 #include <compat/linux/linux_util.h>
51 
52 #define	DEBUG_MOD_NAME	nl_linux
53 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
54 #include <netlink/netlink_debug.h>
55 _DECLARE_DEBUG(LOG_DEBUG);
56 
/*
 * Tells whether the payload length of @rta, as reported by
 * NL_RTA_DATA_LEN(), is exactly @sz bytes.
 */
static bool
valid_rta_size(const struct rtattr *rta, int sz)
{
	const bool size_matches = (NL_RTA_DATA_LEN(rta) == sz);

	return (size_matches);
}
62 
/*
 * Tells whether @rta carries a payload of exactly sizeof(uint32_t) bytes.
 */
static bool
valid_rta_u32(const struct rtattr *rta)
{
	const int expected_len = sizeof(uint32_t);

	return (valid_rta_size(rta, expected_len));
}
68 
/*
 * Returns the payload of @rta interpreted as a uint32_t.
 * No size validation is done here; callers are expected to have checked
 * the attribute with valid_rta_u32() where the input is not trusted.
 */
static uint32_t
_rta_get_uint32(const struct rtattr *rta)
{
	const uint32_t *payload = (const uint32_t *)NL_RTA_DATA_CONST(rta);

	return (*payload);
}
74 
75 static struct nlmsghdr *
76 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
77 {
78 	struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
79 
80 	if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
81 		ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family);
82 
83 	return (hdr);
84 }
85 
86 static struct nlmsghdr *
87 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
88 {
89 	struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
90 
91 	if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg))
92 		ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family);
93 
94 	return (hdr);
95 }
96 
97 static struct nlmsghdr *
98 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
99 {
100 	/* Tweak address families and default fib only */
101 	struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
102 	struct nlattr *nla, *nla_head;
103 	int attrs_len;
104 
105 	rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family);
106 
107 	if (rtm->rtm_table == 254)
108 		rtm->rtm_table = 0;
109 
110 	attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
111 	attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
112 	nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
113 
114 	NLA_FOREACH(nla, nla_head, attrs_len) {
115 		RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
116 		    nla->nla_type, nla->nla_len, attrs_len);
117 		struct rtattr *rta = (struct rtattr *)nla;
118 		if (rta->rta_len < sizeof(struct rtattr)) {
119 			break;
120 		}
121 		switch (rta->rta_type) {
122 		case NL_RTA_TABLE:
123 			if (!valid_rta_u32(rta))
124 				goto done;
125 			rtm->rtm_table = 0;
126 			uint32_t fibnum = _rta_get_uint32(rta);
127 			RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
128 			if (fibnum == 254) {
129 				*((uint32_t *)NL_RTA_DATA(rta)) = 0;
130 			}
131 			break;
132 		}
133 	}
134 
135 done:
136 	return (hdr);
137 }
138 
139 static struct nlmsghdr *
140 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
141 {
142 	switch (hdr->nlmsg_type) {
143 	case NL_RTM_GETROUTE:
144 	case NL_RTM_NEWROUTE:
145 	case NL_RTM_DELROUTE:
146 		return (rtnl_route_from_linux(hdr, npt));
147 	case NL_RTM_GETNEIGH:
148 		return (rtnl_neigh_from_linux(hdr, npt));
149 	case NL_RTM_GETADDR:
150 		return (rtnl_ifaddr_from_linux(hdr, npt));
151 	/* Silence warning for the messages where no translation is required */
152 	case NL_RTM_NEWLINK:
153 	case NL_RTM_DELLINK:
154 	case NL_RTM_GETLINK:
155 		break;
156 	default:
157 		RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
158 		    hdr->nlmsg_type);
159 	}
160 
161 	return (hdr);
162 }
163 
164 static struct nlmsghdr *
165 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr,
166     struct nl_pstate *npt)
167 {
168 	switch (netlink_family) {
169 	case NETLINK_ROUTE:
170 		return (rtnl_from_linux(hdr, npt));
171 	}
172 
173 	return (hdr);
174 }
175 
176 
177 /************************************************************
178  * Kernel -> Linux
179  ************************************************************/
180 
181 static bool
182 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
183 {
184 	char *out_hdr;
185 	out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
186 
187 	if (out_hdr != NULL) {
188 		memcpy(out_hdr, hdr, hdr->nlmsg_len);
189 		return (true);
190 	}
191 	return (false);
192 }
193 
194 static bool
195 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
196 {
197 	return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
198 	    hdr->nlmsg_flags, 0));
199 }
200 
201 static void *
202 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
203 {
204 	void *next_hdr = nlmsg_reserve_data(nw, sz, void);
205 	memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
206 
207 	return (next_hdr);
208 }
209 #define	nlmsg_copy_next_header(_hdr, _ns, _t)	\
210 	((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
211 
212 static bool
213 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
214 {
215 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
216 	if (nla != NULL) {
217 		memcpy(nla, nla_orig, nla_orig->nla_len);
218 		return (true);
219 	}
220 	return (false);
221 }
222 
223 static bool
224 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
225 {
226 	struct nlattr *nla;
227 
228 	int hdrlen = NETLINK_ALIGN(raw_hdrlen);
229 	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
230 	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
231 
232 	NLA_FOREACH(nla, nla_head, attrs_len) {
233 		RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
234 		if (nla->nla_len < sizeof(struct nlattr)) {
235 			return (false);
236 		}
237 		if (!nlmsg_copy_nla(nla, nw))
238 			return (false);
239 	}
240 	return (true);
241 }
242 
243 static unsigned int
244 rtnl_if_flags_to_linux(unsigned int if_flags)
245 {
246 	unsigned int result = 0;
247 
248 	for (int i = 0; i < 31; i++) {
249 		unsigned int flag = 1 << i;
250 		if (!(flag & if_flags))
251 			continue;
252 		switch (flag) {
253 		case IFF_UP:
254 		case IFF_BROADCAST:
255 		case IFF_DEBUG:
256 		case IFF_LOOPBACK:
257 		case IFF_POINTOPOINT:
258 		case IFF_DRV_RUNNING:
259 		case IFF_NOARP:
260 		case IFF_PROMISC:
261 		case IFF_ALLMULTI:
262 			result |= flag;
263 			break;
264 		case IFF_KNOWSEPOCH:
265 		case IFF_DRV_OACTIVE:
266 		case IFF_SIMPLEX:
267 		case IFF_LINK0:
268 		case IFF_LINK1:
269 		case IFF_LINK2:
270 		case IFF_CANTCONFIG:
271 		case IFF_PPROMISC:
272 		case IFF_MONITOR:
273 		case IFF_STATICARP:
274 		case IFF_STICKYARP:
275 		case IFF_DYING:
276 		case IFF_RENAMING:
277 		case IFF_NOGROUP:
278 			/* No Linux analogue */
279 			break;
280 		case IFF_MULTICAST:
281 			result |= 1 << 12;
282 		}
283 	}
284 	return (result);
285 }
286 
287 static bool
288 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
289     struct nl_writer *nw)
290 {
291 	if (!nlmsg_copy_header(hdr, nw))
292 		return (false);
293 
294 	struct ifinfomsg *ifinfo;
295 	ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
296 
297 	ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
298 	/* Convert interface type */
299 	switch (ifinfo->ifi_type) {
300 	case IFT_ETHER:
301 		ifinfo->ifi_type = 1; // ARPHRD_ETHER
302 		break;
303 	}
304 	ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
305 
306 	/* Copy attributes unchanged */
307 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
308 		return (false);
309 
310 	/* make ip(8) happy */
311 	if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
312 		return (false);
313 
314 	if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
315 		return (false);
316 
317 	nlmsg_end(nw);
318 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
319 	return (true);
320 }
321 
322 static bool
323 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
324     struct nl_writer *nw)
325 {
326 	if (!nlmsg_copy_header(hdr, nw))
327 		return (false);
328 
329 	struct ifaddrmsg *ifamsg;
330 	ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
331 
332 	ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
333 	/* XXX: fake ifa_flags? */
334 
335 	/* Copy attributes unchanged */
336 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
337 		return (false);
338 
339 	nlmsg_end(nw);
340 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
341 	return (true);
342 }
343 
344 static bool
345 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
346     struct nl_writer *nw)
347 {
348 	if (!nlmsg_copy_header(hdr, nw))
349 		return (false);
350 
351 	struct ndmsg *ndm;
352 	ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
353 
354 	ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
355 
356 	/* Copy attributes unchanged */
357 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
358 		return (false);
359 
360 	nlmsg_end(nw);
361 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
362 	return (true);
363 }
364 
365 static bool
366 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
367     struct nl_writer *nw)
368 {
369 	if (!nlmsg_copy_header(hdr, nw))
370 		return (false);
371 
372 	struct rtmsg *rtm;
373 	rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
374 	rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
375 
376 	struct nlattr *nla;
377 
378 	int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
379 	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
380 	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
381 
382 	NLA_FOREACH(nla, nla_head, attrs_len) {
383 		struct rtattr *rta = (struct rtattr *)nla;
384 		//RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len);
385 		if (rta->rta_len < sizeof(struct rtattr)) {
386 			break;
387 		}
388 
389 		switch (rta->rta_type) {
390 		case NL_RTA_TABLE:
391 			{
392 				uint32_t fibnum;
393 				fibnum = _rta_get_uint32(rta);
394 				if (fibnum == 0)
395 					fibnum = 254;
396 				RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
397 				if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
398 					return (false);
399 			}
400 			break;
401 		default:
402 			if (!nlmsg_copy_nla(nla, nw))
403 				return (false);
404 			break;
405 		}
406 	}
407 
408 	nlmsg_end(nw);
409 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
410 	return (true);
411 }
412 
413 static bool
414 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
415 {
416 	RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
417 
418 	switch (hdr->nlmsg_type) {
419 	case NL_RTM_NEWLINK:
420 	case NL_RTM_DELLINK:
421 	case NL_RTM_GETLINK:
422 		return (rtnl_newlink_to_linux(hdr, nlp, nw));
423 	case NL_RTM_NEWADDR:
424 	case NL_RTM_DELADDR:
425 		return (rtnl_newaddr_to_linux(hdr, nlp, nw));
426 	case NL_RTM_NEWROUTE:
427 	case NL_RTM_DELROUTE:
428 		return (rtnl_newroute_to_linux(hdr, nlp, nw));
429 	case NL_RTM_NEWNEIGH:
430 	case NL_RTM_DELNEIGH:
431 	case NL_RTM_GETNEIGH:
432 		return (rtnl_newneigh_to_linux(hdr, nlp, nw));
433 	default:
434 		RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
435 		    hdr->nlmsg_type);
436 		return (handle_default_out(hdr, nw));
437 	}
438 }
439 
440 static bool
441 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
442 {
443 	if (!nlmsg_copy_header(hdr, nw))
444 		return (false);
445 
446 	struct nlmsgerr *nlerr;
447 	nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
448 	nlerr->error = bsd_to_linux_errno(nlerr->error);
449 
450 	int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
451 	if (hdr->nlmsg_len == copied_len) {
452 		nlmsg_end(nw);
453 		return (true);
454 	}
455 
456 	/*
457 	 * CAP_ACK was not set. Original request needs to be translated.
458 	 * XXX: implement translation of the original message
459 	 */
460 	RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
461 	    nlerr->msg.nlmsg_type);
462 	char *dst_payload, *src_payload;
463 	int copy_len = hdr->nlmsg_len - copied_len;
464 	dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
465 
466 	src_payload = (char *)hdr + copied_len;
467 
468 	memcpy(dst_payload, src_payload, copy_len);
469 	nlmsg_end(nw);
470 
471 	return (true);
472 }
473 
474 static bool
475 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
476     struct nl_writer *nw)
477 {
478 	if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
479 		switch (hdr->nlmsg_type) {
480 		case NLMSG_ERROR:
481 			return (nlmsg_error_to_linux(hdr, nlp, nw));
482 		case NLMSG_NOOP:
483 		case NLMSG_DONE:
484 		case NLMSG_OVERRUN:
485 			return (handle_default_out(hdr, nw));
486 		default:
487 			RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
488 			    hdr->nlmsg_type);
489 			return (handle_default_out(hdr, nw));
490 		}
491 	}
492 
493 	switch (netlink_family) {
494 	case NETLINK_ROUTE:
495 		return (rtnl_to_linux(hdr, nlp, nw));
496 	default:
497 		return (handle_default_out(hdr, nw));
498 	}
499 }
500 
501 static struct mbuf *
502 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp)
503 {
504 	RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length);
505 	struct nl_writer nw = {};
506 
507 	struct mbuf *m = NULL;
508 	if (!nlmsg_get_chain_writer(&nw, data_length, &m)) {
509 		RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
510 		    data_length);
511 		return (NULL);
512 	}
513 
514 	/* Assume correct headers. Buffer IS mutable */
515 	int count = 0;
516 	for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
517 		struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
518 		int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
519 		count++;
520 
521 		if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) {
522 			RT_LOG(LOG_DEBUG, "failed to process msg type %d",
523 			    hdr->nlmsg_type);
524 			m_freem(m);
525 			return (NULL);
526 		}
527 		offset += msglen;
528 	}
529 	nlmsg_flush(&nw);
530 	RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count,
531 	    m ? m_length(m, NULL) : 0);
532 
533 	return (m);
534 }
535 
536 static struct mbuf *
537 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp)
538 {
539 	/* XXX: easiest solution, not optimized for performance */
540 	int data_length = m_length(m, NULL);
541 	char *buf = malloc(data_length, M_LINUX, M_NOWAIT);
542 	if (buf == NULL) {
543 		RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message",
544 		    data_length);
545 		m_freem(m);
546 		return (NULL);
547 	}
548 	m_copydata(m, 0, data_length, buf);
549 	m_freem(m);
550 
551 	m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp);
552 	free(buf, M_LINUX);
553 
554 	return (m);
555 }
556 
557 static struct linux_netlink_provider linux_netlink_v1 = {
558 	.mbufs_to_linux = mbufs_to_linux,
559 	.msgs_to_linux = nlmsgs_to_linux,
560 	.msg_from_linux = nlmsg_from_linux,
561 };
562 
563 void
564 linux_netlink_register(void)
565 {
566 	linux_netlink_p = &linux_netlink_v1;
567 }
568 
569 void
570 linux_netlink_deregister(void)
571 {
572 	linux_netlink_p = NULL;
573 }
574