xref: /freebsd/sys/compat/linux/linux_netlink.c (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_inet.h"
29 #include "opt_inet6.h"
30 
31 #include <sys/types.h>
32 #include <sys/ck.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/socket.h>
36 #include <sys/vnode.h>
37 
38 #include <net/if.h>
39 #include <net/if_dl.h>
40 #include <net/route.h>
41 #include <net/route/nhop.h>
42 #include <net/route/route_ctl.h>
43 #include <netlink/netlink.h>
44 #include <netlink/netlink_ctl.h>
45 #include <netlink/netlink_linux.h>
46 #include <netlink/netlink_var.h>
47 #include <netlink/netlink_route.h>
48 
49 #include <compat/linux/linux.h>
50 #include <compat/linux/linux_common.h>
51 #include <compat/linux/linux_util.h>
52 
53 #define	DEBUG_MOD_NAME	nl_linux
54 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
55 #include <netlink/netlink_debug.h>
56 _DECLARE_DEBUG(LOG_INFO);
57 
/*
 * Report whether the payload of @rta is exactly @sz bytes long.
 */
static bool
valid_rta_size(const struct rtattr *rta, int sz)
{
	const bool size_matches = (NL_RTA_DATA_LEN(rta) == sz);

	return (size_matches);
}
63 
/*
 * Report whether the payload of @rta has the size of a uint32_t.
 */
static bool
valid_rta_u32(const struct rtattr *rta)
{
	const int expected_len = sizeof(uint32_t);

	return (valid_rta_size(rta, expected_len));
}
69 
/*
 * Read the payload of @rta as a uint32_t.
 * The payload size is not checked here.
 */
static uint32_t
_rta_get_uint32(const struct rtattr *rta)
{
	const uint32_t *payload = (const uint32_t *)NL_RTA_DATA_CONST(rta);

	return (*payload);
}
75 
76 static struct nlmsghdr *
77 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
78 {
79 	struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
80 
81 	if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
82 		ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family);
83 
84 	return (hdr);
85 }
86 
87 static struct nlmsghdr *
88 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
89 {
90 	struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
91 
92 	if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg))
93 		ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family);
94 
95 	return (hdr);
96 }
97 
98 static struct nlmsghdr *
99 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
100 {
101 	/* Tweak address families and default fib only */
102 	struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
103 	struct nlattr *nla, *nla_head;
104 	int attrs_len;
105 
106 	rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family);
107 
108 	if (rtm->rtm_table == 254)
109 		rtm->rtm_table = 0;
110 
111 	attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
112 	attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
113 	nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
114 
115 	NLA_FOREACH(nla, nla_head, attrs_len) {
116 		RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
117 		    nla->nla_type, nla->nla_len, attrs_len);
118 		struct rtattr *rta = (struct rtattr *)nla;
119 		if (rta->rta_len < sizeof(struct rtattr)) {
120 			break;
121 		}
122 		switch (rta->rta_type) {
123 		case NL_RTA_TABLE:
124 			if (!valid_rta_u32(rta))
125 				goto done;
126 			rtm->rtm_table = 0;
127 			uint32_t fibnum = _rta_get_uint32(rta);
128 			RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
129 			if (fibnum == 254) {
130 				*((uint32_t *)NL_RTA_DATA(rta)) = 0;
131 			}
132 			break;
133 		}
134 	}
135 
136 done:
137 	return (hdr);
138 }
139 
140 static struct nlmsghdr *
141 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
142 {
143 	switch (hdr->nlmsg_type) {
144 	case NL_RTM_GETROUTE:
145 	case NL_RTM_NEWROUTE:
146 	case NL_RTM_DELROUTE:
147 		return (rtnl_route_from_linux(hdr, npt));
148 	case NL_RTM_GETNEIGH:
149 		return (rtnl_neigh_from_linux(hdr, npt));
150 	case NL_RTM_GETADDR:
151 		return (rtnl_ifaddr_from_linux(hdr, npt));
152 	/* Silence warning for the messages where no translation is required */
153 	case NL_RTM_NEWLINK:
154 	case NL_RTM_DELLINK:
155 	case NL_RTM_GETLINK:
156 		break;
157 	default:
158 		RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
159 		    hdr->nlmsg_type);
160 	}
161 
162 	return (hdr);
163 }
164 
165 static struct nlmsghdr *
166 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr,
167     struct nl_pstate *npt)
168 {
169 	switch (netlink_family) {
170 	case NETLINK_ROUTE:
171 		return (rtnl_from_linux(hdr, npt));
172 	}
173 
174 	return (hdr);
175 }
176 
177 
178 /************************************************************
179  * Kernel -> Linux
180  ************************************************************/
181 
182 static bool
183 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
184 {
185 	char *out_hdr;
186 	out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
187 
188 	if (out_hdr != NULL) {
189 		memcpy(out_hdr, hdr, hdr->nlmsg_len);
190 		nw->num_messages++;
191 		return (true);
192 	}
193 	return (false);
194 }
195 
196 static bool
197 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
198 {
199 	return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
200 	    hdr->nlmsg_flags, 0));
201 }
202 
203 static void *
204 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
205 {
206 	void *next_hdr = nlmsg_reserve_data(nw, sz, void);
207 	memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
208 
209 	return (next_hdr);
210 }
211 #define	nlmsg_copy_next_header(_hdr, _ns, _t)	\
212 	((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
213 
214 static bool
215 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
216 {
217 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
218 	if (nla != NULL) {
219 		memcpy(nla, nla_orig, nla_orig->nla_len);
220 		return (true);
221 	}
222 	return (false);
223 }
224 
225 /*
226  * Translate a FreeBSD interface name to a Linux interface name.
227  */
228 static bool
229 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw)
230 {
231 	char ifname[LINUX_IFNAMSIZ];
232 
233 	if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname,
234 	    sizeof(ifname)) <= 0)
235 		return (false);
236 	return (nlattr_add_string(nw, IFLA_IFNAME, ifname));
237 }
238 
239 #define	LINUX_NLA_UNHANDLED	-1
240 /*
241  * Translate a FreeBSD attribute to a Linux attribute.
242  * Returns LINUX_NLA_UNHANDLED when the attribute is not processed
243  * and the caller must take care of it, otherwise the result is returned.
244  */
245 static int
246 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla,
247     struct nl_writer *nw)
248 {
249 
250 	switch (hdr->nlmsg_type) {
251 	case NL_RTM_NEWLINK:
252 	case NL_RTM_DELLINK:
253 	case NL_RTM_GETLINK:
254 		switch (nla->nla_type) {
255 		case IFLA_IFNAME:
256 			return (nlmsg_translate_ifname_nla(nla, nw));
257 		default:
258 			break;
259 		}
260 	default:
261 		break;
262 	}
263 	return (LINUX_NLA_UNHANDLED);
264 }
265 
266 static bool
267 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
268 {
269 	struct nlattr *nla;
270 	int ret;
271 
272 	int hdrlen = NETLINK_ALIGN(raw_hdrlen);
273 	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
274 	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
275 
276 	NLA_FOREACH(nla, nla_head, attrs_len) {
277 		RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
278 		if (nla->nla_len < sizeof(struct nlattr)) {
279 			return (false);
280 		}
281 		ret = nlmsg_translate_all_nla(hdr, nla, nw);
282 		if (ret == LINUX_NLA_UNHANDLED)
283 			ret = nlmsg_copy_nla(nla, nw);
284 		if (!ret)
285 			return (false);
286 	}
287 	return (true);
288 }
289 #undef LINUX_NLA_UNHANDLED
290 
291 static unsigned int
292 rtnl_if_flags_to_linux(unsigned int if_flags)
293 {
294 	unsigned int result = 0;
295 
296 	for (int i = 0; i < 31; i++) {
297 		unsigned int flag = 1 << i;
298 		if (!(flag & if_flags))
299 			continue;
300 		switch (flag) {
301 		case IFF_UP:
302 		case IFF_BROADCAST:
303 		case IFF_DEBUG:
304 		case IFF_LOOPBACK:
305 		case IFF_POINTOPOINT:
306 		case IFF_DRV_RUNNING:
307 		case IFF_NOARP:
308 		case IFF_PROMISC:
309 		case IFF_ALLMULTI:
310 			result |= flag;
311 			break;
312 		case IFF_NEEDSEPOCH:
313 		case IFF_DRV_OACTIVE:
314 		case IFF_SIMPLEX:
315 		case IFF_LINK0:
316 		case IFF_LINK1:
317 		case IFF_LINK2:
318 		case IFF_CANTCONFIG:
319 		case IFF_PPROMISC:
320 		case IFF_MONITOR:
321 		case IFF_STATICARP:
322 		case IFF_STICKYARP:
323 		case IFF_DYING:
324 		case IFF_RENAMING:
325 			/* No Linux analogue */
326 			break;
327 		case IFF_MULTICAST:
328 			result |= 1 << 12;
329 		}
330 	}
331 	return (result);
332 }
333 
334 static bool
335 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
336     struct nl_writer *nw)
337 {
338 	if (!nlmsg_copy_header(hdr, nw))
339 		return (false);
340 
341 	struct ifinfomsg *ifinfo;
342 	ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
343 
344 	ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
345 	/* Convert interface type */
346 	switch (ifinfo->ifi_type) {
347 	case IFT_ETHER:
348 		ifinfo->ifi_type = LINUX_ARPHRD_ETHER;
349 		break;
350 	}
351 	ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
352 
353 	/* Copy attributes unchanged */
354 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
355 		return (false);
356 
357 	/* make ip(8) happy */
358 	if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
359 		return (false);
360 
361 	if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
362 		return (false);
363 
364 	nlmsg_end(nw);
365 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
366 	return (true);
367 }
368 
369 static bool
370 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
371     struct nl_writer *nw)
372 {
373 	if (!nlmsg_copy_header(hdr, nw))
374 		return (false);
375 
376 	struct ifaddrmsg *ifamsg;
377 	ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
378 
379 	ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
380 	/* XXX: fake ifa_flags? */
381 
382 	/* Copy attributes unchanged */
383 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
384 		return (false);
385 
386 	nlmsg_end(nw);
387 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
388 	return (true);
389 }
390 
391 static bool
392 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
393     struct nl_writer *nw)
394 {
395 	if (!nlmsg_copy_header(hdr, nw))
396 		return (false);
397 
398 	struct ndmsg *ndm;
399 	ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
400 
401 	ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
402 
403 	/* Copy attributes unchanged */
404 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
405 		return (false);
406 
407 	nlmsg_end(nw);
408 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
409 	return (true);
410 }
411 
412 static bool
413 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
414     struct nl_writer *nw)
415 {
416 	if (!nlmsg_copy_header(hdr, nw))
417 		return (false);
418 
419 	struct rtmsg *rtm;
420 	rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
421 	rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
422 
423 	struct nlattr *nla;
424 
425 	int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
426 	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
427 	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
428 
429 	NLA_FOREACH(nla, nla_head, attrs_len) {
430 		struct rtattr *rta = (struct rtattr *)nla;
431 		//RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len);
432 		if (rta->rta_len < sizeof(struct rtattr)) {
433 			break;
434 		}
435 
436 		switch (rta->rta_type) {
437 		case NL_RTA_TABLE:
438 			{
439 				uint32_t fibnum;
440 				fibnum = _rta_get_uint32(rta);
441 				if (fibnum == 0)
442 					fibnum = 254;
443 				RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
444 				if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
445 					return (false);
446 			}
447 			break;
448 		default:
449 			if (!nlmsg_copy_nla(nla, nw))
450 				return (false);
451 			break;
452 		}
453 	}
454 
455 	nlmsg_end(nw);
456 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
457 	return (true);
458 }
459 
460 static bool
461 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
462 {
463 	RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
464 
465 	switch (hdr->nlmsg_type) {
466 	case NL_RTM_NEWLINK:
467 	case NL_RTM_DELLINK:
468 	case NL_RTM_GETLINK:
469 		return (rtnl_newlink_to_linux(hdr, nlp, nw));
470 	case NL_RTM_NEWADDR:
471 	case NL_RTM_DELADDR:
472 		return (rtnl_newaddr_to_linux(hdr, nlp, nw));
473 	case NL_RTM_NEWROUTE:
474 	case NL_RTM_DELROUTE:
475 		return (rtnl_newroute_to_linux(hdr, nlp, nw));
476 	case NL_RTM_NEWNEIGH:
477 	case NL_RTM_DELNEIGH:
478 	case NL_RTM_GETNEIGH:
479 		return (rtnl_newneigh_to_linux(hdr, nlp, nw));
480 	default:
481 		RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
482 		    hdr->nlmsg_type);
483 		return (handle_default_out(hdr, nw));
484 	}
485 }
486 
487 static bool
488 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
489 {
490 	if (!nlmsg_copy_header(hdr, nw))
491 		return (false);
492 
493 	struct nlmsgerr *nlerr;
494 	nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
495 	nlerr->error = bsd_to_linux_errno(nlerr->error);
496 
497 	int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
498 	if (hdr->nlmsg_len == copied_len) {
499 		nlmsg_end(nw);
500 		return (true);
501 	}
502 
503 	/*
504 	 * CAP_ACK was not set. Original request needs to be translated.
505 	 * XXX: implement translation of the original message
506 	 */
507 	RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
508 	    nlerr->msg.nlmsg_type);
509 	char *dst_payload, *src_payload;
510 	int copy_len = hdr->nlmsg_len - copied_len;
511 	dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
512 
513 	src_payload = (char *)hdr + copied_len;
514 
515 	memcpy(dst_payload, src_payload, copy_len);
516 	nlmsg_end(nw);
517 
518 	return (true);
519 }
520 
521 static bool
522 nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
523 {
524 	if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
525 		switch (hdr->nlmsg_type) {
526 		case NLMSG_ERROR:
527 			return (nlmsg_error_to_linux(hdr, nlp, nw));
528 		case NLMSG_NOOP:
529 		case NLMSG_DONE:
530 		case NLMSG_OVERRUN:
531 			return (handle_default_out(hdr, nw));
532 		default:
533 			RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
534 			    hdr->nlmsg_type);
535 			return (handle_default_out(hdr, nw));
536 		}
537 	}
538 
539 	switch (nlp->nl_proto) {
540 	case NETLINK_ROUTE:
541 		return (rtnl_to_linux(hdr, nlp, nw));
542 	default:
543 		return (handle_default_out(hdr, nw));
544 	}
545 }
546 
547 static bool
548 nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp)
549 {
550 	struct nl_buf *nb, *orig;
551 	u_int offset, msglen, orig_messages __diagused;
552 
553 	RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__,
554 	    nw->buf->datalen, nw->num_messages);
555 
556 	orig = nw->buf;
557 	nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT);
558 	if (__predict_false(nb == NULL))
559 		return (false);
560 	nw->buf = nb;
561 #ifdef INVARIANTS
562 	orig_messages = nw->num_messages;
563 #endif
564 	nw->num_messages = 0;
565 
566 	/* Assume correct headers. Buffer IS mutable */
567 	for (offset = 0;
568 	    offset + sizeof(struct nlmsghdr) <= orig->datalen;
569 	    offset += msglen) {
570 		struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset];
571 
572 		msglen = NLMSG_ALIGN(hdr->nlmsg_len);
573 		if (!nlmsg_to_linux(hdr, nlp, nw)) {
574 			RT_LOG(LOG_DEBUG, "failed to process msg type %d",
575 			    hdr->nlmsg_type);
576 			nl_buf_free(nb);
577 			return (false);
578 		}
579 	}
580 
581 	MPASS(nw->num_messages == orig_messages);
582 	MPASS(nw->buf == nb);
583 	nl_buf_free(orig);
584 	RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset);
585 
586 	return (true);
587 }
588 
589 static struct linux_netlink_provider linux_netlink_v1 = {
590 	.msgs_to_linux = nlmsgs_to_linux,
591 	.msg_from_linux = nlmsg_from_linux,
592 };
593 
594 void
595 linux_netlink_register(void)
596 {
597 	linux_netlink_p = &linux_netlink_v1;
598 }
599 
600 void
601 linux_netlink_deregister(void)
602 {
603 	linux_netlink_p = NULL;
604 }
605