xref: /freebsd/sys/compat/linux/linux_netlink.c (revision 959806e0a8448ef5df372468b8deddc20d976702)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_inet.h"
29 #include "opt_inet6.h"
30 
31 #include <sys/types.h>
32 #include <sys/ck.h>
33 #include <sys/lock.h>
34 #include <sys/socket.h>
35 #include <sys/vnode.h>
36 
37 #include <net/if.h>
38 #include <net/if_dl.h>
39 #include <net/route.h>
40 #include <net/route/nhop.h>
41 #include <net/route/route_ctl.h>
42 #include <netlink/netlink.h>
43 #include <netlink/netlink_ctl.h>
44 #include <netlink/netlink_linux.h>
45 #include <netlink/netlink_var.h>
46 #include <netlink/netlink_route.h>
47 
48 #include <compat/linux/linux.h>
49 #include <compat/linux/linux_common.h>
50 #include <compat/linux/linux_util.h>
51 
52 #define	DEBUG_MOD_NAME	nl_linux
53 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
54 #include <netlink/netlink_debug.h>
55 _DECLARE_DEBUG(LOG_INFO);
56 
/*
 * Check that the payload of attribute 'rta' is exactly 'sz' bytes long.
 */
static bool
valid_rta_size(const struct rtattr *rta, int sz)
{
	if (NL_RTA_DATA_LEN(rta) != sz)
		return (false);
	return (true);
}
62 
/*
 * Check that attribute 'rta' carries a payload the size of one uint32_t.
 */
static bool
valid_rta_u32(const struct rtattr *rta)
{
	bool fits;

	fits = valid_rta_size(rta, sizeof(uint32_t));
	return (fits);
}
68 
/*
 * Read the payload of attribute 'rta' as a uint32_t.
 * No length check is done here; the payload size must have been verified
 * before calling (e.g. with valid_rta_u32()).
 */
static uint32_t
_rta_get_uint32(const struct rtattr *rta)
{
	const uint32_t *valp = (const uint32_t *)NL_RTA_DATA_CONST(rta);

	return (*valp);
}
74 
75 static int
76 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
77 {
78 	struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
79 	sa_family_t f;
80 
81 	if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
82 		return (EBADMSG);
83 	if ((f = linux_to_bsd_domain(ndm->ndm_family)) == AF_UNKNOWN)
84 		return (EPFNOSUPPORT);
85 
86 	ndm->ndm_family = f;
87 
88 	return (0);
89 }
90 
91 static int
92 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
93 {
94 	struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
95 	sa_family_t f;
96 
97 	if (hdr->nlmsg_len < sizeof(struct nlmsghdr) +
98 	    offsetof(struct ifaddrmsg, ifa_family) + sizeof(ifam->ifa_family))
99 		return (EBADMSG);
100 	if ((f = linux_to_bsd_domain(ifam->ifa_family)) == AF_UNKNOWN)
101 		return (EPFNOSUPPORT);
102 
103 	ifam->ifa_family = f;
104 
105 	return (0);
106 }
107 
108 /*
109  * XXX: in case of error state of hdr is inconsistent.
110  */
111 static int
112 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
113 {
114 	/* Tweak address families and default fib only */
115 	struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
116 	struct nlattr *nla, *nla_head;
117 	int attrs_len;
118 	sa_family_t f;
119 
120 	if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct rtmsg))
121 		return (EBADMSG);
122 	if ((f = linux_to_bsd_domain(rtm->rtm_family)) == AF_UNKNOWN)
123 		return (EPFNOSUPPORT);
124 	rtm->rtm_family = f;
125 
126 	if (rtm->rtm_table == 254)
127 		rtm->rtm_table = 0;
128 
129 	attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
130 	attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
131 	nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
132 
133 	NLA_FOREACH(nla, nla_head, attrs_len) {
134 		RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
135 		    nla->nla_type, nla->nla_len, attrs_len);
136 		struct rtattr *rta = (struct rtattr *)nla;
137 		if (rta->rta_len < sizeof(struct rtattr)) {
138 			break;
139 		}
140 		switch (rta->rta_type) {
141 		case NL_RTA_TABLE:
142 			if (!valid_rta_u32(rta))
143 				return (EBADMSG);
144 			rtm->rtm_table = 0;
145 			uint32_t fibnum = _rta_get_uint32(rta);
146 			RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
147 			if (fibnum == 254) {
148 				*((uint32_t *)NL_RTA_DATA(rta)) = 0;
149 			}
150 			break;
151 		}
152 	}
153 
154 	return (0);
155 }
156 
157 static int
158 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
159 {
160 
161 	switch (hdr->nlmsg_type) {
162 	case NL_RTM_GETROUTE:
163 	case NL_RTM_NEWROUTE:
164 	case NL_RTM_DELROUTE:
165 		return (rtnl_route_from_linux(hdr, npt));
166 	case NL_RTM_GETNEIGH:
167 		return (rtnl_neigh_from_linux(hdr, npt));
168 	case NL_RTM_GETADDR:
169 		return (rtnl_ifaddr_from_linux(hdr, npt));
170 	/* Silence warning for the messages where no translation is required */
171 	case NL_RTM_NEWLINK:
172 	case NL_RTM_DELLINK:
173 	case NL_RTM_GETLINK:
174 		break;
175 	default:
176 		RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
177 		    hdr->nlmsg_type);
178 		/* XXXGL: maybe return error? */
179 	}
180 
181 	return (0);
182 }
183 
184 static int
185 nlmsg_from_linux(int netlink_family, struct nlmsghdr **hdr,
186     struct nl_pstate *npt)
187 {
188 	switch (netlink_family) {
189 	case NETLINK_ROUTE:
190 		return (rtnl_from_linux(*hdr, npt));
191 	}
192 
193 	return (0);
194 }
195 
196 
197 /************************************************************
198  * Kernel -> Linux
199  ************************************************************/
200 
201 static bool
202 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
203 {
204 	char *out_hdr;
205 	out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
206 
207 	if (out_hdr != NULL) {
208 		memcpy(out_hdr, hdr, hdr->nlmsg_len);
209 		nw->num_messages++;
210 		return (true);
211 	}
212 	return (false);
213 }
214 
215 static bool
216 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
217 {
218 	return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
219 	    hdr->nlmsg_flags, 0));
220 }
221 
222 static void *
223 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
224 {
225 	void *next_hdr = nlmsg_reserve_data(nw, sz, void);
226 	memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
227 
228 	return (next_hdr);
229 }
230 #define	nlmsg_copy_next_header(_hdr, _ns, _t)	\
231 	((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
232 
233 static bool
234 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
235 {
236 	struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
237 	if (nla != NULL) {
238 		memcpy(nla, nla_orig, nla_orig->nla_len);
239 		return (true);
240 	}
241 	return (false);
242 }
243 
244 /*
245  * Translate a FreeBSD attribute to a Linux attribute.
246  * Returns false when the attribute is not processed and the caller must take
247  * care of it.
248  */
249 static int
250 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla,
251     struct nl_writer *nw)
252 {
253 
254 	switch (hdr->nlmsg_type) {
255 	case NL_RTM_NEWLINK:
256 	case NL_RTM_DELLINK:
257 	case NL_RTM_GETLINK:
258 		switch (nla->nla_type) {
259 		case IFLA_IFNAME: {
260 			char ifname[LINUX_IFNAMSIZ];
261 
262 			if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname,
263 			    sizeof(ifname)) > 0)
264 				return (true);
265 			break;
266 		}
267 		default:
268 			break;
269 		}
270 	default:
271 		break;
272 	}
273 	return (false);
274 }
275 
276 static bool
277 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
278 {
279 	struct nlattr *nla;
280 
281 	int hdrlen = NETLINK_ALIGN(raw_hdrlen);
282 	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
283 	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
284 
285 	NLA_FOREACH(nla, nla_head, attrs_len) {
286 		RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
287 		if (nla->nla_len < sizeof(struct nlattr)) {
288 			return (false);
289 		}
290 		if (!nlmsg_translate_all_nla(hdr, nla, nw) &&
291 		    !nlmsg_copy_nla(nla, nw))
292 			return (false);
293 	}
294 	return (true);
295 }
296 
297 static unsigned int
298 rtnl_if_flags_to_linux(unsigned int if_flags)
299 {
300 	unsigned int result = 0;
301 
302 	for (int i = 0; i < 31; i++) {
303 		unsigned int flag = 1 << i;
304 		if (!(flag & if_flags))
305 			continue;
306 		switch (flag) {
307 		case IFF_UP:
308 		case IFF_BROADCAST:
309 		case IFF_DEBUG:
310 		case IFF_LOOPBACK:
311 		case IFF_POINTOPOINT:
312 		case IFF_DRV_RUNNING:
313 		case IFF_NOARP:
314 		case IFF_PROMISC:
315 		case IFF_ALLMULTI:
316 			result |= flag;
317 			break;
318 		case IFF_NEEDSEPOCH:
319 		case IFF_DRV_OACTIVE:
320 		case IFF_SIMPLEX:
321 		case IFF_LINK0:
322 		case IFF_LINK1:
323 		case IFF_LINK2:
324 		case IFF_CANTCONFIG:
325 		case IFF_PPROMISC:
326 		case IFF_MONITOR:
327 		case IFF_STATICARP:
328 		case IFF_STICKYARP:
329 		case IFF_DYING:
330 		case IFF_RENAMING:
331 			/* No Linux analogue */
332 			break;
333 		case IFF_MULTICAST:
334 			result |= 1 << 12;
335 		}
336 	}
337 	return (result);
338 }
339 
340 static bool
341 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
342     struct nl_writer *nw)
343 {
344 	if (!nlmsg_copy_header(hdr, nw))
345 		return (false);
346 
347 	struct ifinfomsg *ifinfo;
348 	ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
349 
350 	ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
351 	/* Convert interface type */
352 	switch (ifinfo->ifi_type) {
353 	case IFT_ETHER:
354 		ifinfo->ifi_type = LINUX_ARPHRD_ETHER;
355 		break;
356 	}
357 	ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
358 
359 	/* Copy attributes unchanged */
360 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
361 		return (false);
362 
363 	/* make ip(8) happy */
364 	if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
365 		return (false);
366 
367 	if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
368 		return (false);
369 
370 	nlmsg_end(nw);
371 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
372 	return (true);
373 }
374 
375 static bool
376 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
377     struct nl_writer *nw)
378 {
379 	if (!nlmsg_copy_header(hdr, nw))
380 		return (false);
381 
382 	struct ifaddrmsg *ifamsg;
383 	ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
384 
385 	ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
386 	/* XXX: fake ifa_flags? */
387 
388 	/* Copy attributes unchanged */
389 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
390 		return (false);
391 
392 	nlmsg_end(nw);
393 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
394 	return (true);
395 }
396 
397 static bool
398 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
399     struct nl_writer *nw)
400 {
401 	if (!nlmsg_copy_header(hdr, nw))
402 		return (false);
403 
404 	struct ndmsg *ndm;
405 	ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
406 
407 	ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
408 
409 	/* Copy attributes unchanged */
410 	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
411 		return (false);
412 
413 	nlmsg_end(nw);
414 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
415 	return (true);
416 }
417 
418 static bool
419 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
420     struct nl_writer *nw)
421 {
422 	if (!nlmsg_copy_header(hdr, nw))
423 		return (false);
424 
425 	struct rtmsg *rtm;
426 	rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
427 	rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
428 
429 	struct nlattr *nla;
430 
431 	int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
432 	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
433 	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
434 
435 	NLA_FOREACH(nla, nla_head, attrs_len) {
436 		struct rtattr *rta = (struct rtattr *)nla;
437 		//RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len);
438 		if (rta->rta_len < sizeof(struct rtattr)) {
439 			break;
440 		}
441 
442 		switch (rta->rta_type) {
443 		case NL_RTA_TABLE:
444 			{
445 				uint32_t fibnum;
446 				fibnum = _rta_get_uint32(rta);
447 				if (fibnum == 0)
448 					fibnum = 254;
449 				RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
450 				if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
451 					return (false);
452 			}
453 			break;
454 		default:
455 			if (!nlmsg_copy_nla(nla, nw))
456 				return (false);
457 			break;
458 		}
459 	}
460 
461 	nlmsg_end(nw);
462 	RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
463 	return (true);
464 }
465 
466 static bool
467 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
468 {
469 	RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
470 
471 	switch (hdr->nlmsg_type) {
472 	case NL_RTM_NEWLINK:
473 	case NL_RTM_DELLINK:
474 	case NL_RTM_GETLINK:
475 		return (rtnl_newlink_to_linux(hdr, nlp, nw));
476 	case NL_RTM_NEWADDR:
477 	case NL_RTM_DELADDR:
478 		return (rtnl_newaddr_to_linux(hdr, nlp, nw));
479 	case NL_RTM_NEWROUTE:
480 	case NL_RTM_DELROUTE:
481 		return (rtnl_newroute_to_linux(hdr, nlp, nw));
482 	case NL_RTM_NEWNEIGH:
483 	case NL_RTM_DELNEIGH:
484 	case NL_RTM_GETNEIGH:
485 		return (rtnl_newneigh_to_linux(hdr, nlp, nw));
486 	default:
487 		RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
488 		    hdr->nlmsg_type);
489 		return (handle_default_out(hdr, nw));
490 	}
491 }
492 
493 static bool
494 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
495 {
496 	if (!nlmsg_copy_header(hdr, nw))
497 		return (false);
498 
499 	struct nlmsgerr *nlerr;
500 	nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
501 	nlerr->error = bsd_to_linux_errno(nlerr->error);
502 
503 	int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
504 	if (hdr->nlmsg_len == copied_len) {
505 		nlmsg_end(nw);
506 		return (true);
507 	}
508 
509 	/*
510 	 * CAP_ACK was not set. Original request needs to be translated.
511 	 * XXX: implement translation of the original message
512 	 */
513 	RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
514 	    nlerr->msg.nlmsg_type);
515 	char *dst_payload, *src_payload;
516 	int copy_len = hdr->nlmsg_len - copied_len;
517 	dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
518 
519 	src_payload = (char *)hdr + copied_len;
520 
521 	memcpy(dst_payload, src_payload, copy_len);
522 	nlmsg_end(nw);
523 
524 	return (true);
525 }
526 
527 static bool
528 nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
529 {
530 	if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
531 		switch (hdr->nlmsg_type) {
532 		case NLMSG_ERROR:
533 			return (nlmsg_error_to_linux(hdr, nlp, nw));
534 		case NLMSG_NOOP:
535 		case NLMSG_DONE:
536 		case NLMSG_OVERRUN:
537 			return (handle_default_out(hdr, nw));
538 		default:
539 			RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
540 			    hdr->nlmsg_type);
541 			return (handle_default_out(hdr, nw));
542 		}
543 	}
544 
545 	switch (nlp->nl_proto) {
546 	case NETLINK_ROUTE:
547 		return (rtnl_to_linux(hdr, nlp, nw));
548 	default:
549 		return (handle_default_out(hdr, nw));
550 	}
551 }
552 
553 static struct nl_buf *
554 nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp)
555 {
556 	struct nl_writer nw;
557 	u_int offset, msglen;
558 
559 	if (__predict_false(!nl_writer_unicast(&nw,
560 	    orig->datalen + SCRATCH_BUFFER_SIZE, nlp, false)))
561 		return (NULL);
562 
563 	/* Assume correct headers. Buffer IS mutable */
564 	for (offset = 0;
565 	    offset + sizeof(struct nlmsghdr) <= orig->datalen;
566 	    offset += msglen) {
567 		struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset];
568 
569 		msglen = NLMSG_ALIGN(hdr->nlmsg_len);
570 		if (!nlmsg_to_linux(hdr, nlp, &nw)) {
571 			RT_LOG(LOG_DEBUG, "failed to process msg type %d",
572 			    hdr->nlmsg_type);
573 			nl_buf_free(nw.buf);
574 			return (NULL);
575 		}
576 	}
577 
578 	RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__,
579 	    nw.buf->datalen, nw.num_messages);
580 
581 	return (nw.buf);
582 }
583 
584 static struct linux_netlink_provider linux_netlink_v1 = {
585 	.msgs_to_linux = nlmsgs_to_linux,
586 	.msg_from_linux = nlmsg_from_linux,
587 };
588 
589 void
590 linux_netlink_register(void)
591 {
592 	linux_netlink_p = &linux_netlink_v1;
593 }
594 
595 void
596 linux_netlink_deregister(void)
597 {
598 	linux_netlink_p = NULL;
599 }
600