xref: /freebsd/sys/compat/linux/linux.c (revision f53355131f65d64e7643d734dbcd4fb2a5de20ed)
1 /*-
2  * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include "opt_inet6.h"
27 
28 #include <sys/param.h>
29 #include <sys/conf.h>
30 #include <sys/ctype.h>
31 #include <sys/file.h>
32 #include <sys/filedesc.h>
33 #include <sys/jail.h>
34 #include <sys/lock.h>
35 #include <sys/malloc.h>
36 #include <sys/poll.h>
37 #include <sys/proc.h>
38 #include <sys/signalvar.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
41 
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <netlink/netlink.h>
47 
48 #include <sys/un.h>
49 #include <netinet/in.h>
50 
51 #include <compat/linux/linux.h>
52 #include <compat/linux/linux_common.h>
53 #include <compat/linux/linux_mib.h>
54 #include <compat/linux/linux_util.h>
55 
/*
 * Compile-time layout checks: the Linux and native interface name buffer
 * sizes must be equal, and struct l_sockaddr must have the same size and
 * the same sa_data offset as the native struct sockaddr.
 */
_Static_assert(LINUX_IFNAMSIZ == IFNAMSIZ, "Linux IFNAMSIZ");
_Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr),
    "Linux struct sockaddr size");
_Static_assert(offsetof(struct sockaddr, sa_data) ==
    offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout");
61 
/*
 * Knob selecting whether ethernet interfaces keep their FreeBSD names
 * instead of being renamed to ethN.  Defaults to false; it is exported
 * through the sysctl declared below and read back by
 * linux_use_real_ifname().
 */
static bool use_real_ifnames = false;
SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN,
    &use_real_ifnames, 0,
    "Use FreeBSD interface names instead of generating ethN aliases");
66 
/*
 * FreeBSD -> Linux translation table for the classic (non real-time)
 * signals.  bsd_to_linux_signal() indexes it with _SIG_IDX(sig), so the
 * entries must stay in FreeBSD signal-number order.  A 0 entry (SIGEMT,
 * SIGINFO) means the FreeBSD signal has no Linux counterpart here.
 */
static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP,	/* SIGHUP */
	LINUX_SIGINT,	/* SIGINT */
	LINUX_SIGQUIT,	/* SIGQUIT */
	LINUX_SIGILL,	/* SIGILL */
	LINUX_SIGTRAP,	/* SIGTRAP */
	LINUX_SIGABRT,	/* SIGABRT */
	0,		/* SIGEMT */
	LINUX_SIGFPE,	/* SIGFPE */
	LINUX_SIGKILL,	/* SIGKILL */
	LINUX_SIGBUS,	/* SIGBUS */
	LINUX_SIGSEGV,	/* SIGSEGV */
	LINUX_SIGSYS,	/* SIGSYS */
	LINUX_SIGPIPE,	/* SIGPIPE */
	LINUX_SIGALRM,	/* SIGALRM */
	LINUX_SIGTERM,	/* SIGTERM */
	LINUX_SIGURG,	/* SIGURG */
	LINUX_SIGSTOP,	/* SIGSTOP */
	LINUX_SIGTSTP,	/* SIGTSTP */
	LINUX_SIGCONT,	/* SIGCONT */
	LINUX_SIGCHLD,	/* SIGCHLD */
	LINUX_SIGTTIN,	/* SIGTTIN */
	LINUX_SIGTTOU,	/* SIGTTOU */
	LINUX_SIGIO,	/* SIGIO */
	LINUX_SIGXCPU,	/* SIGXCPU */
	LINUX_SIGXFSZ,	/* SIGXFSZ */
	LINUX_SIGVTALRM,/* SIGVTALRM */
	LINUX_SIGPROF,	/* SIGPROF */
	LINUX_SIGWINCH,	/* SIGWINCH */
	0,		/* SIGINFO */
	LINUX_SIGUSR1,	/* SIGUSR1 */
	LINUX_SIGUSR2	/* SIGUSR2 */
};
100 
/*
 * FreeBSD signal number standing in for Linux SIGPWR: one past the FreeBSD
 * signal that linux_to_bsd_rt_signal() yields for LINUX_SIGRTMAX.
 */
#define	LINUX_SIGPWREMU	(SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1)

/*
 * Linux -> FreeBSD translation table for the classic (non real-time)
 * signals.  linux_to_bsd_signal() indexes it with _SIG_IDX(sig), so the
 * entries must stay in Linux signal-number order.  Note that both
 * LINUX_SIGBUS and LINUX_SIGSTKFLT translate to SIGBUS.
 */
static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
	SIGHUP,		/* LINUX_SIGHUP */
	SIGINT,		/* LINUX_SIGINT */
	SIGQUIT,	/* LINUX_SIGQUIT */
	SIGILL,		/* LINUX_SIGILL */
	SIGTRAP,	/* LINUX_SIGTRAP */
	SIGABRT,	/* LINUX_SIGABRT */
	SIGBUS,		/* LINUX_SIGBUS */
	SIGFPE,		/* LINUX_SIGFPE */
	SIGKILL,	/* LINUX_SIGKILL */
	SIGUSR1,	/* LINUX_SIGUSR1 */
	SIGSEGV,	/* LINUX_SIGSEGV */
	SIGUSR2,	/* LINUX_SIGUSR2 */
	SIGPIPE,	/* LINUX_SIGPIPE */
	SIGALRM,	/* LINUX_SIGALRM */
	SIGTERM,	/* LINUX_SIGTERM */
	SIGBUS,		/* LINUX_SIGSTKFLT */
	SIGCHLD,	/* LINUX_SIGCHLD */
	SIGCONT,	/* LINUX_SIGCONT */
	SIGSTOP,	/* LINUX_SIGSTOP */
	SIGTSTP,	/* LINUX_SIGTSTP */
	SIGTTIN,	/* LINUX_SIGTTIN */
	SIGTTOU,	/* LINUX_SIGTTOU */
	SIGURG,		/* LINUX_SIGURG */
	SIGXCPU,	/* LINUX_SIGXCPU */
	SIGXFSZ,	/* LINUX_SIGXFSZ */
	SIGVTALRM,	/* LINUX_SIGVTALRM */
	SIGPROF,	/* LINUX_SIGPROF */
	SIGWINCH,	/* LINUX_SIGWINCH */
	SIGIO,		/* LINUX_SIGIO */
	/*
	 * FreeBSD does not have a SIGPWR signal, so map the Linux SIGPWR
	 * signal to the first unused FreeBSD signal number. Since Linux
	 * supports signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
	 */
	LINUX_SIGPWREMU,/* LINUX_SIGPWR */
	SIGSYS		/* LINUX_SIGSYS */
};
141 
/*
 * Device node created by linux_dev_shm_create() under the name
 * "shm/.mountpoint" and torn down by linux_dev_shm_destroy().  The switch
 * table sets only the version and the name; no d_* methods are provided.
 */
static struct cdev *dev_shm_cdev;
static struct cdevsw dev_shm_cdevsw = {
     .d_version = D_VERSION,
     .d_name    = "dev_shm",
};
147 
148 /*
149  * Map Linux RT signals to the FreeBSD RT signals.
150  */
151 static inline int
152 linux_to_bsd_rt_signal(int sig)
153 {
154 
155 	return (SIGRTMIN + sig - LINUX_SIGRTMIN);
156 }
157 
158 static inline int
159 bsd_to_linux_rt_signal(int sig)
160 {
161 
162 	return (sig - SIGRTMIN + LINUX_SIGRTMIN);
163 }
164 
165 int
166 linux_to_bsd_signal(int sig)
167 {
168 
169 	KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
170 
171 	if (sig < LINUX_SIGRTMIN)
172 		return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
173 
174 	return (linux_to_bsd_rt_signal(sig));
175 }
176 
177 int
178 bsd_to_linux_signal(int sig)
179 {
180 
181 	if (sig <= LINUX_SIGTBLSZ)
182 		return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
183 	if (sig == LINUX_SIGPWREMU)
184 		return (LINUX_SIGPWR);
185 
186 	return (bsd_to_linux_rt_signal(sig));
187 }
188 
189 int
190 linux_to_bsd_sigaltstack(int lsa)
191 {
192 	int bsa = 0;
193 
194 	if (lsa & LINUX_SS_DISABLE)
195 		bsa |= SS_DISABLE;
196 	/*
197 	 * Linux ignores SS_ONSTACK flag for ss
198 	 * parameter while FreeBSD prohibits it.
199 	 */
200 	return (bsa);
201 }
202 
203 int
204 bsd_to_linux_sigaltstack(int bsa)
205 {
206 	int lsa = 0;
207 
208 	if (bsa & SS_DISABLE)
209 		lsa |= LINUX_SS_DISABLE;
210 	if (bsa & SS_ONSTACK)
211 		lsa |= LINUX_SS_ONSTACK;
212 	return (lsa);
213 }
214 
215 void
216 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
217 {
218 	int b, l;
219 
220 	SIGEMPTYSET(*bss);
221 	for (l = 1; l <= LINUX_SIGRTMAX; l++) {
222 		if (LINUX_SIGISMEMBER(*lss, l)) {
223 			b = linux_to_bsd_signal(l);
224 			if (b)
225 				SIGADDSET(*bss, b);
226 		}
227 	}
228 }
229 
230 void
231 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
232 {
233 	int b, l;
234 
235 	LINUX_SIGEMPTYSET(*lss);
236 	for (b = 1; b <= SIGRTMAX; b++) {
237 		if (SIGISMEMBER(*bss, b)) {
238 			l = bsd_to_linux_signal(b);
239 			if (l)
240 				LINUX_SIGADDSET(*lss, l);
241 		}
242 	}
243 }
244 
245 /*
246  * Translate a FreeBSD interface name to a Linux interface name
247  * by interface name, and return the number of bytes copied to lxname.
248  */
249 int
250 ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
251 {
252 	struct epoch_tracker et;
253 	struct ifnet *ifp;
254 	int ret;
255 
256 	ret = 0;
257 	CURVNET_SET(TD_TO_VNET(curthread));
258 	NET_EPOCH_ENTER(et);
259 	ifp = ifunit(bsdname);
260 	if (ifp != NULL)
261 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
262 	NET_EPOCH_EXIT(et);
263 	CURVNET_RESTORE();
264 	return (ret);
265 }
266 
267 /*
268  * Translate a FreeBSD interface name to a Linux interface name
269  * by interface index, and return the number of bytes copied to lxname.
270  */
271 int
272 ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len)
273 {
274 	struct epoch_tracker et;
275 	struct ifnet *ifp;
276 	int ret;
277 
278 	ret = 0;
279 	CURVNET_SET(TD_TO_VNET(curthread));
280 	NET_EPOCH_ENTER(et);
281 	ifp = ifnet_byindex(idx);
282 	if (ifp != NULL)
283 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
284 	NET_EPOCH_EXIT(et);
285 	CURVNET_RESTORE();
286 	return (ret);
287 }
288 
289 /*
290  * Translate a FreeBSD interface name to a Linux interface name,
291  * and return the number of bytes copied to lxname, 0 if interface
292  * not found, -1 on error.
293  */
/* Walk state shared by ifname_bsd_to_linux_ifp() and its callback. */
struct ifname_bsd_to_linux_ifp_cb_s {
	struct ifnet	*ifp;		/* interface being looked for */
	int		ethno;		/* ethernet interfaces seen before it */
	char		*lxname;	/* output buffer for the Linux name */
	size_t		len;		/* size handed to snprintf() for lxname */
};
300 
301 static int
302 ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
303 {
304 	struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
305 
306 	if (ifp == cbs->ifp)
307 		return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
308 	if (IFP_IS_ETH(ifp))
309 		cbs->ethno++;
310 	return (0);
311 }
312 
313 int
314 ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
315 {
316 	struct ifname_bsd_to_linux_ifp_cb_s arg = {
317 		.ifp = ifp,
318 		.ethno = 0,
319 		.lxname = lxname,
320 		.len = len,
321 	};
322 
323 	NET_EPOCH_ASSERT();
324 
325 	/*
326 	 * Linux loopback interface name is lo (not lo0),
327 	 * we translate lo to lo0, loX to loX.
328 	 */
329 	if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0)
330 		return (strlcpy(lxname, "lo", len));
331 
332 	/* Short-circuit non ethernet interfaces. */
333 	if (!IFP_IS_ETH(ifp) || linux_use_real_ifname(ifp))
334 		return (strlcpy(lxname, if_name(ifp), len));
335 
336  	/* Determine the (relative) unit number for ethernet interfaces. */
337 	return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
338 }
339 
/*
 * Translate a Linux interface name to a FreeBSD interface name,
 * and return the associated ifnet structure.
 * bsdname and lxname need to be at least IFNAMSIZ bytes long, but
 * can point to the same buffer.
 */
/* Walk state shared by ifname_linux_to_ifp() and its callback. */
struct ifname_linux_to_ifp_cb_s {
	bool		is_lo;		/* lxname is exactly "lo" */
	bool		is_eth;		/* alphabetic prefix of lxname is "eth" */
	int		ethno;		/* ethernet interfaces walked so far */
	int		unit;		/* number parsed after the prefix */
	const char	*lxname;	/* Linux name being resolved */
	if_t		ifp;		/* result; stays NULL if nothing matches */
};
354 
355 static int
356 ifname_linux_to_ifp_cb(if_t ifp, void *arg)
357 {
358 	struct ifname_linux_to_ifp_cb_s *cbs = arg;
359 
360 	NET_EPOCH_ASSERT();
361 
362 	/*
363 	 * Allow Linux programs to use FreeBSD names. Don't presume
364 	 * we never have an interface named "eth", so don't make
365 	 * the test optional based on is_eth.
366 	 */
367 	if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
368 		goto out;
369 	if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
370 		goto out;
371 	if (cbs->is_lo && IFP_IS_LOOP(ifp))
372 		goto out;
373 	if (IFP_IS_ETH(ifp))
374 		cbs->ethno++;
375 	return (0);
376 
377 out:
378 	cbs->ifp = ifp;
379 	return (1);
380 }
381 
382 struct ifnet *
383 ifname_linux_to_ifp(struct thread *td, const char *lxname)
384 {
385 	struct ifname_linux_to_ifp_cb_s arg = {
386 		.ethno = 0,
387 		.lxname = lxname,
388 		.ifp = NULL,
389 	};
390 	int len;
391 	char *ep;
392 
393 	NET_EPOCH_ASSERT();
394 
395 	for (len = 0; len < LINUX_IFNAMSIZ; ++len)
396 		if (!isalpha(lxname[len]) || lxname[len] == '\0')
397 			break;
398 	if (len == 0 || len == LINUX_IFNAMSIZ)
399 		return (NULL);
400 	/*
401 	 * Linux loopback interface name is lo (not lo0),
402 	 * we translate lo to lo0, loX to loX.
403 	 */
404 	arg.is_lo = (len == 2 && strncmp(lxname, "lo", LINUX_IFNAMSIZ) == 0);
405 	arg.unit = (int)strtoul(lxname + len, &ep, 10);
406 	if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
407 	    arg.is_lo == 0)
408 		return (NULL);
409 	arg.is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
410 
411 	if_foreach(ifname_linux_to_ifp_cb, &arg);
412 	return (arg.ifp);
413 }
414 
415 int
416 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
417 {
418 	struct epoch_tracker et;
419 	struct ifnet *ifp;
420 
421 	CURVNET_SET(TD_TO_VNET(td));
422 	NET_EPOCH_ENTER(et);
423 	ifp = ifname_linux_to_ifp(td, lxname);
424 	if (ifp != NULL && bsdname != NULL)
425 		strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
426 	NET_EPOCH_EXIT(et);
427 	CURVNET_RESTORE();
428 	return (ifp != NULL ? 0 : EINVAL);
429 }
430 
/*
 * Return the Linux view of an interface's flags: the interface flags and
 * driver flags are OR-ed together, narrowed to unsigned short and then
 * translated by bsd_to_linux_ifflags().  Must be called inside the
 * network epoch.
 */
unsigned short
linux_ifflags(struct ifnet *ifp)
{
	unsigned short bsdflags;

	NET_EPOCH_ASSERT();

	bsdflags = if_getflags(ifp) | if_getdrvflags(ifp);

	return (bsd_to_linux_ifflags(bsdflags));
}
441 
442 unsigned short
443 bsd_to_linux_ifflags(int fl)
444 {
445 	unsigned short flags = 0;
446 
447 	if (fl & IFF_UP)
448 		flags |= LINUX_IFF_UP;
449 	if (fl & IFF_BROADCAST)
450 		flags |= LINUX_IFF_BROADCAST;
451 	if (fl & IFF_DEBUG)
452 		flags |= LINUX_IFF_DEBUG;
453 	if (fl & IFF_LOOPBACK)
454 		flags |= LINUX_IFF_LOOPBACK;
455 	if (fl & IFF_POINTOPOINT)
456 		flags |= LINUX_IFF_POINTOPOINT;
457 	if (fl & IFF_DRV_RUNNING)
458 		flags |= LINUX_IFF_RUNNING;
459 	if (fl & IFF_NOARP)
460 		flags |= LINUX_IFF_NOARP;
461 	if (fl & IFF_PROMISC)
462 		flags |= LINUX_IFF_PROMISC;
463 	if (fl & IFF_ALLMULTI)
464 		flags |= LINUX_IFF_ALLMULTI;
465 	if (fl & IFF_MULTICAST)
466 		flags |= LINUX_IFF_MULTICAST;
467 	return (flags);
468 }
469 
470 static u_int
471 linux_ifhwaddr_cb(void *arg, struct ifaddr *ifa, u_int count)
472 {
473 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifa->ifa_addr;
474 	struct l_sockaddr *lsa = arg;
475 
476 	if (count > 0)
477 		return (0);
478 	if (sdl->sdl_type != IFT_ETHER)
479 		return (0);
480 	bzero(lsa, sizeof(*lsa));
481 	lsa->sa_family = LINUX_ARPHRD_ETHER;
482 	bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
483 	return (1);
484 }
485 
486 int
487 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
488 {
489 
490 	NET_EPOCH_ASSERT();
491 
492 	if (IFP_IS_LOOP(ifp)) {
493 		bzero(lsa, sizeof(*lsa));
494 		lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
495 		return (0);
496 	}
497 	if (!IFP_IS_ETH(ifp))
498 		return (ENOENT);
499 	if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
500 		return (0);
501 	return (ENOENT);
502 }
503 
504 int
505 linux_to_bsd_domain(int domain)
506 {
507 
508 	switch (domain) {
509 	case LINUX_AF_UNSPEC:
510 		return (AF_UNSPEC);
511 	case LINUX_AF_UNIX:
512 		return (AF_LOCAL);
513 	case LINUX_AF_INET:
514 		return (AF_INET);
515 	case LINUX_AF_INET6:
516 		return (AF_INET6);
517 	case LINUX_AF_AX25:
518 		return (AF_CCITT);
519 	case LINUX_AF_IPX:
520 		return (AF_IPX);
521 	case LINUX_AF_APPLETALK:
522 		return (AF_APPLETALK);
523 	case LINUX_AF_NETLINK:
524 		return (AF_NETLINK);
525 	}
526 	return (-1);
527 }
528 
529 int
530 bsd_to_linux_domain(int domain)
531 {
532 
533 	switch (domain) {
534 	case AF_UNSPEC:
535 		return (LINUX_AF_UNSPEC);
536 	case AF_LOCAL:
537 		return (LINUX_AF_UNIX);
538 	case AF_INET:
539 		return (LINUX_AF_INET);
540 	case AF_INET6:
541 		return (LINUX_AF_INET6);
542 	case AF_CCITT:
543 		return (LINUX_AF_AX25);
544 	case AF_IPX:
545 		return (LINUX_AF_IPX);
546 	case AF_APPLETALK:
547 		return (LINUX_AF_APPLETALK);
548 	case AF_NETLINK:
549 		return (LINUX_AF_NETLINK);
550 	}
551 	return (-1);
552 }
553 
554 /*
555  * Based on the fact that:
556  * 1. Native and Linux storage of struct sockaddr
557  * and struct sockaddr_in6 are equal.
558  * 2. On Linux sa_family is the first member of all struct sockaddr.
559  */
560 int
561 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
562     socklen_t len)
563 {
564 	struct l_sockaddr *kosa;
565 	int bdom;
566 
567 	*lsa = NULL;
568 	if (len < 2 || len > UCHAR_MAX)
569 		return (EINVAL);
570 	bdom = bsd_to_linux_domain(sa->sa_family);
571 	if (bdom == -1)
572 		return (EAFNOSUPPORT);
573 
574 	kosa = malloc(len, M_LINUX, M_WAITOK);
575 	bcopy(sa, kosa, len);
576 	kosa->sa_family = bdom;
577 	*lsa = kosa;
578 	return (0);
579 }
580 
/*
 * Copy a Linux socket address in from user space and convert it, in
 * place, into a native struct sockaddr.
 *
 * osa:	user-space address of the Linux sockaddr (read with copyin()).
 * sap:	on success, set to the converted address; the buffer comes from
 *	M_SONAME and is not freed here on the success path.
 * len:	in: length of the Linux address (must be 2..UCHAR_MAX);
 *	out, on success only: the length stored in sa_len.
 *
 * Returns 0 on success, otherwise EINVAL, EAFNOSUPPORT, ENAMETOOLONG or
 * the error from copyin().  On failure the buffer is freed and *sap and
 * *len are left untouched.
 */
int
linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
    socklen_t *len)
{
	struct sockaddr *sa;
	struct l_sockaddr *kosa;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	bool  oldv6size;
#endif
	char *name;
	int salen, bdom, error, hdrlen, namelen;

	if (*len < 2 || *len > UCHAR_MAX)
		return (EINVAL);

	salen = *len;

#ifdef INET6
	oldv6size = false;
	/*
	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
	 * if it's a v4-mapped address, so reserve the proper space
	 * for it.
	 */
	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
		salen += sizeof(uint32_t);
		oldv6size = true;
	}
#endif

	/* salen may exceed *len by the reserved scope id; only *len is read. */
	kosa = malloc(salen, M_SONAME, M_WAITOK);

	if ((error = copyin(osa, kosa, *len)))
		goto out;

	bdom = linux_to_bsd_domain(kosa->sa_family);
	if (bdom == -1) {
		error = EAFNOSUPPORT;
		goto out;
	}

#ifdef INET6
	/*
	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
	 * which lacks the scope id compared with RFC2553 one. If we detect
	 * the situation, reject the address and write a message to system log.
	 *
	 * Still accept addresses for which the scope id is not used.
	 */
	if (oldv6size) {
		if (bdom == AF_INET6) {
			sin6 = (struct sockaddr_in6 *)kosa;
			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
				sin6->sin6_scope_id = 0;
			} else {
				linux_msg(curthread,
				    "obsolete pre-RFC2553 sockaddr_in6 rejected");
				error = EINVAL;
				goto out;
			}
		} else
			/* Not IPv6 after all: undo the size reservation. */
			salen -= sizeof(uint32_t);
	}
#endif
	/* IPv4 addresses are normalized to exactly sizeof(sockaddr_in). */
	if (bdom == AF_INET) {
		if (salen < sizeof(struct sockaddr_in)) {
			error = EINVAL;
			goto out;
		}
		salen = sizeof(struct sockaddr_in);
	}

	/*
	 * A local address longer than the native sockaddr_un is trimmed to
	 * the header plus the actual length of the path string; it is
	 * rejected if it still does not fit.
	 */
	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
		hdrlen = offsetof(struct sockaddr_un, sun_path);
		name = ((struct sockaddr_un *)kosa)->sun_path;
		if (*name == '\0') {
			/*
			 * Linux abstract namespace starts with a NULL byte.
			 * XXX We do not support abstract namespace yet.
			 */
			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
		} else
			namelen = strnlen(name, salen - hdrlen);
		salen = hdrlen + namelen;
		if (salen > sizeof(struct sockaddr_un)) {
			error = ENAMETOOLONG;
			goto out;
		}
	}

	/* Netlink addresses are normalized to exactly sizeof(sockaddr_nl). */
	if (bdom == AF_NETLINK) {
		if (salen < sizeof(struct sockaddr_nl)) {
			error = EINVAL;
			goto out;
		}
		salen = sizeof(struct sockaddr_nl);
	}

	/* Reinterpret the buffer as a native sockaddr and fix its header. */
	sa = (struct sockaddr *)kosa;
	sa->sa_family = bdom;
	sa->sa_len = salen;

	*sap = sa;
	*len = salen;
	return (0);

out:
	free(kosa, M_SONAME);
	return (error);
}
697 
698 void
699 linux_dev_shm_create(void)
700 {
701 	int error;
702 
703 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
704 	    &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
705 	if (error != 0) {
706 		printf("%s: failed to create device node, error %d\n",
707 		    __func__, error);
708 	}
709 }
710 
711 void
712 linux_dev_shm_destroy(void)
713 {
714 
715 	destroy_dev(dev_shm_cdev);
716 }
717 
718 int
719 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
720     size_t mapcnt, int no_value)
721 {
722 	int bsd_mask, bsd_value, linux_mask, linux_value;
723 	int linux_ret;
724 	size_t i;
725 	bool applied;
726 
727 	applied = false;
728 	linux_ret = 0;
729 	for (i = 0; i < mapcnt; ++i) {
730 		bsd_mask = bitmap[i].bsd_mask;
731 		bsd_value = bitmap[i].bsd_value;
732 		if (bsd_mask == 0)
733 			bsd_mask = bsd_value;
734 
735 		linux_mask = bitmap[i].linux_mask;
736 		linux_value = bitmap[i].linux_value;
737 		if (linux_mask == 0)
738 			linux_mask = linux_value;
739 
740 		/*
741 		 * If a mask larger than just the value is set, we explicitly
742 		 * want to make sure that only this bit we mapped within that
743 		 * mask is set.
744 		 */
745 		if ((value & bsd_mask) == bsd_value) {
746 			linux_ret = (linux_ret & ~linux_mask) | linux_value;
747 			applied = true;
748 		}
749 	}
750 
751 	if (!applied)
752 		return (no_value);
753 	return (linux_ret);
754 }
755 
756 int
757 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
758     size_t mapcnt, int no_value)
759 {
760 	int bsd_mask, bsd_value, linux_mask, linux_value;
761 	int bsd_ret;
762 	size_t i;
763 	bool applied;
764 
765 	applied = false;
766 	bsd_ret = 0;
767 	for (i = 0; i < mapcnt; ++i) {
768 		bsd_mask = bitmap[i].bsd_mask;
769 		bsd_value = bitmap[i].bsd_value;
770 		if (bsd_mask == 0)
771 			bsd_mask = bsd_value;
772 
773 		linux_mask = bitmap[i].linux_mask;
774 		linux_value = bitmap[i].linux_value;
775 		if (linux_mask == 0)
776 			linux_mask = linux_value;
777 
778 		/*
779 		 * If a mask larger than just the value is set, we explicitly
780 		 * want to make sure that only this bit we mapped within that
781 		 * mask is set.
782 		 */
783 		if ((value & linux_mask) == linux_value) {
784 			bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
785 			applied = true;
786 		}
787 	}
788 
789 	if (!applied)
790 		return (no_value);
791 	return (bsd_ret);
792 }
793 
794 void
795 linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
796     short *bev)
797 {
798 	struct file *fp;
799 	int error;
800 	short bits = 0;
801 
802 	if (lev & LINUX_POLLIN)
803 		bits |= POLLIN;
804 	if (lev & LINUX_POLLPRI)
805 		bits |=	POLLPRI;
806 	if (lev & LINUX_POLLOUT)
807 		bits |= POLLOUT;
808 	if (lev & LINUX_POLLERR)
809 		bits |= POLLERR;
810 	if (lev & LINUX_POLLHUP)
811 		bits |= POLLHUP;
812 	if (lev & LINUX_POLLNVAL)
813 		bits |= POLLNVAL;
814 	if (lev & LINUX_POLLRDNORM)
815 		bits |= POLLRDNORM;
816 	if (lev & LINUX_POLLRDBAND)
817 		bits |= POLLRDBAND;
818 	if (lev & LINUX_POLLWRBAND)
819 		bits |= POLLWRBAND;
820 	if (lev & LINUX_POLLWRNORM)
821 		bits |= POLLWRNORM;
822 
823 	if (lev & LINUX_POLLRDHUP) {
824 		/*
825 		 * It seems that the Linux silencly ignores POLLRDHUP
826 		 * on non-socket file descriptors unlike FreeBSD, where
827 		 * events bits is more strictly checked (POLLSTANDARD).
828 		 */
829 		error = fget_unlocked(td, fd, &cap_no_rights, &fp);
830 		if (error == 0) {
831 			/*
832 			 * XXX. On FreeBSD POLLRDHUP applies only to
833 			 * stream sockets.
834 			 */
835 			if (fp->f_type == DTYPE_SOCKET)
836 				bits |= POLLRDHUP;
837 			fdrop(fp, td);
838 		}
839 	}
840 
841 	if (lev & LINUX_POLLMSG)
842 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
843 	if (lev & LINUX_POLLREMOVE)
844 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
845 
846 	*bev = bits;
847 }
848 
849 void
850 bsd_to_linux_poll_events(short bev, short *lev)
851 {
852 	short bits = 0;
853 
854 	if (bev & POLLIN)
855 		bits |= LINUX_POLLIN;
856 	if (bev & POLLPRI)
857 		bits |=	LINUX_POLLPRI;
858 	if (bev & (POLLOUT | POLLWRNORM))
859 		/*
860 		 * POLLWRNORM is equal to POLLOUT on FreeBSD,
861 		 * but not on Linux
862 		 */
863 		bits |= LINUX_POLLOUT;
864 	if (bev & POLLERR)
865 		bits |= LINUX_POLLERR;
866 	if (bev & POLLHUP)
867 		bits |= LINUX_POLLHUP;
868 	if (bev & POLLNVAL)
869 		bits |= LINUX_POLLNVAL;
870 	if (bev & POLLRDNORM)
871 		bits |= LINUX_POLLRDNORM;
872 	if (bev & POLLRDBAND)
873 		bits |= LINUX_POLLRDBAND;
874 	if (bev & POLLWRBAND)
875 		bits |= LINUX_POLLWRBAND;
876 	if (bev & POLLRDHUP)
877 		bits |= LINUX_POLLRDHUP;
878 
879 	*lev = bits;
880 }
881 
/*
 * Report whether FreeBSD interface names should be shown to Linux
 * programs as-is.  The answer is the global use_real_ifnames knob; the
 * ifp argument is currently not consulted.
 */
bool
linux_use_real_ifname(const struct ifnet *ifp)
{

	return (use_real_ifnames);
}
888