xref: /freebsd/sys/compat/linux/linux.c (revision 911f0260390e18cf85f3dbf2c719b593efdc1e3c)
1 /*-
2  * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
28 
29 #include "opt_inet6.h"
30 
31 #include <sys/param.h>
32 #include <sys/conf.h>
33 #include <sys/ctype.h>
34 #include <sys/file.h>
35 #include <sys/filedesc.h>
36 #include <sys/jail.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/poll.h>
40 #include <sys/proc.h>
41 #include <sys/signalvar.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/if_dl.h>
48 #include <net/if_types.h>
49 #include <netlink/netlink.h>
50 
51 #include <sys/un.h>
52 #include <netinet/in.h>
53 
54 #include <compat/linux/linux.h>
55 #include <compat/linux/linux_common.h>
56 #include <compat/linux/linux_mib.h>
57 #include <compat/linux/linux_util.h>
58 
/*
 * Compile-time checks that the Linux and native definitions agree on the
 * interface name size, and on the size and sa_data offset of the generic
 * socket address structure.
 */
_Static_assert(LINUX_IFNAMSIZ == IFNAMSIZ, "Linux IFNAMSIZ");
_Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr),
    "Linux struct sockaddr size");
_Static_assert(offsetof(struct sockaddr, sa_data) ==
    offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout");
64 
/*
 * Knob reported by linux_use_real_ifname().  When true,
 * ifname_bsd_to_linux_ifp() copies the FreeBSD interface name instead of
 * generating an ethN alias.  Defaults to false and is exported through
 * SYSCTL_BOOL under the _compat_linux node.
 */
static bool use_real_ifnames = false;
SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN,
    &use_real_ifnames, 0,
    "Use FreeBSD interface names instead of generating ethN aliases");
69 
70 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
71 	LINUX_SIGHUP,	/* SIGHUP */
72 	LINUX_SIGINT,	/* SIGINT */
73 	LINUX_SIGQUIT,	/* SIGQUIT */
74 	LINUX_SIGILL,	/* SIGILL */
75 	LINUX_SIGTRAP,	/* SIGTRAP */
76 	LINUX_SIGABRT,	/* SIGABRT */
77 	0,		/* SIGEMT */
78 	LINUX_SIGFPE,	/* SIGFPE */
79 	LINUX_SIGKILL,	/* SIGKILL */
80 	LINUX_SIGBUS,	/* SIGBUS */
81 	LINUX_SIGSEGV,	/* SIGSEGV */
82 	LINUX_SIGSYS,	/* SIGSYS */
83 	LINUX_SIGPIPE,	/* SIGPIPE */
84 	LINUX_SIGALRM,	/* SIGALRM */
85 	LINUX_SIGTERM,	/* SIGTERM */
86 	LINUX_SIGURG,	/* SIGURG */
87 	LINUX_SIGSTOP,	/* SIGSTOP */
88 	LINUX_SIGTSTP,	/* SIGTSTP */
89 	LINUX_SIGCONT,	/* SIGCONT */
90 	LINUX_SIGCHLD,	/* SIGCHLD */
91 	LINUX_SIGTTIN,	/* SIGTTIN */
92 	LINUX_SIGTTOU,	/* SIGTTOU */
93 	LINUX_SIGIO,	/* SIGIO */
94 	LINUX_SIGXCPU,	/* SIGXCPU */
95 	LINUX_SIGXFSZ,	/* SIGXFSZ */
96 	LINUX_SIGVTALRM,/* SIGVTALRM */
97 	LINUX_SIGPROF,	/* SIGPROF */
98 	LINUX_SIGWINCH,	/* SIGWINCH */
99 	0,		/* SIGINFO */
100 	LINUX_SIGUSR1,	/* SIGUSR1 */
101 	LINUX_SIGUSR2	/* SIGUSR2 */
102 };
103 
104 #define	LINUX_SIGPWREMU	(SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1)
105 
106 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
107 	SIGHUP,		/* LINUX_SIGHUP */
108 	SIGINT,		/* LINUX_SIGINT */
109 	SIGQUIT,	/* LINUX_SIGQUIT */
110 	SIGILL,		/* LINUX_SIGILL */
111 	SIGTRAP,	/* LINUX_SIGTRAP */
112 	SIGABRT,	/* LINUX_SIGABRT */
113 	SIGBUS,		/* LINUX_SIGBUS */
114 	SIGFPE,		/* LINUX_SIGFPE */
115 	SIGKILL,	/* LINUX_SIGKILL */
116 	SIGUSR1,	/* LINUX_SIGUSR1 */
117 	SIGSEGV,	/* LINUX_SIGSEGV */
118 	SIGUSR2,	/* LINUX_SIGUSR2 */
119 	SIGPIPE,	/* LINUX_SIGPIPE */
120 	SIGALRM,	/* LINUX_SIGALRM */
121 	SIGTERM,	/* LINUX_SIGTERM */
122 	SIGBUS,		/* LINUX_SIGSTKFLT */
123 	SIGCHLD,	/* LINUX_SIGCHLD */
124 	SIGCONT,	/* LINUX_SIGCONT */
125 	SIGSTOP,	/* LINUX_SIGSTOP */
126 	SIGTSTP,	/* LINUX_SIGTSTP */
127 	SIGTTIN,	/* LINUX_SIGTTIN */
128 	SIGTTOU,	/* LINUX_SIGTTOU */
129 	SIGURG,		/* LINUX_SIGURG */
130 	SIGXCPU,	/* LINUX_SIGXCPU */
131 	SIGXFSZ,	/* LINUX_SIGXFSZ */
132 	SIGVTALRM,	/* LINUX_SIGVTALARM */
133 	SIGPROF,	/* LINUX_SIGPROF */
134 	SIGWINCH,	/* LINUX_SIGWINCH */
135 	SIGIO,		/* LINUX_SIGIO */
136 	/*
137 	 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal
138 	 * to the first unused FreeBSD signal number. Since Linux supports
139 	 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
140 	 */
141 	LINUX_SIGPWREMU,/* LINUX_SIGPWR */
142 	SIGSYS		/* LINUX_SIGSYS */
143 };
144 
/*
 * Character device created by linux_dev_shm_create() and removed by
 * linux_dev_shm_destroy().  The switch table sets only the version and
 * the name; no handlers are installed here.
 */
static struct cdev *dev_shm_cdev;
static struct cdevsw dev_shm_cdevsw = {
     .d_version = D_VERSION,
     .d_name    = "dev_shm",
};
150 
151 /*
152  * Map Linux RT signals to the FreeBSD RT signals.
153  */
154 static inline int
155 linux_to_bsd_rt_signal(int sig)
156 {
157 
158 	return (SIGRTMIN + sig - LINUX_SIGRTMIN);
159 }
160 
161 static inline int
162 bsd_to_linux_rt_signal(int sig)
163 {
164 
165 	return (sig - SIGRTMIN + LINUX_SIGRTMIN);
166 }
167 
168 int
169 linux_to_bsd_signal(int sig)
170 {
171 
172 	KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
173 
174 	if (sig < LINUX_SIGRTMIN)
175 		return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
176 
177 	return (linux_to_bsd_rt_signal(sig));
178 }
179 
180 int
181 bsd_to_linux_signal(int sig)
182 {
183 
184 	if (sig <= LINUX_SIGTBLSZ)
185 		return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
186 	if (sig == LINUX_SIGPWREMU)
187 		return (LINUX_SIGPWR);
188 
189 	return (bsd_to_linux_rt_signal(sig));
190 }
191 
192 int
193 linux_to_bsd_sigaltstack(int lsa)
194 {
195 	int bsa = 0;
196 
197 	if (lsa & LINUX_SS_DISABLE)
198 		bsa |= SS_DISABLE;
199 	/*
200 	 * Linux ignores SS_ONSTACK flag for ss
201 	 * parameter while FreeBSD prohibits it.
202 	 */
203 	return (bsa);
204 }
205 
206 int
207 bsd_to_linux_sigaltstack(int bsa)
208 {
209 	int lsa = 0;
210 
211 	if (bsa & SS_DISABLE)
212 		lsa |= LINUX_SS_DISABLE;
213 	if (bsa & SS_ONSTACK)
214 		lsa |= LINUX_SS_ONSTACK;
215 	return (lsa);
216 }
217 
218 void
219 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
220 {
221 	int b, l;
222 
223 	SIGEMPTYSET(*bss);
224 	for (l = 1; l <= LINUX_SIGRTMAX; l++) {
225 		if (LINUX_SIGISMEMBER(*lss, l)) {
226 			b = linux_to_bsd_signal(l);
227 			if (b)
228 				SIGADDSET(*bss, b);
229 		}
230 	}
231 }
232 
233 void
234 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
235 {
236 	int b, l;
237 
238 	LINUX_SIGEMPTYSET(*lss);
239 	for (b = 1; b <= SIGRTMAX; b++) {
240 		if (SIGISMEMBER(*bss, b)) {
241 			l = bsd_to_linux_signal(b);
242 			if (l)
243 				LINUX_SIGADDSET(*lss, l);
244 		}
245 	}
246 }
247 
248 /*
249  * Translate a FreeBSD interface name to a Linux interface name
250  * by interface name, and return the number of bytes copied to lxname.
251  */
252 int
253 ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
254 {
255 	struct epoch_tracker et;
256 	struct ifnet *ifp;
257 	int ret;
258 
259 	ret = 0;
260 	CURVNET_SET(TD_TO_VNET(curthread));
261 	NET_EPOCH_ENTER(et);
262 	ifp = ifunit(bsdname);
263 	if (ifp != NULL)
264 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
265 	NET_EPOCH_EXIT(et);
266 	CURVNET_RESTORE();
267 	return (ret);
268 }
269 
270 /*
271  * Translate a FreeBSD interface name to a Linux interface name
272  * by interface index, and return the number of bytes copied to lxname.
273  */
274 int
275 ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len)
276 {
277 	struct epoch_tracker et;
278 	struct ifnet *ifp;
279 	int ret;
280 
281 	ret = 0;
282 	CURVNET_SET(TD_TO_VNET(curthread));
283 	NET_EPOCH_ENTER(et);
284 	ifp = ifnet_byindex(idx);
285 	if (ifp != NULL)
286 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
287 	NET_EPOCH_EXIT(et);
288 	CURVNET_RESTORE();
289 	return (ret);
290 }
291 
292 /*
293  * Translate a FreeBSD interface name to a Linux interface name,
294  * and return the number of bytes copied to lxname, 0 if interface
295  * not found, -1 on error.
296  */
297 struct ifname_bsd_to_linux_ifp_cb_s {
298 	struct ifnet	*ifp;
299 	int		ethno;
300 	char		*lxname;
301 	size_t		len;
302 };
303 
304 static int
305 ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
306 {
307 	struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
308 
309 	if (ifp == cbs->ifp)
310 		return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
311 	if (IFP_IS_ETH(ifp))
312 		cbs->ethno++;
313 	return (0);
314 }
315 
316 int
317 ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
318 {
319 	struct ifname_bsd_to_linux_ifp_cb_s arg = {
320 		.ifp = ifp,
321 		.ethno = 0,
322 		.lxname = lxname,
323 		.len = len,
324 	};
325 
326 	NET_EPOCH_ASSERT();
327 
328 	/*
329 	 * Linux loopback interface name is lo (not lo0),
330 	 * we translate lo to lo0, loX to loX.
331 	 */
332 	if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0)
333 		return (strlcpy(lxname, "lo", len));
334 
335 	/* Short-circuit non ethernet interfaces. */
336 	if (!IFP_IS_ETH(ifp) || linux_use_real_ifname(ifp))
337 		return (strlcpy(lxname, if_name(ifp), len));
338 
339  	/* Determine the (relative) unit number for ethernet interfaces. */
340 	return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
341 }
342 
343 /*
344  * Translate a Linux interface name to a FreeBSD interface name,
345  * and return the associated ifnet structure
346  * bsdname and lxname need to be least IFNAMSIZ bytes long, but
347  * can point to the same buffer.
348  */
349 struct ifname_linux_to_ifp_cb_s {
350 	bool		is_lo;
351 	bool		is_eth;
352 	int		ethno;
353 	int		unit;
354 	const char	*lxname;
355 	if_t		ifp;
356 };
357 
358 static int
359 ifname_linux_to_ifp_cb(if_t ifp, void *arg)
360 {
361 	struct ifname_linux_to_ifp_cb_s *cbs = arg;
362 
363 	NET_EPOCH_ASSERT();
364 
365 	/*
366 	 * Allow Linux programs to use FreeBSD names. Don't presume
367 	 * we never have an interface named "eth", so don't make
368 	 * the test optional based on is_eth.
369 	 */
370 	if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
371 		goto out;
372 	if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
373 		goto out;
374 	if (cbs->is_lo && IFP_IS_LOOP(ifp))
375 		goto out;
376 	if (IFP_IS_ETH(ifp))
377 		cbs->ethno++;
378 	return (0);
379 
380 out:
381 	cbs->ifp = ifp;
382 	return (1);
383 }
384 
385 struct ifnet *
386 ifname_linux_to_ifp(struct thread *td, const char *lxname)
387 {
388 	struct ifname_linux_to_ifp_cb_s arg = {
389 		.ethno = 0,
390 		.lxname = lxname,
391 		.ifp = NULL,
392 	};
393 	int len;
394 	char *ep;
395 
396 	NET_EPOCH_ASSERT();
397 
398 	for (len = 0; len < LINUX_IFNAMSIZ; ++len)
399 		if (!isalpha(lxname[len]) || lxname[len] == '\0')
400 			break;
401 	if (len == 0 || len == LINUX_IFNAMSIZ)
402 		return (NULL);
403 	/*
404 	 * Linux loopback interface name is lo (not lo0),
405 	 * we translate lo to lo0, loX to loX.
406 	 */
407 	arg.is_lo = (len == 2 && strncmp(lxname, "lo", LINUX_IFNAMSIZ) == 0);
408 	arg.unit = (int)strtoul(lxname + len, &ep, 10);
409 	if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
410 	    arg.is_lo == 0)
411 		return (NULL);
412 	arg.is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
413 
414 	if_foreach(ifname_linux_to_ifp_cb, &arg);
415 	return (arg.ifp);
416 }
417 
418 int
419 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
420 {
421 	struct epoch_tracker et;
422 	struct ifnet *ifp;
423 
424 	CURVNET_SET(TD_TO_VNET(td));
425 	NET_EPOCH_ENTER(et);
426 	ifp = ifname_linux_to_ifp(td, lxname);
427 	if (ifp != NULL && bsdname != NULL)
428 		strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
429 	NET_EPOCH_EXIT(et);
430 	CURVNET_RESTORE();
431 	return (ifp != NULL ? 0 : EINVAL);
432 }
433 
/*
 * Return the Linux interface flags for ifp.  Must be called inside a
 * network epoch section.
 *
 * The interface flags and driver flags are or-ed together, truncated to
 * unsigned short, and translated with bsd_to_linux_ifflags().
 */
unsigned short
linux_ifflags(struct ifnet *ifp)
{
	unsigned short bsdflags;

	NET_EPOCH_ASSERT();

	bsdflags = if_getflags(ifp) | if_getdrvflags(ifp);

	return (bsd_to_linux_ifflags(bsdflags));
}
444 
445 unsigned short
446 bsd_to_linux_ifflags(int fl)
447 {
448 	unsigned short flags = 0;
449 
450 	if (fl & IFF_UP)
451 		flags |= LINUX_IFF_UP;
452 	if (fl & IFF_BROADCAST)
453 		flags |= LINUX_IFF_BROADCAST;
454 	if (fl & IFF_DEBUG)
455 		flags |= LINUX_IFF_DEBUG;
456 	if (fl & IFF_LOOPBACK)
457 		flags |= LINUX_IFF_LOOPBACK;
458 	if (fl & IFF_POINTOPOINT)
459 		flags |= LINUX_IFF_POINTOPOINT;
460 	if (fl & IFF_DRV_RUNNING)
461 		flags |= LINUX_IFF_RUNNING;
462 	if (fl & IFF_NOARP)
463 		flags |= LINUX_IFF_NOARP;
464 	if (fl & IFF_PROMISC)
465 		flags |= LINUX_IFF_PROMISC;
466 	if (fl & IFF_ALLMULTI)
467 		flags |= LINUX_IFF_ALLMULTI;
468 	if (fl & IFF_MULTICAST)
469 		flags |= LINUX_IFF_MULTICAST;
470 	return (flags);
471 }
472 
473 static u_int
474 linux_ifhwaddr_cb(void *arg, struct ifaddr *ifa, u_int count)
475 {
476 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifa->ifa_addr;
477 	struct l_sockaddr *lsa = arg;
478 
479 	if (count > 0)
480 		return (0);
481 	if (sdl->sdl_type != IFT_ETHER)
482 		return (0);
483 	bzero(lsa, sizeof(*lsa));
484 	lsa->sa_family = LINUX_ARPHRD_ETHER;
485 	bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
486 	return (1);
487 }
488 
489 int
490 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
491 {
492 
493 	NET_EPOCH_ASSERT();
494 
495 	if (IFP_IS_LOOP(ifp)) {
496 		bzero(lsa, sizeof(*lsa));
497 		lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
498 		return (0);
499 	}
500 	if (!IFP_IS_ETH(ifp))
501 		return (ENOENT);
502 	if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
503 		return (0);
504 	return (ENOENT);
505 }
506 
507 int
508 linux_to_bsd_domain(int domain)
509 {
510 
511 	switch (domain) {
512 	case LINUX_AF_UNSPEC:
513 		return (AF_UNSPEC);
514 	case LINUX_AF_UNIX:
515 		return (AF_LOCAL);
516 	case LINUX_AF_INET:
517 		return (AF_INET);
518 	case LINUX_AF_INET6:
519 		return (AF_INET6);
520 	case LINUX_AF_AX25:
521 		return (AF_CCITT);
522 	case LINUX_AF_IPX:
523 		return (AF_IPX);
524 	case LINUX_AF_APPLETALK:
525 		return (AF_APPLETALK);
526 	case LINUX_AF_NETLINK:
527 		return (AF_NETLINK);
528 	}
529 	return (-1);
530 }
531 
532 int
533 bsd_to_linux_domain(int domain)
534 {
535 
536 	switch (domain) {
537 	case AF_UNSPEC:
538 		return (LINUX_AF_UNSPEC);
539 	case AF_LOCAL:
540 		return (LINUX_AF_UNIX);
541 	case AF_INET:
542 		return (LINUX_AF_INET);
543 	case AF_INET6:
544 		return (LINUX_AF_INET6);
545 	case AF_CCITT:
546 		return (LINUX_AF_AX25);
547 	case AF_IPX:
548 		return (LINUX_AF_IPX);
549 	case AF_APPLETALK:
550 		return (LINUX_AF_APPLETALK);
551 	case AF_NETLINK:
552 		return (LINUX_AF_NETLINK);
553 	}
554 	return (-1);
555 }
556 
557 /*
558  * Based on the fact that:
559  * 1. Native and Linux storage of struct sockaddr
560  * and struct sockaddr_in6 are equal.
561  * 2. On Linux sa_family is the first member of all struct sockaddr.
562  */
563 int
564 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
565     socklen_t len)
566 {
567 	struct l_sockaddr *kosa;
568 	int bdom;
569 
570 	*lsa = NULL;
571 	if (len < 2 || len > UCHAR_MAX)
572 		return (EINVAL);
573 	bdom = bsd_to_linux_domain(sa->sa_family);
574 	if (bdom == -1)
575 		return (EAFNOSUPPORT);
576 
577 	kosa = malloc(len, M_LINUX, M_WAITOK);
578 	bcopy(sa, kosa, len);
579 	kosa->sa_family = bdom;
580 	*lsa = kosa;
581 	return (0);
582 }
583 
584 int
585 linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
586     socklen_t *len)
587 {
588 	struct sockaddr *sa;
589 	struct l_sockaddr *kosa;
590 #ifdef INET6
591 	struct sockaddr_in6 *sin6;
592 	bool  oldv6size;
593 #endif
594 	char *name;
595 	int salen, bdom, error, hdrlen, namelen;
596 
597 	if (*len < 2 || *len > UCHAR_MAX)
598 		return (EINVAL);
599 
600 	salen = *len;
601 
602 #ifdef INET6
603 	oldv6size = false;
604 	/*
605 	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
606 	 * if it's a v4-mapped address, so reserve the proper space
607 	 * for it.
608 	 */
609 	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
610 		salen += sizeof(uint32_t);
611 		oldv6size = true;
612 	}
613 #endif
614 
615 	kosa = malloc(salen, M_SONAME, M_WAITOK);
616 
617 	if ((error = copyin(osa, kosa, *len)))
618 		goto out;
619 
620 	bdom = linux_to_bsd_domain(kosa->sa_family);
621 	if (bdom == -1) {
622 		error = EAFNOSUPPORT;
623 		goto out;
624 	}
625 
626 #ifdef INET6
627 	/*
628 	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
629 	 * which lacks the scope id compared with RFC2553 one. If we detect
630 	 * the situation, reject the address and write a message to system log.
631 	 *
632 	 * Still accept addresses for which the scope id is not used.
633 	 */
634 	if (oldv6size) {
635 		if (bdom == AF_INET6) {
636 			sin6 = (struct sockaddr_in6 *)kosa;
637 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
638 			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
639 			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
640 			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
641 			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
642 			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
643 				sin6->sin6_scope_id = 0;
644 			} else {
645 				linux_msg(curthread,
646 				    "obsolete pre-RFC2553 sockaddr_in6 rejected");
647 				error = EINVAL;
648 				goto out;
649 			}
650 		} else
651 			salen -= sizeof(uint32_t);
652 	}
653 #endif
654 	if (bdom == AF_INET) {
655 		if (salen < sizeof(struct sockaddr_in)) {
656 			error = EINVAL;
657 			goto out;
658 		}
659 		salen = sizeof(struct sockaddr_in);
660 	}
661 
662 	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
663 		hdrlen = offsetof(struct sockaddr_un, sun_path);
664 		name = ((struct sockaddr_un *)kosa)->sun_path;
665 		if (*name == '\0') {
666 			/*
667 			 * Linux abstract namespace starts with a NULL byte.
668 			 * XXX We do not support abstract namespace yet.
669 			 */
670 			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
671 		} else
672 			namelen = strnlen(name, salen - hdrlen);
673 		salen = hdrlen + namelen;
674 		if (salen > sizeof(struct sockaddr_un)) {
675 			error = ENAMETOOLONG;
676 			goto out;
677 		}
678 	}
679 
680 	if (bdom == AF_NETLINK) {
681 		if (salen < sizeof(struct sockaddr_nl)) {
682 			error = EINVAL;
683 			goto out;
684 		}
685 		salen = sizeof(struct sockaddr_nl);
686 	}
687 
688 	sa = (struct sockaddr *)kosa;
689 	sa->sa_family = bdom;
690 	sa->sa_len = salen;
691 
692 	*sap = sa;
693 	*len = salen;
694 	return (0);
695 
696 out:
697 	free(kosa, M_SONAME);
698 	return (error);
699 }
700 
701 void
702 linux_dev_shm_create(void)
703 {
704 	int error;
705 
706 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
707 	    &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
708 	if (error != 0) {
709 		printf("%s: failed to create device node, error %d\n",
710 		    __func__, error);
711 	}
712 }
713 
/*
 * Destroy the device node stored in dev_shm_cdev, which
 * linux_dev_shm_create() sets.
 */
void
linux_dev_shm_destroy(void)
{

	destroy_dev(dev_shm_cdev);
}
720 
721 int
722 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
723     size_t mapcnt, int no_value)
724 {
725 	int bsd_mask, bsd_value, linux_mask, linux_value;
726 	int linux_ret;
727 	size_t i;
728 	bool applied;
729 
730 	applied = false;
731 	linux_ret = 0;
732 	for (i = 0; i < mapcnt; ++i) {
733 		bsd_mask = bitmap[i].bsd_mask;
734 		bsd_value = bitmap[i].bsd_value;
735 		if (bsd_mask == 0)
736 			bsd_mask = bsd_value;
737 
738 		linux_mask = bitmap[i].linux_mask;
739 		linux_value = bitmap[i].linux_value;
740 		if (linux_mask == 0)
741 			linux_mask = linux_value;
742 
743 		/*
744 		 * If a mask larger than just the value is set, we explicitly
745 		 * want to make sure that only this bit we mapped within that
746 		 * mask is set.
747 		 */
748 		if ((value & bsd_mask) == bsd_value) {
749 			linux_ret = (linux_ret & ~linux_mask) | linux_value;
750 			applied = true;
751 		}
752 	}
753 
754 	if (!applied)
755 		return (no_value);
756 	return (linux_ret);
757 }
758 
759 int
760 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
761     size_t mapcnt, int no_value)
762 {
763 	int bsd_mask, bsd_value, linux_mask, linux_value;
764 	int bsd_ret;
765 	size_t i;
766 	bool applied;
767 
768 	applied = false;
769 	bsd_ret = 0;
770 	for (i = 0; i < mapcnt; ++i) {
771 		bsd_mask = bitmap[i].bsd_mask;
772 		bsd_value = bitmap[i].bsd_value;
773 		if (bsd_mask == 0)
774 			bsd_mask = bsd_value;
775 
776 		linux_mask = bitmap[i].linux_mask;
777 		linux_value = bitmap[i].linux_value;
778 		if (linux_mask == 0)
779 			linux_mask = linux_value;
780 
781 		/*
782 		 * If a mask larger than just the value is set, we explicitly
783 		 * want to make sure that only this bit we mapped within that
784 		 * mask is set.
785 		 */
786 		if ((value & linux_mask) == linux_value) {
787 			bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
788 			applied = true;
789 		}
790 	}
791 
792 	if (!applied)
793 		return (no_value);
794 	return (bsd_ret);
795 }
796 
797 void
798 linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
799     short *bev)
800 {
801 	struct file *fp;
802 	int error;
803 	short bits = 0;
804 
805 	if (lev & LINUX_POLLIN)
806 		bits |= POLLIN;
807 	if (lev & LINUX_POLLPRI)
808 		bits |=	POLLPRI;
809 	if (lev & LINUX_POLLOUT)
810 		bits |= POLLOUT;
811 	if (lev & LINUX_POLLERR)
812 		bits |= POLLERR;
813 	if (lev & LINUX_POLLHUP)
814 		bits |= POLLHUP;
815 	if (lev & LINUX_POLLNVAL)
816 		bits |= POLLNVAL;
817 	if (lev & LINUX_POLLRDNORM)
818 		bits |= POLLRDNORM;
819 	if (lev & LINUX_POLLRDBAND)
820 		bits |= POLLRDBAND;
821 	if (lev & LINUX_POLLWRBAND)
822 		bits |= POLLWRBAND;
823 	if (lev & LINUX_POLLWRNORM)
824 		bits |= POLLWRNORM;
825 
826 	if (lev & LINUX_POLLRDHUP) {
827 		/*
828 		 * It seems that the Linux silencly ignores POLLRDHUP
829 		 * on non-socket file descriptors unlike FreeBSD, where
830 		 * events bits is more strictly checked (POLLSTANDARD).
831 		 */
832 		error = fget_unlocked(td, fd, &cap_no_rights, &fp);
833 		if (error == 0) {
834 			/*
835 			 * XXX. On FreeBSD POLLRDHUP applies only to
836 			 * stream sockets.
837 			 */
838 			if (fp->f_type == DTYPE_SOCKET)
839 				bits |= POLLRDHUP;
840 			fdrop(fp, td);
841 		}
842 	}
843 
844 	if (lev & LINUX_POLLMSG)
845 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
846 	if (lev & LINUX_POLLREMOVE)
847 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
848 
849 	*bev = bits;
850 }
851 
852 void
853 bsd_to_linux_poll_events(short bev, short *lev)
854 {
855 	short bits = 0;
856 
857 	if (bev & POLLIN)
858 		bits |= LINUX_POLLIN;
859 	if (bev & POLLPRI)
860 		bits |=	LINUX_POLLPRI;
861 	if (bev & (POLLOUT | POLLWRNORM))
862 		/*
863 		 * POLLWRNORM is equal to POLLOUT on FreeBSD,
864 		 * but not on Linux
865 		 */
866 		bits |= LINUX_POLLOUT;
867 	if (bev & POLLERR)
868 		bits |= LINUX_POLLERR;
869 	if (bev & POLLHUP)
870 		bits |= LINUX_POLLHUP;
871 	if (bev & POLLNVAL)
872 		bits |= LINUX_POLLNVAL;
873 	if (bev & POLLRDNORM)
874 		bits |= LINUX_POLLRDNORM;
875 	if (bev & POLLRDBAND)
876 		bits |= LINUX_POLLRDBAND;
877 	if (bev & POLLWRBAND)
878 		bits |= LINUX_POLLWRBAND;
879 	if (bev & POLLRDHUP)
880 		bits |= LINUX_POLLRDHUP;
881 
882 	*lev = bits;
883 }
884 
885 bool
886 linux_use_real_ifname(const struct ifnet *ifp)
887 {
888 
889 	return (use_real_ifnames);
890 }
891