xref: /freebsd/sys/compat/linux/linux.c (revision 8d6dd96d50d2e6fea4cbbdb30ff12cc5730b7fbd)
1 /*-
2  * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include <sys/cdefs.h>
27 __FBSDID("$FreeBSD$");
28 
29 #include "opt_inet6.h"
30 
31 #include <sys/param.h>
32 #include <sys/conf.h>
33 #include <sys/ctype.h>
34 #include <sys/file.h>
35 #include <sys/filedesc.h>
36 #include <sys/jail.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/poll.h>
40 #include <sys/proc.h>
41 #include <sys/signalvar.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 
45 #include <net/if.h>
46 #include <net/if_var.h>
47 #include <net/if_dl.h>
48 #include <net/if_types.h>
49 #include <netlink/netlink.h>
50 
51 #include <sys/un.h>
52 #include <netinet/in.h>
53 
54 #include <compat/linux/linux.h>
55 #include <compat/linux/linux_common.h>
56 #include <compat/linux/linux_mib.h>
57 #include <compat/linux/linux_util.h>
58 
59 CTASSERT(LINUX_IFNAMSIZ == IFNAMSIZ);
60 
61 static bool use_real_ifnames = false;
62 SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN,
63     &use_real_ifnames, 0,
64     "Use FreeBSD interface names instead of generating ethN aliases");
65 
66 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
67 	LINUX_SIGHUP,	/* SIGHUP */
68 	LINUX_SIGINT,	/* SIGINT */
69 	LINUX_SIGQUIT,	/* SIGQUIT */
70 	LINUX_SIGILL,	/* SIGILL */
71 	LINUX_SIGTRAP,	/* SIGTRAP */
72 	LINUX_SIGABRT,	/* SIGABRT */
73 	0,		/* SIGEMT */
74 	LINUX_SIGFPE,	/* SIGFPE */
75 	LINUX_SIGKILL,	/* SIGKILL */
76 	LINUX_SIGBUS,	/* SIGBUS */
77 	LINUX_SIGSEGV,	/* SIGSEGV */
78 	LINUX_SIGSYS,	/* SIGSYS */
79 	LINUX_SIGPIPE,	/* SIGPIPE */
80 	LINUX_SIGALRM,	/* SIGALRM */
81 	LINUX_SIGTERM,	/* SIGTERM */
82 	LINUX_SIGURG,	/* SIGURG */
83 	LINUX_SIGSTOP,	/* SIGSTOP */
84 	LINUX_SIGTSTP,	/* SIGTSTP */
85 	LINUX_SIGCONT,	/* SIGCONT */
86 	LINUX_SIGCHLD,	/* SIGCHLD */
87 	LINUX_SIGTTIN,	/* SIGTTIN */
88 	LINUX_SIGTTOU,	/* SIGTTOU */
89 	LINUX_SIGIO,	/* SIGIO */
90 	LINUX_SIGXCPU,	/* SIGXCPU */
91 	LINUX_SIGXFSZ,	/* SIGXFSZ */
92 	LINUX_SIGVTALRM,/* SIGVTALRM */
93 	LINUX_SIGPROF,	/* SIGPROF */
94 	LINUX_SIGWINCH,	/* SIGWINCH */
95 	0,		/* SIGINFO */
96 	LINUX_SIGUSR1,	/* SIGUSR1 */
97 	LINUX_SIGUSR2	/* SIGUSR2 */
98 };
99 
100 #define	LINUX_SIGPWREMU	(SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1)
101 
102 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
103 	SIGHUP,		/* LINUX_SIGHUP */
104 	SIGINT,		/* LINUX_SIGINT */
105 	SIGQUIT,	/* LINUX_SIGQUIT */
106 	SIGILL,		/* LINUX_SIGILL */
107 	SIGTRAP,	/* LINUX_SIGTRAP */
108 	SIGABRT,	/* LINUX_SIGABRT */
109 	SIGBUS,		/* LINUX_SIGBUS */
110 	SIGFPE,		/* LINUX_SIGFPE */
111 	SIGKILL,	/* LINUX_SIGKILL */
112 	SIGUSR1,	/* LINUX_SIGUSR1 */
113 	SIGSEGV,	/* LINUX_SIGSEGV */
114 	SIGUSR2,	/* LINUX_SIGUSR2 */
115 	SIGPIPE,	/* LINUX_SIGPIPE */
116 	SIGALRM,	/* LINUX_SIGALRM */
117 	SIGTERM,	/* LINUX_SIGTERM */
118 	SIGBUS,		/* LINUX_SIGSTKFLT */
119 	SIGCHLD,	/* LINUX_SIGCHLD */
120 	SIGCONT,	/* LINUX_SIGCONT */
121 	SIGSTOP,	/* LINUX_SIGSTOP */
122 	SIGTSTP,	/* LINUX_SIGTSTP */
123 	SIGTTIN,	/* LINUX_SIGTTIN */
124 	SIGTTOU,	/* LINUX_SIGTTOU */
125 	SIGURG,		/* LINUX_SIGURG */
126 	SIGXCPU,	/* LINUX_SIGXCPU */
127 	SIGXFSZ,	/* LINUX_SIGXFSZ */
128 	SIGVTALRM,	/* LINUX_SIGVTALARM */
129 	SIGPROF,	/* LINUX_SIGPROF */
130 	SIGWINCH,	/* LINUX_SIGWINCH */
131 	SIGIO,		/* LINUX_SIGIO */
132 	/*
133 	 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal
134 	 * to the first unused FreeBSD signal number. Since Linux supports
135 	 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
136 	 */
137 	LINUX_SIGPWREMU,/* LINUX_SIGPWR */
138 	SIGSYS		/* LINUX_SIGSYS */
139 };
140 
141 static struct cdev *dev_shm_cdev;
142 static struct cdevsw dev_shm_cdevsw = {
143      .d_version = D_VERSION,
144      .d_name    = "dev_shm",
145 };
146 
147 /*
148  * Map Linux RT signals to the FreeBSD RT signals.
149  */
150 static inline int
151 linux_to_bsd_rt_signal(int sig)
152 {
153 
154 	return (SIGRTMIN + sig - LINUX_SIGRTMIN);
155 }
156 
157 static inline int
158 bsd_to_linux_rt_signal(int sig)
159 {
160 
161 	return (sig - SIGRTMIN + LINUX_SIGRTMIN);
162 }
163 
164 int
165 linux_to_bsd_signal(int sig)
166 {
167 
168 	KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
169 
170 	if (sig < LINUX_SIGRTMIN)
171 		return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
172 
173 	return (linux_to_bsd_rt_signal(sig));
174 }
175 
176 int
177 bsd_to_linux_signal(int sig)
178 {
179 
180 	if (sig <= LINUX_SIGTBLSZ)
181 		return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
182 	if (sig == LINUX_SIGPWREMU)
183 		return (LINUX_SIGPWR);
184 
185 	return (bsd_to_linux_rt_signal(sig));
186 }
187 
188 int
189 linux_to_bsd_sigaltstack(int lsa)
190 {
191 	int bsa = 0;
192 
193 	if (lsa & LINUX_SS_DISABLE)
194 		bsa |= SS_DISABLE;
195 	/*
196 	 * Linux ignores SS_ONSTACK flag for ss
197 	 * parameter while FreeBSD prohibits it.
198 	 */
199 	return (bsa);
200 }
201 
202 int
203 bsd_to_linux_sigaltstack(int bsa)
204 {
205 	int lsa = 0;
206 
207 	if (bsa & SS_DISABLE)
208 		lsa |= LINUX_SS_DISABLE;
209 	if (bsa & SS_ONSTACK)
210 		lsa |= LINUX_SS_ONSTACK;
211 	return (lsa);
212 }
213 
214 void
215 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
216 {
217 	int b, l;
218 
219 	SIGEMPTYSET(*bss);
220 	for (l = 1; l <= LINUX_SIGRTMAX; l++) {
221 		if (LINUX_SIGISMEMBER(*lss, l)) {
222 			b = linux_to_bsd_signal(l);
223 			if (b)
224 				SIGADDSET(*bss, b);
225 		}
226 	}
227 }
228 
229 void
230 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
231 {
232 	int b, l;
233 
234 	LINUX_SIGEMPTYSET(*lss);
235 	for (b = 1; b <= SIGRTMAX; b++) {
236 		if (SIGISMEMBER(*bss, b)) {
237 			l = bsd_to_linux_signal(b);
238 			if (l)
239 				LINUX_SIGADDSET(*lss, l);
240 		}
241 	}
242 }
243 
244 /*
245  * Translate a FreeBSD interface name to a Linux interface name
246  * by interface name, and return the number of bytes copied to lxname.
247  */
248 int
249 ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
250 {
251 	struct epoch_tracker et;
252 	struct ifnet *ifp;
253 	int ret;
254 
255 	ret = 0;
256 	CURVNET_SET(TD_TO_VNET(curthread));
257 	NET_EPOCH_ENTER(et);
258 	ifp = ifunit(bsdname);
259 	if (ifp != NULL)
260 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
261 	NET_EPOCH_EXIT(et);
262 	CURVNET_RESTORE();
263 	return (ret);
264 }
265 
266 /*
267  * Translate a FreeBSD interface name to a Linux interface name
268  * by interface index, and return the number of bytes copied to lxname.
269  */
270 int
271 ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len)
272 {
273 	struct epoch_tracker et;
274 	struct ifnet *ifp;
275 	int ret;
276 
277 	ret = 0;
278 	CURVNET_SET(TD_TO_VNET(curthread));
279 	NET_EPOCH_ENTER(et);
280 	ifp = ifnet_byindex(idx);
281 	if (ifp != NULL)
282 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
283 	NET_EPOCH_EXIT(et);
284 	CURVNET_RESTORE();
285 	return (ret);
286 }
287 
288 /*
289  * Translate a FreeBSD interface name to a Linux interface name,
290  * and return the number of bytes copied to lxname, 0 if interface
291  * not found, -1 on error.
292  */
293 struct ifname_bsd_to_linux_ifp_cb_s {
294 	struct ifnet	*ifp;
295 	int		ethno;
296 	char		*lxname;
297 	size_t		len;
298 };
299 
300 static int
301 ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
302 {
303 	struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
304 
305 	if (ifp == cbs->ifp)
306 		return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
307 	if (IFP_IS_ETH(ifp))
308 		cbs->ethno++;
309 	return (0);
310 }
311 
312 int
313 ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
314 {
315 	struct ifname_bsd_to_linux_ifp_cb_s arg = {
316 		.ifp = ifp,
317 		.ethno = 0,
318 		.lxname = lxname,
319 		.len = len,
320 	};
321 
322 	NET_EPOCH_ASSERT();
323 
324 	/*
325 	 * Linux loopback interface name is lo (not lo0),
326 	 * we translate lo to lo0, loX to loX.
327 	 */
328 	if (IFP_IS_LOOP(ifp) && strncmp(ifp->if_xname, "lo0", IFNAMSIZ) == 0)
329 		return (strlcpy(lxname, "lo", len));
330 
331 	/* Short-circuit non ethernet interfaces. */
332 	if (!IFP_IS_ETH(ifp) || linux_use_real_ifname(ifp))
333 		return (strlcpy(lxname, if_name(ifp), len));
334 
335  	/* Determine the (relative) unit number for ethernet interfaces. */
336 	return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
337 }
338 
339 /*
340  * Translate a Linux interface name to a FreeBSD interface name,
341  * and return the associated ifnet structure
342  * bsdname and lxname need to be least IFNAMSIZ bytes long, but
343  * can point to the same buffer.
344  */
345 struct ifname_linux_to_ifp_cb_s {
346 	bool		is_lo;
347 	bool		is_eth;
348 	int		ethno;
349 	int		unit;
350 	const char	*lxname;
351 	if_t		ifp;
352 };
353 
354 static int
355 ifname_linux_to_ifp_cb(if_t ifp, void *arg)
356 {
357 	struct ifname_linux_to_ifp_cb_s *cbs = arg;
358 
359 	NET_EPOCH_ASSERT();
360 
361 	/*
362 	 * Allow Linux programs to use FreeBSD names. Don't presume
363 	 * we never have an interface named "eth", so don't make
364 	 * the test optional based on is_eth.
365 	 */
366 	if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
367 		goto out;
368 	if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
369 		goto out;
370 	if (cbs->is_lo && IFP_IS_LOOP(ifp))
371 		goto out;
372 	if (IFP_IS_ETH(ifp))
373 		cbs->ethno++;
374 	return (0);
375 
376 out:
377 	cbs->ifp = ifp;
378 	return (1);
379 }
380 
381 struct ifnet *
382 ifname_linux_to_ifp(struct thread *td, const char *lxname)
383 {
384 	struct ifname_linux_to_ifp_cb_s arg = {
385 		.ethno = 0,
386 		.lxname = lxname,
387 		.ifp = NULL,
388 	};
389 	int len;
390 	char *ep;
391 
392 	NET_EPOCH_ASSERT();
393 
394 	for (len = 0; len < LINUX_IFNAMSIZ; ++len)
395 		if (!isalpha(lxname[len]) || lxname[len] == '\0')
396 			break;
397 	if (len == 0 || len == LINUX_IFNAMSIZ)
398 		return (NULL);
399 	/*
400 	 * Linux loopback interface name is lo (not lo0),
401 	 * we translate lo to lo0, loX to loX.
402 	 */
403 	arg.is_lo = (len == 2 && strncmp(lxname, "lo", LINUX_IFNAMSIZ) == 0);
404 	arg.unit = (int)strtoul(lxname + len, &ep, 10);
405 	if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
406 	    arg.is_lo == 0)
407 		return (NULL);
408 	arg.is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
409 
410 	if_foreach(ifname_linux_to_ifp_cb, &arg);
411 	return (arg.ifp);
412 }
413 
414 int
415 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
416 {
417 	struct epoch_tracker et;
418 	struct ifnet *ifp;
419 
420 	CURVNET_SET(TD_TO_VNET(td));
421 	NET_EPOCH_ENTER(et);
422 	ifp = ifname_linux_to_ifp(td, lxname);
423 	if (ifp != NULL && bsdname != NULL)
424 		strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
425 	NET_EPOCH_EXIT(et);
426 	CURVNET_RESTORE();
427 	return (ifp != NULL ? 0 : EINVAL);
428 }
429 
430 unsigned short
431 linux_ifflags(struct ifnet *ifp)
432 {
433 	unsigned short fl, flags;
434 
435 	fl = (if_getflags(ifp) | if_getdrvflags(ifp)) & 0xffff;
436 	flags = 0;
437 	if (fl & IFF_UP)
438 		flags |= LINUX_IFF_UP;
439 	if (fl & IFF_BROADCAST)
440 		flags |= LINUX_IFF_BROADCAST;
441 	if (fl & IFF_DEBUG)
442 		flags |= LINUX_IFF_DEBUG;
443 	if (fl & IFF_LOOPBACK)
444 		flags |= LINUX_IFF_LOOPBACK;
445 	if (fl & IFF_POINTOPOINT)
446 		flags |= LINUX_IFF_POINTOPOINT;
447 	if (fl & IFF_DRV_RUNNING)
448 		flags |= LINUX_IFF_RUNNING;
449 	if (fl & IFF_NOARP)
450 		flags |= LINUX_IFF_NOARP;
451 	if (fl & IFF_PROMISC)
452 		flags |= LINUX_IFF_PROMISC;
453 	if (fl & IFF_ALLMULTI)
454 		flags |= LINUX_IFF_ALLMULTI;
455 	if (fl & IFF_MULTICAST)
456 		flags |= LINUX_IFF_MULTICAST;
457 	return (flags);
458 }
459 
460 static u_int
461 linux_ifhwaddr_cb(void *arg, struct ifaddr *ifa, u_int count)
462 {
463 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifa->ifa_addr;
464 	struct l_sockaddr *lsa = arg;
465 
466 	if (count > 0)
467 		return (0);
468 	if (sdl->sdl_type != IFT_ETHER)
469 		return (0);
470 	bzero(lsa, sizeof(*lsa));
471 	lsa->sa_family = LINUX_ARPHRD_ETHER;
472 	bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
473 	return (1);
474 }
475 
476 int
477 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
478 {
479 
480 	NET_EPOCH_ASSERT();
481 
482 	if (IFP_IS_LOOP(ifp)) {
483 		bzero(lsa, sizeof(*lsa));
484 		lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
485 		return (0);
486 	}
487 	if (!IFP_IS_ETH(ifp))
488 		return (ENOENT);
489 	if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
490 		return (0);
491 	return (ENOENT);
492 }
493 
494 int
495 linux_to_bsd_domain(int domain)
496 {
497 
498 	switch (domain) {
499 	case LINUX_AF_UNSPEC:
500 		return (AF_UNSPEC);
501 	case LINUX_AF_UNIX:
502 		return (AF_LOCAL);
503 	case LINUX_AF_INET:
504 		return (AF_INET);
505 	case LINUX_AF_INET6:
506 		return (AF_INET6);
507 	case LINUX_AF_AX25:
508 		return (AF_CCITT);
509 	case LINUX_AF_IPX:
510 		return (AF_IPX);
511 	case LINUX_AF_APPLETALK:
512 		return (AF_APPLETALK);
513 	case LINUX_AF_NETLINK:
514 		return (AF_NETLINK);
515 	}
516 	return (-1);
517 }
518 
519 int
520 bsd_to_linux_domain(int domain)
521 {
522 
523 	switch (domain) {
524 	case AF_UNSPEC:
525 		return (LINUX_AF_UNSPEC);
526 	case AF_LOCAL:
527 		return (LINUX_AF_UNIX);
528 	case AF_INET:
529 		return (LINUX_AF_INET);
530 	case AF_INET6:
531 		return (LINUX_AF_INET6);
532 	case AF_CCITT:
533 		return (LINUX_AF_AX25);
534 	case AF_IPX:
535 		return (LINUX_AF_IPX);
536 	case AF_APPLETALK:
537 		return (LINUX_AF_APPLETALK);
538 	case AF_NETLINK:
539 		return (LINUX_AF_NETLINK);
540 	}
541 	return (-1);
542 }
543 
544 /*
545  * Based on the fact that:
546  * 1. Native and Linux storage of struct sockaddr
547  * and struct sockaddr_in6 are equal.
548  * 2. On Linux sa_family is the first member of all struct sockaddr.
549  */
550 int
551 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
552     socklen_t len)
553 {
554 	struct l_sockaddr *kosa;
555 	int bdom;
556 
557 	*lsa = NULL;
558 	if (len < 2 || len > UCHAR_MAX)
559 		return (EINVAL);
560 	bdom = bsd_to_linux_domain(sa->sa_family);
561 	if (bdom == -1)
562 		return (EAFNOSUPPORT);
563 
564 	kosa = malloc(len, M_LINUX, M_WAITOK);
565 	bcopy(sa, kosa, len);
566 	kosa->sa_family = bdom;
567 	*lsa = kosa;
568 	return (0);
569 }
570 
571 int
572 linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
573     socklen_t *len)
574 {
575 	struct sockaddr *sa;
576 	struct l_sockaddr *kosa;
577 #ifdef INET6
578 	struct sockaddr_in6 *sin6;
579 	bool  oldv6size;
580 #endif
581 	char *name;
582 	int salen, bdom, error, hdrlen, namelen;
583 
584 	if (*len < 2 || *len > UCHAR_MAX)
585 		return (EINVAL);
586 
587 	salen = *len;
588 
589 #ifdef INET6
590 	oldv6size = false;
591 	/*
592 	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
593 	 * if it's a v4-mapped address, so reserve the proper space
594 	 * for it.
595 	 */
596 	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
597 		salen += sizeof(uint32_t);
598 		oldv6size = true;
599 	}
600 #endif
601 
602 	kosa = malloc(salen, M_SONAME, M_WAITOK);
603 
604 	if ((error = copyin(osa, kosa, *len)))
605 		goto out;
606 
607 	bdom = linux_to_bsd_domain(kosa->sa_family);
608 	if (bdom == -1) {
609 		error = EAFNOSUPPORT;
610 		goto out;
611 	}
612 
613 #ifdef INET6
614 	/*
615 	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
616 	 * which lacks the scope id compared with RFC2553 one. If we detect
617 	 * the situation, reject the address and write a message to system log.
618 	 *
619 	 * Still accept addresses for which the scope id is not used.
620 	 */
621 	if (oldv6size) {
622 		if (bdom == AF_INET6) {
623 			sin6 = (struct sockaddr_in6 *)kosa;
624 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
625 			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
626 			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
627 			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
628 			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
629 			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
630 				sin6->sin6_scope_id = 0;
631 			} else {
632 				linux_msg(curthread,
633 				    "obsolete pre-RFC2553 sockaddr_in6 rejected");
634 				error = EINVAL;
635 				goto out;
636 			}
637 		} else
638 			salen -= sizeof(uint32_t);
639 	}
640 #endif
641 	if (bdom == AF_INET) {
642 		if (salen < sizeof(struct sockaddr_in)) {
643 			error = EINVAL;
644 			goto out;
645 		}
646 		salen = sizeof(struct sockaddr_in);
647 	}
648 
649 	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
650 		hdrlen = offsetof(struct sockaddr_un, sun_path);
651 		name = ((struct sockaddr_un *)kosa)->sun_path;
652 		if (*name == '\0') {
653 			/*
654 			 * Linux abstract namespace starts with a NULL byte.
655 			 * XXX We do not support abstract namespace yet.
656 			 */
657 			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
658 		} else
659 			namelen = strnlen(name, salen - hdrlen);
660 		salen = hdrlen + namelen;
661 		if (salen > sizeof(struct sockaddr_un)) {
662 			error = ENAMETOOLONG;
663 			goto out;
664 		}
665 	}
666 
667 	if (bdom == AF_NETLINK) {
668 		if (salen < sizeof(struct sockaddr_nl)) {
669 			error = EINVAL;
670 			goto out;
671 		}
672 		salen = sizeof(struct sockaddr_nl);
673 	}
674 
675 	sa = (struct sockaddr *)kosa;
676 	sa->sa_family = bdom;
677 	sa->sa_len = salen;
678 
679 	*sap = sa;
680 	*len = salen;
681 	return (0);
682 
683 out:
684 	free(kosa, M_SONAME);
685 	return (error);
686 }
687 
688 void
689 linux_dev_shm_create(void)
690 {
691 	int error;
692 
693 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
694 	    &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
695 	if (error != 0) {
696 		printf("%s: failed to create device node, error %d\n",
697 		    __func__, error);
698 	}
699 }
700 
701 void
702 linux_dev_shm_destroy(void)
703 {
704 
705 	destroy_dev(dev_shm_cdev);
706 }
707 
708 int
709 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
710     size_t mapcnt, int no_value)
711 {
712 	int bsd_mask, bsd_value, linux_mask, linux_value;
713 	int linux_ret;
714 	size_t i;
715 	bool applied;
716 
717 	applied = false;
718 	linux_ret = 0;
719 	for (i = 0; i < mapcnt; ++i) {
720 		bsd_mask = bitmap[i].bsd_mask;
721 		bsd_value = bitmap[i].bsd_value;
722 		if (bsd_mask == 0)
723 			bsd_mask = bsd_value;
724 
725 		linux_mask = bitmap[i].linux_mask;
726 		linux_value = bitmap[i].linux_value;
727 		if (linux_mask == 0)
728 			linux_mask = linux_value;
729 
730 		/*
731 		 * If a mask larger than just the value is set, we explicitly
732 		 * want to make sure that only this bit we mapped within that
733 		 * mask is set.
734 		 */
735 		if ((value & bsd_mask) == bsd_value) {
736 			linux_ret = (linux_ret & ~linux_mask) | linux_value;
737 			applied = true;
738 		}
739 	}
740 
741 	if (!applied)
742 		return (no_value);
743 	return (linux_ret);
744 }
745 
746 int
747 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
748     size_t mapcnt, int no_value)
749 {
750 	int bsd_mask, bsd_value, linux_mask, linux_value;
751 	int bsd_ret;
752 	size_t i;
753 	bool applied;
754 
755 	applied = false;
756 	bsd_ret = 0;
757 	for (i = 0; i < mapcnt; ++i) {
758 		bsd_mask = bitmap[i].bsd_mask;
759 		bsd_value = bitmap[i].bsd_value;
760 		if (bsd_mask == 0)
761 			bsd_mask = bsd_value;
762 
763 		linux_mask = bitmap[i].linux_mask;
764 		linux_value = bitmap[i].linux_value;
765 		if (linux_mask == 0)
766 			linux_mask = linux_value;
767 
768 		/*
769 		 * If a mask larger than just the value is set, we explicitly
770 		 * want to make sure that only this bit we mapped within that
771 		 * mask is set.
772 		 */
773 		if ((value & linux_mask) == linux_value) {
774 			bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
775 			applied = true;
776 		}
777 	}
778 
779 	if (!applied)
780 		return (no_value);
781 	return (bsd_ret);
782 }
783 
784 void
785 linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
786     short *bev)
787 {
788 	struct file *fp;
789 	int error;
790 	short bits = 0;
791 
792 	if (lev & LINUX_POLLIN)
793 		bits |= POLLIN;
794 	if (lev & LINUX_POLLPRI)
795 		bits |=	POLLPRI;
796 	if (lev & LINUX_POLLOUT)
797 		bits |= POLLOUT;
798 	if (lev & LINUX_POLLERR)
799 		bits |= POLLERR;
800 	if (lev & LINUX_POLLHUP)
801 		bits |= POLLHUP;
802 	if (lev & LINUX_POLLNVAL)
803 		bits |= POLLNVAL;
804 	if (lev & LINUX_POLLRDNORM)
805 		bits |= POLLRDNORM;
806 	if (lev & LINUX_POLLRDBAND)
807 		bits |= POLLRDBAND;
808 	if (lev & LINUX_POLLWRBAND)
809 		bits |= POLLWRBAND;
810 	if (lev & LINUX_POLLWRNORM)
811 		bits |= POLLWRNORM;
812 
813 	if (lev & LINUX_POLLRDHUP) {
814 		/*
815 		 * It seems that the Linux silencly ignores POLLRDHUP
816 		 * on non-socket file descriptors unlike FreeBSD, where
817 		 * events bits is more strictly checked (POLLSTANDARD).
818 		 */
819 		error = fget_unlocked(td, fd, &cap_no_rights, &fp);
820 		if (error == 0) {
821 			/*
822 			 * XXX. On FreeBSD POLLRDHUP applies only to
823 			 * stream sockets.
824 			 */
825 			if (fp->f_type == DTYPE_SOCKET)
826 				bits |= POLLRDHUP;
827 			fdrop(fp, td);
828 		}
829 	}
830 
831 	if (lev & LINUX_POLLMSG)
832 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
833 	if (lev & LINUX_POLLREMOVE)
834 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
835 
836 	*bev = bits;
837 }
838 
839 void
840 bsd_to_linux_poll_events(short bev, short *lev)
841 {
842 	short bits = 0;
843 
844 	if (bev & POLLIN)
845 		bits |= LINUX_POLLIN;
846 	if (bev & POLLPRI)
847 		bits |=	LINUX_POLLPRI;
848 	if (bev & (POLLOUT | POLLWRNORM))
849 		/*
850 		 * POLLWRNORM is equal to POLLOUT on FreeBSD,
851 		 * but not on Linux
852 		 */
853 		bits |= LINUX_POLLOUT;
854 	if (bev & POLLERR)
855 		bits |= LINUX_POLLERR;
856 	if (bev & POLLHUP)
857 		bits |= LINUX_POLLHUP;
858 	if (bev & POLLNVAL)
859 		bits |= LINUX_POLLNVAL;
860 	if (bev & POLLRDNORM)
861 		bits |= LINUX_POLLRDNORM;
862 	if (bev & POLLRDBAND)
863 		bits |= LINUX_POLLRDBAND;
864 	if (bev & POLLWRBAND)
865 		bits |= LINUX_POLLWRBAND;
866 	if (bev & POLLRDHUP)
867 		bits |= LINUX_POLLRDHUP;
868 
869 	*lev = bits;
870 }
871 
872 bool
873 linux_use_real_ifname(const struct ifnet *ifp)
874 {
875 
876 	return (use_real_ifnames);
877 }
878