xref: /freebsd/sys/compat/linux/linux.c (revision 479e29157259086c40eb61b1326299c22de5a59c)
1 /*-
2  * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include "opt_inet6.h"
27 
28 #include <sys/param.h>
29 #include <sys/conf.h>
30 #include <sys/ctype.h>
31 #include <sys/file.h>
32 #include <sys/filedesc.h>
33 #include <sys/jail.h>
34 #include <sys/lock.h>
35 #include <sys/malloc.h>
36 #include <sys/poll.h>
37 #include <sys/proc.h>
38 #include <sys/signalvar.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
41 
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <netlink/netlink.h>
47 
48 #include <sys/un.h>
49 #include <netinet/in.h>
50 
51 #include <compat/linux/linux.h>
52 #include <compat/linux/linux_common.h>
53 #include <compat/linux/linux_mib.h>
54 #include <compat/linux/linux_util.h>
55 
56 _Static_assert(LINUX_IFNAMSIZ == IFNAMSIZ, "Linux IFNAMSIZ");
57 _Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr),
58     "Linux struct sockaddr size");
59 _Static_assert(offsetof(struct sockaddr, sa_data) ==
60     offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout");
61 
62 static bool use_real_ifnames = false;
63 SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN,
64     &use_real_ifnames, 0,
65     "Use FreeBSD interface names instead of generating ethN aliases");
66 
67 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
68 	LINUX_SIGHUP,	/* SIGHUP */
69 	LINUX_SIGINT,	/* SIGINT */
70 	LINUX_SIGQUIT,	/* SIGQUIT */
71 	LINUX_SIGILL,	/* SIGILL */
72 	LINUX_SIGTRAP,	/* SIGTRAP */
73 	LINUX_SIGABRT,	/* SIGABRT */
74 	0,		/* SIGEMT */
75 	LINUX_SIGFPE,	/* SIGFPE */
76 	LINUX_SIGKILL,	/* SIGKILL */
77 	LINUX_SIGBUS,	/* SIGBUS */
78 	LINUX_SIGSEGV,	/* SIGSEGV */
79 	LINUX_SIGSYS,	/* SIGSYS */
80 	LINUX_SIGPIPE,	/* SIGPIPE */
81 	LINUX_SIGALRM,	/* SIGALRM */
82 	LINUX_SIGTERM,	/* SIGTERM */
83 	LINUX_SIGURG,	/* SIGURG */
84 	LINUX_SIGSTOP,	/* SIGSTOP */
85 	LINUX_SIGTSTP,	/* SIGTSTP */
86 	LINUX_SIGCONT,	/* SIGCONT */
87 	LINUX_SIGCHLD,	/* SIGCHLD */
88 	LINUX_SIGTTIN,	/* SIGTTIN */
89 	LINUX_SIGTTOU,	/* SIGTTOU */
90 	LINUX_SIGIO,	/* SIGIO */
91 	LINUX_SIGXCPU,	/* SIGXCPU */
92 	LINUX_SIGXFSZ,	/* SIGXFSZ */
93 	LINUX_SIGVTALRM,/* SIGVTALRM */
94 	LINUX_SIGPROF,	/* SIGPROF */
95 	LINUX_SIGWINCH,	/* SIGWINCH */
96 	0,		/* SIGINFO */
97 	LINUX_SIGUSR1,	/* SIGUSR1 */
98 	LINUX_SIGUSR2	/* SIGUSR2 */
99 };
100 
/*
 * FreeBSD signal number standing in for Linux SIGPWR: one past the FreeBSD
 * signal that the highest Linux real-time signal (LINUX_SIGRTMAX) maps to.
 */
#define	LINUX_SIGPWREMU	(SIGRTMIN + LINUX_SIGRTMAX - LINUX_SIGRTMIN + 1)
102 
103 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
104 	SIGHUP,		/* LINUX_SIGHUP */
105 	SIGINT,		/* LINUX_SIGINT */
106 	SIGQUIT,	/* LINUX_SIGQUIT */
107 	SIGILL,		/* LINUX_SIGILL */
108 	SIGTRAP,	/* LINUX_SIGTRAP */
109 	SIGABRT,	/* LINUX_SIGABRT */
110 	SIGBUS,		/* LINUX_SIGBUS */
111 	SIGFPE,		/* LINUX_SIGFPE */
112 	SIGKILL,	/* LINUX_SIGKILL */
113 	SIGUSR1,	/* LINUX_SIGUSR1 */
114 	SIGSEGV,	/* LINUX_SIGSEGV */
115 	SIGUSR2,	/* LINUX_SIGUSR2 */
116 	SIGPIPE,	/* LINUX_SIGPIPE */
117 	SIGALRM,	/* LINUX_SIGALRM */
118 	SIGTERM,	/* LINUX_SIGTERM */
119 	SIGBUS,		/* LINUX_SIGSTKFLT */
120 	SIGCHLD,	/* LINUX_SIGCHLD */
121 	SIGCONT,	/* LINUX_SIGCONT */
122 	SIGSTOP,	/* LINUX_SIGSTOP */
123 	SIGTSTP,	/* LINUX_SIGTSTP */
124 	SIGTTIN,	/* LINUX_SIGTTIN */
125 	SIGTTOU,	/* LINUX_SIGTTOU */
126 	SIGURG,		/* LINUX_SIGURG */
127 	SIGXCPU,	/* LINUX_SIGXCPU */
128 	SIGXFSZ,	/* LINUX_SIGXFSZ */
129 	SIGVTALRM,	/* LINUX_SIGVTALARM */
130 	SIGPROF,	/* LINUX_SIGPROF */
131 	SIGWINCH,	/* LINUX_SIGWINCH */
132 	SIGIO,		/* LINUX_SIGIO */
133 	/*
134 	 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal
135 	 * to the first unused FreeBSD signal number. Since Linux supports
136 	 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
137 	 */
138 	LINUX_SIGPWREMU,/* LINUX_SIGPWR */
139 	SIGSYS		/* LINUX_SIGSYS */
140 };
141 
142 static struct cdev *dev_shm_cdev;
143 static struct cdevsw dev_shm_cdevsw = {
144      .d_version = D_VERSION,
145      .d_name    = "dev_shm",
146 };
147 
148 /*
149  * Map Linux RT signals to the FreeBSD RT signals.
150  */
151 static inline int
linux_to_bsd_rt_signal(int sig)152 linux_to_bsd_rt_signal(int sig)
153 {
154 
155 	return (SIGRTMIN + sig - LINUX_SIGRTMIN);
156 }
157 
158 static inline int
bsd_to_linux_rt_signal(int sig)159 bsd_to_linux_rt_signal(int sig)
160 {
161 
162 	return (sig - SIGRTMIN + LINUX_SIGRTMIN);
163 }
164 
165 int
linux_to_bsd_signal(int sig)166 linux_to_bsd_signal(int sig)
167 {
168 
169 	KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
170 
171 	if (sig < LINUX_SIGRTMIN)
172 		return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
173 
174 	return (linux_to_bsd_rt_signal(sig));
175 }
176 
177 int
bsd_to_linux_signal(int sig)178 bsd_to_linux_signal(int sig)
179 {
180 
181 	if (sig <= LINUX_SIGTBLSZ)
182 		return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
183 	if (sig == LINUX_SIGPWREMU)
184 		return (LINUX_SIGPWR);
185 
186 	return (bsd_to_linux_rt_signal(sig));
187 }
188 
189 int
linux_to_bsd_sigaltstack(int lsa)190 linux_to_bsd_sigaltstack(int lsa)
191 {
192 	int bsa = 0;
193 
194 	if (lsa & LINUX_SS_DISABLE)
195 		bsa |= SS_DISABLE;
196 	/*
197 	 * Linux ignores SS_ONSTACK flag for ss
198 	 * parameter while FreeBSD prohibits it.
199 	 */
200 	return (bsa);
201 }
202 
203 int
bsd_to_linux_sigaltstack(int bsa)204 bsd_to_linux_sigaltstack(int bsa)
205 {
206 	int lsa = 0;
207 
208 	if (bsa & SS_DISABLE)
209 		lsa |= LINUX_SS_DISABLE;
210 	if (bsa & SS_ONSTACK)
211 		lsa |= LINUX_SS_ONSTACK;
212 	return (lsa);
213 }
214 
215 void
linux_to_bsd_sigset(l_sigset_t * lss,sigset_t * bss)216 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
217 {
218 	int b, l;
219 
220 	SIGEMPTYSET(*bss);
221 	for (l = 1; l <= LINUX_SIGRTMAX; l++) {
222 		if (LINUX_SIGISMEMBER(*lss, l)) {
223 			b = linux_to_bsd_signal(l);
224 			if (b)
225 				SIGADDSET(*bss, b);
226 		}
227 	}
228 }
229 
230 void
bsd_to_linux_sigset(sigset_t * bss,l_sigset_t * lss)231 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
232 {
233 	int b, l;
234 
235 	LINUX_SIGEMPTYSET(*lss);
236 	for (b = 1; b <= SIGRTMAX; b++) {
237 		if (SIGISMEMBER(*bss, b)) {
238 			l = bsd_to_linux_signal(b);
239 			if (l)
240 				LINUX_SIGADDSET(*lss, l);
241 		}
242 	}
243 }
244 
245 /*
246  * Translate a FreeBSD interface name to a Linux interface name
247  * by interface name, and return the number of bytes copied to lxname.
248  */
249 int
ifname_bsd_to_linux_name(const char * bsdname,char * lxname,size_t len)250 ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
251 {
252 	struct epoch_tracker et;
253 	struct ifnet *ifp;
254 	int ret;
255 
256 	CURVNET_ASSERT_SET();
257 
258 	ret = 0;
259 	NET_EPOCH_ENTER(et);
260 	ifp = ifunit(bsdname);
261 	if (ifp != NULL)
262 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
263 	NET_EPOCH_EXIT(et);
264 	return (ret);
265 }
266 
267 /*
268  * Translate a FreeBSD interface name to a Linux interface name
269  * by interface index, and return the number of bytes copied to lxname.
270  */
271 int
ifname_bsd_to_linux_idx(u_int idx,char * lxname,size_t len)272 ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len)
273 {
274 	struct epoch_tracker et;
275 	struct ifnet *ifp;
276 	int ret;
277 
278 	ret = 0;
279 	CURVNET_SET(TD_TO_VNET(curthread));
280 	NET_EPOCH_ENTER(et);
281 	ifp = ifnet_byindex(idx);
282 	if (ifp != NULL)
283 		ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
284 	NET_EPOCH_EXIT(et);
285 	CURVNET_RESTORE();
286 	return (ret);
287 }
288 
289 /*
290  * Translate a FreeBSD interface name to a Linux interface name,
291  * and return the number of bytes copied to lxname, 0 if interface
292  * not found, -1 on error.
293  */
294 struct ifname_bsd_to_linux_ifp_cb_s {
295 	struct ifnet	*ifp;
296 	int		ethno;
297 	char		*lxname;
298 	size_t		len;
299 };
300 
301 static int
ifname_bsd_to_linux_ifp_cb(if_t ifp,void * arg)302 ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
303 {
304 	struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
305 
306 	if (ifp == cbs->ifp)
307 		return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
308 	if (IFP_IS_ETH(ifp))
309 		cbs->ethno++;
310 	return (0);
311 }
312 
313 int
ifname_bsd_to_linux_ifp(struct ifnet * ifp,char * lxname,size_t len)314 ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
315 {
316 	struct ifname_bsd_to_linux_ifp_cb_s arg = {
317 		.ifp = ifp,
318 		.ethno = 0,
319 		.lxname = lxname,
320 		.len = len,
321 	};
322 
323 	NET_EPOCH_ASSERT();
324 
325 	/*
326 	 * Linux loopback interface name is lo (not lo0),
327 	 * we translate lo to lo0, loX to loX.
328 	 */
329 	if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0)
330 		return (strlcpy(lxname, "lo", len));
331 
332 	/* Short-circuit non ethernet interfaces. */
333 	if (!IFP_IS_ETH(ifp) || linux_use_real_ifname(ifp))
334 		return (strlcpy(lxname, if_name(ifp), len));
335 
336  	/* Determine the (relative) unit number for ethernet interfaces. */
337 	return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
338 }
339 
340 /*
341  * Translate a Linux interface name to a FreeBSD interface name,
342  * and return the associated ifnet structure
343  * bsdname and lxname need to be least IFNAMSIZ bytes long, but
344  * can point to the same buffer.
345  */
346 struct ifname_linux_to_ifp_cb_s {
347 	bool		is_lo;
348 	bool		is_eth;
349 	int		ethno;
350 	int		unit;
351 	const char	*lxname;
352 	if_t		ifp;
353 };
354 
355 static int
ifname_linux_to_ifp_cb(if_t ifp,void * arg)356 ifname_linux_to_ifp_cb(if_t ifp, void *arg)
357 {
358 	struct ifname_linux_to_ifp_cb_s *cbs = arg;
359 
360 	NET_EPOCH_ASSERT();
361 
362 	/*
363 	 * Allow Linux programs to use FreeBSD names. Don't presume
364 	 * we never have an interface named "eth", so don't make
365 	 * the test optional based on is_eth.
366 	 */
367 	if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
368 		goto out;
369 	if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
370 		goto out;
371 	if (cbs->is_lo && IFP_IS_LOOP(ifp))
372 		goto out;
373 	if (IFP_IS_ETH(ifp))
374 		cbs->ethno++;
375 	return (0);
376 
377 out:
378 	cbs->ifp = ifp;
379 	return (1);
380 }
381 
382 struct ifnet *
ifname_linux_to_ifp(struct thread * td,const char * lxname)383 ifname_linux_to_ifp(struct thread *td, const char *lxname)
384 {
385 	struct ifname_linux_to_ifp_cb_s arg = {
386 		.ethno = 0,
387 		.lxname = lxname,
388 		.ifp = NULL,
389 	};
390 	int len;
391 	char *ep;
392 
393 	NET_EPOCH_ASSERT();
394 
395 	for (len = 0; len < LINUX_IFNAMSIZ; ++len)
396 		if (!isalpha(lxname[len]) || lxname[len] == '\0')
397 			break;
398 	if (len == 0 || len == LINUX_IFNAMSIZ)
399 		return (NULL);
400 	/*
401 	 * Linux loopback interface name is lo (not lo0),
402 	 * we translate lo to lo0, loX to loX.
403 	 */
404 	arg.is_lo = (len == 2 && strncmp(lxname, "lo", LINUX_IFNAMSIZ) == 0);
405 	arg.unit = (int)strtoul(lxname + len, &ep, 10);
406 	if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
407 	    arg.is_lo == 0)
408 		return (NULL);
409 	arg.is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
410 
411 	if_foreach(ifname_linux_to_ifp_cb, &arg);
412 	return (arg.ifp);
413 }
414 
415 int
ifname_linux_to_bsd(struct thread * td,const char * lxname,char * bsdname)416 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
417 {
418 	struct epoch_tracker et;
419 	struct ifnet *ifp;
420 
421 	CURVNET_SET(TD_TO_VNET(td));
422 	NET_EPOCH_ENTER(et);
423 	ifp = ifname_linux_to_ifp(td, lxname);
424 	if (ifp != NULL && bsdname != NULL)
425 		strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
426 	NET_EPOCH_EXIT(et);
427 	CURVNET_RESTORE();
428 	return (ifp != NULL ? 0 : EINVAL);
429 }
430 
/*
 * Return the Linux interface flags of ifp, derived from the union of its
 * interface flags and driver flags.  Must be called inside the network
 * epoch.
 */
unsigned short
linux_ifflags(struct ifnet *ifp)
{
	unsigned short bsdflags;

	NET_EPOCH_ASSERT();

	bsdflags = if_getflags(ifp);
	bsdflags |= if_getdrvflags(ifp);
	return (bsd_to_linux_ifflags(bsdflags));
}
441 
442 unsigned short
bsd_to_linux_ifflags(int fl)443 bsd_to_linux_ifflags(int fl)
444 {
445 	unsigned short flags = 0;
446 
447 	if (fl & IFF_UP)
448 		flags |= LINUX_IFF_UP;
449 	if (fl & IFF_BROADCAST)
450 		flags |= LINUX_IFF_BROADCAST;
451 	if (fl & IFF_DEBUG)
452 		flags |= LINUX_IFF_DEBUG;
453 	if (fl & IFF_LOOPBACK)
454 		flags |= LINUX_IFF_LOOPBACK;
455 	if (fl & IFF_POINTOPOINT)
456 		flags |= LINUX_IFF_POINTOPOINT;
457 	if (fl & IFF_DRV_RUNNING)
458 		flags |= LINUX_IFF_RUNNING;
459 	if (fl & IFF_NOARP)
460 		flags |= LINUX_IFF_NOARP;
461 	if (fl & IFF_PROMISC)
462 		flags |= LINUX_IFF_PROMISC;
463 	if (fl & IFF_ALLMULTI)
464 		flags |= LINUX_IFF_ALLMULTI;
465 	if (fl & IFF_MULTICAST)
466 		flags |= LINUX_IFF_MULTICAST;
467 	return (flags);
468 }
469 
470 static u_int
linux_ifhwaddr_cb(void * arg,struct ifaddr * ifa,u_int count)471 linux_ifhwaddr_cb(void *arg, struct ifaddr *ifa, u_int count)
472 {
473 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifa->ifa_addr;
474 	struct l_sockaddr *lsa = arg;
475 
476 	if (count > 0)
477 		return (0);
478 	if (sdl->sdl_type != IFT_ETHER)
479 		return (0);
480 	bzero(lsa, sizeof(*lsa));
481 	lsa->sa_family = LINUX_ARPHRD_ETHER;
482 	bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
483 	return (1);
484 }
485 
486 int
linux_ifhwaddr(struct ifnet * ifp,struct l_sockaddr * lsa)487 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
488 {
489 
490 	NET_EPOCH_ASSERT();
491 
492 	if (IFP_IS_LOOP(ifp)) {
493 		bzero(lsa, sizeof(*lsa));
494 		lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
495 		return (0);
496 	}
497 	if (!IFP_IS_ETH(ifp))
498 		return (ENOENT);
499 	if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
500 		return (0);
501 	return (ENOENT);
502 }
503 
504 sa_family_t
linux_to_bsd_domain(sa_family_t domain)505 linux_to_bsd_domain(sa_family_t domain)
506 {
507 
508 	switch (domain) {
509 	case LINUX_AF_UNSPEC:
510 		return (AF_UNSPEC);
511 	case LINUX_AF_UNIX:
512 		return (AF_LOCAL);
513 	case LINUX_AF_INET:
514 		return (AF_INET);
515 	case LINUX_AF_INET6:
516 		return (AF_INET6);
517 	case LINUX_AF_AX25:
518 		return (AF_CCITT);
519 	case LINUX_AF_IPX:
520 		return (AF_IPX);
521 	case LINUX_AF_APPLETALK:
522 		return (AF_APPLETALK);
523 	case LINUX_AF_NETLINK:
524 		return (AF_NETLINK);
525 	}
526 	return (AF_UNKNOWN);
527 }
528 
529 sa_family_t
bsd_to_linux_domain(sa_family_t domain)530 bsd_to_linux_domain(sa_family_t domain)
531 {
532 
533 	switch (domain) {
534 	case AF_UNSPEC:
535 		return (LINUX_AF_UNSPEC);
536 	case AF_LOCAL:
537 		return (LINUX_AF_UNIX);
538 	case AF_INET:
539 		return (LINUX_AF_INET);
540 	case AF_INET6:
541 		return (LINUX_AF_INET6);
542 	case AF_CCITT:
543 		return (LINUX_AF_AX25);
544 	case AF_IPX:
545 		return (LINUX_AF_IPX);
546 	case AF_APPLETALK:
547 		return (LINUX_AF_APPLETALK);
548 	case AF_NETLINK:
549 		return (LINUX_AF_NETLINK);
550 	}
551 	return (AF_UNKNOWN);
552 }
553 
554 /*
555  * Based on the fact that:
556  * 1. Native and Linux storage of struct sockaddr
557  * and struct sockaddr_in6 are equal.
558  * 2. On Linux sa_family is the first member of all struct sockaddr.
559  */
560 int
bsd_to_linux_sockaddr(const struct sockaddr * sa,struct l_sockaddr ** lsa,socklen_t len)561 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
562     socklen_t len)
563 {
564 	struct l_sockaddr *kosa;
565 	sa_family_t bdom;
566 
567 	*lsa = NULL;
568 	if (len < 2 || len > UCHAR_MAX)
569 		return (EINVAL);
570 	bdom = bsd_to_linux_domain(sa->sa_family);
571 	if (bdom == AF_UNKNOWN)
572 		return (EAFNOSUPPORT);
573 
574 	kosa = malloc(len, M_LINUX, M_WAITOK);
575 	bcopy(sa, kosa, len);
576 	kosa->sa_family = bdom;
577 	*lsa = kosa;
578 	return (0);
579 }
580 
581 /*
582  * If sap is NULL, then osa points at already copied in linux sockaddr that
583  * should be edited in place.  Otherwise memory is allocated, sockaddr
584  * copied in and returned in *sap.
585  */
586 int
linux_to_bsd_sockaddr(struct l_sockaddr * osa,struct sockaddr ** sap,socklen_t * len)587 linux_to_bsd_sockaddr(struct l_sockaddr *osa, struct sockaddr **sap,
588     socklen_t *len)
589 {
590 	struct sockaddr *sa;
591 	struct l_sockaddr *kosa;
592 #ifdef INET6
593 	struct sockaddr_in6 *sin6;
594 	bool  oldv6size;
595 #endif
596 	char *name;
597 	int salen, bdom, error, hdrlen, namelen;
598 
599 	if (*len < 2 || *len > UCHAR_MAX)
600 		return (EINVAL);
601 
602 	salen = *len;
603 
604 #ifdef INET6
605 	oldv6size = false;
606 	/*
607 	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
608 	 * if it's a v4-mapped address, so reserve the proper space
609 	 * for it.
610 	 */
611 	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
612 		salen += sizeof(uint32_t);
613 		oldv6size = true;
614 	}
615 #endif
616 
617 	if (sap != NULL) {
618 		kosa = malloc(salen, M_SONAME, M_WAITOK);
619 		if ((error = copyin(osa, kosa, *len)))
620 			goto out;
621 	} else
622 		kosa = osa;
623 
624 	bdom = linux_to_bsd_domain(kosa->sa_family);
625 	if (bdom == AF_UNKNOWN) {
626 		error = EAFNOSUPPORT;
627 		goto out;
628 	}
629 
630 #ifdef INET6
631 	/*
632 	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
633 	 * which lacks the scope id compared with RFC2553 one. If we detect
634 	 * the situation, reject the address and write a message to system log.
635 	 *
636 	 * Still accept addresses for which the scope id is not used.
637 	 */
638 	if (oldv6size) {
639 		if (bdom == AF_INET6) {
640 			sin6 = (struct sockaddr_in6 *)kosa;
641 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
642 			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
643 			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
644 			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
645 			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
646 			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
647 				sin6->sin6_scope_id = 0;
648 			} else {
649 				linux_msg(curthread,
650 				    "obsolete pre-RFC2553 sockaddr_in6 rejected");
651 				error = EINVAL;
652 				goto out;
653 			}
654 		} else
655 			salen -= sizeof(uint32_t);
656 	}
657 #endif
658 	if (bdom == AF_INET) {
659 		if (salen < sizeof(struct sockaddr_in)) {
660 			error = EINVAL;
661 			goto out;
662 		}
663 		salen = sizeof(struct sockaddr_in);
664 	}
665 
666 	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
667 		hdrlen = offsetof(struct sockaddr_un, sun_path);
668 		name = ((struct sockaddr_un *)kosa)->sun_path;
669 		if (*name == '\0') {
670 			/*
671 			 * Linux abstract namespace starts with a NULL byte.
672 			 * XXX We do not support abstract namespace yet.
673 			 */
674 			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
675 		} else
676 			namelen = strnlen(name, salen - hdrlen);
677 		salen = hdrlen + namelen;
678 		if (salen > sizeof(struct sockaddr_un)) {
679 			error = ENAMETOOLONG;
680 			goto out;
681 		}
682 	}
683 
684 	if (bdom == AF_NETLINK) {
685 		if (salen < sizeof(struct sockaddr_nl)) {
686 			error = EINVAL;
687 			goto out;
688 		}
689 		salen = sizeof(struct sockaddr_nl);
690 	}
691 
692 	sa = (struct sockaddr *)kosa;
693 	sa->sa_family = bdom;
694 	sa->sa_len = salen;
695 
696 	if (sap != NULL) {
697 		*sap = sa;
698 		*len = salen;
699 	}
700 	return (0);
701 
702 out:
703 	if (sap != NULL)
704 		free(kosa, M_SONAME);
705 	return (error);
706 }
707 
708 void
linux_dev_shm_create(void)709 linux_dev_shm_create(void)
710 {
711 	int error;
712 
713 	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
714 	    &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
715 	if (error != 0) {
716 		printf("%s: failed to create device node, error %d\n",
717 		    __func__, error);
718 	}
719 }
720 
721 void
linux_dev_shm_destroy(void)722 linux_dev_shm_destroy(void)
723 {
724 
725 	destroy_dev(dev_shm_cdev);
726 }
727 
728 int
bsd_to_linux_bits_(int value,struct bsd_to_linux_bitmap * bitmap,size_t mapcnt,int no_value)729 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
730     size_t mapcnt, int no_value)
731 {
732 	int bsd_mask, bsd_value, linux_mask, linux_value;
733 	int linux_ret;
734 	size_t i;
735 	bool applied;
736 
737 	applied = false;
738 	linux_ret = 0;
739 	for (i = 0; i < mapcnt; ++i) {
740 		bsd_mask = bitmap[i].bsd_mask;
741 		bsd_value = bitmap[i].bsd_value;
742 		if (bsd_mask == 0)
743 			bsd_mask = bsd_value;
744 
745 		linux_mask = bitmap[i].linux_mask;
746 		linux_value = bitmap[i].linux_value;
747 		if (linux_mask == 0)
748 			linux_mask = linux_value;
749 
750 		/*
751 		 * If a mask larger than just the value is set, we explicitly
752 		 * want to make sure that only this bit we mapped within that
753 		 * mask is set.
754 		 */
755 		if ((value & bsd_mask) == bsd_value) {
756 			linux_ret = (linux_ret & ~linux_mask) | linux_value;
757 			applied = true;
758 		}
759 	}
760 
761 	if (!applied)
762 		return (no_value);
763 	return (linux_ret);
764 }
765 
766 int
linux_to_bsd_bits_(int value,struct bsd_to_linux_bitmap * bitmap,size_t mapcnt,int no_value)767 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
768     size_t mapcnt, int no_value)
769 {
770 	int bsd_mask, bsd_value, linux_mask, linux_value;
771 	int bsd_ret;
772 	size_t i;
773 	bool applied;
774 
775 	applied = false;
776 	bsd_ret = 0;
777 	for (i = 0; i < mapcnt; ++i) {
778 		bsd_mask = bitmap[i].bsd_mask;
779 		bsd_value = bitmap[i].bsd_value;
780 		if (bsd_mask == 0)
781 			bsd_mask = bsd_value;
782 
783 		linux_mask = bitmap[i].linux_mask;
784 		linux_value = bitmap[i].linux_value;
785 		if (linux_mask == 0)
786 			linux_mask = linux_value;
787 
788 		/*
789 		 * If a mask larger than just the value is set, we explicitly
790 		 * want to make sure that only this bit we mapped within that
791 		 * mask is set.
792 		 */
793 		if ((value & linux_mask) == linux_value) {
794 			bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
795 			applied = true;
796 		}
797 	}
798 
799 	if (!applied)
800 		return (no_value);
801 	return (bsd_ret);
802 }
803 
804 void
linux_to_bsd_poll_events(struct thread * td,int fd,short lev,short * bev)805 linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
806     short *bev)
807 {
808 	struct file *fp;
809 	int error;
810 	short bits = 0;
811 
812 	if (lev & LINUX_POLLIN)
813 		bits |= POLLIN;
814 	if (lev & LINUX_POLLPRI)
815 		bits |=	POLLPRI;
816 	if (lev & LINUX_POLLOUT)
817 		bits |= POLLOUT;
818 	if (lev & LINUX_POLLERR)
819 		bits |= POLLERR;
820 	if (lev & LINUX_POLLHUP)
821 		bits |= POLLHUP;
822 	if (lev & LINUX_POLLNVAL)
823 		bits |= POLLNVAL;
824 	if (lev & LINUX_POLLRDNORM)
825 		bits |= POLLRDNORM;
826 	if (lev & LINUX_POLLRDBAND)
827 		bits |= POLLRDBAND;
828 	if (lev & LINUX_POLLWRBAND)
829 		bits |= POLLWRBAND;
830 	if (lev & LINUX_POLLWRNORM)
831 		bits |= POLLWRNORM;
832 
833 	if (lev & LINUX_POLLRDHUP) {
834 		/*
835 		 * It seems that the Linux silencly ignores POLLRDHUP
836 		 * on non-socket file descriptors unlike FreeBSD, where
837 		 * events bits is more strictly checked (POLLSTANDARD).
838 		 */
839 		error = fget_unlocked(td, fd, &cap_no_rights, &fp);
840 		if (error == 0) {
841 			/*
842 			 * XXX. On FreeBSD POLLRDHUP applies only to
843 			 * stream sockets.
844 			 */
845 			if (fp->f_type == DTYPE_SOCKET)
846 				bits |= POLLRDHUP;
847 			fdrop(fp, td);
848 		}
849 	}
850 
851 	if (lev & LINUX_POLLMSG)
852 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
853 	if (lev & LINUX_POLLREMOVE)
854 		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
855 
856 	*bev = bits;
857 }
858 
859 void
bsd_to_linux_poll_events(short bev,short * lev)860 bsd_to_linux_poll_events(short bev, short *lev)
861 {
862 	short bits = 0;
863 
864 	if (bev & POLLIN)
865 		bits |= LINUX_POLLIN;
866 	if (bev & POLLPRI)
867 		bits |=	LINUX_POLLPRI;
868 	if (bev & (POLLOUT | POLLWRNORM))
869 		/*
870 		 * POLLWRNORM is equal to POLLOUT on FreeBSD,
871 		 * but not on Linux
872 		 */
873 		bits |= LINUX_POLLOUT;
874 	if (bev & POLLERR)
875 		bits |= LINUX_POLLERR;
876 	if (bev & POLLHUP)
877 		bits |= LINUX_POLLHUP;
878 	if (bev & POLLNVAL)
879 		bits |= LINUX_POLLNVAL;
880 	if (bev & POLLRDNORM)
881 		bits |= LINUX_POLLRDNORM;
882 	if (bev & POLLRDBAND)
883 		bits |= LINUX_POLLRDBAND;
884 	if (bev & POLLWRBAND)
885 		bits |= LINUX_POLLWRBAND;
886 	if (bev & POLLRDHUP)
887 		bits |= LINUX_POLLRDHUP;
888 
889 	*lev = bits;
890 }
891 
892 bool
linux_use_real_ifname(const struct ifnet * ifp)893 linux_use_real_ifname(const struct ifnet *ifp)
894 {
895 
896 	return (use_real_ifnames);
897 }
898