1 /*-
2 * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26 #include "opt_inet6.h"
27
28 #include <sys/param.h>
29 #include <sys/conf.h>
30 #include <sys/ctype.h>
31 #include <sys/file.h>
32 #include <sys/filedesc.h>
33 #include <sys/jail.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/poll.h>
38 #include <sys/proc.h>
39 #include <sys/signalvar.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
42
43 #include <netlink/netlink.h>
44 #include <sys/un.h>
45 #include <netinet/in.h>
46
47 #include <compat/linux/linux.h>
48 #include <compat/linux/linux_common.h>
49 #include <compat/linux/linux_mib.h>
50 #include <compat/linux/linux_util.h>
51
/*
 * Compile-time checks that the native and the Linux generic socket address
 * structures have the same size and keep sa_data at the same offset.
 */
_Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr),
    "Linux struct sockaddr size");
_Static_assert(offsetof(struct sockaddr, sa_data) ==
    offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout");
56
/*
 * FreeBSD to Linux translation of the non-realtime signals.
 *
 * The table is indexed with _SIG_IDX(FreeBSD signal number); the trailing
 * comment on each row names the FreeBSD signal that owns the slot.  A zero
 * entry (SIGEMT, SIGINFO) marks a signal for which no Linux number is
 * provided; bsd_to_linux_sigset() skips such entries.
 */
static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP,	/* SIGHUP */
	LINUX_SIGINT,	/* SIGINT */
	LINUX_SIGQUIT,	/* SIGQUIT */
	LINUX_SIGILL,	/* SIGILL */
	LINUX_SIGTRAP,	/* SIGTRAP */
	LINUX_SIGABRT,	/* SIGABRT */
	0,		/* SIGEMT */
	LINUX_SIGFPE,	/* SIGFPE */
	LINUX_SIGKILL,	/* SIGKILL */
	LINUX_SIGBUS,	/* SIGBUS */
	LINUX_SIGSEGV,	/* SIGSEGV */
	LINUX_SIGSYS,	/* SIGSYS */
	LINUX_SIGPIPE,	/* SIGPIPE */
	LINUX_SIGALRM,	/* SIGALRM */
	LINUX_SIGTERM,	/* SIGTERM */
	LINUX_SIGURG,	/* SIGURG */
	LINUX_SIGSTOP,	/* SIGSTOP */
	LINUX_SIGTSTP,	/* SIGTSTP */
	LINUX_SIGCONT,	/* SIGCONT */
	LINUX_SIGCHLD,	/* SIGCHLD */
	LINUX_SIGTTIN,	/* SIGTTIN */
	LINUX_SIGTTOU,	/* SIGTTOU */
	LINUX_SIGIO,	/* SIGIO */
	LINUX_SIGXCPU,	/* SIGXCPU */
	LINUX_SIGXFSZ,	/* SIGXFSZ */
	LINUX_SIGVTALRM,/* SIGVTALRM */
	LINUX_SIGPROF,	/* SIGPROF */
	LINUX_SIGWINCH,	/* SIGWINCH */
	0,		/* SIGINFO */
	LINUX_SIGUSR1,	/* SIGUSR1 */
	LINUX_SIGUSR2	/* SIGUSR2 */
};
90
/*
 * FreeBSD signal number used to emulate Linux SIGPWR: one past the FreeBSD
 * number that LINUX_SIGRTMAX translates to in linux_to_bsd_rt_signal().
 */
#define	LINUX_SIGPWREMU	(SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1)

/*
 * Linux to FreeBSD translation of the non-realtime signals.
 *
 * The table is indexed with _SIG_IDX(Linux signal number); the trailing
 * comment on each row names the Linux signal that owns the slot.
 */
static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
	SIGHUP,		/* LINUX_SIGHUP */
	SIGINT,		/* LINUX_SIGINT */
	SIGQUIT,	/* LINUX_SIGQUIT */
	SIGILL,		/* LINUX_SIGILL */
	SIGTRAP,	/* LINUX_SIGTRAP */
	SIGABRT,	/* LINUX_SIGABRT */
	SIGBUS,		/* LINUX_SIGBUS */
	SIGFPE,		/* LINUX_SIGFPE */
	SIGKILL,	/* LINUX_SIGKILL */
	SIGUSR1,	/* LINUX_SIGUSR1 */
	SIGSEGV,	/* LINUX_SIGSEGV */
	SIGUSR2,	/* LINUX_SIGUSR2 */
	SIGPIPE,	/* LINUX_SIGPIPE */
	SIGALRM,	/* LINUX_SIGALRM */
	SIGTERM,	/* LINUX_SIGTERM */
	SIGBUS,		/* LINUX_SIGSTKFLT */
	SIGCHLD,	/* LINUX_SIGCHLD */
	SIGCONT,	/* LINUX_SIGCONT */
	SIGSTOP,	/* LINUX_SIGSTOP */
	SIGTSTP,	/* LINUX_SIGTSTP */
	SIGTTIN,	/* LINUX_SIGTTIN */
	SIGTTOU,	/* LINUX_SIGTTOU */
	SIGURG,		/* LINUX_SIGURG */
	SIGXCPU,	/* LINUX_SIGXCPU */
	SIGXFSZ,	/* LINUX_SIGXFSZ */
	SIGVTALRM,	/* LINUX_SIGVTALRM */
	SIGPROF,	/* LINUX_SIGPROF */
	SIGWINCH,	/* LINUX_SIGWINCH */
	SIGIO,		/* LINUX_SIGIO */
	/*
	 * FreeBSD does not have a SIGPWR signal, so map the Linux SIGPWR
	 * signal to the first unused FreeBSD signal number.  Since Linux
	 * supports signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
	 */
	LINUX_SIGPWREMU,/* LINUX_SIGPWR */
	SIGSYS		/* LINUX_SIGSYS */
};
131
/*
 * Device node created by linux_dev_shm_create() and removed by
 * linux_dev_shm_destroy().  Its switch table sets only the version and the
 * name; no device methods are supplied here.
 */
static struct cdev *dev_shm_cdev;
static struct cdevsw dev_shm_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "dev_shm",
};
137
138 /*
139 * Map Linux RT signals to the FreeBSD RT signals.
140 */
141 static inline int
linux_to_bsd_rt_signal(int sig)142 linux_to_bsd_rt_signal(int sig)
143 {
144
145 return (SIGRTMIN + sig - LINUX_SIGRTMIN);
146 }
147
148 static inline int
bsd_to_linux_rt_signal(int sig)149 bsd_to_linux_rt_signal(int sig)
150 {
151
152 return (sig - SIGRTMIN + LINUX_SIGRTMIN);
153 }
154
155 int
linux_to_bsd_signal(int sig)156 linux_to_bsd_signal(int sig)
157 {
158
159 KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
160
161 if (sig < LINUX_SIGRTMIN)
162 return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
163
164 return (linux_to_bsd_rt_signal(sig));
165 }
166
167 int
bsd_to_linux_signal(int sig)168 bsd_to_linux_signal(int sig)
169 {
170
171 if (sig <= LINUX_SIGTBLSZ)
172 return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
173 if (sig == LINUX_SIGPWREMU)
174 return (LINUX_SIGPWR);
175
176 return (bsd_to_linux_rt_signal(sig));
177 }
178
179 int
linux_to_bsd_sigaltstack(int lsa)180 linux_to_bsd_sigaltstack(int lsa)
181 {
182 int bsa = 0;
183
184 if (lsa & LINUX_SS_DISABLE)
185 bsa |= SS_DISABLE;
186 /*
187 * Linux ignores SS_ONSTACK flag for ss
188 * parameter while FreeBSD prohibits it.
189 */
190 return (bsa);
191 }
192
193 int
bsd_to_linux_sigaltstack(int bsa)194 bsd_to_linux_sigaltstack(int bsa)
195 {
196 int lsa = 0;
197
198 if (bsa & SS_DISABLE)
199 lsa |= LINUX_SS_DISABLE;
200 if (bsa & SS_ONSTACK)
201 lsa |= LINUX_SS_ONSTACK;
202 return (lsa);
203 }
204
205 void
linux_to_bsd_sigset(l_sigset_t * lss,sigset_t * bss)206 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
207 {
208 int b, l;
209
210 SIGEMPTYSET(*bss);
211 for (l = 1; l <= LINUX_SIGRTMAX; l++) {
212 if (LINUX_SIGISMEMBER(*lss, l)) {
213 b = linux_to_bsd_signal(l);
214 if (b)
215 SIGADDSET(*bss, b);
216 }
217 }
218 }
219
220 void
bsd_to_linux_sigset(sigset_t * bss,l_sigset_t * lss)221 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
222 {
223 int b, l;
224
225 LINUX_SIGEMPTYSET(*lss);
226 for (b = 1; b <= SIGRTMAX; b++) {
227 if (SIGISMEMBER(*bss, b)) {
228 l = bsd_to_linux_signal(b);
229 if (l)
230 LINUX_SIGADDSET(*lss, l);
231 }
232 }
233 }
234
235 sa_family_t
linux_to_bsd_domain(sa_family_t domain)236 linux_to_bsd_domain(sa_family_t domain)
237 {
238
239 switch (domain) {
240 case LINUX_AF_UNSPEC:
241 return (AF_UNSPEC);
242 case LINUX_AF_UNIX:
243 return (AF_LOCAL);
244 case LINUX_AF_INET:
245 return (AF_INET);
246 case LINUX_AF_INET6:
247 return (AF_INET6);
248 case LINUX_AF_AX25:
249 return (AF_CCITT);
250 case LINUX_AF_IPX:
251 return (AF_IPX);
252 case LINUX_AF_APPLETALK:
253 return (AF_APPLETALK);
254 case LINUX_AF_NETLINK:
255 return (AF_NETLINK);
256 }
257 return (AF_UNKNOWN);
258 }
259
260 sa_family_t
bsd_to_linux_domain(sa_family_t domain)261 bsd_to_linux_domain(sa_family_t domain)
262 {
263
264 switch (domain) {
265 case AF_UNSPEC:
266 return (LINUX_AF_UNSPEC);
267 case AF_LOCAL:
268 return (LINUX_AF_UNIX);
269 case AF_INET:
270 return (LINUX_AF_INET);
271 case AF_INET6:
272 return (LINUX_AF_INET6);
273 case AF_CCITT:
274 return (LINUX_AF_AX25);
275 case AF_IPX:
276 return (LINUX_AF_IPX);
277 case AF_APPLETALK:
278 return (LINUX_AF_APPLETALK);
279 case AF_NETLINK:
280 return (LINUX_AF_NETLINK);
281 }
282 return (AF_UNKNOWN);
283 }
284
285 /*
286 * Based on the fact that:
287 * 1. Native and Linux storage of struct sockaddr
288 * and struct sockaddr_in6 are equal.
289 * 2. On Linux sa_family is the first member of all struct sockaddr.
290 */
291 int
bsd_to_linux_sockaddr(const struct sockaddr * sa,struct l_sockaddr ** lsa,socklen_t len)292 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
293 socklen_t len)
294 {
295 struct l_sockaddr *kosa;
296 sa_family_t bdom;
297
298 *lsa = NULL;
299 if (len < 2 || len > UCHAR_MAX)
300 return (EINVAL);
301 bdom = bsd_to_linux_domain(sa->sa_family);
302 if (bdom == AF_UNKNOWN)
303 return (EAFNOSUPPORT);
304
305 kosa = malloc(len, M_LINUX, M_WAITOK);
306 bcopy(sa, kosa, len);
307 kosa->sa_family = bdom;
308 *lsa = kosa;
309 return (0);
310 }
311
/*
 * Convert a Linux socket address into the native FreeBSD form.
 *
 * If sap is NULL, then osa points at an already copied-in Linux sockaddr
 * that is edited in place.  Otherwise memory is allocated from M_SONAME,
 * the sockaddr is copied in from osa with copyin() and the result is
 * returned in *sap.
 *
 * osa	Linux socket address (copyin() source, or the buffer to edit in
 *	place when sap is NULL).
 * sap	NULL for in-place operation; otherwise receives the allocated and
 *	converted address on success.  Nothing is freed here on success, so
 *	the buffer is left for the caller to release.
 * len	in: length of the Linux address, must be in the range 2..UCHAR_MAX;
 *	out: length of the native address, stored only when sap is not NULL.
 *
 * Returns 0 on success, otherwise:
 *	EINVAL		bad length, rejected pre-RFC2553 sockaddr_in6, or an
 *			AF_INET/AF_NETLINK address that is too short;
 *	EAFNOSUPPORT	the Linux address family has no native mapping;
 *	ENAMETOOLONG	AF_LOCAL path does not fit struct sockaddr_un;
 *	or the error returned by copyin().
 *
 * NOTE(review): in the in-place case *len is never updated although the
 * native length may differ; only sa_len carries it -- confirm that callers
 * of this mode rely on sa_len.
 */
int
linux_to_bsd_sockaddr(struct l_sockaddr *osa, struct sockaddr **sap,
    socklen_t *len)
{
	struct sockaddr *sa;
	struct l_sockaddr *kosa;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	bool oldv6size;
#endif
	char *name;
	int salen, bdom, error, hdrlen, namelen;

	if (*len < 2 || *len > UCHAR_MAX)
		return (EINVAL);

	salen = *len;

#ifdef INET6
	oldv6size = false;
	/*
	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
	 * if it's a v4-mapped address, so reserve the proper space
	 * for it.
	 */
	if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
		salen += sizeof(uint32_t);
		oldv6size = true;
	}
#endif

	if (sap != NULL) {
		/* Allocate the (possibly enlarged) size, copy in the original. */
		kosa = malloc(salen, M_SONAME, M_WAITOK);
		if ((error = copyin(osa, kosa, *len)))
			goto out;
	} else
		kosa = osa;

	bdom = linux_to_bsd_domain(kosa->sa_family);
	if (bdom == AF_UNKNOWN) {
		error = EAFNOSUPPORT;
		goto out;
	}

#ifdef INET6
	/*
	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
	 * which lacks the scope id compared with RFC2553 one. If we detect
	 * the situation, reject the address and write a message to system log.
	 *
	 * Still accept addresses for which the scope id is not used.
	 *
	 * NOTE(review): when operating in place (sap == NULL) the store to
	 * sin6_scope_id lands in the sizeof(uint32_t) bytes past the length
	 * supplied in *len -- confirm that in-place callers provide a buffer
	 * with room for it.
	 */
	if (oldv6size) {
		if (bdom == AF_INET6) {
			sin6 = (struct sockaddr_in6 *)kosa;
			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
			    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
			     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
				sin6->sin6_scope_id = 0;
			} else {
				linux_msg(curthread,
				    "obsolete pre-RFC2553 sockaddr_in6 rejected");
				error = EINVAL;
				goto out;
			}
		} else
			/* Not IPv6 after all: undo the size adjustment. */
			salen -= sizeof(uint32_t);
	}
#endif
	if (bdom == AF_INET) {
		if (salen < sizeof(struct sockaddr_in)) {
			error = EINVAL;
			goto out;
		}
		/* Any excess beyond struct sockaddr_in is ignored. */
		salen = sizeof(struct sockaddr_in);
	}

	/*
	 * An AF_LOCAL address longer than the native struct sockaddr_un is
	 * shrunk to the header plus the actual length of the path, and is
	 * rejected only if it still does not fit.
	 */
	if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
		hdrlen = offsetof(struct sockaddr_un, sun_path);
		name = ((struct sockaddr_un *)kosa)->sun_path;
		if (*name == '\0') {
			/*
			 * Linux abstract namespace starts with a NUL byte.
			 * XXX We do not support abstract namespace yet.
			 */
			namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
		} else
			namelen = strnlen(name, salen - hdrlen);
		salen = hdrlen + namelen;
		if (salen > sizeof(struct sockaddr_un)) {
			error = ENAMETOOLONG;
			goto out;
		}
	}

	if (bdom == AF_NETLINK) {
		if (salen < sizeof(struct sockaddr_nl)) {
			error = EINVAL;
			goto out;
		}
		/* Any excess beyond struct sockaddr_nl is ignored. */
		salen = sizeof(struct sockaddr_nl);
	}

	/* Rewrite the header in the native layout: family plus length. */
	sa = (struct sockaddr *)kosa;
	sa->sa_family = bdom;
	sa->sa_len = salen;

	if (sap != NULL) {
		*sap = sa;
		*len = salen;
	}
	return (0);

out:
	/* Only release the buffer if it was allocated in this function. */
	if (sap != NULL)
		free(kosa, M_SONAME);
	return (error);
}
438
439 void
linux_dev_shm_create(void)440 linux_dev_shm_create(void)
441 {
442 int error;
443
444 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
445 &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
446 if (error != 0) {
447 printf("%s: failed to create device node, error %d\n",
448 __func__, error);
449 }
450 }
451
452 void
linux_dev_shm_destroy(void)453 linux_dev_shm_destroy(void)
454 {
455
456 destroy_dev(dev_shm_cdev);
457 }
458
459 int
bsd_to_linux_bits_(int value,struct bsd_to_linux_bitmap * bitmap,size_t mapcnt,int no_value)460 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
461 size_t mapcnt, int no_value)
462 {
463 int bsd_mask, bsd_value, linux_mask, linux_value;
464 int linux_ret;
465 size_t i;
466 bool applied;
467
468 applied = false;
469 linux_ret = 0;
470 for (i = 0; i < mapcnt; ++i) {
471 bsd_mask = bitmap[i].bsd_mask;
472 bsd_value = bitmap[i].bsd_value;
473 if (bsd_mask == 0)
474 bsd_mask = bsd_value;
475
476 linux_mask = bitmap[i].linux_mask;
477 linux_value = bitmap[i].linux_value;
478 if (linux_mask == 0)
479 linux_mask = linux_value;
480
481 /*
482 * If a mask larger than just the value is set, we explicitly
483 * want to make sure that only this bit we mapped within that
484 * mask is set.
485 */
486 if ((value & bsd_mask) == bsd_value) {
487 linux_ret = (linux_ret & ~linux_mask) | linux_value;
488 applied = true;
489 }
490 }
491
492 if (!applied)
493 return (no_value);
494 return (linux_ret);
495 }
496
497 int
linux_to_bsd_bits_(int value,struct bsd_to_linux_bitmap * bitmap,size_t mapcnt,int no_value)498 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
499 size_t mapcnt, int no_value)
500 {
501 int bsd_mask, bsd_value, linux_mask, linux_value;
502 int bsd_ret;
503 size_t i;
504 bool applied;
505
506 applied = false;
507 bsd_ret = 0;
508 for (i = 0; i < mapcnt; ++i) {
509 bsd_mask = bitmap[i].bsd_mask;
510 bsd_value = bitmap[i].bsd_value;
511 if (bsd_mask == 0)
512 bsd_mask = bsd_value;
513
514 linux_mask = bitmap[i].linux_mask;
515 linux_value = bitmap[i].linux_value;
516 if (linux_mask == 0)
517 linux_mask = linux_value;
518
519 /*
520 * If a mask larger than just the value is set, we explicitly
521 * want to make sure that only this bit we mapped within that
522 * mask is set.
523 */
524 if ((value & linux_mask) == linux_value) {
525 bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
526 applied = true;
527 }
528 }
529
530 if (!applied)
531 return (no_value);
532 return (bsd_ret);
533 }
534
535 void
linux_to_bsd_poll_events(struct thread * td,int fd,short lev,short * bev)536 linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
537 short *bev)
538 {
539 struct file *fp;
540 int error;
541 short bits = 0;
542
543 if (lev & LINUX_POLLIN)
544 bits |= POLLIN;
545 if (lev & LINUX_POLLPRI)
546 bits |= POLLPRI;
547 if (lev & LINUX_POLLOUT)
548 bits |= POLLOUT;
549 if (lev & LINUX_POLLERR)
550 bits |= POLLERR;
551 if (lev & LINUX_POLLHUP)
552 bits |= POLLHUP;
553 if (lev & LINUX_POLLNVAL)
554 bits |= POLLNVAL;
555 if (lev & LINUX_POLLRDNORM)
556 bits |= POLLRDNORM;
557 if (lev & LINUX_POLLRDBAND)
558 bits |= POLLRDBAND;
559 if (lev & LINUX_POLLWRBAND)
560 bits |= POLLWRBAND;
561 if (lev & LINUX_POLLWRNORM)
562 bits |= POLLWRNORM;
563
564 if (lev & LINUX_POLLRDHUP) {
565 /*
566 * It seems that the Linux silencly ignores POLLRDHUP
567 * on non-socket file descriptors unlike FreeBSD, where
568 * events bits is more strictly checked (POLLSTANDARD).
569 */
570 error = fget_unlocked(td, fd, &cap_no_rights, &fp);
571 if (error == 0) {
572 /*
573 * XXX. On FreeBSD POLLRDHUP applies only to
574 * stream sockets.
575 */
576 if (fp->f_type == DTYPE_SOCKET)
577 bits |= POLLRDHUP;
578 fdrop(fp, td);
579 }
580 }
581
582 if (lev & LINUX_POLLMSG)
583 LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
584 if (lev & LINUX_POLLREMOVE)
585 LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
586
587 *bev = bits;
588 }
589
590 void
bsd_to_linux_poll_events(short bev,short * lev)591 bsd_to_linux_poll_events(short bev, short *lev)
592 {
593 short bits = 0;
594
595 if (bev & POLLIN)
596 bits |= LINUX_POLLIN;
597 if (bev & POLLPRI)
598 bits |= LINUX_POLLPRI;
599 if (bev & (POLLOUT | POLLWRNORM))
600 /*
601 * POLLWRNORM is equal to POLLOUT on FreeBSD,
602 * but not on Linux
603 */
604 bits |= LINUX_POLLOUT;
605 if (bev & POLLERR)
606 bits |= LINUX_POLLERR;
607 if (bev & POLLHUP)
608 bits |= LINUX_POLLHUP;
609 if (bev & POLLNVAL)
610 bits |= LINUX_POLLNVAL;
611 if (bev & POLLRDNORM)
612 bits |= LINUX_POLLRDNORM;
613 if (bev & POLLRDBAND)
614 bits |= LINUX_POLLRDBAND;
615 if (bev & POLLWRBAND)
616 bits |= LINUX_POLLWRBAND;
617 if (bev & POLLRDHUP)
618 bits |= LINUX_POLLRDHUP;
619
620 *lev = bits;
621 }
622