1 /*- 2 * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org> 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 #include "opt_inet6.h" 27 28 #include <sys/param.h> 29 #include <sys/conf.h> 30 #include <sys/ctype.h> 31 #include <sys/file.h> 32 #include <sys/filedesc.h> 33 #include <sys/jail.h> 34 #include <sys/limits.h> 35 #include <sys/lock.h> 36 #include <sys/malloc.h> 37 #include <sys/poll.h> 38 #include <sys/proc.h> 39 #include <sys/signalvar.h> 40 #include <sys/socket.h> 41 #include <sys/socketvar.h> 42 43 #include <netlink/netlink.h> 44 #include <sys/un.h> 45 #include <netinet/in.h> 46 47 #include <compat/linux/linux.h> 48 #include <compat/linux/linux_common.h> 49 #include <compat/linux/linux_mib.h> 50 #include <compat/linux/linux_util.h> 51 52 _Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr), 53 "Linux struct sockaddr size"); 54 _Static_assert(offsetof(struct sockaddr, sa_data) == 55 offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout"); 56 57 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = { 58 LINUX_SIGHUP, /* SIGHUP */ 59 LINUX_SIGINT, /* SIGINT */ 60 LINUX_SIGQUIT, /* SIGQUIT */ 61 LINUX_SIGILL, /* SIGILL */ 62 LINUX_SIGTRAP, /* SIGTRAP */ 63 LINUX_SIGABRT, /* SIGABRT */ 64 0, /* SIGEMT */ 65 LINUX_SIGFPE, /* SIGFPE */ 66 LINUX_SIGKILL, /* SIGKILL */ 67 LINUX_SIGBUS, /* SIGBUS */ 68 LINUX_SIGSEGV, /* SIGSEGV */ 69 LINUX_SIGSYS, /* SIGSYS */ 70 LINUX_SIGPIPE, /* SIGPIPE */ 71 LINUX_SIGALRM, /* SIGALRM */ 72 LINUX_SIGTERM, /* SIGTERM */ 73 LINUX_SIGURG, /* SIGURG */ 74 LINUX_SIGSTOP, /* SIGSTOP */ 75 LINUX_SIGTSTP, /* SIGTSTP */ 76 LINUX_SIGCONT, /* SIGCONT */ 77 LINUX_SIGCHLD, /* SIGCHLD */ 78 LINUX_SIGTTIN, /* SIGTTIN */ 79 LINUX_SIGTTOU, /* SIGTTOU */ 80 LINUX_SIGIO, /* SIGIO */ 81 LINUX_SIGXCPU, /* SIGXCPU */ 82 LINUX_SIGXFSZ, /* SIGXFSZ */ 83 LINUX_SIGVTALRM,/* SIGVTALRM */ 84 LINUX_SIGPROF, /* SIGPROF */ 85 LINUX_SIGWINCH, /* SIGWINCH */ 86 0, /* SIGINFO */ 87 LINUX_SIGUSR1, /* SIGUSR1 */ 88 LINUX_SIGUSR2 /* SIGUSR2 */ 89 }; 90 91 #define LINUX_SIGPWREMU (SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1) 92 93 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = { 94 SIGHUP, /* LINUX_SIGHUP */ 95 SIGINT, /* LINUX_SIGINT */ 96 SIGQUIT, /* LINUX_SIGQUIT */ 97 SIGILL, /* LINUX_SIGILL */ 98 SIGTRAP, /* LINUX_SIGTRAP */ 99 SIGABRT, /* LINUX_SIGABRT */ 100 SIGBUS, /* LINUX_SIGBUS */ 101 SIGFPE, /* LINUX_SIGFPE */ 102 SIGKILL, /* LINUX_SIGKILL */ 103 SIGUSR1, /* LINUX_SIGUSR1 */ 104 SIGSEGV, /* LINUX_SIGSEGV */ 105 SIGUSR2, /* LINUX_SIGUSR2 */ 106 SIGPIPE, /* LINUX_SIGPIPE */ 107 SIGALRM, /* LINUX_SIGALRM */ 108 SIGTERM, /* LINUX_SIGTERM */ 109 SIGBUS, /* LINUX_SIGSTKFLT */ 110 SIGCHLD, /* LINUX_SIGCHLD */ 111 SIGCONT, /* LINUX_SIGCONT */ 112 SIGSTOP, /* LINUX_SIGSTOP */ 113 SIGTSTP, /* LINUX_SIGTSTP */ 114 SIGTTIN, /* LINUX_SIGTTIN */ 115 SIGTTOU, /* LINUX_SIGTTOU */ 116 SIGURG, /* LINUX_SIGURG */ 117 SIGXCPU, /* LINUX_SIGXCPU */ 118 SIGXFSZ, /* LINUX_SIGXFSZ */ 119 SIGVTALRM, /* LINUX_SIGVTALARM */ 120 SIGPROF, /* LINUX_SIGPROF */ 121 SIGWINCH, /* LINUX_SIGWINCH */ 122 SIGIO, /* LINUX_SIGIO */ 123 /* 124 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal 125 * to the first unused FreeBSD signal number. Since Linux supports 126 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65. 127 */ 128 LINUX_SIGPWREMU,/* LINUX_SIGPWR */ 129 SIGSYS /* LINUX_SIGSYS */ 130 }; 131 132 static struct cdev *dev_shm_cdev; 133 static struct cdevsw dev_shm_cdevsw = { 134 .d_version = D_VERSION, 135 .d_name = "dev_shm", 136 }; 137 138 /* 139 * Map Linux RT signals to the FreeBSD RT signals. 140 */ 141 static inline int 142 linux_to_bsd_rt_signal(int sig) 143 { 144 145 return (SIGRTMIN + sig - LINUX_SIGRTMIN); 146 } 147 148 static inline int 149 bsd_to_linux_rt_signal(int sig) 150 { 151 152 return (sig - SIGRTMIN + LINUX_SIGRTMIN); 153 } 154 155 int 156 linux_to_bsd_signal(int sig) 157 { 158 159 KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig)); 160 161 if (sig < LINUX_SIGRTMIN) 162 return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]); 163 164 return (linux_to_bsd_rt_signal(sig)); 165 } 166 167 int 168 bsd_to_linux_signal(int sig) 169 { 170 171 if (sig <= LINUX_SIGTBLSZ) 172 return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]); 173 if (sig == LINUX_SIGPWREMU) 174 return (LINUX_SIGPWR); 175 176 return (bsd_to_linux_rt_signal(sig)); 177 } 178 179 int 180 linux_to_bsd_sigaltstack(int lsa) 181 { 182 int bsa = 0; 183 184 if (lsa & LINUX_SS_DISABLE) 185 bsa |= SS_DISABLE; 186 /* 187 * Linux ignores SS_ONSTACK flag for ss 188 * parameter while FreeBSD prohibits it. 189 */ 190 return (bsa); 191 } 192 193 int 194 bsd_to_linux_sigaltstack(int bsa) 195 { 196 int lsa = 0; 197 198 if (bsa & SS_DISABLE) 199 lsa |= LINUX_SS_DISABLE; 200 if (bsa & SS_ONSTACK) 201 lsa |= LINUX_SS_ONSTACK; 202 return (lsa); 203 } 204 205 void 206 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss) 207 { 208 int b, l; 209 210 SIGEMPTYSET(*bss); 211 for (l = 1; l <= LINUX_SIGRTMAX; l++) { 212 if (LINUX_SIGISMEMBER(*lss, l)) { 213 b = linux_to_bsd_signal(l); 214 if (b) 215 SIGADDSET(*bss, b); 216 } 217 } 218 } 219 220 void 221 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss) 222 { 223 int b, l; 224 225 LINUX_SIGEMPTYSET(*lss); 226 for (b = 1; b <= SIGRTMAX; b++) { 227 if (SIGISMEMBER(*bss, b)) { 228 l = bsd_to_linux_signal(b); 229 if (l) 230 LINUX_SIGADDSET(*lss, l); 231 } 232 } 233 } 234 235 sa_family_t 236 linux_to_bsd_domain(sa_family_t domain) 237 { 238 239 switch (domain) { 240 case LINUX_AF_UNSPEC: 241 return (AF_UNSPEC); 242 case LINUX_AF_UNIX: 243 return (AF_LOCAL); 244 case LINUX_AF_INET: 245 return (AF_INET); 246 case LINUX_AF_INET6: 247 return (AF_INET6); 248 case LINUX_AF_AX25: 249 return (AF_CCITT); 250 case LINUX_AF_IPX: 251 return (AF_IPX); 252 case LINUX_AF_APPLETALK: 253 return (AF_APPLETALK); 254 case LINUX_AF_NETLINK: 255 return (AF_NETLINK); 256 } 257 return (AF_UNKNOWN); 258 } 259 260 sa_family_t 261 bsd_to_linux_domain(sa_family_t domain) 262 { 263 264 switch (domain) { 265 case AF_UNSPEC: 266 return (LINUX_AF_UNSPEC); 267 case AF_LOCAL: 268 return (LINUX_AF_UNIX); 269 case AF_INET: 270 return (LINUX_AF_INET); 271 case AF_INET6: 272 return (LINUX_AF_INET6); 273 case AF_CCITT: 274 return (LINUX_AF_AX25); 275 case AF_IPX: 276 return (LINUX_AF_IPX); 277 case AF_APPLETALK: 278 return (LINUX_AF_APPLETALK); 279 case AF_NETLINK: 280 return (LINUX_AF_NETLINK); 281 } 282 return (AF_UNKNOWN); 283 } 284 285 /* 286 * Based on the fact that: 287 * 1. Native and Linux storage of struct sockaddr 288 * and struct sockaddr_in6 are equal. 289 * 2. On Linux sa_family is the first member of all struct sockaddr. 290 */ 291 int 292 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa, 293 socklen_t len) 294 { 295 struct l_sockaddr *kosa; 296 sa_family_t bdom; 297 298 *lsa = NULL; 299 if (len < 2 || len > UCHAR_MAX) 300 return (EINVAL); 301 bdom = bsd_to_linux_domain(sa->sa_family); 302 if (bdom == AF_UNKNOWN) 303 return (EAFNOSUPPORT); 304 305 kosa = malloc(len, M_LINUX, M_WAITOK); 306 bcopy(sa, kosa, len); 307 kosa->sa_family = bdom; 308 *lsa = kosa; 309 return (0); 310 } 311 312 /* 313 * If sap is NULL, then osa points at already copied in linux sockaddr that 314 * should be edited in place. Otherwise memory is allocated, sockaddr 315 * copied in and returned in *sap. 316 */ 317 int 318 linux_to_bsd_sockaddr(struct l_sockaddr *osa, struct sockaddr **sap, 319 socklen_t *len) 320 { 321 struct sockaddr *sa; 322 struct l_sockaddr *kosa; 323 #ifdef INET6 324 struct sockaddr_in6 *sin6; 325 bool oldv6size; 326 #endif 327 char *name; 328 int salen, bdom, error, hdrlen, namelen; 329 330 if (*len < 2 || *len > UCHAR_MAX) 331 return (EINVAL); 332 333 salen = *len; 334 335 #ifdef INET6 336 oldv6size = false; 337 /* 338 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it 339 * if it's a v4-mapped address, so reserve the proper space 340 * for it. 341 */ 342 if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) { 343 salen += sizeof(uint32_t); 344 oldv6size = true; 345 } 346 #endif 347 348 if (sap != NULL) { 349 kosa = malloc(salen, M_SONAME, M_WAITOK); 350 if ((error = copyin(osa, kosa, *len))) 351 goto out; 352 } else 353 kosa = osa; 354 355 bdom = linux_to_bsd_domain(kosa->sa_family); 356 if (bdom == AF_UNKNOWN) { 357 error = EAFNOSUPPORT; 358 goto out; 359 } 360 361 #ifdef INET6 362 /* 363 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6, 364 * which lacks the scope id compared with RFC2553 one. If we detect 365 * the situation, reject the address and write a message to system log. 366 * 367 * Still accept addresses for which the scope id is not used. 368 */ 369 if (oldv6size) { 370 if (bdom == AF_INET6) { 371 sin6 = (struct sockaddr_in6 *)kosa; 372 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) || 373 (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) && 374 !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) && 375 !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) && 376 !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) && 377 !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) { 378 sin6->sin6_scope_id = 0; 379 } else { 380 linux_msg(curthread, 381 "obsolete pre-RFC2553 sockaddr_in6 rejected"); 382 error = EINVAL; 383 goto out; 384 } 385 } else 386 salen -= sizeof(uint32_t); 387 } 388 #endif 389 if (bdom == AF_INET) { 390 if (salen < sizeof(struct sockaddr_in)) { 391 error = EINVAL; 392 goto out; 393 } 394 salen = sizeof(struct sockaddr_in); 395 } 396 397 if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) { 398 hdrlen = offsetof(struct sockaddr_un, sun_path); 399 name = ((struct sockaddr_un *)kosa)->sun_path; 400 if (*name == '\0') { 401 /* 402 * Linux abstract namespace starts with a NULL byte. 403 * XXX We do not support abstract namespace yet. 404 */ 405 namelen = strnlen(name + 1, salen - hdrlen - 1) + 1; 406 } else 407 namelen = strnlen(name, salen - hdrlen); 408 salen = hdrlen + namelen; 409 if (salen > sizeof(struct sockaddr_un)) { 410 error = ENAMETOOLONG; 411 goto out; 412 } 413 } 414 415 if (bdom == AF_NETLINK) { 416 if (salen < sizeof(struct sockaddr_nl)) { 417 error = EINVAL; 418 goto out; 419 } 420 salen = sizeof(struct sockaddr_nl); 421 } 422 423 sa = (struct sockaddr *)kosa; 424 sa->sa_family = bdom; 425 sa->sa_len = salen; 426 427 if (sap != NULL) { 428 *sap = sa; 429 *len = salen; 430 } 431 return (0); 432 433 out: 434 if (sap != NULL) 435 free(kosa, M_SONAME); 436 return (error); 437 } 438 439 void 440 linux_dev_shm_create(void) 441 { 442 int error; 443 444 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev, 445 &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint"); 446 if (error != 0) { 447 printf("%s: failed to create device node, error %d\n", 448 __func__, error); 449 } 450 } 451 452 void 453 linux_dev_shm_destroy(void) 454 { 455 456 destroy_dev(dev_shm_cdev); 457 } 458 459 int 460 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap, 461 size_t mapcnt, int no_value) 462 { 463 int bsd_mask, bsd_value, linux_mask, linux_value; 464 int linux_ret; 465 size_t i; 466 bool applied; 467 468 applied = false; 469 linux_ret = 0; 470 for (i = 0; i < mapcnt; ++i) { 471 bsd_mask = bitmap[i].bsd_mask; 472 bsd_value = bitmap[i].bsd_value; 473 if (bsd_mask == 0) 474 bsd_mask = bsd_value; 475 476 linux_mask = bitmap[i].linux_mask; 477 linux_value = bitmap[i].linux_value; 478 if (linux_mask == 0) 479 linux_mask = linux_value; 480 481 /* 482 * If a mask larger than just the value is set, we explicitly 483 * want to make sure that only this bit we mapped within that 484 * mask is set. 485 */ 486 if ((value & bsd_mask) == bsd_value) { 487 linux_ret = (linux_ret & ~linux_mask) | linux_value; 488 applied = true; 489 } 490 } 491 492 if (!applied) 493 return (no_value); 494 return (linux_ret); 495 } 496 497 int 498 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap, 499 size_t mapcnt, int no_value) 500 { 501 int bsd_mask, bsd_value, linux_mask, linux_value; 502 int bsd_ret; 503 size_t i; 504 bool applied; 505 506 applied = false; 507 bsd_ret = 0; 508 for (i = 0; i < mapcnt; ++i) { 509 bsd_mask = bitmap[i].bsd_mask; 510 bsd_value = bitmap[i].bsd_value; 511 if (bsd_mask == 0) 512 bsd_mask = bsd_value; 513 514 linux_mask = bitmap[i].linux_mask; 515 linux_value = bitmap[i].linux_value; 516 if (linux_mask == 0) 517 linux_mask = linux_value; 518 519 /* 520 * If a mask larger than just the value is set, we explicitly 521 * want to make sure that only this bit we mapped within that 522 * mask is set. 523 */ 524 if ((value & linux_mask) == linux_value) { 525 bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value; 526 applied = true; 527 } 528 } 529 530 if (!applied) 531 return (no_value); 532 return (bsd_ret); 533 } 534 535 void 536 linux_to_bsd_poll_events(struct thread *td, int fd, short lev, 537 short *bev) 538 { 539 struct file *fp; 540 int error; 541 short bits = 0; 542 543 if (lev & LINUX_POLLIN) 544 bits |= POLLIN; 545 if (lev & LINUX_POLLPRI) 546 bits |= POLLPRI; 547 if (lev & LINUX_POLLOUT) 548 bits |= POLLOUT; 549 if (lev & LINUX_POLLERR) 550 bits |= POLLERR; 551 if (lev & LINUX_POLLHUP) 552 bits |= POLLHUP; 553 if (lev & LINUX_POLLNVAL) 554 bits |= POLLNVAL; 555 if (lev & LINUX_POLLRDNORM) 556 bits |= POLLRDNORM; 557 if (lev & LINUX_POLLRDBAND) 558 bits |= POLLRDBAND; 559 if (lev & LINUX_POLLWRBAND) 560 bits |= POLLWRBAND; 561 if (lev & LINUX_POLLWRNORM) 562 bits |= POLLWRNORM; 563 564 if (lev & LINUX_POLLRDHUP) { 565 /* 566 * It seems that the Linux silencly ignores POLLRDHUP 567 * on non-socket file descriptors unlike FreeBSD, where 568 * events bits is more strictly checked (POLLSTANDARD). 569 */ 570 error = fget_unlocked(td, fd, &cap_no_rights, &fp); 571 if (error == 0) { 572 /* 573 * XXX. On FreeBSD POLLRDHUP applies only to 574 * stream sockets. 575 */ 576 if (fp->f_type == DTYPE_SOCKET) 577 bits |= POLLRDHUP; 578 fdrop(fp, td); 579 } 580 } 581 582 if (lev & LINUX_POLLMSG) 583 LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev); 584 if (lev & LINUX_POLLREMOVE) 585 LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev); 586 587 *bev = bits; 588 } 589 590 void 591 bsd_to_linux_poll_events(short bev, short *lev) 592 { 593 short bits = 0; 594 595 if (bev & POLLIN) 596 bits |= LINUX_POLLIN; 597 if (bev & POLLPRI) 598 bits |= LINUX_POLLPRI; 599 if (bev & (POLLOUT | POLLWRNORM)) 600 /* 601 * POLLWRNORM is equal to POLLOUT on FreeBSD, 602 * but not on Linux 603 */ 604 bits |= LINUX_POLLOUT; 605 if (bev & POLLERR) 606 bits |= LINUX_POLLERR; 607 if (bev & POLLHUP) 608 bits |= LINUX_POLLHUP; 609 if (bev & POLLNVAL) 610 bits |= LINUX_POLLNVAL; 611 if (bev & POLLRDNORM) 612 bits |= LINUX_POLLRDNORM; 613 if (bev & POLLRDBAND) 614 bits |= LINUX_POLLRDBAND; 615 if (bev & POLLWRBAND) 616 bits |= LINUX_POLLWRBAND; 617 if (bev & POLLRDHUP) 618 bits |= LINUX_POLLRDHUP; 619 620 *lev = bits; 621 } 622