1 /*- 2 * Copyright (c) 2015 Dmitry Chagin 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <opt_inet6.h> 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/conf.h> 35 #include <sys/ctype.h> 36 #include <sys/jail.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/signalvar.h> 40 #include <sys/socket.h> 41 #include <sys/socketvar.h> 42 43 #include <net/if.h> 44 #include <net/if_var.h> 45 #include <net/if_dl.h> 46 #include <net/if_types.h> 47 48 #include <sys/un.h> 49 #include <netinet/in.h> 50 51 #include <compat/linux/linux.h> 52 #include <compat/linux/linux_common.h> 53 #include <compat/linux/linux_util.h> 54 55 struct futex_list futex_list; 56 struct mtx futex_mtx; /* protects the futex list */ 57 58 CTASSERT(LINUX_IFNAMSIZ == IFNAMSIZ); 59 60 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = { 61 LINUX_SIGHUP, /* SIGHUP */ 62 LINUX_SIGINT, /* SIGINT */ 63 LINUX_SIGQUIT, /* SIGQUIT */ 64 LINUX_SIGILL, /* SIGILL */ 65 LINUX_SIGTRAP, /* SIGTRAP */ 66 LINUX_SIGABRT, /* SIGABRT */ 67 0, /* SIGEMT */ 68 LINUX_SIGFPE, /* SIGFPE */ 69 LINUX_SIGKILL, /* SIGKILL */ 70 LINUX_SIGBUS, /* SIGBUS */ 71 LINUX_SIGSEGV, /* SIGSEGV */ 72 LINUX_SIGSYS, /* SIGSYS */ 73 LINUX_SIGPIPE, /* SIGPIPE */ 74 LINUX_SIGALRM, /* SIGALRM */ 75 LINUX_SIGTERM, /* SIGTERM */ 76 LINUX_SIGURG, /* SIGURG */ 77 LINUX_SIGSTOP, /* SIGSTOP */ 78 LINUX_SIGTSTP, /* SIGTSTP */ 79 LINUX_SIGCONT, /* SIGCONT */ 80 LINUX_SIGCHLD, /* SIGCHLD */ 81 LINUX_SIGTTIN, /* SIGTTIN */ 82 LINUX_SIGTTOU, /* SIGTTOU */ 83 LINUX_SIGIO, /* SIGIO */ 84 LINUX_SIGXCPU, /* SIGXCPU */ 85 LINUX_SIGXFSZ, /* SIGXFSZ */ 86 LINUX_SIGVTALRM,/* SIGVTALRM */ 87 LINUX_SIGPROF, /* SIGPROF */ 88 LINUX_SIGWINCH, /* SIGWINCH */ 89 0, /* SIGINFO */ 90 LINUX_SIGUSR1, /* SIGUSR1 */ 91 LINUX_SIGUSR2 /* SIGUSR2 */ 92 }; 93 94 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = { 95 SIGHUP, /* LINUX_SIGHUP */ 96 SIGINT, /* LINUX_SIGINT */ 97 SIGQUIT, /* LINUX_SIGQUIT */ 98 SIGILL, /* LINUX_SIGILL */ 99 SIGTRAP, /* LINUX_SIGTRAP */ 100 SIGABRT, /* LINUX_SIGABRT */ 101 SIGBUS, /* LINUX_SIGBUS */ 102 SIGFPE, /* LINUX_SIGFPE */ 103 SIGKILL, /* LINUX_SIGKILL */ 104 SIGUSR1, /* LINUX_SIGUSR1 */ 105 SIGSEGV, /* LINUX_SIGSEGV */ 106 SIGUSR2, /* LINUX_SIGUSR2 */ 107 SIGPIPE, /* LINUX_SIGPIPE */ 108 SIGALRM, /* LINUX_SIGALRM */ 109 SIGTERM, /* LINUX_SIGTERM */ 110 SIGBUS, /* LINUX_SIGSTKFLT */ 111 SIGCHLD, /* LINUX_SIGCHLD */ 112 SIGCONT, /* LINUX_SIGCONT */ 113 SIGSTOP, /* LINUX_SIGSTOP */ 114 SIGTSTP, /* LINUX_SIGTSTP */ 115 SIGTTIN, /* LINUX_SIGTTIN */ 116 SIGTTOU, /* LINUX_SIGTTOU */ 117 SIGURG, /* LINUX_SIGURG */ 118 SIGXCPU, /* LINUX_SIGXCPU */ 119 SIGXFSZ, /* LINUX_SIGXFSZ */ 120 SIGVTALRM, /* LINUX_SIGVTALARM */ 121 SIGPROF, /* LINUX_SIGPROF */ 122 SIGWINCH, /* LINUX_SIGWINCH */ 123 SIGIO, /* LINUX_SIGIO */ 124 /* 125 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal 126 * to the first unused FreeBSD signal number. Since Linux supports 127 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65. 128 */ 129 SIGRTMIN, /* LINUX_SIGPWR */ 130 SIGSYS /* LINUX_SIGSYS */ 131 }; 132 133 static struct cdev *dev_shm_cdev; 134 static struct cdevsw dev_shm_cdevsw = { 135 .d_version = D_VERSION, 136 .d_name = "dev_shm", 137 }; 138 139 /* 140 * Map Linux RT signals to the FreeBSD RT signals. 141 */ 142 static inline int 143 linux_to_bsd_rt_signal(int sig) 144 { 145 146 return (SIGRTMIN + 1 + sig - LINUX_SIGRTMIN); 147 } 148 149 static inline int 150 bsd_to_linux_rt_signal(int sig) 151 { 152 153 return (sig - SIGRTMIN - 1 + LINUX_SIGRTMIN); 154 } 155 156 int 157 linux_to_bsd_signal(int sig) 158 { 159 160 KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig)); 161 162 if (sig < LINUX_SIGRTMIN) 163 return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]); 164 165 return (linux_to_bsd_rt_signal(sig)); 166 } 167 168 int 169 bsd_to_linux_signal(int sig) 170 { 171 172 if (sig <= LINUX_SIGTBLSZ) 173 return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]); 174 if (sig == SIGRTMIN) 175 return (LINUX_SIGPWR); 176 177 return (bsd_to_linux_rt_signal(sig)); 178 } 179 180 int 181 linux_to_bsd_sigaltstack(int lsa) 182 { 183 int bsa = 0; 184 185 if (lsa & LINUX_SS_DISABLE) 186 bsa |= SS_DISABLE; 187 /* 188 * Linux ignores SS_ONSTACK flag for ss 189 * parameter while FreeBSD prohibits it. 190 */ 191 return (bsa); 192 } 193 194 int 195 bsd_to_linux_sigaltstack(int bsa) 196 { 197 int lsa = 0; 198 199 if (bsa & SS_DISABLE) 200 lsa |= LINUX_SS_DISABLE; 201 if (bsa & SS_ONSTACK) 202 lsa |= LINUX_SS_ONSTACK; 203 return (lsa); 204 } 205 206 void 207 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss) 208 { 209 int b, l; 210 211 SIGEMPTYSET(*bss); 212 for (l = 1; l <= LINUX_SIGRTMAX; l++) { 213 if (LINUX_SIGISMEMBER(*lss, l)) { 214 b = linux_to_bsd_signal(l); 215 if (b) 216 SIGADDSET(*bss, b); 217 } 218 } 219 } 220 221 void 222 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss) 223 { 224 int b, l; 225 226 LINUX_SIGEMPTYSET(*lss); 227 for (b = 1; b <= SIGRTMAX; b++) { 228 if (SIGISMEMBER(*bss, b)) { 229 l = bsd_to_linux_signal(b); 230 if (l) 231 LINUX_SIGADDSET(*lss, l); 232 } 233 } 234 } 235 236 /* 237 * Translate a Linux interface name to a FreeBSD interface name, 238 * and return the associated ifnet structure 239 * bsdname and lxname need to be least IFNAMSIZ bytes long, but 240 * can point to the same buffer. 241 */ 242 struct ifnet * 243 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname) 244 { 245 struct ifnet *ifp; 246 int len, unit; 247 char *ep; 248 int index; 249 bool is_eth, is_lo; 250 251 for (len = 0; len < LINUX_IFNAMSIZ; ++len) 252 if (!isalpha(lxname[len]) || lxname[len] == '\0') 253 break; 254 if (len == 0 || len == LINUX_IFNAMSIZ) 255 return (NULL); 256 /* Linux loopback interface name is lo (not lo0) */ 257 is_lo = (len == 2 && strncmp(lxname, "lo", len) == 0); 258 unit = (int)strtoul(lxname + len, &ep, 10); 259 if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) && 260 is_lo == 0) 261 return (NULL); 262 index = 0; 263 is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0); 264 265 CURVNET_SET(TD_TO_VNET(td)); 266 IFNET_RLOCK(); 267 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { 268 /* 269 * Allow Linux programs to use FreeBSD names. Don't presume 270 * we never have an interface named "eth", so don't make 271 * the test optional based on is_eth. 272 */ 273 if (strncmp(ifp->if_xname, lxname, LINUX_IFNAMSIZ) == 0) 274 break; 275 if (is_eth && IFP_IS_ETH(ifp) && unit == index++) 276 break; 277 if (is_lo && IFP_IS_LOOP(ifp)) 278 break; 279 } 280 IFNET_RUNLOCK(); 281 CURVNET_RESTORE(); 282 if (ifp != NULL && bsdname != NULL) 283 strlcpy(bsdname, ifp->if_xname, IFNAMSIZ); 284 return (ifp); 285 } 286 287 void 288 linux_ifflags(struct ifnet *ifp, short *flags) 289 { 290 unsigned short fl; 291 292 fl = (ifp->if_flags | ifp->if_drv_flags) & 0xffff; 293 *flags = 0; 294 if (fl & IFF_UP) 295 *flags |= LINUX_IFF_UP; 296 if (fl & IFF_BROADCAST) 297 *flags |= LINUX_IFF_BROADCAST; 298 if (fl & IFF_DEBUG) 299 *flags |= LINUX_IFF_DEBUG; 300 if (fl & IFF_LOOPBACK) 301 *flags |= LINUX_IFF_LOOPBACK; 302 if (fl & IFF_POINTOPOINT) 303 *flags |= LINUX_IFF_POINTOPOINT; 304 if (fl & IFF_DRV_RUNNING) 305 *flags |= LINUX_IFF_RUNNING; 306 if (fl & IFF_NOARP) 307 *flags |= LINUX_IFF_NOARP; 308 if (fl & IFF_PROMISC) 309 *flags |= LINUX_IFF_PROMISC; 310 if (fl & IFF_ALLMULTI) 311 *flags |= LINUX_IFF_ALLMULTI; 312 if (fl & IFF_MULTICAST) 313 *flags |= LINUX_IFF_MULTICAST; 314 } 315 316 int 317 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa) 318 { 319 struct ifaddr *ifa; 320 struct sockaddr_dl *sdl; 321 322 if (IFP_IS_LOOP(ifp)) { 323 bzero(lsa, sizeof(*lsa)); 324 lsa->sa_family = LINUX_ARPHRD_LOOPBACK; 325 return (0); 326 } 327 328 if (!IFP_IS_ETH(ifp)) 329 return (ENOENT); 330 331 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 332 sdl = (struct sockaddr_dl*)ifa->ifa_addr; 333 if (sdl != NULL && (sdl->sdl_family == AF_LINK) && 334 (sdl->sdl_type == IFT_ETHER)) { 335 bzero(lsa, sizeof(*lsa)); 336 lsa->sa_family = LINUX_ARPHRD_ETHER; 337 bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN); 338 return (0); 339 } 340 } 341 342 return (ENOENT); 343 } 344 345 int 346 linux_to_bsd_domain(int domain) 347 { 348 349 switch (domain) { 350 case LINUX_AF_UNSPEC: 351 return (AF_UNSPEC); 352 case LINUX_AF_UNIX: 353 return (AF_LOCAL); 354 case LINUX_AF_INET: 355 return (AF_INET); 356 case LINUX_AF_INET6: 357 return (AF_INET6); 358 case LINUX_AF_AX25: 359 return (AF_CCITT); 360 case LINUX_AF_IPX: 361 return (AF_IPX); 362 case LINUX_AF_APPLETALK: 363 return (AF_APPLETALK); 364 } 365 return (-1); 366 } 367 368 int 369 bsd_to_linux_domain(int domain) 370 { 371 372 switch (domain) { 373 case AF_UNSPEC: 374 return (LINUX_AF_UNSPEC); 375 case AF_LOCAL: 376 return (LINUX_AF_UNIX); 377 case AF_INET: 378 return (LINUX_AF_INET); 379 case AF_INET6: 380 return (LINUX_AF_INET6); 381 case AF_CCITT: 382 return (LINUX_AF_AX25); 383 case AF_IPX: 384 return (LINUX_AF_IPX); 385 case AF_APPLETALK: 386 return (LINUX_AF_APPLETALK); 387 } 388 return (-1); 389 } 390 391 /* 392 * Based on the fact that: 393 * 1. Native and Linux storage of struct sockaddr 394 * and struct sockaddr_in6 are equal. 395 * 2. On Linux sa_family is the first member of all struct sockaddr. 396 */ 397 int 398 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa, 399 socklen_t len) 400 { 401 struct l_sockaddr *kosa; 402 int error, bdom; 403 404 *lsa = NULL; 405 if (len < 2 || len > UCHAR_MAX) 406 return (EINVAL); 407 408 kosa = malloc(len, M_SONAME, M_WAITOK); 409 bcopy(sa, kosa, len); 410 411 bdom = bsd_to_linux_domain(sa->sa_family); 412 if (bdom == -1) { 413 error = EAFNOSUPPORT; 414 goto out; 415 } 416 417 kosa->sa_family = bdom; 418 *lsa = kosa; 419 return (0); 420 421 out: 422 free(kosa, M_SONAME); 423 return (error); 424 } 425 426 int 427 linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap, 428 socklen_t *len) 429 { 430 struct sockaddr *sa; 431 struct l_sockaddr *kosa; 432 #ifdef INET6 433 struct sockaddr_in6 *sin6; 434 bool oldv6size; 435 #endif 436 char *name; 437 int salen, bdom, error, hdrlen, namelen; 438 439 if (*len < 2 || *len > UCHAR_MAX) 440 return (EINVAL); 441 442 salen = *len; 443 444 #ifdef INET6 445 oldv6size = false; 446 /* 447 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it 448 * if it's a v4-mapped address, so reserve the proper space 449 * for it. 450 */ 451 if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) { 452 salen += sizeof(uint32_t); 453 oldv6size = true; 454 } 455 #endif 456 457 kosa = malloc(salen, M_SONAME, M_WAITOK); 458 459 if ((error = copyin(osa, kosa, *len))) 460 goto out; 461 462 bdom = linux_to_bsd_domain(kosa->sa_family); 463 if (bdom == -1) { 464 error = EAFNOSUPPORT; 465 goto out; 466 } 467 468 #ifdef INET6 469 /* 470 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6, 471 * which lacks the scope id compared with RFC2553 one. If we detect 472 * the situation, reject the address and write a message to system log. 473 * 474 * Still accept addresses for which the scope id is not used. 475 */ 476 if (oldv6size) { 477 if (bdom == AF_INET6) { 478 sin6 = (struct sockaddr_in6 *)kosa; 479 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) || 480 (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) && 481 !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) && 482 !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) && 483 !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) && 484 !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) { 485 sin6->sin6_scope_id = 0; 486 } else { 487 linux_msg(curthread, 488 "obsolete pre-RFC2553 sockaddr_in6 rejected"); 489 error = EINVAL; 490 goto out; 491 } 492 } else 493 salen -= sizeof(uint32_t); 494 } 495 #endif 496 if (bdom == AF_INET) { 497 if (salen < sizeof(struct sockaddr_in)) { 498 error = EINVAL; 499 goto out; 500 } 501 salen = sizeof(struct sockaddr_in); 502 } 503 504 if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) { 505 hdrlen = offsetof(struct sockaddr_un, sun_path); 506 name = ((struct sockaddr_un *)kosa)->sun_path; 507 if (*name == '\0') { 508 /* 509 * Linux abstract namespace starts with a NULL byte. 510 * XXX We do not support abstract namespace yet. 511 */ 512 namelen = strnlen(name + 1, salen - hdrlen - 1) + 1; 513 } else 514 namelen = strnlen(name, salen - hdrlen); 515 salen = hdrlen + namelen; 516 if (salen > sizeof(struct sockaddr_un)) { 517 error = ENAMETOOLONG; 518 goto out; 519 } 520 } 521 522 sa = (struct sockaddr *)kosa; 523 sa->sa_family = bdom; 524 sa->sa_len = salen; 525 526 *sap = sa; 527 *len = salen; 528 return (0); 529 530 out: 531 free(kosa, M_SONAME); 532 return (error); 533 } 534 535 void 536 linux_dev_shm_create(void) 537 { 538 int error; 539 540 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev, 541 &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint"); 542 if (error != 0) { 543 printf("%s: failed to create device node, error %d\n", 544 __func__, error); 545 } 546 } 547 548 void 549 linux_dev_shm_destroy(void) 550 { 551 552 destroy_dev(dev_shm_cdev); 553 } 554 555 int 556 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap, 557 size_t mapcnt, int no_value) 558 { 559 int bsd_mask, bsd_value, linux_mask, linux_value; 560 int linux_ret; 561 size_t i; 562 bool applied; 563 564 applied = false; 565 linux_ret = 0; 566 for (i = 0; i < mapcnt; ++i) { 567 bsd_mask = bitmap[i].bsd_mask; 568 bsd_value = bitmap[i].bsd_value; 569 if (bsd_mask == 0) 570 bsd_mask = bsd_value; 571 572 linux_mask = bitmap[i].linux_mask; 573 linux_value = bitmap[i].linux_value; 574 if (linux_mask == 0) 575 linux_mask = linux_value; 576 577 /* 578 * If a mask larger than just the value is set, we explicitly 579 * want to make sure that only this bit we mapped within that 580 * mask is set. 581 */ 582 if ((value & bsd_mask) == bsd_value) { 583 linux_ret = (linux_ret & ~linux_mask) | linux_value; 584 applied = true; 585 } 586 } 587 588 if (!applied) 589 return (no_value); 590 return (linux_ret); 591 } 592 593 int 594 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap, 595 size_t mapcnt, int no_value) 596 { 597 int bsd_mask, bsd_value, linux_mask, linux_value; 598 int bsd_ret; 599 size_t i; 600 bool applied; 601 602 applied = false; 603 bsd_ret = 0; 604 for (i = 0; i < mapcnt; ++i) { 605 bsd_mask = bitmap[i].bsd_mask; 606 bsd_value = bitmap[i].bsd_value; 607 if (bsd_mask == 0) 608 bsd_mask = bsd_value; 609 610 linux_mask = bitmap[i].linux_mask; 611 linux_value = bitmap[i].linux_value; 612 if (linux_mask == 0) 613 linux_mask = linux_value; 614 615 /* 616 * If a mask larger than just the value is set, we explicitly 617 * want to make sure that only this bit we mapped within that 618 * mask is set. 619 */ 620 if ((value & linux_mask) == linux_value) { 621 bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value; 622 applied = true; 623 } 624 } 625 626 if (!applied) 627 return (no_value); 628 return (bsd_ret); 629 } 630