1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/types.h> 35 #include <sys/ck.h> 36 #include <sys/lock.h> 37 #include <sys/malloc.h> 38 #include <sys/rmlock.h> 39 #include <sys/socket.h> 40 #include <sys/vnode.h> 41 42 #include <net/if.h> 43 #include <net/if_dl.h> 44 #include <net/route.h> 45 #include <net/route/nhop.h> 46 #include <net/route/route_ctl.h> 47 #include <netlink/netlink.h> 48 #include <netlink/netlink_ctl.h> 49 #include <netlink/netlink_linux.h> 50 #include <netlink/netlink_route.h> 51 52 #include <compat/linux/linux.h> 53 #include <compat/linux/linux_common.h> 54 #include <compat/linux/linux_util.h> 55 56 #define DEBUG_MOD_NAME nl_linux 57 #define DEBUG_MAX_LEVEL LOG_DEBUG3 58 #include <netlink/netlink_debug.h> 59 _DECLARE_DEBUG(LOG_DEBUG); 60 61 static bool 62 valid_rta_size(const struct rtattr *rta, int sz) 63 { 64 return (NL_RTA_DATA_LEN(rta) == sz); 65 } 66 67 static bool 68 valid_rta_u32(const struct rtattr *rta) 69 { 70 return (valid_rta_size(rta, sizeof(uint32_t))); 71 } 72 73 static uint32_t 74 _rta_get_uint32(const struct rtattr *rta) 75 { 76 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 77 } 78 79 static struct nlmsghdr * 80 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 81 { 82 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 83 84 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 85 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 86 87 return (hdr); 88 } 89 90 static struct nlmsghdr * 91 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 92 { 93 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 94 95 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 96 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 97 98 return (hdr); 99 } 100 101 static struct nlmsghdr * 102 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 103 { 104 /* Tweak address families and default fib only */ 105 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 106 struct nlattr *nla, *nla_head; 107 int attrs_len; 108 109 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 110 111 if (rtm->rtm_table == 254) 112 rtm->rtm_table = 0; 113 114 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 115 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 116 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 117 118 NLA_FOREACH(nla, nla_head, attrs_len) { 119 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 120 nla->nla_type, nla->nla_len, attrs_len); 121 struct rtattr *rta = (struct rtattr *)nla; 122 if (rta->rta_len < sizeof(struct rtattr)) { 123 break; 124 } 125 switch (rta->rta_type) { 126 case NL_RTA_TABLE: 127 if (!valid_rta_u32(rta)) 128 goto done; 129 rtm->rtm_table = 0; 130 uint32_t fibnum = _rta_get_uint32(rta); 131 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 132 if (fibnum == 254) { 133 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 134 } 135 break; 136 } 137 } 138 139 done: 140 return (hdr); 141 } 142 143 static struct nlmsghdr * 144 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 145 { 146 switch (hdr->nlmsg_type) { 147 case NL_RTM_GETROUTE: 148 case NL_RTM_NEWROUTE: 149 case NL_RTM_DELROUTE: 150 return (rtnl_route_from_linux(hdr, npt)); 151 case NL_RTM_GETNEIGH: 152 return (rtnl_neigh_from_linux(hdr, npt)); 153 case NL_RTM_GETADDR: 154 return (rtnl_ifaddr_from_linux(hdr, npt)); 155 /* Silence warning for the messages where no translation is required */ 156 case NL_RTM_NEWLINK: 157 case NL_RTM_DELLINK: 158 case NL_RTM_GETLINK: 159 break; 160 default: 161 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 162 hdr->nlmsg_type); 163 } 164 165 return (hdr); 166 } 167 168 static struct nlmsghdr * 169 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 170 struct nl_pstate *npt) 171 { 172 switch (netlink_family) { 173 case NETLINK_ROUTE: 174 return (rtnl_from_linux(hdr, npt)); 175 } 176 177 return (hdr); 178 } 179 180 181 /************************************************************ 182 * Kernel -> Linux 183 ************************************************************/ 184 185 static bool 186 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 187 { 188 char *out_hdr; 189 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 190 191 if (out_hdr != NULL) { 192 memcpy(out_hdr, hdr, hdr->nlmsg_len); 193 return (true); 194 } 195 return (false); 196 } 197 198 static bool 199 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 200 { 201 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 202 hdr->nlmsg_flags, 0)); 203 } 204 205 static void * 206 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 207 { 208 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 209 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 210 211 return (next_hdr); 212 } 213 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 214 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 215 216 static bool 217 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 218 { 219 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 220 if (nla != NULL) { 221 memcpy(nla, nla_orig, nla_orig->nla_len); 222 return (true); 223 } 224 return (false); 225 } 226 227 /* 228 * Translate a FreeBSD interface name to a Linux interface name. 229 */ 230 static bool 231 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 232 { 233 char ifname[LINUX_IFNAMSIZ]; 234 235 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 236 sizeof(ifname)) <= 0) 237 return (false); 238 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 239 } 240 241 #define LINUX_NLA_UNHANDLED -1 242 /* 243 * Translate a FreeBSD attribute to a Linux attribute. 244 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 245 * and the caller must take care of it, otherwise the result is returned. 246 */ 247 static int 248 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 249 struct nl_writer *nw) 250 { 251 252 switch (hdr->nlmsg_type) { 253 case NL_RTM_NEWLINK: 254 case NL_RTM_DELLINK: 255 case NL_RTM_GETLINK: 256 switch (nla->nla_type) { 257 case IFLA_IFNAME: 258 return (nlmsg_translate_ifname_nla(nla, nw)); 259 default: 260 break; 261 } 262 default: 263 break; 264 } 265 return (LINUX_NLA_UNHANDLED); 266 } 267 268 static bool 269 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 270 { 271 struct nlattr *nla; 272 int ret; 273 274 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 275 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 276 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 277 278 NLA_FOREACH(nla, nla_head, attrs_len) { 279 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 280 if (nla->nla_len < sizeof(struct nlattr)) { 281 return (false); 282 } 283 ret = nlmsg_translate_all_nla(hdr, nla, nw); 284 if (ret == LINUX_NLA_UNHANDLED) 285 ret = nlmsg_copy_nla(nla, nw); 286 if (!ret) 287 return (false); 288 } 289 return (true); 290 } 291 #undef LINUX_NLA_UNHANDLED 292 293 static unsigned int 294 rtnl_if_flags_to_linux(unsigned int if_flags) 295 { 296 unsigned int result = 0; 297 298 for (int i = 0; i < 31; i++) { 299 unsigned int flag = 1 << i; 300 if (!(flag & if_flags)) 301 continue; 302 switch (flag) { 303 case IFF_UP: 304 case IFF_BROADCAST: 305 case IFF_DEBUG: 306 case IFF_LOOPBACK: 307 case IFF_POINTOPOINT: 308 case IFF_DRV_RUNNING: 309 case IFF_NOARP: 310 case IFF_PROMISC: 311 case IFF_ALLMULTI: 312 result |= flag; 313 break; 314 case IFF_KNOWSEPOCH: 315 case IFF_DRV_OACTIVE: 316 case IFF_SIMPLEX: 317 case IFF_LINK0: 318 case IFF_LINK1: 319 case IFF_LINK2: 320 case IFF_CANTCONFIG: 321 case IFF_PPROMISC: 322 case IFF_MONITOR: 323 case IFF_STATICARP: 324 case IFF_STICKYARP: 325 case IFF_DYING: 326 case IFF_RENAMING: 327 case IFF_NOGROUP: 328 /* No Linux analogue */ 329 break; 330 case IFF_MULTICAST: 331 result |= 1 << 12; 332 } 333 } 334 return (result); 335 } 336 337 static bool 338 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 339 struct nl_writer *nw) 340 { 341 if (!nlmsg_copy_header(hdr, nw)) 342 return (false); 343 344 struct ifinfomsg *ifinfo; 345 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 346 347 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 348 /* Convert interface type */ 349 switch (ifinfo->ifi_type) { 350 case IFT_ETHER: 351 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 352 break; 353 } 354 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 355 356 /* Copy attributes unchanged */ 357 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 358 return (false); 359 360 /* make ip(8) happy */ 361 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 362 return (false); 363 364 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 365 return (false); 366 367 nlmsg_end(nw); 368 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 369 return (true); 370 } 371 372 static bool 373 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 374 struct nl_writer *nw) 375 { 376 if (!nlmsg_copy_header(hdr, nw)) 377 return (false); 378 379 struct ifaddrmsg *ifamsg; 380 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 381 382 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 383 /* XXX: fake ifa_flags? */ 384 385 /* Copy attributes unchanged */ 386 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 387 return (false); 388 389 nlmsg_end(nw); 390 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 391 return (true); 392 } 393 394 static bool 395 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 396 struct nl_writer *nw) 397 { 398 if (!nlmsg_copy_header(hdr, nw)) 399 return (false); 400 401 struct ndmsg *ndm; 402 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 403 404 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 405 406 /* Copy attributes unchanged */ 407 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 408 return (false); 409 410 nlmsg_end(nw); 411 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 412 return (true); 413 } 414 415 static bool 416 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 417 struct nl_writer *nw) 418 { 419 if (!nlmsg_copy_header(hdr, nw)) 420 return (false); 421 422 struct rtmsg *rtm; 423 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 424 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 425 426 struct nlattr *nla; 427 428 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 429 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 430 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 431 432 NLA_FOREACH(nla, nla_head, attrs_len) { 433 struct rtattr *rta = (struct rtattr *)nla; 434 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 435 if (rta->rta_len < sizeof(struct rtattr)) { 436 break; 437 } 438 439 switch (rta->rta_type) { 440 case NL_RTA_TABLE: 441 { 442 uint32_t fibnum; 443 fibnum = _rta_get_uint32(rta); 444 if (fibnum == 0) 445 fibnum = 254; 446 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 447 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 448 return (false); 449 } 450 break; 451 default: 452 if (!nlmsg_copy_nla(nla, nw)) 453 return (false); 454 break; 455 } 456 } 457 458 nlmsg_end(nw); 459 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 460 return (true); 461 } 462 463 static bool 464 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 465 { 466 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 467 468 switch (hdr->nlmsg_type) { 469 case NL_RTM_NEWLINK: 470 case NL_RTM_DELLINK: 471 case NL_RTM_GETLINK: 472 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 473 case NL_RTM_NEWADDR: 474 case NL_RTM_DELADDR: 475 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 476 case NL_RTM_NEWROUTE: 477 case NL_RTM_DELROUTE: 478 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 479 case NL_RTM_NEWNEIGH: 480 case NL_RTM_DELNEIGH: 481 case NL_RTM_GETNEIGH: 482 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 483 default: 484 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 485 hdr->nlmsg_type); 486 return (handle_default_out(hdr, nw)); 487 } 488 } 489 490 static bool 491 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 492 { 493 if (!nlmsg_copy_header(hdr, nw)) 494 return (false); 495 496 struct nlmsgerr *nlerr; 497 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 498 nlerr->error = bsd_to_linux_errno(nlerr->error); 499 500 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 501 if (hdr->nlmsg_len == copied_len) { 502 nlmsg_end(nw); 503 return (true); 504 } 505 506 /* 507 * CAP_ACK was not set. Original request needs to be translated. 508 * XXX: implement translation of the original message 509 */ 510 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 511 nlerr->msg.nlmsg_type); 512 char *dst_payload, *src_payload; 513 int copy_len = hdr->nlmsg_len - copied_len; 514 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 515 516 src_payload = (char *)hdr + copied_len; 517 518 memcpy(dst_payload, src_payload, copy_len); 519 nlmsg_end(nw); 520 521 return (true); 522 } 523 524 static bool 525 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, 526 struct nl_writer *nw) 527 { 528 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 529 switch (hdr->nlmsg_type) { 530 case NLMSG_ERROR: 531 return (nlmsg_error_to_linux(hdr, nlp, nw)); 532 case NLMSG_NOOP: 533 case NLMSG_DONE: 534 case NLMSG_OVERRUN: 535 return (handle_default_out(hdr, nw)); 536 default: 537 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 538 hdr->nlmsg_type); 539 return (handle_default_out(hdr, nw)); 540 } 541 } 542 543 switch (netlink_family) { 544 case NETLINK_ROUTE: 545 return (rtnl_to_linux(hdr, nlp, nw)); 546 default: 547 return (handle_default_out(hdr, nw)); 548 } 549 } 550 551 static struct mbuf * 552 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) 553 { 554 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); 555 struct nl_writer nw = {}; 556 557 struct mbuf *m = NULL; 558 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { 559 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", 560 data_length); 561 return (NULL); 562 } 563 564 /* Assume correct headers. Buffer IS mutable */ 565 int count = 0; 566 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { 567 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; 568 int msglen = NLMSG_ALIGN(hdr->nlmsg_len); 569 count++; 570 571 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { 572 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 573 hdr->nlmsg_type); 574 m_freem(m); 575 return (NULL); 576 } 577 offset += msglen; 578 } 579 nlmsg_flush(&nw); 580 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, 581 m ? m_length(m, NULL) : 0); 582 583 return (m); 584 } 585 586 static struct mbuf * 587 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) 588 { 589 /* XXX: easiest solution, not optimized for performance */ 590 int data_length = m_length(m, NULL); 591 char *buf = malloc(data_length, M_LINUX, M_NOWAIT); 592 if (buf == NULL) { 593 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", 594 data_length); 595 m_freem(m); 596 return (NULL); 597 } 598 m_copydata(m, 0, data_length, buf); 599 m_freem(m); 600 601 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); 602 free(buf, M_LINUX); 603 604 return (m); 605 } 606 607 static struct linux_netlink_provider linux_netlink_v1 = { 608 .mbufs_to_linux = mbufs_to_linux, 609 .msgs_to_linux = nlmsgs_to_linux, 610 .msg_from_linux = nlmsg_from_linux, 611 }; 612 613 void 614 linux_netlink_register(void) 615 { 616 linux_netlink_p = &linux_netlink_v1; 617 } 618 619 void 620 linux_netlink_deregister(void) 621 { 622 linux_netlink_p = NULL; 623 } 624