1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 31 #include <sys/types.h> 32 #include <sys/ck.h> 33 #include <sys/lock.h> 34 #include <sys/malloc.h> 35 #include <sys/rmlock.h> 36 #include <sys/socket.h> 37 #include <sys/vnode.h> 38 39 #include <net/if.h> 40 #include <net/if_dl.h> 41 #include <net/route.h> 42 #include <net/route/nhop.h> 43 #include <net/route/route_ctl.h> 44 #include <netlink/netlink.h> 45 #include <netlink/netlink_ctl.h> 46 #include <netlink/netlink_linux.h> 47 #include <netlink/netlink_route.h> 48 49 #include <compat/linux/linux.h> 50 #include <compat/linux/linux_common.h> 51 #include <compat/linux/linux_util.h> 52 53 #define DEBUG_MOD_NAME nl_linux 54 #define DEBUG_MAX_LEVEL LOG_DEBUG3 55 #include <netlink/netlink_debug.h> 56 _DECLARE_DEBUG(LOG_INFO); 57 58 static bool 59 valid_rta_size(const struct rtattr *rta, int sz) 60 { 61 return (NL_RTA_DATA_LEN(rta) == sz); 62 } 63 64 static bool 65 valid_rta_u32(const struct rtattr *rta) 66 { 67 return (valid_rta_size(rta, sizeof(uint32_t))); 68 } 69 70 static uint32_t 71 _rta_get_uint32(const struct rtattr *rta) 72 { 73 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 74 } 75 76 static struct nlmsghdr * 77 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 78 { 79 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 80 81 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 82 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 83 84 return (hdr); 85 } 86 87 static struct nlmsghdr * 88 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 89 { 90 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 91 92 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 93 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 94 95 return (hdr); 96 } 97 98 static struct nlmsghdr * 99 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 100 { 101 /* Tweak address families and default fib only */ 102 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 103 struct nlattr *nla, *nla_head; 104 int attrs_len; 105 106 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 107 108 if (rtm->rtm_table == 254) 109 rtm->rtm_table = 0; 110 111 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 112 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 113 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 114 115 NLA_FOREACH(nla, nla_head, attrs_len) { 116 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 117 nla->nla_type, nla->nla_len, attrs_len); 118 struct rtattr *rta = (struct rtattr *)nla; 119 if (rta->rta_len < sizeof(struct rtattr)) { 120 break; 121 } 122 switch (rta->rta_type) { 123 case NL_RTA_TABLE: 124 if (!valid_rta_u32(rta)) 125 goto done; 126 rtm->rtm_table = 0; 127 uint32_t fibnum = _rta_get_uint32(rta); 128 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 129 if (fibnum == 254) { 130 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 131 } 132 break; 133 } 134 } 135 136 done: 137 return (hdr); 138 } 139 140 static struct nlmsghdr * 141 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 142 { 143 switch (hdr->nlmsg_type) { 144 case NL_RTM_GETROUTE: 145 case NL_RTM_NEWROUTE: 146 case NL_RTM_DELROUTE: 147 return (rtnl_route_from_linux(hdr, npt)); 148 case NL_RTM_GETNEIGH: 149 return (rtnl_neigh_from_linux(hdr, npt)); 150 case NL_RTM_GETADDR: 151 return (rtnl_ifaddr_from_linux(hdr, npt)); 152 /* Silence warning for the messages where no translation is required */ 153 case NL_RTM_NEWLINK: 154 case NL_RTM_DELLINK: 155 case NL_RTM_GETLINK: 156 break; 157 default: 158 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 159 hdr->nlmsg_type); 160 } 161 162 return (hdr); 163 } 164 165 static struct nlmsghdr * 166 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 167 struct nl_pstate *npt) 168 { 169 switch (netlink_family) { 170 case NETLINK_ROUTE: 171 return (rtnl_from_linux(hdr, npt)); 172 } 173 174 return (hdr); 175 } 176 177 178 /************************************************************ 179 * Kernel -> Linux 180 ************************************************************/ 181 182 static bool 183 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 184 { 185 char *out_hdr; 186 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 187 188 if (out_hdr != NULL) { 189 memcpy(out_hdr, hdr, hdr->nlmsg_len); 190 return (true); 191 } 192 return (false); 193 } 194 195 static bool 196 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 197 { 198 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 199 hdr->nlmsg_flags, 0)); 200 } 201 202 static void * 203 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 204 { 205 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 206 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 207 208 return (next_hdr); 209 } 210 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 211 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 212 213 static bool 214 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 215 { 216 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 217 if (nla != NULL) { 218 memcpy(nla, nla_orig, nla_orig->nla_len); 219 return (true); 220 } 221 return (false); 222 } 223 224 /* 225 * Translate a FreeBSD interface name to a Linux interface name. 226 */ 227 static bool 228 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 229 { 230 char ifname[LINUX_IFNAMSIZ]; 231 232 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 233 sizeof(ifname)) <= 0) 234 return (false); 235 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 236 } 237 238 #define LINUX_NLA_UNHANDLED -1 239 /* 240 * Translate a FreeBSD attribute to a Linux attribute. 241 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 242 * and the caller must take care of it, otherwise the result is returned. 243 */ 244 static int 245 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 246 struct nl_writer *nw) 247 { 248 249 switch (hdr->nlmsg_type) { 250 case NL_RTM_NEWLINK: 251 case NL_RTM_DELLINK: 252 case NL_RTM_GETLINK: 253 switch (nla->nla_type) { 254 case IFLA_IFNAME: 255 return (nlmsg_translate_ifname_nla(nla, nw)); 256 default: 257 break; 258 } 259 default: 260 break; 261 } 262 return (LINUX_NLA_UNHANDLED); 263 } 264 265 static bool 266 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 267 { 268 struct nlattr *nla; 269 int ret; 270 271 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 272 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 273 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 274 275 NLA_FOREACH(nla, nla_head, attrs_len) { 276 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 277 if (nla->nla_len < sizeof(struct nlattr)) { 278 return (false); 279 } 280 ret = nlmsg_translate_all_nla(hdr, nla, nw); 281 if (ret == LINUX_NLA_UNHANDLED) 282 ret = nlmsg_copy_nla(nla, nw); 283 if (!ret) 284 return (false); 285 } 286 return (true); 287 } 288 #undef LINUX_NLA_UNHANDLED 289 290 static unsigned int 291 rtnl_if_flags_to_linux(unsigned int if_flags) 292 { 293 unsigned int result = 0; 294 295 for (int i = 0; i < 31; i++) { 296 unsigned int flag = 1 << i; 297 if (!(flag & if_flags)) 298 continue; 299 switch (flag) { 300 case IFF_UP: 301 case IFF_BROADCAST: 302 case IFF_DEBUG: 303 case IFF_LOOPBACK: 304 case IFF_POINTOPOINT: 305 case IFF_DRV_RUNNING: 306 case IFF_NOARP: 307 case IFF_PROMISC: 308 case IFF_ALLMULTI: 309 result |= flag; 310 break; 311 case IFF_NEEDSEPOCH: 312 case IFF_DRV_OACTIVE: 313 case IFF_SIMPLEX: 314 case IFF_LINK0: 315 case IFF_LINK1: 316 case IFF_LINK2: 317 case IFF_CANTCONFIG: 318 case IFF_PPROMISC: 319 case IFF_MONITOR: 320 case IFF_STATICARP: 321 case IFF_STICKYARP: 322 case IFF_DYING: 323 case IFF_RENAMING: 324 /* No Linux analogue */ 325 break; 326 case IFF_MULTICAST: 327 result |= 1 << 12; 328 } 329 } 330 return (result); 331 } 332 333 static bool 334 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 335 struct nl_writer *nw) 336 { 337 if (!nlmsg_copy_header(hdr, nw)) 338 return (false); 339 340 struct ifinfomsg *ifinfo; 341 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 342 343 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 344 /* Convert interface type */ 345 switch (ifinfo->ifi_type) { 346 case IFT_ETHER: 347 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 348 break; 349 } 350 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 351 352 /* Copy attributes unchanged */ 353 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 354 return (false); 355 356 /* make ip(8) happy */ 357 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 358 return (false); 359 360 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 361 return (false); 362 363 nlmsg_end(nw); 364 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 365 return (true); 366 } 367 368 static bool 369 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 370 struct nl_writer *nw) 371 { 372 if (!nlmsg_copy_header(hdr, nw)) 373 return (false); 374 375 struct ifaddrmsg *ifamsg; 376 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 377 378 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 379 /* XXX: fake ifa_flags? */ 380 381 /* Copy attributes unchanged */ 382 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 383 return (false); 384 385 nlmsg_end(nw); 386 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 387 return (true); 388 } 389 390 static bool 391 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 392 struct nl_writer *nw) 393 { 394 if (!nlmsg_copy_header(hdr, nw)) 395 return (false); 396 397 struct ndmsg *ndm; 398 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 399 400 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 401 402 /* Copy attributes unchanged */ 403 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 404 return (false); 405 406 nlmsg_end(nw); 407 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 408 return (true); 409 } 410 411 static bool 412 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 413 struct nl_writer *nw) 414 { 415 if (!nlmsg_copy_header(hdr, nw)) 416 return (false); 417 418 struct rtmsg *rtm; 419 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 420 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 421 422 struct nlattr *nla; 423 424 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 425 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 426 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 427 428 NLA_FOREACH(nla, nla_head, attrs_len) { 429 struct rtattr *rta = (struct rtattr *)nla; 430 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 431 if (rta->rta_len < sizeof(struct rtattr)) { 432 break; 433 } 434 435 switch (rta->rta_type) { 436 case NL_RTA_TABLE: 437 { 438 uint32_t fibnum; 439 fibnum = _rta_get_uint32(rta); 440 if (fibnum == 0) 441 fibnum = 254; 442 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 443 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 444 return (false); 445 } 446 break; 447 default: 448 if (!nlmsg_copy_nla(nla, nw)) 449 return (false); 450 break; 451 } 452 } 453 454 nlmsg_end(nw); 455 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 456 return (true); 457 } 458 459 static bool 460 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 461 { 462 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 463 464 switch (hdr->nlmsg_type) { 465 case NL_RTM_NEWLINK: 466 case NL_RTM_DELLINK: 467 case NL_RTM_GETLINK: 468 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 469 case NL_RTM_NEWADDR: 470 case NL_RTM_DELADDR: 471 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 472 case NL_RTM_NEWROUTE: 473 case NL_RTM_DELROUTE: 474 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 475 case NL_RTM_NEWNEIGH: 476 case NL_RTM_DELNEIGH: 477 case NL_RTM_GETNEIGH: 478 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 479 default: 480 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 481 hdr->nlmsg_type); 482 return (handle_default_out(hdr, nw)); 483 } 484 } 485 486 static bool 487 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 488 { 489 if (!nlmsg_copy_header(hdr, nw)) 490 return (false); 491 492 struct nlmsgerr *nlerr; 493 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 494 nlerr->error = bsd_to_linux_errno(nlerr->error); 495 496 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 497 if (hdr->nlmsg_len == copied_len) { 498 nlmsg_end(nw); 499 return (true); 500 } 501 502 /* 503 * CAP_ACK was not set. Original request needs to be translated. 504 * XXX: implement translation of the original message 505 */ 506 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 507 nlerr->msg.nlmsg_type); 508 char *dst_payload, *src_payload; 509 int copy_len = hdr->nlmsg_len - copied_len; 510 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 511 512 src_payload = (char *)hdr + copied_len; 513 514 memcpy(dst_payload, src_payload, copy_len); 515 nlmsg_end(nw); 516 517 return (true); 518 } 519 520 static bool 521 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, 522 struct nl_writer *nw) 523 { 524 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 525 switch (hdr->nlmsg_type) { 526 case NLMSG_ERROR: 527 return (nlmsg_error_to_linux(hdr, nlp, nw)); 528 case NLMSG_NOOP: 529 case NLMSG_DONE: 530 case NLMSG_OVERRUN: 531 return (handle_default_out(hdr, nw)); 532 default: 533 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 534 hdr->nlmsg_type); 535 return (handle_default_out(hdr, nw)); 536 } 537 } 538 539 switch (netlink_family) { 540 case NETLINK_ROUTE: 541 return (rtnl_to_linux(hdr, nlp, nw)); 542 default: 543 return (handle_default_out(hdr, nw)); 544 } 545 } 546 547 static struct mbuf * 548 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) 549 { 550 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); 551 struct nl_writer nw = {}; 552 553 struct mbuf *m = NULL; 554 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { 555 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", 556 data_length); 557 return (NULL); 558 } 559 560 /* Assume correct headers. Buffer IS mutable */ 561 int count = 0; 562 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { 563 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; 564 int msglen = NLMSG_ALIGN(hdr->nlmsg_len); 565 count++; 566 567 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { 568 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 569 hdr->nlmsg_type); 570 m_freem(m); 571 return (NULL); 572 } 573 offset += msglen; 574 } 575 nlmsg_flush(&nw); 576 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, 577 m ? m_length(m, NULL) : 0); 578 579 return (m); 580 } 581 582 static struct mbuf * 583 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) 584 { 585 /* XXX: easiest solution, not optimized for performance */ 586 int data_length = m_length(m, NULL); 587 char *buf = malloc(data_length, M_LINUX, M_NOWAIT); 588 if (buf == NULL) { 589 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", 590 data_length); 591 m_freem(m); 592 return (NULL); 593 } 594 m_copydata(m, 0, data_length, buf); 595 m_freem(m); 596 597 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); 598 free(buf, M_LINUX); 599 600 return (m); 601 } 602 603 static struct linux_netlink_provider linux_netlink_v1 = { 604 .mbufs_to_linux = mbufs_to_linux, 605 .msgs_to_linux = nlmsgs_to_linux, 606 .msg_from_linux = nlmsg_from_linux, 607 }; 608 609 void 610 linux_netlink_register(void) 611 { 612 linux_netlink_p = &linux_netlink_v1; 613 } 614 615 void 616 linux_netlink_deregister(void) 617 { 618 linux_netlink_p = NULL; 619 } 620