1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 #include "opt_netlink.h" 34 35 #include <sys/types.h> 36 #include <sys/ck.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/rmlock.h> 40 #include <sys/socket.h> 41 #include <sys/vnode.h> 42 43 #include <net/if.h> 44 #include <net/if_dl.h> 45 #include <net/route.h> 46 #include <net/route/nhop.h> 47 #include <net/route/route_ctl.h> 48 #include <netlink/netlink.h> 49 #include <netlink/netlink_ctl.h> 50 #include <netlink/netlink_linux.h> 51 #include <netlink/netlink_route.h> 52 53 #include <compat/linux/linux.h> 54 #include <compat/linux/linux_common.h> 55 #include <compat/linux/linux_util.h> 56 57 #define DEBUG_MOD_NAME nl_linux 58 #define DEBUG_MAX_LEVEL LOG_DEBUG3 59 #include <netlink/netlink_debug.h> 60 _DECLARE_DEBUG(LOG_INFO); 61 62 static bool 63 valid_rta_size(const struct rtattr *rta, int sz) 64 { 65 return (NL_RTA_DATA_LEN(rta) == sz); 66 } 67 68 static bool 69 valid_rta_u32(const struct rtattr *rta) 70 { 71 return (valid_rta_size(rta, sizeof(uint32_t))); 72 } 73 74 static uint32_t 75 _rta_get_uint32(const struct rtattr *rta) 76 { 77 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 78 } 79 80 static struct nlmsghdr * 81 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 82 { 83 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 84 85 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 86 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 87 88 return (hdr); 89 } 90 91 static struct nlmsghdr * 92 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 93 { 94 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 95 96 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 97 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 98 99 return (hdr); 100 } 101 102 static struct nlmsghdr * 103 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 104 { 105 /* Tweak address families and default fib only */ 106 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 107 struct nlattr *nla, *nla_head; 108 int attrs_len; 109 110 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 111 112 if (rtm->rtm_table == 254) 113 rtm->rtm_table = 0; 114 115 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 116 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 117 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 118 119 NLA_FOREACH(nla, nla_head, attrs_len) { 120 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 121 nla->nla_type, nla->nla_len, attrs_len); 122 struct rtattr *rta = (struct rtattr *)nla; 123 if (rta->rta_len < sizeof(struct rtattr)) { 124 break; 125 } 126 switch (rta->rta_type) { 127 case NL_RTA_TABLE: 128 if (!valid_rta_u32(rta)) 129 goto done; 130 rtm->rtm_table = 0; 131 uint32_t fibnum = _rta_get_uint32(rta); 132 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 133 if (fibnum == 254) { 134 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 135 } 136 break; 137 } 138 } 139 140 done: 141 return (hdr); 142 } 143 144 static struct nlmsghdr * 145 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 146 { 147 switch (hdr->nlmsg_type) { 148 case NL_RTM_GETROUTE: 149 case NL_RTM_NEWROUTE: 150 case NL_RTM_DELROUTE: 151 return (rtnl_route_from_linux(hdr, npt)); 152 case NL_RTM_GETNEIGH: 153 return (rtnl_neigh_from_linux(hdr, npt)); 154 case NL_RTM_GETADDR: 155 return (rtnl_ifaddr_from_linux(hdr, npt)); 156 /* Silence warning for the messages where no translation is required */ 157 case NL_RTM_NEWLINK: 158 case NL_RTM_DELLINK: 159 case NL_RTM_GETLINK: 160 break; 161 default: 162 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 163 hdr->nlmsg_type); 164 } 165 166 return (hdr); 167 } 168 169 static struct nlmsghdr * 170 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 171 struct nl_pstate *npt) 172 { 173 switch (netlink_family) { 174 case NETLINK_ROUTE: 175 return (rtnl_from_linux(hdr, npt)); 176 } 177 178 return (hdr); 179 } 180 181 182 /************************************************************ 183 * Kernel -> Linux 184 ************************************************************/ 185 186 static bool 187 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 188 { 189 char *out_hdr; 190 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 191 192 if (out_hdr != NULL) { 193 memcpy(out_hdr, hdr, hdr->nlmsg_len); 194 return (true); 195 } 196 return (false); 197 } 198 199 static bool 200 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 201 { 202 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 203 hdr->nlmsg_flags, 0)); 204 } 205 206 static void * 207 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 208 { 209 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 210 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 211 212 return (next_hdr); 213 } 214 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 215 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 216 217 static bool 218 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 219 { 220 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 221 if (nla != NULL) { 222 memcpy(nla, nla_orig, nla_orig->nla_len); 223 return (true); 224 } 225 return (false); 226 } 227 228 /* 229 * Translate a FreeBSD interface name to a Linux interface name. 230 */ 231 static bool 232 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 233 { 234 char ifname[LINUX_IFNAMSIZ]; 235 236 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 237 sizeof(ifname)) <= 0) 238 return (false); 239 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 240 } 241 242 #define LINUX_NLA_UNHANDLED -1 243 /* 244 * Translate a FreeBSD attribute to a Linux attribute. 245 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 246 * and the caller must take care of it, otherwise the result is returned. 247 */ 248 static int 249 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 250 struct nl_writer *nw) 251 { 252 253 switch (hdr->nlmsg_type) { 254 case NL_RTM_NEWLINK: 255 case NL_RTM_DELLINK: 256 case NL_RTM_GETLINK: 257 switch (nla->nla_type) { 258 case IFLA_IFNAME: 259 return (nlmsg_translate_ifname_nla(nla, nw)); 260 default: 261 break; 262 } 263 default: 264 break; 265 } 266 return (LINUX_NLA_UNHANDLED); 267 } 268 269 static bool 270 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 271 { 272 struct nlattr *nla; 273 int ret; 274 275 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 276 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 277 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 278 279 NLA_FOREACH(nla, nla_head, attrs_len) { 280 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 281 if (nla->nla_len < sizeof(struct nlattr)) { 282 return (false); 283 } 284 ret = nlmsg_translate_all_nla(hdr, nla, nw); 285 if (ret == LINUX_NLA_UNHANDLED) 286 ret = nlmsg_copy_nla(nla, nw); 287 if (!ret) 288 return (false); 289 } 290 return (true); 291 } 292 #undef LINUX_NLA_UNHANDLED 293 294 static unsigned int 295 rtnl_if_flags_to_linux(unsigned int if_flags) 296 { 297 unsigned int result = 0; 298 299 for (int i = 0; i < 31; i++) { 300 unsigned int flag = 1 << i; 301 if (!(flag & if_flags)) 302 continue; 303 switch (flag) { 304 case IFF_UP: 305 case IFF_BROADCAST: 306 case IFF_DEBUG: 307 case IFF_LOOPBACK: 308 case IFF_POINTOPOINT: 309 case IFF_DRV_RUNNING: 310 case IFF_NOARP: 311 case IFF_PROMISC: 312 case IFF_ALLMULTI: 313 result |= flag; 314 break; 315 case IFF_NEEDSEPOCH: 316 case IFF_DRV_OACTIVE: 317 case IFF_SIMPLEX: 318 case IFF_LINK0: 319 case IFF_LINK1: 320 case IFF_LINK2: 321 case IFF_CANTCONFIG: 322 case IFF_PPROMISC: 323 case IFF_MONITOR: 324 case IFF_STATICARP: 325 case IFF_STICKYARP: 326 case IFF_DYING: 327 case IFF_RENAMING: 328 case IFF_NOGROUP: 329 /* No Linux analogue */ 330 break; 331 case IFF_MULTICAST: 332 result |= 1 << 12; 333 } 334 } 335 return (result); 336 } 337 338 static bool 339 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 340 struct nl_writer *nw) 341 { 342 if (!nlmsg_copy_header(hdr, nw)) 343 return (false); 344 345 struct ifinfomsg *ifinfo; 346 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 347 348 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 349 /* Convert interface type */ 350 switch (ifinfo->ifi_type) { 351 case IFT_ETHER: 352 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 353 break; 354 } 355 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 356 357 /* Copy attributes unchanged */ 358 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 359 return (false); 360 361 /* make ip(8) happy */ 362 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 363 return (false); 364 365 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 366 return (false); 367 368 nlmsg_end(nw); 369 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 370 return (true); 371 } 372 373 static bool 374 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 375 struct nl_writer *nw) 376 { 377 if (!nlmsg_copy_header(hdr, nw)) 378 return (false); 379 380 struct ifaddrmsg *ifamsg; 381 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 382 383 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 384 /* XXX: fake ifa_flags? */ 385 386 /* Copy attributes unchanged */ 387 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 388 return (false); 389 390 nlmsg_end(nw); 391 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 392 return (true); 393 } 394 395 static bool 396 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 397 struct nl_writer *nw) 398 { 399 if (!nlmsg_copy_header(hdr, nw)) 400 return (false); 401 402 struct ndmsg *ndm; 403 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 404 405 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 406 407 /* Copy attributes unchanged */ 408 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 409 return (false); 410 411 nlmsg_end(nw); 412 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 413 return (true); 414 } 415 416 static bool 417 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 418 struct nl_writer *nw) 419 { 420 if (!nlmsg_copy_header(hdr, nw)) 421 return (false); 422 423 struct rtmsg *rtm; 424 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 425 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 426 427 struct nlattr *nla; 428 429 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 430 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 431 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 432 433 NLA_FOREACH(nla, nla_head, attrs_len) { 434 struct rtattr *rta = (struct rtattr *)nla; 435 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 436 if (rta->rta_len < sizeof(struct rtattr)) { 437 break; 438 } 439 440 switch (rta->rta_type) { 441 case NL_RTA_TABLE: 442 { 443 uint32_t fibnum; 444 fibnum = _rta_get_uint32(rta); 445 if (fibnum == 0) 446 fibnum = 254; 447 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 448 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 449 return (false); 450 } 451 break; 452 default: 453 if (!nlmsg_copy_nla(nla, nw)) 454 return (false); 455 break; 456 } 457 } 458 459 nlmsg_end(nw); 460 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 461 return (true); 462 } 463 464 static bool 465 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 466 { 467 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 468 469 switch (hdr->nlmsg_type) { 470 case NL_RTM_NEWLINK: 471 case NL_RTM_DELLINK: 472 case NL_RTM_GETLINK: 473 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 474 case NL_RTM_NEWADDR: 475 case NL_RTM_DELADDR: 476 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 477 case NL_RTM_NEWROUTE: 478 case NL_RTM_DELROUTE: 479 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 480 case NL_RTM_NEWNEIGH: 481 case NL_RTM_DELNEIGH: 482 case NL_RTM_GETNEIGH: 483 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 484 default: 485 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 486 hdr->nlmsg_type); 487 return (handle_default_out(hdr, nw)); 488 } 489 } 490 491 static bool 492 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 493 { 494 if (!nlmsg_copy_header(hdr, nw)) 495 return (false); 496 497 struct nlmsgerr *nlerr; 498 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 499 nlerr->error = bsd_to_linux_errno(nlerr->error); 500 501 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 502 if (hdr->nlmsg_len == copied_len) { 503 nlmsg_end(nw); 504 return (true); 505 } 506 507 /* 508 * CAP_ACK was not set. Original request needs to be translated. 509 * XXX: implement translation of the original message 510 */ 511 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 512 nlerr->msg.nlmsg_type); 513 char *dst_payload, *src_payload; 514 int copy_len = hdr->nlmsg_len - copied_len; 515 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 516 517 src_payload = (char *)hdr + copied_len; 518 519 memcpy(dst_payload, src_payload, copy_len); 520 nlmsg_end(nw); 521 522 return (true); 523 } 524 525 static bool 526 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, 527 struct nl_writer *nw) 528 { 529 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 530 switch (hdr->nlmsg_type) { 531 case NLMSG_ERROR: 532 return (nlmsg_error_to_linux(hdr, nlp, nw)); 533 case NLMSG_NOOP: 534 case NLMSG_DONE: 535 case NLMSG_OVERRUN: 536 return (handle_default_out(hdr, nw)); 537 default: 538 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 539 hdr->nlmsg_type); 540 return (handle_default_out(hdr, nw)); 541 } 542 } 543 544 switch (netlink_family) { 545 case NETLINK_ROUTE: 546 return (rtnl_to_linux(hdr, nlp, nw)); 547 default: 548 return (handle_default_out(hdr, nw)); 549 } 550 } 551 552 static struct mbuf * 553 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) 554 { 555 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); 556 struct nl_writer nw = {}; 557 558 struct mbuf *m = NULL; 559 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { 560 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", 561 data_length); 562 return (NULL); 563 } 564 565 /* Assume correct headers. Buffer IS mutable */ 566 int count = 0; 567 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { 568 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; 569 int msglen = NLMSG_ALIGN(hdr->nlmsg_len); 570 count++; 571 572 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { 573 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 574 hdr->nlmsg_type); 575 m_freem(m); 576 return (NULL); 577 } 578 offset += msglen; 579 } 580 nlmsg_flush(&nw); 581 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, 582 m ? m_length(m, NULL) : 0); 583 584 return (m); 585 } 586 587 static struct mbuf * 588 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) 589 { 590 /* XXX: easiest solution, not optimized for performance */ 591 int data_length = m_length(m, NULL); 592 char *buf = malloc(data_length, M_LINUX, M_NOWAIT); 593 if (buf == NULL) { 594 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", 595 data_length); 596 m_freem(m); 597 return (NULL); 598 } 599 m_copydata(m, 0, data_length, buf); 600 m_freem(m); 601 602 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); 603 free(buf, M_LINUX); 604 605 return (m); 606 } 607 608 static struct linux_netlink_provider linux_netlink_v1 = { 609 .mbufs_to_linux = mbufs_to_linux, 610 .msgs_to_linux = nlmsgs_to_linux, 611 .msg_from_linux = nlmsg_from_linux, 612 }; 613 614 void 615 linux_netlink_register(void) 616 { 617 linux_netlink_p = &linux_netlink_v1; 618 } 619 620 void 621 linux_netlink_deregister(void) 622 { 623 linux_netlink_p = NULL; 624 } 625