1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 #include "opt_netlink.h" 34 35 #include <sys/types.h> 36 #include <sys/ck.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/rmlock.h> 40 #include <sys/socket.h> 41 #include <sys/vnode.h> 42 43 #include <net/if.h> 44 #include <net/if_dl.h> 45 #include <net/route.h> 46 #include <net/route/nhop.h> 47 #include <net/route/route_ctl.h> 48 #include <netlink/netlink.h> 49 #include <netlink/netlink_ctl.h> 50 #include <netlink/netlink_linux.h> 51 #include <netlink/netlink_route.h> 52 53 #include <compat/linux/linux.h> 54 #include <compat/linux/linux_common.h> 55 #include <compat/linux/linux_util.h> 56 57 #define DEBUG_MOD_NAME nl_linux 58 #define DEBUG_MAX_LEVEL LOG_DEBUG3 59 #include <netlink/netlink_debug.h> 60 _DECLARE_DEBUG(LOG_INFO); 61 62 static bool 63 valid_rta_size(const struct rtattr *rta, int sz) 64 { 65 return (NL_RTA_DATA_LEN(rta) == sz); 66 } 67 68 static bool 69 valid_rta_u32(const struct rtattr *rta) 70 { 71 return (valid_rta_size(rta, sizeof(uint32_t))); 72 } 73 74 static uint32_t 75 _rta_get_uint32(const struct rtattr *rta) 76 { 77 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 78 } 79 80 static struct nlmsghdr * 81 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 82 { 83 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 84 85 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 86 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 87 88 return (hdr); 89 } 90 91 static struct nlmsghdr * 92 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 93 { 94 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 95 96 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 97 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 98 99 return (hdr); 100 } 101 102 static struct nlmsghdr * 103 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 104 { 105 /* Tweak address families and default fib only */ 106 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 107 struct nlattr *nla, *nla_head; 108 int attrs_len; 109 110 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 111 112 if (rtm->rtm_table == 254) 113 rtm->rtm_table = 0; 114 115 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 116 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 117 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 118 119 NLA_FOREACH(nla, nla_head, attrs_len) { 120 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 121 nla->nla_type, nla->nla_len, attrs_len); 122 struct rtattr *rta = (struct rtattr *)nla; 123 if (rta->rta_len < sizeof(struct rtattr)) { 124 break; 125 } 126 switch (rta->rta_type) { 127 case NL_RTA_TABLE: 128 if (!valid_rta_u32(rta)) 129 goto done; 130 rtm->rtm_table = 0; 131 uint32_t fibnum = _rta_get_uint32(rta); 132 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 133 if (fibnum == 254) { 134 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 135 } 136 break; 137 } 138 } 139 140 done: 141 return (hdr); 142 } 143 144 static struct nlmsghdr * 145 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 146 { 147 switch (hdr->nlmsg_type) { 148 case NL_RTM_GETROUTE: 149 case NL_RTM_NEWROUTE: 150 case NL_RTM_DELROUTE: 151 return (rtnl_route_from_linux(hdr, npt)); 152 case NL_RTM_GETNEIGH: 153 return (rtnl_neigh_from_linux(hdr, npt)); 154 case NL_RTM_GETADDR: 155 return (rtnl_ifaddr_from_linux(hdr, npt)); 156 /* Silence warning for the messages where no translation is required */ 157 case NL_RTM_NEWLINK: 158 case NL_RTM_DELLINK: 159 case NL_RTM_GETLINK: 160 break; 161 default: 162 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 163 hdr->nlmsg_type); 164 } 165 166 return (hdr); 167 } 168 169 static struct nlmsghdr * 170 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 171 struct nl_pstate *npt) 172 { 173 switch (netlink_family) { 174 case NETLINK_ROUTE: 175 return (rtnl_from_linux(hdr, npt)); 176 } 177 178 return (hdr); 179 } 180 181 182 /************************************************************ 183 * Kernel -> Linux 184 ************************************************************/ 185 186 static bool 187 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 188 { 189 char *out_hdr; 190 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 191 192 if (out_hdr != NULL) { 193 memcpy(out_hdr, hdr, hdr->nlmsg_len); 194 return (true); 195 } 196 return (false); 197 } 198 199 static bool 200 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 201 { 202 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 203 hdr->nlmsg_flags, 0)); 204 } 205 206 static void * 207 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 208 { 209 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 210 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 211 212 return (next_hdr); 213 } 214 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 215 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 216 217 static bool 218 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 219 { 220 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 221 if (nla != NULL) { 222 memcpy(nla, nla_orig, nla_orig->nla_len); 223 return (true); 224 } 225 return (false); 226 } 227 228 /* 229 * Translate a FreeBSD interface name to a Linux interface name. 230 */ 231 static bool 232 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 233 { 234 char ifname[LINUX_IFNAMSIZ]; 235 236 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 237 sizeof(ifname)) <= 0) 238 return (false); 239 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 240 } 241 242 #define LINUX_NLA_UNHANDLED -1 243 /* 244 * Translate a FreeBSD attribute to a Linux attribute. 245 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 246 * and the caller must take care of it, otherwise the result is returned. 247 */ 248 static int 249 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 250 struct nl_writer *nw) 251 { 252 253 switch (hdr->nlmsg_type) { 254 case NL_RTM_NEWLINK: 255 case NL_RTM_DELLINK: 256 case NL_RTM_GETLINK: 257 switch (nla->nla_type) { 258 case IFLA_IFNAME: 259 return (nlmsg_translate_ifname_nla(nla, nw)); 260 default: 261 break; 262 } 263 default: 264 break; 265 } 266 return (LINUX_NLA_UNHANDLED); 267 } 268 269 static bool 270 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 271 { 272 struct nlattr *nla; 273 int ret; 274 275 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 276 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 277 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 278 279 NLA_FOREACH(nla, nla_head, attrs_len) { 280 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 281 if (nla->nla_len < sizeof(struct nlattr)) { 282 return (false); 283 } 284 ret = nlmsg_translate_all_nla(hdr, nla, nw); 285 if (ret == LINUX_NLA_UNHANDLED) 286 ret = nlmsg_copy_nla(nla, nw); 287 if (!ret) 288 return (false); 289 } 290 return (true); 291 } 292 #undef LINUX_NLA_UNHANDLED 293 294 static unsigned int 295 rtnl_if_flags_to_linux(unsigned int if_flags) 296 { 297 unsigned int result = 0; 298 299 for (int i = 0; i < 31; i++) { 300 unsigned int flag = 1 << i; 301 if (!(flag & if_flags)) 302 continue; 303 switch (flag) { 304 case IFF_UP: 305 case IFF_BROADCAST: 306 case IFF_DEBUG: 307 case IFF_LOOPBACK: 308 case IFF_POINTOPOINT: 309 case IFF_DRV_RUNNING: 310 case IFF_NOARP: 311 case IFF_PROMISC: 312 case IFF_ALLMULTI: 313 result |= flag; 314 break; 315 case IFF_NEEDSEPOCH: 316 case IFF_DRV_OACTIVE: 317 case IFF_SIMPLEX: 318 case IFF_LINK0: 319 case IFF_LINK1: 320 case IFF_LINK2: 321 case IFF_CANTCONFIG: 322 case IFF_PPROMISC: 323 case IFF_MONITOR: 324 case IFF_STATICARP: 325 case IFF_STICKYARP: 326 case IFF_DYING: 327 case IFF_RENAMING: 328 /* No Linux analogue */ 329 break; 330 case IFF_MULTICAST: 331 result |= 1 << 12; 332 } 333 } 334 return (result); 335 } 336 337 static bool 338 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 339 struct nl_writer *nw) 340 { 341 if (!nlmsg_copy_header(hdr, nw)) 342 return (false); 343 344 struct ifinfomsg *ifinfo; 345 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 346 347 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 348 /* Convert interface type */ 349 switch (ifinfo->ifi_type) { 350 case IFT_ETHER: 351 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 352 break; 353 } 354 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 355 356 /* Copy attributes unchanged */ 357 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 358 return (false); 359 360 /* make ip(8) happy */ 361 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 362 return (false); 363 364 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 365 return (false); 366 367 nlmsg_end(nw); 368 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 369 return (true); 370 } 371 372 static bool 373 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 374 struct nl_writer *nw) 375 { 376 if (!nlmsg_copy_header(hdr, nw)) 377 return (false); 378 379 struct ifaddrmsg *ifamsg; 380 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 381 382 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 383 /* XXX: fake ifa_flags? */ 384 385 /* Copy attributes unchanged */ 386 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 387 return (false); 388 389 nlmsg_end(nw); 390 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 391 return (true); 392 } 393 394 static bool 395 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 396 struct nl_writer *nw) 397 { 398 if (!nlmsg_copy_header(hdr, nw)) 399 return (false); 400 401 struct ndmsg *ndm; 402 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 403 404 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 405 406 /* Copy attributes unchanged */ 407 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 408 return (false); 409 410 nlmsg_end(nw); 411 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 412 return (true); 413 } 414 415 static bool 416 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 417 struct nl_writer *nw) 418 { 419 if (!nlmsg_copy_header(hdr, nw)) 420 return (false); 421 422 struct rtmsg *rtm; 423 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 424 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 425 426 struct nlattr *nla; 427 428 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 429 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 430 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 431 432 NLA_FOREACH(nla, nla_head, attrs_len) { 433 struct rtattr *rta = (struct rtattr *)nla; 434 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 435 if (rta->rta_len < sizeof(struct rtattr)) { 436 break; 437 } 438 439 switch (rta->rta_type) { 440 case NL_RTA_TABLE: 441 { 442 uint32_t fibnum; 443 fibnum = _rta_get_uint32(rta); 444 if (fibnum == 0) 445 fibnum = 254; 446 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 447 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 448 return (false); 449 } 450 break; 451 default: 452 if (!nlmsg_copy_nla(nla, nw)) 453 return (false); 454 break; 455 } 456 } 457 458 nlmsg_end(nw); 459 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 460 return (true); 461 } 462 463 static bool 464 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 465 { 466 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 467 468 switch (hdr->nlmsg_type) { 469 case NL_RTM_NEWLINK: 470 case NL_RTM_DELLINK: 471 case NL_RTM_GETLINK: 472 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 473 case NL_RTM_NEWADDR: 474 case NL_RTM_DELADDR: 475 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 476 case NL_RTM_NEWROUTE: 477 case NL_RTM_DELROUTE: 478 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 479 case NL_RTM_NEWNEIGH: 480 case NL_RTM_DELNEIGH: 481 case NL_RTM_GETNEIGH: 482 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 483 default: 484 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 485 hdr->nlmsg_type); 486 return (handle_default_out(hdr, nw)); 487 } 488 } 489 490 static bool 491 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 492 { 493 if (!nlmsg_copy_header(hdr, nw)) 494 return (false); 495 496 struct nlmsgerr *nlerr; 497 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 498 nlerr->error = bsd_to_linux_errno(nlerr->error); 499 500 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 501 if (hdr->nlmsg_len == copied_len) { 502 nlmsg_end(nw); 503 return (true); 504 } 505 506 /* 507 * CAP_ACK was not set. Original request needs to be translated. 508 * XXX: implement translation of the original message 509 */ 510 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 511 nlerr->msg.nlmsg_type); 512 char *dst_payload, *src_payload; 513 int copy_len = hdr->nlmsg_len - copied_len; 514 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 515 516 src_payload = (char *)hdr + copied_len; 517 518 memcpy(dst_payload, src_payload, copy_len); 519 nlmsg_end(nw); 520 521 return (true); 522 } 523 524 static bool 525 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, 526 struct nl_writer *nw) 527 { 528 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 529 switch (hdr->nlmsg_type) { 530 case NLMSG_ERROR: 531 return (nlmsg_error_to_linux(hdr, nlp, nw)); 532 case NLMSG_NOOP: 533 case NLMSG_DONE: 534 case NLMSG_OVERRUN: 535 return (handle_default_out(hdr, nw)); 536 default: 537 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 538 hdr->nlmsg_type); 539 return (handle_default_out(hdr, nw)); 540 } 541 } 542 543 switch (netlink_family) { 544 case NETLINK_ROUTE: 545 return (rtnl_to_linux(hdr, nlp, nw)); 546 default: 547 return (handle_default_out(hdr, nw)); 548 } 549 } 550 551 static struct mbuf * 552 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) 553 { 554 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); 555 struct nl_writer nw = {}; 556 557 struct mbuf *m = NULL; 558 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { 559 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", 560 data_length); 561 return (NULL); 562 } 563 564 /* Assume correct headers. Buffer IS mutable */ 565 int count = 0; 566 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { 567 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; 568 int msglen = NLMSG_ALIGN(hdr->nlmsg_len); 569 count++; 570 571 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { 572 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 573 hdr->nlmsg_type); 574 m_freem(m); 575 return (NULL); 576 } 577 offset += msglen; 578 } 579 nlmsg_flush(&nw); 580 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, 581 m ? m_length(m, NULL) : 0); 582 583 return (m); 584 } 585 586 static struct mbuf * 587 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) 588 { 589 /* XXX: easiest solution, not optimized for performance */ 590 int data_length = m_length(m, NULL); 591 char *buf = malloc(data_length, M_LINUX, M_NOWAIT); 592 if (buf == NULL) { 593 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", 594 data_length); 595 m_freem(m); 596 return (NULL); 597 } 598 m_copydata(m, 0, data_length, buf); 599 m_freem(m); 600 601 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); 602 free(buf, M_LINUX); 603 604 return (m); 605 } 606 607 static struct linux_netlink_provider linux_netlink_v1 = { 608 .mbufs_to_linux = mbufs_to_linux, 609 .msgs_to_linux = nlmsgs_to_linux, 610 .msg_from_linux = nlmsg_from_linux, 611 }; 612 613 void 614 linux_netlink_register(void) 615 { 616 linux_netlink_p = &linux_netlink_v1; 617 } 618 619 void 620 linux_netlink_deregister(void) 621 { 622 linux_netlink_p = NULL; 623 } 624