1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 #include "opt_netlink.h" 31 32 #include <sys/types.h> 33 #include <sys/ck.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/rmlock.h> 37 #include <sys/socket.h> 38 #include <sys/vnode.h> 39 40 #include <net/if.h> 41 #include <net/if_dl.h> 42 #include <net/route.h> 43 #include <net/route/nhop.h> 44 #include <net/route/route_ctl.h> 45 #include <netlink/netlink.h> 46 #include <netlink/netlink_ctl.h> 47 #include <netlink/netlink_linux.h> 48 #include <netlink/netlink_route.h> 49 50 #include <compat/linux/linux.h> 51 #include <compat/linux/linux_common.h> 52 #include <compat/linux/linux_util.h> 53 54 #define DEBUG_MOD_NAME nl_linux 55 #define DEBUG_MAX_LEVEL LOG_DEBUG3 56 #include <netlink/netlink_debug.h> 57 _DECLARE_DEBUG(LOG_INFO); 58 59 static bool 60 valid_rta_size(const struct rtattr *rta, int sz) 61 { 62 return (NL_RTA_DATA_LEN(rta) == sz); 63 } 64 65 static bool 66 valid_rta_u32(const struct rtattr *rta) 67 { 68 return (valid_rta_size(rta, sizeof(uint32_t))); 69 } 70 71 static uint32_t 72 _rta_get_uint32(const struct rtattr *rta) 73 { 74 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 75 } 76 77 static struct nlmsghdr * 78 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 79 { 80 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 81 82 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 83 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 84 85 return (hdr); 86 } 87 88 static struct nlmsghdr * 89 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 90 { 91 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 92 93 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 94 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 95 96 return (hdr); 97 } 98 99 static struct nlmsghdr * 100 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 101 { 102 /* Tweak address families and default fib only */ 103 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 104 struct nlattr *nla, *nla_head; 105 int attrs_len; 106 107 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 108 109 if (rtm->rtm_table == 254) 110 rtm->rtm_table = 0; 111 112 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 113 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 114 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 115 116 NLA_FOREACH(nla, nla_head, attrs_len) { 117 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 118 nla->nla_type, nla->nla_len, attrs_len); 119 struct rtattr *rta = (struct rtattr *)nla; 120 if (rta->rta_len < sizeof(struct rtattr)) { 121 break; 122 } 123 switch (rta->rta_type) { 124 case NL_RTA_TABLE: 125 if (!valid_rta_u32(rta)) 126 goto done; 127 rtm->rtm_table = 0; 128 uint32_t fibnum = _rta_get_uint32(rta); 129 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 130 if (fibnum == 254) { 131 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 132 } 133 break; 134 } 135 } 136 137 done: 138 return (hdr); 139 } 140 141 static struct nlmsghdr * 142 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 143 { 144 switch (hdr->nlmsg_type) { 145 case NL_RTM_GETROUTE: 146 case NL_RTM_NEWROUTE: 147 case NL_RTM_DELROUTE: 148 return (rtnl_route_from_linux(hdr, npt)); 149 case NL_RTM_GETNEIGH: 150 return (rtnl_neigh_from_linux(hdr, npt)); 151 case NL_RTM_GETADDR: 152 return (rtnl_ifaddr_from_linux(hdr, npt)); 153 /* Silence warning for the messages where no translation is required */ 154 case NL_RTM_NEWLINK: 155 case NL_RTM_DELLINK: 156 case NL_RTM_GETLINK: 157 break; 158 default: 159 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 160 hdr->nlmsg_type); 161 } 162 163 return (hdr); 164 } 165 166 static struct nlmsghdr * 167 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 168 struct nl_pstate *npt) 169 { 170 switch (netlink_family) { 171 case NETLINK_ROUTE: 172 return (rtnl_from_linux(hdr, npt)); 173 } 174 175 return (hdr); 176 } 177 178 179 /************************************************************ 180 * Kernel -> Linux 181 ************************************************************/ 182 183 static bool 184 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 185 { 186 char *out_hdr; 187 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 188 189 if (out_hdr != NULL) { 190 memcpy(out_hdr, hdr, hdr->nlmsg_len); 191 return (true); 192 } 193 return (false); 194 } 195 196 static bool 197 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 198 { 199 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 200 hdr->nlmsg_flags, 0)); 201 } 202 203 static void * 204 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 205 { 206 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 207 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 208 209 return (next_hdr); 210 } 211 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 212 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 213 214 static bool 215 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 216 { 217 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 218 if (nla != NULL) { 219 memcpy(nla, nla_orig, nla_orig->nla_len); 220 return (true); 221 } 222 return (false); 223 } 224 225 /* 226 * Translate a FreeBSD interface name to a Linux interface name. 227 */ 228 static bool 229 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 230 { 231 char ifname[LINUX_IFNAMSIZ]; 232 233 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 234 sizeof(ifname)) <= 0) 235 return (false); 236 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 237 } 238 239 #define LINUX_NLA_UNHANDLED -1 240 /* 241 * Translate a FreeBSD attribute to a Linux attribute. 242 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 243 * and the caller must take care of it, otherwise the result is returned. 244 */ 245 static int 246 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 247 struct nl_writer *nw) 248 { 249 250 switch (hdr->nlmsg_type) { 251 case NL_RTM_NEWLINK: 252 case NL_RTM_DELLINK: 253 case NL_RTM_GETLINK: 254 switch (nla->nla_type) { 255 case IFLA_IFNAME: 256 return (nlmsg_translate_ifname_nla(nla, nw)); 257 default: 258 break; 259 } 260 default: 261 break; 262 } 263 return (LINUX_NLA_UNHANDLED); 264 } 265 266 static bool 267 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 268 { 269 struct nlattr *nla; 270 int ret; 271 272 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 273 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 274 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 275 276 NLA_FOREACH(nla, nla_head, attrs_len) { 277 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 278 if (nla->nla_len < sizeof(struct nlattr)) { 279 return (false); 280 } 281 ret = nlmsg_translate_all_nla(hdr, nla, nw); 282 if (ret == LINUX_NLA_UNHANDLED) 283 ret = nlmsg_copy_nla(nla, nw); 284 if (!ret) 285 return (false); 286 } 287 return (true); 288 } 289 #undef LINUX_NLA_UNHANDLED 290 291 static unsigned int 292 rtnl_if_flags_to_linux(unsigned int if_flags) 293 { 294 unsigned int result = 0; 295 296 for (int i = 0; i < 31; i++) { 297 unsigned int flag = 1 << i; 298 if (!(flag & if_flags)) 299 continue; 300 switch (flag) { 301 case IFF_UP: 302 case IFF_BROADCAST: 303 case IFF_DEBUG: 304 case IFF_LOOPBACK: 305 case IFF_POINTOPOINT: 306 case IFF_DRV_RUNNING: 307 case IFF_NOARP: 308 case IFF_PROMISC: 309 case IFF_ALLMULTI: 310 result |= flag; 311 break; 312 case IFF_NEEDSEPOCH: 313 case IFF_DRV_OACTIVE: 314 case IFF_SIMPLEX: 315 case IFF_LINK0: 316 case IFF_LINK1: 317 case IFF_LINK2: 318 case IFF_CANTCONFIG: 319 case IFF_PPROMISC: 320 case IFF_MONITOR: 321 case IFF_STATICARP: 322 case IFF_STICKYARP: 323 case IFF_DYING: 324 case IFF_RENAMING: 325 /* No Linux analogue */ 326 break; 327 case IFF_MULTICAST: 328 result |= 1 << 12; 329 } 330 } 331 return (result); 332 } 333 334 static bool 335 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 336 struct nl_writer *nw) 337 { 338 if (!nlmsg_copy_header(hdr, nw)) 339 return (false); 340 341 struct ifinfomsg *ifinfo; 342 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 343 344 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 345 /* Convert interface type */ 346 switch (ifinfo->ifi_type) { 347 case IFT_ETHER: 348 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 349 break; 350 } 351 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 352 353 /* Copy attributes unchanged */ 354 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 355 return (false); 356 357 /* make ip(8) happy */ 358 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 359 return (false); 360 361 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 362 return (false); 363 364 nlmsg_end(nw); 365 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 366 return (true); 367 } 368 369 static bool 370 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 371 struct nl_writer *nw) 372 { 373 if (!nlmsg_copy_header(hdr, nw)) 374 return (false); 375 376 struct ifaddrmsg *ifamsg; 377 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 378 379 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 380 /* XXX: fake ifa_flags? */ 381 382 /* Copy attributes unchanged */ 383 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 384 return (false); 385 386 nlmsg_end(nw); 387 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 388 return (true); 389 } 390 391 static bool 392 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 393 struct nl_writer *nw) 394 { 395 if (!nlmsg_copy_header(hdr, nw)) 396 return (false); 397 398 struct ndmsg *ndm; 399 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 400 401 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 402 403 /* Copy attributes unchanged */ 404 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 405 return (false); 406 407 nlmsg_end(nw); 408 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 409 return (true); 410 } 411 412 static bool 413 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 414 struct nl_writer *nw) 415 { 416 if (!nlmsg_copy_header(hdr, nw)) 417 return (false); 418 419 struct rtmsg *rtm; 420 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 421 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 422 423 struct nlattr *nla; 424 425 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 426 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 427 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 428 429 NLA_FOREACH(nla, nla_head, attrs_len) { 430 struct rtattr *rta = (struct rtattr *)nla; 431 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 432 if (rta->rta_len < sizeof(struct rtattr)) { 433 break; 434 } 435 436 switch (rta->rta_type) { 437 case NL_RTA_TABLE: 438 { 439 uint32_t fibnum; 440 fibnum = _rta_get_uint32(rta); 441 if (fibnum == 0) 442 fibnum = 254; 443 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 444 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 445 return (false); 446 } 447 break; 448 default: 449 if (!nlmsg_copy_nla(nla, nw)) 450 return (false); 451 break; 452 } 453 } 454 455 nlmsg_end(nw); 456 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 457 return (true); 458 } 459 460 static bool 461 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 462 { 463 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 464 465 switch (hdr->nlmsg_type) { 466 case NL_RTM_NEWLINK: 467 case NL_RTM_DELLINK: 468 case NL_RTM_GETLINK: 469 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 470 case NL_RTM_NEWADDR: 471 case NL_RTM_DELADDR: 472 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 473 case NL_RTM_NEWROUTE: 474 case NL_RTM_DELROUTE: 475 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 476 case NL_RTM_NEWNEIGH: 477 case NL_RTM_DELNEIGH: 478 case NL_RTM_GETNEIGH: 479 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 480 default: 481 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 482 hdr->nlmsg_type); 483 return (handle_default_out(hdr, nw)); 484 } 485 } 486 487 static bool 488 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 489 { 490 if (!nlmsg_copy_header(hdr, nw)) 491 return (false); 492 493 struct nlmsgerr *nlerr; 494 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 495 nlerr->error = bsd_to_linux_errno(nlerr->error); 496 497 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 498 if (hdr->nlmsg_len == copied_len) { 499 nlmsg_end(nw); 500 return (true); 501 } 502 503 /* 504 * CAP_ACK was not set. Original request needs to be translated. 505 * XXX: implement translation of the original message 506 */ 507 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 508 nlerr->msg.nlmsg_type); 509 char *dst_payload, *src_payload; 510 int copy_len = hdr->nlmsg_len - copied_len; 511 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 512 513 src_payload = (char *)hdr + copied_len; 514 515 memcpy(dst_payload, src_payload, copy_len); 516 nlmsg_end(nw); 517 518 return (true); 519 } 520 521 static bool 522 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, 523 struct nl_writer *nw) 524 { 525 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 526 switch (hdr->nlmsg_type) { 527 case NLMSG_ERROR: 528 return (nlmsg_error_to_linux(hdr, nlp, nw)); 529 case NLMSG_NOOP: 530 case NLMSG_DONE: 531 case NLMSG_OVERRUN: 532 return (handle_default_out(hdr, nw)); 533 default: 534 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 535 hdr->nlmsg_type); 536 return (handle_default_out(hdr, nw)); 537 } 538 } 539 540 switch (netlink_family) { 541 case NETLINK_ROUTE: 542 return (rtnl_to_linux(hdr, nlp, nw)); 543 default: 544 return (handle_default_out(hdr, nw)); 545 } 546 } 547 548 static struct mbuf * 549 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) 550 { 551 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); 552 struct nl_writer nw = {}; 553 554 struct mbuf *m = NULL; 555 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { 556 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", 557 data_length); 558 return (NULL); 559 } 560 561 /* Assume correct headers. Buffer IS mutable */ 562 int count = 0; 563 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { 564 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; 565 int msglen = NLMSG_ALIGN(hdr->nlmsg_len); 566 count++; 567 568 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { 569 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 570 hdr->nlmsg_type); 571 m_freem(m); 572 return (NULL); 573 } 574 offset += msglen; 575 } 576 nlmsg_flush(&nw); 577 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, 578 m ? m_length(m, NULL) : 0); 579 580 return (m); 581 } 582 583 static struct mbuf * 584 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) 585 { 586 /* XXX: easiest solution, not optimized for performance */ 587 int data_length = m_length(m, NULL); 588 char *buf = malloc(data_length, M_LINUX, M_NOWAIT); 589 if (buf == NULL) { 590 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", 591 data_length); 592 m_freem(m); 593 return (NULL); 594 } 595 m_copydata(m, 0, data_length, buf); 596 m_freem(m); 597 598 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); 599 free(buf, M_LINUX); 600 601 return (m); 602 } 603 604 static struct linux_netlink_provider linux_netlink_v1 = { 605 .mbufs_to_linux = mbufs_to_linux, 606 .msgs_to_linux = nlmsgs_to_linux, 607 .msg_from_linux = nlmsg_from_linux, 608 }; 609 610 void 611 linux_netlink_register(void) 612 { 613 linux_netlink_p = &linux_netlink_v1; 614 } 615 616 void 617 linux_netlink_deregister(void) 618 { 619 linux_netlink_p = NULL; 620 } 621