1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_netlink.h" 32 33 #include <sys/types.h> 34 #include <sys/ck.h> 35 #include <sys/lock.h> 36 #include <sys/malloc.h> 37 #include <sys/rmlock.h> 38 #include <sys/socket.h> 39 #include <sys/vnode.h> 40 41 #include <net/if.h> 42 #include <net/if_dl.h> 43 #include <net/route.h> 44 #include <net/route/nhop.h> 45 #include <net/route/route_ctl.h> 46 #include <netlink/netlink.h> 47 #include <netlink/netlink_ctl.h> 48 #include <netlink/netlink_linux.h> 49 #include <netlink/netlink_route.h> 50 51 #include <compat/linux/linux.h> 52 #include <compat/linux/linux_common.h> 53 #include <compat/linux/linux_util.h> 54 55 #define DEBUG_MOD_NAME nl_linux 56 #define DEBUG_MAX_LEVEL LOG_DEBUG3 57 #include <netlink/netlink_debug.h> 58 _DECLARE_DEBUG(LOG_INFO); 59 60 static bool 61 valid_rta_size(const struct rtattr *rta, int sz) 62 { 63 return (NL_RTA_DATA_LEN(rta) == sz); 64 } 65 66 static bool 67 valid_rta_u32(const struct rtattr *rta) 68 { 69 return (valid_rta_size(rta, sizeof(uint32_t))); 70 } 71 72 static uint32_t 73 _rta_get_uint32(const struct rtattr *rta) 74 { 75 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 76 } 77 78 static struct nlmsghdr * 79 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 80 { 81 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 82 83 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 84 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 85 86 return (hdr); 87 } 88 89 static struct nlmsghdr * 90 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 91 { 92 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 93 94 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 95 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 96 97 return (hdr); 98 } 99 100 static struct nlmsghdr * 101 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 102 { 103 /* Tweak address families and default fib only */ 104 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 105 struct nlattr *nla, *nla_head; 106 int attrs_len; 107 108 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 109 110 if (rtm->rtm_table == 254) 111 rtm->rtm_table = 0; 112 113 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 114 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 115 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 116 117 NLA_FOREACH(nla, nla_head, attrs_len) { 118 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 119 nla->nla_type, nla->nla_len, attrs_len); 120 struct rtattr *rta = (struct rtattr *)nla; 121 if (rta->rta_len < sizeof(struct rtattr)) { 122 break; 123 } 124 switch (rta->rta_type) { 125 case NL_RTA_TABLE: 126 if (!valid_rta_u32(rta)) 127 goto done; 128 rtm->rtm_table = 0; 129 uint32_t fibnum = _rta_get_uint32(rta); 130 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 131 if (fibnum == 254) { 132 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 133 } 134 break; 135 } 136 } 137 138 done: 139 return (hdr); 140 } 141 142 static struct nlmsghdr * 143 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 144 { 145 switch (hdr->nlmsg_type) { 146 case NL_RTM_GETROUTE: 147 case NL_RTM_NEWROUTE: 148 case NL_RTM_DELROUTE: 149 return (rtnl_route_from_linux(hdr, npt)); 150 case NL_RTM_GETNEIGH: 151 return (rtnl_neigh_from_linux(hdr, npt)); 152 case NL_RTM_GETADDR: 153 return (rtnl_ifaddr_from_linux(hdr, npt)); 154 /* Silence warning for the messages where no translation is required */ 155 case NL_RTM_NEWLINK: 156 case NL_RTM_DELLINK: 157 case NL_RTM_GETLINK: 158 break; 159 default: 160 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 161 hdr->nlmsg_type); 162 } 163 164 return (hdr); 165 } 166 167 static struct nlmsghdr * 168 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 169 struct nl_pstate *npt) 170 { 171 switch (netlink_family) { 172 case NETLINK_ROUTE: 173 return (rtnl_from_linux(hdr, npt)); 174 } 175 176 return (hdr); 177 } 178 179 180 /************************************************************ 181 * Kernel -> Linux 182 ************************************************************/ 183 184 static bool 185 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 186 { 187 char *out_hdr; 188 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 189 190 if (out_hdr != NULL) { 191 memcpy(out_hdr, hdr, hdr->nlmsg_len); 192 return (true); 193 } 194 return (false); 195 } 196 197 static bool 198 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 199 { 200 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 201 hdr->nlmsg_flags, 0)); 202 } 203 204 static void * 205 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 206 { 207 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 208 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 209 210 return (next_hdr); 211 } 212 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 213 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 214 215 static bool 216 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 217 { 218 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 219 if (nla != NULL) { 220 memcpy(nla, nla_orig, nla_orig->nla_len); 221 return (true); 222 } 223 return (false); 224 } 225 226 /* 227 * Translate a FreeBSD interface name to a Linux interface name. 228 */ 229 static bool 230 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 231 { 232 char ifname[LINUX_IFNAMSIZ]; 233 234 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 235 sizeof(ifname)) <= 0) 236 return (false); 237 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 238 } 239 240 #define LINUX_NLA_UNHANDLED -1 241 /* 242 * Translate a FreeBSD attribute to a Linux attribute. 243 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 244 * and the caller must take care of it, otherwise the result is returned. 245 */ 246 static int 247 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 248 struct nl_writer *nw) 249 { 250 251 switch (hdr->nlmsg_type) { 252 case NL_RTM_NEWLINK: 253 case NL_RTM_DELLINK: 254 case NL_RTM_GETLINK: 255 switch (nla->nla_type) { 256 case IFLA_IFNAME: 257 return (nlmsg_translate_ifname_nla(nla, nw)); 258 default: 259 break; 260 } 261 default: 262 break; 263 } 264 return (LINUX_NLA_UNHANDLED); 265 } 266 267 static bool 268 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 269 { 270 struct nlattr *nla; 271 int ret; 272 273 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 274 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 275 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 276 277 NLA_FOREACH(nla, nla_head, attrs_len) { 278 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 279 if (nla->nla_len < sizeof(struct nlattr)) { 280 return (false); 281 } 282 ret = nlmsg_translate_all_nla(hdr, nla, nw); 283 if (ret == LINUX_NLA_UNHANDLED) 284 ret = nlmsg_copy_nla(nla, nw); 285 if (!ret) 286 return (false); 287 } 288 return (true); 289 } 290 #undef LINUX_NLA_UNHANDLED 291 292 static unsigned int 293 rtnl_if_flags_to_linux(unsigned int if_flags) 294 { 295 unsigned int result = 0; 296 297 for (int i = 0; i < 31; i++) { 298 unsigned int flag = 1 << i; 299 if (!(flag & if_flags)) 300 continue; 301 switch (flag) { 302 case IFF_UP: 303 case IFF_BROADCAST: 304 case IFF_DEBUG: 305 case IFF_LOOPBACK: 306 case IFF_POINTOPOINT: 307 case IFF_DRV_RUNNING: 308 case IFF_NOARP: 309 case IFF_PROMISC: 310 case IFF_ALLMULTI: 311 result |= flag; 312 break; 313 case IFF_NEEDSEPOCH: 314 case IFF_DRV_OACTIVE: 315 case IFF_SIMPLEX: 316 case IFF_LINK0: 317 case IFF_LINK1: 318 case IFF_LINK2: 319 case IFF_CANTCONFIG: 320 case IFF_PPROMISC: 321 case IFF_MONITOR: 322 case IFF_STATICARP: 323 case IFF_STICKYARP: 324 case IFF_DYING: 325 case IFF_RENAMING: 326 /* No Linux analogue */ 327 break; 328 case IFF_MULTICAST: 329 result |= 1 << 12; 330 } 331 } 332 return (result); 333 } 334 335 static bool 336 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 337 struct nl_writer *nw) 338 { 339 if (!nlmsg_copy_header(hdr, nw)) 340 return (false); 341 342 struct ifinfomsg *ifinfo; 343 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 344 345 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 346 /* Convert interface type */ 347 switch (ifinfo->ifi_type) { 348 case IFT_ETHER: 349 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 350 break; 351 } 352 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 353 354 /* Copy attributes unchanged */ 355 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 356 return (false); 357 358 /* make ip(8) happy */ 359 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 360 return (false); 361 362 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 363 return (false); 364 365 nlmsg_end(nw); 366 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 367 return (true); 368 } 369 370 static bool 371 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 372 struct nl_writer *nw) 373 { 374 if (!nlmsg_copy_header(hdr, nw)) 375 return (false); 376 377 struct ifaddrmsg *ifamsg; 378 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 379 380 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 381 /* XXX: fake ifa_flags? */ 382 383 /* Copy attributes unchanged */ 384 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 385 return (false); 386 387 nlmsg_end(nw); 388 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 389 return (true); 390 } 391 392 static bool 393 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 394 struct nl_writer *nw) 395 { 396 if (!nlmsg_copy_header(hdr, nw)) 397 return (false); 398 399 struct ndmsg *ndm; 400 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 401 402 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 403 404 /* Copy attributes unchanged */ 405 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 406 return (false); 407 408 nlmsg_end(nw); 409 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 410 return (true); 411 } 412 413 static bool 414 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 415 struct nl_writer *nw) 416 { 417 if (!nlmsg_copy_header(hdr, nw)) 418 return (false); 419 420 struct rtmsg *rtm; 421 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 422 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 423 424 struct nlattr *nla; 425 426 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 427 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 428 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 429 430 NLA_FOREACH(nla, nla_head, attrs_len) { 431 struct rtattr *rta = (struct rtattr *)nla; 432 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 433 if (rta->rta_len < sizeof(struct rtattr)) { 434 break; 435 } 436 437 switch (rta->rta_type) { 438 case NL_RTA_TABLE: 439 { 440 uint32_t fibnum; 441 fibnum = _rta_get_uint32(rta); 442 if (fibnum == 0) 443 fibnum = 254; 444 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 445 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 446 return (false); 447 } 448 break; 449 default: 450 if (!nlmsg_copy_nla(nla, nw)) 451 return (false); 452 break; 453 } 454 } 455 456 nlmsg_end(nw); 457 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 458 return (true); 459 } 460 461 static bool 462 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 463 { 464 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 465 466 switch (hdr->nlmsg_type) { 467 case NL_RTM_NEWLINK: 468 case NL_RTM_DELLINK: 469 case NL_RTM_GETLINK: 470 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 471 case NL_RTM_NEWADDR: 472 case NL_RTM_DELADDR: 473 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 474 case NL_RTM_NEWROUTE: 475 case NL_RTM_DELROUTE: 476 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 477 case NL_RTM_NEWNEIGH: 478 case NL_RTM_DELNEIGH: 479 case NL_RTM_GETNEIGH: 480 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 481 default: 482 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 483 hdr->nlmsg_type); 484 return (handle_default_out(hdr, nw)); 485 } 486 } 487 488 static bool 489 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 490 { 491 if (!nlmsg_copy_header(hdr, nw)) 492 return (false); 493 494 struct nlmsgerr *nlerr; 495 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 496 nlerr->error = bsd_to_linux_errno(nlerr->error); 497 498 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 499 if (hdr->nlmsg_len == copied_len) { 500 nlmsg_end(nw); 501 return (true); 502 } 503 504 /* 505 * CAP_ACK was not set. Original request needs to be translated. 506 * XXX: implement translation of the original message 507 */ 508 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 509 nlerr->msg.nlmsg_type); 510 char *dst_payload, *src_payload; 511 int copy_len = hdr->nlmsg_len - copied_len; 512 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 513 514 src_payload = (char *)hdr + copied_len; 515 516 memcpy(dst_payload, src_payload, copy_len); 517 nlmsg_end(nw); 518 519 return (true); 520 } 521 522 static bool 523 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, 524 struct nl_writer *nw) 525 { 526 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 527 switch (hdr->nlmsg_type) { 528 case NLMSG_ERROR: 529 return (nlmsg_error_to_linux(hdr, nlp, nw)); 530 case NLMSG_NOOP: 531 case NLMSG_DONE: 532 case NLMSG_OVERRUN: 533 return (handle_default_out(hdr, nw)); 534 default: 535 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 536 hdr->nlmsg_type); 537 return (handle_default_out(hdr, nw)); 538 } 539 } 540 541 switch (netlink_family) { 542 case NETLINK_ROUTE: 543 return (rtnl_to_linux(hdr, nlp, nw)); 544 default: 545 return (handle_default_out(hdr, nw)); 546 } 547 } 548 549 static struct mbuf * 550 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) 551 { 552 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); 553 struct nl_writer nw = {}; 554 555 struct mbuf *m = NULL; 556 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { 557 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", 558 data_length); 559 return (NULL); 560 } 561 562 /* Assume correct headers. Buffer IS mutable */ 563 int count = 0; 564 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { 565 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; 566 int msglen = NLMSG_ALIGN(hdr->nlmsg_len); 567 count++; 568 569 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { 570 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 571 hdr->nlmsg_type); 572 m_freem(m); 573 return (NULL); 574 } 575 offset += msglen; 576 } 577 nlmsg_flush(&nw); 578 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, 579 m ? m_length(m, NULL) : 0); 580 581 return (m); 582 } 583 584 static struct mbuf * 585 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) 586 { 587 /* XXX: easiest solution, not optimized for performance */ 588 int data_length = m_length(m, NULL); 589 char *buf = malloc(data_length, M_LINUX, M_NOWAIT); 590 if (buf == NULL) { 591 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", 592 data_length); 593 m_freem(m); 594 return (NULL); 595 } 596 m_copydata(m, 0, data_length, buf); 597 m_freem(m); 598 599 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); 600 free(buf, M_LINUX); 601 602 return (m); 603 } 604 605 static struct linux_netlink_provider linux_netlink_v1 = { 606 .mbufs_to_linux = mbufs_to_linux, 607 .msgs_to_linux = nlmsgs_to_linux, 608 .msg_from_linux = nlmsg_from_linux, 609 }; 610 611 void 612 linux_netlink_register(void) 613 { 614 linux_netlink_p = &linux_netlink_v1; 615 } 616 617 void 618 linux_netlink_deregister(void) 619 { 620 linux_netlink_p = NULL; 621 } 622