1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 31 #include <sys/types.h> 32 #include <sys/ck.h> 33 #include <sys/lock.h> 34 #include <sys/socket.h> 35 #include <sys/vnode.h> 36 37 #include <net/if.h> 38 #include <net/if_dl.h> 39 #include <net/route.h> 40 #include <net/route/nhop.h> 41 #include <net/route/route_ctl.h> 42 #include <netlink/netlink.h> 43 #include <netlink/netlink_ctl.h> 44 #include <netlink/netlink_linux.h> 45 #include <netlink/netlink_var.h> 46 #include <netlink/netlink_route.h> 47 48 #include <compat/linux/linux.h> 49 #include <compat/linux/linux_common.h> 50 #include <compat/linux/linux_util.h> 51 52 #define DEBUG_MOD_NAME nl_linux 53 #define DEBUG_MAX_LEVEL LOG_DEBUG3 54 #include <netlink/netlink_debug.h> 55 _DECLARE_DEBUG(LOG_INFO); 56 57 static bool 58 valid_rta_size(const struct rtattr *rta, int sz) 59 { 60 return (NL_RTA_DATA_LEN(rta) == sz); 61 } 62 63 static bool 64 valid_rta_u32(const struct rtattr *rta) 65 { 66 return (valid_rta_size(rta, sizeof(uint32_t))); 67 } 68 69 static uint32_t 70 _rta_get_uint32(const struct rtattr *rta) 71 { 72 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 73 } 74 75 static int 76 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 77 { 78 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 79 sa_family_t f; 80 81 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 82 return (EBADMSG); 83 if ((f = linux_to_bsd_domain(ndm->ndm_family)) == AF_UNKNOWN) 84 return (EPFNOSUPPORT); 85 86 ndm->ndm_family = f; 87 88 return (0); 89 } 90 91 static int 92 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 93 { 94 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 95 sa_family_t f; 96 97 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + 98 offsetof(struct ifaddrmsg, ifa_family) + sizeof(ifam->ifa_family)) 99 return (EBADMSG); 100 if ((f = linux_to_bsd_domain(ifam->ifa_family)) == AF_UNKNOWN) 101 return (EPFNOSUPPORT); 102 103 ifam->ifa_family = f; 104 105 return (0); 106 } 107 108 /* 109 * XXX: in case of error state of hdr is inconsistent. 110 */ 111 static int 112 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 113 { 114 /* Tweak address families and default fib only */ 115 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 116 struct nlattr *nla, *nla_head; 117 int attrs_len; 118 sa_family_t f; 119 120 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct rtmsg)) 121 return (EBADMSG); 122 if ((f = linux_to_bsd_domain(rtm->rtm_family)) == AF_UNKNOWN) 123 return (EPFNOSUPPORT); 124 rtm->rtm_family = f; 125 126 if (rtm->rtm_table == 254) 127 rtm->rtm_table = 0; 128 129 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 130 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 131 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 132 133 NLA_FOREACH(nla, nla_head, attrs_len) { 134 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 135 nla->nla_type, nla->nla_len, attrs_len); 136 struct rtattr *rta = (struct rtattr *)nla; 137 if (rta->rta_len < sizeof(struct rtattr)) { 138 break; 139 } 140 switch (rta->rta_type) { 141 case NL_RTA_TABLE: 142 if (!valid_rta_u32(rta)) 143 return (EBADMSG); 144 rtm->rtm_table = 0; 145 uint32_t fibnum = _rta_get_uint32(rta); 146 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 147 if (fibnum == 254) { 148 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 149 } 150 break; 151 } 152 } 153 154 return (0); 155 } 156 157 static int 158 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 159 { 160 161 switch (hdr->nlmsg_type) { 162 case NL_RTM_GETROUTE: 163 case NL_RTM_NEWROUTE: 164 case NL_RTM_DELROUTE: 165 return (rtnl_route_from_linux(hdr, npt)); 166 case NL_RTM_GETNEIGH: 167 return (rtnl_neigh_from_linux(hdr, npt)); 168 case NL_RTM_GETADDR: 169 return (rtnl_ifaddr_from_linux(hdr, npt)); 170 /* Silence warning for the messages where no translation is required */ 171 case NL_RTM_NEWLINK: 172 case NL_RTM_DELLINK: 173 case NL_RTM_GETLINK: 174 break; 175 default: 176 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 177 hdr->nlmsg_type); 178 /* XXXGL: maybe return error? */ 179 } 180 181 return (0); 182 } 183 184 static int 185 nlmsg_from_linux(int netlink_family, struct nlmsghdr **hdr, 186 struct nl_pstate *npt) 187 { 188 switch (netlink_family) { 189 case NETLINK_ROUTE: 190 return (rtnl_from_linux(*hdr, npt)); 191 } 192 193 return (0); 194 } 195 196 197 /************************************************************ 198 * Kernel -> Linux 199 ************************************************************/ 200 201 static bool 202 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 203 { 204 char *out_hdr; 205 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 206 207 if (out_hdr != NULL) { 208 memcpy(out_hdr, hdr, hdr->nlmsg_len); 209 nw->num_messages++; 210 return (true); 211 } 212 return (false); 213 } 214 215 static bool 216 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 217 { 218 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 219 hdr->nlmsg_flags, 0)); 220 } 221 222 static void * 223 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 224 { 225 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 226 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 227 228 return (next_hdr); 229 } 230 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 231 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 232 233 static bool 234 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 235 { 236 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 237 if (nla != NULL) { 238 memcpy(nla, nla_orig, nla_orig->nla_len); 239 return (true); 240 } 241 return (false); 242 } 243 244 /* 245 * Translate a FreeBSD interface name to a Linux interface name. 246 */ 247 static bool 248 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 249 { 250 char ifname[LINUX_IFNAMSIZ]; 251 252 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 253 sizeof(ifname)) <= 0) 254 return (false); 255 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 256 } 257 258 #define LINUX_NLA_UNHANDLED -1 259 /* 260 * Translate a FreeBSD attribute to a Linux attribute. 261 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 262 * and the caller must take care of it, otherwise the result is returned. 263 */ 264 static int 265 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 266 struct nl_writer *nw) 267 { 268 269 switch (hdr->nlmsg_type) { 270 case NL_RTM_NEWLINK: 271 case NL_RTM_DELLINK: 272 case NL_RTM_GETLINK: 273 switch (nla->nla_type) { 274 case IFLA_IFNAME: 275 return (nlmsg_translate_ifname_nla(nla, nw)); 276 default: 277 break; 278 } 279 default: 280 break; 281 } 282 return (LINUX_NLA_UNHANDLED); 283 } 284 285 static bool 286 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 287 { 288 struct nlattr *nla; 289 int ret; 290 291 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 292 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 293 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 294 295 NLA_FOREACH(nla, nla_head, attrs_len) { 296 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 297 if (nla->nla_len < sizeof(struct nlattr)) { 298 return (false); 299 } 300 ret = nlmsg_translate_all_nla(hdr, nla, nw); 301 if (ret == LINUX_NLA_UNHANDLED) 302 ret = nlmsg_copy_nla(nla, nw); 303 if (!ret) 304 return (false); 305 } 306 return (true); 307 } 308 #undef LINUX_NLA_UNHANDLED 309 310 static unsigned int 311 rtnl_if_flags_to_linux(unsigned int if_flags) 312 { 313 unsigned int result = 0; 314 315 for (int i = 0; i < 31; i++) { 316 unsigned int flag = 1 << i; 317 if (!(flag & if_flags)) 318 continue; 319 switch (flag) { 320 case IFF_UP: 321 case IFF_BROADCAST: 322 case IFF_DEBUG: 323 case IFF_LOOPBACK: 324 case IFF_POINTOPOINT: 325 case IFF_DRV_RUNNING: 326 case IFF_NOARP: 327 case IFF_PROMISC: 328 case IFF_ALLMULTI: 329 result |= flag; 330 break; 331 case IFF_NEEDSEPOCH: 332 case IFF_DRV_OACTIVE: 333 case IFF_SIMPLEX: 334 case IFF_LINK0: 335 case IFF_LINK1: 336 case IFF_LINK2: 337 case IFF_CANTCONFIG: 338 case IFF_PPROMISC: 339 case IFF_MONITOR: 340 case IFF_STATICARP: 341 case IFF_STICKYARP: 342 case IFF_DYING: 343 case IFF_RENAMING: 344 /* No Linux analogue */ 345 break; 346 case IFF_MULTICAST: 347 result |= 1 << 12; 348 } 349 } 350 return (result); 351 } 352 353 static bool 354 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 355 struct nl_writer *nw) 356 { 357 if (!nlmsg_copy_header(hdr, nw)) 358 return (false); 359 360 struct ifinfomsg *ifinfo; 361 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 362 363 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 364 /* Convert interface type */ 365 switch (ifinfo->ifi_type) { 366 case IFT_ETHER: 367 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 368 break; 369 } 370 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 371 372 /* Copy attributes unchanged */ 373 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 374 return (false); 375 376 /* make ip(8) happy */ 377 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 378 return (false); 379 380 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 381 return (false); 382 383 nlmsg_end(nw); 384 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 385 return (true); 386 } 387 388 static bool 389 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 390 struct nl_writer *nw) 391 { 392 if (!nlmsg_copy_header(hdr, nw)) 393 return (false); 394 395 struct ifaddrmsg *ifamsg; 396 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 397 398 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 399 /* XXX: fake ifa_flags? */ 400 401 /* Copy attributes unchanged */ 402 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 403 return (false); 404 405 nlmsg_end(nw); 406 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 407 return (true); 408 } 409 410 static bool 411 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 412 struct nl_writer *nw) 413 { 414 if (!nlmsg_copy_header(hdr, nw)) 415 return (false); 416 417 struct ndmsg *ndm; 418 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 419 420 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 421 422 /* Copy attributes unchanged */ 423 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 424 return (false); 425 426 nlmsg_end(nw); 427 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 428 return (true); 429 } 430 431 static bool 432 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 433 struct nl_writer *nw) 434 { 435 if (!nlmsg_copy_header(hdr, nw)) 436 return (false); 437 438 struct rtmsg *rtm; 439 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 440 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 441 442 struct nlattr *nla; 443 444 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 445 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 446 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 447 448 NLA_FOREACH(nla, nla_head, attrs_len) { 449 struct rtattr *rta = (struct rtattr *)nla; 450 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 451 if (rta->rta_len < sizeof(struct rtattr)) { 452 break; 453 } 454 455 switch (rta->rta_type) { 456 case NL_RTA_TABLE: 457 { 458 uint32_t fibnum; 459 fibnum = _rta_get_uint32(rta); 460 if (fibnum == 0) 461 fibnum = 254; 462 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 463 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 464 return (false); 465 } 466 break; 467 default: 468 if (!nlmsg_copy_nla(nla, nw)) 469 return (false); 470 break; 471 } 472 } 473 474 nlmsg_end(nw); 475 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 476 return (true); 477 } 478 479 static bool 480 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 481 { 482 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 483 484 switch (hdr->nlmsg_type) { 485 case NL_RTM_NEWLINK: 486 case NL_RTM_DELLINK: 487 case NL_RTM_GETLINK: 488 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 489 case NL_RTM_NEWADDR: 490 case NL_RTM_DELADDR: 491 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 492 case NL_RTM_NEWROUTE: 493 case NL_RTM_DELROUTE: 494 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 495 case NL_RTM_NEWNEIGH: 496 case NL_RTM_DELNEIGH: 497 case NL_RTM_GETNEIGH: 498 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 499 default: 500 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 501 hdr->nlmsg_type); 502 return (handle_default_out(hdr, nw)); 503 } 504 } 505 506 static bool 507 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 508 { 509 if (!nlmsg_copy_header(hdr, nw)) 510 return (false); 511 512 struct nlmsgerr *nlerr; 513 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 514 nlerr->error = bsd_to_linux_errno(nlerr->error); 515 516 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 517 if (hdr->nlmsg_len == copied_len) { 518 nlmsg_end(nw); 519 return (true); 520 } 521 522 /* 523 * CAP_ACK was not set. Original request needs to be translated. 524 * XXX: implement translation of the original message 525 */ 526 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 527 nlerr->msg.nlmsg_type); 528 char *dst_payload, *src_payload; 529 int copy_len = hdr->nlmsg_len - copied_len; 530 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 531 532 src_payload = (char *)hdr + copied_len; 533 534 memcpy(dst_payload, src_payload, copy_len); 535 nlmsg_end(nw); 536 537 return (true); 538 } 539 540 static bool 541 nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 542 { 543 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 544 switch (hdr->nlmsg_type) { 545 case NLMSG_ERROR: 546 return (nlmsg_error_to_linux(hdr, nlp, nw)); 547 case NLMSG_NOOP: 548 case NLMSG_DONE: 549 case NLMSG_OVERRUN: 550 return (handle_default_out(hdr, nw)); 551 default: 552 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 553 hdr->nlmsg_type); 554 return (handle_default_out(hdr, nw)); 555 } 556 } 557 558 switch (nlp->nl_proto) { 559 case NETLINK_ROUTE: 560 return (rtnl_to_linux(hdr, nlp, nw)); 561 default: 562 return (handle_default_out(hdr, nw)); 563 } 564 } 565 566 static bool 567 nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp) 568 { 569 struct nl_buf *nb, *orig; 570 u_int offset, msglen, orig_messages; 571 572 RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__, 573 nw->buf->datalen, nw->num_messages); 574 575 orig = nw->buf; 576 nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT); 577 if (__predict_false(nb == NULL)) 578 return (false); 579 nw->buf = nb; 580 orig_messages = nw->num_messages; 581 nw->num_messages = 0; 582 583 /* Assume correct headers. Buffer IS mutable */ 584 for (offset = 0; 585 offset + sizeof(struct nlmsghdr) <= orig->datalen; 586 offset += msglen) { 587 struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset]; 588 589 msglen = NLMSG_ALIGN(hdr->nlmsg_len); 590 if (!nlmsg_to_linux(hdr, nlp, nw)) { 591 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 592 hdr->nlmsg_type); 593 nl_buf_free(nb); 594 nw->buf = orig; 595 nw->num_messages = orig_messages; 596 return (false); 597 } 598 } 599 600 MPASS(nw->num_messages == orig_messages); 601 MPASS(nw->buf == nb); 602 nl_buf_free(orig); 603 RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset); 604 605 return (true); 606 } 607 608 static struct linux_netlink_provider linux_netlink_v1 = { 609 .msgs_to_linux = nlmsgs_to_linux, 610 .msg_from_linux = nlmsg_from_linux, 611 }; 612 613 void 614 linux_netlink_register(void) 615 { 616 linux_netlink_p = &linux_netlink_v1; 617 } 618 619 void 620 linux_netlink_deregister(void) 621 { 622 linux_netlink_p = NULL; 623 } 624