1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 31 #include <sys/types.h> 32 #include <sys/ck.h> 33 #include <sys/lock.h> 34 #include <sys/socket.h> 35 #include <sys/vnode.h> 36 37 #include <net/if.h> 38 #include <net/if_dl.h> 39 #include <net/route.h> 40 #include <net/route/nhop.h> 41 #include <net/route/route_ctl.h> 42 #include <netlink/netlink.h> 43 #include <netlink/netlink_ctl.h> 44 #include <netlink/netlink_linux.h> 45 #include <netlink/netlink_var.h> 46 #include <netlink/netlink_route.h> 47 48 #include <compat/linux/linux.h> 49 #include <compat/linux/linux_common.h> 50 #include <compat/linux/linux_util.h> 51 52 #define DEBUG_MOD_NAME nl_linux 53 #define DEBUG_MAX_LEVEL LOG_DEBUG3 54 #include <netlink/netlink_debug.h> 55 _DECLARE_DEBUG(LOG_INFO); 56 57 static bool 58 valid_rta_size(const struct rtattr *rta, int sz) 59 { 60 return (NL_RTA_DATA_LEN(rta) == sz); 61 } 62 63 static bool 64 valid_rta_u32(const struct rtattr *rta) 65 { 66 return (valid_rta_size(rta, sizeof(uint32_t))); 67 } 68 69 static uint32_t 70 _rta_get_uint32(const struct rtattr *rta) 71 { 72 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 73 } 74 75 static int 76 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 77 { 78 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 79 sa_family_t f; 80 81 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 82 return (EBADMSG); 83 if ((f = linux_to_bsd_domain(ndm->ndm_family)) == AF_UNKNOWN) 84 return (EPFNOSUPPORT); 85 86 ndm->ndm_family = f; 87 88 return (0); 89 } 90 91 static int 92 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 93 { 94 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 95 sa_family_t f; 96 97 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 98 return (EBADMSG); 99 if ((f = linux_to_bsd_domain(ifam->ifa_family)) == AF_UNKNOWN) 100 return (EPFNOSUPPORT); 101 102 ifam->ifa_family = f; 103 104 return (0); 105 } 106 107 /* 108 * XXX: in case of error state of hdr is inconsistent. 109 */ 110 static int 111 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 112 { 113 /* Tweak address families and default fib only */ 114 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 115 struct nlattr *nla, *nla_head; 116 int attrs_len; 117 sa_family_t f; 118 119 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct rtmsg)) 120 return (EBADMSG); 121 if ((f = linux_to_bsd_domain(rtm->rtm_family)) == AF_UNKNOWN) 122 return (EPFNOSUPPORT); 123 rtm->rtm_family = f; 124 125 if (rtm->rtm_table == 254) 126 rtm->rtm_table = 0; 127 128 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 129 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 130 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 131 132 NLA_FOREACH(nla, nla_head, attrs_len) { 133 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 134 nla->nla_type, nla->nla_len, attrs_len); 135 struct rtattr *rta = (struct rtattr *)nla; 136 if (rta->rta_len < sizeof(struct rtattr)) { 137 break; 138 } 139 switch (rta->rta_type) { 140 case NL_RTA_TABLE: 141 if (!valid_rta_u32(rta)) 142 return (EBADMSG); 143 rtm->rtm_table = 0; 144 uint32_t fibnum = _rta_get_uint32(rta); 145 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 146 if (fibnum == 254) { 147 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 148 } 149 break; 150 } 151 } 152 153 return (0); 154 } 155 156 static int 157 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 158 { 159 160 switch (hdr->nlmsg_type) { 161 case NL_RTM_GETROUTE: 162 case NL_RTM_NEWROUTE: 163 case NL_RTM_DELROUTE: 164 return (rtnl_route_from_linux(hdr, npt)); 165 case NL_RTM_GETNEIGH: 166 return (rtnl_neigh_from_linux(hdr, npt)); 167 case NL_RTM_GETADDR: 168 return (rtnl_ifaddr_from_linux(hdr, npt)); 169 /* Silence warning for the messages where no translation is required */ 170 case NL_RTM_NEWLINK: 171 case NL_RTM_DELLINK: 172 case NL_RTM_GETLINK: 173 break; 174 default: 175 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 176 hdr->nlmsg_type); 177 /* XXXGL: maybe return error? */ 178 } 179 180 return (0); 181 } 182 183 static int 184 nlmsg_from_linux(int netlink_family, struct nlmsghdr **hdr, 185 struct nl_pstate *npt) 186 { 187 switch (netlink_family) { 188 case NETLINK_ROUTE: 189 return (rtnl_from_linux(*hdr, npt)); 190 } 191 192 return (0); 193 } 194 195 196 /************************************************************ 197 * Kernel -> Linux 198 ************************************************************/ 199 200 static bool 201 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 202 { 203 char *out_hdr; 204 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 205 206 if (out_hdr != NULL) { 207 memcpy(out_hdr, hdr, hdr->nlmsg_len); 208 nw->num_messages++; 209 return (true); 210 } 211 return (false); 212 } 213 214 static bool 215 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 216 { 217 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 218 hdr->nlmsg_flags, 0)); 219 } 220 221 static void * 222 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 223 { 224 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 225 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 226 227 return (next_hdr); 228 } 229 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 230 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 231 232 static bool 233 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 234 { 235 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 236 if (nla != NULL) { 237 memcpy(nla, nla_orig, nla_orig->nla_len); 238 return (true); 239 } 240 return (false); 241 } 242 243 /* 244 * Translate a FreeBSD interface name to a Linux interface name. 245 */ 246 static bool 247 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 248 { 249 char ifname[LINUX_IFNAMSIZ]; 250 251 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 252 sizeof(ifname)) <= 0) 253 return (false); 254 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 255 } 256 257 #define LINUX_NLA_UNHANDLED -1 258 /* 259 * Translate a FreeBSD attribute to a Linux attribute. 260 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 261 * and the caller must take care of it, otherwise the result is returned. 262 */ 263 static int 264 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 265 struct nl_writer *nw) 266 { 267 268 switch (hdr->nlmsg_type) { 269 case NL_RTM_NEWLINK: 270 case NL_RTM_DELLINK: 271 case NL_RTM_GETLINK: 272 switch (nla->nla_type) { 273 case IFLA_IFNAME: 274 return (nlmsg_translate_ifname_nla(nla, nw)); 275 default: 276 break; 277 } 278 default: 279 break; 280 } 281 return (LINUX_NLA_UNHANDLED); 282 } 283 284 static bool 285 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 286 { 287 struct nlattr *nla; 288 int ret; 289 290 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 291 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 292 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 293 294 NLA_FOREACH(nla, nla_head, attrs_len) { 295 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 296 if (nla->nla_len < sizeof(struct nlattr)) { 297 return (false); 298 } 299 ret = nlmsg_translate_all_nla(hdr, nla, nw); 300 if (ret == LINUX_NLA_UNHANDLED) 301 ret = nlmsg_copy_nla(nla, nw); 302 if (!ret) 303 return (false); 304 } 305 return (true); 306 } 307 #undef LINUX_NLA_UNHANDLED 308 309 static unsigned int 310 rtnl_if_flags_to_linux(unsigned int if_flags) 311 { 312 unsigned int result = 0; 313 314 for (int i = 0; i < 31; i++) { 315 unsigned int flag = 1 << i; 316 if (!(flag & if_flags)) 317 continue; 318 switch (flag) { 319 case IFF_UP: 320 case IFF_BROADCAST: 321 case IFF_DEBUG: 322 case IFF_LOOPBACK: 323 case IFF_POINTOPOINT: 324 case IFF_DRV_RUNNING: 325 case IFF_NOARP: 326 case IFF_PROMISC: 327 case IFF_ALLMULTI: 328 result |= flag; 329 break; 330 case IFF_NEEDSEPOCH: 331 case IFF_DRV_OACTIVE: 332 case IFF_SIMPLEX: 333 case IFF_LINK0: 334 case IFF_LINK1: 335 case IFF_LINK2: 336 case IFF_CANTCONFIG: 337 case IFF_PPROMISC: 338 case IFF_MONITOR: 339 case IFF_STATICARP: 340 case IFF_STICKYARP: 341 case IFF_DYING: 342 case IFF_RENAMING: 343 /* No Linux analogue */ 344 break; 345 case IFF_MULTICAST: 346 result |= 1 << 12; 347 } 348 } 349 return (result); 350 } 351 352 static bool 353 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 354 struct nl_writer *nw) 355 { 356 if (!nlmsg_copy_header(hdr, nw)) 357 return (false); 358 359 struct ifinfomsg *ifinfo; 360 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 361 362 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 363 /* Convert interface type */ 364 switch (ifinfo->ifi_type) { 365 case IFT_ETHER: 366 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 367 break; 368 } 369 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 370 371 /* Copy attributes unchanged */ 372 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 373 return (false); 374 375 /* make ip(8) happy */ 376 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 377 return (false); 378 379 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 380 return (false); 381 382 nlmsg_end(nw); 383 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 384 return (true); 385 } 386 387 static bool 388 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 389 struct nl_writer *nw) 390 { 391 if (!nlmsg_copy_header(hdr, nw)) 392 return (false); 393 394 struct ifaddrmsg *ifamsg; 395 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 396 397 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 398 /* XXX: fake ifa_flags? */ 399 400 /* Copy attributes unchanged */ 401 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 402 return (false); 403 404 nlmsg_end(nw); 405 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 406 return (true); 407 } 408 409 static bool 410 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 411 struct nl_writer *nw) 412 { 413 if (!nlmsg_copy_header(hdr, nw)) 414 return (false); 415 416 struct ndmsg *ndm; 417 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 418 419 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 420 421 /* Copy attributes unchanged */ 422 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 423 return (false); 424 425 nlmsg_end(nw); 426 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 427 return (true); 428 } 429 430 static bool 431 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 432 struct nl_writer *nw) 433 { 434 if (!nlmsg_copy_header(hdr, nw)) 435 return (false); 436 437 struct rtmsg *rtm; 438 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 439 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 440 441 struct nlattr *nla; 442 443 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 444 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 445 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 446 447 NLA_FOREACH(nla, nla_head, attrs_len) { 448 struct rtattr *rta = (struct rtattr *)nla; 449 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 450 if (rta->rta_len < sizeof(struct rtattr)) { 451 break; 452 } 453 454 switch (rta->rta_type) { 455 case NL_RTA_TABLE: 456 { 457 uint32_t fibnum; 458 fibnum = _rta_get_uint32(rta); 459 if (fibnum == 0) 460 fibnum = 254; 461 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 462 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 463 return (false); 464 } 465 break; 466 default: 467 if (!nlmsg_copy_nla(nla, nw)) 468 return (false); 469 break; 470 } 471 } 472 473 nlmsg_end(nw); 474 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 475 return (true); 476 } 477 478 static bool 479 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 480 { 481 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 482 483 switch (hdr->nlmsg_type) { 484 case NL_RTM_NEWLINK: 485 case NL_RTM_DELLINK: 486 case NL_RTM_GETLINK: 487 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 488 case NL_RTM_NEWADDR: 489 case NL_RTM_DELADDR: 490 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 491 case NL_RTM_NEWROUTE: 492 case NL_RTM_DELROUTE: 493 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 494 case NL_RTM_NEWNEIGH: 495 case NL_RTM_DELNEIGH: 496 case NL_RTM_GETNEIGH: 497 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 498 default: 499 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 500 hdr->nlmsg_type); 501 return (handle_default_out(hdr, nw)); 502 } 503 } 504 505 static bool 506 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 507 { 508 if (!nlmsg_copy_header(hdr, nw)) 509 return (false); 510 511 struct nlmsgerr *nlerr; 512 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 513 nlerr->error = bsd_to_linux_errno(nlerr->error); 514 515 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 516 if (hdr->nlmsg_len == copied_len) { 517 nlmsg_end(nw); 518 return (true); 519 } 520 521 /* 522 * CAP_ACK was not set. Original request needs to be translated. 523 * XXX: implement translation of the original message 524 */ 525 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 526 nlerr->msg.nlmsg_type); 527 char *dst_payload, *src_payload; 528 int copy_len = hdr->nlmsg_len - copied_len; 529 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 530 531 src_payload = (char *)hdr + copied_len; 532 533 memcpy(dst_payload, src_payload, copy_len); 534 nlmsg_end(nw); 535 536 return (true); 537 } 538 539 static bool 540 nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 541 { 542 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 543 switch (hdr->nlmsg_type) { 544 case NLMSG_ERROR: 545 return (nlmsg_error_to_linux(hdr, nlp, nw)); 546 case NLMSG_NOOP: 547 case NLMSG_DONE: 548 case NLMSG_OVERRUN: 549 return (handle_default_out(hdr, nw)); 550 default: 551 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 552 hdr->nlmsg_type); 553 return (handle_default_out(hdr, nw)); 554 } 555 } 556 557 switch (nlp->nl_proto) { 558 case NETLINK_ROUTE: 559 return (rtnl_to_linux(hdr, nlp, nw)); 560 default: 561 return (handle_default_out(hdr, nw)); 562 } 563 } 564 565 static bool 566 nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp) 567 { 568 struct nl_buf *nb, *orig; 569 u_int offset, msglen, orig_messages; 570 571 RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__, 572 nw->buf->datalen, nw->num_messages); 573 574 orig = nw->buf; 575 nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT); 576 if (__predict_false(nb == NULL)) 577 return (false); 578 nw->buf = nb; 579 orig_messages = nw->num_messages; 580 nw->num_messages = 0; 581 582 /* Assume correct headers. Buffer IS mutable */ 583 for (offset = 0; 584 offset + sizeof(struct nlmsghdr) <= orig->datalen; 585 offset += msglen) { 586 struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset]; 587 588 msglen = NLMSG_ALIGN(hdr->nlmsg_len); 589 if (!nlmsg_to_linux(hdr, nlp, nw)) { 590 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 591 hdr->nlmsg_type); 592 nl_buf_free(nb); 593 nw->buf = orig; 594 nw->num_messages = orig_messages; 595 return (false); 596 } 597 } 598 599 MPASS(nw->num_messages == orig_messages); 600 MPASS(nw->buf == nb); 601 nl_buf_free(orig); 602 RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset); 603 604 return (true); 605 } 606 607 static struct linux_netlink_provider linux_netlink_v1 = { 608 .msgs_to_linux = nlmsgs_to_linux, 609 .msg_from_linux = nlmsg_from_linux, 610 }; 611 612 void 613 linux_netlink_register(void) 614 { 615 linux_netlink_p = &linux_netlink_v1; 616 } 617 618 void 619 linux_netlink_deregister(void) 620 { 621 linux_netlink_p = NULL; 622 } 623