1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 31 #include <sys/types.h> 32 #include <sys/ck.h> 33 #include <sys/lock.h> 34 #include <sys/malloc.h> 35 #include <sys/socket.h> 36 #include <sys/vnode.h> 37 38 #include <net/if.h> 39 #include <net/if_dl.h> 40 #include <net/route.h> 41 #include <net/route/nhop.h> 42 #include <net/route/route_ctl.h> 43 #include <netlink/netlink.h> 44 #include <netlink/netlink_ctl.h> 45 #include <netlink/netlink_linux.h> 46 #include <netlink/netlink_var.h> 47 #include <netlink/netlink_route.h> 48 49 #include <compat/linux/linux.h> 50 #include <compat/linux/linux_common.h> 51 #include <compat/linux/linux_util.h> 52 53 #define DEBUG_MOD_NAME nl_linux 54 #define DEBUG_MAX_LEVEL LOG_DEBUG3 55 #include <netlink/netlink_debug.h> 56 _DECLARE_DEBUG(LOG_INFO); 57 58 static bool 59 valid_rta_size(const struct rtattr *rta, int sz) 60 { 61 return (NL_RTA_DATA_LEN(rta) == sz); 62 } 63 64 static bool 65 valid_rta_u32(const struct rtattr *rta) 66 { 67 return (valid_rta_size(rta, sizeof(uint32_t))); 68 } 69 70 static uint32_t 71 _rta_get_uint32(const struct rtattr *rta) 72 { 73 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 74 } 75 76 static struct nlmsghdr * 77 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 78 { 79 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 80 81 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 82 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 83 84 return (hdr); 85 } 86 87 static struct nlmsghdr * 88 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 89 { 90 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 91 92 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 93 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 94 95 return (hdr); 96 } 97 98 static struct nlmsghdr * 99 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 100 { 101 /* Tweak address families and default fib only */ 102 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 103 struct nlattr *nla, *nla_head; 104 int attrs_len; 105 106 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 107 108 if (rtm->rtm_table == 254) 109 rtm->rtm_table = 0; 110 111 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 112 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 113 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 114 115 NLA_FOREACH(nla, nla_head, attrs_len) { 116 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 117 nla->nla_type, nla->nla_len, attrs_len); 118 struct rtattr *rta = (struct rtattr *)nla; 119 if (rta->rta_len < sizeof(struct rtattr)) { 120 break; 121 } 122 switch (rta->rta_type) { 123 case NL_RTA_TABLE: 124 if (!valid_rta_u32(rta)) 125 goto done; 126 rtm->rtm_table = 0; 127 uint32_t fibnum = _rta_get_uint32(rta); 128 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 129 if (fibnum == 254) { 130 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 131 } 132 break; 133 } 134 } 135 136 done: 137 return (hdr); 138 } 139 140 static struct nlmsghdr * 141 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 142 { 143 switch (hdr->nlmsg_type) { 144 case NL_RTM_GETROUTE: 145 case NL_RTM_NEWROUTE: 146 case NL_RTM_DELROUTE: 147 return (rtnl_route_from_linux(hdr, npt)); 148 case NL_RTM_GETNEIGH: 149 return (rtnl_neigh_from_linux(hdr, npt)); 150 case NL_RTM_GETADDR: 151 return (rtnl_ifaddr_from_linux(hdr, npt)); 152 /* Silence warning for the messages where no translation is required */ 153 case NL_RTM_NEWLINK: 154 case NL_RTM_DELLINK: 155 case NL_RTM_GETLINK: 156 break; 157 default: 158 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 159 hdr->nlmsg_type); 160 } 161 162 return (hdr); 163 } 164 165 static struct nlmsghdr * 166 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 167 struct nl_pstate *npt) 168 { 169 switch (netlink_family) { 170 case NETLINK_ROUTE: 171 return (rtnl_from_linux(hdr, npt)); 172 } 173 174 return (hdr); 175 } 176 177 178 /************************************************************ 179 * Kernel -> Linux 180 ************************************************************/ 181 182 static bool 183 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 184 { 185 char *out_hdr; 186 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 187 188 if (out_hdr != NULL) { 189 memcpy(out_hdr, hdr, hdr->nlmsg_len); 190 nw->num_messages++; 191 return (true); 192 } 193 return (false); 194 } 195 196 static bool 197 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 198 { 199 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 200 hdr->nlmsg_flags, 0)); 201 } 202 203 static void * 204 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 205 { 206 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 207 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 208 209 return (next_hdr); 210 } 211 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 212 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 213 214 static bool 215 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 216 { 217 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 218 if (nla != NULL) { 219 memcpy(nla, nla_orig, nla_orig->nla_len); 220 return (true); 221 } 222 return (false); 223 } 224 225 /* 226 * Translate a FreeBSD interface name to a Linux interface name. 227 */ 228 static bool 229 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) 230 { 231 char ifname[LINUX_IFNAMSIZ]; 232 233 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 234 sizeof(ifname)) <= 0) 235 return (false); 236 return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); 237 } 238 239 #define LINUX_NLA_UNHANDLED -1 240 /* 241 * Translate a FreeBSD attribute to a Linux attribute. 242 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed 243 * and the caller must take care of it, otherwise the result is returned. 244 */ 245 static int 246 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 247 struct nl_writer *nw) 248 { 249 250 switch (hdr->nlmsg_type) { 251 case NL_RTM_NEWLINK: 252 case NL_RTM_DELLINK: 253 case NL_RTM_GETLINK: 254 switch (nla->nla_type) { 255 case IFLA_IFNAME: 256 return (nlmsg_translate_ifname_nla(nla, nw)); 257 default: 258 break; 259 } 260 default: 261 break; 262 } 263 return (LINUX_NLA_UNHANDLED); 264 } 265 266 static bool 267 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 268 { 269 struct nlattr *nla; 270 int ret; 271 272 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 273 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 274 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 275 276 NLA_FOREACH(nla, nla_head, attrs_len) { 277 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 278 if (nla->nla_len < sizeof(struct nlattr)) { 279 return (false); 280 } 281 ret = nlmsg_translate_all_nla(hdr, nla, nw); 282 if (ret == LINUX_NLA_UNHANDLED) 283 ret = nlmsg_copy_nla(nla, nw); 284 if (!ret) 285 return (false); 286 } 287 return (true); 288 } 289 #undef LINUX_NLA_UNHANDLED 290 291 static unsigned int 292 rtnl_if_flags_to_linux(unsigned int if_flags) 293 { 294 unsigned int result = 0; 295 296 for (int i = 0; i < 31; i++) { 297 unsigned int flag = 1 << i; 298 if (!(flag & if_flags)) 299 continue; 300 switch (flag) { 301 case IFF_UP: 302 case IFF_BROADCAST: 303 case IFF_DEBUG: 304 case IFF_LOOPBACK: 305 case IFF_POINTOPOINT: 306 case IFF_DRV_RUNNING: 307 case IFF_NOARP: 308 case IFF_PROMISC: 309 case IFF_ALLMULTI: 310 result |= flag; 311 break; 312 case IFF_NEEDSEPOCH: 313 case IFF_DRV_OACTIVE: 314 case IFF_SIMPLEX: 315 case IFF_LINK0: 316 case IFF_LINK1: 317 case IFF_LINK2: 318 case IFF_CANTCONFIG: 319 case IFF_PPROMISC: 320 case IFF_MONITOR: 321 case IFF_STATICARP: 322 case IFF_STICKYARP: 323 case IFF_DYING: 324 case IFF_RENAMING: 325 /* No Linux analogue */ 326 break; 327 case IFF_MULTICAST: 328 result |= 1 << 12; 329 } 330 } 331 return (result); 332 } 333 334 static bool 335 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 336 struct nl_writer *nw) 337 { 338 if (!nlmsg_copy_header(hdr, nw)) 339 return (false); 340 341 struct ifinfomsg *ifinfo; 342 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 343 344 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 345 /* Convert interface type */ 346 switch (ifinfo->ifi_type) { 347 case IFT_ETHER: 348 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 349 break; 350 } 351 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 352 353 /* Copy attributes unchanged */ 354 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 355 return (false); 356 357 /* make ip(8) happy */ 358 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 359 return (false); 360 361 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 362 return (false); 363 364 nlmsg_end(nw); 365 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 366 return (true); 367 } 368 369 static bool 370 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 371 struct nl_writer *nw) 372 { 373 if (!nlmsg_copy_header(hdr, nw)) 374 return (false); 375 376 struct ifaddrmsg *ifamsg; 377 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 378 379 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 380 /* XXX: fake ifa_flags? */ 381 382 /* Copy attributes unchanged */ 383 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 384 return (false); 385 386 nlmsg_end(nw); 387 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 388 return (true); 389 } 390 391 static bool 392 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 393 struct nl_writer *nw) 394 { 395 if (!nlmsg_copy_header(hdr, nw)) 396 return (false); 397 398 struct ndmsg *ndm; 399 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 400 401 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 402 403 /* Copy attributes unchanged */ 404 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 405 return (false); 406 407 nlmsg_end(nw); 408 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 409 return (true); 410 } 411 412 static bool 413 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 414 struct nl_writer *nw) 415 { 416 if (!nlmsg_copy_header(hdr, nw)) 417 return (false); 418 419 struct rtmsg *rtm; 420 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 421 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 422 423 struct nlattr *nla; 424 425 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 426 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 427 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 428 429 NLA_FOREACH(nla, nla_head, attrs_len) { 430 struct rtattr *rta = (struct rtattr *)nla; 431 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 432 if (rta->rta_len < sizeof(struct rtattr)) { 433 break; 434 } 435 436 switch (rta->rta_type) { 437 case NL_RTA_TABLE: 438 { 439 uint32_t fibnum; 440 fibnum = _rta_get_uint32(rta); 441 if (fibnum == 0) 442 fibnum = 254; 443 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 444 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 445 return (false); 446 } 447 break; 448 default: 449 if (!nlmsg_copy_nla(nla, nw)) 450 return (false); 451 break; 452 } 453 } 454 455 nlmsg_end(nw); 456 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 457 return (true); 458 } 459 460 static bool 461 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 462 { 463 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 464 465 switch (hdr->nlmsg_type) { 466 case NL_RTM_NEWLINK: 467 case NL_RTM_DELLINK: 468 case NL_RTM_GETLINK: 469 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 470 case NL_RTM_NEWADDR: 471 case NL_RTM_DELADDR: 472 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 473 case NL_RTM_NEWROUTE: 474 case NL_RTM_DELROUTE: 475 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 476 case NL_RTM_NEWNEIGH: 477 case NL_RTM_DELNEIGH: 478 case NL_RTM_GETNEIGH: 479 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 480 default: 481 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 482 hdr->nlmsg_type); 483 return (handle_default_out(hdr, nw)); 484 } 485 } 486 487 static bool 488 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 489 { 490 if (!nlmsg_copy_header(hdr, nw)) 491 return (false); 492 493 struct nlmsgerr *nlerr; 494 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 495 nlerr->error = bsd_to_linux_errno(nlerr->error); 496 497 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 498 if (hdr->nlmsg_len == copied_len) { 499 nlmsg_end(nw); 500 return (true); 501 } 502 503 /* 504 * CAP_ACK was not set. Original request needs to be translated. 505 * XXX: implement translation of the original message 506 */ 507 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 508 nlerr->msg.nlmsg_type); 509 char *dst_payload, *src_payload; 510 int copy_len = hdr->nlmsg_len - copied_len; 511 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 512 513 src_payload = (char *)hdr + copied_len; 514 515 memcpy(dst_payload, src_payload, copy_len); 516 nlmsg_end(nw); 517 518 return (true); 519 } 520 521 static bool 522 nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 523 { 524 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 525 switch (hdr->nlmsg_type) { 526 case NLMSG_ERROR: 527 return (nlmsg_error_to_linux(hdr, nlp, nw)); 528 case NLMSG_NOOP: 529 case NLMSG_DONE: 530 case NLMSG_OVERRUN: 531 return (handle_default_out(hdr, nw)); 532 default: 533 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 534 hdr->nlmsg_type); 535 return (handle_default_out(hdr, nw)); 536 } 537 } 538 539 switch (nlp->nl_proto) { 540 case NETLINK_ROUTE: 541 return (rtnl_to_linux(hdr, nlp, nw)); 542 default: 543 return (handle_default_out(hdr, nw)); 544 } 545 } 546 547 static bool 548 nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp) 549 { 550 struct nl_buf *nb, *orig; 551 u_int offset, msglen, orig_messages __diagused; 552 553 RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__, 554 nw->buf->datalen, nw->num_messages); 555 556 orig = nw->buf; 557 nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT); 558 if (__predict_false(nb == NULL)) 559 return (false); 560 nw->buf = nb; 561 #ifdef INVARIANTS 562 orig_messages = nw->num_messages; 563 #endif 564 nw->num_messages = 0; 565 566 /* Assume correct headers. Buffer IS mutable */ 567 for (offset = 0; 568 offset + sizeof(struct nlmsghdr) <= orig->datalen; 569 offset += msglen) { 570 struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset]; 571 572 msglen = NLMSG_ALIGN(hdr->nlmsg_len); 573 if (!nlmsg_to_linux(hdr, nlp, nw)) { 574 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 575 hdr->nlmsg_type); 576 nl_buf_free(nb); 577 return (false); 578 } 579 } 580 581 MPASS(nw->num_messages == orig_messages); 582 MPASS(nw->buf == nb); 583 nl_buf_free(orig); 584 RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset); 585 586 return (true); 587 } 588 589 static struct linux_netlink_provider linux_netlink_v1 = { 590 .msgs_to_linux = nlmsgs_to_linux, 591 .msg_from_linux = nlmsg_from_linux, 592 }; 593 594 void 595 linux_netlink_register(void) 596 { 597 linux_netlink_p = &linux_netlink_v1; 598 } 599 600 void 601 linux_netlink_deregister(void) 602 { 603 linux_netlink_p = NULL; 604 } 605