1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/types.h> 35 #include <sys/ck.h> 36 #include <sys/lock.h> 37 #include <sys/malloc.h> 38 #include <sys/rmlock.h> 39 #include <sys/socket.h> 40 #include <sys/vnode.h> 41 42 #include <net/if.h> 43 #include <net/if_dl.h> 44 #include <net/route.h> 45 #include <net/route/nhop.h> 46 #include <net/route/route_ctl.h> 47 #include <netlink/netlink.h> 48 #include <netlink/netlink_ctl.h> 49 #include <netlink/netlink_linux.h> 50 #include <netlink/netlink_route.h> 51 52 #include <compat/linux/linux.h> 53 #include <compat/linux/linux_common.h> 54 #include <compat/linux/linux_util.h> 55 56 #define DEBUG_MOD_NAME nl_linux 57 #define DEBUG_MAX_LEVEL LOG_DEBUG3 58 #include <netlink/netlink_debug.h> 59 _DECLARE_DEBUG(LOG_DEBUG); 60 61 static bool 62 valid_rta_size(const struct rtattr *rta, int sz) 63 { 64 return (NL_RTA_DATA_LEN(rta) == sz); 65 } 66 67 static bool 68 valid_rta_u32(const struct rtattr *rta) 69 { 70 return (valid_rta_size(rta, sizeof(uint32_t))); 71 } 72 73 static uint32_t 74 _rta_get_uint32(const struct rtattr *rta) 75 { 76 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 77 } 78 79 static struct nlmsghdr * 80 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 81 { 82 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 83 84 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 85 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 86 87 return (hdr); 88 } 89 90 static struct nlmsghdr * 91 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 92 { 93 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 94 95 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 96 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 97 98 return (hdr); 99 } 100 101 static struct nlmsghdr * 102 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 103 { 104 /* Tweak address families and default fib only */ 105 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 106 struct nlattr *nla, *nla_head; 107 int attrs_len; 108 109 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 110 111 if (rtm->rtm_table == 254) 112 rtm->rtm_table = 0; 113 114 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 115 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 116 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 117 118 NLA_FOREACH(nla, nla_head, attrs_len) { 119 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 120 nla->nla_type, nla->nla_len, attrs_len); 121 struct rtattr *rta = (struct rtattr *)nla; 122 if (rta->rta_len < sizeof(struct rtattr)) { 123 break; 124 } 125 switch (rta->rta_type) { 126 case NL_RTA_TABLE: 127 if (!valid_rta_u32(rta)) 128 goto done; 129 rtm->rtm_table = 0; 130 uint32_t fibnum = _rta_get_uint32(rta); 131 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 132 if (fibnum == 254) { 133 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 134 } 135 break; 136 } 137 } 138 139 done: 140 return (hdr); 141 } 142 143 static struct nlmsghdr * 144 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 145 { 146 switch (hdr->nlmsg_type) { 147 case NL_RTM_GETROUTE: 148 case NL_RTM_NEWROUTE: 149 case NL_RTM_DELROUTE: 150 return (rtnl_route_from_linux(hdr, npt)); 151 case NL_RTM_GETNEIGH: 152 return (rtnl_neigh_from_linux(hdr, npt)); 153 case NL_RTM_GETADDR: 154 return (rtnl_ifaddr_from_linux(hdr, npt)); 155 /* Silence warning for the messages where no translation is required */ 156 case NL_RTM_NEWLINK: 157 case NL_RTM_DELLINK: 158 case NL_RTM_GETLINK: 159 break; 160 default: 161 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 162 hdr->nlmsg_type); 163 } 164 165 return (hdr); 166 } 167 168 static struct nlmsghdr * 169 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 170 struct nl_pstate *npt) 171 { 172 switch (netlink_family) { 173 case NETLINK_ROUTE: 174 return (rtnl_from_linux(hdr, npt)); 175 } 176 177 return (hdr); 178 } 179 180 181 /************************************************************ 182 * Kernel -> Linux 183 ************************************************************/ 184 185 static bool 186 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 187 { 188 char *out_hdr; 189 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 190 191 if (out_hdr != NULL) { 192 memcpy(out_hdr, hdr, hdr->nlmsg_len); 193 return (true); 194 } 195 return (false); 196 } 197 198 static bool 199 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 200 { 201 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 202 hdr->nlmsg_flags, 0)); 203 } 204 205 static void * 206 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 207 { 208 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 209 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 210 211 return (next_hdr); 212 } 213 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 214 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 215 216 static bool 217 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 218 { 219 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 220 if (nla != NULL) { 221 memcpy(nla, nla_orig, nla_orig->nla_len); 222 return (true); 223 } 224 return (false); 225 } 226 227 static bool 228 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 229 { 230 struct nlattr *nla; 231 232 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 233 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 234 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 235 236 NLA_FOREACH(nla, nla_head, attrs_len) { 237 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 238 if (nla->nla_len < sizeof(struct nlattr)) { 239 return (false); 240 } 241 if (!nlmsg_copy_nla(nla, nw)) 242 return (false); 243 } 244 return (true); 245 } 246 247 static unsigned int 248 rtnl_if_flags_to_linux(unsigned int if_flags) 249 { 250 unsigned int result = 0; 251 252 for (int i = 0; i < 31; i++) { 253 unsigned int flag = 1 << i; 254 if (!(flag & if_flags)) 255 continue; 256 switch (flag) { 257 case IFF_UP: 258 case IFF_BROADCAST: 259 case IFF_DEBUG: 260 case IFF_LOOPBACK: 261 case IFF_POINTOPOINT: 262 case IFF_DRV_RUNNING: 263 case IFF_NOARP: 264 case IFF_PROMISC: 265 case IFF_ALLMULTI: 266 result |= flag; 267 break; 268 case IFF_KNOWSEPOCH: 269 case IFF_DRV_OACTIVE: 270 case IFF_SIMPLEX: 271 case IFF_LINK0: 272 case IFF_LINK1: 273 case IFF_LINK2: 274 case IFF_CANTCONFIG: 275 case IFF_PPROMISC: 276 case IFF_MONITOR: 277 case IFF_STATICARP: 278 case IFF_STICKYARP: 279 case IFF_DYING: 280 case IFF_RENAMING: 281 case IFF_NOGROUP: 282 /* No Linux analogue */ 283 break; 284 case IFF_MULTICAST: 285 result |= 1 << 12; 286 } 287 } 288 return (result); 289 } 290 291 static bool 292 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 293 struct nl_writer *nw) 294 { 295 if (!nlmsg_copy_header(hdr, nw)) 296 return (false); 297 298 struct ifinfomsg *ifinfo; 299 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 300 301 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 302 /* Convert interface type */ 303 switch (ifinfo->ifi_type) { 304 case IFT_ETHER: 305 ifinfo->ifi_type = 1; // ARPHRD_ETHER 306 break; 307 } 308 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 309 310 /* Copy attributes unchanged */ 311 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 312 return (false); 313 314 /* make ip(8) happy */ 315 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 316 return (false); 317 318 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 319 return (false); 320 321 nlmsg_end(nw); 322 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 323 return (true); 324 } 325 326 static bool 327 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 328 struct nl_writer *nw) 329 { 330 if (!nlmsg_copy_header(hdr, nw)) 331 return (false); 332 333 struct ifaddrmsg *ifamsg; 334 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 335 336 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 337 /* XXX: fake ifa_flags? */ 338 339 /* Copy attributes unchanged */ 340 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 341 return (false); 342 343 nlmsg_end(nw); 344 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 345 return (true); 346 } 347 348 static bool 349 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 350 struct nl_writer *nw) 351 { 352 if (!nlmsg_copy_header(hdr, nw)) 353 return (false); 354 355 struct ndmsg *ndm; 356 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 357 358 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 359 360 /* Copy attributes unchanged */ 361 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 362 return (false); 363 364 nlmsg_end(nw); 365 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 366 return (true); 367 } 368 369 static bool 370 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 371 struct nl_writer *nw) 372 { 373 if (!nlmsg_copy_header(hdr, nw)) 374 return (false); 375 376 struct rtmsg *rtm; 377 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 378 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 379 380 struct nlattr *nla; 381 382 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 383 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 384 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 385 386 NLA_FOREACH(nla, nla_head, attrs_len) { 387 struct rtattr *rta = (struct rtattr *)nla; 388 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 389 if (rta->rta_len < sizeof(struct rtattr)) { 390 break; 391 } 392 393 switch (rta->rta_type) { 394 case NL_RTA_TABLE: 395 { 396 uint32_t fibnum; 397 fibnum = _rta_get_uint32(rta); 398 if (fibnum == 0) 399 fibnum = 254; 400 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 401 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 402 return (false); 403 } 404 break; 405 default: 406 if (!nlmsg_copy_nla(nla, nw)) 407 return (false); 408 break; 409 } 410 } 411 412 nlmsg_end(nw); 413 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 414 return (true); 415 } 416 417 static bool 418 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 419 { 420 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 421 422 switch (hdr->nlmsg_type) { 423 case NL_RTM_NEWLINK: 424 case NL_RTM_DELLINK: 425 case NL_RTM_GETLINK: 426 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 427 case NL_RTM_NEWADDR: 428 case NL_RTM_DELADDR: 429 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 430 case NL_RTM_NEWROUTE: 431 case NL_RTM_DELROUTE: 432 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 433 case NL_RTM_NEWNEIGH: 434 case NL_RTM_DELNEIGH: 435 case NL_RTM_GETNEIGH: 436 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 437 default: 438 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 439 hdr->nlmsg_type); 440 return (handle_default_out(hdr, nw)); 441 } 442 } 443 444 static bool 445 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 446 { 447 if (!nlmsg_copy_header(hdr, nw)) 448 return (false); 449 450 struct nlmsgerr *nlerr; 451 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 452 nlerr->error = bsd_to_linux_errno(nlerr->error); 453 454 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 455 if (hdr->nlmsg_len == copied_len) { 456 nlmsg_end(nw); 457 return (true); 458 } 459 460 /* 461 * CAP_ACK was not set. Original request needs to be translated. 462 * XXX: implement translation of the original message 463 */ 464 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 465 nlerr->msg.nlmsg_type); 466 char *dst_payload, *src_payload; 467 int copy_len = hdr->nlmsg_len - copied_len; 468 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 469 470 src_payload = (char *)hdr + copied_len; 471 472 memcpy(dst_payload, src_payload, copy_len); 473 nlmsg_end(nw); 474 475 return (true); 476 } 477 478 static bool 479 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, 480 struct nl_writer *nw) 481 { 482 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 483 switch (hdr->nlmsg_type) { 484 case NLMSG_ERROR: 485 return (nlmsg_error_to_linux(hdr, nlp, nw)); 486 case NLMSG_NOOP: 487 case NLMSG_DONE: 488 case NLMSG_OVERRUN: 489 return (handle_default_out(hdr, nw)); 490 default: 491 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 492 hdr->nlmsg_type); 493 return (handle_default_out(hdr, nw)); 494 } 495 } 496 497 switch (netlink_family) { 498 case NETLINK_ROUTE: 499 return (rtnl_to_linux(hdr, nlp, nw)); 500 default: 501 return (handle_default_out(hdr, nw)); 502 } 503 } 504 505 static struct mbuf * 506 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) 507 { 508 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); 509 struct nl_writer nw = {}; 510 511 struct mbuf *m = NULL; 512 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { 513 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", 514 data_length); 515 return (NULL); 516 } 517 518 /* Assume correct headers. Buffer IS mutable */ 519 int count = 0; 520 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { 521 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; 522 int msglen = NLMSG_ALIGN(hdr->nlmsg_len); 523 count++; 524 525 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { 526 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 527 hdr->nlmsg_type); 528 m_freem(m); 529 return (NULL); 530 } 531 offset += msglen; 532 } 533 nlmsg_flush(&nw); 534 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, 535 m ? m_length(m, NULL) : 0); 536 537 return (m); 538 } 539 540 static struct mbuf * 541 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) 542 { 543 /* XXX: easiest solution, not optimized for performance */ 544 int data_length = m_length(m, NULL); 545 char *buf = malloc(data_length, M_LINUX, M_NOWAIT); 546 if (buf == NULL) { 547 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", 548 data_length); 549 m_freem(m); 550 return (NULL); 551 } 552 m_copydata(m, 0, data_length, buf); 553 m_freem(m); 554 555 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); 556 free(buf, M_LINUX); 557 558 return (m); 559 } 560 561 static struct linux_netlink_provider linux_netlink_v1 = { 562 .mbufs_to_linux = mbufs_to_linux, 563 .msgs_to_linux = nlmsgs_to_linux, 564 .msg_from_linux = nlmsg_from_linux, 565 }; 566 567 void 568 linux_netlink_register(void) 569 { 570 linux_netlink_p = &linux_netlink_v1; 571 } 572 573 void 574 linux_netlink_deregister(void) 575 { 576 linux_netlink_p = NULL; 577 } 578