1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include <sys/types.h> 33 #include <sys/malloc.h> 34 #include <sys/rmlock.h> 35 #include <sys/socket.h> 36 #include <sys/ck.h> 37 38 #include <net/if.h> 39 #include <net/if_dl.h> 40 #include <net/route.h> 41 #include <net/route/nhop.h> 42 #include <net/route/route_ctl.h> 43 #include <netlink/netlink.h> 44 #include <netlink/netlink_ctl.h> 45 #include <netlink/netlink_linux.h> 46 #include <netlink/netlink_route.h> 47 48 #include <compat/linux/linux.h> 49 #include <compat/linux/linux_common.h> 50 #include <compat/linux/linux_util.h> 51 52 #define DEBUG_MOD_NAME nl_linux 53 #define DEBUG_MAX_LEVEL LOG_DEBUG3 54 #include <netlink/netlink_debug.h> 55 _DECLARE_DEBUG(LOG_DEBUG); 56 57 static bool 58 valid_rta_size(const struct rtattr *rta, int sz) 59 { 60 return (NL_RTA_DATA_LEN(rta) == sz); 61 } 62 63 static bool 64 valid_rta_u32(const struct rtattr *rta) 65 { 66 return (valid_rta_size(rta, sizeof(uint32_t))); 67 } 68 69 static uint32_t 70 _rta_get_uint32(const struct rtattr *rta) 71 { 72 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 73 } 74 75 static struct nlmsghdr * 76 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 77 { 78 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 79 80 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 81 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); 82 83 return (hdr); 84 } 85 86 static struct nlmsghdr * 87 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 88 { 89 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 90 91 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) 92 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); 93 94 return (hdr); 95 } 96 97 static struct nlmsghdr * 98 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 99 { 100 /* Tweak address families and default fib only */ 101 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 102 struct nlattr *nla, *nla_head; 103 int attrs_len; 104 105 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); 106 107 if (rtm->rtm_table == 254) 108 rtm->rtm_table = 0; 109 110 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 111 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 112 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 113 114 NLA_FOREACH(nla, nla_head, attrs_len) { 115 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 116 nla->nla_type, nla->nla_len, attrs_len); 117 struct rtattr *rta = (struct rtattr *)nla; 118 if (rta->rta_len < sizeof(struct rtattr)) { 119 break; 120 } 121 switch (rta->rta_type) { 122 case NL_RTA_TABLE: 123 if (!valid_rta_u32(rta)) 124 goto done; 125 rtm->rtm_table = 0; 126 uint32_t fibnum = _rta_get_uint32(rta); 127 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 128 if (fibnum == 254) { 129 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 130 } 131 break; 132 } 133 } 134 135 done: 136 return (hdr); 137 } 138 139 static struct nlmsghdr * 140 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 141 { 142 switch (hdr->nlmsg_type) { 143 case NL_RTM_GETROUTE: 144 case NL_RTM_NEWROUTE: 145 case NL_RTM_DELROUTE: 146 return (rtnl_route_from_linux(hdr, npt)); 147 case NL_RTM_GETNEIGH: 148 return (rtnl_neigh_from_linux(hdr, npt)); 149 case NL_RTM_GETADDR: 150 return (rtnl_ifaddr_from_linux(hdr, npt)); 151 /* Silence warning for the messages where no translation is required */ 152 case NL_RTM_NEWLINK: 153 case NL_RTM_DELLINK: 154 case NL_RTM_GETLINK: 155 break; 156 default: 157 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 158 hdr->nlmsg_type); 159 } 160 161 return (hdr); 162 } 163 164 static struct nlmsghdr * 165 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, 166 struct nl_pstate *npt) 167 { 168 switch (netlink_family) { 169 case NETLINK_ROUTE: 170 return (rtnl_from_linux(hdr, npt)); 171 } 172 173 return (hdr); 174 } 175 176 177 /************************************************************ 178 * Kernel -> Linux 179 ************************************************************/ 180 181 static bool 182 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 183 { 184 char *out_hdr; 185 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 186 187 if (out_hdr != NULL) { 188 memcpy(out_hdr, hdr, hdr->nlmsg_len); 189 return (true); 190 } 191 return (false); 192 } 193 194 static bool 195 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 196 { 197 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 198 hdr->nlmsg_flags, 0)); 199 } 200 201 static void * 202 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 203 { 204 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 205 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 206 207 return (next_hdr); 208 } 209 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 210 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 211 212 static bool 213 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 214 { 215 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 216 if (nla != NULL) { 217 memcpy(nla, nla_orig, nla_orig->nla_len); 218 return (true); 219 } 220 return (false); 221 } 222 223 static bool 224 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 225 { 226 struct nlattr *nla; 227 228 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 229 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 230 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 231 232 NLA_FOREACH(nla, nla_head, attrs_len) { 233 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 234 if (nla->nla_len < sizeof(struct nlattr)) { 235 return (false); 236 } 237 if (!nlmsg_copy_nla(nla, nw)) 238 return (false); 239 } 240 return (true); 241 } 242 243 static unsigned int 244 rtnl_if_flags_to_linux(unsigned int if_flags) 245 { 246 unsigned int result = 0; 247 248 for (int i = 0; i < 31; i++) { 249 unsigned int flag = 1 << i; 250 if (!(flag & if_flags)) 251 continue; 252 switch (flag) { 253 case IFF_UP: 254 case IFF_BROADCAST: 255 case IFF_DEBUG: 256 case IFF_LOOPBACK: 257 case IFF_POINTOPOINT: 258 case IFF_DRV_RUNNING: 259 case IFF_NOARP: 260 case IFF_PROMISC: 261 case IFF_ALLMULTI: 262 result |= flag; 263 break; 264 case IFF_KNOWSEPOCH: 265 case IFF_DRV_OACTIVE: 266 case IFF_SIMPLEX: 267 case IFF_LINK0: 268 case IFF_LINK1: 269 case IFF_LINK2: 270 case IFF_CANTCONFIG: 271 case IFF_PPROMISC: 272 case IFF_MONITOR: 273 case IFF_STATICARP: 274 case IFF_STICKYARP: 275 case IFF_DYING: 276 case IFF_RENAMING: 277 case IFF_NOGROUP: 278 /* No Linux analogue */ 279 break; 280 case IFF_MULTICAST: 281 result |= 1 << 12; 282 } 283 } 284 return (result); 285 } 286 287 static bool 288 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 289 struct nl_writer *nw) 290 { 291 if (!nlmsg_copy_header(hdr, nw)) 292 return (false); 293 294 struct ifinfomsg *ifinfo; 295 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 296 297 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 298 /* Convert interface type */ 299 switch (ifinfo->ifi_type) { 300 case IFT_ETHER: 301 ifinfo->ifi_type = 1; // ARPHRD_ETHER 302 break; 303 } 304 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 305 306 /* Copy attributes unchanged */ 307 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 308 return (false); 309 310 /* make ip(8) happy */ 311 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 312 return (false); 313 314 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 315 return (false); 316 317 nlmsg_end(nw); 318 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 319 return (true); 320 } 321 322 static bool 323 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 324 struct nl_writer *nw) 325 { 326 if (!nlmsg_copy_header(hdr, nw)) 327 return (false); 328 329 struct ifaddrmsg *ifamsg; 330 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 331 332 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 333 /* XXX: fake ifa_flags? */ 334 335 /* Copy attributes unchanged */ 336 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 337 return (false); 338 339 nlmsg_end(nw); 340 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 341 return (true); 342 } 343 344 static bool 345 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 346 struct nl_writer *nw) 347 { 348 if (!nlmsg_copy_header(hdr, nw)) 349 return (false); 350 351 struct ndmsg *ndm; 352 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 353 354 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 355 356 /* Copy attributes unchanged */ 357 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 358 return (false); 359 360 nlmsg_end(nw); 361 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 362 return (true); 363 } 364 365 static bool 366 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 367 struct nl_writer *nw) 368 { 369 if (!nlmsg_copy_header(hdr, nw)) 370 return (false); 371 372 struct rtmsg *rtm; 373 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 374 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 375 376 struct nlattr *nla; 377 378 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 379 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 380 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 381 382 NLA_FOREACH(nla, nla_head, attrs_len) { 383 struct rtattr *rta = (struct rtattr *)nla; 384 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 385 if (rta->rta_len < sizeof(struct rtattr)) { 386 break; 387 } 388 389 switch (rta->rta_type) { 390 case NL_RTA_TABLE: 391 { 392 uint32_t fibnum; 393 fibnum = _rta_get_uint32(rta); 394 if (fibnum == 0) 395 fibnum = 254; 396 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 397 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 398 return (false); 399 } 400 break; 401 default: 402 if (!nlmsg_copy_nla(nla, nw)) 403 return (false); 404 break; 405 } 406 } 407 408 nlmsg_end(nw); 409 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 410 return (true); 411 } 412 413 static bool 414 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 415 { 416 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 417 418 switch (hdr->nlmsg_type) { 419 case NL_RTM_NEWLINK: 420 case NL_RTM_DELLINK: 421 case NL_RTM_GETLINK: 422 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 423 case NL_RTM_NEWADDR: 424 case NL_RTM_DELADDR: 425 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 426 case NL_RTM_NEWROUTE: 427 case NL_RTM_DELROUTE: 428 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 429 case NL_RTM_NEWNEIGH: 430 case NL_RTM_DELNEIGH: 431 case NL_RTM_GETNEIGH: 432 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 433 default: 434 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 435 hdr->nlmsg_type); 436 return (handle_default_out(hdr, nw)); 437 } 438 } 439 440 static bool 441 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 442 { 443 if (!nlmsg_copy_header(hdr, nw)) 444 return (false); 445 446 struct nlmsgerr *nlerr; 447 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 448 nlerr->error = bsd_to_linux_errno(nlerr->error); 449 450 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 451 if (hdr->nlmsg_len == copied_len) { 452 nlmsg_end(nw); 453 return (true); 454 } 455 456 /* 457 * CAP_ACK was not set. Original request needs to be translated. 458 * XXX: implement translation of the original message 459 */ 460 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 461 nlerr->msg.nlmsg_type); 462 char *dst_payload, *src_payload; 463 int copy_len = hdr->nlmsg_len - copied_len; 464 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 465 466 src_payload = (char *)hdr + copied_len; 467 468 memcpy(dst_payload, src_payload, copy_len); 469 nlmsg_end(nw); 470 471 return (true); 472 } 473 474 static bool 475 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, 476 struct nl_writer *nw) 477 { 478 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 479 switch (hdr->nlmsg_type) { 480 case NLMSG_ERROR: 481 return (nlmsg_error_to_linux(hdr, nlp, nw)); 482 case NLMSG_NOOP: 483 case NLMSG_DONE: 484 case NLMSG_OVERRUN: 485 return (handle_default_out(hdr, nw)); 486 default: 487 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 488 hdr->nlmsg_type); 489 return (handle_default_out(hdr, nw)); 490 } 491 } 492 493 switch (netlink_family) { 494 case NETLINK_ROUTE: 495 return (rtnl_to_linux(hdr, nlp, nw)); 496 default: 497 return (handle_default_out(hdr, nw)); 498 } 499 } 500 501 static struct mbuf * 502 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) 503 { 504 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); 505 struct nl_writer nw = {}; 506 507 struct mbuf *m = NULL; 508 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { 509 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", 510 data_length); 511 return (NULL); 512 } 513 514 /* Assume correct headers. Buffer IS mutable */ 515 int count = 0; 516 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { 517 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; 518 int msglen = NLMSG_ALIGN(hdr->nlmsg_len); 519 count++; 520 521 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { 522 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 523 hdr->nlmsg_type); 524 m_freem(m); 525 return (NULL); 526 } 527 offset += msglen; 528 } 529 nlmsg_flush(&nw); 530 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, 531 m ? m_length(m, NULL) : 0); 532 533 return (m); 534 } 535 536 static struct mbuf * 537 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) 538 { 539 /* XXX: easiest solution, not optimized for performance */ 540 int data_length = m_length(m, NULL); 541 char *buf = malloc(data_length, M_LINUX, M_NOWAIT); 542 if (buf == NULL) { 543 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", 544 data_length); 545 m_freem(m); 546 return (NULL); 547 } 548 m_copydata(m, 0, data_length, buf); 549 m_freem(m); 550 551 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); 552 free(buf, M_LINUX); 553 554 return (m); 555 } 556 557 static struct linux_netlink_provider linux_netlink_v1 = { 558 .mbufs_to_linux = mbufs_to_linux, 559 .msgs_to_linux = nlmsgs_to_linux, 560 .msg_from_linux = nlmsg_from_linux, 561 }; 562 563 void 564 linux_netlink_register() 565 { 566 linux_netlink_p = &linux_netlink_v1; 567 } 568 569 void 570 linux_netlink_deregister() 571 { 572 linux_netlink_p = NULL; 573 } 574