1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 31 #include <sys/types.h> 32 #include <sys/ck.h> 33 #include <sys/lock.h> 34 #include <sys/socket.h> 35 #include <sys/vnode.h> 36 37 #include <net/if.h> 38 #include <net/if_dl.h> 39 #include <net/route.h> 40 #include <net/route/nhop.h> 41 #include <net/route/route_ctl.h> 42 #include <netlink/netlink.h> 43 #include <netlink/netlink_ctl.h> 44 #include <netlink/netlink_linux.h> 45 #include <netlink/netlink_var.h> 46 #include <netlink/netlink_route.h> 47 48 #include <compat/linux/linux.h> 49 #include <compat/linux/linux_common.h> 50 #include <compat/linux/linux_util.h> 51 52 #define DEBUG_MOD_NAME nl_linux 53 #define DEBUG_MAX_LEVEL LOG_DEBUG3 54 #include <netlink/netlink_debug.h> 55 _DECLARE_DEBUG(LOG_INFO); 56 57 static bool 58 valid_rta_size(const struct rtattr *rta, int sz) 59 { 60 return (NL_RTA_DATA_LEN(rta) == sz); 61 } 62 63 static bool 64 valid_rta_u32(const struct rtattr *rta) 65 { 66 return (valid_rta_size(rta, sizeof(uint32_t))); 67 } 68 69 static uint32_t 70 _rta_get_uint32(const struct rtattr *rta) 71 { 72 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); 73 } 74 75 static int 76 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 77 { 78 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); 79 sa_family_t f; 80 81 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) 82 return (EBADMSG); 83 if ((f = linux_to_bsd_domain(ndm->ndm_family)) == AF_UNKNOWN) 84 return (EPFNOSUPPORT); 85 86 ndm->ndm_family = f; 87 88 return (0); 89 } 90 91 static int 92 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 93 { 94 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); 95 sa_family_t f; 96 97 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + 98 offsetof(struct ifaddrmsg, ifa_family) + sizeof(ifam->ifa_family)) 99 return (EBADMSG); 100 if ((f = linux_to_bsd_domain(ifam->ifa_family)) == AF_UNKNOWN) 101 return (EPFNOSUPPORT); 102 103 ifam->ifa_family = f; 104 105 return (0); 106 } 107 108 /* 109 * XXX: in case of error state of hdr is inconsistent. 110 */ 111 static int 112 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 113 { 114 /* Tweak address families and default fib only */ 115 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); 116 struct nlattr *nla, *nla_head; 117 int attrs_len; 118 sa_family_t f; 119 120 if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct rtmsg)) 121 return (EBADMSG); 122 if ((f = linux_to_bsd_domain(rtm->rtm_family)) == AF_UNKNOWN) 123 return (EPFNOSUPPORT); 124 rtm->rtm_family = f; 125 126 if (rtm->rtm_table == 254) 127 rtm->rtm_table = 0; 128 129 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); 130 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); 131 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); 132 133 NLA_FOREACH(nla, nla_head, attrs_len) { 134 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", 135 nla->nla_type, nla->nla_len, attrs_len); 136 struct rtattr *rta = (struct rtattr *)nla; 137 if (rta->rta_len < sizeof(struct rtattr)) { 138 break; 139 } 140 switch (rta->rta_type) { 141 case NL_RTA_TABLE: 142 if (!valid_rta_u32(rta)) 143 return (EBADMSG); 144 rtm->rtm_table = 0; 145 uint32_t fibnum = _rta_get_uint32(rta); 146 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); 147 if (fibnum == 254) { 148 *((uint32_t *)NL_RTA_DATA(rta)) = 0; 149 } 150 break; 151 } 152 } 153 154 return (0); 155 } 156 157 static int 158 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) 159 { 160 161 switch (hdr->nlmsg_type) { 162 case NL_RTM_GETROUTE: 163 case NL_RTM_NEWROUTE: 164 case NL_RTM_DELROUTE: 165 return (rtnl_route_from_linux(hdr, npt)); 166 case NL_RTM_GETNEIGH: 167 return (rtnl_neigh_from_linux(hdr, npt)); 168 case NL_RTM_GETADDR: 169 return (rtnl_ifaddr_from_linux(hdr, npt)); 170 /* Silence warning for the messages where no translation is required */ 171 case NL_RTM_NEWLINK: 172 case NL_RTM_DELLINK: 173 case NL_RTM_GETLINK: 174 break; 175 default: 176 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", 177 hdr->nlmsg_type); 178 /* XXXGL: maybe return error? */ 179 } 180 181 return (0); 182 } 183 184 static int 185 nlmsg_from_linux(int netlink_family, struct nlmsghdr **hdr, 186 struct nl_pstate *npt) 187 { 188 switch (netlink_family) { 189 case NETLINK_ROUTE: 190 return (rtnl_from_linux(*hdr, npt)); 191 } 192 193 return (0); 194 } 195 196 197 /************************************************************ 198 * Kernel -> Linux 199 ************************************************************/ 200 201 static bool 202 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) 203 { 204 char *out_hdr; 205 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); 206 207 if (out_hdr != NULL) { 208 memcpy(out_hdr, hdr, hdr->nlmsg_len); 209 nw->num_messages++; 210 return (true); 211 } 212 return (false); 213 } 214 215 static bool 216 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) 217 { 218 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, 219 hdr->nlmsg_flags, 0)); 220 } 221 222 static void * 223 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) 224 { 225 void *next_hdr = nlmsg_reserve_data(nw, sz, void); 226 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); 227 228 return (next_hdr); 229 } 230 #define nlmsg_copy_next_header(_hdr, _ns, _t) \ 231 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) 232 233 static bool 234 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) 235 { 236 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); 237 if (nla != NULL) { 238 memcpy(nla, nla_orig, nla_orig->nla_len); 239 return (true); 240 } 241 return (false); 242 } 243 244 /* 245 * Translate a FreeBSD attribute to a Linux attribute. 246 * Returns false when the attribute is not processed and the caller must take 247 * care of it. 248 */ 249 static int 250 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, 251 struct nl_writer *nw) 252 { 253 254 switch (hdr->nlmsg_type) { 255 case NL_RTM_NEWLINK: 256 case NL_RTM_DELLINK: 257 case NL_RTM_GETLINK: 258 switch (nla->nla_type) { 259 case IFLA_IFNAME: { 260 char ifname[LINUX_IFNAMSIZ]; 261 262 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, 263 sizeof(ifname)) > 0) 264 return (true); 265 break; 266 } 267 default: 268 break; 269 } 270 default: 271 break; 272 } 273 return (false); 274 } 275 276 static bool 277 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) 278 { 279 struct nlattr *nla; 280 281 int hdrlen = NETLINK_ALIGN(raw_hdrlen); 282 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 283 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 284 285 NLA_FOREACH(nla, nla_head, attrs_len) { 286 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); 287 if (nla->nla_len < sizeof(struct nlattr)) { 288 return (false); 289 } 290 if (!nlmsg_translate_all_nla(hdr, nla, nw) && 291 !nlmsg_copy_nla(nla, nw)) 292 return (false); 293 } 294 return (true); 295 } 296 297 static unsigned int 298 rtnl_if_flags_to_linux(unsigned int if_flags) 299 { 300 unsigned int result = 0; 301 302 for (int i = 0; i < 31; i++) { 303 unsigned int flag = 1 << i; 304 if (!(flag & if_flags)) 305 continue; 306 switch (flag) { 307 case IFF_UP: 308 case IFF_BROADCAST: 309 case IFF_DEBUG: 310 case IFF_LOOPBACK: 311 case IFF_POINTOPOINT: 312 case IFF_DRV_RUNNING: 313 case IFF_NOARP: 314 case IFF_PROMISC: 315 case IFF_ALLMULTI: 316 result |= flag; 317 break; 318 case IFF_NEEDSEPOCH: 319 case IFF_DRV_OACTIVE: 320 case IFF_SIMPLEX: 321 case IFF_LINK0: 322 case IFF_LINK1: 323 case IFF_LINK2: 324 case IFF_CANTCONFIG: 325 case IFF_PPROMISC: 326 case IFF_MONITOR: 327 case IFF_STATICARP: 328 case IFF_STICKYARP: 329 case IFF_DYING: 330 case IFF_RENAMING: 331 /* No Linux analogue */ 332 break; 333 case IFF_MULTICAST: 334 result |= 1 << 12; 335 } 336 } 337 return (result); 338 } 339 340 static bool 341 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 342 struct nl_writer *nw) 343 { 344 if (!nlmsg_copy_header(hdr, nw)) 345 return (false); 346 347 struct ifinfomsg *ifinfo; 348 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); 349 350 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); 351 /* Convert interface type */ 352 switch (ifinfo->ifi_type) { 353 case IFT_ETHER: 354 ifinfo->ifi_type = LINUX_ARPHRD_ETHER; 355 break; 356 } 357 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); 358 359 /* Copy attributes unchanged */ 360 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) 361 return (false); 362 363 /* make ip(8) happy */ 364 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) 365 return (false); 366 367 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) 368 return (false); 369 370 nlmsg_end(nw); 371 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 372 return (true); 373 } 374 375 static bool 376 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 377 struct nl_writer *nw) 378 { 379 if (!nlmsg_copy_header(hdr, nw)) 380 return (false); 381 382 struct ifaddrmsg *ifamsg; 383 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); 384 385 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); 386 /* XXX: fake ifa_flags? */ 387 388 /* Copy attributes unchanged */ 389 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) 390 return (false); 391 392 nlmsg_end(nw); 393 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 394 return (true); 395 } 396 397 static bool 398 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 399 struct nl_writer *nw) 400 { 401 if (!nlmsg_copy_header(hdr, nw)) 402 return (false); 403 404 struct ndmsg *ndm; 405 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); 406 407 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); 408 409 /* Copy attributes unchanged */ 410 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) 411 return (false); 412 413 nlmsg_end(nw); 414 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 415 return (true); 416 } 417 418 static bool 419 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, 420 struct nl_writer *nw) 421 { 422 if (!nlmsg_copy_header(hdr, nw)) 423 return (false); 424 425 struct rtmsg *rtm; 426 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); 427 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); 428 429 struct nlattr *nla; 430 431 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); 432 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; 433 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); 434 435 NLA_FOREACH(nla, nla_head, attrs_len) { 436 struct rtattr *rta = (struct rtattr *)nla; 437 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); 438 if (rta->rta_len < sizeof(struct rtattr)) { 439 break; 440 } 441 442 switch (rta->rta_type) { 443 case NL_RTA_TABLE: 444 { 445 uint32_t fibnum; 446 fibnum = _rta_get_uint32(rta); 447 if (fibnum == 0) 448 fibnum = 254; 449 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); 450 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) 451 return (false); 452 } 453 break; 454 default: 455 if (!nlmsg_copy_nla(nla, nw)) 456 return (false); 457 break; 458 } 459 } 460 461 nlmsg_end(nw); 462 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); 463 return (true); 464 } 465 466 static bool 467 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 468 { 469 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); 470 471 switch (hdr->nlmsg_type) { 472 case NL_RTM_NEWLINK: 473 case NL_RTM_DELLINK: 474 case NL_RTM_GETLINK: 475 return (rtnl_newlink_to_linux(hdr, nlp, nw)); 476 case NL_RTM_NEWADDR: 477 case NL_RTM_DELADDR: 478 return (rtnl_newaddr_to_linux(hdr, nlp, nw)); 479 case NL_RTM_NEWROUTE: 480 case NL_RTM_DELROUTE: 481 return (rtnl_newroute_to_linux(hdr, nlp, nw)); 482 case NL_RTM_NEWNEIGH: 483 case NL_RTM_DELNEIGH: 484 case NL_RTM_GETNEIGH: 485 return (rtnl_newneigh_to_linux(hdr, nlp, nw)); 486 default: 487 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 488 hdr->nlmsg_type); 489 return (handle_default_out(hdr, nw)); 490 } 491 } 492 493 static bool 494 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 495 { 496 if (!nlmsg_copy_header(hdr, nw)) 497 return (false); 498 499 struct nlmsgerr *nlerr; 500 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); 501 nlerr->error = bsd_to_linux_errno(nlerr->error); 502 503 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); 504 if (hdr->nlmsg_len == copied_len) { 505 nlmsg_end(nw); 506 return (true); 507 } 508 509 /* 510 * CAP_ACK was not set. Original request needs to be translated. 511 * XXX: implement translation of the original message 512 */ 513 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", 514 nlerr->msg.nlmsg_type); 515 char *dst_payload, *src_payload; 516 int copy_len = hdr->nlmsg_len - copied_len; 517 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); 518 519 src_payload = (char *)hdr + copied_len; 520 521 memcpy(dst_payload, src_payload, copy_len); 522 nlmsg_end(nw); 523 524 return (true); 525 } 526 527 static bool 528 nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) 529 { 530 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { 531 switch (hdr->nlmsg_type) { 532 case NLMSG_ERROR: 533 return (nlmsg_error_to_linux(hdr, nlp, nw)); 534 case NLMSG_NOOP: 535 case NLMSG_DONE: 536 case NLMSG_OVERRUN: 537 return (handle_default_out(hdr, nw)); 538 default: 539 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", 540 hdr->nlmsg_type); 541 return (handle_default_out(hdr, nw)); 542 } 543 } 544 545 switch (nlp->nl_proto) { 546 case NETLINK_ROUTE: 547 return (rtnl_to_linux(hdr, nlp, nw)); 548 default: 549 return (handle_default_out(hdr, nw)); 550 } 551 } 552 553 static struct nl_buf * 554 nlmsgs_to_linux(struct nl_buf *orig, struct nlpcb *nlp) 555 { 556 struct nl_writer nw; 557 u_int offset, msglen; 558 559 if (__predict_false(!nl_writer_unicast(&nw, 560 orig->datalen + SCRATCH_BUFFER_SIZE, nlp, false))) 561 return (NULL); 562 563 /* Assume correct headers. Buffer IS mutable */ 564 for (offset = 0; 565 offset + sizeof(struct nlmsghdr) <= orig->datalen; 566 offset += msglen) { 567 struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset]; 568 569 msglen = NLMSG_ALIGN(hdr->nlmsg_len); 570 if (!nlmsg_to_linux(hdr, nlp, &nw)) { 571 RT_LOG(LOG_DEBUG, "failed to process msg type %d", 572 hdr->nlmsg_type); 573 nl_buf_free(nw.buf); 574 return (NULL); 575 } 576 } 577 578 RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__, 579 nw.buf->datalen, nw.num_messages); 580 581 return (nw.buf); 582 } 583 584 static struct linux_netlink_provider linux_netlink_v1 = { 585 .msgs_to_linux = nlmsgs_to_linux, 586 .msg_from_linux = nlmsg_from_linux, 587 }; 588 589 void 590 linux_netlink_register(void) 591 { 592 linux_netlink_p = &linux_netlink_v1; 593 } 594 595 void 596 linux_netlink_deregister(void) 597 { 598 linux_netlink_p = NULL; 599 } 600