1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2021 Ng Peng Nam Sean 5 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "opt_netlink.h" 30 31 #include <sys/cdefs.h> 32 #include "opt_inet.h" 33 #include "opt_inet6.h" 34 #include "opt_route.h" 35 #include <sys/types.h> 36 #include <sys/malloc.h> 37 #include <sys/rmlock.h> 38 #include <sys/socket.h> 39 40 #include <net/if.h> 41 #include <net/route.h> 42 #include <net/route/nhop.h> 43 #include <net/route/route_ctl.h> 44 #include <net/route/route_var.h> 45 #include <netinet6/scope6_var.h> 46 #include <netlink/netlink.h> 47 #include <netlink/netlink_ctl.h> 48 #include <netlink/netlink_route.h> 49 #include <netlink/route/route_var.h> 50 51 #define DEBUG_MOD_NAME nl_route 52 #define DEBUG_MAX_LEVEL LOG_DEBUG3 53 #include <netlink/netlink_debug.h> 54 _DECLARE_DEBUG(LOG_INFO); 55 56 static unsigned char 57 get_rtm_type(const struct nhop_object *nh) 58 { 59 int nh_flags = nh->nh_flags; 60 61 /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */ 62 if (nh_flags & NHF_BLACKHOLE) 63 return (RTN_BLACKHOLE); 64 else if (nh_flags & NHF_REJECT) 65 return (RTN_PROHIBIT); 66 return (RTN_UNICAST); 67 } 68 69 static uint8_t 70 nl_get_rtm_protocol(const struct nhop_object *nh) 71 { 72 #ifdef ROUTE_MPATH 73 if (NH_IS_NHGRP(nh)) { 74 const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh; 75 uint8_t origin = nhgrp_get_origin(nhg); 76 if (origin != RTPROT_UNSPEC) 77 return (origin); 78 nh = nhg->nhops[0]; 79 } 80 #endif 81 uint8_t origin = nhop_get_origin(nh); 82 if (origin != RTPROT_UNSPEC) 83 return (origin); 84 /* TODO: remove guesswork once all kernel users fill in origin */ 85 int rt_flags = nhop_get_rtflags(nh); 86 if (rt_flags & RTF_PROTO1) 87 return (RTPROT_ZEBRA); 88 if (rt_flags & RTF_STATIC) 89 return (RTPROT_STATIC); 90 return (RTPROT_KERNEL); 91 } 92 93 static int 94 get_rtmsg_type_from_rtsock(int cmd) 95 { 96 switch (cmd) { 97 case RTM_ADD: 98 case RTM_CHANGE: 99 case RTM_GET: 100 return NL_RTM_NEWROUTE; 101 case RTM_DELETE: 102 return NL_RTM_DELROUTE; 103 } 104 105 return (0); 106 } 107 108 /* 109 * fibnum heuristics 110 * 111 * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS 112 * msg rtm_table RTA_TABLE result 113 * RTM_GETROUTE/dump 0 - RT_ALL_FIBS 114 * RTM_GETROUTE/dump 1 - 1 115 * RTM_GETROUTE/get 0 - 0 116 * 117 */ 118 119 static struct nhop_object * 120 rc_get_nhop(const struct rib_cmd_info *rc) 121 { 122 return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new); 123 } 124 125 static void 126 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh) 127 { 128 #ifdef INET6 129 int upper_family; 130 #endif 131 132 switch (nhop_get_neigh_family(nh)) { 133 case AF_LINK: 134 /* onlink prefix, skip */ 135 break; 136 case AF_INET: 137 nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 138 break; 139 #ifdef INET6 140 case AF_INET6: 141 upper_family = nhop_get_upper_family(nh); 142 if (upper_family == AF_INET6) { 143 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 144 in6_clearscope(&gw6); 145 146 nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6); 147 } else if (upper_family == AF_INET) { 148 /* IPv4 over IPv6 */ 149 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 150 in6_clearscope(&gw6); 151 152 char buf[20]; 153 struct rtvia *via = (struct rtvia *)&buf[0]; 154 via->rtvia_family = AF_INET6; 155 memcpy(via->rtvia_addr, &gw6, 16); 156 nlattr_add(nw, NL_RTA_VIA, 17, via); 157 } 158 break; 159 #endif 160 } 161 } 162 163 static void 164 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh) 165 { 166 int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t); 167 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 168 169 if (nla == NULL) 170 return; 171 nla->nla_type = NL_RTA_METRICS; 172 nla->nla_len = nla_len; 173 nla++; 174 nla->nla_type = NL_RTAX_MTU; 175 nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t); 176 *((uint32_t *)(nla + 1)) = nh->nh_mtu; 177 } 178 179 #ifdef ROUTE_MPATH 180 static void 181 dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm) 182 { 183 uint32_t uidx = nhgrp_get_uidx(nhg); 184 uint32_t num_nhops; 185 const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops); 186 uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh); 187 188 if (uidx != 0) 189 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 190 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg)); 191 192 nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags); 193 int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH); 194 if (off == 0) 195 return; 196 197 for (int i = 0; i < num_nhops; i++) { 198 int nh_off = nlattr_save_offset(nw); 199 struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop); 200 if (rtnh == NULL) 201 return; 202 rtnh->rtnh_flags = 0; 203 rtnh->rtnh_ifindex = if_getindex(wn[i].nh->nh_ifp); 204 rtnh->rtnh_hops = wn[i].weight; 205 dump_rc_nhop_gw(nw, wn[i].nh); 206 uint32_t rtflags = nhop_get_rtflags(wn[i].nh); 207 if (rtflags != base_rtflags) 208 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 209 if (rtflags & RTF_FIXEDMTU) 210 dump_rc_nhop_mtu(nw, wn[i].nh); 211 rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); 212 /* 213 * nlattr_add() allocates 4-byte aligned storage, no need to aligh 214 * length here 215 * */ 216 rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; 217 } 218 nlattr_set_len(nw, off); 219 } 220 #endif 221 222 static void 223 dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm) 224 { 225 #ifdef ROUTE_MPATH 226 if (NH_IS_NHGRP(rnd->rnd_nhop)) { 227 dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm); 228 return; 229 } 230 #endif 231 const struct nhop_object *nh = rnd->rnd_nhop; 232 uint32_t rtflags = nhop_get_rtflags(nh); 233 234 /* 235 * IPv4 over IPv6 236 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2), 237 * IPv4 w/ gw 238 * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)], 239 * Direct route: 240 * ('RTA_OIF', 2) 241 */ 242 if (nh->nh_flags & NHF_GATEWAY) 243 dump_rc_nhop_gw(nw, nh); 244 245 uint32_t uidx = nhop_get_uidx(nh); 246 if (uidx != 0) 247 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 248 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh)); 249 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 250 251 if (rtflags & RTF_FIXEDMTU) 252 dump_rc_nhop_mtu(nw, nh); 253 uint32_t nh_expire = nhop_get_expire(nh); 254 if (nh_expire > 0) 255 nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); 256 257 /* In any case, fill outgoing interface */ 258 nlattr_add_u32(nw, NL_RTA_OIF, if_getindex(nh->nh_ifp)); 259 260 if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) 261 nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); 262 } 263 264 /* 265 * Dumps output from a rib command into an rtmsg 266 */ 267 268 static int 269 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr, 270 const struct rtentry *rt, struct route_nhop_data *rnd, 271 struct nl_writer *nw) 272 { 273 struct rtmsg *rtm; 274 int error = 0; 275 276 NET_EPOCH_ASSERT(); 277 278 if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg))) 279 goto enomem; 280 281 int family = rt_get_family(rt); 282 int rtm_off = nlattr_save_offset(nw); 283 rtm = nlmsg_reserve_object(nw, struct rtmsg); 284 rtm->rtm_family = family; 285 rtm->rtm_dst_len = 0; 286 rtm->rtm_src_len = 0; 287 rtm->rtm_tos = 0; 288 if (fibnum < 255) 289 rtm->rtm_table = (unsigned char)fibnum; 290 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 291 rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop); 292 rtm->rtm_type = get_rtm_type(rnd->rnd_nhop); 293 294 nlattr_add_u32(nw, NL_RTA_TABLE, fibnum); 295 296 int plen = 0; 297 #if defined(INET) || defined(INET6) 298 uint32_t scopeid; 299 #endif 300 switch (family) { 301 #ifdef INET 302 case AF_INET: 303 { 304 struct in_addr addr; 305 rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); 306 nlattr_add(nw, NL_RTA_DST, 4, &addr); 307 break; 308 } 309 #endif 310 #ifdef INET6 311 case AF_INET6: 312 { 313 struct in6_addr addr; 314 rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); 315 nlattr_add(nw, NL_RTA_DST, 16, &addr); 316 break; 317 } 318 #endif 319 default: 320 FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family); 321 error = EAFNOSUPPORT; 322 goto flush; 323 } 324 325 rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg); 326 if (plen > 0) 327 rtm->rtm_dst_len = plen; 328 dump_rc_nhop(nw, rnd, rtm); 329 330 if (nlmsg_end(nw)) 331 return (0); 332 enomem: 333 error = ENOMEM; 334 flush: 335 nlmsg_abort(nw); 336 return (error); 337 } 338 339 static int 340 family_to_group(int family) 341 { 342 switch (family) { 343 case AF_INET: 344 return (RTNLGRP_IPV4_ROUTE); 345 case AF_INET6: 346 return (RTNLGRP_IPV6_ROUTE); 347 } 348 return (0); 349 } 350 351 static void 352 report_operation(uint32_t fibnum, struct rib_cmd_info *rc, 353 struct nlpcb *nlp, struct nlmsghdr *hdr) 354 { 355 struct nl_writer nw = {}; 356 uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt)); 357 358 if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { 359 struct route_nhop_data rnd = { 360 .rnd_nhop = rc_get_nhop(rc), 361 .rnd_weight = rc->rc_nh_weight, 362 }; 363 hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE); 364 hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND); 365 switch (rc->rc_cmd) { 366 case RTM_ADD: 367 hdr->nlmsg_type = NL_RTM_NEWROUTE; 368 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; 369 break; 370 case RTM_CHANGE: 371 hdr->nlmsg_type = NL_RTM_NEWROUTE; 372 hdr->nlmsg_flags |= NLM_F_REPLACE; 373 break; 374 case RTM_DELETE: 375 hdr->nlmsg_type = NL_RTM_DELROUTE; 376 break; 377 } 378 dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw); 379 nlmsg_flush(&nw); 380 } 381 382 rtsock_callback_p->route_f(fibnum, rc); 383 } 384 385 static void 386 set_scope6(struct sockaddr *sa, struct ifnet *ifp) 387 { 388 #ifdef INET6 389 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { 390 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; 391 392 if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) 393 in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); 394 } 395 #endif 396 } 397 398 struct rta_mpath_nh { 399 struct sockaddr *gw; 400 struct ifnet *ifp; 401 uint8_t rtnh_flags; 402 uint8_t rtnh_weight; 403 }; 404 405 #define _IN(_field) offsetof(struct rtnexthop, _field) 406 #define _OUT(_field) offsetof(struct rta_mpath_nh, _field) 407 const static struct nlattr_parser nla_p_rtnh[] = { 408 { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip }, 409 { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia }, 410 }; 411 const static struct nlfield_parser nlf_p_rtnh[] = { 412 { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 }, 413 { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 }, 414 { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz }, 415 }; 416 #undef _IN 417 #undef _OUT 418 419 static bool 420 post_p_rtnh(void *_attrs, struct nl_pstate *npt __unused) 421 { 422 struct rta_mpath_nh *attrs = (struct rta_mpath_nh *)_attrs; 423 424 set_scope6(attrs->gw, attrs->ifp); 425 return (true); 426 } 427 NL_DECLARE_PARSER_EXT(mpath_parser, struct rtnexthop, NULL, nlf_p_rtnh, nla_p_rtnh, post_p_rtnh); 428 429 struct rta_mpath { 430 int num_nhops; 431 struct rta_mpath_nh nhops[0]; 432 }; 433 434 static int 435 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 436 { 437 int data_len = nla->nla_len - sizeof(struct nlattr); 438 struct rtnexthop *rtnh; 439 440 int max_nhops = data_len / sizeof(struct rtnexthop); 441 442 struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh)); 443 mp->num_nhops = 0; 444 445 for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) { 446 struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++]; 447 448 int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser, 449 npt, mpnh); 450 if (error != 0) { 451 NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed", 452 mp->num_nhops - 1); 453 return (error); 454 } 455 456 int len = NL_ITEM_ALIGN(rtnh->rtnh_len); 457 data_len -= len; 458 rtnh = (struct rtnexthop *)((char *)rtnh + len); 459 } 460 if (data_len != 0 || mp->num_nhops == 0) { 461 NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr"); 462 return (EINVAL); 463 } 464 465 *((struct rta_mpath **)target) = mp; 466 return (0); 467 } 468 469 470 struct nl_parsed_route { 471 struct sockaddr *rta_dst; 472 struct sockaddr *rta_gw; 473 struct ifnet *rta_oif; 474 struct rta_mpath *rta_multipath; 475 uint32_t rta_table; 476 uint32_t rta_rtflags; 477 uint32_t rta_nh_id; 478 uint32_t rta_weight; 479 uint32_t rtax_mtu; 480 uint8_t rtm_family; 481 uint8_t rtm_dst_len; 482 uint8_t rtm_protocol; 483 uint8_t rtm_type; 484 uint32_t rtm_flags; 485 }; 486 487 #define _IN(_field) offsetof(struct rtmsg, _field) 488 #define _OUT(_field) offsetof(struct nl_parsed_route, _field) 489 static struct nlattr_parser nla_p_rtmetrics[] = { 490 { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 }, 491 }; 492 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics); 493 494 static const struct nlattr_parser nla_p_rtmsg[] = { 495 { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, 496 { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, 497 { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip }, 498 { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, 499 { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, 500 { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, 501 { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 }, 502 { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 }, 503 { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia }, 504 { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 }, 505 }; 506 507 static const struct nlfield_parser nlf_p_rtmsg[] = { 508 { .off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 }, 509 { .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 }, 510 { .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 }, 511 { .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 }, 512 { .off_in = _IN(rtm_flags), .off_out = _OUT(rtm_flags), .cb = nlf_get_u32 }, 513 }; 514 #undef _IN 515 #undef _OUT 516 517 static bool 518 post_p_rtmsg(void *_attrs, struct nl_pstate *npt __unused) 519 { 520 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_attrs; 521 522 set_scope6(attrs->rta_dst, attrs->rta_oif); 523 set_scope6(attrs->rta_gw, attrs->rta_oif); 524 return (true); 525 } 526 NL_DECLARE_PARSER_EXT(rtm_parser, struct rtmsg, NULL, nlf_p_rtmsg, nla_p_rtmsg, post_p_rtmsg); 527 528 struct netlink_walkargs { 529 struct nl_writer *nw; 530 struct route_nhop_data rnd; 531 struct nlmsghdr hdr; 532 struct nlpcb *nlp; 533 uint32_t fibnum; 534 int family; 535 int error; 536 int count; 537 int dumped; 538 int dumped_tables; 539 }; 540 541 static int 542 dump_rtentry(struct rtentry *rt, void *_arg) 543 { 544 struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; 545 int error; 546 547 wa->count++; 548 if (wa->error != 0) 549 return (0); 550 if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp))) 551 return (0); 552 wa->dumped++; 553 554 rt_get_rnd(rt, &wa->rnd); 555 556 error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw); 557 558 IF_DEBUG_LEVEL(LOG_DEBUG3) { 559 char rtbuf[INET6_ADDRSTRLEN + 5]; 560 FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family, 561 "Dump %s, offset %u, error %d", 562 rt_print_buf(rt, rtbuf, sizeof(rtbuf)), 563 wa->nw->offset, error); 564 } 565 wa->error = error; 566 567 return (0); 568 } 569 570 static void 571 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family) 572 { 573 FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump"); 574 wa->count = 0; 575 wa->dumped = 0; 576 577 rib_walk(fibnum, family, false, dump_rtentry, wa); 578 579 wa->dumped_tables++; 580 581 FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d", 582 wa->count, wa->dumped); 583 NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset); 584 } 585 586 static int 587 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family) 588 { 589 wa->fibnum = fibnum; 590 591 if (family == AF_UNSPEC) { 592 for (int i = 0; i < AF_MAX; i++) { 593 if (rt_tables_get_rnh(fibnum, i) != 0) { 594 wa->family = i; 595 dump_rtable_one(wa, fibnum, i); 596 if (wa->error != 0) 597 break; 598 } 599 } 600 } else { 601 if (rt_tables_get_rnh(fibnum, family) != 0) { 602 wa->family = family; 603 dump_rtable_one(wa, fibnum, family); 604 } 605 } 606 607 return (wa->error); 608 } 609 610 static int 611 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs, 612 struct nlmsghdr *hdr, struct nl_pstate *npt) 613 { 614 RIB_RLOCK_TRACKER; 615 struct rib_head *rnh; 616 const struct rtentry *rt; 617 struct route_nhop_data rnd; 618 uint32_t fibnum = attrs->rta_table; 619 sa_family_t family = attrs->rtm_family; 620 621 if (attrs->rta_dst == NULL) { 622 NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied"); 623 return (EINVAL); 624 } 625 626 rnh = rt_tables_get_rnh(fibnum, family); 627 if (rnh == NULL) 628 return (EAFNOSUPPORT); 629 630 RIB_RLOCK(rnh); 631 632 struct sockaddr *dst = attrs->rta_dst; 633 634 if (attrs->rtm_flags & RTM_F_PREFIX) 635 rt = rib_lookup_prefix_plen(rnh, dst, attrs->rtm_dst_len, &rnd); 636 else 637 rt = (const struct rtentry *)rnh->rnh_matchaddr(dst, &rnh->head); 638 if (rt == NULL) { 639 RIB_RUNLOCK(rnh); 640 return (ESRCH); 641 } 642 643 rt_get_rnd(rt, &rnd); 644 rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0); 645 646 RIB_RUNLOCK(rnh); 647 648 if (!rt_is_exportable(rt, nlp_get_cred(nlp))) 649 return (ESRCH); 650 651 IF_DEBUG_LEVEL(LOG_DEBUG2) { 652 char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused; 653 FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s", 654 nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)), 655 rt_print_buf(rt, rtbuf, sizeof(rtbuf))); 656 } 657 658 hdr->nlmsg_type = NL_RTM_NEWROUTE; 659 dump_px(fibnum, hdr, rt, &rnd, npt->nw); 660 661 return (0); 662 } 663 664 static int 665 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, 666 struct nlmsghdr *hdr, struct nl_writer *nw) 667 { 668 struct netlink_walkargs wa = { 669 .nlp = nlp, 670 .nw = nw, 671 .hdr.nlmsg_pid = hdr->nlmsg_pid, 672 .hdr.nlmsg_seq = hdr->nlmsg_seq, 673 .hdr.nlmsg_type = NL_RTM_NEWROUTE, 674 .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, 675 }; 676 677 if (fibnum == RT_TABLE_UNSPEC) { 678 for (int i = 0; i < V_rt_numfibs; i++) { 679 dump_rtable_fib(&wa, fibnum, family); 680 if (wa.error != 0) 681 break; 682 } 683 } else 684 dump_rtable_fib(&wa, fibnum, family); 685 686 if (wa.error == 0 && wa.dumped_tables == 0) { 687 FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family"); 688 wa.error = ESRCH; 689 // How do we propagate it? 690 } 691 692 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) { 693 NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); 694 return (ENOMEM); 695 } 696 697 return (wa.error); 698 } 699 700 static struct nhop_object * 701 finalize_nhop(struct nhop_object *nh, const struct sockaddr *dst, int *perror) 702 { 703 /* 704 * The following MUST be filled: 705 * nh_ifp, nh_ifa, nh_gw 706 */ 707 if (nh->gw_sa.sa_family == 0) { 708 /* 709 * Empty gateway. Can be direct route with RTA_OIF set. 710 */ 711 if (nh->nh_ifp != NULL) 712 nhop_set_direct_gw(nh, nh->nh_ifp); 713 else { 714 NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping"); 715 *perror = EINVAL; 716 return (NULL); 717 } 718 /* Both nh_ifp and gateway are set */ 719 } else { 720 /* Gateway is set up, we can derive ifp if not set */ 721 if (nh->nh_ifp == NULL) { 722 uint32_t fibnum = nhop_get_fibnum(nh); 723 uint32_t flags = 0; 724 725 if (nh->nh_flags & NHF_GATEWAY) 726 flags = RTF_GATEWAY; 727 else if (nh->nh_flags & NHF_HOST) 728 flags = RTF_HOST; 729 730 struct ifaddr *ifa = ifa_ifwithroute(flags, dst, &nh->gw_sa, fibnum); 731 if (ifa == NULL) { 732 NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping"); 733 *perror = EINVAL; 734 return (NULL); 735 } 736 nhop_set_transmit_ifp(nh, ifa->ifa_ifp); 737 } 738 } 739 /* Both nh_ifp and gateway are set */ 740 if (nh->nh_ifa == NULL) { 741 const struct sockaddr *gw_sa = &nh->gw_sa; 742 743 if (gw_sa->sa_family != dst->sa_family) { 744 /* 745 * Use dst as the target for determining the default 746 * preferred ifa IF 747 * 1) the gateway is link-level (e.g. direct route) 748 * 2) the gateway family is different (e.g. IPv4 over IPv6). 749 */ 750 gw_sa = dst; 751 } 752 753 struct ifaddr *ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); 754 if (ifa == NULL) { 755 NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping"); 756 *perror = EINVAL; 757 return (NULL); 758 } 759 nhop_set_src(nh, ifa); 760 } 761 762 return (nhop_get_nhop(nh, perror)); 763 } 764 765 static int 766 get_pxflag(const struct nl_parsed_route *attrs) 767 { 768 int pxflag = 0; 769 switch (attrs->rtm_family) { 770 case AF_INET: 771 if (attrs->rtm_dst_len == 32) 772 pxflag = NHF_HOST; 773 else if (attrs->rtm_dst_len == 0) 774 pxflag = NHF_DEFAULT; 775 break; 776 case AF_INET6: 777 if (attrs->rtm_dst_len == 128) 778 pxflag = NHF_HOST; 779 else if (attrs->rtm_dst_len == 0) 780 pxflag = NHF_DEFAULT; 781 break; 782 } 783 784 return (pxflag); 785 } 786 787 static int 788 get_op_flags(int nlm_flags) 789 { 790 int op_flags = 0; 791 792 op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0; 793 op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0; 794 op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0; 795 op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0; 796 797 return (op_flags); 798 } 799 800 #ifdef ROUTE_MPATH 801 static int 802 create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, 803 struct nl_pstate *npt, struct nhop_object **pnh) 804 { 805 int error; 806 807 if (mpnh->gw == NULL) 808 return (EINVAL); 809 810 struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 811 if (nh == NULL) 812 return (ENOMEM); 813 814 error = nl_set_nexthop_gw(nh, mpnh->gw, mpnh->ifp, npt); 815 if (error != 0) { 816 nhop_free(nh); 817 return (error); 818 } 819 if (mpnh->ifp != NULL) 820 nhop_set_transmit_ifp(nh, mpnh->ifp); 821 nhop_set_pxtype_flag(nh, get_pxflag(attrs)); 822 nhop_set_rtflags(nh, attrs->rta_rtflags); 823 if (attrs->rtm_protocol > RTPROT_STATIC) 824 nhop_set_origin(nh, attrs->rtm_protocol); 825 826 *pnh = finalize_nhop(nh, attrs->rta_dst, &error); 827 828 return (error); 829 } 830 #endif 831 832 static struct nhop_object * 833 create_nexthop_from_attrs(struct nl_parsed_route *attrs, 834 struct nl_pstate *npt, int *perror) 835 { 836 struct nhop_object *nh = NULL; 837 int error = 0; 838 839 if (attrs->rta_multipath != NULL) { 840 #ifdef ROUTE_MPATH 841 /* Multipath w/o explicit nexthops */ 842 int num_nhops = attrs->rta_multipath->num_nhops; 843 struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops); 844 845 for (int i = 0; i < num_nhops; i++) { 846 struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; 847 848 error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh); 849 if (error != 0) { 850 for (int j = 0; j < i; j++) 851 nhop_free(wn[j].nh); 852 break; 853 } 854 wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1; 855 } 856 if (error == 0) { 857 struct rib_head *rh = nhop_get_rh(wn[0].nh); 858 struct nhgrp_object *nhg; 859 860 nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family, 861 wn, num_nhops, perror); 862 if (nhg != NULL) { 863 if (attrs->rtm_protocol > RTPROT_STATIC) 864 nhgrp_set_origin(nhg, attrs->rtm_protocol); 865 nhg = nhgrp_get_nhgrp(nhg, perror); 866 } 867 for (int i = 0; i < num_nhops; i++) 868 nhop_free(wn[i].nh); 869 if (nhg != NULL) 870 return ((struct nhop_object *)nhg); 871 error = *perror; 872 } 873 #else 874 error = ENOTSUP; 875 #endif 876 *perror = error; 877 } else { 878 nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 879 if (nh == NULL) { 880 *perror = ENOMEM; 881 return (NULL); 882 } 883 if (attrs->rta_gw != NULL) { 884 *perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt); 885 if (*perror != 0) { 886 nhop_free(nh); 887 return (NULL); 888 } 889 } 890 if (attrs->rta_oif != NULL) 891 nhop_set_transmit_ifp(nh, attrs->rta_oif); 892 if (attrs->rtax_mtu != 0) 893 nhop_set_mtu(nh, attrs->rtax_mtu, true); 894 if (attrs->rta_rtflags & RTF_BROADCAST) 895 nhop_set_broadcast(nh, true); 896 if (attrs->rtm_protocol > RTPROT_STATIC) 897 nhop_set_origin(nh, attrs->rtm_protocol); 898 nhop_set_pxtype_flag(nh, get_pxflag(attrs)); 899 nhop_set_rtflags(nh, attrs->rta_rtflags); 900 901 switch (attrs->rtm_type) { 902 case RTN_UNICAST: 903 break; 904 case RTN_BLACKHOLE: 905 nhop_set_blackhole(nh, RTF_BLACKHOLE); 906 break; 907 case RTN_PROHIBIT: 908 case RTN_UNREACHABLE: 909 nhop_set_blackhole(nh, RTF_REJECT); 910 break; 911 /* TODO: return ENOTSUP for other types if strict option is set */ 912 } 913 914 nh = finalize_nhop(nh, attrs->rta_dst, perror); 915 } 916 917 return (nh); 918 } 919 920 static int 921 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 922 struct nl_pstate *npt) 923 { 924 struct rib_cmd_info rc = {}; 925 struct nhop_object *nh = NULL; 926 int error; 927 928 struct nl_parsed_route attrs = {}; 929 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 930 if (error != 0) 931 return (error); 932 933 /* Check if we have enough data */ 934 if (attrs.rta_dst == NULL) { 935 NL_LOG(LOG_DEBUG, "missing RTA_DST"); 936 return (EINVAL); 937 } 938 939 if (attrs.rta_table >= V_rt_numfibs) { 940 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 941 return (EINVAL); 942 } 943 944 if (attrs.rta_nh_id != 0) { 945 /* Referenced uindex */ 946 int pxflag = get_pxflag(&attrs); 947 nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id, 948 pxflag, &error); 949 if (error != 0) 950 return (error); 951 } else { 952 nh = create_nexthop_from_attrs(&attrs, npt, &error); 953 if (error != 0) { 954 NL_LOG(LOG_DEBUG, "Error creating nexthop"); 955 return (error); 956 } 957 } 958 959 if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0) 960 attrs.rta_weight = RT_DEFAULT_WEIGHT; 961 struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight }; 962 int op_flags = get_op_flags(hdr->nlmsg_flags); 963 964 error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, 965 &rnd, op_flags, &rc); 966 if (error == 0) 967 report_operation(attrs.rta_table, &rc, nlp, hdr); 968 return (error); 969 } 970 971 static int 972 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 973 { 974 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; 975 976 if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) 977 return (0); 978 979 if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp)) 980 return (0); 981 982 return (1); 983 } 984 985 static int 986 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 987 struct nl_pstate *npt) 988 { 989 struct rib_cmd_info rc; 990 int error; 991 992 struct nl_parsed_route attrs = {}; 993 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 994 if (error != 0) 995 return (error); 996 997 if (attrs.rta_dst == NULL) { 998 NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set"); 999 return (ESRCH); 1000 } 1001 1002 if (attrs.rta_table >= V_rt_numfibs) { 1003 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 1004 return (EINVAL); 1005 } 1006 1007 error = rib_del_route_px(attrs.rta_table, attrs.rta_dst, 1008 attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc); 1009 if (error == 0) 1010 report_operation(attrs.rta_table, &rc, nlp, hdr); 1011 return (error); 1012 } 1013 1014 static int 1015 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) 1016 { 1017 int error; 1018 1019 struct nl_parsed_route attrs = {}; 1020 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 1021 if (error != 0) 1022 return (error); 1023 1024 if (attrs.rta_table >= V_rt_numfibs) { 1025 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 1026 return (EINVAL); 1027 } 1028 1029 if (hdr->nlmsg_flags & NLM_F_DUMP) 1030 error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw); 1031 else 1032 error = handle_rtm_getroute(nlp, &attrs, hdr, npt); 1033 1034 return (error); 1035 } 1036 1037 void 1038 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) 1039 { 1040 struct nl_writer nw = {}; 1041 int family, nlm_flags = 0; 1042 1043 family = rt_get_family(rc->rc_rt); 1044 1045 /* XXX: check if there are active listeners first */ 1046 1047 /* TODO: consider passing PID/type/seq */ 1048 switch (rc->rc_cmd) { 1049 case RTM_ADD: 1050 nlm_flags = NLM_F_EXCL | NLM_F_CREATE; 1051 break; 1052 case RTM_CHANGE: 1053 nlm_flags = NLM_F_REPLACE; 1054 break; 1055 case RTM_DELETE: 1056 nlm_flags = 0; 1057 break; 1058 } 1059 IF_DEBUG_LEVEL(LOG_DEBUG2) { 1060 char rtbuf[NHOP_PRINT_BUFSIZE] __unused; 1061 FIB_LOG(LOG_DEBUG2, fibnum, family, 1062 "received event %s for %s / nlm_flags=%X", 1063 rib_print_cmd(rc->rc_cmd), 1064 rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)), 1065 nlm_flags); 1066 } 1067 1068 struct nlmsghdr hdr = { 1069 .nlmsg_flags = nlm_flags, 1070 .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd), 1071 }; 1072 1073 struct route_nhop_data rnd = { 1074 .rnd_nhop = rc_get_nhop(rc), 1075 .rnd_weight = rc->rc_nh_weight, 1076 }; 1077 1078 uint32_t group_id = family_to_group(family); 1079 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { 1080 NL_LOG(LOG_DEBUG, "error allocating event buffer"); 1081 return; 1082 } 1083 1084 dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw); 1085 nlmsg_flush(&nw); 1086 } 1087 1088 static const struct rtnl_cmd_handler cmd_handlers[] = { 1089 { 1090 .cmd = NL_RTM_GETROUTE, 1091 .name = "RTM_GETROUTE", 1092 .cb = &rtnl_handle_getroute, 1093 .flags = RTNL_F_ALLOW_NONVNET_JAIL, 1094 }, 1095 { 1096 .cmd = NL_RTM_DELROUTE, 1097 .name = "RTM_DELROUTE", 1098 .cb = &rtnl_handle_delroute, 1099 .priv = PRIV_NET_ROUTE, 1100 }, 1101 { 1102 .cmd = NL_RTM_NEWROUTE, 1103 .name = "RTM_NEWROUTE", 1104 .cb = &rtnl_handle_newroute, 1105 .priv = PRIV_NET_ROUTE, 1106 } 1107 }; 1108 1109 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser}; 1110 1111 void 1112 rtnl_routes_init(void) 1113 { 1114 NL_VERIFY_PARSERS(all_parsers); 1115 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); 1116 } 1117