1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2021 Ng Peng Nam Sean 5 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "opt_netlink.h" 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_route.h" 36 #include <sys/types.h> 37 #include <sys/malloc.h> 38 #include <sys/rmlock.h> 39 #include <sys/socket.h> 40 41 #include <net/if.h> 42 #include <net/route.h> 43 #include <net/route/nhop.h> 44 #include <net/route/route_ctl.h> 45 #include <net/route/route_var.h> 46 #include <netinet6/scope6_var.h> 47 #include <netlink/netlink.h> 48 #include <netlink/netlink_ctl.h> 49 #include <netlink/netlink_route.h> 50 #include <netlink/route/route_var.h> 51 52 #define DEBUG_MOD_NAME nl_route 53 #define DEBUG_MAX_LEVEL LOG_DEBUG3 54 #include <netlink/netlink_debug.h> 55 _DECLARE_DEBUG(LOG_DEBUG); 56 57 static unsigned char 58 get_rtm_type(const struct nhop_object *nh) 59 { 60 int nh_flags = nh->nh_flags; 61 62 /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */ 63 if (nh_flags & NHF_BLACKHOLE) 64 return (RTN_BLACKHOLE); 65 else if (nh_flags & NHF_REJECT) 66 return (RTN_PROHIBIT); 67 return (RTN_UNICAST); 68 } 69 70 static uint8_t 71 nl_get_rtm_protocol(const struct nhop_object *nh) 72 { 73 #ifdef ROUTE_MPATH 74 if (NH_IS_NHGRP(nh)) { 75 const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh; 76 uint8_t origin = nhgrp_get_origin(nhg); 77 if (origin != RTPROT_UNSPEC) 78 return (origin); 79 nh = nhg->nhops[0]; 80 } 81 #endif 82 uint8_t origin = nhop_get_origin(nh); 83 if (origin != RTPROT_UNSPEC) 84 return (origin); 85 /* TODO: remove guesswork once all kernel users fill in origin */ 86 int rt_flags = nhop_get_rtflags(nh); 87 if (rt_flags & RTF_PROTO1) 88 return (RTPROT_ZEBRA); 89 if (rt_flags & RTF_STATIC) 90 return (RTPROT_STATIC); 91 return (RTPROT_KERNEL); 92 } 93 94 static int 95 get_rtmsg_type_from_rtsock(int cmd) 96 { 97 switch (cmd) { 98 case RTM_ADD: 99 case RTM_CHANGE: 100 case RTM_GET: 101 return NL_RTM_NEWROUTE; 102 case RTM_DELETE: 103 return NL_RTM_DELROUTE; 104 } 105 106 return (0); 107 } 108 109 /* 110 * fibnum heuristics 111 * 112 * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS 113 * msg rtm_table RTA_TABLE result 114 * RTM_GETROUTE/dump 0 - RT_ALL_FIBS 115 * RTM_GETROUTE/dump 1 - 1 116 * RTM_GETROUTE/get 0 - 0 117 * 118 */ 119 120 static struct nhop_object * 121 rc_get_nhop(const struct rib_cmd_info *rc) 122 { 123 return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new); 124 } 125 126 static void 127 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh) 128 { 129 #ifdef INET6 130 int upper_family; 131 #endif 132 133 switch (nhop_get_neigh_family(nh)) { 134 case AF_LINK: 135 /* onlink prefix, skip */ 136 break; 137 case AF_INET: 138 nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 139 break; 140 #ifdef INET6 141 case AF_INET6: 142 upper_family = nhop_get_upper_family(nh); 143 if (upper_family == AF_INET6) { 144 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 145 in6_clearscope(&gw6); 146 147 nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6); 148 } else if (upper_family == AF_INET) { 149 /* IPv4 over IPv6 */ 150 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 151 in6_clearscope(&gw6); 152 153 char buf[20]; 154 struct rtvia *via = (struct rtvia *)&buf[0]; 155 via->rtvia_family = AF_INET6; 156 memcpy(via->rtvia_addr, &gw6, 16); 157 nlattr_add(nw, NL_RTA_VIA, 17, via); 158 } 159 break; 160 #endif 161 } 162 } 163 164 static void 165 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh) 166 { 167 int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t); 168 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 169 170 if (nla == NULL) 171 return; 172 nla->nla_type = NL_RTA_METRICS; 173 nla->nla_len = nla_len; 174 nla++; 175 nla->nla_type = NL_RTAX_MTU; 176 nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t); 177 *((uint32_t *)(nla + 1)) = nh->nh_mtu; 178 } 179 180 #ifdef ROUTE_MPATH 181 static void 182 dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm) 183 { 184 uint32_t uidx = nhgrp_get_uidx(nhg); 185 uint32_t num_nhops; 186 const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops); 187 uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh); 188 189 if (uidx != 0) 190 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 191 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg)); 192 193 nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags); 194 int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH); 195 if (off == 0) 196 return; 197 198 for (int i = 0; i < num_nhops; i++) { 199 int nh_off = nlattr_save_offset(nw); 200 struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop); 201 if (rtnh == NULL) 202 return; 203 rtnh->rtnh_flags = 0; 204 rtnh->rtnh_ifindex = wn[i].nh->nh_ifp->if_index; 205 rtnh->rtnh_hops = wn[i].weight; 206 dump_rc_nhop_gw(nw, wn[i].nh); 207 uint32_t rtflags = nhop_get_rtflags(wn[i].nh); 208 if (rtflags != base_rtflags) 209 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 210 if (rtflags & RTF_FIXEDMTU) 211 dump_rc_nhop_mtu(nw, wn[i].nh); 212 rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); 213 /* 214 * nlattr_add() allocates 4-byte aligned storage, no need to aligh 215 * length here 216 * */ 217 rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; 218 } 219 nlattr_set_len(nw, off); 220 } 221 #endif 222 223 static void 224 dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm) 225 { 226 #ifdef ROUTE_MPATH 227 if (NH_IS_NHGRP(rnd->rnd_nhop)) { 228 dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm); 229 return; 230 } 231 #endif 232 const struct nhop_object *nh = rnd->rnd_nhop; 233 uint32_t rtflags = nhop_get_rtflags(nh); 234 235 /* 236 * IPv4 over IPv6 237 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2), 238 * IPv4 w/ gw 239 * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)], 240 * Direct route: 241 * ('RTA_OIF', 2) 242 */ 243 if (nh->nh_flags & NHF_GATEWAY) 244 dump_rc_nhop_gw(nw, nh); 245 246 uint32_t uidx = nhop_get_uidx(nh); 247 if (uidx != 0) 248 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 249 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh)); 250 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 251 252 if (rtflags & RTF_FIXEDMTU) 253 dump_rc_nhop_mtu(nw, nh); 254 uint32_t nh_expire = nhop_get_expire(nh); 255 if (nh_expire > 0) 256 nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); 257 258 /* In any case, fill outgoing interface */ 259 nlattr_add_u32(nw, NL_RTA_OIF, nh->nh_ifp->if_index); 260 261 if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) 262 nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); 263 } 264 265 /* 266 * Dumps output from a rib command into an rtmsg 267 */ 268 269 static int 270 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr, 271 const struct rtentry *rt, struct route_nhop_data *rnd, 272 struct nl_writer *nw) 273 { 274 struct rtmsg *rtm; 275 int error = 0; 276 277 NET_EPOCH_ASSERT(); 278 279 if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg))) 280 goto enomem; 281 282 int family = rt_get_family(rt); 283 int rtm_off = nlattr_save_offset(nw); 284 rtm = nlmsg_reserve_object(nw, struct rtmsg); 285 rtm->rtm_family = family; 286 rtm->rtm_dst_len = 0; 287 rtm->rtm_src_len = 0; 288 rtm->rtm_tos = 0; 289 if (fibnum < 255) 290 rtm->rtm_table = (unsigned char)fibnum; 291 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 292 rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop); 293 rtm->rtm_type = get_rtm_type(rnd->rnd_nhop); 294 295 nlattr_add_u32(nw, NL_RTA_TABLE, fibnum); 296 297 int plen = 0; 298 #if defined(INET) || defined(INET6) 299 uint32_t scopeid; 300 #endif 301 switch (family) { 302 #ifdef INET 303 case AF_INET: 304 { 305 struct in_addr addr; 306 rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); 307 nlattr_add(nw, NL_RTA_DST, 4, &addr); 308 break; 309 } 310 #endif 311 #ifdef INET6 312 case AF_INET6: 313 { 314 struct in6_addr addr; 315 rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); 316 nlattr_add(nw, NL_RTA_DST, 16, &addr); 317 break; 318 } 319 #endif 320 default: 321 FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family); 322 error = EAFNOSUPPORT; 323 goto flush; 324 } 325 326 rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg); 327 if (plen > 0) 328 rtm->rtm_dst_len = plen; 329 dump_rc_nhop(nw, rnd, rtm); 330 331 if (nlmsg_end(nw)) 332 return (0); 333 enomem: 334 error = ENOMEM; 335 flush: 336 nlmsg_abort(nw); 337 return (error); 338 } 339 340 static int 341 family_to_group(int family) 342 { 343 switch (family) { 344 case AF_INET: 345 return (RTNLGRP_IPV4_ROUTE); 346 case AF_INET6: 347 return (RTNLGRP_IPV6_ROUTE); 348 } 349 return (0); 350 } 351 352 353 static void 354 report_operation(uint32_t fibnum, struct rib_cmd_info *rc, 355 struct nlpcb *nlp, struct nlmsghdr *hdr) 356 { 357 struct nl_writer nw = {}; 358 uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt)); 359 360 if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { 361 struct route_nhop_data rnd = { 362 .rnd_nhop = rc_get_nhop(rc), 363 .rnd_weight = rc->rc_nh_weight, 364 }; 365 hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE); 366 hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND); 367 switch (rc->rc_cmd) { 368 case RTM_ADD: 369 hdr->nlmsg_type = NL_RTM_NEWROUTE; 370 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; 371 break; 372 case RTM_CHANGE: 373 hdr->nlmsg_type = NL_RTM_NEWROUTE; 374 hdr->nlmsg_flags |= NLM_F_REPLACE; 375 break; 376 case RTM_DELETE: 377 hdr->nlmsg_type = NL_RTM_DELROUTE; 378 break; 379 } 380 dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw); 381 nlmsg_flush(&nw); 382 } 383 384 rtsock_callback_p->route_f(fibnum, rc); 385 } 386 387 struct rta_mpath_nh { 388 struct sockaddr *gw; 389 struct ifnet *ifp; 390 uint8_t rtnh_flags; 391 uint8_t rtnh_weight; 392 }; 393 394 #define _IN(_field) offsetof(struct rtnexthop, _field) 395 #define _OUT(_field) offsetof(struct rta_mpath_nh, _field) 396 const static struct nlattr_parser nla_p_rtnh[] = { 397 { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip }, 398 { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia }, 399 }; 400 const static struct nlfield_parser nlf_p_rtnh[] = { 401 { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 }, 402 { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 }, 403 { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz }, 404 }; 405 #undef _IN 406 #undef _OUT 407 NL_DECLARE_PARSER(mpath_parser, struct rtnexthop, nlf_p_rtnh, nla_p_rtnh); 408 409 struct rta_mpath { 410 int num_nhops; 411 struct rta_mpath_nh nhops[0]; 412 }; 413 414 static int 415 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 416 { 417 int data_len = nla->nla_len - sizeof(struct nlattr); 418 struct rtnexthop *rtnh; 419 420 int max_nhops = data_len / sizeof(struct rtnexthop); 421 422 struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh)); 423 mp->num_nhops = 0; 424 425 for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) { 426 struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++]; 427 428 int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser, 429 npt, mpnh); 430 if (error != 0) { 431 NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed", 432 mp->num_nhops - 1); 433 return (error); 434 } 435 436 int len = NL_ITEM_ALIGN(rtnh->rtnh_len); 437 data_len -= len; 438 rtnh = (struct rtnexthop *)((char *)rtnh + len); 439 } 440 if (data_len != 0 || mp->num_nhops == 0) { 441 NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr"); 442 return (EINVAL); 443 } 444 445 *((struct rta_mpath **)target) = mp; 446 return (0); 447 } 448 449 450 struct nl_parsed_route { 451 struct sockaddr *rta_dst; 452 struct sockaddr *rta_gw; 453 struct ifnet *rta_oif; 454 struct rta_mpath *rta_multipath; 455 uint32_t rta_table; 456 uint32_t rta_rtflags; 457 uint32_t rta_nh_id; 458 uint32_t rta_weight; 459 uint32_t rtax_mtu; 460 uint8_t rtm_family; 461 uint8_t rtm_dst_len; 462 uint8_t rtm_protocol; 463 uint8_t rtm_type; 464 uint32_t rtm_flags; 465 }; 466 467 #define _IN(_field) offsetof(struct rtmsg, _field) 468 #define _OUT(_field) offsetof(struct nl_parsed_route, _field) 469 static struct nlattr_parser nla_p_rtmetrics[] = { 470 { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 }, 471 }; 472 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics); 473 474 static const struct nlattr_parser nla_p_rtmsg[] = { 475 { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, 476 { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, 477 { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip }, 478 { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, 479 { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, 480 { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, 481 { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 }, 482 { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 }, 483 { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia }, 484 { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 }, 485 }; 486 487 static const struct nlfield_parser nlf_p_rtmsg[] = { 488 { .off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 }, 489 { .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 }, 490 { .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 }, 491 { .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 }, 492 { .off_in = _IN(rtm_flags), .off_out = _OUT(rtm_flags), .cb = nlf_get_u32 }, 493 }; 494 #undef _IN 495 #undef _OUT 496 NL_DECLARE_PARSER(rtm_parser, struct rtmsg, nlf_p_rtmsg, nla_p_rtmsg); 497 498 struct netlink_walkargs { 499 struct nl_writer *nw; 500 struct route_nhop_data rnd; 501 struct nlmsghdr hdr; 502 struct nlpcb *nlp; 503 uint32_t fibnum; 504 int family; 505 int error; 506 int count; 507 int dumped; 508 int dumped_tables; 509 }; 510 511 static int 512 dump_rtentry(struct rtentry *rt, void *_arg) 513 { 514 struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; 515 int error; 516 517 wa->count++; 518 if (wa->error != 0) 519 return (0); 520 if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp))) 521 return (0); 522 wa->dumped++; 523 524 rt_get_rnd(rt, &wa->rnd); 525 526 error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw); 527 528 IF_DEBUG_LEVEL(LOG_DEBUG3) { 529 char rtbuf[INET6_ADDRSTRLEN + 5]; 530 FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family, 531 "Dump %s, offset %u, error %d", 532 rt_print_buf(rt, rtbuf, sizeof(rtbuf)), 533 wa->nw->offset, error); 534 } 535 wa->error = error; 536 537 return (0); 538 } 539 540 static void 541 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family) 542 { 543 FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump"); 544 wa->count = 0; 545 wa->dumped = 0; 546 547 rib_walk(fibnum, family, false, dump_rtentry, wa); 548 549 wa->dumped_tables++; 550 551 FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d", 552 wa->count, wa->dumped); 553 NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset); 554 } 555 556 static int 557 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family) 558 { 559 wa->fibnum = fibnum; 560 561 if (family == AF_UNSPEC) { 562 for (int i = 0; i < AF_MAX; i++) { 563 if (rt_tables_get_rnh(fibnum, i) != 0) { 564 wa->family = i; 565 dump_rtable_one(wa, fibnum, i); 566 if (wa->error != 0) 567 break; 568 } 569 } 570 } else { 571 if (rt_tables_get_rnh(fibnum, family) != 0) { 572 wa->family = family; 573 dump_rtable_one(wa, fibnum, family); 574 } 575 } 576 577 return (wa->error); 578 } 579 580 static int 581 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs, 582 struct nlmsghdr *hdr, struct nl_pstate *npt) 583 { 584 RIB_RLOCK_TRACKER; 585 struct rib_head *rnh; 586 const struct rtentry *rt; 587 struct route_nhop_data rnd; 588 uint32_t fibnum = attrs->rta_table; 589 sa_family_t family = attrs->rtm_family; 590 591 if (attrs->rta_dst == NULL) { 592 NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied"); 593 return (EINVAL); 594 } 595 596 rnh = rt_tables_get_rnh(fibnum, family); 597 if (rnh == NULL) 598 return (EAFNOSUPPORT); 599 600 RIB_RLOCK(rnh); 601 602 struct sockaddr *dst = attrs->rta_dst; 603 604 if (attrs->rtm_flags & RTM_F_PREFIX) 605 rt = rib_lookup_prefix_plen(rnh, dst, attrs->rtm_dst_len, &rnd); 606 else 607 rt = (const struct rtentry *)rnh->rnh_matchaddr(dst, &rnh->head); 608 if (rt == NULL) { 609 RIB_RUNLOCK(rnh); 610 return (ESRCH); 611 } 612 613 rt_get_rnd(rt, &rnd); 614 rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0); 615 616 RIB_RUNLOCK(rnh); 617 618 if (!rt_is_exportable(rt, nlp_get_cred(nlp))) 619 return (ESRCH); 620 621 IF_DEBUG_LEVEL(LOG_DEBUG2) { 622 char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused; 623 FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s", 624 nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)), 625 rt_print_buf(rt, rtbuf, sizeof(rtbuf))); 626 } 627 628 hdr->nlmsg_type = NL_RTM_NEWROUTE; 629 dump_px(fibnum, hdr, rt, &rnd, npt->nw); 630 631 return (0); 632 } 633 634 static int 635 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, 636 struct nlmsghdr *hdr, struct nl_writer *nw) 637 { 638 struct netlink_walkargs wa = { 639 .nlp = nlp, 640 .nw = nw, 641 .hdr.nlmsg_pid = hdr->nlmsg_pid, 642 .hdr.nlmsg_seq = hdr->nlmsg_seq, 643 .hdr.nlmsg_type = NL_RTM_NEWROUTE, 644 .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, 645 }; 646 647 if (fibnum == RT_TABLE_UNSPEC) { 648 for (int i = 0; i < V_rt_numfibs; i++) { 649 dump_rtable_fib(&wa, fibnum, family); 650 if (wa.error != 0) 651 break; 652 } 653 } else 654 dump_rtable_fib(&wa, fibnum, family); 655 656 if (wa.error == 0 && wa.dumped_tables == 0) { 657 FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family"); 658 wa.error = ESRCH; 659 // How do we propagate it? 660 } 661 662 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) { 663 NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); 664 return (ENOMEM); 665 } 666 667 return (wa.error); 668 } 669 670 static struct nhop_object * 671 finalize_nhop(struct nhop_object *nh, const struct sockaddr *dst, int *perror) 672 { 673 /* 674 * The following MUST be filled: 675 * nh_ifp, nh_ifa, nh_gw 676 */ 677 if (nh->gw_sa.sa_family == 0) { 678 /* 679 * Empty gateway. Can be direct route with RTA_OIF set. 680 */ 681 if (nh->nh_ifp != NULL) 682 nhop_set_direct_gw(nh, nh->nh_ifp); 683 else { 684 NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping"); 685 *perror = EINVAL; 686 return (NULL); 687 } 688 /* Both nh_ifp and gateway are set */ 689 } else { 690 /* Gateway is set up, we can derive ifp if not set */ 691 if (nh->nh_ifp == NULL) { 692 uint32_t fibnum = nhop_get_fibnum(nh); 693 uint32_t flags = 0; 694 695 if (nh->nh_flags & NHF_GATEWAY) 696 flags = RTF_GATEWAY; 697 else if (nh->nh_flags & NHF_HOST) 698 flags = RTF_HOST; 699 700 struct ifaddr *ifa = ifa_ifwithroute(flags, dst, &nh->gw_sa, fibnum); 701 if (ifa == NULL) { 702 NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping"); 703 *perror = EINVAL; 704 return (NULL); 705 } 706 nhop_set_transmit_ifp(nh, ifa->ifa_ifp); 707 } 708 } 709 /* Both nh_ifp and gateway are set */ 710 if (nh->nh_ifa == NULL) { 711 const struct sockaddr *gw_sa = &nh->gw_sa; 712 713 if (gw_sa->sa_family != dst->sa_family) { 714 /* 715 * Use dst as the target for determining the default 716 * preferred ifa IF 717 * 1) the gateway is link-level (e.g. direct route) 718 * 2) the gateway family is different (e.g. IPv4 over IPv6). 719 */ 720 gw_sa = dst; 721 } 722 723 struct ifaddr *ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); 724 if (ifa == NULL) { 725 NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping"); 726 *perror = EINVAL; 727 return (NULL); 728 } 729 nhop_set_src(nh, ifa); 730 } 731 732 return (nhop_get_nhop(nh, perror)); 733 } 734 735 static int 736 get_pxflag(const struct nl_parsed_route *attrs) 737 { 738 int pxflag = 0; 739 switch (attrs->rtm_family) { 740 case AF_INET: 741 if (attrs->rtm_dst_len == 32) 742 pxflag = NHF_HOST; 743 else if (attrs->rtm_dst_len == 0) 744 pxflag = NHF_DEFAULT; 745 break; 746 case AF_INET6: 747 if (attrs->rtm_dst_len == 128) 748 pxflag = NHF_HOST; 749 else if (attrs->rtm_dst_len == 0) 750 pxflag = NHF_DEFAULT; 751 break; 752 } 753 754 return (pxflag); 755 } 756 757 static int 758 get_op_flags(int nlm_flags) 759 { 760 int op_flags = 0; 761 762 op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0; 763 op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0; 764 op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0; 765 op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0; 766 767 return (op_flags); 768 } 769 770 #ifdef ROUTE_MPATH 771 static int 772 create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, 773 struct nl_pstate *npt, struct nhop_object **pnh) 774 { 775 int error; 776 777 if (mpnh->gw == NULL) 778 return (EINVAL); 779 780 struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 781 if (nh == NULL) 782 return (ENOMEM); 783 784 error = nl_set_nexthop_gw(nh, mpnh->gw, mpnh->ifp, npt); 785 if (error != 0) { 786 nhop_free(nh); 787 return (error); 788 } 789 if (mpnh->ifp != NULL) 790 nhop_set_transmit_ifp(nh, mpnh->ifp); 791 nhop_set_pxtype_flag(nh, get_pxflag(attrs)); 792 nhop_set_rtflags(nh, attrs->rta_rtflags); 793 if (attrs->rtm_protocol > RTPROT_STATIC) 794 nhop_set_origin(nh, attrs->rtm_protocol); 795 796 *pnh = finalize_nhop(nh, attrs->rta_dst, &error); 797 798 return (error); 799 } 800 #endif 801 802 static struct nhop_object * 803 create_nexthop_from_attrs(struct nl_parsed_route *attrs, 804 struct nl_pstate *npt, int *perror) 805 { 806 struct nhop_object *nh = NULL; 807 int error = 0; 808 809 if (attrs->rta_multipath != NULL) { 810 #ifdef ROUTE_MPATH 811 /* Multipath w/o explicit nexthops */ 812 int num_nhops = attrs->rta_multipath->num_nhops; 813 struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops); 814 815 for (int i = 0; i < num_nhops; i++) { 816 struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; 817 818 error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh); 819 if (error != 0) { 820 for (int j = 0; j < i; j++) 821 nhop_free(wn[j].nh); 822 break; 823 } 824 wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1; 825 } 826 if (error == 0) { 827 struct rib_head *rh = nhop_get_rh(wn[0].nh); 828 struct nhgrp_object *nhg; 829 830 nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family, 831 wn, num_nhops, perror); 832 if (nhg != NULL) { 833 if (attrs->rtm_protocol > RTPROT_STATIC) 834 nhgrp_set_origin(nhg, attrs->rtm_protocol); 835 nhg = nhgrp_get_nhgrp(nhg, perror); 836 } 837 for (int i = 0; i < num_nhops; i++) 838 nhop_free(wn[i].nh); 839 if (nhg != NULL) 840 return ((struct nhop_object *)nhg); 841 error = *perror; 842 } 843 #else 844 error = ENOTSUP; 845 #endif 846 *perror = error; 847 } else { 848 nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 849 if (nh == NULL) { 850 *perror = ENOMEM; 851 return (NULL); 852 } 853 if (attrs->rta_gw != NULL) { 854 *perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt); 855 if (*perror != 0) { 856 nhop_free(nh); 857 return (NULL); 858 } 859 } 860 if (attrs->rta_oif != NULL) 861 nhop_set_transmit_ifp(nh, attrs->rta_oif); 862 if (attrs->rtax_mtu != 0) 863 nhop_set_mtu(nh, attrs->rtax_mtu, true); 864 if (attrs->rta_rtflags & RTF_BROADCAST) 865 nhop_set_broadcast(nh, true); 866 if (attrs->rtm_protocol > RTPROT_STATIC) 867 nhop_set_origin(nh, attrs->rtm_protocol); 868 nhop_set_pxtype_flag(nh, get_pxflag(attrs)); 869 nhop_set_rtflags(nh, attrs->rta_rtflags); 870 871 switch (attrs->rtm_type) { 872 case RTN_UNICAST: 873 break; 874 case RTN_BLACKHOLE: 875 nhop_set_blackhole(nh, RTF_BLACKHOLE); 876 break; 877 case RTN_PROHIBIT: 878 case RTN_UNREACHABLE: 879 nhop_set_blackhole(nh, RTF_REJECT); 880 break; 881 /* TODO: return ENOTSUP for other types if strict option is set */ 882 } 883 884 nh = finalize_nhop(nh, attrs->rta_dst, perror); 885 } 886 887 return (nh); 888 } 889 890 static int 891 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 892 struct nl_pstate *npt) 893 { 894 struct rib_cmd_info rc = {}; 895 struct nhop_object *nh = NULL; 896 int error; 897 898 struct nl_parsed_route attrs = {}; 899 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 900 if (error != 0) 901 return (error); 902 903 /* Check if we have enough data */ 904 if (attrs.rta_dst == NULL) { 905 NL_LOG(LOG_DEBUG, "missing RTA_DST"); 906 return (EINVAL); 907 } 908 909 if (attrs.rta_table >= V_rt_numfibs) { 910 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 911 return (EINVAL); 912 } 913 914 if (attrs.rta_nh_id != 0) { 915 /* Referenced uindex */ 916 int pxflag = get_pxflag(&attrs); 917 nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id, 918 pxflag, &error); 919 if (error != 0) 920 return (error); 921 } else { 922 nh = create_nexthop_from_attrs(&attrs, npt, &error); 923 if (error != 0) { 924 NL_LOG(LOG_DEBUG, "Error creating nexthop"); 925 return (error); 926 } 927 } 928 929 if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0) 930 attrs.rta_weight = RT_DEFAULT_WEIGHT; 931 struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight }; 932 int op_flags = get_op_flags(hdr->nlmsg_flags); 933 934 error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, 935 &rnd, op_flags, &rc); 936 if (error == 0) 937 report_operation(attrs.rta_table, &rc, nlp, hdr); 938 return (error); 939 } 940 941 static int 942 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 943 { 944 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; 945 946 if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) 947 return (0); 948 949 if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp)) 950 return (0); 951 952 return (1); 953 } 954 955 static int 956 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 957 struct nl_pstate *npt) 958 { 959 struct rib_cmd_info rc; 960 int error; 961 962 struct nl_parsed_route attrs = {}; 963 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 964 if (error != 0) 965 return (error); 966 967 if (attrs.rta_dst == NULL) { 968 NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set"); 969 return (ESRCH); 970 } 971 972 if (attrs.rta_table >= V_rt_numfibs) { 973 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 974 return (EINVAL); 975 } 976 977 error = rib_del_route_px(attrs.rta_table, attrs.rta_dst, 978 attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc); 979 if (error == 0) 980 report_operation(attrs.rta_table, &rc, nlp, hdr); 981 return (error); 982 } 983 984 static int 985 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) 986 { 987 int error; 988 989 struct nl_parsed_route attrs = {}; 990 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 991 if (error != 0) 992 return (error); 993 994 if (attrs.rta_table >= V_rt_numfibs) { 995 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 996 return (EINVAL); 997 } 998 999 if (hdr->nlmsg_flags & NLM_F_DUMP) 1000 error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw); 1001 else 1002 error = handle_rtm_getroute(nlp, &attrs, hdr, npt); 1003 1004 return (error); 1005 } 1006 1007 void 1008 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) 1009 { 1010 struct nl_writer nw = {}; 1011 int family, nlm_flags = 0; 1012 1013 family = rt_get_family(rc->rc_rt); 1014 1015 /* XXX: check if there are active listeners first */ 1016 1017 /* TODO: consider passing PID/type/seq */ 1018 switch (rc->rc_cmd) { 1019 case RTM_ADD: 1020 nlm_flags = NLM_F_EXCL | NLM_F_CREATE; 1021 break; 1022 case RTM_CHANGE: 1023 nlm_flags = NLM_F_REPLACE; 1024 break; 1025 case RTM_DELETE: 1026 nlm_flags = 0; 1027 break; 1028 } 1029 IF_DEBUG_LEVEL(LOG_DEBUG2) { 1030 char rtbuf[NHOP_PRINT_BUFSIZE] __unused; 1031 FIB_LOG(LOG_DEBUG2, fibnum, family, 1032 "received event %s for %s / nlm_flags=%X", 1033 rib_print_cmd(rc->rc_cmd), 1034 rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)), 1035 nlm_flags); 1036 } 1037 1038 struct nlmsghdr hdr = { 1039 .nlmsg_flags = nlm_flags, 1040 .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd), 1041 }; 1042 1043 struct route_nhop_data rnd = { 1044 .rnd_nhop = rc_get_nhop(rc), 1045 .rnd_weight = rc->rc_nh_weight, 1046 }; 1047 1048 uint32_t group_id = family_to_group(family); 1049 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { 1050 NL_LOG(LOG_DEBUG, "error allocating event buffer"); 1051 return; 1052 } 1053 1054 dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw); 1055 nlmsg_flush(&nw); 1056 } 1057 1058 static const struct rtnl_cmd_handler cmd_handlers[] = { 1059 { 1060 .cmd = NL_RTM_GETROUTE, 1061 .name = "RTM_GETROUTE", 1062 .cb = &rtnl_handle_getroute, 1063 .flags = RTNL_F_ALLOW_NONVNET_JAIL, 1064 }, 1065 { 1066 .cmd = NL_RTM_DELROUTE, 1067 .name = "RTM_DELROUTE", 1068 .cb = &rtnl_handle_delroute, 1069 .priv = PRIV_NET_ROUTE, 1070 }, 1071 { 1072 .cmd = NL_RTM_NEWROUTE, 1073 .name = "RTM_NEWROUTE", 1074 .cb = &rtnl_handle_newroute, 1075 .priv = PRIV_NET_ROUTE, 1076 } 1077 }; 1078 1079 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser}; 1080 1081 void 1082 rtnl_routes_init(void) 1083 { 1084 NL_VERIFY_PARSERS(all_parsers); 1085 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); 1086 } 1087