1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2021 Ng Peng Nam Sean 5 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 #include "opt_route.h" 34 #include <sys/types.h> 35 #include <sys/malloc.h> 36 #include <sys/rmlock.h> 37 #include <sys/socket.h> 38 39 #include <net/if.h> 40 #include <net/route.h> 41 #include <net/route/nhop.h> 42 #include <net/route/route_ctl.h> 43 #include <net/route/route_var.h> 44 #include <netinet6/scope6_var.h> 45 #include <netlink/netlink.h> 46 #include <netlink/netlink_ctl.h> 47 #include <netlink/netlink_route.h> 48 #include <netlink/route/route_var.h> 49 50 #define DEBUG_MOD_NAME nl_route 51 #define DEBUG_MAX_LEVEL LOG_DEBUG3 52 #include <netlink/netlink_debug.h> 53 _DECLARE_DEBUG(LOG_DEBUG); 54 55 static unsigned char 56 get_rtm_type(const struct nhop_object *nh) 57 { 58 int nh_flags = nh->nh_flags; 59 60 /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */ 61 if (nh_flags & NHF_BLACKHOLE) 62 return (RTN_BLACKHOLE); 63 else if (nh_flags & NHF_REJECT) 64 return (RTN_PROHIBIT); 65 return (RTN_UNICAST); 66 } 67 68 static uint8_t 69 nl_get_rtm_protocol(const struct nhop_object *nh) 70 { 71 #ifdef ROUTE_MPATH 72 if (NH_IS_NHGRP(nh)) { 73 const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh; 74 uint8_t origin = nhgrp_get_origin(nhg); 75 if (origin != RTPROT_UNSPEC) 76 return (origin); 77 nh = nhg->nhops[0]; 78 } 79 #endif 80 uint8_t origin = nhop_get_origin(nh); 81 if (origin != RTPROT_UNSPEC) 82 return (origin); 83 /* TODO: remove guesswork once all kernel users fill in origin */ 84 int rt_flags = nhop_get_rtflags(nh); 85 if (rt_flags & RTF_PROTO1) 86 return (RTPROT_ZEBRA); 87 if (rt_flags & RTF_STATIC) 88 return (RTPROT_STATIC); 89 return (RTPROT_KERNEL); 90 } 91 92 static int 93 get_rtmsg_type_from_rtsock(int cmd) 94 { 95 switch (cmd) { 96 case RTM_ADD: 97 case RTM_CHANGE: 98 case RTM_GET: 99 return NL_RTM_NEWROUTE; 100 case RTM_DELETE: 101 return NL_RTM_DELROUTE; 102 } 103 104 return (0); 105 } 106 107 /* 108 * fibnum heuristics 109 * 110 * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS 111 * msg rtm_table RTA_TABLE result 112 * RTM_GETROUTE/dump 0 - RT_ALL_FIBS 113 * RTM_GETROUTE/dump 1 - 1 114 * RTM_GETROUTE/get 0 - 0 115 * 116 */ 117 118 static struct nhop_object * 119 rc_get_nhop(const struct rib_cmd_info *rc) 120 { 121 return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new); 122 } 123 124 static void 125 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh) 126 { 127 #ifdef INET6 128 int upper_family; 129 #endif 130 131 switch (nhop_get_neigh_family(nh)) { 132 case AF_LINK: 133 /* onlink prefix, skip */ 134 break; 135 case AF_INET: 136 nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 137 break; 138 #ifdef INET6 139 case AF_INET6: 140 upper_family = nhop_get_upper_family(nh); 141 if (upper_family == AF_INET6) { 142 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 143 in6_clearscope(&gw6); 144 145 nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6); 146 } else if (upper_family == AF_INET) { 147 /* IPv4 over IPv6 */ 148 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 149 in6_clearscope(&gw6); 150 151 char buf[20]; 152 struct rtvia *via = (struct rtvia *)&buf[0]; 153 via->rtvia_family = AF_INET6; 154 memcpy(via->rtvia_addr, &gw6, 16); 155 nlattr_add(nw, NL_RTA_VIA, 17, via); 156 } 157 break; 158 #endif 159 } 160 } 161 162 static void 163 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh) 164 { 165 int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t); 166 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 167 168 if (nla == NULL) 169 return; 170 nla->nla_type = NL_RTA_METRICS; 171 nla->nla_len = nla_len; 172 nla++; 173 nla->nla_type = NL_RTAX_MTU; 174 nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t); 175 *((uint32_t *)(nla + 1)) = nh->nh_mtu; 176 } 177 178 #ifdef ROUTE_MPATH 179 static void 180 dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm) 181 { 182 uint32_t uidx = nhgrp_get_uidx(nhg); 183 uint32_t num_nhops; 184 const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops); 185 uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh); 186 187 if (uidx != 0) 188 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 189 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg)); 190 191 nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags); 192 int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH); 193 if (off == 0) 194 return; 195 196 for (int i = 0; i < num_nhops; i++) { 197 int nh_off = nlattr_save_offset(nw); 198 struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop); 199 if (rtnh == NULL) 200 return; 201 rtnh->rtnh_flags = 0; 202 rtnh->rtnh_ifindex = wn[i].nh->nh_ifp->if_index; 203 rtnh->rtnh_hops = wn[i].weight; 204 dump_rc_nhop_gw(nw, wn[i].nh); 205 uint32_t rtflags = nhop_get_rtflags(wn[i].nh); 206 if (rtflags != base_rtflags) 207 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 208 if (rtflags & RTF_FIXEDMTU) 209 dump_rc_nhop_mtu(nw, wn[i].nh); 210 rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); 211 /* 212 * nlattr_add() allocates 4-byte aligned storage, no need to aligh 213 * length here 214 * */ 215 rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; 216 } 217 nlattr_set_len(nw, off); 218 } 219 #endif 220 221 static void 222 dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm) 223 { 224 #ifdef ROUTE_MPATH 225 if (NH_IS_NHGRP(rnd->rnd_nhop)) { 226 dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm); 227 return; 228 } 229 #endif 230 const struct nhop_object *nh = rnd->rnd_nhop; 231 uint32_t rtflags = nhop_get_rtflags(nh); 232 233 /* 234 * IPv4 over IPv6 235 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2), 236 * IPv4 w/ gw 237 * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)], 238 * Direct route: 239 * ('RTA_OIF', 2) 240 */ 241 if (nh->nh_flags & NHF_GATEWAY) 242 dump_rc_nhop_gw(nw, nh); 243 244 uint32_t uidx = nhop_get_uidx(nh); 245 if (uidx != 0) 246 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 247 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh)); 248 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 249 250 if (rtflags & RTF_FIXEDMTU) 251 dump_rc_nhop_mtu(nw, nh); 252 uint32_t nh_expire = nhop_get_expire(nh); 253 if (nh_expire > 0) 254 nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); 255 256 /* In any case, fill outgoing interface */ 257 nlattr_add_u32(nw, NL_RTA_OIF, nh->nh_ifp->if_index); 258 259 if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) 260 nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); 261 } 262 263 /* 264 * Dumps output from a rib command into an rtmsg 265 */ 266 267 static int 268 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr, 269 const struct rtentry *rt, struct route_nhop_data *rnd, 270 struct nl_writer *nw) 271 { 272 struct rtmsg *rtm; 273 int error = 0; 274 275 NET_EPOCH_ASSERT(); 276 277 if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg))) 278 goto enomem; 279 280 int family = rt_get_family(rt); 281 int rtm_off = nlattr_save_offset(nw); 282 rtm = nlmsg_reserve_object(nw, struct rtmsg); 283 rtm->rtm_family = family; 284 rtm->rtm_dst_len = 0; 285 rtm->rtm_src_len = 0; 286 rtm->rtm_tos = 0; 287 if (fibnum < 255) 288 rtm->rtm_table = (unsigned char)fibnum; 289 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 290 rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop); 291 rtm->rtm_type = get_rtm_type(rnd->rnd_nhop); 292 293 nlattr_add_u32(nw, NL_RTA_TABLE, fibnum); 294 295 int plen = 0; 296 #if defined(INET) || defined(INET6) 297 uint32_t scopeid; 298 #endif 299 switch (family) { 300 #ifdef INET 301 case AF_INET: 302 { 303 struct in_addr addr; 304 rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); 305 nlattr_add(nw, NL_RTA_DST, 4, &addr); 306 break; 307 } 308 #endif 309 #ifdef INET6 310 case AF_INET6: 311 { 312 struct in6_addr addr; 313 rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); 314 nlattr_add(nw, NL_RTA_DST, 16, &addr); 315 break; 316 } 317 #endif 318 default: 319 FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family); 320 error = EAFNOSUPPORT; 321 goto flush; 322 } 323 324 rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg); 325 if (plen > 0) 326 rtm->rtm_dst_len = plen; 327 dump_rc_nhop(nw, rnd, rtm); 328 329 if (nlmsg_end(nw)) 330 return (0); 331 enomem: 332 error = ENOMEM; 333 flush: 334 nlmsg_abort(nw); 335 return (error); 336 } 337 338 static int 339 family_to_group(int family) 340 { 341 switch (family) { 342 case AF_INET: 343 return (RTNLGRP_IPV4_ROUTE); 344 case AF_INET6: 345 return (RTNLGRP_IPV6_ROUTE); 346 } 347 return (0); 348 } 349 350 351 static void 352 report_operation(uint32_t fibnum, struct rib_cmd_info *rc, 353 struct nlpcb *nlp, struct nlmsghdr *hdr) 354 { 355 struct nl_writer nw = {}; 356 uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt)); 357 358 if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { 359 struct route_nhop_data rnd = { 360 .rnd_nhop = rc_get_nhop(rc), 361 .rnd_weight = rc->rc_nh_weight, 362 }; 363 hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE); 364 hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND); 365 switch (rc->rc_cmd) { 366 case RTM_ADD: 367 hdr->nlmsg_type = NL_RTM_NEWROUTE; 368 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; 369 break; 370 case RTM_CHANGE: 371 hdr->nlmsg_type = NL_RTM_NEWROUTE; 372 hdr->nlmsg_flags |= NLM_F_REPLACE; 373 break; 374 case RTM_DELETE: 375 hdr->nlmsg_type = NL_RTM_DELROUTE; 376 break; 377 } 378 dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw); 379 nlmsg_flush(&nw); 380 } 381 382 rtsock_callback_p->route_f(fibnum, rc); 383 } 384 385 struct rta_mpath_nh { 386 struct sockaddr *gw; 387 struct ifnet *ifp; 388 uint8_t rtnh_flags; 389 uint8_t rtnh_weight; 390 }; 391 392 #define _IN(_field) offsetof(struct rtnexthop, _field) 393 #define _OUT(_field) offsetof(struct rta_mpath_nh, _field) 394 const static struct nlattr_parser nla_p_rtnh[] = { 395 { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip }, 396 { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia }, 397 }; 398 const static struct nlfield_parser nlf_p_rtnh[] = { 399 { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 }, 400 { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 }, 401 { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz }, 402 }; 403 #undef _IN 404 #undef _OUT 405 NL_DECLARE_PARSER(mpath_parser, struct rtnexthop, nlf_p_rtnh, nla_p_rtnh); 406 407 struct rta_mpath { 408 int num_nhops; 409 struct rta_mpath_nh nhops[0]; 410 }; 411 412 static int 413 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 414 { 415 int data_len = nla->nla_len - sizeof(struct nlattr); 416 struct rtnexthop *rtnh; 417 418 int max_nhops = data_len / sizeof(struct rtnexthop); 419 420 struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh)); 421 mp->num_nhops = 0; 422 423 for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) { 424 struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++]; 425 426 int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser, 427 npt, mpnh); 428 if (error != 0) { 429 NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed", 430 mp->num_nhops - 1); 431 return (error); 432 } 433 434 int len = NL_ITEM_ALIGN(rtnh->rtnh_len); 435 data_len -= len; 436 rtnh = (struct rtnexthop *)((char *)rtnh + len); 437 } 438 if (data_len != 0 || mp->num_nhops == 0) { 439 NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr"); 440 return (EINVAL); 441 } 442 443 *((struct rta_mpath **)target) = mp; 444 return (0); 445 } 446 447 448 struct nl_parsed_route { 449 struct sockaddr *rta_dst; 450 struct sockaddr *rta_gw; 451 struct ifnet *rta_oif; 452 struct rta_mpath *rta_multipath; 453 uint32_t rta_table; 454 uint32_t rta_rtflags; 455 uint32_t rta_nh_id; 456 uint32_t rta_weight; 457 uint32_t rtax_mtu; 458 uint8_t rtm_family; 459 uint8_t rtm_dst_len; 460 uint8_t rtm_protocol; 461 uint8_t rtm_type; 462 }; 463 464 #define _IN(_field) offsetof(struct rtmsg, _field) 465 #define _OUT(_field) offsetof(struct nl_parsed_route, _field) 466 static struct nlattr_parser nla_p_rtmetrics[] = { 467 { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 }, 468 }; 469 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics); 470 471 static const struct nlattr_parser nla_p_rtmsg[] = { 472 { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, 473 { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, 474 { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip }, 475 { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, 476 { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, 477 { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, 478 { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 }, 479 { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 }, 480 { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia }, 481 { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 }, 482 }; 483 484 static const struct nlfield_parser nlf_p_rtmsg[] = { 485 { .off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 }, 486 { .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 }, 487 { .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 }, 488 { .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 }, 489 }; 490 #undef _IN 491 #undef _OUT 492 NL_DECLARE_PARSER(rtm_parser, struct rtmsg, nlf_p_rtmsg, nla_p_rtmsg); 493 494 struct netlink_walkargs { 495 struct nl_writer *nw; 496 struct route_nhop_data rnd; 497 struct nlmsghdr hdr; 498 struct nlpcb *nlp; 499 uint32_t fibnum; 500 int family; 501 int error; 502 int count; 503 int dumped; 504 int dumped_tables; 505 }; 506 507 static int 508 dump_rtentry(struct rtentry *rt, void *_arg) 509 { 510 struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; 511 int error; 512 513 wa->count++; 514 if (wa->error != 0) 515 return (0); 516 wa->dumped++; 517 518 rt_get_rnd(rt, &wa->rnd); 519 520 error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw); 521 522 IF_DEBUG_LEVEL(LOG_DEBUG3) { 523 char rtbuf[INET6_ADDRSTRLEN + 5]; 524 FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family, 525 "Dump %s, offset %u, error %d", 526 rt_print_buf(rt, rtbuf, sizeof(rtbuf)), 527 wa->nw->offset, error); 528 } 529 wa->error = error; 530 531 return (0); 532 } 533 534 static void 535 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family) 536 { 537 FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump"); 538 wa->count = 0; 539 wa->dumped = 0; 540 541 rib_walk(fibnum, family, false, dump_rtentry, wa); 542 543 wa->dumped_tables++; 544 545 FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d", 546 wa->count, wa->dumped); 547 NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset); 548 } 549 550 static int 551 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family) 552 { 553 wa->fibnum = fibnum; 554 555 if (family == AF_UNSPEC) { 556 for (int i = 0; i < AF_MAX; i++) { 557 if (rt_tables_get_rnh(fibnum, i) != 0) { 558 wa->family = i; 559 dump_rtable_one(wa, fibnum, i); 560 if (wa->error != 0) 561 break; 562 } 563 } 564 } else { 565 if (rt_tables_get_rnh(fibnum, family) != 0) { 566 wa->family = family; 567 dump_rtable_one(wa, fibnum, family); 568 } 569 } 570 571 return (wa->error); 572 } 573 574 static int 575 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs, 576 struct nlmsghdr *hdr, struct nl_pstate *npt) 577 { 578 RIB_RLOCK_TRACKER; 579 struct rib_head *rnh; 580 struct rtentry *rt; 581 uint32_t fibnum = attrs->rta_table; 582 sa_family_t family = attrs->rtm_family; 583 584 if (attrs->rta_dst == NULL) { 585 NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied"); 586 return (EINVAL); 587 } 588 589 FIB_LOG(LOG_DEBUG, fibnum, family, "getroute called"); 590 591 rnh = rt_tables_get_rnh(fibnum, family); 592 if (rnh == NULL) 593 return (EAFNOSUPPORT); 594 595 RIB_RLOCK(rnh); 596 597 rt = (struct rtentry *)rnh->rnh_matchaddr(attrs->rta_dst, &rnh->head); 598 if (rt == NULL) { 599 RIB_RUNLOCK(rnh); 600 return (ESRCH); 601 } 602 603 struct route_nhop_data rnd; 604 rt_get_rnd(rt, &rnd); 605 rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0); 606 607 RIB_RUNLOCK(rnh); 608 609 IF_DEBUG_LEVEL(LOG_DEBUG2) { 610 char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused; 611 FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s", 612 nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)), 613 rt_print_buf(rt, rtbuf, sizeof(rtbuf))); 614 } 615 616 hdr->nlmsg_type = NL_RTM_NEWROUTE; 617 dump_px(fibnum, hdr, rt, &rnd, npt->nw); 618 619 return (0); 620 } 621 622 static int 623 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, 624 struct nlmsghdr *hdr, struct nl_writer *nw) 625 { 626 struct netlink_walkargs wa = { 627 .nlp = nlp, 628 .nw = nw, 629 .hdr.nlmsg_pid = hdr->nlmsg_pid, 630 .hdr.nlmsg_seq = hdr->nlmsg_seq, 631 .hdr.nlmsg_type = NL_RTM_NEWROUTE, 632 .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, 633 }; 634 635 if (fibnum == RT_TABLE_UNSPEC) { 636 for (int i = 0; i < V_rt_numfibs; i++) { 637 dump_rtable_fib(&wa, fibnum, family); 638 if (wa.error != 0) 639 break; 640 } 641 } else 642 dump_rtable_fib(&wa, fibnum, family); 643 644 if (wa.error == 0 && wa.dumped_tables == 0) { 645 FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family"); 646 wa.error = ESRCH; 647 // How do we propagate it? 648 } 649 650 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) { 651 NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); 652 return (ENOMEM); 653 } 654 655 return (wa.error); 656 } 657 658 static struct nhop_object * 659 finalize_nhop(struct nhop_object *nh, int *perror) 660 { 661 /* 662 * The following MUST be filled: 663 * nh_ifp, nh_ifa, nh_gw 664 */ 665 if (nh->gw_sa.sa_family == 0) { 666 /* 667 * Empty gateway. Can be direct route with RTA_OIF set. 668 */ 669 if (nh->nh_ifp != NULL) 670 nhop_set_direct_gw(nh, nh->nh_ifp); 671 else { 672 NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping"); 673 *perror = EINVAL; 674 return (NULL); 675 } 676 /* Both nh_ifp and gateway are set */ 677 } else { 678 /* Gateway is set up, we can derive ifp if not set */ 679 if (nh->nh_ifp == NULL) { 680 struct ifaddr *ifa = ifa_ifwithnet(&nh->gw_sa, 1, nhop_get_fibnum(nh)); 681 if (ifa == NULL) { 682 NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping"); 683 *perror = EINVAL; 684 return (NULL); 685 } 686 nhop_set_transmit_ifp(nh, ifa->ifa_ifp); 687 } 688 } 689 /* Both nh_ifp and gateway are set */ 690 if (nh->nh_ifa == NULL) { 691 struct ifaddr *ifa = ifaof_ifpforaddr(&nh->gw_sa, nh->nh_ifp); 692 if (ifa == NULL) { 693 NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping"); 694 *perror = EINVAL; 695 return (NULL); 696 } 697 nhop_set_src(nh, ifa); 698 } 699 700 return (nhop_get_nhop(nh, perror)); 701 } 702 703 static int 704 get_pxflag(const struct nl_parsed_route *attrs) 705 { 706 int pxflag = 0; 707 switch (attrs->rtm_family) { 708 case AF_INET: 709 if (attrs->rtm_dst_len == 32) 710 pxflag = NHF_HOST; 711 else if (attrs->rtm_dst_len == 0) 712 pxflag = NHF_DEFAULT; 713 break; 714 case AF_INET6: 715 if (attrs->rtm_dst_len == 32) 716 pxflag = NHF_HOST; 717 else if (attrs->rtm_dst_len == 0) 718 pxflag = NHF_DEFAULT; 719 break; 720 } 721 722 return (pxflag); 723 } 724 725 static int 726 get_op_flags(int nlm_flags) 727 { 728 int op_flags = 0; 729 730 op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0; 731 op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0; 732 op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0; 733 op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0; 734 735 return (op_flags); 736 } 737 738 #ifdef ROUTE_MPATH 739 static int 740 create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, 741 struct nl_pstate *npt, struct nhop_object **pnh) 742 { 743 int error; 744 745 if (mpnh->gw == NULL) 746 return (EINVAL); 747 748 struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 749 if (nh == NULL) 750 return (ENOMEM); 751 752 error = nl_set_nexthop_gw(nh, mpnh->gw, mpnh->ifp, npt); 753 if (error != 0) { 754 nhop_free(nh); 755 return (error); 756 } 757 if (mpnh->ifp != NULL) 758 nhop_set_transmit_ifp(nh, mpnh->ifp); 759 nhop_set_rtflags(nh, attrs->rta_rtflags); 760 if (attrs->rtm_protocol > RTPROT_STATIC) 761 nhop_set_origin(nh, attrs->rtm_protocol); 762 763 *pnh = finalize_nhop(nh, &error); 764 765 return (error); 766 } 767 #endif 768 769 static struct nhop_object * 770 create_nexthop_from_attrs(struct nl_parsed_route *attrs, 771 struct nl_pstate *npt, int *perror) 772 { 773 struct nhop_object *nh = NULL; 774 int error = 0; 775 776 if (attrs->rta_multipath != NULL) { 777 #ifdef ROUTE_MPATH 778 /* Multipath w/o explicit nexthops */ 779 int num_nhops = attrs->rta_multipath->num_nhops; 780 struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops); 781 782 for (int i = 0; i < num_nhops; i++) { 783 struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; 784 785 error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh); 786 if (error != 0) { 787 for (int j = 0; j < i; j++) 788 nhop_free(wn[j].nh); 789 break; 790 } 791 wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1; 792 } 793 if (error == 0) { 794 struct rib_head *rh = nhop_get_rh(wn[0].nh); 795 struct nhgrp_object *nhg; 796 797 nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family, 798 wn, num_nhops, perror); 799 if (nhg != NULL) { 800 if (attrs->rtm_protocol > RTPROT_STATIC) 801 nhgrp_set_origin(nhg, attrs->rtm_protocol); 802 nhg = nhgrp_get_nhgrp(nhg, perror); 803 } 804 for (int i = 0; i < num_nhops; i++) 805 nhop_free(wn[i].nh); 806 if (nhg != NULL) 807 return ((struct nhop_object *)nhg); 808 error = *perror; 809 } 810 #else 811 error = ENOTSUP; 812 #endif 813 *perror = error; 814 } else { 815 nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 816 if (nh == NULL) { 817 *perror = ENOMEM; 818 return (NULL); 819 } 820 if (attrs->rta_gw != NULL) { 821 *perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt); 822 if (*perror != 0) { 823 nhop_free(nh); 824 return (NULL); 825 } 826 } 827 if (attrs->rta_oif != NULL) 828 nhop_set_transmit_ifp(nh, attrs->rta_oif); 829 if (attrs->rtax_mtu != 0) 830 nhop_set_mtu(nh, attrs->rtax_mtu, true); 831 if (attrs->rta_rtflags & RTF_BROADCAST) 832 nhop_set_broadcast(nh, true); 833 if (attrs->rtm_protocol > RTPROT_STATIC) 834 nhop_set_origin(nh, attrs->rtm_protocol); 835 nhop_set_rtflags(nh, attrs->rta_rtflags); 836 837 switch (attrs->rtm_type) { 838 case RTN_UNICAST: 839 break; 840 case RTN_BLACKHOLE: 841 nhop_set_blackhole(nh, RTF_BLACKHOLE); 842 break; 843 case RTN_PROHIBIT: 844 case RTN_UNREACHABLE: 845 nhop_set_blackhole(nh, RTF_REJECT); 846 break; 847 /* TODO: return ENOTSUP for other types if strict option is set */ 848 } 849 850 nh = finalize_nhop(nh, perror); 851 } 852 853 return (nh); 854 } 855 856 static int 857 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 858 struct nl_pstate *npt) 859 { 860 struct rib_cmd_info rc = {}; 861 struct nhop_object *nh = NULL; 862 int error; 863 864 struct nl_parsed_route attrs = {}; 865 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 866 if (error != 0) 867 return (error); 868 869 /* Check if we have enough data */ 870 if (attrs.rta_dst == NULL) { 871 NL_LOG(LOG_DEBUG, "missing RTA_DST"); 872 return (EINVAL); 873 } 874 875 if (attrs.rta_table >= V_rt_numfibs) { 876 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 877 return (EINVAL); 878 } 879 880 if (attrs.rta_nh_id != 0) { 881 /* Referenced uindex */ 882 int pxflag = get_pxflag(&attrs); 883 nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id, 884 pxflag, &error); 885 if (error != 0) 886 return (error); 887 } else { 888 nh = create_nexthop_from_attrs(&attrs, npt, &error); 889 if (error != 0) { 890 NL_LOG(LOG_DEBUG, "Error creating nexthop"); 891 return (error); 892 } 893 } 894 895 if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0) 896 attrs.rta_weight = RT_DEFAULT_WEIGHT; 897 struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight }; 898 int op_flags = get_op_flags(hdr->nlmsg_flags); 899 900 error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, 901 &rnd, op_flags, &rc); 902 if (error == 0) 903 report_operation(attrs.rta_table, &rc, nlp, hdr); 904 return (error); 905 } 906 907 static int 908 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 909 { 910 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; 911 912 if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) 913 return (0); 914 915 if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp)) 916 return (0); 917 918 return (1); 919 } 920 921 static int 922 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 923 struct nl_pstate *npt) 924 { 925 struct rib_cmd_info rc; 926 int error; 927 928 struct nl_parsed_route attrs = {}; 929 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 930 if (error != 0) 931 return (error); 932 933 if (attrs.rta_dst == NULL) { 934 NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set"); 935 return (ESRCH); 936 } 937 938 if (attrs.rta_table >= V_rt_numfibs) { 939 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 940 return (EINVAL); 941 } 942 943 error = rib_del_route_px(attrs.rta_table, attrs.rta_dst, 944 attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc); 945 if (error == 0) 946 report_operation(attrs.rta_table, &rc, nlp, hdr); 947 return (error); 948 } 949 950 static int 951 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) 952 { 953 int error; 954 955 struct nl_parsed_route attrs = {}; 956 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 957 if (error != 0) 958 return (error); 959 960 if (attrs.rta_table >= V_rt_numfibs) { 961 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 962 return (EINVAL); 963 } 964 965 if (hdr->nlmsg_flags & NLM_F_DUMP) 966 error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw); 967 else 968 error = handle_rtm_getroute(nlp, &attrs, hdr, npt); 969 970 return (error); 971 } 972 973 void 974 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) 975 { 976 struct nl_writer nw = {}; 977 int family, nlm_flags = 0; 978 979 family = rt_get_family(rc->rc_rt); 980 981 /* XXX: check if there are active listeners first */ 982 983 /* TODO: consider passing PID/type/seq */ 984 switch (rc->rc_cmd) { 985 case RTM_ADD: 986 nlm_flags = NLM_F_EXCL | NLM_F_CREATE; 987 break; 988 case RTM_CHANGE: 989 nlm_flags = NLM_F_REPLACE; 990 break; 991 case RTM_DELETE: 992 nlm_flags = 0; 993 break; 994 } 995 IF_DEBUG_LEVEL(LOG_DEBUG2) { 996 char rtbuf[NHOP_PRINT_BUFSIZE] __unused; 997 FIB_LOG(LOG_DEBUG2, fibnum, family, 998 "received event %s for %s / nlm_flags=%X", 999 rib_print_cmd(rc->rc_cmd), 1000 rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)), 1001 nlm_flags); 1002 } 1003 1004 struct nlmsghdr hdr = { 1005 .nlmsg_flags = nlm_flags, 1006 .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd), 1007 }; 1008 1009 struct route_nhop_data rnd = { 1010 .rnd_nhop = rc_get_nhop(rc), 1011 .rnd_weight = rc->rc_nh_weight, 1012 }; 1013 1014 uint32_t group_id = family_to_group(family); 1015 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { 1016 NL_LOG(LOG_DEBUG, "error allocating event buffer"); 1017 return; 1018 } 1019 1020 dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw); 1021 nlmsg_flush(&nw); 1022 } 1023 1024 static const struct rtnl_cmd_handler cmd_handlers[] = { 1025 { 1026 .cmd = NL_RTM_GETROUTE, 1027 .name = "RTM_GETROUTE", 1028 .cb = &rtnl_handle_getroute, 1029 }, 1030 { 1031 .cmd = NL_RTM_DELROUTE, 1032 .name = "RTM_DELROUTE", 1033 .cb = &rtnl_handle_delroute, 1034 .priv = PRIV_NET_ROUTE, 1035 }, 1036 { 1037 .cmd = NL_RTM_NEWROUTE, 1038 .name = "RTM_NEWROUTE", 1039 .cb = &rtnl_handle_newroute, 1040 .priv = PRIV_NET_ROUTE, 1041 } 1042 }; 1043 1044 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser}; 1045 1046 void 1047 rtnl_routes_init(void) 1048 { 1049 NL_VERIFY_PARSERS(all_parsers); 1050 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); 1051 } 1052