1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2021 Ng Peng Nam Sean 5 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 #include "opt_route.h" 34 #include <sys/types.h> 35 #include <sys/malloc.h> 36 #include <sys/rmlock.h> 37 #include <sys/socket.h> 38 39 #include <net/if.h> 40 #include <net/route.h> 41 #include <net/route/nhop.h> 42 #include <net/route/route_ctl.h> 43 #include <net/route/route_var.h> 44 #include <netlink/netlink.h> 45 #include <netlink/netlink_ctl.h> 46 #include <netlink/netlink_route.h> 47 #include <netlink/route/route_var.h> 48 49 #define DEBUG_MOD_NAME nl_route 50 #define DEBUG_MAX_LEVEL LOG_DEBUG3 51 #include <netlink/netlink_debug.h> 52 _DECLARE_DEBUG(LOG_DEBUG); 53 54 static unsigned char 55 get_rtm_type(const struct nhop_object *nh) 56 { 57 int nh_flags = nh->nh_flags; 58 59 /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */ 60 if (nh_flags & NHF_BLACKHOLE) 61 return (RTN_BLACKHOLE); 62 else if (nh_flags & NHF_REJECT) 63 return (RTN_PROHIBIT); 64 return (RTN_UNICAST); 65 } 66 67 static uint8_t 68 nl_get_rtm_protocol(const struct nhop_object *nh) 69 { 70 #ifdef ROUTE_MPATH 71 if (NH_IS_NHGRP(nh)) { 72 const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh; 73 uint8_t origin = nhgrp_get_origin(nhg); 74 if (origin != RTPROT_UNSPEC) 75 return (origin); 76 nh = nhg->nhops[0]; 77 } 78 #endif 79 uint8_t origin = nhop_get_origin(nh); 80 if (origin != RTPROT_UNSPEC) 81 return (origin); 82 /* TODO: remove guesswork once all kernel users fill in origin */ 83 int rt_flags = nhop_get_rtflags(nh); 84 if (rt_flags & RTF_PROTO1) 85 return (RTPROT_ZEBRA); 86 if (rt_flags & RTF_STATIC) 87 return (RTPROT_STATIC); 88 return (RTPROT_KERNEL); 89 } 90 91 static int 92 get_rtmsg_type_from_rtsock(int cmd) 93 { 94 switch (cmd) { 95 case RTM_ADD: 96 case RTM_CHANGE: 97 case RTM_GET: 98 return NL_RTM_NEWROUTE; 99 case RTM_DELETE: 100 return NL_RTM_DELROUTE; 101 } 102 103 return (0); 104 } 105 106 /* 107 * fibnum heuristics 108 * 109 * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS 110 * msg rtm_table RTA_TABLE result 111 * RTM_GETROUTE/dump 0 - RT_ALL_FIBS 112 * RTM_GETROUTE/dump 1 - 1 113 * RTM_GETROUTE/get 0 - 0 114 * 115 */ 116 117 static struct nhop_object * 118 rc_get_nhop(const struct rib_cmd_info *rc) 119 { 120 return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new); 121 } 122 123 static void 124 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh) 125 { 126 int upper_family; 127 128 switch (nhop_get_neigh_family(nh)) { 129 case AF_LINK: 130 /* onlink prefix, skip */ 131 break; 132 case AF_INET: 133 nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 134 break; 135 case AF_INET6: 136 upper_family = nhop_get_upper_family(nh); 137 if (upper_family == AF_INET6) { 138 nlattr_add(nw, NL_RTA_GATEWAY, 16, &nh->gw6_sa.sin6_addr); 139 } else if (upper_family == AF_INET) { 140 /* IPv4 over IPv6 */ 141 char buf[20]; 142 struct rtvia *via = (struct rtvia *)&buf[0]; 143 via->rtvia_family = AF_INET6; 144 memcpy(via->rtvia_addr, &nh->gw6_sa.sin6_addr, 16); 145 nlattr_add(nw, NL_RTA_VIA, 17, via); 146 } 147 break; 148 } 149 } 150 151 static void 152 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh) 153 { 154 int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t); 155 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 156 157 if (nla == NULL) 158 return; 159 nla->nla_type = NL_RTA_METRICS; 160 nla->nla_len = nla_len; 161 nla++; 162 nla->nla_type = NL_RTAX_MTU; 163 nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t); 164 *((uint32_t *)(nla + 1)) = nh->nh_mtu; 165 } 166 167 #ifdef ROUTE_MPATH 168 static void 169 dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm) 170 { 171 uint32_t uidx = nhgrp_get_uidx(nhg); 172 uint32_t num_nhops; 173 const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops); 174 uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh); 175 176 if (uidx != 0) 177 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 178 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg)); 179 180 nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags); 181 int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH); 182 if (off == 0) 183 return; 184 185 for (int i = 0; i < num_nhops; i++) { 186 int nh_off = nlattr_save_offset(nw); 187 struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop); 188 if (rtnh == NULL) 189 return; 190 rtnh->rtnh_flags = 0; 191 rtnh->rtnh_ifindex = wn[i].nh->nh_ifp->if_index; 192 rtnh->rtnh_hops = wn[i].weight; 193 dump_rc_nhop_gw(nw, wn[i].nh); 194 uint32_t rtflags = nhop_get_rtflags(wn[i].nh); 195 if (rtflags != base_rtflags) 196 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 197 if (rtflags & RTF_FIXEDMTU) 198 dump_rc_nhop_mtu(nw, wn[i].nh); 199 rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); 200 /* 201 * nlattr_add() allocates 4-byte aligned storage, no need to aligh 202 * length here 203 * */ 204 rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; 205 } 206 nlattr_set_len(nw, off); 207 } 208 #endif 209 210 static void 211 dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm) 212 { 213 #ifdef ROUTE_MPATH 214 if (NH_IS_NHGRP(rnd->rnd_nhop)) { 215 dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm); 216 return; 217 } 218 #endif 219 const struct nhop_object *nh = rnd->rnd_nhop; 220 uint32_t rtflags = nhop_get_rtflags(nh); 221 222 /* 223 * IPv4 over IPv6 224 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2), 225 * IPv4 w/ gw 226 * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)], 227 * Direct route: 228 * ('RTA_OIF', 2) 229 */ 230 if (nh->nh_flags & NHF_GATEWAY) 231 dump_rc_nhop_gw(nw, nh); 232 233 uint32_t uidx = nhop_get_uidx(nh); 234 if (uidx != 0) 235 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 236 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh)); 237 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 238 239 if (rtflags & RTF_FIXEDMTU) 240 dump_rc_nhop_mtu(nw, nh); 241 uint32_t nh_expire = nhop_get_expire(nh); 242 if (nh_expire > 0) 243 nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); 244 245 /* In any case, fill outgoing interface */ 246 nlattr_add_u32(nw, NL_RTA_OIF, nh->nh_ifp->if_index); 247 248 if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) 249 nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); 250 } 251 252 /* 253 * Dumps output from a rib command into an rtmsg 254 */ 255 256 static int 257 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr, 258 const struct rtentry *rt, struct route_nhop_data *rnd, 259 struct nl_writer *nw) 260 { 261 struct rtmsg *rtm; 262 int error = 0; 263 264 NET_EPOCH_ASSERT(); 265 266 if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg))) 267 goto enomem; 268 269 int family = rt_get_family(rt); 270 int rtm_off = nlattr_save_offset(nw); 271 rtm = nlmsg_reserve_object(nw, struct rtmsg); 272 rtm->rtm_family = family; 273 rtm->rtm_dst_len = 0; 274 rtm->rtm_src_len = 0; 275 rtm->rtm_tos = 0; 276 if (fibnum < 255) 277 rtm->rtm_table = (unsigned char)fibnum; 278 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 279 rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop); 280 rtm->rtm_type = get_rtm_type(rnd->rnd_nhop); 281 282 nlattr_add_u32(nw, NL_RTA_TABLE, fibnum); 283 284 int plen = 0; 285 #if defined(INET) || defined(INET6) 286 uint32_t scopeid; 287 #endif 288 switch (family) { 289 #ifdef INET 290 case AF_INET: 291 { 292 struct in_addr addr; 293 rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); 294 nlattr_add(nw, NL_RTA_DST, 4, &addr); 295 break; 296 } 297 #endif 298 #ifdef INET6 299 case AF_INET6: 300 { 301 struct in6_addr addr; 302 rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); 303 nlattr_add(nw, NL_RTA_DST, 16, &addr); 304 break; 305 } 306 #endif 307 default: 308 FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family); 309 error = EAFNOSUPPORT; 310 goto flush; 311 } 312 313 rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg); 314 if (plen > 0) 315 rtm->rtm_dst_len = plen; 316 dump_rc_nhop(nw, rnd, rtm); 317 318 if (nlmsg_end(nw)) 319 return (0); 320 enomem: 321 error = ENOMEM; 322 flush: 323 nlmsg_abort(nw); 324 return (error); 325 } 326 327 static int 328 family_to_group(int family) 329 { 330 switch (family) { 331 case AF_INET: 332 return (RTNLGRP_IPV4_ROUTE); 333 case AF_INET6: 334 return (RTNLGRP_IPV6_ROUTE); 335 } 336 return (0); 337 } 338 339 340 static void 341 report_operation(uint32_t fibnum, struct rib_cmd_info *rc, 342 struct nlpcb *nlp, struct nlmsghdr *hdr) 343 { 344 struct nl_writer nw = {}; 345 uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt)); 346 347 if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { 348 struct route_nhop_data rnd = { 349 .rnd_nhop = rc_get_nhop(rc), 350 .rnd_weight = rc->rc_nh_weight, 351 }; 352 hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE); 353 hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND); 354 switch (rc->rc_cmd) { 355 case RTM_ADD: 356 hdr->nlmsg_type = NL_RTM_NEWROUTE; 357 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; 358 break; 359 case RTM_CHANGE: 360 hdr->nlmsg_type = NL_RTM_NEWROUTE; 361 hdr->nlmsg_flags |= NLM_F_REPLACE; 362 break; 363 case RTM_DELETE: 364 hdr->nlmsg_type = NL_RTM_DELROUTE; 365 break; 366 } 367 dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw); 368 nlmsg_flush(&nw); 369 } 370 371 rtsock_callback_p->route_f(fibnum, rc); 372 } 373 374 struct rta_mpath_nh { 375 struct sockaddr *gw; 376 struct ifnet *ifp; 377 uint8_t rtnh_flags; 378 uint8_t rtnh_weight; 379 }; 380 381 #define _IN(_field) offsetof(struct rtnexthop, _field) 382 #define _OUT(_field) offsetof(struct rta_mpath_nh, _field) 383 const static struct nlattr_parser nla_p_rtnh[] = { 384 { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip }, 385 { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia }, 386 }; 387 const static struct nlfield_parser nlf_p_rtnh[] = { 388 { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 }, 389 { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 }, 390 { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz }, 391 }; 392 #undef _IN 393 #undef _OUT 394 NL_DECLARE_PARSER(mpath_parser, struct rtnexthop, nlf_p_rtnh, nla_p_rtnh); 395 396 struct rta_mpath { 397 int num_nhops; 398 struct rta_mpath_nh nhops[0]; 399 }; 400 401 static int 402 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 403 { 404 int data_len = nla->nla_len - sizeof(struct nlattr); 405 struct rtnexthop *rtnh; 406 407 int max_nhops = data_len / sizeof(struct rtnexthop); 408 409 struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh)); 410 mp->num_nhops = 0; 411 412 for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) { 413 struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++]; 414 415 int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser, 416 npt, mpnh); 417 if (error != 0) { 418 NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed", 419 mp->num_nhops - 1); 420 return (error); 421 } 422 423 int len = NL_ITEM_ALIGN(rtnh->rtnh_len); 424 data_len -= len; 425 rtnh = (struct rtnexthop *)((char *)rtnh + len); 426 } 427 if (data_len != 0 || mp->num_nhops == 0) { 428 NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr"); 429 return (EINVAL); 430 } 431 432 *((struct rta_mpath **)target) = mp; 433 return (0); 434 } 435 436 437 struct nl_parsed_route { 438 struct sockaddr *rta_dst; 439 struct sockaddr *rta_gw; 440 struct ifnet *rta_oif; 441 struct rta_mpath *rta_multipath; 442 uint32_t rta_table; 443 uint32_t rta_rtflags; 444 uint32_t rta_nh_id; 445 uint32_t rta_weight; 446 uint32_t rtax_mtu; 447 uint8_t rtm_family; 448 uint8_t rtm_dst_len; 449 uint8_t rtm_protocol; 450 }; 451 452 #define _IN(_field) offsetof(struct rtmsg, _field) 453 #define _OUT(_field) offsetof(struct nl_parsed_route, _field) 454 static struct nlattr_parser nla_p_rtmetrics[] = { 455 { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 }, 456 }; 457 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics); 458 459 static const struct nlattr_parser nla_p_rtmsg[] = { 460 { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, 461 { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, 462 { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip }, 463 { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, 464 { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, 465 { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, 466 { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 }, 467 { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 }, 468 { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia }, 469 { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 }, 470 }; 471 472 static const struct nlfield_parser nlf_p_rtmsg[] = { 473 {.off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 }, 474 {.off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 }, 475 {.off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 }, 476 }; 477 #undef _IN 478 #undef _OUT 479 NL_DECLARE_PARSER(rtm_parser, struct rtmsg, nlf_p_rtmsg, nla_p_rtmsg); 480 481 struct netlink_walkargs { 482 struct nl_writer *nw; 483 struct route_nhop_data rnd; 484 struct nlmsghdr hdr; 485 struct nlpcb *nlp; 486 uint32_t fibnum; 487 int family; 488 int error; 489 int count; 490 int dumped; 491 int dumped_tables; 492 }; 493 494 static int 495 dump_rtentry(struct rtentry *rt, void *_arg) 496 { 497 struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; 498 int error; 499 500 wa->count++; 501 if (wa->error != 0) 502 return (0); 503 wa->dumped++; 504 505 rt_get_rnd(rt, &wa->rnd); 506 507 error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw); 508 509 IF_DEBUG_LEVEL(LOG_DEBUG3) { 510 char rtbuf[INET6_ADDRSTRLEN + 5]; 511 FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family, 512 "Dump %s, offset %u, error %d", 513 rt_print_buf(rt, rtbuf, sizeof(rtbuf)), 514 wa->nw->offset, error); 515 } 516 wa->error = error; 517 518 return (0); 519 } 520 521 static void 522 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family) 523 { 524 FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump"); 525 wa->count = 0; 526 wa->dumped = 0; 527 528 rib_walk(fibnum, family, false, dump_rtentry, wa); 529 530 wa->dumped_tables++; 531 532 FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d", 533 wa->count, wa->dumped); 534 NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset); 535 } 536 537 static int 538 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family) 539 { 540 wa->fibnum = fibnum; 541 542 if (family == AF_UNSPEC) { 543 for (int i = 0; i < AF_MAX; i++) { 544 if (rt_tables_get_rnh(fibnum, i) != 0) { 545 wa->family = i; 546 dump_rtable_one(wa, fibnum, i); 547 if (wa->error != 0) 548 break; 549 } 550 } 551 } else { 552 if (rt_tables_get_rnh(fibnum, family) != 0) { 553 wa->family = family; 554 dump_rtable_one(wa, fibnum, family); 555 } 556 } 557 558 return (wa->error); 559 } 560 561 static int 562 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs, 563 struct nlmsghdr *hdr, struct nl_pstate *npt) 564 { 565 RIB_RLOCK_TRACKER; 566 struct rib_head *rnh; 567 struct rtentry *rt; 568 uint32_t fibnum = attrs->rta_table; 569 sa_family_t family = attrs->rtm_family; 570 571 if (attrs->rta_dst == NULL) { 572 NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied"); 573 return (EINVAL); 574 } 575 576 FIB_LOG(LOG_DEBUG, fibnum, family, "getroute called"); 577 578 rnh = rt_tables_get_rnh(fibnum, family); 579 if (rnh == NULL) 580 return (EAFNOSUPPORT); 581 582 RIB_RLOCK(rnh); 583 584 rt = (struct rtentry *)rnh->rnh_matchaddr(attrs->rta_dst, &rnh->head); 585 if (rt == NULL) { 586 RIB_RUNLOCK(rnh); 587 return (ESRCH); 588 } 589 590 struct route_nhop_data rnd; 591 rt_get_rnd(rt, &rnd); 592 rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0); 593 594 RIB_RUNLOCK(rnh); 595 596 IF_DEBUG_LEVEL(LOG_DEBUG2) { 597 char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused; 598 FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s", 599 nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)), 600 rt_print_buf(rt, rtbuf, sizeof(rtbuf))); 601 } 602 603 hdr->nlmsg_type = NL_RTM_NEWROUTE; 604 dump_px(fibnum, hdr, rt, &rnd, npt->nw); 605 606 return (0); 607 } 608 609 static int 610 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, 611 struct nlmsghdr *hdr, struct nl_writer *nw) 612 { 613 struct netlink_walkargs wa = { 614 .nlp = nlp, 615 .nw = nw, 616 .hdr.nlmsg_pid = hdr->nlmsg_pid, 617 .hdr.nlmsg_seq = hdr->nlmsg_seq, 618 .hdr.nlmsg_type = NL_RTM_NEWROUTE, 619 .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, 620 }; 621 622 if (fibnum == RT_TABLE_UNSPEC) { 623 for (int i = 0; i < V_rt_numfibs; i++) { 624 dump_rtable_fib(&wa, fibnum, family); 625 if (wa.error != 0) 626 break; 627 } 628 } else 629 dump_rtable_fib(&wa, fibnum, family); 630 631 if (wa.error == 0 && wa.dumped_tables == 0) { 632 FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family"); 633 wa.error = ESRCH; 634 // How do we propagate it? 635 } 636 637 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) { 638 NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); 639 return (ENOMEM); 640 } 641 642 return (wa.error); 643 } 644 645 static struct nhop_object * 646 finalize_nhop(struct nhop_object *nh, int *perror) 647 { 648 /* 649 * The following MUST be filled: 650 * nh_ifp, nh_ifa, nh_gw 651 */ 652 if (nh->gw_sa.sa_family == 0) { 653 /* 654 * Empty gateway. Can be direct route with RTA_OIF set. 655 */ 656 if (nh->nh_ifp != NULL) 657 nhop_set_direct_gw(nh, nh->nh_ifp); 658 else { 659 NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping"); 660 *perror = EINVAL; 661 return (NULL); 662 } 663 /* Both nh_ifp and gateway are set */ 664 } else { 665 /* Gateway is set up, we can derive ifp if not set */ 666 if (nh->nh_ifp == NULL) { 667 struct ifaddr *ifa = ifa_ifwithnet(&nh->gw_sa, 1, nhop_get_fibnum(nh)); 668 if (ifa == NULL) { 669 NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping"); 670 *perror = EINVAL; 671 return (NULL); 672 } 673 nhop_set_transmit_ifp(nh, ifa->ifa_ifp); 674 } 675 } 676 /* Both nh_ifp and gateway are set */ 677 if (nh->nh_ifa == NULL) { 678 struct ifaddr *ifa = ifaof_ifpforaddr(&nh->gw_sa, nh->nh_ifp); 679 if (ifa == NULL) { 680 NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping"); 681 *perror = EINVAL; 682 return (NULL); 683 } 684 nhop_set_src(nh, ifa); 685 } 686 687 return (nhop_get_nhop(nh, perror)); 688 } 689 690 static int 691 get_pxflag(const struct nl_parsed_route *attrs) 692 { 693 int pxflag = 0; 694 switch (attrs->rtm_family) { 695 case AF_INET: 696 if (attrs->rtm_dst_len == 32) 697 pxflag = NHF_HOST; 698 else if (attrs->rtm_dst_len == 0) 699 pxflag = NHF_DEFAULT; 700 break; 701 case AF_INET6: 702 if (attrs->rtm_dst_len == 32) 703 pxflag = NHF_HOST; 704 else if (attrs->rtm_dst_len == 0) 705 pxflag = NHF_DEFAULT; 706 break; 707 } 708 709 return (pxflag); 710 } 711 712 static int 713 get_op_flags(int nlm_flags) 714 { 715 int op_flags = 0; 716 717 op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0; 718 op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0; 719 op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0; 720 op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0; 721 722 return (op_flags); 723 } 724 725 #ifdef ROUTE_MPATH 726 static int 727 create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, 728 struct nl_pstate *npt, struct nhop_object **pnh) 729 { 730 int error; 731 732 if (mpnh->gw == NULL) 733 return (EINVAL); 734 735 struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 736 if (nh == NULL) 737 return (ENOMEM); 738 739 nhop_set_gw(nh, mpnh->gw, true); 740 if (mpnh->ifp != NULL) 741 nhop_set_transmit_ifp(nh, mpnh->ifp); 742 nhop_set_rtflags(nh, attrs->rta_rtflags); 743 if (attrs->rtm_protocol > RTPROT_STATIC) 744 nhop_set_origin(nh, attrs->rtm_protocol); 745 746 *pnh = finalize_nhop(nh, &error); 747 748 return (error); 749 } 750 #endif 751 752 static struct nhop_object * 753 create_nexthop_from_attrs(struct nl_parsed_route *attrs, 754 struct nl_pstate *npt, int *perror) 755 { 756 struct nhop_object *nh = NULL; 757 int error = 0; 758 759 if (attrs->rta_multipath != NULL) { 760 #ifdef ROUTE_MPATH 761 /* Multipath w/o explicit nexthops */ 762 int num_nhops = attrs->rta_multipath->num_nhops; 763 struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops); 764 765 for (int i = 0; i < num_nhops; i++) { 766 struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; 767 768 error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh); 769 if (error != 0) { 770 for (int j = 0; j < i; j++) 771 nhop_free(wn[j].nh); 772 break; 773 } 774 wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1; 775 } 776 if (error == 0) { 777 struct rib_head *rh = nhop_get_rh(wn[0].nh); 778 struct nhgrp_object *nhg; 779 780 nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family, 781 wn, num_nhops, perror); 782 if (nhg != NULL) { 783 if (attrs->rtm_protocol > RTPROT_STATIC) 784 nhgrp_set_origin(nhg, attrs->rtm_protocol); 785 nhg = nhgrp_get_nhgrp(nhg, perror); 786 } 787 for (int i = 0; i < num_nhops; i++) 788 nhop_free(wn[i].nh); 789 if (nhg != NULL) 790 return ((struct nhop_object *)nhg); 791 error = *perror; 792 } 793 #else 794 error = ENOTSUP; 795 #endif 796 *perror = error; 797 } else { 798 nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 799 if (nh == NULL) { 800 *perror = ENOMEM; 801 return (NULL); 802 } 803 if (attrs->rta_gw != NULL) 804 nhop_set_gw(nh, attrs->rta_gw, true); 805 if (attrs->rta_oif != NULL) 806 nhop_set_transmit_ifp(nh, attrs->rta_oif); 807 if (attrs->rtax_mtu != 0) 808 nhop_set_mtu(nh, attrs->rtax_mtu, true); 809 if (attrs->rta_rtflags & RTF_BROADCAST) 810 nhop_set_broadcast(nh, true); 811 if (attrs->rta_rtflags & RTF_BLACKHOLE) 812 nhop_set_blackhole(nh, NHF_BLACKHOLE); 813 if (attrs->rta_rtflags & RTF_REJECT) 814 nhop_set_blackhole(nh, NHF_REJECT); 815 nhop_set_rtflags(nh, attrs->rta_rtflags); 816 if (attrs->rtm_protocol > RTPROT_STATIC) 817 nhop_set_origin(nh, attrs->rtm_protocol); 818 nh = finalize_nhop(nh, perror); 819 } 820 821 return (nh); 822 } 823 824 static int 825 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 826 struct nl_pstate *npt) 827 { 828 struct rib_cmd_info rc = {}; 829 struct nhop_object *nh = NULL; 830 int error; 831 832 struct nl_parsed_route attrs = {}; 833 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 834 if (error != 0) 835 return (error); 836 837 /* Check if we have enough data */ 838 if (attrs.rta_dst == NULL) { 839 NL_LOG(LOG_DEBUG, "missing RTA_DST"); 840 return (EINVAL); 841 } 842 843 if (attrs.rta_nh_id != 0) { 844 /* Referenced uindex */ 845 int pxflag = get_pxflag(&attrs); 846 nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id, 847 pxflag, &error); 848 if (error != 0) 849 return (error); 850 } else { 851 nh = create_nexthop_from_attrs(&attrs, npt, &error); 852 if (error != 0) { 853 NL_LOG(LOG_DEBUG, "Error creating nexthop"); 854 return (error); 855 } 856 } 857 858 if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0) 859 attrs.rta_weight = RT_DEFAULT_WEIGHT; 860 struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight }; 861 int op_flags = get_op_flags(hdr->nlmsg_flags); 862 863 error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, 864 &rnd, op_flags, &rc); 865 if (error == 0) 866 report_operation(attrs.rta_table, &rc, nlp, hdr); 867 return (error); 868 } 869 870 static int 871 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 872 { 873 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; 874 875 if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) 876 return (0); 877 878 if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp)) 879 return (0); 880 881 return (1); 882 } 883 884 static int 885 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 886 struct nl_pstate *npt) 887 { 888 struct rib_cmd_info rc; 889 int error; 890 891 struct nl_parsed_route attrs = {}; 892 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 893 if (error != 0) 894 return (error); 895 896 if (attrs.rta_dst == NULL) { 897 NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set"); 898 return (ESRCH); 899 } 900 901 error = rib_del_route_px(attrs.rta_table, attrs.rta_dst, 902 attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc); 903 if (error == 0) 904 report_operation(attrs.rta_table, &rc, nlp, hdr); 905 return (error); 906 } 907 908 static int 909 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) 910 { 911 int error; 912 913 struct nl_parsed_route attrs = {}; 914 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 915 if (error != 0) 916 return (error); 917 918 if (hdr->nlmsg_flags & NLM_F_DUMP) 919 error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw); 920 else 921 error = handle_rtm_getroute(nlp, &attrs, hdr, npt); 922 923 return (error); 924 } 925 926 void 927 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) 928 { 929 struct nl_writer nw = {}; 930 int family, nlm_flags = 0; 931 932 family = rt_get_family(rc->rc_rt); 933 934 /* XXX: check if there are active listeners first */ 935 936 /* TODO: consider passing PID/type/seq */ 937 switch (rc->rc_cmd) { 938 case RTM_ADD: 939 nlm_flags = NLM_F_EXCL | NLM_F_CREATE; 940 break; 941 case RTM_CHANGE: 942 nlm_flags = NLM_F_REPLACE; 943 break; 944 case RTM_DELETE: 945 nlm_flags = 0; 946 break; 947 } 948 IF_DEBUG_LEVEL(LOG_DEBUG2) { 949 char rtbuf[NHOP_PRINT_BUFSIZE] __unused; 950 FIB_LOG(LOG_DEBUG2, fibnum, family, 951 "received event %s for %s / nlm_flags=%X", 952 rib_print_cmd(rc->rc_cmd), 953 rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)), 954 nlm_flags); 955 } 956 957 struct nlmsghdr hdr = { 958 .nlmsg_flags = nlm_flags, 959 .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd), 960 }; 961 962 struct route_nhop_data rnd = { 963 .rnd_nhop = rc_get_nhop(rc), 964 .rnd_weight = rc->rc_nh_weight, 965 }; 966 967 uint32_t group_id = family_to_group(family); 968 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { 969 NL_LOG(LOG_DEBUG, "error allocating event buffer"); 970 return; 971 } 972 973 dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw); 974 nlmsg_flush(&nw); 975 } 976 977 static const struct rtnl_cmd_handler cmd_handlers[] = { 978 { 979 .cmd = NL_RTM_GETROUTE, 980 .name = "RTM_GETROUTE", 981 .cb = &rtnl_handle_getroute, 982 }, 983 { 984 .cmd = NL_RTM_DELROUTE, 985 .name = "RTM_DELROUTE", 986 .cb = &rtnl_handle_delroute, 987 .priv = PRIV_NET_ROUTE, 988 }, 989 { 990 .cmd = NL_RTM_NEWROUTE, 991 .name = "RTM_NEWROUTE", 992 .cb = &rtnl_handle_newroute, 993 .priv = PRIV_NET_ROUTE, 994 } 995 }; 996 997 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser}; 998 999 void 1000 rtnl_routes_init(void) 1001 { 1002 NL_VERIFY_PARSERS(all_parsers); 1003 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); 1004 } 1005