1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2021 Ng Peng Nam Sean 5 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 #include <sys/types.h> 34 #include <sys/malloc.h> 35 #include <sys/rmlock.h> 36 #include <sys/socket.h> 37 38 #include <net/if.h> 39 #include <net/route.h> 40 #include <net/route/nhop.h> 41 #include <net/route/route_ctl.h> 42 #include <net/route/route_var.h> 43 #include <netinet6/scope6_var.h> 44 #include <netlink/netlink.h> 45 #include <netlink/netlink_ctl.h> 46 #include <netlink/netlink_route.h> 47 #include <netlink/route/route_var.h> 48 49 #define DEBUG_MOD_NAME nl_route 50 #define DEBUG_MAX_LEVEL LOG_DEBUG3 51 #include <netlink/netlink_debug.h> 52 _DECLARE_DEBUG(LOG_INFO); 53 54 static unsigned char 55 get_rtm_type(const struct nhop_object *nh) 56 { 57 int nh_flags = nh->nh_flags; 58 59 /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */ 60 if (nh_flags & NHF_BLACKHOLE) 61 return (RTN_BLACKHOLE); 62 else if (nh_flags & NHF_REJECT) 63 return (RTN_PROHIBIT); 64 return (RTN_UNICAST); 65 } 66 67 static uint8_t 68 nl_get_rtm_protocol(const struct nhop_object *nh) 69 { 70 #ifdef ROUTE_MPATH 71 if (NH_IS_NHGRP(nh)) { 72 const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh; 73 uint8_t origin = nhgrp_get_origin(nhg); 74 if (origin != RTPROT_UNSPEC) 75 return (origin); 76 nh = nhg->nhops[0]; 77 } 78 #endif 79 uint8_t origin = nhop_get_origin(nh); 80 if (origin != RTPROT_UNSPEC) 81 return (origin); 82 /* TODO: remove guesswork once all kernel users fill in origin */ 83 int rt_flags = nhop_get_rtflags(nh); 84 if (rt_flags & RTF_PROTO1) 85 return (RTPROT_ZEBRA); 86 if (rt_flags & RTF_STATIC) 87 return (RTPROT_STATIC); 88 return (RTPROT_KERNEL); 89 } 90 91 static int 92 get_rtmsg_type_from_rtsock(int cmd) 93 { 94 switch (cmd) { 95 case RTM_ADD: 96 case RTM_CHANGE: 97 case RTM_GET: 98 return NL_RTM_NEWROUTE; 99 case RTM_DELETE: 100 return NL_RTM_DELROUTE; 101 } 102 103 return (0); 104 } 105 106 /* 107 * fibnum heuristics 108 * 109 * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS 110 * msg rtm_table RTA_TABLE result 111 * RTM_GETROUTE/dump 0 - RT_ALL_FIBS 112 * RTM_GETROUTE/dump 1 - 1 113 * RTM_GETROUTE/get 0 - 0 114 * 115 */ 116 117 static struct nhop_object * 118 rc_get_nhop(const struct rib_cmd_info *rc) 119 { 120 return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new); 121 } 122 123 static void 124 dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh) 125 { 126 #ifdef INET6 127 int upper_family; 128 #endif 129 130 switch (nhop_get_neigh_family(nh)) { 131 case AF_LINK: 132 /* onlink prefix, skip */ 133 break; 134 case AF_INET: 135 nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 136 break; 137 #ifdef INET6 138 case AF_INET6: 139 upper_family = nhop_get_upper_family(nh); 140 if (upper_family == AF_INET6) { 141 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 142 in6_clearscope(&gw6); 143 144 nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6); 145 } else if (upper_family == AF_INET) { 146 /* IPv4 over IPv6 */ 147 struct in6_addr gw6 = nh->gw6_sa.sin6_addr; 148 in6_clearscope(&gw6); 149 150 char buf[20]; 151 struct rtvia *via = (struct rtvia *)&buf[0]; 152 via->rtvia_family = AF_INET6; 153 memcpy(via->rtvia_addr, &gw6, 16); 154 nlattr_add(nw, NL_RTA_VIA, 17, via); 155 } 156 break; 157 #endif 158 } 159 } 160 161 static void 162 dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh) 163 { 164 int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t); 165 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 166 167 if (nla == NULL) 168 return; 169 nla->nla_type = NL_RTA_METRICS; 170 nla->nla_len = nla_len; 171 nla++; 172 nla->nla_type = NL_RTAX_MTU; 173 nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t); 174 *((uint32_t *)(nla + 1)) = nh->nh_mtu; 175 } 176 177 #ifdef ROUTE_MPATH 178 static void 179 dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm) 180 { 181 uint32_t uidx = nhgrp_get_uidx(nhg); 182 uint32_t num_nhops; 183 const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops); 184 uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh); 185 186 if (uidx != 0) 187 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 188 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg)); 189 190 nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags); 191 int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH); 192 if (off == 0) 193 return; 194 195 for (int i = 0; i < num_nhops; i++) { 196 int nh_off = nlattr_save_offset(nw); 197 struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop); 198 if (rtnh == NULL) 199 return; 200 rtnh->rtnh_flags = 0; 201 rtnh->rtnh_ifindex = if_getindex(wn[i].nh->nh_ifp); 202 rtnh->rtnh_hops = wn[i].weight; 203 dump_rc_nhop_gw(nw, wn[i].nh); 204 uint32_t rtflags = nhop_get_rtflags(wn[i].nh); 205 if (rtflags != base_rtflags) 206 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 207 if (rtflags & RTF_FIXEDMTU) 208 dump_rc_nhop_mtu(nw, wn[i].nh); 209 rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); 210 /* 211 * nlattr_add() allocates 4-byte aligned storage, no need to aligh 212 * length here 213 * */ 214 rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; 215 } 216 nlattr_set_len(nw, off); 217 } 218 #endif 219 220 static void 221 dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm) 222 { 223 #ifdef ROUTE_MPATH 224 if (NH_IS_NHGRP(rnd->rnd_nhop)) { 225 dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm); 226 return; 227 } 228 #endif 229 const struct nhop_object *nh = rnd->rnd_nhop; 230 uint32_t rtflags = nhop_get_rtflags(nh); 231 232 /* 233 * IPv4 over IPv6 234 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2), 235 * IPv4 w/ gw 236 * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)], 237 * Direct route: 238 * ('RTA_OIF', 2) 239 */ 240 if (nh->nh_flags & NHF_GATEWAY) 241 dump_rc_nhop_gw(nw, nh); 242 243 uint32_t uidx = nhop_get_uidx(nh); 244 if (uidx != 0) 245 nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); 246 nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh)); 247 nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); 248 249 if (rtflags & RTF_FIXEDMTU) 250 dump_rc_nhop_mtu(nw, nh); 251 uint32_t nh_expire = nhop_get_expire(nh); 252 if (nh_expire > 0) 253 nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); 254 255 /* In any case, fill outgoing interface */ 256 nlattr_add_u32(nw, NL_RTA_OIF, if_getindex(nh->nh_ifp)); 257 258 if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) 259 nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); 260 } 261 262 /* 263 * Dumps output from a rib command into an rtmsg 264 */ 265 266 static int 267 dump_px(uint32_t fibnum, const struct nlmsghdr *hdr, 268 const struct rtentry *rt, struct route_nhop_data *rnd, 269 struct nl_writer *nw) 270 { 271 struct rtmsg *rtm; 272 int error = 0; 273 274 NET_EPOCH_ASSERT(); 275 276 if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg))) 277 goto enomem; 278 279 int family = rt_get_family(rt); 280 int rtm_off = nlattr_save_offset(nw); 281 rtm = nlmsg_reserve_object(nw, struct rtmsg); 282 rtm->rtm_family = family; 283 rtm->rtm_dst_len = 0; 284 rtm->rtm_src_len = 0; 285 rtm->rtm_tos = 0; 286 if (fibnum < 255) 287 rtm->rtm_table = (unsigned char)fibnum; 288 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 289 rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop); 290 rtm->rtm_type = get_rtm_type(rnd->rnd_nhop); 291 292 nlattr_add_u32(nw, NL_RTA_TABLE, fibnum); 293 294 int plen = 0; 295 #if defined(INET) || defined(INET6) 296 uint32_t scopeid; 297 #endif 298 switch (family) { 299 #ifdef INET 300 case AF_INET: 301 { 302 struct in_addr addr; 303 rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); 304 nlattr_add(nw, NL_RTA_DST, 4, &addr); 305 break; 306 } 307 #endif 308 #ifdef INET6 309 case AF_INET6: 310 { 311 struct in6_addr addr; 312 rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); 313 nlattr_add(nw, NL_RTA_DST, 16, &addr); 314 break; 315 } 316 #endif 317 default: 318 FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family); 319 error = EAFNOSUPPORT; 320 goto flush; 321 } 322 323 rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg); 324 if (plen > 0) 325 rtm->rtm_dst_len = plen; 326 dump_rc_nhop(nw, rnd, rtm); 327 328 if (nlmsg_end(nw)) 329 return (0); 330 enomem: 331 error = ENOMEM; 332 flush: 333 nlmsg_abort(nw); 334 return (error); 335 } 336 337 static int 338 family_to_group(int family) 339 { 340 switch (family) { 341 case AF_INET: 342 return (RTNLGRP_IPV4_ROUTE); 343 case AF_INET6: 344 return (RTNLGRP_IPV6_ROUTE); 345 } 346 return (0); 347 } 348 349 static void 350 report_operation(uint32_t fibnum, struct rib_cmd_info *rc, 351 struct nlpcb *nlp, struct nlmsghdr *hdr) 352 { 353 struct nl_writer nw; 354 uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt)); 355 356 if (nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, 0, 357 false)) { 358 struct route_nhop_data rnd = { 359 .rnd_nhop = rc_get_nhop(rc), 360 .rnd_weight = rc->rc_nh_weight, 361 }; 362 hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE); 363 hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND); 364 switch (rc->rc_cmd) { 365 case RTM_ADD: 366 hdr->nlmsg_type = NL_RTM_NEWROUTE; 367 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; 368 break; 369 case RTM_CHANGE: 370 hdr->nlmsg_type = NL_RTM_NEWROUTE; 371 hdr->nlmsg_flags |= NLM_F_REPLACE; 372 break; 373 case RTM_DELETE: 374 hdr->nlmsg_type = NL_RTM_DELROUTE; 375 break; 376 } 377 dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw); 378 nlmsg_flush(&nw); 379 } 380 381 rtsock_callback_p->route_f(fibnum, rc); 382 } 383 384 static void 385 set_scope6(struct sockaddr *sa, struct ifnet *ifp) 386 { 387 #ifdef INET6 388 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { 389 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; 390 391 if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) 392 in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); 393 } 394 #endif 395 } 396 397 struct rta_mpath_nh { 398 struct sockaddr *gw; 399 struct ifnet *ifp; 400 uint8_t rtnh_flags; 401 uint8_t rtnh_weight; 402 }; 403 404 #define _IN(_field) offsetof(struct rtnexthop, _field) 405 #define _OUT(_field) offsetof(struct rta_mpath_nh, _field) 406 const static struct nlattr_parser nla_p_rtnh[] = { 407 { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip }, 408 { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia }, 409 }; 410 const static struct nlfield_parser nlf_p_rtnh[] = { 411 { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 }, 412 { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 }, 413 { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz }, 414 }; 415 #undef _IN 416 #undef _OUT 417 418 static bool 419 post_p_rtnh(void *_attrs, struct nl_pstate *npt __unused) 420 { 421 struct rta_mpath_nh *attrs = (struct rta_mpath_nh *)_attrs; 422 423 set_scope6(attrs->gw, attrs->ifp); 424 return (true); 425 } 426 NL_DECLARE_PARSER_EXT(mpath_parser, struct rtnexthop, NULL, nlf_p_rtnh, nla_p_rtnh, post_p_rtnh); 427 428 struct rta_mpath { 429 int num_nhops; 430 struct rta_mpath_nh nhops[0]; 431 }; 432 433 static int 434 nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 435 { 436 int data_len = nla->nla_len - sizeof(struct nlattr); 437 struct rtnexthop *rtnh; 438 439 int max_nhops = data_len / sizeof(struct rtnexthop); 440 441 struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh)); 442 mp->num_nhops = 0; 443 444 for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) { 445 struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++]; 446 447 int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser, 448 npt, mpnh); 449 if (error != 0) { 450 NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed", 451 mp->num_nhops - 1); 452 return (error); 453 } 454 455 int len = NL_ITEM_ALIGN(rtnh->rtnh_len); 456 data_len -= len; 457 rtnh = (struct rtnexthop *)((char *)rtnh + len); 458 } 459 if (data_len != 0 || mp->num_nhops == 0) { 460 NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr"); 461 return (EINVAL); 462 } 463 464 *((struct rta_mpath **)target) = mp; 465 return (0); 466 } 467 468 469 struct nl_parsed_route { 470 struct sockaddr *rta_dst; 471 struct sockaddr *rta_gw; 472 struct ifnet *rta_oif; 473 struct rta_mpath *rta_multipath; 474 uint32_t rta_table; 475 uint32_t rta_rtflags; 476 uint32_t rta_nh_id; 477 uint32_t rta_weight; 478 uint32_t rtax_mtu; 479 uint8_t rtm_table; 480 uint8_t rtm_family; 481 uint8_t rtm_dst_len; 482 uint8_t rtm_protocol; 483 uint8_t rtm_type; 484 uint32_t rtm_flags; 485 }; 486 487 #define _IN(_field) offsetof(struct rtmsg, _field) 488 #define _OUT(_field) offsetof(struct nl_parsed_route, _field) 489 static struct nlattr_parser nla_p_rtmetrics[] = { 490 { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 }, 491 }; 492 NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics); 493 494 static const struct nlattr_parser nla_p_rtmsg[] = { 495 { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, 496 { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, 497 { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip }, 498 { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, 499 { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, 500 { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, 501 { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 }, 502 { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 }, 503 { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia }, 504 { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 }, 505 }; 506 507 static const struct nlfield_parser nlf_p_rtmsg[] = { 508 { .off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 }, 509 { .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 }, 510 { .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 }, 511 { .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 }, 512 { .off_in = _IN(rtm_table), .off_out = _OUT(rtm_table), .cb = nlf_get_u8 }, 513 { .off_in = _IN(rtm_flags), .off_out = _OUT(rtm_flags), .cb = nlf_get_u32 }, 514 }; 515 #undef _IN 516 #undef _OUT 517 518 static bool 519 post_p_rtmsg(void *_attrs, struct nl_pstate *npt __unused) 520 { 521 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_attrs; 522 523 set_scope6(attrs->rta_dst, attrs->rta_oif); 524 set_scope6(attrs->rta_gw, attrs->rta_oif); 525 return (true); 526 } 527 NL_DECLARE_PARSER_EXT(rtm_parser, struct rtmsg, NULL, nlf_p_rtmsg, nla_p_rtmsg, post_p_rtmsg); 528 529 struct netlink_walkargs { 530 struct nl_writer *nw; 531 struct route_nhop_data rnd; 532 struct nlmsghdr hdr; 533 struct nlpcb *nlp; 534 uint32_t fibnum; 535 int family; 536 int error; 537 int count; 538 int dumped; 539 int dumped_tables; 540 }; 541 542 static int 543 dump_rtentry(struct rtentry *rt, void *_arg) 544 { 545 struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; 546 int error; 547 548 wa->count++; 549 if (wa->error != 0) 550 return (0); 551 if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp))) 552 return (0); 553 wa->dumped++; 554 555 rt_get_rnd(rt, &wa->rnd); 556 557 error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw); 558 559 IF_DEBUG_LEVEL(LOG_DEBUG3) { 560 char rtbuf[INET6_ADDRSTRLEN + 5]; 561 FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family, 562 "Dump %s, error %d", 563 rt_print_buf(rt, rtbuf, sizeof(rtbuf)), error); 564 } 565 wa->error = error; 566 567 return (0); 568 } 569 570 static void 571 dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family) 572 { 573 FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump"); 574 wa->count = 0; 575 wa->dumped = 0; 576 577 rib_walk(fibnum, family, false, dump_rtentry, wa); 578 579 wa->dumped_tables++; 580 581 FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d", 582 wa->count, wa->dumped); 583 } 584 585 static int 586 dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family) 587 { 588 wa->fibnum = fibnum; 589 590 if (family == AF_UNSPEC) { 591 for (int i = 0; i < AF_MAX; i++) { 592 if (rt_tables_get_rnh(fibnum, i) != 0) { 593 wa->family = i; 594 dump_rtable_one(wa, fibnum, i); 595 if (wa->error != 0) 596 break; 597 } 598 } 599 } else { 600 if (rt_tables_get_rnh(fibnum, family) != 0) { 601 wa->family = family; 602 dump_rtable_one(wa, fibnum, family); 603 } 604 } 605 606 return (wa->error); 607 } 608 609 static int 610 handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs, 611 struct nlmsghdr *hdr, struct nl_pstate *npt) 612 { 613 RIB_RLOCK_TRACKER; 614 struct rib_head *rnh; 615 const struct rtentry *rt; 616 struct route_nhop_data rnd; 617 uint32_t fibnum = attrs->rta_table; 618 sa_family_t family = attrs->rtm_family; 619 620 if (attrs->rta_dst == NULL) { 621 NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied"); 622 return (EINVAL); 623 } 624 625 rnh = rt_tables_get_rnh(fibnum, family); 626 if (rnh == NULL) 627 return (EAFNOSUPPORT); 628 629 RIB_RLOCK(rnh); 630 631 struct sockaddr *dst = attrs->rta_dst; 632 633 if (attrs->rtm_flags & RTM_F_PREFIX) 634 rt = rib_lookup_prefix_plen(rnh, dst, attrs->rtm_dst_len, &rnd); 635 else 636 rt = (const struct rtentry *)rnh->rnh_matchaddr(dst, &rnh->head); 637 if (rt == NULL) { 638 RIB_RUNLOCK(rnh); 639 return (ESRCH); 640 } 641 642 rt_get_rnd(rt, &rnd); 643 rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0); 644 645 RIB_RUNLOCK(rnh); 646 647 if (!rt_is_exportable(rt, nlp_get_cred(nlp))) 648 return (ESRCH); 649 650 IF_DEBUG_LEVEL(LOG_DEBUG2) { 651 char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused; 652 FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s", 653 nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)), 654 rt_print_buf(rt, rtbuf, sizeof(rtbuf))); 655 } 656 657 hdr->nlmsg_type = NL_RTM_NEWROUTE; 658 dump_px(fibnum, hdr, rt, &rnd, npt->nw); 659 660 return (0); 661 } 662 663 static int 664 handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, 665 struct nlmsghdr *hdr, struct nl_writer *nw) 666 { 667 struct netlink_walkargs wa = { 668 .nlp = nlp, 669 .nw = nw, 670 .hdr.nlmsg_pid = hdr->nlmsg_pid, 671 .hdr.nlmsg_seq = hdr->nlmsg_seq, 672 .hdr.nlmsg_type = NL_RTM_NEWROUTE, 673 .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, 674 }; 675 676 if (fibnum == RT_TABLE_UNSPEC) { 677 for (int i = 0; i < V_rt_numfibs; i++) { 678 dump_rtable_fib(&wa, fibnum, family); 679 if (wa.error != 0) 680 break; 681 } 682 } else 683 dump_rtable_fib(&wa, fibnum, family); 684 685 if (wa.error == 0 && wa.dumped_tables == 0) { 686 FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family"); 687 wa.error = ESRCH; 688 // How do we propagate it? 689 } 690 691 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) { 692 NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); 693 return (ENOMEM); 694 } 695 696 return (wa.error); 697 } 698 699 static struct nhop_object * 700 finalize_nhop(struct nhop_object *nh, const struct sockaddr *dst, int *perror) 701 { 702 /* 703 * The following MUST be filled: 704 * nh_ifp, nh_ifa, nh_gw 705 */ 706 if (nh->gw_sa.sa_family == 0) { 707 /* 708 * Empty gateway. Can be direct route with RTA_OIF set. 709 */ 710 if (nh->nh_ifp != NULL) 711 nhop_set_direct_gw(nh, nh->nh_ifp); 712 else { 713 NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping"); 714 *perror = EINVAL; 715 return (NULL); 716 } 717 /* Both nh_ifp and gateway are set */ 718 } else { 719 /* Gateway is set up, we can derive ifp if not set */ 720 if (nh->nh_ifp == NULL) { 721 uint32_t fibnum = nhop_get_fibnum(nh); 722 uint32_t flags = 0; 723 724 if (nh->nh_flags & NHF_GATEWAY) 725 flags = RTF_GATEWAY; 726 else if (nh->nh_flags & NHF_HOST) 727 flags = RTF_HOST; 728 729 struct ifaddr *ifa = ifa_ifwithroute(flags, dst, &nh->gw_sa, fibnum); 730 if (ifa == NULL) { 731 NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping"); 732 *perror = EINVAL; 733 return (NULL); 734 } 735 nhop_set_transmit_ifp(nh, ifa->ifa_ifp); 736 } 737 } 738 /* Both nh_ifp and gateway are set */ 739 if (nh->nh_ifa == NULL) { 740 const struct sockaddr *gw_sa = &nh->gw_sa; 741 742 if (gw_sa->sa_family != dst->sa_family) { 743 /* 744 * Use dst as the target for determining the default 745 * preferred ifa IF 746 * 1) the gateway is link-level (e.g. direct route) 747 * 2) the gateway family is different (e.g. IPv4 over IPv6). 748 */ 749 gw_sa = dst; 750 } 751 752 struct ifaddr *ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); 753 if (ifa == NULL) { 754 /* Try link-level ifa. */ 755 gw_sa = &nh->gw_sa; 756 ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); 757 if (ifa == NULL) { 758 NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping"); 759 *perror = EINVAL; 760 return (NULL); 761 } 762 } 763 nhop_set_src(nh, ifa); 764 } 765 766 return (nhop_get_nhop(nh, perror)); 767 } 768 769 static int 770 get_pxflag(const struct nl_parsed_route *attrs) 771 { 772 int pxflag = 0; 773 switch (attrs->rtm_family) { 774 case AF_INET: 775 if (attrs->rtm_dst_len == 32) 776 pxflag = NHF_HOST; 777 else if (attrs->rtm_dst_len == 0) 778 pxflag = NHF_DEFAULT; 779 break; 780 case AF_INET6: 781 if (attrs->rtm_dst_len == 128) 782 pxflag = NHF_HOST; 783 else if (attrs->rtm_dst_len == 0) 784 pxflag = NHF_DEFAULT; 785 break; 786 } 787 788 return (pxflag); 789 } 790 791 static int 792 get_op_flags(int nlm_flags) 793 { 794 int op_flags = 0; 795 796 op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0; 797 op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0; 798 op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0; 799 op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0; 800 801 return (op_flags); 802 } 803 804 #ifdef ROUTE_MPATH 805 static int 806 create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, 807 struct nl_pstate *npt, struct nhop_object **pnh) 808 { 809 int error; 810 811 if (mpnh->gw == NULL) 812 return (EINVAL); 813 814 struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 815 if (nh == NULL) 816 return (ENOMEM); 817 818 error = nl_set_nexthop_gw(nh, mpnh->gw, mpnh->ifp, npt); 819 if (error != 0) { 820 nhop_free(nh); 821 return (error); 822 } 823 if (mpnh->ifp != NULL) 824 nhop_set_transmit_ifp(nh, mpnh->ifp); 825 nhop_set_pxtype_flag(nh, get_pxflag(attrs)); 826 nhop_set_rtflags(nh, attrs->rta_rtflags); 827 if (attrs->rtm_protocol > RTPROT_STATIC) 828 nhop_set_origin(nh, attrs->rtm_protocol); 829 830 *pnh = finalize_nhop(nh, attrs->rta_dst, &error); 831 832 return (error); 833 } 834 #endif 835 836 static struct nhop_object * 837 create_nexthop_from_attrs(struct nl_parsed_route *attrs, 838 struct nl_pstate *npt, int *perror) 839 { 840 struct nhop_object *nh = NULL; 841 int error = 0; 842 843 if (attrs->rta_multipath != NULL) { 844 #ifdef ROUTE_MPATH 845 /* Multipath w/o explicit nexthops */ 846 int num_nhops = attrs->rta_multipath->num_nhops; 847 struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops); 848 849 for (int i = 0; i < num_nhops; i++) { 850 struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; 851 852 error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh); 853 if (error != 0) { 854 for (int j = 0; j < i; j++) 855 nhop_free(wn[j].nh); 856 break; 857 } 858 wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1; 859 } 860 if (error == 0) { 861 struct rib_head *rh = nhop_get_rh(wn[0].nh); 862 struct nhgrp_object *nhg; 863 864 nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family, 865 wn, num_nhops, perror); 866 if (nhg != NULL) { 867 if (attrs->rtm_protocol > RTPROT_STATIC) 868 nhgrp_set_origin(nhg, attrs->rtm_protocol); 869 nhg = nhgrp_get_nhgrp(nhg, perror); 870 } 871 for (int i = 0; i < num_nhops; i++) 872 nhop_free(wn[i].nh); 873 if (nhg != NULL) 874 return ((struct nhop_object *)nhg); 875 error = *perror; 876 } 877 #else 878 error = ENOTSUP; 879 #endif 880 *perror = error; 881 } else { 882 nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); 883 if (nh == NULL) { 884 *perror = ENOMEM; 885 return (NULL); 886 } 887 if (attrs->rta_gw != NULL) { 888 *perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt); 889 if (*perror != 0) { 890 nhop_free(nh); 891 return (NULL); 892 } 893 } 894 if (attrs->rta_oif != NULL) 895 nhop_set_transmit_ifp(nh, attrs->rta_oif); 896 if (attrs->rtax_mtu != 0) 897 nhop_set_mtu(nh, attrs->rtax_mtu, true); 898 if (attrs->rta_rtflags & RTF_BROADCAST) 899 nhop_set_broadcast(nh, true); 900 if (attrs->rtm_protocol > RTPROT_STATIC) 901 nhop_set_origin(nh, attrs->rtm_protocol); 902 nhop_set_pxtype_flag(nh, get_pxflag(attrs)); 903 nhop_set_rtflags(nh, attrs->rta_rtflags); 904 905 switch (attrs->rtm_type) { 906 case RTN_UNICAST: 907 break; 908 case RTN_BLACKHOLE: 909 nhop_set_blackhole(nh, RTF_BLACKHOLE); 910 break; 911 case RTN_PROHIBIT: 912 case RTN_UNREACHABLE: 913 nhop_set_blackhole(nh, RTF_REJECT); 914 break; 915 /* TODO: return ENOTSUP for other types if strict option is set */ 916 } 917 918 nh = finalize_nhop(nh, attrs->rta_dst, perror); 919 } 920 921 return (nh); 922 } 923 924 static int 925 rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 926 struct nl_pstate *npt) 927 { 928 struct rib_cmd_info rc = {}; 929 struct nhop_object *nh = NULL; 930 int error; 931 932 struct nl_parsed_route attrs = {}; 933 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 934 if (error != 0) 935 return (error); 936 937 /* Check if we have enough data */ 938 if (attrs.rta_dst == NULL) { 939 NL_LOG(LOG_DEBUG, "missing RTA_DST"); 940 return (EINVAL); 941 } 942 943 if (attrs.rtm_table > 0 && attrs.rta_table == 0) { 944 /* pre-2.6.19 Linux API compatibility */ 945 attrs.rta_table = attrs.rtm_table; 946 } else if (attrs.rta_table >= V_rt_numfibs) { 947 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 948 return (EINVAL); 949 } 950 951 if (attrs.rta_nh_id != 0) { 952 /* Referenced uindex */ 953 int pxflag = get_pxflag(&attrs); 954 nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id, 955 pxflag, &error); 956 if (error != 0) 957 return (error); 958 } else { 959 nh = create_nexthop_from_attrs(&attrs, npt, &error); 960 if (error != 0) { 961 NL_LOG(LOG_DEBUG, "Error creating nexthop"); 962 return (error); 963 } 964 } 965 966 if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0) 967 attrs.rta_weight = RT_DEFAULT_WEIGHT; 968 struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight }; 969 int op_flags = get_op_flags(hdr->nlmsg_flags); 970 971 error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, 972 &rnd, op_flags, &rc); 973 if (error == 0) 974 report_operation(attrs.rta_table, &rc, nlp, hdr); 975 return (error); 976 } 977 978 static int 979 path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 980 { 981 struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; 982 983 if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) 984 return (0); 985 986 if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp)) 987 return (0); 988 989 return (1); 990 } 991 992 static int 993 rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, 994 struct nl_pstate *npt) 995 { 996 struct rib_cmd_info rc; 997 int error; 998 999 struct nl_parsed_route attrs = {}; 1000 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 1001 if (error != 0) 1002 return (error); 1003 1004 if (attrs.rta_dst == NULL) { 1005 NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set"); 1006 return (ESRCH); 1007 } 1008 1009 if (attrs.rta_table >= V_rt_numfibs) { 1010 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 1011 return (EINVAL); 1012 } 1013 1014 error = rib_del_route_px(attrs.rta_table, attrs.rta_dst, 1015 attrs.rtm_dst_len, path_match_func, &attrs, 1016 (attrs.rta_rtflags & RTF_PINNED) ? RTM_F_FORCE : 0, &rc); 1017 if (error == 0) 1018 report_operation(attrs.rta_table, &rc, nlp, hdr); 1019 return (error); 1020 } 1021 1022 static int 1023 rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) 1024 { 1025 int error; 1026 1027 struct nl_parsed_route attrs = {}; 1028 error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); 1029 if (error != 0) 1030 return (error); 1031 1032 if (attrs.rta_table >= V_rt_numfibs) { 1033 NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); 1034 return (EINVAL); 1035 } 1036 1037 if (hdr->nlmsg_flags & NLM_F_DUMP) 1038 error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw); 1039 else 1040 error = handle_rtm_getroute(nlp, &attrs, hdr, npt); 1041 1042 return (error); 1043 } 1044 1045 void 1046 rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) 1047 { 1048 struct nl_writer nw; 1049 int family, nlm_flags = 0; 1050 1051 family = rt_get_family(rc->rc_rt); 1052 1053 /* XXX: check if there are active listeners first */ 1054 1055 /* TODO: consider passing PID/type/seq */ 1056 switch (rc->rc_cmd) { 1057 case RTM_ADD: 1058 nlm_flags = NLM_F_EXCL | NLM_F_CREATE; 1059 break; 1060 case RTM_CHANGE: 1061 nlm_flags = NLM_F_REPLACE; 1062 break; 1063 case RTM_DELETE: 1064 nlm_flags = 0; 1065 break; 1066 } 1067 IF_DEBUG_LEVEL(LOG_DEBUG2) { 1068 char rtbuf[NHOP_PRINT_BUFSIZE] __unused; 1069 FIB_LOG(LOG_DEBUG2, fibnum, family, 1070 "received event %s for %s / nlm_flags=%X", 1071 rib_print_cmd(rc->rc_cmd), 1072 rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)), 1073 nlm_flags); 1074 } 1075 1076 struct nlmsghdr hdr = { 1077 .nlmsg_flags = nlm_flags, 1078 .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd), 1079 }; 1080 1081 struct route_nhop_data rnd = { 1082 .rnd_nhop = rc_get_nhop(rc), 1083 .rnd_weight = rc->rc_nh_weight, 1084 }; 1085 1086 uint32_t group_id = family_to_group(family); 1087 if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, 0, 1088 false)) { 1089 NL_LOG(LOG_DEBUG, "error allocating event buffer"); 1090 return; 1091 } 1092 1093 dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw); 1094 nlmsg_flush(&nw); 1095 } 1096 1097 static const struct rtnl_cmd_handler cmd_handlers[] = { 1098 { 1099 .cmd = NL_RTM_GETROUTE, 1100 .name = "RTM_GETROUTE", 1101 .cb = &rtnl_handle_getroute, 1102 .flags = RTNL_F_ALLOW_NONVNET_JAIL, 1103 }, 1104 { 1105 .cmd = NL_RTM_DELROUTE, 1106 .name = "RTM_DELROUTE", 1107 .cb = &rtnl_handle_delroute, 1108 .priv = PRIV_NET_ROUTE, 1109 }, 1110 { 1111 .cmd = NL_RTM_NEWROUTE, 1112 .name = "RTM_NEWROUTE", 1113 .cb = &rtnl_handle_newroute, 1114 .priv = PRIV_NET_ROUTE, 1115 } 1116 }; 1117 1118 static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser}; 1119 1120 void 1121 rtnl_routes_init(void) 1122 { 1123 NL_VERIFY_PARSERS(all_parsers); 1124 rtnl_register_messages(cmd_handlers, nitems(cmd_handlers)); 1125 } 1126