/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_nhop #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); /* * This file contains the logic to maintain kernel nexthops and * nexhop groups based om the data provided by the user. * * Kernel stores (nearly) all of the routing data in the nexthops, * including the prefix-specific flags (NHF_HOST and NHF_DEFAULT). * * Netlink API provides higher-level abstraction for the user. Each * user-created nexthop may map to multiple kernel nexthops. * * The following variations require separate kernel nexthop to be * created: * * prefix flags (NHF_HOST, NHF_DEFAULT) * * using IPv6 gateway for IPv4 routes * * different fibnum * * These kernel nexthops have the lifetime bound to the lifetime of * the user_nhop object. They are not collected until user requests * to delete the created user_nhop. * */ struct user_nhop { uint32_t un_idx; /* Userland-provided index */ uint32_t un_fibfam; /* fibnum+af(as highest byte) */ uint8_t un_protocol; /* protocol that install the record */ struct nhop_object *un_nhop; /* "production" nexthop */ struct nhop_object *un_nhop_src; /* nexthop to copy from */ struct weightened_nhop *un_nhgrp_src; /* nexthops for nhg */ uint32_t un_nhgrp_count; /* number of nexthops */ struct user_nhop *un_next; /* next item in hash chain */ struct user_nhop *un_nextchild; /* master -> children */ struct epoch_context un_epoch_ctx; /* epoch ctl helper */ }; /* produce hash value for an object */ #define unhop_hash_obj(_obj) (hash_unhop(_obj)) /* compare two objects */ #define unhop_cmp(_one, _two) (cmp_unhop(_one, _two)) /* next object accessor */ #define unhop_next(_obj) (_obj)->un_next CHT_SLIST_DEFINE(unhop, struct user_nhop); struct unhop_ctl { struct unhop_head un_head; struct rmlock un_lock; }; #define UN_LOCK_INIT(_ctl) rm_init(&(_ctl)->un_lock, "unhop_ctl") #define UN_TRACKER struct rm_priotracker un_tracker #define UN_RLOCK(_ctl) rm_rlock(&((_ctl)->un_lock), &un_tracker) #define UN_RUNLOCK(_ctl) rm_runlock(&((_ctl)->un_lock), &un_tracker) #define UN_WLOCK(_ctl) rm_wlock(&(_ctl)->un_lock); #define UN_WUNLOCK(_ctl) rm_wunlock(&(_ctl)->un_lock); VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL; #define V_un_ctl VNET(un_ctl) static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size); static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b); static unsigned int hash_unhop(const struct user_nhop *obj); static void destroy_unhop(struct user_nhop *unhop); static struct nhop_object *clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags); static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b) { return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam); } /* * Hash callback: calculate hash of an object */ static unsigned int hash_unhop(const struct user_nhop *obj) { return (obj->un_idx ^ obj->un_fibfam); } #define UNHOP_IS_MASTER(_unhop) ((_unhop)->un_fibfam == 0) /* * Factory interface for creating matching kernel nexthops/nexthop groups * * @uidx: userland nexhop index used to create the nexthop * @fibnum: fibnum nexthop will be used in * @family: upper family nexthop will be used in * @nh_flags: desired nexthop prefix flags * @perror: pointer to store error to * * Returns referenced nexthop linked to @fibnum/@family rib on success. */ struct nhop_object * nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx, int nh_flags, int *perror) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); UN_TRACKER; if (__predict_false(ctl == NULL)) return (NULL); struct user_nhop key= { .un_idx = uidx, .un_fibfam = fibnum | ((uint32_t)family) << 24, }; struct user_nhop *unhop; nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT); if (__predict_false(family == 0)) return (NULL); UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop != NULL) { struct nhop_object *nh = unhop->un_nhop; UN_RLOCK(ctl); *perror = 0; nhop_ref_any(nh); return (nh); } /* * Exact nexthop not found. Search for template nexthop to clone from. */ key.un_fibfam = 0; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { UN_RUNLOCK(ctl); *perror = ESRCH; return (NULL); } UN_RUNLOCK(ctl); /* Create entry to insert first */ struct user_nhop *un_new, *un_tmp; un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); if (un_new == NULL) { *perror = ENOMEM; return (NULL); } un_new->un_idx = uidx; un_new->un_fibfam = fibnum | ((uint32_t)family) << 24; /* Relying on epoch to protect unhop here */ un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags); if (un_new->un_nhop == NULL) { free(un_new, M_NETLINK); *perror = ENOMEM; return (NULL); } /* Insert back and report */ UN_WLOCK(ctl); /* First, find template record once again */ CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { /* Someone deleted the nexthop during the call */ UN_WUNLOCK(ctl); *perror = ESRCH; destroy_unhop(un_new); return (NULL); } /* Second, check the direct match */ CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp); struct nhop_object *nh; if (un_tmp != NULL) { /* Another thread already created the desired nextop, use it */ nh = un_tmp->un_nhop; } else { /* Finally, insert the new nexthop and link it to the primary */ nh = un_new->un_nhop; CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new); un_new->un_nextchild = unhop->un_nextchild; unhop->un_nextchild = un_new; un_new = NULL; NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh); } UN_WUNLOCK(ctl); if (un_new != NULL) destroy_unhop(un_new); *perror = 0; nhop_ref_any(nh); return (nh); } static struct user_nhop * nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx) { struct user_nhop key= { .un_idx = uidx }; struct user_nhop *unhop = NULL; UN_TRACKER; UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); UN_RUNLOCK(ctl); return (unhop); } #define MAX_STACK_NHOPS 4 static struct nhop_object * clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags) { #ifdef ROUTE_MPATH const struct weightened_nhop *wn; struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS]; uint32_t num_nhops; #endif struct nhop_object *nh = NULL; int error; if (unhop->un_nhop_src != NULL) { IF_DEBUG_LEVEL(LOG_DEBUG2) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf)); FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src, "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum, family, nh_flags); } struct nhop_object *nh; nh = nhop_alloc(fibnum, AF_UNSPEC); if (nh == NULL) return (NULL); nhop_copy(nh, unhop->un_nhop_src); /* Check that nexthop gateway is compatible with the new family */ if (!nhop_set_upper_family(nh, family)) { nhop_free(nh); return (NULL); } nhop_set_uidx(nh, unhop->un_idx); nhop_set_pxtype_flag(nh, nh_flags); return (nhop_get_nhop(nh, &error)); } #ifdef ROUTE_MPATH wn = unhop->un_nhgrp_src; num_nhops = unhop->un_nhgrp_count; if (num_nhops > MAX_STACK_NHOPS) { wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT); if (wn_new == NULL) return (NULL); } else wn_new = wn_base; for (int i = 0; i < num_nhops; i++) { uint32_t uidx = nhop_get_uidx(wn[i].nh); MPASS(uidx != 0); wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error); if (error != 0) break; wn_new[i].weight = wn[i].weight; } if (error == 0) { struct rib_head *rh = nhop_get_rh(wn_new[0].nh); struct nhgrp_object *nhg; error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg); nh = (struct nhop_object *)nhg; } if (wn_new != wn_base) free(wn_new, M_TEMP); #endif return (nh); } static void destroy_unhop(struct user_nhop *unhop) { if (unhop->un_nhop != NULL) nhop_free_any(unhop->un_nhop); if (unhop->un_nhop_src != NULL) nhop_free_any(unhop->un_nhop_src); free(unhop, M_NETLINK); } static void destroy_unhop_epoch(epoch_context_t ctx) { struct user_nhop *unhop; unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx); destroy_unhop(unhop); } static uint32_t find_spare_uidx(struct unhop_ctl *ctl) { struct user_nhop *unhop, key = {}; uint32_t uidx = 0; UN_TRACKER; UN_RLOCK(ctl); /* This should return spare uid with 75% of 65k used in ~99/100 cases */ for (int i = 0; i < 16; i++) { key.un_idx = (arc4random() % 65536) + 65536 * 4; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { uidx = key.un_idx; break; } } UN_RUNLOCK(ctl); return (uidx); } /* * Actual netlink code */ struct netlink_walkargs { struct nl_writer *nw; struct nlmsghdr hdr; struct nlpcb *so; int family; int error; int count; int dumped; }; #define ENOMEM_IF_NULL(_v) if ((_v) == NULL) goto enomem static bool dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr, struct nl_writer *nw) { if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) goto enomem; struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); nhm->nh_family = AF_UNSPEC; nhm->nh_scope = 0; nhm->nh_protocol = unhop->un_protocol; nhm->nh_flags = 0; nlattr_add_u32(nw, NHA_ID, unhop->un_idx); nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH); struct weightened_nhop *wn = unhop->un_nhgrp_src; uint32_t num_nhops = unhop->un_nhgrp_count; /* TODO: a better API? */ int nla_len = sizeof(struct nlattr); nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp)); struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); if (nla == NULL) goto enomem; nla->nla_type = NHA_GROUP; nla->nla_len = nla_len; for (int i = 0; i < num_nhops; i++) { struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i]; grp->id = nhop_get_uidx(wn[i].nh); grp->weight = wn[i].weight; grp->resvd1 = 0; grp->resvd2 = 0; } if (nlmsg_end(nw)) return (true); enomem: NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory"); nlmsg_abort(nw); return (false); } static bool dump_nhop(const struct nhop_object *nh, uint32_t uidx, struct nlmsghdr *hdr, struct nl_writer *nw) { if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) goto enomem; struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); ENOMEM_IF_NULL(nhm); nhm->nh_family = nhop_get_neigh_family(nh); nhm->nh_scope = 0; // XXX: what's that? nhm->nh_protocol = nhop_get_origin(nh); nhm->nh_flags = 0; if (uidx != 0) nlattr_add_u32(nw, NHA_ID, uidx); if (nh->nh_flags & NHF_BLACKHOLE) { nlattr_add_flag(nw, NHA_BLACKHOLE); goto done; } nlattr_add_u32(nw, NHA_OIF, if_getindex(nh->nh_ifp)); switch (nh->gw_sa.sa_family) { #ifdef INET case AF_INET: nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr); break; #endif #ifdef INET6 case AF_INET6: { struct in6_addr addr = nh->gw6_sa.sin6_addr; in6_clearscope(&addr); nlattr_add(nw, NHA_GATEWAY, 16, &addr); break; } #endif } int off = nlattr_add_nested(nw, NHA_FREEBSD); if (off != 0) { nlattr_add_u32(nw, NHAF_AIF, if_getindex(nh->nh_aifp)); if (uidx == 0) { nlattr_add_u32(nw, NHAF_KID, nhop_get_idx(nh)); nlattr_add_u32(nw, NHAF_FAMILY, nhop_get_upper_family(nh)); nlattr_add_u32(nw, NHAF_TABLE, nhop_get_fibnum(nh)); } nlattr_set_len(nw, off); } done: if (nlmsg_end(nw)) return (true); enomem: nlmsg_abort(nw); return (false); } static void dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr, struct nl_writer *nw) { if (unhop->un_nhop_src != NULL) dump_nhop(unhop->un_nhop_src, unhop->un_idx, hdr, nw); else dump_nhgrp(unhop, hdr, nw); } static int delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx) { struct user_nhop *unhop_ret, *unhop_base, *unhop_chain; struct nl_writer nw; struct user_nhop key = { .un_idx = uidx }; UN_WLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base); if (unhop_base != NULL) { CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret); IF_DEBUG_LEVEL(LOG_DEBUG2) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf)); FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop, "removed base nhop %u: %s", uidx, nhbuf); } /* Unlink all child nexhops as well, keeping the chain intact */ unhop_chain = unhop_base->un_nextchild; while (unhop_chain != NULL) { CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain, unhop_ret); MPASS(unhop_chain == unhop_ret); IF_DEBUG_LEVEL(LOG_DEBUG3) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf_any(unhop_chain->un_nhop, nhbuf, sizeof(nhbuf)); FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop, "removed child nhop %u: %s", uidx, nhbuf); } unhop_chain = unhop_chain->un_nextchild; } } UN_WUNLOCK(ctl); if (unhop_base == NULL) { NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx); return (ENOENT); } /* Report nexthop deletion */ struct netlink_walkargs wa = { .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_DELNEXTHOP, }; if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP, false)) { NL_LOG(LOG_DEBUG, "error allocating message writer"); return (ENOMEM); } dump_unhop(unhop_base, &wa.hdr, &nw); nlmsg_flush(&nw); while (unhop_base != NULL) { unhop_chain = unhop_base->un_nextchild; NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx); unhop_base = unhop_chain; } return (0); } static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size) { void *new_ptr = NULL; size_t alloc_size; if (new_size == 0) return; if (new_size != 0) { alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size); new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); if (new_ptr == NULL) return; } NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size); UN_WLOCK(ctl); if (new_ptr != NULL) { CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size); } UN_WUNLOCK(ctl); if (new_ptr != NULL) free(new_ptr, M_NETLINK); } static bool __noinline vnet_init_unhops(void) { uint32_t num_buckets = 16; size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK, M_NOWAIT | M_ZERO); if (ctl == NULL) return (false); void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); if (ptr == NULL) { free(ctl, M_NETLINK); return (false); } CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets); UN_LOCK_INIT(ctl); if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) { free(ptr, M_NETLINK); free(ctl, M_NETLINK); } if (atomic_load_ptr(&V_un_ctl) == NULL) return (false); NL_LOG(LOG_NOTICE, "UNHOPS init done"); return (true); } static void vnet_destroy_unhops(const void *unused __unused) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); struct user_nhop *unhop, *tmp; if (ctl == NULL) return; V_un_ctl = NULL; /* Wait till all unhop users finish their reads */ NET_EPOCH_WAIT(); UN_WLOCK(ctl); CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) { destroy_unhop(unhop); } CHT_SLIST_FOREACH_SAFE_END; UN_WUNLOCK(ctl); free(ctl->un_head.ptr, M_NETLINK); free(ctl, M_NETLINK); } VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_destroy_unhops, NULL); static int nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { int error = 0; /* Verify attribute correctness */ struct nexthop_grp *grp = NLA_DATA(nla); int data_len = NLA_DATA_LEN(nla); int count = data_len / sizeof(*grp); if (count == 0 || (count * sizeof(*grp) != data_len)) { NL_LOG(LOG_DEBUG, "Invalid length for RTA_GROUP: %d", data_len); return (EINVAL); } *((struct nlattr **)target) = nla; return (error); } static void set_scope6(struct sockaddr *sa, if_t ifp) { #ifdef INET6 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); } #endif } struct nl_parsed_nhop { uint32_t nha_id; uint8_t nha_blackhole; uint8_t nha_groups; uint8_t nhaf_knhops; uint8_t nhaf_family; struct ifnet *nha_oif; struct sockaddr *nha_gw; struct nlattr *nha_group; uint8_t nh_family; uint8_t nh_protocol; uint32_t nhaf_table; uint32_t nhaf_kid; uint32_t nhaf_aif; }; #define _IN(_field) offsetof(struct nhmsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_nhop, _field) static struct nlattr_parser nla_p_nh_fbsd[] = { { .type = NHAF_KNHOPS, .off = _OUT(nhaf_knhops), .cb = nlattr_get_flag }, { .type = NHAF_TABLE, .off = _OUT(nhaf_table), .cb = nlattr_get_uint32 }, { .type = NHAF_FAMILY, .off = _OUT(nhaf_family), .cb = nlattr_get_uint8 }, { .type = NHAF_KID, .off = _OUT(nhaf_kid), .cb = nlattr_get_uint32 }, { .type = NHAF_AIF, .off = _OUT(nhaf_aif), .cb = nlattr_get_uint32 }, }; NL_DECLARE_ATTR_PARSER(nh_fbsd_parser, nla_p_nh_fbsd); static const struct nlfield_parser nlf_p_nh[] = { { .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 }, { .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 }, }; static const struct nlattr_parser nla_p_nh[] = { { .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 }, { .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg }, { .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag }, { .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp }, { .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip }, { .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag }, { .type = NHA_FREEBSD, .arg = &nh_fbsd_parser, .cb = nlattr_get_nested }, }; #undef _IN #undef _OUT static bool post_p_nh(void *_attrs, struct nl_pstate *npt) { struct nl_parsed_nhop *attrs = (struct nl_parsed_nhop *)_attrs; set_scope6(attrs->nha_gw, attrs->nha_oif); return (true); } NL_DECLARE_PARSER_EXT(nhmsg_parser, struct nhmsg, NULL, nlf_p_nh, nla_p_nh, post_p_nh); static bool eligible_nhg(const struct nhop_object *nh) { return (nh->nh_flags & NHF_GATEWAY); } static int newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop) { struct nexthop_grp *grp = NLA_DATA(attrs->nha_group); int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp); struct weightened_nhop *wn; wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO); if (wn == NULL) return (ENOMEM); for (int i = 0; i < count; i++) { struct user_nhop *unhop; unhop = nl_find_base_unhop(ctl, grp[i].id); if (unhop == NULL) { NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id); free(wn, M_NETLINK); return (ESRCH); } else if (unhop->un_nhop_src == NULL) { NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported", grp[i].id); free(wn, M_NETLINK); return (ENOTSUP); } else if (!eligible_nhg(unhop->un_nhop_src)) { NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible", grp[i].id); free(wn, M_NETLINK); return (ENOTSUP); } /* * TODO: consider more rigid eligibility checks: * restrict nexthops with the same gateway */ wn[i].nh = unhop->un_nhop_src; wn[i].weight = grp[i].weight; } unhop->un_nhgrp_src = wn; unhop->un_nhgrp_count = count; return (0); } /* * Sets nexthop @nh gateway specified by @gw. * If gateway is IPv6 link-local, alters @gw to include scopeid equal to * @ifp ifindex. * Returns 0 on success or errno. */ int nl_set_nexthop_gw(struct nhop_object *nh, struct sockaddr *gw, if_t ifp, struct nl_pstate *npt) { #ifdef INET6 if (gw->sa_family == AF_INET6) { struct sockaddr_in6 *gw6 = (struct sockaddr_in6 *)gw; if (IN6_IS_ADDR_LINKLOCAL(&gw6->sin6_addr)) { if (ifp == NULL) { NLMSG_REPORT_ERR_MSG(npt, "interface not set"); return (EINVAL); } in6_set_unicast_scopeid(&gw6->sin6_addr, if_getindex(ifp)); } } #endif nhop_set_gw(nh, gw, true); return (0); } static int newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop, struct nl_pstate *npt) { struct ifaddr *ifa = NULL; struct nhop_object *nh; int error; if (!attrs->nha_blackhole) { if (attrs->nha_gw == NULL) { NLMSG_REPORT_ERR_MSG(npt, "missing NHA_GATEWAY"); return (EINVAL); } if (attrs->nha_oif == NULL) { NLMSG_REPORT_ERR_MSG(npt, "missing NHA_OIF"); return (EINVAL); } if (ifa == NULL) ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif); if (ifa == NULL) { NLMSG_REPORT_ERR_MSG(npt, "Unable to determine default source IP"); return (EINVAL); } } int family = attrs->nha_gw != NULL ? attrs->nha_gw->sa_family : attrs->nh_family; nh = nhop_alloc(RT_DEFAULT_FIB, family); if (nh == NULL) { NL_LOG(LOG_DEBUG, "Unable to allocate nexthop"); return (ENOMEM); } nhop_set_uidx(nh, attrs->nha_id); nhop_set_origin(nh, attrs->nh_protocol); if (attrs->nha_blackhole) nhop_set_blackhole(nh, NHF_BLACKHOLE); else { error = nl_set_nexthop_gw(nh, attrs->nha_gw, attrs->nha_oif, npt); if (error != 0) { nhop_free(nh); return (error); } nhop_set_transmit_ifp(nh, attrs->nha_oif); nhop_set_src(nh, ifa); } error = nhop_get_unlinked(nh); if (error != 0) { NL_LOG(LOG_DEBUG, "unable to finalize nexthop"); return (error); } IF_DEBUG_LEVEL(LOG_DEBUG2) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf(nh, nhbuf, sizeof(nhbuf)); NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf); } unhop->un_nhop_src = nh; return (0); } static int rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct nl_writer nw; struct user_nhop *unhop; int error; if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops())) return (ENOMEM); struct unhop_ctl *ctl = V_un_ctl; struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); /* * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class * citizen. */ if (attrs.nha_id == 0) { attrs.nha_id = find_spare_uidx(ctl); if (attrs.nha_id == 0) { NL_LOG(LOG_DEBUG, "Unable to get spare uidx"); return (ENOSPC); } } NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? if_getindex(attrs.nha_oif) : 0); unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); if (unhop == NULL) { NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop"); return (ENOMEM); } unhop->un_idx = attrs.nha_id; unhop->un_protocol = attrs.nh_protocol; if (attrs.nha_group) error = newnhg(ctl, &attrs, unhop); else error = newnhop(&attrs, unhop, npt); if (error != 0) { free(unhop, M_NETLINK); return (error); } UN_WLOCK(ctl); /* Check if uidx already exists */ struct user_nhop *tmp = NULL; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp); if (tmp != NULL) { UN_WUNLOCK(ctl); NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id); destroy_unhop(unhop); return (EEXIST); } CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop); uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head); UN_WUNLOCK(ctl); /* Report addition of the next nexhop */ struct netlink_walkargs wa = { .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, }; if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP, false)) { NL_LOG(LOG_DEBUG, "error allocating message writer"); return (ENOMEM); } dump_unhop(unhop, &wa.hdr, &nw); nlmsg_flush(&nw); consider_resize(ctl, num_buckets_new); return (0); } static int rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); int error; if (__predict_false(ctl == NULL)) return (ESRCH); struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); if (attrs.nha_id == 0) { NL_LOG(LOG_DEBUG, "NHA_ID not set"); return (EINVAL); } error = delete_unhop(ctl, hdr, attrs.nha_id); return (error); } static bool match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop) { if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id) return (false); if (attrs->nha_groups != 0 && unhop->un_nhgrp_src == NULL) return (false); if (attrs->nha_oif != NULL && (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif)) return (false); return (true); } static int rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct user_nhop *unhop; UN_TRACKER; int error; struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); struct netlink_walkargs wa = { .nw = npt->nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, }; if (attrs.nha_id != 0) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); struct user_nhop key = { .un_idx = attrs.nha_id }; if (__predict_false(ctl == NULL)) return (ESRCH); NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id); UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); UN_RUNLOCK(ctl); if (unhop == NULL) return (ESRCH); dump_unhop(unhop, &wa.hdr, wa.nw); return (0); } else if (attrs.nhaf_kid != 0) { struct nhop_iter iter = { .fibnum = attrs.nhaf_table, .family = attrs.nhaf_family, }; int error = ESRCH; NL_LOG(LOG_DEBUG2, "START table %u family %d", attrs.nhaf_table, attrs.nhaf_family); for (struct nhop_object *nh = nhops_iter_start(&iter); nh; nh = nhops_iter_next(&iter)) { NL_LOG(LOG_DEBUG3, "get %u", nhop_get_idx(nh)); if (nhop_get_idx(nh) == attrs.nhaf_kid) { dump_nhop(nh, 0, &wa.hdr, wa.nw); error = 0; break; } } nhops_iter_stop(&iter); return (error); } else if (attrs.nhaf_knhops) { struct nhop_iter iter = { .fibnum = attrs.nhaf_table, .family = attrs.nhaf_family, }; NL_LOG(LOG_DEBUG2, "DUMP table %u family %d", attrs.nhaf_table, attrs.nhaf_family); wa.hdr.nlmsg_flags |= NLM_F_MULTI; for (struct nhop_object *nh = nhops_iter_start(&iter); nh; nh = nhops_iter_next(&iter)) { dump_nhop(nh, 0, &wa.hdr, wa.nw); } nhops_iter_stop(&iter); } else { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); if (__predict_false(ctl == NULL)) return (ESRCH); NL_LOG(LOG_DEBUG2, "DUMP unhops"); UN_RLOCK(ctl); wa.hdr.nlmsg_flags |= NLM_F_MULTI; CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) { if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop)) dump_unhop(unhop, &wa.hdr, wa.nw); } CHT_SLIST_FOREACH_END; UN_RUNLOCK(ctl); } if (wa.error == 0) { if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) return (ENOMEM); } return (0); } static const struct rtnl_cmd_handler cmd_handlers[] = { { .cmd = NL_RTM_NEWNEXTHOP, .name = "RTM_NEWNEXTHOP", .cb = &rtnl_handle_newnhop, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_DELNEXTHOP, .name = "RTM_DELNEXTHOP", .cb = &rtnl_handle_delnhop, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_GETNEXTHOP, .name = "RTM_GETNEXTHOP", .cb = &rtnl_handle_getnhop, } }; static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser, &nh_fbsd_parser }; void rtnl_nexthops_init(void) { NL_VERIFY_PARSERS(all_parsers); rtnl_register_messages(cmd_handlers, nitems(cmd_handlers)); }