1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/jail.h> 36 #include <sys/systm.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/socket.h> 40 #include <sys/sysctl.h> 41 #include <sys/syslog.h> 42 #include <sys/sysproto.h> 43 #include <sys/proc.h> 44 #include <sys/domain.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/rmlock.h> 48 49 #include <net/if.h> 50 #include <net/if_var.h> 51 #include <net/if_dl.h> 52 #include <net/route.h> 53 #include <net/route/route_ctl.h> 54 #include <net/route/route_var.h> 55 #include <net/route/nhop_utils.h> 56 #include <net/route/nhop.h> 57 #include <net/route/nhop_var.h> 58 #ifdef INET 59 #include <netinet/in_fib.h> 60 #endif 61 #ifdef INET6 62 #include <netinet6/in6_fib.h> 63 #endif 64 #include <net/vnet.h> 65 66 /* 67 * RIB helper functions. 68 */ 69 70 /* 71 * Calls @wa_f with @arg for each entry in the table specified by 72 * @af and @fibnum. 73 * 74 * Table is traversed under read lock. 75 */ 76 void 77 rib_walk(int af, u_int fibnum, rt_walktree_f_t *wa_f, void *arg) 78 { 79 RIB_RLOCK_TRACKER; 80 struct rib_head *rnh; 81 82 if ((rnh = rt_tables_get_rnh(fibnum, af)) == NULL) 83 return; 84 85 RIB_RLOCK(rnh); 86 rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f, arg); 87 RIB_RUNLOCK(rnh); 88 } 89 90 /* 91 * Wrapper for the control plane functions for performing af-agnostic 92 * lookups. 93 * @fibnum: fib to perform the lookup. 94 * @dst: sockaddr with family and addr filled in. IPv6 addresses needs to be in 95 * deembedded from. 96 * @flags: fib(9) flags. 97 * @flowid: flow id for path selection in multipath use case. 98 * 99 * Returns nhop_object or NULL. 100 * 101 * Requires NET_EPOCH. 102 * 103 */ 104 struct nhop_object * 105 rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, 106 uint32_t flowid) 107 { 108 struct nhop_object *nh; 109 110 nh = NULL; 111 112 switch (dst->sa_family) { 113 #ifdef INET 114 case AF_INET: 115 { 116 const struct sockaddr_in *a = (const struct sockaddr_in *)dst; 117 nh = fib4_lookup(fibnum, a->sin_addr, 0, flags, flowid); 118 break; 119 } 120 #endif 121 #ifdef INET6 122 case AF_INET6: 123 { 124 const struct sockaddr_in6 *a = (const struct sockaddr_in6*)dst; 125 nh = fib6_lookup(fibnum, &a->sin6_addr, a->sin6_scope_id, 126 flags, flowid); 127 break; 128 } 129 #endif 130 } 131 132 return (nh); 133 } 134 135 #ifdef ROUTE_MPATH 136 static void 137 decompose_change_notification(struct rib_cmd_info *rc, route_notification_t *cb, 138 void *cbdata) 139 { 140 uint32_t num_old, num_new; 141 uint32_t nh_idx_old, nh_idx_new; 142 struct weightened_nhop *wn_old, *wn_new; 143 struct weightened_nhop tmp = { NULL, 0 }; 144 uint32_t idx_old = 0, idx_new = 0; 145 146 struct rib_cmd_info rc_del = { .rc_cmd = RTM_DELETE, .rc_rt = rc->rc_rt }; 147 struct rib_cmd_info rc_add = { .rc_cmd = RTM_ADD, .rc_rt = rc->rc_rt }; 148 149 if (NH_IS_NHGRP(rc->rc_nh_old)) { 150 wn_old = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_old); 151 } else { 152 tmp.nh = rc->rc_nh_old; 153 tmp.weight = rc->rc_nh_weight; 154 wn_old = &tmp; 155 num_old = 1; 156 } 157 if (NH_IS_NHGRP(rc->rc_nh_new)) { 158 wn_new = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_new); 159 } else { 160 tmp.nh = rc->rc_nh_new; 161 tmp.weight = rc->rc_nh_weight; 162 wn_new = &tmp; 163 num_new = 1; 164 } 165 166 /* Use the fact that each @wn array is sorted */ 167 /* 168 * Want to convert into set of add and delete operations 169 * [1] -> [1, 2] = A{2} 170 * [2] -> [1, 2] = A{1} 171 * [1, 2, 4]->[1, 3, 4] = A{2}, D{3} 172 * [1, 2, 4]->[1, 4] = D{2} 173 * [1, 2, 4] -> [3, 4] = D{1}, C{2,3} OR C{1,3}, D{2} OR D{1},D{2},A{3} 174 * [1, 2] -> [3, 4] = 175 * 176 */ 177 idx_old = 0; 178 while ((idx_old < num_old) && (idx_new < num_new)) { 179 nh_idx_old = wn_old[idx_old].nh->nh_priv->nh_idx; 180 nh_idx_new = wn_new[idx_new].nh->nh_priv->nh_idx; 181 182 if (nh_idx_old == nh_idx_new) { 183 if (wn_old[idx_old].weight != wn_new[idx_new].weight) { 184 /* Update weight by providing del/add notifications */ 185 rc_del.rc_nh_old = wn_old[idx_old].nh; 186 rc_del.rc_nh_weight = wn_old[idx_old].weight; 187 cb(&rc_del, cbdata); 188 189 rc_add.rc_nh_new = wn_new[idx_new].nh; 190 rc_add.rc_nh_weight = wn_new[idx_new].weight; 191 cb(&rc_add, cbdata); 192 } 193 idx_old++; 194 idx_new++; 195 } else if (nh_idx_old < nh_idx_new) { 196 /* 197 * [1, ~2~, 4], [1, ~3~, 4] 198 * [1, ~2~, 5], [1, ~3~, 4] 199 * [1, ~2~], [1, ~3~, 4] 200 */ 201 if ((idx_old + 1 >= num_old) || 202 (wn_old[idx_old + 1].nh->nh_priv->nh_idx > nh_idx_new)) { 203 /* Add new unless the next old item is still <= new */ 204 rc_add.rc_nh_new = wn_new[idx_new].nh; 205 rc_add.rc_nh_weight = wn_new[idx_new].weight; 206 cb(&rc_add, cbdata); 207 idx_new++; 208 } 209 /* In any case, delete current old */ 210 rc_del.rc_nh_old = wn_old[idx_old].nh; 211 rc_del.rc_nh_weight = wn_old[idx_old].weight; 212 cb(&rc_del, cbdata); 213 idx_old++; 214 } else { 215 /* 216 * nh_idx_old > nh_idx_new 217 * 218 * [1, ~3~, 4], [1, ~2~, 4] 219 * [1, ~3~, 5], [1, ~2~, 4] 220 * [1, ~3~, 4], [1, ~2~] 221 */ 222 if ((idx_new + 1 >= num_new) || 223 (wn_new[idx_new + 1].nh->nh_priv->nh_idx > nh_idx_old)) { 224 /* No next item or next item is > current one */ 225 rc_add.rc_nh_new = wn_new[idx_new].nh; 226 rc_add.rc_nh_weight = wn_new[idx_new].weight; 227 cb(&rc_add, cbdata); 228 idx_new++; 229 } 230 /* In any case, delete current old */ 231 rc_del.rc_nh_old = wn_old[idx_old].nh; 232 rc_del.rc_nh_weight = wn_old[idx_old].weight; 233 cb(&rc_del, cbdata); 234 idx_old++; 235 } 236 } 237 238 while (idx_old < num_old) { 239 rc_del.rc_nh_old = wn_old[idx_old].nh; 240 rc_del.rc_nh_weight = wn_old[idx_old].weight; 241 cb(&rc_del, cbdata); 242 idx_old++; 243 } 244 245 while (idx_new < num_new) { 246 rc_add.rc_nh_new = wn_new[idx_new].nh; 247 rc_add.rc_nh_weight = wn_new[idx_new].weight; 248 cb(&rc_add, cbdata); 249 idx_new++; 250 } 251 } 252 253 /* 254 * Decompose multipath cmd info @rc into a list of add/del/change 255 * single-path operations, calling @cb callback for each operation. 256 * Assumes at least one of the nexthops in @rc is multipath. 257 */ 258 void 259 rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb, 260 void *cbdata) 261 { 262 struct weightened_nhop *wn; 263 uint32_t num_nhops; 264 struct rib_cmd_info rc_new; 265 266 rc_new = *rc; 267 DPRINTF("cb=%p cmd=%d nh_old=%p nh_new=%p", 268 cb, rc->cmd, rc->nh_old, rc->nh_new); 269 switch (rc->rc_cmd) { 270 case RTM_ADD: 271 if (!NH_IS_NHGRP(rc->rc_nh_new)) 272 return; 273 wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_nhops); 274 for (uint32_t i = 0; i < num_nhops; i++) { 275 rc_new.rc_nh_new = wn[i].nh; 276 rc_new.rc_nh_weight = wn[i].weight; 277 cb(&rc_new, cbdata); 278 } 279 break; 280 case RTM_DELETE: 281 if (!NH_IS_NHGRP(rc->rc_nh_old)) 282 return; 283 wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_nhops); 284 for (uint32_t i = 0; i < num_nhops; i++) { 285 rc_new.rc_nh_old = wn[i].nh; 286 rc_new.rc_nh_weight = wn[i].weight; 287 cb(&rc_new, cbdata); 288 } 289 break; 290 case RTM_CHANGE: 291 if (!NH_IS_NHGRP(rc->rc_nh_old) && !NH_IS_NHGRP(rc->rc_nh_new)) 292 return; 293 decompose_change_notification(rc, cb, cbdata); 294 break; 295 } 296 } 297 #endif 298