1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet6.h" 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/lock.h> 35 #include <sys/rmlock.h> 36 #include <sys/malloc.h> 37 #include <sys/module.h> 38 #include <sys/kernel.h> 39 #include <sys/socket.h> 40 #include <sys/sysctl.h> 41 #include <sys/syslog.h> 42 #include <net/vnet.h> 43 44 #include <net/if.h> 45 #include <net/if_var.h> 46 47 #include <netinet/in.h> 48 #include <netinet/ip.h> 49 #include <netinet/ip6.h> 50 #include <netinet6/ip6_var.h> 51 #include <netinet6/in6_fib.h> 52 53 #include <net/route.h> 54 #include <net/route/nhop.h> 55 #include <net/route/route_ctl.h> 56 #include <net/route/fib_algo.h> 57 #define RTDEBUG 58 59 #include "rte_lpm6.h" 60 61 #define LPM6_MIN_TBL8 8 /* 2 pages of memory */ 62 #define LPM6_MAX_TBL8 65536 * 16 /* 256M */ 63 64 struct fib_algo_calldata { 65 void *lookup; 66 void *arg; 67 }; 68 69 struct dpdk_lpm6_data { 70 struct rte_lpm6 *lpm6; 71 uint64_t routes_added; 72 uint64_t routes_failed; 73 uint32_t number_tbl8s; 74 uint32_t fibnum; 75 uint8_t hit_tables; 76 struct fib_data *fd; 77 }; 78 79 static struct nhop_object * 80 lookup_ptr_ll(const struct rte_lpm6 *lpm6, const struct in6_addr *dst6, 81 uint32_t scopeid) 82 { 83 const struct rte_lpm6_external *rte_ext; 84 85 rte_ext = (const struct rte_lpm6_external *)lpm6; 86 87 return (fib6_radix_lookup_nh(rte_ext->fibnum, dst6, scopeid)); 88 } 89 90 /* 91 * Main datapath routing 92 */ 93 static struct nhop_object * 94 lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid) 95 { 96 const struct rte_lpm6 *lpm6; 97 const struct rte_lpm6_external *rte_ext; 98 const struct in6_addr *addr6; 99 uint32_t nhidx = 0; 100 int ret; 101 102 lpm6 = (const struct rte_lpm6 *)algo_data; 103 addr6 = key.addr6; 104 rte_ext = (const struct rte_lpm6_external *)lpm6; 105 106 if (!IN6_IS_SCOPE_LINKLOCAL(addr6)) { 107 ret = rte_lpm6_lookup(lpm6, (const uint8_t *)addr6, &nhidx); 108 if (ret == 0) { 109 /* Success! */ 110 return (rte_ext->nh_idx[nhidx]); 111 } else { 112 /* Not found. Check default route */ 113 if (rte_ext->default_idx > 0) 114 return (rte_ext->nh_idx[rte_ext->default_idx]); 115 else 116 return (NULL); 117 } 118 } else { 119 /* LL */ 120 return (lookup_ptr_ll(lpm6, addr6, scopeid)); 121 } 122 } 123 124 static uint8_t 125 rte6_get_pref(const struct rib_rtable_info *rinfo) 126 { 127 128 if (rinfo->num_prefixes < 10) 129 return (1); 130 else if (rinfo->num_prefixes < 1000) 131 return (rinfo->num_prefixes / 10); 132 else if (rinfo->num_prefixes < 100000) 133 return (100 + rinfo->num_prefixes / 667); 134 else 135 return (250); 136 } 137 138 static enum flm_op_result 139 handle_default_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc) 140 { 141 struct rte_lpm6_external *rte_ext; 142 rte_ext = (struct rte_lpm6_external *)dd->lpm6; 143 144 if (rc->rc_cmd != RTM_DELETE) { 145 /* Reference new */ 146 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); 147 148 if (nhidx == 0) 149 return (FLM_REBUILD); 150 rte_ext->default_idx = nhidx; 151 } else { 152 /* No default route */ 153 rte_ext->default_idx = 0; 154 } 155 156 return (FLM_SUCCESS); 157 } 158 159 static enum flm_op_result 160 handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc, 161 const struct in6_addr addr6, int plen, uint32_t scopeid) 162 { 163 164 return (FLM_SUCCESS); 165 } 166 167 static struct rte_lpm6_rule * 168 pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, int plen, 169 int *pplen, uint32_t *pnhop_idx, char *buffer) 170 { 171 struct rte_lpm6_rule *lsp_rule = NULL; 172 struct rtentry *rt; 173 174 *pnhop_idx = 0; 175 *pplen = 0; 176 177 rt = rt_get_inet6_parent(dd->fibnum, addr6, plen); 178 /* plen = 0 means default route and it's out of scope */ 179 if (rt != NULL) { 180 uint32_t nhop_idx, scopeid; 181 struct in6_addr new_addr6; 182 rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid); 183 if (plen > 0) { 184 nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt)); 185 lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhop_idx); 186 *pnhop_idx = nhop_idx; 187 *pplen = plen; 188 } 189 } 190 191 return (lsp_rule); 192 } 193 194 static enum flm_op_result 195 handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc, 196 const struct in6_addr *addr6, int plen) 197 { 198 int ret; 199 char abuf[INET6_ADDRSTRLEN]; 200 inet_ntop(AF_INET6, addr6, abuf, sizeof(abuf)); 201 202 /* So we get sin6, plen and nhidx */ 203 if (rc->rc_cmd != RTM_DELETE) { 204 /* 205 * Addition or change. Save nhop in the internal table 206 * and get index. 207 */ 208 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); 209 if (nhidx == 0) { 210 FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild"); 211 return (FLM_REBUILD); 212 } 213 214 ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6, 215 plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0); 216 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d", 217 (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE", 218 abuf, plen, 219 rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0, 220 nhidx, ret); 221 } else { 222 /* 223 * Need to lookup parent. Assume deletion happened already 224 */ 225 char buffer[RTE_LPM6_RULE_SIZE]; 226 struct rte_lpm6_rule *lsp_rule = NULL; 227 int parent_plen; 228 uint32_t parent_nhop_idx; 229 lsp_rule = pack_parent_rule(dd, addr6, plen, &parent_plen, 230 &parent_nhop_idx, buffer); 231 232 ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule); 233 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d -> /%d nhop %u -> %u ret: %d", 234 "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old), 235 parent_nhop_idx, ret); 236 } 237 238 if (ret != 0) { 239 FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret); 240 if (ret == -ENOSPC) 241 return (FLM_REBUILD); 242 return (FLM_ERROR); 243 } 244 return (FLM_SUCCESS); 245 } 246 247 static enum flm_op_result 248 handle_any_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc) 249 { 250 enum flm_op_result ret; 251 struct in6_addr addr6; 252 uint32_t scopeid; 253 int plen; 254 255 rt_get_inet6_prefix_plen(rc->rc_rt, &addr6, &plen, &scopeid); 256 257 if (IN6_IS_SCOPE_LINKLOCAL(&addr6)) 258 ret = handle_ll_change(dd, rc, addr6, plen, scopeid); 259 else if (plen == 0) 260 ret = handle_default_change(dd, rc); 261 else 262 ret = handle_gu_change(dd, rc, &addr6, plen); 263 264 if (ret != 0) 265 FIB_PRINTF(LOG_INFO, dd->fd, "error handling route"); 266 return (ret); 267 } 268 269 static enum flm_op_result 270 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, 271 void *_data) 272 { 273 struct dpdk_lpm6_data *dd; 274 275 dd = (struct dpdk_lpm6_data *)_data; 276 277 return (handle_any_change(dd, rc)); 278 } 279 280 static void 281 destroy_dd(struct dpdk_lpm6_data *dd) 282 { 283 284 FIB_PRINTF(LOG_INFO, dd->fd, "destroy dd %p", dd); 285 if (dd->lpm6 != NULL) 286 rte_lpm6_free(dd->lpm6); 287 free(dd, M_TEMP); 288 } 289 290 static void 291 destroy_table(void *_data) 292 { 293 294 destroy_dd((struct dpdk_lpm6_data *)_data); 295 } 296 297 static enum flm_op_result 298 add_route_cb(struct rtentry *rt, void *_data) 299 { 300 struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data; 301 struct in6_addr addr6; 302 struct nhop_object *nh; 303 uint32_t scopeid; 304 int plen; 305 int ret; 306 307 rt_get_inet6_prefix_plen(rt, &addr6, &plen, &scopeid); 308 nh = rt_get_raw_nhop(rt); 309 310 if (IN6_IS_SCOPE_LINKLOCAL(&addr6)) { 311 312 /* 313 * We don't operate on LL directly, however 314 * reference them to maintain guarantee on 315 * ability to refcount nhops in epoch. 316 */ 317 fib_get_nhop_idx(dd->fd, nh); 318 return (FLM_SUCCESS); 319 } 320 321 char abuf[INET6_ADDRSTRLEN]; 322 inet_ntop(AF_INET6, &addr6, abuf, sizeof(abuf)); 323 FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen); 324 325 if (plen == 0) { 326 struct rib_cmd_info rc = { 327 .rc_cmd = RTM_ADD, 328 .rc_nh_new = nh, 329 }; 330 331 FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route"); 332 return (handle_default_change(dd, &rc)); 333 } 334 335 uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh); 336 if (nhidx == 0) { 337 FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index"); 338 return (FLM_REBUILD); 339 } 340 ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)&addr6, plen, nhidx, 1); 341 FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d", 342 dd->lpm6, abuf, plen, nhidx, ret); 343 344 if (ret != 0) { 345 FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm6_add() returned %d", ret); 346 if (ret == -ENOSPC) { 347 dd->hit_tables = 1; 348 return (FLM_REBUILD); 349 } 350 dd->routes_failed++; 351 return (FLM_ERROR); 352 } else 353 dd->routes_added++; 354 355 return (FLM_SUCCESS); 356 } 357 358 static enum flm_op_result 359 check_dump_success(void *_data, struct fib_dp *dp) 360 { 361 struct dpdk_lpm6_data *dd; 362 363 dd = (struct dpdk_lpm6_data *)_data; 364 365 FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %zu failed: %zu", 366 dd->routes_added, dd->routes_failed); 367 if (dd->hit_tables || dd->routes_failed > 0) 368 return (FLM_REBUILD); 369 370 FIB_PRINTF(LOG_INFO, dd->fd, 371 "DPDK lookup engine synced with IPv6 RIB id %u, %zu routes", 372 dd->fibnum, dd->routes_added); 373 374 dp->f = lookup_ptr; 375 dp->arg = dd->lpm6; 376 377 return (FLM_SUCCESS); 378 } 379 380 static void 381 estimate_scale(const struct dpdk_lpm6_data *dd_src, struct dpdk_lpm6_data *dd) 382 { 383 384 /* XXX: update at 75% capacity */ 385 if (dd_src->hit_tables) 386 dd->number_tbl8s = dd_src->number_tbl8s * 2; 387 else 388 dd->number_tbl8s = dd_src->number_tbl8s; 389 390 /* TODO: look into the appropriate RIB to adjust */ 391 } 392 393 static struct dpdk_lpm6_data * 394 build_table(struct dpdk_lpm6_data *dd_prev, struct fib_data *fd) 395 { 396 struct dpdk_lpm6_data *dd; 397 struct rte_lpm6 *lpm6; 398 399 dd = malloc(sizeof(struct dpdk_lpm6_data), M_TEMP, M_NOWAIT | M_ZERO); 400 if (dd == NULL) { 401 FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure"); 402 return (NULL); 403 } 404 dd->fibnum = dd_prev->fibnum; 405 dd->fd = fd; 406 407 estimate_scale(dd_prev, dd); 408 409 struct rte_lpm6_config cfg = {.number_tbl8s = dd->number_tbl8s}; 410 lpm6 = rte_lpm6_create("test", 0, &cfg); 411 if (lpm6 == NULL) { 412 FIB_PRINTF(LOG_INFO, fd, "unable to create lpm6"); 413 free(dd, M_TEMP); 414 return (NULL); 415 } 416 dd->lpm6 = lpm6; 417 struct rte_lpm6_external *ext = (struct rte_lpm6_external *)lpm6; 418 ext->nh_idx = fib_get_nhop_array(dd->fd); 419 420 FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s); 421 422 return (dd); 423 } 424 425 static enum flm_op_result 426 init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data) 427 { 428 struct dpdk_lpm6_data *dd, dd_base; 429 430 if (_old_data == NULL) { 431 bzero(&dd_base, sizeof(struct dpdk_lpm6_data)); 432 dd_base.fibnum = fibnum; 433 /* TODO: get rib statistics */ 434 dd_base.number_tbl8s = LPM6_MIN_TBL8; 435 dd = &dd_base; 436 } else { 437 FIB_PRINTF(LOG_INFO, fd, "Starting with old data"); 438 dd = (struct dpdk_lpm6_data *)_old_data; 439 } 440 441 /* Guaranteed to be in epoch */ 442 dd = build_table(dd, fd); 443 if (dd == NULL) { 444 FIB_PRINTF(LOG_INFO, fd, "table creation failed"); 445 return (FLM_REBUILD); 446 } 447 448 *data = dd; 449 return (FLM_SUCCESS); 450 } 451 452 static struct fib_lookup_module dpdk_lpm6 = { 453 .flm_name = "dpdk_lpm6", 454 .flm_family = AF_INET6, 455 .flm_init_cb = init_table, 456 .flm_destroy_cb = destroy_table, 457 .flm_dump_rib_item_cb = add_route_cb, 458 .flm_dump_end_cb = check_dump_success, 459 .flm_change_rib_item_cb = handle_rtable_change_cb, 460 .flm_get_pref = rte6_get_pref, 461 }; 462 463 static int 464 lpm6_modevent(module_t mod, int type, void *unused) 465 { 466 int error = 0; 467 468 switch (type) { 469 case MOD_LOAD: 470 fib_module_register(&dpdk_lpm6); 471 break; 472 case MOD_UNLOAD: 473 error = fib_module_unregister(&dpdk_lpm6); 474 break; 475 default: 476 error = EOPNOTSUPP; 477 break; 478 } 479 return (error); 480 } 481 482 static moduledata_t lpm6mod = { 483 "dpdk_lpm6", 484 lpm6_modevent, 485 0 486 }; 487 488 DECLARE_MODULE(lpm6mod, lpm6mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 489 MODULE_VERSION(lpm6mod, 1); 490