1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/lock.h> 35 #include <sys/rmlock.h> 36 #include <sys/malloc.h> 37 #include <sys/module.h> 38 #include <sys/kernel.h> 39 #include <sys/socket.h> 40 #include <sys/sysctl.h> 41 #include <sys/syslog.h> 42 #include <net/vnet.h> 43 44 #include <net/if.h> 45 #include <net/if_var.h> 46 47 #include <netinet/in.h> 48 #include <netinet/in_fib.h> 49 #include <netinet/ip.h> 50 51 #include <net/route.h> 52 #include <net/route/nhop.h> 53 #include <net/route/route_ctl.h> 54 #include <net/route/fib_algo.h> 55 56 #include "rte_shim.h" 57 #include "rte_lpm.h" 58 59 #define LPM_MIN_TBL8 8 /* 2 pages of memory */ 60 #define LPM_MAX_TBL8 65536 * 16 /* 256M */ 61 62 MALLOC_DECLARE(M_RTABLE); 63 64 struct dpdk_lpm_data { 65 struct rte_lpm *lpm; 66 uint64_t routes_added; 67 uint64_t routes_failed; 68 uint32_t number_tbl8s; 69 uint32_t fibnum; 70 uint8_t hit_tables; 71 uint8_t hit_records; 72 struct fib_data *fd; 73 }; 74 75 /* 76 * Main datapath routing 77 */ 78 static struct nhop_object * 79 lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid) 80 { 81 struct rte_lpm *lpm; 82 const struct rte_lpm_external *rte_ext; 83 uint32_t nhidx = 0; 84 int ret; 85 86 lpm = (struct rte_lpm *)algo_data; 87 rte_ext = (const struct rte_lpm_external *)lpm; 88 89 ret = rte_lpm_lookup(lpm, ntohl(key.addr4.s_addr), &nhidx); 90 if (ret == 0) { 91 /* Success! */ 92 return (rte_ext->nh_idx[nhidx]); 93 } else { 94 /* Not found. Check default route */ 95 return (rte_ext->nh_idx[rte_ext->default_idx]); 96 } 97 98 return (NULL); 99 } 100 101 static uint8_t 102 rte_get_pref(const struct rib_rtable_info *rinfo) 103 { 104 105 if (rinfo->num_prefixes < 10) 106 return (1); 107 else if (rinfo->num_prefixes < 1000) 108 return (rinfo->num_prefixes / 10); 109 else if (rinfo->num_prefixes < 500000) 110 return (100 + rinfo->num_prefixes / 3334); 111 else 112 return (250); 113 } 114 115 static enum flm_op_result 116 handle_default_change(struct dpdk_lpm_data *dd, struct rib_cmd_info *rc) 117 { 118 struct rte_lpm_external *rte_ext; 119 rte_ext = (struct rte_lpm_external *)dd->lpm; 120 121 if (rc->rc_cmd != RTM_DELETE) { 122 /* Reference new */ 123 uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); 124 125 if (nhidx == 0) 126 return (FLM_REBUILD); 127 rte_ext->default_idx = nhidx; 128 } else { 129 /* No default route */ 130 rte_ext->default_idx = 0; 131 } 132 133 return (FLM_SUCCESS); 134 } 135 136 static void 137 get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, int plen, 138 uint8_t *pplen, uint32_t *nhop_idx) 139 { 140 struct rtentry *rt; 141 142 rt = rt_get_inet_parent(dd->fibnum, addr, plen); 143 if (rt != NULL) { 144 struct in_addr addr4; 145 uint32_t scopeid; 146 int parent_plen; 147 148 rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid); 149 if (parent_plen > 0) { 150 *pplen = parent_plen; 151 *nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt)); 152 return; 153 } 154 } 155 156 *nhop_idx = 0; 157 *pplen = 0; 158 } 159 160 static enum flm_op_result 161 handle_gu_change(struct dpdk_lpm_data *dd, const struct rib_cmd_info *rc, 162 const struct in_addr addr, int plen) 163 { 164 uint32_t nhidx = 0; 165 int ret; 166 char abuf[INET_ADDRSTRLEN]; 167 uint32_t ip; 168 169 ip = ntohl(addr.s_addr); 170 inet_ntop(AF_INET, &addr, abuf, sizeof(abuf)); 171 172 /* So we get sin, plen and nhidx */ 173 if (rc->rc_cmd != RTM_DELETE) { 174 /* 175 * Addition or change. Save nhop in the internal table 176 * and get index. 177 */ 178 nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); 179 if (nhidx == 0) { 180 FIB_PRINTF(LOG_INFO, dd->fd, "nhop limit reached, need rebuild"); 181 return (FLM_REBUILD); 182 } 183 184 ret = rte_lpm_add(dd->lpm, ip, plen, nhidx); 185 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d", 186 (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE", 187 abuf, plen, 188 rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0, 189 nhidx, ret); 190 } else { 191 /* 192 * Need to lookup parent. Assume deletion happened already 193 */ 194 uint8_t parent_plen; 195 uint32_t parent_nhop_idx; 196 get_parent_rule(dd, addr, plen, &parent_plen, &parent_nhop_idx); 197 198 ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, parent_nhop_idx); 199 FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d -> /%d nhop %u -> %u ret: %d", 200 "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old), 201 parent_nhop_idx, ret); 202 } 203 204 if (ret != 0) { 205 FIB_PRINTF(LOG_INFO, dd->fd, "error: %d", ret); 206 if (ret == -ENOSPC) 207 return (FLM_REBUILD); 208 return (FLM_ERROR); 209 } 210 return (FLM_SUCCESS); 211 } 212 213 static enum flm_op_result 214 handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, 215 void *_data) 216 { 217 struct dpdk_lpm_data *dd; 218 enum flm_op_result ret; 219 struct in_addr addr4; 220 uint32_t scopeid; 221 int plen; 222 223 dd = (struct dpdk_lpm_data *)_data; 224 rt_get_inet_prefix_plen(rc->rc_rt, &addr4, &plen, &scopeid); 225 226 if (plen != 0) 227 ret = handle_gu_change(dd, rc, addr4, plen); 228 else 229 ret = handle_default_change(dd, rc); 230 231 if (ret != 0) 232 FIB_PRINTF(LOG_INFO, dd->fd, "error handling route"); 233 return (ret); 234 } 235 236 static void 237 destroy_table(void *_data) 238 { 239 struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data; 240 241 if (dd->lpm != NULL) 242 rte_lpm_free(dd->lpm); 243 free(dd, M_RTABLE); 244 } 245 246 static enum flm_op_result 247 add_route_cb(struct rtentry *rt, void *_data) 248 { 249 struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data; 250 struct nhop_object *nh; 251 int plen, ret; 252 struct in_addr addr4; 253 uint32_t scopeid; 254 255 nh = rt_get_raw_nhop(rt); 256 rt_get_inet_prefix_plen(rt, &addr4, &plen, &scopeid); 257 258 char abuf[INET_ADDRSTRLEN]; 259 inet_ntop(AF_INET, &addr4, abuf, sizeof(abuf)); 260 261 FIB_PRINTF(LOG_DEBUG, dd->fd, "Operating on %s/%d", abuf, plen); 262 263 if (plen == 0) { 264 struct rib_cmd_info rc = { 265 .rc_cmd = RTM_ADD, 266 .rc_nh_new = nh, 267 }; 268 269 FIB_PRINTF(LOG_DEBUG, dd->fd, "Adding default route"); 270 return (handle_default_change(dd, &rc)); 271 } 272 273 uint32_t nhidx = fib_get_nhop_idx(dd->fd, nh); 274 if (nhidx == 0) { 275 FIB_PRINTF(LOG_INFO, dd->fd, "unable to get nhop index"); 276 return (FLM_REBUILD); 277 } 278 ret = rte_lpm_add(dd->lpm, ntohl(addr4.s_addr), plen, nhidx); 279 FIB_PRINTF(LOG_DEBUG, dd->fd, "ADD %p %s/%d nh %u = %d", 280 dd->lpm, abuf, plen, nhidx, ret); 281 282 if (ret != 0) { 283 FIB_PRINTF(LOG_INFO, dd->fd, "rte_lpm_add() returned %d", ret); 284 if (ret == -ENOSPC) { 285 dd->hit_tables = 1; 286 return (FLM_REBUILD); 287 } 288 dd->routes_failed++; 289 return (FLM_ERROR); 290 } else 291 dd->routes_added++; 292 293 return (FLM_SUCCESS); 294 } 295 296 static enum flm_op_result 297 check_dump_success(void *_data, struct fib_dp *dp) 298 { 299 struct dpdk_lpm_data *dd; 300 301 dd = (struct dpdk_lpm_data *)_data; 302 303 FIB_PRINTF(LOG_INFO, dd->fd, "scan completed. added: %zu failed: %zu", 304 dd->routes_added, dd->routes_failed); 305 if (dd->hit_tables || dd->routes_failed > 0) 306 return (FLM_REBUILD); 307 308 FIB_PRINTF(LOG_INFO, dd->fd, 309 "DPDK lookup engine synced with IPv4 RIB id %u, %zu routes", 310 dd->fibnum, dd->routes_added); 311 312 dp->f = lookup_ptr; 313 dp->arg = dd->lpm; 314 315 return (FLM_SUCCESS); 316 } 317 318 static void 319 estimate_scale(const struct dpdk_lpm_data *dd_src, struct dpdk_lpm_data *dd) 320 { 321 322 /* XXX: update at 75% capacity */ 323 if (dd_src->hit_tables) 324 dd->number_tbl8s = dd_src->number_tbl8s * 2; 325 else 326 dd->number_tbl8s = dd_src->number_tbl8s; 327 328 /* TODO: look into the appropriate RIB to adjust */ 329 } 330 331 static struct dpdk_lpm_data * 332 build_table(struct dpdk_lpm_data *dd_prev, struct fib_data *fd) 333 { 334 struct dpdk_lpm_data *dd; 335 struct rte_lpm *lpm; 336 337 dd = malloc(sizeof(struct dpdk_lpm_data), M_RTABLE, M_NOWAIT | M_ZERO); 338 if (dd == NULL) { 339 FIB_PRINTF(LOG_INFO, fd, "Unable to allocate base datastructure"); 340 return (NULL); 341 } 342 dd->fibnum = dd_prev->fibnum; 343 dd->fd = fd; 344 345 estimate_scale(dd_prev, dd); 346 347 struct rte_lpm_config cfg = {.number_tbl8s = dd->number_tbl8s}; 348 lpm = rte_lpm_create("test", 0, &cfg); 349 if (lpm == NULL) { 350 FIB_PRINTF(LOG_INFO, fd, "unable to create lpm"); 351 free(dd, M_RTABLE); 352 return (NULL); 353 } 354 dd->lpm = lpm; 355 struct rte_lpm_external *ext = (struct rte_lpm_external *)lpm; 356 ext->nh_idx = fib_get_nhop_array(dd->fd); 357 358 FIB_PRINTF(LOG_INFO, fd, "allocated %u tbl8s", dd->number_tbl8s); 359 360 return (dd); 361 } 362 363 static enum flm_op_result 364 init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data) 365 { 366 struct dpdk_lpm_data *dd, dd_base; 367 368 if (_old_data == NULL) { 369 bzero(&dd_base, sizeof(struct dpdk_lpm_data)); 370 dd_base.fibnum = fibnum; 371 /* TODO: get rib statistics */ 372 dd_base.number_tbl8s = LPM_MIN_TBL8; 373 dd = &dd_base; 374 } else { 375 FIB_PRINTF(LOG_DEBUG, fd, "Starting with old data"); 376 dd = (struct dpdk_lpm_data *)_old_data; 377 } 378 379 /* Guaranteed to be in epoch */ 380 dd = build_table(dd, fd); 381 if (dd == NULL) { 382 FIB_PRINTF(LOG_NOTICE, fd, "table creation failed"); 383 return (FLM_REBUILD); 384 } 385 386 *data = dd; 387 return (FLM_SUCCESS); 388 } 389 390 static struct fib_lookup_module dpdk_lpm4 = { 391 .flm_name = "dpdk_lpm4", 392 .flm_family = AF_INET, 393 .flm_init_cb = init_table, 394 .flm_destroy_cb = destroy_table, 395 .flm_dump_rib_item_cb = add_route_cb, 396 .flm_dump_end_cb = check_dump_success, 397 .flm_change_rib_item_cb = handle_rtable_change_cb, 398 .flm_get_pref = rte_get_pref, 399 }; 400 401 static int 402 lpm4_modevent(module_t mod, int type, void *unused) 403 { 404 int error = 0; 405 406 switch (type) { 407 case MOD_LOAD: 408 fib_module_register(&dpdk_lpm4); 409 break; 410 case MOD_UNLOAD: 411 error = fib_module_unregister(&dpdk_lpm4); 412 break; 413 default: 414 error = EOPNOTSUPP; 415 break; 416 } 417 return (error); 418 } 419 420 static moduledata_t lpm4mod = { 421 "dpdk_lpm4", 422 lpm4_modevent, 423 0 424 }; 425 426 DECLARE_MODULE(lpm4mod, lpm4mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 427 MODULE_VERSION(lpm4mod, 1); 428