/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
			     struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	unsigned char prefix;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
		if (!test_bit(prefix, prefix_usage2->b))
			return false;
	}
	return true;
}

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static bool
mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 }};

	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
{
	memset(prefix_usage, 0, sizeof(*prefix_usage));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	struct net_device *dev;
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_entry {
	struct rhash_head ht_node;
	struct list_head list;
	struct mlxsw_sp_fib_key key;
	enum mlxsw_sp_fib_entry_type type;
	unsigned int ref_count;
	u16 rif; /* used for action local */
	struct mlxsw_sp_vr *vr;
	struct fib_info *fi;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head entry_list;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
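
/* Each virtual router (VR) owns one mlxsw_sp_fib. Entries are keyed
 * by {netdev, address, prefix length}, and prefix_ref_count /
 * prefix_usage track which prefix lengths the FIB currently uses, so
 * that a matching LPM tree can be requested from the device.
 */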

static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned char prefix_len = fib_entry->key.prefix_len;
	int err;

	err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node,
				     mlxsw_sp_fib_ht_params);
	if (err)
		return err;
	list_add_tail(&fib_entry->list, &fib->entry_list);
	if (fib->prefix_ref_count[prefix_len]++ == 0)
		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
	return 0;
}

static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
				      struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned char prefix_len = fib_entry->key.prefix_len;

	if (--fib->prefix_ref_count[prefix_len] == 0)
		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
	list_del(&fib_entry->list);
	rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
			       mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
			  size_t addr_len, unsigned char prefix_len,
			  struct net_device *dev)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
	if (!fib_entry)
		return NULL;
	fib_entry->key.dev = dev;
	memcpy(fib_entry->key.addr, addr, addr_len);
	fib_entry->key.prefix_len = prefix_len;
	return fib_entry;
}

static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
{
	kfree(fib_entry);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			  size_t addr_len, unsigned char prefix_len,
			  struct net_device *dev)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	key.dev = dev;
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
{
	struct mlxsw_sp_fib *fib;
	int err;

	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->entry_list);
	return fib;

err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
{
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count == 0) {
			if (one_reserved)
				one_reserved = false;
			else
				return lpm_tree;
		}
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
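
/* The set of used prefix lengths is programmed into the device as a
 * degenerate tree via the RALST register: the longest used prefix
 * length acts as the root bin and each bin is chained to the
 * previously written (shorter) one, with no other children.
 */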

static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_lpm_tree *lpm_tree)
{
	return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto, bool one_reserved)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage))
			goto inc_ref_count;
	}
	lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
					    proto, one_reserved);
	if (IS_ERR(lpm_tree))
		return lpm_tree;

inc_ref_count:
	lpm_tree->ref_count++;
	return lpm_tree;
}

static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
	return 0;
}

static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
		lpm_tree = &mlxsw_sp->router.lpm_trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (!vr->used)
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
			     vr->lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
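
/* A virtual router is bound to an LPM tree with the RALTB register;
 * unbinding simply binds the VR back to tree 0, the default.
 */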

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_vr *vr)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main and local table into one */
	if (tb_id == RT_TABLE_LOCAL)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id,
					    enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];
		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      unsigned char prefix_len,
					      u32 tb_id,
					      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr)
		return ERR_PTR(-EBUSY);
	vr->fib = mlxsw_sp_fib_create();
	if (IS_ERR(vr->fib))
		return ERR_CAST(vr->fib);

	vr->proto = proto;
	vr->tb_id = tb_id;
	mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 proto, true);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_tree_get;
	}
	vr->lpm_tree = lpm_tree;
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
	if (err)
		goto err_tree_bind;

	vr->used = true;
	return vr;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
err_tree_get:
	mlxsw_sp_fib_destroy(vr->fib);

	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	mlxsw_sp_fib_destroy(vr->fib);
	vr->used = false;
}
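
/* Adding or removing a prefix length in a VR's FIB can invalidate
 * the currently bound LPM tree. The check below obtains a tree
 * matching the requested prefix usage and rebinds the VR to it, or
 * keeps the current tree when it already covers the requirement.
 */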

static int
mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
			   struct mlxsw_sp_prefix_usage *req_prefix_usage)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	if (mlxsw_sp_prefix_usage_eq(req_prefix_usage,
				     &vr->lpm_tree->prefix_usage))
		return 0;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
					 vr->proto, false);
	if (IS_ERR(lpm_tree)) {
		/* We failed to get a tree according to the required
		 * prefix usage. However, the current tree might still be
		 * good for us if our requirement is a subset of the
		 * prefixes used in the tree.
		 */
		if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
						 &vr->lpm_tree->prefix_usage))
			return 0;
		return PTR_ERR(lpm_tree);
	}

	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
	vr->lpm_tree = lpm_tree;
	return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
					   unsigned char prefix_len,
					   u32 tb_id,
					   enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_vr *vr;
	int err;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
	if (!vr) {
		vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
		if (IS_ERR(vr))
			return vr;
	} else {
		struct mlxsw_sp_prefix_usage req_prefix_usage;

		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
					  &vr->fib->prefix_usage);
		mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
		/* Need to replace LPM tree in case new prefix is required. */
		err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
						 &req_prefix_usage);
		if (err)
			return ERR_PTR(err);
	}
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	/* Destroy virtual router entity in case the associated FIB is empty
	 * and allow it to be used for other tables in future. Otherwise,
	 * check if some prefix usage did not disappear and change tree if
	 * that is the case. Note that in case a new, smaller tree cannot be
	 * allocated, the original one will be kept being used.
	 */
	if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
	else
		mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
					   &vr->fib->prefix_usage);
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
				       GFP_KERNEL);
	if (!mlxsw_sp->router.vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router.vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router.vrs);
}

struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	bool offloaded;
	struct delayed_work dw;
	struct mlxsw_sp_port *mlxsw_sp_port;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
};

static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
	if (!neigh_entry)
		return NULL;
	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
	return neigh_entry;
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_key key;

	key.n = n;
	return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}

int mlxsw_sp_router_neigh_construct(struct net_device *dev,
				    struct neighbour *n)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *r;
	int err;

	if (n->tbl != &arp_tbl)
		return 0;

	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (neigh_entry)
		return 0;

	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (WARN_ON(!r))
		return -EINVAL;

	neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif);
	if (!neigh_entry)
		return -ENOMEM;
	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;
	return 0;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_destroy(neigh_entry);
	return err;
}

void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
				   struct neighbour *n)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;

	if (n->tbl != &arp_tbl)
		return;

	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry)
		return;
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_destroy(neigh_entry);
}
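
/* Neighbour activity is polled from the device: a periodic delayed
 * work dumps the active entries of the hardware neighbour table via
 * the RAUHTD register and reports them to the kernel with
 * neigh_event_send(), so neighbours used only by hardware-forwarded
 * traffic are not aged out by the kernel.
 */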

static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);

	mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n) {
		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
			   &dip);
		return;
	}

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}

static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}
}

static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		WARN_ON_ONCE(1);
		break;
	}
}

static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}
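
/* A single RAUHTD response carries at most
 * MLXSW_REG_RAUHTD_REC_MAX_NUM records. A completely full response
 * may mean more activity records are pending, so the dump below is
 * repeated until a partially filled response arrives.
 */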

static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	char *rauhtd_pl;
	u8 num_rec;
	int i, err;

	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
	if (!rauhtd_pl)
		return -ENOMEM;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	kfree(rauhtd_pl);
	return err;
}

static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node) {
		/* If this neigh has nexthops, make the kernel think this
		 * neigh is active regardless of the traffic.
		 */
		if (!list_empty(&neigh_entry->nexthop_list))
			neigh_event_send(neigh_entry->key.n, NULL);
	}
	rtnl_unlock();
}

static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router.neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
			       msecs_to_jiffies(interval));
}

static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.neighs_update.dw.work);
	int err;

	err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
	if (err)
		dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");

	mlxsw_sp_router_neighs_update_nh(mlxsw_sp);

	mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}

static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
						 router.nexthop_probe_dw.work);

	/* Iterate over nexthop neighbours, find those that are unresolved
	 * and send ARP on them. This solves the chicken-and-egg problem
	 * where the nexthop wouldn't get offloaded until the neighbour is
	 * resolved, but it wouldn't ever get resolved in case traffic is
	 * flowing in HW using a different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
			    nexthop_neighs_list_node) {
		if (!(neigh_entry->key.n->nud_state & NUD_VALID) &&
		    !list_empty(&neigh_entry->nexthop_list))
			neigh_event_send(neigh_entry->key.n, NULL);
	}
	rtnl_unlock();

	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);

static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry =
		container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
	struct neighbour *n = neigh_entry->key.n;
	struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	struct net_device *dev;
	bool entry_connected;
	u8 nud_state;
	bool updating;
	bool removing;
	bool adding;
	u32 dip;
	int err;

	read_lock_bh(&n->lock);
	dip = ntohl(*((__be32 *) n->primary_key));
	memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
	nud_state = n->nud_state;
	dev = n->dev;
	read_unlock_bh(&n->lock);

	entry_connected = nud_state & NUD_VALID;
	adding = (!neigh_entry->offloaded) && entry_connected;
	updating = neigh_entry->offloaded && entry_connected;
	removing = neigh_entry->offloaded && !entry_connected;

	if (adding || updating) {
		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
				      neigh_entry->rif,
				      neigh_entry->ha, dip);
		err = mlxsw_reg_write(mlxsw_sp->core,
				      MLXSW_REG(rauht), rauht_pl);
		if (err) {
			netdev_err(dev, "Could not add neigh %pI4h\n", &dip);
			neigh_entry->offloaded = false;
		} else {
			neigh_entry->offloaded = true;
		}
		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
	} else if (removing) {
		mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
				      neigh_entry->rif,
				      neigh_entry->ha, dip);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht),
				      rauht_pl);
		if (err) {
			netdev_err(dev, "Could not delete neigh %pI4h\n", &dip);
			neigh_entry->offloaded = true;
		} else {
			neigh_entry->offloaded = false;
		}
		mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
	}

	neigh_release(n);
	mlxsw_sp_port_dev_put(mlxsw_sp_port);
}
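
/* Netevent notifications are delivered in atomic context, so the
 * RAUHT programming is deferred to the neigh entry's delayed work
 * (mlxsw_sp_router_neigh_update_hw() above), which also releases the
 * neighbour and port references taken by the handler below.
 */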

int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct net_device *dev;
	struct neigh_parms *p;
	struct neighbour *n;
	u32 dip;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || p->tbl != &arp_tbl)
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router.neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
		dev = n->dev;

		if (n->tbl != &arp_tbl)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		dip = ntohl(*((__be32 *) n->primary_key));
		neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
		if (WARN_ON(!neigh_entry)) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_DONE;
		}
		neigh_entry->mlxsw_sp_port = mlxsw_sp_port;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) {
			neigh_release(n);
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
		}
		break;
	}

	return NOTIFY_DONE;
}

static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity update */
	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
	return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}
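
/* Nexthops are offloaded as groups: each group occupies a contiguous
 * range of KVD linear (adjacency) entries, one for every currently
 * resolved nexthop, and remote routes reference the range by
 * {adj_index, ecmp_size}.
 */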

struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	struct mlxsw_sp_neigh_entry *neigh_entry;
};

struct mlxsw_sp_nexthop_group {
	struct list_head list; /* node in mlxsw->router.nexthop_group_list */
	struct list_head fib_list; /* list of fib entries that use this group */
	u8 adj_index_valid:1;
	u32 adj_index;
	u16 ecmp_size;
	u16 count;
	struct mlxsw_sp_nexthop nexthops[0];
};

static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     struct mlxsw_sp_vr *vr,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
			     adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (vr == fib_entry->vr)
			continue;
		vr = fib_entry->vr;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}

static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp)
{
	u32 adj_index = nh_grp->adj_index; /* base */
	struct mlxsw_sp_nexthop *nh;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload) {
			nh->offloaded = 0;
			continue;
		}

		if (nh->update) {
			err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
							  adj_index, nh);
			if (err)
				return err;
			nh->update = 0;
			nh->offloaded = 1;
		}
		adj_index++;
	}
	return 0;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}

static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	u16 ecmp_size = 0;
	bool old_adj_index_valid;
	u32 old_adj_index;
	u16 old_ecmp_size;
	int ret;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload ^ nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
		if (nh->should_offload)
			ecmp_size++;
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	if (!ecmp_size)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
	if (ret < 0) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	adj_index = ret;
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}
	return;

set_trap:
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
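
/* A nexthop's should_offload flag follows its neighbour: resolution
 * makes the nexthop offloadable, removal makes it non-offloadable,
 * and either way its MAC must be re-programmed. Every such change
 * refreshes the owning group so the adjacency range matches the set
 * of resolved nexthops.
 */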

static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
					    bool removing)
{
	if (!removing && !nh->should_offload)
		nh->should_offload = 1;
	else if (removing && nh->offloaded)
		nh->should_offload = 0;
	nh->update = 1;
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
	rtnl_unlock();
}

static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_nexthop_group *nh_grp,
				 struct mlxsw_sp_nexthop *nh,
				 struct fib_nh *fib_nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct net_device *dev = fib_nh->nh_dev;
	struct neighbour *n;
	u8 nud_state;

	/* Take a reference of neigh here ensuring that neigh would
	 * not be destructed before the nexthop entry is finished.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
	n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev);
	if (!n) {
		n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		neigh_event_send(n, NULL);
	}
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry) {
		neigh_release(n);
		return -EINVAL;
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router.nexthop_neighs_list);

	nh->nh_grp = nh_grp;
	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));

	return 0;
}

static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;

	list_del(&nh->neigh_list_node);

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&nh->neigh_entry->nexthop_list))
		list_del(&nh->neigh_entry->nexthop_neighs_list_node);

	neigh_release(neigh_entry->key.n);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->count = fi->fib_nhs;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop_init;
	}
	list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_init:
	/* Roll back each nexthop that was successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	list_del(&nh_grp->list);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
}

static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
				   struct fib_info *fi)
{
	int i;

	for (i = 0; i < fi->fib_nhs; i++) {
		struct fib_nh *fib_nh = &fi->fib_nh[i];
		struct neighbour *n = nh->neigh_entry->key.n;

		if (memcmp(n->primary_key, &fib_nh->nh_gw,
			   sizeof(fib_nh->nh_gw)) == 0 &&
		    n->dev == fib_nh->nh_dev)
			return true;
	}
	return false;
}

static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
					 struct fib_info *fi)
{
	int i;

	if (nh_grp->count != fi->fib_nhs)
		return false;
	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (!mlxsw_sp_nexthop_match(nh, fi))
			return false;
	}
	return true;
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
			    list) {
		if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
			return nh_grp;
	}
	return NULL;
}

static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}

static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
}

static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (fib_entry->nh_group->adj_index_valid) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_local_pack(ralue_pl,
				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
				       fib_entry->rif);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u32 *p_dip = (u32 *) fib_entry->key.addr;
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	mlxsw_reg_ralue_pack4(ralue_pl,
			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
			      vr->id, fib_entry->key.prefix_len, *p_dip);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry,
				  enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->vr->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_L3_PROTO_IPV6:
		return -EINVAL;
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}

static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}

static int
mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
				const struct fib_entry_notifier_info *fen_info,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	struct fib_info *fi = fen_info->fi;
	struct mlxsw_sp_rif *r = NULL;
	int nhsel;
	int err;

	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}
	if (fen_info->type != RTN_UNICAST)
		return -EINVAL;

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			continue;
		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev);
		if (!r) {
			/* In case router interface is not found for
			 * at least one of the nexthops, that means
			 * the nexthop points to some device unrelated
			 * to us. Set trap and pass the packets for
			 * this prefix to kernel.
			 */
			break;
		}
	}

	if (!r) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	}

	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		fib_entry->rif = r->rif;
	} else {
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
		if (err)
			return err;
	}
	fib_info_offload_inc(fen_info->fi);
	return 0;
}

static void
mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		fib_info_offload_dec(fib_entry->fi);
	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
		mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
		       const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct fib_info *fi = fen_info->fi;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
			     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(vr))
		return ERR_CAST(vr);

	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
					      sizeof(fen_info->dst),
					      fen_info->dst_len, fi->fib_dev);
	if (fib_entry) {
		/* Already exists, just take a reference */
		fib_entry->ref_count++;
		return fib_entry;
	}
	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst,
					      sizeof(fen_info->dst),
					      fen_info->dst_len, fi->fib_dev);
	if (!fib_entry) {
		err = -ENOMEM;
		goto err_fib_entry_create;
	}
	fib_entry->vr = vr;
	fib_entry->fi = fi;
	fib_entry->ref_count = 1;

	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_init;

	return fib_entry;

err_fib4_entry_init:
	mlxsw_sp_fib_entry_destroy(fib_entry);
err_fib_entry_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);

	return ERR_PTR(err);
}

static struct mlxsw_sp_fib_entry *
mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
			const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id,
			      MLXSW_SP_L3_PROTO_IPV4);
	if (!vr)
		return NULL;

	return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
					 sizeof(fen_info->dst),
					 fen_info->dst_len,
					 fen_info->fi->fib_dev);
}

static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_vr *vr = fib_entry->vr;

	if (--fib_entry->ref_count == 0) {
		mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
		mlxsw_sp_fib_entry_destroy(fib_entry);
	}
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	unsigned int last_ref_count;

	do {
		last_ref_count = fib_entry->ref_count;
		mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
	} while (last_ref_count != 1);
}

static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
				    struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_vr *vr;
	int err;

	if (mlxsw_sp->router.aborted)
		return 0;

	fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info);
	if (IS_ERR(fib_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n");
		return PTR_ERR(fib_entry);
	}

	if (fib_entry->ref_count != 1)
		return 0;

	vr = fib_entry->vr;
	err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n");
		goto err_fib_entry_insert;
	}
	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
	if (err)
		goto err_fib_entry_add;
	return 0;

err_fib_entry_add:
	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
err_fib_entry_insert:
	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
	return err;
}

static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib_entry *fib_entry;

	if (mlxsw_sp->router.aborted)
		return;

	fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
	if (!fib_entry)
		return;

	if (fib_entry->ref_count == 1) {
		mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
		mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
	}

	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
}

static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	char raltb_pl[MLXSW_REG_RALTB_LEN];
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	int err;

	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
			     MLXSW_SP_LPM_TREE_MIN);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
	if (err)
		return err;

	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
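
/* Once an offload failure is encountered, the router is "aborted":
 * mlxsw_sp_router_fib4_abort() below flushes all FIB entries from
 * the device and installs the default trap entry programmed by
 * mlxsw_sp_router_set_abort_trap() above, leaving all forwarding
 * to the kernel.
 */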

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_entry *tmp;
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router.vrs[i];

		if (!vr->used)
			continue;

		list_for_each_entry_safe(fib_entry, tmp,
					 &vr->fib->entry_list, list) {
			bool do_break = &tmp->list == &vr->fib->entry_list;

			mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
			mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
						  fib_entry);
			mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);
			if (do_break)
				break;
		}
	}
}

static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router.aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router.aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}

static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;

	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
				 GFP_KERNEL);
	if (!mlxsw_sp->rifs)
		return -ENOMEM;

	mlxsw_reg_rgcr_pack(rgcr_pl, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		goto err_rgcr_fail;

	return 0;

err_rgcr_fail:
	kfree(mlxsw_sp->rifs);
	return err;
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	int i;

	mlxsw_reg_rgcr_pack(rgcr_pl, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		WARN_ON_ONCE(mlxsw_sp->rifs[i]);

	kfree(mlxsw_sp->rifs);
}

struct mlxsw_sp_fib_event_work {
	struct delayed_work dw;
	struct fib_entry_notifier_info fen_info;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};

static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, dw.work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_ADD:
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info);
		if (err)
			mlxsw_sp_router_fib4_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		mlxsw_sp_router_fib4_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
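
/* FIB notifications arrive in atomic context, so the handler below
 * only copies the notifier info and defers the actual processing to
 * mlxsw_sp_router_fib_event_work() on the ordered workqueue, where
 * the RTNL mutex may be taken.
 */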

/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;

	if (!net_eq(info->net, &init_net))
		return NOTIFY_DONE;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	INIT_DELAYED_WORK(&fib_work->dw, mlxsw_sp_router_fib_event_work);
	fib_work->mlxsw_sp = mlxsw_sp;
	fib_work->event = event;

	switch (event) {
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
		/* Take a reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	}

	mlxsw_core_schedule_odw(&fib_work->dw, 0);

	return NOTIFY_DONE;
}

static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
}

int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		return err;

	mlxsw_sp_lpm_init(mlxsw_sp);
	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
	return err;
}

void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->fib_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
}