// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2023, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "tc_encap_actions.h"
#include "tc.h"
#include "mae.h"
#include <net/flow.h>
#include <net/inet_dscp.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <net/netevent.h>
#include <net/arp.h>

/* Neighbour binders are keyed by everything up to (but excluding) the ->ha
 * field, i.e. the leading members of struct efx_neigh_binder (which, from
 * the assignments in efx_bind_neigh() below, include at least net, dst_ip
 * and dst_ip6 — the struct itself is declared in tc_encap_actions.h).
 */
static const struct rhashtable_params efx_neigh_ht_params = {
	.key_len	= offsetof(struct efx_neigh_binder, ha),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_neigh_binder, linkage),
};

/* Encap actions are keyed by everything up to and including ->key
 * (which efx_tc_flower_create_encap_md() fills with the TC tunnel_key's
 * struct ip_tunnel_key, plus the encap type stored before it).
 */
static const struct rhashtable_params efx_tc_encap_ht_params = {
	.key_len	= offsetofend(struct efx_tc_encap_action, key),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_encap_action, linkage),
};

/* rhashtable_free_and_destroy() callback: an entry still present at fini
 * time should have no outstanding references; warn if it does, then free.
 */
static void efx_tc_encap_free(void *ptr, void *__unused)
{
	struct efx_tc_encap_action *enc = ptr;

	WARN_ON(refcount_read(&enc->ref));
	kfree(enc);
}

/* As above, for neighbour binders: also drop the netns and netdev
 * references taken in efx_bind_neigh().
 */
static void efx_neigh_free(void *ptr, void *__unused)
{
	struct efx_neigh_binder *neigh = ptr;

	WARN_ON(refcount_read(&neigh->ref));
	WARN_ON(!list_empty(&neigh->users));
	put_net_track(neigh->net, &neigh->ns_tracker);
	netdev_put(neigh->egdev, &neigh->dev_tracker);
	kfree(neigh);
}

/* Set up the two hashtables (neighbour binders and encap actions).
 * On failure, any table already initialised is torn down again;
 * returns 0 or a negative error from rhashtable_init().
 */
int efx_tc_init_encap_actions(struct efx_nic *efx)
{
	int rc;

	rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params);
	if (rc < 0)
		goto fail_neigh_ht;
	rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
	if (rc < 0)
		goto fail_encap_ht;
	return 0;
fail_encap_ht:
	rhashtable_destroy(&efx->tc->neigh_ht);
fail_neigh_ht:
	return rc;
}

/* Only call this in init failure teardown.
 * Normal exit should fini instead as there may be entries in the table.
 */
void efx_tc_destroy_encap_actions(struct efx_nic *efx)
{
	rhashtable_destroy(&efx->tc->encap_ht);
	rhashtable_destroy(&efx->tc->neigh_ht);
}

/* Normal-exit teardown: free any remaining entries (warning on leaked
 * references via the callbacks above) and destroy the tables.
 */
void efx_tc_fini_encap_actions(struct efx_nic *efx)
{
	rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL);
	rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL);
}

static void efx_neigh_update(struct work_struct *work);

/* Find or create the neighbour binder for @encap's outer destination IP in
 * @net, take a reference on it, and link @encap onto its ->users list.
 * For a newly created binder this performs the route lookup (to find the
 * egress netdev and TTL/hop-limit) and the neighbour lookup (to snapshot
 * the destination MAC and its validity), and kicks ARP/ND if the neighbour
 * entry is not yet valid.  Returns 0 or a negative error.
 */
static int efx_bind_neigh(struct efx_nic *efx,
			  struct efx_tc_encap_action *encap, struct net *net,
			  struct netlink_ext_ack *extack)
{
	struct efx_neigh_binder *neigh, *old;
	struct flowi6 flow6 = {};
	struct flowi4 flow4 = {};
	int rc;

	/* GCC stupidly thinks that only values explicitly listed in the enum
	 * definition can _possibly_ be sensible case values, so without this
	 * cast it complains about the IPv6 versions.
	 */
	switch ((int)encap->type) {
	case EFX_ENCAP_TYPE_VXLAN:
	case EFX_ENCAP_TYPE_GENEVE:
		flow4.flowi4_proto = IPPROTO_UDP;
		flow4.fl4_dport = encap->key.tp_dst;
		flow4.flowi4_dscp = inet_dsfield_to_dscp(encap->key.tos);
		flow4.daddr = encap->key.u.ipv4.dst;
		flow4.saddr = encap->key.u.ipv4.src;
		break;
	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
		flow6.flowi6_proto = IPPROTO_UDP;
		flow6.fl6_dport = encap->key.tp_dst;
		flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
						    encap->key.label);
		flow6.daddr = encap->key.u.ipv6.dst;
		flow6.saddr = encap->key.u.ipv6.src;
		break;
	default:
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
				       (int)encap->type);
		return -EOPNOTSUPP;
	}

	neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
	if (!neigh)
		return -ENOMEM;
	neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT);
	/* Only the key fields are populated before insertion; the rest of
	 * the binder is initialised below once we know the insert won.
	 * NOTE(review): this relies on callers serialising under the tc
	 * mutex so no-one can find the half-initialised entry — confirm
	 * against tc.c's locking.
	 */
	neigh->dst_ip = flow4.daddr;
	neigh->dst_ip6 = flow6.daddr;

	old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
						&neigh->linkage,
						efx_neigh_ht_params);
	if (old) {
		/* don't need our new entry */
		put_net_track(neigh->net, &neigh->ns_tracker);
		kfree(neigh);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return PTR_ERR(old);
		if (!refcount_inc_not_zero(&old->ref))
			return -EAGAIN;
		/* existing entry found, ref taken */
		neigh = old;
	} else {
		/* New entry.  We need to initiate a lookup */
		struct neighbour *n;
		struct rtable *rt;

		if (encap->type & EFX_ENCAP_FLAG_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
			struct dst_entry *dst;

			dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
							      NULL);
			rc = PTR_ERR_OR_ZERO(dst);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
				goto out_free;
			}
			neigh->egdev = dst->dev;
			netdev_hold(neigh->egdev, &neigh->dev_tracker,
				    GFP_KERNEL_ACCOUNT);
			neigh->ttl = ip6_dst_hoplimit(dst);
			n = dst_neigh_lookup(dst, &flow6.daddr);
			dst_release(dst);
#else
			/* We shouldn't ever get here, because if IPv6 isn't
			 * enabled how did someone create an IPv6 tunnel_key?
			 */
			rc = -EOPNOTSUPP;
			NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
			goto out_free;
#endif
		} else {
			rt = ip_route_output_key(net, &flow4);
			if (IS_ERR_OR_NULL(rt)) {
				/* NULL rt maps to -EIO since there is no
				 * errno to propagate in that case.
				 */
				rc = PTR_ERR_OR_ZERO(rt);
				if (!rc)
					rc = -EIO;
				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
				goto out_free;
			}
			neigh->egdev = rt->dst.dev;
			netdev_hold(neigh->egdev, &neigh->dev_tracker,
				    GFP_KERNEL_ACCOUNT);
			neigh->ttl = ip4_dst_hoplimit(&rt->dst);
			n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
			ip_rt_put(rt);
		}
		if (!n) {
			rc = -ENETUNREACH;
			NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
			netdev_put(neigh->egdev, &neigh->dev_tracker);
			goto out_free;
		}
		refcount_set(&neigh->ref, 1);
		INIT_LIST_HEAD(&neigh->users);
		/* Snapshot the hardware address and validity under the
		 * neighbour's own lock for a consistent view.
		 */
		read_lock_bh(&n->lock);
		ether_addr_copy(neigh->ha, n->ha);
		neigh->n_valid = n->nud_state & NUD_VALID;
		read_unlock_bh(&n->lock);
		rwlock_init(&neigh->lock);
		INIT_WORK(&neigh->work, efx_neigh_update);
		neigh->efx = efx;
		neigh->used = jiffies;
		if (!neigh->n_valid)
			/* Prod ARP to find us a neighbour */
			neigh_event_send(n, NULL);
		neigh_release(n);
	}
	/* Add us to this neigh */
	encap->neigh = neigh;
	list_add_tail(&encap->list, &neigh->users);
	return 0;

out_free:
	/* cleanup common to several error paths */
	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
			       efx_neigh_ht_params);
	/* Wait out any RCU readers that found the entry before freeing it */
	synchronize_rcu();
	put_net_track(net, &neigh->ns_tracker);
	kfree(neigh);
	return rc;
}

/* Remove @neigh from the hashtable and free it, dropping its netdev and
 * netns references.  Caller must hold the last reference.
 */
static void efx_free_neigh(struct efx_neigh_binder *neigh)
{
	struct efx_nic *efx = neigh->efx;

	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
			       efx_neigh_ht_params);
	synchronize_rcu();
	netdev_put(neigh->egdev, &neigh->dev_tracker);
	put_net_track(neigh->net, &neigh->ns_tracker);
	kfree(neigh);
}

/* Undo efx_bind_neigh(): unlink @encap from its neighbour binder and drop
 * the reference, freeing the binder if that was the last user.
 * Safe to call if @encap has no neighbour bound.
 */
static void efx_release_neigh(struct efx_nic *efx,
			      struct efx_tc_encap_action *encap)
{
	struct efx_neigh_binder *neigh = encap->neigh;

	if (!neigh)
		return;
	list_del(&encap->list);
	encap->neigh = NULL;
	if (!refcount_dec_and_test(&neigh->ref))
		return; /* still in use */
	efx_free_neigh(neigh);
}

/* The efx_gen_tun_header_*() helpers below each append one protocol header
 * to encap->encap_hdr, advancing encap->encap_hdr_len as they go.  They
 * are called in outer-to-inner order by the efx_gen_*_header_*() builders.
 */

/* Ethernet header: destination MAC from the neighbour binder (zeroed if the
 * neighbour entry isn't valid yet), source MAC from the egress device.
 */
static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto)
{
	struct efx_neigh_binder *neigh = encap->neigh;
	struct ethhdr *eth;

	encap->encap_hdr_len = sizeof(*eth);
	eth = (struct ethhdr *)encap->encap_hdr;

	if (encap->neigh->n_valid)
		ether_addr_copy(eth->h_dest, neigh->ha);
	else
		eth_zero_addr(eth->h_dest);
	ether_addr_copy(eth->h_source, neigh->egdev->dev_addr);
	eth->h_proto = htons(proto);
}

/* Outer IPv4 header; @len is the length of everything after this header
 * (L4 header plus tunnel header).
 */
static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
{
	struct efx_neigh_binder *neigh = encap->neigh;
	struct ip_tunnel_key *key = &encap->key;
	struct iphdr *ip;

	ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len);
	encap->encap_hdr_len += sizeof(*ip);

	ip->daddr = key->u.ipv4.dst;
	ip->saddr = key->u.ipv4.src;
	ip->ttl = neigh->ttl;
	ip->protocol = ipproto;
	ip->version = 0x4;
	ip->ihl = 0x5;
	ip->tot_len = cpu_to_be16(ip->ihl * 4 + len);
	ip_send_check(ip);
}

#ifdef CONFIG_IPV6
/* Outer IPv6 header; @len covers everything after this header, which for
 * IPv6 is exactly the payload length field.
 */
static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
{
	struct efx_neigh_binder *neigh = encap->neigh;
	struct ip_tunnel_key *key = &encap->key;
	struct ipv6hdr *ip;

	ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len);
	encap->encap_hdr_len += sizeof(*ip);

	ip6_flow_hdr(ip, key->tos, key->label);
	ip->daddr = key->u.ipv6.dst;
	ip->saddr = key->u.ipv6.src;
	ip->hop_limit = neigh->ttl;
	ip->nexthdr = ipproto;
	ip->version = 0x6;
	ip->payload_len = cpu_to_be16(len);
}
#endif

/* Outer UDP header; @len is the length of the tunnel header that follows.
 * Source port and checksum are left as zero here — presumably filled in
 * or ignored by the hardware/firmware consuming this template.
 */
static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len)
{
	struct ip_tunnel_key *key = &encap->key;
	struct udphdr *udp;

	udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len);
	encap->encap_hdr_len += sizeof(*udp);

	udp->dest = key->tp_dst;
	udp->len = cpu_to_be16(sizeof(*udp) + len);
}

/* VXLAN header carrying the tunnel VNI from the TC tunnel_key */
static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap)
{
	struct ip_tunnel_key *key = &encap->key;
	struct vxlanhdr *vxlan;

	vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len);
	encap->encap_hdr_len += sizeof(*vxlan);

	vxlan->vx_flags = VXLAN_HF_VNI;
	vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id));
}

/* Geneve header (no options — efx_tc_flower_create_encap_md() rejects
 * tunnels with options_len) carrying the VNI as three bytes.
 */
static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap)
{
	struct ip_tunnel_key *key = &encap->key;
	struct genevehdr *geneve;
	u32 vni;

	geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len);
	encap->encap_hdr_len += sizeof(*geneve);

	geneve->proto_type = htons(ETH_P_TEB);
	/* convert tun_id to host-endian so we can use host arithmetic to
	 * extract individual bytes.
	 */
	vni = ntohl(tunnel_id_to_key32(key->tun_id));
	geneve->vni[0] = vni >> 16;
	geneve->vni[1] = vni >> 8;
	geneve->vni[2] = vni;
}

#define vxlan_header_l4_len	(sizeof(struct udphdr) + sizeof(struct vxlanhdr))
#define vxlan4_header_len	(sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len)
/* Build Ethernet/IPv4/UDP/VXLAN into encap->encap_hdr */
static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap)
{
	BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len);
	efx_gen_tun_header_eth(encap, ETH_P_IP);
	efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len);
	efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
	efx_gen_tun_header_vxlan(encap);
}

#define geneve_header_l4_len	(sizeof(struct udphdr) + sizeof(struct genevehdr))
#define geneve4_header_len	(sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len)
/* Build Ethernet/IPv4/UDP/Geneve into encap->encap_hdr */
static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap)
{
	BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len);
	efx_gen_tun_header_eth(encap, ETH_P_IP);
	efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len);
	efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
	efx_gen_tun_header_geneve(encap);
}

#ifdef CONFIG_IPV6
#define vxlan6_header_len	(sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len)
/* Build Ethernet/IPv6/UDP/VXLAN into encap->encap_hdr */
static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap)
{
	BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len);
	efx_gen_tun_header_eth(encap, ETH_P_IPV6);
	efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len);
	efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
	efx_gen_tun_header_vxlan(encap);
}

#define geneve6_header_len	(sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len)
/* Build Ethernet/IPv6/UDP/Geneve into encap->encap_hdr */
static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap)
{
	BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len);
	efx_gen_tun_header_eth(encap, ETH_P_IPV6);
	efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len);
	efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
	efx_gen_tun_header_geneve(encap);
}
#endif

/* Regenerate the full encap header for @encap from its current neighbour
 * state, and record whether the neighbour was valid (encap->n_valid).
 * Caller must hold encap->neigh->lock (read) for a consistent snapshot of
 * ha/n_valid — see the callers in efx_tc_update_encap() and
 * efx_tc_flower_create_encap_md().
 */
static void efx_gen_encap_header(struct efx_nic *efx,
				 struct efx_tc_encap_action *encap)
{
	encap->n_valid = encap->neigh->n_valid;

	/* GCC stupidly thinks that only values explicitly listed in the enum
	 * definition can _possibly_ be sensible case values, so without this
	 * cast it complains about the IPv6 versions.
	 */
	switch ((int)encap->type) {
	case EFX_ENCAP_TYPE_VXLAN:
		efx_gen_vxlan_header_ipv4(encap);
		break;
	case EFX_ENCAP_TYPE_GENEVE:
		efx_gen_geneve_header_ipv4(encap);
		break;
#ifdef CONFIG_IPV6
	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
		efx_gen_vxlan_header_ipv6(encap);
		break;
	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
		efx_gen_geneve_header_ipv6(encap);
		break;
#endif
	default:
		/* unhandled encap type, can't happen */
		if (net_ratelimit())
			netif_err(efx, drv, efx->net_dev,
				  "Bogus encap type %d, can't generate\n",
				  encap->type);

		/* Use fallback action. */
		encap->n_valid = false;
		break;
	}
}

/* Push updated encap-header contents to the hardware and switch the rules
 * that use this encap between their real action set and their fallback,
 * according to whether the encap is now ready (neighbour valid).
 * Rules are moved onto their fallback before the header is rewritten, and
 * back to the real action set only once the new header is in place and
 * valid, so traffic never uses a half-updated header.
 */
static void efx_tc_update_encap(struct efx_nic *efx,
				struct efx_tc_encap_action *encap)
{
	struct efx_tc_action_set_list *acts, *fallback;
	struct efx_tc_flow_rule *rule;
	struct efx_tc_action_set *act;
	int rc;

	if (encap->n_valid) {
		/* Make sure no rules are using this encap while we change it */
		list_for_each_entry(act, &encap->users, encap_user) {
			acts = act->user;
			if (WARN_ON(!acts)) /* can't happen */
				continue;
			rule = container_of(acts, struct efx_tc_flow_rule, acts);
			if (rule->fallback)
				fallback = rule->fallback;
			else /* fallback of the fallback: deliver to PF */
				fallback = &efx->tc->facts.pf;
			rc = efx_mae_update_rule(efx, fallback->fw_id,
						 rule->fw_id);
			if (rc)
				netif_err(efx, drv, efx->net_dev,
					  "Failed to update (f) rule %08x rc %d\n",
					  rule->fw_id, rc);
			else
				netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
					  rule->fw_id);
		}
	}

	/* Make sure we don't leak arbitrary bytes on the wire;
	 * set an all-0s ethernet header.  A successful call to
	 * efx_gen_encap_header() will overwrite this.
	 */
	memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
	encap->encap_hdr_len = ETH_HLEN;

	if (encap->neigh) {
		read_lock_bh(&encap->neigh->lock);
		efx_gen_encap_header(efx, encap);
		read_unlock_bh(&encap->neigh->lock);
	} else {
		encap->n_valid = false;
	}

	rc = efx_mae_update_encap_md(efx, encap);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "Failed to update encap hdr %08x rc %d\n",
			  encap->fw_id, rc);
		return;
	}
	netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
		  encap->fw_id);
	if (!encap->n_valid)
		return;
	/* Update rule users: use the action if they are now ready */
	list_for_each_entry(act, &encap->users, encap_user) {
		acts = act->user;
		if (WARN_ON(!acts)) /* can't happen */
			continue;
		rule = container_of(acts, struct efx_tc_flow_rule, acts);
		if (!efx_tc_check_ready(efx, rule))
			continue;
		rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id);
		if (rc)
			netif_err(efx, drv, efx->net_dev,
				  "Failed to update rule %08x rc %d\n",
				  rule->fw_id, rc);
		else
			netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
				  rule->fw_id);
	}
}

/* Deferred work scheduled by efx_neigh_event() when a neighbour binder's
 * state changed: re-generate and re-push every encap that uses it, under
 * the tc mutex, then drop the reference the event handler took.
 */
static void efx_neigh_update(struct work_struct *work)
{
	struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work);
	struct efx_tc_encap_action *encap;
	struct efx_nic *efx = neigh->efx;

	mutex_lock(&efx->tc->mutex);
	list_for_each_entry(encap, &neigh->users, list)
		efx_tc_update_encap(neigh->efx, encap);
	/* release ref taken in efx_neigh_event() */
	if (refcount_dec_and_test(&neigh->ref))
		efx_free_neigh(neigh);
	mutex_unlock(&efx->tc->mutex);
}

/* NETEVENT_NEIGH_UPDATE handler (atomic context — called via the netevent
 * notifier chain, hence the RCU lookup and the deferral of real work to
 * efx_neigh_update()).  If the neighbour matches one of our binders and
 * its MAC or validity changed, record the new state under neigh->lock and
 * schedule the update work with a reference held.
 */
static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
{
	struct efx_neigh_binder keys = {NULL}, *neigh;
	bool n_valid, ipv6 = false;
	char ha[ETH_ALEN];
	size_t keysize;

	if (WARN_ON(!efx->tc))
		return NOTIFY_DONE;

	if (n->tbl == &arp_tbl) {
		keysize = sizeof(keys.dst_ip);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (n->tbl == ipv6_stub->nd_tbl) {
		ipv6 = true;
		keysize = sizeof(keys.dst_ip6);
#endif
	} else {
		return NOTIFY_DONE;
	}
	if (!n->parms) {
		netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
		return NOTIFY_DONE;
	}
	keys.net = read_pnet(&n->parms->net);
	if (n->tbl->key_len != keysize) {
		netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
			   n->tbl->key_len);
		return NOTIFY_DONE;
	}
	read_lock_bh(&n->lock); /* Get a consistent view */
	memcpy(ha, n->ha, ETH_ALEN);
	n_valid = (n->nud_state & NUD_VALID) && !n->dead;
	read_unlock_bh(&n->lock);
	if (ipv6)
		memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
	else
		memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
	rcu_read_lock();
	neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys,
				       efx_neigh_ht_params);
	if (!neigh || neigh->dying)
		/* We're not interested in this neighbour */
		goto done;
	write_lock_bh(&neigh->lock);
	if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) {
		write_unlock_bh(&neigh->lock);
		/* Nothing has changed; no work to do */
		goto done;
	}
	neigh->n_valid = n_valid;
	memcpy(neigh->ha, ha, ETH_ALEN);
	write_unlock_bh(&neigh->lock);
	if (refcount_inc_not_zero(&neigh->ref)) {
		rcu_read_unlock();
		if (!schedule_work(&neigh->work))
			/* failed to schedule, release the ref we just took */
			if (refcount_dec_and_test(&neigh->ref))
				efx_free_neigh(neigh);
	} else {
		/* neigh was already being freed; the "done" exits above also
		 * land here, so the RCU read lock is dropped exactly once on
		 * every path.
		 */
done:
		rcu_read_unlock();
	}
	return NOTIFY_DONE;
}

/* Returns true if every encap action used by @rule has a generated header
 * backed by a valid neighbour, i.e. the rule can use its real action set
 * rather than its fallback.
 */
bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
{
	struct efx_tc_action_set *act;

	/* Encap actions can only be offloaded if they have valid
	 * neighbour info for the outer Ethernet header.
	 */
	list_for_each_entry(act, &rule->acts.list, list)
		if (act->encap_md && !act->encap_md->n_valid)
			return false;
	return true;
}

/* Get (or create, with refcount 1) the encap metadata entry for tunnel
 * @info egressing via @egdev.  On create: binds a neighbour, resolves the
 * egress m-port, generates the header template and allocates it in the
 * MAE.  Returns the entry with a reference taken, or an ERR_PTR.
 * Callers release with efx_tc_flower_release_encap_md().
 */
struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
			struct efx_nic *efx, const struct ip_tunnel_info *info,
			struct net_device *egdev, struct netlink_ext_ack *extack)
{
	enum efx_encap_type type = efx_tc_indr_netdev_type(egdev);
	struct efx_tc_encap_action *encap, *old;
	struct efx_rep *to_efv;
	s64 rc;

	if (type == EFX_ENCAP_TYPE_NONE) {
		/* dest is not an encap device */
		NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set");
		return ERR_PTR(-EOPNOTSUPP);
	}
	rc = efx_mae_check_encap_type_supported(efx, type);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type");
		return ERR_PTR(rc);
	}
	/* No support yet for Geneve options */
	if (info->options_len) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options");
		return ERR_PTR(-EOPNOTSUPP);
	}
	switch (info->mode) {
	case IP_TUNNEL_INFO_TX:
		break;
	case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6:
		type |= EFX_ENCAP_FLAG_IPV6;
		break;
	default:
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u",
				       info->mode);
		return ERR_PTR(-EOPNOTSUPP);
	}
	encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT);
	if (!encap)
		return ERR_PTR(-ENOMEM);
	encap->type = type;
	encap->key = info->key;
	INIT_LIST_HEAD(&encap->users);
	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht,
						&encap->linkage,
						efx_tc_encap_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(encap);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return ERR_CAST(old);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found, ref taken */
		return old;
	}

	rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack);
	if (rc < 0)
		goto out_remove;
	to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev);
	if (IS_ERR(to_efv)) {
		/* neigh->egdev isn't ours */
		NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
		rc = PTR_ERR(to_efv);
		goto out_release;
	}
	rc = efx_tc_flower_external_mport(efx, to_efv);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
		goto out_release;
	}
	encap->dest_mport = rc;
	read_lock_bh(&encap->neigh->lock);
	efx_gen_encap_header(efx, encap);
	read_unlock_bh(&encap->neigh->lock);

	rc = efx_mae_allocate_encap_md(efx, encap);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
		goto out_release;
	}

	/* ref and return */
	refcount_set(&encap->ref, 1);
	return encap;
out_release:
	efx_release_neigh(efx, encap);
out_remove:
	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
			       efx_tc_encap_ht_params);
	kfree(encap);
	return ERR_PTR(rc);
}

/* Drop a reference on @encap; on the last put, unbind its neighbour,
 * remove it from the hashtable, free the MAE resource and free it.
 */
void efx_tc_flower_release_encap_md(struct efx_nic *efx,
				    struct efx_tc_encap_action *encap)
{
	if (!refcount_dec_and_test(&encap->ref))
		return; /* still in use */
	efx_release_neigh(efx, encap);
	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
			       efx_tc_encap_ht_params);
	efx_mae_free_encap_md(efx, encap);
	kfree(encap);
}

/* Detach every encap from @neigh (which frees @neigh once its refcount
 * hits zero) and refresh each encap so its rules fall back, since it has
 * no neighbour any more.
 */
static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
{
	struct efx_tc_encap_action *encap, *next;

	list_for_each_entry_safe(encap, next, &neigh->users, list) {
		/* Should cause neigh usage count to fall to zero, freeing it */
		efx_release_neigh(efx, encap);
		/* The encap has lost its neigh, so it's now unready */
		efx_tc_update_encap(efx, encap);
	}
}

/* @net_dev is going away: mark every neighbour binder that egresses via it
 * as dying (so concurrent efx_neigh_event() calls ignore it) and strip its
 * users.  The walk is stopped/restarted around the sleeping work, per the
 * rhashtable walker API.
 */
void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
{
	struct efx_neigh_binder *neigh;
	struct rhashtable_iter walk;

	mutex_lock(&efx->tc->mutex);
	rhashtable_walk_enter(&efx->tc->neigh_ht, &walk);
	rhashtable_walk_start(&walk);
	while ((neigh = rhashtable_walk_next(&walk)) != NULL) {
		if (IS_ERR(neigh)) /* e.g. -EAGAIN on a table resize; retry */
			continue;
		if (neigh->egdev != net_dev)
			continue;
		neigh->dying = true;
		rhashtable_walk_stop(&walk);
		synchronize_rcu(); /* Make sure any updates see dying flag */
		efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
		rhashtable_walk_start(&walk);
	}
	rhashtable_walk_stop(&walk);
	rhashtable_walk_exit(&walk);
	mutex_unlock(&efx->tc->mutex);
}

/* Netevent notifier entry point: only neighbour updates are of interest,
 * and only on the PF (VFs don't manage the switch).
 */
int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
			  void *ptr)
{
	if (efx->type->is_vf)
		return NOTIFY_DONE;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		return efx_neigh_event(efx, ptr);
	default:
		return NOTIFY_DONE;
	}
}