xref: /linux/drivers/net/ethernet/sfc/tc_encap_actions.c (revision 07fdad3a93756b872da7b53647715c48d0f4a2d0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2023, Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10 
11 #include "tc_encap_actions.h"
12 #include "tc.h"
13 #include "mae.h"
14 #include <net/flow.h>
15 #include <net/inet_dscp.h>
16 #include <net/vxlan.h>
17 #include <net/geneve.h>
18 #include <net/netevent.h>
19 #include <net/arp.h>
20 
21 static const struct rhashtable_params efx_neigh_ht_params = {
22 	.key_len	= offsetof(struct efx_neigh_binder, ha),
23 	.key_offset	= 0,
24 	.head_offset	= offsetof(struct efx_neigh_binder, linkage),
25 };
26 
27 static const struct rhashtable_params efx_tc_encap_ht_params = {
28 	.key_len	= offsetofend(struct efx_tc_encap_action, key),
29 	.key_offset	= 0,
30 	.head_offset	= offsetof(struct efx_tc_encap_action, linkage),
31 };
32 
33 static void efx_tc_encap_free(void *ptr, void *__unused)
34 {
35 	struct efx_tc_encap_action *enc = ptr;
36 
37 	WARN_ON(refcount_read(&enc->ref));
38 	kfree(enc);
39 }
40 
41 static void efx_neigh_free(void *ptr, void *__unused)
42 {
43 	struct efx_neigh_binder *neigh = ptr;
44 
45 	WARN_ON(refcount_read(&neigh->ref));
46 	WARN_ON(!list_empty(&neigh->users));
47 	put_net_track(neigh->net, &neigh->ns_tracker);
48 	netdev_put(neigh->egdev, &neigh->dev_tracker);
49 	kfree(neigh);
50 }
51 
52 int efx_tc_init_encap_actions(struct efx_nic *efx)
53 {
54 	int rc;
55 
56 	rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params);
57 	if (rc < 0)
58 		goto fail_neigh_ht;
59 	rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
60 	if (rc < 0)
61 		goto fail_encap_ht;
62 	return 0;
63 fail_encap_ht:
64 	rhashtable_destroy(&efx->tc->neigh_ht);
65 fail_neigh_ht:
66 	return rc;
67 }
68 
69 /* Only call this in init failure teardown.
70  * Normal exit should fini instead as there may be entries in the table.
71  */
72 void efx_tc_destroy_encap_actions(struct efx_nic *efx)
73 {
74 	rhashtable_destroy(&efx->tc->encap_ht);
75 	rhashtable_destroy(&efx->tc->neigh_ht);
76 }
77 
78 void efx_tc_fini_encap_actions(struct efx_nic *efx)
79 {
80 	rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL);
81 	rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL);
82 }
83 
84 static void efx_neigh_update(struct work_struct *work);
85 
86 static int efx_bind_neigh(struct efx_nic *efx,
87 			  struct efx_tc_encap_action *encap, struct net *net,
88 			  struct netlink_ext_ack *extack)
89 {
90 	struct efx_neigh_binder *neigh, *old;
91 	struct flowi6 flow6 = {};
92 	struct flowi4 flow4 = {};
93 	int rc;
94 
95 	/* GCC stupidly thinks that only values explicitly listed in the enum
96 	 * definition can _possibly_ be sensible case values, so without this
97 	 * cast it complains about the IPv6 versions.
98 	 */
99 	switch ((int)encap->type) {
100 	case EFX_ENCAP_TYPE_VXLAN:
101 	case EFX_ENCAP_TYPE_GENEVE:
102 		flow4.flowi4_proto = IPPROTO_UDP;
103 		flow4.fl4_dport = encap->key.tp_dst;
104 		flow4.flowi4_dscp = inet_dsfield_to_dscp(encap->key.tos);
105 		flow4.daddr = encap->key.u.ipv4.dst;
106 		flow4.saddr = encap->key.u.ipv4.src;
107 		break;
108 	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
109 	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
110 		flow6.flowi6_proto = IPPROTO_UDP;
111 		flow6.fl6_dport = encap->key.tp_dst;
112 		flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
113 						    encap->key.label);
114 		flow6.daddr = encap->key.u.ipv6.dst;
115 		flow6.saddr = encap->key.u.ipv6.src;
116 		break;
117 	default:
118 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
119 				       (int)encap->type);
120 		return -EOPNOTSUPP;
121 	}
122 
123 	neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
124 	if (!neigh)
125 		return -ENOMEM;
126 	neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT);
127 	neigh->dst_ip = flow4.daddr;
128 	neigh->dst_ip6 = flow6.daddr;
129 
130 	old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
131 						&neigh->linkage,
132 						efx_neigh_ht_params);
133 	if (old) {
134 		/* don't need our new entry */
135 		put_net_track(neigh->net, &neigh->ns_tracker);
136 		kfree(neigh);
137 		if (IS_ERR(old)) /* oh dear, it's actually an error */
138 			return PTR_ERR(old);
139 		if (!refcount_inc_not_zero(&old->ref))
140 			return -EAGAIN;
141 		/* existing entry found, ref taken */
142 		neigh = old;
143 	} else {
144 		/* New entry.  We need to initiate a lookup */
145 		struct neighbour *n;
146 		struct rtable *rt;
147 
148 		if (encap->type & EFX_ENCAP_FLAG_IPV6) {
149 #if IS_ENABLED(CONFIG_IPV6)
150 			struct dst_entry *dst;
151 
152 			dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
153 							      NULL);
154 			rc = PTR_ERR_OR_ZERO(dst);
155 			if (rc) {
156 				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
157 				goto out_free;
158 			}
159 			neigh->egdev = dst->dev;
160 			netdev_hold(neigh->egdev, &neigh->dev_tracker,
161 				    GFP_KERNEL_ACCOUNT);
162 			neigh->ttl = ip6_dst_hoplimit(dst);
163 			n = dst_neigh_lookup(dst, &flow6.daddr);
164 			dst_release(dst);
165 #else
166 			/* We shouldn't ever get here, because if IPv6 isn't
167 			 * enabled how did someone create an IPv6 tunnel_key?
168 			 */
169 			rc = -EOPNOTSUPP;
170 			NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
171 			goto out_free;
172 #endif
173 		} else {
174 			rt = ip_route_output_key(net, &flow4);
175 			if (IS_ERR_OR_NULL(rt)) {
176 				rc = PTR_ERR_OR_ZERO(rt);
177 				if (!rc)
178 					rc = -EIO;
179 				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
180 				goto out_free;
181 			}
182 			neigh->egdev = rt->dst.dev;
183 			netdev_hold(neigh->egdev, &neigh->dev_tracker,
184 				    GFP_KERNEL_ACCOUNT);
185 			neigh->ttl = ip4_dst_hoplimit(&rt->dst);
186 			n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
187 			ip_rt_put(rt);
188 		}
189 		if (!n) {
190 			rc = -ENETUNREACH;
191 			NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
192 			netdev_put(neigh->egdev, &neigh->dev_tracker);
193 			goto out_free;
194 		}
195 		refcount_set(&neigh->ref, 1);
196 		INIT_LIST_HEAD(&neigh->users);
197 		read_lock_bh(&n->lock);
198 		ether_addr_copy(neigh->ha, n->ha);
199 		neigh->n_valid = n->nud_state & NUD_VALID;
200 		read_unlock_bh(&n->lock);
201 		rwlock_init(&neigh->lock);
202 		INIT_WORK(&neigh->work, efx_neigh_update);
203 		neigh->efx = efx;
204 		neigh->used = jiffies;
205 		if (!neigh->n_valid)
206 			/* Prod ARP to find us a neighbour */
207 			neigh_event_send(n, NULL);
208 		neigh_release(n);
209 	}
210 	/* Add us to this neigh */
211 	encap->neigh = neigh;
212 	list_add_tail(&encap->list, &neigh->users);
213 	return 0;
214 
215 out_free:
216 	/* cleanup common to several error paths */
217 	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
218 			       efx_neigh_ht_params);
219 	synchronize_rcu();
220 	put_net_track(net, &neigh->ns_tracker);
221 	kfree(neigh);
222 	return rc;
223 }
224 
225 static void efx_free_neigh(struct efx_neigh_binder *neigh)
226 {
227 	struct efx_nic *efx = neigh->efx;
228 
229 	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
230 			       efx_neigh_ht_params);
231 	synchronize_rcu();
232 	netdev_put(neigh->egdev, &neigh->dev_tracker);
233 	put_net_track(neigh->net, &neigh->ns_tracker);
234 	kfree(neigh);
235 }
236 
237 static void efx_release_neigh(struct efx_nic *efx,
238 			      struct efx_tc_encap_action *encap)
239 {
240 	struct efx_neigh_binder *neigh = encap->neigh;
241 
242 	if (!neigh)
243 		return;
244 	list_del(&encap->list);
245 	encap->neigh = NULL;
246 	if (!refcount_dec_and_test(&neigh->ref))
247 		return; /* still in use */
248 	efx_free_neigh(neigh);
249 }
250 
251 static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto)
252 {
253 	struct efx_neigh_binder *neigh = encap->neigh;
254 	struct ethhdr *eth;
255 
256 	encap->encap_hdr_len = sizeof(*eth);
257 	eth = (struct ethhdr *)encap->encap_hdr;
258 
259 	if (encap->neigh->n_valid)
260 		ether_addr_copy(eth->h_dest, neigh->ha);
261 	else
262 		eth_zero_addr(eth->h_dest);
263 	ether_addr_copy(eth->h_source, neigh->egdev->dev_addr);
264 	eth->h_proto = htons(proto);
265 }
266 
267 static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
268 {
269 	struct efx_neigh_binder *neigh = encap->neigh;
270 	struct ip_tunnel_key *key = &encap->key;
271 	struct iphdr *ip;
272 
273 	ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len);
274 	encap->encap_hdr_len += sizeof(*ip);
275 
276 	ip->daddr = key->u.ipv4.dst;
277 	ip->saddr = key->u.ipv4.src;
278 	ip->ttl = neigh->ttl;
279 	ip->protocol = ipproto;
280 	ip->version = 0x4;
281 	ip->ihl = 0x5;
282 	ip->tot_len = cpu_to_be16(ip->ihl * 4 + len);
283 	ip_send_check(ip);
284 }
285 
286 #ifdef CONFIG_IPV6
287 static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
288 {
289 	struct efx_neigh_binder *neigh = encap->neigh;
290 	struct ip_tunnel_key *key = &encap->key;
291 	struct ipv6hdr *ip;
292 
293 	ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len);
294 	encap->encap_hdr_len += sizeof(*ip);
295 
296 	ip6_flow_hdr(ip, key->tos, key->label);
297 	ip->daddr = key->u.ipv6.dst;
298 	ip->saddr = key->u.ipv6.src;
299 	ip->hop_limit = neigh->ttl;
300 	ip->nexthdr = ipproto;
301 	ip->version = 0x6;
302 	ip->payload_len = cpu_to_be16(len);
303 }
304 #endif
305 
306 static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len)
307 {
308 	struct ip_tunnel_key *key = &encap->key;
309 	struct udphdr *udp;
310 
311 	udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len);
312 	encap->encap_hdr_len += sizeof(*udp);
313 
314 	udp->dest = key->tp_dst;
315 	udp->len = cpu_to_be16(sizeof(*udp) + len);
316 }
317 
318 static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap)
319 {
320 	struct ip_tunnel_key *key = &encap->key;
321 	struct vxlanhdr *vxlan;
322 
323 	vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len);
324 	encap->encap_hdr_len += sizeof(*vxlan);
325 
326 	vxlan->vx_flags = VXLAN_HF_VNI;
327 	vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id));
328 }
329 
330 static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap)
331 {
332 	struct ip_tunnel_key *key = &encap->key;
333 	struct genevehdr *geneve;
334 	u32 vni;
335 
336 	geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len);
337 	encap->encap_hdr_len += sizeof(*geneve);
338 
339 	geneve->proto_type = htons(ETH_P_TEB);
340 	/* convert tun_id to host-endian so we can use host arithmetic to
341 	 * extract individual bytes.
342 	 */
343 	vni = ntohl(tunnel_id_to_key32(key->tun_id));
344 	geneve->vni[0] = vni >> 16;
345 	geneve->vni[1] = vni >> 8;
346 	geneve->vni[2] = vni;
347 }
348 
349 #define vxlan_header_l4_len	(sizeof(struct udphdr) + sizeof(struct vxlanhdr))
350 #define vxlan4_header_len	(sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len)
351 static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap)
352 {
353 	BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len);
354 	efx_gen_tun_header_eth(encap, ETH_P_IP);
355 	efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len);
356 	efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
357 	efx_gen_tun_header_vxlan(encap);
358 }
359 
360 #define geneve_header_l4_len	(sizeof(struct udphdr) + sizeof(struct genevehdr))
361 #define geneve4_header_len	(sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len)
362 static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap)
363 {
364 	BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len);
365 	efx_gen_tun_header_eth(encap, ETH_P_IP);
366 	efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len);
367 	efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
368 	efx_gen_tun_header_geneve(encap);
369 }
370 
371 #ifdef CONFIG_IPV6
372 #define vxlan6_header_len	(sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len)
373 static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap)
374 {
375 	BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len);
376 	efx_gen_tun_header_eth(encap, ETH_P_IPV6);
377 	efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len);
378 	efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
379 	efx_gen_tun_header_vxlan(encap);
380 }
381 
382 #define geneve6_header_len	(sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len)
383 static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap)
384 {
385 	BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len);
386 	efx_gen_tun_header_eth(encap, ETH_P_IPV6);
387 	efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len);
388 	efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
389 	efx_gen_tun_header_geneve(encap);
390 }
391 #endif
392 
393 static void efx_gen_encap_header(struct efx_nic *efx,
394 				 struct efx_tc_encap_action *encap)
395 {
396 	encap->n_valid = encap->neigh->n_valid;
397 
398 	/* GCC stupidly thinks that only values explicitly listed in the enum
399 	 * definition can _possibly_ be sensible case values, so without this
400 	 * cast it complains about the IPv6 versions.
401 	 */
402 	switch ((int)encap->type) {
403 	case EFX_ENCAP_TYPE_VXLAN:
404 		efx_gen_vxlan_header_ipv4(encap);
405 		break;
406 	case EFX_ENCAP_TYPE_GENEVE:
407 		efx_gen_geneve_header_ipv4(encap);
408 		break;
409 #ifdef CONFIG_IPV6
410 	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
411 		efx_gen_vxlan_header_ipv6(encap);
412 		break;
413 	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
414 		efx_gen_geneve_header_ipv6(encap);
415 		break;
416 #endif
417 	default:
418 		/* unhandled encap type, can't happen */
419 		if (net_ratelimit())
420 			netif_err(efx, drv, efx->net_dev,
421 				  "Bogus encap type %d, can't generate\n",
422 				  encap->type);
423 
424 		/* Use fallback action. */
425 		encap->n_valid = false;
426 		break;
427 	}
428 }
429 
430 static void efx_tc_update_encap(struct efx_nic *efx,
431 				struct efx_tc_encap_action *encap)
432 {
433 	struct efx_tc_action_set_list *acts, *fallback;
434 	struct efx_tc_flow_rule *rule;
435 	struct efx_tc_action_set *act;
436 	int rc;
437 
438 	if (encap->n_valid) {
439 		/* Make sure no rules are using this encap while we change it */
440 		list_for_each_entry(act, &encap->users, encap_user) {
441 			acts = act->user;
442 			if (WARN_ON(!acts)) /* can't happen */
443 				continue;
444 			rule = container_of(acts, struct efx_tc_flow_rule, acts);
445 			if (rule->fallback)
446 				fallback = rule->fallback;
447 			else /* fallback of the fallback: deliver to PF */
448 				fallback = &efx->tc->facts.pf;
449 			rc = efx_mae_update_rule(efx, fallback->fw_id,
450 						 rule->fw_id);
451 			if (rc)
452 				netif_err(efx, drv, efx->net_dev,
453 					  "Failed to update (f) rule %08x rc %d\n",
454 					  rule->fw_id, rc);
455 			else
456 				netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
457 					  rule->fw_id);
458 		}
459 	}
460 
461 	/* Make sure we don't leak arbitrary bytes on the wire;
462 	 * set an all-0s ethernet header.  A successful call to
463 	 * efx_gen_encap_header() will overwrite this.
464 	 */
465 	memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
466 	encap->encap_hdr_len = ETH_HLEN;
467 
468 	if (encap->neigh) {
469 		read_lock_bh(&encap->neigh->lock);
470 		efx_gen_encap_header(efx, encap);
471 		read_unlock_bh(&encap->neigh->lock);
472 	} else {
473 		encap->n_valid = false;
474 	}
475 
476 	rc = efx_mae_update_encap_md(efx, encap);
477 	if (rc) {
478 		netif_err(efx, drv, efx->net_dev,
479 			  "Failed to update encap hdr %08x rc %d\n",
480 			  encap->fw_id, rc);
481 		return;
482 	}
483 	netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
484 		  encap->fw_id);
485 	if (!encap->n_valid)
486 		return;
487 	/* Update rule users: use the action if they are now ready */
488 	list_for_each_entry(act, &encap->users, encap_user) {
489 		acts = act->user;
490 		if (WARN_ON(!acts)) /* can't happen */
491 			continue;
492 		rule = container_of(acts, struct efx_tc_flow_rule, acts);
493 		if (!efx_tc_check_ready(efx, rule))
494 			continue;
495 		rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id);
496 		if (rc)
497 			netif_err(efx, drv, efx->net_dev,
498 				  "Failed to update rule %08x rc %d\n",
499 				  rule->fw_id, rc);
500 		else
501 			netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
502 				  rule->fw_id);
503 	}
504 }
505 
506 static void efx_neigh_update(struct work_struct *work)
507 {
508 	struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work);
509 	struct efx_tc_encap_action *encap;
510 	struct efx_nic *efx = neigh->efx;
511 
512 	mutex_lock(&efx->tc->mutex);
513 	list_for_each_entry(encap, &neigh->users, list)
514 		efx_tc_update_encap(neigh->efx, encap);
515 	/* release ref taken in efx_neigh_event() */
516 	if (refcount_dec_and_test(&neigh->ref))
517 		efx_free_neigh(neigh);
518 	mutex_unlock(&efx->tc->mutex);
519 }
520 
521 static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
522 {
523 	struct efx_neigh_binder keys = {NULL}, *neigh;
524 	bool n_valid, ipv6 = false;
525 	char ha[ETH_ALEN];
526 	size_t keysize;
527 
528 	if (WARN_ON(!efx->tc))
529 		return NOTIFY_DONE;
530 
531 	if (n->tbl == &arp_tbl) {
532 		keysize = sizeof(keys.dst_ip);
533 #if IS_ENABLED(CONFIG_IPV6)
534 	} else if (n->tbl == ipv6_stub->nd_tbl) {
535 		ipv6 = true;
536 		keysize = sizeof(keys.dst_ip6);
537 #endif
538 	} else {
539 		return NOTIFY_DONE;
540 	}
541 	if (!n->parms) {
542 		netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
543 		return NOTIFY_DONE;
544 	}
545 	keys.net = read_pnet(&n->parms->net);
546 	if (n->tbl->key_len != keysize) {
547 		netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
548 			   n->tbl->key_len);
549 		return NOTIFY_DONE;
550 	}
551 	read_lock_bh(&n->lock); /* Get a consistent view */
552 	memcpy(ha, n->ha, ETH_ALEN);
553 	n_valid = (n->nud_state & NUD_VALID) && !n->dead;
554 	read_unlock_bh(&n->lock);
555 	if (ipv6)
556 		memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
557 	else
558 		memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
559 	rcu_read_lock();
560 	neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys,
561 				       efx_neigh_ht_params);
562 	if (!neigh || neigh->dying)
563 		/* We're not interested in this neighbour */
564 		goto done;
565 	write_lock_bh(&neigh->lock);
566 	if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) {
567 		write_unlock_bh(&neigh->lock);
568 		/* Nothing has changed; no work to do */
569 		goto done;
570 	}
571 	neigh->n_valid = n_valid;
572 	memcpy(neigh->ha, ha, ETH_ALEN);
573 	write_unlock_bh(&neigh->lock);
574 	if (refcount_inc_not_zero(&neigh->ref)) {
575 		rcu_read_unlock();
576 		if (!schedule_work(&neigh->work))
577 			/* failed to schedule, release the ref we just took */
578 			if (refcount_dec_and_test(&neigh->ref))
579 				efx_free_neigh(neigh);
580 	} else {
581 done:
582 		rcu_read_unlock();
583 	}
584 	return NOTIFY_DONE;
585 }
586 
587 bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
588 {
589 	struct efx_tc_action_set *act;
590 
591 	/* Encap actions can only be offloaded if they have valid
592 	 * neighbour info for the outer Ethernet header.
593 	 */
594 	list_for_each_entry(act, &rule->acts.list, list)
595 		if (act->encap_md && !act->encap_md->n_valid)
596 			return false;
597 	return true;
598 }
599 
600 struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
601 			struct efx_nic *efx, const struct ip_tunnel_info *info,
602 			struct net_device *egdev, struct netlink_ext_ack *extack)
603 {
604 	enum efx_encap_type type = efx_tc_indr_netdev_type(egdev);
605 	struct efx_tc_encap_action *encap, *old;
606 	struct efx_rep *to_efv;
607 	s64 rc;
608 
609 	if (type == EFX_ENCAP_TYPE_NONE) {
610 		/* dest is not an encap device */
611 		NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set");
612 		return ERR_PTR(-EOPNOTSUPP);
613 	}
614 	rc = efx_mae_check_encap_type_supported(efx, type);
615 	if (rc < 0) {
616 		NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type");
617 		return ERR_PTR(rc);
618 	}
619 	/* No support yet for Geneve options */
620 	if (info->options_len) {
621 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options");
622 		return ERR_PTR(-EOPNOTSUPP);
623 	}
624 	switch (info->mode) {
625 	case IP_TUNNEL_INFO_TX:
626 		break;
627 	case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6:
628 		type |= EFX_ENCAP_FLAG_IPV6;
629 		break;
630 	default:
631 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u",
632 				       info->mode);
633 		return ERR_PTR(-EOPNOTSUPP);
634 	}
635 	encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT);
636 	if (!encap)
637 		return ERR_PTR(-ENOMEM);
638 	encap->type = type;
639 	encap->key = info->key;
640 	INIT_LIST_HEAD(&encap->users);
641 	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht,
642 						&encap->linkage,
643 						efx_tc_encap_ht_params);
644 	if (old) {
645 		/* don't need our new entry */
646 		kfree(encap);
647 		if (IS_ERR(old)) /* oh dear, it's actually an error */
648 			return ERR_CAST(old);
649 		if (!refcount_inc_not_zero(&old->ref))
650 			return ERR_PTR(-EAGAIN);
651 		/* existing entry found, ref taken */
652 		return old;
653 	}
654 
655 	rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack);
656 	if (rc < 0)
657 		goto out_remove;
658 	to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev);
659 	if (IS_ERR(to_efv)) {
660 		/* neigh->egdev isn't ours */
661 		NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
662 		rc = PTR_ERR(to_efv);
663 		goto out_release;
664 	}
665 	rc = efx_tc_flower_external_mport(efx, to_efv);
666 	if (rc < 0) {
667 		NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
668 		goto out_release;
669 	}
670 	encap->dest_mport = rc;
671 	read_lock_bh(&encap->neigh->lock);
672 	efx_gen_encap_header(efx, encap);
673 	read_unlock_bh(&encap->neigh->lock);
674 
675 	rc = efx_mae_allocate_encap_md(efx, encap);
676 	if (rc < 0) {
677 		NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
678 		goto out_release;
679 	}
680 
681 	/* ref and return */
682 	refcount_set(&encap->ref, 1);
683 	return encap;
684 out_release:
685 	efx_release_neigh(efx, encap);
686 out_remove:
687 	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
688 			       efx_tc_encap_ht_params);
689 	kfree(encap);
690 	return ERR_PTR(rc);
691 }
692 
693 void efx_tc_flower_release_encap_md(struct efx_nic *efx,
694 				    struct efx_tc_encap_action *encap)
695 {
696 	if (!refcount_dec_and_test(&encap->ref))
697 		return; /* still in use */
698 	efx_release_neigh(efx, encap);
699 	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
700 			       efx_tc_encap_ht_params);
701 	efx_mae_free_encap_md(efx, encap);
702 	kfree(encap);
703 }
704 
705 static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
706 {
707 	struct efx_tc_encap_action *encap, *next;
708 
709 	list_for_each_entry_safe(encap, next, &neigh->users, list) {
710 		/* Should cause neigh usage count to fall to zero, freeing it */
711 		efx_release_neigh(efx, encap);
712 		/* The encap has lost its neigh, so it's now unready */
713 		efx_tc_update_encap(efx, encap);
714 	}
715 }
716 
717 void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
718 {
719 	struct efx_neigh_binder *neigh;
720 	struct rhashtable_iter walk;
721 
722 	mutex_lock(&efx->tc->mutex);
723 	rhashtable_walk_enter(&efx->tc->neigh_ht, &walk);
724 	rhashtable_walk_start(&walk);
725 	while ((neigh = rhashtable_walk_next(&walk)) != NULL) {
726 		if (IS_ERR(neigh))
727 			continue;
728 		if (neigh->egdev != net_dev)
729 			continue;
730 		neigh->dying = true;
731 		rhashtable_walk_stop(&walk);
732 		synchronize_rcu(); /* Make sure any updates see dying flag */
733 		efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
734 		rhashtable_walk_start(&walk);
735 	}
736 	rhashtable_walk_stop(&walk);
737 	rhashtable_walk_exit(&walk);
738 	mutex_unlock(&efx->tc->mutex);
739 }
740 
741 int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
742 			  void *ptr)
743 {
744 	if (efx->type->is_vf)
745 		return NOTIFY_DONE;
746 
747 	switch (event) {
748 	case NETEVENT_NEIGH_UPDATE:
749 		return efx_neigh_event(efx, ptr);
750 	default:
751 		return NOTIFY_DONE;
752 	}
753 }
754