1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2019 Solarflare Communications Inc.
5  * Copyright 2020-2022 Xilinx Inc.
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License version 2 as published
9  * by the Free Software Foundation, incorporated herein by reference.
10  */
11 
12 #include <net/pkt_cls.h>
13 #include <net/vxlan.h>
14 #include <net/geneve.h>
15 #include <net/tc_act/tc_ct.h>
16 #include "tc.h"
17 #include "tc_bindings.h"
18 #include "tc_encap_actions.h"
19 #include "tc_conntrack.h"
20 #include "mae.h"
21 #include "ef100_rep.h"
22 #include "efx.h"
23 
24 enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
25 {
26 	if (netif_is_vxlan(net_dev))
27 		return EFX_ENCAP_TYPE_VXLAN;
28 	if (netif_is_geneve(net_dev))
29 		return EFX_ENCAP_TYPE_GENEVE;
30 
31 	return EFX_ENCAP_TYPE_NONE;
32 }
33 
34 #define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
35 /* The hop limit is stored in the most significant byte of the pedit ipv6 header action word */
36 #define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000)
37 #define EFX_EFV_PF	NULL
38 /* Look up the representor information (efv) for a device.
39  * May return NULL for the PF (us), or an error pointer for a device that
40  * isn't supported as a TC offload endpoint
41  */
42 struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
43 					 struct net_device *dev)
44 {
45 	struct efx_rep *efv;
46 
47 	if (!dev)
48 		return ERR_PTR(-EOPNOTSUPP);
49 	/* Is it us (the PF)? */
50 	if (dev == efx->net_dev)
51 		return EFX_EFV_PF;
52 	/* Is it an efx vfrep at all? */
53 	if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
54 		return ERR_PTR(-EOPNOTSUPP);
55 	/* Is it ours?  We don't support TC rules that include another
56 	 * EF100's netdevices (not even on another port of the same NIC).
57 	 */
58 	efv = netdev_priv(dev);
59 	if (efv->parent != efx)
60 		return ERR_PTR(-EOPNOTSUPP);
61 	return efv;
62 }
63 
64 /* Convert a driver-internal vport to the m-port of its internal side (PF or VF) */
65 static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
66 {
67 	u32 mport;
68 
69 	if (IS_ERR(efv))
70 		return PTR_ERR(efv);
71 	if (!efv) /* device is PF (us) */
72 		efx_mae_mport_uplink(efx, &mport);
73 	else /* device is repr */
74 		efx_mae_mport_mport(efx, efv->mport, &mport);
75 	return mport;
76 }
77 
78 /* Convert a driver-internal vport to the m-port of its external side (wire or VF) */
79 s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
80 {
81 	u32 mport;
82 
83 	if (IS_ERR(efv))
84 		return PTR_ERR(efv);
85 	if (!efv) /* device is PF (us) */
86 		efx_mae_mport_wire(efx, &mport);
87 	else /* device is repr */
88 		efx_mae_mport_mport(efx, efv->mport, &mport);
89 	return mport;
90 }
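
/* A minimal caller sketch (illustrative only; efx_tc_example_mport_for_dev()
 * is hypothetical, not part of the driver): the tri-state return of
 * efx_tc_flower_lookup_efv() means callers must test IS_ERR() before
 * treating NULL as the PF, then convert the result to an m-port.
 */
static inline s64 efx_tc_example_mport_for_dev(struct efx_nic *efx,
					       struct net_device *dev)
{
	struct efx_rep *efv = efx_tc_flower_lookup_efv(efx, dev);

	if (IS_ERR(efv)) /* not a TC offload endpoint we support */
		return PTR_ERR(efv);
	/* efv == EFX_EFV_PF (NULL) is a valid, non-error result here */
	return efx_tc_flower_external_mport(efx, efv);
}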
91 
92 static const struct rhashtable_params efx_tc_mac_ht_params = {
93 	.key_len	= offsetofend(struct efx_tc_mac_pedit_action, h_addr),
94 	.key_offset	= 0,
95 	.head_offset	= offsetof(struct efx_tc_mac_pedit_action, linkage),
96 };
97 
98 static const struct rhashtable_params efx_tc_encap_match_ht_params = {
99 	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
100 	.key_offset	= 0,
101 	.head_offset	= offsetof(struct efx_tc_encap_match, linkage),
102 };
103 
104 static const struct rhashtable_params efx_tc_match_action_ht_params = {
105 	.key_len	= sizeof(unsigned long),
106 	.key_offset	= offsetof(struct efx_tc_flow_rule, cookie),
107 	.head_offset	= offsetof(struct efx_tc_flow_rule, linkage),
108 };
109 
110 static const struct rhashtable_params efx_tc_lhs_rule_ht_params = {
111 	.key_len	= sizeof(unsigned long),
112 	.key_offset	= offsetof(struct efx_tc_lhs_rule, cookie),
113 	.head_offset	= offsetof(struct efx_tc_lhs_rule, linkage),
114 };
115 
116 static const struct rhashtable_params efx_tc_recirc_ht_params = {
117 	.key_len	= offsetof(struct efx_tc_recirc_id, linkage),
118 	.key_offset	= 0,
119 	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
120 };
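
/* Layout convention assumed by the param sets above with key_offset == 0:
 * the key is a prefix of the struct, so key_len is either the offset of
 * @linkage (encap_match, recirc: everything before the rhash_head is
 * hashed) or offsetofend() of the last key field (mac).  A hypothetical
 * example of the shape such a struct takes:
 *
 *	struct efx_tc_example_entry {
 *		u32 key_a;                     <- hashed
 *		u16 key_b;                     <- hashed
 *		struct rhash_head linkage;     <- key ends here
 *		refcount_t ref;                <- not part of the key
 *	};
 */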
121 
122 static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
123 							     unsigned char h_addr[ETH_ALEN],
124 							     struct netlink_ext_ack *extack)
125 {
126 	struct efx_tc_mac_pedit_action *ped, *old;
127 	int rc;
128 
129 	ped = kzalloc(sizeof(*ped), GFP_USER);
130 	if (!ped)
131 		return ERR_PTR(-ENOMEM);
132 	memcpy(ped->h_addr, h_addr, ETH_ALEN);
133 	old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
134 						&ped->linkage,
135 						efx_tc_mac_ht_params);
136 	if (old) {
137 		/* don't need our new entry */
138 		kfree(ped);
139 		if (!refcount_inc_not_zero(&old->ref))
140 			return ERR_PTR(-EAGAIN);
141 		/* existing entry found, ref taken */
142 		return old;
143 	}
144 
145 	rc = efx_mae_allocate_pedit_mac(efx, ped);
146 	if (rc < 0) {
147 		NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
148 		goto out_remove;
149 	}
150 
151 	/* ref and return */
152 	refcount_set(&ped->ref, 1);
153 	return ped;
154 out_remove:
155 	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
156 			       efx_tc_mac_ht_params);
157 	kfree(ped);
158 	return ERR_PTR(rc);
159 }
160 
161 static void efx_tc_flower_put_mac(struct efx_nic *efx,
162 				  struct efx_tc_mac_pedit_action *ped)
163 {
164 	if (!refcount_dec_and_test(&ped->ref))
165 		return; /* still in use */
166 	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
167 			       efx_tc_mac_ht_params);
168 	efx_mae_free_pedit_mac(efx, ped);
169 	kfree(ped);
170 }
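
/* Usage sketch (illustrative only; efx_tc_example_set_dst_mac() is
 * hypothetical): entries returned by efx_tc_flower_get_mac() are shared
 * and refcounted, so every successful get must eventually be balanced by
 * one efx_tc_flower_put_mac(), as efx_tc_free_action_set() below does for
 * act->src_mac and act->dst_mac.
 */
static inline int efx_tc_example_set_dst_mac(struct efx_nic *efx,
					     struct efx_tc_action_set *act,
					     unsigned char h_addr[ETH_ALEN],
					     struct netlink_ext_ack *extack)
{
	struct efx_tc_mac_pedit_action *ped;

	ped = efx_tc_flower_get_mac(efx, h_addr, extack);
	if (IS_ERR(ped))
		return PTR_ERR(ped);
	if (act->dst_mac) /* drop the ref on any previous value */
		efx_tc_flower_put_mac(efx, act->dst_mac);
	act->dst_mac = ped;
	return 0;
}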
171 
172 static void efx_tc_free_action_set(struct efx_nic *efx,
173 				   struct efx_tc_action_set *act, bool in_hw)
174 {
175 	/* Failure paths call this on the 'cursor' action set with in_hw=false,
176 	 * because if the alloc had succeeded we would have put it in acts.list
177 	 * and it would no longer be in act.
178 	 */
179 	if (in_hw) {
180 		efx_mae_free_action_set(efx, act->fw_id);
181 		/* in_hw is true iff we are on an acts.list; make sure to
182 		 * remove ourselves from that list before we are freed.
183 		 */
184 		list_del(&act->list);
185 	}
186 	if (act->count) {
187 		spin_lock_bh(&act->count->cnt->lock);
188 		if (!list_empty(&act->count_user))
189 			list_del(&act->count_user);
190 		spin_unlock_bh(&act->count->cnt->lock);
191 		efx_tc_flower_put_counter_index(efx, act->count);
192 	}
193 	if (act->encap_md) {
194 		list_del(&act->encap_user);
195 		efx_tc_flower_release_encap_md(efx, act->encap_md);
196 	}
197 	if (act->src_mac)
198 		efx_tc_flower_put_mac(efx, act->src_mac);
199 	if (act->dst_mac)
200 		efx_tc_flower_put_mac(efx, act->dst_mac);
201 	kfree(act);
202 }
203 
204 static void efx_tc_free_action_set_list(struct efx_nic *efx,
205 					struct efx_tc_action_set_list *acts,
206 					bool in_hw)
207 {
208 	struct efx_tc_action_set *act, *next;
209 
210 	/* Failure paths set in_hw=false, because usually the acts didn't get
211 	 * as far as efx_mae_alloc_action_set_list(); if they did, the failure
212 	 * path calls efx_mae_free_action_set_list() separately before calling us.
213 	 */
214 	if (in_hw)
215 		efx_mae_free_action_set_list(efx, acts);
216 	/* Any act that's on the list will be in_hw even if the list isn't */
217 	list_for_each_entry_safe(act, next, &acts->list, list)
218 		efx_tc_free_action_set(efx, act, true);
219 	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
220 }
221 
222 /* Boilerplate for the simple 'copy a field' cases */
223 #define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
224 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {		\
225 	struct flow_match_##_type fm;					\
226 									\
227 	flow_rule_match_##_tcget(rule, &fm);				\
228 	match->value._field = fm.key->_tcfield;				\
229 	match->mask._field = fm.mask->_tcfield;				\
230 }
231 #define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)	\
232 	_MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
233 #define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
234 	_MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)
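
/* As an expansion sketch, MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto)
 * as used below becomes approximately:
 *
 *	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 *		struct flow_match_basic fm;
 *
 *		flow_rule_match_basic(rule, &fm);
 *		match->value.eth_proto = fm.key->n_proto;
 *		match->mask.eth_proto = fm.mask->n_proto;
 *	}
 */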
235 
236 static int efx_tc_flower_parse_match(struct efx_nic *efx,
237 				     struct flow_rule *rule,
238 				     struct efx_tc_match *match,
239 				     struct netlink_ext_ack *extack)
240 {
241 	struct flow_dissector *dissector = rule->match.dissector;
242 	unsigned char ipv = 0;
243 
244 	/* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
245 	 * even on IPv4 filters; so rather than relying on dissector->used_keys
246 	 * we check the addr_type in the CONTROL key.  If we don't find it (or
247 	 * it's masked, which should never happen), we treat both IPV4_ADDRS
248 	 * and IPV6_ADDRS as absent.
249 	 */
250 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
251 		struct flow_match_control fm;
252 
253 		flow_rule_match_control(rule, &fm);
254 		if (IS_ALL_ONES(fm.mask->addr_type))
255 			switch (fm.key->addr_type) {
256 			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
257 				ipv = 4;
258 				break;
259 			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
260 				ipv = 6;
261 				break;
262 			default:
263 				break;
264 			}
265 
266 		if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
267 			match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
268 			match->mask.ip_frag = true;
269 		}
270 		if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
271 			match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
272 			match->mask.ip_firstfrag = true;
273 		}
274 		if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) {
275 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x",
276 					       fm.mask->flags);
277 			return -EOPNOTSUPP;
278 		}
279 	}
280 	if (dissector->used_keys &
281 	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
282 	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
283 	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
284 	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
285 	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
286 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
287 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
288 	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
289 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
290 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
291 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
292 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
293 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
294 	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
295 	      BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
296 	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
297 	      BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
298 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
299 				       dissector->used_keys);
300 		return -EOPNOTSUPP;
301 	}
302 
303 	MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
304 	/* Make sure we're IP if any L3/L4 keys used. */
305 	if (!IS_ALL_ONES(match->mask.eth_proto) ||
306 	    !(match->value.eth_proto == htons(ETH_P_IP) ||
307 	      match->value.eth_proto == htons(ETH_P_IPV6)))
308 		if (dissector->used_keys &
309 		    (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
310 		     BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
311 		     BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
312 		     BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
313 		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
314 			NL_SET_ERR_MSG_FMT_MOD(extack,
315 					       "L3/L4 flower keys %#llx require protocol ipv[46]",
316 					       dissector->used_keys);
317 			return -EINVAL;
318 		}
319 
320 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
321 		struct flow_match_vlan fm;
322 
323 		flow_rule_match_vlan(rule, &fm);
324 		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
325 			match->value.vlan_proto[0] = fm.key->vlan_tpid;
326 			match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
327 			match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
328 							       fm.key->vlan_id);
329 			match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
330 							      fm.mask->vlan_id);
331 		}
332 	}
333 
334 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
335 		struct flow_match_vlan fm;
336 
337 		flow_rule_match_cvlan(rule, &fm);
338 		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
339 			match->value.vlan_proto[1] = fm.key->vlan_tpid;
340 			match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
341 			match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
342 							       fm.key->vlan_id);
343 			match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
344 							      fm.mask->vlan_id);
345 		}
346 	}
347 
348 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
349 		struct flow_match_eth_addrs fm;
350 
351 		flow_rule_match_eth_addrs(rule, &fm);
352 		ether_addr_copy(match->value.eth_saddr, fm.key->src);
353 		ether_addr_copy(match->value.eth_daddr, fm.key->dst);
354 		ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
355 		ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
356 	}
357 
358 	MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
359 	/* Make sure we're TCP/UDP if any L4 keys used. */
360 	if ((match->value.ip_proto != IPPROTO_UDP &&
361 	     match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
362 		if (dissector->used_keys &
363 		    (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
364 		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
365 			NL_SET_ERR_MSG_FMT_MOD(extack,
366 					       "L4 flower keys %#llx require ipproto udp or tcp",
367 					       dissector->used_keys);
368 			return -EINVAL;
369 		}
370 	MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
371 	MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
372 	if (ipv == 4) {
373 		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
374 		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
375 	}
376 #ifdef CONFIG_IPV6
377 	else if (ipv == 6) {
378 		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
379 		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
380 	}
381 #endif
382 	MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
383 	MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
384 	MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
385 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
386 		struct flow_match_control fm;
387 
388 		flow_rule_match_enc_control(rule, &fm);
389 		if (fm.mask->flags) {
390 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x",
391 					       fm.mask->flags);
392 			return -EOPNOTSUPP;
393 		}
394 		if (!IS_ALL_ONES(fm.mask->addr_type)) {
395 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
396 					       fm.mask->addr_type,
397 					       fm.key->addr_type);
398 			return -EOPNOTSUPP;
399 		}
400 		switch (fm.key->addr_type) {
401 		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
402 			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
403 					     src, enc_src_ip);
404 			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
405 					     dst, enc_dst_ip);
406 			break;
407 #ifdef CONFIG_IPV6
408 		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
409 			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
410 					     src, enc_src_ip6);
411 			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
412 					     dst, enc_dst_ip6);
413 			break;
414 #endif
415 		default:
416 			NL_SET_ERR_MSG_FMT_MOD(extack,
417 					       "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
418 					       fm.key->addr_type);
419 			return -EOPNOTSUPP;
420 		}
421 		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
422 		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
423 		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
424 		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
425 		MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
426 	} else if (dissector->used_keys &
427 		   (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
428 		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
429 		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
430 		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
431 		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
432 		NL_SET_ERR_MSG_FMT_MOD(extack,
433 				       "Flower enc keys require enc_control (keys: %#llx)",
434 				       dissector->used_keys);
435 		return -EOPNOTSUPP;
436 	}
437 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
438 		struct flow_match_ct fm;
439 
440 		flow_rule_match_ct(rule, &fm);
441 		match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
442 		match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
443 		match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
444 		match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
445 		if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
446 					  TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
447 			NL_SET_ERR_MSG_FMT_MOD(extack,
448 					       "Unsupported ct_state match %#x",
449 					       fm.mask->ct_state);
450 			return -EOPNOTSUPP;
451 		}
452 		match->value.ct_mark = fm.key->ct_mark;
453 		match->mask.ct_mark = fm.mask->ct_mark;
454 		match->value.ct_zone = fm.key->ct_zone;
455 		match->mask.ct_zone = fm.mask->ct_zone;
456 
457 		if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
458 			NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
459 			return -EOPNOTSUPP;
460 		}
461 	}
462 
463 	return 0;
464 }
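
/* Worked example (illustrative): for a flower rule matching
 * "ip_proto tcp dst_port 80", the parser above leaves roughly this in
 * @match, with all other fields zero:
 *
 *	match->value.eth_proto = htons(ETH_P_IP);  match->mask.eth_proto = ~0;
 *	match->value.ip_proto  = IPPROTO_TCP;      match->mask.ip_proto  = ~0;
 *	match->value.l4_dport  = htons(80);        match->mask.l4_dport  = ~0;
 */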
465 
466 static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
467 					      struct efx_tc_encap_match *encap)
468 {
469 	int rc;
470 
471 	if (!refcount_dec_and_test(&encap->ref))
472 		return; /* still in use */
473 
474 	if (encap->type == EFX_TC_EM_DIRECT) {
475 		rc = efx_mae_unregister_encap_match(efx, encap);
476 		if (rc)
477 			/* Display message but carry on and remove entry from our
478 			 * SW tables, because there's not much we can do about it.
479 			 */
480 			netif_err(efx, drv, efx->net_dev,
481 				  "Failed to release encap match %#x, rc %d\n",
482 				  encap->fw_id, rc);
483 	}
484 	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
485 			       efx_tc_encap_match_ht_params);
486 	if (encap->pseudo)
487 		efx_tc_flower_release_encap_match(efx, encap->pseudo);
488 	kfree(encap);
489 }
490 
491 static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
492 					    struct efx_tc_match *match,
493 					    enum efx_encap_type type,
494 					    enum efx_tc_em_pseudo_type em_type,
495 					    u8 child_ip_tos_mask,
496 					    __be16 child_udp_sport_mask,
497 					    struct netlink_ext_ack *extack)
498 {
499 	struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
500 	bool ipv6 = false;
501 	int rc;
502 
503 	/* We require that the socket-defining fields (IP addrs and UDP dest
504 	 * port) are present and exact-match.  Other fields may only be used
505 	 * if the field-set (and any masks) are the same for all encap
506 	 * matches on the same <sip,dip,dport> tuple; this is enforced by
507 	 * pseudo encap matches.
508 	 */
509 	if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
510 		if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
511 			NL_SET_ERR_MSG_MOD(extack,
512 					   "Egress encap match is not exact on dst IP address");
513 			return -EOPNOTSUPP;
514 		}
515 		if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
516 			NL_SET_ERR_MSG_MOD(extack,
517 					   "Egress encap match is not exact on src IP address");
518 			return -EOPNOTSUPP;
519 		}
520 #ifdef CONFIG_IPV6
521 		if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
522 		    !ipv6_addr_any(&match->mask.enc_src_ip6)) {
523 			NL_SET_ERR_MSG_MOD(extack,
524 					   "Egress encap match on both IPv4 and IPv6, don't understand");
525 			return -EOPNOTSUPP;
526 		}
527 	} else {
528 		ipv6 = true;
529 		if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
530 			NL_SET_ERR_MSG_MOD(extack,
531 					   "Egress encap match is not exact on dst IP address");
532 			return -EOPNOTSUPP;
533 		}
534 		if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
535 			NL_SET_ERR_MSG_MOD(extack,
536 					   "Egress encap match is not exact on src IP address");
537 			return -EOPNOTSUPP;
538 		}
539 #endif
540 	}
541 	if (!IS_ALL_ONES(match->mask.enc_dport)) {
542 		NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
543 		return -EOPNOTSUPP;
544 	}
545 	if (match->mask.enc_sport || match->mask.enc_ip_tos) {
546 		struct efx_tc_match pmatch = *match;
547 
548 		if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
549 			NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
550 			return -EOPNOTSUPP;
551 		}
552 		pmatch.value.enc_ip_tos = 0;
553 		pmatch.mask.enc_ip_tos = 0;
554 		pmatch.value.enc_sport = 0;
555 		pmatch.mask.enc_sport = 0;
556 		rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
557 						      EFX_TC_EM_PSEUDO_MASK,
558 						      match->mask.enc_ip_tos,
559 						      match->mask.enc_sport,
560 						      extack);
561 		if (rc)
562 			return rc;
563 		pseudo = pmatch.encap;
564 	}
565 	if (match->mask.enc_ip_ttl) {
566 		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
567 		rc = -EOPNOTSUPP;
568 		goto fail_pseudo;
569 	}
570 
571 	rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
572 					    match->mask.enc_sport, extack);
573 	if (rc)
574 		goto fail_pseudo;
575 
576 	encap = kzalloc(sizeof(*encap), GFP_USER);
577 	if (!encap) {
578 		rc = -ENOMEM;
579 		goto fail_pseudo;
580 	}
581 	encap->src_ip = match->value.enc_src_ip;
582 	encap->dst_ip = match->value.enc_dst_ip;
583 #ifdef CONFIG_IPV6
584 	encap->src_ip6 = match->value.enc_src_ip6;
585 	encap->dst_ip6 = match->value.enc_dst_ip6;
586 #endif
587 	encap->udp_dport = match->value.enc_dport;
588 	encap->tun_type = type;
589 	encap->ip_tos = match->value.enc_ip_tos;
590 	encap->ip_tos_mask = match->mask.enc_ip_tos;
591 	encap->child_ip_tos_mask = child_ip_tos_mask;
592 	encap->udp_sport = match->value.enc_sport;
593 	encap->udp_sport_mask = match->mask.enc_sport;
594 	encap->child_udp_sport_mask = child_udp_sport_mask;
595 	encap->type = em_type;
596 	encap->pseudo = pseudo;
597 	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
598 						&encap->linkage,
599 						efx_tc_encap_match_ht_params);
600 	if (old) {
601 		/* don't need our new entry */
602 		kfree(encap);
603 		if (pseudo) /* don't need our new pseudo either */
604 			efx_tc_flower_release_encap_match(efx, pseudo);
605 		/* check old and new em_types are compatible */
606 		switch (old->type) {
607 		case EFX_TC_EM_DIRECT:
608 			/* old EM is in hardware, so mustn't overlap with a
609 			 * pseudo, but may be shared with another direct EM
610 			 */
611 			if (em_type == EFX_TC_EM_DIRECT)
612 				break;
613 			NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
614 			return -EEXIST;
615 		case EFX_TC_EM_PSEUDO_MASK:
616 			/* old EM is protecting a ToS- or src port-qualified
617 			 * filter, so may only be shared with another pseudo
618 			 * for the same ToS and src port masks.
619 			 */
620 			if (em_type != EFX_TC_EM_PSEUDO_MASK) {
621 				NL_SET_ERR_MSG_FMT_MOD(extack,
622 						       "%s encap match conflicts with existing pseudo(MASK) entry",
623 						       em_type ? "Pseudo" : "Direct");
624 				return -EEXIST;
625 			}
626 			if (child_ip_tos_mask != old->child_ip_tos_mask) {
627 				NL_SET_ERR_MSG_FMT_MOD(extack,
628 						       "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x",
629 						       child_ip_tos_mask,
630 						       old->child_ip_tos_mask);
631 				return -EEXIST;
632 			}
633 			if (child_udp_sport_mask != old->child_udp_sport_mask) {
634 				NL_SET_ERR_MSG_FMT_MOD(extack,
635 						       "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x",
636 						       child_udp_sport_mask,
637 						       old->child_udp_sport_mask);
638 				return -EEXIST;
639 			}
640 			break;
641 		default: /* Unrecognised pseudo-type.  Just say no */
642 			NL_SET_ERR_MSG_FMT_MOD(extack,
643 					       "%s encap match conflicts with existing pseudo(%d) entry",
644 					       em_type ? "Pseudo" : "Direct",
645 					       old->type);
646 			return -EEXIST;
647 		}
648 		/* check old and new tun_types are compatible */
649 		if (old->tun_type != type) {
650 			NL_SET_ERR_MSG_FMT_MOD(extack,
651 					       "Egress encap match with conflicting tun_type %u != %u",
652 					       old->tun_type, type);
653 			return -EEXIST;
654 		}
655 		if (!refcount_inc_not_zero(&old->ref))
656 			return -EAGAIN;
657 		/* existing entry found */
658 		encap = old;
659 	} else {
660 		if (em_type == EFX_TC_EM_DIRECT) {
661 			rc = efx_mae_register_encap_match(efx, encap);
662 			if (rc) {
663 				NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
664 				goto fail;
665 			}
666 		}
667 		refcount_set(&encap->ref, 1);
668 	}
669 	match->encap = encap;
670 	return 0;
671 fail:
672 	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
673 			       efx_tc_encap_match_ht_params);
674 	kfree(encap);
675 fail_pseudo:
676 	if (pseudo)
677 		efx_tc_flower_release_encap_match(efx, pseudo);
678 	return rc;
679 }
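
/* Worked example of the pseudo mechanism (illustrative): two decap rules on
 * the same <sip,dip,dport> tuple that both qualify on ToS, say ip_tos 0x28
 * and ip_tos 0x30 with the same mask 0xfc, share one pseudo(MASK) entry
 * whose child_ip_tos_mask is 0xfc; a third rule on that tuple with a
 * different ToS mask then fails with -EEXIST, per the checks above.
 */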
680 
681 static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
682 						     u32 chain_index,
683 						     struct net_device *net_dev)
684 {
685 	struct efx_tc_recirc_id *rid, *old;
686 	int rc;
687 
688 	rid = kzalloc(sizeof(*rid), GFP_USER);
689 	if (!rid)
690 		return ERR_PTR(-ENOMEM);
691 	rid->chain_index = chain_index;
692 	/* We don't take a reference here, because it's implied: if there's
693 	 * a rule on the net_dev that's been offloaded to us, then the net_dev
694 	 * can't go away until the rule has been deoffloaded.
695 	 */
696 	rid->net_dev = net_dev;
697 	old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht,
698 						&rid->linkage,
699 						efx_tc_recirc_ht_params);
700 	if (old) {
701 		/* don't need our new entry */
702 		kfree(rid);
703 		if (!refcount_inc_not_zero(&old->ref))
704 			return ERR_PTR(-EAGAIN);
705 		/* existing entry found */
706 		rid = old;
707 	} else {
708 		rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER);
709 		if (rc < 0) {
710 			rhashtable_remove_fast(&efx->tc->recirc_ht,
711 					       &rid->linkage,
712 					       efx_tc_recirc_ht_params);
713 			kfree(rid);
714 			return ERR_PTR(rc);
715 		}
716 		rid->fw_id = rc;
717 		refcount_set(&rid->ref, 1);
718 	}
719 	return rid;
720 }
721 
722 static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid)
723 {
724 	if (!refcount_dec_and_test(&rid->ref))
725 		return; /* still in use */
726 	rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage,
727 			       efx_tc_recirc_ht_params);
728 	ida_free(&efx->tc->recirc_ida, rid->fw_id);
729 	kfree(rid);
730 }
731 
732 static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
733 {
734 	efx_mae_delete_rule(efx, rule->fw_id);
735 
736 	/* Release entries in subsidiary tables */
737 	efx_tc_free_action_set_list(efx, &rule->acts, true);
738 	if (rule->match.rid)
739 		efx_tc_put_recirc_id(efx, rule->match.rid);
740 	if (rule->match.encap)
741 		efx_tc_flower_release_encap_match(efx, rule->match.encap);
742 	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
743 }
744 
745 static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
746 {
747 	switch (typ) {
748 	case EFX_ENCAP_TYPE_NONE:
749 		return "none";
750 	case EFX_ENCAP_TYPE_VXLAN:
751 		return "vxlan";
752 	case EFX_ENCAP_TYPE_GENEVE:
753 		return "geneve";
754 	default:
755 		pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
756 		return "unknown";
757 	}
758 }
759 
760 /* For details of action order constraints, refer to SF-123102-TC-1 §12.6.1 */
761 enum efx_tc_action_order {
762 	EFX_TC_AO_DECAP,
763 	EFX_TC_AO_DEC_TTL,
764 	EFX_TC_AO_PEDIT_MAC_ADDRS,
765 	EFX_TC_AO_VLAN_POP,
766 	EFX_TC_AO_VLAN_PUSH,
767 	EFX_TC_AO_COUNT,
768 	EFX_TC_AO_ENCAP,
769 	EFX_TC_AO_DELIVER
770 };
771 /* Determine whether we can add @new action without violating order */
772 static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
773 					  enum efx_tc_action_order new)
774 {
775 	switch (new) {
776 	case EFX_TC_AO_DECAP:
777 		if (act->decap)
778 			return false;
779 		/* PEDIT_MAC_ADDRS must not happen before DECAP, though it
780 		 * can wait until much later
781 		 */
782 		if (act->dst_mac || act->src_mac)
783 			return false;
784 
785 		/* Decrementing ttl must not happen before DECAP */
786 		if (act->do_ttl_dec)
787 			return false;
788 		fallthrough;
789 	case EFX_TC_AO_VLAN_POP:
790 		if (act->vlan_pop >= 2)
791 			return false;
792 		/* If we've already pushed a VLAN, we can't then pop it;
793 		 * the hardware would instead try to pop an existing VLAN
794 		 * before pushing the new one.
795 		 */
796 		if (act->vlan_push)
797 			return false;
798 		fallthrough;
799 	case EFX_TC_AO_VLAN_PUSH:
800 		if (act->vlan_push >= 2)
801 			return false;
802 		fallthrough;
803 	case EFX_TC_AO_COUNT:
804 		if (act->count)
805 			return false;
806 		fallthrough;
807 	case EFX_TC_AO_PEDIT_MAC_ADDRS:
808 	case EFX_TC_AO_ENCAP:
809 		if (act->encap_md)
810 			return false;
811 		fallthrough;
812 	case EFX_TC_AO_DELIVER:
813 		return !act->deliver;
814 	case EFX_TC_AO_DEC_TTL:
815 		if (act->encap_md)
816 			return false;
817 		return !act->do_ttl_dec;
818 	default:
819 		/* Bad caller.  Whatever they wanted to do, say they can't. */
820 		WARN_ON_ONCE(1);
821 		return false;
822 	}
823 }
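
/* Usage sketch (illustrative): the action parsers below consult this before
 * setting each flag on the cursor, e.g. for a decap action:
 *
 *	if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
 *		NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
 *		return -EINVAL;
 *	}
 *	act->decap = 1;
 */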
824 
825 /**
826  * DOC: TC conntrack sequences
827  *
828  * The MAE hardware can handle at most two rounds of action rule matching,
829  * consequently we support conntrack through the notion of a "left-hand side
830  * rule".  This is a rule which typically contains only the actions "ct" and
831  * "goto chain N", and corresponds to one or more "right-hand side rules" in
832  * chain N, which typically match on +trk+est, and may perform ct(nat) actions.
833  * RHS rules go in the Action Rule table as normal but with a nonzero recirc_id
834  * (the hardware equivalent of chain_index), while LHS rules may go in either
835  * the Action Rule or the Outer Rule table, the latter being preferred for
836  * performance reasons, and set both DO_CT and a recirc_id in their response.
837  *
838  * Besides the RHS rules, there are often also similar rules matching on
839  * +trk+new which perform the ct(commit) action.  These are not offloaded.
840  */
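
/* An illustrative rule pair of the shape described above (device names are
 * placeholders):
 *
 *	tc filter add dev $PF ingress chain 0 proto ip flower \
 *		action ct pipe action goto chain 1
 *	tc filter add dev $PF ingress chain 1 proto ip flower \
 *		ct_state +trk+est action mirred egress redirect dev $REP
 *
 * The first is the LHS rule (conntrack lookup, then recirculate to chain 1);
 * the second is an RHS rule, installed with a nonzero recirc_id.
 */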
841 
842 static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr,
843 				    struct efx_tc_match *match)
844 {
845 	const struct flow_action_entry *fa;
846 	int i;
847 
848 	flow_action_for_each(i, fa, &fr->action) {
849 		switch (fa->id) {
850 		case FLOW_ACTION_GOTO:
851 			return true;
852 		case FLOW_ACTION_CT:
853 			/* If rule is -trk, or doesn't mention trk at all, then
854 			 * a CT action implies a conntrack lookup (hence it's an
855 			 * LHS rule).  If rule is +trk, then a CT action could
856 			 * just be ct(nat) or even ct(commit) (though the latter
857 			 * can't be offloaded).
858 			 */
859 			if (!match->mask.ct_state_trk || !match->value.ct_state_trk)
860 				return true;
861 			break;
862 		default:
863 			break;
864 		}
865 	}
866 	return false;
867 }
868 
869 static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx,
870 					    struct flow_cls_offload *tc,
871 					    struct flow_rule *fr,
872 					    struct net_device *net_dev,
873 					    struct efx_tc_lhs_rule *rule)
875 {
876 	struct netlink_ext_ack *extack = tc->common.extack;
877 	struct efx_tc_lhs_action *act = &rule->lhs_act;
878 	const struct flow_action_entry *fa;
879 	bool pipe = true;
880 	int i;
881 
882 	flow_action_for_each(i, fa, &fr->action) {
883 		struct efx_tc_ct_zone *ct_zone;
884 		struct efx_tc_recirc_id *rid;
885 
886 		if (!pipe) {
887 			/* more actions after a non-pipe action */
888 			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
889 			return -EINVAL;
890 		}
891 		switch (fa->id) {
892 		case FLOW_ACTION_GOTO:
893 			if (!fa->chain_index) {
894 				NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw");
895 				return -EOPNOTSUPP;
896 			}
897 			rid = efx_tc_get_recirc_id(efx, fa->chain_index,
898 						   net_dev);
899 			if (IS_ERR(rid)) {
900 				NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index");
901 				return PTR_ERR(rid);
902 			}
903 			act->rid = rid;
904 			if (fa->hw_stats) {
905 				struct efx_tc_counter_index *cnt;
906 
907 				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
908 					NL_SET_ERR_MSG_FMT_MOD(extack,
909 							       "hw_stats_type %u not supported (only 'delayed')",
910 							       fa->hw_stats);
911 					return -EOPNOTSUPP;
912 				}
913 				cnt = efx_tc_flower_get_counter_index(efx, tc->cookie,
914 								      EFX_TC_COUNTER_TYPE_OR);
915 				if (IS_ERR(cnt)) {
916 					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
917 					return PTR_ERR(cnt);
918 				}
919 				WARN_ON(act->count); /* can't happen */
920 				act->count = cnt;
921 			}
922 			pipe = false;
923 			break;
924 		case FLOW_ACTION_CT:
925 			if (act->zone) {
926 				NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions");
927 				return -EOPNOTSUPP;
928 			}
929 			if (fa->ct.action & (TCA_CT_ACT_COMMIT |
930 					     TCA_CT_ACT_FORCE)) {
931 				NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force");
932 				return -EOPNOTSUPP;
933 			}
934 			if (fa->ct.action & TCA_CT_ACT_CLEAR) {
935 				NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule");
936 				return -EOPNOTSUPP;
937 			}
938 			if (fa->ct.action & (TCA_CT_ACT_NAT |
939 					     TCA_CT_ACT_NAT_SRC |
940 					     TCA_CT_ACT_NAT_DST)) {
941 				NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet");
942 				return -EOPNOTSUPP;
943 			}
944 			if (fa->ct.action) {
945 				NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule\n",
946 				NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule",
947 				return -EOPNOTSUPP;
948 			}
949 			ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone,
950 							  fa->ct.flow_table);
951 			if (IS_ERR(ct_zone)) {
952 				NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates");
953 				return PTR_ERR(ct_zone);
954 			}
955 			act->zone = ct_zone;
956 			break;
957 		default:
958 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule",
959 					       fa->id);
960 			return -EOPNOTSUPP;
961 		}
962 	}
963 
964 	if (pipe) {
965 		NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule");
966 		return -EOPNOTSUPP;
967 	}
968 	return 0;
969 }
970 
971 static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
972 					      struct efx_tc_lhs_action *act)
973 {
974 	if (act->rid)
975 		efx_tc_put_recirc_id(efx, act->rid);
976 	if (act->zone)
977 		efx_tc_ct_unregister_zone(efx, act->zone);
978 	if (act->count)
979 		efx_tc_flower_put_counter_index(efx, act->count);
980 }
981 
982 /**
983  * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
984  *
985  * @dst_mac_32:	dst_mac[0:3] has been populated
986  * @dst_mac_16:	dst_mac[4:5] has been populated
987  * @src_mac_16:	src_mac[0:1] has been populated
988  * @src_mac_32:	src_mac[2:5] has been populated
989  * @dst_mac:	h_dest field of ethhdr
990  * @src_mac:	h_source field of ethhdr
991  *
992  * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
993  * necessarily equate to whole fields of the packet header, this
994  * structure is used to hold the cumulative effect of the partial
995  * field pedits that have been processed so far.
996  */
997 struct efx_tc_mangler_state {
998 	u8 dst_mac_32:1; /* eth->h_dest[0:3] */
999 	u8 dst_mac_16:1; /* eth->h_dest[4:5] */
1000 	u8 src_mac_16:1; /* eth->h_source[0:1] */
1001 	u8 src_mac_32:1; /* eth->h_source[2:5] */
1002 	unsigned char dst_mac[ETH_ALEN];
1003 	unsigned char src_mac[ETH_ALEN];
1004 };
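
/* Example (illustrative): TC delivers "pedit ex munge eth dst set
 * 11:22:33:44:55:66" as two 32-bit FLOW_ACTION_MANGLE entries: one at
 * offset 0 with mask 0, filling dst_mac[0:3] and setting @dst_mac_32, and
 * one at offset 4 with mask 0xffff0000, filling dst_mac[4:5] and setting
 * @dst_mac_16.  Only once both are present can efx_tc_complete_mac_mangle()
 * below emit a single whole-field MAC edit.
 */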
1005 
1006 /**
1007  * efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
1008  * @efx:	NIC we're installing a flow rule on
1009  * @act:	action set (cursor) to update
1010  * @mung:	accumulated partial mangles
1011  * @extack:	netlink extended ack for reporting errors
1012  *
1013  * Check @mung to find any combinations of partial mangles that can be
1014  * combined into a complete packet field edit, add that edit to @act,
1015  * and consume the partial mangles from @mung.
1016  */
1017 static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
1018 				      struct efx_tc_action_set *act,
1019 				      struct efx_tc_mangler_state *mung,
1020 				      struct netlink_ext_ack *extack)
1021 {
1022 	struct efx_tc_mac_pedit_action *ped;
1023 
1024 	if (mung->dst_mac_32 && mung->dst_mac_16) {
1025 		ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
1026 		if (IS_ERR(ped))
1027 			return PTR_ERR(ped);
1028 
1029 		/* If dst_mac was already populated, drop the old entry's ref */
1030 		if (act->dst_mac)
1031 			efx_tc_flower_put_mac(efx, act->dst_mac);
1032 
1033 		act->dst_mac = ped;
1034 
1035 		/* consume the incomplete state */
1036 		mung->dst_mac_32 = 0;
1037 		mung->dst_mac_16 = 0;
1038 	}
1039 	if (mung->src_mac_16 && mung->src_mac_32) {
1040 		ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
1041 		if (IS_ERR(ped))
1042 			return PTR_ERR(ped);
1043 
1044 		/* If src_mac was already populated, drop the old entry's ref */
1045 		if (act->src_mac)
1046 			efx_tc_flower_put_mac(efx, act->src_mac);
1047 
1048 		act->src_mac = ped;
1049 
1050 		/* consume the incomplete state */
1051 		mung->src_mac_32 = 0;
1052 		mung->src_mac_16 = 0;
1053 	}
1054 	return 0;
1055 }
1056 
1057 static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
1058 			    const struct flow_action_entry *fa,
1059 			    struct netlink_ext_ack *extack)
1060 {
1061 	switch (fa->mangle.htype) {
1062 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1063 		switch (fa->mangle.offset) {
1064 		case offsetof(struct iphdr, ttl):
1065 			/* check that pedit applies to ttl only */
1066 			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
1067 				break;
1068 
1069 			/* Adding 0xff is equivalent to decrementing the ttl.
1070 			 * Other added values are not supported.
1071 			 */
1072 			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
1073 				break;
1074 
1075 			/* check that we do not decrement ttl twice */
1076 			if (!efx_tc_flower_action_order_ok(act,
1077 							   EFX_TC_AO_DEC_TTL)) {
1078 				NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl");
1079 				return -EOPNOTSUPP;
1080 			}
1081 			act->do_ttl_dec = 1;
1082 			return 0;
1083 		default:
1084 			break;
1085 		}
1086 		break;
1087 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
1088 		switch (fa->mangle.offset) {
1089 		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
1090 			/* check that pedit applies to hoplimit only */
1091 			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
1092 				break;
1093 
1094 			/* Adding 0xff is equivalent to decrementing the hoplimit.
1095 			 * Other added values are not supported.
1096 			 */
1097 			if ((fa->mangle.val >> 24) != U8_MAX)
1098 				break;
1099 
1100 			/* check that we do not decrement hoplimit twice */
1101 			if (!efx_tc_flower_action_order_ok(act,
1102 							   EFX_TC_AO_DEC_TTL)) {
1103 				NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl");
1104 				return -EOPNOTSUPP;
1105 			}
1106 			act->do_ttl_dec = 1;
1107 			return 0;
1108 		default:
1109 			break;
1110 		}
1111 		break;
1112 	default:
1113 		break;
1114 	}
1115 
1116 	NL_SET_ERR_MSG_FMT_MOD(extack,
1117 			       "Unsupported: ttl add action type %x %x %x/%x",
1118 			       fa->mangle.htype, fa->mangle.offset,
1119 			       fa->mangle.val, fa->mangle.mask);
1120 	return -EOPNOTSUPP;
1121 }
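
/* Why adding 0xff decrements: the pedit "add" wraps within the byte, so for
 * any ttl t > 0, (t + 0xff) & 0xff == t - 1.  For example t = 64:
 * (64 + 255) & 255 == 319 & 255 == 63.  The IPv6 case is identical except
 * that hop_limit occupies the top byte of the 32-bit word, hence the
 * (fa->mangle.val >> 24) check above.
 */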
1122 
1123 /**
1124  * efx_tc_mangle() - handle a single 32-bit (or less) pedit
1125  * @efx:	NIC we're installing a flow rule on
1126  * @act:	action set (cursor) to update
1127  * @fa:		FLOW_ACTION_MANGLE action metadata
1128  * @mung:	accumulator for partial mangles
1129  * @extack:	netlink extended ack for reporting errors
1130  * @match:	original match used along with the mangle action
1131  *
1132  * Identify the fields written by a FLOW_ACTION_MANGLE, and record
1133  * the partial mangle state in @mung.  If this mangle completes an
1134  * earlier partial mangle, consume and apply to @act by calling
1135  * efx_tc_complete_mac_mangle().
1136  */
1138 static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
1139 			 const struct flow_action_entry *fa,
1140 			 struct efx_tc_mangler_state *mung,
1141 			 struct netlink_ext_ack *extack,
1142 			 struct efx_tc_match *match)
1143 {
1144 	__le32 mac32;
1145 	__le16 mac16;
1146 	u8 tr_ttl;
1147 
1148 	switch (fa->mangle.htype) {
1149 	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1150 		BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
1151 		BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
1152 		if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
1153 			NL_SET_ERR_MSG_MOD(extack,
1154 					   "Pedit mangle mac action violates action order");
1155 			return -EOPNOTSUPP;
1156 		}
1157 		switch (fa->mangle.offset) {
1158 		case 0:
1159 			if (fa->mangle.mask) {
1160 				NL_SET_ERR_MSG_FMT_MOD(extack,
1161 						       "Unsupported: mask (%#x) of eth.dst32 mangle",
1162 						       fa->mangle.mask);
1163 				return -EOPNOTSUPP;
1164 			}
1165 			/* Ethernet address is little-endian */
1166 			mac32 = cpu_to_le32(fa->mangle.val);
1167 			memcpy(mung->dst_mac, &mac32, sizeof(mac32));
1168 			mung->dst_mac_32 = 1;
1169 			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1170 		case 4:
1171 			if (fa->mangle.mask == 0xffff) {
1172 				mac16 = cpu_to_le16(fa->mangle.val >> 16);
1173 				memcpy(mung->src_mac, &mac16, sizeof(mac16));
1174 				mung->src_mac_16 = 1;
1175 			} else if (fa->mangle.mask == 0xffff0000) {
1176 				mac16 = cpu_to_le16((u16)fa->mangle.val);
1177 				memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
1178 				mung->dst_mac_16 = 1;
1179 			} else {
1180 				NL_SET_ERR_MSG_FMT_MOD(extack,
1181 						       "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b",
1182 						       fa->mangle.mask);
1183 				return -EOPNOTSUPP;
1184 			}
1185 			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1186 		case 8:
1187 			if (fa->mangle.mask) {
1188 				NL_SET_ERR_MSG_FMT_MOD(extack,
1189 						       "Unsupported: mask (%#x) of eth.src32 mangle",
1190 						       fa->mangle.mask);
1191 				return -EOPNOTSUPP;
1192 			}
1193 			mac32 = cpu_to_le32(fa->mangle.val);
1194 			memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
1195 			mung->src_mac_32 = 1;
1196 			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1197 		default:
1198 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u %x/%x",
1199 					       fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
1200 			return -EOPNOTSUPP;
1201 		}
1202 		break;
1203 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1204 		switch (fa->mangle.offset) {
1205 		case offsetof(struct iphdr, ttl):
1206 			/* we currently only support pedit IP4 when it applies
1207 			 * to TTL and then only when it can be achieved with a
1208 			 * decrement ttl action
1209 			 */
1210 
1211 			/* check that pedit applies to ttl only */
1212 			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
1213 				NL_SET_ERR_MSG_FMT_MOD(extack,
1214 						       "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl",
1215 						       fa->mangle.mask);
1216 				return -EOPNOTSUPP;
1217 			}
1218 
1219 			/* we can only convert to a dec ttl when we have an
1220 			 * exact match on the ttl field
1221 			 */
1222 			if (match->mask.ip_ttl != U8_MAX) {
1223 				NL_SET_ERR_MSG_FMT_MOD(extack,
1224 						       "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)",
1225 						       match->mask.ip_ttl);
1226 				return -EOPNOTSUPP;
1227 			}
1228 
1229 			/* check that we don't try to decrement 0, which equates
1230 			 * to setting the ttl to 0xff
1231 			 */
1232 			if (match->value.ip_ttl == 0) {
1233 				NL_SET_ERR_MSG_MOD(extack,
1234 						   "Unsupported: we cannot decrement ttl past 0");
1235 				return -EOPNOTSUPP;
1236 			}
1237 
1238 			/* check that we do not decrement ttl twice */
1239 			if (!efx_tc_flower_action_order_ok(act,
1240 							   EFX_TC_AO_DEC_TTL)) {
1241 				NL_SET_ERR_MSG_MOD(extack,
1242 						   "Unsupported: multiple dec ttl");
1243 				return -EOPNOTSUPP;
1244 			}
1245 
1246 			/* check pedit can be achieved with decrement action */
1247 			tr_ttl = match->value.ip_ttl - 1;
1248 			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
1249 				act->do_ttl_dec = 1;
1250 				return 0;
1251 			}
1252 
1253 			fallthrough;
1254 		default:
1255 			NL_SET_ERR_MSG_FMT_MOD(extack,
1256 					       "Unsupported: only support mangle on the ttl field (offset is %u)",
1257 					       fa->mangle.offset);
1258 			return -EOPNOTSUPP;
1259 		}
1260 		break;
1261 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
1262 		switch (fa->mangle.offset) {
1263 		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
1264 			/* we currently only support pedit IP6 when it applies
1265 			 * to the hoplimit and then only when it can be achieved
1266 			 * with a decrement hoplimit action
1267 			 */
1268 
1269 			/* check that pedit applies to ttl only */
1270 			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
1271 				NL_SET_ERR_MSG_FMT_MOD(extack,
1272 						       "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
1273 						       fa->mangle.mask);
1274 
1275 				return -EOPNOTSUPP;
1276 			}
1277 
1278 			/* we can only convert to a dec ttl when we have an
1279 			 * exact match on the ttl field
1280 			 */
1281 			if (match->mask.ip_ttl != U8_MAX) {
1282 				NL_SET_ERR_MSG_FMT_MOD(extack,
1283 						       "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)",
1284 						       match->mask.ip_ttl);
1285 				return -EOPNOTSUPP;
1286 			}
1287 
1288 			/* check that we don't try to decrement 0, which equates
1289 			 * to setting the ttl to 0xff
1290 			 */
1291 			if (match->value.ip_ttl == 0) {
1292 				NL_SET_ERR_MSG_MOD(extack,
1293 						   "Unsupported: we cannot decrement hop_limit past 0");
1294 				return -EOPNOTSUPP;
1295 			}
1296 
1297 			/* check that we do not decrement hoplimit twice */
1298 			if (!efx_tc_flower_action_order_ok(act,
1299 							   EFX_TC_AO_DEC_TTL)) {
1300 				NL_SET_ERR_MSG_MOD(extack,
1301 						   "Unsupported: multiple dec ttl");
1302 				return -EOPNOTSUPP;
1303 			}
1304 
1305 			/* check pedit can be achieved with decrement action */
1306 			tr_ttl = match->value.ip_ttl - 1;
1307 			if ((fa->mangle.val >> 24) == tr_ttl) {
1308 				act->do_ttl_dec = 1;
1309 				return 0;
1310 			}
1311 
1312 			fallthrough;
1313 		default:
1314 			NL_SET_ERR_MSG_FMT_MOD(extack,
1315 					       "Unsupported: only support mangle on the hop_limit field (offset is %u)", fa->mangle.offset);
1316 			return -EOPNOTSUPP;
1317 		}
1318 	default:
1319 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
1320 				       fa->mangle.htype);
1321 		return -EOPNOTSUPP;
1322 	}
1323 	return 0;
1324 }
1325 
1326 /**
1327  * efx_tc_incomplete_mangle() - check for leftover partial pedits
1328  * @mung:	accumulator for partial mangles
1329  * @extack:	netlink extended ack for reporting errors
1330  *
1331  * Since the MAE can only overwrite whole fields, any partial
1332  * field mangle left over on reaching packet delivery (mirred or
1333  * end of TC actions) cannot be offloaded.  Check for any such
1334  * and reject them with -%EOPNOTSUPP.
1335  */
1337 static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
1338 				    struct netlink_ext_ack *extack)
1339 {
1340 	if (mung->dst_mac_32 || mung->dst_mac_16) {
1341 		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
1342 		return -EOPNOTSUPP;
1343 	}
1344 	if (mung->src_mac_16 || mung->src_mac_32) {
1345 		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
1346 		return -EOPNOTSUPP;
1347 	}
1348 	return 0;
1349 }
1350 
1351 static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
1352 					 struct net_device *net_dev,
1353 					 struct flow_cls_offload *tc)
1354 {
1355 	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
1356 	struct netlink_ext_ack *extack = tc->common.extack;
1357 	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
1358 	struct efx_tc_action_set *act = NULL;
1359 	bool found = false, uplinked = false;
1360 	const struct flow_action_entry *fa;
1361 	struct efx_tc_match match;
1362 	struct efx_rep *to_efv;
1363 	s64 rc;
1364 	int i;
1365 
1366 	/* Parse match */
1367 	memset(&match, 0, sizeof(match));
1368 	rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
1369 	if (rc)
1370 		return rc;
1371 	/* The rule as given to us doesn't specify a source netdevice.
1372 	 * But determining whether packets from a VF should match it is
1373 	 * complicated, so leave those to the software slowpath: qualify
1374 	 * the filter with source m-port == wire.
1375 	 */
1376 	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
1377 	if (rc < 0) {
1378 		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
1379 		return rc;
1380 	}
1381 	match.value.ingress_port = rc;
1382 	match.mask.ingress_port = ~0;
1383 
1384 	if (tc->common.chain_index) {
1385 		struct efx_tc_recirc_id *rid;
1386 
1387 		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
1388 		if (IS_ERR(rid)) {
1389 			NL_SET_ERR_MSG_FMT_MOD(extack,
1390 					       "Failed to allocate a hardware recirculation ID for chain_index %u",
1391 					       tc->common.chain_index);
1392 			return PTR_ERR(rid);
1393 		}
1394 		match.rid = rid;
1395 		match.value.recirc_id = rid->fw_id;
1396 	}
1397 	match.mask.recirc_id = 0xff;
1398 
1399 	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
1400 	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
1401 	 */
1402 	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
1403 	    match.mask.ct_state_est && match.value.ct_state_est)
1404 		match.mask.ct_state_trk = 0;
1405 	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
1406 	 * match +trk-est (CT_HIT=0) despite being on an established connection.
1407 	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
1408 	 * still hit the software path.
1409 	 */
1410 	if (match.mask.ct_state_est && !match.value.ct_state_est) {
1411 		if (match.value.tcp_syn_fin_rst) {
1412 			/* Can't offload this combination */
1413 			rc = -EOPNOTSUPP;
1414 			goto release;
1415 		}
1416 		match.mask.tcp_syn_fin_rst = true;
1417 	}
1418 
1419 	flow_action_for_each(i, fa, &fr->action) {
1420 		switch (fa->id) {
1421 		case FLOW_ACTION_REDIRECT:
1422 		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
1423 			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
1424 			if (IS_ERR(to_efv))
1425 				continue;
1426 			found = true;
1427 			break;
1428 		default:
1429 			break;
1430 		}
1431 	}
1432 	if (!found) { /* We don't care. */
1433 		netif_dbg(efx, drv, efx->net_dev,
1434 			  "Ignoring foreign filter that doesn't egdev us\n");
1435 		rc = -EOPNOTSUPP;
1436 		goto release;
1437 	}
1438 
1439 	rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
1440 	if (rc)
1441 		goto release;
1442 
1443 	if (efx_tc_match_is_encap(&match.mask)) {
1444 		enum efx_encap_type type;
1445 
1446 		type = efx_tc_indr_netdev_type(net_dev);
1447 		if (type == EFX_ENCAP_TYPE_NONE) {
1448 			NL_SET_ERR_MSG_MOD(extack,
1449 					   "Egress encap match on unsupported tunnel device");
1450 			rc = -EOPNOTSUPP;
1451 			goto release;
1452 		}
1453 
1454 		rc = efx_mae_check_encap_type_supported(efx, type);
1455 		if (rc) {
1456 			NL_SET_ERR_MSG_FMT_MOD(extack,
1457 					       "Firmware reports no support for %s encap match",
1458 					       efx_tc_encap_type_name(type));
1459 			goto release;
1460 		}
1461 
1462 		rc = efx_tc_flower_record_encap_match(efx, &match, type,
1463 						      EFX_TC_EM_DIRECT, 0, 0,
1464 						      extack);
1465 		if (rc)
1466 			goto release;
1467 	} else {
1468 		/* This is not a tunnel decap rule, ignore it */
1469 		netif_dbg(efx, drv, efx->net_dev,
1470 			  "Ignoring foreign filter without encap match\n");
1471 		rc = -EOPNOTSUPP;
1472 		goto release;
1473 	}
1474 
1475 	rule = kzalloc(sizeof(*rule), GFP_USER);
1476 	if (!rule) {
1477 		rc = -ENOMEM;
1478 		goto release;
1479 	}
1480 	INIT_LIST_HEAD(&rule->acts.list);
1481 	rule->cookie = tc->cookie;
1482 	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
1483 						&rule->linkage,
					efx_tc_match_action_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring already-offloaded rule (cookie %lx)\n",
			  tc->cookie);
		rc = -EEXIST;
		goto release;
	}

	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/* Parse actions.  For foreign rules we only support decap & redirect.
	 * See corresponding code in efx_tc_flower_replace() for theory of
	 * operation & how 'act' cursor is used.
	 */
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;

		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			/* See corresponding code in efx_tc_flower_replace() for
			 * long explanations of what's going on here.
			 */
			save = *act;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *ctr;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					rc = -EOPNOTSUPP;
					goto release;
				}
				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
					rc = -EOPNOTSUPP;
					goto release;
				}

				ctr = efx_tc_flower_get_counter_index(efx,
								      tc->cookie,
								      EFX_TC_COUNTER_TYPE_AR);
				if (IS_ERR(ctr)) {
					rc = PTR_ERR(ctr);
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					goto release;
				}
				act->count = ctr;
				INIT_LIST_HEAD(&act->count_user);
			}

			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack,
						   "Deliver action violates action order (can't happen)");
				goto release;
			}
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			/* PF implies egdev is us, in which case we really
			 * want to deliver to the uplink (because this is an
			 * ingress filter).  If we don't recognise the egdev
			 * at all, then we'd better trap so SW can handle it.
			 */
			if (IS_ERR(to_efv))
				to_efv = EFX_EFV_PF;
			if (to_efv == EFX_EFV_PF) {
				if (uplinked)
					break;
				uplinked = true;
			}
			rc = efx_tc_flower_internal_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
				goto release;
			}
			act->decap = 1;
			/* If we previously delivered/trapped to uplink, now
			 * that we've decapped we'll want another copy if we
			 * try to deliver/trap to uplink again.
			 */
			uplinked = false;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	if (act) {
		if (!uplinked) {
			/* Not shot/redirected, so deliver to default dest (which is
			 * the uplink, as this is an ingress filter)
			 */
			efx_mae_mport_uplink(efx, &act->dest_mport);
			act->deliver = 1;
		}
		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}

	rule->match = match;

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed foreign filter (cookie %lx)\n",
		  tc->cookie);

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 rule->acts.fw_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (match.rid)
		efx_tc_put_recirc_id(efx, match.rid);
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		if (!old)
			rhashtable_remove_fast(&efx->tc->match_action_ht,
					       &rule->linkage,
					       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	if (match.encap)
		efx_tc_flower_release_encap_match(efx, match.encap);
	return rc;
}

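/* Insert an LHS ("left-hand side") rule, whose actions are lookup side
 * effects (e.g. conntrack lookup and recirculation to another chain) rather
 * than packet edits or delivery.  These are stored in their own hashtable
 * (lhs_rule_ht) and hardware table, separate from ordinary match-action
 * rules.
 */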
static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
				     struct flow_cls_offload *tc,
				     struct flow_rule *fr,
				     struct efx_tc_match *match,
				     struct efx_rep *efv,
				     struct net_device *net_dev)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *rule, *old;
	int rc;

	if (tc->common.chain_index) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
		return -EOPNOTSUPP;
	}

	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
		return -EOPNOTSUPP;
	}
	/* LHS rules are always -trk, so we don't need to match on that */
	match->mask.ct_state_trk = 0;
	match->value.ct_state_trk = 0;

	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
	if (rc)
		return rc;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule)
		return -ENOMEM;
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
						&rule->linkage,
						efx_tc_lhs_rule_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		goto release;
	}

	/* Parse actions */
	/* See note in efx_tc_flower_replace() regarding passed net_dev
	 * (used for efx_tc_get_recirc_id()).
	 */
	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule);
	if (rc)
		goto release;

	rule->match = *match;

	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release;
	}
	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed lhs rule (cookie %lx)\n",
		  tc->cookie);
	return 0;

release:
	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
	if (!old)
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
				       efx_tc_lhs_rule_ht_params);
	kfree(rule);
	return rc;
}

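/* Offload a TC flower rule attached to one of our own netdevs (the PF or a
 * representor).  As an illustration only (device names are hypothetical), a
 * rule this path can offload might be installed with:
 *   tc filter add dev $PF ingress protocol ip flower dst_ip 192.0.2.1 \
 *       action mirred egress redirect dev $VFREP
 */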
static int efx_tc_flower_replace(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc,
				 struct efx_rep *efv)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	const struct ip_tunnel_info *encap_info = NULL;
	struct efx_tc_flow_rule *rule = NULL, *old;
	struct efx_tc_mangler_state mung = {};
	struct efx_tc_action_set *act = NULL;
	const struct flow_action_entry *fa;
	struct efx_rep *from_efv, *to_efv;
	struct efx_tc_match match;
	u32 acts_id;
	s64 rc;
	int i;

	if (!tc_can_offload_extack(efx->net_dev, extack))
		return -EOPNOTSUPP;
	if (WARN_ON(!efx->tc))
		return -ENETDOWN;
	if (WARN_ON(!efx->tc->up))
		return -ENETDOWN;

	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
	if (IS_ERR(from_efv)) {
		/* Not from our PF or representors, so probably a tunnel dev */
		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
	}

	if (efv != from_efv) {
		/* can't happen */
		NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
				       netdev_name(net_dev), efv ? "non-" : "",
				       from_efv ? "non-" : "");
		return -EINVAL;
	}

	/* Parse match */
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_external_mport(efx, from_efv);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;
	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
	if (rc)
		return rc;
	if (efx_tc_match_is_encap(&match.mask)) {
		NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
		return -EOPNOTSUPP;
	}

	if (efx_tc_rule_is_lhs_rule(fr, &match))
		return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv,
						 net_dev);

	/* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht).
	 * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing
	 * to the initial memset(), so we don't need to do anything in that case.
	 */
	if (tc->common.chain_index) {
		struct efx_tc_recirc_id *rid;

		/* Note regarding passed net_dev:
		 * VFreps and PF can share chain namespace, as they have
		 * distinct ingress_mports.  So we don't need to burn an
		 * extra recirc_id if both use the same chain_index.
		 * (Strictly speaking, we could give each VFrep its own
		 * recirc_id namespace that doesn't take IDs away from the
		 * PF, but that would require a bunch of additional IDAs -
		 * one for each representor - and that's not likely to be
		 * the main cause of recirc_id exhaustion anyway.)
		 */
		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index,
					   efx->net_dev);
		if (IS_ERR(rid)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Failed to allocate a hardware recirculation ID for chain_index %u",
					       tc->common.chain_index);
			return PTR_ERR(rid);
		}
		match.rid = rid;
		match.value.recirc_id = rid->fw_id;
	}
	match.mask.recirc_id = 0xff;

	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
	 */
	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
	    match.mask.ct_state_est && match.value.ct_state_est)
		match.mask.ct_state_trk = 0;
	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
	 * match +trk-est (CT_HIT=0) despite being on an established connection.
	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
	 * still hit the software path.
	 */
	if (match.mask.ct_state_est && !match.value.ct_state_est) {
		if (match.value.tcp_syn_fin_rst) {
			/* Can't offload this combination */
			rc = -EOPNOTSUPP;
			goto release;
		}
		match.mask.tcp_syn_fin_rst = true;
	}

	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
	if (rc)
		goto release;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release;
	}
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		rc = -EEXIST;
		goto release;
	}

	/* Parse actions */
	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/**
	 * DOC: TC action translation
	 *
	 * Actions in TC are sequential and cumulative, with delivery actions
	 * potentially anywhere in the order.  The EF100 MAE, however, takes
	 * an 'action set list' consisting of 'action sets', each of which is
	 * applied to the _original_ packet, and consists of a set of optional
	 * actions in a fixed order with delivery at the end.
	 * To translate between these two models, we maintain a 'cursor', @act,
	 * which describes the cumulative effect of all the packet-mutating
	 * actions encountered so far; on handling a delivery (mirred or drop)
	 * action, once the action-set has been inserted into hardware, we
	 * append @act to the action-set list (@rule->acts); if this is a pipe
	 * action (mirred mirror) we then allocate a new @act with a copy of
	 * the cursor state _before_ the delivery action, otherwise we set @act
	 * to %NULL.
	 * This ensures that every allocated action-set is either attached to
	 * @rule->acts or pointed to by @act (and never both), and that only
	 * those action-sets in @rule->acts exist in hardware.  Consequently,
	 * in the failure path, @act only needs to be freed in memory, whereas
	 * for @rule->acts we remove each action-set from hardware before
	 * freeing it (efx_tc_free_action_set_list()), even if the action-set
	 * list itself is not in hardware.
	 */
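	/* As an illustrative example of the above: the TC action list
	 *   "vlan pop; mirred mirror dev A; vlan push id 5; mirred redirect dev B"
	 * becomes two action-sets, each applied to the original packet:
	 *   { vlan_pop, deliver A } and { vlan_pop, vlan_push 5, deliver B },
	 * reproducing TC's cumulative semantics at each delivery point.
	 */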
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;
		u16 tci;

		if (!act) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			rc = -EINVAL;
			goto release;
		}

		if ((fa->id == FLOW_ACTION_REDIRECT ||
		     fa->id == FLOW_ACTION_MIRRED ||
		     fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
			struct efx_tc_counter_index *ctr;

			/* Currently the only actions that want stats are
			 * mirred and gact (ok, shot, trap, goto-chain), which
			 * means we want stats just before delivery.  Also,
			 * note that tunnel_key set shouldn't change the length
			 * — it's only the subsequent mirred that does that,
			 * and the stats are taken _before_ the mirred action
			 * happens.
			 */
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
				/* All supported actions that count either steal
				 * (gact shot, mirred redirect) or clone act
				 * (mirred mirror), so we should never get two
				 * count actions on one action_set.
				 */
				NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
				rc = -EOPNOTSUPP;
				goto release;
			}

			if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
						       fa->hw_stats);
				rc = -EOPNOTSUPP;
				goto release;
			}

			ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
							      EFX_TC_COUNTER_TYPE_AR);
			if (IS_ERR(ctr)) {
				rc = PTR_ERR(ctr);
				NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
				goto release;
			}
			act->count = ctr;
			INIT_LIST_HEAD(&act->count_user);
		}

		switch (fa->id) {
		case FLOW_ACTION_DROP:
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL; /* end of the line */
			break;
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			save = *act;

			if (encap_info) {
				struct efx_tc_encap_action *encap;

				if (!efx_tc_flower_action_order_ok(act,
								   EFX_TC_AO_ENCAP)) {
					rc = -EOPNOTSUPP;
					NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
					goto release;
				}
				encap = efx_tc_flower_create_encap_md(
						efx, encap_info, fa->dev, extack);
				if (IS_ERR_OR_NULL(encap)) {
					rc = PTR_ERR(encap);
					if (!rc)
						rc = -EIO; /* arbitrary */
					goto release;
				}
				act->encap_md = encap;
				list_add_tail(&act->encap_user, &encap->users);
				act->dest_mport = encap->dest_mport;
				act->deliver = 1;
				if (act->count && !WARN_ON(!act->count->cnt)) {
					/* This counter is used by an encap
					 * action, which needs a reference back
					 * so it can prod the neighbouring
					 * entry whenever traffic is seen.
					 */
					spin_lock_bh(&act->count->cnt->lock);
					list_add_tail(&act->count_user,
						      &act->count->cnt->users);
					spin_unlock_bh(&act->count->cnt->lock);
				}
				rc = efx_mae_alloc_action_set(efx, act);
				if (rc) {
					NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
					goto release;
				}
				list_add_tail(&act->list, &rule->acts.list);
				act->user = &rule->acts;
				act = NULL;
				if (fa->id == FLOW_ACTION_REDIRECT)
					break; /* end of the line */
				/* Mirror, so continue on with saved act */
				save.count = NULL;
				act = kzalloc(sizeof(*act), GFP_USER);
				if (!act) {
					rc = -ENOMEM;
					goto release;
				}
				*act = save;
				break;
			}

			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
				goto release;
			}

			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			if (IS_ERR(to_efv)) {
				NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
				rc = PTR_ERR(to_efv);
				goto release;
			}
			rc = efx_tc_flower_external_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			save.count = NULL;
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_VLAN_POP:
			if (act->vlan_push) {
				act->vlan_push--;
			} else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
				act->vlan_pop++;
			} else {
				NL_SET_ERR_MSG_MOD(extack,
						   "More than two VLAN pops, or action order violated");
				rc = -EINVAL;
				goto release;
			}
			break;
		case FLOW_ACTION_VLAN_PUSH:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack,
						   "More than two VLAN pushes, or action order violated");
				goto release;
			}
			tci = fa->vlan.vid & VLAN_VID_MASK;
			tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
			act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
			act->vlan_push++;
			break;
		case FLOW_ACTION_ADD:
			rc = efx_tc_pedit_add(efx, act, fa, extack);
			if (rc < 0)
				goto release;
			break;
		case FLOW_ACTION_MANGLE:
			rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
			if (rc < 0)
				goto release;
			break;
		case FLOW_ACTION_TUNNEL_ENCAP:
			if (encap_info) {
				/* Can't specify encap multiple times.
				 * If you want to overwrite an existing
				 * encap_info, use an intervening
				 * FLOW_ACTION_TUNNEL_DECAP to clear it.
				 */
				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
				rc = -EINVAL;
				goto release;
			}
			if (!fa->tunnel) {
				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
				rc = -EOPNOTSUPP;
				goto release;
			}
			encap_info = fa->tunnel;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			if (encap_info) {
				encap_info = NULL;
				break;
			}
			/* Since we don't support enc_key matches on ingress
			 * (and if we did there'd be no tunnel-device to give
			 * us a type), we can't offload a decap that's not
			 * just undoing a previous encap action.
			 */
			NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
			rc = -EOPNOTSUPP;
			goto release;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	rc = efx_tc_incomplete_mangle(&mung, extack);
	if (rc < 0)
		goto release;
	if (act) {
		/* Not shot/redirected, so deliver to default dest */
		if (from_efv == EFX_EFV_PF)
			/* Rule applies to traffic from the wire,
			 * and default dest is thus the PF
			 */
			efx_mae_mport_uplink(efx, &act->dest_mport);
		else
			/* Representor, so rule applies to traffic from
			 * representee, and default dest is thus the rep.
			 * All reps use the same mport for delivery
			 */
			efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
					    &act->dest_mport);
		act->deliver = 1;
		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed filter (cookie %lx)\n",
		  tc->cookie);

	rule->match = match;

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	if (from_efv == EFX_EFV_PF)
		/* PF netdev, so rule applies to traffic from wire */
		rule->fallback = &efx->tc->facts.pf;
	else
		/* repdev, so rule applies to traffic from representee */
		rule->fallback = &efx->tc->facts.reps;
	if (!efx_tc_check_ready(efx, rule)) {
		netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
		acts_id = rule->fallback->fw_id;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
		acts_id = rule->acts.fw_id;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 acts_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (match.rid)
		efx_tc_put_recirc_id(efx, match.rid);
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		if (!old)
			rhashtable_remove_fast(&efx->tc->match_action_ht,
					       &rule->linkage,
					       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	return rc;
}

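/* Remove an offloaded rule (either an LHS rule or a match-action rule) from
 * hardware and from the driver's hashtables, releasing any entries it held
 * in subsidiary tables.
 */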
static int efx_tc_flower_destroy(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *lhs_rule;
	struct efx_tc_flow_rule *rule;

	lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie,
					  efx_tc_lhs_rule_ht_params);
	if (lhs_rule) {
		/* Remove it from HW */
		efx_mae_remove_lhs_rule(efx, lhs_rule);
		/* Delete it from SW */
		efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act);
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage,
				       efx_tc_lhs_rule_ht_params);
		if (lhs_rule->match.encap)
			efx_tc_flower_release_encap_match(efx, lhs_rule->match.encap);
		netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n",
			  lhs_rule->cookie);
		kfree(lhs_rule);
		return 0;
	}

	rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
				      efx_tc_match_action_ht_params);
	if (!rule) {
		/* Only log a message if we're the ingress device.  Otherwise
		 * it's a foreign filter and we might just not have been
		 * interested (e.g. we might not have been the egress device
		 * either).
		 */
		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
			netif_warn(efx, drv, efx->net_dev,
				   "Filter %lx not found to remove\n", tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
		return -ENOENT;
	}

	/* Remove it from HW */
	efx_tc_delete_rule(efx, rule);
	/* Delete it from SW */
	rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
			       efx_tc_match_action_ht_params);
	netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
	kfree(rule);
	return 0;
}

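/* Report hardware stats for an offloaded rule.  Counter updates arrive
 * asynchronously from the NIC (hence only 'delayed' hw_stats are supported),
 * and we report just the delta since TC last asked.
 */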
static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
			       struct flow_cls_offload *tc)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_counter_index *ctr;
	struct efx_tc_counter *cnt;
	u64 packets, bytes;

	ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
	if (!ctr) {
		/* See comment in efx_tc_flower_destroy() */
		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
			if (net_ratelimit())
				netif_warn(efx, drv, efx->net_dev,
					   "Filter %lx not found for stats\n",
					   tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
		return -ENOENT;
	}
	if (WARN_ON(!ctr->cnt)) /* can't happen */
		return -EIO;
	cnt = ctr->cnt;

	spin_lock_bh(&cnt->lock);
	/* Report only new pkts/bytes since last time TC asked */
	packets = cnt->packets;
	bytes = cnt->bytes;
	flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
			  packets - cnt->old_packets, 0, cnt->touched,
			  FLOW_ACTION_HW_STATS_DELAYED);
	cnt->old_packets = packets;
	cnt->old_bytes = bytes;
	spin_unlock_bh(&cnt->lock);
	return 0;
}

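/* Entry point for TC flower offload requests.  Dispatches on tc->command,
 * serialised by efx->tc->mutex which protects all TC offload state.
 */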
int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
		  struct flow_cls_offload *tc, struct efx_rep *efv)
{
	int rc;

	if (!efx->tc)
		return -EOPNOTSUPP;

	mutex_lock(&efx->tc->mutex);
	switch (tc->command) {
	case FLOW_CLS_REPLACE:
		rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
		break;
	case FLOW_CLS_DESTROY:
		rc = efx_tc_flower_destroy(efx, net_dev, tc);
		break;
	case FLOW_CLS_STATS:
		rc = efx_tc_flower_stats(efx, net_dev, tc);
		break;
	default:
		rc = -EOPNOTSUPP;
		break;
	}
	mutex_unlock(&efx->tc->mutex);
	return rc;
}

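/* Default rules steer all traffic arriving on @ing_port to @eg_port.  They
 * are installed at EFX_TC_PRIO_DFLT so that offloaded TC rules (inserted at
 * EFX_TC_PRIO_TC) take precedence over them.
 */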
static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
					 u32 eg_port, struct efx_tc_flow_rule *rule)
{
	struct efx_tc_action_set_list *acts = &rule->acts;
	struct efx_tc_match *match = &rule->match;
	struct efx_tc_action_set *act;
	int rc;

	match->value.ingress_port = ing_port;
	match->mask.ingress_port = ~0;
	act = kzalloc(sizeof(*act), GFP_KERNEL);
	if (!act)
		return -ENOMEM;
	act->deliver = 1;
	act->dest_mport = eg_port;
	rc = efx_mae_alloc_action_set(efx, act);
	if (rc)
		goto fail1;
	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
	list_add_tail(&act->list, &acts->list);
	rc = efx_mae_alloc_action_set_list(efx, acts);
	if (rc)
		goto fail2;
	rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
				 acts->fw_id, &rule->fw_id);
	if (rc)
		goto fail3;
	return 0;
fail3:
	efx_mae_free_action_set_list(efx, acts);
fail2:
	list_del(&act->list);
	efx_mae_free_action_set(efx, act->fw_id);
fail1:
	kfree(act);
	return rc;
}

static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
{
	struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
	u32 ing_port, eg_port;

	efx_mae_mport_uplink(efx, &ing_port);
	efx_mae_mport_wire(efx, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
{
	struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
	u32 ing_port, eg_port;

	efx_mae_mport_wire(efx, &ing_port);
	efx_mae_mport_uplink(efx, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
{
	struct efx_tc_flow_rule *rule = &efv->dflt;
	struct efx_nic *efx = efv->parent;
	u32 ing_port, eg_port;

	efx_mae_mport_mport(efx, efv->mport, &ing_port);
	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
				     struct efx_tc_flow_rule *rule)
{
	if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
		efx_tc_delete_rule(efx, rule);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}

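/* Fallback action-sets just deliver to the default destination, like the
 * default rules.  A rule whose own actions are not yet ready for hardware
 * (e.g. an encap action awaiting neighbour resolution) is pointed at its
 * fallback instead; see the rule->fallback handling in
 * efx_tc_flower_replace().
 */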
static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
					  struct efx_tc_action_set_list *acts)
{
	struct efx_tc_action_set *act;
	int rc;

	act = kzalloc(sizeof(*act), GFP_KERNEL);
	if (!act)
		return -ENOMEM;
	act->deliver = 1;
	act->dest_mport = eg_port;
	rc = efx_mae_alloc_action_set(efx, act);
	if (rc)
		goto fail1;
	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
	list_add_tail(&act->list, &acts->list);
	rc = efx_mae_alloc_action_set_list(efx, acts);
	if (rc)
		goto fail2;
	return 0;
fail2:
	list_del(&act->list);
	efx_mae_free_action_set(efx, act->fw_id);
fail1:
	kfree(act);
	return rc;
}

static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
{
	struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
	u32 eg_port;

	efx_mae_mport_uplink(efx, &eg_port);
	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
}

static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
{
	struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
	u32 eg_port;

	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
}

static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
					     struct efx_tc_action_set_list *acts)
{
	efx_tc_free_action_set_list(efx, acts, true);
}

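/* All representors share a single m-port for delivery to the host; traffic
 * sent there is picked up by the catch-all filters installed in
 * efx_tc_insert_rep_filters() below.
 */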
static int efx_tc_configure_rep_mport(struct efx_nic *efx)
{
	u32 rep_mport_label;
	int rc;

	rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
	if (rc)
		return rc;
	pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
		efx->tc->reps_mport_id, rep_mport_label);
	/* Use mport *selector* as vport ID */
	efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
			    &efx->tc->reps_mport_vport_id);
	return 0;
}

static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
{
	efx_mae_free_mport(efx, efx->tc->reps_mport_id);
	efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
}

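/* Install catch-all unicast and multicast RX filters on the representors'
 * shared v-port, so that traffic delivered to the rep m-port is actually
 * received by the driver.
 */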
int efx_tc_insert_rep_filters(struct efx_nic *efx)
{
	struct efx_filter_spec promisc, allmulti;
	int rc;

	if (efx->type->is_vf)
		return 0;
	if (!efx->tc)
		return 0;
	efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
	efx_filter_set_uc_def(&promisc);
	efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
	rc = efx_filter_insert_filter(efx, &promisc, false);
	if (rc < 0)
		return rc;
	efx->tc->reps_filter_uc = rc;
	efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
	efx_filter_set_mc_def(&allmulti);
	efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
	rc = efx_filter_insert_filter(efx, &allmulti, false);
	if (rc < 0)
		return rc;
	efx->tc->reps_filter_mc = rc;
	return 0;
}

void efx_tc_remove_rep_filters(struct efx_nic *efx)
{
	if (efx->type->is_vf)
		return;
	if (!efx->tc)
		return;
	if (efx->tc->reps_filter_mc >= 0)
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc);
	efx->tc->reps_filter_mc = -1;
	if (efx->tc->reps_filter_uc >= 0)
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc);
	efx->tc->reps_filter_uc = -1;
}

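/* Bring up TC offload: check MAE capabilities, install the default rules and
 * fallback action-sets, set up the representors' m-port, and register for
 * indirect (e.g. tunnel-device) TC blocks.
 */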
int efx_init_tc(struct efx_nic *efx)
{
	int rc;

	rc = efx_mae_get_caps(efx, efx->tc->caps);
	if (rc)
		return rc;
	if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
		/* Firmware supports some match fields the driver doesn't know
		 * about.  Not fatal unless any of those fields are required
		 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS), which we have no way to
		 * check for here, so just warn.
		 */
		netif_warn(efx, probe, efx->net_dev,
			   "FW reports additional match fields %u\n",
			   efx->tc->caps->match_field_count);
	if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
		netif_err(efx, probe, efx->net_dev,
			  "Too few action prios supported (have %u, need %u)\n",
			  efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
		return -EIO;
	}
	rc = efx_tc_configure_default_rule_pf(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_default_rule_wire(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_rep_mport(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_fallback_acts_pf(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_fallback_acts_reps(efx);
	if (rc)
		return rc;
	rc = efx_mae_get_tables(efx);
	if (rc)
		return rc;
	rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
	if (rc)
		goto out_free;
	efx->tc->up = true;
	return 0;
out_free:
	efx_mae_free_tables(efx);
	return rc;
}

void efx_fini_tc(struct efx_nic *efx)
{
	/* We can get called even if efx_init_struct_tc() failed */
	if (!efx->tc)
		return;
	if (efx->tc->up)
		flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind);
	efx_tc_deconfigure_rep_mport(efx);
	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
	efx->tc->up = false;
	efx_mae_free_tables(efx);
}

/* At teardown time, all TC filter rules (and thus all resources they created)
 * should already have been removed.  If we find any in our hashtables, make a
 * cursory attempt to clean up the software side.
 */
static void efx_tc_encap_match_free(void *ptr, void *__unused)
{
	struct efx_tc_encap_match *encap = ptr;

	WARN_ON(refcount_read(&encap->ref));
	kfree(encap);
}

static void efx_tc_recirc_free(void *ptr, void *arg)
{
	struct efx_tc_recirc_id *rid = ptr;
	struct efx_nic *efx = arg;

	WARN_ON(refcount_read(&rid->ref));
	ida_free(&efx->tc->recirc_ida, rid->fw_id);
	kfree(rid);
}

static void efx_tc_lhs_free(void *ptr, void *arg)
{
	struct efx_tc_lhs_rule *rule = ptr;
	struct efx_nic *efx = arg;

	netif_err(efx, drv, efx->net_dev,
		  "tc lhs_rule %lx still present at teardown, removing\n",
		  rule->cookie);

	if (rule->lhs_act.zone)
		efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone);
	if (rule->lhs_act.count)
		efx_tc_flower_put_counter_index(efx, rule->lhs_act.count);
	efx_mae_remove_lhs_rule(efx, rule);

	kfree(rule);
}

static void efx_tc_mac_free(void *ptr, void *__unused)
{
	struct efx_tc_mac_pedit_action *ped = ptr;

	WARN_ON(refcount_read(&ped->ref));
	kfree(ped);
}

static void efx_tc_flow_free(void *ptr, void *arg)
{
	struct efx_tc_flow_rule *rule = ptr;
	struct efx_nic *efx = arg;

	netif_err(efx, drv, efx->net_dev,
		  "tc rule %lx still present at teardown, removing\n",
		  rule->cookie);

	/* Also releases entries in subsidiary tables */
	efx_tc_delete_rule(efx, rule);

	kfree(rule);
}

int efx_init_struct_tc(struct efx_nic *efx)
{
	int rc;

	if (efx->type->is_vf)
		return 0;

	efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
	if (!efx->tc)
		return -ENOMEM;
	efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
	if (!efx->tc->caps) {
		rc = -ENOMEM;
		goto fail_alloc_caps;
	}
	INIT_LIST_HEAD(&efx->tc->block_list);

	mutex_init(&efx->tc->mutex);
	init_waitqueue_head(&efx->tc->flush_wq);
	rc = efx_tc_init_encap_actions(efx);
	if (rc < 0)
		goto fail_encap_actions;
	rc = efx_tc_init_counters(efx);
	if (rc < 0)
		goto fail_counters;
	rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
	if (rc < 0)
		goto fail_mac_ht;
	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
	if (rc < 0)
		goto fail_encap_match_ht;
	rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
	if (rc < 0)
		goto fail_match_action_ht;
	rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params);
	if (rc < 0)
		goto fail_lhs_rule_ht;
	rc = efx_tc_init_conntrack(efx);
	if (rc < 0)
		goto fail_conntrack;
	rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params);
	if (rc < 0)
		goto fail_recirc_ht;
	ida_init(&efx->tc->recirc_ida);
	efx->tc->reps_filter_uc = -1;
	efx->tc->reps_filter_mc = -1;
	INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
	efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
	efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->facts.pf.list);
	efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->facts.reps.list);
	efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
	efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
	return 0;
fail_recirc_ht:
	efx_tc_destroy_conntrack(efx);
fail_conntrack:
	rhashtable_destroy(&efx->tc->lhs_rule_ht);
fail_lhs_rule_ht:
	rhashtable_destroy(&efx->tc->match_action_ht);
fail_match_action_ht:
	rhashtable_destroy(&efx->tc->encap_match_ht);
fail_encap_match_ht:
	rhashtable_destroy(&efx->tc->mac_ht);
fail_mac_ht:
	efx_tc_destroy_counters(efx);
fail_counters:
	efx_tc_destroy_encap_actions(efx);
fail_encap_actions:
	mutex_destroy(&efx->tc->mutex);
	kfree(efx->tc->caps);
fail_alloc_caps:
	kfree(efx->tc);
	efx->tc = NULL;
	return rc;
}

void efx_fini_struct_tc(struct efx_nic *efx)
{
	if (!efx->tc)
		return;

	mutex_lock(&efx->tc->mutex);
	EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
	rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx);
	rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
				    efx);
	rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
				    efx_tc_encap_match_free, NULL);
	efx_tc_fini_conntrack(efx);
	rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
	WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
	ida_destroy(&efx->tc->recirc_ida);
	rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
	efx_tc_fini_counters(efx);
	efx_tc_fini_encap_actions(efx);
	mutex_unlock(&efx->tc->mutex);
	mutex_destroy(&efx->tc->mutex);
	kfree(efx->tc->caps);
	kfree(efx->tc);
	efx->tc = NULL;
}