xref: /linux/drivers/net/ethernet/sfc/tc.c (revision 90d32e92011eaae8e70a9169b4e7acf4ca8f9d3a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2019 Solarflare Communications Inc.
5  * Copyright 2020-2022 Xilinx Inc.
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License version 2 as published
9  * by the Free Software Foundation, incorporated herein by reference.
10  */
11 
12 #include <net/pkt_cls.h>
13 #include <net/vxlan.h>
14 #include <net/geneve.h>
15 #include <net/tc_act/tc_ct.h>
16 #include "tc.h"
17 #include "tc_bindings.h"
18 #include "tc_encap_actions.h"
19 #include "tc_conntrack.h"
20 #include "mae.h"
21 #include "ef100_rep.h"
22 #include "efx.h"
23 
24 enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
25 {
26 	if (netif_is_vxlan(net_dev))
27 		return EFX_ENCAP_TYPE_VXLAN;
28 	if (netif_is_geneve(net_dev))
29 		return EFX_ENCAP_TYPE_GENEVE;
30 
31 	return EFX_ENCAP_TYPE_NONE;
32 }
33 
/* TTL mask: the least significant byte of a 32-bit word */
#define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
/* Hoplimit is stored in the most significant byte in the pedit ipv6 header
 * action; this mask has every bit set except that byte.  The expansion is
 * fully parenthesised so it stays a single operand wherever it is used.
 */
#define EFX_TC_HDR_TYPE_HLIMIT_MASK (~((u32)0xff000000))
/* Value returned by efx_tc_flower_lookup_efv() when the device is the PF
 * itself, i.e. there is no representor.
 */
#define EFX_EFV_PF	NULL
38 /* Look up the representor information (efv) for a device.
39  * May return NULL for the PF (us), or an error pointer for a device that
40  * isn't supported as a TC offload endpoint
41  */
42 struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
43 					 struct net_device *dev)
44 {
45 	struct efx_rep *efv;
46 
47 	if (!dev)
48 		return ERR_PTR(-EOPNOTSUPP);
49 	/* Is it us (the PF)? */
50 	if (dev == efx->net_dev)
51 		return EFX_EFV_PF;
52 	/* Is it an efx vfrep at all? */
53 	if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
54 		return ERR_PTR(-EOPNOTSUPP);
55 	/* Is it ours?  We don't support TC rules that include another
56 	 * EF100's netdevices (not even on another port of the same NIC).
57 	 */
58 	efv = netdev_priv(dev);
59 	if (efv->parent != efx)
60 		return ERR_PTR(-EOPNOTSUPP);
61 	return efv;
62 }
63 
64 /* Convert a driver-internal vport ID into an internal device (PF or VF) */
65 static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
66 {
67 	u32 mport;
68 
69 	if (IS_ERR(efv))
70 		return PTR_ERR(efv);
71 	if (!efv) /* device is PF (us) */
72 		efx_mae_mport_uplink(efx, &mport);
73 	else /* device is repr */
74 		efx_mae_mport_mport(efx, efv->mport, &mport);
75 	return mport;
76 }
77 
78 /* Convert a driver-internal vport ID into an external device (wire or VF) */
79 s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
80 {
81 	u32 mport;
82 
83 	if (IS_ERR(efv))
84 		return PTR_ERR(efv);
85 	if (!efv) /* device is PF (us) */
86 		efx_mae_mport_wire(efx, &mport);
87 	else /* device is repr */
88 		efx_mae_mport_mport(efx, efv->mport, &mport);
89 	return mport;
90 }
91 
/* Parameters for efx->tc->mac_ht.  The key is the leading part of
 * struct efx_tc_mac_pedit_action, from offset 0 up to and including h_addr.
 */
static const struct rhashtable_params efx_tc_mac_ht_params = {
	.key_len	= offsetofend(struct efx_tc_mac_pedit_action, h_addr),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_mac_pedit_action, linkage),
};

/* Parameters for efx->tc->encap_match_ht.  The key is every byte of
 * struct efx_tc_encap_match that precedes the linkage member.
 */
static const struct rhashtable_params efx_tc_encap_match_ht_params = {
	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_encap_match, linkage),
};

/* Parameters for a table of struct efx_tc_flow_rule keyed on the rule's
 * cookie member (one unsigned long).
 */
static const struct rhashtable_params efx_tc_match_action_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_flow_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_flow_rule, linkage),
};

/* Parameters for a table of struct efx_tc_lhs_rule keyed on the rule's
 * cookie member (one unsigned long).
 */
static const struct rhashtable_params efx_tc_lhs_rule_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_lhs_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_lhs_rule, linkage),
};

/* Parameters for efx->tc->recirc_ht.  The key is every byte of
 * struct efx_tc_recirc_id that precedes the linkage member.
 */
static const struct rhashtable_params efx_tc_recirc_ht_params = {
	.key_len	= offsetof(struct efx_tc_recirc_id, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
};
121 
122 static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
123 							     unsigned char h_addr[ETH_ALEN],
124 							     struct netlink_ext_ack *extack)
125 {
126 	struct efx_tc_mac_pedit_action *ped, *old;
127 	int rc;
128 
129 	ped = kzalloc(sizeof(*ped), GFP_USER);
130 	if (!ped)
131 		return ERR_PTR(-ENOMEM);
132 	memcpy(ped->h_addr, h_addr, ETH_ALEN);
133 	old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
134 						&ped->linkage,
135 						efx_tc_mac_ht_params);
136 	if (old) {
137 		/* don't need our new entry */
138 		kfree(ped);
139 		if (IS_ERR(old)) /* oh dear, it's actually an error */
140 			return ERR_CAST(old);
141 		if (!refcount_inc_not_zero(&old->ref))
142 			return ERR_PTR(-EAGAIN);
143 		/* existing entry found, ref taken */
144 		return old;
145 	}
146 
147 	rc = efx_mae_allocate_pedit_mac(efx, ped);
148 	if (rc < 0) {
149 		NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
150 		goto out_remove;
151 	}
152 
153 	/* ref and return */
154 	refcount_set(&ped->ref, 1);
155 	return ped;
156 out_remove:
157 	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
158 			       efx_tc_mac_ht_params);
159 	kfree(ped);
160 	return ERR_PTR(rc);
161 }
162 
163 static void efx_tc_flower_put_mac(struct efx_nic *efx,
164 				  struct efx_tc_mac_pedit_action *ped)
165 {
166 	if (!refcount_dec_and_test(&ped->ref))
167 		return; /* still in use */
168 	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
169 			       efx_tc_mac_ht_params);
170 	efx_mae_free_pedit_mac(efx, ped);
171 	kfree(ped);
172 }
173 
/* Release every resource held by action set @act, then free @act itself.
 *
 * @in_hw: true if @act has been allocated in hardware (and hence is linked
 *	on an acts.list); the hardware action set is then freed and @act is
 *	unlinked from that list first.
 *
 * The counter, encap metadata and source/destination MAC pedit entries are
 * each released only if @act actually holds one.
 */
static void efx_tc_free_action_set(struct efx_nic *efx,
				   struct efx_tc_action_set *act, bool in_hw)
{
	/* Failure paths calling this on the 'cursor' action set in_hw=false,
	 * because if the alloc had succeeded we'd've put it in acts.list and
	 * not still have it in act.
	 */
	if (in_hw) {
		efx_mae_free_action_set(efx, act->fw_id);
		/* in_hw is true iff we are on an acts.list; make sure to
		 * remove ourselves from that list before we are freed.
		 */
		list_del(&act->list);
	}
	if (act->count) {
		/* Unlink from the counter's user list under the counter's
		 * lock, skipping the unlink if count_user is an empty list
		 * head, then drop our reference on the counter index.
		 */
		spin_lock_bh(&act->count->cnt->lock);
		if (!list_empty(&act->count_user))
			list_del(&act->count_user);
		spin_unlock_bh(&act->count->cnt->lock);
		efx_tc_flower_put_counter_index(efx, act->count);
	}
	if (act->encap_md) {
		list_del(&act->encap_user);
		efx_tc_flower_release_encap_md(efx, act->encap_md);
	}
	if (act->src_mac)
		efx_tc_flower_put_mac(efx, act->src_mac);
	if (act->dst_mac)
		efx_tc_flower_put_mac(efx, act->dst_mac);
	kfree(act);
}
205 
206 static void efx_tc_free_action_set_list(struct efx_nic *efx,
207 					struct efx_tc_action_set_list *acts,
208 					bool in_hw)
209 {
210 	struct efx_tc_action_set *act, *next;
211 
212 	/* Failure paths set in_hw=false, because usually the acts didn't get
213 	 * to efx_mae_alloc_action_set_list(); if they did, the failure tree
214 	 * has a separate efx_mae_free_action_set_list() before calling us.
215 	 */
216 	if (in_hw)
217 		efx_mae_free_action_set_list(efx, acts);
218 	/* Any act that's on the list will be in_hw even if the list isn't */
219 	list_for_each_entry_safe(act, next, &acts->list, list)
220 		efx_tc_free_action_set(efx, act, true);
221 	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
222 }
223 
/* Boilerplate for the simple 'copy a field' cases.
 *
 * These macros expand to an if-statement and rely on two names from the
 * enclosing scope: 'rule' (the struct flow_rule being parsed) and 'match'
 * (the struct efx_tc_match being filled in).
 *
 * _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field): if dissector key
 * FLOW_DISSECTOR_KEY_<_name> is present in the rule, fetch it with
 * flow_rule_match_<_tcget>() into a struct flow_match_<_type> and copy
 * key-><_tcfield> / mask-><_tcfield> into match->value.<_field> /
 * match->mask.<_field>.
 *
 * MAP_KEY_AND_MASK: shorthand for when the getter suffix equals _type.
 * MAP_ENC_KEY_AND_MASK: same as _MAP_KEY_AND_MASK but prefixes the dissector
 * key name with ENC_.
 */
#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {		\
	struct flow_match_##_type fm;					\
									\
	flow_rule_match_##_tcget(rule, &fm);				\
	match->value._field = fm.key->_tcfield;				\
	match->mask._field = fm.mask->_tcfield;				\
}
#define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
#define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)
237 
/* Translate the flower match of @rule into the driver's @match (value and
 * mask), rejecting anything the driver does not handle.
 *
 * @efx: not referenced by this function's body
 * @rule: TC flow rule whose dissector keys are to be parsed
 * @match: output; only the fields for keys present in @rule are written
 * @extack: receives a message describing the reason for any failure
 *
 * Returns 0 on success; -EOPNOTSUPP for unsupported keys, control flags,
 * enc_control flags/addr_type, ct_state bits or ct_label matches; -EINVAL
 * when L3/L4 keys are used without an exact ethertype match on IPv4/IPv6, or
 * L4 keys without an exact ip_proto match on UDP/TCP.
 */
static int efx_tc_flower_parse_match(struct efx_nic *efx,
				     struct flow_rule *rule,
				     struct efx_tc_match *match,
				     struct netlink_ext_ack *extack)
{
	struct flow_dissector *dissector = rule->match.dissector;
	/* IP version deduced from the CONTROL key: 4, 6, or 0 if unknown */
	unsigned char ipv = 0;

	/* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
	 * even on IPv4 filters; so rather than relying on dissector->used_keys
	 * we check the addr_type in the CONTROL key.  If we don't find it (or
	 * it's masked, which should never happen), we treat both IPV4_ADDRS
	 * and IPV6_ADDRS as absent.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_control(rule, &fm);
		if (IS_ALL_ONES(fm.mask->addr_type))
			switch (fm.key->addr_type) {
			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
				ipv = 4;
				break;
			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
				ipv = 6;
				break;
			default:
				break;
			}

		/* Fragment flags: record those that are masked in... */
		if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
			match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
			match->mask.ip_frag = true;
		}
		if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
			match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
			match->mask.ip_firstfrag = true;
		}
		/* ...and refuse the rule if any other control flag is masked */
		if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT |
						     FLOW_DIS_FIRST_FRAG,
						     fm.mask->flags, extack))
			return -EOPNOTSUPP;
	}
	/* Whitelist of dissector keys; any other key makes the rule unsupported */
	if (dissector->used_keys &
	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}

	MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
	/* Make sure we're IP if any L3/L4 keys used. */
	if (!IS_ALL_ONES(match->mask.eth_proto) ||
	    !(match->value.eth_proto == htons(ETH_P_IP) ||
	      match->value.eth_proto == htons(ETH_P_IPV6)))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L3/L4 flower keys %#llx require protocol ipv[46]",
					       dissector->used_keys);
			return -EINVAL;
		}

	/* Outer VLAN tag goes in slot [0]; the TCI is assembled from the
	 * priority (shifted into bits 13 and up) and the VLAN ID.  Skipped
	 * entirely when the key is present but nothing in it is masked.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_vlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[0] = fm.key->vlan_tpid;
			match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
			match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	/* CVLAN tag goes in slot [1], built the same way */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_cvlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[1] = fm.key->vlan_tpid;
			match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
			match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs fm;

		flow_rule_match_eth_addrs(rule, &fm);
		ether_addr_copy(match->value.eth_saddr, fm.key->src);
		ether_addr_copy(match->value.eth_daddr, fm.key->dst);
		ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
		ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
	}

	MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
	/* Make sure we're TCP/UDP if any L4 keys used. */
	if ((match->value.ip_proto != IPPROTO_UDP &&
	     match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L4 flower keys %#llx require ipproto udp or tcp",
					       dissector->used_keys);
			return -EINVAL;
		}
	MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
	MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
	/* Addresses are only copied for the IP version found in CONTROL */
	if (ipv == 4) {
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
	}
#ifdef CONFIG_IPV6
	else if (ipv == 6) {
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
	}
#endif
	MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
	MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
	MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
	/* Encap (enc_*) keys are only parsed when ENC_CONTROL is present, has
	 * no flags masked, and has an exact-match addr_type.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_enc_control(rule, &fm);
		if (fm.mask->flags) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(fm.mask->addr_type)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
					       fm.mask->addr_type,
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		switch (fm.key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     src, enc_src_ip);
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     dst, enc_dst_ip);
			break;
#ifdef CONFIG_IPV6
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     src, enc_src_ip6);
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     dst, enc_dst_ip6);
			break;
#endif
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
		MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
	} else if (dissector->used_keys &
		   (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
		/* Other enc_ keys without ENC_CONTROL cannot be interpreted */
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Flower enc keys require enc_control (keys: %#llx)",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}
	/* Conntrack: only the TRACKED and ESTABLISHED state bits, ct_mark and
	 * ct_zone can be matched on; ct_labels must be entirely unmasked.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
		struct flow_match_ct fm;

		flow_rule_match_ct(rule, &fm);
		match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					  TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported ct_state match %#x",
					       fm.mask->ct_state);
			return -EOPNOTSUPP;
		}
		match->value.ct_mark = fm.key->ct_mark;
		match->mask.ct_mark = fm.mask->ct_mark;
		match->value.ct_zone = fm.key->ct_zone;
		match->mask.ct_zone = fm.mask->ct_zone;

		if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
			NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}
466 
467 static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
468 					      struct efx_tc_encap_match *encap)
469 {
470 	int rc;
471 
472 	if (!refcount_dec_and_test(&encap->ref))
473 		return; /* still in use */
474 
475 	if (encap->type == EFX_TC_EM_DIRECT) {
476 		rc = efx_mae_unregister_encap_match(efx, encap);
477 		if (rc)
478 			/* Display message but carry on and remove entry from our
479 			 * SW tables, because there's not much we can do about it.
480 			 */
481 			netif_err(efx, drv, efx->net_dev,
482 				  "Failed to release encap match %#x, rc %d\n",
483 				  encap->fw_id, rc);
484 	}
485 	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
486 			       efx_tc_encap_match_ht_params);
487 	if (encap->pseudo)
488 		efx_tc_flower_release_encap_match(efx, encap->pseudo);
489 	kfree(encap);
490 }
491 
492 static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
493 					    struct efx_tc_match *match,
494 					    enum efx_encap_type type,
495 					    enum efx_tc_em_pseudo_type em_type,
496 					    u8 child_ip_tos_mask,
497 					    __be16 child_udp_sport_mask,
498 					    struct netlink_ext_ack *extack)
499 {
500 	struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
501 	bool ipv6 = false;
502 	int rc;
503 
504 	/* We require that the socket-defining fields (IP addrs and UDP dest
505 	 * port) are present and exact-match.  Other fields may only be used
506 	 * if the field-set (and any masks) are the same for all encap
507 	 * matches on the same <sip,dip,dport> tuple; this is enforced by
508 	 * pseudo encap matches.
509 	 */
510 	if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
511 		if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
512 			NL_SET_ERR_MSG_MOD(extack,
513 					   "Egress encap match is not exact on dst IP address");
514 			return -EOPNOTSUPP;
515 		}
516 		if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
517 			NL_SET_ERR_MSG_MOD(extack,
518 					   "Egress encap match is not exact on src IP address");
519 			return -EOPNOTSUPP;
520 		}
521 #ifdef CONFIG_IPV6
522 		if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
523 		    !ipv6_addr_any(&match->mask.enc_src_ip6)) {
524 			NL_SET_ERR_MSG_MOD(extack,
525 					   "Egress encap match on both IPv4 and IPv6, don't understand");
526 			return -EOPNOTSUPP;
527 		}
528 	} else {
529 		ipv6 = true;
530 		if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
531 			NL_SET_ERR_MSG_MOD(extack,
532 					   "Egress encap match is not exact on dst IP address");
533 			return -EOPNOTSUPP;
534 		}
535 		if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
536 			NL_SET_ERR_MSG_MOD(extack,
537 					   "Egress encap match is not exact on src IP address");
538 			return -EOPNOTSUPP;
539 		}
540 #endif
541 	}
542 	if (!IS_ALL_ONES(match->mask.enc_dport)) {
543 		NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
544 		return -EOPNOTSUPP;
545 	}
546 	if (match->mask.enc_sport || match->mask.enc_ip_tos) {
547 		struct efx_tc_match pmatch = *match;
548 
549 		if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
550 			NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
551 			return -EOPNOTSUPP;
552 		}
553 		pmatch.value.enc_ip_tos = 0;
554 		pmatch.mask.enc_ip_tos = 0;
555 		pmatch.value.enc_sport = 0;
556 		pmatch.mask.enc_sport = 0;
557 		rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
558 						      EFX_TC_EM_PSEUDO_MASK,
559 						      match->mask.enc_ip_tos,
560 						      match->mask.enc_sport,
561 						      extack);
562 		if (rc)
563 			return rc;
564 		pseudo = pmatch.encap;
565 	}
566 	if (match->mask.enc_ip_ttl) {
567 		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
568 		rc = -EOPNOTSUPP;
569 		goto fail_pseudo;
570 	}
571 
572 	rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
573 					    match->mask.enc_sport, extack);
574 	if (rc)
575 		goto fail_pseudo;
576 
577 	encap = kzalloc(sizeof(*encap), GFP_USER);
578 	if (!encap) {
579 		rc = -ENOMEM;
580 		goto fail_pseudo;
581 	}
582 	encap->src_ip = match->value.enc_src_ip;
583 	encap->dst_ip = match->value.enc_dst_ip;
584 #ifdef CONFIG_IPV6
585 	encap->src_ip6 = match->value.enc_src_ip6;
586 	encap->dst_ip6 = match->value.enc_dst_ip6;
587 #endif
588 	encap->udp_dport = match->value.enc_dport;
589 	encap->tun_type = type;
590 	encap->ip_tos = match->value.enc_ip_tos;
591 	encap->ip_tos_mask = match->mask.enc_ip_tos;
592 	encap->child_ip_tos_mask = child_ip_tos_mask;
593 	encap->udp_sport = match->value.enc_sport;
594 	encap->udp_sport_mask = match->mask.enc_sport;
595 	encap->child_udp_sport_mask = child_udp_sport_mask;
596 	encap->type = em_type;
597 	encap->pseudo = pseudo;
598 	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
599 						&encap->linkage,
600 						efx_tc_encap_match_ht_params);
601 	if (old) {
602 		/* don't need our new entry */
603 		kfree(encap);
604 		if (pseudo) /* don't need our new pseudo either */
605 			efx_tc_flower_release_encap_match(efx, pseudo);
606 		if (IS_ERR(old)) /* oh dear, it's actually an error */
607 			return PTR_ERR(old);
608 		/* check old and new em_types are compatible */
609 		switch (old->type) {
610 		case EFX_TC_EM_DIRECT:
611 			/* old EM is in hardware, so mustn't overlap with a
612 			 * pseudo, but may be shared with another direct EM
613 			 */
614 			if (em_type == EFX_TC_EM_DIRECT)
615 				break;
616 			NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
617 			return -EEXIST;
618 		case EFX_TC_EM_PSEUDO_MASK:
619 			/* old EM is protecting a ToS- or src port-qualified
620 			 * filter, so may only be shared with another pseudo
621 			 * for the same ToS and src port masks.
622 			 */
623 			if (em_type != EFX_TC_EM_PSEUDO_MASK) {
624 				NL_SET_ERR_MSG_FMT_MOD(extack,
625 						       "%s encap match conflicts with existing pseudo(MASK) entry",
626 						       em_type ? "Pseudo" : "Direct");
627 				return -EEXIST;
628 			}
629 			if (child_ip_tos_mask != old->child_ip_tos_mask) {
630 				NL_SET_ERR_MSG_FMT_MOD(extack,
631 						       "Pseudo encap match for TOS mask %#04x conflicts with existing mask %#04x",
632 						       child_ip_tos_mask,
633 						       old->child_ip_tos_mask);
634 				return -EEXIST;
635 			}
636 			if (child_udp_sport_mask != old->child_udp_sport_mask) {
637 				NL_SET_ERR_MSG_FMT_MOD(extack,
638 						       "Pseudo encap match for UDP src port mask %#x conflicts with existing mask %#x",
639 						       child_udp_sport_mask,
640 						       old->child_udp_sport_mask);
641 				return -EEXIST;
642 			}
643 			break;
644 		case EFX_TC_EM_PSEUDO_OR:
645 			/* old EM corresponds to an OR that has to be unique
646 			 * (it must not overlap with any other OR, whether
647 			 * direct-EM or pseudo).
648 			 */
649 			NL_SET_ERR_MSG_FMT_MOD(extack,
650 					       "%s encap match conflicts with existing pseudo(OR) entry",
651 					       em_type ? "Pseudo" : "Direct");
652 			return -EEXIST;
653 		default: /* Unrecognised pseudo-type.  Just say no */
654 			NL_SET_ERR_MSG_FMT_MOD(extack,
655 					       "%s encap match conflicts with existing pseudo(%d) entry",
656 					       em_type ? "Pseudo" : "Direct",
657 					       old->type);
658 			return -EEXIST;
659 		}
660 		/* check old and new tun_types are compatible */
661 		if (old->tun_type != type) {
662 			NL_SET_ERR_MSG_FMT_MOD(extack,
663 					       "Egress encap match with conflicting tun_type %u != %u",
664 					       old->tun_type, type);
665 			return -EEXIST;
666 		}
667 		if (!refcount_inc_not_zero(&old->ref))
668 			return -EAGAIN;
669 		/* existing entry found */
670 		encap = old;
671 	} else {
672 		if (em_type == EFX_TC_EM_DIRECT) {
673 			rc = efx_mae_register_encap_match(efx, encap);
674 			if (rc) {
675 				NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
676 				goto fail;
677 			}
678 		}
679 		refcount_set(&encap->ref, 1);
680 	}
681 	match->encap = encap;
682 	return 0;
683 fail:
684 	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
685 			       efx_tc_encap_match_ht_params);
686 	kfree(encap);
687 fail_pseudo:
688 	if (pseudo)
689 		efx_tc_flower_release_encap_match(efx, pseudo);
690 	return rc;
691 }
692 
693 static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
694 						     u32 chain_index,
695 						     struct net_device *net_dev)
696 {
697 	struct efx_tc_recirc_id *rid, *old;
698 	int rc;
699 
700 	rid = kzalloc(sizeof(*rid), GFP_USER);
701 	if (!rid)
702 		return ERR_PTR(-ENOMEM);
703 	rid->chain_index = chain_index;
704 	/* We don't take a reference here, because it's implied - if there's
705 	 * a rule on the net_dev that's been offloaded to us, then the net_dev
706 	 * can't go away until the rule has been deoffloaded.
707 	 */
708 	rid->net_dev = net_dev;
709 	old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht,
710 						&rid->linkage,
711 						efx_tc_recirc_ht_params);
712 	if (old) {
713 		/* don't need our new entry */
714 		kfree(rid);
715 		if (IS_ERR(old)) /* oh dear, it's actually an error */
716 			return ERR_CAST(old);
717 		if (!refcount_inc_not_zero(&old->ref))
718 			return ERR_PTR(-EAGAIN);
719 		/* existing entry found */
720 		rid = old;
721 	} else {
722 		rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER);
723 		if (rc < 0) {
724 			rhashtable_remove_fast(&efx->tc->recirc_ht,
725 					       &rid->linkage,
726 					       efx_tc_recirc_ht_params);
727 			kfree(rid);
728 			return ERR_PTR(rc);
729 		}
730 		rid->fw_id = rc;
731 		refcount_set(&rid->ref, 1);
732 	}
733 	return rid;
734 }
735 
736 static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid)
737 {
738 	if (!refcount_dec_and_test(&rid->ref))
739 		return; /* still in use */
740 	rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage,
741 			       efx_tc_recirc_ht_params);
742 	ida_free(&efx->tc->recirc_ida, rid->fw_id);
743 	kfree(rid);
744 }
745 
746 static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
747 {
748 	efx_mae_delete_rule(efx, rule->fw_id);
749 
750 	/* Release entries in subsidiary tables */
751 	efx_tc_free_action_set_list(efx, &rule->acts, true);
752 	if (rule->match.rid)
753 		efx_tc_put_recirc_id(efx, rule->match.rid);
754 	if (rule->match.encap)
755 		efx_tc_flower_release_encap_match(efx, rule->match.encap);
756 	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
757 }
758 
759 static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
760 {
761 	switch (typ) {
762 	case EFX_ENCAP_TYPE_NONE:
763 		return "none";
764 	case EFX_ENCAP_TYPE_VXLAN:
765 		return "vxlan";
766 	case EFX_ENCAP_TYPE_GENEVE:
767 		return "geneve";
768 	default:
769 		pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
770 		return "unknown";
771 	}
772 }
773 
/* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
/* Kinds of action that can be added to an action set; passed as @new to
 * efx_tc_flower_action_order_ok() to ask whether adding that kind of action
 * to the set built so far is permitted.
 */
enum efx_tc_action_order {
	EFX_TC_AO_DECAP,
	EFX_TC_AO_DEC_TTL,
	EFX_TC_AO_PEDIT_MAC_ADDRS,
	EFX_TC_AO_VLAN_POP,
	EFX_TC_AO_VLAN_PUSH,
	EFX_TC_AO_COUNT,
	EFX_TC_AO_ENCAP,
	EFX_TC_AO_DELIVER
};
/* Determine whether we can add @new action without violating order.
 *
 * @act: the action set as built so far
 * @new: the kind of action the caller wants to add
 *
 * The switch is a fallthrough chain: each case applies its own checks and
 * then falls into the checks of the cases below it, so an action is refused
 * if @act already contains anything tested at or after its entry point.
 * EFX_TC_AO_DEC_TTL sits outside the chain and has its own two checks.
 *
 * Returns true if @new may be added, false otherwise (including for an
 * unrecognised @new, which also triggers a one-time warning).
 */
static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
					  enum efx_tc_action_order new)
{
	switch (new) {
	case EFX_TC_AO_DECAP:
		/* Only one decap per action set */
		if (act->decap)
			return false;
		/* PEDIT_MAC_ADDRS must not happen before DECAP, though it
		 * can wait until much later
		 */
		if (act->dst_mac || act->src_mac)
			return false;

		/* Decrementing ttl must not happen before DECAP */
		if (act->do_ttl_dec)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_POP:
		/* At most two VLAN pops */
		if (act->vlan_pop >= 2)
			return false;
		/* If we've already pushed a VLAN, we can't then pop it;
		 * the hardware would instead try to pop an existing VLAN
		 * before pushing the new one.
		 */
		if (act->vlan_push)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_PUSH:
		/* At most two VLAN pushes */
		if (act->vlan_push >= 2)
			return false;
		fallthrough;
	case EFX_TC_AO_COUNT:
		/* Only one counter per action set */
		if (act->count)
			return false;
		fallthrough;
	case EFX_TC_AO_PEDIT_MAC_ADDRS:
	case EFX_TC_AO_ENCAP:
		/* Nothing in this chain may follow an encap */
		if (act->encap_md)
			return false;
		fallthrough;
	case EFX_TC_AO_DELIVER:
		/* Nothing in this chain may follow a deliver */
		return !act->deliver;
	case EFX_TC_AO_DEC_TTL:
		/* TTL decrement may not follow an encap, and may only be
		 * requested once
		 */
		if (act->encap_md)
			return false;
		return !act->do_ttl_dec;
	default:
		/* Bad caller.  Whatever they wanted to do, say they can't. */
		WARN_ON_ONCE(1);
		return false;
	}
}
838 
839 /**
840  * DOC: TC conntrack sequences
841  *
842  * The MAE hardware can handle at most two rounds of action rule matching,
843  * consequently we support conntrack through the notion of a "left-hand side
844  * rule".  This is a rule which typically contains only the actions "ct" and
845  * "goto chain N", and corresponds to one or more "right-hand side rules" in
846  * chain N, which typically match on +trk+est, and may perform ct(nat) actions.
847  * RHS rules go in the Action Rule table as normal but with a nonzero recirc_id
848  * (the hardware equivalent of chain_index), while LHS rules may go in either
849  * the Action Rule or the Outer Rule table, the latter being preferred for
850  * performance reasons, and set both DO_CT and a recirc_id in their response.
851  *
852  * Besides the RHS rules, there are often also similar rules matching on
853  * +trk+new which perform the ct(commit) action.  These are not offloaded.
854  */
855 
856 static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr,
857 				    struct efx_tc_match *match)
858 {
859 	const struct flow_action_entry *fa;
860 	int i;
861 
862 	flow_action_for_each(i, fa, &fr->action) {
863 		switch (fa->id) {
864 		case FLOW_ACTION_GOTO:
865 			return true;
866 		case FLOW_ACTION_CT:
867 			/* If rule is -trk, or doesn't mention trk at all, then
868 			 * a CT action implies a conntrack lookup (hence it's an
869 			 * LHS rule).  If rule is +trk, then a CT action could
870 			 * just be ct(nat) or even ct(commit) (though the latter
871 			 * can't be offloaded).
872 			 */
873 			if (!match->mask.ct_state_trk || !match->value.ct_state_trk)
874 				return true;
875 			break;
876 		default:
877 			break;
878 		}
879 	}
880 	return false;
881 }
882 
883 /* A foreign LHS rule has matches on enc_ keys at the TC layer (including an
884  * implied match on enc_ip_proto UDP).  Translate these into non-enc_ keys,
885  * so that we can use the same MAE machinery as local LHS rules (and so that
886  * the lhs_rules entries have uniform semantics).  It may seem odd to do it
887  * this way round, given that the corresponding fields in the MAE MCDIs are
888  * all ENC_, but (a) we don't have enc_L2 or enc_ip_proto in struct
889  * efx_tc_match_fields and (b) semantically an LHS rule doesn't have inner
890  * fields so it's just matching on *the* header rather than the outer header.
891  * Make sure that the non-enc_ keys were not already being matched on, as that
892  * would imply a rule that needed a triple lookup.  (Hardware can do that,
893  * with OR-AR-CT-AR, but it halves packet rate so we avoid it where possible;
894  * see efx_tc_flower_flhs_needs_ar().)
895  */
896 static int efx_tc_flower_translate_flhs_match(struct efx_tc_match *match)
897 {
898 	int rc = 0;
899 
900 #define COPY_MASK_AND_VALUE(_key, _ekey)	({	\
901 	if (match->mask._key) {				\
902 		rc = -EOPNOTSUPP;			\
903 	} else {					\
904 		match->mask._key = match->mask._ekey;	\
905 		match->mask._ekey = 0;			\
906 		match->value._key = match->value._ekey;	\
907 		match->value._ekey = 0;			\
908 	}						\
909 	rc;						\
910 })
911 #define COPY_FROM_ENC(_key)	COPY_MASK_AND_VALUE(_key, enc_##_key)
912 	if (match->mask.ip_proto)
913 		return -EOPNOTSUPP;
914 	match->mask.ip_proto = ~0;
915 	match->value.ip_proto = IPPROTO_UDP;
916 	if (COPY_FROM_ENC(src_ip) || COPY_FROM_ENC(dst_ip))
917 		return rc;
918 #ifdef CONFIG_IPV6
919 	if (!ipv6_addr_any(&match->mask.src_ip6))
920 		return -EOPNOTSUPP;
921 	match->mask.src_ip6 = match->mask.enc_src_ip6;
922 	memset(&match->mask.enc_src_ip6, 0, sizeof(struct in6_addr));
923 	if (!ipv6_addr_any(&match->mask.dst_ip6))
924 		return -EOPNOTSUPP;
925 	match->mask.dst_ip6 = match->mask.enc_dst_ip6;
926 	memset(&match->mask.enc_dst_ip6, 0, sizeof(struct in6_addr));
927 #endif
928 	if (COPY_FROM_ENC(ip_tos) || COPY_FROM_ENC(ip_ttl))
929 		return rc;
930 	/* should really copy enc_ip_frag but we don't have that in
931 	 * parse_match yet
932 	 */
933 	if (COPY_MASK_AND_VALUE(l4_sport, enc_sport) ||
934 	    COPY_MASK_AND_VALUE(l4_dport, enc_dport))
935 		return rc;
936 	return 0;
937 #undef COPY_FROM_ENC
938 #undef COPY_MASK_AND_VALUE
939 }
940 
941 /* If a foreign LHS rule wants to match on keys that are only available after
942  * encap header identification and parsing, then it can't be done in the Outer
943  * Rule lookup, because that lookup determines the encap type used to parse
944  * beyond the outer headers.  Thus, such rules must use the OR-AR-CT-AR lookup
945  * sequence, with an EM (struct efx_tc_encap_match) in the OR step.
946  * Return true iff the passed match requires this.
947  */
948 static bool efx_tc_flower_flhs_needs_ar(struct efx_tc_match *match)
949 {
950 	/* matches on inner-header keys can't be done in OR */
951 	return match->mask.eth_proto ||
952 	       match->mask.vlan_tci[0] || match->mask.vlan_tci[1] ||
953 	       match->mask.vlan_proto[0] || match->mask.vlan_proto[1] ||
954 	       memchr_inv(match->mask.eth_saddr, 0, ETH_ALEN) ||
955 	       memchr_inv(match->mask.eth_daddr, 0, ETH_ALEN) ||
956 	       match->mask.ip_proto ||
957 	       match->mask.ip_tos || match->mask.ip_ttl ||
958 	       match->mask.src_ip || match->mask.dst_ip ||
959 #ifdef CONFIG_IPV6
960 	       !ipv6_addr_any(&match->mask.src_ip6) ||
961 	       !ipv6_addr_any(&match->mask.dst_ip6) ||
962 #endif
963 	       match->mask.ip_frag || match->mask.ip_firstfrag ||
964 	       match->mask.l4_sport || match->mask.l4_dport ||
965 	       match->mask.tcp_flags ||
966 	/* nor can VNI */
967 	       match->mask.enc_keyid;
968 }
969 
970 static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx,
971 					    struct flow_cls_offload *tc,
972 					    struct flow_rule *fr,
973 					    struct net_device *net_dev,
974 					    struct efx_tc_lhs_rule *rule)
975 
976 {
977 	struct netlink_ext_ack *extack = tc->common.extack;
978 	struct efx_tc_lhs_action *act = &rule->lhs_act;
979 	const struct flow_action_entry *fa;
980 	enum efx_tc_counter_type ctype;
981 	bool pipe = true;
982 	int i;
983 
984 	ctype = rule->is_ar ? EFX_TC_COUNTER_TYPE_AR : EFX_TC_COUNTER_TYPE_OR;
985 
986 	flow_action_for_each(i, fa, &fr->action) {
987 		struct efx_tc_ct_zone *ct_zone;
988 		struct efx_tc_recirc_id *rid;
989 
990 		if (!pipe) {
991 			/* more actions after a non-pipe action */
992 			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
993 			return -EINVAL;
994 		}
995 		switch (fa->id) {
996 		case FLOW_ACTION_GOTO:
997 			if (!fa->chain_index) {
998 				NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw");
999 				return -EOPNOTSUPP;
1000 			}
1001 			rid = efx_tc_get_recirc_id(efx, fa->chain_index,
1002 						   net_dev);
1003 			if (IS_ERR(rid)) {
1004 				NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index");
1005 				return PTR_ERR(rid);
1006 			}
1007 			act->rid = rid;
1008 			if (fa->hw_stats) {
1009 				struct efx_tc_counter_index *cnt;
1010 
1011 				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
1012 					NL_SET_ERR_MSG_FMT_MOD(extack,
1013 							       "hw_stats_type %u not supported (only 'delayed')",
1014 							       fa->hw_stats);
1015 					return -EOPNOTSUPP;
1016 				}
1017 				cnt = efx_tc_flower_get_counter_index(efx, tc->cookie,
1018 								      ctype);
1019 				if (IS_ERR(cnt)) {
1020 					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
1021 					return PTR_ERR(cnt);
1022 				}
1023 				WARN_ON(act->count); /* can't happen */
1024 				act->count = cnt;
1025 			}
1026 			pipe = false;
1027 			break;
1028 		case FLOW_ACTION_CT:
1029 			if (act->zone) {
1030 				NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions");
1031 				return -EOPNOTSUPP;
1032 			}
1033 			if (fa->ct.action & (TCA_CT_ACT_COMMIT |
1034 					     TCA_CT_ACT_FORCE)) {
1035 				NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force");
1036 				return -EOPNOTSUPP;
1037 			}
1038 			if (fa->ct.action & TCA_CT_ACT_CLEAR) {
1039 				NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule");
1040 				return -EOPNOTSUPP;
1041 			}
1042 			if (fa->ct.action & (TCA_CT_ACT_NAT |
1043 					     TCA_CT_ACT_NAT_SRC |
1044 					     TCA_CT_ACT_NAT_DST)) {
1045 				NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet");
1046 				return -EOPNOTSUPP;
1047 			}
1048 			if (fa->ct.action) {
1049 				NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule\n",
1050 						       fa->ct.action);
1051 				return -EOPNOTSUPP;
1052 			}
1053 			ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone,
1054 							  fa->ct.flow_table);
1055 			if (IS_ERR(ct_zone)) {
1056 				NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates");
1057 				return PTR_ERR(ct_zone);
1058 			}
1059 			act->zone = ct_zone;
1060 			break;
1061 		default:
1062 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule\n",
1063 					       fa->id);
1064 			return -EOPNOTSUPP;
1065 		}
1066 	}
1067 
1068 	if (pipe) {
1069 		NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule");
1070 		return -EOPNOTSUPP;
1071 	}
1072 	return 0;
1073 }
1074 
1075 static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
1076 					      struct efx_tc_lhs_action *act)
1077 {
1078 	if (act->rid)
1079 		efx_tc_put_recirc_id(efx, act->rid);
1080 	if (act->zone)
1081 		efx_tc_ct_unregister_zone(efx, act->zone);
1082 	if (act->count)
1083 		efx_tc_flower_put_counter_index(efx, act->count);
1084 }
1085 
1086 /**
1087  * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
1088  *
1089  * @dst_mac_32:	dst_mac[0:3] has been populated
1090  * @dst_mac_16:	dst_mac[4:5] has been populated
1091  * @src_mac_16:	src_mac[0:1] has been populated
1092  * @src_mac_32:	src_mac[2:5] has been populated
1093  * @dst_mac:	h_dest field of ethhdr
1094  * @src_mac:	h_source field of ethhdr
1095  *
1096  * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
1097  * necessarily equate to whole fields of the packet header, this
1098  * structure is used to hold the cumulative effect of the partial
1099  * field pedits that have been processed so far.
1100  */
1101 struct efx_tc_mangler_state {
1102 	u8 dst_mac_32:1; /* eth->h_dest[0:3] */
1103 	u8 dst_mac_16:1; /* eth->h_dest[4:5] */
1104 	u8 src_mac_16:1; /* eth->h_source[0:1] */
1105 	u8 src_mac_32:1; /* eth->h_source[2:5] */
1106 	unsigned char dst_mac[ETH_ALEN];
1107 	unsigned char src_mac[ETH_ALEN];
1108 };
1109 
1110 /** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
1111  * @efx:	NIC we're installing a flow rule on
1112  * @act:	action set (cursor) to update
1113  * @mung:	accumulated partial mangles
1114  * @extack:	netlink extended ack for reporting errors
1115  *
1116  * Check @mung to find any combinations of partial mangles that can be
1117  * combined into a complete packet field edit, add that edit to @act,
1118  * and consume the partial mangles from @mung.
1119  */
1120 
1121 static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
1122 				      struct efx_tc_action_set *act,
1123 				      struct efx_tc_mangler_state *mung,
1124 				      struct netlink_ext_ack *extack)
1125 {
1126 	struct efx_tc_mac_pedit_action *ped;
1127 
1128 	if (mung->dst_mac_32 && mung->dst_mac_16) {
1129 		ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
1130 		if (IS_ERR(ped))
1131 			return PTR_ERR(ped);
1132 
1133 		/* Check that we have not already populated dst_mac */
1134 		if (act->dst_mac)
1135 			efx_tc_flower_put_mac(efx, act->dst_mac);
1136 
1137 		act->dst_mac = ped;
1138 
1139 		/* consume the incomplete state */
1140 		mung->dst_mac_32 = 0;
1141 		mung->dst_mac_16 = 0;
1142 	}
1143 	if (mung->src_mac_16 && mung->src_mac_32) {
1144 		ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
1145 		if (IS_ERR(ped))
1146 			return PTR_ERR(ped);
1147 
1148 		/* Check that we have not already populated src_mac */
1149 		if (act->src_mac)
1150 			efx_tc_flower_put_mac(efx, act->src_mac);
1151 
1152 		act->src_mac = ped;
1153 
1154 		/* consume the incomplete state */
1155 		mung->src_mac_32 = 0;
1156 		mung->src_mac_16 = 0;
1157 	}
1158 	return 0;
1159 }
1160 
1161 static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
1162 			    const struct flow_action_entry *fa,
1163 			    struct netlink_ext_ack *extack)
1164 {
1165 	switch (fa->mangle.htype) {
1166 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1167 		switch (fa->mangle.offset) {
1168 		case offsetof(struct iphdr, ttl):
1169 			/* check that pedit applies to ttl only */
1170 			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
1171 				break;
1172 
1173 			/* Adding 0xff is equivalent to decrementing the ttl.
1174 			 * Other added values are not supported.
1175 			 */
1176 			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
1177 				break;
1178 
1179 			/* check that we do not decrement ttl twice */
1180 			if (!efx_tc_flower_action_order_ok(act,
1181 							   EFX_TC_AO_DEC_TTL)) {
1182 				NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
1183 				return -EOPNOTSUPP;
1184 			}
1185 			act->do_ttl_dec = 1;
1186 			return 0;
1187 		default:
1188 			break;
1189 		}
1190 		break;
1191 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
1192 		switch (fa->mangle.offset) {
1193 		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
1194 			/* check that pedit applies to hoplimit only */
1195 			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
1196 				break;
1197 
1198 			/* Adding 0xff is equivalent to decrementing the hoplimit.
1199 			 * Other added values are not supported.
1200 			 */
1201 			if ((fa->mangle.val >> 24) != U8_MAX)
1202 				break;
1203 
1204 			/* check that we do not decrement hoplimit twice */
1205 			if (!efx_tc_flower_action_order_ok(act,
1206 							   EFX_TC_AO_DEC_TTL)) {
1207 				NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
1208 				return -EOPNOTSUPP;
1209 			}
1210 			act->do_ttl_dec = 1;
1211 			return 0;
1212 		default:
1213 			break;
1214 		}
1215 		break;
1216 	default:
1217 		break;
1218 	}
1219 
1220 	NL_SET_ERR_MSG_FMT_MOD(extack,
1221 			       "ttl add action type %x %x %x/%x is not supported",
1222 			       fa->mangle.htype, fa->mangle.offset,
1223 			       fa->mangle.val, fa->mangle.mask);
1224 	return -EOPNOTSUPP;
1225 }
1226 
1227 /**
1228  * efx_tc_mangle() - handle a single 32-bit (or less) pedit
1229  * @efx:	NIC we're installing a flow rule on
1230  * @act:	action set (cursor) to update
1231  * @fa:		FLOW_ACTION_MANGLE action metadata
1232  * @mung:	accumulator for partial mangles
1233  * @extack:	netlink extended ack for reporting errors
1234  * @match:	original match used along with the mangle action
1235  *
1236  * Identify the fields written by a FLOW_ACTION_MANGLE, and record
1237  * the partial mangle state in @mung.  If this mangle completes an
1238  * earlier partial mangle, consume and apply to @act by calling
1239  * efx_tc_complete_mac_mangle().
1240  */
1241 
1242 static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
1243 			 const struct flow_action_entry *fa,
1244 			 struct efx_tc_mangler_state *mung,
1245 			 struct netlink_ext_ack *extack,
1246 			 struct efx_tc_match *match)
1247 {
1248 	__le32 mac32;
1249 	__le16 mac16;
1250 	u8 tr_ttl;
1251 
1252 	switch (fa->mangle.htype) {
1253 	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1254 		BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
1255 		BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
1256 		if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
1257 			NL_SET_ERR_MSG_MOD(extack,
1258 					   "Pedit mangle mac action violates action order");
1259 			return -EOPNOTSUPP;
1260 		}
1261 		switch (fa->mangle.offset) {
1262 		case 0:
1263 			if (fa->mangle.mask) {
1264 				NL_SET_ERR_MSG_FMT_MOD(extack,
1265 						       "mask (%#x) of eth.dst32 mangle is not supported",
1266 						       fa->mangle.mask);
1267 				return -EOPNOTSUPP;
1268 			}
1269 			/* Ethernet address is little-endian */
1270 			mac32 = cpu_to_le32(fa->mangle.val);
1271 			memcpy(mung->dst_mac, &mac32, sizeof(mac32));
1272 			mung->dst_mac_32 = 1;
1273 			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1274 		case 4:
1275 			if (fa->mangle.mask == 0xffff) {
1276 				mac16 = cpu_to_le16(fa->mangle.val >> 16);
1277 				memcpy(mung->src_mac, &mac16, sizeof(mac16));
1278 				mung->src_mac_16 = 1;
1279 			} else if (fa->mangle.mask == 0xffff0000) {
1280 				mac16 = cpu_to_le16((u16)fa->mangle.val);
1281 				memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
1282 				mung->dst_mac_16 = 1;
1283 			} else {
1284 				NL_SET_ERR_MSG_FMT_MOD(extack,
1285 						       "mask (%#x) of eth+4 mangle is not high or low 16b",
1286 						       fa->mangle.mask);
1287 				return -EOPNOTSUPP;
1288 			}
1289 			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1290 		case 8:
1291 			if (fa->mangle.mask) {
1292 				NL_SET_ERR_MSG_FMT_MOD(extack,
1293 						       "mask (%#x) of eth.src32 mangle is not supported",
1294 						       fa->mangle.mask);
1295 				return -EOPNOTSUPP;
1296 			}
1297 			mac32 = cpu_to_le32(fa->mangle.val);
1298 			memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
1299 			mung->src_mac_32 = 1;
1300 			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1301 		default:
1302 			NL_SET_ERR_MSG_FMT_MOD(extack, "mangle eth+%u %x/%x is not supported",
1303 					       fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
1304 			return -EOPNOTSUPP;
1305 		}
1306 		break;
1307 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1308 		switch (fa->mangle.offset) {
1309 		case offsetof(struct iphdr, ttl):
1310 			/* we currently only support pedit IP4 when it applies
1311 			 * to TTL and then only when it can be achieved with a
1312 			 * decrement ttl action
1313 			 */
1314 
1315 			/* check that pedit applies to ttl only */
1316 			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
1317 				NL_SET_ERR_MSG_FMT_MOD(extack,
1318 						       "mask (%#x) out of range, only support mangle action on ipv4.ttl",
1319 						       fa->mangle.mask);
1320 				return -EOPNOTSUPP;
1321 			}
1322 
1323 			/* we can only convert to a dec ttl when we have an
1324 			 * exact match on the ttl field
1325 			 */
1326 			if (match->mask.ip_ttl != U8_MAX) {
1327 				NL_SET_ERR_MSG_FMT_MOD(extack,
1328 						       "only support mangle ttl when we have an exact match, current mask (%#x)",
1329 						       match->mask.ip_ttl);
1330 				return -EOPNOTSUPP;
1331 			}
1332 
1333 			/* check that we don't try to decrement 0, which equates
1334 			 * to setting the ttl to 0xff
1335 			 */
1336 			if (match->value.ip_ttl == 0) {
1337 				NL_SET_ERR_MSG_MOD(extack,
1338 						   "decrement ttl past 0 is not supported");
1339 				return -EOPNOTSUPP;
1340 			}
1341 
1342 			/* check that we do not decrement ttl twice */
1343 			if (!efx_tc_flower_action_order_ok(act,
1344 							   EFX_TC_AO_DEC_TTL)) {
1345 				NL_SET_ERR_MSG_MOD(extack,
1346 						   "multiple dec ttl is not supported");
1347 				return -EOPNOTSUPP;
1348 			}
1349 
1350 			/* check pedit can be achieved with decrement action */
1351 			tr_ttl = match->value.ip_ttl - 1;
1352 			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
1353 				act->do_ttl_dec = 1;
1354 				return 0;
1355 			}
1356 
1357 			fallthrough;
1358 		default:
1359 			NL_SET_ERR_MSG_FMT_MOD(extack,
1360 					       "only support mangle on the ttl field (offset is %u)",
1361 					       fa->mangle.offset);
1362 			return -EOPNOTSUPP;
1363 		}
1364 		break;
1365 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
1366 		switch (fa->mangle.offset) {
1367 		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
1368 			/* we currently only support pedit IP6 when it applies
1369 			 * to the hoplimit and then only when it can be achieved
1370 			 * with a decrement hoplimit action
1371 			 */
1372 
1373 			/* check that pedit applies to ttl only */
1374 			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
1375 				NL_SET_ERR_MSG_FMT_MOD(extack,
1376 						       "mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
1377 						       fa->mangle.mask);
1378 
1379 				return -EOPNOTSUPP;
1380 			}
1381 
1382 			/* we can only convert to a dec ttl when we have an
1383 			 * exact match on the ttl field
1384 			 */
1385 			if (match->mask.ip_ttl != U8_MAX) {
1386 				NL_SET_ERR_MSG_FMT_MOD(extack,
1387 						       "only support hop_limit when we have an exact match, current mask (%#x)",
1388 						       match->mask.ip_ttl);
1389 				return -EOPNOTSUPP;
1390 			}
1391 
1392 			/* check that we don't try to decrement 0, which equates
1393 			 * to setting the ttl to 0xff
1394 			 */
1395 			if (match->value.ip_ttl == 0) {
1396 				NL_SET_ERR_MSG_MOD(extack,
1397 						   "decrementing hop_limit past 0 is not supported");
1398 				return -EOPNOTSUPP;
1399 			}
1400 
1401 			/* check that we do not decrement hoplimit twice */
1402 			if (!efx_tc_flower_action_order_ok(act,
1403 							   EFX_TC_AO_DEC_TTL)) {
1404 				NL_SET_ERR_MSG_MOD(extack,
1405 						   "multiple dec ttl is not supported");
1406 				return -EOPNOTSUPP;
1407 			}
1408 
1409 			/* check pedit can be achieved with decrement action */
1410 			tr_ttl = match->value.ip_ttl - 1;
1411 			if ((fa->mangle.val >> 24) == tr_ttl) {
1412 				act->do_ttl_dec = 1;
1413 				return 0;
1414 			}
1415 
1416 			fallthrough;
1417 		default:
1418 			NL_SET_ERR_MSG_FMT_MOD(extack,
1419 					       "only support mangle on the hop_limit field");
1420 			return -EOPNOTSUPP;
1421 		}
1422 	default:
1423 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
1424 				       fa->mangle.htype);
1425 		return -EOPNOTSUPP;
1426 	}
1427 	return 0;
1428 }
1429 
1430 /**
1431  * efx_tc_incomplete_mangle() - check for leftover partial pedits
1432  * @mung:	accumulator for partial mangles
1433  * @extack:	netlink extended ack for reporting errors
1434  *
1435  * Since the MAE can only overwrite whole fields, any partial
1436  * field mangle left over on reaching packet delivery (mirred or
1437  * end of TC actions) cannot be offloaded.  Check for any such
1438  * and reject them with -%EOPNOTSUPP.
1439  */
1440 
1441 static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
1442 				    struct netlink_ext_ack *extack)
1443 {
1444 	if (mung->dst_mac_32 || mung->dst_mac_16) {
1445 		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
1446 		return -EOPNOTSUPP;
1447 	}
1448 	if (mung->src_mac_16 || mung->src_mac_32) {
1449 		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
1450 		return -EOPNOTSUPP;
1451 	}
1452 	return 0;
1453 }
1454 
1455 static int efx_tc_flower_replace_foreign_lhs_ar(struct efx_nic *efx,
1456 						struct flow_cls_offload *tc,
1457 						struct flow_rule *fr,
1458 						struct efx_tc_match *match,
1459 						struct net_device *net_dev)
1460 {
1461 	struct netlink_ext_ack *extack = tc->common.extack;
1462 	struct efx_tc_lhs_rule *rule, *old;
1463 	enum efx_encap_type type;
1464 	int rc;
1465 
1466 	type = efx_tc_indr_netdev_type(net_dev);
1467 	if (type == EFX_ENCAP_TYPE_NONE) {
1468 		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on unsupported tunnel device");
1469 		return -EOPNOTSUPP;
1470 	}
1471 
1472 	rc = efx_mae_check_encap_type_supported(efx, type);
1473 	if (rc) {
1474 		NL_SET_ERR_MSG_FMT_MOD(extack,
1475 				       "Firmware reports no support for %s encap match",
1476 				       efx_tc_encap_type_name(type));
1477 		return rc;
1478 	}
1479 	/* This is an Action Rule, so it needs a separate Encap Match in the
1480 	 * Outer Rule table.  Insert that now.
1481 	 */
1482 	rc = efx_tc_flower_record_encap_match(efx, match, type,
1483 					      EFX_TC_EM_DIRECT, 0, 0, extack);
1484 	if (rc)
1485 		return rc;
1486 
1487 	match->mask.recirc_id = 0xff;
1488 	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
1489 		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
1490 		rc = -EOPNOTSUPP;
1491 		goto release_encap_match;
1492 	}
1493 	/* LHS rules are always -trk, so we don't need to match on that */
1494 	match->mask.ct_state_trk = 0;
1495 	match->value.ct_state_trk = 0;
1496 	/* We must inhibit match on TCP SYN/FIN/RST, so that SW can see
1497 	 * the packet and update the conntrack table.
1498 	 * Outer Rules will do that with CT_TCP_FLAGS_INHIBIT, but Action
1499 	 * Rules don't have that; instead they support matching on
1500 	 * TCP_SYN_FIN_RST (aka TCP_INTERESTING_FLAGS), so use that.
1501 	 * This is only strictly needed if there will be a DO_CT action,
1502 	 * which we don't know yet, but typically there will be and it's
1503 	 * simpler not to bother checking here.
1504 	 */
1505 	match->mask.tcp_syn_fin_rst = true;
1506 
1507 	rc = efx_mae_match_check_caps(efx, &match->mask, extack);
1508 	if (rc)
1509 		goto release_encap_match;
1510 
1511 	rule = kzalloc(sizeof(*rule), GFP_USER);
1512 	if (!rule) {
1513 		rc = -ENOMEM;
1514 		goto release_encap_match;
1515 	}
1516 	rule->cookie = tc->cookie;
1517 	rule->is_ar = true;
1518 	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
1519 						&rule->linkage,
1520 						efx_tc_lhs_rule_ht_params);
1521 	if (old) {
1522 		netif_dbg(efx, drv, efx->net_dev,
1523 			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
1524 		rc = -EEXIST;
1525 		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
1526 		goto release;
1527 	}
1528 
1529 	/* Parse actions */
1530 	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, net_dev, rule);
1531 	if (rc)
1532 		goto release;
1533 
1534 	rule->match = *match;
1535 	rule->lhs_act.tun_type = type;
1536 
1537 	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
1538 	if (rc) {
1539 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1540 		goto release;
1541 	}
1542 	netif_dbg(efx, drv, efx->net_dev,
1543 		  "Successfully parsed lhs rule (cookie %lx)\n",
1544 		  tc->cookie);
1545 	return 0;
1546 
1547 release:
1548 	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
1549 	if (!old)
1550 		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
1551 				       efx_tc_lhs_rule_ht_params);
1552 	kfree(rule);
1553 release_encap_match:
1554 	if (match->encap)
1555 		efx_tc_flower_release_encap_match(efx, match->encap);
1556 	return rc;
1557 }
1558 
1559 static int efx_tc_flower_replace_foreign_lhs(struct efx_nic *efx,
1560 					     struct flow_cls_offload *tc,
1561 					     struct flow_rule *fr,
1562 					     struct efx_tc_match *match,
1563 					     struct net_device *net_dev)
1564 {
1565 	struct netlink_ext_ack *extack = tc->common.extack;
1566 	struct efx_tc_lhs_rule *rule, *old;
1567 	enum efx_encap_type type;
1568 	int rc;
1569 
1570 	if (tc->common.chain_index) {
1571 		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
1572 		return -EOPNOTSUPP;
1573 	}
1574 
1575 	if (!efx_tc_match_is_encap(&match->mask)) {
1576 		/* This is not a tunnel decap rule, ignore it */
1577 		netif_dbg(efx, drv, efx->net_dev, "Ignoring foreign LHS filter without encap match\n");
1578 		return -EOPNOTSUPP;
1579 	}
1580 
1581 	if (efx_tc_flower_flhs_needs_ar(match))
1582 		return efx_tc_flower_replace_foreign_lhs_ar(efx, tc, fr, match,
1583 							    net_dev);
1584 
1585 	type = efx_tc_indr_netdev_type(net_dev);
1586 	if (type == EFX_ENCAP_TYPE_NONE) {
1587 		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on unsupported tunnel device\n");
1588 		return -EOPNOTSUPP;
1589 	}
1590 
1591 	rc = efx_mae_check_encap_type_supported(efx, type);
1592 	if (rc) {
1593 		NL_SET_ERR_MSG_FMT_MOD(extack,
1594 				       "Firmware reports no support for %s encap match",
1595 				       efx_tc_encap_type_name(type));
1596 		return rc;
1597 	}
1598 	/* Reserve the outer tuple with a pseudo Encap Match */
1599 	rc = efx_tc_flower_record_encap_match(efx, match, type,
1600 					      EFX_TC_EM_PSEUDO_OR, 0, 0,
1601 					      extack);
1602 	if (rc)
1603 		return rc;
1604 
1605 	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
1606 		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
1607 		rc = -EOPNOTSUPP;
1608 		goto release_encap_match;
1609 	}
1610 	/* LHS rules are always -trk, so we don't need to match on that */
1611 	match->mask.ct_state_trk = 0;
1612 	match->value.ct_state_trk = 0;
1613 
1614 	rc = efx_tc_flower_translate_flhs_match(match);
1615 	if (rc) {
1616 		NL_SET_ERR_MSG_MOD(extack, "LHS rule cannot match on inner fields");
1617 		goto release_encap_match;
1618 	}
1619 
1620 	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
1621 	if (rc)
1622 		goto release_encap_match;
1623 
1624 	rule = kzalloc(sizeof(*rule), GFP_USER);
1625 	if (!rule) {
1626 		rc = -ENOMEM;
1627 		goto release_encap_match;
1628 	}
1629 	rule->cookie = tc->cookie;
1630 	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
1631 						&rule->linkage,
1632 						efx_tc_lhs_rule_ht_params);
1633 	if (old) {
1634 		netif_dbg(efx, drv, efx->net_dev,
1635 			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
1636 		rc = -EEXIST;
1637 		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
1638 		goto release;
1639 	}
1640 
1641 	/* Parse actions */
1642 	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, net_dev, rule);
1643 	if (rc)
1644 		goto release;
1645 
1646 	rule->match = *match;
1647 	rule->lhs_act.tun_type = type;
1648 
1649 	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
1650 	if (rc) {
1651 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1652 		goto release;
1653 	}
1654 	netif_dbg(efx, drv, efx->net_dev,
1655 		  "Successfully parsed lhs rule (cookie %lx)\n",
1656 		  tc->cookie);
1657 	return 0;
1658 
1659 release:
1660 	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
1661 	if (!old)
1662 		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
1663 				       efx_tc_lhs_rule_ht_params);
1664 	kfree(rule);
1665 release_encap_match:
1666 	if (match->encap)
1667 		efx_tc_flower_release_encap_match(efx, match->encap);
1668 	return rc;
1669 }
1670 
1671 static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
1672 					 struct net_device *net_dev,
1673 					 struct flow_cls_offload *tc)
1674 {
1675 	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
1676 	struct netlink_ext_ack *extack = tc->common.extack;
1677 	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
1678 	struct efx_tc_action_set *act = NULL;
1679 	bool found = false, uplinked = false;
1680 	const struct flow_action_entry *fa;
1681 	struct efx_tc_match match;
1682 	struct efx_rep *to_efv;
1683 	s64 rc;
1684 	int i;
1685 
1686 	/* Parse match */
1687 	memset(&match, 0, sizeof(match));
1688 	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
1689 	if (rc)
1690 		return rc;
1691 	/* The rule as given to us doesn't specify a source netdevice.
1692 	 * But, determining whether packets from a VF should match it is
1693 	 * complicated, so leave those to the software slowpath: qualify
1694 	 * the filter with source m-port == wire.
1695 	 */
1696 	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
1697 	if (rc < 0) {
1698 		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
1699 		return rc;
1700 	}
1701 	match.value.ingress_port = rc;
1702 	match.mask.ingress_port = ~0;
1703 
1704 	if (efx_tc_rule_is_lhs_rule(fr, &match))
1705 		return efx_tc_flower_replace_foreign_lhs(efx, tc, fr, &match,
1706 							 net_dev);
1707 
1708 	if (tc->common.chain_index) {
1709 		struct efx_tc_recirc_id *rid;
1710 
1711 		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
1712 		if (IS_ERR(rid)) {
1713 			NL_SET_ERR_MSG_FMT_MOD(extack,
1714 					       "Failed to allocate a hardware recirculation ID for chain_index %u",
1715 					       tc->common.chain_index);
1716 			return PTR_ERR(rid);
1717 		}
1718 		match.rid = rid;
1719 		match.value.recirc_id = rid->fw_id;
1720 	}
1721 	match.mask.recirc_id = 0xff;
1722 
1723 	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
1724 	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
1725 	 */
1726 	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
1727 	    match.mask.ct_state_est && match.value.ct_state_est)
1728 		match.mask.ct_state_trk = 0;
1729 	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
1730 	 * match +trk-est (CT_HIT=0) despite being on an established connection.
1731 	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
1732 	 * still hit the software path.
1733 	 */
1734 	if (match.mask.ct_state_est && !match.value.ct_state_est) {
1735 		if (match.value.tcp_syn_fin_rst) {
1736 			/* Can't offload this combination */
1737 			NL_SET_ERR_MSG_MOD(extack, "TCP flags and -est conflict for offload");
1738 			rc = -EOPNOTSUPP;
1739 			goto release;
1740 		}
1741 		match.mask.tcp_syn_fin_rst = true;
1742 	}
1743 
1744 	flow_action_for_each(i, fa, &fr->action) {
1745 		switch (fa->id) {
1746 		case FLOW_ACTION_REDIRECT:
1747 		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
1748 			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
1749 			if (IS_ERR(to_efv))
1750 				continue;
1751 			found = true;
1752 			break;
1753 		default:
1754 			break;
1755 		}
1756 	}
1757 	if (!found) { /* We don't care. */
1758 		netif_dbg(efx, drv, efx->net_dev,
1759 			  "Ignoring foreign filter that doesn't egdev us\n");
1760 		rc = -EOPNOTSUPP;
1761 		goto release;
1762 	}
1763 
1764 	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
1765 	if (rc)
1766 		goto release;
1767 
1768 	if (efx_tc_match_is_encap(&match.mask)) {
1769 		enum efx_encap_type type;
1770 
1771 		type = efx_tc_indr_netdev_type(net_dev);
1772 		if (type == EFX_ENCAP_TYPE_NONE) {
1773 			NL_SET_ERR_MSG_MOD(extack,
1774 					   "Egress encap match on unsupported tunnel device");
1775 			rc = -EOPNOTSUPP;
1776 			goto release;
1777 		}
1778 
1779 		rc = efx_mae_check_encap_type_supported(efx, type);
1780 		if (rc) {
1781 			NL_SET_ERR_MSG_FMT_MOD(extack,
1782 					       "Firmware reports no support for %s encap match",
1783 					       efx_tc_encap_type_name(type));
1784 			goto release;
1785 		}
1786 
1787 		rc = efx_tc_flower_record_encap_match(efx, &match, type,
1788 						      EFX_TC_EM_DIRECT, 0, 0,
1789 						      extack);
1790 		if (rc)
1791 			goto release;
1792 	} else if (!tc->common.chain_index) {
1793 		/* This is not a tunnel decap rule, ignore it */
1794 		netif_dbg(efx, drv, efx->net_dev,
1795 			  "Ignoring foreign filter without encap match\n");
1796 		rc = -EOPNOTSUPP;
1797 		goto release;
1798 	}
1799 
1800 	rule = kzalloc(sizeof(*rule), GFP_USER);
1801 	if (!rule) {
1802 		rc = -ENOMEM;
1803 		goto release;
1804 	}
1805 	INIT_LIST_HEAD(&rule->acts.list);
1806 	rule->cookie = tc->cookie;
1807 	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
1808 						&rule->linkage,
1809 						efx_tc_match_action_ht_params);
1810 	if (IS_ERR(old)) {
1811 		rc = PTR_ERR(old);
1812 		goto release;
1813 	} else if (old) {
1814 		netif_dbg(efx, drv, efx->net_dev,
1815 			  "Ignoring already-offloaded rule (cookie %lx)\n",
1816 			  tc->cookie);
1817 		rc = -EEXIST;
1818 		goto release;
1819 	}
1820 
1821 	act = kzalloc(sizeof(*act), GFP_USER);
1822 	if (!act) {
1823 		rc = -ENOMEM;
1824 		goto release;
1825 	}
1826 
1827 	/* Parse actions.  For foreign rules we only support decap & redirect.
1828 	 * See corresponding code in efx_tc_flower_replace() for theory of
1829 	 * operation & how 'act' cursor is used.
1830 	 */
1831 	flow_action_for_each(i, fa, &fr->action) {
1832 		struct efx_tc_action_set save;
1833 
1834 		switch (fa->id) {
1835 		case FLOW_ACTION_REDIRECT:
1836 		case FLOW_ACTION_MIRRED:
1837 			/* See corresponding code in efx_tc_flower_replace() for
1838 			 * long explanations of what's going on here.
1839 			 */
1840 			save = *act;
1841 			if (fa->hw_stats) {
1842 				struct efx_tc_counter_index *ctr;
1843 
1844 				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
1845 					NL_SET_ERR_MSG_FMT_MOD(extack,
1846 							       "hw_stats_type %u not supported (only 'delayed')",
1847 							       fa->hw_stats);
1848 					rc = -EOPNOTSUPP;
1849 					goto release;
1850 				}
1851 				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
1852 					NL_SET_ERR_MSG_MOD(extack, "Count action violates action order (can't happen)");
1853 					rc = -EOPNOTSUPP;
1854 					goto release;
1855 				}
1856 
1857 				ctr = efx_tc_flower_get_counter_index(efx,
1858 								      tc->cookie,
1859 								      EFX_TC_COUNTER_TYPE_AR);
1860 				if (IS_ERR(ctr)) {
1861 					rc = PTR_ERR(ctr);
1862 					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
1863 					goto release;
1864 				}
1865 				act->count = ctr;
1866 				INIT_LIST_HEAD(&act->count_user);
1867 			}
1868 
1869 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
1870 				/* can't happen */
1871 				rc = -EOPNOTSUPP;
1872 				NL_SET_ERR_MSG_MOD(extack,
1873 						   "Deliver action violates action order (can't happen)");
1874 				goto release;
1875 			}
1876 			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
1877 			/* PF implies egdev is us, in which case we really
1878 			 * want to deliver to the uplink (because this is an
1879 			 * ingress filter).  If we don't recognise the egdev
1880 			 * at all, then we'd better trap so SW can handle it.
1881 			 */
1882 			if (IS_ERR(to_efv))
1883 				to_efv = EFX_EFV_PF;
1884 			if (to_efv == EFX_EFV_PF) {
1885 				if (uplinked)
1886 					break;
1887 				uplinked = true;
1888 			}
1889 			rc = efx_tc_flower_internal_mport(efx, to_efv);
1890 			if (rc < 0) {
1891 				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
1892 				goto release;
1893 			}
1894 			act->dest_mport = rc;
1895 			act->deliver = 1;
1896 			rc = efx_mae_alloc_action_set(efx, act);
1897 			if (rc) {
1898 				NL_SET_ERR_MSG_MOD(extack,
1899 						   "Failed to write action set to hw (mirred)");
1900 				goto release;
1901 			}
1902 			list_add_tail(&act->list, &rule->acts.list);
1903 			act = NULL;
1904 			if (fa->id == FLOW_ACTION_REDIRECT)
1905 				break; /* end of the line */
1906 			/* Mirror, so continue on with saved act */
1907 			act = kzalloc(sizeof(*act), GFP_USER);
1908 			if (!act) {
1909 				rc = -ENOMEM;
1910 				goto release;
1911 			}
1912 			*act = save;
1913 			break;
1914 		case FLOW_ACTION_TUNNEL_DECAP:
1915 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
1916 				rc = -EINVAL;
1917 				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
1918 				goto release;
1919 			}
1920 			act->decap = 1;
1921 			/* If we previously delivered/trapped to uplink, now
1922 			 * that we've decapped we'll want another copy if we
1923 			 * try to deliver/trap to uplink again.
1924 			 */
1925 			uplinked = false;
1926 			break;
1927 		default:
1928 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
1929 					       fa->id);
1930 			rc = -EOPNOTSUPP;
1931 			goto release;
1932 		}
1933 	}
1934 
1935 	if (act) {
1936 		if (!uplinked) {
1937 			/* Not shot/redirected, so deliver to default dest (which is
1938 			 * the uplink, as this is an ingress filter)
1939 			 */
1940 			efx_mae_mport_uplink(efx, &act->dest_mport);
1941 			act->deliver = 1;
1942 		}
1943 		rc = efx_mae_alloc_action_set(efx, act);
1944 		if (rc) {
1945 			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
1946 			goto release;
1947 		}
1948 		list_add_tail(&act->list, &rule->acts.list);
1949 		act = NULL; /* Prevent double-free in error path */
1950 	}
1951 
1952 	rule->match = match;
1953 
1954 	netif_dbg(efx, drv, efx->net_dev,
1955 		  "Successfully parsed foreign filter (cookie %lx)\n",
1956 		  tc->cookie);
1957 
1958 	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
1959 	if (rc) {
1960 		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
1961 		goto release;
1962 	}
1963 	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
1964 				 rule->acts.fw_id, &rule->fw_id);
1965 	if (rc) {
1966 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1967 		goto release_acts;
1968 	}
1969 	return 0;
1970 
1971 release_acts:
1972 	efx_mae_free_action_set_list(efx, &rule->acts);
1973 release:
1974 	/* We failed to insert the rule, so free up any entries we created in
1975 	 * subsidiary tables.
1976 	 */
1977 	if (match.rid)
1978 		efx_tc_put_recirc_id(efx, match.rid);
1979 	if (act)
1980 		efx_tc_free_action_set(efx, act, false);
1981 	if (rule) {
1982 		if (!old)
1983 			rhashtable_remove_fast(&efx->tc->match_action_ht,
1984 					       &rule->linkage,
1985 					       efx_tc_match_action_ht_params);
1986 		efx_tc_free_action_set_list(efx, &rule->acts, false);
1987 	}
1988 	kfree(rule);
1989 	if (match.encap)
1990 		efx_tc_flower_release_encap_match(efx, match.encap);
1991 	return rc;
1992 }
1993 
1994 static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
1995 				     struct flow_cls_offload *tc,
1996 				     struct flow_rule *fr,
1997 				     struct efx_tc_match *match,
1998 				     struct efx_rep *efv,
1999 				     struct net_device *net_dev)
2000 {
2001 	struct netlink_ext_ack *extack = tc->common.extack;
2002 	struct efx_tc_lhs_rule *rule, *old;
2003 	int rc;
2004 
2005 	if (tc->common.chain_index) {
2006 		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
2007 		return -EOPNOTSUPP;
2008 	}
2009 
2010 	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
2011 		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
2012 		return -EOPNOTSUPP;
2013 	}
2014 	/* LHS rules are always -trk, so we don't need to match on that */
2015 	match->mask.ct_state_trk = 0;
2016 	match->value.ct_state_trk = 0;
2017 
2018 	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
2019 	if (rc)
2020 		return rc;
2021 
2022 	rule = kzalloc(sizeof(*rule), GFP_USER);
2023 	if (!rule)
2024 		return -ENOMEM;
2025 	rule->cookie = tc->cookie;
2026 	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
2027 						&rule->linkage,
2028 						efx_tc_lhs_rule_ht_params);
2029 	if (IS_ERR(old)) {
2030 		rc = PTR_ERR(old);
2031 		goto release;
2032 	} else if (old) {
2033 		netif_dbg(efx, drv, efx->net_dev,
2034 			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
2035 		rc = -EEXIST;
2036 		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
2037 		goto release;
2038 	}
2039 
2040 	/* Parse actions */
2041 	/* See note in efx_tc_flower_replace() regarding passed net_dev
2042 	 * (used for efx_tc_get_recirc_id()).
2043 	 */
2044 	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule);
2045 	if (rc)
2046 		goto release;
2047 
2048 	rule->match = *match;
2049 
2050 	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
2051 	if (rc) {
2052 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
2053 		goto release;
2054 	}
2055 	netif_dbg(efx, drv, efx->net_dev,
2056 		  "Successfully parsed lhs rule (cookie %lx)\n",
2057 		  tc->cookie);
2058 	return 0;
2059 
2060 release:
2061 	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
2062 	if (!old)
2063 		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
2064 				       efx_tc_lhs_rule_ht_params);
2065 	kfree(rule);
2066 	return rc;
2067 }
2068 
2069 static int efx_tc_flower_replace(struct efx_nic *efx,
2070 				 struct net_device *net_dev,
2071 				 struct flow_cls_offload *tc,
2072 				 struct efx_rep *efv)
2073 {
2074 	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
2075 	struct netlink_ext_ack *extack = tc->common.extack;
2076 	const struct ip_tunnel_info *encap_info = NULL;
2077 	struct efx_tc_flow_rule *rule = NULL, *old;
2078 	struct efx_tc_mangler_state mung = {};
2079 	struct efx_tc_action_set *act = NULL;
2080 	const struct flow_action_entry *fa;
2081 	struct efx_rep *from_efv, *to_efv;
2082 	struct efx_tc_match match;
2083 	u32 acts_id;
2084 	s64 rc;
2085 	int i;
2086 
2087 	if (!tc_can_offload_extack(efx->net_dev, extack))
2088 		return -EOPNOTSUPP;
2089 	if (WARN_ON(!efx->tc))
2090 		return -ENETDOWN;
2091 	if (WARN_ON(!efx->tc->up))
2092 		return -ENETDOWN;
2093 
2094 	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
2095 	if (IS_ERR(from_efv)) {
2096 		/* Not from our PF or representors, so probably a tunnel dev */
2097 		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
2098 	}
2099 
2100 	if (efv != from_efv) {
2101 		/* can't happen */
2102 		NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
2103 				       netdev_name(net_dev), efv ? "non-" : "",
2104 				       from_efv ? "non-" : "");
2105 		return -EINVAL;
2106 	}
2107 
2108 	/* Parse match */
2109 	memset(&match, 0, sizeof(match));
2110 	rc = efx_tc_flower_external_mport(efx, from_efv);
2111 	if (rc < 0) {
2112 		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
2113 		return rc;
2114 	}
2115 	match.value.ingress_port = rc;
2116 	match.mask.ingress_port = ~0;
2117 	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
2118 	if (rc)
2119 		return rc;
2120 	if (efx_tc_match_is_encap(&match.mask)) {
2121 		NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
2122 		return -EOPNOTSUPP;
2123 	}
2124 
2125 	if (efx_tc_rule_is_lhs_rule(fr, &match))
2126 		return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv,
2127 						 net_dev);
2128 
2129 	/* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht).
2130 	 * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing
2131 	 * to the initial memset(), so we don't need to do anything in that case.
2132 	 */
2133 	if (tc->common.chain_index) {
2134 		struct efx_tc_recirc_id *rid;
2135 
2136 		/* Note regarding passed net_dev:
2137 		 * VFreps and PF can share chain namespace, as they have
2138 		 * distinct ingress_mports.  So we don't need to burn an
2139 		 * extra recirc_id if both use the same chain_index.
2140 		 * (Strictly speaking, we could give each VFrep its own
2141 		 * recirc_id namespace that doesn't take IDs away from the
2142 		 * PF, but that would require a bunch of additional IDAs -
2143 		 * one for each representor - and that's not likely to be
2144 		 * the main cause of recirc_id exhaustion anyway.)
2145 		 */
2146 		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index,
2147 					   efx->net_dev);
2148 		if (IS_ERR(rid)) {
2149 			NL_SET_ERR_MSG_FMT_MOD(extack,
2150 					       "Failed to allocate a hardware recirculation ID for chain_index %u",
2151 					       tc->common.chain_index);
2152 			return PTR_ERR(rid);
2153 		}
2154 		match.rid = rid;
2155 		match.value.recirc_id = rid->fw_id;
2156 	}
2157 	match.mask.recirc_id = 0xff;
2158 
2159 	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
2160 	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
2161 	 */
2162 	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
2163 	    match.mask.ct_state_est && match.value.ct_state_est)
2164 		match.mask.ct_state_trk = 0;
2165 	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
2166 	 * match +trk-est (CT_HIT=0) despite being on an established connection.
2167 	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
2168 	 * still hit the software path.
2169 	 */
2170 	if (match.mask.ct_state_est && !match.value.ct_state_est) {
2171 		if (match.value.tcp_syn_fin_rst) {
2172 			/* Can't offload this combination */
2173 			rc = -EOPNOTSUPP;
2174 			goto release;
2175 		}
2176 		match.mask.tcp_syn_fin_rst = true;
2177 	}
2178 
2179 	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
2180 	if (rc)
2181 		goto release;
2182 
2183 	rule = kzalloc(sizeof(*rule), GFP_USER);
2184 	if (!rule) {
2185 		rc = -ENOMEM;
2186 		goto release;
2187 	}
2188 	INIT_LIST_HEAD(&rule->acts.list);
2189 	rule->cookie = tc->cookie;
2190 	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
2191 						&rule->linkage,
2192 						efx_tc_match_action_ht_params);
2193 	if (IS_ERR(old)) {
2194 		rc = PTR_ERR(old);
2195 		goto release;
2196 	} else if (old) {
2197 		netif_dbg(efx, drv, efx->net_dev,
2198 			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
2199 		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
2200 		rc = -EEXIST;
2201 		goto release;
2202 	}
2203 
2204 	/* Parse actions */
2205 	act = kzalloc(sizeof(*act), GFP_USER);
2206 	if (!act) {
2207 		rc = -ENOMEM;
2208 		goto release;
2209 	}
2210 
2211 	/**
2212 	 * DOC: TC action translation
2213 	 *
2214 	 * Actions in TC are sequential and cumulative, with delivery actions
2215 	 * potentially anywhere in the order.  The EF100 MAE, however, takes
2216 	 * an 'action set list' consisting of 'action sets', each of which is
2217 	 * applied to the _original_ packet, and consists of a set of optional
2218 	 * actions in a fixed order with delivery at the end.
2219 	 * To translate between these two models, we maintain a 'cursor', @act,
2220 	 * which describes the cumulative effect of all the packet-mutating
2221 	 * actions encountered so far; on handling a delivery (mirred or drop)
2222 	 * action, once the action-set has been inserted into hardware, we
2223 	 * append @act to the action-set list (@rule->acts); if this is a pipe
2224 	 * action (mirred mirror) we then allocate a new @act with a copy of
2225 	 * the cursor state _before_ the delivery action, otherwise we set @act
2226 	 * to %NULL.
2227 	 * This ensures that every allocated action-set is either attached to
2228 	 * @rule->acts or pointed to by @act (and never both), and that only
2229 	 * those action-sets in @rule->acts exist in hardware.  Consequently,
2230 	 * in the failure path, @act only needs to be freed in memory, whereas
2231 	 * for @rule->acts we remove each action-set from hardware before
2232 	 * freeing it (efx_tc_free_action_set_list()), even if the action-set
2233 	 * list itself is not in hardware.
2234 	 */
2235 	flow_action_for_each(i, fa, &fr->action) {
2236 		struct efx_tc_action_set save;
2237 		u16 tci;
2238 
2239 		if (!act) {
2240 			/* more actions after a non-pipe action */
2241 			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
2242 			rc = -EINVAL;
2243 			goto release;
2244 		}
2245 
2246 		if ((fa->id == FLOW_ACTION_REDIRECT ||
2247 		     fa->id == FLOW_ACTION_MIRRED ||
2248 		     fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
2249 			struct efx_tc_counter_index *ctr;
2250 
2251 			/* Currently the only actions that want stats are
2252 			 * mirred and gact (ok, shot, trap, goto-chain), which
2253 			 * means we want stats just before delivery.  Also,
2254 			 * note that tunnel_key set shouldn't change the length
2255 			 * — it's only the subsequent mirred that does that,
2256 			 * and the stats are taken _before_ the mirred action
2257 			 * happens.
2258 			 */
2259 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
2260 				/* All supported actions that count either steal
2261 				 * (gact shot, mirred redirect) or clone act
2262 				 * (mirred mirror), so we should never get two
2263 				 * count actions on one action_set.
2264 				 */
2265 				NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
2266 				rc = -EOPNOTSUPP;
2267 				goto release;
2268 			}
2269 
2270 			if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
2271 				NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
2272 						       fa->hw_stats);
2273 				rc = -EOPNOTSUPP;
2274 				goto release;
2275 			}
2276 
2277 			ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
2278 							      EFX_TC_COUNTER_TYPE_AR);
2279 			if (IS_ERR(ctr)) {
2280 				rc = PTR_ERR(ctr);
2281 				NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
2282 				goto release;
2283 			}
2284 			act->count = ctr;
2285 			INIT_LIST_HEAD(&act->count_user);
2286 		}
2287 
2288 		switch (fa->id) {
2289 		case FLOW_ACTION_DROP:
2290 			rc = efx_mae_alloc_action_set(efx, act);
2291 			if (rc) {
2292 				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
2293 				goto release;
2294 			}
2295 			list_add_tail(&act->list, &rule->acts.list);
2296 			act = NULL; /* end of the line */
2297 			break;
2298 		case FLOW_ACTION_REDIRECT:
2299 		case FLOW_ACTION_MIRRED:
2300 			save = *act;
2301 
2302 			if (encap_info) {
2303 				struct efx_tc_encap_action *encap;
2304 
2305 				if (!efx_tc_flower_action_order_ok(act,
2306 								   EFX_TC_AO_ENCAP)) {
2307 					rc = -EOPNOTSUPP;
2308 					NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
2309 					goto release;
2310 				}
2311 				encap = efx_tc_flower_create_encap_md(
2312 						efx, encap_info, fa->dev, extack);
2313 				if (IS_ERR_OR_NULL(encap)) {
2314 					rc = PTR_ERR(encap);
2315 					if (!rc)
2316 						rc = -EIO; /* arbitrary */
2317 					goto release;
2318 				}
2319 				act->encap_md = encap;
2320 				list_add_tail(&act->encap_user, &encap->users);
2321 				act->dest_mport = encap->dest_mport;
2322 				act->deliver = 1;
2323 				if (act->count && !WARN_ON(!act->count->cnt)) {
2324 					/* This counter is used by an encap
2325 					 * action, which needs a reference back
2326 					 * so it can prod neighbouring whenever
2327 					 * traffic is seen.
2328 					 */
2329 					spin_lock_bh(&act->count->cnt->lock);
2330 					list_add_tail(&act->count_user,
2331 						      &act->count->cnt->users);
2332 					spin_unlock_bh(&act->count->cnt->lock);
2333 				}
2334 				rc = efx_mae_alloc_action_set(efx, act);
2335 				if (rc) {
2336 					NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
2337 					goto release;
2338 				}
2339 				list_add_tail(&act->list, &rule->acts.list);
2340 				act->user = &rule->acts;
2341 				act = NULL;
2342 				if (fa->id == FLOW_ACTION_REDIRECT)
2343 					break; /* end of the line */
2344 				/* Mirror, so continue on with saved act */
2345 				save.count = NULL;
2346 				act = kzalloc(sizeof(*act), GFP_USER);
2347 				if (!act) {
2348 					rc = -ENOMEM;
2349 					goto release;
2350 				}
2351 				*act = save;
2352 				break;
2353 			}
2354 
2355 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
2356 				/* can't happen */
2357 				rc = -EOPNOTSUPP;
2358 				NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
2359 				goto release;
2360 			}
2361 
2362 			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
2363 			if (IS_ERR(to_efv)) {
2364 				NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
2365 				rc = PTR_ERR(to_efv);
2366 				goto release;
2367 			}
2368 			rc = efx_tc_flower_external_mport(efx, to_efv);
2369 			if (rc < 0) {
2370 				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
2371 				goto release;
2372 			}
2373 			act->dest_mport = rc;
2374 			act->deliver = 1;
2375 			rc = efx_mae_alloc_action_set(efx, act);
2376 			if (rc) {
2377 				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
2378 				goto release;
2379 			}
2380 			list_add_tail(&act->list, &rule->acts.list);
2381 			act = NULL;
2382 			if (fa->id == FLOW_ACTION_REDIRECT)
2383 				break; /* end of the line */
2384 			/* Mirror, so continue on with saved act */
2385 			save.count = NULL;
2386 			act = kzalloc(sizeof(*act), GFP_USER);
2387 			if (!act) {
2388 				rc = -ENOMEM;
2389 				goto release;
2390 			}
2391 			*act = save;
2392 			break;
2393 		case FLOW_ACTION_VLAN_POP:
2394 			if (act->vlan_push) {
2395 				act->vlan_push--;
2396 			} else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
2397 				act->vlan_pop++;
2398 			} else {
2399 				NL_SET_ERR_MSG_MOD(extack,
2400 						   "More than two VLAN pops, or action order violated");
2401 				rc = -EINVAL;
2402 				goto release;
2403 			}
2404 			break;
2405 		case FLOW_ACTION_VLAN_PUSH:
2406 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
2407 				rc = -EINVAL;
2408 				NL_SET_ERR_MSG_MOD(extack,
2409 						   "More than two VLAN pushes, or action order violated");
2410 				goto release;
2411 			}
2412 			tci = fa->vlan.vid & VLAN_VID_MASK;
2413 			tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
2414 			act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
2415 			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
2416 			act->vlan_push++;
2417 			break;
2418 		case FLOW_ACTION_ADD:
2419 			rc = efx_tc_pedit_add(efx, act, fa, extack);
2420 			if (rc < 0)
2421 				goto release;
2422 			break;
2423 		case FLOW_ACTION_MANGLE:
2424 			rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
2425 			if (rc < 0)
2426 				goto release;
2427 			break;
2428 		case FLOW_ACTION_TUNNEL_ENCAP:
2429 			if (encap_info) {
2430 				/* Can't specify encap multiple times.
2431 				 * If you want to overwrite an existing
2432 				 * encap_info, use an intervening
2433 				 * FLOW_ACTION_TUNNEL_DECAP to clear it.
2434 				 */
2435 				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
2436 				rc = -EINVAL;
2437 				goto release;
2438 			}
2439 			if (!fa->tunnel) {
2440 				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
2441 				rc = -EOPNOTSUPP;
2442 				goto release;
2443 			}
2444 			encap_info = fa->tunnel;
2445 			break;
2446 		case FLOW_ACTION_TUNNEL_DECAP:
2447 			if (encap_info) {
2448 				encap_info = NULL;
2449 				break;
2450 			}
2451 			/* Since we don't support enc_key matches on ingress
2452 			 * (and if we did there'd be no tunnel-device to give
2453 			 * us a type), we can't offload a decap that's not
2454 			 * just undoing a previous encap action.
2455 			 */
2456 			NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
2457 			rc = -EOPNOTSUPP;
2458 			goto release;
2459 		case FLOW_ACTION_CT:
2460 			if (fa->ct.action != TCA_CT_ACT_NAT) {
2461 				rc = -EOPNOTSUPP;
2462 				NL_SET_ERR_MSG_FMT_MOD(extack, "Can only offload CT 'nat' action in RHS rules, not %d", fa->ct.action);
2463 				goto release;
2464 			}
2465 			act->do_nat = 1;
2466 			break;
2467 		default:
2468 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
2469 					       fa->id);
2470 			rc = -EOPNOTSUPP;
2471 			goto release;
2472 		}
2473 	}
2474 
2475 	rc = efx_tc_incomplete_mangle(&mung, extack);
2476 	if (rc < 0)
2477 		goto release;
2478 	if (act) {
2479 		/* Not shot/redirected, so deliver to default dest */
2480 		if (from_efv == EFX_EFV_PF)
2481 			/* Rule applies to traffic from the wire,
2482 			 * and default dest is thus the PF
2483 			 */
2484 			efx_mae_mport_uplink(efx, &act->dest_mport);
2485 		else
2486 			/* Representor, so rule applies to traffic from
2487 			 * representee, and default dest is thus the rep.
2488 			 * All reps use the same mport for delivery
2489 			 */
2490 			efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2491 					    &act->dest_mport);
2492 		act->deliver = 1;
2493 		rc = efx_mae_alloc_action_set(efx, act);
2494 		if (rc) {
2495 			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
2496 			goto release;
2497 		}
2498 		list_add_tail(&act->list, &rule->acts.list);
2499 		act = NULL; /* Prevent double-free in error path */
2500 	}
2501 
2502 	netif_dbg(efx, drv, efx->net_dev,
2503 		  "Successfully parsed filter (cookie %lx)\n",
2504 		  tc->cookie);
2505 
2506 	rule->match = match;
2507 
2508 	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
2509 	if (rc) {
2510 		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
2511 		goto release;
2512 	}
2513 	if (from_efv == EFX_EFV_PF)
2514 		/* PF netdev, so rule applies to traffic from wire */
2515 		rule->fallback = &efx->tc->facts.pf;
2516 	else
2517 		/* repdev, so rule applies to traffic from representee */
2518 		rule->fallback = &efx->tc->facts.reps;
2519 	if (!efx_tc_check_ready(efx, rule)) {
2520 		netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
2521 		acts_id = rule->fallback->fw_id;
2522 	} else {
2523 		netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
2524 		acts_id = rule->acts.fw_id;
2525 	}
2526 	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
2527 				 acts_id, &rule->fw_id);
2528 	if (rc) {
2529 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
2530 		goto release_acts;
2531 	}
2532 	return 0;
2533 
2534 release_acts:
2535 	efx_mae_free_action_set_list(efx, &rule->acts);
2536 release:
2537 	/* We failed to insert the rule, so free up any entries we created in
2538 	 * subsidiary tables.
2539 	 */
2540 	if (match.rid)
2541 		efx_tc_put_recirc_id(efx, match.rid);
2542 	if (act)
2543 		efx_tc_free_action_set(efx, act, false);
2544 	if (rule) {
2545 		if (!old)
2546 			rhashtable_remove_fast(&efx->tc->match_action_ht,
2547 					       &rule->linkage,
2548 					       efx_tc_match_action_ht_params);
2549 		efx_tc_free_action_set_list(efx, &rule->acts, false);
2550 	}
2551 	kfree(rule);
2552 	return rc;
2553 }
2554 
2555 static int efx_tc_flower_destroy(struct efx_nic *efx,
2556 				 struct net_device *net_dev,
2557 				 struct flow_cls_offload *tc)
2558 {
2559 	struct netlink_ext_ack *extack = tc->common.extack;
2560 	struct efx_tc_lhs_rule *lhs_rule;
2561 	struct efx_tc_flow_rule *rule;
2562 
2563 	lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie,
2564 					  efx_tc_lhs_rule_ht_params);
2565 	if (lhs_rule) {
2566 		/* Remove it from HW */
2567 		efx_mae_remove_lhs_rule(efx, lhs_rule);
2568 		/* Delete it from SW */
2569 		efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act);
2570 		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage,
2571 				       efx_tc_lhs_rule_ht_params);
2572 		if (lhs_rule->match.encap)
2573 			efx_tc_flower_release_encap_match(efx, lhs_rule->match.encap);
2574 		netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n",
2575 			  lhs_rule->cookie);
2576 		kfree(lhs_rule);
2577 		return 0;
2578 	}
2579 
2580 	rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
2581 				      efx_tc_match_action_ht_params);
2582 	if (!rule) {
2583 		/* Only log a message if we're the ingress device.  Otherwise
2584 		 * it's a foreign filter and we might just not have been
2585 		 * interested (e.g. we might not have been the egress device
2586 		 * either).
2587 		 */
2588 		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
2589 			netif_warn(efx, drv, efx->net_dev,
2590 				   "Filter %lx not found to remove\n", tc->cookie);
2591 		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
2592 		return -ENOENT;
2593 	}
2594 
2595 	/* Remove it from HW */
2596 	efx_tc_delete_rule(efx, rule);
2597 	/* Delete it from SW */
2598 	rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
2599 			       efx_tc_match_action_ht_params);
2600 	netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
2601 	kfree(rule);
2602 	return 0;
2603 }
2604 
2605 static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
2606 			       struct flow_cls_offload *tc)
2607 {
2608 	struct netlink_ext_ack *extack = tc->common.extack;
2609 	struct efx_tc_counter_index *ctr;
2610 	struct efx_tc_counter *cnt;
2611 	u64 packets, bytes;
2612 
2613 	ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
2614 	if (!ctr) {
2615 		/* See comment in efx_tc_flower_destroy() */
2616 		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
2617 			if (net_ratelimit())
2618 				netif_warn(efx, drv, efx->net_dev,
2619 					   "Filter %lx not found for stats\n",
2620 					   tc->cookie);
2621 		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
2622 		return -ENOENT;
2623 	}
2624 	if (WARN_ON(!ctr->cnt)) /* can't happen */
2625 		return -EIO;
2626 	cnt = ctr->cnt;
2627 
2628 	spin_lock_bh(&cnt->lock);
2629 	/* Report only new pkts/bytes since last time TC asked */
2630 	packets = cnt->packets;
2631 	bytes = cnt->bytes;
2632 	flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
2633 			  packets - cnt->old_packets, 0, cnt->touched,
2634 			  FLOW_ACTION_HW_STATS_DELAYED);
2635 	cnt->old_packets = packets;
2636 	cnt->old_bytes = bytes;
2637 	spin_unlock_bh(&cnt->lock);
2638 	return 0;
2639 }
2640 
2641 int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
2642 		  struct flow_cls_offload *tc, struct efx_rep *efv)
2643 {
2644 	int rc;
2645 
2646 	if (!efx->tc)
2647 		return -EOPNOTSUPP;
2648 
2649 	mutex_lock(&efx->tc->mutex);
2650 	switch (tc->command) {
2651 	case FLOW_CLS_REPLACE:
2652 		rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
2653 		break;
2654 	case FLOW_CLS_DESTROY:
2655 		rc = efx_tc_flower_destroy(efx, net_dev, tc);
2656 		break;
2657 	case FLOW_CLS_STATS:
2658 		rc = efx_tc_flower_stats(efx, net_dev, tc);
2659 		break;
2660 	default:
2661 		rc = -EOPNOTSUPP;
2662 		break;
2663 	}
2664 	mutex_unlock(&efx->tc->mutex);
2665 	return rc;
2666 }
2667 
2668 static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
2669 					 u32 eg_port, struct efx_tc_flow_rule *rule)
2670 {
2671 	struct efx_tc_action_set_list *acts = &rule->acts;
2672 	struct efx_tc_match *match = &rule->match;
2673 	struct efx_tc_action_set *act;
2674 	int rc;
2675 
2676 	match->value.ingress_port = ing_port;
2677 	match->mask.ingress_port = ~0;
2678 	act = kzalloc(sizeof(*act), GFP_KERNEL);
2679 	if (!act)
2680 		return -ENOMEM;
2681 	act->deliver = 1;
2682 	act->dest_mport = eg_port;
2683 	rc = efx_mae_alloc_action_set(efx, act);
2684 	if (rc)
2685 		goto fail1;
2686 	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
2687 	list_add_tail(&act->list, &acts->list);
2688 	rc = efx_mae_alloc_action_set_list(efx, acts);
2689 	if (rc)
2690 		goto fail2;
2691 	rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
2692 				 acts->fw_id, &rule->fw_id);
2693 	if (rc)
2694 		goto fail3;
2695 	return 0;
2696 fail3:
2697 	efx_mae_free_action_set_list(efx, acts);
2698 fail2:
2699 	list_del(&act->list);
2700 	efx_mae_free_action_set(efx, act->fw_id);
2701 fail1:
2702 	kfree(act);
2703 	return rc;
2704 }
2705 
2706 static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
2707 {
2708 	struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
2709 	u32 ing_port, eg_port;
2710 
2711 	efx_mae_mport_uplink(efx, &ing_port);
2712 	efx_mae_mport_wire(efx, &eg_port);
2713 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2714 }
2715 
2716 static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
2717 {
2718 	struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
2719 	u32 ing_port, eg_port;
2720 
2721 	efx_mae_mport_wire(efx, &ing_port);
2722 	efx_mae_mport_uplink(efx, &eg_port);
2723 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2724 }
2725 
2726 int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
2727 {
2728 	struct efx_tc_flow_rule *rule = &efv->dflt;
2729 	struct efx_nic *efx = efv->parent;
2730 	u32 ing_port, eg_port;
2731 
2732 	efx_mae_mport_mport(efx, efv->mport, &ing_port);
2733 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
2734 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2735 }
2736 
2737 void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
2738 				     struct efx_tc_flow_rule *rule)
2739 {
2740 	if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
2741 		efx_tc_delete_rule(efx, rule);
2742 	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2743 }
2744 
2745 static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
2746 					  struct efx_tc_action_set_list *acts)
2747 {
2748 	struct efx_tc_action_set *act;
2749 	int rc;
2750 
2751 	act = kzalloc(sizeof(*act), GFP_KERNEL);
2752 	if (!act)
2753 		return -ENOMEM;
2754 	act->deliver = 1;
2755 	act->dest_mport = eg_port;
2756 	rc = efx_mae_alloc_action_set(efx, act);
2757 	if (rc)
2758 		goto fail1;
2759 	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
2760 	list_add_tail(&act->list, &acts->list);
2761 	rc = efx_mae_alloc_action_set_list(efx, acts);
2762 	if (rc)
2763 		goto fail2;
2764 	return 0;
2765 fail2:
2766 	list_del(&act->list);
2767 	efx_mae_free_action_set(efx, act->fw_id);
2768 fail1:
2769 	kfree(act);
2770 	return rc;
2771 }
2772 
2773 static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
2774 {
2775 	struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
2776 	u32 eg_port;
2777 
2778 	efx_mae_mport_uplink(efx, &eg_port);
2779 	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
2780 }
2781 
2782 static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
2783 {
2784 	struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
2785 	u32 eg_port;
2786 
2787 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
2788 	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
2789 }
2790 
2791 static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
2792 					     struct efx_tc_action_set_list *acts)
2793 {
2794 	efx_tc_free_action_set_list(efx, acts, true);
2795 }
2796 
2797 static int efx_tc_configure_rep_mport(struct efx_nic *efx)
2798 {
2799 	u32 rep_mport_label;
2800 	int rc;
2801 
2802 	rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
2803 	if (rc)
2804 		return rc;
2805 	pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
2806 		efx->tc->reps_mport_id, rep_mport_label);
2807 	/* Use mport *selector* as vport ID */
2808 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2809 			    &efx->tc->reps_mport_vport_id);
2810 	return 0;
2811 }
2812 
2813 static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
2814 {
2815 	efx_mae_free_mport(efx, efx->tc->reps_mport_id);
2816 	efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
2817 }
2818 
2819 int efx_tc_insert_rep_filters(struct efx_nic *efx)
2820 {
2821 	struct efx_filter_spec promisc, allmulti;
2822 	int rc;
2823 
2824 	if (efx->type->is_vf)
2825 		return 0;
2826 	if (!efx->tc)
2827 		return 0;
2828 	efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
2829 	efx_filter_set_uc_def(&promisc);
2830 	efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
2831 	rc = efx_filter_insert_filter(efx, &promisc, false);
2832 	if (rc < 0)
2833 		return rc;
2834 	efx->tc->reps_filter_uc = rc;
2835 	efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
2836 	efx_filter_set_mc_def(&allmulti);
2837 	efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
2838 	rc = efx_filter_insert_filter(efx, &allmulti, false);
2839 	if (rc < 0)
2840 		return rc;
2841 	efx->tc->reps_filter_mc = rc;
2842 	return 0;
2843 }
2844 
2845 void efx_tc_remove_rep_filters(struct efx_nic *efx)
2846 {
2847 	if (efx->type->is_vf)
2848 		return;
2849 	if (!efx->tc)
2850 		return;
2851 	if (efx->tc->reps_filter_mc >= 0)
2852 		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc);
2853 	efx->tc->reps_filter_mc = -1;
2854 	if (efx->tc->reps_filter_uc >= 0)
2855 		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc);
2856 	efx->tc->reps_filter_uc = -1;
2857 }
2858 
2859 int efx_init_tc(struct efx_nic *efx)
2860 {
2861 	int rc;
2862 
2863 	rc = efx_mae_get_caps(efx, efx->tc->caps);
2864 	if (rc)
2865 		return rc;
2866 	if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
2867 		/* Firmware supports some match fields the driver doesn't know
2868 		 * about.  Not fatal, unless any of those fields are required
2869 		 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS) but if so we don't know.
2870 		 */
2871 		netif_warn(efx, probe, efx->net_dev,
2872 			   "FW reports additional match fields %u\n",
2873 			   efx->tc->caps->match_field_count);
2874 	if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
2875 		netif_err(efx, probe, efx->net_dev,
2876 			  "Too few action prios supported (have %u, need %u)\n",
2877 			  efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
2878 		return -EIO;
2879 	}
2880 	rc = efx_tc_configure_default_rule_pf(efx);
2881 	if (rc)
2882 		return rc;
2883 	rc = efx_tc_configure_default_rule_wire(efx);
2884 	if (rc)
2885 		return rc;
2886 	rc = efx_tc_configure_rep_mport(efx);
2887 	if (rc)
2888 		return rc;
2889 	rc = efx_tc_configure_fallback_acts_pf(efx);
2890 	if (rc)
2891 		return rc;
2892 	rc = efx_tc_configure_fallback_acts_reps(efx);
2893 	if (rc)
2894 		return rc;
2895 	rc = efx_mae_get_tables(efx);
2896 	if (rc)
2897 		return rc;
2898 	rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
2899 	if (rc)
2900 		goto out_free;
2901 	efx->tc->up = true;
2902 	return 0;
2903 out_free:
2904 	efx_mae_free_tables(efx);
2905 	return rc;
2906 }
2907 
2908 void efx_fini_tc(struct efx_nic *efx)
2909 {
2910 	/* We can get called even if efx_init_struct_tc() failed */
2911 	if (!efx->tc)
2912 		return;
2913 	if (efx->tc->up)
2914 		flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind);
2915 	efx_tc_deconfigure_rep_mport(efx);
2916 	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
2917 	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
2918 	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
2919 	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
2920 	efx->tc->up = false;
2921 	efx_mae_free_tables(efx);
2922 }
2923 
2924 /* At teardown time, all TC filter rules (and thus all resources they created)
2925  * should already have been removed.  If we find any in our hashtables, make a
2926  * cursory attempt to clean up the software side.
2927  */
2928 static void efx_tc_encap_match_free(void *ptr, void *__unused)
2929 {
2930 	struct efx_tc_encap_match *encap = ptr;
2931 
2932 	WARN_ON(refcount_read(&encap->ref));
2933 	kfree(encap);
2934 }
2935 
2936 static void efx_tc_recirc_free(void *ptr, void *arg)
2937 {
2938 	struct efx_tc_recirc_id *rid = ptr;
2939 	struct efx_nic *efx = arg;
2940 
2941 	WARN_ON(refcount_read(&rid->ref));
2942 	ida_free(&efx->tc->recirc_ida, rid->fw_id);
2943 	kfree(rid);
2944 }
2945 
2946 static void efx_tc_lhs_free(void *ptr, void *arg)
2947 {
2948 	struct efx_tc_lhs_rule *rule = ptr;
2949 	struct efx_nic *efx = arg;
2950 
2951 	netif_err(efx, drv, efx->net_dev,
2952 		  "tc lhs_rule %lx still present at teardown, removing\n",
2953 		  rule->cookie);
2954 
2955 	if (rule->lhs_act.zone)
2956 		efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone);
2957 	if (rule->lhs_act.count)
2958 		efx_tc_flower_put_counter_index(efx, rule->lhs_act.count);
2959 	efx_mae_remove_lhs_rule(efx, rule);
2960 
2961 	kfree(rule);
2962 }
2963 
2964 static void efx_tc_mac_free(void *ptr, void *__unused)
2965 {
2966 	struct efx_tc_mac_pedit_action *ped = ptr;
2967 
2968 	WARN_ON(refcount_read(&ped->ref));
2969 	kfree(ped);
2970 }
2971 
2972 static void efx_tc_flow_free(void *ptr, void *arg)
2973 {
2974 	struct efx_tc_flow_rule *rule = ptr;
2975 	struct efx_nic *efx = arg;
2976 
2977 	netif_err(efx, drv, efx->net_dev,
2978 		  "tc rule %lx still present at teardown, removing\n",
2979 		  rule->cookie);
2980 
2981 	/* Also releases entries in subsidiary tables */
2982 	efx_tc_delete_rule(efx, rule);
2983 
2984 	kfree(rule);
2985 }
2986 
2987 int efx_init_struct_tc(struct efx_nic *efx)
2988 {
2989 	int rc;
2990 
2991 	if (efx->type->is_vf)
2992 		return 0;
2993 
2994 	efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
2995 	if (!efx->tc)
2996 		return -ENOMEM;
2997 	efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
2998 	if (!efx->tc->caps) {
2999 		rc = -ENOMEM;
3000 		goto fail_alloc_caps;
3001 	}
3002 	INIT_LIST_HEAD(&efx->tc->block_list);
3003 
3004 	mutex_init(&efx->tc->mutex);
3005 	init_waitqueue_head(&efx->tc->flush_wq);
3006 	rc = efx_tc_init_encap_actions(efx);
3007 	if (rc < 0)
3008 		goto fail_encap_actions;
3009 	rc = efx_tc_init_counters(efx);
3010 	if (rc < 0)
3011 		goto fail_counters;
3012 	rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
3013 	if (rc < 0)
3014 		goto fail_mac_ht;
3015 	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
3016 	if (rc < 0)
3017 		goto fail_encap_match_ht;
3018 	rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
3019 	if (rc < 0)
3020 		goto fail_match_action_ht;
3021 	rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params);
3022 	if (rc < 0)
3023 		goto fail_lhs_rule_ht;
3024 	rc = efx_tc_init_conntrack(efx);
3025 	if (rc < 0)
3026 		goto fail_conntrack;
3027 	rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params);
3028 	if (rc < 0)
3029 		goto fail_recirc_ht;
3030 	ida_init(&efx->tc->recirc_ida);
3031 	efx->tc->reps_filter_uc = -1;
3032 	efx->tc->reps_filter_mc = -1;
3033 	INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
3034 	efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
3035 	INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
3036 	efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
3037 	INIT_LIST_HEAD(&efx->tc->facts.pf.list);
3038 	efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
3039 	INIT_LIST_HEAD(&efx->tc->facts.reps.list);
3040 	efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
3041 	efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
3042 	return 0;
3043 fail_recirc_ht:
3044 	efx_tc_destroy_conntrack(efx);
3045 fail_conntrack:
3046 	rhashtable_destroy(&efx->tc->lhs_rule_ht);
3047 fail_lhs_rule_ht:
3048 	rhashtable_destroy(&efx->tc->match_action_ht);
3049 fail_match_action_ht:
3050 	rhashtable_destroy(&efx->tc->encap_match_ht);
3051 fail_encap_match_ht:
3052 	rhashtable_destroy(&efx->tc->mac_ht);
3053 fail_mac_ht:
3054 	efx_tc_destroy_counters(efx);
3055 fail_counters:
3056 	efx_tc_destroy_encap_actions(efx);
3057 fail_encap_actions:
3058 	mutex_destroy(&efx->tc->mutex);
3059 	kfree(efx->tc->caps);
3060 fail_alloc_caps:
3061 	kfree(efx->tc);
3062 	efx->tc = NULL;
3063 	return rc;
3064 }
3065 
3066 void efx_fini_struct_tc(struct efx_nic *efx)
3067 {
3068 	if (!efx->tc)
3069 		return;
3070 
3071 	mutex_lock(&efx->tc->mutex);
3072 	EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
3073 			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
3074 	EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
3075 			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
3076 	EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
3077 			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
3078 	EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
3079 			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
3080 	rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx);
3081 	rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
3082 				    efx);
3083 	rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
3084 				    efx_tc_encap_match_free, NULL);
3085 	efx_tc_fini_conntrack(efx);
3086 	rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
3087 	WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
3088 	ida_destroy(&efx->tc->recirc_ida);
3089 	rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
3090 	efx_tc_fini_counters(efx);
3091 	efx_tc_fini_encap_actions(efx);
3092 	mutex_unlock(&efx->tc->mutex);
3093 	mutex_destroy(&efx->tc->mutex);
3094 	kfree(efx->tc->caps);
3095 	kfree(efx->tc);
3096 	efx->tc = NULL;
3097 }
3098