xref: /linux/drivers/net/ethernet/sfc/tc_conntrack.c (revision 0ea5c948cb64bab5bc7a5516774eb8536f05aa0d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2023, Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10 
11 #include "tc_conntrack.h"
12 #include "tc.h"
13 #include "mae.h"
14 
/* Forward declaration.  The flow-block callback is defined near the end of
 * this file, but its address is needed earlier when detaching it from a
 * zone's flowtable during teardown.
 */
static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
			     void *cb_priv);
17 
18 static const struct rhashtable_params efx_tc_ct_zone_ht_params = {
19 	.key_len	= offsetof(struct efx_tc_ct_zone, linkage),
20 	.key_offset	= 0,
21 	.head_offset	= offsetof(struct efx_tc_ct_zone, linkage),
22 };
23 
24 static const struct rhashtable_params efx_tc_ct_ht_params = {
25 	.key_len	= offsetof(struct efx_tc_ct_entry, linkage),
26 	.key_offset	= 0,
27 	.head_offset	= offsetof(struct efx_tc_ct_entry, linkage),
28 };
29 
efx_tc_ct_zone_free(void * ptr,void * arg)30 static void efx_tc_ct_zone_free(void *ptr, void *arg)
31 {
32 	struct efx_tc_ct_zone *zone = ptr;
33 	struct efx_nic *efx = zone->efx;
34 
35 	netif_err(efx, drv, efx->net_dev,
36 		  "tc ct_zone %u still present at teardown, removing\n",
37 		  zone->zone);
38 
39 	nf_flow_table_offload_del_cb(zone->nf_ft, efx_tc_flow_block, zone);
40 	kfree(zone);
41 }
42 
efx_tc_ct_free(void * ptr,void * arg)43 static void efx_tc_ct_free(void *ptr, void *arg)
44 {
45 	struct efx_tc_ct_entry *conn = ptr;
46 	struct efx_nic *efx = arg;
47 
48 	netif_err(efx, drv, efx->net_dev,
49 		  "tc ct_entry %lx still present at teardown\n",
50 		  conn->cookie);
51 
52 	/* We can release the counter, but we can't remove the CT itself
53 	 * from hardware because the table meta is already gone.
54 	 */
55 	efx_tc_flower_release_counter(efx, conn->cnt);
56 	kfree(conn);
57 }
58 
efx_tc_init_conntrack(struct efx_nic * efx)59 int efx_tc_init_conntrack(struct efx_nic *efx)
60 {
61 	int rc;
62 
63 	rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params);
64 	if (rc < 0)
65 		goto fail_ct_zone_ht;
66 	rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params);
67 	if (rc < 0)
68 		goto fail_ct_ht;
69 	return 0;
70 fail_ct_ht:
71 	rhashtable_destroy(&efx->tc->ct_zone_ht);
72 fail_ct_zone_ht:
73 	return rc;
74 }
75 
76 /* Only call this in init failure teardown.
77  * Normal exit should fini instead as there may be entries in the table.
78  */
efx_tc_destroy_conntrack(struct efx_nic * efx)79 void efx_tc_destroy_conntrack(struct efx_nic *efx)
80 {
81 	rhashtable_destroy(&efx->tc->ct_ht);
82 	rhashtable_destroy(&efx->tc->ct_zone_ht);
83 }
84 
efx_tc_fini_conntrack(struct efx_nic * efx)85 void efx_tc_fini_conntrack(struct efx_nic *efx)
86 {
87 	rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL);
88 	rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx);
89 }
90 
/* Convert the big-endian 32-bit TCP_FLAG_<flg> constant to CPU order, keep
 * its upper 16 bits, and hand them back as a big-endian 16-bit value.
 */
#define EFX_NF_TCP_FLAG(flg)	\
	cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16)
92 
efx_tc_ct_parse_match(struct efx_nic * efx,struct flow_rule * fr,struct efx_tc_ct_entry * conn)93 static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
94 				 struct efx_tc_ct_entry *conn)
95 {
96 	struct flow_dissector *dissector = fr->match.dissector;
97 	unsigned char ipv = 0;
98 	bool tcp = false;
99 
100 	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) {
101 		struct flow_match_control fm;
102 
103 		flow_rule_match_control(fr, &fm);
104 		if (IS_ALL_ONES(fm.mask->addr_type))
105 			switch (fm.key->addr_type) {
106 			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
107 				ipv = 4;
108 				break;
109 			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
110 				ipv = 6;
111 				break;
112 			default:
113 				break;
114 			}
115 	}
116 
117 	if (!ipv) {
118 		netif_dbg(efx, drv, efx->net_dev,
119 			  "Conntrack missing ipv specification\n");
120 		return -EOPNOTSUPP;
121 	}
122 
123 	if (dissector->used_keys &
124 	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
125 	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
126 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
127 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
128 	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
129 	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
130 	      BIT_ULL(FLOW_DISSECTOR_KEY_META))) {
131 		netif_dbg(efx, drv, efx->net_dev,
132 			  "Unsupported conntrack keys %#llx\n",
133 			  dissector->used_keys);
134 		return -EOPNOTSUPP;
135 	}
136 
137 	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) {
138 		struct flow_match_basic fm;
139 
140 		flow_rule_match_basic(fr, &fm);
141 		if (!IS_ALL_ONES(fm.mask->n_proto)) {
142 			netif_dbg(efx, drv, efx->net_dev,
143 				  "Conntrack eth_proto is not exact-match; mask %04x\n",
144 				   ntohs(fm.mask->n_proto));
145 			return -EOPNOTSUPP;
146 		}
147 		conn->eth_proto = fm.key->n_proto;
148 		if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP)
149 						 : htons(ETH_P_IPV6))) {
150 			netif_dbg(efx, drv, efx->net_dev,
151 				  "Conntrack eth_proto is not IPv%u, is %04x\n",
152 				   ipv, ntohs(conn->eth_proto));
153 			return -EOPNOTSUPP;
154 		}
155 		if (!IS_ALL_ONES(fm.mask->ip_proto)) {
156 			netif_dbg(efx, drv, efx->net_dev,
157 				  "Conntrack ip_proto is not exact-match; mask %02x\n",
158 				   fm.mask->ip_proto);
159 			return -EOPNOTSUPP;
160 		}
161 		conn->ip_proto = fm.key->ip_proto;
162 		switch (conn->ip_proto) {
163 		case IPPROTO_TCP:
164 			tcp = true;
165 			break;
166 		case IPPROTO_UDP:
167 			break;
168 		default:
169 			netif_dbg(efx, drv, efx->net_dev,
170 				  "Conntrack ip_proto not TCP or UDP, is %02x\n",
171 				   conn->ip_proto);
172 			return -EOPNOTSUPP;
173 		}
174 	} else {
175 		netif_dbg(efx, drv, efx->net_dev,
176 			  "Conntrack missing eth_proto, ip_proto\n");
177 		return -EOPNOTSUPP;
178 	}
179 
180 	if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
181 		struct flow_match_ipv4_addrs fm;
182 
183 		flow_rule_match_ipv4_addrs(fr, &fm);
184 		if (!IS_ALL_ONES(fm.mask->src)) {
185 			netif_dbg(efx, drv, efx->net_dev,
186 				  "Conntrack ipv4.src is not exact-match; mask %08x\n",
187 				   ntohl(fm.mask->src));
188 			return -EOPNOTSUPP;
189 		}
190 		conn->src_ip = fm.key->src;
191 		if (!IS_ALL_ONES(fm.mask->dst)) {
192 			netif_dbg(efx, drv, efx->net_dev,
193 				  "Conntrack ipv4.dst is not exact-match; mask %08x\n",
194 				   ntohl(fm.mask->dst));
195 			return -EOPNOTSUPP;
196 		}
197 		conn->dst_ip = fm.key->dst;
198 	} else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
199 		struct flow_match_ipv6_addrs fm;
200 
201 		flow_rule_match_ipv6_addrs(fr, &fm);
202 		if (!efx_ipv6_addr_all_ones(&fm.mask->src)) {
203 			netif_dbg(efx, drv, efx->net_dev,
204 				  "Conntrack ipv6.src is not exact-match; mask %pI6\n",
205 				   &fm.mask->src);
206 			return -EOPNOTSUPP;
207 		}
208 		conn->src_ip6 = fm.key->src;
209 		if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) {
210 			netif_dbg(efx, drv, efx->net_dev,
211 				  "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
212 				   &fm.mask->dst);
213 			return -EOPNOTSUPP;
214 		}
215 		conn->dst_ip6 = fm.key->dst;
216 	} else {
217 		netif_dbg(efx, drv, efx->net_dev,
218 			  "Conntrack missing IPv%u addrs\n", ipv);
219 		return -EOPNOTSUPP;
220 	}
221 
222 	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) {
223 		struct flow_match_ports fm;
224 
225 		flow_rule_match_ports(fr, &fm);
226 		if (!IS_ALL_ONES(fm.mask->src)) {
227 			netif_dbg(efx, drv, efx->net_dev,
228 				  "Conntrack ports.src is not exact-match; mask %04x\n",
229 				   ntohs(fm.mask->src));
230 			return -EOPNOTSUPP;
231 		}
232 		conn->l4_sport = fm.key->src;
233 		if (!IS_ALL_ONES(fm.mask->dst)) {
234 			netif_dbg(efx, drv, efx->net_dev,
235 				  "Conntrack ports.dst is not exact-match; mask %04x\n",
236 				   ntohs(fm.mask->dst));
237 			return -EOPNOTSUPP;
238 		}
239 		conn->l4_dport = fm.key->dst;
240 	} else {
241 		netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n");
242 		return -EOPNOTSUPP;
243 	}
244 
245 	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) {
246 		__be16 tcp_interesting_flags;
247 		struct flow_match_tcp fm;
248 
249 		if (!tcp) {
250 			netif_dbg(efx, drv, efx->net_dev,
251 				  "Conntrack matching on TCP keys but ipproto is not tcp\n");
252 			return -EOPNOTSUPP;
253 		}
254 		flow_rule_match_tcp(fr, &fm);
255 		tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) |
256 					EFX_NF_TCP_FLAG(RST) |
257 					EFX_NF_TCP_FLAG(FIN);
258 		/* If any of the tcp_interesting_flags is set, we always
259 		 * inhibit CT lookup in LHS (so SW can update CT table).
260 		 */
261 		if (fm.key->flags & tcp_interesting_flags) {
262 			netif_dbg(efx, drv, efx->net_dev,
263 				  "Unsupported conntrack tcp.flags %04x/%04x\n",
264 				   ntohs(fm.key->flags), ntohs(fm.mask->flags));
265 			return -EOPNOTSUPP;
266 		}
267 		/* Other TCP flags cannot be filtered at CT */
268 		if (fm.mask->flags & ~tcp_interesting_flags) {
269 			netif_dbg(efx, drv, efx->net_dev,
270 				  "Unsupported conntrack tcp.flags %04x/%04x\n",
271 				   ntohs(fm.key->flags), ntohs(fm.mask->flags));
272 			return -EOPNOTSUPP;
273 		}
274 	}
275 
276 	return 0;
277 }
278 
279 /**
280  * struct efx_tc_ct_mangler_state - tracks which fields have been pedited
281  *
282  * @ipv4: IP source or destination addr has been set
283  * @tcpudp: TCP/UDP source or destination port has been set
284  */
285 struct efx_tc_ct_mangler_state {
286 	u8 ipv4:1;
287 	u8 tcpudp:1;
288 };
289 
efx_tc_ct_mangle(struct efx_nic * efx,struct efx_tc_ct_entry * conn,const struct flow_action_entry * fa,struct efx_tc_ct_mangler_state * mung)290 static int efx_tc_ct_mangle(struct efx_nic *efx, struct efx_tc_ct_entry *conn,
291 			    const struct flow_action_entry *fa,
292 			    struct efx_tc_ct_mangler_state *mung)
293 {
294 	/* Is this the first mangle we've processed for this rule? */
295 	bool first = !(mung->ipv4 || mung->tcpudp);
296 	bool dnat = false;
297 
298 	switch (fa->mangle.htype) {
299 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
300 		switch (fa->mangle.offset) {
301 		case offsetof(struct iphdr, daddr):
302 			dnat = true;
303 			fallthrough;
304 		case offsetof(struct iphdr, saddr):
305 			if (fa->mangle.mask)
306 				return -EOPNOTSUPP;
307 			conn->nat_ip = htonl(fa->mangle.val);
308 			mung->ipv4 = 1;
309 			break;
310 		default:
311 			return -EOPNOTSUPP;
312 		}
313 		break;
314 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
315 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
316 		/* Both struct tcphdr and struct udphdr start with
317 		 *	__be16 source;
318 		 *	__be16 dest;
319 		 * so we can use the same code for both.
320 		 */
321 		switch (fa->mangle.offset) {
322 		case offsetof(struct tcphdr, dest):
323 			BUILD_BUG_ON(offsetof(struct tcphdr, dest) !=
324 				     offsetof(struct udphdr, dest));
325 			dnat = true;
326 			fallthrough;
327 		case offsetof(struct tcphdr, source):
328 			BUILD_BUG_ON(offsetof(struct tcphdr, source) !=
329 				     offsetof(struct udphdr, source));
330 			if (~fa->mangle.mask != 0xffff)
331 				return -EOPNOTSUPP;
332 			conn->l4_natport = htons(fa->mangle.val);
333 			mung->tcpudp = 1;
334 			break;
335 		default:
336 			return -EOPNOTSUPP;
337 		}
338 		break;
339 	default:
340 		return -EOPNOTSUPP;
341 	}
342 	/* first mangle tells us whether this is SNAT or DNAT;
343 	 * subsequent mangles must match that
344 	 */
345 	if (first)
346 		conn->dnat = dnat;
347 	else if (conn->dnat != dnat)
348 		return -EOPNOTSUPP;
349 	return 0;
350 }
351 
efx_tc_ct_replace(struct efx_tc_ct_zone * ct_zone,struct flow_cls_offload * tc)352 static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
353 			     struct flow_cls_offload *tc)
354 {
355 	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
356 	struct efx_tc_ct_mangler_state mung = {};
357 	struct efx_tc_ct_entry *conn, *old;
358 	struct efx_nic *efx = ct_zone->efx;
359 	const struct flow_action_entry *fa;
360 	struct efx_tc_counter *cnt;
361 	int rc, i;
362 
363 	if (WARN_ON(!efx->tc))
364 		return -ENETDOWN;
365 	if (WARN_ON(!efx->tc->up))
366 		return -ENETDOWN;
367 
368 	conn = kzalloc(sizeof(*conn), GFP_USER);
369 	if (!conn)
370 		return -ENOMEM;
371 	conn->cookie = tc->cookie;
372 	old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
373 						&conn->linkage,
374 						efx_tc_ct_ht_params);
375 	if (IS_ERR(old)) {
376 		rc = PTR_ERR(old);
377 		goto release;
378 	} else if (old) {
379 		netif_dbg(efx, drv, efx->net_dev,
380 			  "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
381 		rc = -EEXIST;
382 		goto release;
383 	}
384 
385 	/* Parse match */
386 	conn->zone = ct_zone;
387 	rc = efx_tc_ct_parse_match(efx, fr, conn);
388 	if (rc)
389 		goto release;
390 
391 	/* Parse actions */
392 	flow_action_for_each(i, fa, &fr->action) {
393 		switch (fa->id) {
394 		case FLOW_ACTION_CT_METADATA:
395 			conn->mark = fa->ct_metadata.mark;
396 			if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) {
397 				netif_dbg(efx, drv, efx->net_dev,
398 					  "Setting CT label not supported\n");
399 				rc = -EOPNOTSUPP;
400 				goto release;
401 			}
402 			break;
403 		case FLOW_ACTION_MANGLE:
404 			if (conn->eth_proto != htons(ETH_P_IP)) {
405 				netif_dbg(efx, drv, efx->net_dev,
406 					  "NAT only supported for IPv4\n");
407 				rc = -EOPNOTSUPP;
408 				goto release;
409 			}
410 			rc = efx_tc_ct_mangle(efx, conn, fa, &mung);
411 			if (rc)
412 				goto release;
413 			break;
414 		default:
415 			netif_dbg(efx, drv, efx->net_dev,
416 				  "Unhandled action %u for conntrack\n", fa->id);
417 			rc = -EOPNOTSUPP;
418 			goto release;
419 		}
420 	}
421 
422 	/* fill in defaults for unmangled values */
423 	if (!mung.ipv4)
424 		conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
425 	if (!mung.tcpudp)
426 		conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;
427 
428 	cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
429 	if (IS_ERR(cnt)) {
430 		rc = PTR_ERR(cnt);
431 		goto release;
432 	}
433 	conn->cnt = cnt;
434 
435 	rc = efx_mae_insert_ct(efx, conn);
436 	if (rc) {
437 		netif_dbg(efx, drv, efx->net_dev,
438 			  "Failed to insert conntrack, %d\n", rc);
439 		goto release;
440 	}
441 	mutex_lock(&ct_zone->mutex);
442 	list_add_tail(&conn->list, &ct_zone->cts);
443 	mutex_unlock(&ct_zone->mutex);
444 	return 0;
445 release:
446 	if (conn->cnt)
447 		efx_tc_flower_release_counter(efx, conn->cnt);
448 	if (!old)
449 		rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
450 				       efx_tc_ct_ht_params);
451 	kfree(conn);
452 	return rc;
453 }
454 
455 /* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period! */
efx_tc_ct_remove(struct efx_nic * efx,struct efx_tc_ct_entry * conn)456 static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
457 {
458 	int rc;
459 
460 	/* Remove it from HW */
461 	rc = efx_mae_remove_ct(efx, conn);
462 	/* Delete it from SW */
463 	rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
464 			       efx_tc_ct_ht_params);
465 	if (rc) {
466 		netif_err(efx, drv, efx->net_dev,
467 			  "Failed to remove conntrack %lx from hw, rc %d\n",
468 			  conn->cookie, rc);
469 	} else {
470 		netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n",
471 			  conn->cookie);
472 	}
473 }
474 
efx_tc_ct_remove_finish(struct efx_nic * efx,struct efx_tc_ct_entry * conn)475 static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
476 {
477 	/* Remove related CT counter.  This is delayed after the conn object we
478 	 * are working with has been successfully removed.  This protects the
479 	 * counter from being used-after-free inside efx_tc_ct_stats.
480 	 */
481 	efx_tc_flower_release_counter(efx, conn->cnt);
482 	kfree(conn);
483 }
484 
efx_tc_ct_destroy(struct efx_tc_ct_zone * ct_zone,struct flow_cls_offload * tc)485 static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone,
486 			     struct flow_cls_offload *tc)
487 {
488 	struct efx_nic *efx = ct_zone->efx;
489 	struct efx_tc_ct_entry *conn;
490 
491 	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
492 				      efx_tc_ct_ht_params);
493 	if (!conn) {
494 		netif_warn(efx, drv, efx->net_dev,
495 			   "Conntrack %lx not found to remove\n", tc->cookie);
496 		return -ENOENT;
497 	}
498 
499 	mutex_lock(&ct_zone->mutex);
500 	list_del(&conn->list);
501 	efx_tc_ct_remove(efx, conn);
502 	mutex_unlock(&ct_zone->mutex);
503 	synchronize_rcu();
504 	efx_tc_ct_remove_finish(efx, conn);
505 	return 0;
506 }
507 
efx_tc_ct_stats(struct efx_tc_ct_zone * ct_zone,struct flow_cls_offload * tc)508 static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone,
509 			   struct flow_cls_offload *tc)
510 {
511 	struct efx_nic *efx = ct_zone->efx;
512 	struct efx_tc_ct_entry *conn;
513 	struct efx_tc_counter *cnt;
514 
515 	rcu_read_lock();
516 	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
517 				      efx_tc_ct_ht_params);
518 	if (!conn) {
519 		netif_warn(efx, drv, efx->net_dev,
520 			   "Conntrack %lx not found for stats\n", tc->cookie);
521 		rcu_read_unlock();
522 		return -ENOENT;
523 	}
524 
525 	cnt = conn->cnt;
526 	spin_lock_bh(&cnt->lock);
527 	/* Report only last use */
528 	flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched,
529 			  FLOW_ACTION_HW_STATS_DELAYED);
530 	spin_unlock_bh(&cnt->lock);
531 	rcu_read_unlock();
532 
533 	return 0;
534 }
535 
efx_tc_flow_block(enum tc_setup_type type,void * type_data,void * cb_priv)536 static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
537 			     void *cb_priv)
538 {
539 	struct flow_cls_offload *tcb = type_data;
540 	struct efx_tc_ct_zone *ct_zone = cb_priv;
541 
542 	if (type != TC_SETUP_CLSFLOWER)
543 		return -EOPNOTSUPP;
544 
545 	switch (tcb->command) {
546 	case FLOW_CLS_REPLACE:
547 		return efx_tc_ct_replace(ct_zone, tcb);
548 	case FLOW_CLS_DESTROY:
549 		return efx_tc_ct_destroy(ct_zone, tcb);
550 	case FLOW_CLS_STATS:
551 		return efx_tc_ct_stats(ct_zone, tcb);
552 	default:
553 		break;
554 	}
555 
556 	return -EOPNOTSUPP;
557 }
558 
/* Get a reference on the conntrack zone numbered @zone, creating it if needed.
 *
 * A candidate zone is always allocated and offered to the zone hash table.
 * If an entry for @zone is already there, the candidate is dropped and a
 * reference is taken on the existing one.  Otherwise the candidate is
 * completed, our flow-block callback is registered on @ct_ft, and the zone
 * is returned holding a single reference.
 *
 * Returns the zone, or an ERR_PTR: -ENOMEM, the hash table's insertion
 * error, -EAGAIN if the existing zone's refcount was zero, or the error
 * from nf_flow_table_offload_add_cb().
 */
struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
					       struct nf_flowtable *ct_ft)
{
	struct efx_tc_ct_zone *fresh, *existing;
	int rc;

	fresh = kzalloc(sizeof(*fresh), GFP_USER);
	if (!fresh)
		return ERR_PTR(-ENOMEM);
	fresh->zone = zone;

	existing = rhashtable_lookup_get_insert_fast(&efx->tc->ct_zone_ht,
						     &fresh->linkage,
						     efx_tc_ct_zone_ht_params);
	if (IS_ERR(existing)) {
		/* insertion failed outright */
		kfree(fresh);
		return ERR_CAST(existing);
	}
	if (existing) {
		/* somebody got there first; our candidate is surplus */
		kfree(fresh);
		if (!refcount_inc_not_zero(&existing->ref))
			return ERR_PTR(-EAGAIN);
		WARN_ON_ONCE(existing->nf_ft != ct_ft);
		netif_dbg(efx, drv, efx->net_dev,
			  "Found existing ct_zone for %u\n", zone);
		return existing;
	}

	/* Our candidate is now in the table: finish setting it up */
	fresh->nf_ft = ct_ft;
	fresh->efx = efx;
	INIT_LIST_HEAD(&fresh->cts);
	mutex_init(&fresh->mutex);

	rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, fresh);
	netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n",
		  zone, rc);
	if (rc < 0) {
		rhashtable_remove_fast(&efx->tc->ct_zone_ht, &fresh->linkage,
				       efx_tc_ct_zone_ht_params);
		kfree(fresh);
		return ERR_PTR(rc);
	}

	refcount_set(&fresh->ref, 1);
	return fresh;
}
602 
efx_tc_ct_unregister_zone(struct efx_nic * efx,struct efx_tc_ct_zone * ct_zone)603 void efx_tc_ct_unregister_zone(struct efx_nic *efx,
604 			       struct efx_tc_ct_zone *ct_zone)
605 {
606 	struct efx_tc_ct_entry *conn, *next;
607 
608 	if (!refcount_dec_and_test(&ct_zone->ref))
609 		return; /* still in use */
610 	nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone);
611 	rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
612 			       efx_tc_ct_zone_ht_params);
613 	mutex_lock(&ct_zone->mutex);
614 	list_for_each_entry(conn, &ct_zone->cts, list)
615 		efx_tc_ct_remove(efx, conn);
616 	synchronize_rcu();
617 	/* need to use _safe because efx_tc_ct_remove_finish() frees conn */
618 	list_for_each_entry_safe(conn, next, &ct_zone->cts, list)
619 		efx_tc_ct_remove_finish(efx, conn);
620 	mutex_unlock(&ct_zone->mutex);
621 	mutex_destroy(&ct_zone->mutex);
622 	netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n",
623 		  ct_zone->zone);
624 	kfree(ct_zone);
625 }
626