// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>
#include <linux/if_macvlan.h>
#include <linux/debugfs.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/tc/ct_fs.h"
#include "en/tc_priv.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en/tc/post_act.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
#include "fs_core.h"

#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_CT_STATE_REPLY_BIT BIT(4)
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)
#define MLX5_CT_STATE_NEW_BIT BIT(7)
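/* Note (explanatory, not upstream): these ct_state bits are written into the
 * CTSTATE_TO_REG register by the per-entry modify-header actions and matched
 * back by offloaded ct_state keys. In the pre-ct tables below they share
 * metadata_reg_c_2 with the 16-bit zone: zone in the low half, state bits
 * shifted into the high half (see the metadata_reg_c_2_mask construction in
 * mlx5_tc_ct_alloc_pre_ct()).
 */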

#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)

/* Statically allocate modify actions for
 * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
 * This will be increased dynamically if needed (for the ipv6 snat + dnat).
 */
#define MLX5_CT_MIN_MOD_ACTS 10

#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

struct mlx5_tc_ct_debugfs {
	struct {
		atomic_t offloaded;
		atomic_t rx_dropped;
	} stats;

	struct dentry *root;
};

struct mlx5_tc_ct_priv {
	struct mlx5_core_dev *dev;
	struct mlx5e_priv *priv;
	const struct net_device *netdev;
	struct mod_hdr_tbl *mod_hdr_tbl;
	struct xarray tuple_ids;
	struct rhashtable zone_ht;
	struct rhashtable ct_tuples_ht;
	struct rhashtable ct_tuples_nat_ht;
	struct mlx5_flow_table *ct;
	struct mlx5_flow_table *ct_nat;
	struct mlx5_flow_group *ct_nat_miss_group;
	struct mlx5_flow_handle *ct_nat_miss_rule;
	struct mlx5e_post_act *post_act;
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5_fs_chains *chains;
	struct mlx5_ct_fs *fs;
	struct mlx5_ct_fs_ops *fs_ops;
	spinlock_t ht_lock; /* protects ft entries */
	struct workqueue_struct *wq;

	struct mlx5_tc_ct_debugfs debugfs;
};

struct mlx5_ct_zone_rule {
	struct mlx5_ct_fs_rule *rule;
	struct mlx5e_mod_hdr_handle *mh;
	struct mlx5_flow_attr *attr;
	bool nat;
};

struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;
	u32 zone_restore_id;
	refcount_t refcount;
	struct nf_flowtable *nf_ft;
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht;
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
	u16 addr_type;
	__be16 n_proto;
	u8 ip_proto;
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};

struct mlx5_ct_counter {
	struct mlx5_fc *counter;
	refcount_t refcount;
	bool is_shared;
};

enum {
	MLX5_CT_ENTRY_FLAG_VALID,
	MLX5_CT_ENTRY_IN_CT_TABLE,
	MLX5_CT_ENTRY_IN_CT_NAT_TABLE,
};

struct mlx5_ct_entry {
	struct rhash_head node;
	struct rhash_head tuple_node;
	struct rhash_head tuple_nat_node;
	struct mlx5_ct_counter *counter;
	unsigned long cookie;
	unsigned long restore_cookie;
	struct mlx5_ct_tuple tuple;
	struct mlx5_ct_tuple tuple_nat;
	struct mlx5_ct_zone_rule zone_rules[2];

	struct mlx5_tc_ct_priv *ct_priv;
	struct work_struct work;

	refcount_t refcnt;
	unsigned long flags;
};

static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				 struct mlx5_flow_attr *attr,
				 struct mlx5e_mod_hdr_handle *mh);

static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};
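
/* Note (explanatory, not upstream): each offloaded connection is indexed
 * three ways - by the flow_cls_offload cookie in the per-zone ct_entries_ht,
 * by its pre-NAT tuple in ct_tuples_ht, and by its post-NAT tuple in
 * ct_tuples_nat_ht. The tuple tables let the restore path map a packet seen
 * in software back to the entry that owns it.
 */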

static bool
mlx5_tc_ct_entry_in_ct_table(struct mlx5_ct_entry *entry)
{
	return test_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags);
}

static bool
mlx5_tc_ct_entry_in_ct_nat_table(struct mlx5_ct_entry *entry)
{
	return test_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags);
}

static int
mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
		       u32 *labels, u32 *id)
{
	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
		*id = 0;
		return 0;
	}

	if (mapping_add(ct_priv->labels_mapping, labels, id))
		return -EOPNOTSUPP;

	return 0;
}

static void
mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
{
	if (id)
		mapping_remove(ct_priv->labels_mapping, id);
}
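
/* Note (explanatory, not upstream): the 128-bit conntrack label doesn't fit
 * in a metadata register, so it is compressed to a small id via the
 * labels_mapping context; id 0 is reserved for the all-zero label and takes
 * no mapping slot. A typical get/put pairing (sketch) looks like:
 *
 *	u32 id;
 *
 *	if (!mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, &id)) {
 *		... match or set LABELS_TO_REG with id ...
 *		mlx5_put_label_mapping(ct_priv, id);
 *	}
 */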

static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
	struct flow_match_control control;
	struct flow_match_basic basic;

	flow_rule_match_basic(rule, &basic);
	flow_rule_match_control(rule, &control);

	tuple->n_proto = basic.key->n_proto;
	tuple->ip_proto = basic.key->ip_proto;
	tuple->addr_type = control.key->addr_type;

	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		tuple->ip.src_v4 = match.key->src;
		tuple->ip.dst_v4 = match.key->dst;
	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		tuple->ip.src_v6 = match.key->src;
		tuple->ip.dst_v6 = match.key->dst;
	} else {
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (tuple->ip_proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			tuple->port.src = match.key->src;
			tuple->port.dst = match.key->dst;
			break;
		default:
			return -EOPNOTSUPP;
		}
	} else {
		if (tuple->ip_proto != IPPROTO_GRE)
			return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
			     struct flow_rule *rule)
{
	struct flow_action *flow_action = &rule->action;
	struct flow_action_entry *act;
	u32 offset, val, ip6_offset;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE)
			continue;

		offset = act->mangle.offset;
		val = act->mangle.val;
		switch (act->mangle.htype) {
		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
			if (offset == offsetof(struct iphdr, saddr))
				tuple->ip.src_v4 = cpu_to_be32(val);
			else if (offset == offsetof(struct iphdr, daddr))
				tuple->ip.dst_v4 = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
			ip6_offset /= 4;
			if (ip6_offset < 4)
				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
			else if (ip6_offset < 8)
				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
			if (offset == offsetof(struct tcphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct tcphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
			if (offset == offsetof(struct udphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct udphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static int
mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
				 struct net_device *ndev)
{
	struct mlx5e_priv *other_priv = netdev_priv(ndev);
	struct mlx5_core_dev *mdev = ct_priv->dev;
	bool vf_rep, uplink_rep;

	vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
	uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);

	if (vf_rep)
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
	if (uplink_rep)
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
	if (is_vlan_dev(ndev))
		return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
	if (netif_is_macvlan(ndev))
		return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
	if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;

	return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
}
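
/* Note (explanatory, not upstream): for stacked devices (vlan, macvlan) the
 * lookup above recurses onto the underlying real device, so a ct flow
 * ingressing a vlan on top of a VF representor still resolves to
 * LOCAL_VPORT. Anything that can't be classified falls back to ANY_VPORT.
 */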

static int
mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_flow_spec *spec,
			   struct flow_rule *rule)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);

		mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		ip_proto = match.key->ip_proto;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			break;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
		struct flow_match_meta match;

		flow_rule_match_meta(rule, &match);

		if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
			struct net_device *dev;

			dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
			if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
				spec->flow_context.flow_source =
					mlx5_tc_ct_get_flow_source_match(ct_priv, dev);

			dev_put(dev);
		}
	}

	return 0;
}

static void
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
	if (entry->counter->is_shared &&
	    !refcount_dec_and_test(&entry->counter->refcount))
		return;

	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
	kfree(entry->counter);
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_ct_entry *entry,
			  bool nat)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_flow_attr *attr = zone_rule->attr;

	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

	ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
	kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_ct_entry *entry)
{
	if (mlx5_tc_ct_entry_in_ct_nat_table(entry))
		mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
	if (mlx5_tc_ct_entry_in_ct_table(entry))
		mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);

	atomic_dec(&ct_priv->debugfs.stats.offloaded);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id == FLOW_ACTION_CT_METADATA)
			return act;
	}

	return NULL;
}

static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
			       u8 ct_state,
			       u32 mark,
			       u32 labels_id,
			       u8 zone_restore_id)
{
	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
	struct mlx5_core_dev *dev = ct_priv->dev;
	int err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					CTSTATE_TO_REG, ct_state);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					MARK_TO_REG, mark);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					LABELS_TO_REG, labels_id);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					ZONE_RESTORE_TO_REG, zone_restore_id);
	if (err)
		return err;

	/* Make another copy of zone id in reg_b for
	 * NIC rx flows since we don't copy reg_c1 to
	 * reg_b upon miss.
	 */
	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
		if (err)
			return err;
	}
	return 0;
}

static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
				   char *modact)
{
	u32 offset = act->mangle.offset, field;

	switch (act->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct iphdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
		else if (offset == offsetof(struct iphdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct tcphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
		else if (offset == offsetof(struct tcphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct udphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
		else if (offset == offsetof(struct udphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	default:
		return -EOPNOTSUPP;
	}

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, offset, 0);
	MLX5_SET(set_action_in, modact, field, field);
	MLX5_SET(set_action_in, modact, data, act->mangle.val);

	return 0;
}
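
/* Note (explanatory, not upstream): each mangle action maps 1:1 onto a
 * hardware set_action_in. Worked example: rewriting bytes 8..11 of the IPv6
 * source address (offset == offsetof(struct ipv6hdr, saddr) + 8) selects
 * MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32, i.e. bits 63:32 of the 128-bit
 * address. A length of 0 encodes a full 32-bit set; ports use length 16.
 */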

static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
			    struct flow_rule *flow_rule,
			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct mlx5_core_dev *mdev = ct_priv->dev;
	struct flow_action_entry *act;
	char *modact;
	int err, i;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_MANGLE: {
			modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
			if (IS_ERR(modact))
				return PTR_ERR(modact);

			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
			if (err)
				return err;

			mod_acts->num_actions++;
		}
		break;

		case FLOW_ACTION_CT_METADATA:
			/* Handled earlier */
			continue;
		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_flow_attr *attr,
				struct flow_rule *flow_rule,
				struct mlx5e_mod_hdr_handle **mh,
				u8 zone_restore_id, bool nat_table, bool has_nat)
{
	DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
	DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
	struct flow_action_entry *meta;
	enum ip_conntrack_info ctinfo;
	u16 ct_state = 0;
	int err;

	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta)
		return -EOPNOTSUPP;
	ctinfo = meta->ct_metadata.cookie & NFCT_INFOMASK;

	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
				     &attr->ct_attr.ct_labels_id);
	if (err)
		return -EOPNOTSUPP;
	if (nat_table) {
		if (has_nat) {
			err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
			if (err)
				goto err_mapping;
		}

		ct_state |= MLX5_CT_STATE_NAT_BIT;
	}

	ct_state |= MLX5_CT_STATE_TRK_BIT;
	ct_state |= ctinfo == IP_CT_NEW ? MLX5_CT_STATE_NEW_BIT : MLX5_CT_STATE_ESTABLISHED_BIT;
	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
					     ct_state,
					     meta->ct_metadata.mark,
					     attr->ct_attr.ct_labels_id,
					     zone_restore_id);
	if (err)
		goto err_mapping;

	if (nat_table && has_nat) {
		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
							    mod_acts.num_actions,
							    mod_acts.actions);
		if (IS_ERR(attr->modify_hdr)) {
			err = PTR_ERR(attr->modify_hdr);
			goto err_mapping;
		}

		*mh = NULL;
	} else {
		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
					   ct_priv->mod_hdr_tbl,
					   ct_priv->ns_type,
					   &mod_acts);
		if (IS_ERR(*mh)) {
			err = PTR_ERR(*mh);
			goto err_mapping;
		}
		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
	}

	mlx5e_mod_hdr_dealloc(&mod_acts);
	return 0;

err_mapping:
	mlx5e_mod_hdr_dealloc(&mod_acts);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
	return err;
}

static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				 struct mlx5_flow_attr *attr,
				 struct mlx5e_mod_hdr_handle *mh)
{
	if (mh)
		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
	else
		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
}
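
/* Note (explanatory, not upstream): two allocation paths pair up here. NAT
 * entries get a dedicated modify_hdr via mlx5_modify_header_alloc() (mh is
 * NULL), since their rewrite actions are per-connection; non-NAT entries
 * share a cached modify header via mlx5e_mod_hdr_attach(). The destroy above
 * picks the matching release based on whether mh is set.
 */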

static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct flow_rule *flow_rule,
			  struct mlx5_ct_entry *entry,
			  bool nat, u8 zone_restore_id)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	struct mlx5_flow_spec *spec = NULL;
	struct mlx5_flow_attr *attr;
	int err;

	zone_rule->nat = nat;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!attr) {
		err = -ENOMEM;
		goto err_attr;
	}

	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
					      &zone_rule->mh,
					      zone_restore_id,
					      nat,
					      mlx5_tc_ct_entry_in_ct_nat_table(entry));
	if (err) {
		ct_dbg("Failed to create ct entry mod hdr");
		goto err_mod_hdr;
	}

	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
	attr->dest_chain = 0;
	attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	if (entry->tuple.ip_proto == IPPROTO_TCP ||
	    entry->tuple.ip_proto == IPPROTO_UDP)
		attr->outer_match_level = MLX5_MATCH_L4;
	else
		attr->outer_match_level = MLX5_MATCH_L3;
	attr->counter = entry->counter->counter;
	attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
		attr->esw_attr->in_mdev = priv->mdev;

	mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);

	zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
	if (IS_ERR(zone_rule->rule)) {
		err = PTR_ERR(zone_rule->rule);
		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
		goto err_rule;
	}

	zone_rule->attr = attr;

	kvfree(spec);
	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

	return 0;

err_rule:
	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr, zone_rule->mh);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
err_mod_hdr:
	kfree(attr);
err_attr:
	kvfree(spec);
	return err;
}

static int
mlx5_tc_ct_entry_update_rule(struct mlx5_tc_ct_priv *ct_priv,
			     struct flow_rule *flow_rule,
			     struct mlx5_ct_entry *entry,
			     bool nat, u8 zone_restore_id)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr;
	struct mlx5e_mod_hdr_handle *mh;
	struct mlx5_flow_spec *spec;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	old_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!old_attr) {
		err = -ENOMEM;
		goto err_attr;
	}
	*old_attr = *attr;

	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id,
					      nat, mlx5_tc_ct_entry_in_ct_nat_table(entry));
	if (err) {
		ct_dbg("Failed to create ct entry mod hdr, err: %d", err);
		goto err_mod_hdr;
	}

	mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);

	err = ct_priv->fs_ops->ct_rule_update(ct_priv->fs, zone_rule->rule, spec, attr);
	if (err) {
		ct_dbg("Failed to update ct entry rule, nat: %d, err: %d", nat, err);
		goto err_rule;
	}

	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh);
	zone_rule->mh = mh;
	mlx5_put_label_mapping(ct_priv, old_attr->ct_attr.ct_labels_id);

	kfree(old_attr);
	kvfree(spec);
	ct_dbg("Updated ct entry rule in zone %d", entry->tuple.zone);

	return 0;

err_rule:
	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
err_mod_hdr:
	*attr = *old_attr;
	kfree(old_attr);
err_attr:
	kvfree(spec);
	return err;
}

static bool
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
{
	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
}

static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
	struct mlx5_ct_entry *entry;

	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
				       tuples_ht_params);
	if (entry && mlx5_tc_ct_entry_valid(entry) &&
	    refcount_inc_not_zero(&entry->refcnt)) {
		return entry;
	} else if (!entry) {
		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
					       tuple, tuples_nat_ht_params);
		if (entry && mlx5_tc_ct_entry_valid(entry) &&
		    refcount_inc_not_zero(&entry->refcnt))
			return entry;
	}

	return entry ? ERR_PTR(-EINVAL) : NULL;
}

static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

	if (mlx5_tc_ct_entry_in_ct_nat_table(entry))
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node,
				       tuples_nat_ht_params);
	if (mlx5_tc_ct_entry_in_ct_table(entry))
		rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
				       tuples_ht_params);
}

static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

	mlx5_tc_ct_entry_del_rules(ct_priv, entry);

	spin_lock_bh(&ct_priv->ht_lock);
	mlx5_tc_ct_entry_remove_from_tuples(entry);
	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_tc_ct_counter_put(ct_priv, entry);
	kfree(entry);
}

static void
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
	if (!refcount_dec_and_test(&entry->refcnt))
		return;

	mlx5_tc_ct_entry_del(entry);
}

static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
{
	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);

	mlx5_tc_ct_entry_del(entry);
}

static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
	if (!refcount_dec_and_test(&entry->refcnt))
		return;

	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
	queue_work(entry->ct_priv->wq, &entry->work);
}
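
/* Note (explanatory, not upstream): __mlx5_tc_ct_entry_put() defers the
 * final teardown to the ct workqueue, presumably because releasing an entry
 * issues firmware commands that may sleep while some callers cannot;
 * mlx5_tc_ct_entry_put() is the direct variant for callers already in
 * sleepable context.
 */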

static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_ct_counter *counter;
	int ret;

	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	if (!counter)
		return ERR_PTR(-ENOMEM);

	counter->is_shared = false;
	counter->counter = mlx5_fc_create(ct_priv->dev, true);
	if (IS_ERR(counter->counter)) {
		ct_dbg("Failed to create counter for ct entry");
		ret = PTR_ERR(counter->counter);
		kfree(counter);
		return ERR_PTR(ret);
	}

	return counter;
}

static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
			      struct mlx5_ct_entry *entry)
{
	struct mlx5_ct_tuple rev_tuple = entry->tuple;
	struct mlx5_ct_counter *shared_counter;
	struct mlx5_ct_entry *rev_entry;

	/* get the reversed tuple */
	swap(rev_tuple.port.src, rev_tuple.port.dst);

	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		__be32 tmp_addr = rev_tuple.ip.src_v4;

		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
		rev_tuple.ip.dst_v4 = tmp_addr;
	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
		rev_tuple.ip.dst_v6 = tmp_addr;
	} else {
		return ERR_PTR(-EOPNOTSUPP);
	}

	/* Use the same counter as the reverse direction */
	spin_lock_bh(&ct_priv->ht_lock);
	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);

	if (IS_ERR(rev_entry)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		goto create_counter;
	}

	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
		shared_counter = rev_entry->counter;
		spin_unlock_bh(&ct_priv->ht_lock);

		mlx5_tc_ct_entry_put(rev_entry);
		return shared_counter;
	}

	spin_unlock_bh(&ct_priv->ht_lock);

create_counter:

	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
	if (IS_ERR(shared_counter))
		return shared_counter;

	shared_counter->is_shared = true;
	refcount_set(&shared_counter->refcount, 1);
	return shared_counter;
}
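
/* Note (explanatory, not upstream): when conntrack accounting is disabled,
 * the two directions of a connection share one hardware counter - the
 * reversed tuple is looked up and the existing counter's refcount is taken,
 * so ageing (lastuse) still sees traffic from either direction while using
 * half the counter resources.
 */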

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct flow_rule *flow_rule,
			   struct mlx5_ct_entry *entry,
			   u8 zone_restore_id)
{
	int err;

	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
	else
		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

	if (IS_ERR(entry->counter)) {
		err = PTR_ERR(entry->counter);
		return err;
	}

	if (mlx5_tc_ct_entry_in_ct_table(entry)) {
		err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
						zone_restore_id);
		if (err)
			goto err_orig;
	}

	if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
		err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
						zone_restore_id);
		if (err)
			goto err_nat;
	}

	atomic_inc(&ct_priv->debugfs.stats.offloaded);
	return 0;

err_nat:
	if (mlx5_tc_ct_entry_in_ct_table(entry))
		mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
	mlx5_tc_ct_counter_put(ct_priv, entry);
	return err;
}

static int
mlx5_tc_ct_entry_update_rules(struct mlx5_tc_ct_priv *ct_priv,
			      struct flow_rule *flow_rule,
			      struct mlx5_ct_entry *entry,
			      u8 zone_restore_id)
{
	int err = 0;

	if (mlx5_tc_ct_entry_in_ct_table(entry)) {
		err = mlx5_tc_ct_entry_update_rule(ct_priv, flow_rule, entry, false,
						   zone_restore_id);
		if (err)
			return err;
	}

	if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
		err = mlx5_tc_ct_entry_update_rule(ct_priv, flow_rule, entry, true,
						   zone_restore_id);
		if (err && mlx5_tc_ct_entry_in_ct_table(entry))
			mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
	}
	return err;
}

static int
mlx5_tc_ct_block_flow_offload_update(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule,
				     struct mlx5_ct_entry *entry, unsigned long cookie)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	int err;

	err = mlx5_tc_ct_entry_update_rules(ct_priv, flow_rule, entry, ft->zone_restore_id);
	if (!err)
		return 0;

	/* If failed to update the entry, then look it up again under ht_lock
	 * protection and properly delete it.
	 */
	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (entry) {
		rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
		spin_unlock_bh(&ct_priv->ht_lock);
		mlx5_tc_ct_entry_put(entry);
	} else {
		spin_unlock_bh(&ct_priv->ht_lock);
	}
	return err;
}

static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	struct flow_action_entry *meta_action;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;
	int err;

	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta_action)
		return -EOPNOTSUPP;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
		if (entry->restore_cookie == meta_action->ct_metadata.cookie) {
			spin_unlock_bh(&ct_priv->ht_lock);
			mlx5_tc_ct_entry_put(entry);
			return -EEXIST;
		}
		entry->restore_cookie = meta_action->ct_metadata.cookie;
		spin_unlock_bh(&ct_priv->ht_lock);

		err = mlx5_tc_ct_block_flow_offload_update(ft, flow_rule, entry, cookie);
		mlx5_tc_ct_entry_put(entry);
		return err;
	}
	spin_unlock_bh(&ct_priv->ht_lock);

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->tuple.zone = ft->zone;
	entry->cookie = flow->cookie;
	entry->restore_cookie = meta_action->ct_metadata.cookie;
	refcount_set(&entry->refcnt, 2);
	entry->ct_priv = ct_priv;

	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
	if (err)
		goto err_set;

	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
	if (err)
		goto err_set;

	spin_lock_bh(&ct_priv->ht_lock);

	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
					    cts_ht_params);
	if (err)
		goto err_entries;

	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
						    &entry->tuple_nat_node,
						    tuples_nat_ht_params);
		if (err)
			goto err_tuple_nat;

		set_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags);
	}

	if (!mlx5_tc_ct_entry_in_ct_nat_table(entry)) {
		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
						    &entry->tuple_node,
						    tuples_ht_params);
		if (err)
			goto err_tuple;

		set_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags);
	}
	spin_unlock_bh(&ct_priv->ht_lock);

	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
					 ft->zone_restore_id);
	if (err)
		goto err_rules;

	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
	mlx5_tc_ct_entry_put(entry); /* this function reference */

	return 0;

err_rules:
	spin_lock_bh(&ct_priv->ht_lock);
err_tuple:
	mlx5_tc_ct_entry_remove_from_tuples(entry);
err_tuple_nat:
	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
err_entries:
	spin_unlock_bh(&ct_priv->ht_lock);
err_set:
	kfree(entry);
	if (err != -EEXIST)
		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
	return err;
}

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (!entry) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -EINVAL;
	}

	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_tc_ct_entry_put(entry);

	return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
				    struct flow_cls_offload *f)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = f->cookie;
	struct mlx5_ct_entry *entry;
	u64 lastuse, packets, bytes;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (!entry) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -EINVAL;
	}

	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);

	mlx5_tc_ct_entry_put(entry);
	return 0;
}

static bool
mlx5_tc_ct_filter_legacy_non_nic_flows(struct mlx5_ct_ft *ft,
				       struct flow_cls_offload *flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	struct flow_match_meta match;
	struct net_device *netdev;
	bool same_dev = false;

	if (!is_mdev_legacy_mode(ct_priv->dev) ||
	    !flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
		return true;

	flow_rule_match_meta(rule, &match);

	if (!(match.key->ingress_ifindex & match.mask->ingress_ifindex))
		return true;

	netdev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
	same_dev = ct_priv->netdev == netdev;
	dev_put(netdev);

	return same_dev;
}

static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
			      void *cb_priv)
{
	struct flow_cls_offload *f = type_data;
	struct mlx5_ct_ft *ft = cb_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (f->command) {
	case FLOW_CLS_REPLACE:
		if (!mlx5_tc_ct_filter_legacy_non_nic_flows(ft, f))
			return -EOPNOTSUPP;

		return mlx5_tc_ct_block_flow_offload_add(ft, f);
	case FLOW_CLS_DESTROY:
		return mlx5_tc_ct_block_flow_offload_del(ft, f);
	case FLOW_CLS_STATS:
		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
	default:
		break;
	}

	return -EOPNOTSUPP;
}

static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
			u16 zone)
{
	struct flow_keys flow_keys;

	skb_reset_network_header(skb);
	skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);

	tuple->zone = zone;

	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
	    flow_keys.basic.ip_proto != IPPROTO_UDP &&
	    flow_keys.basic.ip_proto != IPPROTO_GRE)
		return false;

	if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
	    flow_keys.basic.ip_proto == IPPROTO_UDP) {
		tuple->port.src = flow_keys.ports.src;
		tuple->port.dst = flow_keys.ports.dst;
	}
	tuple->n_proto = flow_keys.basic.n_proto;
	tuple->ip_proto = flow_keys.basic.ip_proto;

	switch (flow_keys.basic.n_proto) {
	case htons(ETH_P_IP):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
		break;

	case htons(ETH_P_IPV6):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
		break;
	default:
		goto out;
	}

	return true;

out:
	return false;
}
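
/* Note (explanatory, not upstream): mlx5_tc_ct_skb_to_tuple() appears to
 * serve the software-restore path - a packet that missed in hardware is
 * dissected back into a mlx5_ct_tuple so the matching offloaded entry (and
 * its conntrack metadata) can be looked up before the skb continues in tc.
 */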

int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
	u32 ctstate = 0, ctstate_mask = 0;

	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
					&ctstate, &ctstate_mask);

	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
		return -EOPNOTSUPP;

	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
				    ctstate, ctstate_mask);

	return 0;
}
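
/* Note (explanatory, not upstream): "no trk" means the TRK bit must be clear
 * in the ct-state register. If the spec already demands +trk, the two
 * constraints contradict and the caller gets -EOPNOTSUPP; otherwise only the
 * mask is widened, so any existing ct-state match bits are preserved.
 */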

void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
	if (!priv || !ct_attr->ct_labels_id)
		return;

	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
}

int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
		     struct mlx5_flow_spec *spec,
		     struct flow_cls_offload *f,
		     struct mlx5_ct_attr *ct_attr,
		     struct netlink_ext_ack *extack)
{
	bool trk, est, untrk, unnew, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector_key_ct *mask, *key;
	u32 ctstate = 0, ctstate_mask = 0;
	u16 ct_state_on, ct_state_off;
	u16 ct_state, ct_state_mask;
	struct flow_match_ct match;
	u32 ct_labels[4];

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
		return 0;

	if (!priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct matching isn't available");
		return -EOPNOTSUPP;
	}

	flow_rule_match_ct(rule, &match);

	key = match.key;
	mask = match.mask;

	ct_state = key->ct_state;
	ct_state_mask = mask->ct_state;

	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only ct_state trk, est, new and rpl are supported for offload");
		return -EOPNOTSUPP;
	}

	ct_state_on = ct_state & ct_state_mask;
	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;

	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate |= new ? MLX5_CT_STATE_NEW_BIT : 0;
	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate_mask |= (unnew || new) ? MLX5_CT_STATE_NEW_BIT : 0;
	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;

	if (rel) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +rel isn't supported");
		return -EOPNOTSUPP;
	}

	if (inv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +inv isn't supported");
		return -EOPNOTSUPP;
	}

	if (mask->ct_zone)
		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
					    key->ct_zone, MLX5_CT_ZONE_MASK);
	if (ctstate_mask)
		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
					    ctstate, ctstate_mask);
	if (mask->ct_mark)
		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
					    key->ct_mark, mask->ct_mark);
	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
	    mask->ct_labels[3]) {
		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
			return -EOPNOTSUPP;
		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
					    MLX5_CT_LABELS_MASK);
	}

	return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
			struct mlx5_flow_attr *attr,
			const struct flow_action_entry *act,
			struct netlink_ext_ack *extack)
{
	if (!priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct action isn't available");
		return -EOPNOTSUPP;
	}

	attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */
	attr->ct_attr.zone = act->ct.zone;
	if (!(act->ct.action & TCA_CT_ACT_CLEAR))
		attr->ct_attr.nf_ft = act->ct.flow_table;
	attr->ct_attr.act_miss_cookie = act->miss_cookie;

	return 0;
}

static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
				  struct mlx5_tc_ct_pre *pre_ct,
				  bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_core_dev *dev = ct_priv->dev;
	struct mlx5_flow_table *ft = pre_ct->ft;
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	u32 ctstate;
	u16 zone;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
					ZONE_TO_REG, zone);
	if (err) {
		ct_dbg("Failed to set zone register mapping");
		goto err_mapping;
	}

	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);

	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct->modify_hdr = mod_hdr;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
	flow_act.modify_hdr = mod_hdr;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

	/* add flow rule */
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    zone, MLX5_CT_ZONE_MASK);
	ctstate = MLX5_CT_STATE_TRK_BIT;
	if (nat)
		ctstate |= MLX5_CT_STATE_NAT_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

	dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
		goto err_flow_rule;
	}
	pre_ct->flow_rule = rule;

	/* add miss rule */
	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
		goto err_miss_rule;
	}
	pre_ct->miss_rule = rule;

	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
	kvfree(spec);
	return 0;

err_miss_rule:
	mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
	kvfree(spec);
	return err;
}
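
/* Note (explanatory, not upstream): each pre-ct table holds exactly two
 * rules. The flow rule matches zone + trk (plus the NAT bit for the nat
 * table) and jumps straight to post-action processing for packets hardware
 * already knows; everything else hits the miss rule and continues to the
 * ct/ct_nat tables for per-connection lookup.
 */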
1689
1690 static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft * ct_ft,struct mlx5_tc_ct_pre * pre_ct)1691 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1692 struct mlx5_tc_ct_pre *pre_ct)
1693 {
1694 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1695 struct mlx5_core_dev *dev = ct_priv->dev;
1696
1697 mlx5_del_flow_rules(pre_ct->flow_rule);
1698 mlx5_del_flow_rules(pre_ct->miss_rule);
1699 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1700 }
1701
1702 static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft * ct_ft,struct mlx5_tc_ct_pre * pre_ct,bool nat)1703 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1704 struct mlx5_tc_ct_pre *pre_ct,
1705 bool nat)
1706 {
1707 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1708 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1709 struct mlx5_core_dev *dev = ct_priv->dev;
1710 struct mlx5_flow_table_attr ft_attr = {};
1711 struct mlx5_flow_namespace *ns;
1712 struct mlx5_flow_table *ft;
1713 struct mlx5_flow_group *g;
1714 u32 metadata_reg_c_2_mask;
1715 u32 *flow_group_in;
1716 void *misc;
1717 int err;
1718
1719 ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1720 if (!ns) {
1721 err = -EOPNOTSUPP;
1722 ct_dbg("Failed to get flow namespace");
1723 return err;
1724 }
1725
1726 flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1727 if (!flow_group_in)
1728 return -ENOMEM;
1729
1730 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1731 ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1732 FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1733 ft_attr.max_fte = 2;
1734 ft_attr.level = 1;
1735 ft = mlx5_create_flow_table(ns, &ft_attr);
1736 if (IS_ERR(ft)) {
1737 err = PTR_ERR(ft);
1738 ct_dbg("Failed to create pre ct table");
1739 goto out_free;
1740 }
1741 pre_ct->ft = ft;
1742
1743 /* create flow group */
1744 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1745 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1746 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1747 MLX5_MATCH_MISC_PARAMETERS_2);
1748
1749 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1750 match_criteria.misc_parameters_2);
1751
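	/* reg_c_2 carries the ct zone in its lower 16 bits and the ct state
	 * bits in its upper 16 bits; build the group match mask accordingly.
	 */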
1752 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1753 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1754 if (nat)
1755 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1756
1757 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1758 metadata_reg_c_2_mask);
1759
1760 g = mlx5_create_flow_group(ft, flow_group_in);
1761 if (IS_ERR(g)) {
1762 err = PTR_ERR(g);
1763 ct_dbg("Failed to create pre ct group");
1764 goto err_flow_grp;
1765 }
1766 pre_ct->flow_grp = g;
1767
1768 /* create miss group */
1769 memset(flow_group_in, 0, inlen);
1770 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1771 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1772 g = mlx5_create_flow_group(ft, flow_group_in);
1773 if (IS_ERR(g)) {
1774 err = PTR_ERR(g);
1775 ct_dbg("Failed to create pre ct miss group");
1776 goto err_miss_grp;
1777 }
1778 pre_ct->miss_grp = g;
1779
1780 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1781 if (err)
1782 goto err_add_rules;
1783
1784 kvfree(flow_group_in);
1785 return 0;
1786
1787 err_add_rules:
1788 mlx5_destroy_flow_group(pre_ct->miss_grp);
1789 err_miss_grp:
1790 mlx5_destroy_flow_group(pre_ct->flow_grp);
1791 err_flow_grp:
1792 mlx5_destroy_flow_table(ft);
1793 out_free:
1794 kvfree(flow_group_in);
1795 return err;
1796 }
1797
1798 static void
1799 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1800 struct mlx5_tc_ct_pre *pre_ct)
1801 {
1802 tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1803 mlx5_destroy_flow_group(pre_ct->miss_grp);
1804 mlx5_destroy_flow_group(pre_ct->flow_grp);
1805 mlx5_destroy_flow_table(pre_ct->ft);
1806 }
1807
1808 static int
1809 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1810 {
1811 int err;
1812
1813 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1814 if (err)
1815 return err;
1816
1817 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1818 if (err)
1819 goto err_pre_ct_nat;
1820
1821 return 0;
1822
1823 err_pre_ct_nat:
1824 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1825 return err;
1826 }
1827
1828 static void
1829 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1830 {
1831 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1832 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1833 }
1834
1835 /* Give ct_entries_ht its own lock class to avoid a false lockdep warning:
1836  * when the last flow of a group is deleted and the group is then destroyed,
1837  * del_sw_flow_group() calls rhashtable_destroy() on fg->ftes_hash, which
1838  * takes that table's ht->mutex. That mutex instance is different from the
1839  * ct_entries_ht one here, but both share the default rhashtable lock class.
1840  */
1841 static struct lock_class_key ct_entries_ht_lock_key;
1842
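/* Get (or create) the per-zone ct flow table. Tables live in zone_ht and are
 * refcounted; on first creation we also register mlx5_tc_ct_block_flow_offload()
 * with the nf flowtable so conntrack entries for this zone get offloaded here.
 */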
1843 static struct mlx5_ct_ft *
1844 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1845 struct nf_flowtable *nf_ft)
1846 {
1847 struct mlx5_ct_ft *ft;
1848 int err;
1849
1850 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1851 if (ft) {
1852 refcount_inc(&ft->refcount);
1853 return ft;
1854 }
1855
1856 ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1857 if (!ft)
1858 return ERR_PTR(-ENOMEM);
1859
1860 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1861 if (err)
1862 goto err_mapping;
1863
1864 ft->zone = zone;
1865 ft->nf_ft = nf_ft;
1866 ft->ct_priv = ct_priv;
1867 refcount_set(&ft->refcount, 1);
1868
1869 err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1870 if (err)
1871 goto err_alloc_pre_ct;
1872
1873 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1874 if (err)
1875 goto err_init;
1876
1877 lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
1878
1879 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1880 zone_params);
1881 if (err)
1882 goto err_insert;
1883
1884 err = nf_flow_table_offload_add_cb(ft->nf_ft,
1885 mlx5_tc_ct_block_flow_offload, ft);
1886 if (err)
1887 goto err_add_cb;
1888
1889 return ft;
1890
1891 err_add_cb:
1892 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1893 err_insert:
1894 rhashtable_destroy(&ft->ct_entries_ht);
1895 err_init:
1896 mlx5_tc_ct_free_pre_ct_tables(ft);
1897 err_alloc_pre_ct:
1898 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1899 err_mapping:
1900 kfree(ft);
1901 return ERR_PTR(err);
1902 }
1903
1904 static void
1905 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1906 {
1907 struct mlx5_ct_entry *entry = ptr;
1908
1909 mlx5_tc_ct_entry_put(entry);
1910 }
1911
1912 static void
1913 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1914 {
1915 if (!refcount_dec_and_test(&ft->refcount))
1916 return;
1917
1918 flush_workqueue(ct_priv->wq);
1919 nf_flow_table_offload_del_cb(ft->nf_ft,
1920 mlx5_tc_ct_block_flow_offload, ft);
1921 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1922 rhashtable_free_and_destroy(&ft->ct_entries_ht,
1923 mlx5_tc_ct_flush_ft_entry,
1924 ct_priv);
1925 mlx5_tc_ct_free_pre_ct_tables(ft);
1926 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1927 kfree(ft);
1928 }
1929
1930 /* We translate a tc filter with a CT action to the following HW model:
1931 *
1932 * +-----------------------+
1933 * + rule (either original +
1934 * + or post_act rule) +
1935 * +-----------------------+
1936 * | set act_miss_cookie mapping
1937 * | set fte_id
1938 * | set tunnel_id
1939 * | rest of actions before the CT action (for this orig/post_act rule)
1940 * |
1941 * +-------------+
1942 * | Chain 0 |
1943 * | optimization|
1944 * | v
1945 * | +---------------------+
1946 * | + pre_ct/pre_ct_nat + if matches +----------------------+
1947 * | + zone+nat match +---------------->+ post_act (see below) +
1948 * | +---------------------+ set zone +----------------------+
1949 * | |
1950 * +-------------+ set zone
1951 * |
1952 * v
1953 * +--------------------+
1954 * + CT (nat or no nat) +
1955 * + tuple + zone match +
1956 * +--------------------+
1957 * | set mark
1958 * | set labels_id
1959 * | set established
1960 * | set zone_restore
1961 * | do nat (if needed)
1962 * v
1963 * +--------------+
1964 * + post_act + rest of parsed filter's actions
1965 * + fte_id match +------------------------>
1966 * +--------------+
1967 *
1968 */
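/* For intuition, a rule pair of roughly the following shape would be offloaded
 * through this model (illustrative only, not derived from this file; $REP and
 * $REP2 stand for representor netdevs):
 *
 *   tc filter add dev $REP ingress chain 0 proto ip flower ct_state -trk \
 *       action ct zone 5 pipe action goto chain 1
 *   tc filter add dev $REP ingress chain 1 proto ip flower ct_zone 5 \
 *       ct_state +trk+est action mirred egress redirect dev $REP2
 */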
1969 static int
1970 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1971 struct mlx5_flow_attr *attr)
1972 {
1973 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1974 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1975 int act_miss_mapping = 0, err;
1976 struct mlx5_ct_ft *ft;
1977 u16 zone;
1978
1979 /* Register for CT established events */
1980 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1981 attr->ct_attr.nf_ft);
1982 if (IS_ERR(ft)) {
1983 err = PTR_ERR(ft);
1984 ct_dbg("Failed to register to ft callback");
1985 goto err_ft;
1986 }
1987 attr->ct_attr.ft = ft;
1988
1989 err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie,
1990 &act_miss_mapping);
1991 if (err) {
1992 ct_dbg("Failed to get register mapping for act miss");
1993 goto err_get_act_miss;
1994 }
1995
1996 err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
1997 ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping);
1998 if (err) {
1999 ct_dbg("Failed to set act miss register mapping");
2000 goto err_mapping;
2001 }
2002
2003 /* Chain 0 sets the zone and jumps to ct table
2004 * Other chains jump to pre_ct table to align with act_ct cached logic
2005 */
2006 if (!attr->chain) {
2007 zone = ft->zone & MLX5_CT_ZONE_MASK;
2008 err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts,
2009 ct_priv->ns_type, ZONE_TO_REG, zone);
2010 if (err) {
2011 ct_dbg("Failed to set zone register mapping");
2012 goto err_mapping;
2013 }
2014
2015 attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
2016 } else {
2017 attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
2018 }
2019
2020 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2021 attr->ct_attr.act_miss_mapping = act_miss_mapping;
2022
2023 return 0;
2024
2025 err_mapping:
2026 mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping);
2027 err_get_act_miss:
2028 mlx5_tc_ct_del_ft_cb(ct_priv, ft);
2029 err_ft:
2030 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
2031 return err;
2032 }
2033
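/* Entry point for offloading a flow with a ct action. A lone ct clear action
 * is handled inline by zeroing the ct registers; a real ct() action (nf_ft is
 * set) goes through __mlx5_tc_ct_flow_offload() under control_lock.
 */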
2034 int
2035 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr)
2036 {
2037 int err;
2038
2039 if (!priv)
2040 return -EOPNOTSUPP;
2041
2042 if (attr->ct_attr.offloaded)
2043 return 0;
2044
2045 if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) {
2046 err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts,
2047 0, 0, 0, 0);
2048 if (err)
2049 return err;
2050
2051 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2052 }
2053
2054 	if (!attr->ct_attr.nf_ft) { /* only a ct clear action, not ct_clear followed by ct() */
2055 attr->ct_attr.offloaded = true;
2056 return 0;
2057 }
2058
2059 mutex_lock(&priv->control_lock);
2060 err = __mlx5_tc_ct_flow_offload(priv, attr);
2061 if (!err)
2062 attr->ct_attr.offloaded = true;
2063 mutex_unlock(&priv->control_lock);
2064
2065 return err;
2066 }
2067
2068 static void
2069 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
2070 struct mlx5_flow_attr *attr)
2071 {
2072 mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping);
2073 mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft);
2074 }
2075
2076 void
2077 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
2078 struct mlx5_flow_attr *attr)
2079 {
2080 if (!attr->ct_attr.offloaded) /* no ct action, return */
2081 return;
2082 	if (!attr->ct_attr.nf_ft) /* only a ct clear action, not ct_clear followed by ct() */
2083 return;
2084
2085 mutex_lock(&priv->control_lock);
2086 __mlx5_tc_ct_delete_flow(priv, attr);
2087 mutex_unlock(&priv->control_lock);
2088 }
2089
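/* Pick the ct flow steering provider: dmfs by default; in the fdb namespace,
 * hmfs or smfs is used instead when the device runs in that steering mode.
 */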
2090 static int
2091 mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
2092 {
2093 struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
2094 struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
2095 int err;
2096
2097 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
2098 if (ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_HMFS) {
2099 ct_dbg("Using HMFS ct flow steering provider");
2100 fs_ops = mlx5_ct_fs_hmfs_ops_get();
2101 } else if (ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
2102 ct_dbg("Using SMFS ct flow steering provider");
2103 fs_ops = mlx5_ct_fs_smfs_ops_get();
2104 }
2105
2106 if (!fs_ops) {
2107 ct_dbg("Requested flow steering mode is not enabled.");
2108 return -EOPNOTSUPP;
2109 }
2110 }
2111
2112 ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
2113 if (!ct_priv->fs)
2114 return -ENOMEM;
2115
2116 ct_priv->fs->netdev = ct_priv->netdev;
2117 ct_priv->fs->dev = ct_priv->dev;
2118 ct_priv->fs_ops = fs_ops;
2119
2120 err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
2121 if (err)
2122 goto err_init;
2123
2124 return 0;
2125
2126 err_init:
2127 kfree(ct_priv->fs);
2128 return err;
2129 }
2130
2131 static int
2132 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
2133 const char **err_msg)
2134 {
2135 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
2136 		/* The vlan workaround must be avoided for multi-chain rules.
2137 		 * This is just a sanity check, as the pop vlan action should
2138 		 * be supported by any FW that supports ignore_flow_level.
2139 		 */
2140
2141 *err_msg = "firmware vlan actions support is missing";
2142 return -EOPNOTSUPP;
2143 }
2144
2145 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
2146 fdb_modify_header_fwd_to_table)) {
2147 		/* CT always writes to registers, which are mod header actions.
2148 		 * Therefore, both mod header and goto-table support are required.
2149 		 */
2150
2151 *err_msg = "firmware fwd and modify support is missing";
2152 return -EOPNOTSUPP;
2153 }
2154
2155 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2156 *err_msg = "register loopback isn't supported";
2157 return -EOPNOTSUPP;
2158 }
2159
2160 return 0;
2161 }
2162
2163 static int
2164 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
2165 enum mlx5_flow_namespace_type ns_type,
2166 struct mlx5e_post_act *post_act)
2167 {
2168 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2169 const char *err_msg = NULL;
2170 int err = 0;
2171
2172 if (IS_ERR_OR_NULL(post_act)) {
2173 		/* ignore_flow_level isn't supported by default for VFs, so
2174 		 * post_act won't be available either. Skip the error message.
2175 		 */
2176 if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
2177 err_msg = "post action is missing";
2178 err = -EOPNOTSUPP;
2179 goto out_err;
2180 }
2181
2182 if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
2183 err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);
2184
2185 out_err:
2186 if (err && err_msg)
2187 netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
2188 return err;
2189 }
2190
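/* Expose the ct offload counters as read-only atomics under the device's
 * debugfs root, i.e. <mlx5 debugfs root>/ct/{offloaded,rx_dropped}; the
 * exact parent path comes from mlx5_debugfs_get_dev_root().
 */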
2191 static void
2192 mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
2193 {
2194 struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
2195
2196 ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev));
2197 debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
2198 &ct_dbgfs->stats.offloaded);
2199 debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
2200 &ct_dbgfs->stats.rx_dropped);
2201 }
2202
2203 static void
2204 mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
2205 {
2206 debugfs_remove_recursive(ct_priv->debugfs.root);
2207 }
2208
2209 static struct mlx5_flow_handle *
2210 tc_ct_add_miss_rule(struct mlx5_flow_table *ft,
2211 struct mlx5_flow_table *next_ft)
2212 {
2213 struct mlx5_flow_destination dest = {};
2214 struct mlx5_flow_act act = {};
2215
2216 act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
2217 act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2218 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2219 dest.ft = next_ft;
2220
2221 return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1);
2222 }
2223
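/* Chain misses of the "from" table into the "to" table: reserve the last two
 * FTEs of "from" as a match-all miss group and install a single forward rule
 * there. Used below to send ct_nat misses to the plain ct table.
 */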
2224 static int
2225 tc_ct_add_ct_table_miss_rule(struct mlx5_flow_table *from,
2226 struct mlx5_flow_table *to,
2227 struct mlx5_flow_group **miss_group,
2228 struct mlx5_flow_handle **miss_rule)
2229 {
2230 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2231 struct mlx5_flow_group *group;
2232 struct mlx5_flow_handle *rule;
2233 unsigned int max_fte = from->max_fte;
2234 u32 *flow_group_in;
2235 int err = 0;
2236
2237 flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2238 if (!flow_group_in)
2239 return -ENOMEM;
2240
2241 /* create miss group */
2242 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
2243 max_fte - 2);
2244 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
2245 max_fte - 1);
2246 group = mlx5_create_flow_group(from, flow_group_in);
2247 if (IS_ERR(group)) {
2248 err = PTR_ERR(group);
2249 goto err_miss_grp;
2250 }
2251
2252 /* add miss rule to next fdb */
2253 rule = tc_ct_add_miss_rule(from, to);
2254 if (IS_ERR(rule)) {
2255 err = PTR_ERR(rule);
2256 goto err_miss_rule;
2257 }
2258
2259 *miss_group = group;
2260 *miss_rule = rule;
2261 kvfree(flow_group_in);
2262 return 0;
2263
2264 err_miss_rule:
2265 mlx5_destroy_flow_group(group);
2266 err_miss_grp:
2267 kvfree(flow_group_in);
2268 return err;
2269 }
2270
2271 static void
2272 tc_ct_del_ct_table_miss_rule(struct mlx5_flow_group *miss_group,
2273 struct mlx5_flow_handle *miss_rule)
2274 {
2275 mlx5_del_flow_rules(miss_rule);
2276 mlx5_destroy_flow_group(miss_group);
2277 }
2278
2279 #define INIT_ERR_PREFIX "tc ct offload init failed"
2280
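/* Build the ct offload context for a nic or fdb namespace: zone and labels
 * register mappings, the global ct/ct_nat tables (with ct_nat missing into
 * ct), the zone/tuple hashtables, an ordered workqueue and the flow steering
 * provider. Returns NULL on any failure, in which case ct offload is simply
 * unavailable.
 */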
2281 struct mlx5_tc_ct_priv *
2282 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
2283 struct mod_hdr_tbl *mod_hdr,
2284 enum mlx5_flow_namespace_type ns_type,
2285 struct mlx5e_post_act *post_act)
2286 {
2287 struct mlx5_tc_ct_priv *ct_priv;
2288 struct mlx5_core_dev *dev;
2289 u64 mapping_id;
2290 int err;
2291
2292 dev = priv->mdev;
2293 err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
2294 if (err)
2295 goto err_support;
2296
2297 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
2298 if (!ct_priv)
2299 goto err_alloc;
2300
2301 mapping_id = mlx5_query_nic_system_image_guid(dev);
2302
2303 ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
2304 sizeof(u16), 0, true);
2305 if (IS_ERR(ct_priv->zone_mapping)) {
2306 err = PTR_ERR(ct_priv->zone_mapping);
2307 goto err_mapping_zone;
2308 }
2309
2310 ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
2311 sizeof(u32) * 4, 0, true);
2312 if (IS_ERR(ct_priv->labels_mapping)) {
2313 err = PTR_ERR(ct_priv->labels_mapping);
2314 goto err_mapping_labels;
2315 }
2316
2317 spin_lock_init(&ct_priv->ht_lock);
2318 ct_priv->priv = priv;
2319 ct_priv->ns_type = ns_type;
2320 ct_priv->chains = chains;
2321 ct_priv->netdev = priv->netdev;
2322 ct_priv->dev = priv->mdev;
2323 ct_priv->mod_hdr_tbl = mod_hdr;
2324 ct_priv->ct = mlx5_chains_create_global_table(chains);
2325 if (IS_ERR(ct_priv->ct)) {
2326 err = PTR_ERR(ct_priv->ct);
2327 mlx5_core_warn(dev,
2328 "%s, failed to create ct table err: %d\n",
2329 INIT_ERR_PREFIX, err);
2330 goto err_ct_tbl;
2331 }
2332
2333 ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
2334 if (IS_ERR(ct_priv->ct_nat)) {
2335 err = PTR_ERR(ct_priv->ct_nat);
2336 mlx5_core_warn(dev,
2337 "%s, failed to create ct nat table err: %d\n",
2338 INIT_ERR_PREFIX, err);
2339 goto err_ct_nat_tbl;
2340 }
2341
2342 err = tc_ct_add_ct_table_miss_rule(ct_priv->ct_nat, ct_priv->ct,
2343 &ct_priv->ct_nat_miss_group,
2344 &ct_priv->ct_nat_miss_rule);
2345 if (err)
2346 		goto err_ct_nat_miss_rule;
2347
2348 ct_priv->post_act = post_act;
2349 mutex_init(&ct_priv->control_lock);
2350 if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
2351 goto err_ct_zone_ht;
2352 if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
2353 goto err_ct_tuples_ht;
2354 if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
2355 goto err_ct_tuples_nat_ht;
2356
2357 ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
2358 if (!ct_priv->wq) {
2359 err = -ENOMEM;
2360 goto err_wq;
2361 }
2362
2363 err = mlx5_tc_ct_fs_init(ct_priv);
2364 if (err)
2365 goto err_init_fs;
2366
2367 mlx5_ct_tc_create_dbgfs(ct_priv);
2368 return ct_priv;
2369
2370 err_init_fs:
2371 destroy_workqueue(ct_priv->wq);
2372 err_wq:
2373 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2374 err_ct_tuples_nat_ht:
2375 rhashtable_destroy(&ct_priv->ct_tuples_ht);
2376 err_ct_tuples_ht:
2377 rhashtable_destroy(&ct_priv->zone_ht);
2378 err_ct_zone_ht:
	tc_ct_del_ct_table_miss_rule(ct_priv->ct_nat_miss_group, ct_priv->ct_nat_miss_rule);
err_ct_nat_miss_rule:
2379 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2380 err_ct_nat_tbl:
2381 mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2382 err_ct_tbl:
2383 mapping_destroy(ct_priv->labels_mapping);
2384 err_mapping_labels:
2385 mapping_destroy(ct_priv->zone_mapping);
2386 err_mapping_zone:
2387 kfree(ct_priv);
2388 err_alloc:
2389 err_support:
2390
2391 return NULL;
2392 }
2393
2394 void
2395 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2396 {
2397 struct mlx5_fs_chains *chains;
2398
2399 if (!ct_priv)
2400 return;
2401
2402 destroy_workqueue(ct_priv->wq);
2403 mlx5_ct_tc_remove_dbgfs(ct_priv);
2404 chains = ct_priv->chains;
2405
2406 ct_priv->fs_ops->destroy(ct_priv->fs);
2407 kfree(ct_priv->fs);
2408
2409 tc_ct_del_ct_table_miss_rule(ct_priv->ct_nat_miss_group, ct_priv->ct_nat_miss_rule);
2410 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2411 mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2412 mapping_destroy(ct_priv->zone_mapping);
2413 mapping_destroy(ct_priv->labels_mapping);
2414
2415 rhashtable_destroy(&ct_priv->ct_tuples_ht);
2416 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2417 rhashtable_destroy(&ct_priv->zone_ht);
2418 mutex_destroy(&ct_priv->control_lock);
2419 kfree(ct_priv);
2420 }
2421
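/* Datapath restore: map the zone_restore_id carried in the packet's metadata
 * register back to a ct zone, rebuild the tuple from the skb and, if we own a
 * matching offloaded entry, restore its conntrack state onto the skb. Returns
 * false (counted in the rx_dropped debugfs stat) when restore fails.
 */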
2422 bool
2423 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2424 struct sk_buff *skb, u8 zone_restore_id)
2425 {
2426 struct mlx5_ct_tuple tuple = {};
2427 struct mlx5_ct_entry *entry;
2428 u16 zone;
2429
2430 if (!ct_priv || !zone_restore_id)
2431 return true;
2432
2433 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2434 goto out_inc_drop;
2435
2436 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2437 goto out_inc_drop;
2438
2439 spin_lock(&ct_priv->ht_lock);
2440
2441 entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
2442 	if (IS_ERR_OR_NULL(entry)) {
2443 		spin_unlock(&ct_priv->ht_lock);
2444 		goto out_inc_drop;
2445 	}
2446 	spin_unlock(&ct_priv->ht_lock);
2452
2453 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2454 __mlx5_tc_ct_entry_put(entry);
2455
2456 return true;
2457
2458 out_inc_drop:
2459 atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
2460 return false;
2461 }
2462
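/* Accept only the exact dissector key sets a conntrack 5-tuple produces:
 * tcp/udp over ipv4/ipv6 (with ports), or bare gre over ipv4/ipv6.
 */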
2463 static bool mlx5e_tc_ct_valid_used_dissector_keys(const u64 used_keys)
2464 {
2465 #define DISS_BIT(name) BIT_ULL(FLOW_DISSECTOR_KEY_ ## name)
2466 const u64 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) |
2467 DISS_BIT(META);
2468 const u64 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) |
2469 DISS_BIT(PORTS) | DISS_BIT(TCP);
2470 const u64 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) |
2471 DISS_BIT(PORTS) | DISS_BIT(TCP);
2472 const u64 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) |
2473 DISS_BIT(PORTS);
2474 const u64 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) |
2475 DISS_BIT(PORTS);
2476 const u64 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
2477 const u64 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
2478
2479 return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
2480 used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
2481 }
2482
2483 bool mlx5e_tc_ct_is_valid_flow_rule(const struct net_device *dev, struct flow_rule *flow_rule)
2484 {
2485 struct flow_match_ipv4_addrs ipv4_addrs;
2486 struct flow_match_ipv6_addrs ipv6_addrs;
2487 struct flow_match_control control;
2488 struct flow_match_basic basic;
2489 struct flow_match_ports ports;
2490 struct flow_match_tcp tcp;
2491
2492 if (!mlx5e_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) {
2493 netdev_dbg(dev, "ct_debug: rule uses unexpected dissectors (0x%016llx)",
2494 flow_rule->match.dissector->used_keys);
2495 return false;
2496 }
2497
2498 flow_rule_match_basic(flow_rule, &basic);
2499 flow_rule_match_control(flow_rule, &control);
2500 flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
2501 flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
2502 if (basic.key->ip_proto != IPPROTO_GRE)
2503 flow_rule_match_ports(flow_rule, &ports);
2504 if (basic.key->ip_proto == IPPROTO_TCP)
2505 flow_rule_match_tcp(flow_rule, &tcp);
2506
2507 if (basic.mask->n_proto != htons(0xFFFF) ||
2508 (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
2509 basic.mask->ip_proto != 0xFF ||
2510 (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
2511 basic.key->ip_proto != IPPROTO_GRE)) {
2512 netdev_dbg(dev, "ct_debug: rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
2513 ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
2514 basic.key->ip_proto, basic.mask->ip_proto);
2515 return false;
2516 }
2517
2518 if (basic.key->ip_proto != IPPROTO_GRE &&
2519 (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
2520 		netdev_dbg(dev, "ct_debug: rule uses unexpected ports match (src mask 0x%04x, dst mask 0x%04x)",
2521 			   ntohs(ports.mask->src), ntohs(ports.mask->dst));
2522 return false;
2523 }
2524
2525 if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) {
2526 netdev_dbg(dev, "ct_debug: rule uses unexpected tcp match (flags 0x%02x)",
2527 tcp.mask->flags);
2528 return false;
2529 }
2530
2531 return true;
2532 }
2533