1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2019 Mellanox Technologies. */ 3 4 #include <net/netfilter/nf_conntrack.h> 5 #include <net/netfilter/nf_conntrack_core.h> 6 #include <net/netfilter/nf_conntrack_zones.h> 7 #include <net/netfilter/nf_conntrack_labels.h> 8 #include <net/netfilter/nf_conntrack_helper.h> 9 #include <net/netfilter/nf_conntrack_acct.h> 10 #include <uapi/linux/tc_act/tc_pedit.h> 11 #include <net/tc_act/tc_ct.h> 12 #include <net/flow_offload.h> 13 #include <net/netfilter/nf_flow_table.h> 14 #include <linux/workqueue.h> 15 #include <linux/refcount.h> 16 #include <linux/xarray.h> 17 #include <linux/if_macvlan.h> 18 #include <linux/debugfs.h> 19 20 #include "lib/fs_chains.h" 21 #include "en/tc_ct.h" 22 #include "en/tc/ct_fs.h" 23 #include "en/tc_priv.h" 24 #include "en/mod_hdr.h" 25 #include "en/mapping.h" 26 #include "en/tc/post_act.h" 27 #include "en.h" 28 #include "en_tc.h" 29 #include "en_rep.h" 30 #include "fs_core.h" 31 32 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) 33 #define MLX5_CT_STATE_TRK_BIT BIT(2) 34 #define MLX5_CT_STATE_NAT_BIT BIT(3) 35 #define MLX5_CT_STATE_REPLY_BIT BIT(4) 36 #define MLX5_CT_STATE_RELATED_BIT BIT(5) 37 #define MLX5_CT_STATE_INVALID_BIT BIT(6) 38 #define MLX5_CT_STATE_NEW_BIT BIT(7) 39 40 #define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG) 41 #define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG) 42 43 /* Statically allocate modify actions for 44 * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10. 45 * This will be increased dynamically if needed (for the ipv6 snat + dnat). 46 */ 47 #define MLX5_CT_MIN_MOD_ACTS 10 48 49 #define ct_dbg(fmt, args...)\ 50 netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) 51 52 struct mlx5_tc_ct_debugfs { 53 struct { 54 atomic_t offloaded; 55 atomic_t rx_dropped; 56 } stats; 57 58 struct dentry *root; 59 }; 60 61 struct mlx5_tc_ct_priv { 62 struct mlx5_core_dev *dev; 63 struct mlx5e_priv *priv; 64 const struct net_device *netdev; 65 struct mod_hdr_tbl *mod_hdr_tbl; 66 struct xarray tuple_ids; 67 struct rhashtable zone_ht; 68 struct rhashtable ct_tuples_ht; 69 struct rhashtable ct_tuples_nat_ht; 70 struct mlx5_flow_table *ct; 71 struct mlx5_flow_table *ct_nat; 72 struct mlx5_flow_group *ct_nat_miss_group; 73 struct mlx5_flow_handle *ct_nat_miss_rule; 74 struct mlx5e_post_act *post_act; 75 struct mutex control_lock; /* guards parallel adds/dels */ 76 struct mapping_ctx *zone_mapping; 77 struct mapping_ctx *labels_mapping; 78 enum mlx5_flow_namespace_type ns_type; 79 struct mlx5_fs_chains *chains; 80 struct mlx5_ct_fs *fs; 81 struct mlx5_ct_fs_ops *fs_ops; 82 spinlock_t ht_lock; /* protects ft entries */ 83 struct workqueue_struct *wq; 84 85 struct mlx5_tc_ct_debugfs debugfs; 86 }; 87 88 struct mlx5_ct_zone_rule { 89 struct mlx5_ct_fs_rule *rule; 90 struct mlx5e_mod_hdr_handle *mh; 91 struct mlx5_flow_attr *attr; 92 bool nat; 93 }; 94 95 struct mlx5_tc_ct_pre { 96 struct mlx5_flow_table *ft; 97 struct mlx5_flow_group *flow_grp; 98 struct mlx5_flow_group *miss_grp; 99 struct mlx5_flow_handle *flow_rule; 100 struct mlx5_flow_handle *miss_rule; 101 struct mlx5_modify_hdr *modify_hdr; 102 }; 103 104 struct mlx5_ct_ft { 105 struct rhash_head node; 106 u16 zone; 107 u32 zone_restore_id; 108 refcount_t refcount; 109 struct nf_flowtable *nf_ft; 110 struct mlx5_tc_ct_priv *ct_priv; 111 struct rhashtable ct_entries_ht; 112 struct mlx5_tc_ct_pre pre_ct; 113 struct mlx5_tc_ct_pre pre_ct_nat; 114 }; 115 116 struct mlx5_ct_tuple { 117 u16 addr_type; 118 __be16 n_proto; 119 u8 ip_proto; 120 struct { 121 union { 122 __be32 src_v4; 123 struct in6_addr src_v6; 124 }; 125 union { 126 __be32 dst_v4; 127 struct in6_addr dst_v6; 128 }; 129 } ip; 130 struct { 131 __be16 src; 132 __be16 dst; 133 } port; 134 135 u16 zone; 136 }; 137 138 struct mlx5_ct_counter { 139 struct mlx5_fc *counter; 140 refcount_t refcount; 141 bool is_shared; 142 }; 143 144 enum { 145 MLX5_CT_ENTRY_FLAG_VALID, 146 MLX5_CT_ENTRY_IN_CT_TABLE, 147 MLX5_CT_ENTRY_IN_CT_NAT_TABLE, 148 }; 149 150 struct mlx5_ct_entry { 151 struct rhash_head node; 152 struct rhash_head tuple_node; 153 struct rhash_head tuple_nat_node; 154 struct mlx5_ct_counter *counter; 155 unsigned long cookie; 156 unsigned long restore_cookie; 157 struct mlx5_ct_tuple tuple; 158 struct mlx5_ct_tuple tuple_nat; 159 struct mlx5_ct_zone_rule zone_rules[2]; 160 161 struct mlx5_tc_ct_priv *ct_priv; 162 struct work_struct work; 163 164 refcount_t refcnt; 165 unsigned long flags; 166 }; 167 168 static void 169 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 170 struct mlx5_flow_attr *attr, 171 struct mlx5e_mod_hdr_handle *mh); 172 173 static const struct rhashtable_params cts_ht_params = { 174 .head_offset = offsetof(struct mlx5_ct_entry, node), 175 .key_offset = offsetof(struct mlx5_ct_entry, cookie), 176 .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), 177 .automatic_shrinking = true, 178 .min_size = 16 * 1024, 179 }; 180 181 static const struct rhashtable_params zone_params = { 182 .head_offset = offsetof(struct mlx5_ct_ft, node), 183 .key_offset = offsetof(struct mlx5_ct_ft, zone), 184 .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), 185 .automatic_shrinking = true, 186 }; 187 188 static const struct rhashtable_params tuples_ht_params = { 189 .head_offset = offsetof(struct mlx5_ct_entry, tuple_node), 190 .key_offset = offsetof(struct mlx5_ct_entry, tuple), 191 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple), 192 .automatic_shrinking = true, 193 .min_size = 16 * 1024, 194 }; 195 196 static const struct rhashtable_params tuples_nat_ht_params = { 197 .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node), 198 .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat), 199 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat), 200 .automatic_shrinking = true, 201 .min_size = 16 * 1024, 202 }; 203 204 static bool 205 mlx5_tc_ct_entry_in_ct_table(struct mlx5_ct_entry *entry) 206 { 207 return test_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags); 208 } 209 210 static bool 211 mlx5_tc_ct_entry_in_ct_nat_table(struct mlx5_ct_entry *entry) 212 { 213 return test_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags); 214 } 215 216 static int 217 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, 218 u32 *labels, u32 *id) 219 { 220 if (!memchr_inv(labels, 0, sizeof(u32) * 4)) { 221 *id = 0; 222 return 0; 223 } 224 225 if (mapping_add(ct_priv->labels_mapping, labels, id)) 226 return -EOPNOTSUPP; 227 228 return 0; 229 } 230 231 static void 232 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) 233 { 234 if (id) 235 mapping_remove(ct_priv->labels_mapping, id); 236 } 237 238 static int 239 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) 240 { 241 struct flow_match_control control; 242 struct flow_match_basic basic; 243 244 flow_rule_match_basic(rule, &basic); 245 flow_rule_match_control(rule, &control); 246 247 tuple->n_proto = basic.key->n_proto; 248 tuple->ip_proto = basic.key->ip_proto; 249 tuple->addr_type = control.key->addr_type; 250 251 if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 252 struct flow_match_ipv4_addrs match; 253 254 flow_rule_match_ipv4_addrs(rule, &match); 255 tuple->ip.src_v4 = match.key->src; 256 tuple->ip.dst_v4 = match.key->dst; 257 } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 258 struct flow_match_ipv6_addrs match; 259 260 flow_rule_match_ipv6_addrs(rule, &match); 261 tuple->ip.src_v6 = match.key->src; 262 tuple->ip.dst_v6 = match.key->dst; 263 } else { 264 return -EOPNOTSUPP; 265 } 266 267 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 268 struct flow_match_ports match; 269 270 flow_rule_match_ports(rule, &match); 271 switch (tuple->ip_proto) { 272 case IPPROTO_TCP: 273 case IPPROTO_UDP: 274 tuple->port.src = match.key->src; 275 tuple->port.dst = match.key->dst; 276 break; 277 default: 278 return -EOPNOTSUPP; 279 } 280 } else { 281 if (tuple->ip_proto != IPPROTO_GRE) 282 return -EOPNOTSUPP; 283 } 284 285 return 0; 286 } 287 288 static int 289 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, 290 struct flow_rule *rule) 291 { 292 struct flow_action *flow_action = &rule->action; 293 struct flow_action_entry *act; 294 u32 offset, val, ip6_offset; 295 int i; 296 297 flow_action_for_each(i, act, flow_action) { 298 if (act->id != FLOW_ACTION_MANGLE) 299 continue; 300 301 offset = act->mangle.offset; 302 val = act->mangle.val; 303 switch (act->mangle.htype) { 304 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 305 if (offset == offsetof(struct iphdr, saddr)) 306 tuple->ip.src_v4 = cpu_to_be32(val); 307 else if (offset == offsetof(struct iphdr, daddr)) 308 tuple->ip.dst_v4 = cpu_to_be32(val); 309 else 310 return -EOPNOTSUPP; 311 break; 312 313 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 314 ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); 315 ip6_offset /= 4; 316 if (ip6_offset < 4) 317 tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); 318 else if (ip6_offset < 8) 319 tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); 320 else 321 return -EOPNOTSUPP; 322 break; 323 324 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 325 if (offset == offsetof(struct tcphdr, source)) 326 tuple->port.src = cpu_to_be16(val); 327 else if (offset == offsetof(struct tcphdr, dest)) 328 tuple->port.dst = cpu_to_be16(val); 329 else 330 return -EOPNOTSUPP; 331 break; 332 333 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 334 if (offset == offsetof(struct udphdr, source)) 335 tuple->port.src = cpu_to_be16(val); 336 else if (offset == offsetof(struct udphdr, dest)) 337 tuple->port.dst = cpu_to_be16(val); 338 else 339 return -EOPNOTSUPP; 340 break; 341 342 default: 343 return -EOPNOTSUPP; 344 } 345 } 346 347 return 0; 348 } 349 350 static int 351 mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv, 352 struct net_device *ndev) 353 { 354 struct mlx5e_priv *other_priv = netdev_priv(ndev); 355 struct mlx5_core_dev *mdev = ct_priv->dev; 356 bool vf_rep, uplink_rep; 357 358 vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); 359 uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); 360 361 if (vf_rep) 362 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; 363 if (uplink_rep) 364 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; 365 if (is_vlan_dev(ndev)) 366 return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev)); 367 if (netif_is_macvlan(ndev)) 368 return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev)); 369 if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev)) 370 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; 371 372 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT; 373 } 374 375 static int 376 mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv, 377 struct mlx5_flow_spec *spec, 378 struct flow_rule *rule) 379 { 380 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 381 outer_headers); 382 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 383 outer_headers); 384 u16 addr_type = 0; 385 u8 ip_proto = 0; 386 387 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 388 struct flow_match_basic match; 389 390 flow_rule_match_basic(rule, &match); 391 392 mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v); 393 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 394 match.mask->ip_proto); 395 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 396 match.key->ip_proto); 397 398 ip_proto = match.key->ip_proto; 399 } 400 401 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 402 struct flow_match_control match; 403 404 flow_rule_match_control(rule, &match); 405 addr_type = match.key->addr_type; 406 } 407 408 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 409 struct flow_match_ipv4_addrs match; 410 411 flow_rule_match_ipv4_addrs(rule, &match); 412 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 413 src_ipv4_src_ipv6.ipv4_layout.ipv4), 414 &match.mask->src, sizeof(match.mask->src)); 415 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 416 src_ipv4_src_ipv6.ipv4_layout.ipv4), 417 &match.key->src, sizeof(match.key->src)); 418 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 419 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 420 &match.mask->dst, sizeof(match.mask->dst)); 421 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 422 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 423 &match.key->dst, sizeof(match.key->dst)); 424 } 425 426 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 427 struct flow_match_ipv6_addrs match; 428 429 flow_rule_match_ipv6_addrs(rule, &match); 430 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 431 src_ipv4_src_ipv6.ipv6_layout.ipv6), 432 &match.mask->src, sizeof(match.mask->src)); 433 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 434 src_ipv4_src_ipv6.ipv6_layout.ipv6), 435 &match.key->src, sizeof(match.key->src)); 436 437 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 438 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 439 &match.mask->dst, sizeof(match.mask->dst)); 440 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 441 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 442 &match.key->dst, sizeof(match.key->dst)); 443 } 444 445 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 446 struct flow_match_ports match; 447 448 flow_rule_match_ports(rule, &match); 449 switch (ip_proto) { 450 case IPPROTO_TCP: 451 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 452 tcp_sport, ntohs(match.mask->src)); 453 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 454 tcp_sport, ntohs(match.key->src)); 455 456 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 457 tcp_dport, ntohs(match.mask->dst)); 458 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 459 tcp_dport, ntohs(match.key->dst)); 460 break; 461 462 case IPPROTO_UDP: 463 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 464 udp_sport, ntohs(match.mask->src)); 465 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 466 udp_sport, ntohs(match.key->src)); 467 468 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 469 udp_dport, ntohs(match.mask->dst)); 470 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 471 udp_dport, ntohs(match.key->dst)); 472 break; 473 default: 474 break; 475 } 476 } 477 478 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 479 struct flow_match_tcp match; 480 481 flow_rule_match_tcp(rule, &match); 482 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 483 ntohs(match.mask->flags)); 484 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 485 ntohs(match.key->flags)); 486 } 487 488 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { 489 struct flow_match_meta match; 490 491 flow_rule_match_meta(rule, &match); 492 493 if (match.key->ingress_ifindex & match.mask->ingress_ifindex) { 494 struct net_device *dev; 495 496 dev = dev_get_by_index(&init_net, match.key->ingress_ifindex); 497 if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source)) 498 spec->flow_context.flow_source = 499 mlx5_tc_ct_get_flow_source_match(ct_priv, dev); 500 501 dev_put(dev); 502 } 503 } 504 505 return 0; 506 } 507 508 static void 509 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) 510 { 511 if (entry->counter->is_shared && 512 !refcount_dec_and_test(&entry->counter->refcount)) 513 return; 514 515 mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); 516 kfree(entry->counter); 517 } 518 519 static void 520 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, 521 struct mlx5_ct_entry *entry, 522 bool nat) 523 { 524 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 525 struct mlx5_flow_attr *attr = zone_rule->attr; 526 527 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); 528 529 ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); 530 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); 531 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 532 kfree(attr); 533 } 534 535 static void 536 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, 537 struct mlx5_ct_entry *entry) 538 { 539 if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) 540 mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); 541 if (mlx5_tc_ct_entry_in_ct_table(entry)) 542 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 543 544 atomic_dec(&ct_priv->debugfs.stats.offloaded); 545 } 546 547 static struct flow_action_entry * 548 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) 549 { 550 struct flow_action *flow_action = &flow_rule->action; 551 struct flow_action_entry *act; 552 int i; 553 554 flow_action_for_each(i, act, flow_action) { 555 if (act->id == FLOW_ACTION_CT_METADATA) 556 return act; 557 } 558 559 return NULL; 560 } 561 562 static int 563 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, 564 struct mlx5e_tc_mod_hdr_acts *mod_acts, 565 u8 ct_state, 566 u32 mark, 567 u32 labels_id, 568 u8 zone_restore_id) 569 { 570 enum mlx5_flow_namespace_type ns = ct_priv->ns_type; 571 struct mlx5_core_dev *dev = ct_priv->dev; 572 int err; 573 574 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 575 CTSTATE_TO_REG, ct_state); 576 if (err) 577 return err; 578 579 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 580 MARK_TO_REG, mark); 581 if (err) 582 return err; 583 584 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 585 LABELS_TO_REG, labels_id); 586 if (err) 587 return err; 588 589 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 590 ZONE_RESTORE_TO_REG, zone_restore_id); 591 if (err) 592 return err; 593 594 /* Make another copy of zone id in reg_b for 595 * NIC rx flows since we don't copy reg_c1 to 596 * reg_b upon miss. 597 */ 598 if (ns != MLX5_FLOW_NAMESPACE_FDB) { 599 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 600 NIC_ZONE_RESTORE_TO_REG, zone_restore_id); 601 if (err) 602 return err; 603 } 604 return 0; 605 } 606 607 static int 608 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, 609 char *modact) 610 { 611 u32 offset = act->mangle.offset, field; 612 613 switch (act->mangle.htype) { 614 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 615 MLX5_SET(set_action_in, modact, length, 0); 616 if (offset == offsetof(struct iphdr, saddr)) 617 field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; 618 else if (offset == offsetof(struct iphdr, daddr)) 619 field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; 620 else 621 return -EOPNOTSUPP; 622 break; 623 624 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 625 MLX5_SET(set_action_in, modact, length, 0); 626 if (offset == offsetof(struct ipv6hdr, saddr) + 12) 627 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; 628 else if (offset == offsetof(struct ipv6hdr, saddr) + 8) 629 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; 630 else if (offset == offsetof(struct ipv6hdr, saddr) + 4) 631 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; 632 else if (offset == offsetof(struct ipv6hdr, saddr)) 633 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; 634 else if (offset == offsetof(struct ipv6hdr, daddr) + 12) 635 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; 636 else if (offset == offsetof(struct ipv6hdr, daddr) + 8) 637 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; 638 else if (offset == offsetof(struct ipv6hdr, daddr) + 4) 639 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; 640 else if (offset == offsetof(struct ipv6hdr, daddr)) 641 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; 642 else 643 return -EOPNOTSUPP; 644 break; 645 646 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 647 MLX5_SET(set_action_in, modact, length, 16); 648 if (offset == offsetof(struct tcphdr, source)) 649 field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; 650 else if (offset == offsetof(struct tcphdr, dest)) 651 field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; 652 else 653 return -EOPNOTSUPP; 654 break; 655 656 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 657 MLX5_SET(set_action_in, modact, length, 16); 658 if (offset == offsetof(struct udphdr, source)) 659 field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; 660 else if (offset == offsetof(struct udphdr, dest)) 661 field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; 662 else 663 return -EOPNOTSUPP; 664 break; 665 666 default: 667 return -EOPNOTSUPP; 668 } 669 670 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 671 MLX5_SET(set_action_in, modact, offset, 0); 672 MLX5_SET(set_action_in, modact, field, field); 673 MLX5_SET(set_action_in, modact, data, act->mangle.val); 674 675 return 0; 676 } 677 678 static int 679 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, 680 struct flow_rule *flow_rule, 681 struct mlx5e_tc_mod_hdr_acts *mod_acts) 682 { 683 struct flow_action *flow_action = &flow_rule->action; 684 struct mlx5_core_dev *mdev = ct_priv->dev; 685 struct flow_action_entry *act; 686 char *modact; 687 int err, i; 688 689 flow_action_for_each(i, act, flow_action) { 690 switch (act->id) { 691 case FLOW_ACTION_MANGLE: { 692 modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts); 693 if (IS_ERR(modact)) 694 return PTR_ERR(modact); 695 696 err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); 697 if (err) 698 return err; 699 700 mod_acts->num_actions++; 701 } 702 break; 703 704 case FLOW_ACTION_CT_METADATA: 705 /* Handled earlier */ 706 continue; 707 default: 708 return -EOPNOTSUPP; 709 } 710 } 711 712 return 0; 713 } 714 715 static int 716 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 717 struct mlx5_flow_attr *attr, 718 struct flow_rule *flow_rule, 719 struct mlx5e_mod_hdr_handle **mh, 720 u8 zone_restore_id, bool nat_table, bool has_nat) 721 { 722 DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); 723 DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); 724 struct flow_action_entry *meta; 725 enum ip_conntrack_info ctinfo; 726 u16 ct_state = 0; 727 int err; 728 729 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 730 if (!meta) 731 return -EOPNOTSUPP; 732 ctinfo = meta->ct_metadata.cookie & NFCT_INFOMASK; 733 734 err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, 735 &attr->ct_attr.ct_labels_id); 736 if (err) 737 return -EOPNOTSUPP; 738 if (nat_table) { 739 if (has_nat) { 740 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts); 741 if (err) 742 goto err_mapping; 743 } 744 745 ct_state |= MLX5_CT_STATE_NAT_BIT; 746 } 747 748 ct_state |= MLX5_CT_STATE_TRK_BIT; 749 ct_state |= ctinfo == IP_CT_NEW ? MLX5_CT_STATE_NEW_BIT : MLX5_CT_STATE_ESTABLISHED_BIT; 750 ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; 751 err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, 752 ct_state, 753 meta->ct_metadata.mark, 754 attr->ct_attr.ct_labels_id, 755 zone_restore_id); 756 if (err) 757 goto err_mapping; 758 759 if (nat_table && has_nat) { 760 attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type, 761 mod_acts.num_actions, 762 mod_acts.actions); 763 if (IS_ERR(attr->modify_hdr)) { 764 err = PTR_ERR(attr->modify_hdr); 765 goto err_mapping; 766 } 767 768 *mh = NULL; 769 } else { 770 *mh = mlx5e_mod_hdr_attach(ct_priv->dev, 771 ct_priv->mod_hdr_tbl, 772 ct_priv->ns_type, 773 &mod_acts); 774 if (IS_ERR(*mh)) { 775 err = PTR_ERR(*mh); 776 goto err_mapping; 777 } 778 attr->modify_hdr = mlx5e_mod_hdr_get(*mh); 779 } 780 781 mlx5e_mod_hdr_dealloc(&mod_acts); 782 return 0; 783 784 err_mapping: 785 mlx5e_mod_hdr_dealloc(&mod_acts); 786 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 787 return err; 788 } 789 790 static void 791 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 792 struct mlx5_flow_attr *attr, 793 struct mlx5e_mod_hdr_handle *mh) 794 { 795 if (mh) 796 mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh); 797 else 798 mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr); 799 } 800 801 static int 802 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, 803 struct flow_rule *flow_rule, 804 struct mlx5_ct_entry *entry, 805 bool nat, u8 zone_restore_id) 806 { 807 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 808 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 809 struct mlx5_flow_spec *spec = NULL; 810 struct mlx5_flow_attr *attr; 811 int err; 812 813 zone_rule->nat = nat; 814 815 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 816 if (!spec) 817 return -ENOMEM; 818 819 attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 820 if (!attr) { 821 err = -ENOMEM; 822 goto err_attr; 823 } 824 825 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, 826 &zone_rule->mh, 827 zone_restore_id, 828 nat, 829 mlx5_tc_ct_entry_in_ct_nat_table(entry)); 830 if (err) { 831 ct_dbg("Failed to create ct entry mod hdr"); 832 goto err_mod_hdr; 833 } 834 835 attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | 836 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 837 MLX5_FLOW_CONTEXT_ACTION_COUNT; 838 attr->dest_chain = 0; 839 attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 840 attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; 841 if (entry->tuple.ip_proto == IPPROTO_TCP || 842 entry->tuple.ip_proto == IPPROTO_UDP) 843 attr->outer_match_level = MLX5_MATCH_L4; 844 else 845 attr->outer_match_level = MLX5_MATCH_L3; 846 attr->counter = entry->counter->counter; 847 attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; 848 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) 849 attr->esw_attr->in_mdev = priv->mdev; 850 851 mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); 852 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); 853 854 zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); 855 if (IS_ERR(zone_rule->rule)) { 856 err = PTR_ERR(zone_rule->rule); 857 ct_dbg("Failed to add ct entry rule, nat: %d", nat); 858 goto err_rule; 859 } 860 861 zone_rule->attr = attr; 862 863 kvfree(spec); 864 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); 865 866 return 0; 867 868 err_rule: 869 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr, zone_rule->mh); 870 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 871 err_mod_hdr: 872 kfree(attr); 873 err_attr: 874 kvfree(spec); 875 return err; 876 } 877 878 static int 879 mlx5_tc_ct_entry_update_rule(struct mlx5_tc_ct_priv *ct_priv, 880 struct flow_rule *flow_rule, 881 struct mlx5_ct_entry *entry, 882 bool nat, u8 zone_restore_id) 883 { 884 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 885 struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr; 886 struct mlx5e_mod_hdr_handle *mh; 887 struct mlx5_flow_spec *spec; 888 int err; 889 890 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 891 if (!spec) 892 return -ENOMEM; 893 894 old_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 895 if (!old_attr) { 896 err = -ENOMEM; 897 goto err_attr; 898 } 899 *old_attr = *attr; 900 901 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id, 902 nat, mlx5_tc_ct_entry_in_ct_nat_table(entry)); 903 if (err) { 904 ct_dbg("Failed to create ct entry mod hdr, err: %d", err); 905 goto err_mod_hdr; 906 } 907 908 mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); 909 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); 910 911 err = ct_priv->fs_ops->ct_rule_update(ct_priv->fs, zone_rule->rule, spec, attr); 912 if (err) { 913 ct_dbg("Failed to update ct entry rule, nat: %d, err: %d", nat, err); 914 goto err_rule; 915 } 916 917 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh); 918 zone_rule->mh = mh; 919 mlx5_put_label_mapping(ct_priv, old_attr->ct_attr.ct_labels_id); 920 921 kfree(old_attr); 922 kvfree(spec); 923 ct_dbg("Updated ct entry rule in zone %d", entry->tuple.zone); 924 925 return 0; 926 927 err_rule: 928 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh); 929 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 930 err_mod_hdr: 931 *attr = *old_attr; 932 kfree(old_attr); 933 err_attr: 934 kvfree(spec); 935 return err; 936 } 937 938 static bool 939 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) 940 { 941 return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 942 } 943 944 static struct mlx5_ct_entry * 945 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) 946 { 947 struct mlx5_ct_entry *entry; 948 949 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, 950 tuples_ht_params); 951 if (entry && mlx5_tc_ct_entry_valid(entry) && 952 refcount_inc_not_zero(&entry->refcnt)) { 953 return entry; 954 } else if (!entry) { 955 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, 956 tuple, tuples_nat_ht_params); 957 if (entry && mlx5_tc_ct_entry_valid(entry) && 958 refcount_inc_not_zero(&entry->refcnt)) 959 return entry; 960 } 961 962 return entry ? ERR_PTR(-EINVAL) : NULL; 963 } 964 965 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) 966 { 967 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 968 969 if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) 970 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 971 &entry->tuple_nat_node, 972 tuples_nat_ht_params); 973 if (mlx5_tc_ct_entry_in_ct_table(entry)) 974 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, 975 tuples_ht_params); 976 } 977 978 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) 979 { 980 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 981 982 mlx5_tc_ct_entry_del_rules(ct_priv, entry); 983 984 spin_lock_bh(&ct_priv->ht_lock); 985 mlx5_tc_ct_entry_remove_from_tuples(entry); 986 spin_unlock_bh(&ct_priv->ht_lock); 987 988 mlx5_tc_ct_counter_put(ct_priv, entry); 989 kfree(entry); 990 } 991 992 static void 993 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 994 { 995 if (!refcount_dec_and_test(&entry->refcnt)) 996 return; 997 998 mlx5_tc_ct_entry_del(entry); 999 } 1000 1001 static void mlx5_tc_ct_entry_del_work(struct work_struct *work) 1002 { 1003 struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); 1004 1005 mlx5_tc_ct_entry_del(entry); 1006 } 1007 1008 static void 1009 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 1010 { 1011 if (!refcount_dec_and_test(&entry->refcnt)) 1012 return; 1013 1014 INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); 1015 queue_work(entry->ct_priv->wq, &entry->work); 1016 } 1017 1018 static struct mlx5_ct_counter * 1019 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) 1020 { 1021 struct mlx5_ct_counter *counter; 1022 int ret; 1023 1024 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 1025 if (!counter) 1026 return ERR_PTR(-ENOMEM); 1027 1028 counter->is_shared = false; 1029 counter->counter = mlx5_fc_create(ct_priv->dev, true); 1030 if (IS_ERR(counter->counter)) { 1031 ct_dbg("Failed to create counter for ct entry"); 1032 ret = PTR_ERR(counter->counter); 1033 kfree(counter); 1034 return ERR_PTR(ret); 1035 } 1036 1037 return counter; 1038 } 1039 1040 static struct mlx5_ct_counter * 1041 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, 1042 struct mlx5_ct_entry *entry) 1043 { 1044 struct mlx5_ct_tuple rev_tuple = entry->tuple; 1045 struct mlx5_ct_counter *shared_counter; 1046 struct mlx5_ct_entry *rev_entry; 1047 1048 /* get the reversed tuple */ 1049 swap(rev_tuple.port.src, rev_tuple.port.dst); 1050 1051 if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 1052 __be32 tmp_addr = rev_tuple.ip.src_v4; 1053 1054 rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4; 1055 rev_tuple.ip.dst_v4 = tmp_addr; 1056 } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 1057 struct in6_addr tmp_addr = rev_tuple.ip.src_v6; 1058 1059 rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6; 1060 rev_tuple.ip.dst_v6 = tmp_addr; 1061 } else { 1062 return ERR_PTR(-EOPNOTSUPP); 1063 } 1064 1065 /* Use the same counter as the reverse direction */ 1066 spin_lock_bh(&ct_priv->ht_lock); 1067 rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); 1068 1069 if (IS_ERR(rev_entry)) { 1070 spin_unlock_bh(&ct_priv->ht_lock); 1071 goto create_counter; 1072 } 1073 1074 if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { 1075 ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry); 1076 shared_counter = rev_entry->counter; 1077 spin_unlock_bh(&ct_priv->ht_lock); 1078 1079 mlx5_tc_ct_entry_put(rev_entry); 1080 return shared_counter; 1081 } 1082 1083 spin_unlock_bh(&ct_priv->ht_lock); 1084 1085 create_counter: 1086 1087 shared_counter = mlx5_tc_ct_counter_create(ct_priv); 1088 if (IS_ERR(shared_counter)) 1089 return shared_counter; 1090 1091 shared_counter->is_shared = true; 1092 refcount_set(&shared_counter->refcount, 1); 1093 return shared_counter; 1094 } 1095 1096 static int 1097 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, 1098 struct flow_rule *flow_rule, 1099 struct mlx5_ct_entry *entry, 1100 u8 zone_restore_id) 1101 { 1102 int err; 1103 1104 if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) 1105 entry->counter = mlx5_tc_ct_counter_create(ct_priv); 1106 else 1107 entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); 1108 1109 if (IS_ERR(entry->counter)) { 1110 err = PTR_ERR(entry->counter); 1111 return err; 1112 } 1113 1114 if (mlx5_tc_ct_entry_in_ct_table(entry)) { 1115 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false, 1116 zone_restore_id); 1117 if (err) 1118 goto err_orig; 1119 } 1120 1121 if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) { 1122 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true, 1123 zone_restore_id); 1124 if (err) 1125 goto err_nat; 1126 } 1127 1128 atomic_inc(&ct_priv->debugfs.stats.offloaded); 1129 return 0; 1130 1131 err_nat: 1132 if (mlx5_tc_ct_entry_in_ct_table(entry)) 1133 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 1134 err_orig: 1135 mlx5_tc_ct_counter_put(ct_priv, entry); 1136 return err; 1137 } 1138 1139 static int 1140 mlx5_tc_ct_entry_update_rules(struct mlx5_tc_ct_priv *ct_priv, 1141 struct flow_rule *flow_rule, 1142 struct mlx5_ct_entry *entry, 1143 u8 zone_restore_id) 1144 { 1145 int err = 0; 1146 1147 if (mlx5_tc_ct_entry_in_ct_table(entry)) { 1148 err = mlx5_tc_ct_entry_update_rule(ct_priv, flow_rule, entry, false, 1149 zone_restore_id); 1150 if (err) 1151 return err; 1152 } 1153 1154 if (mlx5_tc_ct_entry_in_ct_nat_table(entry)) { 1155 err = mlx5_tc_ct_entry_update_rule(ct_priv, flow_rule, entry, true, 1156 zone_restore_id); 1157 if (err && mlx5_tc_ct_entry_in_ct_table(entry)) 1158 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 1159 } 1160 return err; 1161 } 1162 1163 static int 1164 mlx5_tc_ct_block_flow_offload_update(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule, 1165 struct mlx5_ct_entry *entry, unsigned long cookie) 1166 { 1167 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1168 int err; 1169 1170 err = mlx5_tc_ct_entry_update_rules(ct_priv, flow_rule, entry, ft->zone_restore_id); 1171 if (!err) 1172 return 0; 1173 1174 /* If failed to update the entry, then look it up again under ht_lock 1175 * protection and properly delete it. 1176 */ 1177 spin_lock_bh(&ct_priv->ht_lock); 1178 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1179 if (entry) { 1180 rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); 1181 spin_unlock_bh(&ct_priv->ht_lock); 1182 mlx5_tc_ct_entry_put(entry); 1183 } else { 1184 spin_unlock_bh(&ct_priv->ht_lock); 1185 } 1186 return err; 1187 } 1188 1189 static int 1190 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, 1191 struct flow_cls_offload *flow) 1192 { 1193 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); 1194 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1195 struct flow_action_entry *meta_action; 1196 unsigned long cookie = flow->cookie; 1197 struct mlx5_ct_entry *entry; 1198 bool has_nat; 1199 int err; 1200 1201 meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 1202 if (!meta_action) 1203 return -EOPNOTSUPP; 1204 1205 spin_lock_bh(&ct_priv->ht_lock); 1206 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1207 if (entry && refcount_inc_not_zero(&entry->refcnt)) { 1208 if (entry->restore_cookie == meta_action->ct_metadata.cookie) { 1209 spin_unlock_bh(&ct_priv->ht_lock); 1210 mlx5_tc_ct_entry_put(entry); 1211 return -EEXIST; 1212 } 1213 entry->restore_cookie = meta_action->ct_metadata.cookie; 1214 spin_unlock_bh(&ct_priv->ht_lock); 1215 1216 err = mlx5_tc_ct_block_flow_offload_update(ft, flow_rule, entry, cookie); 1217 mlx5_tc_ct_entry_put(entry); 1218 return err; 1219 } 1220 spin_unlock_bh(&ct_priv->ht_lock); 1221 1222 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1223 if (!entry) 1224 return -ENOMEM; 1225 1226 entry->tuple.zone = ft->zone; 1227 entry->cookie = flow->cookie; 1228 entry->restore_cookie = meta_action->ct_metadata.cookie; 1229 refcount_set(&entry->refcnt, 2); 1230 entry->ct_priv = ct_priv; 1231 1232 err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); 1233 if (err) 1234 goto err_set; 1235 1236 memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple)); 1237 err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); 1238 if (err) 1239 goto err_set; 1240 has_nat = memcmp(&entry->tuple, &entry->tuple_nat, 1241 sizeof(entry->tuple)); 1242 1243 spin_lock_bh(&ct_priv->ht_lock); 1244 1245 err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node, 1246 cts_ht_params); 1247 if (err) 1248 goto err_entries; 1249 1250 if (has_nat) { 1251 err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, 1252 &entry->tuple_nat_node, 1253 tuples_nat_ht_params); 1254 if (err) 1255 goto err_tuple_nat; 1256 1257 set_bit(MLX5_CT_ENTRY_IN_CT_NAT_TABLE, &entry->flags); 1258 } 1259 1260 if (!mlx5_tc_ct_entry_in_ct_nat_table(entry)) { 1261 err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht, 1262 &entry->tuple_node, 1263 tuples_ht_params); 1264 if (err) 1265 goto err_tuple; 1266 1267 set_bit(MLX5_CT_ENTRY_IN_CT_TABLE, &entry->flags); 1268 } 1269 spin_unlock_bh(&ct_priv->ht_lock); 1270 1271 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, 1272 ft->zone_restore_id); 1273 if (err) 1274 goto err_rules; 1275 1276 set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 1277 mlx5_tc_ct_entry_put(entry); /* this function reference */ 1278 1279 return 0; 1280 1281 err_rules: 1282 spin_lock_bh(&ct_priv->ht_lock); 1283 err_tuple: 1284 mlx5_tc_ct_entry_remove_from_tuples(entry); 1285 err_tuple_nat: 1286 rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); 1287 err_entries: 1288 spin_unlock_bh(&ct_priv->ht_lock); 1289 err_set: 1290 kfree(entry); 1291 if (err != -EEXIST) 1292 netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err); 1293 return err; 1294 } 1295 1296 static int 1297 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, 1298 struct flow_cls_offload *flow) 1299 { 1300 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1301 unsigned long cookie = flow->cookie; 1302 struct mlx5_ct_entry *entry; 1303 1304 spin_lock_bh(&ct_priv->ht_lock); 1305 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1306 if (!entry) { 1307 spin_unlock_bh(&ct_priv->ht_lock); 1308 return -ENOENT; 1309 } 1310 1311 if (!mlx5_tc_ct_entry_valid(entry)) { 1312 spin_unlock_bh(&ct_priv->ht_lock); 1313 return -EINVAL; 1314 } 1315 1316 rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); 1317 spin_unlock_bh(&ct_priv->ht_lock); 1318 1319 mlx5_tc_ct_entry_put(entry); 1320 1321 return 0; 1322 } 1323 1324 static int 1325 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, 1326 struct flow_cls_offload *f) 1327 { 1328 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1329 unsigned long cookie = f->cookie; 1330 struct mlx5_ct_entry *entry; 1331 u64 lastuse, packets, bytes; 1332 1333 spin_lock_bh(&ct_priv->ht_lock); 1334 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1335 if (!entry) { 1336 spin_unlock_bh(&ct_priv->ht_lock); 1337 return -ENOENT; 1338 } 1339 1340 if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) { 1341 spin_unlock_bh(&ct_priv->ht_lock); 1342 return -EINVAL; 1343 } 1344 1345 spin_unlock_bh(&ct_priv->ht_lock); 1346 1347 mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); 1348 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 1349 FLOW_ACTION_HW_STATS_DELAYED); 1350 1351 mlx5_tc_ct_entry_put(entry); 1352 return 0; 1353 } 1354 1355 static bool 1356 mlx5_tc_ct_filter_legacy_non_nic_flows(struct mlx5_ct_ft *ft, 1357 struct flow_cls_offload *flow) 1358 { 1359 struct flow_rule *rule = flow_cls_offload_flow_rule(flow); 1360 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1361 struct flow_match_meta match; 1362 struct net_device *netdev; 1363 bool same_dev = false; 1364 1365 if (!is_mdev_legacy_mode(ct_priv->dev) || 1366 !flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) 1367 return true; 1368 1369 flow_rule_match_meta(rule, &match); 1370 1371 if (!(match.key->ingress_ifindex & match.mask->ingress_ifindex)) 1372 return true; 1373 1374 netdev = dev_get_by_index(&init_net, match.key->ingress_ifindex); 1375 same_dev = ct_priv->netdev == netdev; 1376 dev_put(netdev); 1377 1378 return same_dev; 1379 } 1380 1381 static int 1382 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, 1383 void *cb_priv) 1384 { 1385 struct flow_cls_offload *f = type_data; 1386 struct mlx5_ct_ft *ft = cb_priv; 1387 1388 if (type != TC_SETUP_CLSFLOWER) 1389 return -EOPNOTSUPP; 1390 1391 switch (f->command) { 1392 case FLOW_CLS_REPLACE: 1393 if (!mlx5_tc_ct_filter_legacy_non_nic_flows(ft, f)) 1394 return -EOPNOTSUPP; 1395 1396 return mlx5_tc_ct_block_flow_offload_add(ft, f); 1397 case FLOW_CLS_DESTROY: 1398 return mlx5_tc_ct_block_flow_offload_del(ft, f); 1399 case FLOW_CLS_STATS: 1400 return mlx5_tc_ct_block_flow_offload_stats(ft, f); 1401 default: 1402 break; 1403 } 1404 1405 return -EOPNOTSUPP; 1406 } 1407 1408 static bool 1409 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, 1410 u16 zone) 1411 { 1412 struct flow_keys flow_keys; 1413 1414 skb_reset_network_header(skb); 1415 skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); 1416 1417 tuple->zone = zone; 1418 1419 if (flow_keys.basic.ip_proto != IPPROTO_TCP && 1420 flow_keys.basic.ip_proto != IPPROTO_UDP && 1421 flow_keys.basic.ip_proto != IPPROTO_GRE) 1422 return false; 1423 1424 if (flow_keys.basic.ip_proto == IPPROTO_TCP || 1425 flow_keys.basic.ip_proto == IPPROTO_UDP) { 1426 tuple->port.src = flow_keys.ports.src; 1427 tuple->port.dst = flow_keys.ports.dst; 1428 } 1429 tuple->n_proto = flow_keys.basic.n_proto; 1430 tuple->ip_proto = flow_keys.basic.ip_proto; 1431 1432 switch (flow_keys.basic.n_proto) { 1433 case htons(ETH_P_IP): 1434 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1435 tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; 1436 tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; 1437 break; 1438 1439 case htons(ETH_P_IPV6): 1440 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1441 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; 1442 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; 1443 break; 1444 default: 1445 goto out; 1446 } 1447 1448 return true; 1449 1450 out: 1451 return false; 1452 } 1453 1454 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) 1455 { 1456 u32 ctstate = 0, ctstate_mask = 0; 1457 1458 mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, 1459 &ctstate, &ctstate_mask); 1460 1461 if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) 1462 return -EOPNOTSUPP; 1463 1464 ctstate_mask |= MLX5_CT_STATE_TRK_BIT; 1465 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1466 ctstate, ctstate_mask); 1467 1468 return 0; 1469 } 1470 1471 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) 1472 { 1473 if (!priv || !ct_attr->ct_labels_id) 1474 return; 1475 1476 mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); 1477 } 1478 1479 int 1480 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, 1481 struct mlx5_flow_spec *spec, 1482 struct flow_cls_offload *f, 1483 struct mlx5_ct_attr *ct_attr, 1484 struct netlink_ext_ack *extack) 1485 { 1486 bool trk, est, untrk, unnew, unest, new, rpl, unrpl, rel, unrel, inv, uninv; 1487 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 1488 struct flow_dissector_key_ct *mask, *key; 1489 u32 ctstate = 0, ctstate_mask = 0; 1490 u16 ct_state_on, ct_state_off; 1491 u16 ct_state, ct_state_mask; 1492 struct flow_match_ct match; 1493 u32 ct_labels[4]; 1494 1495 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) 1496 return 0; 1497 1498 if (!priv) { 1499 NL_SET_ERR_MSG_MOD(extack, 1500 "offload of ct matching isn't available"); 1501 return -EOPNOTSUPP; 1502 } 1503 1504 flow_rule_match_ct(rule, &match); 1505 1506 key = match.key; 1507 mask = match.mask; 1508 1509 ct_state = key->ct_state; 1510 ct_state_mask = mask->ct_state; 1511 1512 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | 1513 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | 1514 TCA_FLOWER_KEY_CT_FLAGS_NEW | 1515 TCA_FLOWER_KEY_CT_FLAGS_REPLY | 1516 TCA_FLOWER_KEY_CT_FLAGS_RELATED | 1517 TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { 1518 NL_SET_ERR_MSG_MOD(extack, 1519 "only ct_state trk, est, new and rpl are supported for offload"); 1520 return -EOPNOTSUPP; 1521 } 1522 1523 ct_state_on = ct_state & ct_state_mask; 1524 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; 1525 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1526 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; 1527 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1528 rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; 1529 rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; 1530 inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; 1531 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1532 unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW; 1533 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1534 unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; 1535 unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; 1536 uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID; 1537 1538 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; 1539 ctstate |= new ? MLX5_CT_STATE_NEW_BIT : 0; 1540 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1541 ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0; 1542 ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; 1543 ctstate_mask |= (unnew || new) ? MLX5_CT_STATE_NEW_BIT : 0; 1544 ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1545 ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; 1546 ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; 1547 ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0; 1548 1549 if (rel) { 1550 NL_SET_ERR_MSG_MOD(extack, 1551 "matching on ct_state +rel isn't supported"); 1552 return -EOPNOTSUPP; 1553 } 1554 1555 if (inv) { 1556 NL_SET_ERR_MSG_MOD(extack, 1557 "matching on ct_state +inv isn't supported"); 1558 return -EOPNOTSUPP; 1559 } 1560 1561 if (mask->ct_zone) 1562 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1563 key->ct_zone, MLX5_CT_ZONE_MASK); 1564 if (ctstate_mask) 1565 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1566 ctstate, ctstate_mask); 1567 if (mask->ct_mark) 1568 mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, 1569 key->ct_mark, mask->ct_mark); 1570 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] || 1571 mask->ct_labels[3]) { 1572 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0]; 1573 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; 1574 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; 1575 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; 1576 if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) 1577 return -EOPNOTSUPP; 1578 mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, 1579 MLX5_CT_LABELS_MASK); 1580 } 1581 1582 return 0; 1583 } 1584 1585 int 1586 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, 1587 struct mlx5_flow_attr *attr, 1588 const struct flow_action_entry *act, 1589 struct netlink_ext_ack *extack) 1590 { 1591 if (!priv) { 1592 NL_SET_ERR_MSG_MOD(extack, 1593 "offload of ct action isn't available"); 1594 return -EOPNOTSUPP; 1595 } 1596 1597 attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */ 1598 attr->ct_attr.zone = act->ct.zone; 1599 if (!(act->ct.action & TCA_CT_ACT_CLEAR)) 1600 attr->ct_attr.nf_ft = act->ct.flow_table; 1601 attr->ct_attr.act_miss_cookie = act->miss_cookie; 1602 1603 return 0; 1604 } 1605 1606 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, 1607 struct mlx5_tc_ct_pre *pre_ct, 1608 bool nat) 1609 { 1610 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1611 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1612 struct mlx5_core_dev *dev = ct_priv->dev; 1613 struct mlx5_flow_table *ft = pre_ct->ft; 1614 struct mlx5_flow_destination dest = {}; 1615 struct mlx5_flow_act flow_act = {}; 1616 struct mlx5_modify_hdr *mod_hdr; 1617 struct mlx5_flow_handle *rule; 1618 struct mlx5_flow_spec *spec; 1619 u32 ctstate; 1620 u16 zone; 1621 int err; 1622 1623 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1624 if (!spec) 1625 return -ENOMEM; 1626 1627 zone = ct_ft->zone & MLX5_CT_ZONE_MASK; 1628 err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type, 1629 ZONE_TO_REG, zone); 1630 if (err) { 1631 ct_dbg("Failed to set zone register mapping"); 1632 goto err_mapping; 1633 } 1634 1635 mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type, 1636 pre_mod_acts.num_actions, 1637 pre_mod_acts.actions); 1638 1639 if (IS_ERR(mod_hdr)) { 1640 err = PTR_ERR(mod_hdr); 1641 ct_dbg("Failed to create pre ct mod hdr"); 1642 goto err_mapping; 1643 } 1644 pre_ct->modify_hdr = mod_hdr; 1645 1646 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1647 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1648 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; 1649 flow_act.modify_hdr = mod_hdr; 1650 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1651 1652 /* add flow rule */ 1653 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1654 zone, MLX5_CT_ZONE_MASK); 1655 ctstate = MLX5_CT_STATE_TRK_BIT; 1656 if (nat) 1657 ctstate |= MLX5_CT_STATE_NAT_BIT; 1658 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); 1659 1660 dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 1661 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); 1662 if (IS_ERR(rule)) { 1663 err = PTR_ERR(rule); 1664 ct_dbg("Failed to add pre ct flow rule zone %d", zone); 1665 goto err_flow_rule; 1666 } 1667 pre_ct->flow_rule = rule; 1668 1669 /* add miss rule */ 1670 dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; 1671 rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); 1672 if (IS_ERR(rule)) { 1673 err = PTR_ERR(rule); 1674 ct_dbg("Failed to add pre ct miss rule zone %d", zone); 1675 goto err_miss_rule; 1676 } 1677 pre_ct->miss_rule = rule; 1678 1679 mlx5e_mod_hdr_dealloc(&pre_mod_acts); 1680 kvfree(spec); 1681 return 0; 1682 1683 err_miss_rule: 1684 mlx5_del_flow_rules(pre_ct->flow_rule); 1685 err_flow_rule: 1686 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1687 err_mapping: 1688 mlx5e_mod_hdr_dealloc(&pre_mod_acts); 1689 kvfree(spec); 1690 return err; 1691 } 1692 1693 static void 1694 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, 1695 struct mlx5_tc_ct_pre *pre_ct) 1696 { 1697 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1698 struct mlx5_core_dev *dev = ct_priv->dev; 1699 1700 mlx5_del_flow_rules(pre_ct->flow_rule); 1701 mlx5_del_flow_rules(pre_ct->miss_rule); 1702 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1703 } 1704 1705 static int 1706 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, 1707 struct mlx5_tc_ct_pre *pre_ct, 1708 bool nat) 1709 { 1710 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 1711 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1712 struct mlx5_core_dev *dev = ct_priv->dev; 1713 struct mlx5_flow_table_attr ft_attr = {}; 1714 struct mlx5_flow_namespace *ns; 1715 struct mlx5_flow_table *ft; 1716 struct mlx5_flow_group *g; 1717 u32 metadata_reg_c_2_mask; 1718 u32 *flow_group_in; 1719 void *misc; 1720 int err; 1721 1722 ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type); 1723 if (!ns) { 1724 err = -EOPNOTSUPP; 1725 ct_dbg("Failed to get flow namespace"); 1726 return err; 1727 } 1728 1729 flow_group_in = kvzalloc(inlen, GFP_KERNEL); 1730 if (!flow_group_in) 1731 return -ENOMEM; 1732 1733 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; 1734 ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ? 1735 FDB_TC_OFFLOAD : MLX5E_TC_PRIO; 1736 ft_attr.max_fte = 2; 1737 ft_attr.level = 1; 1738 ft = mlx5_create_flow_table(ns, &ft_attr); 1739 if (IS_ERR(ft)) { 1740 err = PTR_ERR(ft); 1741 ct_dbg("Failed to create pre ct table"); 1742 goto out_free; 1743 } 1744 pre_ct->ft = ft; 1745 1746 /* create flow group */ 1747 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); 1748 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); 1749 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 1750 MLX5_MATCH_MISC_PARAMETERS_2); 1751 1752 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, 1753 match_criteria.misc_parameters_2); 1754 1755 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; 1756 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); 1757 if (nat) 1758 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); 1759 1760 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, 1761 metadata_reg_c_2_mask); 1762 1763 g = mlx5_create_flow_group(ft, flow_group_in); 1764 if (IS_ERR(g)) { 1765 err = PTR_ERR(g); 1766 ct_dbg("Failed to create pre ct group"); 1767 goto err_flow_grp; 1768 } 1769 pre_ct->flow_grp = g; 1770 1771 /* create miss group */ 1772 memset(flow_group_in, 0, inlen); 1773 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); 1774 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); 1775 g = mlx5_create_flow_group(ft, flow_group_in); 1776 if (IS_ERR(g)) { 1777 err = PTR_ERR(g); 1778 ct_dbg("Failed to create pre ct miss group"); 1779 goto err_miss_grp; 1780 } 1781 pre_ct->miss_grp = g; 1782 1783 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); 1784 if (err) 1785 goto err_add_rules; 1786 1787 kvfree(flow_group_in); 1788 return 0; 1789 1790 err_add_rules: 1791 mlx5_destroy_flow_group(pre_ct->miss_grp); 1792 err_miss_grp: 1793 mlx5_destroy_flow_group(pre_ct->flow_grp); 1794 err_flow_grp: 1795 mlx5_destroy_flow_table(ft); 1796 out_free: 1797 kvfree(flow_group_in); 1798 return err; 1799 } 1800 1801 static void 1802 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, 1803 struct mlx5_tc_ct_pre *pre_ct) 1804 { 1805 tc_ct_pre_ct_del_rules(ct_ft, pre_ct); 1806 mlx5_destroy_flow_group(pre_ct->miss_grp); 1807 mlx5_destroy_flow_group(pre_ct->flow_grp); 1808 mlx5_destroy_flow_table(pre_ct->ft); 1809 } 1810 1811 static int 1812 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) 1813 { 1814 int err; 1815 1816 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); 1817 if (err) 1818 return err; 1819 1820 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); 1821 if (err) 1822 goto err_pre_ct_nat; 1823 1824 return 0; 1825 1826 err_pre_ct_nat: 1827 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1828 return err; 1829 } 1830 1831 static void 1832 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) 1833 { 1834 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); 1835 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1836 } 1837 1838 /* To avoid false lock dependency warning set the ct_entries_ht lock 1839 * class different than the lock class of the ht being used when deleting 1840 * last flow from a group and then deleting a group, we get into del_sw_flow_group() 1841 * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but 1842 * it's different than the ht->mutex here. 1843 */ 1844 static struct lock_class_key ct_entries_ht_lock_key; 1845 1846 static struct mlx5_ct_ft * 1847 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, 1848 struct nf_flowtable *nf_ft) 1849 { 1850 struct mlx5_ct_ft *ft; 1851 int err; 1852 1853 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); 1854 if (ft) { 1855 refcount_inc(&ft->refcount); 1856 return ft; 1857 } 1858 1859 ft = kzalloc(sizeof(*ft), GFP_KERNEL); 1860 if (!ft) 1861 return ERR_PTR(-ENOMEM); 1862 1863 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id); 1864 if (err) 1865 goto err_mapping; 1866 1867 ft->zone = zone; 1868 ft->nf_ft = nf_ft; 1869 ft->ct_priv = ct_priv; 1870 refcount_set(&ft->refcount, 1); 1871 1872 err = mlx5_tc_ct_alloc_pre_ct_tables(ft); 1873 if (err) 1874 goto err_alloc_pre_ct; 1875 1876 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); 1877 if (err) 1878 goto err_init; 1879 1880 lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); 1881 1882 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, 1883 zone_params); 1884 if (err) 1885 goto err_insert; 1886 1887 err = nf_flow_table_offload_add_cb(ft->nf_ft, 1888 mlx5_tc_ct_block_flow_offload, ft); 1889 if (err) 1890 goto err_add_cb; 1891 1892 return ft; 1893 1894 err_add_cb: 1895 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1896 err_insert: 1897 rhashtable_destroy(&ft->ct_entries_ht); 1898 err_init: 1899 mlx5_tc_ct_free_pre_ct_tables(ft); 1900 err_alloc_pre_ct: 1901 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1902 err_mapping: 1903 kfree(ft); 1904 return ERR_PTR(err); 1905 } 1906 1907 static void 1908 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) 1909 { 1910 struct mlx5_ct_entry *entry = ptr; 1911 1912 mlx5_tc_ct_entry_put(entry); 1913 } 1914 1915 static void 1916 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) 1917 { 1918 if (!refcount_dec_and_test(&ft->refcount)) 1919 return; 1920 1921 flush_workqueue(ct_priv->wq); 1922 nf_flow_table_offload_del_cb(ft->nf_ft, 1923 mlx5_tc_ct_block_flow_offload, ft); 1924 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1925 rhashtable_free_and_destroy(&ft->ct_entries_ht, 1926 mlx5_tc_ct_flush_ft_entry, 1927 ct_priv); 1928 mlx5_tc_ct_free_pre_ct_tables(ft); 1929 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1930 kfree(ft); 1931 } 1932 1933 /* We translate the tc filter with CT action to the following HW model: 1934 * 1935 * +-----------------------+ 1936 * + rule (either original + 1937 * + or post_act rule) + 1938 * +-----------------------+ 1939 * | set act_miss_cookie mapping 1940 * | set fte_id 1941 * | set tunnel_id 1942 * | rest of actions before the CT action (for this orig/post_act rule) 1943 * | 1944 * +-------------+ 1945 * | Chain 0 | 1946 * | optimization| 1947 * | v 1948 * | +---------------------+ 1949 * | + pre_ct/pre_ct_nat + if matches +----------------------+ 1950 * | + zone+nat match +---------------->+ post_act (see below) + 1951 * | +---------------------+ set zone +----------------------+ 1952 * | | 1953 * +-------------+ set zone 1954 * | 1955 * v 1956 * +--------------------+ 1957 * + CT (nat or no nat) + 1958 * + tuple + zone match + 1959 * +--------------------+ 1960 * | set mark 1961 * | set labels_id 1962 * | set established 1963 * | set zone_restore 1964 * | do nat (if needed) 1965 * v 1966 * +--------------+ 1967 * + post_act + rest of parsed filter's actions 1968 * + fte_id match +------------------------> 1969 * +--------------+ 1970 * 1971 */ 1972 static int 1973 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, 1974 struct mlx5_flow_attr *attr) 1975 { 1976 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; 1977 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 1978 int act_miss_mapping = 0, err; 1979 struct mlx5_ct_ft *ft; 1980 u16 zone; 1981 1982 /* Register for CT established events */ 1983 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, 1984 attr->ct_attr.nf_ft); 1985 if (IS_ERR(ft)) { 1986 err = PTR_ERR(ft); 1987 ct_dbg("Failed to register to ft callback"); 1988 goto err_ft; 1989 } 1990 attr->ct_attr.ft = ft; 1991 1992 err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie, 1993 &act_miss_mapping); 1994 if (err) { 1995 ct_dbg("Failed to get register mapping for act miss"); 1996 goto err_get_act_miss; 1997 } 1998 1999 err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts, 2000 ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping); 2001 if (err) { 2002 ct_dbg("Failed to set act miss register mapping"); 2003 goto err_mapping; 2004 } 2005 2006 /* Chain 0 sets the zone and jumps to ct table 2007 * Other chains jump to pre_ct table to align with act_ct cached logic 2008 */ 2009 if (!attr->chain) { 2010 zone = ft->zone & MLX5_CT_ZONE_MASK; 2011 err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts, 2012 ct_priv->ns_type, ZONE_TO_REG, zone); 2013 if (err) { 2014 ct_dbg("Failed to set zone register mapping"); 2015 goto err_mapping; 2016 } 2017 2018 attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; 2019 } else { 2020 attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft; 2021 } 2022 2023 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2024 attr->ct_attr.act_miss_mapping = act_miss_mapping; 2025 2026 return 0; 2027 2028 err_mapping: 2029 mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping); 2030 err_get_act_miss: 2031 mlx5_tc_ct_del_ft_cb(ct_priv, ft); 2032 err_ft: 2033 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); 2034 return err; 2035 } 2036 2037 int 2038 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) 2039 { 2040 int err; 2041 2042 if (!priv) 2043 return -EOPNOTSUPP; 2044 2045 if (attr->ct_attr.offloaded) 2046 return 0; 2047 2048 if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) { 2049 err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts, 2050 0, 0, 0, 0); 2051 if (err) 2052 return err; 2053 2054 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2055 } 2056 2057 if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */ 2058 attr->ct_attr.offloaded = true; 2059 return 0; 2060 } 2061 2062 mutex_lock(&priv->control_lock); 2063 err = __mlx5_tc_ct_flow_offload(priv, attr); 2064 if (!err) 2065 attr->ct_attr.offloaded = true; 2066 mutex_unlock(&priv->control_lock); 2067 2068 return err; 2069 } 2070 2071 static void 2072 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, 2073 struct mlx5_flow_attr *attr) 2074 { 2075 mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping); 2076 mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft); 2077 } 2078 2079 void 2080 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, 2081 struct mlx5_flow_attr *attr) 2082 { 2083 if (!attr->ct_attr.offloaded) /* no ct action, return */ 2084 return; 2085 if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ 2086 return; 2087 2088 mutex_lock(&priv->control_lock); 2089 __mlx5_tc_ct_delete_flow(priv, attr); 2090 mutex_unlock(&priv->control_lock); 2091 } 2092 2093 static int 2094 mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) 2095 { 2096 struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 2097 struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); 2098 int err; 2099 2100 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) { 2101 if (ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_HMFS) { 2102 ct_dbg("Using HMFS ct flow steering provider"); 2103 fs_ops = mlx5_ct_fs_hmfs_ops_get(); 2104 } else if (ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { 2105 ct_dbg("Using SMFS ct flow steering provider"); 2106 fs_ops = mlx5_ct_fs_smfs_ops_get(); 2107 } 2108 2109 if (!fs_ops) { 2110 ct_dbg("Requested flow steering mode is not enabled."); 2111 return -EOPNOTSUPP; 2112 } 2113 } 2114 2115 ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); 2116 if (!ct_priv->fs) 2117 return -ENOMEM; 2118 2119 ct_priv->fs->netdev = ct_priv->netdev; 2120 ct_priv->fs->dev = ct_priv->dev; 2121 ct_priv->fs_ops = fs_ops; 2122 2123 err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct); 2124 if (err) 2125 goto err_init; 2126 2127 return 0; 2128 2129 err_init: 2130 kfree(ct_priv->fs); 2131 return err; 2132 } 2133 2134 static int 2135 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, 2136 const char **err_msg) 2137 { 2138 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { 2139 /* vlan workaround should be avoided for multi chain rules. 2140 * This is just a sanity check as pop vlan action should 2141 * be supported by any FW that supports ignore_flow_level 2142 */ 2143 2144 *err_msg = "firmware vlan actions support is missing"; 2145 return -EOPNOTSUPP; 2146 } 2147 2148 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, 2149 fdb_modify_header_fwd_to_table)) { 2150 /* CT always writes to registers which are mod header actions. 2151 * Therefore, mod header and goto is required 2152 */ 2153 2154 *err_msg = "firmware fwd and modify support is missing"; 2155 return -EOPNOTSUPP; 2156 } 2157 2158 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { 2159 *err_msg = "register loopback isn't supported"; 2160 return -EOPNOTSUPP; 2161 } 2162 2163 return 0; 2164 } 2165 2166 static int 2167 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, 2168 enum mlx5_flow_namespace_type ns_type, 2169 struct mlx5e_post_act *post_act) 2170 { 2171 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 2172 const char *err_msg = NULL; 2173 int err = 0; 2174 2175 if (IS_ERR_OR_NULL(post_act)) { 2176 /* Ignore_flow_level support isn't supported by default for VFs and so post_act 2177 * won't be supported. Skip showing error msg. 2178 */ 2179 if (priv->mdev->coredev_type == MLX5_COREDEV_PF) 2180 err_msg = "post action is missing"; 2181 err = -EOPNOTSUPP; 2182 goto out_err; 2183 } 2184 2185 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) 2186 err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg); 2187 2188 out_err: 2189 if (err && err_msg) 2190 netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg); 2191 return err; 2192 } 2193 2194 static void 2195 mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) 2196 { 2197 struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; 2198 2199 ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev)); 2200 debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, 2201 &ct_dbgfs->stats.offloaded); 2202 debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, 2203 &ct_dbgfs->stats.rx_dropped); 2204 } 2205 2206 static void 2207 mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv) 2208 { 2209 debugfs_remove_recursive(ct_priv->debugfs.root); 2210 } 2211 2212 static struct mlx5_flow_handle * 2213 tc_ct_add_miss_rule(struct mlx5_flow_table *ft, 2214 struct mlx5_flow_table *next_ft) 2215 { 2216 struct mlx5_flow_destination dest = {}; 2217 struct mlx5_flow_act act = {}; 2218 2219 act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; 2220 act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 2221 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 2222 dest.ft = next_ft; 2223 2224 return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1); 2225 } 2226 2227 static int 2228 tc_ct_add_ct_table_miss_rule(struct mlx5_flow_table *from, 2229 struct mlx5_flow_table *to, 2230 struct mlx5_flow_group **miss_group, 2231 struct mlx5_flow_handle **miss_rule) 2232 { 2233 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 2234 struct mlx5_flow_group *group; 2235 struct mlx5_flow_handle *rule; 2236 unsigned int max_fte = from->max_fte; 2237 u32 *flow_group_in; 2238 int err = 0; 2239 2240 flow_group_in = kvzalloc(inlen, GFP_KERNEL); 2241 if (!flow_group_in) 2242 return -ENOMEM; 2243 2244 /* create miss group */ 2245 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2246 max_fte - 2); 2247 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2248 max_fte - 1); 2249 group = mlx5_create_flow_group(from, flow_group_in); 2250 if (IS_ERR(group)) { 2251 err = PTR_ERR(group); 2252 goto err_miss_grp; 2253 } 2254 2255 /* add miss rule to next fdb */ 2256 rule = tc_ct_add_miss_rule(from, to); 2257 if (IS_ERR(rule)) { 2258 err = PTR_ERR(rule); 2259 goto err_miss_rule; 2260 } 2261 2262 *miss_group = group; 2263 *miss_rule = rule; 2264 kvfree(flow_group_in); 2265 return 0; 2266 2267 err_miss_rule: 2268 mlx5_destroy_flow_group(group); 2269 err_miss_grp: 2270 kvfree(flow_group_in); 2271 return err; 2272 } 2273 2274 static void 2275 tc_ct_del_ct_table_miss_rule(struct mlx5_flow_group *miss_group, 2276 struct mlx5_flow_handle *miss_rule) 2277 { 2278 mlx5_del_flow_rules(miss_rule); 2279 mlx5_destroy_flow_group(miss_group); 2280 } 2281 2282 #define INIT_ERR_PREFIX "tc ct offload init failed" 2283 2284 struct mlx5_tc_ct_priv * 2285 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, 2286 struct mod_hdr_tbl *mod_hdr, 2287 enum mlx5_flow_namespace_type ns_type, 2288 struct mlx5e_post_act *post_act) 2289 { 2290 u8 mapping_id[MLX5_SW_IMAGE_GUID_MAX_BYTES]; 2291 struct mlx5_tc_ct_priv *ct_priv; 2292 struct mlx5_core_dev *dev; 2293 u8 id_len; 2294 int err; 2295 2296 dev = priv->mdev; 2297 err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act); 2298 if (err) 2299 goto err_support; 2300 2301 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); 2302 if (!ct_priv) 2303 goto err_alloc; 2304 2305 mlx5_query_nic_sw_system_image_guid(dev, mapping_id, &id_len); 2306 2307 ct_priv->zone_mapping = mapping_create_for_id(mapping_id, id_len, 2308 MAPPING_TYPE_ZONE, 2309 sizeof(u16), 0, true); 2310 if (IS_ERR(ct_priv->zone_mapping)) { 2311 err = PTR_ERR(ct_priv->zone_mapping); 2312 goto err_mapping_zone; 2313 } 2314 2315 ct_priv->labels_mapping = mapping_create_for_id(mapping_id, id_len, 2316 MAPPING_TYPE_LABELS, 2317 sizeof(u32) * 4, 0, true); 2318 if (IS_ERR(ct_priv->labels_mapping)) { 2319 err = PTR_ERR(ct_priv->labels_mapping); 2320 goto err_mapping_labels; 2321 } 2322 2323 spin_lock_init(&ct_priv->ht_lock); 2324 ct_priv->priv = priv; 2325 ct_priv->ns_type = ns_type; 2326 ct_priv->chains = chains; 2327 ct_priv->netdev = priv->netdev; 2328 ct_priv->dev = priv->mdev; 2329 ct_priv->mod_hdr_tbl = mod_hdr; 2330 ct_priv->ct = mlx5_chains_create_global_table(chains); 2331 if (IS_ERR(ct_priv->ct)) { 2332 err = PTR_ERR(ct_priv->ct); 2333 mlx5_core_warn(dev, 2334 "%s, failed to create ct table err: %d\n", 2335 INIT_ERR_PREFIX, err); 2336 goto err_ct_tbl; 2337 } 2338 2339 ct_priv->ct_nat = mlx5_chains_create_global_table(chains); 2340 if (IS_ERR(ct_priv->ct_nat)) { 2341 err = PTR_ERR(ct_priv->ct_nat); 2342 mlx5_core_warn(dev, 2343 "%s, failed to create ct nat table err: %d\n", 2344 INIT_ERR_PREFIX, err); 2345 goto err_ct_nat_tbl; 2346 } 2347 2348 err = tc_ct_add_ct_table_miss_rule(ct_priv->ct_nat, ct_priv->ct, 2349 &ct_priv->ct_nat_miss_group, 2350 &ct_priv->ct_nat_miss_rule); 2351 if (err) 2352 goto err_ct_zone_ht; 2353 2354 ct_priv->post_act = post_act; 2355 mutex_init(&ct_priv->control_lock); 2356 if (rhashtable_init(&ct_priv->zone_ht, &zone_params)) 2357 goto err_ct_zone_ht; 2358 if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params)) 2359 goto err_ct_tuples_ht; 2360 if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) 2361 goto err_ct_tuples_nat_ht; 2362 2363 ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0); 2364 if (!ct_priv->wq) { 2365 err = -ENOMEM; 2366 goto err_wq; 2367 } 2368 2369 err = mlx5_tc_ct_fs_init(ct_priv); 2370 if (err) 2371 goto err_init_fs; 2372 2373 mlx5_ct_tc_create_dbgfs(ct_priv); 2374 return ct_priv; 2375 2376 err_init_fs: 2377 destroy_workqueue(ct_priv->wq); 2378 err_wq: 2379 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2380 err_ct_tuples_nat_ht: 2381 rhashtable_destroy(&ct_priv->ct_tuples_ht); 2382 err_ct_tuples_ht: 2383 rhashtable_destroy(&ct_priv->zone_ht); 2384 err_ct_zone_ht: 2385 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2386 err_ct_nat_tbl: 2387 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2388 err_ct_tbl: 2389 mapping_destroy(ct_priv->labels_mapping); 2390 err_mapping_labels: 2391 mapping_destroy(ct_priv->zone_mapping); 2392 err_mapping_zone: 2393 kfree(ct_priv); 2394 err_alloc: 2395 err_support: 2396 2397 return NULL; 2398 } 2399 2400 void 2401 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) 2402 { 2403 struct mlx5_fs_chains *chains; 2404 2405 if (!ct_priv) 2406 return; 2407 2408 destroy_workqueue(ct_priv->wq); 2409 mlx5_ct_tc_remove_dbgfs(ct_priv); 2410 chains = ct_priv->chains; 2411 2412 ct_priv->fs_ops->destroy(ct_priv->fs); 2413 kfree(ct_priv->fs); 2414 2415 tc_ct_del_ct_table_miss_rule(ct_priv->ct_nat_miss_group, ct_priv->ct_nat_miss_rule); 2416 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2417 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2418 mapping_destroy(ct_priv->zone_mapping); 2419 mapping_destroy(ct_priv->labels_mapping); 2420 2421 rhashtable_destroy(&ct_priv->ct_tuples_ht); 2422 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2423 rhashtable_destroy(&ct_priv->zone_ht); 2424 mutex_destroy(&ct_priv->control_lock); 2425 kfree(ct_priv); 2426 } 2427 2428 bool 2429 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, 2430 struct sk_buff *skb, u8 zone_restore_id) 2431 { 2432 struct mlx5_ct_tuple tuple = {}; 2433 struct mlx5_ct_entry *entry; 2434 u16 zone; 2435 2436 if (!ct_priv || !zone_restore_id) 2437 return true; 2438 2439 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) 2440 goto out_inc_drop; 2441 2442 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) 2443 goto out_inc_drop; 2444 2445 spin_lock(&ct_priv->ht_lock); 2446 2447 entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); 2448 if (!entry) { 2449 spin_unlock(&ct_priv->ht_lock); 2450 goto out_inc_drop; 2451 } 2452 2453 if (IS_ERR(entry)) { 2454 spin_unlock(&ct_priv->ht_lock); 2455 goto out_inc_drop; 2456 } 2457 spin_unlock(&ct_priv->ht_lock); 2458 2459 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); 2460 __mlx5_tc_ct_entry_put(entry); 2461 2462 return true; 2463 2464 out_inc_drop: 2465 atomic_inc(&ct_priv->debugfs.stats.rx_dropped); 2466 return false; 2467 } 2468 2469 static bool mlx5e_tc_ct_valid_used_dissector_keys(const u64 used_keys) 2470 { 2471 #define DISS_BIT(name) BIT_ULL(FLOW_DISSECTOR_KEY_ ## name) 2472 const u64 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | 2473 DISS_BIT(META); 2474 const u64 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | 2475 DISS_BIT(PORTS) | DISS_BIT(TCP); 2476 const u64 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | 2477 DISS_BIT(PORTS) | DISS_BIT(TCP); 2478 const u64 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | 2479 DISS_BIT(PORTS); 2480 const u64 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | 2481 DISS_BIT(PORTS); 2482 const u64 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); 2483 const u64 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); 2484 2485 return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || 2486 used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); 2487 } 2488 2489 bool mlx5e_tc_ct_is_valid_flow_rule(const struct net_device *dev, struct flow_rule *flow_rule) 2490 { 2491 struct flow_match_ipv4_addrs ipv4_addrs; 2492 struct flow_match_ipv6_addrs ipv6_addrs; 2493 struct flow_match_control control; 2494 struct flow_match_basic basic; 2495 struct flow_match_ports ports; 2496 struct flow_match_tcp tcp; 2497 2498 if (!mlx5e_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { 2499 netdev_dbg(dev, "ct_debug: rule uses unexpected dissectors (0x%016llx)", 2500 flow_rule->match.dissector->used_keys); 2501 return false; 2502 } 2503 2504 flow_rule_match_basic(flow_rule, &basic); 2505 flow_rule_match_control(flow_rule, &control); 2506 flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); 2507 flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); 2508 if (basic.key->ip_proto != IPPROTO_GRE) 2509 flow_rule_match_ports(flow_rule, &ports); 2510 if (basic.key->ip_proto == IPPROTO_TCP) 2511 flow_rule_match_tcp(flow_rule, &tcp); 2512 2513 if (basic.mask->n_proto != htons(0xFFFF) || 2514 (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || 2515 basic.mask->ip_proto != 0xFF || 2516 (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && 2517 basic.key->ip_proto != IPPROTO_GRE)) { 2518 netdev_dbg(dev, "ct_debug: rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", 2519 ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), 2520 basic.key->ip_proto, basic.mask->ip_proto); 2521 return false; 2522 } 2523 2524 if (basic.key->ip_proto != IPPROTO_GRE && 2525 (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { 2526 netdev_dbg(dev, "ct_debug: rule uses ports match (src 0x%04x, dst 0x%04x)", 2527 ports.mask->src, ports.mask->dst); 2528 return false; 2529 } 2530 2531 if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { 2532 netdev_dbg(dev, "ct_debug: rule uses unexpected tcp match (flags 0x%02x)", 2533 tcp.mask->flags); 2534 return false; 2535 } 2536 2537 return true; 2538 } 2539