// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2019 Solarflare Communications Inc.
 * Copyright 2020-2022 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <net/tc_act/tc_ct.h>
#include "tc.h"
#include "tc_bindings.h"
#include "tc_encap_actions.h"
#include "tc_conntrack.h"
#include "mae.h"
#include "ef100_rep.h"
#include "efx.h"

enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
{
	if (netif_is_vxlan(net_dev))
		return EFX_ENCAP_TYPE_VXLAN;
	if (netif_is_geneve(net_dev))
		return EFX_ENCAP_TYPE_GENEVE;

	return EFX_ENCAP_TYPE_NONE;
}

#define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
/* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */
#define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000)
#define EFX_EFV_PF	NULL
/* Look up the representor information (efv) for a device.
 * May return NULL for the PF (us), or an error pointer for a device that
 * isn't supported as a TC offload endpoint
 */
struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
					 struct net_device *dev)
{
	struct efx_rep *efv;

	if (!dev)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it us (the PF)? */
	if (dev == efx->net_dev)
		return EFX_EFV_PF;
	/* Is it an efx vfrep at all? */
	if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it ours?  We don't support TC rules that include another
	 * EF100's netdevices (not even on another port of the same NIC).
	 */
	efv = netdev_priv(dev);
	if (efv->parent != efx)
		return ERR_PTR(-EOPNOTSUPP);
	return efv;
}

/* Convert a driver-internal vport ID into an internal device (PF or VF) */
static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_uplink(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}

/* Convert a driver-internal vport ID into an external device (wire or VF) */
s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_wire(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}

static const struct rhashtable_params efx_tc_mac_ht_params = {
	.key_len	= offsetofend(struct efx_tc_mac_pedit_action, h_addr),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_mac_pedit_action, linkage),
};

static const struct rhashtable_params efx_tc_encap_match_ht_params = {
	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_encap_match, linkage),
};

static const struct rhashtable_params efx_tc_match_action_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_flow_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_flow_rule, linkage),
};

static const struct rhashtable_params efx_tc_lhs_rule_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_lhs_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_lhs_rule, linkage),
};

static const struct rhashtable_params efx_tc_recirc_ht_params = {
	.key_len	= offsetof(struct efx_tc_recirc_id, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
};

static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
							     unsigned char h_addr[ETH_ALEN],
							     struct netlink_ext_ack *extack)
{
	struct efx_tc_mac_pedit_action *ped, *old;
	int rc;

	ped = kzalloc(sizeof(*ped), GFP_USER);
	if (!ped)
		return ERR_PTR(-ENOMEM);
	memcpy(ped->h_addr, h_addr, ETH_ALEN);
	old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
						&ped->linkage,
						efx_tc_mac_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(ped);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return ERR_CAST(old);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found, ref taken */
		return old;
	}

	rc = efx_mae_allocate_pedit_mac(efx, ped);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
		goto out_remove;
	}

	/* ref and return */
	refcount_set(&ped->ref, 1);
	return ped;
out_remove:
	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
			       efx_tc_mac_ht_params);
	kfree(ped);
	return ERR_PTR(rc);
}

static void efx_tc_flower_put_mac(struct efx_nic *efx,
				  struct efx_tc_mac_pedit_action *ped)
{
	if (!refcount_dec_and_test(&ped->ref))
		return; /* still in use */
	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
			       efx_tc_mac_ht_params);
	efx_mae_free_pedit_mac(efx, ped);
	kfree(ped);
}
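
/* The get/put pair above follows a pattern used throughout this file for
 * interned state (pedit MACs, encap matches, recirc IDs): entries live in
 * an rhashtable keyed on their contents, _get() either inserts a new entry
 * or takes a reference on an existing one, and _put() drops the reference
 * and frees the entry (plus any hardware state) when it reaches zero.  The
 * refcount_inc_not_zero() guards against racing with a concurrent teardown,
 * in which case -EAGAIN is returned.
 */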

static void efx_tc_free_action_set(struct efx_nic *efx,
				   struct efx_tc_action_set *act, bool in_hw)
{
	/* Failure paths calling this on the 'cursor' action set in_hw=false,
	 * because if the alloc had succeeded we'd've put it in acts.list and
	 * not still have it in act.
	 */
	if (in_hw) {
		efx_mae_free_action_set(efx, act->fw_id);
		/* in_hw is true iff we are on an acts.list; make sure to
		 * remove ourselves from that list before we are freed.
		 */
		list_del(&act->list);
	}
	if (act->count) {
		spin_lock_bh(&act->count->cnt->lock);
		if (!list_empty(&act->count_user))
			list_del(&act->count_user);
		spin_unlock_bh(&act->count->cnt->lock);
		efx_tc_flower_put_counter_index(efx, act->count);
	}
	if (act->encap_md) {
		list_del(&act->encap_user);
		efx_tc_flower_release_encap_md(efx, act->encap_md);
	}
	if (act->src_mac)
		efx_tc_flower_put_mac(efx, act->src_mac);
	if (act->dst_mac)
		efx_tc_flower_put_mac(efx, act->dst_mac);
	kfree(act);
}

static void efx_tc_free_action_set_list(struct efx_nic *efx,
					struct efx_tc_action_set_list *acts,
					bool in_hw)
{
	struct efx_tc_action_set *act, *next;

	/* Failure paths set in_hw=false, because usually the acts didn't get
	 * to efx_mae_alloc_action_set_list(); if they did, the failure tree
	 * has a separate efx_mae_free_action_set_list() before calling us.
	 */
	if (in_hw)
		efx_mae_free_action_set_list(efx, acts);
	/* Any act that's on the list will be in_hw even if the list isn't */
	list_for_each_entry_safe(act, next, &acts->list, list)
		efx_tc_free_action_set(efx, act, true);
	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
}

/* Boilerplate for the simple 'copy a field' cases */
#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {		\
	struct flow_match_##_type fm;					\
									\
	flow_rule_match_##_tcget(rule, &fm);				\
	match->value._field = fm.key->_tcfield;				\
	match->mask._field = fm.mask->_tcfield;				\
}
#define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
#define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)
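
/* For example, MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto) expands
 * (when the BASIC key is present in the rule) to:
 *
 *	struct flow_match_basic fm;
 *
 *	flow_rule_match_basic(rule, &fm);
 *	match->value.eth_proto = fm.key->n_proto;
 *	match->mask.eth_proto = fm.mask->n_proto;
 */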

static int efx_tc_flower_parse_match(struct efx_nic *efx,
				     struct flow_rule *rule,
				     struct efx_tc_match *match,
				     struct netlink_ext_ack *extack)
{
	struct flow_dissector *dissector = rule->match.dissector;
	unsigned char ipv = 0;

	/* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
	 * even on IPv4 filters; so rather than relying on dissector->used_keys
	 * we check the addr_type in the CONTROL key.  If we don't find it (or
	 * it's masked, which should never happen), we treat both IPV4_ADDRS
	 * and IPV6_ADDRS as absent.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_control(rule, &fm);
		if (IS_ALL_ONES(fm.mask->addr_type))
			switch (fm.key->addr_type) {
			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
				ipv = 4;
				break;
			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
				ipv = 6;
				break;
			default:
				break;
			}

		if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
			match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
			match->mask.ip_frag = true;
		}
		if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
			match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
			match->mask.ip_firstfrag = true;
		}
		if (!flow_rule_is_supp_control_flags(FLOW_DIS_IS_FRAGMENT |
						     FLOW_DIS_FIRST_FRAG,
						     fm.mask->flags, extack))
			return -EOPNOTSUPP;
	}
	if (dissector->used_keys &
	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}

	MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
	/* Make sure we're IP if any L3/L4 keys used. */
	if (!IS_ALL_ONES(match->mask.eth_proto) ||
	    !(match->value.eth_proto == htons(ETH_P_IP) ||
	      match->value.eth_proto == htons(ETH_P_IPV6)))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L3/L4 flower keys %#llx require protocol ipv[46]",
					       dissector->used_keys);
			return -EINVAL;
		}

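	/* VLAN TCI layout is PCP (3 bits) : DEI (1 bit) : VID (12 bits), so
	 * the priority << 13 | VID packing below leaves the DEI bit
	 * unmatched (zero in the mask).
	 */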
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_vlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[0] = fm.key->vlan_tpid;
			match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
			match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_cvlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[1] = fm.key->vlan_tpid;
			match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
			match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs fm;

		flow_rule_match_eth_addrs(rule, &fm);
		ether_addr_copy(match->value.eth_saddr, fm.key->src);
		ether_addr_copy(match->value.eth_daddr, fm.key->dst);
		ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
		ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
	}

	MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
	/* Make sure we're TCP/UDP if any L4 keys used. */
	if ((match->value.ip_proto != IPPROTO_UDP &&
	     match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L4 flower keys %#llx require ipproto udp or tcp",
					       dissector->used_keys);
			return -EINVAL;
		}
	MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
	MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
	if (ipv == 4) {
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
	}
#ifdef CONFIG_IPV6
	else if (ipv == 6) {
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
	}
#endif
	MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
	MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
	MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_enc_control(rule, &fm);
		if (flow_rule_has_enc_control_flags(fm.mask->flags, extack))
			return -EOPNOTSUPP;
		if (!IS_ALL_ONES(fm.mask->addr_type)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
					       fm.mask->addr_type,
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		switch (fm.key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     src, enc_src_ip);
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     dst, enc_dst_ip);
			break;
#ifdef CONFIG_IPV6
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     src, enc_src_ip6);
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     dst, enc_dst_ip6);
			break;
#endif
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
		MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
	} else if (dissector->used_keys &
		   (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Flower enc keys require enc_control (keys: %#llx)",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
		struct flow_match_ct fm;

		flow_rule_match_ct(rule, &fm);
		match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					  TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported ct_state match %#x",
					       fm.mask->ct_state);
			return -EOPNOTSUPP;
		}
		match->value.ct_mark = fm.key->ct_mark;
		match->mask.ct_mark = fm.mask->ct_mark;
		match->value.ct_zone = fm.key->ct_zone;
		match->mask.ct_zone = fm.mask->ct_zone;

		if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
			NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
					      struct efx_tc_encap_match *encap)
{
	int rc;

	if (!refcount_dec_and_test(&encap->ref))
		return; /* still in use */

	if (encap->type == EFX_TC_EM_DIRECT) {
		rc = efx_mae_unregister_encap_match(efx, encap);
		if (rc)
			/* Display message but carry on and remove entry from our
			 * SW tables, because there's not much we can do about it.
			 */
			netif_err(efx, drv, efx->net_dev,
				  "Failed to release encap match %#x, rc %d\n",
				  encap->fw_id, rc);
	}
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	if (encap->pseudo)
		efx_tc_flower_release_encap_match(efx, encap->pseudo);
	kfree(encap);
}

static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
					    struct efx_tc_match *match,
					    enum efx_encap_type type,
					    enum efx_tc_em_pseudo_type em_type,
					    u8 child_ip_tos_mask,
					    __be16 child_udp_sport_mask,
					    struct netlink_ext_ack *extack)
{
	struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
	bool ipv6 = false;
	int rc;

	/* We require that the socket-defining fields (IP addrs and UDP dest
	 * port) are present and exact-match.  Other fields may only be used
	 * if the field-set (and any masks) are the same for all encap
	 * matches on the same <sip,dip,dport> tuple; this is enforced by
	 * pseudo encap matches.
	 */
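	/* For example, two rules on the same <sip,dip,dport> tuple that each
	 * match a (different) enc_ip_tos value get one DIRECT entry apiece,
	 * plus a shared PSEUDO_MASK entry recording that every user of the
	 * tuple masks ToS the same way.
	 */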
	if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
		if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#ifdef CONFIG_IPV6
		if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
		    !ipv6_addr_any(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on both IPv4 and IPv6, don't understand");
			return -EOPNOTSUPP;
		}
	} else {
		ipv6 = true;
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#endif
	}
	if (!IS_ALL_ONES(match->mask.enc_dport)) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
		return -EOPNOTSUPP;
	}
	if (match->mask.enc_sport || match->mask.enc_ip_tos) {
		struct efx_tc_match pmatch = *match;

		if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
			NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
			return -EOPNOTSUPP;
		}
		pmatch.value.enc_ip_tos = 0;
		pmatch.mask.enc_ip_tos = 0;
		pmatch.value.enc_sport = 0;
		pmatch.mask.enc_sport = 0;
		rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
						      EFX_TC_EM_PSEUDO_MASK,
						      match->mask.enc_ip_tos,
						      match->mask.enc_sport,
						      extack);
		if (rc)
			return rc;
		pseudo = pmatch.encap;
	}
	if (match->mask.enc_ip_ttl) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
		rc = -EOPNOTSUPP;
		goto fail_pseudo;
	}

	rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
					    match->mask.enc_sport, extack);
	if (rc)
		goto fail_pseudo;

	encap = kzalloc(sizeof(*encap), GFP_USER);
	if (!encap) {
		rc = -ENOMEM;
		goto fail_pseudo;
	}
	encap->src_ip = match->value.enc_src_ip;
	encap->dst_ip = match->value.enc_dst_ip;
#ifdef CONFIG_IPV6
	encap->src_ip6 = match->value.enc_src_ip6;
	encap->dst_ip6 = match->value.enc_dst_ip6;
#endif
	encap->udp_dport = match->value.enc_dport;
	encap->tun_type = type;
	encap->ip_tos = match->value.enc_ip_tos;
	encap->ip_tos_mask = match->mask.enc_ip_tos;
	encap->child_ip_tos_mask = child_ip_tos_mask;
	encap->udp_sport = match->value.enc_sport;
	encap->udp_sport_mask = match->mask.enc_sport;
	encap->child_udp_sport_mask = child_udp_sport_mask;
	encap->type = em_type;
	encap->pseudo = pseudo;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
						&encap->linkage,
						efx_tc_encap_match_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(encap);
		if (pseudo) /* don't need our new pseudo either */
			efx_tc_flower_release_encap_match(efx, pseudo);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return PTR_ERR(old);
		/* check old and new em_types are compatible */
		switch (old->type) {
		case EFX_TC_EM_DIRECT:
			/* old EM is in hardware, so mustn't overlap with a
			 * pseudo, but may be shared with another direct EM
			 */
			if (em_type == EFX_TC_EM_DIRECT)
				break;
			NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
			return -EEXIST;
		case EFX_TC_EM_PSEUDO_MASK:
			/* old EM is protecting a ToS- or src port-qualified
			 * filter, so may only be shared with another pseudo
			 * for the same ToS and src port masks.
			 */
			if (em_type != EFX_TC_EM_PSEUDO_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "%s encap match conflicts with existing pseudo(MASK) entry",
						       em_type ? "Pseudo" : "Direct");
				return -EEXIST;
			}
			if (child_ip_tos_mask != old->child_ip_tos_mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Pseudo encap match for TOS mask %#04x conflicts with existing mask %#04x",
						       child_ip_tos_mask,
						       old->child_ip_tos_mask);
				return -EEXIST;
			}
			if (child_udp_sport_mask != old->child_udp_sport_mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Pseudo encap match for UDP src port mask %#x conflicts with existing mask %#x",
						       child_udp_sport_mask,
						       old->child_udp_sport_mask);
				return -EEXIST;
			}
			break;
		case EFX_TC_EM_PSEUDO_OR:
			/* old EM corresponds to an OR that has to be unique
			 * (it must not overlap with any other OR, whether
			 * direct-EM or pseudo).
			 */
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "%s encap match conflicts with existing pseudo(OR) entry",
					       em_type ? "Pseudo" : "Direct");
			return -EEXIST;
		default: /* Unrecognised pseudo-type.  Just say no */
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "%s encap match conflicts with existing pseudo(%d) entry",
					       em_type ? "Pseudo" : "Direct",
					       old->type);
			return -EEXIST;
		}
		/* check old and new tun_types are compatible */
		if (old->tun_type != type) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Egress encap match with conflicting tun_type %u != %u",
					       old->tun_type, type);
			return -EEXIST;
		}
		if (!refcount_inc_not_zero(&old->ref))
			return -EAGAIN;
		/* existing entry found */
		encap = old;
	} else {
		if (em_type == EFX_TC_EM_DIRECT) {
			rc = efx_mae_register_encap_match(efx, encap);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
				goto fail;
			}
		}
		refcount_set(&encap->ref, 1);
	}
	match->encap = encap;
	return 0;
fail:
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	kfree(encap);
fail_pseudo:
	if (pseudo)
		efx_tc_flower_release_encap_match(efx, pseudo);
	return rc;
}

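/* Get (allocating if necessary) the recirc_id that represents a TC
 * chain_index in hardware.  IDs are handed out from 1 upwards: fw_id 0 is
 * left unused so that rules in chain 0 can match recirc_id 0, i.e. packets
 * on their first pass through the MAE.
 */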
static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
						     u32 chain_index,
						     struct net_device *net_dev)
{
	struct efx_tc_recirc_id *rid, *old;
	int rc;

	rid = kzalloc(sizeof(*rid), GFP_USER);
	if (!rid)
		return ERR_PTR(-ENOMEM);
	rid->chain_index = chain_index;
	/* We don't take a reference here, because it's implied - if there's
	 * a rule on the net_dev that's been offloaded to us, then the net_dev
	 * can't go away until the rule has been deoffloaded.
	 */
	rid->net_dev = net_dev;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht,
						&rid->linkage,
						efx_tc_recirc_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(rid);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return ERR_CAST(old);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found */
		rid = old;
	} else {
		rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER);
		if (rc < 0) {
			rhashtable_remove_fast(&efx->tc->recirc_ht,
					       &rid->linkage,
					       efx_tc_recirc_ht_params);
			kfree(rid);
			return ERR_PTR(rc);
		}
		rid->fw_id = rc;
		refcount_set(&rid->ref, 1);
	}
	return rid;
}

static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid)
{
	if (!refcount_dec_and_test(&rid->ref))
		return; /* still in use */
	rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage,
			       efx_tc_recirc_ht_params);
	ida_free(&efx->tc->recirc_ida, rid->fw_id);
	kfree(rid);
}

static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
{
	efx_mae_delete_rule(efx, rule->fw_id);

	/* Release entries in subsidiary tables */
	efx_tc_free_action_set_list(efx, &rule->acts, true);
	if (rule->match.rid)
		efx_tc_put_recirc_id(efx, rule->match.rid);
	if (rule->match.encap)
		efx_tc_flower_release_encap_match(efx, rule->match.encap);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}

static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
{
	switch (typ) {
	case EFX_ENCAP_TYPE_NONE:
		return "none";
	case EFX_ENCAP_TYPE_VXLAN:
		return "vxlan";
	case EFX_ENCAP_TYPE_GENEVE:
		return "geneve";
	default:
		pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
		return "unknown";
	}
}

/* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
enum efx_tc_action_order {
	EFX_TC_AO_DECAP,
	EFX_TC_AO_DEC_TTL,
	EFX_TC_AO_PEDIT_MAC_ADDRS,
	EFX_TC_AO_VLAN_POP,
	EFX_TC_AO_VLAN_PUSH,
	EFX_TC_AO_COUNT,
	EFX_TC_AO_ENCAP,
	EFX_TC_AO_DELIVER
};
/* Determine whether we can add @new action without violating order */
static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
					  enum efx_tc_action_order new)
{
	switch (new) {
	case EFX_TC_AO_DECAP:
		if (act->decap)
			return false;
		/* PEDIT_MAC_ADDRS must not happen before DECAP, though it
		 * can wait until much later
		 */
		if (act->dst_mac || act->src_mac)
			return false;

		/* Decrementing ttl must not happen before DECAP */
		if (act->do_ttl_dec)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_POP:
		if (act->vlan_pop >= 2)
			return false;
		/* If we've already pushed a VLAN, we can't then pop it;
		 * the hardware would instead try to pop an existing VLAN
		 * before pushing the new one.
		 */
		if (act->vlan_push)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_PUSH:
		if (act->vlan_push >= 2)
			return false;
		fallthrough;
	case EFX_TC_AO_COUNT:
		if (act->count)
			return false;
		fallthrough;
	case EFX_TC_AO_PEDIT_MAC_ADDRS:
	case EFX_TC_AO_ENCAP:
		if (act->encap_md)
			return false;
		fallthrough;
	case EFX_TC_AO_DELIVER:
		return !act->deliver;
	case EFX_TC_AO_DEC_TTL:
		if (act->encap_md)
			return false;
		return !act->do_ttl_dec;
	default:
		/* Bad caller.  Whatever they wanted to do, say they can't. */
		WARN_ON_ONCE(1);
		return false;
	}
}
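
/* The fallthrough chain above encodes the ordering: roughly speaking, a
 * request to add action X succeeds only if nothing that must come after X
 * is already present in @act.  (DEC_TTL sits outside the chain as it has
 * its own, narrower constraints.)
 */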

/**
 * DOC: TC conntrack sequences
 *
 * The MAE hardware can handle at most two rounds of action rule matching,
 * consequently we support conntrack through the notion of a "left-hand side
 * rule".  This is a rule which typically contains only the actions "ct" and
 * "goto chain N", and corresponds to one or more "right-hand side rules" in
 * chain N, which typically match on +trk+est, and may perform ct(nat)
 * actions.  RHS rules go in the Action Rule table as normal but with a
 * nonzero recirc_id (the hardware equivalent of chain_index), while LHS
 * rules may go in either the Action Rule or the Outer Rule table, the
 * latter being preferred for performance reasons, and set both DO_CT and
 * a recirc_id in their response.
 *
 * Besides the RHS rules, there are often also similar rules matching on
 * +trk+new which perform the ct(commit) action.  These are not offloaded.
 */
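
/* A typical offloadable sequence, as a sketch (zone and chain numbers are
 * arbitrary):
 *	tc filter add dev $DEV ingress chain 0 proto ip flower \
 *		action ct zone 1 pipe action goto chain 1
 *	tc filter add dev $DEV ingress chain 1 proto ip flower \
 *		ct_state +trk+est ct_zone 1 \
 *		action mirred egress redirect dev $DEST
 * The first filter becomes an LHS rule; the second becomes an RHS rule in
 * the Action Rule table, matching on the recirc_id for chain 1.
 */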

static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr,
				    struct efx_tc_match *match)
{
	const struct flow_action_entry *fa;
	int i;

	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_GOTO:
			return true;
		case FLOW_ACTION_CT:
			/* If rule is -trk, or doesn't mention trk at all, then
			 * a CT action implies a conntrack lookup (hence it's an
			 * LHS rule).  If rule is +trk, then a CT action could
			 * just be ct(nat) or even ct(commit) (though the latter
			 * can't be offloaded).
			 */
			if (!match->mask.ct_state_trk || !match->value.ct_state_trk)
				return true;
			break;
		default:
			break;
		}
	}
	return false;
}

/* A foreign LHS rule has matches on enc_ keys at the TC layer (including an
 * implied match on enc_ip_proto UDP).  Translate these into non-enc_ keys,
 * so that we can use the same MAE machinery as local LHS rules (and so that
 * the lhs_rules entries have uniform semantics).  It may seem odd to do it
 * this way round, given that the corresponding fields in the MAE MCDIs are
 * all ENC_, but (a) we don't have enc_L2 or enc_ip_proto in struct
 * efx_tc_match_fields and (b) semantically an LHS rule doesn't have inner
 * fields so it's just matching on *the* header rather than the outer header.
 * Make sure that the non-enc_ keys were not already being matched on, as
 * that would imply a rule that needed a triple lookup.  (Hardware can do
 * that, with OR-AR-CT-AR, but it halves packet rate so we avoid it where
 * possible; see efx_tc_flower_flhs_needs_ar().)
 */
static int efx_tc_flower_translate_flhs_match(struct efx_tc_match *match)
{
	int rc = 0;

#define COPY_MASK_AND_VALUE(_key, _ekey)	({	\
	if (match->mask._key) {				\
		rc = -EOPNOTSUPP;			\
	} else {					\
		match->mask._key = match->mask._ekey;	\
		match->mask._ekey = 0;			\
		match->value._key = match->value._ekey;	\
		match->value._ekey = 0;			\
	}						\
	rc;						\
})
#define COPY_FROM_ENC(_key)	COPY_MASK_AND_VALUE(_key, enc_##_key)
	if (match->mask.ip_proto)
		return -EOPNOTSUPP;
	match->mask.ip_proto = ~0;
	match->value.ip_proto = IPPROTO_UDP;
	if (COPY_FROM_ENC(src_ip) || COPY_FROM_ENC(dst_ip))
		return rc;
#ifdef CONFIG_IPV6
	if (!ipv6_addr_any(&match->mask.src_ip6))
		return -EOPNOTSUPP;
	match->mask.src_ip6 = match->mask.enc_src_ip6;
	memset(&match->mask.enc_src_ip6, 0, sizeof(struct in6_addr));
	if (!ipv6_addr_any(&match->mask.dst_ip6))
		return -EOPNOTSUPP;
	match->mask.dst_ip6 = match->mask.enc_dst_ip6;
	memset(&match->mask.enc_dst_ip6, 0, sizeof(struct in6_addr));
#endif
	if (COPY_FROM_ENC(ip_tos) || COPY_FROM_ENC(ip_ttl))
		return rc;
	/* should really copy enc_ip_frag but we don't have that in
	 * parse_match yet
	 */
	if (COPY_MASK_AND_VALUE(l4_sport, enc_sport) ||
	    COPY_MASK_AND_VALUE(l4_dport, enc_dport))
		return rc;
	return 0;
#undef COPY_FROM_ENC
#undef COPY_MASK_AND_VALUE
}
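
/* Note: COPY_MASK_AND_VALUE above is a statement expression whose value is
 * rc, which is what lets the callers chain the copies with || and stop at
 * the first failure.
 */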

/* If a foreign LHS rule wants to match on keys that are only available after
 * encap header identification and parsing, then it can't be done in the Outer
 * Rule lookup, because that lookup determines the encap type used to parse
 * beyond the outer headers.  Thus, such rules must use the OR-AR-CT-AR lookup
 * sequence, with an EM (struct efx_tc_encap_match) in the OR step.
 * Return true iff the passed match requires this.
 */
static bool efx_tc_flower_flhs_needs_ar(struct efx_tc_match *match)
{
	/* matches on inner-header keys can't be done in OR */
	return match->mask.eth_proto ||
	       match->mask.vlan_tci[0] || match->mask.vlan_tci[1] ||
	       match->mask.vlan_proto[0] || match->mask.vlan_proto[1] ||
	       memchr_inv(match->mask.eth_saddr, 0, ETH_ALEN) ||
	       memchr_inv(match->mask.eth_daddr, 0, ETH_ALEN) ||
	       match->mask.ip_proto ||
	       match->mask.ip_tos || match->mask.ip_ttl ||
	       match->mask.src_ip || match->mask.dst_ip ||
#ifdef CONFIG_IPV6
	       !ipv6_addr_any(&match->mask.src_ip6) ||
	       !ipv6_addr_any(&match->mask.dst_ip6) ||
#endif
	       match->mask.ip_frag || match->mask.ip_firstfrag ||
	       match->mask.l4_sport || match->mask.l4_dport ||
	       match->mask.tcp_flags ||
	       /* nor can VNI */
	       match->mask.enc_keyid;
}

static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx,
					    struct flow_cls_offload *tc,
					    struct flow_rule *fr,
					    struct net_device *net_dev,
					    struct efx_tc_lhs_rule *rule)

{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_action *act = &rule->lhs_act;
	const struct flow_action_entry *fa;
	enum efx_tc_counter_type ctype;
	bool pipe = true;
	int i;

	ctype = rule->is_ar ? EFX_TC_COUNTER_TYPE_AR : EFX_TC_COUNTER_TYPE_OR;

	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_ct_zone *ct_zone;
		struct efx_tc_recirc_id *rid;

		if (!pipe) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			return -EINVAL;
		}
		switch (fa->id) {
		case FLOW_ACTION_GOTO:
			if (!fa->chain_index) {
				NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw");
				return -EOPNOTSUPP;
			}
			rid = efx_tc_get_recirc_id(efx, fa->chain_index,
						   net_dev);
			if (IS_ERR(rid)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index");
				return PTR_ERR(rid);
			}
			act->rid = rid;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *cnt;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					return -EOPNOTSUPP;
				}
				cnt = efx_tc_flower_get_counter_index(efx, tc->cookie,
								      ctype);
				if (IS_ERR(cnt)) {
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					return PTR_ERR(cnt);
				}
				WARN_ON(act->count); /* can't happen */
				act->count = cnt;
			}
			pipe = false;
			break;
		case FLOW_ACTION_CT:
			if (act->zone) {
				NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & (TCA_CT_ACT_COMMIT |
					     TCA_CT_ACT_FORCE)) {
				NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & TCA_CT_ACT_CLEAR) {
				NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & (TCA_CT_ACT_NAT |
					     TCA_CT_ACT_NAT_SRC |
					     TCA_CT_ACT_NAT_DST)) {
				NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule\n",
						       fa->ct.action);
				return -EOPNOTSUPP;
			}
			ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone,
							  fa->ct.flow_table);
			if (IS_ERR(ct_zone)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates");
				return PTR_ERR(ct_zone);
			}
			act->zone = ct_zone;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule\n",
					       fa->id);
			return -EOPNOTSUPP;
		}
	}

	if (pipe) {
		NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule");
		return -EOPNOTSUPP;
	}
	return 0;
}
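
/* So an offloadable LHS rule's actions boil down to at most three pieces of
 * state in struct efx_tc_lhs_action: an optional counter, an optional
 * conntrack zone (which becomes a DO_CT in the rule's response), and the
 * mandatory recirc_id from the goto.
 */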

static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
					      struct efx_tc_lhs_action *act)
{
	if (act->rid)
		efx_tc_put_recirc_id(efx, act->rid);
	if (act->zone)
		efx_tc_ct_unregister_zone(efx, act->zone);
	if (act->count)
		efx_tc_flower_put_counter_index(efx, act->count);
}

/**
 * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
 *
 * @dst_mac_32:	dst_mac[0:3] has been populated
 * @dst_mac_16:	dst_mac[4:5] has been populated
 * @src_mac_16:	src_mac[0:1] has been populated
 * @src_mac_32:	src_mac[2:5] has been populated
 * @dst_mac:	h_dest field of ethhdr
 * @src_mac:	h_source field of ethhdr
 *
 * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
 * necessarily equate to whole fields of the packet header, this
 * structure is used to hold the cumulative effect of the partial
 * field pedits that have been processed so far.
 */
struct efx_tc_mangler_state {
	u8 dst_mac_32:1; /* eth->h_dest[0:3] */
	u8 dst_mac_16:1; /* eth->h_dest[4:5] */
	u8 src_mac_16:1; /* eth->h_source[0:1] */
	u8 src_mac_32:1; /* eth->h_source[2:5] */
	unsigned char dst_mac[ETH_ALEN];
	unsigned char src_mac[ETH_ALEN];
};
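
/* For example, a "pedit ex munge eth dst set" of a full MAC address
 * typically arrives as two mangles: 4 bytes at offset 0 (setting
 * dst_mac_32) and 2 bytes at offset 4 (setting dst_mac_16); only when both
 * halves are present can a whole-field MAC edit be emitted.
 */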

/** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
 * @efx:	NIC we're installing a flow rule on
 * @act:	action set (cursor) to update
 * @mung:	accumulated partial mangles
 * @extack:	netlink extended ack for reporting errors
 *
 * Check @mung to find any combinations of partial mangles that can be
 * combined into a complete packet field edit, add that edit to @act,
 * and consume the partial mangles from @mung.
 */

static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
				      struct efx_tc_action_set *act,
				      struct efx_tc_mangler_state *mung,
				      struct netlink_ext_ack *extack)
{
	struct efx_tc_mac_pedit_action *ped;

	if (mung->dst_mac_32 && mung->dst_mac_16) {
		ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
		if (IS_ERR(ped))
			return PTR_ERR(ped);

		/* Check that we have not already populated dst_mac */
		if (act->dst_mac)
			efx_tc_flower_put_mac(efx, act->dst_mac);

		act->dst_mac = ped;

		/* consume the incomplete state */
		mung->dst_mac_32 = 0;
		mung->dst_mac_16 = 0;
	}
	if (mung->src_mac_16 && mung->src_mac_32) {
		ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
		if (IS_ERR(ped))
			return PTR_ERR(ped);

		/* Check that we have not already populated src_mac */
		if (act->src_mac)
			efx_tc_flower_put_mac(efx, act->src_mac);

		act->src_mac = ped;

		/* consume the incomplete state */
		mung->src_mac_32 = 0;
		mung->src_mac_16 = 0;
	}
	return 0;
}

static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
			    const struct flow_action_entry *fa,
			    struct netlink_ext_ack *extack)
{
	switch (fa->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		switch (fa->mangle.offset) {
		case offsetof(struct iphdr, ttl):
			/* check that pedit applies to ttl only */
			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
				break;

			/* Adding 0xff is equivalent to decrementing the ttl.
			 * Other added values are not supported.
			 */
			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
				break;

			/* check that we do not decrement ttl twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
				return -EOPNOTSUPP;
			}
			act->do_ttl_dec = 1;
			return 0;
		default:
			break;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		switch (fa->mangle.offset) {
		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
			/* check that pedit applies to hoplimit only */
			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
				break;

			/* Adding 0xff is equivalent to decrementing the hoplimit.
			 * Other added values are not supported.
			 */
			if ((fa->mangle.val >> 24) != U8_MAX)
				break;

			/* check that we do not decrement hoplimit twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
				return -EOPNOTSUPP;
			}
			act->do_ttl_dec = 1;
			return 0;
		default:
			break;
		}
		break;
	default:
		break;
	}

	NL_SET_ERR_MSG_FMT_MOD(extack,
			       "ttl add action type %x %x %x/%x is not supported",
			       fa->mangle.htype, fa->mangle.offset,
			       fa->mangle.val, fa->mangle.mask);
	return -EOPNOTSUPP;
}
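
/* The "add 0xff" convention used above works because the add is modular in
 * the masked byte: e.g. for a ttl of 64, (64 + 0xff) & 0xff == 63.
 */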

/**
 * efx_tc_mangle() - handle a single 32-bit (or less) pedit
 * @efx:	NIC we're installing a flow rule on
 * @act:	action set (cursor) to update
 * @fa:		FLOW_ACTION_MANGLE action metadata
 * @mung:	accumulator for partial mangles
 * @extack:	netlink extended ack for reporting errors
 * @match:	original match used along with the mangle action
 *
 * Identify the fields written by a FLOW_ACTION_MANGLE, and record
 * the partial mangle state in @mung.  If this mangle completes an
 * earlier partial mangle, consume and apply to @act by calling
 * efx_tc_complete_mac_mangle().
 */

static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
			 const struct flow_action_entry *fa,
			 struct efx_tc_mangler_state *mung,
			 struct netlink_ext_ack *extack,
			 struct efx_tc_match *match)
{
	__le32 mac32;
	__le16 mac16;
	u8 tr_ttl;

	switch (fa->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
		BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
		BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
		if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Pedit mangle mac action violates action order");
			return -EOPNOTSUPP;
		}
		switch (fa->mangle.offset) {
		case 0:
			if (fa->mangle.mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) of eth.dst32 mangle is not supported",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			/* Ethernet address is little-endian */
			mac32 = cpu_to_le32(fa->mangle.val);
			memcpy(mung->dst_mac, &mac32, sizeof(mac32));
			mung->dst_mac_32 = 1;
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		case 4:
			if (fa->mangle.mask == 0xffff) {
				mac16 = cpu_to_le16(fa->mangle.val >> 16);
				memcpy(mung->src_mac, &mac16, sizeof(mac16));
				mung->src_mac_16 = 1;
			} else if (fa->mangle.mask == 0xffff0000) {
				mac16 = cpu_to_le16((u16)fa->mangle.val);
				memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
				mung->dst_mac_16 = 1;
			} else {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) of eth+4 mangle is not high or low 16b",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		case 8:
			if (fa->mangle.mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) of eth.src32 mangle is not supported",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			mac32 = cpu_to_le32(fa->mangle.val);
			memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
			mung->src_mac_32 = 1;
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "mangle eth+%u %x/%x is not supported",
					       fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
			return -EOPNOTSUPP;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		switch (fa->mangle.offset) {
		case offsetof(struct iphdr, ttl):
			/* we currently only support pedit IP4 when it applies
			 * to TTL and then only when it can be achieved with a
			 * decrement ttl action
			 */

			/* check that pedit applies to ttl only */
			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) out of range, only support mangle action on ipv4.ttl",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}

			/* we can only convert to a dec ttl when we have an
			 * exact match on the ttl field
			 */
			if (match->mask.ip_ttl != U8_MAX) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "only support mangle ttl when we have an exact match, current mask (%#x)",
						       match->mask.ip_ttl);
				return -EOPNOTSUPP;
			}

			/* check that we don't try to decrement 0, which equates
			 * to setting the ttl to 0xff
			 */
			if (match->value.ip_ttl == 0) {
				NL_SET_ERR_MSG_MOD(extack,
						   "decrement ttl past 0 is not supported");
				return -EOPNOTSUPP;
			}

			/* check that we do not decrement ttl twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "multiple dec ttl is not supported");
				return -EOPNOTSUPP;
			}

			/* check pedit can be achieved with decrement action */
			tr_ttl = match->value.ip_ttl - 1;
			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
				act->do_ttl_dec = 1;
				return 0;
			}

			fallthrough;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "only support mangle on the ttl field (offset is %u)",
					       fa->mangle.offset);
			return -EOPNOTSUPP;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		switch (fa->mangle.offset) {
		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
			/* we currently only support pedit IP6 when it applies
			 * to the hoplimit and then only when it can be achieved
			 * with a decrement hoplimit action
			 */

			/* check that pedit applies to hoplimit only */
			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
						       fa->mangle.mask);

				return -EOPNOTSUPP;
			}

			/* we can only convert to a dec ttl when we have an
			 * exact match on the ttl field
			 */
			if (match->mask.ip_ttl != U8_MAX) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "only support hop_limit when we have an exact match, current mask (%#x)",
						       match->mask.ip_ttl);
				return -EOPNOTSUPP;
			}

			/* check that we don't try to decrement 0, which equates
			 * to setting the ttl to 0xff
			 */
			if (match->value.ip_ttl == 0) {
				NL_SET_ERR_MSG_MOD(extack,
						   "decrementing hop_limit past 0 is not supported");
				return -EOPNOTSUPP;
			}

			/* check that we do not decrement hoplimit twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "multiple dec ttl is not supported");
				return -EOPNOTSUPP;
			}

			/* check pedit can be achieved with decrement action */
			tr_ttl = match->value.ip_ttl - 1;
			if ((fa->mangle.val >> 24) == tr_ttl) {
				act->do_ttl_dec = 1;
				return 0;
			}

			fallthrough;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "only support mangle on the hop_limit field");
			return -EOPNOTSUPP;
		}
	default:
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
				       fa->mangle.htype);
		return -EOPNOTSUPP;
	}
	return 0;
}

/**
 * efx_tc_incomplete_mangle() - check for leftover partial pedits
 * @mung:	accumulator for partial mangles
 * @extack:	netlink extended ack for reporting errors
 *
 * Since the MAE can only overwrite whole fields, any partial
 * field mangle left over on reaching packet delivery (mirred or
 * end of TC actions) cannot be offloaded.  Check for any such
 * and reject them with -%EOPNOTSUPP.
 */

static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
				    struct netlink_ext_ack *extack)
{
	if (mung->dst_mac_32 || mung->dst_mac_16) {
		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
		return -EOPNOTSUPP;
	}
	if (mung->src_mac_16 || mung->src_mac_32) {
		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
		return -EOPNOTSUPP;
	}
	return 0;
}

static int efx_tc_flower_replace_foreign_lhs_ar(struct efx_nic *efx,
						struct flow_cls_offload *tc,
						struct flow_rule *fr,
						struct efx_tc_match *match,
						struct net_device *net_dev)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *rule, *old;
	enum efx_encap_type type;
	int rc;

	type = efx_tc_indr_netdev_type(net_dev);
	if (type == EFX_ENCAP_TYPE_NONE) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on unsupported tunnel device");
		return -EOPNOTSUPP;
	}

	rc = efx_mae_check_encap_type_supported(efx, type);
	if (rc) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Firmware reports no support for %s encap match",
				       efx_tc_encap_type_name(type));
		return rc;
	}
	/* This is an Action Rule, so it needs a separate Encap Match in the
	 * Outer Rule table.  Insert that now.
	 */
	rc = efx_tc_flower_record_encap_match(efx, match, type,
					      EFX_TC_EM_DIRECT, 0, 0, extack);
	if (rc)
		return rc;

	match->mask.recirc_id = 0xff;
	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
		rc = -EOPNOTSUPP;
		goto release_encap_match;
	}
	/* LHS rules are always -trk, so we don't need to match on that */
	match->mask.ct_state_trk = 0;
	match->value.ct_state_trk = 0;
	/* We must inhibit match on TCP SYN/FIN/RST, so that SW can see
	 * the packet and update the conntrack table.
	 * Outer Rules will do that with CT_TCP_FLAGS_INHIBIT, but Action
	 * Rules don't have that; instead they support matching on
	 * TCP_SYN_FIN_RST (aka TCP_INTERESTING_FLAGS), so use that.
	 * This is only strictly needed if there will be a DO_CT action,
	 * which we don't know yet, but typically there will be and it's
	 * simpler not to bother checking here.
	 */
	match->mask.tcp_syn_fin_rst = true;

	rc = efx_mae_match_check_caps(efx, &match->mask, extack);
	if (rc)
		goto release_encap_match;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release_encap_match;
	}
	rule->cookie = tc->cookie;
	rule->is_ar = true;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
						&rule->linkage,
						efx_tc_lhs_rule_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		goto release;
	}

	/* Parse actions */
	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, net_dev, rule);
	if (rc)
		goto release;

	rule->match = *match;
	rule->lhs_act.tun_type = type;

	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release;
	}
	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed lhs rule (cookie %lx)\n",
		  tc->cookie);
	return 0;

release:
	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
	if (!old)
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
				       efx_tc_lhs_rule_ht_params);
	kfree(rule);
release_encap_match:
	if (match->encap)
		efx_tc_flower_release_encap_match(efx, match->encap);
	return rc;
}

static int efx_tc_flower_replace_foreign_lhs(struct efx_nic *efx,
					     struct flow_cls_offload *tc,
					     struct flow_rule *fr,
					     struct efx_tc_match *match,
					     struct net_device *net_dev)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *rule, *old;
	enum efx_encap_type type;
	int rc;

	if (tc->common.chain_index) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
		return -EOPNOTSUPP;
	}

	if (!efx_tc_match_is_encap(&match->mask)) {
		/* This is not a tunnel decap rule, ignore it */
		netif_dbg(efx, drv, efx->net_dev, "Ignoring foreign LHS filter without encap match\n");
		return -EOPNOTSUPP;
	}

	if (efx_tc_flower_flhs_needs_ar(match))
		return efx_tc_flower_replace_foreign_lhs_ar(efx, tc, fr, match,
							    net_dev);

	type = efx_tc_indr_netdev_type(net_dev);
	if (type == EFX_ENCAP_TYPE_NONE) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on unsupported tunnel device\n");
		return -EOPNOTSUPP;
	}

	rc = efx_mae_check_encap_type_supported(efx, type);
	if (rc) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Firmware reports no support for %s encap match",
				       efx_tc_encap_type_name(type));
		return rc;
	}
	/* Reserve the outer tuple with a pseudo Encap Match */
	rc = efx_tc_flower_record_encap_match(efx, match, type,
					      EFX_TC_EM_PSEUDO_OR, 0, 0,
					      extack);
	if (rc)
		return rc;

	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
		rc = -EOPNOTSUPP;
		goto release_encap_match;
	}
	/* LHS rules are always -trk, so we don't need to match on that */
	match->mask.ct_state_trk = 0;
	match->value.ct_state_trk = 0;

	rc = efx_tc_flower_translate_flhs_match(match);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule cannot match on inner fields");
		goto release_encap_match;
	}

	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
	if (rc)
		goto release_encap_match;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release_encap_match;
	}
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
						&rule->linkage,
						efx_tc_lhs_rule_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		goto release;
	}

	/* Parse actions */
	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, net_dev, rule);
	if (rc)
		goto release;

	rule->match = *match;
	rule->lhs_act.tun_type = type;

	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release;
	}
	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed lhs rule (cookie %lx)\n",
		  tc->cookie);
	return 0;

release:
	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
	if (!old)
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
				       efx_tc_lhs_rule_ht_params);
	kfree(rule);
release_encap_match:
	if (match->encap)
		efx_tc_flower_release_encap_match(efx, match->encap);
	return rc;
}

static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
					 struct net_device *net_dev,
					 struct flow_cls_offload *tc)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
	struct efx_tc_action_set *act = NULL;
	bool found = false, uplinked = false;
	const struct flow_action_entry *fa;
	struct efx_tc_match match;
	struct efx_rep *to_efv;
	s64 rc;
	int i;

	/* Parse match */
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
	if (rc)
		return rc;
	/* The rule as given to us doesn't specify a source netdevice.
	 * But, determining whether packets from a VF should match it is
	 * complicated, so leave those to the software slowpath: qualify
	 * the filter with source m-port == wire.
	 */
	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;

	if (efx_tc_rule_is_lhs_rule(fr, &match))
		return efx_tc_flower_replace_foreign_lhs(efx, tc, fr, &match,
							 net_dev);

	if (tc->common.chain_index) {
		struct efx_tc_recirc_id *rid;

		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
		if (IS_ERR(rid)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Failed to allocate a hardware recirculation ID for chain_index %u",
					       tc->common.chain_index);
			return PTR_ERR(rid);
		}
		match.rid = rid;
		match.value.recirc_id = rid->fw_id;
	}
	match.mask.recirc_id = 0xff;

	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
	 */
static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
					 struct net_device *net_dev,
					 struct flow_cls_offload *tc)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
	struct efx_tc_action_set *act = NULL;
	bool found = false, uplinked = false;
	const struct flow_action_entry *fa;
	struct efx_tc_match match;
	struct efx_rep *to_efv;
	s64 rc;
	int i;

	/* Parse match */
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
	if (rc)
		return rc;
	/* The rule as given to us doesn't specify a source netdevice.
	 * But, determining whether packets from a VF should match it is
	 * complicated, so leave those to the software slowpath: qualify
	 * the filter with source m-port == wire.
	 */
	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;

	if (efx_tc_rule_is_lhs_rule(fr, &match))
		return efx_tc_flower_replace_foreign_lhs(efx, tc, fr, &match,
							 net_dev);

	if (tc->common.chain_index) {
		struct efx_tc_recirc_id *rid;

		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
		if (IS_ERR(rid)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Failed to allocate a hardware recirculation ID for chain_index %u",
					       tc->common.chain_index);
			return PTR_ERR(rid);
		}
		match.rid = rid;
		match.value.recirc_id = rid->fw_id;
	}
	match.mask.recirc_id = 0xff;

	/* AR table can't match on DO_CT (+trk). But a commonly used pattern is
	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
	 */
	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
	    match.mask.ct_state_est && match.value.ct_state_est)
		match.mask.ct_state_trk = 0;
	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
	 * match +trk-est (CT_HIT=0) despite being on an established connection.
	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
	 * still hit the software path.
	 */
	if (match.mask.ct_state_est && !match.value.ct_state_est) {
		if (match.value.tcp_syn_fin_rst) {
			/* Can't offload this combination */
			NL_SET_ERR_MSG_MOD(extack, "TCP flags and -est conflict for offload");
			rc = -EOPNOTSUPP;
			goto release;
		}
		match.mask.tcp_syn_fin_rst = true;
	}

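	/* Worked example of the two rewrites above (masks and values are
	 * the driver's internal single-bit ct_state fields, shown +/-):
	 *   requested +trk+est  ->  offloaded match on +est only, since
	 *                           CT_HIT=1 already implies the packet
	 *                           was tracked;
	 *   requested -est      ->  offloaded as -est-tcp_syn_fin_rst, so
	 *                           SYN/FIN/RST packets on an established
	 *                           flow fall back to the software path.
	 */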
	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			if (IS_ERR(to_efv))
				continue;
			found = true;
			break;
		default:
			break;
		}
	}
	if (!found) { /* We don't care. */
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring foreign filter that doesn't egdev us\n");
		rc = -EOPNOTSUPP;
		goto release;
	}

	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
	if (rc)
		goto release;

	if (efx_tc_match_is_encap(&match.mask)) {
		enum efx_encap_type type;

		type = efx_tc_indr_netdev_type(net_dev);
		if (type == EFX_ENCAP_TYPE_NONE) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on unsupported tunnel device");
			rc = -EOPNOTSUPP;
			goto release;
		}

		rc = efx_mae_check_encap_type_supported(efx, type);
		if (rc) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Firmware reports no support for %s encap match",
					       efx_tc_encap_type_name(type));
			goto release;
		}

		rc = efx_tc_flower_record_encap_match(efx, &match, type,
						      EFX_TC_EM_DIRECT, 0, 0,
						      extack);
		if (rc)
			goto release;
	} else if (!tc->common.chain_index) {
		/* This is not a tunnel decap rule, ignore it */
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring foreign filter without encap match\n");
		rc = -EOPNOTSUPP;
		goto release;
	}

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release;
	}
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (IS_ERR(old)) {
		rc = PTR_ERR(old);
		goto release;
	} else if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring already-offloaded rule (cookie %lx)\n",
			  tc->cookie);
		rc = -EEXIST;
		goto release;
	}

	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/* Parse actions. For foreign rules we only support decap & redirect.
	 * See corresponding code in efx_tc_flower_replace() for theory of
	 * operation & how 'act' cursor is used.
	 */
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;

		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			/* See corresponding code in efx_tc_flower_replace() for
			 * long explanations of what's going on here.
			 */
			save = *act;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *ctr;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					rc = -EOPNOTSUPP;
					goto release;
				}
				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
					NL_SET_ERR_MSG_MOD(extack, "Count action violates action order (can't happen)");
					rc = -EOPNOTSUPP;
					goto release;
				}

				ctr = efx_tc_flower_get_counter_index(efx,
								      tc->cookie,
								      EFX_TC_COUNTER_TYPE_AR);
				if (IS_ERR(ctr)) {
					rc = PTR_ERR(ctr);
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					goto release;
				}
				act->count = ctr;
				INIT_LIST_HEAD(&act->count_user);
			}

			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack,
						   "Deliver action violates action order (can't happen)");
				goto release;
			}
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			/* PF implies egdev is us, in which case we really
			 * want to deliver to the uplink (because this is an
			 * ingress filter). If we don't recognise the egdev
			 * at all, then we'd better trap so SW can handle it.
			 */
			if (IS_ERR(to_efv))
				to_efv = EFX_EFV_PF;
			if (to_efv == EFX_EFV_PF) {
				if (uplinked)
					break;
				uplinked = true;
			}
			rc = efx_tc_flower_internal_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
				goto release;
			}
			act->decap = 1;
			/* If we previously delivered/trapped to uplink, now
			 * that we've decapped we'll want another copy if we
			 * try to deliver/trap to uplink again.
			 */
			uplinked = false;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	if (act) {
		if (!uplinked) {
			/* Not shot/redirected, so deliver to default dest
			 * (which is the uplink, as this is an ingress filter)
			 */
			efx_mae_mport_uplink(efx, &act->dest_mport);
			act->deliver = 1;
		}
		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}

	rule->match = match;

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed foreign filter (cookie %lx)\n",
		  tc->cookie);

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 rule->acts.fw_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (match.rid)
		efx_tc_put_recirc_id(efx, match.rid);
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		if (!old)
			rhashtable_remove_fast(&efx->tc->match_action_ht,
					       &rule->linkage,
					       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	if (match.encap)
		efx_tc_flower_release_encap_match(efx, match.encap);
	return rc;
}

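/* Illustrative example (hypothetical device names): a conntrack lookup
 * rule on one of our own netdevices, e.g.
 *	tc filter add dev pf0vf0rep ingress protocol ip chain 0 flower \
 *		ct_state -trk action ct zone 1 pipe action goto chain 1
 * takes the efx_tc_flower_replace_lhs() path below; the rules in the
 * goto target chain are then matched via the recirc_id machinery in
 * efx_tc_flower_replace().
 */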
static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
				     struct flow_cls_offload *tc,
				     struct flow_rule *fr,
				     struct efx_tc_match *match,
				     struct efx_rep *efv,
				     struct net_device *net_dev)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *rule, *old;
	int rc;

	if (tc->common.chain_index) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
		return -EOPNOTSUPP;
	}

	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
		return -EOPNOTSUPP;
	}
	/* LHS rules are always -trk, so we don't need to match on that */
	match->mask.ct_state_trk = 0;
	match->value.ct_state_trk = 0;

	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
	if (rc)
		return rc;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule)
		return -ENOMEM;
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
						&rule->linkage,
						efx_tc_lhs_rule_ht_params);
	if (IS_ERR(old)) {
		rc = PTR_ERR(old);
		goto release;
	} else if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		goto release;
	}

	/* Parse actions */
	/* See note in efx_tc_flower_replace() regarding passed net_dev
	 * (used for efx_tc_get_recirc_id()).
	 */
	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule);
	if (rc)
		goto release;

	rule->match = *match;

	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release;
	}
	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed lhs rule (cookie %lx)\n",
		  tc->cookie);
	return 0;

release:
	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
	if (!old)
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
				       efx_tc_lhs_rule_ht_params);
	kfree(rule);
	return rc;
}

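/* Illustrative example (hypothetical names): a plain match-action rule
 * such as
 *	tc filter add dev pf0 ingress protocol ip flower \
 *		dst_ip 192.0.2.1 \
 *		action mirred egress redirect dev pf0vf0rep
 * is handled by efx_tc_flower_replace() below: the match is qualified
 * with the caller's ingress m-port, the action list is translated into
 * MAE action sets, and the result is inserted into the Action Rule
 * table at EFX_TC_PRIO_TC.
 */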
static int efx_tc_flower_replace(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc,
				 struct efx_rep *efv)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	const struct ip_tunnel_info *encap_info = NULL;
	struct efx_tc_flow_rule *rule = NULL, *old;
	struct efx_tc_mangler_state mung = {};
	struct efx_tc_action_set *act = NULL;
	const struct flow_action_entry *fa;
	struct efx_rep *from_efv, *to_efv;
	struct efx_tc_match match;
	u32 acts_id;
	s64 rc;
	int i;

	if (!tc_can_offload_extack(efx->net_dev, extack))
		return -EOPNOTSUPP;
	if (WARN_ON(!efx->tc))
		return -ENETDOWN;
	if (WARN_ON(!efx->tc->up))
		return -ENETDOWN;

	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
	if (IS_ERR(from_efv)) {
		/* Not from our PF or representors, so probably a tunnel dev */
		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
	}

	if (efv != from_efv) {
		/* can't happen */
		NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
				       netdev_name(net_dev), efv ? "non-" : "",
				       from_efv ? "non-" : "");
		return -EINVAL;
	}

	/* Parse match */
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_external_mport(efx, from_efv);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;
	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
	if (rc)
		return rc;
	if (efx_tc_match_is_encap(&match.mask)) {
		NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
		return -EOPNOTSUPP;
	}

	if (efx_tc_rule_is_lhs_rule(fr, &match))
		return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv,
						 net_dev);

	/* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht).
	 * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing
	 * to the initial memset(), so we don't need to do anything in that case.
	 */
	if (tc->common.chain_index) {
		struct efx_tc_recirc_id *rid;

		/* Note regarding passed net_dev:
		 * VFreps and PF can share chain namespace, as they have
		 * distinct ingress_mports. So we don't need to burn an
		 * extra recirc_id if both use the same chain_index.
		 * (Strictly speaking, we could give each VFrep its own
		 * recirc_id namespace that doesn't take IDs away from the
		 * PF, but that would require a bunch of additional IDAs -
		 * one for each representor - and that's not likely to be
		 * the main cause of recirc_id exhaustion anyway.)
		 */
		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index,
					   efx->net_dev);
		if (IS_ERR(rid)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Failed to allocate a hardware recirculation ID for chain_index %u",
					       tc->common.chain_index);
			return PTR_ERR(rid);
		}
		match.rid = rid;
		match.value.recirc_id = rid->fw_id;
	}
	match.mask.recirc_id = 0xff;

	/* AR table can't match on DO_CT (+trk). But a commonly used pattern is
	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
	 */
	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
	    match.mask.ct_state_est && match.value.ct_state_est)
		match.mask.ct_state_trk = 0;
	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
	 * match +trk-est (CT_HIT=0) despite being on an established connection.
	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
	 * still hit the software path.
	 */
	if (match.mask.ct_state_est && !match.value.ct_state_est) {
		if (match.value.tcp_syn_fin_rst) {
			/* Can't offload this combination */
			NL_SET_ERR_MSG_MOD(extack, "TCP flags and -est conflict for offload");
			rc = -EOPNOTSUPP;
			goto release;
		}
		match.mask.tcp_syn_fin_rst = true;
	}

	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
	if (rc)
		goto release;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release;
	}
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (IS_ERR(old)) {
		rc = PTR_ERR(old);
		goto release;
	} else if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		rc = -EEXIST;
		goto release;
	}

	/* Parse actions */
	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/**
	 * DOC: TC action translation
	 *
	 * Actions in TC are sequential and cumulative, with delivery actions
	 * potentially anywhere in the order. The EF100 MAE, however, takes
	 * an 'action set list' consisting of 'action sets', each of which is
	 * applied to the _original_ packet, and consists of a set of optional
	 * actions in a fixed order with delivery at the end.
	 * To translate between these two models, we maintain a 'cursor', @act,
	 * which describes the cumulative effect of all the packet-mutating
	 * actions encountered so far; on handling a delivery (mirred or drop)
	 * action, once the action-set has been inserted into hardware, we
	 * append @act to the action-set list (@rule->acts); if this is a pipe
	 * action (mirred mirror) we then allocate a new @act with a copy of
	 * the cursor state _before_ the delivery action, otherwise we set @act
	 * to %NULL.
	 * This ensures that every allocated action-set is either attached to
	 * @rule->acts or pointed to by @act (and never both), and that only
	 * those action-sets in @rule->acts exist in hardware. Consequently,
	 * in the failure path, @act only needs to be freed in memory, whereas
	 * for @rule->acts we remove each action-set from hardware before
	 * freeing it (efx_tc_free_action_set_list()), even if the action-set
	 * list itself is not in hardware.
	 */
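	/* Worked example of the translation described above (illustrative;
	 * REP_A/REP_B are hypothetical devices): the TC action list
	 *	pedit ex munge eth dst set <mac> pipe
	 *	mirred egress mirror dev REP_A pipe
	 *	mirred egress redirect dev REP_B
	 * becomes an action-set list of two action sets, each applied to
	 * the original packet:
	 *	set 0: pedit + deliver to REP_A
	 *	set 1: pedit + deliver to REP_B
	 * The pedit appears in both because the cursor @act still carries
	 * it when the second delivery action is reached.
	 */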
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;
		u16 tci;

		if (!act) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			rc = -EINVAL;
			goto release;
		}

		if ((fa->id == FLOW_ACTION_REDIRECT ||
		     fa->id == FLOW_ACTION_MIRRED ||
		     fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
			struct efx_tc_counter_index *ctr;

			/* Currently the only actions that want stats are
			 * mirred and gact (ok, shot, trap, goto-chain), which
			 * means we want stats just before delivery. Also,
			 * note that tunnel_key set shouldn't change the
			 * length; it's only the subsequent mirred that does
			 * that, and the stats are taken _before_ the mirred
			 * action happens.
			 */
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
				/* All supported actions that count either steal
				 * (gact shot, mirred redirect) or clone act
				 * (mirred mirror), so we should never get two
				 * count actions on one action_set.
				 */
				NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
				rc = -EOPNOTSUPP;
				goto release;
			}

			if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
						       fa->hw_stats);
				rc = -EOPNOTSUPP;
				goto release;
			}

			ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
							      EFX_TC_COUNTER_TYPE_AR);
			if (IS_ERR(ctr)) {
				rc = PTR_ERR(ctr);
				NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
				goto release;
			}
			act->count = ctr;
			INIT_LIST_HEAD(&act->count_user);
		}

		switch (fa->id) {
		case FLOW_ACTION_DROP:
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL; /* end of the line */
			break;
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			save = *act;

			if (encap_info) {
				struct efx_tc_encap_action *encap;

				if (!efx_tc_flower_action_order_ok(act,
								   EFX_TC_AO_ENCAP)) {
					rc = -EOPNOTSUPP;
					NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
					goto release;
				}
				encap = efx_tc_flower_create_encap_md(
						efx, encap_info, fa->dev, extack);
				if (IS_ERR_OR_NULL(encap)) {
					rc = PTR_ERR(encap);
					if (!rc)
						rc = -EIO; /* arbitrary */
					goto release;
				}
				act->encap_md = encap;
				list_add_tail(&act->encap_user, &encap->users);
				act->dest_mport = encap->dest_mport;
				act->deliver = 1;
				if (act->count && !WARN_ON(!act->count->cnt)) {
					/* This counter is used by an encap
					 * action, which needs a reference back
					 * so it can prod the neighbour entry
					 * whenever traffic is seen.
					 */
					spin_lock_bh(&act->count->cnt->lock);
					list_add_tail(&act->count_user,
						      &act->count->cnt->users);
					spin_unlock_bh(&act->count->cnt->lock);
				}
				rc = efx_mae_alloc_action_set(efx, act);
				if (rc) {
					NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
					goto release;
				}
				list_add_tail(&act->list, &rule->acts.list);
				act->user = &rule->acts;
				act = NULL;
				if (fa->id == FLOW_ACTION_REDIRECT)
					break; /* end of the line */
				/* Mirror, so continue on with saved act */
				save.count = NULL;
				act = kzalloc(sizeof(*act), GFP_USER);
				if (!act) {
					rc = -ENOMEM;
					goto release;
				}
				*act = save;
				break;
			}

			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
				goto release;
			}

			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			if (IS_ERR(to_efv)) {
				NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
				rc = PTR_ERR(to_efv);
				goto release;
			}
			rc = efx_tc_flower_external_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			save.count = NULL;
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_VLAN_POP:
			if (act->vlan_push) {
				act->vlan_push--;
			} else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
				act->vlan_pop++;
			} else {
				NL_SET_ERR_MSG_MOD(extack,
						   "More than two VLAN pops, or action order violated");
				rc = -EINVAL;
				goto release;
			}
			break;
		case FLOW_ACTION_VLAN_PUSH:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack,
						   "More than two VLAN pushes, or action order violated");
				goto release;
			}
			tci = fa->vlan.vid & VLAN_VID_MASK;
			tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
			act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
			act->vlan_push++;
			break;
		case FLOW_ACTION_ADD:
			rc = efx_tc_pedit_add(efx, act, fa, extack);
			if (rc < 0)
				goto release;
			break;
		case FLOW_ACTION_MANGLE:
			rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
			if (rc < 0)
				goto release;
			break;
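		/* Illustrative example (hypothetical names) of the encap flow
		 * handled by the next two cases together with the mirred code
		 * above:
		 *	action tunnel_key set id 1000 \
		 *		src_ip 192.0.2.1 dst_ip 192.0.2.2 dst_port 4789 \
		 *	action mirred egress redirect dev vxlan0
		 * The tunnel_key set only records encap_info here; the encap
		 * header itself is built when the following mirred is reached,
		 * via efx_tc_flower_create_encap_md().
		 */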
		case FLOW_ACTION_TUNNEL_ENCAP:
			if (encap_info) {
				/* Can't specify encap multiple times.
				 * If you want to overwrite an existing
				 * encap_info, use an intervening
				 * FLOW_ACTION_TUNNEL_DECAP to clear it.
				 */
				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
				rc = -EINVAL;
				goto release;
			}
			if (!fa->tunnel) {
				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
				rc = -EOPNOTSUPP;
				goto release;
			}
			encap_info = fa->tunnel;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			if (encap_info) {
				encap_info = NULL;
				break;
			}
			/* Since we don't support enc_key matches on ingress
			 * (and if we did there'd be no tunnel-device to give
			 * us a type), we can't offload a decap that's not
			 * just undoing a previous encap action.
			 */
			NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
			rc = -EOPNOTSUPP;
			goto release;
		case FLOW_ACTION_CT:
			if (fa->ct.action != TCA_CT_ACT_NAT) {
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Can only offload CT 'nat' action in RHS rules, not %d",
						       fa->ct.action);
				goto release;
			}
			act->do_nat = 1;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	rc = efx_tc_incomplete_mangle(&mung, extack);
	if (rc < 0)
		goto release;
	if (act) {
		/* Not shot/redirected, so deliver to default dest */
		if (from_efv == EFX_EFV_PF)
			/* Rule applies to traffic from the wire,
			 * and default dest is thus the PF
			 */
			efx_mae_mport_uplink(efx, &act->dest_mport);
		else
			/* Representor, so rule applies to traffic from
			 * representee, and default dest is thus the rep.
			 * All reps use the same mport for delivery
			 */
			efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
					    &act->dest_mport);
		act->deliver = 1;
		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed filter (cookie %lx)\n",
		  tc->cookie);

	rule->match = match;

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	if (from_efv == EFX_EFV_PF)
		/* PF netdev, so rule applies to traffic from wire */
		rule->fallback = &efx->tc->facts.pf;
	else
		/* repdev, so rule applies to traffic from representee */
		rule->fallback = &efx->tc->facts.reps;
	if (!efx_tc_check_ready(efx, rule)) {
		netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
		acts_id = rule->fallback->fw_id;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
		acts_id = rule->acts.fw_id;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 acts_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (match.rid)
		efx_tc_put_recirc_id(efx, match.rid);
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		if (!old)
			rhashtable_remove_fast(&efx->tc->match_action_ht,
					       &rule->linkage,
					       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	return rc;
}

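/* A note on rule->fallback above (behaviour inferred from the
 * efx_tc_check_ready() usage; the trigger described is an assumption):
 * if some action set is not yet usable, e.g. an encap action whose
 * neighbour entry has not been resolved, the rule is inserted pointing
 * at the fallback action-set list (plain delivery to the PF or to the
 * representor m-port) and can be repointed at rule->acts.fw_id once
 * everything it depends on becomes ready.
 */
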
static int efx_tc_flower_destroy(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *lhs_rule;
	struct efx_tc_flow_rule *rule;

	lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie,
					  efx_tc_lhs_rule_ht_params);
	if (lhs_rule) {
		/* Remove it from HW */
		efx_mae_remove_lhs_rule(efx, lhs_rule);
		/* Delete it from SW */
		efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act);
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage,
				       efx_tc_lhs_rule_ht_params);
		if (lhs_rule->match.encap)
			efx_tc_flower_release_encap_match(efx,
							  lhs_rule->match.encap);
		netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n",
			  lhs_rule->cookie);
		kfree(lhs_rule);
		return 0;
	}

	rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
				      efx_tc_match_action_ht_params);
	if (!rule) {
		/* Only log a message if we're the ingress device. Otherwise
		 * it's a foreign filter and we might just not have been
		 * interested (e.g. we might not have been the egress device
		 * either).
		 */
		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
			netif_warn(efx, drv, efx->net_dev,
				   "Filter %lx not found to remove\n", tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
		return -ENOENT;
	}

	/* Remove it from HW */
	efx_tc_delete_rule(efx, rule);
	/* Delete it from SW */
	rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
			       efx_tc_match_action_ht_params);
	netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
	kfree(rule);
	return 0;
}

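/* Stats reporting below is delta-based: the driver keeps the totals it
 * last reported in cnt->old_packets/old_bytes and hands TC only the
 * difference. For example (illustrative numbers), if the counter has
 * seen 100 packets in total and old_packets is 80, this call reports
 * 20 packets and then sets old_packets to 100.
 */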
static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
			       struct flow_cls_offload *tc)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_counter_index *ctr;
	struct efx_tc_counter *cnt;
	u64 packets, bytes;

	ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
	if (!ctr) {
		/* See comment in efx_tc_flower_destroy() */
		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
			if (net_ratelimit())
				netif_warn(efx, drv, efx->net_dev,
					   "Filter %lx not found for stats\n",
					   tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
		return -ENOENT;
	}
	if (WARN_ON(!ctr->cnt)) /* can't happen */
		return -EIO;
	cnt = ctr->cnt;

	spin_lock_bh(&cnt->lock);
	/* Report only new pkts/bytes since last time TC asked */
	packets = cnt->packets;
	bytes = cnt->bytes;
	flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
			  packets - cnt->old_packets, 0, cnt->touched,
			  FLOW_ACTION_HW_STATS_DELAYED);
	cnt->old_packets = packets;
	cnt->old_bytes = bytes;
	spin_unlock_bh(&cnt->lock);
	return 0;
}

int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
		  struct flow_cls_offload *tc, struct efx_rep *efv)
{
	int rc;

	if (!efx->tc)
		return -EOPNOTSUPP;

	mutex_lock(&efx->tc->mutex);
	switch (tc->command) {
	case FLOW_CLS_REPLACE:
		rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
		break;
	case FLOW_CLS_DESTROY:
		rc = efx_tc_flower_destroy(efx, net_dev, tc);
		break;
	case FLOW_CLS_STATS:
		rc = efx_tc_flower_stats(efx, net_dev, tc);
		break;
	default:
		rc = -EOPNOTSUPP;
		break;
	}
	mutex_unlock(&efx->tc->mutex);
	return rc;
}

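/* Default-rule plumbing summary (derived from the helpers below):
 *	PF uplink m-port -> wire	(efx_tc_configure_default_rule_pf)
 *	wire m-port	 -> PF uplink	(efx_tc_configure_default_rule_wire)
 *	VF rep m-port	 -> shared reps m-port
 *					(efx_tc_configure_default_rule_rep)
 * These are inserted at EFX_TC_PRIO_DFLT, so offloaded TC rules (at
 * EFX_TC_PRIO_TC) can override them.
 */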
static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
					 u32 eg_port, struct efx_tc_flow_rule *rule)
{
	struct efx_tc_action_set_list *acts = &rule->acts;
	struct efx_tc_match *match = &rule->match;
	struct efx_tc_action_set *act;
	int rc;

	match->value.ingress_port = ing_port;
	match->mask.ingress_port = ~0;
	act = kzalloc(sizeof(*act), GFP_KERNEL);
	if (!act)
		return -ENOMEM;
	act->deliver = 1;
	act->dest_mport = eg_port;
	rc = efx_mae_alloc_action_set(efx, act);
	if (rc)
		goto fail1;
	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
	list_add_tail(&act->list, &acts->list);
	rc = efx_mae_alloc_action_set_list(efx, acts);
	if (rc)
		goto fail2;
	rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
				 acts->fw_id, &rule->fw_id);
	if (rc)
		goto fail3;
	return 0;
fail3:
	efx_mae_free_action_set_list(efx, acts);
fail2:
	list_del(&act->list);
	efx_mae_free_action_set(efx, act->fw_id);
fail1:
	kfree(act);
	return rc;
}

static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
{
	struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
	u32 ing_port, eg_port;

	efx_mae_mport_uplink(efx, &ing_port);
	efx_mae_mport_wire(efx, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
{
	struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
	u32 ing_port, eg_port;

	efx_mae_mport_wire(efx, &ing_port);
	efx_mae_mport_uplink(efx, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
{
	struct efx_tc_flow_rule *rule = &efv->dflt;
	struct efx_nic *efx = efv->parent;
	u32 ing_port, eg_port;

	efx_mae_mport_mport(efx, efv->mport, &ing_port);
	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
				     struct efx_tc_flow_rule *rule)
{
	if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
		efx_tc_delete_rule(efx, rule);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}

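/* Fallback action-set lists are what a not-yet-ready TC rule points at
 * in the meantime (see rule->fallback in efx_tc_flower_replace()); they
 * simply deliver to the rule's default destination, just as the default
 * rules above do.
 */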
static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
					  struct efx_tc_action_set_list *acts)
{
	struct efx_tc_action_set *act;
	int rc;

	act = kzalloc(sizeof(*act), GFP_KERNEL);
	if (!act)
		return -ENOMEM;
	act->deliver = 1;
	act->dest_mport = eg_port;
	rc = efx_mae_alloc_action_set(efx, act);
	if (rc)
		goto fail1;
	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
	list_add_tail(&act->list, &acts->list);
	rc = efx_mae_alloc_action_set_list(efx, acts);
	if (rc)
		goto fail2;
	return 0;
fail2:
	list_del(&act->list);
	efx_mae_free_action_set(efx, act->fw_id);
fail1:
	kfree(act);
	return rc;
}

static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
{
	struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
	u32 eg_port;

	efx_mae_mport_uplink(efx, &eg_port);
	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
}

static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
{
	struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
	u32 eg_port;

	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
}

static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
					     struct efx_tc_action_set_list *acts)
{
	efx_tc_free_action_set_list(efx, acts, true);
}

static int efx_tc_configure_rep_mport(struct efx_nic *efx)
{
	u32 rep_mport_label;
	int rc;

	rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
	if (rc)
		return rc;
	pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
		efx->tc->reps_mport_id, rep_mport_label);
	/* Use mport *selector* as vport ID */
	efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
			    &efx->tc->reps_mport_vport_id);
	return 0;
}

static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
{
	efx_mae_free_mport(efx, efx->tc->reps_mport_id);
	efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
}

int efx_tc_insert_rep_filters(struct efx_nic *efx)
{
	struct efx_filter_spec promisc, allmulti;
	int rc;

	if (efx->type->is_vf)
		return 0;
	if (!efx->tc)
		return 0;
	efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
	efx_filter_set_uc_def(&promisc);
	efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
	rc = efx_filter_insert_filter(efx, &promisc, false);
	if (rc < 0)
		return rc;
	efx->tc->reps_filter_uc = rc;
	efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
	efx_filter_set_mc_def(&allmulti);
	efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
	rc = efx_filter_insert_filter(efx, &allmulti, false);
	if (rc < 0)
		return rc;
	efx->tc->reps_filter_mc = rc;
	return 0;
}

void efx_tc_remove_rep_filters(struct efx_nic *efx)
{
	if (efx->type->is_vf)
		return;
	if (!efx->tc)
		return;
	if (efx->tc->reps_filter_mc >= 0)
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
					  efx->tc->reps_filter_mc);
	efx->tc->reps_filter_mc = -1;
	if (efx->tc->reps_filter_uc >= 0)
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
					  efx->tc->reps_filter_uc);
	efx->tc->reps_filter_uc = -1;
}

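/* Bring-up order below is mirrored in reverse by efx_fini_tc(): the
 * indirect-block callback is only registered (and efx->tc->up set)
 * once the default rules, rep m-port, fallback action sets and MAE
 * tables are all in place.
 */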
int efx_init_tc(struct efx_nic *efx)
{
	int rc;

	rc = efx_mae_get_caps(efx, efx->tc->caps);
	if (rc)
		return rc;
	if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
		/* Firmware supports some match fields the driver doesn't know
		 * about. Not fatal, unless any of those fields are required
		 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS), but if so we have no
		 * way to tell from here.
		 */
		netif_warn(efx, probe, efx->net_dev,
			   "FW reports additional match fields %u\n",
			   efx->tc->caps->match_field_count);
	if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
		netif_err(efx, probe, efx->net_dev,
			  "Too few action prios supported (have %u, need %u)\n",
			  efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
		return -EIO;
	}
	rc = efx_tc_configure_default_rule_pf(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_default_rule_wire(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_rep_mport(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_fallback_acts_pf(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_fallback_acts_reps(efx);
	if (rc)
		return rc;
	rc = efx_mae_get_tables(efx);
	if (rc)
		return rc;
	rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
	if (rc)
		goto out_free;
	efx->tc->up = true;
	return 0;
out_free:
	efx_mae_free_tables(efx);
	return rc;
}

void efx_fini_tc(struct efx_nic *efx)
{
	/* We can get called even if efx_init_struct_tc() failed */
	if (!efx->tc)
		return;
	if (efx->tc->up)
		flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx,
					 efx_tc_block_unbind);
	efx_tc_deconfigure_rep_mport(efx);
	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
	efx->tc->up = false;
	efx_mae_free_tables(efx);
}

/* At teardown time, all TC filter rules (and thus all resources they created)
 * should already have been removed. If we find any in our hashtables, make a
 * cursory attempt to clean up the software side.
 */
static void efx_tc_encap_match_free(void *ptr, void *__unused)
{
	struct efx_tc_encap_match *encap = ptr;

	WARN_ON(refcount_read(&encap->ref));
	kfree(encap);
}

static void efx_tc_recirc_free(void *ptr, void *arg)
{
	struct efx_tc_recirc_id *rid = ptr;
	struct efx_nic *efx = arg;

	WARN_ON(refcount_read(&rid->ref));
	ida_free(&efx->tc->recirc_ida, rid->fw_id);
	kfree(rid);
}

static void efx_tc_lhs_free(void *ptr, void *arg)
{
	struct efx_tc_lhs_rule *rule = ptr;
	struct efx_nic *efx = arg;

	netif_err(efx, drv, efx->net_dev,
		  "tc lhs_rule %lx still present at teardown, removing\n",
		  rule->cookie);

	if (rule->lhs_act.zone)
		efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone);
	if (rule->lhs_act.count)
		efx_tc_flower_put_counter_index(efx, rule->lhs_act.count);
	efx_mae_remove_lhs_rule(efx, rule);

	kfree(rule);
}

static void efx_tc_mac_free(void *ptr, void *__unused)
{
	struct efx_tc_mac_pedit_action *ped = ptr;

	WARN_ON(refcount_read(&ped->ref));
	kfree(ped);
}

static void efx_tc_flow_free(void *ptr, void *arg)
{
	struct efx_tc_flow_rule *rule = ptr;
	struct efx_nic *efx = arg;

	netif_err(efx, drv, efx->net_dev,
		  "tc rule %lx still present at teardown, removing\n",
		  rule->cookie);

	/* Also releases entries in subsidiary tables */
	efx_tc_delete_rule(efx, rule);

	kfree(rule);
}

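/* The error-unwind ladder in efx_init_struct_tc() below tears down, in
 * reverse order, exactly what was set up before the failing step; each
 * fail_* label is named after the step whose failure jumps there. The
 * same ordering, reversed, is used by efx_fini_struct_tc().
 */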
int efx_init_struct_tc(struct efx_nic *efx)
{
	int rc;

	if (efx->type->is_vf)
		return 0;

	efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
	if (!efx->tc)
		return -ENOMEM;
	efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
	if (!efx->tc->caps) {
		rc = -ENOMEM;
		goto fail_alloc_caps;
	}
	INIT_LIST_HEAD(&efx->tc->block_list);

	mutex_init(&efx->tc->mutex);
	init_waitqueue_head(&efx->tc->flush_wq);
	rc = efx_tc_init_encap_actions(efx);
	if (rc < 0)
		goto fail_encap_actions;
	rc = efx_tc_init_counters(efx);
	if (rc < 0)
		goto fail_counters;
	rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
	if (rc < 0)
		goto fail_mac_ht;
	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
	if (rc < 0)
		goto fail_encap_match_ht;
	rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
	if (rc < 0)
		goto fail_match_action_ht;
	rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params);
	if (rc < 0)
		goto fail_lhs_rule_ht;
	rc = efx_tc_init_conntrack(efx);
	if (rc < 0)
		goto fail_conntrack;
	rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params);
	if (rc < 0)
		goto fail_recirc_ht;
	ida_init(&efx->tc->recirc_ida);
	efx->tc->reps_filter_uc = -1;
	efx->tc->reps_filter_mc = -1;
	INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
	efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
	efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->facts.pf.list);
	efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->facts.reps.list);
	efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
	efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
	return 0;
fail_recirc_ht:
	efx_tc_destroy_conntrack(efx);
fail_conntrack:
	rhashtable_destroy(&efx->tc->lhs_rule_ht);
fail_lhs_rule_ht:
	rhashtable_destroy(&efx->tc->match_action_ht);
fail_match_action_ht:
	rhashtable_destroy(&efx->tc->encap_match_ht);
fail_encap_match_ht:
	rhashtable_destroy(&efx->tc->mac_ht);
fail_mac_ht:
	efx_tc_destroy_counters(efx);
fail_counters:
	efx_tc_destroy_encap_actions(efx);
fail_encap_actions:
	mutex_destroy(&efx->tc->mutex);
	kfree(efx->tc->caps);
fail_alloc_caps:
	kfree(efx->tc);
	efx->tc = NULL;
	return rc;
}

void efx_fini_struct_tc(struct efx_nic *efx)
{
	if (!efx->tc)
		return;

	mutex_lock(&efx->tc->mutex);
	EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
	rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx);
	rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
				    efx);
	rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
				    efx_tc_encap_match_free, NULL);
	efx_tc_fini_conntrack(efx);
	rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
	WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
	ida_destroy(&efx->tc->recirc_ida);
	rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
	efx_tc_fini_counters(efx);
	efx_tc_fini_encap_actions(efx);
	mutex_unlock(&efx->tc->mutex);
	mutex_destroy(&efx->tc->mutex);
	kfree(efx->tc->caps);
	kfree(efx->tc);
	efx->tc = NULL;
}