// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2021 Corigine, Inc. */

#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_ct.h>

#include "conntrack.h"
#include "../nfp_port.h"

const struct rhashtable_params nfp_tc_ct_merge_params = {
	.head_offset		= offsetof(struct nfp_fl_ct_tc_merge,
					   hash_node),
	.key_len		= sizeof(unsigned long) * 2,
	.key_offset		= offsetof(struct nfp_fl_ct_tc_merge, cookie),
	.automatic_shrinking	= true,
};

const struct rhashtable_params nfp_nft_ct_merge_params = {
	.head_offset		= offsetof(struct nfp_fl_nft_tc_merge,
					   hash_node),
	.key_len		= sizeof(unsigned long) * 3,
	.key_offset		= offsetof(struct nfp_fl_nft_tc_merge, cookie),
	.automatic_shrinking	= true,
};

static struct flow_action_entry *get_flow_act(struct flow_rule *rule,
					      enum flow_action_id act_id);

/**
 * get_hashentry() - Wrapper around hashtable lookup.
 * @ht:		hashtable where entry could be found
 * @key:	key to lookup
 * @params:	hashtable params
 * @size:	size of entry to allocate if not in table
 *
 * Returns an entry from a hashtable. If the entry does not exist
 * yet, allocate the memory for it and return the new entry.
 */
static void *get_hashentry(struct rhashtable *ht, void *key,
			   const struct rhashtable_params params, size_t size)
{
	void *result;

	result = rhashtable_lookup_fast(ht, key, params);

	if (result)
		return result;

	result = kzalloc(size, GFP_KERNEL);
	if (!result)
		return ERR_PTR(-ENOMEM);

	return result;
}

bool is_pre_ct_flow(struct flow_cls_offload *flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
	struct flow_dissector *dissector = rule->match.dissector;
	struct flow_action_entry *act;
	struct flow_match_ct ct;
	int i;

	if (dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CT)) {
		flow_rule_match_ct(rule, &ct);
		if (ct.key->ct_state)
			return false;
	}

	if (flow->common.chain_index)
		return false;

	flow_action_for_each(i, act, &flow->rule->action) {
		if (act->id == FLOW_ACTION_CT) {
			/* The pre_ct rule may only carry the ct or ct nat
			 * action; it must not contain any other ct action,
			 * e.g. ct commit.
			 */
			if (!act->ct.action || act->ct.action == TCA_CT_ACT_NAT)
				return true;
			else
				return false;
		}
	}

	return false;
}

bool is_post_ct_flow(struct flow_cls_offload *flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
	struct flow_dissector *dissector = rule->match.dissector;
	struct flow_action_entry *act;
	bool exist_ct_clear = false;
	struct flow_match_ct ct;
	int i;

	if (dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CT)) {
		flow_rule_match_ct(rule, &ct);
		if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)
			return true;
	} else {
		/* A post_ct entry must not contain any ct action except
		 * ct_clear.
		 */
		flow_action_for_each(i, act, &flow->rule->action) {
			if (act->id == FLOW_ACTION_CT) {
				/* Ignore the ct clear action. */
				if (act->ct.action == TCA_CT_ACT_CLEAR) {
					exist_ct_clear = true;
					continue;
				}

				return false;
			}
		}
		/* When ct is combined with NAT, the post_ct entry ignores
		 * the ct state and matches on the NATed fields (sip/dip)
		 * instead. In that case the flow has a non-zero chain index
		 * and contains a ct clear action.
		 */
		if (flow->common.chain_index && exist_ct_clear)
			return true;
	}

	return false;
}

/**
 * get_mangled_key() - Mangle the key if mangle act exists
 * @rule:	rule that carries the actions
 * @buf:	pointer to key to be mangled
 * @offset:	used to adjust mangled offset in L2/L3/L4 header
 * @key_sz:	key size
 * @htype:	mangling type
 *
 * Returns buf where the mangled key is stored.
 */
static void *get_mangled_key(struct flow_rule *rule, void *buf,
			     u32 offset, size_t key_sz,
			     enum flow_action_mangle_base htype)
{
	struct flow_action_entry *act;
	u32 *val = (u32 *)buf;
	u32 off, msk, key;
	int i;

	flow_action_for_each(i, act, &rule->action) {
		if (act->id == FLOW_ACTION_MANGLE &&
		    act->mangle.htype == htype) {
			off = act->mangle.offset - offset;
			msk = act->mangle.mask;
			key = act->mangle.val;

			/* Mangling is supposed to be u32 aligned */
			if (off % 4 || off >= key_sz)
				continue;

			val[off >> 2] &= msk;
			val[off >> 2] |= key;
		}
	}

	return buf;
}

/* Only tos and ttl are involved in the flow_match_ip structure, which
 * doesn't conform to the layout of the ip/ipv6 header definition. So
 * they need particular processing here: fill them into the ip/ipv6
 * header, so that mangling actions can work on them directly.
 */
#define NFP_IPV4_TOS_MASK	GENMASK(23, 16)
#define NFP_IPV4_TTL_MASK	GENMASK(31, 24)
#define NFP_IPV6_TCLASS_MASK	GENMASK(27, 20)
#define NFP_IPV6_HLIMIT_MASK	GENMASK(7, 0)
static void *get_mangled_tos_ttl(struct flow_rule *rule, void *buf,
				 bool is_v6)
{
	struct flow_match_ip match;
	/* IPv4's ttl field is in the third dword. */
	__be32 ip_hdr[3];
	u32 tmp, hdr_len;

	flow_rule_match_ip(rule, &match);

	if (is_v6) {
		tmp = FIELD_PREP(NFP_IPV6_TCLASS_MASK, match.key->tos);
		ip_hdr[0] = cpu_to_be32(tmp);
		tmp = FIELD_PREP(NFP_IPV6_HLIMIT_MASK, match.key->ttl);
		ip_hdr[1] = cpu_to_be32(tmp);
		hdr_len = 2 * sizeof(__be32);
	} else {
		tmp = FIELD_PREP(NFP_IPV4_TOS_MASK, match.key->tos);
		ip_hdr[0] = cpu_to_be32(tmp);
		tmp = FIELD_PREP(NFP_IPV4_TTL_MASK, match.key->ttl);
		ip_hdr[2] = cpu_to_be32(tmp);
		hdr_len = 3 * sizeof(__be32);
	}

	get_mangled_key(rule, ip_hdr, 0, hdr_len,
			is_v6 ? FLOW_ACT_MANGLE_HDR_TYPE_IP6 :
				FLOW_ACT_MANGLE_HDR_TYPE_IP4);

	match.key = buf;

	if (is_v6) {
		tmp = be32_to_cpu(ip_hdr[0]);
		match.key->tos = FIELD_GET(NFP_IPV6_TCLASS_MASK, tmp);
		tmp = be32_to_cpu(ip_hdr[1]);
		match.key->ttl = FIELD_GET(NFP_IPV6_HLIMIT_MASK, tmp);
	} else {
		tmp = be32_to_cpu(ip_hdr[0]);
		match.key->tos = FIELD_GET(NFP_IPV4_TOS_MASK, tmp);
		tmp = be32_to_cpu(ip_hdr[2]);
		match.key->ttl = FIELD_GET(NFP_IPV4_TTL_MASK, tmp);
	}

	return buf;
}

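/* For illustration of the offset arithmetic in get_mangled_key() above, as
 * used by the merge checks below: e.g. a pedit of the IPv4 destination
 * address arrives with htype IP4 and mangle.offset ==
 * offsetof(struct iphdr, daddr), i.e. 16. The IPv4 address check passes
 * offset == offsetof(struct iphdr, saddr), i.e. 12, so off becomes 4 and
 * the mangled value lands in val[1], which is the dst field of the
 * flow_match_ipv4_addrs key.
 */
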
/* Note that entry1 and entry2 are not swappable. The ip and tport merge
 * checks are only skipped for pre_ct and post_ct when pre_ct does NAT.
 */
static bool nfp_ct_merge_check_cannot_skip(struct nfp_fl_ct_flow_entry *entry1,
					   struct nfp_fl_ct_flow_entry *entry2)
{
	/* Only a pre_ct entry can have the NFP_FL_ACTION_DO_NAT flag set. */
	if ((entry1->flags & NFP_FL_ACTION_DO_NAT) &&
	    entry2->type == CT_TYPE_POST_CT)
		return false;

	return true;
}

/* Note that entry1 and entry2 are not swappable: entry1 must be the earlier
 * flow, whose mangle actions (if any) need to be taken into account, and
 * entry2 the later flow, whose actions we don't care about here.
 */
static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1,
			      struct nfp_fl_ct_flow_entry *entry2)
{
	unsigned long long ovlp_keys;
	bool out, is_v6 = false;
	u8 ip_proto = 0;
	ovlp_keys = entry1->rule->match.dissector->used_keys &
			entry2->rule->match.dissector->used_keys;
	/* Temporary buffer for mangling keys; 64 bytes is enough to cover the
	 * largest key struct of the fields that may be mangled.
	 * Supported fields to mangle:
	 * mac_src/mac_dst (struct flow_match_eth_addrs, 12B)
	 * nw_tos/nw_ttl (struct flow_match_ip, 2B)
	 * nw_src/nw_dst (struct flow_match_ipv4/6_addrs, 32B)
	 * tp_src/tp_dst (struct flow_match_ports, 4B)
	 */
	char buf[64];

	if (entry1->netdev && entry2->netdev &&
	    entry1->netdev != entry2->netdev)
		return -EINVAL;

	/* Check the overlapped fields one by one; the unmasked parts
	 * must not conflict with each other.
	 */
	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match1, match2;

		flow_rule_match_control(entry1->rule, &match1);
		flow_rule_match_control(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match1, match2;

		flow_rule_match_basic(entry1->rule, &match1);
		flow_rule_match_basic(entry2->rule, &match2);

		/* The n_proto field is mandatory in ct-related flows and
		 * must be either ipv4 or ipv6.
		 */
		is_v6 = match1.key->n_proto == htons(ETH_P_IPV6);
		/* The ip_proto field is mandatory when ports are matched. */
		ip_proto = match1.key->ip_proto;

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	/* If the pre_ct entry does NAT, the NATed ip is matched by the nft
	 * entry and is merge checked when the nft and post_ct entries are
	 * merged, so skip this ip merge check here.
	 */
	if ((ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) &&
	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
		struct flow_match_ipv4_addrs match1, match2;

		flow_rule_match_ipv4_addrs(entry1->rule, &match1);
		flow_rule_match_ipv4_addrs(entry2->rule, &match2);

		memcpy(buf, match1.key, sizeof(*match1.key));
		match1.key = get_mangled_key(entry1->rule, buf,
					     offsetof(struct iphdr, saddr),
					     sizeof(*match1.key),
					     FLOW_ACT_MANGLE_HDR_TYPE_IP4);

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	/* If the pre_ct entry does NAT, the NATed ip is matched by the nft
	 * entry and is merge checked when the nft and post_ct entries are
	 * merged, so skip this ip merge check here.
	 */
	if ((ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) &&
	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
		struct flow_match_ipv6_addrs match1, match2;

		flow_rule_match_ipv6_addrs(entry1->rule, &match1);
		flow_rule_match_ipv6_addrs(entry2->rule, &match2);

		memcpy(buf, match1.key, sizeof(*match1.key));
		match1.key = get_mangled_key(entry1->rule, buf,
					     offsetof(struct ipv6hdr, saddr),
					     sizeof(*match1.key),
					     FLOW_ACT_MANGLE_HDR_TYPE_IP6);

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	/* If the pre_ct entry does NAT, the NATed tport is matched by the nft
	 * entry and is merge checked when the nft and post_ct entries are
	 * merged, so skip this tport merge check here.
	 */
	if ((ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_PORTS)) &&
	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
		enum flow_action_mangle_base htype = FLOW_ACT_MANGLE_UNSPEC;
		struct flow_match_ports match1, match2;

		flow_rule_match_ports(entry1->rule, &match1);
		flow_rule_match_ports(entry2->rule, &match2);

		if (ip_proto == IPPROTO_UDP)
			htype = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		else if (ip_proto == IPPROTO_TCP)
			htype = FLOW_ACT_MANGLE_HDR_TYPE_TCP;

		memcpy(buf, match1.key, sizeof(*match1.key));
		match1.key = get_mangled_key(entry1->rule, buf, 0,
					     sizeof(*match1.key), htype);

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs match1, match2;

		flow_rule_match_eth_addrs(entry1->rule, &match1);
		flow_rule_match_eth_addrs(entry2->rule, &match2);

		memcpy(buf, match1.key, sizeof(*match1.key));
		match1.key = get_mangled_key(entry1->rule, buf, 0,
					     sizeof(*match1.key),
					     FLOW_ACT_MANGLE_HDR_TYPE_ETH);

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan match1, match2;

		flow_rule_match_vlan(entry1->rule, &match1);
		flow_rule_match_vlan(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_MPLS)) {
		struct flow_match_mpls match1, match2;

		flow_rule_match_mpls(entry1->rule, &match1);
		flow_rule_match_mpls(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match1, match2;

		flow_rule_match_tcp(entry1->rule, &match1);
		flow_rule_match_tcp(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_IP)) {
		struct flow_match_ip match1, match2;

		flow_rule_match_ip(entry1->rule, &match1);
		flow_rule_match_ip(entry2->rule, &match2);

		match1.key = get_mangled_tos_ttl(entry1->rule, buf, is_v6);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_match_enc_keyid match1, match2;

		flow_rule_match_enc_keyid(entry1->rule, &match1);
		flow_rule_match_enc_keyid(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
		struct flow_match_ipv4_addrs match1, match2;

		flow_rule_match_enc_ipv4_addrs(entry1->rule, &match1);
		flow_rule_match_enc_ipv4_addrs(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
		struct flow_match_ipv6_addrs match1, match2;

		flow_rule_match_enc_ipv6_addrs(entry1->rule, &match1);
		flow_rule_match_enc_ipv6_addrs(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control match1, match2;

		flow_rule_match_enc_control(entry1->rule, &match1);
		flow_rule_match_enc_control(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_match_ip match1, match2;

		flow_rule_match_enc_ip(entry1->rule, &match1);
		flow_rule_match_enc_ip(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS)) {
		struct flow_match_enc_opts match1, match2;

		flow_rule_match_enc_opts(entry1->rule, &match1);
		flow_rule_match_enc_opts(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	return 0;

check_failed:
	return -EINVAL;
}

static int nfp_ct_check_vlan_merge(struct flow_action_entry *a_in,
				   struct flow_rule *rule)
{
	struct flow_match_vlan match;

	if (unlikely(flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)))
		return -EOPNOTSUPP;

	/* post_ct does not match on the VLAN key: can be merged. */
	if (likely(!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)))
		return 0;

	switch (a_in->id) {
	/* pre_ct pops the vlan while post_ct matches on the VLAN key:
	 * cannot be merged.
	 */
	case FLOW_ACTION_VLAN_POP:
		return -EOPNOTSUPP;

	case FLOW_ACTION_VLAN_PUSH:
	case FLOW_ACTION_VLAN_MANGLE:
		flow_rule_match_vlan(rule, &match);
		/* Different vlan id: cannot be merged. */
		if ((match.key->vlan_id & match.mask->vlan_id) ^
		    (a_in->vlan.vid & match.mask->vlan_id))
			return -EOPNOTSUPP;

		/* Different tpid: cannot be merged. */
		if ((match.key->vlan_tpid & match.mask->vlan_tpid) ^
		    (a_in->vlan.proto & match.mask->vlan_tpid))
			return -EOPNOTSUPP;

		/* Different priority: cannot be merged.
		 */
		if ((match.key->vlan_priority & match.mask->vlan_priority) ^
		    (a_in->vlan.prio & match.mask->vlan_priority))
			return -EOPNOTSUPP;

		break;
	default:
		return -EOPNOTSUPP;
	}

	return 0;
}

/* Extra check for merging across multiple ct zones; currently supports
 * merge checking of nft entries from different zones.
 */
static int nfp_ct_merge_extra_check(struct nfp_fl_ct_flow_entry *nft_entry,
				    struct nfp_fl_ct_tc_merge *tc_m_entry)
{
	struct nfp_fl_nft_tc_merge *prev_nft_m_entry;
	struct nfp_fl_ct_flow_entry *pre_ct_entry;

	pre_ct_entry = tc_m_entry->pre_ct_parent;
	prev_nft_m_entry = pre_ct_entry->prev_m_entries[pre_ct_entry->num_prev_m_entries - 1];

	return nfp_ct_merge_check(prev_nft_m_entry->nft_parent, nft_entry);
}

static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry,
				  struct nfp_fl_ct_flow_entry *post_ct_entry,
				  struct nfp_fl_ct_flow_entry *nft_entry)
{
	struct flow_action_entry *act;
	int i, err;

	/* Check for pre_ct->action conflicts */
	flow_action_for_each(i, act, &pre_ct_entry->rule->action) {
		switch (act->id) {
		case FLOW_ACTION_VLAN_PUSH:
		case FLOW_ACTION_VLAN_POP:
		case FLOW_ACTION_VLAN_MANGLE:
			err = nfp_ct_check_vlan_merge(act, post_ct_entry->rule);
			if (err)
				return err;
			break;
		case FLOW_ACTION_MPLS_PUSH:
		case FLOW_ACTION_MPLS_POP:
		case FLOW_ACTION_MPLS_MANGLE:
			return -EOPNOTSUPP;
		default:
			break;
		}
	}

	/* Check for nft->action conflicts */
	flow_action_for_each(i, act, &nft_entry->rule->action) {
		switch (act->id) {
		case FLOW_ACTION_VLAN_PUSH:
		case FLOW_ACTION_VLAN_POP:
		case FLOW_ACTION_VLAN_MANGLE:
		case FLOW_ACTION_MPLS_PUSH:
		case FLOW_ACTION_MPLS_POP:
		case FLOW_ACTION_MPLS_MANGLE:
			return -EOPNOTSUPP;
		default:
			break;
		}
	}
	return 0;
}

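/* Check that the ct metadata (mark and labels) set by the nft entry's
 * CT_METADATA action agrees with what the post_ct entry matches on, bit by
 * bit under the post_ct mask. If there is no CT_METADATA action or no CT
 * match key to compare, the merge is only accepted when the nft entry does
 * mangling (NAT), since a post_ct flow with a ct clear action does not
 * match the ct state in that case.
 */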
static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry,
			     struct nfp_fl_ct_flow_entry *nft_entry)
{
	struct flow_dissector *dissector = post_ct_entry->rule->match.dissector;
	struct flow_action_entry *ct_met;
	struct flow_match_ct ct;
	int i;

	ct_met = get_flow_act(nft_entry->rule, FLOW_ACTION_CT_METADATA);
	if (ct_met && (dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CT))) {
		u32 *act_lbl;

		act_lbl = ct_met->ct_metadata.labels;
		flow_rule_match_ct(post_ct_entry->rule, &ct);
		for (i = 0; i < 4; i++) {
			if ((ct.key->ct_labels[i] & ct.mask->ct_labels[i]) ^
			    (act_lbl[i] & ct.mask->ct_labels[i]))
				return -EINVAL;
		}

		if ((ct.key->ct_mark & ct.mask->ct_mark) ^
		    (ct_met->ct_metadata.mark & ct.mask->ct_mark))
			return -EINVAL;

		return 0;
	} else {
		/* A post_ct flow with a ct clear action does not match on
		 * the ct state when the nft entry does NAT.
		 */
		if (nft_entry->flags & NFP_FL_ACTION_DO_MANGLE)
			return 0;
	}

	return -EINVAL;
}

static int
nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map)
{
	int key_size;

	/* This field must always be present */
	key_size = sizeof(struct nfp_flower_meta_tci);
	map[FLOW_PAY_META_TCI] = 0;

	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_EXT_META) {
		map[FLOW_PAY_EXT_META] = key_size;
		key_size += sizeof(struct nfp_flower_ext_meta);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_PORT) {
		map[FLOW_PAY_INPORT] = key_size;
		key_size += sizeof(struct nfp_flower_in_port);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_MAC) {
		map[FLOW_PAY_MAC_MPLS] = key_size;
		key_size += sizeof(struct nfp_flower_mac_mpls);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_TP) {
		map[FLOW_PAY_L4] = key_size;
		key_size += sizeof(struct nfp_flower_tp_ports);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV4) {
		map[FLOW_PAY_IPV4] = key_size;
		key_size += sizeof(struct nfp_flower_ipv4);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV6) {
		map[FLOW_PAY_IPV6] = key_size;
		key_size += sizeof(struct nfp_flower_ipv6);
	}

	if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
		map[FLOW_PAY_QINQ] = key_size;
		key_size += sizeof(struct nfp_flower_vlan);
	}

	if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
		map[FLOW_PAY_GRE] = key_size;
		if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
			key_size += sizeof(struct nfp_flower_ipv6_gre_tun);
		else
			key_size += sizeof(struct nfp_flower_ipv4_gre_tun);
	}

	if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) ||
	    (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) {
		map[FLOW_PAY_UDP_TUN] = key_size;
		if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
			key_size += sizeof(struct nfp_flower_ipv6_udp_tun);
		else
			key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
	}

	if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
		map[FLOW_PAY_GENEVE_OPT] = key_size;
		key_size += sizeof(struct nfp_flower_geneve_options);
	}

	return key_size;
}

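/* For illustration: map[] above ends up holding the byte offset of every
 * layer present in the flattened key/mask blobs, laid out in the fixed
 * order META_TCI, EXT_META, INPORT, MAC_MPLS, L4, IPV4, IPV6, QINQ, GRE,
 * UDP_TUN, GENEVE_OPT, and the return value is the total key size.
 * nfp_fl_ct_add_offload() below uses the same map to locate where each
 * layer must be compiled into the merged flow.
 */
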
/* Get the csum flag according to the ip proto and the mangle action. */
static void nfp_fl_get_csum_flag(struct flow_action_entry *a_in, u8 ip_proto, u32 *csum)
{
	if (a_in->id != FLOW_ACTION_MANGLE)
		return;

	switch (a_in->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		*csum |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
		if (ip_proto == IPPROTO_TCP)
			*csum |= TCA_CSUM_UPDATE_FLAG_TCP;
		else if (ip_proto == IPPROTO_UDP)
			*csum |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		*csum |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		*csum |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	default:
		break;
	}
}

static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
					struct nfp_flower_priv *priv,
					struct net_device *netdev,
					struct nfp_fl_payload *flow_pay,
					int num_rules)
{
	enum flow_action_hw_stats tmp_stats = FLOW_ACTION_HW_STATS_DONT_CARE;
	struct flow_action_entry *a_in;
	int i, j, id, num_actions = 0;
	struct flow_rule *a_rule;
	int err = 0, offset = 0;

	for (i = 0; i < num_rules; i++)
		num_actions += rules[i]->action.num_entries;

	/* Add extra actions to make sure there is enough room to add a
	 * checksum action when doing NAT.
	 */
	a_rule = flow_rule_alloc(num_actions + (num_rules / 2));
	if (!a_rule)
		return -ENOMEM;

	/* The post_ct entry has at least one action. */
	if (rules[num_rules - 1]->action.num_entries != 0)
		tmp_stats = rules[num_rules - 1]->action.entries[0].hw_stats;

	/* Actions need a BASIC dissector. */
	a_rule->match = rules[0]->match;

	/* Copy actions */
	for (j = 0; j < num_rules; j++) {
		u32 csum_updated = 0;
		u8 ip_proto = 0;

		if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) {
			struct flow_match_basic match;

			/* ip_proto is the only field that is needed in later compile_action,
			 * needed to set the correct checksum flags. It doesn't really matter
			 * which input rule's ip_proto field we take as the earlier merge checks
			 * would have made sure that they don't conflict. We do not know which
			 * of the subflows would have the ip_proto filled in, so we need to iterate
			 * through the subflows and assign the proper subflow to a_rule.
			 */
			flow_rule_match_basic(rules[j], &match);
			if (match.mask->ip_proto) {
				a_rule->match = rules[j]->match;
				ip_proto = match.key->ip_proto;
			}
		}

		for (i = 0; i < rules[j]->action.num_entries; i++) {
			a_in = &rules[j]->action.entries[i];
			id = a_in->id;

			/* Ignore CT related actions as these would already have
			 * been taken care of by previous checks, and we do not send
			 * any CT actions to the firmware.
			 */
			switch (id) {
			case FLOW_ACTION_CT:
			case FLOW_ACTION_GOTO:
			case FLOW_ACTION_CT_METADATA:
				continue;
			default:
				/* The nft entry is generated by tc ct, and its
				 * mangle actions do not carry stats; inherit the
				 * post_ct entry's stats to satisfy
				 * flow_action_hw_stats_check.
				 * nft entry flow rules are at odd array indexes.
				 */
				if (j & 0x01) {
					if (a_in->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE)
						a_in->hw_stats = tmp_stats;
					nfp_fl_get_csum_flag(a_in, ip_proto, &csum_updated);
				}
				memcpy(&a_rule->action.entries[offset++],
				       a_in, sizeof(struct flow_action_entry));
				break;
			}
		}
		/* The nft entry has mangle actions but no checksum action when
		 * doing NAT; the hardware fixes up the IPv4 and TCP/UDP checksums
		 * automatically, so add a csum action here to satisfy the
		 * csum action check.
		 */
		if (csum_updated) {
			struct flow_action_entry *csum_action;

			csum_action = &a_rule->action.entries[offset++];
			csum_action->id = FLOW_ACTION_CSUM;
			csum_action->csum_flags = csum_updated;
			csum_action->hw_stats = tmp_stats;
		}
	}

	/* Some actions would have been ignored, so update the num_entries field */
	a_rule->action.num_entries = offset;
	err = nfp_flower_compile_action(priv->app, a_rule, netdev, flow_pay, NULL);
	kfree(a_rule);

	return err;
}

static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
{
	enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
	struct nfp_fl_ct_zone_entry *zt = m_entry->zt;
	struct flow_rule *rules[NFP_MAX_ENTRY_RULES];
	struct nfp_fl_ct_flow_entry *pre_ct_entry;
	struct nfp_fl_key_ls key_layer, tmp_layer;
	struct nfp_flower_priv *priv = zt->priv;
	u16 key_map[_FLOW_PAY_LAYERS_MAX];
	struct nfp_fl_payload *flow_pay;
	u8 *key, *msk, *kdata, *mdata;
	struct nfp_port *port = NULL;
	int num_rules, err, i, j = 0;
	struct net_device *netdev;
	bool qinq_sup;
	u32 port_id;
	u16 offset;

	netdev = m_entry->netdev;
	qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ);

	pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent;
	num_rules = pre_ct_entry->num_prev_m_entries * 2 + _CT_TYPE_MAX;

	for (i = 0; i < pre_ct_entry->num_prev_m_entries; i++) {
		rules[j++] = pre_ct_entry->prev_m_entries[i]->tc_m_parent->pre_ct_parent->rule;
		rules[j++] = pre_ct_entry->prev_m_entries[i]->nft_parent->rule;
	}

	rules[j++] = m_entry->tc_m_parent->pre_ct_parent->rule;
	rules[j++] = m_entry->nft_parent->rule;
	rules[j++] = m_entry->tc_m_parent->post_ct_parent->rule;

	memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls));
	memset(&key_map, 0, sizeof(key_map));

	/* Calculate the resultant key layer and size for offload */
	for (i = 0; i < num_rules; i++) {
		err = nfp_flower_calculate_key_layers(priv->app,
						      m_entry->netdev,
						      &tmp_layer, rules[i],
						      &tun_type, NULL);
		if (err)
			return err;

		key_layer.key_layer |= tmp_layer.key_layer;
		key_layer.key_layer_two |= tmp_layer.key_layer_two;
	}
	key_layer.key_size = nfp_fl_calc_key_layers_sz(key_layer, key_map);

	flow_pay = nfp_flower_allocate_new(&key_layer);
	if (!flow_pay)
		return -ENOMEM;

	memset(flow_pay->unmasked_data, 0, key_layer.key_size);
	memset(flow_pay->mask_data, 0, key_layer.key_size);

	kdata = flow_pay->unmasked_data;
	mdata = flow_pay->mask_data;

	offset = key_map[FLOW_PAY_META_TCI];
	key = kdata + offset;
	msk = mdata + offset;
	nfp_flower_compile_meta((struct nfp_flower_meta_tci *)key,
				(struct nfp_flower_meta_tci *)msk,
				key_layer.key_layer);

	if (NFP_FLOWER_LAYER_EXT_META & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_EXT_META];
		key = kdata + offset;
		msk = mdata + offset;
		nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)key,
					    key_layer.key_layer_two);
		nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk,
					    key_layer.key_layer_two);
	}

	/* Use the in_port from the -trk rule.
	 * The tc merge checks should already have ensured that the ingress
	 * netdevs are the same.
	 */
	port_id = nfp_flower_get_port_id_from_netdev(priv->app, netdev);
	offset = key_map[FLOW_PAY_INPORT];
	key = kdata + offset;
	msk = mdata + offset;
	err = nfp_flower_compile_port((struct nfp_flower_in_port *)key,
				      port_id, false, tun_type, NULL);
	if (err)
		goto ct_offload_err;
	err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
				      port_id, true, tun_type, NULL);
	if (err)
		goto ct_offload_err;

	/* The following part works on the assumption that previous checks have
	 * already filtered out flows that have different values for the same
	 * layers. Here we iterate through all the rules and merge their
	 * respective masked values (cared bits); the basic method is:
	 * final_key = (r1_key & r1_mask) | (r2_key & r2_mask) | (r3_key & r3_mask)
	 * final_mask = r1_mask | r2_mask | r3_mask
	 * If none of the rules contains a match that is also fine, that simply
	 * means that the layer is not present.
	 */
	if (!qinq_sup) {
		for (i = 0; i < num_rules; i++) {
			offset = key_map[FLOW_PAY_META_TCI];
			key = kdata + offset;
			msk = mdata + offset;
			nfp_flower_compile_tci((struct nfp_flower_meta_tci *)key,
					       (struct nfp_flower_meta_tci *)msk,
					       rules[i]);
		}
	}

	if (NFP_FLOWER_LAYER_MAC & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_MAC_MPLS];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < num_rules; i++) {
			nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key,
					       (struct nfp_flower_mac_mpls *)msk,
					       rules[i]);
			err = nfp_flower_compile_mpls((struct nfp_flower_mac_mpls *)key,
						      (struct nfp_flower_mac_mpls *)msk,
						      rules[i], NULL);
			if (err)
				goto ct_offload_err;
		}
	}

	if (NFP_FLOWER_LAYER_IPV4 & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_IPV4];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < num_rules; i++) {
			nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key,
						(struct nfp_flower_ipv4 *)msk,
						rules[i]);
		}
	}

	if (NFP_FLOWER_LAYER_IPV6 & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_IPV6];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < num_rules; i++) {
			nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key,
						(struct nfp_flower_ipv6 *)msk,
						rules[i]);
		}
	}

	if (NFP_FLOWER_LAYER_TP & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_L4];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < num_rules; i++) {
			nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key,
						 (struct nfp_flower_tp_ports *)msk,
						 rules[i]);
		}
	}

	if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) {
		offset = key_map[FLOW_PAY_QINQ];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < num_rules; i++) {
			nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
						(struct nfp_flower_vlan *)msk,
						rules[i]);
		}
	}

	if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
		offset = key_map[FLOW_PAY_GRE];
		key = kdata + offset;
		msk = mdata + offset;
		if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
			struct nfp_flower_ipv6_gre_tun *gre_match;
			struct nfp_ipv6_addr_entry *entry;
			struct in6_addr *dst;

			for (i = 0; i < num_rules; i++) {
				nfp_flower_compile_ipv6_gre_tun((void *)key,
								(void *)msk, rules[i]);
			}
			gre_match = (struct nfp_flower_ipv6_gre_tun *)key;
			dst = &gre_match->ipv6.dst;

			entry = nfp_tunnel_add_ipv6_off(priv->app, dst);
			if (!entry) {
				err = -ENOMEM;
				goto ct_offload_err;
			}

			flow_pay->nfp_tun_ipv6 = entry;
		} else {
			__be32 dst;

			for (i = 0; i < num_rules; i++) {
				nfp_flower_compile_ipv4_gre_tun((void *)key,
								(void *)msk, rules[i]);
			}
			dst = ((struct nfp_flower_ipv4_gre_tun *)key)->ipv4.dst;

			/* Store the tunnel destination in the rule data.
			 * This must be present and be an exact match.
			 */
			flow_pay->nfp_tun_ipv4_addr = dst;
			nfp_tunnel_add_ipv4_off(priv->app, dst);
		}
	}

	if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN ||
	    key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
		offset = key_map[FLOW_PAY_UDP_TUN];
		key = kdata + offset;
		msk = mdata + offset;
		if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
			struct nfp_flower_ipv6_udp_tun *udp_match;
			struct nfp_ipv6_addr_entry *entry;
			struct in6_addr *dst;

			for (i = 0; i < num_rules; i++) {
				nfp_flower_compile_ipv6_udp_tun((void *)key,
								(void *)msk, rules[i]);
			}
			udp_match = (struct nfp_flower_ipv6_udp_tun *)key;
			dst = &udp_match->ipv6.dst;

			entry = nfp_tunnel_add_ipv6_off(priv->app, dst);
			if (!entry) {
				err = -ENOMEM;
				goto ct_offload_err;
			}

			flow_pay->nfp_tun_ipv6 = entry;
		} else {
			__be32 dst;

			for (i = 0; i < num_rules; i++) {
				nfp_flower_compile_ipv4_udp_tun((void *)key,
								(void *)msk, rules[i]);
			}
			dst = ((struct nfp_flower_ipv4_udp_tun *)key)->ipv4.dst;

			/* Store the tunnel destination in the rule data.
			 * This must be present and be an exact match.
			 */
			flow_pay->nfp_tun_ipv4_addr = dst;
			nfp_tunnel_add_ipv4_off(priv->app, dst);
		}

		if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
			offset = key_map[FLOW_PAY_GENEVE_OPT];
			key = kdata + offset;
			msk = mdata + offset;
			for (i = 0; i < num_rules; i++)
				nfp_flower_compile_geneve_opt(key, msk, rules[i]);
		}
	}

	/* Merge actions into flow_pay */
	err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay, num_rules);
	if (err)
		goto ct_offload_err;

	/* Use the pointer address as the cookie, but set the last bit to 1.
	 * This is to avoid the 'is_merge_flow' check from detecting this as
	 * an already merged flow. This works since address alignment means
	 * that the last bit for pointer addresses will be 0.
	 */
	flow_pay->tc_flower_cookie = ((unsigned long)flow_pay) | 0x1;
	err = nfp_compile_flow_metadata(priv->app, flow_pay->tc_flower_cookie,
					flow_pay, netdev, NULL);
	if (err)
		goto ct_offload_err;

	if (nfp_netdev_is_nfp_repr(netdev))
		port = nfp_port_from_netdev(netdev);

	err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
				     nfp_flower_table_params);
	if (err)
		goto ct_release_offload_meta_err;

	err = nfp_flower_xmit_flow(priv->app, flow_pay,
				   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
	if (err)
		goto ct_remove_rhash_err;

	m_entry->tc_flower_cookie = flow_pay->tc_flower_cookie;
	m_entry->flow_pay = flow_pay;

	if (port)
		port->tc_offload_cnt++;

	return err;

ct_remove_rhash_err:
	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
					    &flow_pay->fl_node,
					    nfp_flower_table_params));
ct_release_offload_meta_err:
	nfp_modify_flow_metadata(priv->app, flow_pay);
ct_offload_err:
	if (flow_pay->nfp_tun_ipv4_addr)
		nfp_tunnel_del_ipv4_off(priv->app, flow_pay->nfp_tun_ipv4_addr);
	if (flow_pay->nfp_tun_ipv6)
		nfp_tunnel_put_ipv6_off(priv->app, flow_pay->nfp_tun_ipv6);
	kfree(flow_pay->action_data);
	kfree(flow_pay->mask_data);
	kfree(flow_pay->unmasked_data);
	kfree(flow_pay);
	return err;
}

static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie,
				 struct net_device *netdev)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_payload *flow_pay;
	struct nfp_port *port = NULL;
	int err = 0;

	if (nfp_netdev_is_nfp_repr(netdev))
		port = nfp_port_from_netdev(netdev);

	flow_pay = nfp_flower_search_fl_table(app, cookie, netdev);
	if (!flow_pay)
		return -ENOENT;

	err = nfp_modify_flow_metadata(app, flow_pay);
	if (err)
		goto err_free_merge_flow;

	if (flow_pay->nfp_tun_ipv4_addr)
		nfp_tunnel_del_ipv4_off(app, flow_pay->nfp_tun_ipv4_addr);

	if (flow_pay->nfp_tun_ipv6)
		nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6);

	if (!flow_pay->in_hw) {
		err = 0;
		goto err_free_merge_flow;
	}

	err = nfp_flower_xmit_flow(app, flow_pay,
				   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);

err_free_merge_flow:
	nfp_flower_del_linked_merge_flows(app, flow_pay);
	if (port)
		port->tc_offload_cnt--;
	kfree(flow_pay->action_data);
	kfree(flow_pay->mask_data);
	kfree(flow_pay->unmasked_data);
	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
					    &flow_pay->fl_node,
					    nfp_flower_table_params));
	kfree_rcu(flow_pay, rcu);
	return err;
}

static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt,
			       struct nfp_fl_ct_flow_entry *nft_entry,
			       struct nfp_fl_ct_tc_merge *tc_m_entry)
{
	struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry;
	struct nfp_fl_nft_tc_merge *nft_m_entry;
	unsigned long new_cookie[3];
	int err;

	pre_ct_entry = tc_m_entry->pre_ct_parent;
	post_ct_entry = tc_m_entry->post_ct_parent;

	err = nfp_ct_merge_act_check(pre_ct_entry, post_ct_entry, nft_entry);
	if (err)
		return err;

	/* Check that the two tc flows are also compatible with
	 * the nft entry. No need to check the pre_ct and post_ct
	 * entries as that was already done during pre_merge.
	 * The nft entry does not have a chain populated, so
	 * skip this check.
	 */
	err = nfp_ct_merge_check(pre_ct_entry, nft_entry);
	if (err)
		return err;
	err = nfp_ct_merge_check(nft_entry, post_ct_entry);
	if (err)
		return err;
	err = nfp_ct_check_meta(post_ct_entry, nft_entry);
	if (err)
		return err;

	if (pre_ct_entry->num_prev_m_entries > 0) {
		err = nfp_ct_merge_extra_check(nft_entry, tc_m_entry);
		if (err)
			return err;
	}

	/* Combine the tc_merge and nft cookies into this entry's cookie. */
	new_cookie[0] = tc_m_entry->cookie[0];
	new_cookie[1] = tc_m_entry->cookie[1];
	new_cookie[2] = nft_entry->cookie;
	nft_m_entry = get_hashentry(&zt->nft_merge_tb,
				    &new_cookie,
				    nfp_nft_ct_merge_params,
				    sizeof(*nft_m_entry));

	if (IS_ERR(nft_m_entry))
		return PTR_ERR(nft_m_entry);

	/* nft_m_entry already present, not merging again */
	if (!memcmp(&new_cookie, nft_m_entry->cookie, sizeof(new_cookie)))
		return 0;

	memcpy(&nft_m_entry->cookie, &new_cookie, sizeof(new_cookie));
	nft_m_entry->zt = zt;
	nft_m_entry->tc_m_parent = tc_m_entry;
	nft_m_entry->nft_parent = nft_entry;
	nft_m_entry->tc_flower_cookie = 0;
	/* Copy the netdev from the pre_ct entry. When the tc_m_entry was created
	 * it only combined them if the netdevs were the same, so can use any of them.
	 */
	nft_m_entry->netdev = pre_ct_entry->netdev;

	/* Add this entry to the tc_m_list and nft_flow lists */
	list_add(&nft_m_entry->tc_merge_list, &tc_m_entry->children);
	list_add(&nft_m_entry->nft_flow_list, &nft_entry->children);

	err = rhashtable_insert_fast(&zt->nft_merge_tb, &nft_m_entry->hash_node,
				     nfp_nft_ct_merge_params);
	if (err)
		goto err_nft_ct_merge_insert;

	zt->nft_merge_count++;

	if (post_ct_entry->goto_chain_index > 0)
		return nfp_fl_create_new_pre_ct(nft_m_entry);

	/* Generate offload structure and send to nfp */
	err = nfp_fl_ct_add_offload(nft_m_entry);
	if (err)
		goto err_nft_ct_offload;

	return err;

err_nft_ct_offload:
	nfp_fl_ct_del_offload(zt->priv->app, nft_m_entry->tc_flower_cookie,
			      nft_m_entry->netdev);
err_nft_ct_merge_insert:
	list_del(&nft_m_entry->tc_merge_list);
	list_del(&nft_m_entry->nft_flow_list);
	kfree(nft_m_entry);
	return err;
}

static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt,
			      struct nfp_fl_ct_flow_entry *ct_entry1,
			      struct nfp_fl_ct_flow_entry *ct_entry2)
{
	struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry;
	struct nfp_fl_ct_flow_entry *nft_entry, *nft_tmp;
	struct nfp_fl_ct_tc_merge *m_entry;
	unsigned long new_cookie[2];
	int err;

	if (ct_entry1->type == CT_TYPE_PRE_CT) {
		pre_ct_entry = ct_entry1;
		post_ct_entry = ct_entry2;
	} else {
		post_ct_entry = ct_entry1;
		pre_ct_entry = ct_entry2;
	}

	/* Check that the chain_index of the filter matches the
	 * chain_index of the GOTO action.
	 */
	if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index)
		return -EINVAL;

	err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry);
	if (err)
		return err;

	new_cookie[0] = pre_ct_entry->cookie;
	new_cookie[1] = post_ct_entry->cookie;
	m_entry = get_hashentry(&zt->tc_merge_tb, &new_cookie,
				nfp_tc_ct_merge_params, sizeof(*m_entry));
	if (IS_ERR(m_entry))
		return PTR_ERR(m_entry);

	/* m_entry already present, not merging again */
	if (!memcmp(&new_cookie, m_entry->cookie, sizeof(new_cookie)))
		return 0;

	memcpy(&m_entry->cookie, &new_cookie, sizeof(new_cookie));
	m_entry->zt = zt;
	m_entry->post_ct_parent = post_ct_entry;
	m_entry->pre_ct_parent = pre_ct_entry;

	/* Add this entry to the pre_ct and post_ct lists */
	list_add(&m_entry->post_ct_list, &post_ct_entry->children);
	list_add(&m_entry->pre_ct_list, &pre_ct_entry->children);
	INIT_LIST_HEAD(&m_entry->children);

	err = rhashtable_insert_fast(&zt->tc_merge_tb, &m_entry->hash_node,
				     nfp_tc_ct_merge_params);
	if (err)
		goto err_ct_tc_merge_insert;
	zt->tc_merge_count++;

	/* Merge with existing nft flows */
	list_for_each_entry_safe(nft_entry, nft_tmp, &zt->nft_flows_list,
				 list_node) {
		nfp_ct_do_nft_merge(zt, nft_entry, m_entry);
	}

	return 0;

err_ct_tc_merge_insert:
	list_del(&m_entry->post_ct_list);
	list_del(&m_entry->pre_ct_list);
	kfree(m_entry);
	return err;
}

static struct
nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
					 u16 zone, bool wildcarded)
{
	struct nfp_fl_ct_zone_entry *zt;
	int err;

	if (wildcarded && priv->ct_zone_wc)
		return priv->ct_zone_wc;

	if (!wildcarded) {
		zt = get_hashentry(&priv->ct_zone_table, &zone,
				   nfp_zone_table_params, sizeof(*zt));

		/* If priv is set this is an existing entry, just return it */
		if (IS_ERR(zt) || zt->priv)
			return zt;
	} else {
		zt = kzalloc(sizeof(*zt), GFP_KERNEL);
		if (!zt)
			return ERR_PTR(-ENOMEM);
	}

	zt->zone = zone;
	zt->priv = priv;
	zt->nft = NULL;

	/* init the various hash tables and lists */
	INIT_LIST_HEAD(&zt->pre_ct_list);
	INIT_LIST_HEAD(&zt->post_ct_list);
	INIT_LIST_HEAD(&zt->nft_flows_list);

	err = rhashtable_init(&zt->tc_merge_tb, &nfp_tc_ct_merge_params);
	if (err)
		goto err_tc_merge_tb_init;

	err = rhashtable_init(&zt->nft_merge_tb, &nfp_nft_ct_merge_params);
	if (err)
		goto err_nft_merge_tb_init;

	if (wildcarded) {
		priv->ct_zone_wc = zt;
	} else {
		err = rhashtable_insert_fast(&priv->ct_zone_table,
					     &zt->hash_node,
					     nfp_zone_table_params);
		if (err)
			goto err_zone_insert;
	}

	return zt;

err_zone_insert:
	rhashtable_destroy(&zt->nft_merge_tb);
err_nft_merge_tb_init:
	rhashtable_destroy(&zt->tc_merge_tb);
err_tc_merge_tb_init:
	kfree(zt);
	return ERR_PTR(err);
}

static struct net_device *get_netdev_from_rule(struct flow_rule *rule)
{
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
		struct flow_match_meta match;

		flow_rule_match_meta(rule, &match);
		if (match.key->ingress_ifindex & match.mask->ingress_ifindex)
			return __dev_get_by_index(&init_net,
						  match.key->ingress_ifindex);
	}

	return NULL;
}

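/* nft reports mangle values and masks in host byte order, whereas the merge
 * and compile code expects network byte order. The helper below converts IP
 * header rewrites in place and folds TCP/UDP port rewrites into a single
 * 32-bit word at offset 0 that covers both the source and destination port.
 */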
static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_action)
{
	if (mangle_action->id != FLOW_ACTION_MANGLE)
		return;

	switch (mangle_action->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		mangle_action->mangle.val = (__force u32)cpu_to_be32(mangle_action->mangle.val);
		mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask);
		return;

	/* Both struct tcphdr and struct udphdr start with
	 *	__be16 source;
	 *	__be16 dest;
	 * so we can use the same code for both.
	 */
	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) {
			mangle_action->mangle.val =
				(__force u32)cpu_to_be32(mangle_action->mangle.val << 16);
			/* The mask of a mangle action is an inverse mask, so
			 * set the low 16 bits (the dest port) to 0xFFFF
			 * instead of using a rotate-left operation.
			 */
			mangle_action->mangle.mask =
				(__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF);
		}
		if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) {
			mangle_action->mangle.offset = 0;
			mangle_action->mangle.val =
				(__force u32)cpu_to_be32(mangle_action->mangle.val);
			mangle_action->mangle.mask =
				(__force u32)cpu_to_be32(mangle_action->mangle.mask);
		}
		return;

	default:
		return;
	}
}

static int nfp_nft_ct_set_flow_flag(struct flow_action_entry *act,
				    struct nfp_fl_ct_flow_entry *entry)
{
	switch (act->id) {
	case FLOW_ACTION_CT:
		if (act->ct.action == TCA_CT_ACT_NAT)
			entry->flags |= NFP_FL_ACTION_DO_NAT;
		break;

	case FLOW_ACTION_MANGLE:
		entry->flags |= NFP_FL_ACTION_DO_MANGLE;
		break;

	default:
		break;
	}

	return 0;
}

static struct
nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
					 struct net_device *netdev,
					 struct flow_cls_offload *flow,
					 bool is_nft, struct netlink_ext_ack *extack)
{
	struct nf_flow_match *nft_match = NULL;
	struct nfp_fl_ct_flow_entry *entry;
	struct nfp_fl_ct_map_entry *map;
	struct flow_action_entry *act;
	int err, i;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	entry->rule = flow_rule_alloc(flow->rule->action.num_entries);
	if (!entry->rule) {
		err = -ENOMEM;
		goto err_pre_ct_rule;
	}

	/* nft flows get destroyed after the callback returns, so a full
	 * copy is needed instead of just a reference.
	 */
	if (is_nft) {
		nft_match = kzalloc(sizeof(*nft_match), GFP_KERNEL);
		if (!nft_match) {
			err = -ENOMEM;
			goto err_pre_ct_act;
		}
		memcpy(&nft_match->dissector, flow->rule->match.dissector,
		       sizeof(nft_match->dissector));
		memcpy(&nft_match->mask, flow->rule->match.mask,
		       sizeof(nft_match->mask));
		memcpy(&nft_match->key, flow->rule->match.key,
		       sizeof(nft_match->key));
		entry->rule->match.dissector = &nft_match->dissector;
		entry->rule->match.mask = &nft_match->mask;
		entry->rule->match.key = &nft_match->key;

		if (!netdev)
			netdev = get_netdev_from_rule(entry->rule);
	} else {
		entry->rule->match.dissector = flow->rule->match.dissector;
		entry->rule->match.mask = flow->rule->match.mask;
		entry->rule->match.key = flow->rule->match.key;
	}

	entry->zt = zt;
	entry->netdev = netdev;
	entry->cookie = flow->cookie > 0 ? flow->cookie : (unsigned long)entry;
	entry->chain_index = flow->common.chain_index;
	entry->tun_offset = NFP_FL_CT_NO_TUN;

	/* Copy over the action data. Unfortunately we do not get a handle to
	 * the original tcf_action data, and the flow objects get destroyed, so
	 * we cannot simply save a pointer to them either; the data has to be
	 * copied over.
	 */
	entry->rule->action.num_entries = flow->rule->action.num_entries;
	flow_action_for_each(i, act, &flow->rule->action) {
		struct flow_action_entry *new_act;

		new_act = &entry->rule->action.entries[i];
		memcpy(new_act, act, sizeof(struct flow_action_entry));
		/* nft entry mangle fields are in host byte order and need to
		 * be translated to network byte order.
		 */
		if (is_nft)
			nfp_nft_ct_translate_mangle_action(new_act);

		nfp_nft_ct_set_flow_flag(new_act, entry);
		/* Tunnel encap is a special case: the tunnel info needs to be
		 * allocated and copied.
		 */
		if (act->id == FLOW_ACTION_TUNNEL_ENCAP) {
			struct ip_tunnel_info *tun = act->tunnel;
			size_t tun_size = sizeof(*tun) + tun->options_len;

			new_act->tunnel = kmemdup(tun, tun_size, GFP_ATOMIC);
			if (!new_act->tunnel) {
				err = -ENOMEM;
				goto err_pre_ct_tun_cp;
			}
			entry->tun_offset = i;
		}
	}

	INIT_LIST_HEAD(&entry->children);

	if (flow->cookie == 0)
		return entry;

	/* Now add a ct map entry to flower-priv */
	map = get_hashentry(&zt->priv->ct_map_table, &flow->cookie,
			    nfp_ct_map_params, sizeof(*map));
	if (IS_ERR(map)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload error: ct map entry creation failed");
		err = -ENOMEM;
		goto err_ct_flow_insert;
	}
	map->cookie = flow->cookie;
	map->ct_entry = entry;
	err = rhashtable_insert_fast(&zt->priv->ct_map_table,
				     &map->hash_node,
				     nfp_ct_map_params);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload error: ct map entry table add failed");
		goto err_map_insert;
	}

	return entry;

err_map_insert:
	kfree(map);
err_ct_flow_insert:
	if (entry->tun_offset != NFP_FL_CT_NO_TUN)
		kfree(entry->rule->action.entries[entry->tun_offset].tunnel);
err_pre_ct_tun_cp:
	kfree(nft_match);
err_pre_ct_act:
	kfree(entry->rule);
err_pre_ct_rule:
	kfree(entry);
	return ERR_PTR(err);
}

static void cleanup_nft_merge_entry(struct nfp_fl_nft_tc_merge *m_entry)
{
	struct nfp_fl_ct_zone_entry *zt;
	int err;

	zt = m_entry->zt;

	/* Flow is in HW, need to delete */
	if (m_entry->tc_flower_cookie) {
		err = nfp_fl_ct_del_offload(zt->priv->app, m_entry->tc_flower_cookie,
					    m_entry->netdev);
		if (err)
			return;
	}

	WARN_ON_ONCE(rhashtable_remove_fast(&zt->nft_merge_tb,
					    &m_entry->hash_node,
					    nfp_nft_ct_merge_params));
	zt->nft_merge_count--;
	list_del(&m_entry->tc_merge_list);
	list_del(&m_entry->nft_flow_list);

	if (m_entry->next_pre_ct_entry) {
		struct nfp_fl_ct_map_entry pre_ct_map_ent;

		pre_ct_map_ent.ct_entry = m_entry->next_pre_ct_entry;
		pre_ct_map_ent.cookie = 0;
		nfp_fl_ct_del_flow(&pre_ct_map_ent);
	}

	kfree(m_entry);
}

static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow)
{
	struct nfp_fl_nft_tc_merge *m_entry, *tmp;

	/* These nft merge entries are part of two lists: one is a list of
	 * nft_entries and the other a list of tc_merge structures. Iterate
	 * through the relevant list and clean up the entries.
	 */

	if (is_nft_flow) {
		/* Need to iterate through list of nft_flow entries */
		struct nfp_fl_ct_flow_entry *ct_entry = entry;

		list_for_each_entry_safe(m_entry, tmp, &ct_entry->children,
					 nft_flow_list) {
			cleanup_nft_merge_entry(m_entry);
		}
	} else {
		/* Need to iterate through list of tc_merged_flow entries */
		struct nfp_fl_ct_tc_merge *ct_entry = entry;

		list_for_each_entry_safe(m_entry, tmp, &ct_entry->children,
					 tc_merge_list) {
			cleanup_nft_merge_entry(m_entry);
		}
	}
}

static void nfp_del_tc_merge_entry(struct nfp_fl_ct_tc_merge *m_ent)
{
	struct nfp_fl_ct_zone_entry *zt;
	int err;

	zt = m_ent->zt;
	err = rhashtable_remove_fast(&zt->tc_merge_tb,
				     &m_ent->hash_node,
				     nfp_tc_ct_merge_params);
	if (err)
		pr_warn("WARNING: could not remove merge_entry from hashtable\n");
	zt->tc_merge_count--;
	list_del(&m_ent->post_ct_list);
	list_del(&m_ent->pre_ct_list);

	if (!list_empty(&m_ent->children))
		nfp_free_nft_merge_children(m_ent, false);
	kfree(m_ent);
}

static void nfp_free_tc_merge_children(struct nfp_fl_ct_flow_entry *entry)
{
	struct nfp_fl_ct_tc_merge *m_ent, *tmp;

	switch (entry->type) {
	case CT_TYPE_PRE_CT:
		list_for_each_entry_safe(m_ent, tmp, &entry->children, pre_ct_list) {
			nfp_del_tc_merge_entry(m_ent);
		}
		break;
	case CT_TYPE_POST_CT:
		list_for_each_entry_safe(m_ent, tmp, &entry->children, post_ct_list) {
			nfp_del_tc_merge_entry(m_ent);
		}
		break;
	default:
		break;
	}
}

void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry)
{
	list_del(&entry->list_node);

	if (!list_empty(&entry->children)) {
		if (entry->type == CT_TYPE_NFT)
			nfp_free_nft_merge_children(entry, true);
		else
			nfp_free_tc_merge_children(entry);
	}

	if (entry->tun_offset != NFP_FL_CT_NO_TUN)
		kfree(entry->rule->action.entries[entry->tun_offset].tunnel);

	if (entry->type == CT_TYPE_NFT) {
		struct nf_flow_match *nft_match;

		nft_match = container_of(entry->rule->match.dissector,
					 struct nf_flow_match, dissector);
		kfree(nft_match);
	}

	kfree(entry->rule);
	kfree(entry);
}

static struct flow_action_entry *get_flow_act_ct(struct flow_rule *rule)
{
	struct flow_action_entry *act;
	int i;

	/* More than one ct action may be present in a flow rule;
	 * return the first one that is not a ct clear action.
	 */
	flow_action_for_each(i, act, &rule->action) {
		if (act->id == FLOW_ACTION_CT && act->ct.action != TCA_CT_ACT_CLEAR)
			return act;
	}

	return NULL;
}

static struct flow_action_entry *get_flow_act(struct flow_rule *rule,
					      enum flow_action_id act_id)
{
	struct flow_action_entry *act = NULL;
	int i;

	flow_action_for_each(i, act, &rule->action) {
		if (act->id == act_id)
			return act;
	}
	return NULL;
}

static void
nfp_ct_merge_tc_entries(struct nfp_fl_ct_flow_entry *ct_entry1,
			struct nfp_fl_ct_zone_entry *zt_src,
			struct nfp_fl_ct_zone_entry *zt_dst)
{
	struct nfp_fl_ct_flow_entry *ct_entry2, *ct_tmp;
	struct list_head *ct_list;

	if (ct_entry1->type == CT_TYPE_PRE_CT)
		ct_list = &zt_src->post_ct_list;
	else if (ct_entry1->type == CT_TYPE_POST_CT)
		ct_list = &zt_src->pre_ct_list;
	else
		return;

	list_for_each_entry_safe(ct_entry2, ct_tmp, ct_list,
				 list_node) {
		nfp_ct_do_tc_merge(zt_dst, ct_entry2, ct_entry1);
	}
}

static void
nfp_ct_merge_nft_with_tc(struct nfp_fl_ct_flow_entry *nft_entry,
			 struct nfp_fl_ct_zone_entry *zt)
{
	struct nfp_fl_ct_tc_merge *tc_merge_entry;
	struct rhashtable_iter iter;

	rhashtable_walk_enter(&zt->tc_merge_tb, &iter);
	rhashtable_walk_start(&iter);
	while ((tc_merge_entry = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(tc_merge_entry))
			continue;
		rhashtable_walk_stop(&iter);
		nfp_ct_do_nft_merge(zt, nft_entry, tc_merge_entry);
		rhashtable_walk_start(&iter);
	}
	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
}

int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
			    struct net_device *netdev,
			    struct flow_cls_offload *flow,
			    struct netlink_ext_ack *extack,
			    struct nfp_fl_nft_tc_merge *m_entry)
{
	struct flow_action_entry *ct_act, *ct_goto;
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct nfp_fl_ct_zone_entry *zt;
	int err;

	ct_act = get_flow_act_ct(flow->rule);
	if (!ct_act) {
		NL_SET_ERR_MSG_MOD(extack,
				   "unsupported offload: Conntrack action empty in conntrack offload");
		return -EOPNOTSUPP;
	}

	ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO);
	if (!ct_goto) {
		NL_SET_ERR_MSG_MOD(extack,
				   "unsupported offload: Conntrack requires ACTION_GOTO");
		return -EOPNOTSUPP;
	}

	zt = get_nfp_zone_entry(priv, ct_act->ct.zone, false);
	if (IS_ERR(zt)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload error: Could not create zone table entry");
		return PTR_ERR(zt);
	}

	if (!zt->nft) {
		zt->nft = ct_act->ct.flow_table;
		err = nf_flow_table_offload_add_cb(zt->nft, nfp_fl_ct_handle_nft_flow, zt);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack,
					   "offload error: Could not register nft_callback");
			return err;
		}
	}

	/* Add entry to pre_ct_list */
	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack);
	if (IS_ERR(ct_entry))
		return PTR_ERR(ct_entry);
	ct_entry->type = CT_TYPE_PRE_CT;
	ct_entry->chain_index = flow->common.chain_index;
	ct_entry->goto_chain_index = ct_goto->chain_index;

	if (m_entry) {
		struct nfp_fl_ct_flow_entry *pre_ct_entry;
		int i;

		pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent;
		for (i = 0; i < pre_ct_entry->num_prev_m_entries; i++)
			ct_entry->prev_m_entries[i] = pre_ct_entry->prev_m_entries[i];
		ct_entry->prev_m_entries[i++] = m_entry;
		ct_entry->num_prev_m_entries = i;

		m_entry->next_pre_ct_entry = ct_entry;
	}

	list_add(&ct_entry->list_node, &zt->pre_ct_list);
	zt->pre_ct_count++;

	nfp_ct_merge_tc_entries(ct_entry, zt, zt);

	/* Need to check and merge with tables in the wc_zone as well */
	if (priv->ct_zone_wc)
		nfp_ct_merge_tc_entries(ct_entry, priv->ct_zone_wc, zt);

	return 0;
}

int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
			     struct net_device *netdev,
			     struct flow_cls_offload *flow,
			     struct netlink_ext_ack *extack)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct flow_action_entry *ct_goto;
	struct nfp_fl_ct_zone_entry *zt;
	struct flow_action_entry *act;
int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
			     struct net_device *netdev,
			     struct flow_cls_offload *flow,
			     struct netlink_ext_ack *extack)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct flow_action_entry *ct_goto;
	struct nfp_fl_ct_zone_entry *zt;
	struct flow_action_entry *act;
	bool wildcarded = false;
	struct flow_match_ct ct;
	int i;

	flow_action_for_each(i, act, &rule->action) {
		switch (act->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_REDIRECT_INGRESS:
		case FLOW_ACTION_MIRRED:
		case FLOW_ACTION_MIRRED_INGRESS:
			if (act->dev->rtnl_link_ops &&
			    !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) {
				NL_SET_ERR_MSG_MOD(extack,
						   "unsupported offload: out port is openvswitch internal port");
				return -EOPNOTSUPP;
			}
			break;
		default:
			break;
		}
	}

	flow_rule_match_ct(rule, &ct);
	if (!ct.mask->ct_zone) {
		wildcarded = true;
	} else if (ct.mask->ct_zone != U16_MAX) {
		NL_SET_ERR_MSG_MOD(extack,
				   "unsupported offload: partially wildcarded ct_zone is not supported");
		return -EOPNOTSUPP;
	}

	zt = get_nfp_zone_entry(priv, ct.key->ct_zone, wildcarded);
	if (IS_ERR(zt)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload error: Could not create zone table entry");
		return PTR_ERR(zt);
	}

	/* Add entry to post_ct_list */
	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack);
	if (IS_ERR(ct_entry))
		return PTR_ERR(ct_entry);

	ct_entry->type = CT_TYPE_POST_CT;
	ct_entry->chain_index = flow->common.chain_index;
	ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO);
	ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0;
	list_add(&ct_entry->list_node, &zt->post_ct_list);
	zt->post_ct_count++;

	if (wildcarded) {
		/* Iterate through all zone tables, looking for merges with
		 * pre_ct entries, and merge them.
		 */
		struct rhashtable_iter iter;
		struct nfp_fl_ct_zone_entry *zone_table;

		rhashtable_walk_enter(&priv->ct_zone_table, &iter);
		rhashtable_walk_start(&iter);
		while ((zone_table = rhashtable_walk_next(&iter)) != NULL) {
			if (IS_ERR(zone_table))
				continue;
			rhashtable_walk_stop(&iter);
			nfp_ct_merge_tc_entries(ct_entry, zone_table, zone_table);
			rhashtable_walk_start(&iter);
		}
		rhashtable_walk_stop(&iter);
		rhashtable_walk_exit(&iter);
	} else {
		nfp_ct_merge_tc_entries(ct_entry, zt, zt);
	}

	return 0;
}

int nfp_fl_create_new_pre_ct(struct nfp_fl_nft_tc_merge *m_entry)
{
	struct nfp_fl_ct_flow_entry *pre_ct_entry, *post_ct_entry;
	struct flow_cls_offload new_pre_ct_flow;
	int err;

	pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent;
	if (pre_ct_entry->num_prev_m_entries >= NFP_MAX_RECIRC_CT_ZONES - 1)
		return -1;

	post_ct_entry = m_entry->tc_m_parent->post_ct_parent;
	memset(&new_pre_ct_flow, 0, sizeof(struct flow_cls_offload));
	new_pre_ct_flow.rule = post_ct_entry->rule;
	new_pre_ct_flow.common.chain_index = post_ct_entry->chain_index;

	err = nfp_fl_ct_handle_pre_ct(pre_ct_entry->zt->priv,
				      pre_ct_entry->netdev,
				      &new_pre_ct_flow, NULL,
				      m_entry);
	return err;
}

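/* Add the hardware stats of one nft merge flow to the pkts/bytes/used
 * accumulators, cache the same stats in the parent entries that are not of
 * the queried @type (nft parent, or pre_ct/post_ct parents) as well as in
 * any parents from previous recirculation zones, then reset the counters
 * read from the NFP.
 */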
static void
nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge,
		    enum ct_entry_type type, u64 *m_pkts,
		    u64 *m_bytes, u64 *m_used)
{
	struct nfp_flower_priv *priv = nft_merge->zt->priv;
	struct nfp_fl_payload *nfp_flow;
	u32 ctx_id;

	nfp_flow = nft_merge->flow_pay;
	if (!nfp_flow)
		return;

	ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
	*m_pkts += priv->stats[ctx_id].pkts;
	*m_bytes += priv->stats[ctx_id].bytes;
	*m_used = max_t(u64, *m_used, priv->stats[ctx_id].used);

	/* If request is for a sub_flow which is part of a tunnel merged
	 * flow then update stats from tunnel merged flows first.
	 */
	if (!list_empty(&nfp_flow->linked_flows))
		nfp_flower_update_merge_stats(priv->app, nfp_flow);

	if (type != CT_TYPE_NFT) {
		/* Update nft cached stats */
		flow_stats_update(&nft_merge->nft_parent->stats,
				  priv->stats[ctx_id].bytes,
				  priv->stats[ctx_id].pkts,
				  0, priv->stats[ctx_id].used,
				  FLOW_ACTION_HW_STATS_DELAYED);
	} else {
		/* Update pre_ct cached stats */
		flow_stats_update(&nft_merge->tc_m_parent->pre_ct_parent->stats,
				  priv->stats[ctx_id].bytes,
				  priv->stats[ctx_id].pkts,
				  0, priv->stats[ctx_id].used,
				  FLOW_ACTION_HW_STATS_DELAYED);
		/* Update post_ct cached stats */
		flow_stats_update(&nft_merge->tc_m_parent->post_ct_parent->stats,
				  priv->stats[ctx_id].bytes,
				  priv->stats[ctx_id].pkts,
				  0, priv->stats[ctx_id].used,
				  FLOW_ACTION_HW_STATS_DELAYED);
	}

	/* Update previous pre_ct/post_ct/nft flow stats */
	if (nft_merge->tc_m_parent->pre_ct_parent->num_prev_m_entries > 0) {
		struct nfp_fl_nft_tc_merge *tmp_nft_merge;
		int i;

		for (i = 0; i < nft_merge->tc_m_parent->pre_ct_parent->num_prev_m_entries; i++) {
			tmp_nft_merge = nft_merge->tc_m_parent->pre_ct_parent->prev_m_entries[i];
			flow_stats_update(&tmp_nft_merge->tc_m_parent->pre_ct_parent->stats,
					  priv->stats[ctx_id].bytes,
					  priv->stats[ctx_id].pkts,
					  0, priv->stats[ctx_id].used,
					  FLOW_ACTION_HW_STATS_DELAYED);
			flow_stats_update(&tmp_nft_merge->tc_m_parent->post_ct_parent->stats,
					  priv->stats[ctx_id].bytes,
					  priv->stats[ctx_id].pkts,
					  0, priv->stats[ctx_id].used,
					  FLOW_ACTION_HW_STATS_DELAYED);
			flow_stats_update(&tmp_nft_merge->nft_parent->stats,
					  priv->stats[ctx_id].bytes,
					  priv->stats[ctx_id].pkts,
					  0, priv->stats[ctx_id].used,
					  FLOW_ACTION_HW_STATS_DELAYED);
		}
	}

	/* Reset stats from the nfp */
	priv->stats[ctx_id].pkts = 0;
	priv->stats[ctx_id].bytes = 0;
}

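/**
 * nfp_fl_ct_stats() - Handle a stats request for a conntrack flow entry
 * @flow:	flower classifier stats request
 * @ct_map_ent:	map entry of the pre_ct, post_ct or nft flow being queried
 *
 * Sums the hardware stats of all nft merge flows built from this entry,
 * caches them in the stats of the merge partners, and reports the total
 * (plus any previously cached stats) back through @flow.
 *
 * Returns 0 on success.
 */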
int nfp_fl_ct_stats(struct flow_cls_offload *flow,
		    struct nfp_fl_ct_map_entry *ct_map_ent)
{
	struct nfp_fl_ct_flow_entry *ct_entry = ct_map_ent->ct_entry;
	struct nfp_fl_nft_tc_merge *nft_merge, *nft_m_tmp;
	struct nfp_fl_ct_tc_merge *tc_merge, *tc_m_tmp;

	u64 pkts = 0, bytes = 0, used = 0;
	u64 m_pkts, m_bytes, m_used;

	spin_lock_bh(&ct_entry->zt->priv->stats_lock);

	if (ct_entry->type == CT_TYPE_PRE_CT) {
		/* Iterate tc_merge entries associated with this flow */
		list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children,
					 pre_ct_list) {
			m_pkts = 0;
			m_bytes = 0;
			m_used = 0;
			/* Iterate nft_merge entries associated with this tc_merge flow */
			list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children,
						 tc_merge_list) {
				nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_PRE_CT,
						    &m_pkts, &m_bytes, &m_used);
			}
			pkts += m_pkts;
			bytes += m_bytes;
			used = max_t(u64, used, m_used);
			/* Update post_ct partner */
			flow_stats_update(&tc_merge->post_ct_parent->stats,
					  m_bytes, m_pkts, 0, m_used,
					  FLOW_ACTION_HW_STATS_DELAYED);
		}
	} else if (ct_entry->type == CT_TYPE_POST_CT) {
		/* Iterate tc_merge entries associated with this flow */
		list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children,
					 post_ct_list) {
			m_pkts = 0;
			m_bytes = 0;
			m_used = 0;
			/* Iterate nft_merge entries associated with this tc_merge flow */
			list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children,
						 tc_merge_list) {
				nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_POST_CT,
						    &m_pkts, &m_bytes, &m_used);
			}
			pkts += m_pkts;
			bytes += m_bytes;
			used = max_t(u64, used, m_used);
			/* Update pre_ct partner */
			flow_stats_update(&tc_merge->pre_ct_parent->stats,
					  m_bytes, m_pkts, 0, m_used,
					  FLOW_ACTION_HW_STATS_DELAYED);
		}
	} else {
		/* Iterate nft_merge entries associated with this nft flow */
		list_for_each_entry_safe(nft_merge, nft_m_tmp, &ct_entry->children,
					 nft_flow_list) {
			nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_NFT,
					    &pkts, &bytes, &used);
		}
	}

	/* Add stats from this request to stats potentially cached by
	 * previous requests.
	 */
	flow_stats_update(&ct_entry->stats, bytes, pkts, 0, used,
			  FLOW_ACTION_HW_STATS_DELAYED);
	/* Finally update the flow stats from the original stats request */
	flow_stats_update(&flow->stats, ct_entry->stats.bytes,
			  ct_entry->stats.pkts, 0,
			  ct_entry->stats.lastused,
			  FLOW_ACTION_HW_STATS_DELAYED);
	/* Stats have been synced to the original flow, so the cache can
	 * now be cleared.
	 */
	ct_entry->stats.pkts = 0;
	ct_entry->stats.bytes = 0;
	spin_unlock_bh(&ct_entry->zt->priv->stats_lock);

	return 0;
}

static bool
nfp_fl_ct_offload_nft_supported(struct flow_cls_offload *flow)
{
	struct flow_rule *flow_rule = flow->rule;
	struct flow_action *flow_action = &flow_rule->action;
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id == FLOW_ACTION_CT_METADATA) {
			enum ip_conntrack_info ctinfo =
				act->ct_metadata.cookie & NFCT_INFOMASK;

			return ctinfo != IP_CT_NEW;
		}
	}

	return false;
}

static int
nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
{
	struct nfp_fl_ct_map_entry *ct_map_ent;
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct netlink_ext_ack *extack = NULL;

	extack = flow->common.extack;
	switch (flow->command) {
	case FLOW_CLS_REPLACE:
		if (!nfp_fl_ct_offload_nft_supported(flow))
			return -EOPNOTSUPP;

		/* Netfilter can request offload multiple times for the same
		 * flow - protect against adding duplicates.
		 */
		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
						    nfp_ct_map_params);
		if (!ct_map_ent) {
			ct_entry = nfp_fl_ct_add_flow(zt, NULL, flow, true, extack);
			if (IS_ERR(ct_entry))
				return PTR_ERR(ct_entry);
			ct_entry->type = CT_TYPE_NFT;
			list_add(&ct_entry->list_node, &zt->nft_flows_list);
			zt->nft_flows_count++;
			nfp_ct_merge_nft_with_tc(ct_entry, zt);
		}
		return 0;
	case FLOW_CLS_DESTROY:
		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
						    nfp_ct_map_params);
		return nfp_fl_ct_del_flow(ct_map_ent);
	case FLOW_CLS_STATS:
		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
						    nfp_ct_map_params);
		if (ct_map_ent)
			return nfp_fl_ct_stats(flow, ct_map_ent);
		break;
	default:
		break;
	}
	return -EINVAL;
}

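/**
 * nfp_fl_ct_handle_nft_flow() - Callback for offload requests from nft
 * @type:	type of the tc offload request
 * @type_data:	the flow_cls_offload request from the nft flowtable
 * @cb_priv:	the zone table entry the callback was registered for
 *
 * Registered with nf_flow_table_offload_add_cb() when a pre_ct rule is
 * handled for a zone. Only TC_SETUP_CLSFLOWER requests are accepted, and
 * they are processed under priv->nfp_fl_lock.
 *
 * Returns 0 on success, or a negative error code.
 */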
int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb_priv)
{
	struct flow_cls_offload *flow = type_data;
	struct nfp_fl_ct_zone_entry *zt = cb_priv;
	int err = -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		while (!mutex_trylock(&zt->priv->nfp_fl_lock)) {
			if (!zt->nft) /* avoid deadlock */
				return err;
			msleep(20);
		}
		err = nfp_fl_ct_offload_nft_flow(zt, flow);
		mutex_unlock(&zt->priv->nfp_fl_lock);
		break;
	default:
		return -EOPNOTSUPP;
	}
	return err;
}

static void
nfp_fl_ct_clean_nft_entries(struct nfp_fl_ct_zone_entry *zt)
{
	struct nfp_fl_ct_flow_entry *nft_entry, *ct_tmp;
	struct nfp_fl_ct_map_entry *ct_map_ent;

	list_for_each_entry_safe(nft_entry, ct_tmp, &zt->nft_flows_list,
				 list_node) {
		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table,
						    &nft_entry->cookie,
						    nfp_ct_map_params);
		nfp_fl_ct_del_flow(ct_map_ent);
	}
}

int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
{
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct nfp_fl_ct_zone_entry *zt;
	struct rhashtable *m_table;
	struct nf_flowtable *nft;

	if (!ct_map_ent)
		return -ENOENT;

	zt = ct_map_ent->ct_entry->zt;
	ct_entry = ct_map_ent->ct_entry;
	m_table = &zt->priv->ct_map_table;

	switch (ct_entry->type) {
	case CT_TYPE_PRE_CT:
		zt->pre_ct_count--;
		if (ct_map_ent->cookie > 0)
			rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
					       nfp_ct_map_params);
		nfp_fl_ct_clean_flow_entry(ct_entry);
		if (ct_map_ent->cookie > 0)
			kfree(ct_map_ent);

		if (!zt->pre_ct_count && zt->nft) {
			nft = zt->nft;
			zt->nft = NULL; /* avoid deadlock */
			nf_flow_table_offload_del_cb(nft,
						     nfp_fl_ct_handle_nft_flow,
						     zt);
			nfp_fl_ct_clean_nft_entries(zt);
		}
		break;
	case CT_TYPE_POST_CT:
		zt->post_ct_count--;
		rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
				       nfp_ct_map_params);
		nfp_fl_ct_clean_flow_entry(ct_entry);
		kfree(ct_map_ent);
		break;
	case CT_TYPE_NFT:
		zt->nft_flows_count--;
		rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
				       nfp_ct_map_params);
		nfp_fl_ct_clean_flow_entry(ct_map_ent->ct_entry);
		kfree(ct_map_ent);
		break;
	default:
		break;
	}

	return 0;
}