1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* - 3 * net/sched/act_ct.c Connection Tracking action 4 * 5 * Authors: Paul Blakey <paulb@mellanox.com> 6 * Yossi Kuperman <yossiku@mellanox.com> 7 * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> 8 */ 9 10 #include <linux/module.h> 11 #include <linux/init.h> 12 #include <linux/kernel.h> 13 #include <linux/skbuff.h> 14 #include <linux/rtnetlink.h> 15 #include <linux/pkt_cls.h> 16 #include <linux/ip.h> 17 #include <linux/ipv6.h> 18 #include <net/netlink.h> 19 #include <net/pkt_sched.h> 20 #include <net/pkt_cls.h> 21 #include <net/act_api.h> 22 #include <net/ip.h> 23 #include <net/ipv6_frag.h> 24 #include <uapi/linux/tc_act/tc_ct.h> 25 #include <net/tc_act/tc_ct.h> 26 27 #include <net/netfilter/nf_conntrack.h> 28 #include <net/netfilter/nf_conntrack_core.h> 29 #include <net/netfilter/nf_conntrack_zones.h> 30 #include <net/netfilter/nf_conntrack_helper.h> 31 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 32 #include <uapi/linux/netfilter/nf_nat.h> 33 34 static struct tc_action_ops act_ct_ops; 35 static unsigned int ct_net_id; 36 37 struct tc_ct_action_net { 38 struct tc_action_net tn; /* Must be first */ 39 bool labels; 40 }; 41 42 /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ 43 static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, 44 u16 zone_id, bool force) 45 { 46 enum ip_conntrack_info ctinfo; 47 struct nf_conn *ct; 48 49 ct = nf_ct_get(skb, &ctinfo); 50 if (!ct) 51 return false; 52 if (!net_eq(net, read_pnet(&ct->ct_net))) 53 return false; 54 if (nf_ct_zone(ct)->id != zone_id) 55 return false; 56 57 /* Force conntrack entry direction. */ 58 if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { 59 if (nf_ct_is_confirmed(ct)) 60 nf_ct_kill(ct); 61 62 nf_conntrack_put(&ct->ct_general); 63 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 64 65 return false; 66 } 67 68 return true; 69 } 70 71 /* Trim the skb to the length specified by the IP/IPv6 header, 72 * removing any trailing lower-layer padding. This prepares the skb 73 * for higher-layer processing that assumes skb->len excludes padding 74 * (such as nf_ip_checksum). The caller needs to pull the skb to the 75 * network header, and ensure ip_hdr/ipv6_hdr points to valid data. 76 */ 77 static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) 78 { 79 unsigned int len; 80 int err; 81 82 switch (family) { 83 case NFPROTO_IPV4: 84 len = ntohs(ip_hdr(skb)->tot_len); 85 break; 86 case NFPROTO_IPV6: 87 len = sizeof(struct ipv6hdr) 88 + ntohs(ipv6_hdr(skb)->payload_len); 89 break; 90 default: 91 len = skb->len; 92 } 93 94 err = pskb_trim_rcsum(skb, len); 95 96 return err; 97 } 98 99 static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) 100 { 101 u8 family = NFPROTO_UNSPEC; 102 103 switch (skb->protocol) { 104 case htons(ETH_P_IP): 105 family = NFPROTO_IPV4; 106 break; 107 case htons(ETH_P_IPV6): 108 family = NFPROTO_IPV6; 109 break; 110 default: 111 break; 112 } 113 114 return family; 115 } 116 117 static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag) 118 { 119 unsigned int len; 120 121 len = skb_network_offset(skb) + sizeof(struct iphdr); 122 if (unlikely(skb->len < len)) 123 return -EINVAL; 124 if (unlikely(!pskb_may_pull(skb, len))) 125 return -ENOMEM; 126 127 *frag = ip_is_fragment(ip_hdr(skb)); 128 return 0; 129 } 130 131 static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) 132 { 133 unsigned int flags = 0, len, payload_ofs = 0; 134 unsigned short frag_off; 135 int nexthdr; 136 137 len = skb_network_offset(skb) + sizeof(struct ipv6hdr); 138 if (unlikely(skb->len < len)) 139 return -EINVAL; 140 if (unlikely(!pskb_may_pull(skb, len))) 141 return -ENOMEM; 142 143 nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags); 144 if (unlikely(nexthdr < 0)) 145 return -EPROTO; 146 147 *frag = flags & IP6_FH_F_FRAG; 148 return 0; 149 } 150 151 static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, 152 u8 family, u16 zone) 153 { 154 enum ip_conntrack_info ctinfo; 155 struct nf_conn *ct; 156 int err = 0; 157 bool frag; 158 159 /* Previously seen (loopback)? Ignore. */ 160 ct = nf_ct_get(skb, &ctinfo); 161 if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) 162 return 0; 163 164 if (family == NFPROTO_IPV4) 165 err = tcf_ct_ipv4_is_fragment(skb, &frag); 166 else 167 err = tcf_ct_ipv6_is_fragment(skb, &frag); 168 if (err || !frag) 169 return err; 170 171 skb_get(skb); 172 173 if (family == NFPROTO_IPV4) { 174 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; 175 176 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 177 local_bh_disable(); 178 err = ip_defrag(net, skb, user); 179 local_bh_enable(); 180 if (err && err != -EINPROGRESS) 181 goto out_free; 182 } else { /* NFPROTO_IPV6 */ 183 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 184 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 185 186 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 187 err = nf_ct_frag6_gather(net, skb, user); 188 if (err && err != -EINPROGRESS) 189 goto out_free; 190 #else 191 err = -EOPNOTSUPP; 192 goto out_free; 193 #endif 194 } 195 196 skb_clear_hash(skb); 197 skb->ignore_df = 1; 198 return err; 199 200 out_free: 201 kfree_skb(skb); 202 return err; 203 } 204 205 static void tcf_ct_params_free(struct rcu_head *head) 206 { 207 struct tcf_ct_params *params = container_of(head, 208 struct tcf_ct_params, rcu); 209 210 if (params->tmpl) 211 nf_conntrack_put(¶ms->tmpl->ct_general); 212 kfree(params); 213 } 214 215 #if IS_ENABLED(CONFIG_NF_NAT) 216 /* Modelled after nf_nat_ipv[46]_fn(). 217 * range is only used for new, uninitialized NAT state. 218 * Returns either NF_ACCEPT or NF_DROP. 219 */ 220 static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 221 enum ip_conntrack_info ctinfo, 222 const struct nf_nat_range2 *range, 223 enum nf_nat_manip_type maniptype) 224 { 225 int hooknum, err = NF_ACCEPT; 226 227 /* See HOOK2MANIP(). */ 228 if (maniptype == NF_NAT_MANIP_SRC) 229 hooknum = NF_INET_LOCAL_IN; /* Source NAT */ 230 else 231 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ 232 233 switch (ctinfo) { 234 case IP_CT_RELATED: 235 case IP_CT_RELATED_REPLY: 236 if (skb->protocol == htons(ETH_P_IP) && 237 ip_hdr(skb)->protocol == IPPROTO_ICMP) { 238 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 239 hooknum)) 240 err = NF_DROP; 241 goto out; 242 } else if (IS_ENABLED(CONFIG_IPV6) && 243 skb->protocol == htons(ETH_P_IPV6)) { 244 __be16 frag_off; 245 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 246 int hdrlen = ipv6_skip_exthdr(skb, 247 sizeof(struct ipv6hdr), 248 &nexthdr, &frag_off); 249 250 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 251 if (!nf_nat_icmpv6_reply_translation(skb, ct, 252 ctinfo, 253 hooknum, 254 hdrlen)) 255 err = NF_DROP; 256 goto out; 257 } 258 } 259 /* Non-ICMP, fall thru to initialize if needed. */ 260 /* fall through */ 261 case IP_CT_NEW: 262 /* Seen it before? This can happen for loopback, retrans, 263 * or local packets. 264 */ 265 if (!nf_nat_initialized(ct, maniptype)) { 266 /* Initialize according to the NAT action. */ 267 err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) 268 /* Action is set up to establish a new 269 * mapping. 270 */ 271 ? nf_nat_setup_info(ct, range, maniptype) 272 : nf_nat_alloc_null_binding(ct, hooknum); 273 if (err != NF_ACCEPT) 274 goto out; 275 } 276 break; 277 278 case IP_CT_ESTABLISHED: 279 case IP_CT_ESTABLISHED_REPLY: 280 break; 281 282 default: 283 err = NF_DROP; 284 goto out; 285 } 286 287 err = nf_nat_packet(ct, ctinfo, hooknum, skb); 288 out: 289 return err; 290 } 291 #endif /* CONFIG_NF_NAT */ 292 293 static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) 294 { 295 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 296 u32 new_mark; 297 298 if (!mask) 299 return; 300 301 new_mark = mark | (ct->mark & ~(mask)); 302 if (ct->mark != new_mark) { 303 ct->mark = new_mark; 304 if (nf_ct_is_confirmed(ct)) 305 nf_conntrack_event_cache(IPCT_MARK, ct); 306 } 307 #endif 308 } 309 310 static void tcf_ct_act_set_labels(struct nf_conn *ct, 311 u32 *labels, 312 u32 *labels_m) 313 { 314 #if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) 315 size_t labels_sz = sizeof_field(struct tcf_ct_params, labels); 316 317 if (!memchr_inv(labels_m, 0, labels_sz)) 318 return; 319 320 nf_connlabels_replace(ct, labels, labels_m, 4); 321 #endif 322 } 323 324 static int tcf_ct_act_nat(struct sk_buff *skb, 325 struct nf_conn *ct, 326 enum ip_conntrack_info ctinfo, 327 int ct_action, 328 struct nf_nat_range2 *range, 329 bool commit) 330 { 331 #if IS_ENABLED(CONFIG_NF_NAT) 332 int err; 333 enum nf_nat_manip_type maniptype; 334 335 if (!(ct_action & TCA_CT_ACT_NAT)) 336 return NF_ACCEPT; 337 338 /* Add NAT extension if not confirmed yet. */ 339 if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) 340 return NF_DROP; /* Can't NAT. */ 341 342 if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) && 343 (ctinfo != IP_CT_RELATED || commit)) { 344 /* NAT an established or related connection like before. */ 345 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) 346 /* This is the REPLY direction for a connection 347 * for which NAT was applied in the forward 348 * direction. Do the reverse NAT. 349 */ 350 maniptype = ct->status & IPS_SRC_NAT 351 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; 352 else 353 maniptype = ct->status & IPS_SRC_NAT 354 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; 355 } else if (ct_action & TCA_CT_ACT_NAT_SRC) { 356 maniptype = NF_NAT_MANIP_SRC; 357 } else if (ct_action & TCA_CT_ACT_NAT_DST) { 358 maniptype = NF_NAT_MANIP_DST; 359 } else { 360 return NF_ACCEPT; 361 } 362 363 err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); 364 if (err == NF_ACCEPT && 365 ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { 366 if (maniptype == NF_NAT_MANIP_SRC) 367 maniptype = NF_NAT_MANIP_DST; 368 else 369 maniptype = NF_NAT_MANIP_SRC; 370 371 err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); 372 } 373 return err; 374 #else 375 return NF_ACCEPT; 376 #endif 377 } 378 379 static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, 380 struct tcf_result *res) 381 { 382 struct net *net = dev_net(skb->dev); 383 bool cached, commit, clear, force; 384 enum ip_conntrack_info ctinfo; 385 struct tcf_ct *c = to_ct(a); 386 struct nf_conn *tmpl = NULL; 387 struct nf_hook_state state; 388 int nh_ofs, err, retval; 389 struct tcf_ct_params *p; 390 struct nf_conn *ct; 391 u8 family; 392 393 p = rcu_dereference_bh(c->params); 394 395 retval = READ_ONCE(c->tcf_action); 396 commit = p->ct_action & TCA_CT_ACT_COMMIT; 397 clear = p->ct_action & TCA_CT_ACT_CLEAR; 398 force = p->ct_action & TCA_CT_ACT_FORCE; 399 tmpl = p->tmpl; 400 401 if (clear) { 402 ct = nf_ct_get(skb, &ctinfo); 403 if (ct) { 404 nf_conntrack_put(&ct->ct_general); 405 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 406 } 407 408 goto out; 409 } 410 411 family = tcf_ct_skb_nf_family(skb); 412 if (family == NFPROTO_UNSPEC) 413 goto drop; 414 415 /* The conntrack module expects to be working at L3. 416 * We also try to pull the IPv4/6 header to linear area 417 */ 418 nh_ofs = skb_network_offset(skb); 419 skb_pull_rcsum(skb, nh_ofs); 420 err = tcf_ct_handle_fragments(net, skb, family, p->zone); 421 if (err == -EINPROGRESS) { 422 retval = TC_ACT_STOLEN; 423 goto out; 424 } 425 if (err) 426 goto drop; 427 428 err = tcf_ct_skb_network_trim(skb, family); 429 if (err) 430 goto drop; 431 432 /* If we are recirculating packets to match on ct fields and 433 * committing with a separate ct action, then we don't need to 434 * actually run the packet through conntrack twice unless it's for a 435 * different zone. 436 */ 437 cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); 438 if (!cached) { 439 /* Associate skb with specified zone. */ 440 if (tmpl) { 441 ct = nf_ct_get(skb, &ctinfo); 442 if (skb_nfct(skb)) 443 nf_conntrack_put(skb_nfct(skb)); 444 nf_conntrack_get(&tmpl->ct_general); 445 nf_ct_set(skb, tmpl, IP_CT_NEW); 446 } 447 448 state.hook = NF_INET_PRE_ROUTING; 449 state.net = net; 450 state.pf = family; 451 err = nf_conntrack_in(skb, &state); 452 if (err != NF_ACCEPT) 453 goto out_push; 454 } 455 456 ct = nf_ct_get(skb, &ctinfo); 457 if (!ct) 458 goto out_push; 459 nf_ct_deliver_cached_events(ct); 460 461 err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit); 462 if (err != NF_ACCEPT) 463 goto drop; 464 465 if (commit) { 466 tcf_ct_act_set_mark(ct, p->mark, p->mark_mask); 467 tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); 468 469 /* This will take care of sending queued events 470 * even if the connection is already confirmed. 471 */ 472 nf_conntrack_confirm(skb); 473 } 474 475 out_push: 476 skb_push_rcsum(skb, nh_ofs); 477 478 out: 479 tcf_action_update_bstats(&c->common, skb); 480 return retval; 481 482 drop: 483 tcf_action_inc_drop_qstats(&c->common); 484 return TC_ACT_SHOT; 485 } 486 487 static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { 488 [TCA_CT_ACTION] = { .type = NLA_U16 }, 489 [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) }, 490 [TCA_CT_ZONE] = { .type = NLA_U16 }, 491 [TCA_CT_MARK] = { .type = NLA_U32 }, 492 [TCA_CT_MARK_MASK] = { .type = NLA_U32 }, 493 [TCA_CT_LABELS] = { .type = NLA_BINARY, 494 .len = 128 / BITS_PER_BYTE }, 495 [TCA_CT_LABELS_MASK] = { .type = NLA_BINARY, 496 .len = 128 / BITS_PER_BYTE }, 497 [TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 }, 498 [TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 }, 499 [TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN, 500 .len = sizeof(struct in6_addr) }, 501 [TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN, 502 .len = sizeof(struct in6_addr) }, 503 [TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 }, 504 [TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 }, 505 }; 506 507 static int tcf_ct_fill_params_nat(struct tcf_ct_params *p, 508 struct tc_ct *parm, 509 struct nlattr **tb, 510 struct netlink_ext_ack *extack) 511 { 512 struct nf_nat_range2 *range; 513 514 if (!(p->ct_action & TCA_CT_ACT_NAT)) 515 return 0; 516 517 if (!IS_ENABLED(CONFIG_NF_NAT)) { 518 NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel"); 519 return -EOPNOTSUPP; 520 } 521 522 if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) 523 return 0; 524 525 if ((p->ct_action & TCA_CT_ACT_NAT_SRC) && 526 (p->ct_action & TCA_CT_ACT_NAT_DST)) { 527 NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time"); 528 return -EOPNOTSUPP; 529 } 530 531 range = &p->range; 532 if (tb[TCA_CT_NAT_IPV4_MIN]) { 533 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX]; 534 535 p->ipv4_range = true; 536 range->flags |= NF_NAT_RANGE_MAP_IPS; 537 range->min_addr.ip = 538 nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]); 539 540 range->max_addr.ip = max_attr ? 541 nla_get_in_addr(max_attr) : 542 range->min_addr.ip; 543 } else if (tb[TCA_CT_NAT_IPV6_MIN]) { 544 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX]; 545 546 p->ipv4_range = false; 547 range->flags |= NF_NAT_RANGE_MAP_IPS; 548 range->min_addr.in6 = 549 nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]); 550 551 range->max_addr.in6 = max_attr ? 552 nla_get_in6_addr(max_attr) : 553 range->min_addr.in6; 554 } 555 556 if (tb[TCA_CT_NAT_PORT_MIN]) { 557 range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 558 range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]); 559 560 range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ? 561 nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) : 562 range->min_proto.all; 563 } 564 565 return 0; 566 } 567 568 static void tcf_ct_set_key_val(struct nlattr **tb, 569 void *val, int val_type, 570 void *mask, int mask_type, 571 int len) 572 { 573 if (!tb[val_type]) 574 return; 575 nla_memcpy(val, tb[val_type], len); 576 577 if (!mask) 578 return; 579 580 if (mask_type == TCA_CT_UNSPEC || !tb[mask_type]) 581 memset(mask, 0xff, len); 582 else 583 nla_memcpy(mask, tb[mask_type], len); 584 } 585 586 static int tcf_ct_fill_params(struct net *net, 587 struct tcf_ct_params *p, 588 struct tc_ct *parm, 589 struct nlattr **tb, 590 struct netlink_ext_ack *extack) 591 { 592 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 593 struct nf_conntrack_zone zone; 594 struct nf_conn *tmpl; 595 int err; 596 597 p->zone = NF_CT_DEFAULT_ZONE_ID; 598 599 tcf_ct_set_key_val(tb, 600 &p->ct_action, TCA_CT_ACTION, 601 NULL, TCA_CT_UNSPEC, 602 sizeof(p->ct_action)); 603 604 if (p->ct_action & TCA_CT_ACT_CLEAR) 605 return 0; 606 607 err = tcf_ct_fill_params_nat(p, parm, tb, extack); 608 if (err) 609 return err; 610 611 if (tb[TCA_CT_MARK]) { 612 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) { 613 NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled."); 614 return -EOPNOTSUPP; 615 } 616 tcf_ct_set_key_val(tb, 617 &p->mark, TCA_CT_MARK, 618 &p->mark_mask, TCA_CT_MARK_MASK, 619 sizeof(p->mark)); 620 } 621 622 if (tb[TCA_CT_LABELS]) { 623 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { 624 NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); 625 return -EOPNOTSUPP; 626 } 627 628 if (!tn->labels) { 629 NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); 630 return -EOPNOTSUPP; 631 } 632 tcf_ct_set_key_val(tb, 633 p->labels, TCA_CT_LABELS, 634 p->labels_mask, TCA_CT_LABELS_MASK, 635 sizeof(p->labels)); 636 } 637 638 if (tb[TCA_CT_ZONE]) { 639 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) { 640 NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled."); 641 return -EOPNOTSUPP; 642 } 643 644 tcf_ct_set_key_val(tb, 645 &p->zone, TCA_CT_ZONE, 646 NULL, TCA_CT_UNSPEC, 647 sizeof(p->zone)); 648 } 649 650 if (p->zone == NF_CT_DEFAULT_ZONE_ID) 651 return 0; 652 653 nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0); 654 tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL); 655 if (!tmpl) { 656 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template"); 657 return -ENOMEM; 658 } 659 __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); 660 nf_conntrack_get(&tmpl->ct_general); 661 p->tmpl = tmpl; 662 663 return 0; 664 } 665 666 static int tcf_ct_init(struct net *net, struct nlattr *nla, 667 struct nlattr *est, struct tc_action **a, 668 int replace, int bind, bool rtnl_held, 669 struct tcf_proto *tp, u32 flags, 670 struct netlink_ext_ack *extack) 671 { 672 struct tc_action_net *tn = net_generic(net, ct_net_id); 673 struct tcf_ct_params *params = NULL; 674 struct nlattr *tb[TCA_CT_MAX + 1]; 675 struct tcf_chain *goto_ch = NULL; 676 struct tc_ct *parm; 677 struct tcf_ct *c; 678 int err, res = 0; 679 u32 index; 680 681 if (!nla) { 682 NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed"); 683 return -EINVAL; 684 } 685 686 err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack); 687 if (err < 0) 688 return err; 689 690 if (!tb[TCA_CT_PARMS]) { 691 NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters"); 692 return -EINVAL; 693 } 694 parm = nla_data(tb[TCA_CT_PARMS]); 695 index = parm->index; 696 err = tcf_idr_check_alloc(tn, &index, a, bind); 697 if (err < 0) 698 return err; 699 700 if (!err) { 701 err = tcf_idr_create_from_flags(tn, index, est, a, 702 &act_ct_ops, bind, flags); 703 if (err) { 704 tcf_idr_cleanup(tn, index); 705 return err; 706 } 707 res = ACT_P_CREATED; 708 } else { 709 if (bind) 710 return 0; 711 712 if (!replace) { 713 tcf_idr_release(*a, bind); 714 return -EEXIST; 715 } 716 } 717 err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); 718 if (err < 0) 719 goto cleanup; 720 721 c = to_ct(*a); 722 723 params = kzalloc(sizeof(*params), GFP_KERNEL); 724 if (unlikely(!params)) { 725 err = -ENOMEM; 726 goto cleanup; 727 } 728 729 err = tcf_ct_fill_params(net, params, parm, tb, extack); 730 if (err) 731 goto cleanup; 732 733 spin_lock_bh(&c->tcf_lock); 734 goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); 735 params = rcu_replace_pointer(c->params, params, 736 lockdep_is_held(&c->tcf_lock)); 737 spin_unlock_bh(&c->tcf_lock); 738 739 if (goto_ch) 740 tcf_chain_put_by_act(goto_ch); 741 if (params) 742 kfree_rcu(params, rcu); 743 if (res == ACT_P_CREATED) 744 tcf_idr_insert(tn, *a); 745 746 return res; 747 748 cleanup: 749 if (goto_ch) 750 tcf_chain_put_by_act(goto_ch); 751 kfree(params); 752 tcf_idr_release(*a, bind); 753 return err; 754 } 755 756 static void tcf_ct_cleanup(struct tc_action *a) 757 { 758 struct tcf_ct_params *params; 759 struct tcf_ct *c = to_ct(a); 760 761 params = rcu_dereference_protected(c->params, 1); 762 if (params) 763 call_rcu(¶ms->rcu, tcf_ct_params_free); 764 } 765 766 static int tcf_ct_dump_key_val(struct sk_buff *skb, 767 void *val, int val_type, 768 void *mask, int mask_type, 769 int len) 770 { 771 int err; 772 773 if (mask && !memchr_inv(mask, 0, len)) 774 return 0; 775 776 err = nla_put(skb, val_type, len, val); 777 if (err) 778 return err; 779 780 if (mask_type != TCA_CT_UNSPEC) { 781 err = nla_put(skb, mask_type, len, mask); 782 if (err) 783 return err; 784 } 785 786 return 0; 787 } 788 789 static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p) 790 { 791 struct nf_nat_range2 *range = &p->range; 792 793 if (!(p->ct_action & TCA_CT_ACT_NAT)) 794 return 0; 795 796 if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) 797 return 0; 798 799 if (range->flags & NF_NAT_RANGE_MAP_IPS) { 800 if (p->ipv4_range) { 801 if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN, 802 range->min_addr.ip)) 803 return -1; 804 if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX, 805 range->max_addr.ip)) 806 return -1; 807 } else { 808 if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN, 809 &range->min_addr.in6)) 810 return -1; 811 if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX, 812 &range->max_addr.in6)) 813 return -1; 814 } 815 } 816 817 if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { 818 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN, 819 range->min_proto.all)) 820 return -1; 821 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX, 822 range->max_proto.all)) 823 return -1; 824 } 825 826 return 0; 827 } 828 829 static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, 830 int bind, int ref) 831 { 832 unsigned char *b = skb_tail_pointer(skb); 833 struct tcf_ct *c = to_ct(a); 834 struct tcf_ct_params *p; 835 836 struct tc_ct opt = { 837 .index = c->tcf_index, 838 .refcnt = refcount_read(&c->tcf_refcnt) - ref, 839 .bindcnt = atomic_read(&c->tcf_bindcnt) - bind, 840 }; 841 struct tcf_t t; 842 843 spin_lock_bh(&c->tcf_lock); 844 p = rcu_dereference_protected(c->params, 845 lockdep_is_held(&c->tcf_lock)); 846 opt.action = c->tcf_action; 847 848 if (tcf_ct_dump_key_val(skb, 849 &p->ct_action, TCA_CT_ACTION, 850 NULL, TCA_CT_UNSPEC, 851 sizeof(p->ct_action))) 852 goto nla_put_failure; 853 854 if (p->ct_action & TCA_CT_ACT_CLEAR) 855 goto skip_dump; 856 857 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 858 tcf_ct_dump_key_val(skb, 859 &p->mark, TCA_CT_MARK, 860 &p->mark_mask, TCA_CT_MARK_MASK, 861 sizeof(p->mark))) 862 goto nla_put_failure; 863 864 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 865 tcf_ct_dump_key_val(skb, 866 p->labels, TCA_CT_LABELS, 867 p->labels_mask, TCA_CT_LABELS_MASK, 868 sizeof(p->labels))) 869 goto nla_put_failure; 870 871 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 872 tcf_ct_dump_key_val(skb, 873 &p->zone, TCA_CT_ZONE, 874 NULL, TCA_CT_UNSPEC, 875 sizeof(p->zone))) 876 goto nla_put_failure; 877 878 if (tcf_ct_dump_nat(skb, p)) 879 goto nla_put_failure; 880 881 skip_dump: 882 if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt)) 883 goto nla_put_failure; 884 885 tcf_tm_dump(&t, &c->tcf_tm); 886 if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD)) 887 goto nla_put_failure; 888 spin_unlock_bh(&c->tcf_lock); 889 890 return skb->len; 891 nla_put_failure: 892 spin_unlock_bh(&c->tcf_lock); 893 nlmsg_trim(skb, b); 894 return -1; 895 } 896 897 static int tcf_ct_walker(struct net *net, struct sk_buff *skb, 898 struct netlink_callback *cb, int type, 899 const struct tc_action_ops *ops, 900 struct netlink_ext_ack *extack) 901 { 902 struct tc_action_net *tn = net_generic(net, ct_net_id); 903 904 return tcf_generic_walker(tn, skb, cb, type, ops, extack); 905 } 906 907 static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) 908 { 909 struct tc_action_net *tn = net_generic(net, ct_net_id); 910 911 return tcf_idr_search(tn, a, index); 912 } 913 914 static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, 915 u64 lastuse, bool hw) 916 { 917 struct tcf_ct *c = to_ct(a); 918 919 tcf_action_update_stats(a, bytes, packets, false, hw); 920 c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); 921 } 922 923 static struct tc_action_ops act_ct_ops = { 924 .kind = "ct", 925 .id = TCA_ID_CT, 926 .owner = THIS_MODULE, 927 .act = tcf_ct_act, 928 .dump = tcf_ct_dump, 929 .init = tcf_ct_init, 930 .cleanup = tcf_ct_cleanup, 931 .walk = tcf_ct_walker, 932 .lookup = tcf_ct_search, 933 .stats_update = tcf_stats_update, 934 .size = sizeof(struct tcf_ct), 935 }; 936 937 static __net_init int ct_init_net(struct net *net) 938 { 939 unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; 940 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 941 942 if (nf_connlabels_get(net, n_bits - 1)) { 943 tn->labels = false; 944 pr_err("act_ct: Failed to set connlabels length"); 945 } else { 946 tn->labels = true; 947 } 948 949 return tc_action_net_init(net, &tn->tn, &act_ct_ops); 950 } 951 952 static void __net_exit ct_exit_net(struct list_head *net_list) 953 { 954 struct net *net; 955 956 rtnl_lock(); 957 list_for_each_entry(net, net_list, exit_list) { 958 struct tc_ct_action_net *tn = net_generic(net, ct_net_id); 959 960 if (tn->labels) 961 nf_connlabels_put(net); 962 } 963 rtnl_unlock(); 964 965 tc_action_net_exit(net_list, ct_net_id); 966 } 967 968 static struct pernet_operations ct_net_ops = { 969 .init = ct_init_net, 970 .exit_batch = ct_exit_net, 971 .id = &ct_net_id, 972 .size = sizeof(struct tc_ct_action_net), 973 }; 974 975 static int __init ct_init_module(void) 976 { 977 return tcf_register_action(&act_ct_ops, &ct_net_ops); 978 } 979 980 static void __exit ct_cleanup_module(void) 981 { 982 tcf_unregister_action(&act_ct_ops, &ct_net_ops); 983 } 984 985 module_init(ct_init_module); 986 module_exit(ct_cleanup_module); 987 MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>"); 988 MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>"); 989 MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>"); 990 MODULE_DESCRIPTION("Connection tracking action"); 991 MODULE_LICENSE("GPL v2"); 992 993