/*
 * Copyright (c) 2015 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */

#include <linux/module.h>
#include <linux/openvswitch.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/sctp.h>
#include <net/ip.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>

#ifdef CONFIG_NF_NAT_NEEDED
#include <linux/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
#endif

#include "datapath.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"

/* Pair of length bounds ('minlen' .. 'maxlen').
 * NOTE(review): no user of this type is visible in this part of the file;
 * confirm what the bounds apply to before relying on this description.
 */
struct ovs_ct_len_tbl {
	int maxlen;
	int minlen;
};

/* Metadata mark for masked write to conntrack mark.
 * 'mask' selects the bits of the connection mark that are replaced; 'value'
 * carries the new contents of those bits.
 */
struct md_mark {
	u32 value;
	u32 mask;
};

/* Metadata label for masked write to conntrack label.
 * Same value/mask scheme as struct md_mark, applied to the label bits.
 */
struct md_labels {
	struct ovs_key_ct_labels value;
	struct ovs_key_ct_labels mask;
};

/* Flag bits kept in the 3-bit 'nat' field of struct ovs_conntrack_info. */
enum ovs_ct_nat {
	OVS_CT_NAT = 1 << 0,     /* NAT for committed connections only. */
	OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
	OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
};

/* Conntrack action context for execution.
*/ 62 struct ovs_conntrack_info { 63 struct nf_conntrack_helper *helper; 64 struct nf_conntrack_zone zone; 65 struct nf_conn *ct; 66 u8 commit : 1; 67 u8 nat : 3; /* enum ovs_ct_nat */ 68 u8 force : 1; 69 u16 family; 70 struct md_mark mark; 71 struct md_labels labels; 72 #ifdef CONFIG_NF_NAT_NEEDED 73 struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */ 74 #endif 75 }; 76 77 static bool labels_nonzero(const struct ovs_key_ct_labels *labels); 78 79 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); 80 81 static u16 key_to_nfproto(const struct sw_flow_key *key) 82 { 83 switch (ntohs(key->eth.type)) { 84 case ETH_P_IP: 85 return NFPROTO_IPV4; 86 case ETH_P_IPV6: 87 return NFPROTO_IPV6; 88 default: 89 return NFPROTO_UNSPEC; 90 } 91 } 92 93 /* Map SKB connection state into the values used by flow definition. */ 94 static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) 95 { 96 u8 ct_state = OVS_CS_F_TRACKED; 97 98 switch (ctinfo) { 99 case IP_CT_ESTABLISHED_REPLY: 100 case IP_CT_RELATED_REPLY: 101 ct_state |= OVS_CS_F_REPLY_DIR; 102 break; 103 default: 104 break; 105 } 106 107 switch (ctinfo) { 108 case IP_CT_ESTABLISHED: 109 case IP_CT_ESTABLISHED_REPLY: 110 ct_state |= OVS_CS_F_ESTABLISHED; 111 break; 112 case IP_CT_RELATED: 113 case IP_CT_RELATED_REPLY: 114 ct_state |= OVS_CS_F_RELATED; 115 break; 116 case IP_CT_NEW: 117 ct_state |= OVS_CS_F_NEW; 118 break; 119 default: 120 break; 121 } 122 123 return ct_state; 124 } 125 126 static u32 ovs_ct_get_mark(const struct nf_conn *ct) 127 { 128 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 129 return ct ? ct->mark : 0; 130 #else 131 return 0; 132 #endif 133 } 134 135 /* Guard against conntrack labels max size shrinking below 128 bits. */ 136 #if NF_CT_LABELS_MAX_SIZE < 16 137 #error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes 138 #endif 139 140 static void ovs_ct_get_labels(const struct nf_conn *ct, 141 struct ovs_key_ct_labels *labels) 142 { 143 struct nf_conn_labels *cl = ct ? 
nf_ct_labels_find(ct) : NULL; 144 145 if (cl) 146 memcpy(labels, cl->bits, OVS_CT_LABELS_LEN); 147 else 148 memset(labels, 0, OVS_CT_LABELS_LEN); 149 } 150 151 static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key, 152 const struct nf_conntrack_tuple *orig, 153 u8 icmp_proto) 154 { 155 key->ct_orig_proto = orig->dst.protonum; 156 if (orig->dst.protonum == icmp_proto) { 157 key->ct.orig_tp.src = htons(orig->dst.u.icmp.type); 158 key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code); 159 } else { 160 key->ct.orig_tp.src = orig->src.u.all; 161 key->ct.orig_tp.dst = orig->dst.u.all; 162 } 163 } 164 165 static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, 166 const struct nf_conntrack_zone *zone, 167 const struct nf_conn *ct) 168 { 169 key->ct_state = state; 170 key->ct_zone = zone->id; 171 key->ct.mark = ovs_ct_get_mark(ct); 172 ovs_ct_get_labels(ct, &key->ct.labels); 173 174 if (ct) { 175 const struct nf_conntrack_tuple *orig; 176 177 /* Use the master if we have one. */ 178 if (ct->master) 179 ct = ct->master; 180 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 181 182 /* IP version must match with the master connection. */ 183 if (key->eth.type == htons(ETH_P_IP) && 184 nf_ct_l3num(ct) == NFPROTO_IPV4) { 185 key->ipv4.ct_orig.src = orig->src.u3.ip; 186 key->ipv4.ct_orig.dst = orig->dst.u3.ip; 187 __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP); 188 return; 189 } else if (key->eth.type == htons(ETH_P_IPV6) && 190 !sw_flow_key_is_nd(key) && 191 nf_ct_l3num(ct) == NFPROTO_IPV6) { 192 key->ipv6.ct_orig.src = orig->src.u3.in6; 193 key->ipv6.ct_orig.dst = orig->dst.u3.in6; 194 __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP); 195 return; 196 } 197 } 198 /* Clear 'ct_orig_proto' to mark the non-existence of conntrack 199 * original direction key fields. 200 */ 201 key->ct_orig_proto = 0; 202 } 203 204 /* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has 205 * previously sent the packet to conntrack via the ct action. 
If 206 * 'keep_nat_flags' is true, the existing NAT flags retained, else they are 207 * initialized from the connection status. 208 */ 209 static void ovs_ct_update_key(const struct sk_buff *skb, 210 const struct ovs_conntrack_info *info, 211 struct sw_flow_key *key, bool post_ct, 212 bool keep_nat_flags) 213 { 214 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; 215 enum ip_conntrack_info ctinfo; 216 struct nf_conn *ct; 217 u8 state = 0; 218 219 ct = nf_ct_get(skb, &ctinfo); 220 if (ct) { 221 state = ovs_ct_get_state(ctinfo); 222 /* All unconfirmed entries are NEW connections. */ 223 if (!nf_ct_is_confirmed(ct)) 224 state |= OVS_CS_F_NEW; 225 /* OVS persists the related flag for the duration of the 226 * connection. 227 */ 228 if (ct->master) 229 state |= OVS_CS_F_RELATED; 230 if (keep_nat_flags) { 231 state |= key->ct_state & OVS_CS_F_NAT_MASK; 232 } else { 233 if (ct->status & IPS_SRC_NAT) 234 state |= OVS_CS_F_SRC_NAT; 235 if (ct->status & IPS_DST_NAT) 236 state |= OVS_CS_F_DST_NAT; 237 } 238 zone = nf_ct_zone(ct); 239 } else if (post_ct) { 240 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; 241 if (info) 242 zone = &info->zone; 243 } 244 __ovs_ct_update_key(key, state, zone, ct); 245 } 246 247 /* This is called to initialize CT key fields possibly coming in from the local 248 * stack. 
249 */ 250 void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) 251 { 252 ovs_ct_update_key(skb, NULL, key, false, false); 253 } 254 255 #define IN6_ADDR_INITIALIZER(ADDR) \ 256 { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \ 257 (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] } 258 259 int ovs_ct_put_key(const struct sw_flow_key *swkey, 260 const struct sw_flow_key *output, struct sk_buff *skb) 261 { 262 if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state)) 263 return -EMSGSIZE; 264 265 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 266 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone)) 267 return -EMSGSIZE; 268 269 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 270 nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark)) 271 return -EMSGSIZE; 272 273 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 274 nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels), 275 &output->ct.labels)) 276 return -EMSGSIZE; 277 278 if (swkey->ct_orig_proto) { 279 if (swkey->eth.type == htons(ETH_P_IP)) { 280 struct ovs_key_ct_tuple_ipv4 orig = { 281 output->ipv4.ct_orig.src, 282 output->ipv4.ct_orig.dst, 283 output->ct.orig_tp.src, 284 output->ct.orig_tp.dst, 285 output->ct_orig_proto, 286 }; 287 if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, 288 sizeof(orig), &orig)) 289 return -EMSGSIZE; 290 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 291 struct ovs_key_ct_tuple_ipv6 orig = { 292 IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src), 293 IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), 294 output->ct.orig_tp.src, 295 output->ct.orig_tp.dst, 296 output->ct_orig_proto, 297 }; 298 if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, 299 sizeof(orig), &orig)) 300 return -EMSGSIZE; 301 } 302 } 303 304 return 0; 305 } 306 307 static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, 308 u32 ct_mark, u32 mask) 309 { 310 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 311 u32 new_mark; 312 313 new_mark = ct_mark | (ct->mark & ~(mask)); 314 if (ct->mark != 
new_mark) { 315 ct->mark = new_mark; 316 if (nf_ct_is_confirmed(ct)) 317 nf_conntrack_event_cache(IPCT_MARK, ct); 318 key->ct.mark = new_mark; 319 } 320 321 return 0; 322 #else 323 return -ENOTSUPP; 324 #endif 325 } 326 327 static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct) 328 { 329 struct nf_conn_labels *cl; 330 331 cl = nf_ct_labels_find(ct); 332 if (!cl) { 333 nf_ct_labels_ext_add(ct); 334 cl = nf_ct_labels_find(ct); 335 } 336 337 return cl; 338 } 339 340 /* Initialize labels for a new, yet to be committed conntrack entry. Note that 341 * since the new connection is not yet confirmed, and thus no-one else has 342 * access to it's labels, we simply write them over. 343 */ 344 static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, 345 const struct ovs_key_ct_labels *labels, 346 const struct ovs_key_ct_labels *mask) 347 { 348 struct nf_conn_labels *cl, *master_cl; 349 bool have_mask = labels_nonzero(mask); 350 351 /* Inherit master's labels to the related connection? */ 352 master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL; 353 354 if (!master_cl && !have_mask) 355 return 0; /* Nothing to do. */ 356 357 cl = ovs_ct_get_conn_labels(ct); 358 if (!cl) 359 return -ENOSPC; 360 361 /* Inherit the master's labels, if any. */ 362 if (master_cl) 363 *cl = *master_cl; 364 365 if (have_mask) { 366 u32 *dst = (u32 *)cl->bits; 367 int i; 368 369 for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) 370 dst[i] = (dst[i] & ~mask->ct_labels_32[i]) | 371 (labels->ct_labels_32[i] 372 & mask->ct_labels_32[i]); 373 } 374 375 /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the 376 * IPCT_LABEL bit it set in the event cache. 
377 */ 378 nf_conntrack_event_cache(IPCT_LABEL, ct); 379 380 memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); 381 382 return 0; 383 } 384 385 static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key, 386 const struct ovs_key_ct_labels *labels, 387 const struct ovs_key_ct_labels *mask) 388 { 389 struct nf_conn_labels *cl; 390 int err; 391 392 cl = ovs_ct_get_conn_labels(ct); 393 if (!cl) 394 return -ENOSPC; 395 396 err = nf_connlabels_replace(ct, labels->ct_labels_32, 397 mask->ct_labels_32, 398 OVS_CT_LABELS_LEN_32); 399 if (err) 400 return err; 401 402 memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); 403 404 return 0; 405 } 406 407 /* 'skb' should already be pulled to nh_ofs. */ 408 static int ovs_ct_helper(struct sk_buff *skb, u16 proto) 409 { 410 const struct nf_conntrack_helper *helper; 411 const struct nf_conn_help *help; 412 enum ip_conntrack_info ctinfo; 413 unsigned int protoff; 414 struct nf_conn *ct; 415 int err; 416 417 ct = nf_ct_get(skb, &ctinfo); 418 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 419 return NF_ACCEPT; 420 421 help = nfct_help(ct); 422 if (!help) 423 return NF_ACCEPT; 424 425 helper = rcu_dereference(help->helper); 426 if (!helper) 427 return NF_ACCEPT; 428 429 switch (proto) { 430 case NFPROTO_IPV4: 431 protoff = ip_hdrlen(skb); 432 break; 433 case NFPROTO_IPV6: { 434 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 435 __be16 frag_off; 436 int ofs; 437 438 ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 439 &frag_off); 440 if (ofs < 0 || (frag_off & htons(~0x7)) != 0) { 441 pr_debug("proto header not found\n"); 442 return NF_ACCEPT; 443 } 444 protoff = ofs; 445 break; 446 } 447 default: 448 WARN_ONCE(1, "helper invoked on non-IP family!"); 449 return NF_DROP; 450 } 451 452 err = helper->help(skb, protoff, ct, ctinfo); 453 if (err != NF_ACCEPT) 454 return err; 455 456 /* Adjust seqs after helper. 
This is needed due to some helpers (e.g., 457 * FTP with NAT) adusting the TCP payload size when mangling IP 458 * addresses and/or port numbers in the text-based control connection. 459 */ 460 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 461 !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) 462 return NF_DROP; 463 return NF_ACCEPT; 464 } 465 466 /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 467 * value if 'skb' is freed. 468 */ 469 static int handle_fragments(struct net *net, struct sw_flow_key *key, 470 u16 zone, struct sk_buff *skb) 471 { 472 struct ovs_skb_cb ovs_cb = *OVS_CB(skb); 473 int err; 474 475 if (key->eth.type == htons(ETH_P_IP)) { 476 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; 477 478 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 479 err = ip_defrag(net, skb, user); 480 if (err) 481 return err; 482 483 ovs_cb.mru = IPCB(skb)->frag_max_size; 484 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 485 } else if (key->eth.type == htons(ETH_P_IPV6)) { 486 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 487 488 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 489 err = nf_ct_frag6_gather(net, skb, user); 490 if (err) { 491 if (err != -EINPROGRESS) 492 kfree_skb(skb); 493 return err; 494 } 495 496 key->ip.proto = ipv6_hdr(skb)->nexthdr; 497 ovs_cb.mru = IP6CB(skb)->frag_max_size; 498 #endif 499 } else { 500 kfree_skb(skb); 501 return -EPFNOSUPPORT; 502 } 503 504 key->ip.frag = OVS_FRAG_TYPE_NONE; 505 skb_clear_hash(skb); 506 skb->ignore_df = 1; 507 *OVS_CB(skb) = ovs_cb; 508 509 return 0; 510 } 511 512 static struct nf_conntrack_expect * 513 ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, 514 u16 proto, const struct sk_buff *skb) 515 { 516 struct nf_conntrack_tuple tuple; 517 518 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) 519 return NULL; 520 return __nf_ct_expect_find(net, zone, &tuple); 521 } 522 523 /* This replicates logic from 
nf_conntrack_core.c that is not exported. */
/* Derive the ctinfo value for a packet that matched tuplehash 'h': the
 * reply direction is always ESTABLISHED_REPLY; the original direction is
 * ESTABLISHED once a reply has been seen, RELATED for an expected
 * connection, and NEW otherwise.
 */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
{
	const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
		return IP_CT_ESTABLISHED_REPLY;
	/* Once we've had two way comms, always ESTABLISHED. */
	if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		return IP_CT_ESTABLISHED;
	if (test_bit(IPS_EXPECTED_BIT, &ct->status))
		return IP_CT_RELATED;
	return IP_CT_NEW;
}

/* Find an existing connection which this packet belongs to without
 * re-attributing statistics or modifying the connection state.  This allows an
 * skb->_nfct lost due to an upcall to be recovered during actions execution.
 *
 * 'natted' tells whether 'skb' has already been transformed by NAT, in which
 * case the lookup is done on the inverted packet tuple.
 *
 * Must be called with rcu_read_lock.
 *
 * On success, populates skb->_nfct and returns the connection.  Returns NULL
 * if there is no existing entry.
 */
static struct nf_conn *
ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
		     u8 l3num, struct sk_buff *skb, bool natted)
{
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	unsigned int dataoff;
	u8 protonum;

	l3proto = __nf_ct_l3proto_find(l3num);
	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
				 &protonum) <= 0) {
		pr_debug("ovs_ct_find_existing: Can't get protonum\n");
		return NULL;
	}
	l4proto = __nf_ct_l4proto_find(l3num, protonum);
	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
			     protonum, net, &tuple, l3proto, l4proto)) {
		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
		return NULL;
	}

	/* Must invert the tuple if skb has been transformed by NAT. */
	if (natted) {
		struct nf_conntrack_tuple inverse;

		if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
			return NULL;
		}
		tuple = inverse;
	}

	/* look for tuple match */
	h = nf_conntrack_find_get(net, zone, &tuple);
	if (!h)
		return NULL;   /* Not found. */

	ct = nf_ct_tuplehash_to_ctrack(h);

	/* Inverted packet tuple matches the reverse direction conntrack tuple,
	 * select the other tuplehash to get the right 'ctinfo' bits for this
	 * packet.
	 */
	if (natted)
		h = &ct->tuplehash[!h->tuple.dst.dir];

	nf_ct_set(skb, ct, ovs_ct_get_info(h));
	return ct;
}

/* Determine whether skb->_nfct is equal to the result of conntrack lookup.
 *
 * Returns true when the connection attached to 'skb' (possibly recovered
 * here via ovs_ct_find_existing()) can be reused for the action described
 * by 'info': same network namespace, same zone, and a helper that does not
 * conflict with the requested one.  Returns false when the packet has to go
 * through conntrack again; in the 'force' case the stale entry is also
 * detached from 'skb'.
 */
static bool skb_nfct_cached(struct net *net,
			    const struct sw_flow_key *key,
			    const struct ovs_conntrack_info *info,
			    struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);
	/* If no ct, check if we have evidence that an existing conntrack entry
	 * might be found for this skb.  This happens when we lose a skb->_nfct
	 * due to an upcall.  If the connection was not confirmed, it is not
	 * cached and needs to be run through conntrack again.
	 */
	if (!ct && key->ct_state & OVS_CS_F_TRACKED &&
	    !(key->ct_state & OVS_CS_F_INVALID) &&
	    key->ct_zone == info->zone.id) {
		ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
					  !!(key->ct_state
					     & OVS_CS_F_NAT_MASK));
		/* Re-read ctinfo, which ovs_ct_find_existing() just set. */
		if (ct)
			nf_ct_get(skb, &ctinfo);
	}
	if (!ct)
		return false;
	if (!net_eq(net, read_pnet(&ct->ct_net)))
		return false;
	if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
		return false;
	if (info->helper) {
		struct nf_conn_help *help;

		help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
		if (help && rcu_access_pointer(help->helper) != info->helper)
			return false;
	}
	/* Force conntrack entry direction to the current packet? */
	if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
		/* Delete the conntrack entry if confirmed, else just release
		 * the reference.
		 */
		if (nf_ct_is_confirmed(ct))
			nf_ct_delete(ct, 0, 0);
		else
			nf_conntrack_put(&ct->ct_general);
		nf_ct_set(skb, NULL, 0);
		return false;
	}

	return true;
}

#ifdef CONFIG_NF_NAT_NEEDED
/* Modelled after nf_nat_ipv[46]_fn().
 * range is only used for new, uninitialized NAT state.
 * Returns either NF_ACCEPT or NF_DROP.
 *
 * The packet is pulled to its network header for the duration of the call
 * and pushed back, with the checksum state adjusted, on every exit path.
 */
static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
			      enum ip_conntrack_info ctinfo,
			      const struct nf_nat_range *range,
			      enum nf_nat_manip_type maniptype)
{
	int hooknum, nh_off, err = NF_ACCEPT;

	nh_off = skb_network_offset(skb);
	skb_pull_rcsum(skb, nh_off);

	/* See HOOK2MANIP(). */
	if (maniptype == NF_NAT_MANIP_SRC)
		hooknum = NF_INET_LOCAL_IN; /* Source NAT */
	else
		hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		/* Related ICMP/ICMPv6 packets get the dedicated reply
		 * translation and skip the generic path below.
		 */
		if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
		    skb->protocol == htons(ETH_P_IP) &&
		    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   hooknum))
				err = NF_DROP;
			goto push;
		} else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
			   skb->protocol == htons(ETH_P_IPV6)) {
			__be16 frag_off;
			u8 nexthdr = ipv6_hdr(skb)->nexthdr;
			int hdrlen = ipv6_skip_exthdr(skb,
						      sizeof(struct ipv6hdr),
						      &nexthdr, &frag_off);

			if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
				if (!nf_nat_icmpv6_reply_translation(skb, ct,
								     ctinfo,
								     hooknum,
								     hdrlen))
					err = NF_DROP;
				goto push;
			}
		}
		/* Non-ICMP, fall thru to initialize if needed. */
	case IP_CT_NEW:
		/* Seen it before?  This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			/* Initialize according to the NAT action. */
			err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
				/* Action is set up to establish a new
				 * mapping.
				 */
				? nf_nat_setup_info(ct, range, maniptype)
				: nf_nat_alloc_null_binding(ct, hooknum);
			if (err != NF_ACCEPT)
				goto push;
		}
		break;

	case IP_CT_ESTABLISHED:
	case IP_CT_ESTABLISHED_REPLY:
		break;

	default:
		err = NF_DROP;
		goto push;
	}

	err = nf_nat_packet(ct, ctinfo, hooknum, skb);
push:
	/* Undo the pull done on entry. */
	skb_push(skb, nh_off);
	skb_postpush_rcsum(skb, skb->data, nh_off);

	return err;
}

/* Reflect a completed NAT of type 'maniptype' in 'key': set the matching
 * SRC/DST NAT state bit and reload the translated address from the IP header
 * of 'skb' and, for UDP, TCP and SCTP, the translated port from the
 * transport header.  Other Ethernet types or IP protocols leave the
 * remaining fields untouched.
 */
static void ovs_nat_update_key(struct sw_flow_key *key,
			       const struct sk_buff *skb,
			       enum nf_nat_manip_type maniptype)
{
	if (maniptype == NF_NAT_MANIP_SRC) {
		__be16 src;

		key->ct_state |= OVS_CS_F_SRC_NAT;
		if (key->eth.type == htons(ETH_P_IP))
			key->ipv4.addr.src = ip_hdr(skb)->saddr;
		else if (key->eth.type == htons(ETH_P_IPV6))
			memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
			       sizeof(key->ipv6.addr.src));
		else
			return;

		if (key->ip.proto == IPPROTO_UDP)
			src = udp_hdr(skb)->source;
		else if (key->ip.proto == IPPROTO_TCP)
			src = tcp_hdr(skb)->source;
		else if (key->ip.proto == IPPROTO_SCTP)
			src = sctp_hdr(skb)->source;
		else
			return;

		key->tp.src = src;
	} else {
		__be16 dst;

		key->ct_state |= OVS_CS_F_DST_NAT;
		if (key->eth.type == htons(ETH_P_IP))
			key->ipv4.addr.dst = ip_hdr(skb)->daddr;
		else if (key->eth.type == htons(ETH_P_IPV6))
			memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
			       sizeof(key->ipv6.addr.dst));
		else
			return;

		if (key->ip.proto == IPPROTO_UDP)
			dst = udp_hdr(skb)->dest;
		else if (key->ip.proto == IPPROTO_TCP)
			dst = tcp_hdr(skb)->dest;
		else if (key->ip.proto == IPPROTO_SCTP)
			dst = sctp_hdr(skb)->dest;
		else
			return;

		key->tp.dst = dst;
	}
}

/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise.
*/
/* Apply the NAT requested by 'info' to 'skb' for connection 'ct'.  The
 * manipulation type is taken from the connection's existing NAT status when
 * that can be deduced, otherwise from the SRC/DST flag of the action.  On
 * success the flow key is updated to match the translated packet.
 */
static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
		      const struct ovs_conntrack_info *info,
		      struct sk_buff *skb, struct nf_conn *ct,
		      enum ip_conntrack_info ctinfo)
{
	enum nf_nat_manip_type maniptype;
	int err;

	if (nf_ct_is_untracked(ct)) {
		/* A NAT action may only be performed on tracked packets. */
		return NF_ACCEPT;
	}

	/* Add NAT extension if not confirmed yet. */
	if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
		return NF_ACCEPT;   /* Can't NAT. */

	/* Determine NAT type.
	 * Check if the NAT type can be deduced from the tracked connection.
	 * Make sure new expected connections (IP_CT_RELATED) are NATted only
	 * when committing.
	 */
	if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
	    ct->status & IPS_NAT_MASK &&
	    (ctinfo != IP_CT_RELATED || info->commit)) {
		/* NAT an established or related connection like before. */
		if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
			/* This is the REPLY direction for a connection
			 * for which NAT was applied in the forward
			 * direction.  Do the reverse NAT.
			 */
			maniptype = ct->status & IPS_SRC_NAT
				? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
		else
			maniptype = ct->status & IPS_SRC_NAT
				? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
	} else if (info->nat & OVS_CT_SRC_NAT) {
		maniptype = NF_NAT_MANIP_SRC;
	} else if (info->nat & OVS_CT_DST_NAT) {
		maniptype = NF_NAT_MANIP_DST;
	} else {
		return NF_ACCEPT; /* Connection is not NATed. */
	}
	err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);

	/* Mark NAT done if successful and update the flow key. */
	if (err == NF_ACCEPT)
		ovs_nat_update_key(key, skb, maniptype);

	return err;
}
#else /* !CONFIG_NF_NAT_NEEDED */
/* Without NAT support every packet is accepted unmodified. */
static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
		      const struct ovs_conntrack_info *info,
		      struct sk_buff *skb, struct nf_conn *ct,
		      enum ip_conntrack_info ctinfo)
{
	return NF_ACCEPT;
}
#endif

/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
 * not done already.  Update key with new CT state after passing the packet
 * through conntrack.
 * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be
 * set to NULL and 0 will be returned.
 *
 * Returns 0 on success, -ENOENT if conntrack did not accept the packet,
 * -EINVAL if NAT or the helper rejected it, or the error from helper
 * assignment.
 */
static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
			   const struct ovs_conntrack_info *info,
			   struct sk_buff *skb)
{
	/* If we are recirculating packets to match on conntrack fields and
	 * committing with a separate conntrack action,  then we don't need to
	 * actually run the packet through conntrack twice unless it's for a
	 * different zone.
	 */
	bool cached = skb_nfct_cached(net, key, info, skb);
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	if (!cached) {
		struct nf_conn *tmpl = info->ct;
		int err;

		/* Associate skb with specified zone. */
		if (tmpl) {
			/* Drop whatever is attached before taking a
			 * reference on the template for the skb.
			 */
			if (skb_nfct(skb))
				nf_conntrack_put(skb_nfct(skb));
			nf_conntrack_get(&tmpl->ct_general);
			nf_ct_set(skb, tmpl, IP_CT_NEW);
		}

		err = nf_conntrack_in(net, info->family,
				      NF_INET_PRE_ROUTING, skb);
		if (err != NF_ACCEPT)
			return -ENOENT;

		/* Clear CT state NAT flags to mark that we have not yet done
		 * NAT after the nf_conntrack_in() call.  We can actually clear
		 * the whole state, as it will be re-initialized below.
		 */
		key->ct_state = 0;

		/* Update the key, but keep the NAT flags. */
		ovs_ct_update_key(skb, info, key, true, true);
	}

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		/* Packets starting a new connection must be NATted before the
		 * helper, so that the helper knows about the NAT.  We enforce
		 * this by delaying both NAT and helper calls for unconfirmed
		 * connections until the committing CT action.  For later
		 * packets NAT and Helper may be called in either order.
		 *
		 * NAT will be done only if the CT action has NAT, and only
		 * once per packet (per zone), as guarded by the NAT bits in
		 * the key->ct_state.
		 */
		if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
		    (nf_ct_is_confirmed(ct) || info->commit) &&
		    ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
			return -EINVAL;
		}

		/* Userspace may decide to perform a ct lookup without a helper
		 * specified followed by a (recirculate and) commit with one.
		 * Therefore, for unconfirmed connections which we will commit,
		 * we need to attach the helper here.
		 */
		if (!nf_ct_is_confirmed(ct) && info->commit &&
		    info->helper && !nfct_help(ct)) {
			int err = __nf_ct_try_assign_helper(ct, info->ct,
							    GFP_ATOMIC);
			if (err)
				return err;
		}

		/* Call the helper only if:
		 * - nf_conntrack_in() was executed above ("!cached") for a
		 *   confirmed connection, or
		 * - When committing an unconfirmed connection.
		 */
		if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
		    ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
			return -EINVAL;
		}
	}

	return 0;
}

/* Lookup connection and read fields into key.
*/ 943 static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 944 const struct ovs_conntrack_info *info, 945 struct sk_buff *skb) 946 { 947 struct nf_conntrack_expect *exp; 948 949 /* If we pass an expected packet through nf_conntrack_in() the 950 * expectation is typically removed, but the packet could still be 951 * lost in upcall processing. To prevent this from happening we 952 * perform an explicit expectation lookup. Expected connections are 953 * always new, and will be passed through conntrack only when they are 954 * committed, as it is OK to remove the expectation at that time. 955 */ 956 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); 957 if (exp) { 958 u8 state; 959 960 /* NOTE: New connections are NATted and Helped only when 961 * committed, so we are not calling into NAT here. 962 */ 963 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; 964 __ovs_ct_update_key(key, state, &info->zone, exp->master); 965 } else { 966 struct nf_conn *ct; 967 int err; 968 969 err = __ovs_ct_lookup(net, key, info, skb); 970 if (err) 971 return err; 972 973 ct = (struct nf_conn *)skb_nfct(skb); 974 if (ct) 975 nf_ct_deliver_cached_events(ct); 976 } 977 978 return 0; 979 } 980 981 static bool labels_nonzero(const struct ovs_key_ct_labels *labels) 982 { 983 size_t i; 984 985 for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) 986 if (labels->ct_labels_32[i]) 987 return true; 988 989 return false; 990 } 991 992 /* Lookup connection and confirm if unconfirmed. 
*/
/* Besides confirming, applies the mark and labels carried by 'info' to the
 * connection.  Returns 0 on success (including the case where the packet has
 * no valid connection), -EINVAL if confirmation is refused, or the error
 * from the lookup or from setting mark/labels.
 */
static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
			 const struct ovs_conntrack_info *info,
			 struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	int err;

	err = __ovs_ct_lookup(net, key, info, skb);
	if (err)
		return err;

	/* The connection could be invalid, in which case this is a no-op.*/
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return 0;

	/* Apply changes before confirming the connection so that the initial
	 * conntrack NEW netlink event carries the values given in the CT
	 * action.
	 */
	if (info->mark.mask) {
		err = ovs_ct_set_mark(ct, key, info->mark.value,
				      info->mark.mask);
		if (err)
			return err;
	}
	/* Unconfirmed entries are written directly (and may inherit the
	 * master's labels); confirmed ones go through the masked replace.
	 */
	if (!nf_ct_is_confirmed(ct)) {
		err = ovs_ct_init_labels(ct, key, &info->labels.value,
					 &info->labels.mask);
		if (err)
			return err;
	} else if (labels_nonzero(&info->labels.mask)) {
		err = ovs_ct_set_labels(ct, key, &info->labels.value,
					&info->labels.mask);
		if (err)
			return err;
	}
	/* This will take care of sending queued events even if the connection
	 * is already confirmed.
	 */
	if (nf_conntrack_confirm(skb) != NF_ACCEPT)
		return -EINVAL;

	return 0;
}

/* Execute the conntrack action described by 'info' on 'skb': reassemble
 * fragments if needed, then either commit or look up the connection, with
 * the packet temporarily pulled to its network header.
 *
 * Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
 * value if 'skb' is freed.
 */
int ovs_ct_execute(struct net *net, struct sk_buff *skb,
		   struct sw_flow_key *key,
		   const struct ovs_conntrack_info *info)
{
	int nh_ofs;
	int err;

	/* The conntrack module expects to be working at L3. */
	nh_ofs = skb_network_offset(skb);
	skb_pull_rcsum(skb, nh_ofs);

	if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
		/* On failure handle_fragments() has already freed or stolen
		 * 'skb', so return without touching it again.
		 */
		err = handle_fragments(net, key, info->zone.id, skb);
		if (err)
			return err;
	}

	if (info->commit)
		err = ovs_ct_commit(net, key, info, skb);
	else
		err = ovs_ct_lookup(net, key, info, skb);

	/* Undo the pull done on entry before handing the packet back. */
	skb_push(skb, nh_ofs);
	skb_postpush_rcsum(skb, skb->data, nh_ofs);
	if (err)
		kfree_skb(skb);
	return err;
}

/* Resolve helper 'name' for the family of 'info' and the IP protocol of
 * 'key', taking a module reference, and attach it to the template connection
 * in 'info'.
 *
 * Returns 0 on success, -EINVAL if no such helper exists, or -ENOMEM if the
 * helper extension cannot be added (the module reference is dropped again).
 */
static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
			     const struct sw_flow_key *key, bool log)
{
	struct nf_conntrack_helper *helper;
	struct nf_conn_help *help;

	helper = nf_conntrack_helper_try_module_get(name, info->family,
						    key->ip.proto);
	if (!helper) {
		OVS_NLERR(log, "Unknown helper \"%s\"", name);
		return -EINVAL;
	}

	help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
	if (!help) {
		module_put(helper->me);
		return -ENOMEM;
	}

	rcu_assign_pointer(help->helper, helper);
	info->helper = helper;
	return 0;
}

#ifdef CONFIG_NF_NAT_NEEDED
static int parse_nat(const struct nlattr *attr,
		     struct ovs_conntrack_info *info, bool log)
{
	struct nlattr *a;
	int rem;
	bool have_ip_max = false;
	bool have_proto_max = false;
	bool ip_vers = (info->family == NFPROTO_IPV6);

	nla_for_each_nested(a, attr, rem) {
		static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
			[OVS_NAT_ATTR_SRC] = {0, 0},
			[OVS_NAT_ATTR_DST] = {0, 0},
			[OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr),
						 sizeof(struct in6_addr)},
			[OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr),
						 sizeof(struct in6_addr)},
			[OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)},
			[OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)},
			[OVS_NAT_ATTR_PERSISTENT] = {0, 0},
			[OVS_NAT_ATTR_PROTO_HASH]
= {0, 0}, 1118 [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0}, 1119 }; 1120 int type = nla_type(a); 1121 1122 if (type > OVS_NAT_ATTR_MAX) { 1123 OVS_NLERR(log, 1124 "Unknown NAT attribute (type=%d, max=%d).\n", 1125 type, OVS_NAT_ATTR_MAX); 1126 return -EINVAL; 1127 } 1128 1129 if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) { 1130 OVS_NLERR(log, 1131 "NAT attribute type %d has unexpected length (%d != %d).\n", 1132 type, nla_len(a), 1133 ovs_nat_attr_lens[type][ip_vers]); 1134 return -EINVAL; 1135 } 1136 1137 switch (type) { 1138 case OVS_NAT_ATTR_SRC: 1139 case OVS_NAT_ATTR_DST: 1140 if (info->nat) { 1141 OVS_NLERR(log, 1142 "Only one type of NAT may be specified.\n" 1143 ); 1144 return -ERANGE; 1145 } 1146 info->nat |= OVS_CT_NAT; 1147 info->nat |= ((type == OVS_NAT_ATTR_SRC) 1148 ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT); 1149 break; 1150 1151 case OVS_NAT_ATTR_IP_MIN: 1152 nla_memcpy(&info->range.min_addr, a, 1153 sizeof(info->range.min_addr)); 1154 info->range.flags |= NF_NAT_RANGE_MAP_IPS; 1155 break; 1156 1157 case OVS_NAT_ATTR_IP_MAX: 1158 have_ip_max = true; 1159 nla_memcpy(&info->range.max_addr, a, 1160 sizeof(info->range.max_addr)); 1161 info->range.flags |= NF_NAT_RANGE_MAP_IPS; 1162 break; 1163 1164 case OVS_NAT_ATTR_PROTO_MIN: 1165 info->range.min_proto.all = htons(nla_get_u16(a)); 1166 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 1167 break; 1168 1169 case OVS_NAT_ATTR_PROTO_MAX: 1170 have_proto_max = true; 1171 info->range.max_proto.all = htons(nla_get_u16(a)); 1172 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 1173 break; 1174 1175 case OVS_NAT_ATTR_PERSISTENT: 1176 info->range.flags |= NF_NAT_RANGE_PERSISTENT; 1177 break; 1178 1179 case OVS_NAT_ATTR_PROTO_HASH: 1180 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM; 1181 break; 1182 1183 case OVS_NAT_ATTR_PROTO_RANDOM: 1184 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY; 1185 break; 1186 1187 default: 1188 OVS_NLERR(log, "Unknown nat attribute (%d).\n", type); 1189 return -EINVAL; 1190 } 1191 } 
1192 1193 if (rem > 0) { 1194 OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem); 1195 return -EINVAL; 1196 } 1197 if (!info->nat) { 1198 /* Do not allow flags if no type is given. */ 1199 if (info->range.flags) { 1200 OVS_NLERR(log, 1201 "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n" 1202 ); 1203 return -EINVAL; 1204 } 1205 info->nat = OVS_CT_NAT; /* NAT existing connections. */ 1206 } else if (!info->commit) { 1207 OVS_NLERR(log, 1208 "NAT attributes may be specified only when CT COMMIT flag is also specified.\n" 1209 ); 1210 return -EINVAL; 1211 } 1212 /* Allow missing IP_MAX. */ 1213 if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) { 1214 memcpy(&info->range.max_addr, &info->range.min_addr, 1215 sizeof(info->range.max_addr)); 1216 } 1217 /* Allow missing PROTO_MAX. */ 1218 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 1219 !have_proto_max) { 1220 info->range.max_proto.all = info->range.min_proto.all; 1221 } 1222 return 0; 1223 } 1224 #endif 1225 1226 static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { 1227 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1228 [OVS_CT_ATTR_FORCE_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1229 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), 1230 .maxlen = sizeof(u16) }, 1231 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), 1232 .maxlen = sizeof(struct md_mark) }, 1233 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), 1234 .maxlen = sizeof(struct md_labels) }, 1235 [OVS_CT_ATTR_HELPER] = { .minlen = 1, 1236 .maxlen = NF_CT_HELPER_NAME_LEN }, 1237 #ifdef CONFIG_NF_NAT_NEEDED 1238 /* NAT length is checked when parsing the nested attributes. 
*/ 1239 [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, 1240 #endif 1241 }; 1242 1243 static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, 1244 const char **helper, bool log) 1245 { 1246 struct nlattr *a; 1247 int rem; 1248 1249 nla_for_each_nested(a, attr, rem) { 1250 int type = nla_type(a); 1251 int maxlen = ovs_ct_attr_lens[type].maxlen; 1252 int minlen = ovs_ct_attr_lens[type].minlen; 1253 1254 if (type > OVS_CT_ATTR_MAX) { 1255 OVS_NLERR(log, 1256 "Unknown conntrack attr (type=%d, max=%d)", 1257 type, OVS_CT_ATTR_MAX); 1258 return -EINVAL; 1259 } 1260 if (nla_len(a) < minlen || nla_len(a) > maxlen) { 1261 OVS_NLERR(log, 1262 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", 1263 type, nla_len(a), maxlen); 1264 return -EINVAL; 1265 } 1266 1267 switch (type) { 1268 case OVS_CT_ATTR_FORCE_COMMIT: 1269 info->force = true; 1270 /* fall through. */ 1271 case OVS_CT_ATTR_COMMIT: 1272 info->commit = true; 1273 break; 1274 #ifdef CONFIG_NF_CONNTRACK_ZONES 1275 case OVS_CT_ATTR_ZONE: 1276 info->zone.id = nla_get_u16(a); 1277 break; 1278 #endif 1279 #ifdef CONFIG_NF_CONNTRACK_MARK 1280 case OVS_CT_ATTR_MARK: { 1281 struct md_mark *mark = nla_data(a); 1282 1283 if (!mark->mask) { 1284 OVS_NLERR(log, "ct_mark mask cannot be 0"); 1285 return -EINVAL; 1286 } 1287 info->mark = *mark; 1288 break; 1289 } 1290 #endif 1291 #ifdef CONFIG_NF_CONNTRACK_LABELS 1292 case OVS_CT_ATTR_LABELS: { 1293 struct md_labels *labels = nla_data(a); 1294 1295 if (!labels_nonzero(&labels->mask)) { 1296 OVS_NLERR(log, "ct_labels mask cannot be 0"); 1297 return -EINVAL; 1298 } 1299 info->labels = *labels; 1300 break; 1301 } 1302 #endif 1303 case OVS_CT_ATTR_HELPER: 1304 *helper = nla_data(a); 1305 if (!memchr(*helper, '\0', nla_len(a))) { 1306 OVS_NLERR(log, "Invalid conntrack helper"); 1307 return -EINVAL; 1308 } 1309 break; 1310 #ifdef CONFIG_NF_NAT_NEEDED 1311 case OVS_CT_ATTR_NAT: { 1312 int err = parse_nat(a, info, log); 1313 1314 
if (err) 1315 return err; 1316 break; 1317 } 1318 #endif 1319 default: 1320 OVS_NLERR(log, "Unknown conntrack attr (%d)", 1321 type); 1322 return -EINVAL; 1323 } 1324 } 1325 1326 #ifdef CONFIG_NF_CONNTRACK_MARK 1327 if (!info->commit && info->mark.mask) { 1328 OVS_NLERR(log, 1329 "Setting conntrack mark requires 'commit' flag."); 1330 return -EINVAL; 1331 } 1332 #endif 1333 #ifdef CONFIG_NF_CONNTRACK_LABELS 1334 if (!info->commit && labels_nonzero(&info->labels.mask)) { 1335 OVS_NLERR(log, 1336 "Setting conntrack labels requires 'commit' flag."); 1337 return -EINVAL; 1338 } 1339 #endif 1340 if (rem > 0) { 1341 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem); 1342 return -EINVAL; 1343 } 1344 1345 return 0; 1346 } 1347 1348 bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) 1349 { 1350 if (attr == OVS_KEY_ATTR_CT_STATE) 1351 return true; 1352 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 1353 attr == OVS_KEY_ATTR_CT_ZONE) 1354 return true; 1355 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 1356 attr == OVS_KEY_ATTR_CT_MARK) 1357 return true; 1358 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1359 attr == OVS_KEY_ATTR_CT_LABELS) { 1360 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1361 1362 return ovs_net->xt_label; 1363 } 1364 1365 return false; 1366 } 1367 1368 int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, 1369 const struct sw_flow_key *key, 1370 struct sw_flow_actions **sfa, bool log) 1371 { 1372 struct ovs_conntrack_info ct_info; 1373 const char *helper = NULL; 1374 u16 family; 1375 int err; 1376 1377 family = key_to_nfproto(key); 1378 if (family == NFPROTO_UNSPEC) { 1379 OVS_NLERR(log, "ct family unspecified"); 1380 return -EINVAL; 1381 } 1382 1383 memset(&ct_info, 0, sizeof(ct_info)); 1384 ct_info.family = family; 1385 1386 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID, 1387 NF_CT_DEFAULT_ZONE_DIR, 0); 1388 1389 err = parse_ct(attr, &ct_info, &helper, log); 1390 if (err) 1391 return err; 1392 1393 /* Set up 
template for tracking connections in specific zones. */ 1394 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL); 1395 if (!ct_info.ct) { 1396 OVS_NLERR(log, "Failed to allocate conntrack template"); 1397 return -ENOMEM; 1398 } 1399 1400 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); 1401 nf_conntrack_get(&ct_info.ct->ct_general); 1402 1403 if (helper) { 1404 err = ovs_ct_add_helper(&ct_info, helper, key, log); 1405 if (err) 1406 goto err_free_ct; 1407 } 1408 1409 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info, 1410 sizeof(ct_info), log); 1411 if (err) 1412 goto err_free_ct; 1413 1414 return 0; 1415 err_free_ct: 1416 __ovs_ct_free_action(&ct_info); 1417 return err; 1418 } 1419 1420 #ifdef CONFIG_NF_NAT_NEEDED 1421 static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, 1422 struct sk_buff *skb) 1423 { 1424 struct nlattr *start; 1425 1426 start = nla_nest_start(skb, OVS_CT_ATTR_NAT); 1427 if (!start) 1428 return false; 1429 1430 if (info->nat & OVS_CT_SRC_NAT) { 1431 if (nla_put_flag(skb, OVS_NAT_ATTR_SRC)) 1432 return false; 1433 } else if (info->nat & OVS_CT_DST_NAT) { 1434 if (nla_put_flag(skb, OVS_NAT_ATTR_DST)) 1435 return false; 1436 } else { 1437 goto out; 1438 } 1439 1440 if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { 1441 if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && 1442 info->family == NFPROTO_IPV4) { 1443 if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, 1444 info->range.min_addr.ip) || 1445 (info->range.max_addr.ip 1446 != info->range.min_addr.ip && 1447 (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, 1448 info->range.max_addr.ip)))) 1449 return false; 1450 } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && 1451 info->family == NFPROTO_IPV6) { 1452 if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, 1453 &info->range.min_addr.in6) || 1454 (memcmp(&info->range.max_addr.in6, 1455 &info->range.min_addr.in6, 1456 sizeof(info->range.max_addr.in6)) && 1457 (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, 1458 &info->range.max_addr.in6)))) 1459 return 
false; 1460 } else { 1461 return false; 1462 } 1463 } 1464 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 1465 (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, 1466 ntohs(info->range.min_proto.all)) || 1467 (info->range.max_proto.all != info->range.min_proto.all && 1468 nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX, 1469 ntohs(info->range.max_proto.all))))) 1470 return false; 1471 1472 if (info->range.flags & NF_NAT_RANGE_PERSISTENT && 1473 nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT)) 1474 return false; 1475 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM && 1476 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH)) 1477 return false; 1478 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY && 1479 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM)) 1480 return false; 1481 out: 1482 nla_nest_end(skb, start); 1483 1484 return true; 1485 } 1486 #endif 1487 1488 int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, 1489 struct sk_buff *skb) 1490 { 1491 struct nlattr *start; 1492 1493 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT); 1494 if (!start) 1495 return -EMSGSIZE; 1496 1497 if (ct_info->commit && nla_put_flag(skb, ct_info->force 1498 ? 
OVS_CT_ATTR_FORCE_COMMIT 1499 : OVS_CT_ATTR_COMMIT)) 1500 return -EMSGSIZE; 1501 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 1502 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) 1503 return -EMSGSIZE; 1504 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && 1505 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), 1506 &ct_info->mark)) 1507 return -EMSGSIZE; 1508 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1509 labels_nonzero(&ct_info->labels.mask) && 1510 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), 1511 &ct_info->labels)) 1512 return -EMSGSIZE; 1513 if (ct_info->helper) { 1514 if (nla_put_string(skb, OVS_CT_ATTR_HELPER, 1515 ct_info->helper->name)) 1516 return -EMSGSIZE; 1517 } 1518 #ifdef CONFIG_NF_NAT_NEEDED 1519 if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) 1520 return -EMSGSIZE; 1521 #endif 1522 nla_nest_end(skb, start); 1523 1524 return 0; 1525 } 1526 1527 void ovs_ct_free_action(const struct nlattr *a) 1528 { 1529 struct ovs_conntrack_info *ct_info = nla_data(a); 1530 1531 __ovs_ct_free_action(ct_info); 1532 } 1533 1534 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) 1535 { 1536 if (ct_info->helper) 1537 module_put(ct_info->helper->me); 1538 if (ct_info->ct) 1539 nf_ct_tmpl_free(ct_info->ct); 1540 } 1541 1542 void ovs_ct_init(struct net *net) 1543 { 1544 unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; 1545 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1546 1547 if (nf_connlabels_get(net, n_bits - 1)) { 1548 ovs_net->xt_label = false; 1549 OVS_NLERR(true, "Failed to set connlabel length"); 1550 } else { 1551 ovs_net->xt_label = true; 1552 } 1553 } 1554 1555 void ovs_ct_exit(struct net *net) 1556 { 1557 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1558 1559 if (ovs_net->xt_label) 1560 nf_connlabels_put(net); 1561 } 1562