1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/kernel.h> 3 #include <linux/init.h> 4 #include <linux/module.h> 5 #include <linux/netfilter.h> 6 #include <linux/rhashtable.h> 7 #include <linux/netdevice.h> 8 #include <net/ip.h> 9 #include <net/ip6_route.h> 10 #include <net/netfilter/nf_tables.h> 11 #include <net/netfilter/nf_flow_table.h> 12 #include <net/netfilter/nf_conntrack.h> 13 #include <net/netfilter/nf_conntrack_core.h> 14 #include <net/netfilter/nf_conntrack_l4proto.h> 15 #include <net/netfilter/nf_conntrack_tuple.h> 16 17 static DEFINE_MUTEX(flowtable_lock); 18 static LIST_HEAD(flowtables); 19 static __read_mostly struct kmem_cache *flow_offload_cachep; 20 21 static void 22 flow_offload_fill_dir(struct flow_offload *flow, 23 enum flow_offload_tuple_dir dir) 24 { 25 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; 26 struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple; 27 28 ft->dir = dir; 29 30 switch (ctt->src.l3num) { 31 case NFPROTO_IPV4: 32 ft->src_v4 = ctt->src.u3.in; 33 ft->dst_v4 = ctt->dst.u3.in; 34 break; 35 case NFPROTO_IPV6: 36 ft->src_v6 = ctt->src.u3.in6; 37 ft->dst_v6 = ctt->dst.u3.in6; 38 break; 39 } 40 41 ft->l3proto = ctt->src.l3num; 42 ft->l4proto = ctt->dst.protonum; 43 44 switch (ctt->dst.protonum) { 45 case IPPROTO_TCP: 46 case IPPROTO_UDP: 47 ft->src_port = ctt->src.u.tcp.port; 48 ft->dst_port = ctt->dst.u.tcp.port; 49 break; 50 } 51 } 52 53 struct flow_offload *flow_offload_alloc(struct nf_conn *ct) 54 { 55 struct flow_offload *flow; 56 57 if (unlikely(nf_ct_is_dying(ct))) 58 return NULL; 59 60 flow = kmem_cache_zalloc(flow_offload_cachep, GFP_ATOMIC); 61 if (!flow) 62 return NULL; 63 64 refcount_inc(&ct->ct_general.use); 65 flow->ct = ct; 66 67 flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL); 68 flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY); 69 70 if (ct->status & IPS_SRC_NAT) 71 __set_bit(NF_FLOW_SNAT, &flow->flags); 72 if (ct->status & IPS_DST_NAT) 73 __set_bit(NF_FLOW_DNAT, &flow->flags); 74 75 return flow; 76 } 77 EXPORT_SYMBOL_GPL(flow_offload_alloc); 78 79 static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) 80 { 81 if (flow_tuple->l3proto == NFPROTO_IPV6) 82 return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache)); 83 84 return 0; 85 } 86 87 static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, 88 enum flow_offload_tuple_dir dir) 89 { 90 struct dst_entry *dst = route->tuple[dir].dst; 91 92 route->tuple[dir].dst = NULL; 93 94 return dst; 95 } 96 97 static int flow_offload_fill_route(struct flow_offload *flow, 98 struct nf_flow_route *route, 99 enum flow_offload_tuple_dir dir) 100 { 101 struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; 102 struct dst_entry *dst = nft_route_dst_fetch(route, dir); 103 int i, j = 0; 104 105 switch (flow_tuple->l3proto) { 106 case NFPROTO_IPV4: 107 flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true); 108 break; 109 case NFPROTO_IPV6: 110 flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true); 111 break; 112 } 113 114 flow_tuple->iifidx = route->tuple[dir].in.ifindex; 115 for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) { 116 flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id; 117 flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto; 118 if (route->tuple[dir].in.ingress_vlans & BIT(i)) 119 flow_tuple->in_vlan_ingress |= BIT(j); 120 j++; 121 } 122 123 flow_tuple->tun = route->tuple[dir].in.tun; 124 flow_tuple->encap_num = route->tuple[dir].in.num_encaps; 125 flow_tuple->needs_gso_segment = route->tuple[dir].out.needs_gso_segment; 126 flow_tuple->tun_num = route->tuple[dir].in.num_tuns; 127 128 switch (route->tuple[dir].xmit_type) { 129 case FLOW_OFFLOAD_XMIT_DIRECT: 130 memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest, 131 ETH_ALEN); 132 memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source, 133 ETH_ALEN); 134 flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; 135 dst_release(dst); 136 break; 137 case FLOW_OFFLOAD_XMIT_XFRM: 138 case FLOW_OFFLOAD_XMIT_NEIGH: 139 flow_tuple->ifidx = route->tuple[dir].out.ifindex; 140 flow_tuple->dst_cache = dst; 141 flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); 142 break; 143 default: 144 WARN_ON_ONCE(1); 145 break; 146 } 147 flow_tuple->xmit_type = route->tuple[dir].xmit_type; 148 149 return 0; 150 } 151 152 static void nft_flow_dst_release(struct flow_offload *flow, 153 enum flow_offload_tuple_dir dir) 154 { 155 if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || 156 flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) 157 dst_release(flow->tuplehash[dir].tuple.dst_cache); 158 } 159 160 void flow_offload_route_init(struct flow_offload *flow, 161 struct nf_flow_route *route) 162 { 163 flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); 164 flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); 165 flow->type = NF_FLOW_OFFLOAD_ROUTE; 166 } 167 EXPORT_SYMBOL_GPL(flow_offload_route_init); 168 169 static inline bool nf_flow_has_expired(const struct flow_offload *flow) 170 { 171 return nf_flow_timeout_delta(flow->timeout) <= 0; 172 } 173 174 static void flow_offload_fixup_tcp(struct nf_conn *ct, u8 tcp_state) 175 { 176 struct ip_ct_tcp *tcp = &ct->proto.tcp; 177 178 spin_lock_bh(&ct->lock); 179 if (tcp->state != tcp_state) 180 tcp->state = tcp_state; 181 182 /* syn packet triggers the TCP reopen case from conntrack. */ 183 if (tcp->state == TCP_CONNTRACK_CLOSE) 184 ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 185 186 /* Conntrack state is outdated due to offload bypass. 187 * Clear IP_CT_TCP_FLAG_MAXACK_SET, otherwise conntracks 188 * TCP reset validation will fail. 189 */ 190 tcp->seen[0].td_maxwin = 0; 191 tcp->seen[0].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET; 192 tcp->seen[1].td_maxwin = 0; 193 tcp->seen[1].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET; 194 spin_unlock_bh(&ct->lock); 195 } 196 197 static void flow_offload_fixup_ct(struct flow_offload *flow) 198 { 199 struct nf_conn *ct = flow->ct; 200 struct net *net = nf_ct_net(ct); 201 int l4num = nf_ct_protonum(ct); 202 bool expired, closing = false; 203 u32 offload_timeout = 0; 204 s32 timeout; 205 206 if (l4num == IPPROTO_TCP) { 207 const struct nf_tcp_net *tn = nf_tcp_pernet(net); 208 u8 tcp_state; 209 210 /* Enter CLOSE state if fin/rst packet has been seen, this 211 * allows TCP reopen from conntrack. Otherwise, pick up from 212 * the last seen TCP state. 213 */ 214 closing = test_bit(NF_FLOW_CLOSING, &flow->flags); 215 if (closing) { 216 flow_offload_fixup_tcp(ct, TCP_CONNTRACK_CLOSE); 217 timeout = READ_ONCE(tn->timeouts[TCP_CONNTRACK_CLOSE]); 218 expired = false; 219 } else { 220 tcp_state = READ_ONCE(ct->proto.tcp.state); 221 flow_offload_fixup_tcp(ct, tcp_state); 222 timeout = READ_ONCE(tn->timeouts[tcp_state]); 223 expired = nf_flow_has_expired(flow); 224 } 225 offload_timeout = READ_ONCE(tn->offload_timeout); 226 227 } else if (l4num == IPPROTO_UDP) { 228 const struct nf_udp_net *tn = nf_udp_pernet(net); 229 enum udp_conntrack state = 230 test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ? 231 UDP_CT_REPLIED : UDP_CT_UNREPLIED; 232 233 timeout = READ_ONCE(tn->timeouts[state]); 234 expired = nf_flow_has_expired(flow); 235 offload_timeout = READ_ONCE(tn->offload_timeout); 236 } else { 237 return; 238 } 239 240 if (expired) 241 timeout -= offload_timeout; 242 243 if (timeout < 0) 244 timeout = 0; 245 246 if (closing || 247 nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) 248 nf_ct_refresh(ct, timeout); 249 } 250 251 static void flow_offload_route_release(struct flow_offload *flow) 252 { 253 nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); 254 nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY); 255 } 256 257 void flow_offload_free(struct flow_offload *flow) 258 { 259 switch (flow->type) { 260 case NF_FLOW_OFFLOAD_ROUTE: 261 flow_offload_route_release(flow); 262 break; 263 default: 264 break; 265 } 266 nf_ct_put(flow->ct); 267 kfree_rcu(flow, rcu_head); 268 } 269 EXPORT_SYMBOL_GPL(flow_offload_free); 270 271 static u32 flow_offload_hash(const void *data, u32 len, u32 seed) 272 { 273 const struct flow_offload_tuple *tuple = data; 274 275 return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed); 276 } 277 278 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) 279 { 280 const struct flow_offload_tuple_rhash *tuplehash = data; 281 282 return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed); 283 } 284 285 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, 286 const void *ptr) 287 { 288 const struct flow_offload_tuple *tuple = arg->key; 289 const struct flow_offload_tuple_rhash *x = ptr; 290 291 if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash))) 292 return 1; 293 294 return 0; 295 } 296 297 static const struct rhashtable_params nf_flow_offload_rhash_params = { 298 .head_offset = offsetof(struct flow_offload_tuple_rhash, node), 299 .hashfn = flow_offload_hash, 300 .obj_hashfn = flow_offload_hash_obj, 301 .obj_cmpfn = flow_offload_hash_cmp, 302 .automatic_shrinking = true, 303 }; 304 305 unsigned long flow_offload_get_timeout(struct flow_offload *flow) 306 { 307 unsigned long timeout = NF_FLOW_TIMEOUT; 308 struct net *net = nf_ct_net(flow->ct); 309 int l4num = nf_ct_protonum(flow->ct); 310 311 if (l4num == IPPROTO_TCP) { 312 struct nf_tcp_net *tn = nf_tcp_pernet(net); 313 314 timeout = tn->offload_timeout; 315 } else if (l4num == IPPROTO_UDP) { 316 struct nf_udp_net *tn = nf_udp_pernet(net); 317 318 timeout = tn->offload_timeout; 319 } 320 321 return timeout; 322 } 323 324 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) 325 { 326 int err; 327 328 flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); 329 330 err = rhashtable_insert_fast(&flow_table->rhashtable, 331 &flow->tuplehash[0].node, 332 nf_flow_offload_rhash_params); 333 if (err < 0) 334 return err; 335 336 err = rhashtable_insert_fast(&flow_table->rhashtable, 337 &flow->tuplehash[1].node, 338 nf_flow_offload_rhash_params); 339 if (err < 0) { 340 rhashtable_remove_fast(&flow_table->rhashtable, 341 &flow->tuplehash[0].node, 342 nf_flow_offload_rhash_params); 343 return err; 344 } 345 346 nf_ct_refresh(flow->ct, NF_CT_DAY); 347 348 if (nf_flowtable_hw_offload(flow_table)) { 349 __set_bit(NF_FLOW_HW, &flow->flags); 350 nf_flow_offload_add(flow_table, flow); 351 } 352 353 return 0; 354 } 355 EXPORT_SYMBOL_GPL(flow_offload_add); 356 357 void flow_offload_refresh(struct nf_flowtable *flow_table, 358 struct flow_offload *flow, bool force) 359 { 360 u32 timeout; 361 362 timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); 363 if (force || timeout - READ_ONCE(flow->timeout) > HZ) 364 WRITE_ONCE(flow->timeout, timeout); 365 else 366 return; 367 368 if (likely(!nf_flowtable_hw_offload(flow_table)) || 369 test_bit(NF_FLOW_CLOSING, &flow->flags)) 370 return; 371 372 nf_flow_offload_add(flow_table, flow); 373 } 374 EXPORT_SYMBOL_GPL(flow_offload_refresh); 375 376 static void flow_offload_del(struct nf_flowtable *flow_table, 377 struct flow_offload *flow) 378 { 379 rhashtable_remove_fast(&flow_table->rhashtable, 380 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, 381 nf_flow_offload_rhash_params); 382 rhashtable_remove_fast(&flow_table->rhashtable, 383 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, 384 nf_flow_offload_rhash_params); 385 flow_offload_free(flow); 386 } 387 388 void flow_offload_teardown(struct flow_offload *flow) 389 { 390 clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); 391 if (!test_and_set_bit(NF_FLOW_TEARDOWN, &flow->flags)) 392 flow_offload_fixup_ct(flow); 393 } 394 EXPORT_SYMBOL_GPL(flow_offload_teardown); 395 396 struct flow_offload_tuple_rhash * 397 flow_offload_lookup(struct nf_flowtable *flow_table, 398 struct flow_offload_tuple *tuple) 399 { 400 struct flow_offload_tuple_rhash *tuplehash; 401 struct flow_offload *flow; 402 int dir; 403 404 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple, 405 nf_flow_offload_rhash_params); 406 if (!tuplehash) 407 return NULL; 408 409 dir = tuplehash->tuple.dir; 410 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); 411 if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) 412 return NULL; 413 414 if (unlikely(nf_ct_is_dying(flow->ct))) 415 return NULL; 416 417 return tuplehash; 418 } 419 EXPORT_SYMBOL_GPL(flow_offload_lookup); 420 421 static int 422 nf_flow_table_iterate(struct nf_flowtable *flow_table, 423 void (*iter)(struct nf_flowtable *flowtable, 424 struct flow_offload *flow, void *data), 425 void *data) 426 { 427 struct flow_offload_tuple_rhash *tuplehash; 428 struct rhashtable_iter hti; 429 struct flow_offload *flow; 430 int err = 0; 431 432 rhashtable_walk_enter(&flow_table->rhashtable, &hti); 433 rhashtable_walk_start(&hti); 434 435 while ((tuplehash = rhashtable_walk_next(&hti))) { 436 if (IS_ERR(tuplehash)) { 437 if (PTR_ERR(tuplehash) != -EAGAIN) { 438 err = PTR_ERR(tuplehash); 439 break; 440 } 441 continue; 442 } 443 if (tuplehash->tuple.dir) 444 continue; 445 446 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); 447 448 iter(flow_table, flow, data); 449 } 450 rhashtable_walk_stop(&hti); 451 rhashtable_walk_exit(&hti); 452 453 return err; 454 } 455 456 static bool nf_flow_custom_gc(struct nf_flowtable *flow_table, 457 const struct flow_offload *flow) 458 { 459 return flow_table->type->gc && flow_table->type->gc(flow); 460 } 461 462 /** 463 * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry 464 * @ct: Flowtable offloaded tcp ct 465 * 466 * Return: number of seconds when ct entry should expire. 467 */ 468 static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct) 469 { 470 u8 state = READ_ONCE(ct->proto.tcp.state); 471 472 switch (state) { 473 case TCP_CONNTRACK_SYN_SENT: 474 case TCP_CONNTRACK_SYN_RECV: 475 return 0; 476 case TCP_CONNTRACK_ESTABLISHED: 477 return NF_CT_DAY; 478 case TCP_CONNTRACK_FIN_WAIT: 479 case TCP_CONNTRACK_CLOSE_WAIT: 480 case TCP_CONNTRACK_LAST_ACK: 481 case TCP_CONNTRACK_TIME_WAIT: 482 return 5 * 60 * HZ; 483 case TCP_CONNTRACK_CLOSE: 484 return 0; 485 } 486 487 return 0; 488 } 489 490 /** 491 * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry 492 * @ct: Flowtable offloaded ct 493 * 494 * Datapath lookups in the conntrack table will evict nf_conn entries 495 * if they have expired. 496 * 497 * Once nf_conn entries have been offloaded, nf_conntrack might not see any 498 * packets anymore. Thus ct->timeout is no longer refreshed and ct can 499 * be evicted. 500 * 501 * To avoid the need for an additional check on the offload bit for every 502 * packet processed via nf_conntrack_in(), set an arbitrary timeout large 503 * enough not to ever expire, this save us a check for the IPS_OFFLOAD_BIT 504 * from the packet path via nf_ct_is_expired(). 505 */ 506 static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct) 507 { 508 static const s32 min_timeout = 5 * 60 * HZ; 509 u32 ct_timeout = READ_ONCE(ct->timeout); 510 s32 expires; 511 512 expires = ct_timeout - nfct_time_stamp; 513 if (expires <= 0) /* already expired */ 514 return; 515 516 /* normal case: large enough timeout, nothing to do. */ 517 if (likely(expires >= min_timeout)) 518 return; 519 520 /* must check offload bit after this, we do not hold any locks. 521 * flowtable and ct entries could have been removed on another CPU. 522 */ 523 if (!refcount_inc_not_zero(&ct->ct_general.use)) 524 return; 525 526 /* load ct->status after refcount increase */ 527 smp_acquire__after_ctrl_dep(); 528 529 if (nf_ct_is_confirmed(ct) && 530 test_bit(IPS_OFFLOAD_BIT, &ct->status)) { 531 u8 l4proto = nf_ct_protonum(ct); 532 u32 new_timeout = 1; 533 534 switch (l4proto) { 535 case IPPROTO_UDP: 536 new_timeout = NF_CT_DAY; 537 break; 538 case IPPROTO_TCP: 539 new_timeout = nf_flow_table_tcp_timeout(ct); 540 break; 541 default: 542 WARN_ON_ONCE(1); 543 break; 544 } 545 546 /* Update to ct->timeout from nf_conntrack happens 547 * without holding ct->lock. 548 * 549 * Use cmpxchg to ensure timeout extension doesn't 550 * happen when we race with conntrack datapath. 551 * 552 * The inverse -- datapath updating ->timeout right 553 * after this -- is fine, datapath is authoritative. 554 */ 555 if (new_timeout) { 556 new_timeout += nfct_time_stamp; 557 cmpxchg(&ct->timeout, ct_timeout, new_timeout); 558 } 559 } 560 561 nf_ct_put(ct); 562 } 563 564 static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, 565 struct flow_offload *flow, void *data) 566 { 567 bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags); 568 569 if (nf_flow_has_expired(flow) || 570 nf_ct_is_dying(flow->ct) || 571 nf_flow_custom_gc(flow_table, flow)) { 572 flow_offload_teardown(flow); 573 teardown = true; 574 } else if (!teardown) { 575 nf_flow_table_extend_ct_timeout(flow->ct); 576 } 577 578 if (teardown) { 579 if (test_bit(NF_FLOW_HW, &flow->flags)) { 580 if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) 581 nf_flow_offload_del(flow_table, flow); 582 else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags)) 583 flow_offload_del(flow_table, flow); 584 } else { 585 flow_offload_del(flow_table, flow); 586 } 587 } else if (test_bit(NF_FLOW_CLOSING, &flow->flags) && 588 test_bit(NF_FLOW_HW, &flow->flags) && 589 !test_bit(NF_FLOW_HW_DYING, &flow->flags)) { 590 nf_flow_offload_del(flow_table, flow); 591 } else if (test_bit(NF_FLOW_HW, &flow->flags)) { 592 nf_flow_offload_stats(flow_table, flow); 593 } 594 } 595 596 void nf_flow_table_gc_run(struct nf_flowtable *flow_table) 597 { 598 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); 599 } 600 601 static void nf_flow_offload_work_gc(struct work_struct *work) 602 { 603 struct nf_flowtable *flow_table; 604 605 flow_table = container_of(work, struct nf_flowtable, gc_work.work); 606 nf_flow_table_gc_run(flow_table); 607 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); 608 } 609 610 static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, 611 __be16 port, __be16 new_port) 612 { 613 struct tcphdr *tcph; 614 615 tcph = (void *)(skb_network_header(skb) + thoff); 616 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); 617 } 618 619 static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, 620 __be16 port, __be16 new_port) 621 { 622 struct udphdr *udph; 623 624 udph = (void *)(skb_network_header(skb) + thoff); 625 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 626 inet_proto_csum_replace2(&udph->check, skb, port, 627 new_port, false); 628 if (!udph->check) 629 udph->check = CSUM_MANGLED_0; 630 } 631 } 632 633 static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, 634 u8 protocol, __be16 port, __be16 new_port) 635 { 636 switch (protocol) { 637 case IPPROTO_TCP: 638 nf_flow_nat_port_tcp(skb, thoff, port, new_port); 639 break; 640 case IPPROTO_UDP: 641 nf_flow_nat_port_udp(skb, thoff, port, new_port); 642 break; 643 } 644 } 645 646 void nf_flow_snat_port(const struct flow_offload *flow, 647 struct sk_buff *skb, unsigned int thoff, 648 u8 protocol, enum flow_offload_tuple_dir dir) 649 { 650 struct flow_ports *hdr; 651 __be16 port, new_port; 652 653 hdr = (void *)(skb_network_header(skb) + thoff); 654 655 switch (dir) { 656 case FLOW_OFFLOAD_DIR_ORIGINAL: 657 port = hdr->source; 658 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; 659 hdr->source = new_port; 660 break; 661 case FLOW_OFFLOAD_DIR_REPLY: 662 port = hdr->dest; 663 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; 664 hdr->dest = new_port; 665 break; 666 } 667 668 nf_flow_nat_port(skb, thoff, protocol, port, new_port); 669 } 670 EXPORT_SYMBOL_GPL(nf_flow_snat_port); 671 672 void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb, 673 unsigned int thoff, u8 protocol, 674 enum flow_offload_tuple_dir dir) 675 { 676 struct flow_ports *hdr; 677 __be16 port, new_port; 678 679 hdr = (void *)(skb_network_header(skb) + thoff); 680 681 switch (dir) { 682 case FLOW_OFFLOAD_DIR_ORIGINAL: 683 port = hdr->dest; 684 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; 685 hdr->dest = new_port; 686 break; 687 case FLOW_OFFLOAD_DIR_REPLY: 688 port = hdr->source; 689 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; 690 hdr->source = new_port; 691 break; 692 } 693 694 nf_flow_nat_port(skb, thoff, protocol, port, new_port); 695 } 696 EXPORT_SYMBOL_GPL(nf_flow_dnat_port); 697 698 int nf_flow_table_init(struct nf_flowtable *flowtable) 699 { 700 int err; 701 702 INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); 703 flow_block_init(&flowtable->flow_block); 704 init_rwsem(&flowtable->flow_block_lock); 705 706 err = rhashtable_init(&flowtable->rhashtable, 707 &nf_flow_offload_rhash_params); 708 if (err < 0) 709 return err; 710 711 queue_delayed_work(system_power_efficient_wq, 712 &flowtable->gc_work, HZ); 713 714 mutex_lock(&flowtable_lock); 715 list_add(&flowtable->list, &flowtables); 716 mutex_unlock(&flowtable_lock); 717 718 return 0; 719 } 720 EXPORT_SYMBOL_GPL(nf_flow_table_init); 721 722 static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table, 723 struct flow_offload *flow, void *data) 724 { 725 struct net_device *dev = data; 726 727 if (!dev) { 728 flow_offload_teardown(flow); 729 return; 730 } 731 732 if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) && 733 (flow->tuplehash[0].tuple.iifidx == dev->ifindex || 734 flow->tuplehash[1].tuple.iifidx == dev->ifindex)) 735 flow_offload_teardown(flow); 736 } 737 738 void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, 739 struct net_device *dev) 740 { 741 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); 742 flush_delayed_work(&flowtable->gc_work); 743 nf_flow_table_offload_flush(flowtable); 744 } 745 746 void nf_flow_table_cleanup(struct net_device *dev) 747 { 748 struct nf_flowtable *flowtable; 749 750 mutex_lock(&flowtable_lock); 751 list_for_each_entry(flowtable, &flowtables, list) 752 nf_flow_table_gc_cleanup(flowtable, dev); 753 mutex_unlock(&flowtable_lock); 754 } 755 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); 756 757 void nf_flow_table_free(struct nf_flowtable *flow_table) 758 { 759 mutex_lock(&flowtable_lock); 760 list_del(&flow_table->list); 761 mutex_unlock(&flowtable_lock); 762 763 cancel_delayed_work_sync(&flow_table->gc_work); 764 nf_flow_table_offload_flush(flow_table); 765 /* ... no more pending work after this stage ... */ 766 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); 767 nf_flow_table_gc_run(flow_table); 768 nf_flow_table_offload_flush_cleanup(flow_table); 769 rhashtable_destroy(&flow_table->rhashtable); 770 } 771 EXPORT_SYMBOL_GPL(nf_flow_table_free); 772 773 static int nf_flow_table_init_net(struct net *net) 774 { 775 net->ft.stat = alloc_percpu(struct nf_flow_table_stat); 776 return net->ft.stat ? 0 : -ENOMEM; 777 } 778 779 static void nf_flow_table_fini_net(struct net *net) 780 { 781 free_percpu(net->ft.stat); 782 } 783 784 static int nf_flow_table_pernet_init(struct net *net) 785 { 786 int ret; 787 788 ret = nf_flow_table_init_net(net); 789 if (ret < 0) 790 return ret; 791 792 ret = nf_flow_table_init_proc(net); 793 if (ret < 0) 794 goto out_proc; 795 796 return 0; 797 798 out_proc: 799 nf_flow_table_fini_net(net); 800 return ret; 801 } 802 803 static void nf_flow_table_pernet_exit(struct list_head *net_exit_list) 804 { 805 struct net *net; 806 807 list_for_each_entry(net, net_exit_list, exit_list) { 808 nf_flow_table_fini_proc(net); 809 nf_flow_table_fini_net(net); 810 } 811 } 812 813 static struct pernet_operations nf_flow_table_net_ops = { 814 .init = nf_flow_table_pernet_init, 815 .exit_batch = nf_flow_table_pernet_exit, 816 }; 817 818 static int __init nf_flow_table_module_init(void) 819 { 820 int ret; 821 822 flow_offload_cachep = KMEM_CACHE(flow_offload, SLAB_HWCACHE_ALIGN); 823 if (!flow_offload_cachep) 824 return -ENOMEM; 825 826 ret = register_pernet_subsys(&nf_flow_table_net_ops); 827 if (ret < 0) 828 goto out_pernet; 829 830 ret = nf_flow_table_offload_init(); 831 if (ret) 832 goto out_offload; 833 834 ret = nf_flow_register_bpf(); 835 if (ret) 836 goto out_bpf; 837 838 return 0; 839 840 out_bpf: 841 nf_flow_table_offload_exit(); 842 out_offload: 843 unregister_pernet_subsys(&nf_flow_table_net_ops); 844 out_pernet: 845 kmem_cache_destroy(flow_offload_cachep); 846 return ret; 847 } 848 849 static void __exit nf_flow_table_module_exit(void) 850 { 851 nf_flow_table_offload_exit(); 852 unregister_pernet_subsys(&nf_flow_table_net_ops); 853 kmem_cache_destroy(flow_offload_cachep); 854 } 855 856 module_init(nf_flow_table_module_init); 857 module_exit(nf_flow_table_module_exit); 858 859 MODULE_LICENSE("GPL"); 860 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); 861 MODULE_DESCRIPTION("Netfilter flow table module"); 862