// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>

static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);

static void
flow_offload_fill_dir(struct flow_offload *flow,
		      enum flow_offload_tuple_dir dir)
{
	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
	struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;

	ft->dir = dir;

	switch (ctt->src.l3num) {
	case NFPROTO_IPV4:
		ft->src_v4 = ctt->src.u3.in;
		ft->dst_v4 = ctt->dst.u3.in;
		break;
	case NFPROTO_IPV6:
		ft->src_v6 = ctt->src.u3.in6;
		ft->dst_v6 = ctt->dst.u3.in6;
		break;
	}

	ft->l3proto = ctt->src.l3num;
	ft->l4proto = ctt->dst.protonum;

	switch (ctt->dst.protonum) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ft->src_port = ctt->src.u.tcp.port;
		ft->dst_port = ctt->dst.u.tcp.port;
		break;
	}
}

struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
{
	struct flow_offload *flow;

	if (unlikely(nf_ct_is_dying(ct)))
		return NULL;

	flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
	if (!flow)
		return NULL;

	refcount_inc(&ct->ct_general.use);
	flow->ct = ct;

	flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);

	if (ct->status & IPS_SRC_NAT)
		__set_bit(NF_FLOW_SNAT, &flow->flags);
	if (ct->status & IPS_DST_NAT)
		__set_bit(NF_FLOW_DNAT, &flow->flags);

	return flow;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
{
	if (flow_tuple->l3proto == NFPROTO_IPV6)
		return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache));

	return 0;
}

static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
					     enum flow_offload_tuple_dir dir)
{
	struct dst_entry *dst = route->tuple[dir].dst;

	route->tuple[dir].dst = NULL;

	return dst;
}

static int flow_offload_fill_route(struct flow_offload *flow,
				   struct nf_flow_route *route,
				   enum flow_offload_tuple_dir dir)
{
	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
	struct dst_entry *dst = nft_route_dst_fetch(route, dir);
	int i, j = 0;

	switch (flow_tuple->l3proto) {
	case NFPROTO_IPV4:
		flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
		break;
	case NFPROTO_IPV6:
		flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
		break;
	}

	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
	for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
		flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
		flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
		if (route->tuple[dir].in.ingress_vlans & BIT(i))
			flow_tuple->in_vlan_ingress |= BIT(j);
		j++;
	}

	flow_tuple->tun = route->tuple[dir].in.tun;
	flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
	flow_tuple->tun_num = route->tuple[dir].in.num_tuns;

	switch (route->tuple[dir].xmit_type) {
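	/* Direct xmit caches the resolved Ethernet header and the output
	 * ifindex and drops the dst reference; the neigh/xfrm paths keep
	 * the dst entry cached for the datapath instead.
	 */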
	case FLOW_OFFLOAD_XMIT_DIRECT:
		memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
		       ETH_ALEN);
		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
		       ETH_ALEN);
		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
		dst_release(dst);
		break;
	case FLOW_OFFLOAD_XMIT_XFRM:
	case FLOW_OFFLOAD_XMIT_NEIGH:
		flow_tuple->ifidx = route->tuple[dir].out.ifindex;
		flow_tuple->dst_cache = dst;
		flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}
	flow_tuple->xmit_type = route->tuple[dir].xmit_type;

	return 0;
}

static void nft_flow_dst_release(struct flow_offload *flow,
				 enum flow_offload_tuple_dir dir)
{
	if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
	    flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
		dst_release(flow->tuplehash[dir].tuple.dst_cache);
}

void flow_offload_route_init(struct flow_offload *flow,
			     struct nf_flow_route *route)
{
	flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
	flow->type = NF_FLOW_OFFLOAD_ROUTE;
}
EXPORT_SYMBOL_GPL(flow_offload_route_init);

static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
	return nf_flow_timeout_delta(flow->timeout) <= 0;
}

static void flow_offload_fixup_tcp(struct nf_conn *ct, u8 tcp_state)
{
	struct ip_ct_tcp *tcp = &ct->proto.tcp;

	spin_lock_bh(&ct->lock);
	if (tcp->state != tcp_state)
		tcp->state = tcp_state;

	/* syn packet triggers the TCP reopen case from conntrack. */
	if (tcp->state == TCP_CONNTRACK_CLOSE)
		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;

	/* Conntrack state is outdated due to offload bypass.
	 * Clear IP_CT_TCP_FLAG_MAXACK_SET, otherwise conntrack's
	 * TCP reset validation will fail.
	 */
	tcp->seen[0].td_maxwin = 0;
	tcp->seen[0].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET;
	tcp->seen[1].td_maxwin = 0;
	tcp->seen[1].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET;
	spin_unlock_bh(&ct->lock);
}

static void flow_offload_fixup_ct(struct flow_offload *flow)
{
	struct nf_conn *ct = flow->ct;
	struct net *net = nf_ct_net(ct);
	int l4num = nf_ct_protonum(ct);
	bool expired, closing = false;
	u32 offload_timeout = 0;
	s32 timeout;

	if (l4num == IPPROTO_TCP) {
		const struct nf_tcp_net *tn = nf_tcp_pernet(net);
		u8 tcp_state;

		/* Enter CLOSE state if fin/rst packet has been seen, this
		 * allows TCP reopen from conntrack. Otherwise, pick up from
		 * the last seen TCP state.
		 */
		closing = test_bit(NF_FLOW_CLOSING, &flow->flags);
		if (closing) {
			flow_offload_fixup_tcp(ct, TCP_CONNTRACK_CLOSE);
			timeout = READ_ONCE(tn->timeouts[TCP_CONNTRACK_CLOSE]);
			expired = false;
		} else {
			tcp_state = READ_ONCE(ct->proto.tcp.state);
			flow_offload_fixup_tcp(ct, tcp_state);
			timeout = READ_ONCE(tn->timeouts[tcp_state]);
			expired = nf_flow_has_expired(flow);
		}
		offload_timeout = READ_ONCE(tn->offload_timeout);

	} else if (l4num == IPPROTO_UDP) {
		const struct nf_udp_net *tn = nf_udp_pernet(net);
		enum udp_conntrack state =
			test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
			UDP_CT_REPLIED : UDP_CT_UNREPLIED;

		timeout = READ_ONCE(tn->timeouts[state]);
		expired = nf_flow_has_expired(flow);
		offload_timeout = READ_ONCE(tn->offload_timeout);
	} else {
		return;
	}

	if (expired)
		timeout -= offload_timeout;

	if (timeout < 0)
		timeout = 0;

	if (closing ||
	    nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
		nf_ct_refresh(ct, timeout);
}

static void flow_offload_route_release(struct flow_offload *flow)
{
	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
}

void flow_offload_free(struct flow_offload *flow)
{
	switch (flow->type) {
	case NF_FLOW_OFFLOAD_ROUTE:
		flow_offload_route_release(flow);
		break;
	default:
		break;
	}
	nf_ct_put(flow->ct);
	kfree_rcu(flow, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple *tuple = data;

	return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple_rhash *tuplehash = data;

	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
				 const void *ptr)
{
	const struct flow_offload_tuple *tuple = arg->key;
	const struct flow_offload_tuple_rhash *x = ptr;

	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
		return 1;

	return 0;
}

static const struct rhashtable_params nf_flow_offload_rhash_params = {
	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
	.hashfn			= flow_offload_hash,
	.obj_hashfn		= flow_offload_hash_obj,
	.obj_cmpfn		= flow_offload_hash_cmp,
	.automatic_shrinking	= true,
};

unsigned long flow_offload_get_timeout(struct flow_offload *flow)
{
	unsigned long timeout = NF_FLOW_TIMEOUT;
	struct net *net = nf_ct_net(flow->ct);
	int l4num = nf_ct_protonum(flow->ct);

	if (l4num == IPPROTO_TCP) {
		struct nf_tcp_net *tn = nf_tcp_pernet(net);

		timeout = tn->offload_timeout;
	} else if (l4num == IPPROTO_UDP) {
		struct nf_udp_net *tn = nf_udp_pernet(net);

		timeout = tn->offload_timeout;
	}

	return timeout;
}

int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
	int err;

	flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);

	err = rhashtable_insert_fast(&flow_table->rhashtable,
				     &flow->tuplehash[0].node,
				     nf_flow_offload_rhash_params);
	if (err < 0)
		return err;

	err = rhashtable_insert_fast(&flow_table->rhashtable,
				     &flow->tuplehash[1].node,
				     nf_flow_offload_rhash_params);
	if (err < 0) {
		rhashtable_remove_fast(&flow_table->rhashtable,
				       &flow->tuplehash[0].node,
				       nf_flow_offload_rhash_params);
		return err;
	}

	nf_ct_refresh(flow->ct, NF_CT_DAY);

	if (nf_flowtable_hw_offload(flow_table)) {
		__set_bit(NF_FLOW_HW, &flow->flags);
		nf_flow_offload_add(flow_table, flow);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

void flow_offload_refresh(struct nf_flowtable *flow_table,
			  struct flow_offload *flow, bool force)
{
	u32 timeout;

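	/* Only advance the timeout when it has moved forward by more than
	 * HZ since the last update, unless the caller forces a refresh.
	 */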
	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
	if (force || timeout - READ_ONCE(flow->timeout) > HZ)
		WRITE_ONCE(flow->timeout, timeout);
	else
		return;

	if (likely(!nf_flowtable_hw_offload(flow_table)) ||
	    test_bit(NF_FLOW_CLOSING, &flow->flags))
		return;

	nf_flow_offload_add(flow_table, flow);
}
EXPORT_SYMBOL_GPL(flow_offload_refresh);

static void flow_offload_del(struct nf_flowtable *flow_table,
			     struct flow_offload *flow)
{
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       nf_flow_offload_rhash_params);
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       nf_flow_offload_rhash_params);
	flow_offload_free(flow);
}

void flow_offload_teardown(struct flow_offload *flow)
{
	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
	if (!test_and_set_bit(NF_FLOW_TEARDOWN, &flow->flags))
		flow_offload_fixup_ct(flow);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);

struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
		    struct flow_offload_tuple *tuple)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct flow_offload *flow;
	int dir;

	tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
				      nf_flow_offload_rhash_params);
	if (!tuplehash)
		return NULL;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
		return NULL;

	if (unlikely(nf_ct_is_dying(flow->ct)))
		return NULL;

	return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

static int
nf_flow_table_iterate(struct nf_flowtable *flow_table,
		      void (*iter)(struct nf_flowtable *flowtable,
				   struct flow_offload *flow, void *data),
		      void *data)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err = 0;

	rhashtable_walk_enter(&flow_table->rhashtable, &hti);
	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			if (PTR_ERR(tuplehash) != -EAGAIN) {
				err = PTR_ERR(tuplehash);
				break;
			}
			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		iter(flow_table, flow, data);
	}
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return err;
}

static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
			      const struct flow_offload *flow)
{
	return flow_table->type->gc && flow_table->type->gc(flow);
}

/**
 * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry
 * @ct:		Flowtable offloaded tcp ct
 *
 * Return: timeout in jiffies after which the offloaded ct entry should expire.
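 * A zero return means the caller leaves the current conntrack timeout
 * untouched.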
 */
static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct)
{
	u8 state = READ_ONCE(ct->proto.tcp.state);

	switch (state) {
	case TCP_CONNTRACK_SYN_SENT:
	case TCP_CONNTRACK_SYN_RECV:
		return 0;
	case TCP_CONNTRACK_ESTABLISHED:
		return NF_CT_DAY;
	case TCP_CONNTRACK_FIN_WAIT:
	case TCP_CONNTRACK_CLOSE_WAIT:
	case TCP_CONNTRACK_LAST_ACK:
	case TCP_CONNTRACK_TIME_WAIT:
		return 5 * 60 * HZ;
	case TCP_CONNTRACK_CLOSE:
		return 0;
	}

	return 0;
}

/**
 * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry
 * @ct:		Flowtable offloaded ct
 *
 * Datapath lookups in the conntrack table will evict nf_conn entries
 * if they have expired.
 *
 * Once nf_conn entries have been offloaded, nf_conntrack might not see any
 * packets anymore.  Thus ct->timeout is no longer refreshed and ct can
 * be evicted.
 *
 * To avoid the need for an additional check on the offload bit for every
 * packet processed via nf_conntrack_in(), set an arbitrary timeout large
 * enough not to ever expire; this saves us a check for the IPS_OFFLOAD_BIT
 * from the packet path via nf_ct_is_expired().
 */
static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
{
	static const u32 min_timeout = 5 * 60 * HZ;
	u32 expires = nf_ct_expires(ct);

	/* normal case: large enough timeout, nothing to do. */
	if (likely(expires >= min_timeout))
		return;

	/* must check offload bit after this, we do not hold any locks.
	 * flowtable and ct entries could have been removed on another CPU.
	 */
	if (!refcount_inc_not_zero(&ct->ct_general.use))
		return;

	/* load ct->status after refcount increase */
	smp_acquire__after_ctrl_dep();

	if (nf_ct_is_confirmed(ct) &&
	    test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
		u8 l4proto = nf_ct_protonum(ct);
		u32 new_timeout = true;

		switch (l4proto) {
		case IPPROTO_UDP:
			new_timeout = NF_CT_DAY;
			break;
		case IPPROTO_TCP:
			new_timeout = nf_flow_table_tcp_timeout(ct);
			break;
		default:
			WARN_ON_ONCE(1);
			break;
		}

		/* Update to ct->timeout from nf_conntrack happens
		 * without holding ct->lock.
		 *
		 * Use cmpxchg to ensure timeout extension doesn't
		 * happen when we race with conntrack datapath.
		 *
		 * The inverse -- datapath updating ->timeout right
		 * after this -- is fine, datapath is authoritative.
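		 *
		 * If the cmpxchg loses the race, ct->timeout was already
		 * refreshed elsewhere and the extension is simply skipped.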
		 */
		if (new_timeout) {
			new_timeout += nfct_time_stamp;
			cmpxchg(&ct->timeout, expires, new_timeout);
		}
	}

	nf_ct_put(ct);
}

static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
				    struct flow_offload *flow, void *data)
{
	bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags);

	if (nf_flow_has_expired(flow) ||
	    nf_ct_is_dying(flow->ct) ||
	    nf_flow_custom_gc(flow_table, flow)) {
		flow_offload_teardown(flow);
		teardown = true;
	} else if (!teardown) {
		nf_flow_table_extend_ct_timeout(flow->ct);
	}

	if (teardown) {
		if (test_bit(NF_FLOW_HW, &flow->flags)) {
			if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
				nf_flow_offload_del(flow_table, flow);
			else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
				flow_offload_del(flow_table, flow);
		} else {
			flow_offload_del(flow_table, flow);
		}
	} else if (test_bit(NF_FLOW_CLOSING, &flow->flags) &&
		   test_bit(NF_FLOW_HW, &flow->flags) &&
		   !test_bit(NF_FLOW_HW_DYING, &flow->flags)) {
		nf_flow_offload_del(flow_table, flow);
	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
		nf_flow_offload_stats(flow_table, flow);
	}
}

void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
{
	nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
}

static void nf_flow_offload_work_gc(struct work_struct *work)
{
	struct nf_flowtable *flow_table;

	flow_table = container_of(work, struct nf_flowtable, gc_work.work);
	nf_flow_table_gc_run(flow_table);
	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}

static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
				 __be16 port, __be16 new_port)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
}

static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
				 __be16 port, __be16 new_port)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(&udph->check, skb, port,
					 new_port, false);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
			     u8 protocol, __be16 port, __be16 new_port)
{
	switch (protocol) {
	case IPPROTO_TCP:
		nf_flow_nat_port_tcp(skb, thoff, port, new_port);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_port_udp(skb, thoff, port, new_port);
		break;
	}
}

void nf_flow_snat_port(const struct flow_offload *flow,
		       struct sk_buff *skb, unsigned int thoff,
		       u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
		hdr->source = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
		hdr->dest = new_port;
		break;
	}

	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
		       unsigned int thoff, u8 protocol,
		       enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
		hdr->dest = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
		hdr->source = new_port;
		break;
	}

	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

int nf_flow_table_init(struct nf_flowtable *flowtable)
{
	int err;

	INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
	flow_block_init(&flowtable->flow_block);
	init_rwsem(&flowtable->flow_block_lock);

	err = rhashtable_init(&flowtable->rhashtable,
			      &nf_flow_offload_rhash_params);
	if (err < 0)
		return err;

	queue_delayed_work(system_power_efficient_wq,
			   &flowtable->gc_work, HZ);

	mutex_lock(&flowtable_lock);
	list_add(&flowtable->list, &flowtables);
	mutex_unlock(&flowtable_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);

static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
				     struct flow_offload *flow, void *data)
{
	struct net_device *dev = data;

	if (!dev) {
		flow_offload_teardown(flow);
		return;
	}

	if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
	    (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
	     flow->tuplehash[1].tuple.iifidx == dev->ifindex))
		flow_offload_teardown(flow);
}

void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
			      struct net_device *dev)
{
	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
	flush_delayed_work(&flowtable->gc_work);
	nf_flow_table_offload_flush(flowtable);
}

void nf_flow_table_cleanup(struct net_device *dev)
{
	struct nf_flowtable *flowtable;

	mutex_lock(&flowtable_lock);
	list_for_each_entry(flowtable, &flowtables, list)
		nf_flow_table_gc_cleanup(flowtable, dev);
	mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);

void nf_flow_table_free(struct nf_flowtable *flow_table)
{
	mutex_lock(&flowtable_lock);
	list_del(&flow_table->list);
	mutex_unlock(&flowtable_lock);

	cancel_delayed_work_sync(&flow_table->gc_work);
	nf_flow_table_offload_flush(flow_table);
	/* ... no more pending work after this stage ... */
	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
	nf_flow_table_gc_run(flow_table);
	nf_flow_table_offload_flush_cleanup(flow_table);
	rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

static int nf_flow_table_init_net(struct net *net)
{
	net->ft.stat = alloc_percpu(struct nf_flow_table_stat);
	return net->ft.stat ? 0 : -ENOMEM;
}

static void nf_flow_table_fini_net(struct net *net)
{
	free_percpu(net->ft.stat);
}

static int nf_flow_table_pernet_init(struct net *net)
{
	int ret;

	ret = nf_flow_table_init_net(net);
	if (ret < 0)
		return ret;

	ret = nf_flow_table_init_proc(net);
	if (ret < 0)
		goto out_proc;

	return 0;

out_proc:
	nf_flow_table_fini_net(net);
	return ret;
}

static void nf_flow_table_pernet_exit(struct list_head *net_exit_list)
{
	struct net *net;

	list_for_each_entry(net, net_exit_list, exit_list) {
		nf_flow_table_fini_proc(net);
		nf_flow_table_fini_net(net);
	}
}

static struct pernet_operations nf_flow_table_net_ops = {
	.init = nf_flow_table_pernet_init,
	.exit_batch = nf_flow_table_pernet_exit,
};

static int __init nf_flow_table_module_init(void)
{
	int ret;

	ret = register_pernet_subsys(&nf_flow_table_net_ops);
	if (ret < 0)
		return ret;

	ret = nf_flow_table_offload_init();
	if (ret)
		goto out_offload;

	ret = nf_flow_register_bpf();
	if (ret)
		goto out_bpf;

	return 0;

out_bpf:
	nf_flow_table_offload_exit();
out_offload:
	unregister_pernet_subsys(&nf_flow_table_net_ops);
	return ret;
}

static void __exit nf_flow_table_module_exit(void)
{
	nf_flow_table_offload_exit();
	unregister_pernet_subsys(&nf_flow_table_net_ops);
}

module_init(nf_flow_table_module_init);
module_exit(nf_flow_table_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("Netfilter flow table module");