// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>

/* Protects the global 'flowtables' list below. */
static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);
static __read_mostly struct kmem_cache *flow_offload_cachep;

/* Copy addresses, l3/l4 protocol numbers and ports for one direction
 * from the conntrack tuple into the flow offload tuple.
 */
static void
flow_offload_fill_dir(struct flow_offload *flow,
		      enum flow_offload_tuple_dir dir)
{
	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
	struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;

	ft->dir = dir;

	switch (ctt->src.l3num) {
	case NFPROTO_IPV4:
		ft->src_v4 = ctt->src.u3.in;
		ft->dst_v4 = ctt->dst.u3.in;
		break;
	case NFPROTO_IPV6:
		ft->src_v6 = ctt->src.u3.in6;
		ft->dst_v6 = ctt->dst.u3.in6;
		break;
	}

	ft->l3proto = ctt->src.l3num;
	ft->l4proto = ctt->dst.protonum;

	switch (ctt->dst.protonum) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* the tcp port fields are read for UDP too — NOTE(review):
		 * presumably tcp/udp share one union layout; confirm against
		 * struct nf_conntrack_tuple.
		 */
		ft->src_port = ctt->src.u.tcp.port;
		ft->dst_port = ctt->dst.u.tcp.port;
		break;
	}
}

/* Allocate a flow entry for @ct and populate both direction tuples.
 * Takes a reference on @ct (dropped in flow_offload_free()).
 * Returns NULL on allocation failure or if the conntrack entry is
 * already dying.
 */
struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
{
	struct flow_offload *flow;

	if (unlikely(nf_ct_is_dying(ct)))
		return NULL;

	flow = kmem_cache_zalloc(flow_offload_cachep, GFP_ATOMIC);
	if (!flow)
		return NULL;

	refcount_inc(&ct->ct_general.use);
	flow->ct = ct;

	flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);

	/* mirror the conntrack NAT status bits into the flow flags. */
	if (ct->status & IPS_SRC_NAT)
		__set_bit(NF_FLOW_SNAT, &flow->flags);
	if (ct->status & IPS_DST_NAT)
		__set_bit(NF_FLOW_DNAT, &flow->flags);

	return flow;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

/* Route cookie for detecting stale cached dsts; only IPv6 uses one. */
static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
{
	if (flow_tuple->l3proto == NFPROTO_IPV6)
		return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache));

	return 0;
}

/* Transfer dst ownership from @route to the caller; the route entry is
 * cleared so it cannot be released twice.
 */
static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
					     enum flow_offload_tuple_dir dir)
{
	struct dst_entry *dst = route->tuple[dir].dst;

	route->tuple[dir].dst = NULL;

	return dst;
}

/* Fill in routing/transmit information for one direction of @flow from
 * @route: MTU, input interface, encapsulation headers and the transmit
 * method (direct, xfrm or neighbour based).
 */
static int flow_offload_fill_route(struct flow_offload *flow,
				   struct nf_flow_route *route,
				   enum flow_offload_tuple_dir dir)
{
	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
	struct dst_entry *dst = nft_route_dst_fetch(route, dir);
	int i, j = 0;

	switch (flow_tuple->l3proto) {
	case NFPROTO_IPV4:
		flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
		break;
	case NFPROTO_IPV6:
		flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
		break;
	}

	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
	/* copy the ingress encap headers in reverse order — NOTE(review):
	 * presumably so they can be pushed back outermost-first on xmit;
	 * confirm against the flowtable datapath.
	 */
	for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
		flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
		flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
		if (route->tuple[dir].in.ingress_vlans & BIT(i))
			flow_tuple->in_vlan_ingress |= BIT(j);
		j++;
	}

	flow_tuple->tun = route->tuple[dir].in.tun;
	flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
	flow_tuple->tun_num = route->tuple[dir].in.num_tuns;

	switch (route->tuple[dir].xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
		       ETH_ALEN);
		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
		       ETH_ALEN);
		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
		/* direct xmit does not keep a dst cache: drop the ref here. */
		dst_release(dst);
		break;
	case FLOW_OFFLOAD_XMIT_XFRM:
	case FLOW_OFFLOAD_XMIT_NEIGH:
		flow_tuple->ifidx = route->tuple[dir].out.ifindex;
		/* dst reference kept in the tuple, released later via
		 * nft_flow_dst_release().
		 */
		flow_tuple->dst_cache = dst;
		flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}
	flow_tuple->xmit_type = route->tuple[dir].xmit_type;

	return 0;
}

/* Release the dst reference for one direction, if that direction's
 * xmit type holds one (NEIGH/XFRM only, see flow_offload_fill_route()).
 */
static void nft_flow_dst_release(struct flow_offload *flow,
				 enum flow_offload_tuple_dir dir)
{
	if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
	    flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
		dst_release(flow->tuplehash[dir].tuple.dst_cache);
}

/* Fill in routing information for both directions and mark the flow
 * as a route-type entry.
 */
void flow_offload_route_init(struct flow_offload *flow,
			     struct nf_flow_route *route)
{
	flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
	flow->type = NF_FLOW_OFFLOAD_ROUTE;
}
EXPORT_SYMBOL_GPL(flow_offload_route_init);

static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
	return nf_flow_timeout_delta(flow->timeout) <= 0;
}

/* Resynchronize conntrack's TCP state tracking with @tcp_state before
 * handing the connection back to the slow path.
 */
static void flow_offload_fixup_tcp(struct nf_conn *ct, u8 tcp_state)
{
	struct ip_ct_tcp *tcp = &ct->proto.tcp;

	spin_lock_bh(&ct->lock);
	if (tcp->state != tcp_state)
		tcp->state = tcp_state;

	/* syn packet triggers the TCP reopen case from conntrack. */
	if (tcp->state == TCP_CONNTRACK_CLOSE)
		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;

	/* Conntrack state is outdated due to offload bypass.
	 * Clear IP_CT_TCP_FLAG_MAXACK_SET, otherwise conntracks
	 * TCP reset validation will fail.
	 */
	tcp->seen[0].td_maxwin = 0;
	tcp->seen[0].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET;
	tcp->seen[1].td_maxwin = 0;
	tcp->seen[1].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET;
	spin_unlock_bh(&ct->lock);
}

/* Recompute the conntrack timeout for a flow leaving the offload path,
 * based on the protocol state the connection is in. Only TCP and UDP
 * are handled; other protocols return without changes.
 */
static void flow_offload_fixup_ct(struct flow_offload *flow)
{
	struct nf_conn *ct = flow->ct;
	struct net *net = nf_ct_net(ct);
	int l4num = nf_ct_protonum(ct);
	bool expired, closing = false;
	u32 offload_timeout = 0;
	s32 timeout;

	if (l4num == IPPROTO_TCP) {
		const struct nf_tcp_net *tn = nf_tcp_pernet(net);
		u8 tcp_state;

		/* Enter CLOSE state if fin/rst packet has been seen, this
		 * allows TCP reopen from conntrack. Otherwise, pick up from
		 * the last seen TCP state.
		 */
		closing = test_bit(NF_FLOW_CLOSING, &flow->flags);
		if (closing) {
			flow_offload_fixup_tcp(ct, TCP_CONNTRACK_CLOSE);
			timeout = READ_ONCE(tn->timeouts[TCP_CONNTRACK_CLOSE]);
			expired = false;
		} else {
			tcp_state = READ_ONCE(ct->proto.tcp.state);
			flow_offload_fixup_tcp(ct, tcp_state);
			timeout = READ_ONCE(tn->timeouts[tcp_state]);
			expired = nf_flow_has_expired(flow);
		}
		offload_timeout = READ_ONCE(tn->offload_timeout);

	} else if (l4num == IPPROTO_UDP) {
		const struct nf_udp_net *tn = nf_udp_pernet(net);
		enum udp_conntrack state =
			test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
			UDP_CT_REPLIED : UDP_CT_UNREPLIED;

		timeout = READ_ONCE(tn->timeouts[state]);
		expired = nf_flow_has_expired(flow);
		offload_timeout = READ_ONCE(tn->offload_timeout);
	} else {
		return;
	}

	if (expired)
		timeout -= offload_timeout;

	if (timeout < 0)
		timeout = 0;

	/* never extend the remaining ct lifetime, only shorten it —
	 * unless the connection is closing.
	 */
	if (closing ||
	    nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
		nf_ct_refresh(ct, timeout);
}

static void flow_offload_route_release(struct flow_offload *flow)
{
	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
}

/* Release a flow entry: drop route dsts (if any), put the conntrack
 * reference taken in flow_offload_alloc() and free the object after an
 * RCU grace period.
 */
void flow_offload_free(struct flow_offload *flow)
{
	switch (flow->type) {
	case NF_FLOW_OFFLOAD_ROUTE:
		flow_offload_route_release(flow);
		break;
	default:
		break;
	}
	nf_ct_put(flow->ct);
	kfree_rcu(flow, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

/* Hash the tuple fields up to (not including) the __hash marker. */
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple *tuple = data;

	return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple_rhash *tuplehash = data;

	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}

/* rhashtable compare callback: 0 on match, 1 otherwise. Compares the
 * same byte range that the hash functions cover.
 */
static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
				 const void *ptr)
{
	const struct flow_offload_tuple *tuple = arg->key;
	const struct flow_offload_tuple_rhash *x = ptr;

	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
		return 1;

	return 0;
}

static const struct rhashtable_params nf_flow_offload_rhash_params = {
	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
	.hashfn			= flow_offload_hash,
	.obj_hashfn		= flow_offload_hash_obj,
	.obj_cmpfn		= flow_offload_hash_cmp,
	.automatic_shrinking	= true,
};

/* Return the per-protocol offload timeout for @flow's l4 protocol,
 * falling back to NF_FLOW_TIMEOUT for other protocols.
 */
unsigned long flow_offload_get_timeout(struct flow_offload *flow)
{
	unsigned long timeout = NF_FLOW_TIMEOUT;
	struct net *net = nf_ct_net(flow->ct);
	int l4num = nf_ct_protonum(flow->ct);

	if (l4num == IPPROTO_TCP) {
		struct nf_tcp_net *tn = nf_tcp_pernet(net);

		timeout = tn->offload_timeout;
	} else if (l4num == IPPROTO_UDP) {
		struct nf_udp_net *tn = nf_udp_pernet(net);

		timeout = tn->offload_timeout;
	}

	return timeout;
}

/* Insert both direction tuples into the flowtable hash. On success the
 * conntrack timeout is bumped to NF_CT_DAY so the entry does not expire
 * while packets bypass conntrack; hardware offload is requested when the
 * flowtable supports it.
 */
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
	int err;

	flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);

	err = rhashtable_insert_fast(&flow_table->rhashtable,
				     &flow->tuplehash[0].node,
				     nf_flow_offload_rhash_params);
	if (err < 0)
		return err;

	err = rhashtable_insert_fast(&flow_table->rhashtable,
				     &flow->tuplehash[1].node,
				     nf_flow_offload_rhash_params);
	if (err < 0) {
		/* keep the table consistent: undo the first insertion. */
		rhashtable_remove_fast(&flow_table->rhashtable,
				       &flow->tuplehash[0].node,
				       nf_flow_offload_rhash_params);
		return err;
	}

	nf_ct_refresh(flow->ct, NF_CT_DAY);

	if (nf_flowtable_hw_offload(flow_table)) {
		__set_bit(NF_FLOW_HW, &flow->flags);
		nf_flow_offload_add(flow_table, flow);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

/* Refresh the flow timeout from the packet path. Unless @force is set,
 * the timeout is only rewritten when it would advance by more than HZ.
 * Live (non-closing) flows on hw-offload capable tables are also
 * re-pushed to hardware.
 */
void flow_offload_refresh(struct nf_flowtable *flow_table,
			  struct flow_offload *flow, bool force)
{
	u32 timeout;

	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
	if (force || timeout - READ_ONCE(flow->timeout) > HZ)
		WRITE_ONCE(flow->timeout, timeout);
	else
		return;

	if (likely(!nf_flowtable_hw_offload(flow_table)) ||
	    test_bit(NF_FLOW_CLOSING, &flow->flags))
		return;

	nf_flow_offload_add(flow_table, flow);
}
EXPORT_SYMBOL_GPL(flow_offload_refresh);

/* Remove both direction tuples from the hash table and free the flow. */
static void flow_offload_del(struct nf_flowtable *flow_table,
			     struct flow_offload *flow)
{
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       nf_flow_offload_rhash_params);
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       nf_flow_offload_rhash_params);
	flow_offload_free(flow);
}

/* Hand the flow back to classic conntrack processing. The conntrack
 * fixup runs only once, on the first teardown request.
 */
void flow_offload_teardown(struct flow_offload *flow)
{
	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
	if (!test_and_set_bit(NF_FLOW_TEARDOWN, &flow->flags))
		flow_offload_fixup_ct(flow);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);

/* Look up a flow by tuple. Returns NULL for unknown tuples and for
 * entries that are being torn down or whose conntrack is dying.
 */
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
		    struct flow_offload_tuple *tuple)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct flow_offload *flow;
	int dir;

	tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
				      nf_flow_offload_rhash_params);
	if (!tuplehash)
		return NULL;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
		return NULL;

	if (unlikely(nf_ct_is_dying(flow->ct)))
		return NULL;

	return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

/* Walk all flows in @flow_table and invoke @iter once per flow.
 * -EAGAIN from the rhashtable walker (table resize) is retried
 * transparently; any other walker error stops the walk and is returned.
 */
static int
nf_flow_table_iterate(struct nf_flowtable *flow_table,
		      void (*iter)(struct nf_flowtable *flowtable,
				   struct flow_offload *flow, void *data),
		      void *data)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err = 0;

	rhashtable_walk_enter(&flow_table->rhashtable, &hti);
	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			if (PTR_ERR(tuplehash) != -EAGAIN) {
				err = PTR_ERR(tuplehash);
				break;
			}
			continue;
		}
		/* each flow has two tuplehash entries; skip the reply
		 * direction so every flow is visited exactly once.
		 */
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		iter(flow_table, flow, data);
	}
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return err;
}

/* True when the flowtable type has its own gc callback and that callback
 * asks for this flow to be torn down.
 */
static bool nf_flow_custom_gc(struct nf_flowtable *flow_table,
			      const struct flow_offload *flow)
{
	return flow_table->type->gc && flow_table->type->gc(flow);
}

/**
 * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry
 * @ct:		Flowtable offloaded tcp ct
 *
 * Return: number of seconds when ct entry should expire.
 */
static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct)
{
	u8 state = READ_ONCE(ct->proto.tcp.state);

	switch (state) {
	case TCP_CONNTRACK_SYN_SENT:
	case TCP_CONNTRACK_SYN_RECV:
		return 0;
	case TCP_CONNTRACK_ESTABLISHED:
		return NF_CT_DAY;
	case TCP_CONNTRACK_FIN_WAIT:
	case TCP_CONNTRACK_CLOSE_WAIT:
	case TCP_CONNTRACK_LAST_ACK:
	case TCP_CONNTRACK_TIME_WAIT:
		return 5 * 60 * HZ;
	case TCP_CONNTRACK_CLOSE:
		return 0;
	}

	return 0;
}

/**
 * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry
 * @ct:		Flowtable offloaded ct
 *
 * Datapath lookups in the conntrack table will evict nf_conn entries
 * if they have expired.
 *
 * Once nf_conn entries have been offloaded, nf_conntrack might not see any
 * packets anymore.  Thus ct->timeout is no longer refreshed and ct can
 * be evicted.
 *
 * To avoid the need for an additional check on the offload bit for every
 * packet processed via nf_conntrack_in(), set an arbitrary timeout large
 * enough not to ever expire, this save us a check for the IPS_OFFLOAD_BIT
 * from the packet path via nf_ct_is_expired().
 */
static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
{
	static const u32 min_timeout = 5 * 60 * HZ;
	u32 expires = nf_ct_expires(ct);

	/* normal case: large enough timeout, nothing to do. */
	if (likely(expires >= min_timeout))
		return;

	/* must check offload bit after this, we do not hold any locks.
	 * flowtable and ct entries could have been removed on another CPU.
	 */
	if (!refcount_inc_not_zero(&ct->ct_general.use))
		return;

	/* load ct->status after refcount increase */
	smp_acquire__after_ctrl_dep();

	if (nf_ct_is_confirmed(ct) &&
	    test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
		u8 l4proto = nf_ct_protonum(ct);
		/* non-zero default so the cmpxchg below still runs for the
		 * (warned) unexpected-protocol case — NOTE(review): confirm
		 * this is intentional rather than a leftover.
		 */
		u32 new_timeout = true;

		switch (l4proto) {
		case IPPROTO_UDP:
			new_timeout = NF_CT_DAY;
			break;
		case IPPROTO_TCP:
			new_timeout = nf_flow_table_tcp_timeout(ct);
			break;
		default:
			WARN_ON_ONCE(1);
			break;
		}

		/* Update to ct->timeout from nf_conntrack happens
		 * without holding ct->lock.
		 *
		 * Use cmpxchg to ensure timeout extension doesn't
		 * happen when we race with conntrack datapath.
		 *
		 * The inverse -- datapath updating ->timeout right
		 * after this -- is fine, datapath is authoritative.
		 */
		if (new_timeout) {
			new_timeout += nfct_time_stamp;
			cmpxchg(&ct->timeout, expires, new_timeout);
		}
	}

	nf_ct_put(ct);
}

/* Per-flow gc step: tear down expired/dying flows, remove torn-down
 * entries (coordinating with the hardware offload state machine) and
 * collect hardware stats for live offloaded flows. For healthy software
 * flows, keep the conntrack entry alive via the timeout extension above.
 */
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
				    struct flow_offload *flow, void *data)
{
	bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags);

	if (nf_flow_has_expired(flow) ||
	    nf_ct_is_dying(flow->ct) ||
	    nf_flow_custom_gc(flow_table, flow)) {
		flow_offload_teardown(flow);
		teardown = true;
	} else if (!teardown) {
		nf_flow_table_extend_ct_timeout(flow->ct);
	}

	if (teardown) {
		if (test_bit(NF_FLOW_HW, &flow->flags)) {
			/* ask the driver to remove the entry first; free it
			 * only once the driver reports it dead.
			 */
			if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
				nf_flow_offload_del(flow_table, flow);
			else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
				flow_offload_del(flow_table, flow);
		} else {
			flow_offload_del(flow_table, flow);
		}
	} else if (test_bit(NF_FLOW_CLOSING, &flow->flags) &&
		   test_bit(NF_FLOW_HW, &flow->flags) &&
		   !test_bit(NF_FLOW_HW_DYING, &flow->flags)) {
		/* closing flow: start hardware removal early. */
		nf_flow_offload_del(flow_table, flow);
	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
		nf_flow_offload_stats(flow_table, flow);
	}
}

void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
{
	nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
}

/* Periodic gc worker; re-arms itself every HZ. */
static void nf_flow_offload_work_gc(struct work_struct *work)
{
	struct nf_flowtable *flow_table;

	flow_table = container_of(work, struct nf_flowtable, gc_work.work);
	nf_flow_table_gc_run(flow_table);
	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}

/* Fix up the TCP checksum after a port rewrite. */
static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
				 __be16 port, __be16 new_port)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
}

/* Fix up the UDP checksum after a port rewrite. A zero UDP checksum
 * means "no checksum", so it is only updated when one is present (or
 * the skb uses CHECKSUM_PARTIAL), and a resulting zero is folded to
 * CSUM_MANGLED_0.
 */
static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
				 __be16 port, __be16 new_port)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(&udph->check, skb, port,
					 new_port, false);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
			     u8 protocol, __be16 port, __be16 new_port)
{
	switch (protocol) {
	case IPPROTO_TCP:
		nf_flow_nat_port_tcp(skb, thoff, port, new_port);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_port_udp(skb, thoff, port, new_port);
		break;
	}
}

/* Source-NAT the transport header ports according to the flow's tuples
 * and fix up the transport checksum.
 */
void nf_flow_snat_port(const struct flow_offload *flow,
		       struct sk_buff *skb, unsigned int thoff,
		       u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
		hdr->source = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
		hdr->dest = new_port;
		break;
	}

	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

/* Destination-NAT counterpart of nf_flow_snat_port(). */
void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
		       unsigned int thoff, u8 protocol,
		       enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
		hdr->dest = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
		hdr->source = new_port;
		break;
	}

	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

/* Initialize a flowtable: hash table, periodic gc work and registration
 * on the global flowtable list.
 */
int nf_flow_table_init(struct nf_flowtable *flowtable)
{
	int err;

	INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
	flow_block_init(&flowtable->flow_block);
	init_rwsem(&flowtable->flow_block_lock);

	err = rhashtable_init(&flowtable->rhashtable,
			      &nf_flow_offload_rhash_params);
	if (err < 0)
		return err;

	queue_delayed_work(system_power_efficient_wq,
			   &flowtable->gc_work, HZ);

	mutex_lock(&flowtable_lock);
	list_add(&flowtable->list, &flowtables);
	mutex_unlock(&flowtable_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);

/* Teardown iterator: a NULL @data tears down every flow, otherwise only
 * flows whose input interface (in either direction) matches the given
 * net_device in its netns.
 */
static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
				     struct flow_offload *flow, void *data)
{
	struct net_device *dev = data;

	if (!dev) {
		flow_offload_teardown(flow);
		return;
	}

	if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
	    (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
	     flow->tuplehash[1].tuple.iifidx == dev->ifindex))
		flow_offload_teardown(flow);
}

void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
			      struct net_device *dev)
{
	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
	flush_delayed_work(&flowtable->gc_work);
	nf_flow_table_offload_flush(flowtable);
}

/* Tear down all flows using @dev across every registered flowtable. */
void nf_flow_table_cleanup(struct net_device *dev)
{
	struct nf_flowtable *flowtable;

	mutex_lock(&flowtable_lock);
	list_for_each_entry(flowtable, &flowtables, list)
		nf_flow_table_gc_cleanup(flowtable, dev);
	mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);

/* Destroy a flowtable: unregister it, stop the gc worker, tear down and
 * free all remaining flows, then destroy the hash table.
 */
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
	mutex_lock(&flowtable_lock);
	list_del(&flow_table->list);
	mutex_unlock(&flowtable_lock);

	cancel_delayed_work_sync(&flow_table->gc_work);
	nf_flow_table_offload_flush(flow_table);
	/* ... no more pending work after this stage ... */
	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
	nf_flow_table_gc_run(flow_table);
	nf_flow_table_offload_flush_cleanup(flow_table);
	rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

static int nf_flow_table_init_net(struct net *net)
{
	net->ft.stat = alloc_percpu(struct nf_flow_table_stat);
	return net->ft.stat ? 0 : -ENOMEM;
}

static void nf_flow_table_fini_net(struct net *net)
{
	free_percpu(net->ft.stat);
}

static int nf_flow_table_pernet_init(struct net *net)
{
	int ret;

	ret = nf_flow_table_init_net(net);
	if (ret < 0)
		return ret;

	ret = nf_flow_table_init_proc(net);
	if (ret < 0)
		goto out_proc;

	return 0;

out_proc:
	nf_flow_table_fini_net(net);
	return ret;
}

static void nf_flow_table_pernet_exit(struct list_head *net_exit_list)
{
	struct net *net;

	list_for_each_entry(net, net_exit_list, exit_list) {
		nf_flow_table_fini_proc(net);
		nf_flow_table_fini_net(net);
	}
}

static struct pernet_operations nf_flow_table_net_ops = {
	.init = nf_flow_table_pernet_init,
	.exit_batch = nf_flow_table_pernet_exit,
};

static int __init nf_flow_table_module_init(void)
{
	int ret;

	flow_offload_cachep = KMEM_CACHE(flow_offload, SLAB_HWCACHE_ALIGN);
	if (!flow_offload_cachep)
		return -ENOMEM;

	ret = register_pernet_subsys(&nf_flow_table_net_ops);
	if (ret < 0)
		goto out_pernet;

	ret = nf_flow_table_offload_init();
	if (ret)
		goto out_offload;

	ret = nf_flow_register_bpf();
	if (ret)
		goto out_bpf;

	return 0;

out_bpf:
	nf_flow_table_offload_exit();
out_offload:
	unregister_pernet_subsys(&nf_flow_table_net_ops);
out_pernet:
	kmem_cache_destroy(flow_offload_cachep);
	return ret;
}

static void __exit nf_flow_table_module_exit(void)
{
	nf_flow_table_offload_exit();
	unregister_pernet_subsys(&nf_flow_table_net_ops);
	kmem_cache_destroy(flow_offload_cachep);
}

module_init(nf_flow_table_module_init);
module_exit(nf_flow_table_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("Netfilter flow table module");