// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (tcph->syn && test_bit(NF_FLOW_CLOSING, &flow->flags)) {
		flow_offload_teardown(flow);
		return -1;
	}

	if ((tcph->fin || tcph->rst) &&
	    !test_bit(NF_FLOW_CLOSING, &flow->flags))
		set_bit(NF_FLOW_CLOSING, &flow->flags);

	return 0;
}

static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
}

static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				   unsigned int thoff, __be32 addr,
				   __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
		break;
	}
}

static void nf_flow_snat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static void nf_flow_dnat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
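
/* Apply SNAT and/or DNAT to the IPv4 addresses and layer 4 ports,
 * depending on which NAT flags are set on the flow.
 */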
static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   unsigned int thoff, enum flow_offload_tuple_dir dir,
			   struct iphdr *iph)
{
	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
	}
}

static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

static void nf_flow_tuple_encap(struct sk_buff *skb,
				struct flow_offload_tuple *tuple)
{
	__be16 inner_proto = skb->protocol;
	struct vlan_ethhdr *veth;
	struct pppoe_hdr *phdr;
	struct iphdr *iph;
	u16 offset = 0;
	int i = 0;

	if (skb_vlan_tag_present(skb)) {
		tuple->encap[i].id = skb_vlan_tag_get(skb);
		tuple->encap[i].proto = skb->vlan_proto;
		i++;
	}
	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
		tuple->encap[i].proto = skb->protocol;
		inner_proto = veth->h_vlan_encapsulated_proto;
		offset += VLAN_HLEN;
		break;
	case htons(ETH_P_PPP_SES):
		phdr = (struct pppoe_hdr *)skb_network_header(skb);
		tuple->encap[i].id = ntohs(phdr->sid);
		tuple->encap[i].proto = skb->protocol;
		inner_proto = *((__be16 *)(phdr + 1));
		offset += PPPOE_SES_HLEN;
		break;
	}

	if (inner_proto == htons(ETH_P_IP)) {
		iph = (struct iphdr *)(skb_network_header(skb) + offset);
		if (iph->protocol == IPPROTO_IPIP) {
			tuple->tun.dst_v4.s_addr = iph->daddr;
			tuple->tun.src_v4.s_addr = iph->saddr;
			tuple->tun.l3_proto = IPPROTO_IPIP;
		}
	}
}

struct nf_flowtable_ctx {
	const struct net_device	*in;
	u32			offset;
	u32			hdrsize;
};
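
/* Parse the IPv4 and transport headers into a lookup tuple. Packets with
 * IP options, fragments, a TTL of one or less, or an unsupported layer 4
 * protocol are not eligible for the fast path.
 */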
static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			    struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;
	u8 ipproto;

	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
		return -1;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	thoff = (iph->ihl * 4);

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	thoff += ctx->offset;

	ipproto = iph->protocol;
	switch (ipproto) {
	case IPPROTO_TCP:
		ctx->hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		ctx->hdrsize = sizeof(struct udphdr);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		ctx->hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return -1;
	}

	if (iph->ttl <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
		return -1;

	switch (ipproto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port = ports->source;
		tuple->dst_port = ports->dest;
		break;
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return -1;
		break;
	}
	}

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);

	tuple->src_v4.s_addr = iph->saddr;
	tuple->dst_v4.s_addr = iph->daddr;
	tuple->l3proto = AF_INET;
	tuple->l4proto = ipproto;
	tuple->iifidx = ctx->in->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
{
	if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
	    tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
		return true;

	return dst_check(tuple->dst_cache, tuple->dst_cookie);
}

static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}

static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
{
	struct iphdr *iph;
	u16 size;

	if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
		return false;

	iph = (struct iphdr *)(skb_network_header(skb) + *psize);
	size = iph->ihl << 2;

	if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
		return false;

	if (iph->ttl <= 1)
		return false;

	if (iph->protocol == IPPROTO_IPIP)
		*psize += size;

	return true;
}

static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
{
	struct iphdr *iph = (struct iphdr *)skb_network_header(skb);

	if (iph->protocol != IPPROTO_IPIP)
		return;

	skb_pull(skb, iph->ihl << 2);
	skb_reset_network_header(skb);
}

static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
				       u32 *offset)
{
	__be16 inner_proto = skb->protocol;
	struct vlan_ethhdr *veth;
	bool ret = false;

	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth)))
			return false;

		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		if (veth->h_vlan_encapsulated_proto == proto) {
			*offset += VLAN_HLEN;
			inner_proto = proto;
			ret = true;
		}
		break;
	case htons(ETH_P_PPP_SES):
		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
		    inner_proto == proto) {
			*offset += PPPOE_SES_HLEN;
			ret = true;
		}
		break;
	}

	if (inner_proto == htons(ETH_P_IP))
		ret = nf_flow_ip4_tunnel_proto(skb, offset);

	return ret;
}

static void nf_flow_encap_pop(struct sk_buff *skb,
			      struct flow_offload_tuple_rhash *tuplehash)
{
	struct vlan_hdr *vlan_hdr;
	int i;

	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
		if (skb_vlan_tag_present(skb)) {
			__vlan_hwaccel_clear_tag(skb);
			continue;
		}
		switch (skb->protocol) {
		case htons(ETH_P_8021Q):
			vlan_hdr = (struct vlan_hdr *)skb->data;
			__skb_pull(skb, VLAN_HLEN);
			vlan_set_encap_proto(skb, vlan_hdr);
			skb_reset_network_header(skb);
			break;
		case htons(ETH_P_PPP_SES):
			skb->protocol = __nf_flow_pppoe_proto(skb);
			skb_pull(skb, PPPOE_SES_HLEN);
			skb_reset_network_header(skb);
			break;
		}
	}

	if (skb->protocol == htons(ETH_P_IP))
		nf_flow_ip4_tunnel_pop(skb);
}

struct nf_flow_xmit {
	const void		*dest;
	const void		*source;
	struct net_device	*outdev;
};
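
/* Build the link layer header from the cached destination and source
 * addresses and hand the packet straight to the output device.
 */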
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
				       struct nf_flow_xmit *xmit)
{
	skb->dev = xmit->outdev;
	dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			xmit->dest, xmit->source, skb->len);
	dev_queue_xmit(skb);

	return NF_STOLEN;
}

static struct flow_offload_tuple_rhash *
nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
		       struct nf_flowtable *flow_table, struct sk_buff *skb)
{
	struct flow_offload_tuple tuple = {};

	if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
		return NULL;

	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
		return NULL;

	return flow_offload_lookup(flow_table, &tuple);
}

static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
				   struct nf_flowtable *flow_table,
				   struct flow_offload_tuple_rhash *tuplehash,
				   struct sk_buff *skb)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	unsigned int thoff, mtu;
	struct iphdr *iph;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (flow->tuplehash[!dir].tuple.tun_num)
		mtu -= sizeof(*iph);

	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	thoff = (iph->ihl * 4) + ctx->offset;
	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
		return 0;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return 0;
	}

	if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
		return -1;

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(skb, tuplehash);
	thoff -= ctx->offset;

	iph = ip_hdr(skb);
	nf_flow_nat_ip(flow, skb, thoff, dir, iph);

	ip_decrease_ttl(iph);
	skb_clear_tstamp(skb);

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	return 1;
}

static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
{
	int data_len = skb->len + sizeof(__be16);
	struct ppp_hdr {
		struct pppoe_hdr hdr;
		__be16 proto;
	} *ph;
	__be16 proto;

	if (skb_cow_head(skb, PPPOE_SES_HLEN))
		return -1;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		proto = htons(PPP_IP);
		break;
	case htons(ETH_P_IPV6):
		proto = htons(PPP_IPV6);
		break;
	default:
		return -1;
	}

	__skb_push(skb, PPPOE_SES_HLEN);
	skb_reset_network_header(skb);

	ph = (struct ppp_hdr *)(skb->data);
	ph->hdr.ver = 1;
	ph->hdr.type = 1;
	ph->hdr.code = 0;
	ph->hdr.sid = htons(id);
	ph->hdr.length = htons(data_len);
	ph->proto = proto;
	skb->protocol = htons(ETH_P_PPP_SES);

	return 0;
}
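
/* Prepend the outer IPv4 header for an offloaded IPIP tunnel. The tuple
 * passed in belongs to the opposite direction of the flow, so its tunnel
 * source becomes the outer destination address and vice versa.
 */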
static int nf_flow_tunnel_ipip_push(struct net *net, struct sk_buff *skb,
				    struct flow_offload_tuple *tuple,
				    __be32 *ip_daddr)
{
	struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
	struct rtable *rt = dst_rtable(tuple->dst_cache);
	u8 tos = iph->tos, ttl = iph->ttl;
	__be16 frag_off = iph->frag_off;
	u32 headroom = sizeof(*iph);
	int err;

	err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4);
	if (err)
		return err;

	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	err = skb_cow_head(skb, headroom);
	if (err)
		return err;

	skb_scrub_packet(skb, true);
	skb_clear_hash_if_not_l4(skb);

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(*iph));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(*iph) >> 2;
	iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : frag_off;
	iph->protocol = tuple->tun.l3_proto;
	iph->tos = tos;
	iph->daddr = tuple->tun.src_v4.s_addr;
	iph->saddr = tuple->tun.dst_v4.s_addr;
	iph->ttl = ttl;
	iph->tot_len = htons(skb->len);
	__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
	ip_send_check(iph);

	*ip_daddr = tuple->tun.src_v4.s_addr;

	return 0;
}

static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  __be32 *ip_daddr)
{
	if (tuple->tun_num)
		return nf_flow_tunnel_ipip_push(net, skb, tuple, ip_daddr);

	return 0;
}

static int nf_flow_encap_push(struct sk_buff *skb,
			      struct flow_offload_tuple *tuple)
{
	int i;

	for (i = 0; i < tuple->encap_num; i++) {
		switch (tuple->encap[i].proto) {
		case htons(ETH_P_8021Q):
		case htons(ETH_P_8021AD):
			if (skb_vlan_push(skb, tuple->encap[i].proto,
					  tuple->encap[i].id) < 0)
				return -1;
			break;
		case htons(ETH_P_PPP_SES):
			if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0)
				return -1;
			break;
		}
	}

	return 0;
}
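
/* IPv4 fast path: look up the flow, mangle the packet (NAT, TTL,
 * encapsulation) and transmit it directly, bypassing the rest of the
 * forwarding path. Returns NF_ACCEPT to fall back to the classic path.
 */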
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple *other_tuple;
	enum flow_offload_tuple_dir dir;
	struct nf_flowtable_ctx ctx = {
		.in	= state->in,
	};
	struct nf_flow_xmit xmit = {};
	struct flow_offload *flow;
	struct neighbour *neigh;
	struct rtable *rt;
	__be32 ip_daddr;
	int ret;

	tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
	if (!tuplehash)
		return NF_ACCEPT;

	ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
		return NF_ACCEPT;

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		rt = dst_rtable(tuplehash->tuple.dst_cache);
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	other_tuple = &flow->tuplehash[!dir].tuple;
	ip_daddr = other_tuple->src_v4.s_addr;

	if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0)
		return NF_DROP;

	if (nf_flow_encap_push(skb, other_tuple) < 0)
		return NF_DROP;

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		rt = dst_rtable(tuplehash->tuple.dst_cache);
		xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
		if (!xmit.outdev) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, ip_daddr));
		if (IS_ERR(neigh)) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		xmit.dest = neigh->ha;
		skb_dst_set_noref(skb, &rt->dst);
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		xmit.outdev = dev_get_by_index_rcu(state->net,
						   tuplehash->tuple.out.ifidx);
		if (!xmit.outdev) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		xmit.dest = tuplehash->tuple.out.h_dest;
		xmit.source = tuplehash->tuple.out.h_source;
		break;
	default:
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return nf_flow_queue_xmit(state->net, skb, &xmit);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr,
				 struct ipv6hdr *ip6h)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);
}

static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				     unsigned int thoff, struct in6_addr *addr,
				     struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
		break;
	}
}

static void nf_flow_snat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_nat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb,
			     enum flow_offload_tuple_dir dir,
			     struct ipv6hdr *ip6h)
{
	unsigned int thoff = sizeof(*ip6h);

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
	}
}
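
/* Parse the IPv6 and transport headers into a lookup tuple. Packets with
 * a hop limit of one or less or an unsupported next header are not
 * eligible for the fast path.
 */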
static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			      struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;
	u8 nexthdr;

	thoff = sizeof(*ip6h) + ctx->offset;
	if (!pskb_may_pull(skb, thoff))
		return -1;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);

	nexthdr = ip6h->nexthdr;
	switch (nexthdr) {
	case IPPROTO_TCP:
		ctx->hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		ctx->hdrsize = sizeof(struct udphdr);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		ctx->hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return -1;
	}

	if (ip6h->hop_limit <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
		return -1;

	switch (nexthdr) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port = ports->source;
		tuple->dst_port = ports->dest;
		break;
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return -1;
		break;
	}
	}

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);

	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = nexthdr;
	tuple->iifidx = ctx->in->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
					struct nf_flowtable *flow_table,
					struct flow_offload_tuple_rhash *tuplehash,
					struct sk_buff *skb)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	unsigned int thoff, mtu;
	struct ipv6hdr *ip6h;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
	thoff = sizeof(*ip6h) + ctx->offset;
	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
		return 0;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return 0;
	}

	if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
		return -1;

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(skb, tuplehash);

	ip6h = ipv6_hdr(skb);
	nf_flow_nat_ipv6(flow, skb, dir, ip6h);

	ip6h->hop_limit--;
	skb_clear_tstamp(skb);

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	return 1;
}

static struct flow_offload_tuple_rhash *
nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
			    struct nf_flowtable *flow_table,
			    struct sk_buff *skb)
{
	struct flow_offload_tuple tuple = {};

	if (skb->protocol != htons(ETH_P_IPV6) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
		return NULL;

	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
		return NULL;

	return flow_offload_lookup(flow_table, &tuple);
}
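
/* IPv6 counterpart of nf_flow_offload_ip_hook(): look up the flow, mangle
 * the packet and transmit it directly, falling back to the classic
 * forwarding path with NF_ACCEPT when the packet cannot be handled here.
 */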
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple *other_tuple;
	enum flow_offload_tuple_dir dir;
	struct nf_flowtable_ctx ctx = {
		.in	= state->in,
	};
	struct nf_flow_xmit xmit = {};
	struct in6_addr *ip6_daddr;
	struct flow_offload *flow;
	struct neighbour *neigh;
	struct rt6_info *rt;
	int ret;

	tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
		return NF_ACCEPT;

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		rt = dst_rt6_info(tuplehash->tuple.dst_cache);
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	other_tuple = &flow->tuplehash[!dir].tuple;
	ip6_daddr = &other_tuple->src_v6;

	if (nf_flow_encap_push(skb, other_tuple) < 0)
		return NF_DROP;

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		rt = dst_rt6_info(tuplehash->tuple.dst_cache);
		xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
		if (!xmit.outdev) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		neigh = ip_neigh_gw6(rt->dst.dev, rt6_nexthop(rt, ip6_daddr));
		if (IS_ERR(neigh)) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		xmit.dest = neigh->ha;
		skb_dst_set_noref(skb, &rt->dst);
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
		if (!xmit.outdev) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		xmit.dest = tuplehash->tuple.out.h_dest;
		xmit.source = tuplehash->tuple.out.h_source;
		break;
	default:
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return nf_flow_queue_xmit(state->net, skb, &xmit);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);