// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/gre.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_tunnel.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (tcph->syn && test_bit(NF_FLOW_CLOSING, &flow->flags)) {
		flow_offload_teardown(flow);
		return -1;
	}

	if ((tcph->fin || tcph->rst) &&
	    !test_bit(NF_FLOW_CLOSING, &flow->flags))
		set_bit(NF_FLOW_CLOSING, &flow->flags);

	return 0;
}

static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
}

static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				   unsigned int thoff, __be32 addr,
				   __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
		break;
	}
}

static void nf_flow_snat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static void nf_flow_dnat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

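/* Apply the SNAT/DNAT mangling recorded on the flow to the IPv4 addresses
 * and the transport header (ports and checksums).
 */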
static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   unsigned int thoff, enum flow_offload_tuple_dir dir,
			   struct iphdr *iph)
{
	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
	}
}

static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

struct nf_flowtable_ctx {
	const struct net_device	*in;
	u32			offset;
	u32			hdrsize;
	struct {
		/* Tunnel IP header size */
		u32		hdr_size;
		/* IP tunnel protocol */
		u8		proto;
	} tun;
};

static void nf_flow_tuple_encap(struct nf_flowtable_ctx *ctx,
				struct sk_buff *skb,
				struct flow_offload_tuple *tuple)
{
	__be16 inner_proto = skb->protocol;
	struct vlan_ethhdr *veth;
	struct pppoe_hdr *phdr;
	struct ipv6hdr *ip6h;
	struct iphdr *iph;
	u16 offset = 0;
	int i = 0;

	if (skb_vlan_tag_present(skb)) {
		tuple->encap[i].id = skb_vlan_tag_get(skb);
		tuple->encap[i].proto = skb->vlan_proto;
		i++;
	}
	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
		tuple->encap[i].proto = skb->protocol;
		inner_proto = veth->h_vlan_encapsulated_proto;
		offset += VLAN_HLEN;
		break;
	case htons(ETH_P_PPP_SES):
		phdr = (struct pppoe_hdr *)skb_network_header(skb);
		tuple->encap[i].id = ntohs(phdr->sid);
		tuple->encap[i].proto = skb->protocol;
		inner_proto = *((__be16 *)(phdr + 1));
		offset += PPPOE_SES_HLEN;
		break;
	}

	switch (inner_proto) {
	case htons(ETH_P_IP):
		iph = (struct iphdr *)(skb_network_header(skb) + offset);
		if (ctx->tun.proto == IPPROTO_IPIP) {
			tuple->tun.dst_v4.s_addr = iph->daddr;
			tuple->tun.src_v4.s_addr = iph->saddr;
			tuple->tun.l3_proto = IPPROTO_IPIP;
		}
		break;
	case htons(ETH_P_IPV6):
		ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
		if (ctx->tun.proto == IPPROTO_IPV6) {
			tuple->tun.dst_v6 = ip6h->daddr;
			tuple->tun.src_v6 = ip6h->saddr;
			tuple->tun.l3_proto = IPPROTO_IPV6;
		}
		break;
	default:
		break;
	}
}

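/* Fill in the flowtable lookup tuple from an IPv4 packet: sanity check the
 * IP and transport headers and record addresses, ports, input interface and
 * any encapsulation.
 */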
static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			    struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;
	u8 ipproto;

	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
		return -1;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	thoff = (iph->ihl * 4);

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	thoff += ctx->offset;

	ipproto = iph->protocol;
	switch (ipproto) {
	case IPPROTO_TCP:
		ctx->hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		ctx->hdrsize = sizeof(struct udphdr);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		ctx->hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return -1;
	}

	if (iph->ttl <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
		return -1;

	switch (ipproto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port = ports->source;
		tuple->dst_port = ports->dest;
		break;
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return -1;
		break;
	}
	}

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);

	tuple->src_v4.s_addr = iph->saddr;
	tuple->dst_v4.s_addr = iph->daddr;
	tuple->l3proto = AF_INET;
	tuple->l4proto = ipproto;
	tuple->iifidx = ctx->in->ifindex;
	nf_flow_tuple_encap(ctx, skb, tuple);

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
{
	if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
	    tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
		return true;

	return dst_check(tuple->dst_cache, tuple->dst_cookie);
}

static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}

static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
				     struct sk_buff *skb)
{
	struct iphdr *iph;
	u16 size;

	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
		return false;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	size = iph->ihl << 2;

	if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
		return false;

	if (iph->ttl <= 1)
		return false;

	if (iph->protocol == IPPROTO_IPIP) {
		ctx->tun.proto = IPPROTO_IPIP;
		ctx->tun.hdr_size = size;
		ctx->offset += size;
	}

	return true;
}

static bool nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
				     struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *ip6h, _ip6h;
	__be16 frag_off;
	u8 nexthdr;
	int hdrlen;

	ip6h = skb_header_pointer(skb, ctx->offset, sizeof(*ip6h), &_ip6h);
	if (!ip6h)
		return false;

	if (ip6h->hop_limit <= 1)
		return false;

	nexthdr = ip6h->nexthdr;
	hdrlen = ipv6_skip_exthdr(skb, sizeof(*ip6h) + ctx->offset, &nexthdr,
				  &frag_off);
	if (hdrlen < 0)
		return false;

	if (nexthdr == IPPROTO_IPV6) {
		ctx->tun.hdr_size = hdrlen;
		ctx->tun.proto = IPPROTO_IPV6;
	}
	ctx->offset += ctx->tun.hdr_size;

	return true;
#else
	return false;
#endif /* IS_ENABLED(CONFIG_IPV6) */
}

static void nf_flow_ip_tunnel_pop(struct nf_flowtable_ctx *ctx,
				  struct sk_buff *skb)
{
	if (ctx->tun.proto != IPPROTO_IPIP &&
	    ctx->tun.proto != IPPROTO_IPV6)
		return;

	skb_pull(skb, ctx->tun.hdr_size);
	skb_reset_network_header(skb);
}

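/* Check that the packet carries the expected protocol behind any VLAN or
 * PPPoE encapsulation and detect IPIP/IPv6-in-IPv6 tunnel headers, advancing
 * ctx->offset past the encapsulation to the inner headers.
 */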
static bool nf_flow_skb_encap_protocol(struct nf_flowtable_ctx *ctx,
				       struct sk_buff *skb, __be16 proto)
{
	__be16 inner_proto = skb->protocol;
	struct vlan_ethhdr *veth;
	bool ret = false;

	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth)))
			return false;

		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		if (veth->h_vlan_encapsulated_proto == proto) {
			ctx->offset += VLAN_HLEN;
			inner_proto = proto;
			ret = true;
		}
		break;
	case htons(ETH_P_PPP_SES):
		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
		    inner_proto == proto) {
			ctx->offset += PPPOE_SES_HLEN;
			ret = true;
		}
		break;
	}

	switch (inner_proto) {
	case htons(ETH_P_IP):
		ret = nf_flow_ip4_tunnel_proto(ctx, skb);
		break;
	case htons(ETH_P_IPV6):
		ret = nf_flow_ip6_tunnel_proto(ctx, skb);
		break;
	default:
		break;
	}

	return ret;
}

static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx,
			      struct sk_buff *skb,
			      struct flow_offload_tuple_rhash *tuplehash)
{
	struct vlan_hdr *vlan_hdr;
	int i;

	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
		if (skb_vlan_tag_present(skb)) {
			__vlan_hwaccel_clear_tag(skb);
			continue;
		}
		switch (skb->protocol) {
		case htons(ETH_P_8021Q):
			vlan_hdr = (struct vlan_hdr *)skb->data;
			__skb_pull(skb, VLAN_HLEN);
			vlan_set_encap_proto(skb, vlan_hdr);
			skb_reset_network_header(skb);
			break;
		case htons(ETH_P_PPP_SES):
			skb->protocol = __nf_flow_pppoe_proto(skb);
			skb_pull(skb, PPPOE_SES_HLEN);
			skb_reset_network_header(skb);
			break;
		}
	}

	if (skb->protocol == htons(ETH_P_IP) ||
	    skb->protocol == htons(ETH_P_IPV6))
		nf_flow_ip_tunnel_pop(ctx, skb);
}

struct nf_flow_xmit {
	const void		*dest;
	const void		*source;
	struct net_device	*outdev;
};

static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
				       struct nf_flow_xmit *xmit)
{
	skb->dev = xmit->outdev;
	dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			xmit->dest, xmit->source, skb->len);
	dev_queue_xmit(skb);

	return NF_STOLEN;
}

static struct flow_offload_tuple_rhash *
nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
		       struct nf_flowtable *flow_table, struct sk_buff *skb)
{
	struct flow_offload_tuple tuple = {};

	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IP)))
		return NULL;

	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
		return NULL;

	return flow_offload_lookup(flow_table, &tuple);
}

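/* Forward a packet that matched an offloaded flow: check MTU, TCP state and
 * the cached route, strip any encapsulation, apply NAT and decrement the TTL.
 * Returns 1 to transmit, 0 to fall back to the classic forwarding path and
 * -1 to drop the packet.
 */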
static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
				   struct nf_flowtable *flow_table,
				   struct flow_offload_tuple_rhash *tuplehash,
				   struct sk_buff *skb)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	unsigned int thoff, mtu;
	struct iphdr *iph;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (flow->tuplehash[!dir].tuple.tun_num)
		mtu -= sizeof(*iph);

	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	thoff = (iph->ihl * 4) + ctx->offset;
	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
		return 0;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return 0;
	}

	if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
		return -1;

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(ctx, skb, tuplehash);
	thoff -= ctx->offset;

	iph = ip_hdr(skb);
	nf_flow_nat_ip(flow, skb, thoff, dir, iph);

	ip_decrease_ttl(iph);
	skb_clear_tstamp(skb);

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	return 1;
}

static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
{
	int data_len = skb->len + sizeof(__be16);
	struct ppp_hdr {
		struct pppoe_hdr hdr;
		__be16 proto;
	} *ph;
	__be16 proto;

	if (skb_cow_head(skb, PPPOE_SES_HLEN))
		return -1;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		proto = htons(PPP_IP);
		break;
	case htons(ETH_P_IPV6):
		proto = htons(PPP_IPV6);
		break;
	default:
		return -1;
	}

	__skb_push(skb, PPPOE_SES_HLEN);
	skb_reset_network_header(skb);

	ph = (struct ppp_hdr *)(skb->data);
	ph->hdr.ver = 1;
	ph->hdr.type = 1;
	ph->hdr.code = 0;
	ph->hdr.sid = htons(id);
	ph->hdr.length = htons(data_len);
	ph->proto = proto;
	skb->protocol = htons(ETH_P_PPP_SES);

	return 0;
}

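/* Encapsulate the packet in an outer IPv4 header for an offloaded IPIP
 * tunnel, reusing tos/ttl from the inner header, and report the outer
 * destination address back to the caller.
 */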
static int nf_flow_tunnel_ipip_push(struct net *net, struct sk_buff *skb,
				    struct flow_offload_tuple *tuple,
				    __be32 *ip_daddr)
{
	struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
	struct rtable *rt = dst_rtable(tuple->dst_cache);
	u8 tos = iph->tos, ttl = iph->ttl;
	__be16 frag_off = iph->frag_off;
	u32 headroom = sizeof(*iph);
	int err;

	err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4);
	if (err)
		return err;

	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	err = skb_cow_head(skb, headroom);
	if (err)
		return err;

	skb_scrub_packet(skb, true);
	skb_clear_hash_if_not_l4(skb);

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(*iph));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(*iph) >> 2;
	iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : frag_off;
	iph->protocol = tuple->tun.l3_proto;
	iph->tos = tos;
	iph->daddr = tuple->tun.src_v4.s_addr;
	iph->saddr = tuple->tun.dst_v4.s_addr;
	iph->ttl = ttl;
	iph->tot_len = htons(skb->len);
	__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
	ip_send_check(iph);

	*ip_daddr = tuple->tun.src_v4.s_addr;

	return 0;
}

static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  __be32 *ip_daddr)
{
	if (tuple->tun_num)
		return nf_flow_tunnel_ipip_push(net, skb, tuple, ip_daddr);

	return 0;
}

struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};

static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
				      struct flow_offload_tuple *tuple,
				      struct in6_addr **ip6_daddr,
				      int encap_limit)
{
	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
	u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6;
	struct rtable *rt = dst_rtable(tuple->dst_cache);
	__u8 dsfield = ipv6_get_dsfield(ip6h);
	struct flowi6 fl6 = {
		.daddr = tuple->tun.src_v6,
		.saddr = tuple->tun.dst_v6,
		.flowi6_proto = proto,
	};
	int err, mtu;
	u32 headroom;

	err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
	if (err)
		return err;

	skb_set_inner_ipproto(skb, proto);
	headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
		   rt->dst.header_len;
	if (encap_limit)
		headroom += 8;
	err = skb_cow_head(skb, headroom);
	if (err)
		return err;

	skb_scrub_packet(skb, true);
	mtu = dst_mtu(&rt->dst) - sizeof(*ip6h);
	if (encap_limit)
		mtu -= 8;
	mtu = max(mtu, IPV6_MIN_MTU);
	skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (encap_limit > 0) {
		struct ipv6_tel_txoption opt = {
			.dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
			.dst_opt[3] = 1,
			.dst_opt[4] = encap_limit,
			.dst_opt[5] = IPV6_TLV_PADN,
			.dst_opt[6] = 1,
		};
		struct ipv6_opt_hdr *hopt;

		opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
		opt.ops.opt_nflen = 8;

		hopt = skb_push(skb, ipv6_optlen(opt.ops.dst1opt));
		memcpy(hopt, opt.ops.dst1opt, ipv6_optlen(opt.ops.dst1opt));
		hopt->nexthdr = IPPROTO_IPV6;
		proto = NEXTHDR_DEST;
	}

	skb_push(skb, sizeof(*ip6h));
	skb_reset_network_header(skb);

	ip6h = ipv6_hdr(skb);
	ip6_flow_hdr(ip6h, dsfield,
		     ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
	ip6h->hop_limit = hop_limit;
	ip6h->nexthdr = proto;
	ip6h->daddr = tuple->tun.src_v6;
	ip6h->saddr = tuple->tun.dst_v6;
	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h));
	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);

	*ip6_daddr = &tuple->tun.src_v6;

	return 0;
}

static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
				  struct flow_offload_tuple *tuple,
				  struct in6_addr **ip6_daddr,
				  int encap_limit)
{
	if (tuple->tun_num)
		return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr,
						  encap_limit);

	return 0;
}

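/* Restore the VLAN/PPPoE encapsulation recorded in the tuple before the
 * packet is handed to the output device.
 */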
static int nf_flow_encap_push(struct sk_buff *skb,
			      struct flow_offload_tuple *tuple)
{
	int i;

	for (i = 0; i < tuple->encap_num; i++) {
		switch (tuple->encap[i].proto) {
		case htons(ETH_P_8021Q):
		case htons(ETH_P_8021AD):
			if (skb_vlan_push(skb, tuple->encap[i].proto,
					  tuple->encap[i].id) < 0)
				return -1;
			break;
		case htons(ETH_P_PPP_SES):
			if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0)
				return -1;
			break;
		}
	}

	return 0;
}

unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple *other_tuple;
	enum flow_offload_tuple_dir dir;
	struct nf_flowtable_ctx ctx = {
		.in	= state->in,
	};
	struct nf_flow_xmit xmit = {};
	struct flow_offload *flow;
	struct neighbour *neigh;
	struct rtable *rt;
	__be32 ip_daddr;
	int ret;

	tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
	if (!tuplehash)
		return NF_ACCEPT;

	ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
		return NF_ACCEPT;

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		rt = dst_rtable(tuplehash->tuple.dst_cache);
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	other_tuple = &flow->tuplehash[!dir].tuple;
	ip_daddr = other_tuple->src_v4.s_addr;

	if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0)
		return NF_DROP;

	if (nf_flow_encap_push(skb, other_tuple) < 0)
		return NF_DROP;

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		rt = dst_rtable(tuplehash->tuple.dst_cache);
		xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
		if (!xmit.outdev) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, ip_daddr));
		if (IS_ERR(neigh)) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		xmit.dest = neigh->ha;
		skb_dst_set_noref(skb, &rt->dst);
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
		if (!xmit.outdev) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		xmit.dest = tuplehash->tuple.out.h_dest;
		xmit.source = tuplehash->tuple.out.h_source;
		break;
	default:
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return nf_flow_queue_xmit(state->net, skb, &xmit);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr,
				 struct ipv6hdr *ip6h)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);
}

static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				     unsigned int thoff, struct in6_addr *addr,
				     struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
		break;
	}
}

static void nf_flow_snat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_nat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb,
			     enum flow_offload_tuple_dir dir,
			     struct ipv6hdr *ip6h)
{
	unsigned int thoff = sizeof(*ip6h);

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
	}
}

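/* Fill in the flowtable lookup tuple from an IPv6 packet, the counterpart
 * of nf_flow_tuple_ip().
 */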
static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			      struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;
	u8 nexthdr;

	thoff = sizeof(*ip6h) + ctx->offset;
	if (!pskb_may_pull(skb, thoff))
		return -1;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);

	nexthdr = ip6h->nexthdr;
	switch (nexthdr) {
	case IPPROTO_TCP:
		ctx->hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		ctx->hdrsize = sizeof(struct udphdr);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		ctx->hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return -1;
	}

	if (ip6h->hop_limit <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
		return -1;

	switch (nexthdr) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port = ports->source;
		tuple->dst_port = ports->dest;
		break;
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return -1;
		break;
	}
	}

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);

	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = nexthdr;
	tuple->iifidx = ctx->in->ifindex;
	nf_flow_tuple_encap(ctx, skb, tuple);

	return 0;
}

static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
					struct nf_flowtable *flow_table,
					struct flow_offload_tuple_rhash *tuplehash,
					struct sk_buff *skb, int encap_limit)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	unsigned int thoff, mtu;
	struct ipv6hdr *ip6h;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (flow->tuplehash[!dir].tuple.tun_num) {
		mtu -= sizeof(*ip6h);
		if (encap_limit > 0)
			mtu -= 8; /* encap limit option */
	}

	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
	thoff = sizeof(*ip6h) + ctx->offset;
	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
		return 0;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return 0;
	}

	if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
		return -1;

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(ctx, skb, tuplehash);

	ip6h = ipv6_hdr(skb);
	nf_flow_nat_ipv6(flow, skb, dir, ip6h);

	ip6h->hop_limit--;
	skb_clear_tstamp(skb);

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	return 1;
}

static struct flow_offload_tuple_rhash *
nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
			    struct nf_flowtable *flow_table,
			    struct sk_buff *skb)
{
	struct flow_offload_tuple tuple = {};

	if (!nf_flow_skb_encap_protocol(ctx, skb, htons(ETH_P_IPV6)))
		return NULL;

	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
		return NULL;

	return flow_offload_lookup(flow_table, &tuple);
}

unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	int encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	struct flow_offload_tuple *other_tuple;
	enum flow_offload_tuple_dir dir;
	struct nf_flowtable_ctx ctx = {
		.in	= state->in,
	};
	struct nf_flow_xmit xmit = {};
	struct in6_addr *ip6_daddr;
	struct flow_offload *flow;
	struct neighbour *neigh;
	struct rt6_info *rt;
	int ret;

	tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb,
					   encap_limit);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
		return NF_ACCEPT;

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		rt = dst_rt6_info(tuplehash->tuple.dst_cache);
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
	other_tuple = &flow->tuplehash[!dir].tuple;
	ip6_daddr = &other_tuple->src_v6;

	if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
				   &ip6_daddr, encap_limit) < 0)
		return NF_DROP;

	if (nf_flow_encap_push(skb, other_tuple) < 0)
		return NF_DROP;

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		rt = dst_rt6_info(tuplehash->tuple.dst_cache);
		xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
		if (!xmit.outdev) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		neigh = ip_neigh_gw6(rt->dst.dev, rt6_nexthop(rt, ip6_daddr));
		if (IS_ERR(neigh)) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		xmit.dest = neigh->ha;
		skb_dst_set_noref(skb, &rt->dst);
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
		if (!xmit.outdev) {
			flow_offload_teardown(flow);
			return NF_DROP;
		}
		xmit.dest = tuplehash->tuple.out.h_dest;
		xmit.source = tuplehash->tuple.out.h_source;
		break;
	default:
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return nf_flow_queue_xmit(state->net, skb, &xmit);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);