// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/gso.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
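			 * A clone is looped back via dev_loopback_xmit() so
			 * that local listeners still receive a copy.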
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res != LWTUNNEL_XMIT_CONTINUE)
			return res;
	}

	rcu_read_lock();
	nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);

	if (unlikely(IS_ERR_OR_NULL(neigh))) {
		if (unlikely(!neigh))
			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
		if (IS_ERR(neigh)) {
			rcu_read_unlock();
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
			return -EINVAL;
		}
	}
	sock_confirm_neigh(skb, neigh);
	ret = neigh_output(neigh, skb, false);
	rcu_read_unlock();
	return ret;
}

static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) &&
	    !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
	    !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ?
						 : ret;
	default:
		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
		return ret;
	}
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
{
	if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
		return ip6_default_np_autolabel(net);
	return inet6_test_bit(AUTOFLOWLABEL, sk);
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	struct hop_jumbo_hdr *hop_jumbo;
	int hoplen = sizeof(*hop_jumbo);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(head_room > skb_headroom(skb))) {
		skb = skb_expand_head(skb, head_room);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOBUFS;
		}
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	if (unlikely(seg_len > IPV6_MAXPLEN)) {
		hop_jumbo = skb_push(skb, hoplen);

		hop_jumbo->nexthdr = proto;
		hop_jumbo->hdrlen = 0;
		hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
		hop_jumbo->tlv_len = 4;
		hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);

		proto = IPPROTO_HOPOPTS;
		seg_len = 0;
		IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 * Fill in the IPv6 header
	 */
	if (np)
		hlimit = READ_ONCE(np->hop_limit);
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, sk), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) ||
	    skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dev,
			       dst_output);
	}

	skb->dev = dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {

			if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
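	 * (dst_link_failure() below notifies the sender; the -1 return makes
	 * ip6_forward() count the packet as discarded and drop it.)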
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb_clear_tstamp(skb);
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	SKB_DR(reason);
	u32 mtu;

	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 * We DO NOT make any processing on
	 * RA packets, pushing them to user level AS IS
	 * without any WARRANTY that application will be able
	 * to interpret them. The reason is that we
	 * cannot make anything clever here.
	 *
	 * We are not end-node, so that if packet contains
	 * AH/ESP, we cannot make anything.
	 * Defragmentation also would be a mistake, RA packets
	 * cannot be fragmented, because there is no warranty
	 * that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 * check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			/* It's tempting to decrease the hop limit
			 * here by 1, as we do at the end of the
			 * function too.
			 *
			 * But that would be incorrect, as proxying is
			 * not forwarding.  The ip6_input function
			 * will handle this packet locally, and it
			 * depends on the hop limit being unchanged.
			 *
			 * One example is the NDP hop limit, that
			 * always has to stay 255, but others would be
			 * similar checks around RA packets, where the
			 * user can even change the desired limit.
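			 * (RFC 4861 requires ND messages to be received with
			 * a hop limit of 255.)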
			 */
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		SKB_DR_SET(reason, XFRM_POLICY);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);

	mtu = ip6_dst_mtu_maybe_forward(dst, true);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
	SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
	kfree_skb_reason(skb, reason);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr =
		kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 * Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 * Charge the memory for the fragment to any owner
	 * it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 * Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 * Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 * Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	bool mono_delivery_time = skb->mono_delivery_time;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np) {
		u32 frag_size = READ_ONCE(np->frag_size);

		if (frag_size && frag_size < mtu)
			mtu = frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb?
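			 * A shared frag cannot be rewritten in place, so
			 * fall back to the copying slow path.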
			 */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		/* We prevent @rt from being freed. */
		rcu_read_lock();

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, mono_delivery_time);
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			rcu_read_unlock();
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		rcu_read_unlock();
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		skb_set_delivery_time(frag, tstamp, mono_delivery_time);
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_gso_disable(skb->sk);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using a saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now,
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ?
	      -EINVAL : 0;
	rcu_read_unlock();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 * ip6_dst_lookup - perform route lookup on flow
 * @net: Network namespace to perform lookup in
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 * @net: Network namespace to perform lookup in
 * @sk: socket which provides route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 * @sk: socket which provides the dst cache and route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @connected: whether @sk is connected or not
 *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * It will take the socket dst lock when operating on the dst cache.
 * As a result, this function can only be used in process context.
 *
 * In addition, for a connected socket, cache the dst in the socket
 * if the current cache is not valid.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
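 * The caller owns a reference on the returned dst and must release it
 * with dst_release().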
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
 * @skb: Packet for which lookup is done
 * @dev: Tunnel device
 * @net: Network namespace of tunnel device
 * @sock: Socket which provides route info
 * @saddr: Memory to store the src ip address
 * @info: Tunnel information
 * @protocol: IP protocol
 * @use_cache: Flag to enable cache usage
 *
 * This function performs a route lookup on a tunnel.
 *
 * It returns a valid dst pointer and stores the src address to be used in
 * the tunnel in param saddr on success, else a pointer encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
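			 * (only the first fragment reserves rt->dst.header_len,
			 * e.g. room for an IPsec tunnel header)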
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu, frag_size;
	struct ipv6_txoptions *nopt, *opt = ipc6->opt;

	/* callers pass dst together with a reference, set it first so
	 * ip6_cork_release() can put it down even in case of an error.
	 */
	cork->base.dst = &rt->dst;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!nopt))
			return -ENOBUFS;

		nopt->tot_len = sizeof(*opt);
		nopt->opt_flen = opt->opt_flen;
		nopt->opt_nflen = opt->opt_nflen;

		nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
		if (opt->dst0opt && !nopt->dst0opt)
			return -ENOBUFS;

		nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
		if (opt->dst1opt && !nopt->dst1opt)
			return -ENOBUFS;

		nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
		if (opt->hopopt && !nopt->hopopt)
			return -ENOBUFS;

		nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
		if (opt->srcrt && !nopt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));

	frag_size = READ_ONCE(np->frag_size);
	if (frag_size && frag_size < mtu)
		mtu = frag_size;

	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

static int __ip6_append_data(struct sock *sk,
			     struct sk_buff_head *queue,
			     struct inet_cork_full *cork_full,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	struct inet_cork *cork = &cork_full->base;
	struct flowi6 *fl6 = &cork_full->fl.u.ip6;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	bool zc = false;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
	    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
		tskey = atomic_inc_return(&sk->sk_tskey) - 1;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if ((flags & MSG_ZEROCOPY) && length) {
		struct msghdr *msg = from;

		if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
			if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
				return -EINVAL;

			/* Leave uarg NULL if can't zerocopy, callers should
			 * be able to handle it.
			 */
			if ((rt->dst.dev->features & NETIF_F_SG) &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
				uarg = msg->msg_ubuf;
			}
		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
			uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
			if (!uarg)
				return -ENOBUFS;
			extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
			if (rt->dst.dev->features & NETIF_F_SG &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
			} else {
				uarg_to_msgzc(uarg)->zerocopy = 0;
				skb_zcopy_set(skb, uarg, &extra_uref);
			}
		}
	} else if ((flags & MSG_SPLICE_PAGES) && length) {
		if (inet_test_bit(HDRINCL, sk))
			return -EPERM;
		if (rt->dst.dev->features & NETIF_F_SG &&
		    getfrag == ip_generic_getfrag)
			/* We need an empty buffer to attach stuff to */
			paged = true;
		else
			flags &= ~MSG_SPLICE_PAGES;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet.
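		 * If it does not, 'copy' ends up zero or negative and a
		 * new skb is allocated below.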
		 */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = fragheaderlen + transhdrlen;
				pagedlen = datalen - transhdrlen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			/* [!] NOTE: copy may be negative if pagedlen>0
			 * because then the equation may reduce to -fraggap.
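			 * (MSG_SPLICE_PAGES tolerates a negative value here,
			 * see the check just below.)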
			 */
			if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
							  (flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			} else if (flags & MSG_SPLICE_PAGES) {
				copy = 0;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (flags & MSG_SPLICE_PAGES) {
			struct msghdr *msg = from;

			err = -EIO;
			if (WARN_ON_ONCE(copy > msg->msg_iter.count))
				goto error;

			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
						   sk->sk_allocation);
			if (err < 0)
				goto error;
			copy = err;
			wmem_alloc_delta += copy;
		} else if (!zc) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			skb_zcopy_downgrade_managed(skb);
			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset
					+= copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, size_t length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		dst_hold(&rt->dst);
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt);
		if (err)
			return err;

		inet->cork.fl.u.ip6 = *fl6;
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
{
	struct dst_entry *dst = cork->base.dst;

	cork->base.dst = NULL;
	cork->base.flags &= ~IPCORK_ALLFRAG;
	skb_dst_set(skb, dst);
}

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		struct ipv6_txoptions *opt = v6_cork->opt;

		kfree(opt->dst0opt);
		kfree(opt->dst1opt);
		kfree(opt->hopopt);
		kfree(opt->srcrt);
		kfree(opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr *final_dst;
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);
	__skb_pull(skb, skb_network_header_len(skb));

	final_dst = &fl6->daddr;
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, sk), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = READ_ONCE(sk->sk_priority);
	skb->mark = cork->base.mark;
	skb->tstamp = cork->base.transmit_time;

	ip6_cork_steal_dst(skb, cork);
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
		u8 icmp6_type;

		if (sk->sk_socket->type == SOCK_RAW &&
		    !inet_test_bit(HDRINCL, sk))
			icmp6_type = fl6->fl6_icmp_type;
		else
			icmp6_type = icmp6_hdr(skb)->icmp6_type;
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct rt6_info *rt,
			     unsigned int flags, struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ?
			 ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE) {
		dst_release(&rt->dst);
		return NULL;
	}

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);

	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}