// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
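			 * (The clone taken above, if any, is looped back to
			 * the local stack via dev_loopback_xmit() in the
			 * POST_ROUTING hook below.)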
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb(skb);
		return ret;
	}
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reactions involving a unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * the input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
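	 * (Returning -1 makes the caller count the packet as an input
	 * discard and drop it.)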
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not the end node, so if the packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation also would be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
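	   (A redirect is only considered when the packet would leave via
	   the same interface it arrived on; see the iif check below.)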
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same,
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
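	 *	(skb_copy_bits() below copies from both the linear part
	 *	and any paged fragments of the original skb.)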
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
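			 * ip6_fraglist_prepare() copies the saved header in
			 * front of the next fragment and fills in its
			 * fragment header.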
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *	ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *	@skb: Packet for which lookup is done
 *	@dev: Tunnel device
 *	@net: Network namespace of tunnel device
 *	@sock: Socket which provides route info
 *	@saddr: Memory to store the src ip address
 *	@info: Tunnel information
 *	@protocol: IP protocol
 *	@use_cache: Flag to enable cache usage
 *	This function performs a route lookup on a tunnel
 *
 *	It returns a valid dst pointer and stores src address to be used in
 *	tunnel in param saddr on success, else a pointer encoded error code.
 */

struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
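			 * (only the first fragment needs to reserve
			 * rt->dst.header_len for tunnel/IPsec headers.)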
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation.
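	 * (with ignore_df set, ip6_fragment() may split this skb on
	 * output instead of failing with a too-big error.)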
	 */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}