// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
90 */ 91 if (newskb) 92 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 93 net, sk, newskb, NULL, newskb->dev, 94 dev_loopback_xmit); 95 96 if (hdr->hop_limit == 0) { 97 IP6_INC_STATS(net, idev, 98 IPSTATS_MIB_OUTDISCARDS); 99 kfree_skb(skb); 100 return 0; 101 } 102 } 103 104 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); 105 if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && 106 !(dev->flags & IFF_LOOPBACK)) { 107 kfree_skb(skb); 108 return 0; 109 } 110 } 111 112 if (lwtunnel_xmit_redirect(dst->lwtstate)) { 113 int res = lwtunnel_xmit(skb); 114 115 if (res < 0 || res == LWTUNNEL_XMIT_DONE) 116 return res; 117 } 118 119 rcu_read_lock_bh(); 120 nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); 121 neigh = __ipv6_neigh_lookup_noref(dev, nexthop); 122 if (unlikely(!neigh)) 123 neigh = __neigh_create(&nd_tbl, nexthop, dev, false); 124 if (!IS_ERR(neigh)) { 125 sock_confirm_neigh(skb, neigh); 126 ret = neigh_output(neigh, skb, false); 127 rcu_read_unlock_bh(); 128 return ret; 129 } 130 rcu_read_unlock_bh(); 131 132 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); 133 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); 134 return -EINVAL; 135 } 136 137 static int 138 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, 139 struct sk_buff *skb, unsigned int mtu) 140 { 141 struct sk_buff *segs, *nskb; 142 netdev_features_t features; 143 int ret = 0; 144 145 /* Please see corresponding comment in ip_finish_output_gso 146 * describing the cases where GSO segment length exceeds the 147 * egress MTU. 148 */ 149 features = netif_skb_features(skb); 150 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 151 if (IS_ERR_OR_NULL(segs)) { 152 kfree_skb(skb); 153 return -ENOMEM; 154 } 155 156 consume_skb(skb); 157 158 skb_list_walk_safe(segs, segs, nskb) { 159 int err; 160 161 skb_mark_not_on_list(segs); 162 err = ip6_fragment(net, sk, segs, ip6_finish_output2); 163 if (err && ret == 0) 164 ret = err; 165 } 166 167 return ret; 168 } 169 170 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 171 { 172 unsigned int mtu; 173 174 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 175 /* Policy lookup after SNAT yielded a new policy */ 176 if (skb_dst(skb)->xfrm) { 177 IP6CB(skb)->flags |= IP6SKB_REROUTED; 178 return dst_output(net, sk, skb); 179 } 180 #endif 181 182 mtu = ip6_skb_dst_mtu(skb); 183 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) 184 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); 185 186 if ((skb->len > mtu && !skb_is_gso(skb)) || 187 dst_allfrag(skb_dst(skb)) || 188 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 189 return ip6_fragment(net, sk, skb, ip6_finish_output2); 190 else 191 return ip6_finish_output2(net, sk, skb); 192 } 193 194 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 195 { 196 int ret; 197 198 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 199 switch (ret) { 200 case NET_XMIT_SUCCESS: 201 return __ip6_finish_output(net, sk, skb); 202 case NET_XMIT_CN: 203 return __ip6_finish_output(net, sk, skb) ? 
: ret; 204 default: 205 kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); 206 return ret; 207 } 208 } 209 210 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 211 { 212 struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; 213 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 214 215 skb->protocol = htons(ETH_P_IPV6); 216 skb->dev = dev; 217 218 if (unlikely(idev->cnf.disable_ipv6)) { 219 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 220 kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); 221 return 0; 222 } 223 224 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, 225 net, sk, skb, indev, dev, 226 ip6_finish_output, 227 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 228 } 229 EXPORT_SYMBOL(ip6_output); 230 231 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) 232 { 233 if (!np->autoflowlabel_set) 234 return ip6_default_np_autolabel(net); 235 else 236 return np->autoflowlabel; 237 } 238 239 /* 240 * xmit an sk_buff (used by TCP, SCTP and DCCP) 241 * Note : socket lock is not held for SYNACK packets, but might be modified 242 * by calls to skb_set_owner_w() and ipv6_local_error(), 243 * which are using proper atomic operations or spinlocks. 244 */ 245 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 246 __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) 247 { 248 struct net *net = sock_net(sk); 249 const struct ipv6_pinfo *np = inet6_sk(sk); 250 struct in6_addr *first_hop = &fl6->daddr; 251 struct dst_entry *dst = skb_dst(skb); 252 struct net_device *dev = dst->dev; 253 struct inet6_dev *idev = ip6_dst_idev(dst); 254 unsigned int head_room; 255 struct ipv6hdr *hdr; 256 u8 proto = fl6->flowi6_proto; 257 int seg_len = skb->len; 258 int hlimit = -1; 259 u32 mtu; 260 261 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev); 262 if (opt) 263 head_room += opt->opt_nflen + opt->opt_flen; 264 265 if (unlikely(head_room > skb_headroom(skb))) { 266 skb = skb_expand_head(skb, head_room); 267 if (!skb) { 268 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 269 return -ENOBUFS; 270 } 271 } 272 273 if (opt) { 274 seg_len += opt->opt_nflen + opt->opt_flen; 275 276 if (opt->opt_flen) 277 ipv6_push_frag_opts(skb, opt, &proto); 278 279 if (opt->opt_nflen) 280 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, 281 &fl6->saddr); 282 } 283 284 skb_push(skb, sizeof(struct ipv6hdr)); 285 skb_reset_network_header(skb); 286 hdr = ipv6_hdr(skb); 287 288 /* 289 * Fill in the IPv6 header 290 */ 291 if (np) 292 hlimit = np->hop_limit; 293 if (hlimit < 0) 294 hlimit = ip6_dst_hoplimit(dst); 295 296 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 297 ip6_autoflowlabel(net, np), fl6)); 298 299 hdr->payload_len = htons(seg_len); 300 hdr->nexthdr = proto; 301 hdr->hop_limit = hlimit; 302 303 hdr->saddr = fl6->saddr; 304 hdr->daddr = *first_hop; 305 306 skb->protocol = htons(ETH_P_IPV6); 307 skb->priority = priority; 308 skb->mark = mark; 309 310 mtu = dst_mtu(dst); 311 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { 312 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 313 314 /* if egress device is enslaved to an L3 master device pass the 315 * skb to its handler for processing 316 */ 317 skb = l3mdev_ip6_out((struct sock *)sk, skb); 318 if (unlikely(!skb)) 319 return 0; 320 321 /* hooks should never assume socket lock is held. 
322 * we promote our socket to non const 323 */ 324 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 325 net, (struct sock *)sk, skb, NULL, dev, 326 dst_output); 327 } 328 329 skb->dev = dev; 330 /* ipv6_local_error() does not require socket lock, 331 * we promote our socket to non const 332 */ 333 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); 334 335 IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); 336 kfree_skb(skb); 337 return -EMSGSIZE; 338 } 339 EXPORT_SYMBOL(ip6_xmit); 340 341 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 342 { 343 struct ip6_ra_chain *ra; 344 struct sock *last = NULL; 345 346 read_lock(&ip6_ra_lock); 347 for (ra = ip6_ra_chain; ra; ra = ra->next) { 348 struct sock *sk = ra->sk; 349 if (sk && ra->sel == sel && 350 (!sk->sk_bound_dev_if || 351 sk->sk_bound_dev_if == skb->dev->ifindex)) { 352 struct ipv6_pinfo *np = inet6_sk(sk); 353 354 if (np && np->rtalert_isolate && 355 !net_eq(sock_net(sk), dev_net(skb->dev))) { 356 continue; 357 } 358 if (last) { 359 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 360 if (skb2) 361 rawv6_rcv(last, skb2); 362 } 363 last = sk; 364 } 365 } 366 367 if (last) { 368 rawv6_rcv(last, skb); 369 read_unlock(&ip6_ra_lock); 370 return 1; 371 } 372 read_unlock(&ip6_ra_lock); 373 return 0; 374 } 375 376 static int ip6_forward_proxy_check(struct sk_buff *skb) 377 { 378 struct ipv6hdr *hdr = ipv6_hdr(skb); 379 u8 nexthdr = hdr->nexthdr; 380 __be16 frag_off; 381 int offset; 382 383 if (ipv6_ext_hdr(nexthdr)) { 384 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 385 if (offset < 0) 386 return 0; 387 } else 388 offset = sizeof(struct ipv6hdr); 389 390 if (nexthdr == IPPROTO_ICMPV6) { 391 struct icmp6hdr *icmp6; 392 393 if (!pskb_may_pull(skb, (skb_network_header(skb) + 394 offset + 1 - skb->data))) 395 return 0; 396 397 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 398 399 switch (icmp6->icmp6_type) { 400 case NDISC_ROUTER_SOLICITATION: 401 case NDISC_ROUTER_ADVERTISEMENT: 402 case NDISC_NEIGHBOUR_SOLICITATION: 403 case NDISC_NEIGHBOUR_ADVERTISEMENT: 404 case NDISC_REDIRECT: 405 /* For reaction involving unicast neighbor discovery 406 * message destined to the proxied address, pass it to 407 * input function. 408 */ 409 return 1; 410 default: 411 break; 412 } 413 } 414 415 /* 416 * The proxying router can't forward traffic sent to a link-local 417 * address, so signal the sender and discard the packet. This 418 * behavior is clarified by the MIPv6 specification. 
 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb_clear_tstamp(skb);
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	SKB_DR(reason);
	u32 mtu;

	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do not process RA (Router Alert) packets here; they are
	 *	pushed to user level AS IS, without any warranty that the
	 *	application will be able to interpret them, because we
	 *	cannot do anything clever with them.
	 *
	 *	We are not the end node, so if the packet contains AH/ESP
	 *	we cannot do anything with it either.  Defragmenting would
	 *	also be a mistake: RA packets must not be fragmented,
	 *	because there is no guarantee that different fragments will
	 *	follow the same path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			hdr->hop_limit--;
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		SKB_DR_SET(reason, XFRM_POLICY);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
548 We don't send redirects to frames decapsulated from IPsec. 549 */ 550 if (IP6CB(skb)->iif == dst->dev->ifindex && 551 opt->srcrt == 0 && !skb_sec_path(skb)) { 552 struct in6_addr *target = NULL; 553 struct inet_peer *peer; 554 struct rt6_info *rt; 555 556 /* 557 * incoming and outgoing devices are the same 558 * send a redirect. 559 */ 560 561 rt = (struct rt6_info *) dst; 562 if (rt->rt6i_flags & RTF_GATEWAY) 563 target = &rt->rt6i_gateway; 564 else 565 target = &hdr->daddr; 566 567 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); 568 569 /* Limit redirects both by destination (here) 570 and by source (inside ndisc_send_redirect) 571 */ 572 if (inet_peer_xrlim_allow(peer, 1*HZ)) 573 ndisc_send_redirect(skb, target); 574 if (peer) 575 inet_putpeer(peer); 576 } else { 577 int addrtype = ipv6_addr_type(&hdr->saddr); 578 579 /* This check is security critical. */ 580 if (addrtype == IPV6_ADDR_ANY || 581 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 582 goto error; 583 if (addrtype & IPV6_ADDR_LINKLOCAL) { 584 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 585 ICMPV6_NOT_NEIGHBOUR, 0); 586 goto error; 587 } 588 } 589 590 mtu = ip6_dst_mtu_maybe_forward(dst, true); 591 if (mtu < IPV6_MIN_MTU) 592 mtu = IPV6_MIN_MTU; 593 594 if (ip6_pkt_too_big(skb, mtu)) { 595 /* Again, force OUTPUT device used as source address */ 596 skb->dev = dst->dev; 597 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 598 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); 599 __IP6_INC_STATS(net, ip6_dst_idev(dst), 600 IPSTATS_MIB_FRAGFAILS); 601 kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); 602 return -EMSGSIZE; 603 } 604 605 if (skb_cow(skb, dst->dev->hard_header_len)) { 606 __IP6_INC_STATS(net, ip6_dst_idev(dst), 607 IPSTATS_MIB_OUTDISCARDS); 608 goto drop; 609 } 610 611 hdr = ipv6_hdr(skb); 612 613 /* Mangling hops number delayed to point after skb COW */ 614 615 hdr->hop_limit--; 616 617 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 618 net, NULL, skb, skb->dev, dst->dev, 619 ip6_forward_finish); 620 621 error: 622 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); 623 SKB_DR_SET(reason, IP_INADDRERRORS); 624 drop: 625 kfree_skb_reason(skb, reason); 626 return -EINVAL; 627 } 628 629 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 630 { 631 to->pkt_type = from->pkt_type; 632 to->priority = from->priority; 633 to->protocol = from->protocol; 634 skb_dst_drop(to); 635 skb_dst_set(to, dst_clone(skb_dst(from))); 636 to->dev = from->dev; 637 to->mark = from->mark; 638 639 skb_copy_hash(to, from); 640 641 #ifdef CONFIG_NET_SCHED 642 to->tc_index = from->tc_index; 643 #endif 644 nf_copy(to, from); 645 skb_ext_copy(to, from); 646 skb_copy_secmark(to, from); 647 } 648 649 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, 650 u8 nexthdr, __be32 frag_id, 651 struct ip6_fraglist_iter *iter) 652 { 653 unsigned int first_len; 654 struct frag_hdr *fh; 655 656 /* BUILD HEADER */ 657 *prevhdr = NEXTHDR_FRAGMENT; 658 iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 659 if (!iter->tmp_hdr) 660 return -ENOMEM; 661 662 iter->frag = skb_shinfo(skb)->frag_list; 663 skb_frag_list_init(skb); 664 665 iter->offset = 0; 666 iter->hlen = hlen; 667 iter->frag_id = frag_id; 668 iter->nexthdr = nexthdr; 669 670 __skb_pull(skb, hlen); 671 fh = __skb_push(skb, sizeof(struct frag_hdr)); 672 __skb_push(skb, hlen); 673 skb_reset_network_header(skb); 674 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen); 675 676 fh->nexthdr = nexthdr; 677 fh->reserved = 0; 678 
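	/*
	 * The Fragment header stores the fragment offset, in units of
	 * 8 octets, in the upper 13 bits of frag_off, with the low bit
	 * used as the "more fragments" flag (IP6_MF).  Because offsets
	 * are always 8-byte aligned, a byte offset can be stored
	 * directly; e.g. a non-final fragment whose payload starts 1448
	 * bytes into the original packet would carry (a minimal sketch,
	 * not code used by this path):
	 *
	 *	fh->frag_off = htons((1448 & IP6_OFFSET) | IP6_MF);
	 *
	 * The head fragment built here starts at offset 0, so only
	 * IP6_MF is set.
	 */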
fh->frag_off = htons(IP6_MF); 679 fh->identification = frag_id; 680 681 first_len = skb_pagelen(skb); 682 skb->data_len = first_len - skb_headlen(skb); 683 skb->len = first_len; 684 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); 685 686 return 0; 687 } 688 EXPORT_SYMBOL(ip6_fraglist_init); 689 690 void ip6_fraglist_prepare(struct sk_buff *skb, 691 struct ip6_fraglist_iter *iter) 692 { 693 struct sk_buff *frag = iter->frag; 694 unsigned int hlen = iter->hlen; 695 struct frag_hdr *fh; 696 697 frag->ip_summed = CHECKSUM_NONE; 698 skb_reset_transport_header(frag); 699 fh = __skb_push(frag, sizeof(struct frag_hdr)); 700 __skb_push(frag, hlen); 701 skb_reset_network_header(frag); 702 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen); 703 iter->offset += skb->len - hlen - sizeof(struct frag_hdr); 704 fh->nexthdr = iter->nexthdr; 705 fh->reserved = 0; 706 fh->frag_off = htons(iter->offset); 707 if (frag->next) 708 fh->frag_off |= htons(IP6_MF); 709 fh->identification = iter->frag_id; 710 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 711 ip6_copy_metadata(frag, skb); 712 } 713 EXPORT_SYMBOL(ip6_fraglist_prepare); 714 715 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, 716 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, 717 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state) 718 { 719 state->prevhdr = prevhdr; 720 state->nexthdr = nexthdr; 721 state->frag_id = frag_id; 722 723 state->hlen = hlen; 724 state->mtu = mtu; 725 726 state->left = skb->len - hlen; /* Space per frame */ 727 state->ptr = hlen; /* Where to start from */ 728 729 state->hroom = hdr_room; 730 state->troom = needed_tailroom; 731 732 state->offset = 0; 733 } 734 EXPORT_SYMBOL(ip6_frag_init); 735 736 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state) 737 { 738 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset; 739 struct sk_buff *frag; 740 struct frag_hdr *fh; 741 unsigned int len; 742 743 len = state->left; 744 /* IF: it doesn't fit, use 'mtu' - the data space left */ 745 if (len > state->mtu) 746 len = state->mtu; 747 /* IF: we are not sending up to and including the packet end 748 then align the next start on an eight byte boundary */ 749 if (len < state->left) 750 len &= ~7; 751 752 /* Allocate buffer */ 753 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) + 754 state->hroom + state->troom, GFP_ATOMIC); 755 if (!frag) 756 return ERR_PTR(-ENOMEM); 757 758 /* 759 * Set up data on packet 760 */ 761 762 ip6_copy_metadata(frag, skb); 763 skb_reserve(frag, state->hroom); 764 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr)); 765 skb_reset_network_header(frag); 766 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen); 767 frag->transport_header = (frag->network_header + state->hlen + 768 sizeof(struct frag_hdr)); 769 770 /* 771 * Charge the memory for the fragment to any owner 772 * it might possess 773 */ 774 if (skb->sk) 775 skb_set_owner_w(frag, skb->sk); 776 777 /* 778 * Copy the packet header into the new buffer. 779 */ 780 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen); 781 782 fragnexthdr_offset = skb_network_header(frag); 783 fragnexthdr_offset += prevhdr - skb_network_header(skb); 784 *fragnexthdr_offset = NEXTHDR_FRAGMENT; 785 786 /* 787 * Build fragment header. 788 */ 789 fh->nexthdr = state->nexthdr; 790 fh->reserved = 0; 791 fh->identification = state->frag_id; 792 793 /* 794 * Copy a block of the IP datagram. 
 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	bool mono_delivery_time = skb->mono_delivery_time;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
*/ 907 if (iter.frag) 908 ip6_fraglist_prepare(skb, &iter); 909 910 skb_set_delivery_time(skb, tstamp, mono_delivery_time); 911 err = output(net, sk, skb); 912 if (!err) 913 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 914 IPSTATS_MIB_FRAGCREATES); 915 916 if (err || !iter.frag) 917 break; 918 919 skb = ip6_fraglist_next(&iter); 920 } 921 922 kfree(iter.tmp_hdr); 923 924 if (err == 0) { 925 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 926 IPSTATS_MIB_FRAGOKS); 927 return 0; 928 } 929 930 kfree_skb_list(iter.frag); 931 932 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 933 IPSTATS_MIB_FRAGFAILS); 934 return err; 935 936 slow_path_clean: 937 skb_walk_frags(skb, frag2) { 938 if (frag2 == frag) 939 break; 940 frag2->sk = NULL; 941 frag2->destructor = NULL; 942 skb->truesize += frag2->truesize; 943 } 944 } 945 946 slow_path: 947 /* 948 * Fragment the datagram. 949 */ 950 951 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom, 952 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id, 953 &state); 954 955 /* 956 * Keep copying data until we run out. 957 */ 958 959 while (state.left > 0) { 960 frag = ip6_frag_next(skb, &state); 961 if (IS_ERR(frag)) { 962 err = PTR_ERR(frag); 963 goto fail; 964 } 965 966 /* 967 * Put this fragment into the sending queue. 968 */ 969 skb_set_delivery_time(frag, tstamp, mono_delivery_time); 970 err = output(net, sk, frag); 971 if (err) 972 goto fail; 973 974 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 975 IPSTATS_MIB_FRAGCREATES); 976 } 977 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 978 IPSTATS_MIB_FRAGOKS); 979 consume_skb(skb); 980 return err; 981 982 fail_toobig: 983 if (skb->sk && dst_allfrag(skb_dst(skb))) 984 sk_gso_disable(skb->sk); 985 986 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 987 err = -EMSGSIZE; 988 989 fail: 990 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 991 IPSTATS_MIB_FRAGFAILS); 992 kfree_skb(skb); 993 return err; 994 } 995 996 static inline int ip6_rt_check(const struct rt6key *rt_key, 997 const struct in6_addr *fl_addr, 998 const struct in6_addr *addr_cache) 999 { 1000 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 1001 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); 1002 } 1003 1004 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 1005 struct dst_entry *dst, 1006 const struct flowi6 *fl6) 1007 { 1008 struct ipv6_pinfo *np = inet6_sk(sk); 1009 struct rt6_info *rt; 1010 1011 if (!dst) 1012 goto out; 1013 1014 if (dst->ops->family != AF_INET6) { 1015 dst_release(dst); 1016 return NULL; 1017 } 1018 1019 rt = (struct rt6_info *)dst; 1020 /* Yes, checking route validity in not connected 1021 * case is not very simple. Take into account, 1022 * that we do not support routing by source, TOS, 1023 * and MSG_DONTROUTE --ANK (980726) 1024 * 1025 * 1. ip6_rt_check(): If route was host route, 1026 * check that cached destination is current. 1027 * If it is network route, we still may 1028 * check its validity using saved pointer 1029 * to the last used address: daddr_cache. 1030 * We do not want to save whole address now, 1031 * (because main consumer of this service 1032 * is tcp, which has not this problem), 1033 * so that the last trick works only on connected 1034 * sockets. 1035 * 2. oif also should be the same. 
1036 */ 1037 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 1038 #ifdef CONFIG_IPV6_SUBTREES 1039 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 1040 #endif 1041 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { 1042 dst_release(dst); 1043 dst = NULL; 1044 } 1045 1046 out: 1047 return dst; 1048 } 1049 1050 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, 1051 struct dst_entry **dst, struct flowi6 *fl6) 1052 { 1053 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1054 struct neighbour *n; 1055 struct rt6_info *rt; 1056 #endif 1057 int err; 1058 int flags = 0; 1059 1060 /* The correct way to handle this would be to do 1061 * ip6_route_get_saddr, and then ip6_route_output; however, 1062 * the route-specific preferred source forces the 1063 * ip6_route_output call _before_ ip6_route_get_saddr. 1064 * 1065 * In source specific routing (no src=any default route), 1066 * ip6_route_output will fail given src=any saddr, though, so 1067 * that's why we try it again later. 1068 */ 1069 if (ipv6_addr_any(&fl6->saddr)) { 1070 struct fib6_info *from; 1071 struct rt6_info *rt; 1072 1073 *dst = ip6_route_output(net, sk, fl6); 1074 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; 1075 1076 rcu_read_lock(); 1077 from = rt ? rcu_dereference(rt->from) : NULL; 1078 err = ip6_route_get_saddr(net, from, &fl6->daddr, 1079 sk ? inet6_sk(sk)->srcprefs : 0, 1080 &fl6->saddr); 1081 rcu_read_unlock(); 1082 1083 if (err) 1084 goto out_err_release; 1085 1086 /* If we had an erroneous initial result, pretend it 1087 * never existed and let the SA-enabled version take 1088 * over. 1089 */ 1090 if ((*dst)->error) { 1091 dst_release(*dst); 1092 *dst = NULL; 1093 } 1094 1095 if (fl6->flowi6_oif) 1096 flags |= RT6_LOOKUP_F_IFACE; 1097 } 1098 1099 if (!*dst) 1100 *dst = ip6_route_output_flags(net, sk, fl6, flags); 1101 1102 err = (*dst)->error; 1103 if (err) 1104 goto out_err_release; 1105 1106 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1107 /* 1108 * Here if the dst entry we've looked up 1109 * has a neighbour entry that is in the INCOMPLETE 1110 * state and the src address from the flow is 1111 * marked as OPTIMISTIC, we release the found 1112 * dst entry and replace it instead with the 1113 * dst entry of the nexthop router 1114 */ 1115 rt = (struct rt6_info *) *dst; 1116 rcu_read_lock_bh(); 1117 n = __ipv6_neigh_lookup_noref(rt->dst.dev, 1118 rt6_nexthop(rt, &fl6->daddr)); 1119 err = n && !(n->nud_state & NUD_VALID) ? 
-EINVAL : 0; 1120 rcu_read_unlock_bh(); 1121 1122 if (err) { 1123 struct inet6_ifaddr *ifp; 1124 struct flowi6 fl_gw6; 1125 int redirect; 1126 1127 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 1128 (*dst)->dev, 1); 1129 1130 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 1131 if (ifp) 1132 in6_ifa_put(ifp); 1133 1134 if (redirect) { 1135 /* 1136 * We need to get the dst entry for the 1137 * default router instead 1138 */ 1139 dst_release(*dst); 1140 memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 1141 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 1142 *dst = ip6_route_output(net, sk, &fl_gw6); 1143 err = (*dst)->error; 1144 if (err) 1145 goto out_err_release; 1146 } 1147 } 1148 #endif 1149 if (ipv6_addr_v4mapped(&fl6->saddr) && 1150 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { 1151 err = -EAFNOSUPPORT; 1152 goto out_err_release; 1153 } 1154 1155 return 0; 1156 1157 out_err_release: 1158 dst_release(*dst); 1159 *dst = NULL; 1160 1161 if (err == -ENETUNREACH) 1162 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); 1163 return err; 1164 } 1165 1166 /** 1167 * ip6_dst_lookup - perform route lookup on flow 1168 * @net: Network namespace to perform lookup in 1169 * @sk: socket which provides route info 1170 * @dst: pointer to dst_entry * for result 1171 * @fl6: flow to lookup 1172 * 1173 * This function performs a route lookup on the given flow. 1174 * 1175 * It returns zero on success, or a standard errno code on error. 1176 */ 1177 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 1178 struct flowi6 *fl6) 1179 { 1180 *dst = NULL; 1181 return ip6_dst_lookup_tail(net, sk, dst, fl6); 1182 } 1183 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1184 1185 /** 1186 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 1187 * @net: Network namespace to perform lookup in 1188 * @sk: socket which provides route info 1189 * @fl6: flow to lookup 1190 * @final_dst: final destination address for ipsec lookup 1191 * 1192 * This function performs a route lookup on the given flow. 1193 * 1194 * It returns a valid dst pointer on success, or a pointer encoded 1195 * error code. 1196 */ 1197 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, 1198 const struct in6_addr *final_dst) 1199 { 1200 struct dst_entry *dst = NULL; 1201 int err; 1202 1203 err = ip6_dst_lookup_tail(net, sk, &dst, fl6); 1204 if (err) 1205 return ERR_PTR(err); 1206 if (final_dst) 1207 fl6->daddr = *final_dst; 1208 1209 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); 1210 } 1211 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 1212 1213 /** 1214 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1215 * @sk: socket which provides the dst cache and route info 1216 * @fl6: flow to lookup 1217 * @final_dst: final destination address for ipsec lookup 1218 * @connected: whether @sk is connected or not 1219 * 1220 * This function performs a route lookup on the given flow with the 1221 * possibility of using the cached route in the socket if it is valid. 1222 * It will take the socket dst lock when operating on the dst cache. 1223 * As a result, this function can only be used in process context. 1224 * 1225 * In addition, for a connected socket, cache the dst in the socket 1226 * if the current cache is not valid. 1227 * 1228 * It returns a valid dst pointer on success, or a pointer encoded 1229 * error code. 
1230 */ 1231 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1232 const struct in6_addr *final_dst, 1233 bool connected) 1234 { 1235 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1236 1237 dst = ip6_sk_dst_check(sk, dst, fl6); 1238 if (dst) 1239 return dst; 1240 1241 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); 1242 if (connected && !IS_ERR(dst)) 1243 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); 1244 1245 return dst; 1246 } 1247 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1248 1249 /** 1250 * ip6_dst_lookup_tunnel - perform route lookup on tunnel 1251 * @skb: Packet for which lookup is done 1252 * @dev: Tunnel device 1253 * @net: Network namespace of tunnel device 1254 * @sock: Socket which provides route info 1255 * @saddr: Memory to store the src ip address 1256 * @info: Tunnel information 1257 * @protocol: IP protocol 1258 * @use_cache: Flag to enable cache usage 1259 * This function performs a route lookup on a tunnel 1260 * 1261 * It returns a valid dst pointer and stores src address to be used in 1262 * tunnel in param saddr on success, else a pointer encoded error code. 1263 */ 1264 1265 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, 1266 struct net_device *dev, 1267 struct net *net, 1268 struct socket *sock, 1269 struct in6_addr *saddr, 1270 const struct ip_tunnel_info *info, 1271 u8 protocol, 1272 bool use_cache) 1273 { 1274 struct dst_entry *dst = NULL; 1275 #ifdef CONFIG_DST_CACHE 1276 struct dst_cache *dst_cache; 1277 #endif 1278 struct flowi6 fl6; 1279 __u8 prio; 1280 1281 #ifdef CONFIG_DST_CACHE 1282 dst_cache = (struct dst_cache *)&info->dst_cache; 1283 if (use_cache) { 1284 dst = dst_cache_get_ip6(dst_cache, saddr); 1285 if (dst) 1286 return dst; 1287 } 1288 #endif 1289 memset(&fl6, 0, sizeof(fl6)); 1290 fl6.flowi6_mark = skb->mark; 1291 fl6.flowi6_proto = protocol; 1292 fl6.daddr = info->key.u.ipv6.dst; 1293 fl6.saddr = info->key.u.ipv6.src; 1294 prio = info->key.tos; 1295 fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio), 1296 info->key.label); 1297 1298 dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, 1299 NULL); 1300 if (IS_ERR(dst)) { 1301 netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); 1302 return ERR_PTR(-ENETUNREACH); 1303 } 1304 if (dst->dev == dev) { /* is this necessary? */ 1305 netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); 1306 dst_release(dst); 1307 return ERR_PTR(-ELOOP); 1308 } 1309 #ifdef CONFIG_DST_CACHE 1310 if (use_cache) 1311 dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); 1312 #endif 1313 *saddr = fl6.saddr; 1314 return dst; 1315 } 1316 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); 1317 1318 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1319 gfp_t gfp) 1320 { 1321 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1322 } 1323 1324 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1325 gfp_t gfp) 1326 { 1327 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1328 } 1329 1330 static void ip6_append_data_mtu(unsigned int *mtu, 1331 int *maxfraglen, 1332 unsigned int fragheaderlen, 1333 struct sk_buff *skb, 1334 struct rt6_info *rt, 1335 unsigned int orig_mtu) 1336 { 1337 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 1338 if (!skb) { 1339 /* first fragment, reserve header_len */ 1340 *mtu = orig_mtu - rt->dst.header_len; 1341 1342 } else { 1343 /* 1344 * this fragment is not first, the headers 1345 * space is regarded as data space. 
1346 */ 1347 *mtu = orig_mtu; 1348 } 1349 *maxfraglen = ((*mtu - fragheaderlen) & ~7) 1350 + fragheaderlen - sizeof(struct frag_hdr); 1351 } 1352 } 1353 1354 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 1355 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, 1356 struct rt6_info *rt) 1357 { 1358 struct ipv6_pinfo *np = inet6_sk(sk); 1359 unsigned int mtu; 1360 struct ipv6_txoptions *nopt, *opt = ipc6->opt; 1361 1362 /* callers pass dst together with a reference, set it first so 1363 * ip6_cork_release() can put it down even in case of an error. 1364 */ 1365 cork->base.dst = &rt->dst; 1366 1367 /* 1368 * setup for corking 1369 */ 1370 if (opt) { 1371 if (WARN_ON(v6_cork->opt)) 1372 return -EINVAL; 1373 1374 nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); 1375 if (unlikely(!nopt)) 1376 return -ENOBUFS; 1377 1378 nopt->tot_len = sizeof(*opt); 1379 nopt->opt_flen = opt->opt_flen; 1380 nopt->opt_nflen = opt->opt_nflen; 1381 1382 nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); 1383 if (opt->dst0opt && !nopt->dst0opt) 1384 return -ENOBUFS; 1385 1386 nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); 1387 if (opt->dst1opt && !nopt->dst1opt) 1388 return -ENOBUFS; 1389 1390 nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); 1391 if (opt->hopopt && !nopt->hopopt) 1392 return -ENOBUFS; 1393 1394 nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); 1395 if (opt->srcrt && !nopt->srcrt) 1396 return -ENOBUFS; 1397 1398 /* need source address above miyazawa*/ 1399 } 1400 v6_cork->hop_limit = ipc6->hlimit; 1401 v6_cork->tclass = ipc6->tclass; 1402 if (rt->dst.flags & DST_XFRM_TUNNEL) 1403 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1404 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); 1405 else 1406 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1407 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); 1408 if (np->frag_size < mtu) { 1409 if (np->frag_size) 1410 mtu = np->frag_size; 1411 } 1412 cork->base.fragsize = mtu; 1413 cork->base.gso_size = ipc6->gso_size; 1414 cork->base.tx_flags = 0; 1415 cork->base.mark = ipc6->sockc.mark; 1416 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); 1417 1418 if (dst_allfrag(xfrm_dst_path(&rt->dst))) 1419 cork->base.flags |= IPCORK_ALLFRAG; 1420 cork->base.length = 0; 1421 1422 cork->base.transmit_time = ipc6->sockc.transmit_time; 1423 1424 return 0; 1425 } 1426 1427 static int __ip6_append_data(struct sock *sk, 1428 struct sk_buff_head *queue, 1429 struct inet_cork_full *cork_full, 1430 struct inet6_cork *v6_cork, 1431 struct page_frag *pfrag, 1432 int getfrag(void *from, char *to, int offset, 1433 int len, int odd, struct sk_buff *skb), 1434 void *from, int length, int transhdrlen, 1435 unsigned int flags, struct ipcm6_cookie *ipc6) 1436 { 1437 struct sk_buff *skb, *skb_prev = NULL; 1438 struct inet_cork *cork = &cork_full->base; 1439 struct flowi6 *fl6 = &cork_full->fl.u.ip6; 1440 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; 1441 struct ubuf_info *uarg = NULL; 1442 int exthdrlen = 0; 1443 int dst_exthdrlen = 0; 1444 int hh_len; 1445 int copy; 1446 int err; 1447 int offset = 0; 1448 u32 tskey = 0; 1449 struct rt6_info *rt = (struct rt6_info *)cork->dst; 1450 struct ipv6_txoptions *opt = v6_cork->opt; 1451 int csummode = CHECKSUM_NONE; 1452 unsigned int maxnonfragsize, headersize; 1453 unsigned int wmem_alloc_delta = 0; 1454 bool paged, extra_uref = false; 1455 1456 skb = skb_peek_tail(queue); 1457 if (!skb) { 1458 exthdrlen = opt ? 
opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = atomic_inc_return(&sk->sk_tskey) - 1;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ?
mtu : maxfraglen) - skb->len; 1562 if (copy < length) 1563 copy = maxfraglen - skb->len; 1564 1565 if (copy <= 0) { 1566 char *data; 1567 unsigned int datalen; 1568 unsigned int fraglen; 1569 unsigned int fraggap; 1570 unsigned int alloclen, alloc_extra; 1571 unsigned int pagedlen; 1572 alloc_new_skb: 1573 /* There's no room in the current skb */ 1574 if (skb) 1575 fraggap = skb->len - maxfraglen; 1576 else 1577 fraggap = 0; 1578 /* update mtu and maxfraglen if necessary */ 1579 if (!skb || !skb_prev) 1580 ip6_append_data_mtu(&mtu, &maxfraglen, 1581 fragheaderlen, skb, rt, 1582 orig_mtu); 1583 1584 skb_prev = skb; 1585 1586 /* 1587 * If remaining data exceeds the mtu, 1588 * we know we need more fragment(s). 1589 */ 1590 datalen = length + fraggap; 1591 1592 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1593 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1594 fraglen = datalen + fragheaderlen; 1595 pagedlen = 0; 1596 1597 alloc_extra = hh_len; 1598 alloc_extra += dst_exthdrlen; 1599 alloc_extra += rt->dst.trailer_len; 1600 1601 /* We just reserve space for fragment header. 1602 * Note: this may be overallocation if the message 1603 * (without MSG_MORE) fits into the MTU. 1604 */ 1605 alloc_extra += sizeof(struct frag_hdr); 1606 1607 if ((flags & MSG_MORE) && 1608 !(rt->dst.dev->features&NETIF_F_SG)) 1609 alloclen = mtu; 1610 else if (!paged && 1611 (fraglen + alloc_extra < SKB_MAX_ALLOC || 1612 !(rt->dst.dev->features & NETIF_F_SG))) 1613 alloclen = fraglen; 1614 else { 1615 alloclen = min_t(int, fraglen, MAX_HEADER); 1616 pagedlen = fraglen - alloclen; 1617 } 1618 alloclen += alloc_extra; 1619 1620 if (datalen != length + fraggap) { 1621 /* 1622 * this is not the last fragment, the trailer 1623 * space is regarded as data space. 
1624 */ 1625 datalen += rt->dst.trailer_len; 1626 } 1627 1628 fraglen = datalen + fragheaderlen; 1629 1630 copy = datalen - transhdrlen - fraggap - pagedlen; 1631 if (copy < 0) { 1632 err = -EINVAL; 1633 goto error; 1634 } 1635 if (transhdrlen) { 1636 skb = sock_alloc_send_skb(sk, alloclen, 1637 (flags & MSG_DONTWAIT), &err); 1638 } else { 1639 skb = NULL; 1640 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 1641 2 * sk->sk_sndbuf) 1642 skb = alloc_skb(alloclen, 1643 sk->sk_allocation); 1644 if (unlikely(!skb)) 1645 err = -ENOBUFS; 1646 } 1647 if (!skb) 1648 goto error; 1649 /* 1650 * Fill in the control structures 1651 */ 1652 skb->protocol = htons(ETH_P_IPV6); 1653 skb->ip_summed = csummode; 1654 skb->csum = 0; 1655 /* reserve for fragmentation and ipsec header */ 1656 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1657 dst_exthdrlen); 1658 1659 /* 1660 * Find where to start putting bytes 1661 */ 1662 data = skb_put(skb, fraglen - pagedlen); 1663 skb_set_network_header(skb, exthdrlen); 1664 data += fragheaderlen; 1665 skb->transport_header = (skb->network_header + 1666 fragheaderlen); 1667 if (fraggap) { 1668 skb->csum = skb_copy_and_csum_bits( 1669 skb_prev, maxfraglen, 1670 data + transhdrlen, fraggap); 1671 skb_prev->csum = csum_sub(skb_prev->csum, 1672 skb->csum); 1673 data += fraggap; 1674 pskb_trim_unique(skb_prev, maxfraglen); 1675 } 1676 if (copy > 0 && 1677 getfrag(from, data + transhdrlen, offset, 1678 copy, fraggap, skb) < 0) { 1679 err = -EFAULT; 1680 kfree_skb(skb); 1681 goto error; 1682 } 1683 1684 offset += copy; 1685 length -= copy + transhdrlen; 1686 transhdrlen = 0; 1687 exthdrlen = 0; 1688 dst_exthdrlen = 0; 1689 1690 /* Only the initial fragment is time stamped */ 1691 skb_shinfo(skb)->tx_flags = cork->tx_flags; 1692 cork->tx_flags = 0; 1693 skb_shinfo(skb)->tskey = tskey; 1694 tskey = 0; 1695 skb_zcopy_set(skb, uarg, &extra_uref); 1696 1697 if ((flags & MSG_CONFIRM) && !skb_prev) 1698 skb_set_dst_pending_confirm(skb, 1); 1699 1700 /* 1701 * Put the packet on the pending queue 1702 */ 1703 if (!skb->destructor) { 1704 skb->destructor = sock_wfree; 1705 skb->sk = sk; 1706 wmem_alloc_delta += skb->truesize; 1707 } 1708 __skb_queue_tail(queue, skb); 1709 continue; 1710 } 1711 1712 if (copy > length) 1713 copy = length; 1714 1715 if (!(rt->dst.dev->features&NETIF_F_SG) && 1716 skb_tailroom(skb) >= copy) { 1717 unsigned int off; 1718 1719 off = skb->len; 1720 if (getfrag(from, skb_put(skb, copy), 1721 offset, copy, off, skb) < 0) { 1722 __skb_trim(skb, off); 1723 err = -EFAULT; 1724 goto error; 1725 } 1726 } else if (!uarg || !uarg->zerocopy) { 1727 int i = skb_shinfo(skb)->nr_frags; 1728 1729 err = -ENOMEM; 1730 if (!sk_page_frag_refill(sk, pfrag)) 1731 goto error; 1732 1733 if (!skb_can_coalesce(skb, i, pfrag->page, 1734 pfrag->offset)) { 1735 err = -EMSGSIZE; 1736 if (i == MAX_SKB_FRAGS) 1737 goto error; 1738 1739 __skb_fill_page_desc(skb, i, pfrag->page, 1740 pfrag->offset, 0); 1741 skb_shinfo(skb)->nr_frags = ++i; 1742 get_page(pfrag->page); 1743 } 1744 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1745 if (getfrag(from, 1746 page_address(pfrag->page) + pfrag->offset, 1747 offset, copy, skb->len, skb) < 0) 1748 goto error_efault; 1749 1750 pfrag->offset += copy; 1751 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1752 skb->len += copy; 1753 skb->data_len += copy; 1754 skb->truesize += copy; 1755 wmem_alloc_delta += copy; 1756 } else { 1757 err = skb_zerocopy_iter_dgram(skb, from, copy); 1758 if (err < 0) 1759 goto error; 1760 } 1761 
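	/*
	 * The non-zerocopy branches above pull data in through the
	 * caller's getfrag() callback, which copies 'copy' bytes
	 * starting at 'offset' from the opaque 'from' cookie; the fifth
	 * argument tells a checksumming callback how many bytes already
	 * precede the new data, so skb->csum can be kept consistent.
	 * A minimal sketch of such a callback for data that already
	 * sits in kernel memory (hypothetical, not defined in this
	 * file, and ignoring checksum accumulation) could be:
	 *
	 *	static int kbuf_getfrag(void *from, char *to, int offset,
	 *				int len, int odd, struct sk_buff *skb)
	 *	{
	 *		memcpy(to, (char *)from + offset, len);
	 *		return 0;
	 *	}
	 *
	 * Real users such as UDPv6 pass ip_generic_getfrag(), which
	 * copies (and, when needed, checksums) from the msghdr's
	 * iov_iter instead.
	 */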
offset += copy; 1762 length -= copy; 1763 } 1764 1765 if (wmem_alloc_delta) 1766 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1767 return 0; 1768 1769 error_efault: 1770 err = -EFAULT; 1771 error: 1772 net_zcopy_put_abort(uarg, extra_uref); 1773 cork->length -= length; 1774 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1775 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1776 return err; 1777 } 1778 1779 int ip6_append_data(struct sock *sk, 1780 int getfrag(void *from, char *to, int offset, int len, 1781 int odd, struct sk_buff *skb), 1782 void *from, int length, int transhdrlen, 1783 struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 1784 struct rt6_info *rt, unsigned int flags) 1785 { 1786 struct inet_sock *inet = inet_sk(sk); 1787 struct ipv6_pinfo *np = inet6_sk(sk); 1788 int exthdrlen; 1789 int err; 1790 1791 if (flags&MSG_PROBE) 1792 return 0; 1793 if (skb_queue_empty(&sk->sk_write_queue)) { 1794 /* 1795 * setup for corking 1796 */ 1797 dst_hold(&rt->dst); 1798 err = ip6_setup_cork(sk, &inet->cork, &np->cork, 1799 ipc6, rt); 1800 if (err) 1801 return err; 1802 1803 inet->cork.fl.u.ip6 = *fl6; 1804 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); 1805 length += exthdrlen; 1806 transhdrlen += exthdrlen; 1807 } else { 1808 transhdrlen = 0; 1809 } 1810 1811 return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, 1812 &np->cork, sk_page_frag(sk), getfrag, 1813 from, length, transhdrlen, flags, ipc6); 1814 } 1815 EXPORT_SYMBOL_GPL(ip6_append_data); 1816 1817 static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) 1818 { 1819 struct dst_entry *dst = cork->base.dst; 1820 1821 cork->base.dst = NULL; 1822 cork->base.flags &= ~IPCORK_ALLFRAG; 1823 skb_dst_set(skb, dst); 1824 } 1825 1826 static void ip6_cork_release(struct inet_cork_full *cork, 1827 struct inet6_cork *v6_cork) 1828 { 1829 if (v6_cork->opt) { 1830 struct ipv6_txoptions *opt = v6_cork->opt; 1831 1832 kfree(opt->dst0opt); 1833 kfree(opt->dst1opt); 1834 kfree(opt->hopopt); 1835 kfree(opt->srcrt); 1836 kfree(opt); 1837 v6_cork->opt = NULL; 1838 } 1839 1840 if (cork->base.dst) { 1841 dst_release(cork->base.dst); 1842 cork->base.dst = NULL; 1843 cork->base.flags &= ~IPCORK_ALLFRAG; 1844 } 1845 } 1846 1847 struct sk_buff *__ip6_make_skb(struct sock *sk, 1848 struct sk_buff_head *queue, 1849 struct inet_cork_full *cork, 1850 struct inet6_cork *v6_cork) 1851 { 1852 struct sk_buff *skb, *tmp_skb; 1853 struct sk_buff **tail_skb; 1854 struct in6_addr *final_dst; 1855 struct ipv6_pinfo *np = inet6_sk(sk); 1856 struct net *net = sock_net(sk); 1857 struct ipv6hdr *hdr; 1858 struct ipv6_txoptions *opt = v6_cork->opt; 1859 struct rt6_info *rt = (struct rt6_info *)cork->base.dst; 1860 struct flowi6 *fl6 = &cork->fl.u.ip6; 1861 unsigned char proto = fl6->flowi6_proto; 1862 1863 skb = __skb_dequeue(queue); 1864 if (!skb) 1865 goto out; 1866 tail_skb = &(skb_shinfo(skb)->frag_list); 1867 1868 /* move skb->data to ip header from ext header */ 1869 if (skb->data < skb_network_header(skb)) 1870 __skb_pull(skb, skb_network_offset(skb)); 1871 while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 1872 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1873 *tail_skb = tmp_skb; 1874 tail_skb = &(tmp_skb->next); 1875 skb->len += tmp_skb->len; 1876 skb->data_len += tmp_skb->len; 1877 skb->truesize += tmp_skb->truesize; 1878 tmp_skb->destructor = NULL; 1879 tmp_skb->sk = NULL; 1880 } 1881 1882 /* Allow local fragmentation. 
*/ 1883 skb->ignore_df = ip6_sk_ignore_df(sk); 1884 __skb_pull(skb, skb_network_header_len(skb)); 1885 1886 final_dst = &fl6->daddr; 1887 if (opt && opt->opt_flen) 1888 ipv6_push_frag_opts(skb, opt, &proto); 1889 if (opt && opt->opt_nflen) 1890 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); 1891 1892 skb_push(skb, sizeof(struct ipv6hdr)); 1893 skb_reset_network_header(skb); 1894 hdr = ipv6_hdr(skb); 1895 1896 ip6_flow_hdr(hdr, v6_cork->tclass, 1897 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1898 ip6_autoflowlabel(net, np), fl6)); 1899 hdr->hop_limit = v6_cork->hop_limit; 1900 hdr->nexthdr = proto; 1901 hdr->saddr = fl6->saddr; 1902 hdr->daddr = *final_dst; 1903 1904 skb->priority = sk->sk_priority; 1905 skb->mark = cork->base.mark; 1906 skb->tstamp = cork->base.transmit_time; 1907 1908 ip6_cork_steal_dst(skb, cork); 1909 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1910 if (proto == IPPROTO_ICMPV6) { 1911 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1912 1913 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); 1914 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1915 } 1916 1917 ip6_cork_release(cork, v6_cork); 1918 out: 1919 return skb; 1920 } 1921 1922 int ip6_send_skb(struct sk_buff *skb) 1923 { 1924 struct net *net = sock_net(skb->sk); 1925 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 1926 int err; 1927 1928 err = ip6_local_out(net, skb->sk, skb); 1929 if (err) { 1930 if (err > 0) 1931 err = net_xmit_errno(err); 1932 if (err) 1933 IP6_INC_STATS(net, rt->rt6i_idev, 1934 IPSTATS_MIB_OUTDISCARDS); 1935 } 1936 1937 return err; 1938 } 1939 1940 int ip6_push_pending_frames(struct sock *sk) 1941 { 1942 struct sk_buff *skb; 1943 1944 skb = ip6_finish_skb(sk); 1945 if (!skb) 1946 return 0; 1947 1948 return ip6_send_skb(skb); 1949 } 1950 EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 1951 1952 static void __ip6_flush_pending_frames(struct sock *sk, 1953 struct sk_buff_head *queue, 1954 struct inet_cork_full *cork, 1955 struct inet6_cork *v6_cork) 1956 { 1957 struct sk_buff *skb; 1958 1959 while ((skb = __skb_dequeue_tail(queue)) != NULL) { 1960 if (skb_dst(skb)) 1961 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1962 IPSTATS_MIB_OUTDISCARDS); 1963 kfree_skb(skb); 1964 } 1965 1966 ip6_cork_release(cork, v6_cork); 1967 } 1968 1969 void ip6_flush_pending_frames(struct sock *sk) 1970 { 1971 __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 1972 &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 1973 } 1974 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 1975 1976 struct sk_buff *ip6_make_skb(struct sock *sk, 1977 int getfrag(void *from, char *to, int offset, 1978 int len, int odd, struct sk_buff *skb), 1979 void *from, int length, int transhdrlen, 1980 struct ipcm6_cookie *ipc6, struct rt6_info *rt, 1981 unsigned int flags, struct inet_cork_full *cork) 1982 { 1983 struct inet6_cork v6_cork; 1984 struct sk_buff_head queue; 1985 int exthdrlen = (ipc6->opt ? 
ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE) {
		dst_release(&rt->dst);
		return NULL;
	}

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
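
/*
 * Usage sketch: a datagram protocol typically drives the functions above
 * in a cork/append/push sequence.  Ignoring routing, option and cmsg
 * setup, the core of a sendmsg() implementation built on them looks
 * roughly like the following (the function name and its simplifications
 * are hypothetical):
 *
 *	static int example_sendmsg(struct sock *sk, struct flowi6 *fl6,
 *				   struct msghdr *msg, size_t len,
 *				   struct ipcm6_cookie *ipc6,
 *				   struct rt6_info *rt)
 *	{
 *		int err;
 *
 *		lock_sock(sk);
 *		err = ip6_append_data(sk, ip_generic_getfrag, msg, len,
 *				      0, ipc6, fl6, rt, msg->msg_flags);
 *		if (err)
 *			ip6_flush_pending_frames(sk);
 *		else if (!(msg->msg_flags & MSG_MORE))
 *			err = ip6_push_pending_frames(sk);
 *		release_sock(sk);
 *		return err;
 *	}
 *
 * udpv6_sendmsg() and rawv6_sendmsg() follow this shape, with
 * ip6_make_skb()/ip6_send_skb() available as the uncorked single-shot
 * path.
 */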
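
/*
 * Usage sketch: connection-oriented protocols (TCP, SCTP, DCCP) instead
 * resolve a route and hand fully built segments to ip6_xmit().  Stripped
 * of error handling, dst caching and socket state, the transmit step
 * looks roughly like the following (the function name, the zero traffic
 * class and the lack of options are illustrative assumptions):
 *
 *	static int example_xmit(struct sock *sk, struct sk_buff *skb,
 *				struct flowi6 *fl6)
 *	{
 *		struct dst_entry *dst;
 *
 *		dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, NULL);
 *		if (IS_ERR(dst))
 *			return PTR_ERR(dst);
 *		skb_dst_set(skb, dst);
 *
 *		return ip6_xmit(sk, skb, fl6, sk->sk_mark, NULL,
 *				0 /\* tclass *\/, sk->sk_priority);
 *	}
 *
 * Real callers cache the dst on the socket (see ip6_sk_dst_lookup_flow()
 * above) and pass their own txoptions, traffic class and priority.
 */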