1 /* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * Based on linux/net/ipv4/ip_output.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Changes: 16 * A.N.Kuznetsov : airthmetics in fragmentation. 17 * extension headers are implemented. 18 * route changes now work. 19 * ip6_forward does not confuse sniffers. 20 * etc. 21 * 22 * H. von Brand : Added missing #include <linux/string.h> 23 * Imran Patel : frag id should be in NBO 24 * Kazunori MIYAZAWA @USAGI 25 * : add ip6_append_data and related functions 26 * for datagram xmit 27 */ 28 29 #include <linux/errno.h> 30 #include <linux/kernel.h> 31 #include <linux/string.h> 32 #include <linux/socket.h> 33 #include <linux/net.h> 34 #include <linux/netdevice.h> 35 #include <linux/if_arp.h> 36 #include <linux/in6.h> 37 #include <linux/tcp.h> 38 #include <linux/route.h> 39 #include <linux/module.h> 40 #include <linux/slab.h> 41 42 #include <linux/netfilter.h> 43 #include <linux/netfilter_ipv6.h> 44 45 #include <net/sock.h> 46 #include <net/snmp.h> 47 48 #include <net/ipv6.h> 49 #include <net/ndisc.h> 50 #include <net/protocol.h> 51 #include <net/ip6_route.h> 52 #include <net/addrconf.h> 53 #include <net/rawv6.h> 54 #include <net/icmp.h> 55 #include <net/xfrm.h> 56 #include <net/checksum.h> 57 #include <linux/mroute6.h> 58 59 static int ip6_finish_output2(struct sk_buff *skb) 60 { 61 struct dst_entry *dst = skb_dst(skb); 62 struct net_device *dev = dst->dev; 63 struct neighbour *neigh; 64 struct in6_addr *nexthop; 65 int ret; 66 67 skb->protocol = htons(ETH_P_IPV6); 68 skb->dev = dev; 69 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 72 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && 74 ((mroute6_socket(dev_net(dev), skb) && 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 77 &ipv6_hdr(skb)->saddr))) { 78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 79 80 /* Do not check for IFF_ALLMULTI; multicast routing 81 is not supported in any case. 82 */ 83 if (newskb) 84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 85 newskb, NULL, newskb->dev, 86 dev_loopback_xmit); 87 88 if (ipv6_hdr(skb)->hop_limit == 0) { 89 IP6_INC_STATS(dev_net(dev), idev, 90 IPSTATS_MIB_OUTDISCARDS); 91 kfree_skb(skb); 92 return 0; 93 } 94 } 95 96 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, 97 skb->len); 98 99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= 100 IPV6_ADDR_SCOPE_NODELOCAL && 101 !(dev->flags & IFF_LOOPBACK)) { 102 kfree_skb(skb); 103 return 0; 104 } 105 } 106 107 rcu_read_lock_bh(); 108 nexthop = rt6_nexthop((struct rt6_info *)dst); 109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); 110 if (unlikely(!neigh)) 111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); 112 if (!IS_ERR(neigh)) { 113 ret = dst_neigh_output(dst, neigh, skb); 114 rcu_read_unlock_bh(); 115 return ret; 116 } 117 rcu_read_unlock_bh(); 118 119 IP6_INC_STATS(dev_net(dst->dev), 120 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 121 kfree_skb(skb); 122 return -EINVAL; 123 } 124 125 static int ip6_finish_output(struct sk_buff *skb) 126 { 127 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 128 dst_allfrag(skb_dst(skb)) || 129 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 130 return ip6_fragment(skb, ip6_finish_output2); 131 else 132 return ip6_finish_output2(skb); 133 } 134 135 int ip6_output(struct sock *sk, struct sk_buff *skb) 136 { 137 struct net_device *dev = skb_dst(skb)->dev; 138 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 139 if (unlikely(idev->cnf.disable_ipv6)) { 140 IP6_INC_STATS(dev_net(dev), idev, 141 IPSTATS_MIB_OUTDISCARDS); 142 kfree_skb(skb); 143 return 0; 144 } 145 146 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, 147 ip6_finish_output, 148 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 149 } 150 151 /* 152 * xmit an sk_buff (used by TCP, SCTP and DCCP) 153 */ 154 155 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 156 struct ipv6_txoptions *opt, int tclass) 157 { 158 struct net *net = sock_net(sk); 159 struct ipv6_pinfo *np = inet6_sk(sk); 160 struct in6_addr *first_hop = &fl6->daddr; 161 struct dst_entry *dst = skb_dst(skb); 162 struct ipv6hdr *hdr; 163 u8 proto = fl6->flowi6_proto; 164 int seg_len = skb->len; 165 int hlimit = -1; 166 u32 mtu; 167 168 if (opt) { 169 unsigned int head_room; 170 171 /* First: exthdrs may take lots of space (~8K for now) 172 MAX_HEADER is not enough. 173 */ 174 head_room = opt->opt_nflen + opt->opt_flen; 175 seg_len += head_room; 176 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 177 178 if (skb_headroom(skb) < head_room) { 179 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 180 if (skb2 == NULL) { 181 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 182 IPSTATS_MIB_OUTDISCARDS); 183 kfree_skb(skb); 184 return -ENOBUFS; 185 } 186 consume_skb(skb); 187 skb = skb2; 188 skb_set_owner_w(skb, sk); 189 } 190 if (opt->opt_flen) 191 ipv6_push_frag_opts(skb, opt, &proto); 192 if (opt->opt_nflen) 193 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 194 } 195 196 skb_push(skb, sizeof(struct ipv6hdr)); 197 skb_reset_network_header(skb); 198 hdr = ipv6_hdr(skb); 199 200 /* 201 * Fill in the IPv6 header 202 */ 203 if (np) 204 hlimit = np->hop_limit; 205 if (hlimit < 0) 206 hlimit = ip6_dst_hoplimit(dst); 207 208 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 209 np->autoflowlabel)); 210 211 hdr->payload_len = htons(seg_len); 212 hdr->nexthdr = proto; 213 hdr->hop_limit = hlimit; 214 215 hdr->saddr = fl6->saddr; 216 hdr->daddr = *first_hop; 217 218 skb->protocol = htons(ETH_P_IPV6); 219 skb->priority = sk->sk_priority; 220 skb->mark = sk->sk_mark; 221 222 mtu = dst_mtu(dst); 223 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { 224 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 225 IPSTATS_MIB_OUT, skb->len); 226 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 227 dst->dev, dst_output); 228 } 229 230 skb->dev = dst->dev; 231 ipv6_local_error(sk, EMSGSIZE, fl6, mtu); 232 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 233 kfree_skb(skb); 234 return -EMSGSIZE; 235 } 236 EXPORT_SYMBOL(ip6_xmit); 237 238 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 239 { 240 struct ip6_ra_chain *ra; 241 struct sock *last = NULL; 242 243 read_lock(&ip6_ra_lock); 244 for (ra = ip6_ra_chain; ra; ra = ra->next) { 245 struct sock *sk = ra->sk; 246 if (sk && ra->sel == sel && 247 (!sk->sk_bound_dev_if || 248 sk->sk_bound_dev_if == skb->dev->ifindex)) { 249 if (last) { 250 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 251 if (skb2) 252 rawv6_rcv(last, skb2); 253 } 254 last = sk; 255 } 256 } 257 258 if (last) { 259 rawv6_rcv(last, skb); 260 read_unlock(&ip6_ra_lock); 261 return 1; 262 } 263 read_unlock(&ip6_ra_lock); 264 return 0; 265 } 266 267 static int ip6_forward_proxy_check(struct sk_buff *skb) 268 { 269 struct ipv6hdr *hdr = ipv6_hdr(skb); 270 u8 nexthdr = hdr->nexthdr; 271 __be16 frag_off; 272 int offset; 273 274 if (ipv6_ext_hdr(nexthdr)) { 275 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 276 if (offset < 0) 277 return 0; 278 } else 279 offset = sizeof(struct ipv6hdr); 280 281 if (nexthdr == IPPROTO_ICMPV6) { 282 struct icmp6hdr *icmp6; 283 284 if (!pskb_may_pull(skb, (skb_network_header(skb) + 285 offset + 1 - skb->data))) 286 return 0; 287 288 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 289 290 switch (icmp6->icmp6_type) { 291 case NDISC_ROUTER_SOLICITATION: 292 case NDISC_ROUTER_ADVERTISEMENT: 293 case NDISC_NEIGHBOUR_SOLICITATION: 294 case NDISC_NEIGHBOUR_ADVERTISEMENT: 295 case NDISC_REDIRECT: 296 /* For reaction involving unicast neighbor discovery 297 * message destined to the proxied address, pass it to 298 * input function. 299 */ 300 return 1; 301 default: 302 break; 303 } 304 } 305 306 /* 307 * The proxying router can't forward traffic sent to a link-local 308 * address, so signal the sender and discard the packet. This 309 * behavior is clarified by the MIPv6 specification. 310 */ 311 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 312 dst_link_failure(skb); 313 return -1; 314 } 315 316 return 0; 317 } 318 319 static inline int ip6_forward_finish(struct sk_buff *skb) 320 { 321 skb_sender_cpu_clear(skb); 322 return dst_output(skb); 323 } 324 325 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) 326 { 327 unsigned int mtu; 328 struct inet6_dev *idev; 329 330 if (dst_metric_locked(dst, RTAX_MTU)) { 331 mtu = dst_metric_raw(dst, RTAX_MTU); 332 if (mtu) 333 return mtu; 334 } 335 336 mtu = IPV6_MIN_MTU; 337 rcu_read_lock(); 338 idev = __in6_dev_get(dst->dev); 339 if (idev) 340 mtu = idev->cnf.mtu6; 341 rcu_read_unlock(); 342 343 return mtu; 344 } 345 346 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 347 { 348 if (skb->len <= mtu) 349 return false; 350 351 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ 352 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) 353 return true; 354 355 if (skb->ignore_df) 356 return false; 357 358 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) 359 return false; 360 361 return true; 362 } 363 364 int ip6_forward(struct sk_buff *skb) 365 { 366 struct dst_entry *dst = skb_dst(skb); 367 struct ipv6hdr *hdr = ipv6_hdr(skb); 368 struct inet6_skb_parm *opt = IP6CB(skb); 369 struct net *net = dev_net(dst->dev); 370 u32 mtu; 371 372 if (net->ipv6.devconf_all->forwarding == 0) 373 goto error; 374 375 if (skb->pkt_type != PACKET_HOST) 376 goto drop; 377 378 if (skb_warn_if_lro(skb)) 379 goto drop; 380 381 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 382 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 383 IPSTATS_MIB_INDISCARDS); 384 goto drop; 385 } 386 387 skb_forward_csum(skb); 388 389 /* 390 * We DO NOT make any processing on 391 * RA packets, pushing them to user level AS IS 392 * without ane WARRANTY that application will be able 393 * to interpret them. The reason is that we 394 * cannot make anything clever here. 395 * 396 * We are not end-node, so that if packet contains 397 * AH/ESP, we cannot make anything. 398 * Defragmentation also would be mistake, RA packets 399 * cannot be fragmented, because there is no warranty 400 * that different fragments will go along one path. --ANK 401 */ 402 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { 403 if (ip6_call_ra_chain(skb, ntohs(opt->ra))) 404 return 0; 405 } 406 407 /* 408 * check and decrement ttl 409 */ 410 if (hdr->hop_limit <= 1) { 411 /* Force OUTPUT device used as source address */ 412 skb->dev = dst->dev; 413 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 414 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 415 IPSTATS_MIB_INHDRERRORS); 416 417 kfree_skb(skb); 418 return -ETIMEDOUT; 419 } 420 421 /* XXX: idev->cnf.proxy_ndp? */ 422 if (net->ipv6.devconf_all->proxy_ndp && 423 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 424 int proxied = ip6_forward_proxy_check(skb); 425 if (proxied > 0) 426 return ip6_input(skb); 427 else if (proxied < 0) { 428 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 429 IPSTATS_MIB_INDISCARDS); 430 goto drop; 431 } 432 } 433 434 if (!xfrm6_route_forward(skb)) { 435 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 436 IPSTATS_MIB_INDISCARDS); 437 goto drop; 438 } 439 dst = skb_dst(skb); 440 441 /* IPv6 specs say nothing about it, but it is clear that we cannot 442 send redirects to source routed frames. 443 We don't send redirects to frames decapsulated from IPsec. 444 */ 445 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { 446 struct in6_addr *target = NULL; 447 struct inet_peer *peer; 448 struct rt6_info *rt; 449 450 /* 451 * incoming and outgoing devices are the same 452 * send a redirect. 453 */ 454 455 rt = (struct rt6_info *) dst; 456 if (rt->rt6i_flags & RTF_GATEWAY) 457 target = &rt->rt6i_gateway; 458 else 459 target = &hdr->daddr; 460 461 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); 462 463 /* Limit redirects both by destination (here) 464 and by source (inside ndisc_send_redirect) 465 */ 466 if (inet_peer_xrlim_allow(peer, 1*HZ)) 467 ndisc_send_redirect(skb, target); 468 if (peer) 469 inet_putpeer(peer); 470 } else { 471 int addrtype = ipv6_addr_type(&hdr->saddr); 472 473 /* This check is security critical. */ 474 if (addrtype == IPV6_ADDR_ANY || 475 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 476 goto error; 477 if (addrtype & IPV6_ADDR_LINKLOCAL) { 478 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 479 ICMPV6_NOT_NEIGHBOUR, 0); 480 goto error; 481 } 482 } 483 484 mtu = ip6_dst_mtu_forward(dst); 485 if (mtu < IPV6_MIN_MTU) 486 mtu = IPV6_MIN_MTU; 487 488 if (ip6_pkt_too_big(skb, mtu)) { 489 /* Again, force OUTPUT device used as source address */ 490 skb->dev = dst->dev; 491 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 492 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 493 IPSTATS_MIB_INTOOBIGERRORS); 494 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 495 IPSTATS_MIB_FRAGFAILS); 496 kfree_skb(skb); 497 return -EMSGSIZE; 498 } 499 500 if (skb_cow(skb, dst->dev->hard_header_len)) { 501 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 502 IPSTATS_MIB_OUTDISCARDS); 503 goto drop; 504 } 505 506 hdr = ipv6_hdr(skb); 507 508 /* Mangling hops number delayed to point after skb COW */ 509 510 hdr->hop_limit--; 511 512 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 513 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); 514 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 515 ip6_forward_finish); 516 517 error: 518 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 519 drop: 520 kfree_skb(skb); 521 return -EINVAL; 522 } 523 524 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 525 { 526 to->pkt_type = from->pkt_type; 527 to->priority = from->priority; 528 to->protocol = from->protocol; 529 skb_dst_drop(to); 530 skb_dst_set(to, dst_clone(skb_dst(from))); 531 to->dev = from->dev; 532 to->mark = from->mark; 533 534 #ifdef CONFIG_NET_SCHED 535 to->tc_index = from->tc_index; 536 #endif 537 nf_copy(to, from); 538 skb_copy_secmark(to, from); 539 } 540 541 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 542 { 543 struct sk_buff *frag; 544 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 545 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 546 inet6_sk(skb->sk) : NULL; 547 struct ipv6hdr *tmp_hdr; 548 struct frag_hdr *fh; 549 unsigned int mtu, hlen, left, len; 550 int hroom, troom; 551 __be32 frag_id = 0; 552 int ptr, offset = 0, err = 0; 553 u8 *prevhdr, nexthdr = 0; 554 struct net *net = dev_net(skb_dst(skb)->dev); 555 556 hlen = ip6_find_1stfragopt(skb, &prevhdr); 557 nexthdr = *prevhdr; 558 559 mtu = ip6_skb_dst_mtu(skb); 560 561 /* We must not fragment if the socket is set to force MTU discovery 562 * or if the skb it not generated by a local socket. 563 */ 564 if (unlikely(!skb->ignore_df && skb->len > mtu) || 565 (IP6CB(skb)->frag_max_size && 566 IP6CB(skb)->frag_max_size > mtu)) { 567 if (skb->sk && dst_allfrag(skb_dst(skb))) 568 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 569 570 skb->dev = skb_dst(skb)->dev; 571 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 572 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 573 IPSTATS_MIB_FRAGFAILS); 574 kfree_skb(skb); 575 return -EMSGSIZE; 576 } 577 578 if (np && np->frag_size < mtu) { 579 if (np->frag_size) 580 mtu = np->frag_size; 581 } 582 mtu -= hlen + sizeof(struct frag_hdr); 583 584 if (skb_has_frag_list(skb)) { 585 int first_len = skb_pagelen(skb); 586 struct sk_buff *frag2; 587 588 if (first_len - hlen > mtu || 589 ((first_len - hlen) & 7) || 590 skb_cloned(skb)) 591 goto slow_path; 592 593 skb_walk_frags(skb, frag) { 594 /* Correct geometry. */ 595 if (frag->len > mtu || 596 ((frag->len & 7) && frag->next) || 597 skb_headroom(frag) < hlen) 598 goto slow_path_clean; 599 600 /* Partially cloned skb? */ 601 if (skb_shared(frag)) 602 goto slow_path_clean; 603 604 BUG_ON(frag->sk); 605 if (skb->sk) { 606 frag->sk = skb->sk; 607 frag->destructor = sock_wfree; 608 } 609 skb->truesize -= frag->truesize; 610 } 611 612 err = 0; 613 offset = 0; 614 frag = skb_shinfo(skb)->frag_list; 615 skb_frag_list_init(skb); 616 /* BUILD HEADER */ 617 618 *prevhdr = NEXTHDR_FRAGMENT; 619 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 620 if (!tmp_hdr) { 621 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 622 IPSTATS_MIB_FRAGFAILS); 623 return -ENOMEM; 624 } 625 626 __skb_pull(skb, hlen); 627 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); 628 __skb_push(skb, hlen); 629 skb_reset_network_header(skb); 630 memcpy(skb_network_header(skb), tmp_hdr, hlen); 631 632 ipv6_select_ident(fh, rt); 633 fh->nexthdr = nexthdr; 634 fh->reserved = 0; 635 fh->frag_off = htons(IP6_MF); 636 frag_id = fh->identification; 637 638 first_len = skb_pagelen(skb); 639 skb->data_len = first_len - skb_headlen(skb); 640 skb->len = first_len; 641 ipv6_hdr(skb)->payload_len = htons(first_len - 642 sizeof(struct ipv6hdr)); 643 644 dst_hold(&rt->dst); 645 646 for (;;) { 647 /* Prepare header of the next frame, 648 * before previous one went down. */ 649 if (frag) { 650 frag->ip_summed = CHECKSUM_NONE; 651 skb_reset_transport_header(frag); 652 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr)); 653 __skb_push(frag, hlen); 654 skb_reset_network_header(frag); 655 memcpy(skb_network_header(frag), tmp_hdr, 656 hlen); 657 offset += skb->len - hlen - sizeof(struct frag_hdr); 658 fh->nexthdr = nexthdr; 659 fh->reserved = 0; 660 fh->frag_off = htons(offset); 661 if (frag->next != NULL) 662 fh->frag_off |= htons(IP6_MF); 663 fh->identification = frag_id; 664 ipv6_hdr(frag)->payload_len = 665 htons(frag->len - 666 sizeof(struct ipv6hdr)); 667 ip6_copy_metadata(frag, skb); 668 } 669 670 err = output(skb); 671 if (!err) 672 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 673 IPSTATS_MIB_FRAGCREATES); 674 675 if (err || !frag) 676 break; 677 678 skb = frag; 679 frag = skb->next; 680 skb->next = NULL; 681 } 682 683 kfree(tmp_hdr); 684 685 if (err == 0) { 686 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 687 IPSTATS_MIB_FRAGOKS); 688 ip6_rt_put(rt); 689 return 0; 690 } 691 692 kfree_skb_list(frag); 693 694 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 695 IPSTATS_MIB_FRAGFAILS); 696 ip6_rt_put(rt); 697 return err; 698 699 slow_path_clean: 700 skb_walk_frags(skb, frag2) { 701 if (frag2 == frag) 702 break; 703 frag2->sk = NULL; 704 frag2->destructor = NULL; 705 skb->truesize += frag2->truesize; 706 } 707 } 708 709 slow_path: 710 if ((skb->ip_summed == CHECKSUM_PARTIAL) && 711 skb_checksum_help(skb)) 712 goto fail; 713 714 left = skb->len - hlen; /* Space per frame */ 715 ptr = hlen; /* Where to start from */ 716 717 /* 718 * Fragment the datagram. 719 */ 720 721 *prevhdr = NEXTHDR_FRAGMENT; 722 hroom = LL_RESERVED_SPACE(rt->dst.dev); 723 troom = rt->dst.dev->needed_tailroom; 724 725 /* 726 * Keep copying data until we run out. 727 */ 728 while (left > 0) { 729 len = left; 730 /* IF: it doesn't fit, use 'mtu' - the data space left */ 731 if (len > mtu) 732 len = mtu; 733 /* IF: we are not sending up to and including the packet end 734 then align the next start on an eight byte boundary */ 735 if (len < left) { 736 len &= ~7; 737 } 738 739 /* Allocate buffer */ 740 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + 741 hroom + troom, GFP_ATOMIC); 742 if (!frag) { 743 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 744 IPSTATS_MIB_FRAGFAILS); 745 err = -ENOMEM; 746 goto fail; 747 } 748 749 /* 750 * Set up data on packet 751 */ 752 753 ip6_copy_metadata(frag, skb); 754 skb_reserve(frag, hroom); 755 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 756 skb_reset_network_header(frag); 757 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 758 frag->transport_header = (frag->network_header + hlen + 759 sizeof(struct frag_hdr)); 760 761 /* 762 * Charge the memory for the fragment to any owner 763 * it might possess 764 */ 765 if (skb->sk) 766 skb_set_owner_w(frag, skb->sk); 767 768 /* 769 * Copy the packet header into the new buffer. 770 */ 771 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 772 773 /* 774 * Build fragment header. 775 */ 776 fh->nexthdr = nexthdr; 777 fh->reserved = 0; 778 if (!frag_id) { 779 ipv6_select_ident(fh, rt); 780 frag_id = fh->identification; 781 } else 782 fh->identification = frag_id; 783 784 /* 785 * Copy a block of the IP datagram. 786 */ 787 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag), 788 len)); 789 left -= len; 790 791 fh->frag_off = htons(offset); 792 if (left > 0) 793 fh->frag_off |= htons(IP6_MF); 794 ipv6_hdr(frag)->payload_len = htons(frag->len - 795 sizeof(struct ipv6hdr)); 796 797 ptr += len; 798 offset += len; 799 800 /* 801 * Put this fragment into the sending queue. 802 */ 803 err = output(frag); 804 if (err) 805 goto fail; 806 807 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 808 IPSTATS_MIB_FRAGCREATES); 809 } 810 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 811 IPSTATS_MIB_FRAGOKS); 812 consume_skb(skb); 813 return err; 814 815 fail: 816 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 817 IPSTATS_MIB_FRAGFAILS); 818 kfree_skb(skb); 819 return err; 820 } 821 822 static inline int ip6_rt_check(const struct rt6key *rt_key, 823 const struct in6_addr *fl_addr, 824 const struct in6_addr *addr_cache) 825 { 826 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 827 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); 828 } 829 830 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 831 struct dst_entry *dst, 832 const struct flowi6 *fl6) 833 { 834 struct ipv6_pinfo *np = inet6_sk(sk); 835 struct rt6_info *rt; 836 837 if (!dst) 838 goto out; 839 840 if (dst->ops->family != AF_INET6) { 841 dst_release(dst); 842 return NULL; 843 } 844 845 rt = (struct rt6_info *)dst; 846 /* Yes, checking route validity in not connected 847 * case is not very simple. Take into account, 848 * that we do not support routing by source, TOS, 849 * and MSG_DONTROUTE --ANK (980726) 850 * 851 * 1. ip6_rt_check(): If route was host route, 852 * check that cached destination is current. 853 * If it is network route, we still may 854 * check its validity using saved pointer 855 * to the last used address: daddr_cache. 856 * We do not want to save whole address now, 857 * (because main consumer of this service 858 * is tcp, which has not this problem), 859 * so that the last trick works only on connected 860 * sockets. 861 * 2. oif also should be the same. 862 */ 863 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 864 #ifdef CONFIG_IPV6_SUBTREES 865 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 866 #endif 867 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { 868 dst_release(dst); 869 dst = NULL; 870 } 871 872 out: 873 return dst; 874 } 875 876 static int ip6_dst_lookup_tail(struct sock *sk, 877 struct dst_entry **dst, struct flowi6 *fl6) 878 { 879 struct net *net = sock_net(sk); 880 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 881 struct neighbour *n; 882 struct rt6_info *rt; 883 #endif 884 int err; 885 886 if (*dst == NULL) 887 *dst = ip6_route_output(net, sk, fl6); 888 889 err = (*dst)->error; 890 if (err) 891 goto out_err_release; 892 893 if (ipv6_addr_any(&fl6->saddr)) { 894 struct rt6_info *rt = (struct rt6_info *) *dst; 895 err = ip6_route_get_saddr(net, rt, &fl6->daddr, 896 sk ? inet6_sk(sk)->srcprefs : 0, 897 &fl6->saddr); 898 if (err) 899 goto out_err_release; 900 } 901 902 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 903 /* 904 * Here if the dst entry we've looked up 905 * has a neighbour entry that is in the INCOMPLETE 906 * state and the src address from the flow is 907 * marked as OPTIMISTIC, we release the found 908 * dst entry and replace it instead with the 909 * dst entry of the nexthop router 910 */ 911 rt = (struct rt6_info *) *dst; 912 rcu_read_lock_bh(); 913 n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); 914 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; 915 rcu_read_unlock_bh(); 916 917 if (err) { 918 struct inet6_ifaddr *ifp; 919 struct flowi6 fl_gw6; 920 int redirect; 921 922 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 923 (*dst)->dev, 1); 924 925 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 926 if (ifp) 927 in6_ifa_put(ifp); 928 929 if (redirect) { 930 /* 931 * We need to get the dst entry for the 932 * default router instead 933 */ 934 dst_release(*dst); 935 memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 936 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 937 *dst = ip6_route_output(net, sk, &fl_gw6); 938 err = (*dst)->error; 939 if (err) 940 goto out_err_release; 941 } 942 } 943 #endif 944 945 return 0; 946 947 out_err_release: 948 if (err == -ENETUNREACH) 949 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); 950 dst_release(*dst); 951 *dst = NULL; 952 return err; 953 } 954 955 /** 956 * ip6_dst_lookup - perform route lookup on flow 957 * @sk: socket which provides route info 958 * @dst: pointer to dst_entry * for result 959 * @fl6: flow to lookup 960 * 961 * This function performs a route lookup on the given flow. 962 * 963 * It returns zero on success, or a standard errno code on error. 964 */ 965 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) 966 { 967 *dst = NULL; 968 return ip6_dst_lookup_tail(sk, dst, fl6); 969 } 970 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 971 972 /** 973 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 974 * @sk: socket which provides route info 975 * @fl6: flow to lookup 976 * @final_dst: final destination address for ipsec lookup 977 * 978 * This function performs a route lookup on the given flow. 979 * 980 * It returns a valid dst pointer on success, or a pointer encoded 981 * error code. 982 */ 983 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 984 const struct in6_addr *final_dst) 985 { 986 struct dst_entry *dst = NULL; 987 int err; 988 989 err = ip6_dst_lookup_tail(sk, &dst, fl6); 990 if (err) 991 return ERR_PTR(err); 992 if (final_dst) 993 fl6->daddr = *final_dst; 994 995 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 996 } 997 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 998 999 /** 1000 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1001 * @sk: socket which provides the dst cache and route info 1002 * @fl6: flow to lookup 1003 * @final_dst: final destination address for ipsec lookup 1004 * 1005 * This function performs a route lookup on the given flow with the 1006 * possibility of using the cached route in the socket if it is valid. 1007 * It will take the socket dst lock when operating on the dst cache. 1008 * As a result, this function can only be used in process context. 1009 * 1010 * It returns a valid dst pointer on success, or a pointer encoded 1011 * error code. 1012 */ 1013 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1014 const struct in6_addr *final_dst) 1015 { 1016 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1017 int err; 1018 1019 dst = ip6_sk_dst_check(sk, dst, fl6); 1020 1021 err = ip6_dst_lookup_tail(sk, &dst, fl6); 1022 if (err) 1023 return ERR_PTR(err); 1024 if (final_dst) 1025 fl6->daddr = *final_dst; 1026 1027 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 1028 } 1029 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1030 1031 static inline int ip6_ufo_append_data(struct sock *sk, 1032 struct sk_buff_head *queue, 1033 int getfrag(void *from, char *to, int offset, int len, 1034 int odd, struct sk_buff *skb), 1035 void *from, int length, int hh_len, int fragheaderlen, 1036 int transhdrlen, int mtu, unsigned int flags, 1037 struct rt6_info *rt) 1038 1039 { 1040 struct sk_buff *skb; 1041 struct frag_hdr fhdr; 1042 int err; 1043 1044 /* There is support for UDP large send offload by network 1045 * device, so create one single skb packet containing complete 1046 * udp datagram 1047 */ 1048 skb = skb_peek_tail(queue); 1049 if (skb == NULL) { 1050 skb = sock_alloc_send_skb(sk, 1051 hh_len + fragheaderlen + transhdrlen + 20, 1052 (flags & MSG_DONTWAIT), &err); 1053 if (skb == NULL) 1054 return err; 1055 1056 /* reserve space for Hardware header */ 1057 skb_reserve(skb, hh_len); 1058 1059 /* create space for UDP/IP header */ 1060 skb_put(skb, fragheaderlen + transhdrlen); 1061 1062 /* initialize network header pointer */ 1063 skb_reset_network_header(skb); 1064 1065 /* initialize protocol header pointer */ 1066 skb->transport_header = skb->network_header + fragheaderlen; 1067 1068 skb->protocol = htons(ETH_P_IPV6); 1069 skb->csum = 0; 1070 1071 __skb_queue_tail(queue, skb); 1072 } else if (skb_is_gso(skb)) { 1073 goto append; 1074 } 1075 1076 skb->ip_summed = CHECKSUM_PARTIAL; 1077 /* Specify the length of each IPv6 datagram fragment. 1078 * It has to be a multiple of 8. 1079 */ 1080 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - 1081 sizeof(struct frag_hdr)) & ~7; 1082 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1083 ipv6_select_ident(&fhdr, rt); 1084 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1085 1086 append: 1087 return skb_append_datato_frags(sk, skb, getfrag, from, 1088 (length - transhdrlen)); 1089 } 1090 1091 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1092 gfp_t gfp) 1093 { 1094 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1095 } 1096 1097 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1098 gfp_t gfp) 1099 { 1100 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1101 } 1102 1103 static void ip6_append_data_mtu(unsigned int *mtu, 1104 int *maxfraglen, 1105 unsigned int fragheaderlen, 1106 struct sk_buff *skb, 1107 struct rt6_info *rt, 1108 unsigned int orig_mtu) 1109 { 1110 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 1111 if (skb == NULL) { 1112 /* first fragment, reserve header_len */ 1113 *mtu = orig_mtu - rt->dst.header_len; 1114 1115 } else { 1116 /* 1117 * this fragment is not first, the headers 1118 * space is regarded as data space. 1119 */ 1120 *mtu = orig_mtu; 1121 } 1122 *maxfraglen = ((*mtu - fragheaderlen) & ~7) 1123 + fragheaderlen - sizeof(struct frag_hdr); 1124 } 1125 } 1126 1127 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 1128 struct inet6_cork *v6_cork, 1129 int hlimit, int tclass, struct ipv6_txoptions *opt, 1130 struct rt6_info *rt, struct flowi6 *fl6) 1131 { 1132 struct ipv6_pinfo *np = inet6_sk(sk); 1133 unsigned int mtu; 1134 1135 /* 1136 * setup for corking 1137 */ 1138 if (opt) { 1139 if (WARN_ON(v6_cork->opt)) 1140 return -EINVAL; 1141 1142 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); 1143 if (unlikely(v6_cork->opt == NULL)) 1144 return -ENOBUFS; 1145 1146 v6_cork->opt->tot_len = opt->tot_len; 1147 v6_cork->opt->opt_flen = opt->opt_flen; 1148 v6_cork->opt->opt_nflen = opt->opt_nflen; 1149 1150 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1151 sk->sk_allocation); 1152 if (opt->dst0opt && !v6_cork->opt->dst0opt) 1153 return -ENOBUFS; 1154 1155 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1156 sk->sk_allocation); 1157 if (opt->dst1opt && !v6_cork->opt->dst1opt) 1158 return -ENOBUFS; 1159 1160 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, 1161 sk->sk_allocation); 1162 if (opt->hopopt && !v6_cork->opt->hopopt) 1163 return -ENOBUFS; 1164 1165 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1166 sk->sk_allocation); 1167 if (opt->srcrt && !v6_cork->opt->srcrt) 1168 return -ENOBUFS; 1169 1170 /* need source address above miyazawa*/ 1171 } 1172 dst_hold(&rt->dst); 1173 cork->base.dst = &rt->dst; 1174 cork->fl.u.ip6 = *fl6; 1175 v6_cork->hop_limit = hlimit; 1176 v6_cork->tclass = tclass; 1177 if (rt->dst.flags & DST_XFRM_TUNNEL) 1178 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1179 rt->dst.dev->mtu : dst_mtu(&rt->dst); 1180 else 1181 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1182 rt->dst.dev->mtu : dst_mtu(rt->dst.path); 1183 if (np->frag_size < mtu) { 1184 if (np->frag_size) 1185 mtu = np->frag_size; 1186 } 1187 cork->base.fragsize = mtu; 1188 if (dst_allfrag(rt->dst.path)) 1189 cork->base.flags |= IPCORK_ALLFRAG; 1190 cork->base.length = 0; 1191 1192 return 0; 1193 } 1194 1195 static int __ip6_append_data(struct sock *sk, 1196 struct flowi6 *fl6, 1197 struct sk_buff_head *queue, 1198 struct inet_cork *cork, 1199 struct inet6_cork *v6_cork, 1200 struct page_frag *pfrag, 1201 int getfrag(void *from, char *to, int offset, 1202 int len, int odd, struct sk_buff *skb), 1203 void *from, int length, int transhdrlen, 1204 unsigned int flags, int dontfrag) 1205 { 1206 struct sk_buff *skb, *skb_prev = NULL; 1207 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; 1208 int exthdrlen = 0; 1209 int dst_exthdrlen = 0; 1210 int hh_len; 1211 int copy; 1212 int err; 1213 int offset = 0; 1214 __u8 tx_flags = 0; 1215 u32 tskey = 0; 1216 struct rt6_info *rt = (struct rt6_info *)cork->dst; 1217 struct ipv6_txoptions *opt = v6_cork->opt; 1218 int csummode = CHECKSUM_NONE; 1219 1220 skb = skb_peek_tail(queue); 1221 if (!skb) { 1222 exthdrlen = opt ? opt->opt_flen : 0; 1223 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; 1224 } 1225 1226 mtu = cork->fragsize; 1227 orig_mtu = mtu; 1228 1229 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1230 1231 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1232 (opt ? opt->opt_nflen : 0); 1233 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - 1234 sizeof(struct frag_hdr); 1235 1236 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1237 unsigned int maxnonfragsize, headersize; 1238 1239 headersize = sizeof(struct ipv6hdr) + 1240 (opt ? opt->opt_flen + opt->opt_nflen : 0) + 1241 (dst_allfrag(&rt->dst) ? 1242 sizeof(struct frag_hdr) : 0) + 1243 rt->rt6i_nfheader_len; 1244 1245 if (ip6_sk_ignore_df(sk)) 1246 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; 1247 else 1248 maxnonfragsize = mtu; 1249 1250 /* dontfrag active */ 1251 if ((cork->length + length > mtu - headersize) && dontfrag && 1252 (sk->sk_protocol == IPPROTO_UDP || 1253 sk->sk_protocol == IPPROTO_RAW)) { 1254 ipv6_local_rxpmtu(sk, fl6, mtu - headersize + 1255 sizeof(struct ipv6hdr)); 1256 goto emsgsize; 1257 } 1258 1259 if (cork->length + length > maxnonfragsize - headersize) { 1260 emsgsize: 1261 ipv6_local_error(sk, EMSGSIZE, fl6, 1262 mtu - headersize + 1263 sizeof(struct ipv6hdr)); 1264 return -EMSGSIZE; 1265 } 1266 } 1267 1268 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { 1269 sock_tx_timestamp(sk, &tx_flags); 1270 if (tx_flags & SKBTX_ANY_SW_TSTAMP && 1271 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) 1272 tskey = sk->sk_tskey++; 1273 } 1274 1275 /* If this is the first and only packet and device 1276 * supports checksum offloading, let's use it. 1277 */ 1278 if (!skb && sk->sk_protocol == IPPROTO_UDP && 1279 length + fragheaderlen < mtu && 1280 rt->dst.dev->features & NETIF_F_V6_CSUM && 1281 !exthdrlen) 1282 csummode = CHECKSUM_PARTIAL; 1283 /* 1284 * Let's try using as much space as possible. 1285 * Use MTU if total length of the message fits into the MTU. 1286 * Otherwise, we need to reserve fragment header and 1287 * fragment alignment (= 8-15 octects, in total). 1288 * 1289 * Note that we may need to "move" the data from the tail of 1290 * of the buffer to the new fragment when we split 1291 * the message. 1292 * 1293 * FIXME: It may be fragmented into multiple chunks 1294 * at once if non-fragmentable extension headers 1295 * are too large. 1296 * --yoshfuji 1297 */ 1298 1299 cork->length += length; 1300 if (((length > mtu) || 1301 (skb && skb_is_gso(skb))) && 1302 (sk->sk_protocol == IPPROTO_UDP) && 1303 (rt->dst.dev->features & NETIF_F_UFO) && 1304 (sk->sk_type == SOCK_DGRAM)) { 1305 err = ip6_ufo_append_data(sk, queue, getfrag, from, length, 1306 hh_len, fragheaderlen, 1307 transhdrlen, mtu, flags, rt); 1308 if (err) 1309 goto error; 1310 return 0; 1311 } 1312 1313 if (!skb) 1314 goto alloc_new_skb; 1315 1316 while (length > 0) { 1317 /* Check if the remaining data fits into current packet. */ 1318 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1319 if (copy < length) 1320 copy = maxfraglen - skb->len; 1321 1322 if (copy <= 0) { 1323 char *data; 1324 unsigned int datalen; 1325 unsigned int fraglen; 1326 unsigned int fraggap; 1327 unsigned int alloclen; 1328 alloc_new_skb: 1329 /* There's no room in the current skb */ 1330 if (skb) 1331 fraggap = skb->len - maxfraglen; 1332 else 1333 fraggap = 0; 1334 /* update mtu and maxfraglen if necessary */ 1335 if (skb == NULL || skb_prev == NULL) 1336 ip6_append_data_mtu(&mtu, &maxfraglen, 1337 fragheaderlen, skb, rt, 1338 orig_mtu); 1339 1340 skb_prev = skb; 1341 1342 /* 1343 * If remaining data exceeds the mtu, 1344 * we know we need more fragment(s). 1345 */ 1346 datalen = length + fraggap; 1347 1348 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1349 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1350 if ((flags & MSG_MORE) && 1351 !(rt->dst.dev->features&NETIF_F_SG)) 1352 alloclen = mtu; 1353 else 1354 alloclen = datalen + fragheaderlen; 1355 1356 alloclen += dst_exthdrlen; 1357 1358 if (datalen != length + fraggap) { 1359 /* 1360 * this is not the last fragment, the trailer 1361 * space is regarded as data space. 1362 */ 1363 datalen += rt->dst.trailer_len; 1364 } 1365 1366 alloclen += rt->dst.trailer_len; 1367 fraglen = datalen + fragheaderlen; 1368 1369 /* 1370 * We just reserve space for fragment header. 1371 * Note: this may be overallocation if the message 1372 * (without MSG_MORE) fits into the MTU. 1373 */ 1374 alloclen += sizeof(struct frag_hdr); 1375 1376 if (transhdrlen) { 1377 skb = sock_alloc_send_skb(sk, 1378 alloclen + hh_len, 1379 (flags & MSG_DONTWAIT), &err); 1380 } else { 1381 skb = NULL; 1382 if (atomic_read(&sk->sk_wmem_alloc) <= 1383 2 * sk->sk_sndbuf) 1384 skb = sock_wmalloc(sk, 1385 alloclen + hh_len, 1, 1386 sk->sk_allocation); 1387 if (unlikely(skb == NULL)) 1388 err = -ENOBUFS; 1389 } 1390 if (skb == NULL) 1391 goto error; 1392 /* 1393 * Fill in the control structures 1394 */ 1395 skb->protocol = htons(ETH_P_IPV6); 1396 skb->ip_summed = csummode; 1397 skb->csum = 0; 1398 /* reserve for fragmentation and ipsec header */ 1399 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1400 dst_exthdrlen); 1401 1402 /* Only the initial fragment is time stamped */ 1403 skb_shinfo(skb)->tx_flags = tx_flags; 1404 tx_flags = 0; 1405 skb_shinfo(skb)->tskey = tskey; 1406 tskey = 0; 1407 1408 /* 1409 * Find where to start putting bytes 1410 */ 1411 data = skb_put(skb, fraglen); 1412 skb_set_network_header(skb, exthdrlen); 1413 data += fragheaderlen; 1414 skb->transport_header = (skb->network_header + 1415 fragheaderlen); 1416 if (fraggap) { 1417 skb->csum = skb_copy_and_csum_bits( 1418 skb_prev, maxfraglen, 1419 data + transhdrlen, fraggap, 0); 1420 skb_prev->csum = csum_sub(skb_prev->csum, 1421 skb->csum); 1422 data += fraggap; 1423 pskb_trim_unique(skb_prev, maxfraglen); 1424 } 1425 copy = datalen - transhdrlen - fraggap; 1426 1427 if (copy < 0) { 1428 err = -EINVAL; 1429 kfree_skb(skb); 1430 goto error; 1431 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1432 err = -EFAULT; 1433 kfree_skb(skb); 1434 goto error; 1435 } 1436 1437 offset += copy; 1438 length -= datalen - fraggap; 1439 transhdrlen = 0; 1440 exthdrlen = 0; 1441 dst_exthdrlen = 0; 1442 1443 /* 1444 * Put the packet on the pending queue 1445 */ 1446 __skb_queue_tail(queue, skb); 1447 continue; 1448 } 1449 1450 if (copy > length) 1451 copy = length; 1452 1453 if (!(rt->dst.dev->features&NETIF_F_SG)) { 1454 unsigned int off; 1455 1456 off = skb->len; 1457 if (getfrag(from, skb_put(skb, copy), 1458 offset, copy, off, skb) < 0) { 1459 __skb_trim(skb, off); 1460 err = -EFAULT; 1461 goto error; 1462 } 1463 } else { 1464 int i = skb_shinfo(skb)->nr_frags; 1465 1466 err = -ENOMEM; 1467 if (!sk_page_frag_refill(sk, pfrag)) 1468 goto error; 1469 1470 if (!skb_can_coalesce(skb, i, pfrag->page, 1471 pfrag->offset)) { 1472 err = -EMSGSIZE; 1473 if (i == MAX_SKB_FRAGS) 1474 goto error; 1475 1476 __skb_fill_page_desc(skb, i, pfrag->page, 1477 pfrag->offset, 0); 1478 skb_shinfo(skb)->nr_frags = ++i; 1479 get_page(pfrag->page); 1480 } 1481 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1482 if (getfrag(from, 1483 page_address(pfrag->page) + pfrag->offset, 1484 offset, copy, skb->len, skb) < 0) 1485 goto error_efault; 1486 1487 pfrag->offset += copy; 1488 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1489 skb->len += copy; 1490 skb->data_len += copy; 1491 skb->truesize += copy; 1492 atomic_add(copy, &sk->sk_wmem_alloc); 1493 } 1494 offset += copy; 1495 length -= copy; 1496 } 1497 1498 return 0; 1499 1500 error_efault: 1501 err = -EFAULT; 1502 error: 1503 cork->length -= length; 1504 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1505 return err; 1506 } 1507 1508 int ip6_append_data(struct sock *sk, 1509 int getfrag(void *from, char *to, int offset, int len, 1510 int odd, struct sk_buff *skb), 1511 void *from, int length, int transhdrlen, int hlimit, 1512 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, 1513 struct rt6_info *rt, unsigned int flags, int dontfrag) 1514 { 1515 struct inet_sock *inet = inet_sk(sk); 1516 struct ipv6_pinfo *np = inet6_sk(sk); 1517 int exthdrlen; 1518 int err; 1519 1520 if (flags&MSG_PROBE) 1521 return 0; 1522 if (skb_queue_empty(&sk->sk_write_queue)) { 1523 /* 1524 * setup for corking 1525 */ 1526 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, 1527 tclass, opt, rt, fl6); 1528 if (err) 1529 return err; 1530 1531 exthdrlen = (opt ? opt->opt_flen : 0); 1532 length += exthdrlen; 1533 transhdrlen += exthdrlen; 1534 } else { 1535 fl6 = &inet->cork.fl.u.ip6; 1536 transhdrlen = 0; 1537 } 1538 1539 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, 1540 &np->cork, sk_page_frag(sk), getfrag, 1541 from, length, transhdrlen, flags, dontfrag); 1542 } 1543 EXPORT_SYMBOL_GPL(ip6_append_data); 1544 1545 static void ip6_cork_release(struct inet_cork_full *cork, 1546 struct inet6_cork *v6_cork) 1547 { 1548 if (v6_cork->opt) { 1549 kfree(v6_cork->opt->dst0opt); 1550 kfree(v6_cork->opt->dst1opt); 1551 kfree(v6_cork->opt->hopopt); 1552 kfree(v6_cork->opt->srcrt); 1553 kfree(v6_cork->opt); 1554 v6_cork->opt = NULL; 1555 } 1556 1557 if (cork->base.dst) { 1558 dst_release(cork->base.dst); 1559 cork->base.dst = NULL; 1560 cork->base.flags &= ~IPCORK_ALLFRAG; 1561 } 1562 memset(&cork->fl, 0, sizeof(cork->fl)); 1563 } 1564 1565 struct sk_buff *__ip6_make_skb(struct sock *sk, 1566 struct sk_buff_head *queue, 1567 struct inet_cork_full *cork, 1568 struct inet6_cork *v6_cork) 1569 { 1570 struct sk_buff *skb, *tmp_skb; 1571 struct sk_buff **tail_skb; 1572 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1573 struct ipv6_pinfo *np = inet6_sk(sk); 1574 struct net *net = sock_net(sk); 1575 struct ipv6hdr *hdr; 1576 struct ipv6_txoptions *opt = v6_cork->opt; 1577 struct rt6_info *rt = (struct rt6_info *)cork->base.dst; 1578 struct flowi6 *fl6 = &cork->fl.u.ip6; 1579 unsigned char proto = fl6->flowi6_proto; 1580 1581 skb = __skb_dequeue(queue); 1582 if (skb == NULL) 1583 goto out; 1584 tail_skb = &(skb_shinfo(skb)->frag_list); 1585 1586 /* move skb->data to ip header from ext header */ 1587 if (skb->data < skb_network_header(skb)) 1588 __skb_pull(skb, skb_network_offset(skb)); 1589 while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 1590 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1591 *tail_skb = tmp_skb; 1592 tail_skb = &(tmp_skb->next); 1593 skb->len += tmp_skb->len; 1594 skb->data_len += tmp_skb->len; 1595 skb->truesize += tmp_skb->truesize; 1596 tmp_skb->destructor = NULL; 1597 tmp_skb->sk = NULL; 1598 } 1599 1600 /* Allow local fragmentation. */ 1601 skb->ignore_df = ip6_sk_ignore_df(sk); 1602 1603 *final_dst = fl6->daddr; 1604 __skb_pull(skb, skb_network_header_len(skb)); 1605 if (opt && opt->opt_flen) 1606 ipv6_push_frag_opts(skb, opt, &proto); 1607 if (opt && opt->opt_nflen) 1608 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1609 1610 skb_push(skb, sizeof(struct ipv6hdr)); 1611 skb_reset_network_header(skb); 1612 hdr = ipv6_hdr(skb); 1613 1614 ip6_flow_hdr(hdr, v6_cork->tclass, 1615 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1616 np->autoflowlabel)); 1617 hdr->hop_limit = v6_cork->hop_limit; 1618 hdr->nexthdr = proto; 1619 hdr->saddr = fl6->saddr; 1620 hdr->daddr = *final_dst; 1621 1622 skb->priority = sk->sk_priority; 1623 skb->mark = sk->sk_mark; 1624 1625 skb_dst_set(skb, dst_clone(&rt->dst)); 1626 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1627 if (proto == IPPROTO_ICMPV6) { 1628 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1629 1630 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); 1631 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1632 } 1633 1634 ip6_cork_release(cork, v6_cork); 1635 out: 1636 return skb; 1637 } 1638 1639 int ip6_send_skb(struct sk_buff *skb) 1640 { 1641 struct net *net = sock_net(skb->sk); 1642 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 1643 int err; 1644 1645 err = ip6_local_out(skb); 1646 if (err) { 1647 if (err > 0) 1648 err = net_xmit_errno(err); 1649 if (err) 1650 IP6_INC_STATS(net, rt->rt6i_idev, 1651 IPSTATS_MIB_OUTDISCARDS); 1652 } 1653 1654 return err; 1655 } 1656 1657 int ip6_push_pending_frames(struct sock *sk) 1658 { 1659 struct sk_buff *skb; 1660 1661 skb = ip6_finish_skb(sk); 1662 if (!skb) 1663 return 0; 1664 1665 return ip6_send_skb(skb); 1666 } 1667 EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 1668 1669 static void __ip6_flush_pending_frames(struct sock *sk, 1670 struct sk_buff_head *queue, 1671 struct inet_cork_full *cork, 1672 struct inet6_cork *v6_cork) 1673 { 1674 struct sk_buff *skb; 1675 1676 while ((skb = __skb_dequeue_tail(queue)) != NULL) { 1677 if (skb_dst(skb)) 1678 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1679 IPSTATS_MIB_OUTDISCARDS); 1680 kfree_skb(skb); 1681 } 1682 1683 ip6_cork_release(cork, v6_cork); 1684 } 1685 1686 void ip6_flush_pending_frames(struct sock *sk) 1687 { 1688 __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 1689 &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 1690 } 1691 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 1692 1693 struct sk_buff *ip6_make_skb(struct sock *sk, 1694 int getfrag(void *from, char *to, int offset, 1695 int len, int odd, struct sk_buff *skb), 1696 void *from, int length, int transhdrlen, 1697 int hlimit, int tclass, 1698 struct ipv6_txoptions *opt, struct flowi6 *fl6, 1699 struct rt6_info *rt, unsigned int flags, 1700 int dontfrag) 1701 { 1702 struct inet_cork_full cork; 1703 struct inet6_cork v6_cork; 1704 struct sk_buff_head queue; 1705 int exthdrlen = (opt ? opt->opt_flen : 0); 1706 int err; 1707 1708 if (flags & MSG_PROBE) 1709 return NULL; 1710 1711 __skb_queue_head_init(&queue); 1712 1713 cork.base.flags = 0; 1714 cork.base.addr = 0; 1715 cork.base.opt = NULL; 1716 v6_cork.opt = NULL; 1717 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); 1718 if (err) 1719 return ERR_PTR(err); 1720 1721 if (dontfrag < 0) 1722 dontfrag = inet6_sk(sk)->dontfrag; 1723 1724 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, 1725 ¤t->task_frag, getfrag, from, 1726 length + exthdrlen, transhdrlen + exthdrlen, 1727 flags, dontfrag); 1728 if (err) { 1729 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); 1730 return ERR_PTR(err); 1731 } 1732 1733 return __ip6_make_skb(sk, &queue, &cork, &v6_cork); 1734 } 1735