/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
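/*
 * Typical hand-off into this layer (a sketch, not code from this file):
 * a sender that has already built a complete IPv6 packet and attached a
 * route would simply do
 *
 *	skb_dst_set(skb, dst);
 *	err = ip6_local_out(skb);
 *
 * __ip6_local_out() fixes up payload_len (writing 0 when the payload
 * exceeds IPV6_MAXPLEN, the jumbogram convention) and runs the
 * NF_INET_LOCAL_OUT hook before dst_output() takes over.
 */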
/*
 *	dev_loopback_xmit for use with netfilter.
 */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx(newskb);
	return 0;
}

static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev)) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}

int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}
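/*
 * Note on the decision in ip6_output() above: a packet goes through
 * ip6_fragment() when it is longer than the path MTU and is not a GSO
 * packet, or when the route demands a fragment header on every packet
 * (dst_allfrag(), typically a path whose MTU fell below IPV6_MIN_MTU).
 * ip6_skb_dst_mtu() honours IPV6_PMTUDISC_PROBE by using the device
 * MTU instead of the (possibly smaller) cached route MTU.
 */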
/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: extension headers may take lots of space
		 * (~8K for now); MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
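/*
 * Layout of the first 32-bit word that ip6_xmit() writes (version,
 * traffic class, flow label):
 *
 *	bits 31..28	version (6)
 *	bits 27..20	traffic class
 *	bits 19..0	flow label
 *
 * Version and traffic class are built in host order and converted with
 * htonl(); fl6_flowlabel is already stored in network byte order.
 * Worked example: tclass = 0 and no flow label gives the bare
 * htonl(0x60000000); tclass = 0x10 gives htonl(0x61000000).
 */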
/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It duplicates code, but we really want to avoid
 *	extra checks, since ipv6_build_header is used by TCP (which
 *	is performance critical for us).
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
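/*
 * Contract of ip6_forward_proxy_check() below (summarising the code):
 * it returns 1 when the packet is a unicast neighbour-discovery
 * message for a proxied address and must go to the input path, 0 when
 * ordinary forwarding may continue, and -1 when the packet has to be
 * dropped (a link-local destination that a proxying router cannot
 * legitimately forward).
 */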
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do not do any processing on RA packets; we push them to
	 *	user level as is, without any warranty that an application
	 *	will be able to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains AH/ESP
	 *	we cannot do anything either. Defragmentation would also
	 *	be a mistake: RA packets must not be fragmented, because
	 *	there is no warranty that different fragments will go
	 *	along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* The IPv6 specs say nothing about it, but it is clear that we
	   cannot send redirects for source routed frames.
	   We don't send redirects for frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
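		/*
		 * (Why: forwarding a packet whose source address is
		 * unspecified, multicast or loopback would let one link
		 * inject spoofed traffic through the router. A
		 * link-local source must not cross the link either,
		 * but that sender at least gets an ICMP error back.)
		 */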
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
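/*
 * ip6_fragment() below splits @skb into pieces that fit the path MTU
 * and hands each piece to @output.  The fast path re-uses an existing
 * frag_list when the queued fragments already have the right geometry
 * (8-byte aligned lengths, enough headroom, not shared); otherwise the
 * slow path copies the payload into freshly allocated skbs.
 */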
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (!skb->local_df) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frags(skb)) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */
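	/*
	 * Slow-path invariants: 'ptr' is the offset into the original
	 * packet of the payload still to be copied, 'offset' is the
	 * same amount as seen by the receiver (ptr - hlen), and every
	 * fragment except the last carries a multiple of 8 payload
	 * bytes, so the low three bits of frag_off stay free for the
	 * IP6_MF flag.
	 */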
	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}
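/*
 * ip6_rt_check() above returns true ("the cached route may be stale")
 * when the route is neither a host route matching @fl_addr nor a
 * network route whose last-used destination (@addr_cache) still
 * equals @fl_addr.
 */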
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected case is not
	 * very simple. Take into account that we do not support routing
	 * by source, TOS, and MSG_DONTROUTE. --ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route, check that
	 *    the cached destination is current. If it is a network
	 *    route, we still may check its validity using the saved
	 *    pointer to the last used address: daddr_cache.
	 *    We do not want to save the whole address now (because the
	 *    main consumer of this service is TCP, which does not have
	 *    this problem), so this last trick works only on connected
	 *    sockets.
	 * 2. oif should also be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
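/*
 * Typical call pattern (a sketch, not code from this file): fill in a
 * struct flowi with fl6_dst, fl6_src and oif, then
 *
 *	struct dst_entry *dst;
 *	int err = ip6_dst_lookup(sk, &dst, &fl);
 *	if (err)
 *		return err;
 *
 * On success *dst holds a reference that the caller must eventually
 * drop with dst_release().
 */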
/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by the network
	 * device, so create one single skb packet containing the
	 * complete udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
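/*
 * Sizing note for ip6_opt_dup() above and ip6_rthdr_dup() below:
 * extension headers encode their length in 8-octet units, not counting
 * the first 8 octets, so (hdrlen + 1) * 8 is the header's full size in
 * bytes.
 */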
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) +
						 IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}
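	/*
	 * Worked example for maxfraglen: with an MTU of 1500 and no
	 * extension headers, fragheaderlen = 40, so
	 * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488: the largest
	 * packet size that still leaves room for an 8-byte fragment
	 * header while keeping the fragmentable part 8-byte aligned.
	 */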
	/*
	 * Let's try using as much space as possible.
	 * Use the MTU if the total length of the message fits into it.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into the current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at the
			 * tail. Note: we overallocate on fragments with
			 * MSG_MORE because we have no idea whether we are
			 * the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for the fragment header.
			 * Note: this may be an overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
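			/*
			 * (The extra frag_hdr bytes end up as headroom,
			 * via the skb_reserve() below: if the datagram is
			 * fragmented later, ip6_fragment() can push the
			 * fragment header without reallocating.)
			 */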
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen,
						       offset, copy, fraggap,
						       skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page,
							   sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    page_address(frag->page) +
				    frag->page_offset + frag->size,
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
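/*
 * ip6_cork_release() below undoes what ip6_append_data() set up for
 * corking: it frees the duplicated extension headers, drops the held
 * route and clears the cached flow.
 */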
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
			 htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->u.dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}