/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}

static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
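			/* The clone, if any, is looped back below via
			 * ip6_dev_loopback_xmit()/netif_rx_ni() so that
			 * local listeners receive a copy; the original
			 * skb still goes out on the wire.
			 */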
			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	rcu_read_lock();
	neigh = dst_get_neighbour_noref(dst);
	if (neigh) {
		int res = neigh_output(neigh, skb);

		rcu_read_unlock();
		return res;
	}
	rcu_read_unlock();
	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		 * MAX_HEADER is not enough.
		 */
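		/* opt_nflen covers the non-fragmentable extension headers
		 * (hop-by-hop, destination options before the routing
		 * header, and the routing header itself); opt_flen covers
		 * the fragmentable destination options.  Both are pushed
		 * below, so the headroom must cover their sum plus the
		 * IPv6 and link-layer headers.
		 */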
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us).
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	hdr->saddr = *saddr;
	hdr->daddr = *daddr;

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For a reaction involving a unicast neighbour
			 * discovery message destined to the proxied
			 * address, pass it to the input function.
			 */
			return 1;
		default:
			break;
		}
	}
	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We do no processing on RA packets, pushing them to
	 *	user level AS IS without any warranty that an
	 *	application will be able to interpret them.  The
	 *	reason is that we cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP we cannot do anything with it.  Defragmenting
	 *	would also be a mistake: RA packets must not be
	 *	fragmented, because there is no guarantee that
	 *	different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
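		/* Forwarding a packet whose source address is
		 * unspecified, multicast or loopback would make replies
		 * unroutable and invites spoofing; such sources are
		 * never valid on the wire (cf. RFC 4291), so drop.
		 * Link-local sources are legal on the link but must not
		 * be forwarded beyond it, hence the ICMP error below.
		 */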
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Decrementing the hop limit is delayed until after the skb COW. */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
	static atomic_t ipv6_fragmentation_id;
	int old, new;

	if (rt && !(rt->dst.flags & DST_NOPEER)) {
		struct inet_peer *peer;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);
		peer = rt->rt6i_peer;
		if (peer) {
			fhdr->identification = htonl(inet_getid(peer, 0));
			return;
		}
	}
	do {
		old = atomic_read(&ipv6_fragmentation_id);
		new = old + 1;
		if (!new)
			new = 1;
	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
	fhdr->identification = htonl(new);
}
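
/*
 * Fragment the skb for the given output path.  There are two paths: a
 * fast path that reuses an existing frag_list whose chunks already have
 * the right geometry (each a multiple of 8 bytes except the last, with
 * headroom for the headers), and a slow path that allocates fresh skbs
 * and copies the payload block by block.
 */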
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
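			/* Note: frag_off accumulates the offset in bytes.
			 * Since every fragment but the last is a multiple
			 * of 8 bytes long, the byte offset already equals
			 * the 13-bit fragment offset (in 8-byte units)
			 * shifted into place, leaving the low three bits
			 * free for flags such as IP6_MF.
			 */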
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
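		/* len never exceeds the data remaining in skb, so the copy
		 * below cannot fail; a failure would indicate a bookkeeping
		 * bug, hence the BUG().
		 */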
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected case is not
	 * very simple.  Take into account that we do not support routing
	 * by source, TOS, and MSG_DONTROUTE.  --ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route, check that
	 *    the cached destination is current.  If it is a network
	 *    route, we still may check its validity using the saved
	 *    pointer to the last used address: daddr_cache.  We do not
	 *    want to save the whole address now (the main consumer of
	 *    this service is TCP, which does not have this problem),
	 *    so this last trick works only on connected sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;
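	/* If the caller left the source address unspecified, choose one
	 * for this route now (honouring the socket's address-selection
	 * preferences, if any).
	 */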
	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rcu_read_lock();
	n = dst_get_neighbour_noref(*dst);
	if (n && !(n->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		rcu_read_unlock();
		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	} else {
		rcu_read_unlock();
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* The network device supports UDP large send offload, so create
	 * one single skb packet containing the complete UDP datagram.
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path.
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
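
/*
 * ip6_append_data() queues data on the socket's write queue: the first
 * call in a corking cycle copies the options and takes a reference on
 * the route; later calls only append.  The queued fragments are turned
 * into a packet and sent by ip6_push_pending_frames(), or dropped by
 * ip6_flush_pending_frames().
 */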
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags & MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
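	/* maxfraglen is the largest length (measured from the network
	 * header) a non-final fragment may have: the payload is rounded
	 * down to a multiple of 8 and room is left for the fragment
	 * header.  E.g. mtu 1500, fragheaderlen 40:
	 * ((1500 - 40) & ~7) + 40 - 8 = 1488, i.e. 1448 payload bytes
	 * per fragment.
	 */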
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != skb_frag_page(frag)) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					skb_frag_ref(skb, i);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    skb_frag_address(frag) + skb_frag_size(frag),
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			skb_frag_size_add(frag, copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
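			/* The skb is owned by the socket: keep the
			 * write-allocation counter in sync with the
			 * truesize we just grew.
			 */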
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl6->flowlabel |
			 htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);