/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct rt6_info *rt;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
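			   The clone is looped back via dev_loopback_xmit()
			   below so that local group members still receive
			   a copy of the packet.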
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	rt = (struct rt6_info *) dst;
	neigh = rt->n;
	if (neigh)
		return dst_neigh_output(dst, neigh, skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
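		   Grow the headroom to fit the option headers plus the
		   IPv6 and link-layer headers before pushing anything.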
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine.
 *	It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	hdr->saddr = *saddr;
	hdr->daddr = *daddr;

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
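	 * (Mobile IPv6, RFC 3775.)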
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We do not do any processing on
	 *	RA packets; we push them to user level AS IS
	 *	without any warranty that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical.
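		   Forwarding packets with an unspecified, loopback, or
		   multicast source address would let hosts inject spoofed
		   traffic through the router.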
		 */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
	static atomic_t ipv6_fragmentation_id;
	int old, new;

	if (rt && !(rt->dst.flags & DST_NOPEER)) {
		struct inet_peer *peer;
		struct net *net;

		net = dev_net(rt->dst.dev);
		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
		if (peer) {
			fhdr->identification = htonl(inet_getid(peer, 0));
			inet_putpeer(peer);
			return;
		}
	}
	do {
		old = atomic_read(&ipv6_fragmentation_id);
		new = old + 1;
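		/* never hand out fragment ID 0; wrap to 1 instead */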
		if (!new)
			new = 1;
	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
	fhdr->identification = htonl(new);
}

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before the previous one is sent.
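			 * (the next fragment's offset is derived from this
			 * frame's length before it is handed to output())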
			 */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
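		 *	(skb_copy_bits() walks both the linear area and
		 *	any paged fragments of the source skb)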
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected case
	 * is not very simple. Take into account that we do not
	 * support routing by source, TOS, and MSG_DONTROUTE
	 *						--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we can still
	 *    check its validity using a saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now
	 *    (because the main consumer of this service
	 *    is TCP, which does not have this problem),
	 *    so the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	n = rt->n;
	if (n && !(n->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
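 *	The returned dst holds a reference that the caller must drop
 *	with dst_release().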
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = *mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
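			 * (use the full path MTU; header_len was only
			 * reserved for the first fragment)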
			 */
			*mtu = dst_mtu(rt->dst.path);
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
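			 * (the reserved bytes simply remain as unused
			 * headroom if no fragment header is pushed later)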
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
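		/* free each duplicated extension header, then the
		 * options block itself
		 */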
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);