1 /* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * Based on linux/net/ipv4/ip_output.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Changes: 16 * A.N.Kuznetsov : airthmetics in fragmentation. 17 * extension headers are implemented. 18 * route changes now work. 19 * ip6_forward does not confuse sniffers. 20 * etc. 21 * 22 * H. von Brand : Added missing #include <linux/string.h> 23 * Imran Patel : frag id should be in NBO 24 * Kazunori MIYAZAWA @USAGI 25 * : add ip6_append_data and related functions 26 * for datagram xmit 27 */ 28 29 #include <linux/errno.h> 30 #include <linux/kernel.h> 31 #include <linux/string.h> 32 #include <linux/socket.h> 33 #include <linux/net.h> 34 #include <linux/netdevice.h> 35 #include <linux/if_arp.h> 36 #include <linux/in6.h> 37 #include <linux/tcp.h> 38 #include <linux/route.h> 39 #include <linux/module.h> 40 #include <linux/slab.h> 41 42 #include <linux/netfilter.h> 43 #include <linux/netfilter_ipv6.h> 44 45 #include <net/sock.h> 46 #include <net/snmp.h> 47 48 #include <net/ipv6.h> 49 #include <net/ndisc.h> 50 #include <net/protocol.h> 51 #include <net/ip6_route.h> 52 #include <net/addrconf.h> 53 #include <net/rawv6.h> 54 #include <net/icmp.h> 55 #include <net/xfrm.h> 56 #include <net/checksum.h> 57 #include <linux/mroute6.h> 58 59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 60 61 int __ip6_local_out(struct sk_buff *skb) 62 { 63 int len; 64 65 len = skb->len - sizeof(struct ipv6hdr); 66 if (len > IPV6_MAXPLEN) 67 len = 0; 68 ipv6_hdr(skb)->payload_len = htons(len); 69 70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 71 skb_dst(skb)->dev, dst_output); 72 } 73 74 int ip6_local_out(struct sk_buff *skb) 
75 { 76 int err; 77 78 err = __ip6_local_out(skb); 79 if (likely(err == 1)) 80 err = dst_output(skb); 81 82 return err; 83 } 84 EXPORT_SYMBOL_GPL(ip6_local_out); 85 86 static int ip6_finish_output2(struct sk_buff *skb) 87 { 88 struct dst_entry *dst = skb_dst(skb); 89 struct net_device *dev = dst->dev; 90 struct neighbour *neigh; 91 struct rt6_info *rt; 92 93 skb->protocol = htons(ETH_P_IPV6); 94 skb->dev = dev; 95 96 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 97 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 98 99 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && 100 ((mroute6_socket(dev_net(dev), skb) && 101 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 102 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 103 &ipv6_hdr(skb)->saddr))) { 104 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 105 106 /* Do not check for IFF_ALLMULTI; multicast routing 107 is not supported in any case. 108 */ 109 if (newskb) 110 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 111 newskb, NULL, newskb->dev, 112 dev_loopback_xmit); 113 114 if (ipv6_hdr(skb)->hop_limit == 0) { 115 IP6_INC_STATS(dev_net(dev), idev, 116 IPSTATS_MIB_OUTDISCARDS); 117 kfree_skb(skb); 118 return 0; 119 } 120 } 121 122 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, 123 skb->len); 124 } 125 126 rt = (struct rt6_info *) dst; 127 neigh = rt->n; 128 if (neigh) 129 return dst_neigh_output(dst, neigh, skb); 130 131 IP6_INC_STATS_BH(dev_net(dst->dev), 132 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 133 kfree_skb(skb); 134 return -EINVAL; 135 } 136 137 static int ip6_finish_output(struct sk_buff *skb) 138 { 139 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 140 dst_allfrag(skb_dst(skb))) 141 return ip6_fragment(skb, ip6_finish_output2); 142 else 143 return ip6_finish_output2(skb); 144 } 145 146 int ip6_output(struct sk_buff *skb) 147 { 148 struct net_device *dev = skb_dst(skb)->dev; 149 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 150 if 
(unlikely(idev->cnf.disable_ipv6)) { 151 IP6_INC_STATS(dev_net(dev), idev, 152 IPSTATS_MIB_OUTDISCARDS); 153 kfree_skb(skb); 154 return 0; 155 } 156 157 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, 158 ip6_finish_output, 159 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 160 } 161 162 /* 163 * xmit an sk_buff (used by TCP, SCTP and DCCP) 164 */ 165 166 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 167 struct ipv6_txoptions *opt, int tclass) 168 { 169 struct net *net = sock_net(sk); 170 struct ipv6_pinfo *np = inet6_sk(sk); 171 struct in6_addr *first_hop = &fl6->daddr; 172 struct dst_entry *dst = skb_dst(skb); 173 struct ipv6hdr *hdr; 174 u8 proto = fl6->flowi6_proto; 175 int seg_len = skb->len; 176 int hlimit = -1; 177 u32 mtu; 178 179 if (opt) { 180 unsigned int head_room; 181 182 /* First: exthdrs may take lots of space (~8K for now) 183 MAX_HEADER is not enough. 184 */ 185 head_room = opt->opt_nflen + opt->opt_flen; 186 seg_len += head_room; 187 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 188 189 if (skb_headroom(skb) < head_room) { 190 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 191 if (skb2 == NULL) { 192 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 193 IPSTATS_MIB_OUTDISCARDS); 194 kfree_skb(skb); 195 return -ENOBUFS; 196 } 197 consume_skb(skb); 198 skb = skb2; 199 skb_set_owner_w(skb, sk); 200 } 201 if (opt->opt_flen) 202 ipv6_push_frag_opts(skb, opt, &proto); 203 if (opt->opt_nflen) 204 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 205 } 206 207 skb_push(skb, sizeof(struct ipv6hdr)); 208 skb_reset_network_header(skb); 209 hdr = ipv6_hdr(skb); 210 211 /* 212 * Fill in the IPv6 header 213 */ 214 if (np) 215 hlimit = np->hop_limit; 216 if (hlimit < 0) 217 hlimit = ip6_dst_hoplimit(dst); 218 219 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; 220 221 hdr->payload_len = htons(seg_len); 222 hdr->nexthdr = proto; 223 hdr->hop_limit = hlimit; 224 225 
hdr->saddr = fl6->saddr; 226 hdr->daddr = *first_hop; 227 228 skb->priority = sk->sk_priority; 229 skb->mark = sk->sk_mark; 230 231 mtu = dst_mtu(dst); 232 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { 233 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 234 IPSTATS_MIB_OUT, skb->len); 235 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 236 dst->dev, dst_output); 237 } 238 239 net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); 240 skb->dev = dst->dev; 241 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 242 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 243 kfree_skb(skb); 244 return -EMSGSIZE; 245 } 246 247 EXPORT_SYMBOL(ip6_xmit); 248 249 /* 250 * To avoid extra problems ND packets are send through this 251 * routine. It's code duplication but I really want to avoid 252 * extra checks since ipv6_build_header is used by TCP (which 253 * is for us performance critical) 254 */ 255 256 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 257 const struct in6_addr *saddr, const struct in6_addr *daddr, 258 int proto, int len) 259 { 260 struct ipv6_pinfo *np = inet6_sk(sk); 261 struct ipv6hdr *hdr; 262 263 skb->protocol = htons(ETH_P_IPV6); 264 skb->dev = dev; 265 266 skb_reset_network_header(skb); 267 skb_put(skb, sizeof(struct ipv6hdr)); 268 hdr = ipv6_hdr(skb); 269 270 *(__be32*)hdr = htonl(0x60000000); 271 272 hdr->payload_len = htons(len); 273 hdr->nexthdr = proto; 274 hdr->hop_limit = np->hop_limit; 275 276 hdr->saddr = *saddr; 277 hdr->daddr = *daddr; 278 279 return 0; 280 } 281 282 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 283 { 284 struct ip6_ra_chain *ra; 285 struct sock *last = NULL; 286 287 read_lock(&ip6_ra_lock); 288 for (ra = ip6_ra_chain; ra; ra = ra->next) { 289 struct sock *sk = ra->sk; 290 if (sk && ra->sel == sel && 291 (!sk->sk_bound_dev_if || 292 sk->sk_bound_dev_if == skb->dev->ifindex)) { 293 if (last) { 294 struct sk_buff *skb2 = skb_clone(skb, 
GFP_ATOMIC); 295 if (skb2) 296 rawv6_rcv(last, skb2); 297 } 298 last = sk; 299 } 300 } 301 302 if (last) { 303 rawv6_rcv(last, skb); 304 read_unlock(&ip6_ra_lock); 305 return 1; 306 } 307 read_unlock(&ip6_ra_lock); 308 return 0; 309 } 310 311 static int ip6_forward_proxy_check(struct sk_buff *skb) 312 { 313 struct ipv6hdr *hdr = ipv6_hdr(skb); 314 u8 nexthdr = hdr->nexthdr; 315 __be16 frag_off; 316 int offset; 317 318 if (ipv6_ext_hdr(nexthdr)) { 319 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 320 if (offset < 0) 321 return 0; 322 } else 323 offset = sizeof(struct ipv6hdr); 324 325 if (nexthdr == IPPROTO_ICMPV6) { 326 struct icmp6hdr *icmp6; 327 328 if (!pskb_may_pull(skb, (skb_network_header(skb) + 329 offset + 1 - skb->data))) 330 return 0; 331 332 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 333 334 switch (icmp6->icmp6_type) { 335 case NDISC_ROUTER_SOLICITATION: 336 case NDISC_ROUTER_ADVERTISEMENT: 337 case NDISC_NEIGHBOUR_SOLICITATION: 338 case NDISC_NEIGHBOUR_ADVERTISEMENT: 339 case NDISC_REDIRECT: 340 /* For reaction involving unicast neighbor discovery 341 * message destined to the proxied address, pass it to 342 * input function. 343 */ 344 return 1; 345 default: 346 break; 347 } 348 } 349 350 /* 351 * The proxying router can't forward traffic sent to a link-local 352 * address, so signal the sender and discard the packet. This 353 * behavior is clarified by the MIPv6 specification. 
354 */ 355 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 356 dst_link_failure(skb); 357 return -1; 358 } 359 360 return 0; 361 } 362 363 static inline int ip6_forward_finish(struct sk_buff *skb) 364 { 365 return dst_output(skb); 366 } 367 368 int ip6_forward(struct sk_buff *skb) 369 { 370 struct dst_entry *dst = skb_dst(skb); 371 struct ipv6hdr *hdr = ipv6_hdr(skb); 372 struct inet6_skb_parm *opt = IP6CB(skb); 373 struct net *net = dev_net(dst->dev); 374 u32 mtu; 375 376 if (net->ipv6.devconf_all->forwarding == 0) 377 goto error; 378 379 if (skb_warn_if_lro(skb)) 380 goto drop; 381 382 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 383 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 384 goto drop; 385 } 386 387 if (skb->pkt_type != PACKET_HOST) 388 goto drop; 389 390 skb_forward_csum(skb); 391 392 /* 393 * We DO NOT make any processing on 394 * RA packets, pushing them to user level AS IS 395 * without ane WARRANTY that application will be able 396 * to interpret them. The reason is that we 397 * cannot make anything clever here. 398 * 399 * We are not end-node, so that if packet contains 400 * AH/ESP, we cannot make anything. 401 * Defragmentation also would be mistake, RA packets 402 * cannot be fragmented, because there is no warranty 403 * that different fragments will go along one path. --ANK 404 */ 405 if (opt->ra) { 406 u8 *ptr = skb_network_header(skb) + opt->ra; 407 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 408 return 0; 409 } 410 411 /* 412 * check and decrement ttl 413 */ 414 if (hdr->hop_limit <= 1) { 415 /* Force OUTPUT device used as source address */ 416 skb->dev = dst->dev; 417 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 418 IP6_INC_STATS_BH(net, 419 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 420 421 kfree_skb(skb); 422 return -ETIMEDOUT; 423 } 424 425 /* XXX: idev->cnf.proxy_ndp? 
*/ 426 if (net->ipv6.devconf_all->proxy_ndp && 427 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 428 int proxied = ip6_forward_proxy_check(skb); 429 if (proxied > 0) 430 return ip6_input(skb); 431 else if (proxied < 0) { 432 IP6_INC_STATS(net, ip6_dst_idev(dst), 433 IPSTATS_MIB_INDISCARDS); 434 goto drop; 435 } 436 } 437 438 if (!xfrm6_route_forward(skb)) { 439 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 440 goto drop; 441 } 442 dst = skb_dst(skb); 443 444 /* IPv6 specs say nothing about it, but it is clear that we cannot 445 send redirects to source routed frames. 446 We don't send redirects to frames decapsulated from IPsec. 447 */ 448 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { 449 struct in6_addr *target = NULL; 450 struct inet_peer *peer; 451 struct rt6_info *rt; 452 453 /* 454 * incoming and outgoing devices are the same 455 * send a redirect. 456 */ 457 458 rt = (struct rt6_info *) dst; 459 if (rt->rt6i_flags & RTF_GATEWAY) 460 target = &rt->rt6i_gateway; 461 else 462 target = &hdr->daddr; 463 464 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); 465 466 /* Limit redirects both by destination (here) 467 and by source (inside ndisc_send_redirect) 468 */ 469 if (inet_peer_xrlim_allow(peer, 1*HZ)) 470 ndisc_send_redirect(skb, target); 471 if (peer) 472 inet_putpeer(peer); 473 } else { 474 int addrtype = ipv6_addr_type(&hdr->saddr); 475 476 /* This check is security critical. 
*/ 477 if (addrtype == IPV6_ADDR_ANY || 478 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 479 goto error; 480 if (addrtype & IPV6_ADDR_LINKLOCAL) { 481 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 482 ICMPV6_NOT_NEIGHBOUR, 0); 483 goto error; 484 } 485 } 486 487 mtu = dst_mtu(dst); 488 if (mtu < IPV6_MIN_MTU) 489 mtu = IPV6_MIN_MTU; 490 491 if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || 492 (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { 493 /* Again, force OUTPUT device used as source address */ 494 skb->dev = dst->dev; 495 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 496 IP6_INC_STATS_BH(net, 497 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); 498 IP6_INC_STATS_BH(net, 499 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); 500 kfree_skb(skb); 501 return -EMSGSIZE; 502 } 503 504 if (skb_cow(skb, dst->dev->hard_header_len)) { 505 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); 506 goto drop; 507 } 508 509 hdr = ipv6_hdr(skb); 510 511 /* Mangling hops number delayed to point after skb COW */ 512 513 hdr->hop_limit--; 514 515 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 516 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); 517 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 518 ip6_forward_finish); 519 520 error: 521 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 522 drop: 523 kfree_skb(skb); 524 return -EINVAL; 525 } 526 527 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 528 { 529 to->pkt_type = from->pkt_type; 530 to->priority = from->priority; 531 to->protocol = from->protocol; 532 skb_dst_drop(to); 533 skb_dst_set(to, dst_clone(skb_dst(from))); 534 to->dev = from->dev; 535 to->mark = from->mark; 536 537 #ifdef CONFIG_NET_SCHED 538 to->tc_index = from->tc_index; 539 #endif 540 nf_copy(to, from); 541 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 542 to->nf_trace = from->nf_trace; 543 #endif 544 
skb_copy_secmark(to, from); 545 } 546 547 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 548 { 549 struct sk_buff *frag; 550 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); 551 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 552 struct ipv6hdr *tmp_hdr; 553 struct frag_hdr *fh; 554 unsigned int mtu, hlen, left, len; 555 int hroom, troom; 556 __be32 frag_id = 0; 557 int ptr, offset = 0, err=0; 558 u8 *prevhdr, nexthdr = 0; 559 struct net *net = dev_net(skb_dst(skb)->dev); 560 561 hlen = ip6_find_1stfragopt(skb, &prevhdr); 562 nexthdr = *prevhdr; 563 564 mtu = ip6_skb_dst_mtu(skb); 565 566 /* We must not fragment if the socket is set to force MTU discovery 567 * or if the skb it not generated by a local socket. 568 */ 569 if (unlikely(!skb->local_df && skb->len > mtu) || 570 (IP6CB(skb)->frag_max_size && 571 IP6CB(skb)->frag_max_size > mtu)) { 572 if (skb->sk && dst_allfrag(skb_dst(skb))) 573 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 574 575 skb->dev = skb_dst(skb)->dev; 576 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 577 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 578 IPSTATS_MIB_FRAGFAILS); 579 kfree_skb(skb); 580 return -EMSGSIZE; 581 } 582 583 if (np && np->frag_size < mtu) { 584 if (np->frag_size) 585 mtu = np->frag_size; 586 } 587 mtu -= hlen + sizeof(struct frag_hdr); 588 589 if (skb_has_frag_list(skb)) { 590 int first_len = skb_pagelen(skb); 591 struct sk_buff *frag2; 592 593 if (first_len - hlen > mtu || 594 ((first_len - hlen) & 7) || 595 skb_cloned(skb)) 596 goto slow_path; 597 598 skb_walk_frags(skb, frag) { 599 /* Correct geometry. */ 600 if (frag->len > mtu || 601 ((frag->len & 7) && frag->next) || 602 skb_headroom(frag) < hlen) 603 goto slow_path_clean; 604 605 /* Partially cloned skb? 
*/ 606 if (skb_shared(frag)) 607 goto slow_path_clean; 608 609 BUG_ON(frag->sk); 610 if (skb->sk) { 611 frag->sk = skb->sk; 612 frag->destructor = sock_wfree; 613 } 614 skb->truesize -= frag->truesize; 615 } 616 617 err = 0; 618 offset = 0; 619 frag = skb_shinfo(skb)->frag_list; 620 skb_frag_list_init(skb); 621 /* BUILD HEADER */ 622 623 *prevhdr = NEXTHDR_FRAGMENT; 624 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 625 if (!tmp_hdr) { 626 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 627 IPSTATS_MIB_FRAGFAILS); 628 return -ENOMEM; 629 } 630 631 __skb_pull(skb, hlen); 632 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 633 __skb_push(skb, hlen); 634 skb_reset_network_header(skb); 635 memcpy(skb_network_header(skb), tmp_hdr, hlen); 636 637 ipv6_select_ident(fh, rt); 638 fh->nexthdr = nexthdr; 639 fh->reserved = 0; 640 fh->frag_off = htons(IP6_MF); 641 frag_id = fh->identification; 642 643 first_len = skb_pagelen(skb); 644 skb->data_len = first_len - skb_headlen(skb); 645 skb->len = first_len; 646 ipv6_hdr(skb)->payload_len = htons(first_len - 647 sizeof(struct ipv6hdr)); 648 649 dst_hold(&rt->dst); 650 651 for (;;) { 652 /* Prepare header of the next frame, 653 * before previous one went down. 
*/ 654 if (frag) { 655 frag->ip_summed = CHECKSUM_NONE; 656 skb_reset_transport_header(frag); 657 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 658 __skb_push(frag, hlen); 659 skb_reset_network_header(frag); 660 memcpy(skb_network_header(frag), tmp_hdr, 661 hlen); 662 offset += skb->len - hlen - sizeof(struct frag_hdr); 663 fh->nexthdr = nexthdr; 664 fh->reserved = 0; 665 fh->frag_off = htons(offset); 666 if (frag->next != NULL) 667 fh->frag_off |= htons(IP6_MF); 668 fh->identification = frag_id; 669 ipv6_hdr(frag)->payload_len = 670 htons(frag->len - 671 sizeof(struct ipv6hdr)); 672 ip6_copy_metadata(frag, skb); 673 } 674 675 err = output(skb); 676 if(!err) 677 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 678 IPSTATS_MIB_FRAGCREATES); 679 680 if (err || !frag) 681 break; 682 683 skb = frag; 684 frag = skb->next; 685 skb->next = NULL; 686 } 687 688 kfree(tmp_hdr); 689 690 if (err == 0) { 691 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 692 IPSTATS_MIB_FRAGOKS); 693 ip6_rt_put(rt); 694 return 0; 695 } 696 697 while (frag) { 698 skb = frag->next; 699 kfree_skb(frag); 700 frag = skb; 701 } 702 703 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 704 IPSTATS_MIB_FRAGFAILS); 705 ip6_rt_put(rt); 706 return err; 707 708 slow_path_clean: 709 skb_walk_frags(skb, frag2) { 710 if (frag2 == frag) 711 break; 712 frag2->sk = NULL; 713 frag2->destructor = NULL; 714 skb->truesize += frag2->truesize; 715 } 716 } 717 718 slow_path: 719 if ((skb->ip_summed == CHECKSUM_PARTIAL) && 720 skb_checksum_help(skb)) 721 goto fail; 722 723 left = skb->len - hlen; /* Space per frame */ 724 ptr = hlen; /* Where to start from */ 725 726 /* 727 * Fragment the datagram. 728 */ 729 730 *prevhdr = NEXTHDR_FRAGMENT; 731 hroom = LL_RESERVED_SPACE(rt->dst.dev); 732 troom = rt->dst.dev->needed_tailroom; 733 734 /* 735 * Keep copying data until we run out. 
736 */ 737 while(left > 0) { 738 len = left; 739 /* IF: it doesn't fit, use 'mtu' - the data space left */ 740 if (len > mtu) 741 len = mtu; 742 /* IF: we are not sending up to and including the packet end 743 then align the next start on an eight byte boundary */ 744 if (len < left) { 745 len &= ~7; 746 } 747 /* 748 * Allocate buffer. 749 */ 750 751 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + 752 hroom + troom, GFP_ATOMIC)) == NULL) { 753 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 754 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 755 IPSTATS_MIB_FRAGFAILS); 756 err = -ENOMEM; 757 goto fail; 758 } 759 760 /* 761 * Set up data on packet 762 */ 763 764 ip6_copy_metadata(frag, skb); 765 skb_reserve(frag, hroom); 766 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 767 skb_reset_network_header(frag); 768 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 769 frag->transport_header = (frag->network_header + hlen + 770 sizeof(struct frag_hdr)); 771 772 /* 773 * Charge the memory for the fragment to any owner 774 * it might possess 775 */ 776 if (skb->sk) 777 skb_set_owner_w(frag, skb->sk); 778 779 /* 780 * Copy the packet header into the new buffer. 781 */ 782 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 783 784 /* 785 * Build fragment header. 786 */ 787 fh->nexthdr = nexthdr; 788 fh->reserved = 0; 789 if (!frag_id) { 790 ipv6_select_ident(fh, rt); 791 frag_id = fh->identification; 792 } else 793 fh->identification = frag_id; 794 795 /* 796 * Copy a block of the IP datagram. 797 */ 798 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) 799 BUG(); 800 left -= len; 801 802 fh->frag_off = htons(offset); 803 if (left > 0) 804 fh->frag_off |= htons(IP6_MF); 805 ipv6_hdr(frag)->payload_len = htons(frag->len - 806 sizeof(struct ipv6hdr)); 807 808 ptr += len; 809 offset += len; 810 811 /* 812 * Put this fragment into the sending queue. 
813 */ 814 err = output(frag); 815 if (err) 816 goto fail; 817 818 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 819 IPSTATS_MIB_FRAGCREATES); 820 } 821 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 822 IPSTATS_MIB_FRAGOKS); 823 consume_skb(skb); 824 return err; 825 826 fail: 827 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 828 IPSTATS_MIB_FRAGFAILS); 829 kfree_skb(skb); 830 return err; 831 } 832 833 static inline int ip6_rt_check(const struct rt6key *rt_key, 834 const struct in6_addr *fl_addr, 835 const struct in6_addr *addr_cache) 836 { 837 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 838 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); 839 } 840 841 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 842 struct dst_entry *dst, 843 const struct flowi6 *fl6) 844 { 845 struct ipv6_pinfo *np = inet6_sk(sk); 846 struct rt6_info *rt = (struct rt6_info *)dst; 847 848 if (!dst) 849 goto out; 850 851 /* Yes, checking route validity in not connected 852 * case is not very simple. Take into account, 853 * that we do not support routing by source, TOS, 854 * and MSG_DONTROUTE --ANK (980726) 855 * 856 * 1. ip6_rt_check(): If route was host route, 857 * check that cached destination is current. 858 * If it is network route, we still may 859 * check its validity using saved pointer 860 * to the last used address: daddr_cache. 861 * We do not want to save whole address now, 862 * (because main consumer of this service 863 * is tcp, which has not this problem), 864 * so that the last trick works only on connected 865 * sockets. 866 * 2. oif also should be the same. 
867 */ 868 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 869 #ifdef CONFIG_IPV6_SUBTREES 870 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 871 #endif 872 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { 873 dst_release(dst); 874 dst = NULL; 875 } 876 877 out: 878 return dst; 879 } 880 881 static int ip6_dst_lookup_tail(struct sock *sk, 882 struct dst_entry **dst, struct flowi6 *fl6) 883 { 884 struct net *net = sock_net(sk); 885 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 886 struct neighbour *n; 887 struct rt6_info *rt; 888 #endif 889 int err; 890 891 if (*dst == NULL) 892 *dst = ip6_route_output(net, sk, fl6); 893 894 if ((err = (*dst)->error)) 895 goto out_err_release; 896 897 if (ipv6_addr_any(&fl6->saddr)) { 898 struct rt6_info *rt = (struct rt6_info *) *dst; 899 err = ip6_route_get_saddr(net, rt, &fl6->daddr, 900 sk ? inet6_sk(sk)->srcprefs : 0, 901 &fl6->saddr); 902 if (err) 903 goto out_err_release; 904 } 905 906 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 907 /* 908 * Here if the dst entry we've looked up 909 * has a neighbour entry that is in the INCOMPLETE 910 * state and the src address from the flow is 911 * marked as OPTIMISTIC, we release the found 912 * dst entry and replace it instead with the 913 * dst entry of the nexthop router 914 */ 915 rt = (struct rt6_info *) *dst; 916 n = rt->n; 917 if (n && !(n->nud_state & NUD_VALID)) { 918 struct inet6_ifaddr *ifp; 919 struct flowi6 fl_gw6; 920 int redirect; 921 922 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 923 (*dst)->dev, 1); 924 925 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 926 if (ifp) 927 in6_ifa_put(ifp); 928 929 if (redirect) { 930 /* 931 * We need to get the dst entry for the 932 * default router instead 933 */ 934 dst_release(*dst); 935 memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 936 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 937 *dst = ip6_route_output(net, sk, &fl_gw6); 938 if ((err = (*dst)->error)) 939 goto out_err_release; 940 } 941 } 942 #endif 943 
944 return 0; 945 946 out_err_release: 947 if (err == -ENETUNREACH) 948 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); 949 dst_release(*dst); 950 *dst = NULL; 951 return err; 952 } 953 954 /** 955 * ip6_dst_lookup - perform route lookup on flow 956 * @sk: socket which provides route info 957 * @dst: pointer to dst_entry * for result 958 * @fl6: flow to lookup 959 * 960 * This function performs a route lookup on the given flow. 961 * 962 * It returns zero on success, or a standard errno code on error. 963 */ 964 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) 965 { 966 *dst = NULL; 967 return ip6_dst_lookup_tail(sk, dst, fl6); 968 } 969 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 970 971 /** 972 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 973 * @sk: socket which provides route info 974 * @fl6: flow to lookup 975 * @final_dst: final destination address for ipsec lookup 976 * @can_sleep: we are in a sleepable context 977 * 978 * This function performs a route lookup on the given flow. 979 * 980 * It returns a valid dst pointer on success, or a pointer encoded 981 * error code. 
982 */ 983 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 984 const struct in6_addr *final_dst, 985 bool can_sleep) 986 { 987 struct dst_entry *dst = NULL; 988 int err; 989 990 err = ip6_dst_lookup_tail(sk, &dst, fl6); 991 if (err) 992 return ERR_PTR(err); 993 if (final_dst) 994 fl6->daddr = *final_dst; 995 if (can_sleep) 996 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; 997 998 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 999 } 1000 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 1001 1002 /** 1003 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1004 * @sk: socket which provides the dst cache and route info 1005 * @fl6: flow to lookup 1006 * @final_dst: final destination address for ipsec lookup 1007 * @can_sleep: we are in a sleepable context 1008 * 1009 * This function performs a route lookup on the given flow with the 1010 * possibility of using the cached route in the socket if it is valid. 1011 * It will take the socket dst lock when operating on the dst cache. 1012 * As a result, this function can only be used in process context. 1013 * 1014 * It returns a valid dst pointer on success, or a pointer encoded 1015 * error code. 
*/
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	/* Start from the route cached on the socket, if it is still valid. */
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	/* Returns NULL (reference released) when the cache does not fit fl6. */
	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/*
 *	Append @length - @transhdrlen bytes obtained through @getfrag to a
 *	single large skb and mark it for UDP segmentation offload
 *	(SKB_GSO_UDP), with a gso_size derived from @mtu and @fragheaderlen.
 *
 *	The skb at the tail of sk->sk_write_queue is used when there is one;
 *	otherwise a new skb is allocated and its headers are reserved.
 *
 *	Returns 0 on success, or the error from sock_alloc_send_skb() or
 *	skb_append_datato_frags().
 *
 *	NOTE(review): when the skb came from skb_peek_tail() rather than
 *	being allocated here, the success path still calls
 *	__skb_queue_tail() on it and the failure path still calls
 *	kfree_skb() on it.  If the peeked skb remains linked on the write
 *	queue (as the name "peek" suggests), that is a double enqueue or a
 *	free of a queued skb - TODO confirm against the skb queue API.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk,skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* fhdr is only a scratch header used to obtain an ident. */
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

/*
 *	Duplicate an extension header with kmemdup().  The copied size is
 *	(hdrlen + 1) * 8 bytes.  Returns NULL when @src is NULL or the
 *	allocation fails.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/*
 *	Duplicate a routing header with kmemdup().  The copied size is
 *	(hdrlen + 1) * 8 bytes.  Returns NULL when @src is NULL or the
 *	allocation fails.
 */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/*
 *	Recompute *@mtu and *@maxfraglen for the next skb to be allocated.
 *	Nothing is changed for routes flagged DST_XFRM_TUNNEL.  Otherwise:
 *	 - @skb == NULL (first fragment): rt->dst.header_len is subtracted
 *	   from *@mtu;
 *	 - @skb != NULL: *@mtu is reset to the MTU of rt->dst.path.
 *	*@maxfraglen is then derived from the new *@mtu, rounded so that the
 *	data after @fragheaderlen is a multiple of 8.
 */
static void ip6_append_data_mtu(int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = *mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = dst_mtu(rt->dst.path);
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above
miyazawa*/ 1196 } 1197 dst_hold(&rt->dst); 1198 cork->dst = &rt->dst; 1199 inet->cork.fl.u.ip6 = *fl6; 1200 np->cork.hop_limit = hlimit; 1201 np->cork.tclass = tclass; 1202 if (rt->dst.flags & DST_XFRM_TUNNEL) 1203 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1204 rt->dst.dev->mtu : dst_mtu(&rt->dst); 1205 else 1206 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1207 rt->dst.dev->mtu : dst_mtu(rt->dst.path); 1208 if (np->frag_size < mtu) { 1209 if (np->frag_size) 1210 mtu = np->frag_size; 1211 } 1212 cork->fragsize = mtu; 1213 if (dst_allfrag(rt->dst.path)) 1214 cork->flags |= IPCORK_ALLFRAG; 1215 cork->length = 0; 1216 exthdrlen = (opt ? opt->opt_flen : 0); 1217 length += exthdrlen; 1218 transhdrlen += exthdrlen; 1219 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; 1220 } else { 1221 rt = (struct rt6_info *)cork->dst; 1222 fl6 = &inet->cork.fl.u.ip6; 1223 opt = np->cork.opt; 1224 transhdrlen = 0; 1225 exthdrlen = 0; 1226 dst_exthdrlen = 0; 1227 mtu = cork->fragsize; 1228 } 1229 1230 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1231 1232 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1233 (opt ? opt->opt_nflen : 0); 1234 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1235 1236 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1237 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 1238 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); 1239 return -EMSGSIZE; 1240 } 1241 } 1242 1243 /* For UDP, check if TX timestamp is enabled */ 1244 if (sk->sk_type == SOCK_DGRAM) { 1245 err = sock_tx_timestamp(sk, &tx_flags); 1246 if (err) 1247 goto error; 1248 } 1249 1250 /* 1251 * Let's try using as much space as possible. 1252 * Use MTU if total length of the message fits into the MTU. 1253 * Otherwise, we need to reserve fragment header and 1254 * fragment alignment (= 8-15 octects, in total). 
1255 * 1256 * Note that we may need to "move" the data from the tail of 1257 * of the buffer to the new fragment when we split 1258 * the message. 1259 * 1260 * FIXME: It may be fragmented into multiple chunks 1261 * at once if non-fragmentable extension headers 1262 * are too large. 1263 * --yoshfuji 1264 */ 1265 1266 cork->length += length; 1267 if (length > mtu) { 1268 int proto = sk->sk_protocol; 1269 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ 1270 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); 1271 return -EMSGSIZE; 1272 } 1273 1274 if (proto == IPPROTO_UDP && 1275 (rt->dst.dev->features & NETIF_F_UFO)) { 1276 1277 err = ip6_ufo_append_data(sk, getfrag, from, length, 1278 hh_len, fragheaderlen, 1279 transhdrlen, mtu, flags, rt); 1280 if (err) 1281 goto error; 1282 return 0; 1283 } 1284 } 1285 1286 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1287 goto alloc_new_skb; 1288 1289 while (length > 0) { 1290 /* Check if the remaining data fits into current packet. */ 1291 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1292 if (copy < length) 1293 copy = maxfraglen - skb->len; 1294 1295 if (copy <= 0) { 1296 char *data; 1297 unsigned int datalen; 1298 unsigned int fraglen; 1299 unsigned int fraggap; 1300 unsigned int alloclen; 1301 alloc_new_skb: 1302 /* There's no room in the current skb */ 1303 if (skb) 1304 fraggap = skb->len - maxfraglen; 1305 else 1306 fraggap = 0; 1307 /* update mtu and maxfraglen if necessary */ 1308 if (skb == NULL || skb_prev == NULL) 1309 ip6_append_data_mtu(&mtu, &maxfraglen, 1310 fragheaderlen, skb, rt); 1311 1312 skb_prev = skb; 1313 1314 /* 1315 * If remaining data exceeds the mtu, 1316 * we know we need more fragment(s). 1317 */ 1318 datalen = length + fraggap; 1319 1320 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? 
mtu : maxfraglen) - fragheaderlen) 1321 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1322 if ((flags & MSG_MORE) && 1323 !(rt->dst.dev->features&NETIF_F_SG)) 1324 alloclen = mtu; 1325 else 1326 alloclen = datalen + fragheaderlen; 1327 1328 alloclen += dst_exthdrlen; 1329 1330 if (datalen != length + fraggap) { 1331 /* 1332 * this is not the last fragment, the trailer 1333 * space is regarded as data space. 1334 */ 1335 datalen += rt->dst.trailer_len; 1336 } 1337 1338 alloclen += rt->dst.trailer_len; 1339 fraglen = datalen + fragheaderlen; 1340 1341 /* 1342 * We just reserve space for fragment header. 1343 * Note: this may be overallocation if the message 1344 * (without MSG_MORE) fits into the MTU. 1345 */ 1346 alloclen += sizeof(struct frag_hdr); 1347 1348 if (transhdrlen) { 1349 skb = sock_alloc_send_skb(sk, 1350 alloclen + hh_len, 1351 (flags & MSG_DONTWAIT), &err); 1352 } else { 1353 skb = NULL; 1354 if (atomic_read(&sk->sk_wmem_alloc) <= 1355 2 * sk->sk_sndbuf) 1356 skb = sock_wmalloc(sk, 1357 alloclen + hh_len, 1, 1358 sk->sk_allocation); 1359 if (unlikely(skb == NULL)) 1360 err = -ENOBUFS; 1361 else { 1362 /* Only the initial fragment 1363 * is time stamped. 
1364 */ 1365 tx_flags = 0; 1366 } 1367 } 1368 if (skb == NULL) 1369 goto error; 1370 /* 1371 * Fill in the control structures 1372 */ 1373 skb->ip_summed = CHECKSUM_NONE; 1374 skb->csum = 0; 1375 /* reserve for fragmentation and ipsec header */ 1376 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1377 dst_exthdrlen); 1378 1379 if (sk->sk_type == SOCK_DGRAM) 1380 skb_shinfo(skb)->tx_flags = tx_flags; 1381 1382 /* 1383 * Find where to start putting bytes 1384 */ 1385 data = skb_put(skb, fraglen); 1386 skb_set_network_header(skb, exthdrlen); 1387 data += fragheaderlen; 1388 skb->transport_header = (skb->network_header + 1389 fragheaderlen); 1390 if (fraggap) { 1391 skb->csum = skb_copy_and_csum_bits( 1392 skb_prev, maxfraglen, 1393 data + transhdrlen, fraggap, 0); 1394 skb_prev->csum = csum_sub(skb_prev->csum, 1395 skb->csum); 1396 data += fraggap; 1397 pskb_trim_unique(skb_prev, maxfraglen); 1398 } 1399 copy = datalen - transhdrlen - fraggap; 1400 1401 if (copy < 0) { 1402 err = -EINVAL; 1403 kfree_skb(skb); 1404 goto error; 1405 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1406 err = -EFAULT; 1407 kfree_skb(skb); 1408 goto error; 1409 } 1410 1411 offset += copy; 1412 length -= datalen - fraggap; 1413 transhdrlen = 0; 1414 exthdrlen = 0; 1415 dst_exthdrlen = 0; 1416 1417 /* 1418 * Put the packet on the pending queue 1419 */ 1420 __skb_queue_tail(&sk->sk_write_queue, skb); 1421 continue; 1422 } 1423 1424 if (copy > length) 1425 copy = length; 1426 1427 if (!(rt->dst.dev->features&NETIF_F_SG)) { 1428 unsigned int off; 1429 1430 off = skb->len; 1431 if (getfrag(from, skb_put(skb, copy), 1432 offset, copy, off, skb) < 0) { 1433 __skb_trim(skb, off); 1434 err = -EFAULT; 1435 goto error; 1436 } 1437 } else { 1438 int i = skb_shinfo(skb)->nr_frags; 1439 struct page_frag *pfrag = sk_page_frag(sk); 1440 1441 err = -ENOMEM; 1442 if (!sk_page_frag_refill(sk, pfrag)) 1443 goto error; 1444 1445 if (!skb_can_coalesce(skb, i, 
pfrag->page, 1446 pfrag->offset)) { 1447 err = -EMSGSIZE; 1448 if (i == MAX_SKB_FRAGS) 1449 goto error; 1450 1451 __skb_fill_page_desc(skb, i, pfrag->page, 1452 pfrag->offset, 0); 1453 skb_shinfo(skb)->nr_frags = ++i; 1454 get_page(pfrag->page); 1455 } 1456 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1457 if (getfrag(from, 1458 page_address(pfrag->page) + pfrag->offset, 1459 offset, copy, skb->len, skb) < 0) 1460 goto error_efault; 1461 1462 pfrag->offset += copy; 1463 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1464 skb->len += copy; 1465 skb->data_len += copy; 1466 skb->truesize += copy; 1467 atomic_add(copy, &sk->sk_wmem_alloc); 1468 } 1469 offset += copy; 1470 length -= copy; 1471 } 1472 1473 return 0; 1474 1475 error_efault: 1476 err = -EFAULT; 1477 error: 1478 cork->length -= length; 1479 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1480 return err; 1481 } 1482 EXPORT_SYMBOL_GPL(ip6_append_data); 1483 1484 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1485 { 1486 if (np->cork.opt) { 1487 kfree(np->cork.opt->dst0opt); 1488 kfree(np->cork.opt->dst1opt); 1489 kfree(np->cork.opt->hopopt); 1490 kfree(np->cork.opt->srcrt); 1491 kfree(np->cork.opt); 1492 np->cork.opt = NULL; 1493 } 1494 1495 if (inet->cork.base.dst) { 1496 dst_release(inet->cork.base.dst); 1497 inet->cork.base.dst = NULL; 1498 inet->cork.base.flags &= ~IPCORK_ALLFRAG; 1499 } 1500 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1501 } 1502 1503 int ip6_push_pending_frames(struct sock *sk) 1504 { 1505 struct sk_buff *skb, *tmp_skb; 1506 struct sk_buff **tail_skb; 1507 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1508 struct inet_sock *inet = inet_sk(sk); 1509 struct ipv6_pinfo *np = inet6_sk(sk); 1510 struct net *net = sock_net(sk); 1511 struct ipv6hdr *hdr; 1512 struct ipv6_txoptions *opt = np->cork.opt; 1513 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; 1514 struct flowi6 *fl6 = 
&inet->cork.fl.u.ip6; 1515 unsigned char proto = fl6->flowi6_proto; 1516 int err = 0; 1517 1518 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1519 goto out; 1520 tail_skb = &(skb_shinfo(skb)->frag_list); 1521 1522 /* move skb->data to ip header from ext header */ 1523 if (skb->data < skb_network_header(skb)) 1524 __skb_pull(skb, skb_network_offset(skb)); 1525 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1526 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1527 *tail_skb = tmp_skb; 1528 tail_skb = &(tmp_skb->next); 1529 skb->len += tmp_skb->len; 1530 skb->data_len += tmp_skb->len; 1531 skb->truesize += tmp_skb->truesize; 1532 tmp_skb->destructor = NULL; 1533 tmp_skb->sk = NULL; 1534 } 1535 1536 /* Allow local fragmentation. */ 1537 if (np->pmtudisc < IPV6_PMTUDISC_DO) 1538 skb->local_df = 1; 1539 1540 *final_dst = fl6->daddr; 1541 __skb_pull(skb, skb_network_header_len(skb)); 1542 if (opt && opt->opt_flen) 1543 ipv6_push_frag_opts(skb, opt, &proto); 1544 if (opt && opt->opt_nflen) 1545 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1546 1547 skb_push(skb, sizeof(struct ipv6hdr)); 1548 skb_reset_network_header(skb); 1549 hdr = ipv6_hdr(skb); 1550 1551 *(__be32*)hdr = fl6->flowlabel | 1552 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1553 1554 hdr->hop_limit = np->cork.hop_limit; 1555 hdr->nexthdr = proto; 1556 hdr->saddr = fl6->saddr; 1557 hdr->daddr = *final_dst; 1558 1559 skb->priority = sk->sk_priority; 1560 skb->mark = sk->sk_mark; 1561 1562 skb_dst_set(skb, dst_clone(&rt->dst)); 1563 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1564 if (proto == IPPROTO_ICMPV6) { 1565 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1566 1567 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); 1568 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); 1569 } 1570 1571 err = ip6_local_out(skb); 1572 if (err) { 1573 if (err > 0) 1574 err = net_xmit_errno(err); 1575 if (err) 1576 goto error; 1577 } 1578 1579 
out: 1580 ip6_cork_release(inet, np); 1581 return err; 1582 error: 1583 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1584 goto out; 1585 } 1586 EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 1587 1588 void ip6_flush_pending_frames(struct sock *sk) 1589 { 1590 struct sk_buff *skb; 1591 1592 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1593 if (skb_dst(skb)) 1594 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1595 IPSTATS_MIB_OUTDISCARDS); 1596 kfree_skb(skb); 1597 } 1598 1599 ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 1600 } 1601 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 1602