xref: /linux/net/ipv6/ip6_output.c (revision 7f3edee81fbd49114c28057512906f169caa0bed)
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

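/*
 * Pick the fragmentation ID for an outgoing packet.  The counter is
 * global (shared by all flows) and simply increments under a spinlock,
 * skipping 0 so that callers such as ip6_fragment() can use a zero
 * frag_id to mean "no ID selected yet".
 */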
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}


static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					&ipv6_hdr(skb)->saddr)) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

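/*
 * MTU to honour on output: sockets probing the path MTU
 * (IPV6_PMTUDISC_PROBE) use the raw device MTU; everybody else uses
 * the (possibly smaller) MTU cached in the route.
 */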
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
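	/*
	 * The first 4 bytes of the IPv6 header pack version, traffic
	 * class and flow label: bits 31-28 hold 6, bits 27-20 the
	 * traffic class, bits 19-0 the flow label.  For example,
	 * tclass 0x0a with a zero flow label yields htonl(0x60a00000).
	 */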

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
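
/*
 * Rough usage sketch (illustrative, not from this file): a connected
 * TCP socket transmits roughly as
 *
 *	err = ip6_xmit(sk, skb, &fl, np->opt, 0);
 *
 * with ipfragok == 0, so an oversized segment is bounced back to the
 * sender with ICMPV6_PKT_TOOBIG instead of being fragmented.
 */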

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance-critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       struct in6_addr *saddr, struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* A unicast neighbour discovery message destined
			 * to the proxied address must be passed to the
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* The IPv6 specs say nothing about it, but it is clear that we
	   cannot send redirects for source-routed frames.
	   We also don't send redirects for frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same;
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Decrementing the hop limit is delayed until after the skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

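/*
 * Return the offset at which a Fragment header must be inserted:
 * after Hop-by-Hop, Routing and any Destination Options header that
 * precedes a Routing header, i.e. at the end of the per-RFC 2460
 * "unfragmentable part".  *nexthdr is left pointing at the nexthdr
 * byte that has to be patched to NEXTHDR_FRAGMENT.
 */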
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.  (This last
	 * check should be redundant, but it's free.)
	 */
	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
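	/*
	 * Illustrative arithmetic: with a 1500-byte path MTU and a plain
	 * 40-byte IPv6 header (hlen == 40), mtu is now 1500 - 40 - 8 =
	 * 1452 bytes of fragmentable payload; the slow path additionally
	 * rounds non-final fragments down to a multiple of 8, i.e. 1448.
	 */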

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

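		/*
		 * frag_off carries the fragment offset in its upper 13
		 * bits, in 8-octet units; the low three bits are flags
		 * (IP6_MF is the lowest).  Every non-final fragment's
		 * data length is a multiple of 8, so the byte offset
		 * computed below has its low three bits clear and can
		 * be or-ed with the flags directly.
		 */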
		for (;;) {
			/* Prepare the header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_RESERVED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

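/*
 * Returns nonzero when the cached route can no longer be trusted for
 * this flow: it is neither a host route whose destination matches
 * fl_addr nor a route validated by the socket's cached peer address.
 */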
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now
	 *    (because the main consumer of this service
	 *    is TCP, which does not have this problem),
	 *    so the last trick works only on connected
	 *    sockets.
	 * 2. oif should also be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * If the dst entry we've looked up has a neighbour entry that
	 * is in the INCOMPLETE state and the source address from the
	 * flow is marked as OPTIMISTIC, we release the found dst entry
	 * and replace it instead with the dst entry of the nexthop
	 * router.
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
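
/*
 * Rough usage sketch (illustrative, not from this file): a datagram
 * sender typically does
 *
 *	struct dst_entry *dst;
 *	int err = ip6_sk_dst_lookup(sk, &dst, &fl);
 *	if (err)
 *		goto out;
 *
 * and then attaches the looked-up dst to the skbs it builds.
 */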

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* The network device supports UDP large send offload, so build
	 * one single skb containing the complete UDP datagram.
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path.
	 */
	kfree_skb(skb);

	return err;
}
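
/*
 * Illustrative arithmetic: with mtu == 1500 and fragheaderlen == 40
 * (plain IPv6 header), gso_size becomes 1500 - 40 - 8 = 1452, so each
 * fragment the device emits still fits the path MTU once the Fragment
 * header is inserted.
 */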

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa */
		}
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);
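	/*
	 * Illustrative numbers: mtu == 1500 and fragheaderlen == 40 give
	 * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488.  A fragment of
	 * that total length carries 1448 bytes of fragmentable data (a
	 * multiple of 8) and occupies 40 + 8 + 1448 = 1496 bytes on the
	 * wire once the Fragment header is inserted, still within the MTU.
	 */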

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen,
						       offset, copy, fraggap,
						       skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    page_address(frag->page) +
				    frag->page_offset + frag->size,
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to the IP header, in case it still points
	 * below it */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl->fl6_flowlabel |
			htonl(0x60000000 | ((int)np->cork.tclass << 20));

	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	else
		hdr->payload_len = 0;
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
		      dst_output);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}
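
/*
 * Rough usage sketch (illustrative, not from this file): datagram
 * protocols cork a socket, append payload, then flush:
 *
 *	err = ip6_append_data(sk, getfrag, msg, len,
 *			      sizeof(struct udphdr), hlimit, tclass,
 *			      opt, &fl, rt, msg->msg_flags);
 *	if (!err)
 *		err = ip6_push_pending_frames(sk);
 *	else
 *		ip6_flush_pending_frames(sk);
 *
 * which is roughly what the UDPv6 send path does, give or take
 * error handling and corking across sendmsg() calls.
 */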

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
1452