/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

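/*
 * Pick the Fragment header identification for an outgoing packet.
 * A single global counter is shared by all flows and protected by a
 * spinlock; it wraps around but skips 0, so a frag_id of 0 can be used
 * by callers (see ip6_fragment) to mean "no id allocated yet".
 */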
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}

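/*
 * Second half of the output path, run after any fragmentation.  For
 * multicast destinations this is also where local delivery happens: if
 * the sending host is itself a member of the destination group (and the
 * socket has not cleared IPV6_MULTICAST_LOOP, reflected in np->mc_loop),
 * a clone of the packet is looped back through netif_rx(), and a packet
 * sent with hop_limit 0 is then not put on the wire at all.
 */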
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					&ipv6_hdr(skb)->saddr)) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

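/*
 * Output method for locally generated packets: fragment when the packet
 * exceeds the destination MTU and is not GSO, or when the route demands
 * fragmentation of every packet (dst_allfrag, set when an ICMP Packet
 * Too Big reported an MTU below the IPv6 minimum, so every packet needs
 * a Fragment header); otherwise transmit directly.
 */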
int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

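	/*
	 * The first 4 bytes of the header pack version, traffic class
	 * and flow label: 0x60000000 sets the version nibble to 6, the
	 * 8-bit traffic class lands at bits 20-27, and fl->fl6_flowlabel
	 * already holds the 20-bit flow label in network byte order, so
	 * it can be OR'ed in directly.
	 */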
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication, but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us).
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       struct in6_addr *saddr, struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

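/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered interest in this alert value (via the
 * IPV6_ROUTER_ALERT socket option).  Each matching socket but the last
 * gets a clone; the original skb goes to the last match, so a return
 * value of 1 means the skb has been consumed.
 */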
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Unicast neighbour discovery messages destined
			 * to the proxied address must be passed to the
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do NOT do any processing on RA packets; we push them
	 *	to user level AS IS, without any guarantee that an
	 *	application will be able to interpret them. The reason is
	 *	that we cannot do anything clever here.
	 *
	 *	We are not an end node, so if a packet contains AH/ESP
	 *	we cannot do anything with it. Defragmentation would also
	 *	be a mistake: RA packets must not be fragmented, because
	 *	there is no guarantee that different fragments will travel
	 *	along the same path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* The IPv6 specs say nothing about it, but it is clear that we
	   cannot send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same:
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling the hop limit is delayed to this point, after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

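/*
 * Find where the Fragment header has to be inserted: walk the extension
 * header chain and return the offset just past the per-fragment headers
 * (Hop-by-Hop, Routing, and a Destination Options header that precedes
 * a Routing header).  *nexthdr is left pointing at the nexthdr byte
 * that must be overwritten with NEXTHDR_FRAGMENT.
 */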
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.  (This last
	 * check should be redundant, but it's free.)
	 */
	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
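
	/*
	 * From here on, mtu is the payload budget of each fragment: the
	 * link MTU minus the unfragmentable part (hlen) and the 8-byte
	 * Fragment header.  For example, with a 1500-byte MTU and a bare
	 * 40-byte IPv6 header, each fragment may carry 1500 - 40 - 8 =
	 * 1452 bytes, which the slow path below rounds down to a
	 * multiple of 8 (1448) for all but the last fragment.
	 */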

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				skb->truesize -= frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare the header of the next frame
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_RESERVED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected case is
	 * not very simple. Take into account that we do not support
	 * routing by source, TOS, or MSG_DONTROUTE	--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route, check that
	 *    the cached destination is current. If it is a network
	 *    route, we still may check its validity using the saved
	 *    pointer to the last used address: daddr_cache. We do not
	 *    want to save the whole address now (because the main
	 *    consumer of this service is TCP, which does not have this
	 *    problem), so this last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * If the dst entry we've looked up has a neighbour entry that
	 * is not in a valid state and the source address from the flow
	 * is marked OPTIMISTIC, release the found dst entry and replace
	 * it with the dst entry of the nexthop router.
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* The network device supports UDP large send offload, so build
	 * one single skb containing the complete UDP datagram.
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for the hardware header */
		skb_reserve(skb, hh_len);

		/* create space for the UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize the network header pointer */
		skb_reset_network_header(skb);

		/* initialize the protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path.
	 */
	kfree_skb(skb);

	return err;
}

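/*
 * Append data to the pending (corked) packet for this socket.  On the
 * first call the cork state is set up from the passed-in route, flow,
 * options, hop limit and traffic class; later calls reuse that state
 * and just add more data.  The data is packed into queued skbs whose
 * size already anticipates the Fragment header, so that
 * ip6_push_pending_frames() can later turn the whole queue into a
 * single packet (or a chain ready for ip6_fragment()).
 */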
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above --miyazawa */
		}
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);
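
	/*
	 * maxfraglen is the largest size a queued skb may reach, measured
	 * from the network header: the per-fragment payload is rounded
	 * down to a multiple of 8 and the 8-byte Fragment header is
	 * reserved.  For example, with mtu = 1500 and fragheaderlen = 40,
	 * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488.
	 */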

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length >
		    sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into the current packet. */
		copy = (inet->cork.length <= mtu &&
			!(inet->cork.flags & IPCORK_ALLFRAG) ?
			mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If the remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu &&
				       !(inet->cork.flags & IPCORK_ALLFRAG) ?
				       mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for the fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 &&
				   getfrag(from, data + transhdrlen, offset,
					   copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    page_address(frag->page) +
				    frag->page_offset + frag->size,
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

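/*
 * Turn the socket's pending write queue into one packet and send it:
 * all queued skbs are chained onto the first via frag_list, the
 * extension headers and IPv6 header saved in the cork state are
 * prepended, and the result is handed to netfilter's LOCAL_OUT hook
 * and dst_output().  If the final packet is larger than IPV6_MAXPLEN,
 * payload_len is set to 0.
 */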
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to the ip header from the ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
			 htonl(0x60000000 | ((int)np->cork.tclass << 20));

	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	else
		hdr->payload_len = 0;
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
		      dst_output);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
	return err;
error:
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	inet->cork.flags &= ~IPCORK_OPT;

	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}