xref: /linux/net/ipv6/ip6_output.c (revision 2638eb8b50cfc16240e0bb080b9afbf541a9b39d)
1 /*
2  *	IPv6 output functions
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on linux/net/ipv4/ip_output.c
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *	Changes:
16  *	A.N.Kuznetsov	:	arithmetics in fragmentation.
17  *				extension headers are implemented.
18  *				route changes now work.
19  *				ip6_forward does not confuse sniffers.
20  *				etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *	Imran Patel	:	frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *			:       add ip6_append_data and related functions
26  *				for datagram xmit
27  */
28 
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41 
42 #include <linux/bpf-cgroup.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
45 
46 #include <net/sock.h>
47 #include <net/snmp.h>
48 
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
55 #include <net/icmp.h>
56 #include <net/xfrm.h>
57 #include <net/checksum.h>
58 #include <linux/mroute6.h>
59 #include <net/l3mdev.h>
60 #include <net/lwtunnel.h>
61 
/* Final IPv6 transmit step: resolve (or create) the neighbour entry for the
 * route's nexthop and hand the skb to neigh_output().  Multicast packets are
 * additionally looped back to local listeners / the multicast routing socket
 * and accounted in the per-device multicast counters.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when the socket asks
		 * for multicast loopback, unless the mrouting code already
		 * forwarded this packet (IP6SKB_FORWARDED).
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0: the sender only wanted the local
			 * loopback delivery above — never put it on the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local scope multicast must never leave the node. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* A lightweight tunnel may take over transmission entirely. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	/* Neighbour lookup is lockless; the BH-disabled RCU section covers
	 * both the lookup and the neigh_output() call.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
130 
/* Post-POSTROUTING output: re-run dst_output() when netfilter SNAT routed
 * the packet into a new XFRM policy, otherwise fragment if the packet
 * exceeds the path MTU (or the route demands fragmentation) and transmit
 * via ip6_finish_output2().
 */
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		/* NOTE(review): this marks the IPv4 control block
		 * (IPCB/IPSKB_REROUTED) on an IPv6 path, mirroring
		 * ip_output.c — confirm this is intentional.
		 */
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
148 
149 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
150 {
151 	int ret;
152 
153 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
154 	switch (ret) {
155 	case NET_XMIT_SUCCESS:
156 		return __ip6_finish_output(net, sk, skb);
157 	case NET_XMIT_CN:
158 		return __ip6_finish_output(net, sk, skb) ? : ret;
159 	default:
160 		kfree_skb(skb);
161 		return ret;
162 	}
163 }
164 
/* dst_output() entry point for IPv6: runs the NF_INET_POST_ROUTING hook
 * and then ip6_finish_output().  Everything is discarded when IPv6 is
 * administratively disabled on the egress device.
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	/* Skip the POST_ROUTING hook for packets that were already
	 * rerouted through it (IP6SKB_REROUTED).
	 */
	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
184 
185 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
186 {
187 	if (!np->autoflowlabel_set)
188 		return ip6_default_np_autolabel(net);
189 	else
190 		return np->autoflowlabel;
191 }
192 
193 /*
194  * xmit an sk_buff (used by TCP, SCTP and DCCP)
195  * Note : socket lock is not held for SYNACK packets, but might be modified
196  * by calls to skb_set_owner_w() and ipv6_local_error(),
197  * which are using proper atomic operations or spinlocks.
198  */
/* Transmit a fully built payload as an IPv6 packet: grow headroom if
 * needed, push extension headers from @opt, fill in the IPv6 header and
 * pass the packet through NF_INET_LOCAL_OUT to dst_output().  Oversized
 * non-GSO packets that may not ignore the MTU get EMSGSIZE.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Room needed in front of the payload: IPv6 header, link-layer
	 * header, and any extension headers carried in @opt.
	 */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		/* Keep the new skb charged to the owning socket. */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* May rewrite first_hop when a routing header is present. */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	/* Packet exceeds the MTU and may not be sent as-is: report
	 * EMSGSIZE to the owner and drop it.
	 */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
299 
/* Deliver a Router Alert packet to every raw socket registered (via
 * IPV6_ROUTER_ALERT) for alert value @sel.  Returns 1 when some socket
 * consumed the skb, 0 when the caller still owns it.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* rtalert_isolate keeps alerts inside the netns
			 * that owns the ingress device.
			 */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* Each earlier match gets a clone; the final match
			 * receives the original skb after the loop.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
334 
/* Classify a packet whose destination we proxy NDP for:
 *   1  - deliver locally (unicast neighbour discovery message),
 *   0  - forward normally,
 *  -1  - discard (link-local destinations cannot be proxied).
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Walk past any extension headers to find the transport header. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure at least the ICMPv6 type byte is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
386 
/* Last step of ip6_forward(): account the forwarded datagram and hand it
 * to dst_output().  Packets already forwarded by hardware (switchdev L3
 * offload mark) are only accounted here and then consumed.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	/* The timestamp belongs to the receive path; clear it before
	 * the packet re-enters the transmit path.
	 */
	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}
405 
406 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
407 {
408 	if (skb->len <= mtu)
409 		return false;
410 
411 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
412 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
413 		return true;
414 
415 	if (skb->ignore_df)
416 		return false;
417 
418 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
419 		return false;
420 
421 	return true;
422 }
423 
/* Forward an IPv6 packet received on one interface out another: validate
 * forwarding preconditions, hand Router Alert packets to user space,
 * enforce the hop limit and path MTU, emit redirects where appropriate,
 * then decrement hop_limit and pass the packet through NF_INET_FORWARD.
 * Returns 0 on success (or local consumption), negative errno on drop.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only packets unicast to this host's MAC are forwarded. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A socket-owned skb has been diverted locally; never forward it. */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have changed the route. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have copied the header; re-read the pointer. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
582 
583 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
584 {
585 	to->pkt_type = from->pkt_type;
586 	to->priority = from->priority;
587 	to->protocol = from->protocol;
588 	skb_dst_drop(to);
589 	skb_dst_set(to, dst_clone(skb_dst(from)));
590 	to->dev = from->dev;
591 	to->mark = from->mark;
592 
593 	skb_copy_hash(to, from);
594 
595 #ifdef CONFIG_NET_SCHED
596 	to->tc_index = from->tc_index;
597 #endif
598 	nf_copy(to, from);
599 	skb_ext_copy(to, from);
600 	skb_copy_secmark(to, from);
601 }
602 
/* Set up fast-path fragmentation over an existing frag_list: stash a copy
 * of the first @hlen header bytes in @iter, detach the frag_list, and turn
 * @skb itself into the first fragment by inserting a fragment header and
 * trimming it to its own page data.  Returns 0 or -ENOMEM.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* Take over the frag list; subsequent fragments come from it. */
	iter->frag_list = skb_shinfo(skb)->frag_list;
	iter->frag = iter->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Open a gap after the headers and rebuild them around the new
	 * fragment header.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	/* First fragment covers only the head + page frags of @skb. */
	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
644 
/* Turn the next queued frag_list skb into a complete IPv6 fragment:
 * prepend the saved headers plus a fragment header, advance the running
 * fragment offset, and set IP6_MF on every fragment but the last.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* Offset advances by the payload of the fragment just sent. */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
669 
670 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
671 		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
672 		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
673 {
674 	state->prevhdr = prevhdr;
675 	state->nexthdr = nexthdr;
676 	state->frag_id = frag_id;
677 
678 	state->hlen = hlen;
679 	state->mtu = mtu;
680 
681 	state->left = skb->len - hlen;	/* Space per frame */
682 	state->ptr = hlen;		/* Where to start from */
683 
684 	state->hroom = hdr_room;
685 	state->troom = needed_tailroom;
686 
687 	state->offset = 0;
688 }
689 EXPORT_SYMBOL(ip6_frag_init);
690 
/* Build and return the next fragment skb from the slow-path state set up
 * by ip6_frag_init(): allocate, copy headers plus the next payload chunk,
 * and fill in the fragment header.  Returns ERR_PTR(-ENOMEM) on failure.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the copied header chain to point at the fragment header. */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
766 
/* Fragment @skb to fit the path MTU and transmit each fragment via
 * @output.  Tries the fast path first (reusing an existing frag_list when
 * its geometry already matches), falling back to the slow path that
 * allocates and copies each fragment.  Packets that may not be fragmented
 * get an ICMPV6_PKT_TOOBIG and -EMSGSIZE.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Remember prevhdr as an offset: the header may be reallocated
	 * below (skb_checksum_help), invalidating the pointer.
	 */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on, mtu is the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Finish any deferred checksum before the payload is split up. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* Fast path only when every piece already has fragment
		 * geometry and nothing is shared.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(iter.frag_list);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the ownership transfers done before bailing out. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
946 
947 static inline int ip6_rt_check(const struct rt6key *rt_key,
948 			       const struct in6_addr *fl_addr,
949 			       const struct in6_addr *addr_cache)
950 {
951 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
952 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
953 }
954 
/* Validate a socket's cached dst against the flow @fl6.  Returns the dst
 * when still usable, or NULL (releasing the reference) when the cache is
 * stale or belongs to another address family.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A v4-mapped cache entry can never serve an IPv6 flow. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
1001 
/* Core of the dst lookup: fill in a source address when the flow has
 * none, perform the route lookup, and (with optimistic DAD) fall back to
 * the default router's dst when the nexthop neighbour is not yet valid.
 * On error, releases *dst, sets it to NULL and returns a negative errno.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source may only be paired with a v4-mapped (or
	 * unspecified) destination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
1119 
1120 /**
1121  *	ip6_dst_lookup - perform route lookup on flow
1122  *	@sk: socket which provides route info
1123  *	@dst: pointer to dst_entry * for result
1124  *	@fl6: flow to lookup
1125  *
1126  *	This function performs a route lookup on the given flow.
1127  *
1128  *	It returns zero on success, or a standard errno code on error.
1129  */
1130 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1131 		   struct flowi6 *fl6)
1132 {
1133 	*dst = NULL;
1134 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1135 }
1136 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1137 
1138 /**
1139  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1140  *	@sk: socket which provides route info
1141  *	@fl6: flow to lookup
1142  *	@final_dst: final destination address for ipsec lookup
1143  *
1144  *	This function performs a route lookup on the given flow.
1145  *
1146  *	It returns a valid dst pointer on success, or a pointer encoded
1147  *	error code.
1148  */
1149 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1150 				      const struct in6_addr *final_dst)
1151 {
1152 	struct dst_entry *dst = NULL;
1153 	int err;
1154 
1155 	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1156 	if (err)
1157 		return ERR_PTR(err);
1158 	if (final_dst)
1159 		fl6->daddr = *final_dst;
1160 
1161 	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1162 }
1163 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1164 
1165 /**
1166  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1167  *	@sk: socket which provides the dst cache and route info
1168  *	@fl6: flow to lookup
1169  *	@final_dst: final destination address for ipsec lookup
1170  *	@connected: whether @sk is connected or not
1171  *
1172  *	This function performs a route lookup on the given flow with the
1173  *	possibility of using the cached route in the socket if it is valid.
1174  *	It will take the socket dst lock when operating on the dst cache.
1175  *	As a result, this function can only be used in process context.
1176  *
1177  *	In addition, for a connected socket, cache the dst in the socket
1178  *	if the current cache is not valid.
1179  *
1180  *	It returns a valid dst pointer on success, or a pointer encoded
1181  *	error code.
1182  */
1183 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1184 					 const struct in6_addr *final_dst,
1185 					 bool connected)
1186 {
1187 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1188 
1189 	dst = ip6_sk_dst_check(sk, dst, fl6);
1190 	if (dst)
1191 		return dst;
1192 
1193 	dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1194 	if (connected && !IS_ERR(dst))
1195 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1196 
1197 	return dst;
1198 }
1199 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1200 
1201 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1202 					       gfp_t gfp)
1203 {
1204 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1205 }
1206 
1207 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1208 						gfp_t gfp)
1209 {
1210 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1211 }
1212 
1213 static void ip6_append_data_mtu(unsigned int *mtu,
1214 				int *maxfraglen,
1215 				unsigned int fragheaderlen,
1216 				struct sk_buff *skb,
1217 				struct rt6_info *rt,
1218 				unsigned int orig_mtu)
1219 {
1220 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1221 		if (!skb) {
1222 			/* first fragment, reserve header_len */
1223 			*mtu = orig_mtu - rt->dst.header_len;
1224 
1225 		} else {
1226 			/*
1227 			 * this fragment is not first, the headers
1228 			 * space is regarded as data space.
1229 			 */
1230 			*mtu = orig_mtu;
1231 		}
1232 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1233 			      + fragheaderlen - sizeof(struct frag_hdr);
1234 	}
1235 }
1236 
/* Initialize @cork/@v6_cork for a new corking cycle: deep-copy the
 * caller's tx options into @v6_cork, take a reference on @rt's dst,
 * record the flow and per-packet fields, and pick the fragment size.
 *
 * Returns 0 on success or a negative errno.  On failure a partially
 * duplicated option block may be left in v6_cork->opt; it is freed by
 * a later ip6_cork_release() (see ip6_make_skb()), which also drops
 * the dst reference taken here.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A previous cork must have been released first. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header; v6_cork->opt stays set
		 * on failure so the copies made so far can be freed.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Fragment size: with PMTU probing use the device MTU directly,
	 * otherwise the path MTU (for xfrm-transformed routes, of the
	 * underlying path).
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A non-zero socket frag_size may only lower the limit. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1312 
/* Core of ip6_append_data()/ip6_make_skb(): pull @length bytes via
 * @getfrag and append them to @queue, growing the tail skb or starting
 * new MTU-sized fragments as needed.  Returns 0 on success or a
 * negative errno; on error the bytes that were not appended are
 * subtracted from cork->length again.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;	/* deferred sk_wmem_alloc charge */
	bool paged, extra_uref = false;

	/* Extension-header space is only accounted on the very first skb
	 * of the cork; later calls append pure payload.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	/* With SOF_TIMESTAMPING_OPT_ID, only the initial fragment of this
	 * datagram consumes a timestamp key (tskey is zeroed after use).
	 */
	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* IPV6_DONTFRAG on UDP/RAW: report the path MTU to the socket
	 * instead of fragmenting.
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	/* MSG_ZEROCOPY: pin user pages instead of copying when the device
	 * can do SG + checksum offload; otherwise fall back to copying
	 * but keep the ubuf so completion is still notified.
	 */
	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb;	/* only extra ref if !MSG_MORE */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			/* Choose the linear allocation size: full mtu when
			 * the device can't do SG and more data will follow;
			 * full fraglen when not paged; otherwise a small
			 * header-only linear part, rest in page frags.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			/* First skb (transhdrlen != 0) may block; later ones
			 * are bounded by the 2*sndbuf wmem limit instead.
			 */
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			/* Move the tail of the previous fragment (beyond
			 * maxfraglen) into this one, keeping checksums
			 * consistent.
			 */
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		/* Append into the current skb: tailroom copy, page-frag
		 * coalescing, or zerocopy page pinning.
		 */
		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	/* Commit the deferred write-memory accounting in one go. */
	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1656 
1657 int ip6_append_data(struct sock *sk,
1658 		    int getfrag(void *from, char *to, int offset, int len,
1659 				int odd, struct sk_buff *skb),
1660 		    void *from, int length, int transhdrlen,
1661 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1662 		    struct rt6_info *rt, unsigned int flags)
1663 {
1664 	struct inet_sock *inet = inet_sk(sk);
1665 	struct ipv6_pinfo *np = inet6_sk(sk);
1666 	int exthdrlen;
1667 	int err;
1668 
1669 	if (flags&MSG_PROBE)
1670 		return 0;
1671 	if (skb_queue_empty(&sk->sk_write_queue)) {
1672 		/*
1673 		 * setup for corking
1674 		 */
1675 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1676 				     ipc6, rt, fl6);
1677 		if (err)
1678 			return err;
1679 
1680 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1681 		length += exthdrlen;
1682 		transhdrlen += exthdrlen;
1683 	} else {
1684 		fl6 = &inet->cork.fl.u.ip6;
1685 		transhdrlen = 0;
1686 	}
1687 
1688 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1689 				 &np->cork, sk_page_frag(sk), getfrag,
1690 				 from, length, transhdrlen, flags, ipc6);
1691 }
1692 EXPORT_SYMBOL_GPL(ip6_append_data);
1693 
1694 static void ip6_cork_release(struct inet_cork_full *cork,
1695 			     struct inet6_cork *v6_cork)
1696 {
1697 	if (v6_cork->opt) {
1698 		kfree(v6_cork->opt->dst0opt);
1699 		kfree(v6_cork->opt->dst1opt);
1700 		kfree(v6_cork->opt->hopopt);
1701 		kfree(v6_cork->opt->srcrt);
1702 		kfree(v6_cork->opt);
1703 		v6_cork->opt = NULL;
1704 	}
1705 
1706 	if (cork->base.dst) {
1707 		dst_release(cork->base.dst);
1708 		cork->base.dst = NULL;
1709 		cork->base.flags &= ~IPCORK_ALLFRAG;
1710 	}
1711 	memset(&cork->fl, 0, sizeof(cork->fl));
1712 }
1713 
/* Collapse the queued fragments into a single skb (extra fragments go
 * on the first skb's frag_list), prepend the IPv6 header and extension
 * headers, update output stats and release the cork.  Returns the
 * finished skb, or NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining fragments onto the first skb's frag_list,
	 * transferring their length/truesize accounting; ownership moves
	 * to the head skb, so drop each fragment's destructor.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* Extension headers may rewrite the destination (routing header),
	 * so keep the original daddr for the header we build below.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1789 
1790 int ip6_send_skb(struct sk_buff *skb)
1791 {
1792 	struct net *net = sock_net(skb->sk);
1793 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1794 	int err;
1795 
1796 	err = ip6_local_out(net, skb->sk, skb);
1797 	if (err) {
1798 		if (err > 0)
1799 			err = net_xmit_errno(err);
1800 		if (err)
1801 			IP6_INC_STATS(net, rt->rt6i_idev,
1802 				      IPSTATS_MIB_OUTDISCARDS);
1803 	}
1804 
1805 	return err;
1806 }
1807 
/* Finalize and transmit whatever is pending on the socket's write
 * queue; a NULL skb (nothing queued) is not an error.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1819 
1820 static void __ip6_flush_pending_frames(struct sock *sk,
1821 				       struct sk_buff_head *queue,
1822 				       struct inet_cork_full *cork,
1823 				       struct inet6_cork *v6_cork)
1824 {
1825 	struct sk_buff *skb;
1826 
1827 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1828 		if (skb_dst(skb))
1829 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1830 				      IPSTATS_MIB_OUTDISCARDS);
1831 		kfree_skb(skb);
1832 	}
1833 
1834 	ip6_cork_release(cork, v6_cork);
1835 }
1836 
1837 void ip6_flush_pending_frames(struct sock *sk)
1838 {
1839 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1840 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1841 }
1842 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1843 
1844 struct sk_buff *ip6_make_skb(struct sock *sk,
1845 			     int getfrag(void *from, char *to, int offset,
1846 					 int len, int odd, struct sk_buff *skb),
1847 			     void *from, int length, int transhdrlen,
1848 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1849 			     struct rt6_info *rt, unsigned int flags,
1850 			     struct inet_cork_full *cork)
1851 {
1852 	struct inet6_cork v6_cork;
1853 	struct sk_buff_head queue;
1854 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1855 	int err;
1856 
1857 	if (flags & MSG_PROBE)
1858 		return NULL;
1859 
1860 	__skb_queue_head_init(&queue);
1861 
1862 	cork->base.flags = 0;
1863 	cork->base.addr = 0;
1864 	cork->base.opt = NULL;
1865 	cork->base.dst = NULL;
1866 	v6_cork.opt = NULL;
1867 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1868 	if (err) {
1869 		ip6_cork_release(cork, &v6_cork);
1870 		return ERR_PTR(err);
1871 	}
1872 	if (ipc6->dontfrag < 0)
1873 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1874 
1875 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1876 				&current->task_frag, getfrag, from,
1877 				length + exthdrlen, transhdrlen + exthdrlen,
1878 				flags, ipc6);
1879 	if (err) {
1880 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1881 		return ERR_PTR(err);
1882 	}
1883 
1884 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1885 }
1886