// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb(skb);
		return ret;
	}
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
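
/*
 * Usage sketch (illustrative, not part of the original file): roughly how a
 * connection-oriented transport hands a fully built segment to ip6_xmit(),
 * in the spirit of inet6_csk_xmit().  The function name is hypothetical, and
 * the route is assumed to be already attached to the skb via skb_dst_set().
 * Compiled out on purpose.
 */
#if 0
static int example_transport_xmit(struct sock *sk, struct sk_buff *skb,
				  struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	int res;

	rcu_read_lock();
	/* opt is RCU-protected; hold the read lock across the call */
	res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
		       np->tclass, sk->sk_priority);
	rcu_read_unlock();
	return res;
}
#endif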

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Unicast neighbour discovery messages
			 * destined to the proxied address are
			 * passed to the input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do NOT process router alert (RA) packets;
	 *	we push them to user level AS IS, without any
	 *	warranty that the application will be able to
	 *	interpret them. The reason is that we cannot
	 *	do anything clever here.
	 *
	 *	We are not the end node, so if the packet
	 *	contains AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake; RA
	 *	packets cannot be fragmented, because there is
	 *	no guarantee that different fragments will go
	 *	along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force the output device, so its address is used as the ICMP error source */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;
		/*
		 *	The incoming and outgoing devices are the
		 *	same; send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force the output device, so its address is used as the ICMP error source */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
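
/*
 * Usage sketch (illustrative, not part of the original file): the intended
 * pattern for the fraglist iterator, mirroring the fast path in
 * ip6_fragment() below.  The function name is hypothetical; hlen and prevhdr
 * are assumed to come from ip6_find_1stfragopt().  Compiled out on purpose.
 */
#if 0
static int example_fraglist_output(struct net *net, struct sock *sk,
				   struct sk_buff *skb, unsigned int hlen,
				   u8 *prevhdr, u8 nexthdr, __be32 frag_id,
				   int (*output)(struct net *, struct sock *,
						 struct sk_buff *))
{
	struct ip6_fraglist_iter iter;
	int err;

	err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter);
	if (err < 0)
		return err;

	for (;;) {
		/* Prepare the next fragment's headers before sending skb. */
		if (iter.frag)
			ip6_fraglist_prepare(skb, &iter);

		err = output(net, sk, skb);
		if (err || !iter.frag)
			break;

		skb = ip6_fraglist_next(&iter);
	}

	kfree(iter.tmp_hdr);
	if (err)
		kfree_skb_list(iter.frag);	/* drop the unsent fragments */
	return err;
}
#endif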

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
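
/*
 * Usage sketch (illustrative, not part of the original file): the slow-path
 * pairing of ip6_frag_init()/ip6_frag_next(), as used by ip6_fragment()
 * below.  The function name and the source of hlen/mtu/prevhdr are
 * hypothetical.  Compiled out on purpose.
 */
#if 0
static int example_slow_fragment(struct net *net, struct sock *sk,
				 struct sk_buff *skb, unsigned int hlen,
				 unsigned int mtu, u8 *prevhdr, u8 nexthdr,
				 __be32 frag_id,
				 int (*output)(struct net *, struct sock *,
					       struct sk_buff *))
{
	struct dst_entry *dst = skb_dst(skb);
	struct ip6_frag_state state;
	struct sk_buff *frag;
	int err = 0;

	ip6_frag_init(skb, hlen, mtu, dst->dev->needed_tailroom,
		      LL_RESERVED_SPACE(dst->dev), prevhdr, nexthdr, frag_id,
		      &state);

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);	/* allocates one fragment */
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			break;
		}
		err = output(net, sk, frag);
		if (err)
			break;
	}

	if (err)
		kfree_skb(skb);
	else
		consume_skb(skb);	/* original skb is fully copied out */
	return err;
}
#endif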

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare the header of the next fragment
			 * before the previous one is sent. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS, or
	 * MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may check
	 *    its validity using the saved pointer to the
	 *    last used address: daddr_cache.
	 *    We do not want to save the whole address now
	 *    (because the main consumer of this service is
	 *    TCP, which does not have this problem), so
	 *    this last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
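
/*
 * Usage sketch (illustrative, not part of the original file): a minimal
 * caller of ip6_dst_lookup().  The helper name is hypothetical; the caller
 * owns the returned dst reference and must release it.  Compiled out on
 * purpose.
 */
#if 0
static int example_route_lookup(struct net *net, struct sock *sk,
				const struct in6_addr *daddr, int oif)
{
	struct dst_entry *dst;
	struct flowi6 fl6;
	int err;

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = *daddr;
	fl6.flowi6_oif = oif;

	err = ip6_dst_lookup(net, sk, &dst, &fl6);
	if (err)
		return err;

	/* ... attach with skb_dst_set(skb, dst) to hand over the ref, or: */
	dst_release(dst);
	return 0;
}
#endif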

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
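
/*
 * Usage sketch (illustrative, not part of the original file): unlike
 * ip6_dst_lookup(), this variant returns the dst (or an ERR_PTR) and also
 * runs the xfrm policy lookup.  The helper name is hypothetical.  Compiled
 * out on purpose.
 */
#if 0
static int example_route_flow(struct net *net, struct sock *sk,
			      struct sk_buff *skb, struct flowi6 *fl6)
{
	struct dst_entry *dst;

	dst = ip6_dst_lookup_flow(net, sk, fl6, NULL);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	skb_dst_set(skb, dst);	/* skb now owns the reference */
	return 0;
}
#endif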

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
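
/*
 * Usage sketch (illustrative, not part of the original file): how a
 * datagram sendmsg path uses the socket-cached lookup, in the spirit of
 * udpv6_sendmsg(), which treats a socket in TCP_ESTABLISHED state as
 * connected.  The helper name is hypothetical.  Compiled out on purpose.
 */
#if 0
static struct dst_entry *example_sk_route(struct sock *sk, struct flowi6 *fl6,
					  const struct in6_addr *final_dst)
{
	/* For a connected socket the result may be cached in sk. */
	return ip6_sk_dst_lookup_flow(sk, fl6, final_dst,
				      sk->sk_state == TCP_ESTABLISHED);
}
#endif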

/**
 *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *      @skb: Packet for which lookup is done
 *      @dev: Tunnel device
 *      @net: Network namespace of tunnel device
 *      @sock: Socket which provides route info
 *      @saddr: Memory to store the src ip address
 *      @info: Tunnel information
 *      @protocol: IP protocol
 *      @use_cache: Flag to enable cache usage
 *
 *      This function performs a route lookup on a tunnel.
 *
 *      It returns a valid dst pointer and stores the src address to be
 *      used in the tunnel in @saddr on success, else a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
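
/*
 * Usage sketch (illustrative, not part of the original file): how a UDP
 * tunnel driver (in the spirit of vxlan/geneve) routes an encapsulated
 * packet.  The helper name and the source of the use_cache flag are
 * hypothetical.  Compiled out on purpose.
 */
#if 0
static int example_tunnel_route(struct sk_buff *skb, struct net_device *dev,
				struct net *net, struct socket *sock,
				const struct ip_tunnel_info *info,
				bool use_cache)
{
	struct in6_addr saddr;
	struct dst_entry *dst;

	dst = ip6_dst_lookup_tunnel(skb, dev, net, sock, &saddr, info,
				    IPPROTO_UDP, use_cache);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	skb_dst_set(skb, dst);
	/* saddr now holds the source address for the outer IPv6 header */
	return 0;
}
#endif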

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not the first; the header
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment; the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
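
/*
 * Usage sketch (illustrative, not part of the original file): the classic
 * corked datagram pattern around ip6_append_data(), in the spirit of
 * udpv6_sendmsg()/rawv6_sendmsg().  ip_generic_getfrag() copies from the
 * msghdr iterator; the helper name and the zero transhdrlen are
 * assumptions.  Compiled out on purpose.
 */
#if 0
static int example_corked_send(struct sock *sk, struct msghdr *msg, int len,
			       struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			       struct rt6_info *rt)
{
	int err;

	lock_sock(sk);
	err = ip6_append_data(sk, ip_generic_getfrag, msg, len,
			      0 /* transhdrlen */, ipc6, fl6, rt,
			      msg->msg_flags);
	if (err)
		ip6_flush_pending_frames(sk);	/* drop what was queued */
	else if (!(msg->msg_flags & MSG_MORE))
		err = ip6_push_pending_frames(sk);
	release_sock(sk);
	return err;
}
#endif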

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
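
/*
 * Usage sketch (illustrative, not part of the original file): the
 * single-shot counterpart to the corked path above, building the whole
 * datagram with ip6_make_skb() and sending it with ip6_send_skb(), as the
 * UDP GSO path does.  The helper name and the zero transhdrlen are
 * assumptions.  Compiled out on purpose.
 */
#if 0
static int example_single_shot_send(struct sock *sk, struct msghdr *msg,
				    int len, struct ipcm6_cookie *ipc6,
				    struct flowi6 *fl6, struct rt6_info *rt)
{
	struct inet_cork_full cork;
	struct sk_buff *skb;

	skb = ip6_make_skb(sk, ip_generic_getfrag, msg, len,
			   0 /* transhdrlen */, ipc6, fl6, rt,
			   msg->msg_flags, &cork);
	if (IS_ERR_OR_NULL(skb))
		return PTR_ERR(skb);	/* NULL (MSG_PROBE) yields 0 */

	return ip6_send_skb(skb);
}
#endif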
1957