xref: /linux/net/ipv6/tcp_ipv6.c (revision 2624f124b3b5d550ab2fbef7ee3bc0e1fed09722)
1 /*
2  *	TCP over IPv6
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9  *
10  *	Based on:
11  *	linux/net/ipv4/tcp.c
12  *	linux/net/ipv4/tcp_input.c
13  *	linux/net/ipv4/tcp_output.c
14  *
15  *	Fixes:
16  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
17  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
18  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
19  *					a single port at the same time.
20  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  */
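/*
 * On the IPV6_V6ONLY note above: the option is controlled from user space
 * with setsockopt().  A minimal, illustrative sketch (user-space code, not
 * part of this file):
 *
 *	int on = 1;
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
 *
 * With the flag set the socket carries IPv6 traffic only, so a separate
 * AF_INET socket may bind the same port for IPv4.
 */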
27 
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
43 
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
47 
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/inet6_hashtables.h>
51 #include <net/ipv6.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
58 #include <net/xfrm.h>
59 #include <net/addrconf.h>
60 #include <net/snmp.h>
61 #include <net/dsfield.h>
62 
63 #include <asm/uaccess.h>
64 
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
67 
68 static void	tcp_v6_send_reset(struct sk_buff *skb);
69 static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
70 static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
71 				  struct sk_buff *skb);
72 
73 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74 static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 
76 static struct tcp_func ipv6_mapped;
77 static struct tcp_func ipv6_specific;
78 
79 static inline int tcp_v6_bind_conflict(const struct sock *sk,
80 				       const struct inet_bind_bucket *tb)
81 {
82 	const struct sock *sk2;
83 	const struct hlist_node *node;
84 
85 	/* We must walk the whole port owner list in this case. -DaveM */
86 	sk_for_each_bound(sk2, node, &tb->owners) {
87 		if (sk != sk2 &&
88 		    (!sk->sk_bound_dev_if ||
89 		     !sk2->sk_bound_dev_if ||
90 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
91 		    (!sk->sk_reuse || !sk2->sk_reuse ||
92 		     sk2->sk_state == TCP_LISTEN) &&
93 		     ipv6_rcv_saddr_equal(sk, sk2))
94 			break;
95 	}
96 
97 	return node != NULL;
98 }
99 
100 /* Grrr, addr_type already calculated by caller, but I don't want
101  * to add some silly "cookie" argument to this method just for that.
102  * But it doesn't matter, the recalculation is in the rarest path
103  * this function ever takes.
104  */
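/*
 * Port selection: with snum == 0 an ephemeral port is chosen by advancing
 * tcp_hashinfo.port_rover through sysctl_local_port_range until a port with
 * no bind bucket is found; otherwise the bucket for the requested port is
 * looked up and checked for conflicts with tcp_v6_bind_conflict().  On
 * success the socket is attached to the bucket via inet_bind_hash().
 */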
105 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
106 {
107 	struct inet_bind_hashbucket *head;
108 	struct inet_bind_bucket *tb;
109 	struct hlist_node *node;
110 	int ret;
111 
112 	local_bh_disable();
113 	if (snum == 0) {
114 		int low = sysctl_local_port_range[0];
115 		int high = sysctl_local_port_range[1];
116 		int remaining = (high - low) + 1;
117 		int rover;
118 
119 		spin_lock(&tcp_hashinfo.portalloc_lock);
120 		if (tcp_hashinfo.port_rover < low)
121 			rover = low;
122 		else
123 			rover = tcp_hashinfo.port_rover;
124 		do {	rover++;
125 			if (rover > high)
126 				rover = low;
127 			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
128 			spin_lock(&head->lock);
129 			inet_bind_bucket_for_each(tb, node, &head->chain)
130 				if (tb->port == rover)
131 					goto next;
132 			break;
133 		next:
134 			spin_unlock(&head->lock);
135 		} while (--remaining > 0);
136 		tcp_hashinfo.port_rover = rover;
137 		spin_unlock(&tcp_hashinfo.portalloc_lock);
138 
139 		/* Exhausted local port range during search?  It is not
140 		 * possible for us to be holding one of the bind hash
141 		 * locks if this test triggers, because if 'remaining'
142 		 * drops to zero, we broke out of the do/while loop at
143 		 * the top level, not from the 'break;' statement.
144 		 */
145 		ret = 1;
146 		if (unlikely(remaining <= 0))
147 			goto fail;
148 
149 		/* OK, here is the one we will use. */
150 		snum = rover;
151 	} else {
152 		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
153 		spin_lock(&head->lock);
154 		inet_bind_bucket_for_each(tb, node, &head->chain)
155 			if (tb->port == snum)
156 				goto tb_found;
157 	}
158 	tb = NULL;
159 	goto tb_not_found;
160 tb_found:
161 	if (tb && !hlist_empty(&tb->owners)) {
162 		if (tb->fastreuse > 0 && sk->sk_reuse &&
163 		    sk->sk_state != TCP_LISTEN) {
164 			goto success;
165 		} else {
166 			ret = 1;
167 			if (tcp_v6_bind_conflict(sk, tb))
168 				goto fail_unlock;
169 		}
170 	}
171 tb_not_found:
172 	ret = 1;
173 	if (tb == NULL) {
174 	       	tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
175 		if (tb == NULL)
176 			goto fail_unlock;
177 	}
178 	if (hlist_empty(&tb->owners)) {
179 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
180 			tb->fastreuse = 1;
181 		else
182 			tb->fastreuse = 0;
183 	} else if (tb->fastreuse &&
184 		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
185 		tb->fastreuse = 0;
186 
187 success:
188 	if (!inet_csk(sk)->icsk_bind_hash)
189 		inet_bind_hash(sk, tb, snum);
190 	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
191 	ret = 0;
192 
193 fail_unlock:
194 	spin_unlock(&head->lock);
195 fail:
196 	local_bh_enable();
197 	return ret;
198 }
199 
200 static __inline__ void __tcp_v6_hash(struct sock *sk)
201 {
202 	struct hlist_head *list;
203 	rwlock_t *lock;
204 
205 	BUG_TRAP(sk_unhashed(sk));
206 
207 	if (sk->sk_state == TCP_LISTEN) {
208 		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
209 		lock = &tcp_hashinfo.lhash_lock;
210 		inet_listen_wlock(&tcp_hashinfo);
211 	} else {
212 		sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
213 		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
214 		lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
215 		write_lock(lock);
216 	}
217 
218 	__sk_add_node(sk, list);
219 	sock_prot_inc_use(sk->sk_prot);
220 	write_unlock(lock);
221 }
222 
223 
224 static void tcp_v6_hash(struct sock *sk)
225 {
226 	if (sk->sk_state != TCP_CLOSE) {
227 		struct tcp_sock *tp = tcp_sk(sk);
228 
229 		if (tp->af_specific == &ipv6_mapped) {
230 			tcp_prot.hash(sk);
231 			return;
232 		}
233 		local_bh_disable();
234 		__tcp_v6_hash(sk);
235 		local_bh_enable();
236 	}
237 }
238 
239 /*
240  * Open request hash tables.
241  */
242 
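/*
 * Incoming connection requests are kept in a small per-listener hash table
 * of TCP_SYNQ_HSIZE buckets.  The bucket is derived from the peer address
 * and port mixed with a per-listener random value (lopt->hash_rnd) using
 * the jhash mixing step, so a remote sender cannot easily target a single
 * bucket.  A request is hashed in as tcp_v6_synq_add() does below:
 *
 *	h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr,
 *			     inet_rsk(req)->rmt_port, lopt->hash_rnd);
 */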
243 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
244 {
245 	u32 a, b, c;
246 
247 	a = raddr->s6_addr32[0];
248 	b = raddr->s6_addr32[1];
249 	c = raddr->s6_addr32[2];
250 
251 	a += JHASH_GOLDEN_RATIO;
252 	b += JHASH_GOLDEN_RATIO;
253 	c += rnd;
254 	__jhash_mix(a, b, c);
255 
256 	a += raddr->s6_addr32[3];
257 	b += (u32) rport;
258 	__jhash_mix(a, b, c);
259 
260 	return c & (TCP_SYNQ_HSIZE - 1);
261 }
262 
263 static struct request_sock *tcp_v6_search_req(const struct sock *sk,
264 					      struct request_sock ***prevp,
265 					      __u16 rport,
266 					      struct in6_addr *raddr,
267 					      struct in6_addr *laddr,
268 					      int iif)
269 {
270 	const struct inet_connection_sock *icsk = inet_csk(sk);
271 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
272 	struct request_sock *req, **prev;
273 
274 	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
275 	     (req = *prev) != NULL;
276 	     prev = &req->dl_next) {
277 		const struct tcp6_request_sock *treq = tcp6_rsk(req);
278 
279 		if (inet_rsk(req)->rmt_port == rport &&
280 		    req->rsk_ops->family == AF_INET6 &&
281 		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
282 		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
283 		    (!treq->iif || treq->iif == iif)) {
284 			BUG_TRAP(req->sk == NULL);
285 			*prevp = prev;
286 			return req;
287 		}
288 	}
289 
290 	return NULL;
291 }
292 
293 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
294 				   struct in6_addr *saddr,
295 				   struct in6_addr *daddr,
296 				   unsigned long base)
297 {
298 	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
299 }
300 
301 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
302 {
303 	if (skb->protocol == htons(ETH_P_IPV6)) {
304 		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
305 						    skb->nh.ipv6h->saddr.s6_addr32,
306 						    skb->h.th->dest,
307 						    skb->h.th->source);
308 	} else {
309 		return secure_tcp_sequence_number(skb->nh.iph->daddr,
310 						  skb->nh.iph->saddr,
311 						  skb->h.th->dest,
312 						  skb->h.th->source);
313 	}
314 }
315 
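/*
 * __tcp_v6_check_established() decides whether the chosen four-tuple is
 * unique.  TIME-WAIT entries are scanned first; one may be recycled when
 * timestamps permit (sysctl_tcp_tw_reuse), in which case write_seq and the
 * recent timestamp are seeded from the old connection.  If no conflict is
 * found the socket is inserted into the established hash under the bucket
 * lock, and any recycled TIME-WAIT socket ends up descheduled.
 */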
316 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
317 				      struct inet_timewait_sock **twp)
318 {
319 	struct inet_sock *inet = inet_sk(sk);
320 	const struct ipv6_pinfo *np = inet6_sk(sk);
321 	const struct in6_addr *daddr = &np->rcv_saddr;
322 	const struct in6_addr *saddr = &np->daddr;
323 	const int dif = sk->sk_bound_dev_if;
324 	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
325 	const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
326 				       tcp_hashinfo.ehash_size);
327 	struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
328 	struct sock *sk2;
329 	const struct hlist_node *node;
330 	struct inet_timewait_sock *tw;
331 
332 	write_lock(&head->lock);
333 
334 	/* Check TIME-WAIT sockets first. */
335 	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
336 		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
337 
338 		tw = inet_twsk(sk2);
339 
340 		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
341 		   sk2->sk_family		== PF_INET6	&&
342 		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
343 		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
344 		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
345 			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
346 			struct tcp_sock *tp = tcp_sk(sk);
347 
348 			if (tcptw->tw_ts_recent_stamp &&
349 			    (!twp ||
350 			     (sysctl_tcp_tw_reuse &&
351 			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
352 				/* See comment in tcp_ipv4.c */
353 				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
354 				if (!tp->write_seq)
355 					tp->write_seq = 1;
356 				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
357 				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
358 				sock_hold(sk2);
359 				goto unique;
360 			} else
361 				goto not_unique;
362 		}
363 	}
364 	tw = NULL;
365 
366 	/* And established part... */
367 	sk_for_each(sk2, node, &head->chain) {
368 		if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
369 			goto not_unique;
370 	}
371 
372 unique:
373 	BUG_TRAP(sk_unhashed(sk));
374 	__sk_add_node(sk, &head->chain);
375 	sk->sk_hashent = hash;
376 	sock_prot_inc_use(sk->sk_prot);
377 	write_unlock(&head->lock);
378 
379 	if (twp) {
380 		*twp = tw;
381 		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
382 	} else if (tw) {
383 		/* Silly. Should hash-dance instead... */
384 		inet_twsk_deschedule(tw, &tcp_death_row);
385 		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
386 
387 		inet_twsk_put(tw);
388 	}
389 	return 0;
390 
391 not_unique:
392 	write_unlock(&head->lock);
393 	return -EADDRNOTAVAIL;
394 }
395 
396 static inline u32 tcpv6_port_offset(const struct sock *sk)
397 {
398 	const struct inet_sock *inet = inet_sk(sk);
399 	const struct ipv6_pinfo *np = inet6_sk(sk);
400 
401 	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
402 					   np->daddr.s6_addr32,
403 					   inet->dport);
404 }
405 
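/*
 * tcp_v6_hash_connect() binds and hashes a connecting socket.  Without an
 * explicitly bound port it searches the local port range starting at an
 * offset derived from the addresses and destination port
 * (tcpv6_port_offset()) plus a rolling hint.  Buckets owned by explicitly
 * bound sockets (fastreuse >= 0) are skipped; buckets created by earlier
 * automatic allocations (fastreuse == -1) may be shared as long as
 * __tcp_v6_check_established() confirms the four-tuple is unique.
 */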
406 static int tcp_v6_hash_connect(struct sock *sk)
407 {
408 	unsigned short snum = inet_sk(sk)->num;
409  	struct inet_bind_hashbucket *head;
410  	struct inet_bind_bucket *tb;
411 	int ret;
412 
413  	if (!snum) {
414  		int low = sysctl_local_port_range[0];
415  		int high = sysctl_local_port_range[1];
416 		int range = high - low;
417  		int i;
418 		int port;
419 		static u32 hint;
420 		u32 offset = hint + tcpv6_port_offset(sk);
421 		struct hlist_node *node;
422  		struct inet_timewait_sock *tw = NULL;
423 
424  		local_bh_disable();
425 		for (i = 1; i <= range; i++) {
426 			port = low + (i + offset) % range;
427  			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
428  			spin_lock(&head->lock);
429 
430  			/* Does not bother with rcv_saddr checks,
431  			 * because the established check is already
432  			 * unique enough.
433  			 */
434 			inet_bind_bucket_for_each(tb, node, &head->chain) {
435  				if (tb->port == port) {
436  					BUG_TRAP(!hlist_empty(&tb->owners));
437  					if (tb->fastreuse >= 0)
438  						goto next_port;
439  					if (!__tcp_v6_check_established(sk,
440 									port,
441 									&tw))
442  						goto ok;
443  					goto next_port;
444  				}
445  			}
446 
447  			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
448  			if (!tb) {
449  				spin_unlock(&head->lock);
450  				break;
451  			}
452  			tb->fastreuse = -1;
453  			goto ok;
454 
455  		next_port:
456  			spin_unlock(&head->lock);
457  		}
458  		local_bh_enable();
459 
460  		return -EADDRNOTAVAIL;
461 
462 ok:
463 		hint += i;
464 
465  		/* Head lock still held and bh's disabled */
466  		inet_bind_hash(sk, tb, port);
467 		if (sk_unhashed(sk)) {
468  			inet_sk(sk)->sport = htons(port);
469  			__tcp_v6_hash(sk);
470  		}
471  		spin_unlock(&head->lock);
472 
473  		if (tw) {
474  			inet_twsk_deschedule(tw, &tcp_death_row);
475  			inet_twsk_put(tw);
476  		}
477 
478 		ret = 0;
479 		goto out;
480  	}
481 
482  	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
483  	tb   = inet_csk(sk)->icsk_bind_hash;
484 	spin_lock_bh(&head->lock);
485 
486 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
487 		__tcp_v6_hash(sk);
488 		spin_unlock_bh(&head->lock);
489 		return 0;
490 	} else {
491 		spin_unlock(&head->lock);
492 		/* No definite answer... Walk the established hash table */
493 		ret = __tcp_v6_check_established(sk, snum, NULL);
494 out:
495 		local_bh_enable();
496 		return ret;
497 	}
498 }
499 
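/*
 * Active open.  tcp_v6_connect() validates the destination, resolves any
 * flow label, treats the unspecified address as loopback, and hands
 * v4-mapped destinations to tcp_v4_connect() with af_specific switched to
 * ipv6_mapped.  For native IPv6 it performs the routing and xfrm lookups,
 * picks a source address, hashes the socket via tcp_v6_hash_connect(),
 * chooses the initial sequence number and finally sends the SYN through
 * tcp_connect().
 */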
500 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
501 			  int addr_len)
502 {
503 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
504 	struct inet_sock *inet = inet_sk(sk);
505 	struct ipv6_pinfo *np = inet6_sk(sk);
506 	struct tcp_sock *tp = tcp_sk(sk);
507 	struct in6_addr *saddr = NULL, *final_p = NULL, final;
508 	struct flowi fl;
509 	struct dst_entry *dst;
510 	int addr_type;
511 	int err;
512 
513 	if (addr_len < SIN6_LEN_RFC2133)
514 		return -EINVAL;
515 
516 	if (usin->sin6_family != AF_INET6)
517 		return(-EAFNOSUPPORT);
518 
519 	memset(&fl, 0, sizeof(fl));
520 
521 	if (np->sndflow) {
522 		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
523 		IP6_ECN_flow_init(fl.fl6_flowlabel);
524 		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
525 			struct ip6_flowlabel *flowlabel;
526 			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
527 			if (flowlabel == NULL)
528 				return -EINVAL;
529 			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
530 			fl6_sock_release(flowlabel);
531 		}
532 	}
533 
534 	/*
535   	 *	connect() to INADDR_ANY means loopback (BSD'ism).
536   	 */
537 
538   	if(ipv6_addr_any(&usin->sin6_addr))
539 		usin->sin6_addr.s6_addr[15] = 0x1;
540 
541 	addr_type = ipv6_addr_type(&usin->sin6_addr);
542 
543 	if(addr_type & IPV6_ADDR_MULTICAST)
544 		return -ENETUNREACH;
545 
546 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
547 		if (addr_len >= sizeof(struct sockaddr_in6) &&
548 		    usin->sin6_scope_id) {
549 			/* If interface is set while binding, indices
550 			 * must coincide.
551 			 */
552 			if (sk->sk_bound_dev_if &&
553 			    sk->sk_bound_dev_if != usin->sin6_scope_id)
554 				return -EINVAL;
555 
556 			sk->sk_bound_dev_if = usin->sin6_scope_id;
557 		}
558 
559 		/* Connect to link-local address requires an interface */
560 		if (!sk->sk_bound_dev_if)
561 			return -EINVAL;
562 	}
563 
564 	if (tp->rx_opt.ts_recent_stamp &&
565 	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
566 		tp->rx_opt.ts_recent = 0;
567 		tp->rx_opt.ts_recent_stamp = 0;
568 		tp->write_seq = 0;
569 	}
570 
571 	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
572 	np->flow_label = fl.fl6_flowlabel;
573 
574 	/*
575 	 *	TCP over IPv4
576 	 */
577 
578 	if (addr_type == IPV6_ADDR_MAPPED) {
579 		u32 exthdrlen = tp->ext_header_len;
580 		struct sockaddr_in sin;
581 
582 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
583 
584 		if (__ipv6_only_sock(sk))
585 			return -ENETUNREACH;
586 
587 		sin.sin_family = AF_INET;
588 		sin.sin_port = usin->sin6_port;
589 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
590 
591 		tp->af_specific = &ipv6_mapped;
592 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
593 
594 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
595 
596 		if (err) {
597 			tp->ext_header_len = exthdrlen;
598 			tp->af_specific = &ipv6_specific;
599 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
600 			goto failure;
601 		} else {
602 			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
603 				      inet->saddr);
604 			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
605 				      inet->rcv_saddr);
606 		}
607 
608 		return err;
609 	}
610 
611 	if (!ipv6_addr_any(&np->rcv_saddr))
612 		saddr = &np->rcv_saddr;
613 
614 	fl.proto = IPPROTO_TCP;
615 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
616 	ipv6_addr_copy(&fl.fl6_src,
617 		       (saddr ? saddr : &np->saddr));
618 	fl.oif = sk->sk_bound_dev_if;
619 	fl.fl_ip_dport = usin->sin6_port;
620 	fl.fl_ip_sport = inet->sport;
621 
622 	if (np->opt && np->opt->srcrt) {
623 		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
624 		ipv6_addr_copy(&final, &fl.fl6_dst);
625 		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
626 		final_p = &final;
627 	}
628 
629 	err = ip6_dst_lookup(sk, &dst, &fl);
630 	if (err)
631 		goto failure;
632 	if (final_p)
633 		ipv6_addr_copy(&fl.fl6_dst, final_p);
634 
635 	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
636 		goto failure;
637 
638 	if (saddr == NULL) {
639 		saddr = &fl.fl6_src;
640 		ipv6_addr_copy(&np->rcv_saddr, saddr);
641 	}
642 
643 	/* set the source address */
644 	ipv6_addr_copy(&np->saddr, saddr);
645 	inet->rcv_saddr = LOOPBACK4_IPV6;
646 
647 	ip6_dst_store(sk, dst, NULL);
648 	sk->sk_route_caps = dst->dev->features &
649 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
650 
651 	tp->ext_header_len = 0;
652 	if (np->opt)
653 		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
654 
655 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
656 
657 	inet->dport = usin->sin6_port;
658 
659 	tcp_set_state(sk, TCP_SYN_SENT);
660 	err = tcp_v6_hash_connect(sk);
661 	if (err)
662 		goto late_failure;
663 
664 	if (!tp->write_seq)
665 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
666 							     np->daddr.s6_addr32,
667 							     inet->sport,
668 							     inet->dport);
669 
670 	err = tcp_connect(sk);
671 	if (err)
672 		goto late_failure;
673 
674 	return 0;
675 
676 late_failure:
677 	tcp_set_state(sk, TCP_CLOSE);
678 	__sk_dst_reset(sk);
679 failure:
680 	inet->dport = 0;
681 	sk->sk_route_caps = 0;
682 	return err;
683 }
684 
685 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
686 		int type, int code, int offset, __u32 info)
687 {
688 	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
689 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
690 	struct ipv6_pinfo *np;
691 	struct sock *sk;
692 	int err;
693 	struct tcp_sock *tp;
694 	__u32 seq;
695 
696 	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
697 			  th->source, skb->dev->ifindex);
698 
699 	if (sk == NULL) {
700 		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
701 		return;
702 	}
703 
704 	if (sk->sk_state == TCP_TIME_WAIT) {
705 		inet_twsk_put((struct inet_timewait_sock *)sk);
706 		return;
707 	}
708 
709 	bh_lock_sock(sk);
710 	if (sock_owned_by_user(sk))
711 		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
712 
713 	if (sk->sk_state == TCP_CLOSE)
714 		goto out;
715 
716 	tp = tcp_sk(sk);
717 	seq = ntohl(th->seq);
718 	if (sk->sk_state != TCP_LISTEN &&
719 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
720 		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
721 		goto out;
722 	}
723 
724 	np = inet6_sk(sk);
725 
726 	if (type == ICMPV6_PKT_TOOBIG) {
727 		struct dst_entry *dst = NULL;
728 
729 		if (sock_owned_by_user(sk))
730 			goto out;
731 		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
732 			goto out;
733 
734 		/* icmp should have updated the destination cache entry */
735 		dst = __sk_dst_check(sk, np->dst_cookie);
736 
737 		if (dst == NULL) {
738 			struct inet_sock *inet = inet_sk(sk);
739 			struct flowi fl;
740 
741 			/* BUGGG_FUTURE: Again, it is not clear how
742 			   to handle rthdr case. Ignore this complexity
743 			   for now.
744 			 */
745 			memset(&fl, 0, sizeof(fl));
746 			fl.proto = IPPROTO_TCP;
747 			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
748 			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
749 			fl.oif = sk->sk_bound_dev_if;
750 			fl.fl_ip_dport = inet->dport;
751 			fl.fl_ip_sport = inet->sport;
752 
753 			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
754 				sk->sk_err_soft = -err;
755 				goto out;
756 			}
757 
758 			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
759 				sk->sk_err_soft = -err;
760 				goto out;
761 			}
762 
763 		} else
764 			dst_hold(dst);
765 
766 		if (tp->pmtu_cookie > dst_mtu(dst)) {
767 			tcp_sync_mss(sk, dst_mtu(dst));
768 			tcp_simple_retransmit(sk);
769 		} /* else let the usual retransmit timer handle it */
770 		dst_release(dst);
771 		goto out;
772 	}
773 
774 	icmpv6_err_convert(type, code, &err);
775 
776 	/* Might be for a request_sock */
777 	switch (sk->sk_state) {
778 		struct request_sock *req, **prev;
779 	case TCP_LISTEN:
780 		if (sock_owned_by_user(sk))
781 			goto out;
782 
783 		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
784 					&hdr->saddr, inet6_iif(skb));
785 		if (!req)
786 			goto out;
787 
788 		/* ICMPs are not backlogged, hence we cannot get
789 		 * an established socket here.
790 		 */
791 		BUG_TRAP(req->sk == NULL);
792 
793 		if (seq != tcp_rsk(req)->snt_isn) {
794 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
795 			goto out;
796 		}
797 
798 		inet_csk_reqsk_queue_drop(sk, req, prev);
799 		goto out;
800 
801 	case TCP_SYN_SENT:
802 	case TCP_SYN_RECV:  /* Cannot happen.
803 			       It can, if SYNs are crossed. --ANK */
804 		if (!sock_owned_by_user(sk)) {
805 			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
806 			sk->sk_err = err;
807 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
808 
809 			tcp_done(sk);
810 		} else
811 			sk->sk_err_soft = err;
812 		goto out;
813 	}
814 
815 	if (!sock_owned_by_user(sk) && np->recverr) {
816 		sk->sk_err = err;
817 		sk->sk_error_report(sk);
818 	} else
819 		sk->sk_err_soft = err;
820 
821 out:
822 	bh_unlock_sock(sk);
823 	sock_put(sk);
824 }
825 
826 
827 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
828 			      struct dst_entry *dst)
829 {
830 	struct tcp6_request_sock *treq = tcp6_rsk(req);
831 	struct ipv6_pinfo *np = inet6_sk(sk);
832 	struct sk_buff * skb;
833 	struct ipv6_txoptions *opt = NULL;
834 	struct in6_addr * final_p = NULL, final;
835 	struct flowi fl;
836 	int err = -1;
837 
838 	memset(&fl, 0, sizeof(fl));
839 	fl.proto = IPPROTO_TCP;
840 	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
841 	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
842 	fl.fl6_flowlabel = 0;
843 	fl.oif = treq->iif;
844 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
845 	fl.fl_ip_sport = inet_sk(sk)->sport;
846 
847 	if (dst == NULL) {
848 		opt = np->opt;
849 		if (opt == NULL &&
850 		    np->rxopt.bits.osrcrt == 2 &&
851 		    treq->pktopts) {
852 			struct sk_buff *pktopts = treq->pktopts;
853 			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
854 			if (rxopt->srcrt)
855 				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
856 		}
857 
858 		if (opt && opt->srcrt) {
859 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
860 			ipv6_addr_copy(&final, &fl.fl6_dst);
861 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
862 			final_p = &final;
863 		}
864 
865 		err = ip6_dst_lookup(sk, &dst, &fl);
866 		if (err)
867 			goto done;
868 		if (final_p)
869 			ipv6_addr_copy(&fl.fl6_dst, final_p);
870 		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
871 			goto done;
872 	}
873 
874 	skb = tcp_make_synack(sk, dst, req);
875 	if (skb) {
876 		struct tcphdr *th = skb->h.th;
877 
878 		th->check = tcp_v6_check(th, skb->len,
879 					 &treq->loc_addr, &treq->rmt_addr,
880 					 csum_partial((char *)th, skb->len, skb->csum));
881 
882 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
883 		err = ip6_xmit(sk, skb, &fl, opt, 0);
884 		if (err == NET_XMIT_CN)
885 			err = 0;
886 	}
887 
888 done:
889         if (opt && opt != np->opt)
890 		sock_kfree_s(sk, opt, opt->tot_len);
891 	return err;
892 }
893 
894 static void tcp_v6_reqsk_destructor(struct request_sock *req)
895 {
896 	if (tcp6_rsk(req)->pktopts)
897 		kfree_skb(tcp6_rsk(req)->pktopts);
898 }
899 
900 static struct request_sock_ops tcp6_request_sock_ops = {
901 	.family		=	AF_INET6,
902 	.obj_size	=	sizeof(struct tcp6_request_sock),
903 	.rtx_syn_ack	=	tcp_v6_send_synack,
904 	.send_ack	=	tcp_v6_reqsk_send_ack,
905 	.destructor	=	tcp_v6_reqsk_destructor,
906 	.send_reset	=	tcp_v6_send_reset
907 };
908 
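/*
 * ipv6_opt_accepted() returns nonzero when the received skb carries
 * extension headers or flow information that the socket asked to see via
 * the IPV6_PKTOPTIONS-style receive options; only then is it worth keeping
 * the skb around as pktoptions.
 */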
909 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
910 {
911 	struct ipv6_pinfo *np = inet6_sk(sk);
912 	struct inet6_skb_parm *opt = IP6CB(skb);
913 
914 	if (np->rxopt.all) {
915 		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
916 		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
917 		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
918 		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
919 			return 1;
920 	}
921 	return 0;
922 }
923 
924 
925 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
926 			      struct sk_buff *skb)
927 {
928 	struct ipv6_pinfo *np = inet6_sk(sk);
929 
930 	if (skb->ip_summed == CHECKSUM_HW) {
931 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
932 		skb->csum = offsetof(struct tcphdr, check);
933 	} else {
934 		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
935 					    csum_partial((char *)th, th->doff<<2,
936 							 skb->csum));
937 	}
938 }
939 
940 
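/*
 * Build and send a bare RST for a segment that has no owning socket.  The
 * addresses and ports of the offending segment are swapped; if it carried
 * an ACK the RST takes its sequence number from that ack_seq, otherwise
 * the RST itself ACKs everything the peer sent.
 */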
941 static void tcp_v6_send_reset(struct sk_buff *skb)
942 {
943 	struct tcphdr *th = skb->h.th, *t1;
944 	struct sk_buff *buff;
945 	struct flowi fl;
946 
947 	if (th->rst)
948 		return;
949 
950 	if (!ipv6_unicast_destination(skb))
951 		return;
952 
953 	/*
954 	 * We need to grab some memory, and put together an RST,
955 	 * and then put it into the queue to be sent.
956 	 */
957 
958 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
959 			 GFP_ATOMIC);
960 	if (buff == NULL)
961 	  	return;
962 
963 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
964 
965 	t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
966 
967 	/* Swap the send and the receive. */
968 	memset(t1, 0, sizeof(*t1));
969 	t1->dest = th->source;
970 	t1->source = th->dest;
971 	t1->doff = sizeof(*t1)/4;
972 	t1->rst = 1;
973 
974 	if(th->ack) {
975 	  	t1->seq = th->ack_seq;
976 	} else {
977 		t1->ack = 1;
978 		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
979 				    + skb->len - (th->doff<<2));
980 	}
981 
982 	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
983 
984 	memset(&fl, 0, sizeof(fl));
985 	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
986 	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
987 
988 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
989 				    sizeof(*t1), IPPROTO_TCP,
990 				    buff->csum);
991 
992 	fl.proto = IPPROTO_TCP;
993 	fl.oif = inet6_iif(skb);
994 	fl.fl_ip_dport = t1->dest;
995 	fl.fl_ip_sport = t1->source;
996 
997 	/* sk = NULL, but it is safe for now. RST socket required. */
998 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
999 
1000 		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1001 			return;
1002 
1003 		ip6_xmit(NULL, buff, &fl, NULL, 0);
1004 		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1005 		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1006 		return;
1007 	}
1008 
1009 	kfree_skb(buff);
1010 }
1011 
1012 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1013 {
1014 	struct tcphdr *th = skb->h.th, *t1;
1015 	struct sk_buff *buff;
1016 	struct flowi fl;
1017 	int tot_len = sizeof(struct tcphdr);
1018 
1019 	if (ts)
1020 		tot_len += 3*4;
1021 
1022 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1023 			 GFP_ATOMIC);
1024 	if (buff == NULL)
1025 		return;
1026 
1027 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1028 
1029 	t1 = (struct tcphdr *) skb_push(buff,tot_len);
1030 
1031 	/* Swap the send and the receive. */
1032 	memset(t1, 0, sizeof(*t1));
1033 	t1->dest = th->source;
1034 	t1->source = th->dest;
1035 	t1->doff = tot_len/4;
1036 	t1->seq = htonl(seq);
1037 	t1->ack_seq = htonl(ack);
1038 	t1->ack = 1;
1039 	t1->window = htons(win);
1040 
1041 	if (ts) {
1042 		u32 *ptr = (u32*)(t1 + 1);
1043 		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1044 			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1045 		*ptr++ = htonl(tcp_time_stamp);
1046 		*ptr = htonl(ts);
1047 	}
1048 
1049 	buff->csum = csum_partial((char *)t1, tot_len, 0);
1050 
1051 	memset(&fl, 0, sizeof(fl));
1052 	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1053 	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1054 
1055 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1056 				    tot_len, IPPROTO_TCP,
1057 				    buff->csum);
1058 
1059 	fl.proto = IPPROTO_TCP;
1060 	fl.oif = inet6_iif(skb);
1061 	fl.fl_ip_dport = t1->dest;
1062 	fl.fl_ip_sport = t1->source;
1063 
1064 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1065 		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1066 			return;
1067 		ip6_xmit(NULL, buff, &fl, NULL, 0);
1068 		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1069 		return;
1070 	}
1071 
1072 	kfree_skb(buff);
1073 }
1074 
1075 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1076 {
1077 	struct inet_timewait_sock *tw = inet_twsk(sk);
1078 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1079 
1080 	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1081 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1082 			tcptw->tw_ts_recent);
1083 
1084 	inet_twsk_put(tw);
1085 }
1086 
1087 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1088 {
1089 	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1090 }
1091 
1092 
1093 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1094 {
1095 	struct request_sock *req, **prev;
1096 	const struct tcphdr *th = skb->h.th;
1097 	struct sock *nsk;
1098 
1099 	/* Find possible connection requests. */
1100 	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1101 				&skb->nh.ipv6h->daddr, inet6_iif(skb));
1102 	if (req)
1103 		return tcp_check_req(sk, skb, req, prev);
1104 
1105 	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1106 					 th->source, &skb->nh.ipv6h->daddr,
1107 					 ntohs(th->dest), inet6_iif(skb));
1108 
1109 	if (nsk) {
1110 		if (nsk->sk_state != TCP_TIME_WAIT) {
1111 			bh_lock_sock(nsk);
1112 			return nsk;
1113 		}
1114 		inet_twsk_put((struct inet_timewait_sock *)nsk);
1115 		return NULL;
1116 	}
1117 
1118 #if 0 /*def CONFIG_SYN_COOKIES*/
1119 	if (!th->rst && !th->syn && th->ack)
1120 		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1121 #endif
1122 	return sk;
1123 }
1124 
1125 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1126 {
1127 	struct inet_connection_sock *icsk = inet_csk(sk);
1128 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1129 	const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1130 
1131 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1132 	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1133 }
1134 
1135 
1136 /* FIXME: this is substantially similar to the ipv4 code.
1137  * Can some kind of merge be done? -- erics
1138  */
1139 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1140 {
1141 	struct tcp6_request_sock *treq;
1142 	struct ipv6_pinfo *np = inet6_sk(sk);
1143 	struct tcp_options_received tmp_opt;
1144 	struct tcp_sock *tp = tcp_sk(sk);
1145 	struct request_sock *req = NULL;
1146 	__u32 isn = TCP_SKB_CB(skb)->when;
1147 
1148 	if (skb->protocol == htons(ETH_P_IP))
1149 		return tcp_v4_conn_request(sk, skb);
1150 
1151 	if (!ipv6_unicast_destination(skb))
1152 		goto drop;
1153 
1154 	/*
1155 	 *	There are no SYN attacks on IPv6, yet...
1156 	 */
1157 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1158 		if (net_ratelimit())
1159 			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1160 		goto drop;
1161 	}
1162 
1163 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1164 		goto drop;
1165 
1166 	req = reqsk_alloc(&tcp6_request_sock_ops);
1167 	if (req == NULL)
1168 		goto drop;
1169 
1170 	tcp_clear_options(&tmp_opt);
1171 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1172 	tmp_opt.user_mss = tp->rx_opt.user_mss;
1173 
1174 	tcp_parse_options(skb, &tmp_opt, 0);
1175 
1176 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1177 	tcp_openreq_init(req, &tmp_opt, skb);
1178 
1179 	treq = tcp6_rsk(req);
1180 	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1181 	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1182 	TCP_ECN_create_request(req, skb->h.th);
1183 	treq->pktopts = NULL;
1184 	if (ipv6_opt_accepted(sk, skb) ||
1185 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1186 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1187 		atomic_inc(&skb->users);
1188 		treq->pktopts = skb;
1189 	}
1190 	treq->iif = sk->sk_bound_dev_if;
1191 
1192 	/* So that link locals have meaning */
1193 	if (!sk->sk_bound_dev_if &&
1194 	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1195 		treq->iif = inet6_iif(skb);
1196 
1197 	if (isn == 0)
1198 		isn = tcp_v6_init_sequence(sk,skb);
1199 
1200 	tcp_rsk(req)->snt_isn = isn;
1201 
1202 	if (tcp_v6_send_synack(sk, req, NULL))
1203 		goto drop;
1204 
1205 	tcp_v6_synq_add(sk, req);
1206 
1207 	return 0;
1208 
1209 drop:
1210 	if (req)
1211 		reqsk_free(req);
1212 
1213 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1214 	return 0; /* don't send reset */
1215 }
1216 
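/*
 * Create the child socket once the handshake completes.  A SYN that
 * arrived as an IPv4 packet on this IPv6 socket takes the v4-mapped path
 * through tcp_v4_syn_recv_sock(); otherwise the route is looked up if
 * needed, the child is created with tcp_create_openreq_child(), IPv6
 * options and pktoptions are transferred from the request, and the new
 * socket is hashed and inherits the listener's port.
 */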
1217 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1218 					  struct request_sock *req,
1219 					  struct dst_entry *dst)
1220 {
1221 	struct tcp6_request_sock *treq = tcp6_rsk(req);
1222 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1223 	struct tcp6_sock *newtcp6sk;
1224 	struct inet_sock *newinet;
1225 	struct tcp_sock *newtp;
1226 	struct sock *newsk;
1227 	struct ipv6_txoptions *opt;
1228 
1229 	if (skb->protocol == htons(ETH_P_IP)) {
1230 		/*
1231 		 *	v6 mapped
1232 		 */
1233 
1234 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1235 
1236 		if (newsk == NULL)
1237 			return NULL;
1238 
1239 		newtcp6sk = (struct tcp6_sock *)newsk;
1240 		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1241 
1242 		newinet = inet_sk(newsk);
1243 		newnp = inet6_sk(newsk);
1244 		newtp = tcp_sk(newsk);
1245 
1246 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1247 
1248 		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1249 			      newinet->daddr);
1250 
1251 		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1252 			      newinet->saddr);
1253 
1254 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1255 
1256 		newtp->af_specific = &ipv6_mapped;
1257 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1258 		newnp->pktoptions  = NULL;
1259 		newnp->opt	   = NULL;
1260 		newnp->mcast_oif   = inet6_iif(skb);
1261 		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
1262 
1263 		/*
1264 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1265 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1266 		 * that function for the gory details. -acme
1267 		 */
1268 
1269 		/* This is a tricky place. Until this moment IPv4 tcp
1270 		   worked with IPv6 af_tcp.af_specific.
1271 		   Sync it now.
1272 		 */
1273 		tcp_sync_mss(newsk, newtp->pmtu_cookie);
1274 
1275 		return newsk;
1276 	}
1277 
1278 	opt = np->opt;
1279 
1280 	if (sk_acceptq_is_full(sk))
1281 		goto out_overflow;
1282 
1283 	if (np->rxopt.bits.osrcrt == 2 &&
1284 	    opt == NULL && treq->pktopts) {
1285 		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1286 		if (rxopt->srcrt)
1287 			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1288 	}
1289 
1290 	if (dst == NULL) {
1291 		struct in6_addr *final_p = NULL, final;
1292 		struct flowi fl;
1293 
1294 		memset(&fl, 0, sizeof(fl));
1295 		fl.proto = IPPROTO_TCP;
1296 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1297 		if (opt && opt->srcrt) {
1298 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1299 			ipv6_addr_copy(&final, &fl.fl6_dst);
1300 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1301 			final_p = &final;
1302 		}
1303 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1304 		fl.oif = sk->sk_bound_dev_if;
1305 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1306 		fl.fl_ip_sport = inet_sk(sk)->sport;
1307 
1308 		if (ip6_dst_lookup(sk, &dst, &fl))
1309 			goto out;
1310 
1311 		if (final_p)
1312 			ipv6_addr_copy(&fl.fl6_dst, final_p);
1313 
1314 		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1315 			goto out;
1316 	}
1317 
1318 	newsk = tcp_create_openreq_child(sk, req, skb);
1319 	if (newsk == NULL)
1320 		goto out;
1321 
1322 	/*
1323 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1324 	 * count here, tcp_create_openreq_child now does this for us, see the
1325 	 * comment in that function for the gory details. -acme
1326 	 */
1327 
1328 	ip6_dst_store(newsk, dst, NULL);
1329 	newsk->sk_route_caps = dst->dev->features &
1330 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1331 
1332 	newtcp6sk = (struct tcp6_sock *)newsk;
1333 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1334 
1335 	newtp = tcp_sk(newsk);
1336 	newinet = inet_sk(newsk);
1337 	newnp = inet6_sk(newsk);
1338 
1339 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1340 
1341 	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1342 	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1343 	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1344 	newsk->sk_bound_dev_if = treq->iif;
1345 
1346 	/* Now IPv6 options...
1347 
1348 	   First: no IPv4 options.
1349 	 */
1350 	newinet->opt = NULL;
1351 
1352 	/* Clone RX bits */
1353 	newnp->rxopt.all = np->rxopt.all;
1354 
1355 	/* Clone pktoptions received with SYN */
1356 	newnp->pktoptions = NULL;
1357 	if (treq->pktopts != NULL) {
1358 		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1359 		kfree_skb(treq->pktopts);
1360 		treq->pktopts = NULL;
1361 		if (newnp->pktoptions)
1362 			skb_set_owner_r(newnp->pktoptions, newsk);
1363 	}
1364 	newnp->opt	  = NULL;
1365 	newnp->mcast_oif  = inet6_iif(skb);
1366 	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1367 
1368 	/* Clone native IPv6 options from listening socket (if any)
1369 
1370 	   Yes, keeping a reference count would be much more clever,
1371 	   but we do one more thing here: reattach the optmem
1372 	   to newsk.
1373 	 */
1374 	if (opt) {
1375 		newnp->opt = ipv6_dup_options(newsk, opt);
1376 		if (opt != np->opt)
1377 			sock_kfree_s(sk, opt, opt->tot_len);
1378 	}
1379 
1380 	newtp->ext_header_len = 0;
1381 	if (newnp->opt)
1382 		newtp->ext_header_len = newnp->opt->opt_nflen +
1383 					newnp->opt->opt_flen;
1384 
1385 	tcp_sync_mss(newsk, dst_mtu(dst));
1386 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1387 	tcp_initialize_rcv_mss(newsk);
1388 
1389 	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1390 
1391 	__tcp_v6_hash(newsk);
1392 	inet_inherit_port(&tcp_hashinfo, sk, newsk);
1393 
1394 	return newsk;
1395 
1396 out_overflow:
1397 	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1398 out:
1399 	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1400 	if (opt && opt != np->opt)
1401 		sock_kfree_s(sk, opt, opt->tot_len);
1402 	dst_release(dst);
1403 	return NULL;
1404 }
1405 
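/*
 * Receive checksum setup: a hardware-computed checksum (CHECKSUM_HW) is
 * verified against the IPv6 pseudo-header; short segments are verified
 * completely in software; for everything else skb->csum is seeded with the
 * pseudo-header sum so the check can be completed later, e.g. while the
 * data is copied to user space.
 */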
1406 static int tcp_v6_checksum_init(struct sk_buff *skb)
1407 {
1408 	if (skb->ip_summed == CHECKSUM_HW) {
1409 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1410 		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1411 				  &skb->nh.ipv6h->daddr,skb->csum))
1412 			return 0;
1413 		LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1414 	}
1415 	if (skb->len <= 76) {
1416 		if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1417 				 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1418 			return -1;
1419 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1420 	} else {
1421 		skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1422 					  &skb->nh.ipv6h->daddr,0);
1423 	}
1424 	return 0;
1425 }
1426 
1427 /* The socket must have its spinlock held when we get
1428  * here.
1429  *
1430  * We have a potential double-lock case here, so even when
1431  * doing backlog processing we use the BH locking scheme.
1432  * This is because we cannot sleep with the original spinlock
1433  * held.
1434  */
1435 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1436 {
1437 	struct ipv6_pinfo *np = inet6_sk(sk);
1438 	struct tcp_sock *tp;
1439 	struct sk_buff *opt_skb = NULL;
1440 
1441 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1442 	   goes to IPv4 receive handler and backlogged.
1443 	   From backlog it always goes here. Kerboom...
1444 	   Fortunately, tcp_rcv_established and rcv_established
1445 	   handle them correctly, but that is not the case with
1446 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1447 	 */
1448 
1449 	if (skb->protocol == htons(ETH_P_IP))
1450 		return tcp_v4_do_rcv(sk, skb);
1451 
1452 	if (sk_filter(sk, skb, 0))
1453 		goto discard;
1454 
1455 	/*
1456 	 *	socket locking is here for SMP purposes as backlog rcv
1457 	 *	is currently called with bh processing disabled.
1458 	 */
1459 
1460 	/* Do Stevens' IPV6_PKTOPTIONS.
1461 
1462 	   Yes, guys, this is the only place in our code where we
1463 	   can handle it without affecting IPv4.
1464 	   The rest of the code is protocol independent,
1465 	   and I do not like the idea of uglifying IPv4.
1466 
1467 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1468 	   does not look very well thought out. For now we latch the
1469 	   options received in the last packet enqueued
1470 	   by tcp. Feel free to propose a better solution.
1471 	                                       --ANK (980728)
1472 	 */
1473 	if (np->rxopt.all)
1474 		opt_skb = skb_clone(skb, GFP_ATOMIC);
1475 
1476 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1477 		TCP_CHECK_TIMER(sk);
1478 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1479 			goto reset;
1480 		TCP_CHECK_TIMER(sk);
1481 		if (opt_skb)
1482 			goto ipv6_pktoptions;
1483 		return 0;
1484 	}
1485 
1486 	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1487 		goto csum_err;
1488 
1489 	if (sk->sk_state == TCP_LISTEN) {
1490 		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1491 		if (!nsk)
1492 			goto discard;
1493 
1494 		/*
1495 		 * Queue it on the new socket if the new socket is active,
1496 		 * otherwise we just shortcircuit this and continue with
1497 		 * the new socket..
1498 		 */
1499  		if(nsk != sk) {
1500 			if (tcp_child_process(sk, nsk, skb))
1501 				goto reset;
1502 			if (opt_skb)
1503 				__kfree_skb(opt_skb);
1504 			return 0;
1505 		}
1506 	}
1507 
1508 	TCP_CHECK_TIMER(sk);
1509 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1510 		goto reset;
1511 	TCP_CHECK_TIMER(sk);
1512 	if (opt_skb)
1513 		goto ipv6_pktoptions;
1514 	return 0;
1515 
1516 reset:
1517 	tcp_v6_send_reset(skb);
1518 discard:
1519 	if (opt_skb)
1520 		__kfree_skb(opt_skb);
1521 	kfree_skb(skb);
1522 	return 0;
1523 csum_err:
1524 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
1525 	goto discard;
1526 
1527 
1528 ipv6_pktoptions:
1529 	/* You may ask, what is this all about?
1530 
1531 	   1. skb was enqueued by tcp.
1532 	   2. skb is added to tail of read queue, rather than out of order.
1533 	   3. socket is not in passive state.
1534 	   4. Finally, it really contains options which the user wants to receive.
1535 	 */
1536 	tp = tcp_sk(sk);
1537 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1538 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1539 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1540 			np->mcast_oif = inet6_iif(opt_skb);
1541 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1542 			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1543 		if (ipv6_opt_accepted(sk, opt_skb)) {
1544 			skb_set_owner_r(opt_skb, sk);
1545 			opt_skb = xchg(&np->pktoptions, opt_skb);
1546 		} else {
1547 			__kfree_skb(opt_skb);
1548 			opt_skb = xchg(&np->pktoptions, NULL);
1549 		}
1550 	}
1551 
1552 	if (opt_skb)
1553 		kfree_skb(opt_skb);
1554 	return 0;
1555 }
1556 
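/*
 * Main receive entry point, called from the IPv6 protocol handler.  The
 * TCP header is validated and pulled, TCP_SKB_CB() is filled in, and the
 * owning socket is looked up.  TIME-WAIT sockets are handled separately;
 * otherwise the segment goes straight to tcp_v6_do_rcv(), is prequeued,
 * or is put on the backlog when the socket is locked by a user context.
 */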
1557 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1558 {
1559 	struct sk_buff *skb = *pskb;
1560 	struct tcphdr *th;
1561 	struct sock *sk;
1562 	int ret;
1563 
1564 	if (skb->pkt_type != PACKET_HOST)
1565 		goto discard_it;
1566 
1567 	/*
1568 	 *	Count it even if it's bad.
1569 	 */
1570 	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1571 
1572 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1573 		goto discard_it;
1574 
1575 	th = skb->h.th;
1576 
1577 	if (th->doff < sizeof(struct tcphdr)/4)
1578 		goto bad_packet;
1579 	if (!pskb_may_pull(skb, th->doff*4))
1580 		goto discard_it;
1581 
1582 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1583 	     tcp_v6_checksum_init(skb) < 0))
1584 		goto bad_packet;
1585 
1586 	th = skb->h.th;
1587 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1588 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1589 				    skb->len - th->doff*4);
1590 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1591 	TCP_SKB_CB(skb)->when = 0;
1592 	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1593 	TCP_SKB_CB(skb)->sacked = 0;
1594 
1595 	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1596 			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
1597 			    inet6_iif(skb));
1598 
1599 	if (!sk)
1600 		goto no_tcp_socket;
1601 
1602 process:
1603 	if (sk->sk_state == TCP_TIME_WAIT)
1604 		goto do_time_wait;
1605 
1606 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1607 		goto discard_and_relse;
1608 
1609 	if (sk_filter(sk, skb, 0))
1610 		goto discard_and_relse;
1611 
1612 	skb->dev = NULL;
1613 
1614 	bh_lock_sock(sk);
1615 	ret = 0;
1616 	if (!sock_owned_by_user(sk)) {
1617 		if (!tcp_prequeue(sk, skb))
1618 			ret = tcp_v6_do_rcv(sk, skb);
1619 	} else
1620 		sk_add_backlog(sk, skb);
1621 	bh_unlock_sock(sk);
1622 
1623 	sock_put(sk);
1624 	return ret ? -1 : 0;
1625 
1626 no_tcp_socket:
1627 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1628 		goto discard_it;
1629 
1630 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1631 bad_packet:
1632 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1633 	} else {
1634 		tcp_v6_send_reset(skb);
1635 	}
1636 
1637 discard_it:
1638 
1639 	/*
1640 	 *	Discard frame
1641 	 */
1642 
1643 	kfree_skb(skb);
1644 	return 0;
1645 
1646 discard_and_relse:
1647 	sock_put(sk);
1648 	goto discard_it;
1649 
1650 do_time_wait:
1651 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1652 		inet_twsk_put((struct inet_timewait_sock *)sk);
1653 		goto discard_it;
1654 	}
1655 
1656 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1657 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1658 		inet_twsk_put((struct inet_timewait_sock *)sk);
1659 		goto discard_it;
1660 	}
1661 
1662 	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1663 					   skb, th)) {
1664 	case TCP_TW_SYN:
1665 	{
1666 		struct sock *sk2;
1667 
1668 		sk2 = inet6_lookup_listener(&tcp_hashinfo,
1669 					    &skb->nh.ipv6h->daddr,
1670 					    ntohs(th->dest), inet6_iif(skb));
1671 		if (sk2 != NULL) {
1672 			struct inet_timewait_sock *tw = inet_twsk(sk);
1673 			inet_twsk_deschedule(tw, &tcp_death_row);
1674 			inet_twsk_put(tw);
1675 			sk = sk2;
1676 			goto process;
1677 		}
1678 		/* Fall through to ACK */
1679 	}
1680 	case TCP_TW_ACK:
1681 		tcp_v6_timewait_ack(sk, skb);
1682 		break;
1683 	case TCP_TW_RST:
1684 		goto no_tcp_socket;
1685 	case TCP_TW_SUCCESS:;
1686 	}
1687 	goto discard_it;
1688 }
1689 
1690 static int tcp_v6_rebuild_header(struct sock *sk)
1691 {
1692 	int err;
1693 	struct dst_entry *dst;
1694 	struct ipv6_pinfo *np = inet6_sk(sk);
1695 
1696 	dst = __sk_dst_check(sk, np->dst_cookie);
1697 
1698 	if (dst == NULL) {
1699 		struct inet_sock *inet = inet_sk(sk);
1700 		struct in6_addr *final_p = NULL, final;
1701 		struct flowi fl;
1702 
1703 		memset(&fl, 0, sizeof(fl));
1704 		fl.proto = IPPROTO_TCP;
1705 		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1706 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1707 		fl.fl6_flowlabel = np->flow_label;
1708 		fl.oif = sk->sk_bound_dev_if;
1709 		fl.fl_ip_dport = inet->dport;
1710 		fl.fl_ip_sport = inet->sport;
1711 
1712 		if (np->opt && np->opt->srcrt) {
1713 			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1714 			ipv6_addr_copy(&final, &fl.fl6_dst);
1715 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1716 			final_p = &final;
1717 		}
1718 
1719 		err = ip6_dst_lookup(sk, &dst, &fl);
1720 		if (err) {
1721 			sk->sk_route_caps = 0;
1722 			return err;
1723 		}
1724 		if (final_p)
1725 			ipv6_addr_copy(&fl.fl6_dst, final_p);
1726 
1727 		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1728 			sk->sk_err_soft = -err;
1729 			return err;
1730 		}
1731 
1732 		ip6_dst_store(sk, dst, NULL);
1733 		sk->sk_route_caps = dst->dev->features &
1734 			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1735 	}
1736 
1737 	return 0;
1738 }
1739 
1740 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1741 {
1742 	struct sock *sk = skb->sk;
1743 	struct inet_sock *inet = inet_sk(sk);
1744 	struct ipv6_pinfo *np = inet6_sk(sk);
1745 	struct flowi fl;
1746 	struct dst_entry *dst;
1747 	struct in6_addr *final_p = NULL, final;
1748 
1749 	memset(&fl, 0, sizeof(fl));
1750 	fl.proto = IPPROTO_TCP;
1751 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1752 	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1753 	fl.fl6_flowlabel = np->flow_label;
1754 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1755 	fl.oif = sk->sk_bound_dev_if;
1756 	fl.fl_ip_sport = inet->sport;
1757 	fl.fl_ip_dport = inet->dport;
1758 
1759 	if (np->opt && np->opt->srcrt) {
1760 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1761 		ipv6_addr_copy(&final, &fl.fl6_dst);
1762 		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1763 		final_p = &final;
1764 	}
1765 
1766 	dst = __sk_dst_check(sk, np->dst_cookie);
1767 
1768 	if (dst == NULL) {
1769 		int err = ip6_dst_lookup(sk, &dst, &fl);
1770 
1771 		if (err) {
1772 			sk->sk_err_soft = -err;
1773 			return err;
1774 		}
1775 
1776 		if (final_p)
1777 			ipv6_addr_copy(&fl.fl6_dst, final_p);
1778 
1779 		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1780 			sk->sk_route_caps = 0;
1781 			return err;
1782 		}
1783 
1784 		ip6_dst_store(sk, dst, NULL);
1785 		sk->sk_route_caps = dst->dev->features &
1786 			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1787 	}
1788 
1789 	skb->dst = dst_clone(dst);
1790 
1791 	/* Restore the final destination after routing is done */
1792 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1793 
1794 	return ip6_xmit(sk, skb, &fl, np->opt, 0);
1795 }
1796 
1797 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1798 {
1799 	struct ipv6_pinfo *np = inet6_sk(sk);
1800 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1801 
1802 	sin6->sin6_family = AF_INET6;
1803 	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1804 	sin6->sin6_port	= inet_sk(sk)->dport;
1805 	/* We do not store received flowlabel for TCP */
1806 	sin6->sin6_flowinfo = 0;
1807 	sin6->sin6_scope_id = 0;
1808 	if (sk->sk_bound_dev_if &&
1809 	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1810 		sin6->sin6_scope_id = sk->sk_bound_dev_if;
1811 }
1812 
1813 static int tcp_v6_remember_stamp(struct sock *sk)
1814 {
1815 	/* Alas, not yet... */
1816 	return 0;
1817 }
1818 
1819 static struct tcp_func ipv6_specific = {
1820 	.queue_xmit	=	tcp_v6_xmit,
1821 	.send_check	=	tcp_v6_send_check,
1822 	.rebuild_header	=	tcp_v6_rebuild_header,
1823 	.conn_request	=	tcp_v6_conn_request,
1824 	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
1825 	.remember_stamp	=	tcp_v6_remember_stamp,
1826 	.net_header_len	=	sizeof(struct ipv6hdr),
1827 
1828 	.setsockopt	=	ipv6_setsockopt,
1829 	.getsockopt	=	ipv6_getsockopt,
1830 	.addr2sockaddr	=	v6_addr2sockaddr,
1831 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
1832 };
1833 
1834 /*
1835  *	TCP over IPv4 via INET6 API
1836  */
1837 
1838 static struct tcp_func ipv6_mapped = {
1839 	.queue_xmit	=	ip_queue_xmit,
1840 	.send_check	=	tcp_v4_send_check,
1841 	.rebuild_header	=	inet_sk_rebuild_header,
1842 	.conn_request	=	tcp_v6_conn_request,
1843 	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
1844 	.remember_stamp	=	tcp_v4_remember_stamp,
1845 	.net_header_len	=	sizeof(struct iphdr),
1846 
1847 	.setsockopt	=	ipv6_setsockopt,
1848 	.getsockopt	=	ipv6_getsockopt,
1849 	.addr2sockaddr	=	v6_addr2sockaddr,
1850 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
1851 };
1852 
1853 
1854 
1855 /* NOTE: A lot of things are set to zero explicitly by the call to
1856  *       sk_alloc(), so they need not be done here.
1857  */
1858 static int tcp_v6_init_sock(struct sock *sk)
1859 {
1860 	struct inet_connection_sock *icsk = inet_csk(sk);
1861 	struct tcp_sock *tp = tcp_sk(sk);
1862 
1863 	skb_queue_head_init(&tp->out_of_order_queue);
1864 	tcp_init_xmit_timers(sk);
1865 	tcp_prequeue_init(tp);
1866 
1867 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1868 	tp->mdev = TCP_TIMEOUT_INIT;
1869 
1870 	/* So many TCP implementations out there (incorrectly) count the
1871 	 * initial SYN frame in their delayed-ACK and congestion control
1872 	 * algorithms that we must have the following bandaid to talk
1873 	 * efficiently to them.  -DaveM
1874 	 */
1875 	tp->snd_cwnd = 2;
1876 
1877 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1878 	 * initialization of these values.
1879 	 */
1880 	tp->snd_ssthresh = 0x7fffffff;
1881 	tp->snd_cwnd_clamp = ~0;
1882 	tp->mss_cache = 536;
1883 
1884 	tp->reordering = sysctl_tcp_reordering;
1885 
1886 	sk->sk_state = TCP_CLOSE;
1887 
1888 	tp->af_specific = &ipv6_specific;
1889 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1890 	sk->sk_write_space = sk_stream_write_space;
1891 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1892 
1893 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1894 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1895 
1896 	atomic_inc(&tcp_sockets_allocated);
1897 
1898 	return 0;
1899 }
1900 
1901 static int tcp_v6_destroy_sock(struct sock *sk)
1902 {
1903 	tcp_v4_destroy_sock(sk);
1904 	return inet6_destroy_sock(sk);
1905 }
1906 
1907 /* Proc filesystem TCPv6 sock list dumping. */
1908 static void get_openreq6(struct seq_file *seq,
1909 			 struct sock *sk, struct request_sock *req, int i, int uid)
1910 {
1911 	struct in6_addr *dest, *src;
1912 	int ttd = req->expires - jiffies;
1913 
1914 	if (ttd < 0)
1915 		ttd = 0;
1916 
1917 	src = &tcp6_rsk(req)->loc_addr;
1918 	dest = &tcp6_rsk(req)->rmt_addr;
1919 	seq_printf(seq,
1920 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1921 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1922 		   i,
1923 		   src->s6_addr32[0], src->s6_addr32[1],
1924 		   src->s6_addr32[2], src->s6_addr32[3],
1925 		   ntohs(inet_sk(sk)->sport),
1926 		   dest->s6_addr32[0], dest->s6_addr32[1],
1927 		   dest->s6_addr32[2], dest->s6_addr32[3],
1928 		   ntohs(inet_rsk(req)->rmt_port),
1929 		   TCP_SYN_RECV,
1930 		   0,0, /* could print option size, but that is af dependent. */
1931 		   1,   /* timers active (only the expire timer) */
1932 		   jiffies_to_clock_t(ttd),
1933 		   req->retrans,
1934 		   uid,
1935 		   0,  /* non standard timer */
1936 		   0, /* open_requests have no inode */
1937 		   0, req);
1938 }
1939 
1940 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1941 {
1942 	struct in6_addr *dest, *src;
1943 	__u16 destp, srcp;
1944 	int timer_active;
1945 	unsigned long timer_expires;
1946 	struct inet_sock *inet = inet_sk(sp);
1947 	struct tcp_sock *tp = tcp_sk(sp);
1948 	const struct inet_connection_sock *icsk = inet_csk(sp);
1949 	struct ipv6_pinfo *np = inet6_sk(sp);
1950 
1951 	dest  = &np->daddr;
1952 	src   = &np->rcv_saddr;
1953 	destp = ntohs(inet->dport);
1954 	srcp  = ntohs(inet->sport);
1955 
1956 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1957 		timer_active	= 1;
1958 		timer_expires	= icsk->icsk_timeout;
1959 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1960 		timer_active	= 4;
1961 		timer_expires	= icsk->icsk_timeout;
1962 	} else if (timer_pending(&sp->sk_timer)) {
1963 		timer_active	= 2;
1964 		timer_expires	= sp->sk_timer.expires;
1965 	} else {
1966 		timer_active	= 0;
1967 		timer_expires = jiffies;
1968 	}
1969 
1970 	seq_printf(seq,
1971 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1972 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1973 		   i,
1974 		   src->s6_addr32[0], src->s6_addr32[1],
1975 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1976 		   dest->s6_addr32[0], dest->s6_addr32[1],
1977 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1978 		   sp->sk_state,
1979 		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1980 		   timer_active,
1981 		   jiffies_to_clock_t(timer_expires - jiffies),
1982 		   icsk->icsk_retransmits,
1983 		   sock_i_uid(sp),
1984 		   icsk->icsk_probes_out,
1985 		   sock_i_ino(sp),
1986 		   atomic_read(&sp->sk_refcnt), sp,
1987 		   icsk->icsk_rto,
1988 		   icsk->icsk_ack.ato,
1989 		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1990 		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1991 		   );
1992 }
1993 
1994 static void get_timewait6_sock(struct seq_file *seq,
1995 			       struct inet_timewait_sock *tw, int i)
1996 {
1997 	struct in6_addr *dest, *src;
1998 	__u16 destp, srcp;
1999 	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2000 	int ttd = tw->tw_ttd - jiffies;
2001 
2002 	if (ttd < 0)
2003 		ttd = 0;
2004 
2005 	dest = &tcp6tw->tw_v6_daddr;
2006 	src  = &tcp6tw->tw_v6_rcv_saddr;
2007 	destp = ntohs(tw->tw_dport);
2008 	srcp  = ntohs(tw->tw_sport);
2009 
2010 	seq_printf(seq,
2011 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2012 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2013 		   i,
2014 		   src->s6_addr32[0], src->s6_addr32[1],
2015 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2016 		   dest->s6_addr32[0], dest->s6_addr32[1],
2017 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2018 		   tw->tw_substate, 0, 0,
2019 		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2020 		   atomic_read(&tw->tw_refcnt), tw);
2021 }
2022 
2023 #ifdef CONFIG_PROC_FS
2024 static int tcp6_seq_show(struct seq_file *seq, void *v)
2025 {
2026 	struct tcp_iter_state *st;
2027 
2028 	if (v == SEQ_START_TOKEN) {
2029 		seq_puts(seq,
2030 			 "  sl  "
2031 			 "local_address                         "
2032 			 "remote_address                        "
2033 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2034 			 "   uid  timeout inode\n");
2035 		goto out;
2036 	}
2037 	st = seq->private;
2038 
2039 	switch (st->state) {
2040 	case TCP_SEQ_STATE_LISTENING:
2041 	case TCP_SEQ_STATE_ESTABLISHED:
2042 		get_tcp6_sock(seq, v, st->num);
2043 		break;
2044 	case TCP_SEQ_STATE_OPENREQ:
2045 		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2046 		break;
2047 	case TCP_SEQ_STATE_TIME_WAIT:
2048 		get_timewait6_sock(seq, v, st->num);
2049 		break;
2050 	}
2051 out:
2052 	return 0;
2053 }
2054 
2055 static struct file_operations tcp6_seq_fops;
2056 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2057 	.owner		= THIS_MODULE,
2058 	.name		= "tcp6",
2059 	.family		= AF_INET6,
2060 	.seq_show	= tcp6_seq_show,
2061 	.seq_fops	= &tcp6_seq_fops,
2062 };
2063 
2064 int __init tcp6_proc_init(void)
2065 {
2066 	return tcp_proc_register(&tcp6_seq_afinfo);
2067 }
2068 
2069 void tcp6_proc_exit(void)
2070 {
2071 	tcp_proc_unregister(&tcp6_seq_afinfo);
2072 }
2073 #endif
2074 
2075 struct proto tcpv6_prot = {
2076 	.name			= "TCPv6",
2077 	.owner			= THIS_MODULE,
2078 	.close			= tcp_close,
2079 	.connect		= tcp_v6_connect,
2080 	.disconnect		= tcp_disconnect,
2081 	.accept			= inet_csk_accept,
2082 	.ioctl			= tcp_ioctl,
2083 	.init			= tcp_v6_init_sock,
2084 	.destroy		= tcp_v6_destroy_sock,
2085 	.shutdown		= tcp_shutdown,
2086 	.setsockopt		= tcp_setsockopt,
2087 	.getsockopt		= tcp_getsockopt,
2088 	.sendmsg		= tcp_sendmsg,
2089 	.recvmsg		= tcp_recvmsg,
2090 	.backlog_rcv		= tcp_v6_do_rcv,
2091 	.hash			= tcp_v6_hash,
2092 	.unhash			= tcp_unhash,
2093 	.get_port		= tcp_v6_get_port,
2094 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2095 	.sockets_allocated	= &tcp_sockets_allocated,
2096 	.memory_allocated	= &tcp_memory_allocated,
2097 	.memory_pressure	= &tcp_memory_pressure,
2098 	.orphan_count		= &tcp_orphan_count,
2099 	.sysctl_mem		= sysctl_tcp_mem,
2100 	.sysctl_wmem		= sysctl_tcp_wmem,
2101 	.sysctl_rmem		= sysctl_tcp_rmem,
2102 	.max_header		= MAX_TCP_HEADER,
2103 	.obj_size		= sizeof(struct tcp6_sock),
2104 	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
2105 	.rsk_prot		= &tcp6_request_sock_ops,
2106 };
2107 
2108 static struct inet6_protocol tcpv6_protocol = {
2109 	.handler	=	tcp_v6_rcv,
2110 	.err_handler	=	tcp_v6_err,
2111 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2112 };
2113 
2114 static struct inet_protosw tcpv6_protosw = {
2115 	.type		=	SOCK_STREAM,
2116 	.protocol	=	IPPROTO_TCP,
2117 	.prot		=	&tcpv6_prot,
2118 	.ops		=	&inet6_stream_ops,
2119 	.capability	=	-1,
2120 	.no_check	=	0,
2121 	.flags		=	INET_PROTOSW_PERMANENT,
2122 };
2123 
2124 void __init tcpv6_init(void)
2125 {
2126 	/* register inet6 protocol */
2127 	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2128 		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2129 	inet6_register_protosw(&tcpv6_protosw);
2130 }
2131