xref: /linux/net/ipv6/tcp_ipv6.c (revision ae99fb8baafc881b35aa0b79d7ac0178a7c40c89)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
/* Forward declarations: these handlers are referenced earlier in the file
 * (e.g. by tcp6_request_sock_ops and by tcp_v6_connect()) than the point at
 * which they are defined.
 */
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
/* Address-family operation tables; tcp_v6_connect() switches a socket between
 * ipv6_specific and ipv6_mapped. Their initializers are not in this part of
 * the file.
 */
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
/* Stub used when CONFIG_TCP_MD5SIG is disabled: always reports "no key", so
 * callers such as tcp_v6_reqsk_send_ack() can call it unconditionally.
 */
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allow compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		sk->sk_rx_dst = dst;
111 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent BPF program called below from accessing bytes that are out
135 	 * of the bound specified by user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
/* Start an active open on @sk towards the address in @uaddr.
 *
 * Steps, in order:
 *  - validate @addr_len (>= SIN6_LEN_RFC2133, else -EINVAL) and the address
 *    family (AF_INET6, else -EAFNOSUPPORT);
 *  - when the socket sends flow labels (np->sndflow), take the label from
 *    sin6_flowinfo and check that a non-zero label is known to the socket;
 *  - map the unspecified address to loopback, reject multicast with
 *    -ENETUNREACH, and require a bound interface for link-local targets;
 *  - for a v4-mapped destination, switch the socket to the mapped operation
 *    tables and hand over to tcp_v4_connect(), restoring the IPv6 tables if
 *    that fails;
 *  - otherwise look up a route, settle the source address, enter
 *    TCP_SYN_SENT, hash the socket, pick the initial sequence number and
 *    timestamp offset (unless in repair mode) and call tcp_connect().
 *
 * Returns 0 on success or a negative errno. Failure paths clear inet_dport
 * and sk_route_caps; failures after the state change also move the socket
 * back to TCP_CLOSE.
 */
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
			/* The lookup was only an existence check. */
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		/* sin6_scope_id is only read when the caller passed a full
		 * struct sockaddr_in6.
		 */
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connect to link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
	/* Connecting to a different peer than before: drop the remembered
	 * timestamp state and the write sequence.
	 */
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
		/* Saved so it can be restored if tcp_v4_connect() fails. */
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		icsk->icsk_af_ops = &ipv6_mapped;
241 		if (sk_is_mptcp(sk))
242 			mptcpv6_handle_mapped(sk, true);
243 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247 
248 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 
250 		if (err) {
			/* Undo the switch to the mapped operation tables. */
251 			icsk->icsk_ext_hdr_len = exthdrlen;
252 			icsk->icsk_af_ops = &ipv6_specific;
253 			if (sk_is_mptcp(sk))
254 				mptcpv6_handle_mapped(sk, false);
255 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 			tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 			goto failure;
260 		}
261 		np->saddr = sk->sk_v6_rcv_saddr;
262 
263 		return err;
264 	}
265 
266 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 		saddr = &sk->sk_v6_rcv_saddr;
268 
269 	fl6.flowi6_proto = IPPROTO_TCP;
270 	fl6.daddr = sk->sk_v6_daddr;
271 	fl6.saddr = saddr ? *saddr : np->saddr;
272 	fl6.flowi6_oif = sk->sk_bound_dev_if;
273 	fl6.flowi6_mark = sk->sk_mark;
274 	fl6.fl6_dport = usin->sin6_port;
275 	fl6.fl6_sport = inet->inet_sport;
276 	fl6.flowi6_uid = sk->sk_uid;
277 
278 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 	final_p = fl6_update_dst(&fl6, opt, &final);
280 
281 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
282 
283 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284 	if (IS_ERR(dst)) {
285 		err = PTR_ERR(dst);
286 		goto failure;
287 	}
288 
	/* No receive address set on the socket: adopt fl6.saddr as left by
	 * the route lookup.
	 */
289 	if (!saddr) {
290 		saddr = &fl6.saddr;
291 		sk->sk_v6_rcv_saddr = *saddr;
292 	}
293 
294 	/* set the source address */
295 	np->saddr = *saddr;
296 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 
298 	sk->sk_gso_type = SKB_GSO_TCPV6;
299 	ip6_dst_store(sk, dst, NULL, NULL);
300 
301 	icsk->icsk_ext_hdr_len = 0;
302 	if (opt)
303 		icsk->icsk_ext_hdr_len = opt->opt_flen +
304 					 opt->opt_nflen;
305 
306 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 
308 	inet->inet_dport = usin->sin6_port;
309 
310 	tcp_set_state(sk, TCP_SYN_SENT);
311 	err = inet6_hash_connect(tcp_death_row, sk);
312 	if (err)
313 		goto late_failure;
314 
315 	sk_set_txhash(sk);
316 
317 	if (likely(!tp->repair)) {
		/* Keep a write_seq that is already non-zero. */
318 		if (!tp->write_seq)
319 			WRITE_ONCE(tp->write_seq,
320 				   secure_tcpv6_seq(np->saddr.s6_addr32,
321 						    sk->sk_v6_daddr.s6_addr32,
322 						    inet->inet_sport,
323 						    inet->inet_dport));
324 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
325 						   np->saddr.s6_addr32,
326 						   sk->sk_v6_daddr.s6_addr32);
327 	}
328 
329 	if (tcp_fastopen_defer_connect(sk, &err))
330 		return err;
331 	if (err)
332 		goto late_failure;
333 
334 	err = tcp_connect(sk);
335 	if (err)
336 		goto late_failure;
337 
338 	return 0;
339 
/* Reached only after the socket entered TCP_SYN_SENT. */
340 late_failure:
341 	tcp_set_state(sk, TCP_CLOSE);
342 failure:
343 	inet->inet_dport = 0;
344 	sk->sk_route_caps = 0;
345 	return err;
346 }
347 
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350 	struct dst_entry *dst;
351 
352 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
353 		return;
354 
355 	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
356 	if (!dst)
357 		return;
358 
359 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
360 		tcp_sync_mss(sk, dst_mtu(dst));
361 		tcp_simple_retransmit(sk);
362 	}
363 }
364 
365 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
366 		u8 type, u8 code, int offset, __be32 info)
367 {
368 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
369 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
370 	struct net *net = dev_net(skb->dev);
371 	struct request_sock *fastopen;
372 	struct ipv6_pinfo *np;
373 	struct tcp_sock *tp;
374 	__u32 seq, snd_una;
375 	struct sock *sk;
376 	bool fatal;
377 	int err;
378 
379 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
380 					&hdr->daddr, th->dest,
381 					&hdr->saddr, ntohs(th->source),
382 					skb->dev->ifindex, inet6_sdif(skb));
383 
384 	if (!sk) {
385 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
386 				  ICMP6_MIB_INERRORS);
387 		return -ENOENT;
388 	}
389 
390 	if (sk->sk_state == TCP_TIME_WAIT) {
391 		inet_twsk_put(inet_twsk(sk));
392 		return 0;
393 	}
394 	seq = ntohl(th->seq);
395 	fatal = icmpv6_err_convert(type, code, &err);
396 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
397 		tcp_req_err(sk, seq, fatal);
398 		return 0;
399 	}
400 
401 	bh_lock_sock(sk);
402 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
403 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
404 
405 	if (sk->sk_state == TCP_CLOSE)
406 		goto out;
407 
408 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
409 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
410 		goto out;
411 	}
412 
413 	tp = tcp_sk(sk);
414 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
415 	fastopen = rcu_dereference(tp->fastopen_rsk);
416 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
417 	if (sk->sk_state != TCP_LISTEN &&
418 	    !between(seq, snd_una, tp->snd_nxt)) {
419 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
420 		goto out;
421 	}
422 
423 	np = tcp_inet6_sk(sk);
424 
425 	if (type == NDISC_REDIRECT) {
426 		if (!sock_owned_by_user(sk)) {
427 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
428 
429 			if (dst)
430 				dst->ops->redirect(dst, sk, skb);
431 		}
432 		goto out;
433 	}
434 
435 	if (type == ICMPV6_PKT_TOOBIG) {
436 		/* We are not interested in TCP_LISTEN and open_requests
437 		 * (SYN-ACKs send out by Linux are always <576bytes so
438 		 * they should go through unfragmented).
439 		 */
440 		if (sk->sk_state == TCP_LISTEN)
441 			goto out;
442 
443 		if (!ip6_sk_accept_pmtu(sk))
444 			goto out;
445 
446 		tp->mtu_info = ntohl(info);
447 		if (!sock_owned_by_user(sk))
448 			tcp_v6_mtu_reduced(sk);
449 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
450 					   &sk->sk_tsq_flags))
451 			sock_hold(sk);
452 		goto out;
453 	}
454 
455 
456 	/* Might be for an request_sock */
457 	switch (sk->sk_state) {
458 	case TCP_SYN_SENT:
459 	case TCP_SYN_RECV:
460 		/* Only in fast or simultaneous open. If a fast open socket is
461 		 * is already accepted it is treated as a connected one below.
462 		 */
463 		if (fastopen && !fastopen->sk)
464 			break;
465 
466 		if (!sock_owned_by_user(sk)) {
467 			sk->sk_err = err;
468 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
469 
470 			tcp_done(sk);
471 		} else
472 			sk->sk_err_soft = err;
473 		goto out;
474 	}
475 
476 	if (!sock_owned_by_user(sk) && np->recverr) {
477 		sk->sk_err = err;
478 		sk->sk_error_report(sk);
479 	} else
480 		sk->sk_err_soft = err;
481 
482 out:
483 	bh_unlock_sock(sk);
484 	sock_put(sk);
485 	return 0;
486 }
487 
488 
489 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
490 			      struct flowi *fl,
491 			      struct request_sock *req,
492 			      struct tcp_fastopen_cookie *foc,
493 			      enum tcp_synack_type synack_type)
494 {
495 	struct inet_request_sock *ireq = inet_rsk(req);
496 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
497 	struct ipv6_txoptions *opt;
498 	struct flowi6 *fl6 = &fl->u.ip6;
499 	struct sk_buff *skb;
500 	int err = -ENOMEM;
501 
502 	/* First, grab a route. */
503 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
504 					       IPPROTO_TCP)) == NULL)
505 		goto done;
506 
507 	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
508 
509 	if (skb) {
510 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
511 				    &ireq->ir_v6_rmt_addr);
512 
513 		fl6->daddr = ireq->ir_v6_rmt_addr;
514 		if (np->repflow && ireq->pktopts)
515 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
516 
517 		rcu_read_lock();
518 		opt = ireq->ipv6_opt;
519 		if (!opt)
520 			opt = rcu_dereference(np->opt);
521 		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
522 			       sk->sk_priority);
523 		rcu_read_unlock();
524 		err = net_xmit_eval(err);
525 	}
526 
527 done:
528 	return err;
529 }
530 
531 
532 static void tcp_v6_reqsk_destructor(struct request_sock *req)
533 {
534 	kfree(inet_rsk(req)->ipv6_opt);
535 	kfree_skb(inet_rsk(req)->pktopts);
536 }
537 
538 #ifdef CONFIG_TCP_MD5SIG
539 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
540 						   const struct in6_addr *addr,
541 						   int l3index)
542 {
543 	return tcp_md5_do_lookup(sk, l3index,
544 				 (union tcp_md5_addr *)addr, AF_INET6);
545 }
546 
547 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
548 						const struct sock *addr_sk)
549 {
550 	int l3index;
551 
552 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
553 						 addr_sk->sk_bound_dev_if);
554 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
555 				    l3index);
556 }
557 
558 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
559 				 char __user *optval, int optlen)
560 {
561 	struct tcp_md5sig cmd;
562 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
563 	int l3index = 0;
564 	u8 prefixlen;
565 
566 	if (optlen < sizeof(cmd))
567 		return -EINVAL;
568 
569 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
570 		return -EFAULT;
571 
572 	if (sin6->sin6_family != AF_INET6)
573 		return -EINVAL;
574 
575 	if (optname == TCP_MD5SIG_EXT &&
576 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
577 		prefixlen = cmd.tcpm_prefixlen;
578 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
579 					prefixlen > 32))
580 			return -EINVAL;
581 	} else {
582 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
583 	}
584 
585 	if (optname == TCP_MD5SIG_EXT &&
586 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
587 		struct net_device *dev;
588 
589 		rcu_read_lock();
590 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
591 		if (dev && netif_is_l3_master(dev))
592 			l3index = dev->ifindex;
593 		rcu_read_unlock();
594 
595 		/* ok to reference set/not set outside of rcu;
596 		 * right now device MUST be an L3 master
597 		 */
598 		if (!dev || !l3index)
599 			return -EINVAL;
600 	}
601 
602 	if (!cmd.tcpm_keylen) {
603 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
604 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
605 					      AF_INET, prefixlen,
606 					      l3index);
607 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
608 				      AF_INET6, prefixlen, l3index);
609 	}
610 
611 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
612 		return -EINVAL;
613 
614 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
615 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
616 				      AF_INET, prefixlen, l3index,
617 				      cmd.tcpm_key, cmd.tcpm_keylen,
618 				      GFP_KERNEL);
619 
620 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
621 			      AF_INET6, prefixlen, l3index,
622 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
623 }
624 
625 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
626 				   const struct in6_addr *daddr,
627 				   const struct in6_addr *saddr,
628 				   const struct tcphdr *th, int nbytes)
629 {
630 	struct tcp6_pseudohdr *bp;
631 	struct scatterlist sg;
632 	struct tcphdr *_th;
633 
634 	bp = hp->scratch;
635 	/* 1. TCP pseudo-header (RFC2460) */
636 	bp->saddr = *saddr;
637 	bp->daddr = *daddr;
638 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
639 	bp->len = cpu_to_be32(nbytes);
640 
641 	_th = (struct tcphdr *)(bp + 1);
642 	memcpy(_th, th, sizeof(*th));
643 	_th->check = 0;
644 
645 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
646 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
647 				sizeof(*bp) + sizeof(*th));
648 	return crypto_ahash_update(hp->md5_req);
649 }
650 
651 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
652 			       const struct in6_addr *daddr, struct in6_addr *saddr,
653 			       const struct tcphdr *th)
654 {
655 	struct tcp_md5sig_pool *hp;
656 	struct ahash_request *req;
657 
658 	hp = tcp_get_md5sig_pool();
659 	if (!hp)
660 		goto clear_hash_noput;
661 	req = hp->md5_req;
662 
663 	if (crypto_ahash_init(req))
664 		goto clear_hash;
665 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
666 		goto clear_hash;
667 	if (tcp_md5_hash_key(hp, key))
668 		goto clear_hash;
669 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
670 	if (crypto_ahash_final(req))
671 		goto clear_hash;
672 
673 	tcp_put_md5sig_pool();
674 	return 0;
675 
676 clear_hash:
677 	tcp_put_md5sig_pool();
678 clear_hash_noput:
679 	memset(md5_hash, 0, 16);
680 	return 1;
681 }
682 
683 static int tcp_v6_md5_hash_skb(char *md5_hash,
684 			       const struct tcp_md5sig_key *key,
685 			       const struct sock *sk,
686 			       const struct sk_buff *skb)
687 {
688 	const struct in6_addr *saddr, *daddr;
689 	struct tcp_md5sig_pool *hp;
690 	struct ahash_request *req;
691 	const struct tcphdr *th = tcp_hdr(skb);
692 
693 	if (sk) { /* valid for establish/request sockets */
694 		saddr = &sk->sk_v6_rcv_saddr;
695 		daddr = &sk->sk_v6_daddr;
696 	} else {
697 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
698 		saddr = &ip6h->saddr;
699 		daddr = &ip6h->daddr;
700 	}
701 
702 	hp = tcp_get_md5sig_pool();
703 	if (!hp)
704 		goto clear_hash_noput;
705 	req = hp->md5_req;
706 
707 	if (crypto_ahash_init(req))
708 		goto clear_hash;
709 
710 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
711 		goto clear_hash;
712 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
713 		goto clear_hash;
714 	if (tcp_md5_hash_key(hp, key))
715 		goto clear_hash;
716 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
717 	if (crypto_ahash_final(req))
718 		goto clear_hash;
719 
720 	tcp_put_md5sig_pool();
721 	return 0;
722 
723 clear_hash:
724 	tcp_put_md5sig_pool();
725 clear_hash_noput:
726 	memset(md5_hash, 0, 16);
727 	return 1;
728 }
729 
730 #endif
731 
732 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
733 				    const struct sk_buff *skb,
734 				    int dif, int sdif)
735 {
736 #ifdef CONFIG_TCP_MD5SIG
737 	const __u8 *hash_location = NULL;
738 	struct tcp_md5sig_key *hash_expected;
739 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
740 	const struct tcphdr *th = tcp_hdr(skb);
741 	int genhash, l3index;
742 	u8 newhash[16];
743 
744 	/* sdif set, means packet ingressed via a device
745 	 * in an L3 domain and dif is set to the l3mdev
746 	 */
747 	l3index = sdif ? dif : 0;
748 
749 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
750 	hash_location = tcp_parse_md5sig_option(th);
751 
752 	/* We've parsed the options - do we have a hash? */
753 	if (!hash_expected && !hash_location)
754 		return false;
755 
756 	if (hash_expected && !hash_location) {
757 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
758 		return true;
759 	}
760 
761 	if (!hash_expected && hash_location) {
762 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
763 		return true;
764 	}
765 
766 	/* check the signature */
767 	genhash = tcp_v6_md5_hash_skb(newhash,
768 				      hash_expected,
769 				      NULL, skb);
770 
771 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
772 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
773 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
774 				     genhash ? "failed" : "mismatch",
775 				     &ip6h->saddr, ntohs(th->source),
776 				     &ip6h->daddr, ntohs(th->dest), l3index);
777 		return true;
778 	}
779 #endif
780 	return false;
781 }
782 
783 static void tcp_v6_init_req(struct request_sock *req,
784 			    const struct sock *sk_listener,
785 			    struct sk_buff *skb)
786 {
787 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
788 	struct inet_request_sock *ireq = inet_rsk(req);
789 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
790 
791 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
792 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
793 
794 	/* So that link locals have meaning */
795 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
796 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
797 		ireq->ir_iif = tcp_v6_iif(skb);
798 
799 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
800 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
801 	     np->rxopt.bits.rxinfo ||
802 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
803 	     np->rxopt.bits.rxohlim || np->repflow)) {
804 		refcount_inc(&skb->users);
805 		ireq->pktopts = skb;
806 	}
807 }
808 
809 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
810 					  struct flowi *fl,
811 					  const struct request_sock *req)
812 {
813 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
814 }
815 
/* request_sock operations for IPv6 TCP connection requests. The ACK/RST
 * senders and the destructor are the IPv6 handlers from this file;
 * SYN-ACK retransmission and its timeout use the shared tcp_rtx_synack /
 * tcp_syn_ack_timeout helpers.
 */
816 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
817 	.family		=	AF_INET6,
818 	.obj_size	=	sizeof(struct tcp6_request_sock),
819 	.rtx_syn_ack	=	tcp_rtx_synack,
820 	.send_ack	=	tcp_v6_reqsk_send_ack,
821 	.destructor	=	tcp_v6_reqsk_destructor,
822 	.send_reset	=	tcp_v6_send_reset,
823 	.syn_ack_timeout =	tcp_syn_ack_timeout,
824 };
825 
/* IPv6 specific hooks used while processing connection requests. The MD5
 * and SYN-cookie members exist only when the corresponding config option is
 * enabled.
 */
826 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	/* Same value tcp_v6_connect() stores in rx_opt.mss_clamp. */
827 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
828 				sizeof(struct ipv6hdr),
829 #ifdef CONFIG_TCP_MD5SIG
830 	.req_md5_lookup	=	tcp_v6_md5_lookup,
831 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
832 #endif
833 	.init_req	=	tcp_v6_init_req,
834 #ifdef CONFIG_SYN_COOKIES
835 	.cookie_init_seq =	cookie_v6_init_sequence,
836 #endif
837 	.route_req	=	tcp_v6_route_req,
838 	.init_seq	=	tcp_v6_init_seq,
839 	.init_ts_off	=	tcp_v6_init_ts_off,
840 	.send_synack	=	tcp_v6_send_synack,
841 };
842 
/* Build and send a bare TCP segment (ACK or RST) in reply to @skb, using the
 * per-namespace control socket (net->ipv6.tcp_sk) rather than @sk.
 *
 * @sk:       socket the reply relates to; may be NULL, and is checked for
 *            TCP_TIME_WAIT before its mark is read
 * @skb:      the segment being answered; ports and addresses are swapped
 * @seq/@ack: sequence and acknowledgment numbers for the reply
 * @win:      window field value
 * @tsval/@tsecr: timestamp option values; the option is only added when
 *            @tsecr is non-zero
 * @oif:      output interface, 0 to derive it from @skb
 * @key:      MD5 key; when non-NULL (and CONFIG_TCP_MD5SIG is enabled) an
 *            MD5 signature option is added
 * @rst:      non-zero to set the RST flag
 * @tclass/@label/@priority: traffic class, flow label and priority used for
 *            transmission
 *
 * Allocation or route lookup failures are silent: nothing is sent.
 */
843 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
844 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
845 				 int oif, struct tcp_md5sig_key *key, int rst,
846 				 u8 tclass, __be32 label, u32 priority)
847 {
848 	const struct tcphdr *th = tcp_hdr(skb);
849 	struct tcphdr *t1;
850 	struct sk_buff *buff;
851 	struct flowi6 fl6;
852 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
853 	struct sock *ctl_sk = net->ipv6.tcp_sk;
854 	unsigned int tot_len = sizeof(struct tcphdr);
855 	struct dst_entry *dst;
856 	__be32 *topt;
857 	__u32 mark = 0;
858 
	/* Account for the options that will follow the TCP header. */
859 	if (tsecr)
860 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
861 #ifdef CONFIG_TCP_MD5SIG
862 	if (key)
863 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
864 #endif
865 
866 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
867 			 GFP_ATOMIC);
868 	if (!buff)
869 		return;
870 
871 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
872 
873 	t1 = skb_push(buff, tot_len);
874 	skb_reset_transport_header(buff);
875 
876 	/* Swap the send and the receive. */
877 	memset(t1, 0, sizeof(*t1));
878 	t1->dest = th->source;
879 	t1->source = th->dest;
880 	t1->doff = tot_len / 4;
881 	t1->seq = htonl(seq);
882 	t1->ack_seq = htonl(ack);
	/* ACK is set on every non-RST reply, and on a RST only when the
	 * answered segment carried no ACK.
	 */
883 	t1->ack = !rst || !th->ack;
884 	t1->rst = rst;
885 	t1->window = htons(win);
886 
	/* Options start right behind the fixed header. */
887 	topt = (__be32 *)(t1 + 1);
888 
889 	if (tsecr) {
890 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
891 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
892 		*topt++ = htonl(tsval);
893 		*topt++ = htonl(tsecr);
894 	}
895 
896 #ifdef CONFIG_TCP_MD5SIG
897 	if (key) {
898 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
899 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
900 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
901 				    &ipv6_hdr(skb)->saddr,
902 				    &ipv6_hdr(skb)->daddr, t1);
903 	}
904 #endif
905 
	/* The reply flows in the opposite direction of @skb. */
906 	memset(&fl6, 0, sizeof(fl6));
907 	fl6.daddr = ipv6_hdr(skb)->saddr;
908 	fl6.saddr = ipv6_hdr(skb)->daddr;
909 	fl6.flowlabel = label;
910 
911 	buff->ip_summed = CHECKSUM_PARTIAL;
912 	buff->csum = 0;
913 
914 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
915 
916 	fl6.flowi6_proto = IPPROTO_TCP;
917 	if (rt6_need_strict(&fl6.daddr) && !oif)
918 		fl6.flowi6_oif = tcp_v6_iif(skb);
919 	else {
920 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
921 			oif = skb->skb_iif;
922 
923 		fl6.flowi6_oif = oif;
924 	}
925 
926 	if (sk) {
927 		if (sk->sk_state == TCP_TIME_WAIT) {
928 			mark = inet_twsk(sk)->tw_mark;
929 			/* autoflowlabel relies on buff->hash */
930 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
931 				     PKT_HASH_TYPE_L4);
932 		} else {
933 			mark = sk->sk_mark;
934 		}
935 		buff->tstamp = tcp_transmit_time(sk);
936 	}
	/* A non-zero reply mark takes precedence over the socket's mark. */
937 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
938 	fl6.fl6_dport = t1->dest;
939 	fl6.fl6_sport = t1->source;
940 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
941 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
942 
943 	/* Pass a socket to ip6_dst_lookup either it is for RST
944 	 * Underlying function will use this to retrieve the network
945 	 * namespace
946 	 */
947 	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
948 	if (!IS_ERR(dst)) {
949 		skb_dst_set(buff, dst);
950 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
951 			 priority);
952 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
953 		if (rst)
954 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
955 		return;
956 	}
957 
	/* No route: drop the reply we built. */
958 	kfree_skb(buff);
959 }
960 
/* Send a RST in reply to @skb.
 *
 * @sk may be NULL, a full socket, or a non-full socket (its state is checked
 * for TCP_TIME_WAIT). No RST is sent when @skb itself carries RST, or when
 * there is no socket and the packet was not addressed to a unicast
 * destination.
 *
 * With CONFIG_TCP_MD5SIG: for a full socket the peer's key is looked up on
 * it and, if found, used to sign the RST. Without a full socket, a segment
 * carrying an MD5 option is only answered when a listener with a matching
 * key is found and the segment's signature verifies.
 */
961 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
962 {
963 	const struct tcphdr *th = tcp_hdr(skb);
964 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
965 	u32 seq = 0, ack_seq = 0;
966 	struct tcp_md5sig_key *key = NULL;
967 #ifdef CONFIG_TCP_MD5SIG
968 	const __u8 *hash_location = NULL;
969 	unsigned char newhash[16];
970 	int genhash;
971 	struct sock *sk1 = NULL;
972 #endif
973 	__be32 label = 0;
974 	u32 priority = 0;
975 	struct net *net;
976 	int oif = 0;
977 
	/* Never answer a RST with a RST. */
978 	if (th->rst)
979 		return;
980 
981 	/* If sk not NULL, it means we did a successful lookup and incoming
982 	 * route had to be correct. prequeue might have dropped our dst.
983 	 */
984 	if (!sk && !ipv6_unicast_destination(skb))
985 		return;
986 
987 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
988 #ifdef CONFIG_TCP_MD5SIG
	/* Held until the "out" label, i.e. across the key's use below. */
989 	rcu_read_lock();
990 	hash_location = tcp_parse_md5sig_option(th);
991 	if (sk && sk_fullsock(sk)) {
992 		int l3index;
993 
994 		/* sdif set, means packet ingressed via a device
995 		 * in an L3 domain and inet_iif is set to it.
996 		 */
997 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
998 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
999 	} else if (hash_location) {
1000 		int dif = tcp_v6_iif_l3_slave(skb);
1001 		int sdif = tcp_v6_sdif(skb);
1002 		int l3index;
1003 
1004 		/*
1005 		 * The active side is lost. Try to find a listening socket
1006 		 * through the source port, and then find the md5 key through
1007 		 * that listening socket. This does not weaken security:
1008 		 * the incoming packet is checked against the md5 hash computed
1009 		 * with the key that was found, and no RST is generated if the
		 * md5 hash doesn't match.
1010 		 */
1011 		sk1 = inet6_lookup_listener(net,
1012 					   &tcp_hashinfo, NULL, 0,
1013 					   &ipv6h->saddr,
1014 					   th->source, &ipv6h->daddr,
1015 					   ntohs(th->source), dif, sdif);
1016 		if (!sk1)
1017 			goto out;
1018 
1019 		/* sdif set, means packet ingressed via a device
1020 		 * in an L3 domain and dif is set to it.
1021 		 */
1022 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1023 
1024 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1025 		if (!key)
1026 			goto out;
1027 
1028 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1029 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1030 			goto out;
1031 	}
1032 #endif
1033 
	/* If the segment had ACK set, the RST takes its sequence number from
	 * the segment's ack field; otherwise the RST acknowledges everything
	 * the segment occupied (payload plus SYN/FIN).
	 */
1034 	if (th->ack)
1035 		seq = ntohl(th->ack_seq);
1036 	else
1037 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1038 			  (th->doff << 2);
1039 
1040 	if (sk) {
1041 		oif = sk->sk_bound_dev_if;
1042 		if (sk_fullsock(sk)) {
1043 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1044 
1045 			trace_tcp_send_reset(sk, skb);
1046 			if (np->repflow)
1047 				label = ip6_flowlabel(ipv6h);
1048 			priority = sk->sk_priority;
1049 		}
1050 		if (sk->sk_state == TCP_TIME_WAIT) {
1051 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1052 			priority = inet_twsk(sk)->tw_priority;
1053 		}
1054 	} else {
		/* No socket: reflect the flow label only if the sysctl says so. */
1055 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1056 			label = ip6_flowlabel(ipv6h);
1057 	}
1058 
1059 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1060 			     label, priority);
1061 
1062 #ifdef CONFIG_TCP_MD5SIG
1063 out:
1064 	rcu_read_unlock();
1065 #endif
1066 }
1067 
1068 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1069 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1070 			    struct tcp_md5sig_key *key, u8 tclass,
1071 			    __be32 label, u32 priority)
1072 {
1073 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1074 			     tclass, label, priority);
1075 }
1076 
1077 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1078 {
1079 	struct inet_timewait_sock *tw = inet_twsk(sk);
1080 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1081 
1082 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1083 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1084 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1085 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1086 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1087 
1088 	inet_twsk_put(tw);
1089 }
1090 
1091 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1092 				  struct request_sock *req)
1093 {
1094 	int l3index;
1095 
1096 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1097 
1098 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1099 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1100 	 */
1101 	/* RFC 7323 2.3
1102 	 * The window field (SEG.WND) of every outgoing segment, with the
1103 	 * exception of <SYN> segments, MUST be right-shifted by
1104 	 * Rcv.Wind.Shift bits:
1105 	 */
1106 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1107 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1108 			tcp_rsk(req)->rcv_nxt,
1109 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1110 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1111 			req->ts_recent, sk->sk_bound_dev_if,
1112 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1113 			0, 0, sk->sk_priority);
1114 }
1115 
1116 
/* With CONFIG_SYN_COOKIES, segments without the SYN flag are passed to
 * cookie_v6_check() and its result is returned; otherwise @sk is returned
 * unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		return cookie_v6_check(sk, skb);
#endif
	return sk;
}
1127 
1128 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1129 			 struct tcphdr *th, u32 *cookie)
1130 {
1131 	u16 mss = 0;
1132 #ifdef CONFIG_SYN_COOKIES
1133 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1134 				    &tcp_request_sock_ipv6_ops, sk, th);
1135 	if (mss) {
1136 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1137 		tcp_synq_overflow(sk);
1138 	}
1139 #endif
1140 	return mss;
1141 }
1142 
1143 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1144 {
1145 	if (skb->protocol == htons(ETH_P_IP))
1146 		return tcp_v4_conn_request(sk, skb);
1147 
1148 	if (!ipv6_unicast_destination(skb))
1149 		goto drop;
1150 
1151 	return tcp_conn_request(&tcp6_request_sock_ops,
1152 				&tcp_request_sock_ipv6_ops, sk, skb);
1153 
1154 drop:
1155 	tcp_listendrop(sk);
1156 	return 0; /* don't send reset */
1157 }
1158 
1159 static void tcp_v6_restore_cb(struct sk_buff *skb)
1160 {
1161 	/* We need to move header back to the beginning if xfrm6_policy_check()
1162 	 * and tcp_v6_fill_cb() are going to be called again.
1163 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1164 	 */
1165 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1166 		sizeof(struct inet6_skb_parm));
1167 }
1168 
1169 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1170 					 struct request_sock *req,
1171 					 struct dst_entry *dst,
1172 					 struct request_sock *req_unhash,
1173 					 bool *own_req)
1174 {
1175 	struct inet_request_sock *ireq;
1176 	struct ipv6_pinfo *newnp;
1177 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1178 	struct ipv6_txoptions *opt;
1179 	struct inet_sock *newinet;
1180 	struct tcp_sock *newtp;
1181 	struct sock *newsk;
1182 #ifdef CONFIG_TCP_MD5SIG
1183 	struct tcp_md5sig_key *key;
1184 	int l3index;
1185 #endif
1186 	struct flowi6 fl6;
1187 
1188 	if (skb->protocol == htons(ETH_P_IP)) {
1189 		/*
1190 		 *	v6 mapped
1191 		 */
1192 
1193 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1194 					     req_unhash, own_req);
1195 
1196 		if (!newsk)
1197 			return NULL;
1198 
1199 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1200 
1201 		newinet = inet_sk(newsk);
1202 		newnp = tcp_inet6_sk(newsk);
1203 		newtp = tcp_sk(newsk);
1204 
1205 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1206 
1207 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1208 
1209 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1210 		if (sk_is_mptcp(newsk))
1211 			mptcpv6_handle_mapped(newsk, true);
1212 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1213 #ifdef CONFIG_TCP_MD5SIG
1214 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1215 #endif
1216 
1217 		newnp->ipv6_mc_list = NULL;
1218 		newnp->ipv6_ac_list = NULL;
1219 		newnp->ipv6_fl_list = NULL;
1220 		newnp->pktoptions  = NULL;
1221 		newnp->opt	   = NULL;
1222 		newnp->mcast_oif   = inet_iif(skb);
1223 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1224 		newnp->rcv_flowinfo = 0;
1225 		if (np->repflow)
1226 			newnp->flow_label = 0;
1227 
1228 		/*
1229 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1230 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1231 		 * that function for the gory details. -acme
1232 		 */
1233 
1234 		/* It is tricky place. Until this moment IPv4 tcp
1235 		   worked with IPv6 icsk.icsk_af_ops.
1236 		   Sync it now.
1237 		 */
1238 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1239 
1240 		return newsk;
1241 	}
1242 
1243 	ireq = inet_rsk(req);
1244 
1245 	if (sk_acceptq_is_full(sk))
1246 		goto out_overflow;
1247 
1248 	if (!dst) {
1249 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1250 		if (!dst)
1251 			goto out;
1252 	}
1253 
1254 	newsk = tcp_create_openreq_child(sk, req, skb);
1255 	if (!newsk)
1256 		goto out_nonewsk;
1257 
1258 	/*
1259 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1260 	 * count here, tcp_create_openreq_child now does this for us, see the
1261 	 * comment in that function for the gory details. -acme
1262 	 */
1263 
1264 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1265 	ip6_dst_store(newsk, dst, NULL, NULL);
1266 	inet6_sk_rx_dst_set(newsk, skb);
1267 
1268 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1269 
1270 	newtp = tcp_sk(newsk);
1271 	newinet = inet_sk(newsk);
1272 	newnp = tcp_inet6_sk(newsk);
1273 
1274 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1275 
1276 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1277 	newnp->saddr = ireq->ir_v6_loc_addr;
1278 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1279 	newsk->sk_bound_dev_if = ireq->ir_iif;
1280 
1281 	/* Now IPv6 options...
1282 
1283 	   First: no IPv4 options.
1284 	 */
1285 	newinet->inet_opt = NULL;
1286 	newnp->ipv6_mc_list = NULL;
1287 	newnp->ipv6_ac_list = NULL;
1288 	newnp->ipv6_fl_list = NULL;
1289 
1290 	/* Clone RX bits */
1291 	newnp->rxopt.all = np->rxopt.all;
1292 
1293 	newnp->pktoptions = NULL;
1294 	newnp->opt	  = NULL;
1295 	newnp->mcast_oif  = tcp_v6_iif(skb);
1296 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1297 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1298 	if (np->repflow)
1299 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1300 
1301 	/* Clone native IPv6 options from listening socket (if any)
1302 
1303 	   Yes, keeping reference count would be much more clever,
1304 	   but we make one more one thing there: reattach optmem
1305 	   to newsk.
1306 	 */
1307 	opt = ireq->ipv6_opt;
1308 	if (!opt)
1309 		opt = rcu_dereference(np->opt);
1310 	if (opt) {
1311 		opt = ipv6_dup_options(newsk, opt);
1312 		RCU_INIT_POINTER(newnp->opt, opt);
1313 	}
1314 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1315 	if (opt)
1316 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1317 						    opt->opt_flen;
1318 
1319 	tcp_ca_openreq_child(newsk, dst);
1320 
1321 	tcp_sync_mss(newsk, dst_mtu(dst));
1322 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1323 
1324 	tcp_initialize_rcv_mss(newsk);
1325 
1326 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1327 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1328 
1329 #ifdef CONFIG_TCP_MD5SIG
1330 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1331 
1332 	/* Copy over the MD5 key from the original socket */
1333 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1334 	if (key) {
1335 		/* We're using one, so create a matching key
1336 		 * on the newsk structure. If we fail to get
1337 		 * memory, then we end up not copying the key
1338 		 * across. Shucks.
1339 		 */
1340 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1341 			       AF_INET6, 128, l3index, key->key, key->keylen,
1342 			       sk_gfp_mask(sk, GFP_ATOMIC));
1343 	}
1344 #endif
1345 
1346 	if (__inet_inherit_port(sk, newsk) < 0) {
1347 		inet_csk_prepare_forced_close(newsk);
1348 		tcp_done(newsk);
1349 		goto out;
1350 	}
1351 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1352 	if (*own_req) {
1353 		tcp_move_syn(newtp, req);
1354 
1355 		/* Clone pktoptions received with SYN, if we own the req */
1356 		if (ireq->pktopts) {
1357 			newnp->pktoptions = skb_clone(ireq->pktopts,
1358 						      sk_gfp_mask(sk, GFP_ATOMIC));
1359 			consume_skb(ireq->pktopts);
1360 			ireq->pktopts = NULL;
1361 			if (newnp->pktoptions) {
1362 				tcp_v6_restore_cb(newnp->pktoptions);
1363 				skb_set_owner_r(newnp->pktoptions, newsk);
1364 			}
1365 		}
1366 	}
1367 
1368 	return newsk;
1369 
1370 out_overflow:
1371 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1372 out_nonewsk:
1373 	dst_release(dst);
1374 out:
1375 	tcp_listendrop(sk);
1376 	return NULL;
1377 }
1378 
1379 /* The socket must have it's spinlock held when we get
1380  * here, unless it is a TCP_LISTEN socket.
1381  *
1382  * We have a potential double-lock case here, so even when
1383  * doing backlog processing we use the BH locking scheme.
1384  * This is because we cannot sleep with the original spinlock
1385  * held.
1386  */
1387 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1388 {
1389 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1390 	struct sk_buff *opt_skb = NULL;
1391 	struct tcp_sock *tp;
1392 
1393 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1394 	   goes to IPv4 receive handler and backlogged.
1395 	   From backlog it always goes here. Kerboom...
1396 	   Fortunately, tcp_rcv_established and rcv_established
1397 	   handle them correctly, but it is not case with
1398 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1399 	 */
1400 
1401 	if (skb->protocol == htons(ETH_P_IP))
1402 		return tcp_v4_do_rcv(sk, skb);
1403 
1404 	/*
1405 	 *	socket locking is here for SMP purposes as backlog rcv
1406 	 *	is currently called with bh processing disabled.
1407 	 */
1408 
1409 	/* Do Stevens' IPV6_PKTOPTIONS.
1410 
1411 	   Yes, guys, it is the only place in our code, where we
1412 	   may make it not affecting IPv4.
1413 	   The rest of code is protocol independent,
1414 	   and I do not like idea to uglify IPv4.
1415 
1416 	   Actually, all the idea behind IPV6_PKTOPTIONS
1417 	   looks not very well thought. For now we latch
1418 	   options, received in the last packet, enqueued
1419 	   by tcp. Feel free to propose better solution.
1420 					       --ANK (980728)
1421 	 */
1422 	if (np->rxopt.all)
1423 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1424 
1425 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1426 		struct dst_entry *dst = sk->sk_rx_dst;
1427 
1428 		sock_rps_save_rxhash(sk, skb);
1429 		sk_mark_napi_id(sk, skb);
1430 		if (dst) {
1431 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1432 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1433 				dst_release(dst);
1434 				sk->sk_rx_dst = NULL;
1435 			}
1436 		}
1437 
1438 		tcp_rcv_established(sk, skb);
1439 		if (opt_skb)
1440 			goto ipv6_pktoptions;
1441 		return 0;
1442 	}
1443 
1444 	if (tcp_checksum_complete(skb))
1445 		goto csum_err;
1446 
1447 	if (sk->sk_state == TCP_LISTEN) {
1448 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1449 
1450 		if (!nsk)
1451 			goto discard;
1452 
1453 		if (nsk != sk) {
1454 			if (tcp_child_process(sk, nsk, skb))
1455 				goto reset;
1456 			if (opt_skb)
1457 				__kfree_skb(opt_skb);
1458 			return 0;
1459 		}
1460 	} else
1461 		sock_rps_save_rxhash(sk, skb);
1462 
1463 	if (tcp_rcv_state_process(sk, skb))
1464 		goto reset;
1465 	if (opt_skb)
1466 		goto ipv6_pktoptions;
1467 	return 0;
1468 
1469 reset:
1470 	tcp_v6_send_reset(sk, skb);
1471 discard:
1472 	if (opt_skb)
1473 		__kfree_skb(opt_skb);
1474 	kfree_skb(skb);
1475 	return 0;
1476 csum_err:
1477 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1478 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1479 	goto discard;
1480 
1481 
1482 ipv6_pktoptions:
1483 	/* Do you ask, what is it?
1484 
1485 	   1. skb was enqueued by tcp.
1486 	   2. skb is added to tail of read queue, rather than out of order.
1487 	   3. socket is not in passive state.
1488 	   4. Finally, it really contains options, which user wants to receive.
1489 	 */
1490 	tp = tcp_sk(sk);
1491 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1492 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1493 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1494 			np->mcast_oif = tcp_v6_iif(opt_skb);
1495 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1496 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1497 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1498 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1499 		if (np->repflow)
1500 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1501 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1502 			skb_set_owner_r(opt_skb, sk);
1503 			tcp_v6_restore_cb(opt_skb);
1504 			opt_skb = xchg(&np->pktoptions, opt_skb);
1505 		} else {
1506 			__kfree_skb(opt_skb);
1507 			opt_skb = xchg(&np->pktoptions, NULL);
1508 		}
1509 	}
1510 
1511 	kfree_skb(opt_skb);
1512 	return 0;
1513 }
1514 
1515 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1516 			   const struct tcphdr *th)
1517 {
1518 	/* This is tricky: we move IP6CB at its correct location into
1519 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1520 	 * _decode_session6() uses IP6CB().
1521 	 * barrier() makes sure compiler won't play aliasing games.
1522 	 */
1523 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1524 		sizeof(struct inet6_skb_parm));
1525 	barrier();
1526 
1527 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1528 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1529 				    skb->len - th->doff*4);
1530 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1531 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1532 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1533 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1534 	TCP_SKB_CB(skb)->sacked = 0;
1535 	TCP_SKB_CB(skb)->has_rxtstamp =
1536 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1537 }
1538 
1539 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1540 {
1541 	struct sk_buff *skb_to_free;
1542 	int sdif = inet6_sdif(skb);
1543 	int dif = inet6_iif(skb);
1544 	const struct tcphdr *th;
1545 	const struct ipv6hdr *hdr;
1546 	bool refcounted;
1547 	struct sock *sk;
1548 	int ret;
1549 	struct net *net = dev_net(skb->dev);
1550 
1551 	if (skb->pkt_type != PACKET_HOST)
1552 		goto discard_it;
1553 
1554 	/*
1555 	 *	Count it even if it's bad.
1556 	 */
1557 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1558 
1559 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1560 		goto discard_it;
1561 
1562 	th = (const struct tcphdr *)skb->data;
1563 
1564 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1565 		goto bad_packet;
1566 	if (!pskb_may_pull(skb, th->doff*4))
1567 		goto discard_it;
1568 
1569 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1570 		goto csum_error;
1571 
1572 	th = (const struct tcphdr *)skb->data;
1573 	hdr = ipv6_hdr(skb);
1574 
1575 lookup:
1576 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1577 				th->source, th->dest, inet6_iif(skb), sdif,
1578 				&refcounted);
1579 	if (!sk)
1580 		goto no_tcp_socket;
1581 
1582 process:
1583 	if (sk->sk_state == TCP_TIME_WAIT)
1584 		goto do_time_wait;
1585 
1586 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1587 		struct request_sock *req = inet_reqsk(sk);
1588 		bool req_stolen = false;
1589 		struct sock *nsk;
1590 
1591 		sk = req->rsk_listener;
1592 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1593 			sk_drops_add(sk, skb);
1594 			reqsk_put(req);
1595 			goto discard_it;
1596 		}
1597 		if (tcp_checksum_complete(skb)) {
1598 			reqsk_put(req);
1599 			goto csum_error;
1600 		}
1601 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1602 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1603 			goto lookup;
1604 		}
1605 		sock_hold(sk);
1606 		refcounted = true;
1607 		nsk = NULL;
1608 		if (!tcp_filter(sk, skb)) {
1609 			th = (const struct tcphdr *)skb->data;
1610 			hdr = ipv6_hdr(skb);
1611 			tcp_v6_fill_cb(skb, hdr, th);
1612 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1613 		}
1614 		if (!nsk) {
1615 			reqsk_put(req);
1616 			if (req_stolen) {
1617 				/* Another cpu got exclusive access to req
1618 				 * and created a full blown socket.
1619 				 * Try to feed this packet to this socket
1620 				 * instead of discarding it.
1621 				 */
1622 				tcp_v6_restore_cb(skb);
1623 				sock_put(sk);
1624 				goto lookup;
1625 			}
1626 			goto discard_and_relse;
1627 		}
1628 		if (nsk == sk) {
1629 			reqsk_put(req);
1630 			tcp_v6_restore_cb(skb);
1631 		} else if (tcp_child_process(sk, nsk, skb)) {
1632 			tcp_v6_send_reset(nsk, skb);
1633 			goto discard_and_relse;
1634 		} else {
1635 			sock_put(sk);
1636 			return 0;
1637 		}
1638 	}
1639 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1640 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1641 		goto discard_and_relse;
1642 	}
1643 
1644 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1645 		goto discard_and_relse;
1646 
1647 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1648 		goto discard_and_relse;
1649 
1650 	if (tcp_filter(sk, skb))
1651 		goto discard_and_relse;
1652 	th = (const struct tcphdr *)skb->data;
1653 	hdr = ipv6_hdr(skb);
1654 	tcp_v6_fill_cb(skb, hdr, th);
1655 
1656 	skb->dev = NULL;
1657 
1658 	if (sk->sk_state == TCP_LISTEN) {
1659 		ret = tcp_v6_do_rcv(sk, skb);
1660 		goto put_and_return;
1661 	}
1662 
1663 	sk_incoming_cpu_update(sk);
1664 
1665 	bh_lock_sock_nested(sk);
1666 	tcp_segs_in(tcp_sk(sk), skb);
1667 	ret = 0;
1668 	if (!sock_owned_by_user(sk)) {
1669 		skb_to_free = sk->sk_rx_skb_cache;
1670 		sk->sk_rx_skb_cache = NULL;
1671 		ret = tcp_v6_do_rcv(sk, skb);
1672 	} else {
1673 		if (tcp_add_backlog(sk, skb))
1674 			goto discard_and_relse;
1675 		skb_to_free = NULL;
1676 	}
1677 	bh_unlock_sock(sk);
1678 	if (skb_to_free)
1679 		__kfree_skb(skb_to_free);
1680 put_and_return:
1681 	if (refcounted)
1682 		sock_put(sk);
1683 	return ret ? -1 : 0;
1684 
1685 no_tcp_socket:
1686 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1687 		goto discard_it;
1688 
1689 	tcp_v6_fill_cb(skb, hdr, th);
1690 
1691 	if (tcp_checksum_complete(skb)) {
1692 csum_error:
1693 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1694 bad_packet:
1695 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1696 	} else {
1697 		tcp_v6_send_reset(NULL, skb);
1698 	}
1699 
1700 discard_it:
1701 	kfree_skb(skb);
1702 	return 0;
1703 
1704 discard_and_relse:
1705 	sk_drops_add(sk, skb);
1706 	if (refcounted)
1707 		sock_put(sk);
1708 	goto discard_it;
1709 
1710 do_time_wait:
1711 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1712 		inet_twsk_put(inet_twsk(sk));
1713 		goto discard_it;
1714 	}
1715 
1716 	tcp_v6_fill_cb(skb, hdr, th);
1717 
1718 	if (tcp_checksum_complete(skb)) {
1719 		inet_twsk_put(inet_twsk(sk));
1720 		goto csum_error;
1721 	}
1722 
1723 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1724 	case TCP_TW_SYN:
1725 	{
1726 		struct sock *sk2;
1727 
1728 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1729 					    skb, __tcp_hdrlen(th),
1730 					    &ipv6_hdr(skb)->saddr, th->source,
1731 					    &ipv6_hdr(skb)->daddr,
1732 					    ntohs(th->dest),
1733 					    tcp_v6_iif_l3_slave(skb),
1734 					    sdif);
1735 		if (sk2) {
1736 			struct inet_timewait_sock *tw = inet_twsk(sk);
1737 			inet_twsk_deschedule_put(tw);
1738 			sk = sk2;
1739 			tcp_v6_restore_cb(skb);
1740 			refcounted = false;
1741 			goto process;
1742 		}
1743 	}
1744 		/* to ACK */
1745 		/* fall through */
1746 	case TCP_TW_ACK:
1747 		tcp_v6_timewait_ack(sk, skb);
1748 		break;
1749 	case TCP_TW_RST:
1750 		tcp_v6_send_reset(sk, skb);
1751 		inet_twsk_deschedule_put(inet_twsk(sk));
1752 		goto discard_it;
1753 	case TCP_TW_SUCCESS:
1754 		;
1755 	}
1756 	goto discard_it;
1757 }
1758 
1759 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1760 {
1761 	const struct ipv6hdr *hdr;
1762 	const struct tcphdr *th;
1763 	struct sock *sk;
1764 
1765 	if (skb->pkt_type != PACKET_HOST)
1766 		return;
1767 
1768 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1769 		return;
1770 
1771 	hdr = ipv6_hdr(skb);
1772 	th = tcp_hdr(skb);
1773 
1774 	if (th->doff < sizeof(struct tcphdr) / 4)
1775 		return;
1776 
1777 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1778 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1779 					&hdr->saddr, th->source,
1780 					&hdr->daddr, ntohs(th->dest),
1781 					inet6_iif(skb), inet6_sdif(skb));
1782 	if (sk) {
1783 		skb->sk = sk;
1784 		skb->destructor = sock_edemux;
1785 		if (sk_fullsock(sk)) {
1786 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1787 
1788 			if (dst)
1789 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1790 			if (dst &&
1791 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1792 				skb_dst_set_noref(skb, dst);
1793 		}
1794 	}
1795 }
1796 
1797 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1798 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1799 	.twsk_unique	= tcp_twsk_unique,
1800 	.twsk_destructor = tcp_twsk_destructor,
1801 };
1802 
1803 const struct inet_connection_sock_af_ops ipv6_specific = {
1804 	.queue_xmit	   = inet6_csk_xmit,
1805 	.send_check	   = tcp_v6_send_check,
1806 	.rebuild_header	   = inet6_sk_rebuild_header,
1807 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1808 	.conn_request	   = tcp_v6_conn_request,
1809 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1810 	.net_header_len	   = sizeof(struct ipv6hdr),
1811 	.net_frag_header_len = sizeof(struct frag_hdr),
1812 	.setsockopt	   = ipv6_setsockopt,
1813 	.getsockopt	   = ipv6_getsockopt,
1814 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1815 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1816 #ifdef CONFIG_COMPAT
1817 	.compat_setsockopt = compat_ipv6_setsockopt,
1818 	.compat_getsockopt = compat_ipv6_getsockopt,
1819 #endif
1820 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1821 };
1822 
1823 #ifdef CONFIG_TCP_MD5SIG
1824 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1825 	.md5_lookup	=	tcp_v6_md5_lookup,
1826 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1827 	.md5_parse	=	tcp_v6_parse_md5_keys,
1828 };
1829 #endif
1830 
1831 /*
1832  *	TCP over IPv4 via INET6 API
1833  */
1834 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1835 	.queue_xmit	   = ip_queue_xmit,
1836 	.send_check	   = tcp_v4_send_check,
1837 	.rebuild_header	   = inet_sk_rebuild_header,
1838 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1839 	.conn_request	   = tcp_v6_conn_request,
1840 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1841 	.net_header_len	   = sizeof(struct iphdr),
1842 	.setsockopt	   = ipv6_setsockopt,
1843 	.getsockopt	   = ipv6_getsockopt,
1844 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1845 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1846 #ifdef CONFIG_COMPAT
1847 	.compat_setsockopt = compat_ipv6_setsockopt,
1848 	.compat_getsockopt = compat_ipv6_getsockopt,
1849 #endif
1850 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1851 };
1852 
1853 #ifdef CONFIG_TCP_MD5SIG
1854 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1855 	.md5_lookup	=	tcp_v4_md5_lookup,
1856 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1857 	.md5_parse	=	tcp_v6_parse_md5_keys,
1858 };
1859 #endif
1860 
1861 /* NOTE: A lot of things set to zero explicitly by call to
1862  *       sk_alloc() so need not be done here.
1863  */
1864 static int tcp_v6_init_sock(struct sock *sk)
1865 {
1866 	struct inet_connection_sock *icsk = inet_csk(sk);
1867 
1868 	tcp_init_sock(sk);
1869 
1870 	icsk->icsk_af_ops = &ipv6_specific;
1871 
1872 #ifdef CONFIG_TCP_MD5SIG
1873 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1874 #endif
1875 
1876 	return 0;
1877 }
1878 
/* Socket teardown: run the TCP (v4) destructor, then the IPv6 one. */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);

	inet6_destroy_sock(sk);
}
1884 
1885 #ifdef CONFIG_PROC_FS
1886 /* Proc filesystem TCPv6 sock list dumping. */
1887 static void get_openreq6(struct seq_file *seq,
1888 			 const struct request_sock *req, int i)
1889 {
1890 	long ttd = req->rsk_timer.expires - jiffies;
1891 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1892 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1893 
1894 	if (ttd < 0)
1895 		ttd = 0;
1896 
1897 	seq_printf(seq,
1898 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1899 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1900 		   i,
1901 		   src->s6_addr32[0], src->s6_addr32[1],
1902 		   src->s6_addr32[2], src->s6_addr32[3],
1903 		   inet_rsk(req)->ir_num,
1904 		   dest->s6_addr32[0], dest->s6_addr32[1],
1905 		   dest->s6_addr32[2], dest->s6_addr32[3],
1906 		   ntohs(inet_rsk(req)->ir_rmt_port),
1907 		   TCP_SYN_RECV,
1908 		   0, 0, /* could print option size, but that is af dependent. */
1909 		   1,   /* timers active (only the expire timer) */
1910 		   jiffies_to_clock_t(ttd),
1911 		   req->num_timeout,
1912 		   from_kuid_munged(seq_user_ns(seq),
1913 				    sock_i_uid(req->rsk_listener)),
1914 		   0,  /* non standard timer */
1915 		   0, /* open_requests have no inode */
1916 		   0, req);
1917 }
1918 
1919 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1920 {
1921 	const struct in6_addr *dest, *src;
1922 	__u16 destp, srcp;
1923 	int timer_active;
1924 	unsigned long timer_expires;
1925 	const struct inet_sock *inet = inet_sk(sp);
1926 	const struct tcp_sock *tp = tcp_sk(sp);
1927 	const struct inet_connection_sock *icsk = inet_csk(sp);
1928 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1929 	int rx_queue;
1930 	int state;
1931 
1932 	dest  = &sp->sk_v6_daddr;
1933 	src   = &sp->sk_v6_rcv_saddr;
1934 	destp = ntohs(inet->inet_dport);
1935 	srcp  = ntohs(inet->inet_sport);
1936 
1937 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1938 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1939 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1940 		timer_active	= 1;
1941 		timer_expires	= icsk->icsk_timeout;
1942 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1943 		timer_active	= 4;
1944 		timer_expires	= icsk->icsk_timeout;
1945 	} else if (timer_pending(&sp->sk_timer)) {
1946 		timer_active	= 2;
1947 		timer_expires	= sp->sk_timer.expires;
1948 	} else {
1949 		timer_active	= 0;
1950 		timer_expires = jiffies;
1951 	}
1952 
1953 	state = inet_sk_state_load(sp);
1954 	if (state == TCP_LISTEN)
1955 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
1956 	else
1957 		/* Because we don't lock the socket,
1958 		 * we might find a transient negative value.
1959 		 */
1960 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1961 				      READ_ONCE(tp->copied_seq), 0);
1962 
1963 	seq_printf(seq,
1964 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1965 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1966 		   i,
1967 		   src->s6_addr32[0], src->s6_addr32[1],
1968 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1969 		   dest->s6_addr32[0], dest->s6_addr32[1],
1970 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1971 		   state,
1972 		   READ_ONCE(tp->write_seq) - tp->snd_una,
1973 		   rx_queue,
1974 		   timer_active,
1975 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1976 		   icsk->icsk_retransmits,
1977 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1978 		   icsk->icsk_probes_out,
1979 		   sock_i_ino(sp),
1980 		   refcount_read(&sp->sk_refcnt), sp,
1981 		   jiffies_to_clock_t(icsk->icsk_rto),
1982 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
1983 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1984 		   tp->snd_cwnd,
1985 		   state == TCP_LISTEN ?
1986 			fastopenq->max_qlen :
1987 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1988 		   );
1989 }
1990 
1991 static void get_timewait6_sock(struct seq_file *seq,
1992 			       struct inet_timewait_sock *tw, int i)
1993 {
1994 	long delta = tw->tw_timer.expires - jiffies;
1995 	const struct in6_addr *dest, *src;
1996 	__u16 destp, srcp;
1997 
1998 	dest = &tw->tw_v6_daddr;
1999 	src  = &tw->tw_v6_rcv_saddr;
2000 	destp = ntohs(tw->tw_dport);
2001 	srcp  = ntohs(tw->tw_sport);
2002 
2003 	seq_printf(seq,
2004 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2005 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2006 		   i,
2007 		   src->s6_addr32[0], src->s6_addr32[1],
2008 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2009 		   dest->s6_addr32[0], dest->s6_addr32[1],
2010 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2011 		   tw->tw_substate, 0, 0,
2012 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2013 		   refcount_read(&tw->tw_refcnt), tw);
2014 }
2015 
2016 static int tcp6_seq_show(struct seq_file *seq, void *v)
2017 {
2018 	struct tcp_iter_state *st;
2019 	struct sock *sk = v;
2020 
2021 	if (v == SEQ_START_TOKEN) {
2022 		seq_puts(seq,
2023 			 "  sl  "
2024 			 "local_address                         "
2025 			 "remote_address                        "
2026 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2027 			 "   uid  timeout inode\n");
2028 		goto out;
2029 	}
2030 	st = seq->private;
2031 
2032 	if (sk->sk_state == TCP_TIME_WAIT)
2033 		get_timewait6_sock(seq, v, st->num);
2034 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2035 		get_openreq6(seq, v, st->num);
2036 	else
2037 		get_tcp6_sock(seq, v, st->num);
2038 out:
2039 	return 0;
2040 }
2041 
2042 static const struct seq_operations tcp6_seq_ops = {
2043 	.show		= tcp6_seq_show,
2044 	.start		= tcp_seq_start,
2045 	.next		= tcp_seq_next,
2046 	.stop		= tcp_seq_stop,
2047 };
2048 
2049 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2050 	.family		= AF_INET6,
2051 };
2052 
2053 int __net_init tcp6_proc_init(struct net *net)
2054 {
2055 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2056 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2057 		return -ENOMEM;
2058 	return 0;
2059 }
2060 
2061 void tcp6_proc_exit(struct net *net)
2062 {
2063 	remove_proc_entry("tcp6", net->proc_net);
2064 }
2065 #endif
2066 
2067 struct proto tcpv6_prot = {
2068 	.name			= "TCPv6",
2069 	.owner			= THIS_MODULE,
2070 	.close			= tcp_close,
2071 	.pre_connect		= tcp_v6_pre_connect,
2072 	.connect		= tcp_v6_connect,
2073 	.disconnect		= tcp_disconnect,
2074 	.accept			= inet_csk_accept,
2075 	.ioctl			= tcp_ioctl,
2076 	.init			= tcp_v6_init_sock,
2077 	.destroy		= tcp_v6_destroy_sock,
2078 	.shutdown		= tcp_shutdown,
2079 	.setsockopt		= tcp_setsockopt,
2080 	.getsockopt		= tcp_getsockopt,
2081 	.keepalive		= tcp_set_keepalive,
2082 	.recvmsg		= tcp_recvmsg,
2083 	.sendmsg		= tcp_sendmsg,
2084 	.sendpage		= tcp_sendpage,
2085 	.backlog_rcv		= tcp_v6_do_rcv,
2086 	.release_cb		= tcp_release_cb,
2087 	.hash			= inet6_hash,
2088 	.unhash			= inet_unhash,
2089 	.get_port		= inet_csk_get_port,
2090 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2091 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2092 	.stream_memory_free	= tcp_stream_memory_free,
2093 	.sockets_allocated	= &tcp_sockets_allocated,
2094 	.memory_allocated	= &tcp_memory_allocated,
2095 	.memory_pressure	= &tcp_memory_pressure,
2096 	.orphan_count		= &tcp_orphan_count,
2097 	.sysctl_mem		= sysctl_tcp_mem,
2098 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2099 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2100 	.max_header		= MAX_TCP_HEADER,
2101 	.obj_size		= sizeof(struct tcp6_sock),
2102 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2103 	.twsk_prot		= &tcp6_timewait_sock_ops,
2104 	.rsk_prot		= &tcp6_request_sock_ops,
2105 	.h.hashinfo		= &tcp_hashinfo,
2106 	.no_autobind		= true,
2107 #ifdef CONFIG_COMPAT
2108 	.compat_setsockopt	= compat_tcp_setsockopt,
2109 	.compat_getsockopt	= compat_tcp_getsockopt,
2110 #endif
2111 	.diag_destroy		= tcp_abort,
2112 };
2113 
2114 /* thinking of making this const? Don't.
2115  * early_demux can change based on sysctl.
2116  */
2117 static struct inet6_protocol tcpv6_protocol = {
2118 	.early_demux	=	tcp_v6_early_demux,
2119 	.early_demux_handler =  tcp_v6_early_demux,
2120 	.handler	=	tcp_v6_rcv,
2121 	.err_handler	=	tcp_v6_err,
2122 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2123 };
2124 
2125 static struct inet_protosw tcpv6_protosw = {
2126 	.type		=	SOCK_STREAM,
2127 	.protocol	=	IPPROTO_TCP,
2128 	.prot		=	&tcpv6_prot,
2129 	.ops		=	&inet6_stream_ops,
2130 	.flags		=	INET_PROTOSW_PERMANENT |
2131 				INET_PROTOSW_ICSK,
2132 };
2133 
2134 static int __net_init tcpv6_net_init(struct net *net)
2135 {
2136 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2137 				    SOCK_RAW, IPPROTO_TCP, net);
2138 }
2139 
2140 static void __net_exit tcpv6_net_exit(struct net *net)
2141 {
2142 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2143 }
2144 
2145 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2146 {
2147 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2148 }
2149 
2150 static struct pernet_operations tcpv6_net_ops = {
2151 	.init	    = tcpv6_net_init,
2152 	.exit	    = tcpv6_net_exit,
2153 	.exit_batch = tcpv6_net_exit_batch,
2154 };
2155 
2156 int __init tcpv6_init(void)
2157 {
2158 	int ret;
2159 
2160 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2161 	if (ret)
2162 		goto out;
2163 
2164 	/* register inet6 protocol */
2165 	ret = inet6_register_protosw(&tcpv6_protosw);
2166 	if (ret)
2167 		goto out_tcpv6_protocol;
2168 
2169 	ret = register_pernet_subsys(&tcpv6_net_ops);
2170 	if (ret)
2171 		goto out_tcpv6_protosw;
2172 
2173 	ret = mptcpv6_init();
2174 	if (ret)
2175 		goto out_tcpv6_pernet_subsys;
2176 
2177 out:
2178 	return ret;
2179 
2180 out_tcpv6_pernet_subsys:
2181 	unregister_pernet_subsys(&tcpv6_net_ops);
2182 out_tcpv6_protosw:
2183 	inet6_unregister_protosw(&tcpv6_protosw);
2184 out_tcpv6_protocol:
2185 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2186 	goto out;
2187 }
2188 
2189 void tcpv6_exit(void)
2190 {
2191 	unregister_pernet_subsys(&tcpv6_net_ops);
2192 	inet6_unregister_protosw(&tcpv6_protosw);
2193 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2194 }
2195