xref: /linux/net/ipv6/tcp_ipv6.c (revision ba2290b1b7505b28912092a0976e071a447ee18c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
/* Forward declarations: these are referenced (e.g. from the request-sock
 * operation table and from tcp_v6_connect()) before their definitions.
 */
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
/* Address-family operation tables. tcp_v6_connect() installs ipv6_mapped
 * while connecting to a v4-mapped destination and puts ipv6_specific back
 * if that attempt fails.
 */
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
/* Stub for builds without CONFIG_TCP_MD5SIG: no key can exist, so every
 * lookup returns NULL. This lets callers use the same call in both builds.
 */
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allow compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98 	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
100 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102 
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 	struct dst_entry *dst = skb_dst(skb);
106 
107 	if (dst && dst_hold_safe(dst)) {
108 		const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
110 		sk->sk_rx_dst = dst;
111 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113 	}
114 }
115 
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 				ipv6_hdr(skb)->saddr.s6_addr32,
120 				tcp_hdr(skb)->dest,
121 				tcp_hdr(skb)->source);
122 }
123 
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 				   ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129 
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 			      int addr_len)
132 {
133 	/* This check is replicated from tcp_v6_connect() and intended to
134 	 * prevent BPF program called below from accessing bytes that are out
135 	 * of the bound specified by user in addr_len.
136 	 */
137 	if (addr_len < SIN6_LEN_RFC2133)
138 		return -EINVAL;
139 
140 	sock_owned_by_me(sk);
141 
142 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144 
/*
 * tcp_v6_connect - start an active open towards the address in @uaddr
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as struct sockaddr_in6
 * @addr_len: number of valid bytes in @uaddr
 *
 * Validates the destination, then either hands a v4-mapped destination to
 * tcp_v4_connect() (after switching the socket to the mapped operations),
 * or routes the IPv6 flow, moves the socket to TCP_SYN_SENT, hashes it,
 * chooses the initial sequence number and timestamp offset (unless the
 * socket is in repair mode) and calls tcp_connect().
 *
 * Returns 0 on success, or an error: -EINVAL, -EAFNOSUPPORT, -ENETUNREACH,
 * or whatever the routing/hash/connect helper reported. Failures that go
 * through the labels at the end clear inet_dport and sk_route_caps.
 */
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 			  int addr_len)
147 {
148 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct inet_connection_sock *icsk = inet_csk(sk);
151 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	struct in6_addr *saddr = NULL, *final_p, final;
154 	struct ipv6_txoptions *opt;
155 	struct flowi6 fl6;
156 	struct dst_entry *dst;
157 	int addr_type;
158 	int err;
159 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 
161 	if (addr_len < SIN6_LEN_RFC2133)
162 		return -EINVAL;
163 
164 	if (usin->sin6_family != AF_INET6)
165 		return -EAFNOSUPPORT;
166 
167 	memset(&fl6, 0, sizeof(fl6));
168 
	/* With sndflow enabled the flow label comes from the user's
	 * sin6_flowinfo; a non-zero label must be known to the socket.
	 */
169 	if (np->sndflow) {
170 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 		IP6_ECN_flow_init(fl6.flowlabel);
172 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 			struct ip6_flowlabel *flowlabel;
174 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 			if (IS_ERR(flowlabel))
176 				return -EINVAL;
177 			fl6_sock_release(flowlabel);
178 		}
179 	}
180 
181 	/*
182 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183 	 */
184 
185 	if (ipv6_addr_any(&usin->sin6_addr)) {
186 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 					       &usin->sin6_addr);
189 		else
190 			usin->sin6_addr = in6addr_loopback;
191 	}
192 
193 	addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 	if (addr_type & IPV6_ADDR_MULTICAST)
196 		return -ENETUNREACH;
197 
198 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		/* sin6_scope_id is only read when the caller passed a
		 * full-size sockaddr_in6.
		 */
199 		if (addr_len >= sizeof(struct sockaddr_in6) &&
200 		    usin->sin6_scope_id) {
201 			/* If interface is set while binding, indices
202 			 * must coincide.
203 			 */
204 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 				return -EINVAL;
206 
207 			sk->sk_bound_dev_if = usin->sin6_scope_id;
208 		}
209 
210 		/* Connect to link-local address requires an interface */
211 		if (!sk->sk_bound_dev_if)
212 			return -EINVAL;
213 	}
214 
	/* The destination differs from the one the socket last had while
	 * timestamp state was recorded: drop that state and write_seq.
	 */
215 	if (tp->rx_opt.ts_recent_stamp &&
216 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 		tp->rx_opt.ts_recent = 0;
218 		tp->rx_opt.ts_recent_stamp = 0;
219 		WRITE_ONCE(tp->write_seq, 0);
220 	}
221 
222 	sk->sk_v6_daddr = usin->sin6_addr;
223 	np->flow_label = fl6.flowlabel;
224 
225 	/*
226 	 *	TCP over IPv4
227 	 */
228 
229 	if (addr_type & IPV6_ADDR_MAPPED) {
		/* Saved so it can be restored if the IPv4 connect fails. */
230 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 		struct sockaddr_in sin;
232 
233 		if (__ipv6_only_sock(sk))
234 			return -ENETUNREACH;
235 
		/* The IPv4 address is the last 32-bit word of the mapped one. */
236 		sin.sin_family = AF_INET;
237 		sin.sin_port = usin->sin6_port;
238 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 		icsk->icsk_af_ops = &ipv6_mapped;
241 		if (sk_is_mptcp(sk))
242 			mptcpv6_handle_mapped(sk, true);
243 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247 
248 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 
250 		if (err) {
			/* Undo the switch to the v4-mapped operations. */
251 			icsk->icsk_ext_hdr_len = exthdrlen;
252 			icsk->icsk_af_ops = &ipv6_specific;
253 			if (sk_is_mptcp(sk))
254 				mptcpv6_handle_mapped(sk, false);
255 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 			tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 			goto failure;
260 		}
261 		np->saddr = sk->sk_v6_rcv_saddr;
262 
263 		return err;
264 	}
265 
	/* A bound (non-any) receive address is used as the source. */
266 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 		saddr = &sk->sk_v6_rcv_saddr;
268 
269 	fl6.flowi6_proto = IPPROTO_TCP;
270 	fl6.daddr = sk->sk_v6_daddr;
271 	fl6.saddr = saddr ? *saddr : np->saddr;
272 	fl6.flowi6_oif = sk->sk_bound_dev_if;
273 	fl6.flowi6_mark = sk->sk_mark;
274 	fl6.fl6_dport = usin->sin6_port;
275 	fl6.fl6_sport = inet->inet_sport;
276 	fl6.flowi6_uid = sk->sk_uid;
277 
278 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 	final_p = fl6_update_dst(&fl6, opt, &final);
280 
281 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
282 
283 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284 	if (IS_ERR(dst)) {
285 		err = PTR_ERR(dst);
286 		goto failure;
287 	}
288 
	/* No bound address: adopt the source left in fl6 by the lookup. */
289 	if (!saddr) {
290 		saddr = &fl6.saddr;
291 		sk->sk_v6_rcv_saddr = *saddr;
292 	}
293 
294 	/* set the source address */
295 	np->saddr = *saddr;
	/* NOTE(review): LOOPBACK4_IPV6 is presumably a placeholder IPv4
	 * address for native IPv6 sockets - confirm against its definition.
	 */
296 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 
298 	sk->sk_gso_type = SKB_GSO_TCPV6;
299 	ip6_dst_store(sk, dst, NULL, NULL);
300 
	/* Extension header length is the sum of both option lengths. */
301 	icsk->icsk_ext_hdr_len = 0;
302 	if (opt)
303 		icsk->icsk_ext_hdr_len = opt->opt_flen +
304 					 opt->opt_nflen;
305 
	/* MSS clamp: minimum IPv6 MTU minus the fixed TCP and IPv6 headers. */
306 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 
308 	inet->inet_dport = usin->sin6_port;
309 
310 	tcp_set_state(sk, TCP_SYN_SENT);
311 	err = inet6_hash_connect(tcp_death_row, sk);
312 	if (err)
313 		goto late_failure;
314 
315 	sk_set_txhash(sk);
316 
	/* In repair mode neither value is generated here; otherwise a
	 * non-zero write_seq is kept and only a zero one is replaced.
	 */
317 	if (likely(!tp->repair)) {
318 		if (!tp->write_seq)
319 			WRITE_ONCE(tp->write_seq,
320 				   secure_tcpv6_seq(np->saddr.s6_addr32,
321 						    sk->sk_v6_daddr.s6_addr32,
322 						    inet->inet_sport,
323 						    inet->inet_dport));
324 		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
325 						   np->saddr.s6_addr32,
326 						   sk->sk_v6_daddr.s6_addr32);
327 	}
328 
	/* If the Fast Open helper defers the connect, return the err it
	 * set; otherwise a non-zero err it left behind is a failure.
	 */
329 	if (tcp_fastopen_defer_connect(sk, &err))
330 		return err;
331 	if (err)
332 		goto late_failure;
333 
334 	err = tcp_connect(sk);
335 	if (err)
336 		goto late_failure;
337 
338 	return 0;
339 
	/* late_failure: the state was already changed to TCP_SYN_SENT. */
340 late_failure:
341 	tcp_set_state(sk, TCP_CLOSE);
342 failure:
343 	inet->inet_dport = 0;
344 	sk->sk_route_caps = 0;
345 	return err;
346 }
347 
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350 	struct dst_entry *dst;
351 
352 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
353 		return;
354 
355 	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
356 	if (!dst)
357 		return;
358 
359 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
360 		tcp_sync_mss(sk, dst_mtu(dst));
361 		tcp_simple_retransmit(sk);
362 	}
363 }
364 
365 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
366 		u8 type, u8 code, int offset, __be32 info)
367 {
368 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
369 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
370 	struct net *net = dev_net(skb->dev);
371 	struct request_sock *fastopen;
372 	struct ipv6_pinfo *np;
373 	struct tcp_sock *tp;
374 	__u32 seq, snd_una;
375 	struct sock *sk;
376 	bool fatal;
377 	int err;
378 
379 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
380 					&hdr->daddr, th->dest,
381 					&hdr->saddr, ntohs(th->source),
382 					skb->dev->ifindex, inet6_sdif(skb));
383 
384 	if (!sk) {
385 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
386 				  ICMP6_MIB_INERRORS);
387 		return -ENOENT;
388 	}
389 
390 	if (sk->sk_state == TCP_TIME_WAIT) {
391 		inet_twsk_put(inet_twsk(sk));
392 		return 0;
393 	}
394 	seq = ntohl(th->seq);
395 	fatal = icmpv6_err_convert(type, code, &err);
396 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
397 		tcp_req_err(sk, seq, fatal);
398 		return 0;
399 	}
400 
401 	bh_lock_sock(sk);
402 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
403 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
404 
405 	if (sk->sk_state == TCP_CLOSE)
406 		goto out;
407 
408 	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
409 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
410 		goto out;
411 	}
412 
413 	tp = tcp_sk(sk);
414 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
415 	fastopen = rcu_dereference(tp->fastopen_rsk);
416 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
417 	if (sk->sk_state != TCP_LISTEN &&
418 	    !between(seq, snd_una, tp->snd_nxt)) {
419 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
420 		goto out;
421 	}
422 
423 	np = tcp_inet6_sk(sk);
424 
425 	if (type == NDISC_REDIRECT) {
426 		if (!sock_owned_by_user(sk)) {
427 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
428 
429 			if (dst)
430 				dst->ops->redirect(dst, sk, skb);
431 		}
432 		goto out;
433 	}
434 
435 	if (type == ICMPV6_PKT_TOOBIG) {
436 		/* We are not interested in TCP_LISTEN and open_requests
437 		 * (SYN-ACKs send out by Linux are always <576bytes so
438 		 * they should go through unfragmented).
439 		 */
440 		if (sk->sk_state == TCP_LISTEN)
441 			goto out;
442 
443 		if (!ip6_sk_accept_pmtu(sk))
444 			goto out;
445 
446 		tp->mtu_info = ntohl(info);
447 		if (!sock_owned_by_user(sk))
448 			tcp_v6_mtu_reduced(sk);
449 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
450 					   &sk->sk_tsq_flags))
451 			sock_hold(sk);
452 		goto out;
453 	}
454 
455 
456 	/* Might be for an request_sock */
457 	switch (sk->sk_state) {
458 	case TCP_SYN_SENT:
459 	case TCP_SYN_RECV:
460 		/* Only in fast or simultaneous open. If a fast open socket is
461 		 * already accepted it is treated as a connected one below.
462 		 */
463 		if (fastopen && !fastopen->sk)
464 			break;
465 
466 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
467 
468 		if (!sock_owned_by_user(sk)) {
469 			sk->sk_err = err;
470 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
471 
472 			tcp_done(sk);
473 		} else
474 			sk->sk_err_soft = err;
475 		goto out;
476 	case TCP_LISTEN:
477 		break;
478 	default:
479 		/* check if this ICMP message allows revert of backoff.
480 		 * (see RFC 6069)
481 		 */
482 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
483 		    code == ICMPV6_NOROUTE)
484 			tcp_ld_RTO_revert(sk, seq);
485 	}
486 
487 	if (!sock_owned_by_user(sk) && np->recverr) {
488 		sk->sk_err = err;
489 		sk->sk_error_report(sk);
490 	} else
491 		sk->sk_err_soft = err;
492 
493 out:
494 	bh_unlock_sock(sk);
495 	sock_put(sk);
496 	return 0;
497 }
498 
499 
500 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
501 			      struct flowi *fl,
502 			      struct request_sock *req,
503 			      struct tcp_fastopen_cookie *foc,
504 			      enum tcp_synack_type synack_type,
505 			      struct sk_buff *syn_skb)
506 {
507 	struct inet_request_sock *ireq = inet_rsk(req);
508 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
509 	struct ipv6_txoptions *opt;
510 	struct flowi6 *fl6 = &fl->u.ip6;
511 	struct sk_buff *skb;
512 	int err = -ENOMEM;
513 	u8 tclass;
514 
515 	/* First, grab a route. */
516 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
517 					       IPPROTO_TCP)) == NULL)
518 		goto done;
519 
520 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
521 
522 	if (skb) {
523 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
524 				    &ireq->ir_v6_rmt_addr);
525 
526 		fl6->daddr = ireq->ir_v6_rmt_addr;
527 		if (np->repflow && ireq->pktopts)
528 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
529 
530 		rcu_read_lock();
531 		opt = ireq->ipv6_opt;
532 		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
533 				tcp_rsk(req)->syn_tos : np->tclass;
534 		if (!opt)
535 			opt = rcu_dereference(np->opt);
536 		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
537 			       tclass & ~INET_ECN_MASK,
538 			       sk->sk_priority);
539 		rcu_read_unlock();
540 		err = net_xmit_eval(err);
541 	}
542 
543 done:
544 	return err;
545 }
546 
547 
548 static void tcp_v6_reqsk_destructor(struct request_sock *req)
549 {
550 	kfree(inet_rsk(req)->ipv6_opt);
551 	kfree_skb(inet_rsk(req)->pktopts);
552 }
553 
554 #ifdef CONFIG_TCP_MD5SIG
555 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
556 						   const struct in6_addr *addr,
557 						   int l3index)
558 {
559 	return tcp_md5_do_lookup(sk, l3index,
560 				 (union tcp_md5_addr *)addr, AF_INET6);
561 }
562 
563 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
564 						const struct sock *addr_sk)
565 {
566 	int l3index;
567 
568 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
569 						 addr_sk->sk_bound_dev_if);
570 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
571 				    l3index);
572 }
573 
574 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
575 				 sockptr_t optval, int optlen)
576 {
577 	struct tcp_md5sig cmd;
578 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
579 	int l3index = 0;
580 	u8 prefixlen;
581 
582 	if (optlen < sizeof(cmd))
583 		return -EINVAL;
584 
585 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
586 		return -EFAULT;
587 
588 	if (sin6->sin6_family != AF_INET6)
589 		return -EINVAL;
590 
591 	if (optname == TCP_MD5SIG_EXT &&
592 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
593 		prefixlen = cmd.tcpm_prefixlen;
594 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
595 					prefixlen > 32))
596 			return -EINVAL;
597 	} else {
598 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
599 	}
600 
601 	if (optname == TCP_MD5SIG_EXT &&
602 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
603 		struct net_device *dev;
604 
605 		rcu_read_lock();
606 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
607 		if (dev && netif_is_l3_master(dev))
608 			l3index = dev->ifindex;
609 		rcu_read_unlock();
610 
611 		/* ok to reference set/not set outside of rcu;
612 		 * right now device MUST be an L3 master
613 		 */
614 		if (!dev || !l3index)
615 			return -EINVAL;
616 	}
617 
618 	if (!cmd.tcpm_keylen) {
619 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
620 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
621 					      AF_INET, prefixlen,
622 					      l3index);
623 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
624 				      AF_INET6, prefixlen, l3index);
625 	}
626 
627 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
628 		return -EINVAL;
629 
630 	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
631 		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
632 				      AF_INET, prefixlen, l3index,
633 				      cmd.tcpm_key, cmd.tcpm_keylen,
634 				      GFP_KERNEL);
635 
636 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
637 			      AF_INET6, prefixlen, l3index,
638 			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
639 }
640 
641 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
642 				   const struct in6_addr *daddr,
643 				   const struct in6_addr *saddr,
644 				   const struct tcphdr *th, int nbytes)
645 {
646 	struct tcp6_pseudohdr *bp;
647 	struct scatterlist sg;
648 	struct tcphdr *_th;
649 
650 	bp = hp->scratch;
651 	/* 1. TCP pseudo-header (RFC2460) */
652 	bp->saddr = *saddr;
653 	bp->daddr = *daddr;
654 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
655 	bp->len = cpu_to_be32(nbytes);
656 
657 	_th = (struct tcphdr *)(bp + 1);
658 	memcpy(_th, th, sizeof(*th));
659 	_th->check = 0;
660 
661 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
662 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
663 				sizeof(*bp) + sizeof(*th));
664 	return crypto_ahash_update(hp->md5_req);
665 }
666 
667 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
668 			       const struct in6_addr *daddr, struct in6_addr *saddr,
669 			       const struct tcphdr *th)
670 {
671 	struct tcp_md5sig_pool *hp;
672 	struct ahash_request *req;
673 
674 	hp = tcp_get_md5sig_pool();
675 	if (!hp)
676 		goto clear_hash_noput;
677 	req = hp->md5_req;
678 
679 	if (crypto_ahash_init(req))
680 		goto clear_hash;
681 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
682 		goto clear_hash;
683 	if (tcp_md5_hash_key(hp, key))
684 		goto clear_hash;
685 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
686 	if (crypto_ahash_final(req))
687 		goto clear_hash;
688 
689 	tcp_put_md5sig_pool();
690 	return 0;
691 
692 clear_hash:
693 	tcp_put_md5sig_pool();
694 clear_hash_noput:
695 	memset(md5_hash, 0, 16);
696 	return 1;
697 }
698 
699 static int tcp_v6_md5_hash_skb(char *md5_hash,
700 			       const struct tcp_md5sig_key *key,
701 			       const struct sock *sk,
702 			       const struct sk_buff *skb)
703 {
704 	const struct in6_addr *saddr, *daddr;
705 	struct tcp_md5sig_pool *hp;
706 	struct ahash_request *req;
707 	const struct tcphdr *th = tcp_hdr(skb);
708 
709 	if (sk) { /* valid for establish/request sockets */
710 		saddr = &sk->sk_v6_rcv_saddr;
711 		daddr = &sk->sk_v6_daddr;
712 	} else {
713 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
714 		saddr = &ip6h->saddr;
715 		daddr = &ip6h->daddr;
716 	}
717 
718 	hp = tcp_get_md5sig_pool();
719 	if (!hp)
720 		goto clear_hash_noput;
721 	req = hp->md5_req;
722 
723 	if (crypto_ahash_init(req))
724 		goto clear_hash;
725 
726 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
727 		goto clear_hash;
728 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
729 		goto clear_hash;
730 	if (tcp_md5_hash_key(hp, key))
731 		goto clear_hash;
732 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
733 	if (crypto_ahash_final(req))
734 		goto clear_hash;
735 
736 	tcp_put_md5sig_pool();
737 	return 0;
738 
739 clear_hash:
740 	tcp_put_md5sig_pool();
741 clear_hash_noput:
742 	memset(md5_hash, 0, 16);
743 	return 1;
744 }
745 
746 #endif
747 
748 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
749 				    const struct sk_buff *skb,
750 				    int dif, int sdif)
751 {
752 #ifdef CONFIG_TCP_MD5SIG
753 	const __u8 *hash_location = NULL;
754 	struct tcp_md5sig_key *hash_expected;
755 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
756 	const struct tcphdr *th = tcp_hdr(skb);
757 	int genhash, l3index;
758 	u8 newhash[16];
759 
760 	/* sdif set, means packet ingressed via a device
761 	 * in an L3 domain and dif is set to the l3mdev
762 	 */
763 	l3index = sdif ? dif : 0;
764 
765 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
766 	hash_location = tcp_parse_md5sig_option(th);
767 
768 	/* We've parsed the options - do we have a hash? */
769 	if (!hash_expected && !hash_location)
770 		return false;
771 
772 	if (hash_expected && !hash_location) {
773 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
774 		return true;
775 	}
776 
777 	if (!hash_expected && hash_location) {
778 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
779 		return true;
780 	}
781 
782 	/* check the signature */
783 	genhash = tcp_v6_md5_hash_skb(newhash,
784 				      hash_expected,
785 				      NULL, skb);
786 
787 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
788 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
789 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
790 				     genhash ? "failed" : "mismatch",
791 				     &ip6h->saddr, ntohs(th->source),
792 				     &ip6h->daddr, ntohs(th->dest), l3index);
793 		return true;
794 	}
795 #endif
796 	return false;
797 }
798 
799 static void tcp_v6_init_req(struct request_sock *req,
800 			    const struct sock *sk_listener,
801 			    struct sk_buff *skb)
802 {
803 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
804 	struct inet_request_sock *ireq = inet_rsk(req);
805 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
806 
807 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
808 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
809 
810 	/* So that link locals have meaning */
811 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
812 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
813 		ireq->ir_iif = tcp_v6_iif(skb);
814 
815 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
816 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
817 	     np->rxopt.bits.rxinfo ||
818 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
819 	     np->rxopt.bits.rxohlim || np->repflow)) {
820 		refcount_inc(&skb->users);
821 		ireq->pktopts = skb;
822 	}
823 }
824 
825 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
826 					  struct flowi *fl,
827 					  const struct request_sock *req)
828 {
829 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
830 }
831 
/* Request-sock operations for AF_INET6 TCP connection requests: object
 * size plus the handlers for SYN-ACK retransmit and timeout, ACK and RST
 * transmission, and request destruction.
 */
832 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
833 	.family		=	AF_INET6,
834 	.obj_size	=	sizeof(struct tcp6_request_sock),
835 	.rtx_syn_ack	=	tcp_rtx_synack,
836 	.send_ack	=	tcp_v6_reqsk_send_ack,
837 	.destructor	=	tcp_v6_reqsk_destructor,
838 	.send_reset	=	tcp_v6_send_reset,
839 	.syn_ack_timeout =	tcp_syn_ack_timeout,
840 };
841 
/* TCP-specific request-sock operations for IPv6. The MD5 and SYN-cookie
 * hooks are present only when the matching config option is enabled.
 */
842 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	/* Minimum IPv6 MTU minus the fixed TCP and IPv6 headers. */
843 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
844 				sizeof(struct ipv6hdr),
845 #ifdef CONFIG_TCP_MD5SIG
846 	.req_md5_lookup	=	tcp_v6_md5_lookup,
847 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
848 #endif
849 	.init_req	=	tcp_v6_init_req,
850 #ifdef CONFIG_SYN_COOKIES
851 	.cookie_init_seq =	cookie_v6_init_sequence,
852 #endif
853 	.route_req	=	tcp_v6_route_req,
854 	.init_seq	=	tcp_v6_init_seq,
855 	.init_ts_off	=	tcp_v6_init_ts_off,
856 	.send_synack	=	tcp_v6_send_synack,
857 };
858 
/*
 * tcp_v6_send_response - build and send a bare RST or ACK answering @skb
 * @sk:       socket the segment was matched to; may be NULL, a timewait
 *            socket or a full socket (all three are handled below)
 * @skb:      incoming segment being answered
 * @seq:      sequence number for the reply (host byte order)
 * @ack:      acknowledgment number for the reply (host byte order)
 * @win:      window for the reply (host byte order)
 * @tsval:    timestamp value; used only when @tsecr is non-zero
 * @tsecr:    timestamp echo; non-zero adds a timestamp option
 * @oif:      output interface index, 0 to derive one from @skb
 * @key:      MD5 key to sign with, or NULL
 * @rst:      non-zero to send a RST, zero for an ACK
 * @tclass:   traffic class; the ECN bits are masked off before transmit
 * @label:    IPv6 flow label for the reply
 * @priority: priority passed to ip6_xmit()
 *
 * Ports and addresses are swapped relative to @skb. The reply is routed
 * and transmitted through the per-netns control socket, not through @sk.
 * Allocation or routing failure is silent.
 */
859 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
860 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
861 				 int oif, struct tcp_md5sig_key *key, int rst,
862 				 u8 tclass, __be32 label, u32 priority)
863 {
864 	const struct tcphdr *th = tcp_hdr(skb);
865 	struct tcphdr *t1;
866 	struct sk_buff *buff;
867 	struct flowi6 fl6;
868 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
869 	struct sock *ctl_sk = net->ipv6.tcp_sk;
870 	unsigned int tot_len = sizeof(struct tcphdr);
871 	struct dst_entry *dst;
872 	__be32 *topt;
873 	__u32 mark = 0;
874 
	/* tot_len is the TCP header plus whichever options will be added. */
875 	if (tsecr)
876 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
877 #ifdef CONFIG_TCP_MD5SIG
878 	if (key)
879 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
880 #endif
881 
882 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
883 			 GFP_ATOMIC);
884 	if (!buff)
885 		return;
886 
	/* Reserve the whole buffer, then push the TCP header back out so
	 * that everything before it is headroom for lower layers.
	 */
887 	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
888 
889 	t1 = skb_push(buff, tot_len);
890 	skb_reset_transport_header(buff);
891 
892 	/* Swap the send and the receive. */
893 	memset(t1, 0, sizeof(*t1));
894 	t1->dest = th->source;
895 	t1->source = th->dest;
896 	t1->doff = tot_len / 4;
897 	t1->seq = htonl(seq);
898 	t1->ack_seq = htonl(ack);
	/* ACK is set on every non-RST reply, and on a RST only when the
	 * incoming segment itself carried no ACK.
	 */
899 	t1->ack = !rst || !th->ack;
900 	t1->rst = rst;
901 	t1->window = htons(win);
902 
	/* Options start right after the fixed TCP header. */
903 	topt = (__be32 *)(t1 + 1);
904 
905 	if (tsecr) {
906 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
907 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
908 		*topt++ = htonl(tsval);
909 		*topt++ = htonl(tsecr);
910 	}
911 
912 #ifdef CONFIG_TCP_MD5SIG
913 	if (key) {
914 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
915 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* Addresses are swapped: the incoming source is the
		 * reply's destination and vice versa.
		 */
916 		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
917 				    &ipv6_hdr(skb)->saddr,
918 				    &ipv6_hdr(skb)->daddr, t1);
919 	}
920 #endif
921 
922 	memset(&fl6, 0, sizeof(fl6));
923 	fl6.daddr = ipv6_hdr(skb)->saddr;
924 	fl6.saddr = ipv6_hdr(skb)->daddr;
925 	fl6.flowlabel = label;
926 
927 	buff->ip_summed = CHECKSUM_PARTIAL;
928 	buff->csum = 0;
929 
930 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
931 
932 	fl6.flowi6_proto = IPPROTO_TCP;
	/* With no explicit oif: a destination needing strict routing goes
	 * out the interface the packet came in on; otherwise the incoming
	 * interface is used only if it is an L3 master.
	 */
933 	if (rt6_need_strict(&fl6.daddr) && !oif)
934 		fl6.flowi6_oif = tcp_v6_iif(skb);
935 	else {
936 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
937 			oif = skb->skb_iif;
938 
939 		fl6.flowi6_oif = oif;
940 	}
941 
942 	if (sk) {
943 		if (sk->sk_state == TCP_TIME_WAIT) {
944 			mark = inet_twsk(sk)->tw_mark;
945 			/* autoflowlabel relies on buff->hash */
946 			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
947 				     PKT_HASH_TYPE_L4);
948 		} else {
949 			mark = sk->sk_mark;
950 		}
951 		buff->tstamp = tcp_transmit_time(sk);
952 	}
	/* A non-zero reply mark derived from the incoming packet takes
	 * precedence over the socket's mark.
	 */
953 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
954 	fl6.fl6_dport = t1->dest;
955 	fl6.fl6_sport = t1->source;
956 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
957 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
958 
959 	/* Pass a socket to ip6_dst_lookup either it is for RST
960 	 * Underlying function will use this to retrieve the network
961 	 * namespace
962 	 */
963 	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
964 	if (!IS_ERR(dst)) {
965 		skb_dst_set(buff, dst);
966 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
967 			 tclass & ~INET_ECN_MASK, priority);
968 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
969 		if (rst)
970 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
971 		return;
972 	}
973 
	/* No route: the reply is dropped. */
974 	kfree_skb(buff);
975 }
976 
/*
 * tcp_v6_send_reset - answer @skb with a RST
 * @sk:  socket the segment was matched to, or NULL if none was found
 * @skb: offending segment
 *
 * No RST is sent in reply to a RST, nor (without a socket) to a packet
 * whose destination is not unicast. With CONFIG_TCP_MD5SIG, a segment that
 * carries an MD5 option but matched no full socket is answered only if a
 * listener with a matching key is found and the segment's signature
 * verifies; the RST is then signed with that key.
 */
977 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
978 {
979 	const struct tcphdr *th = tcp_hdr(skb);
980 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
981 	u32 seq = 0, ack_seq = 0;
982 	struct tcp_md5sig_key *key = NULL;
983 #ifdef CONFIG_TCP_MD5SIG
984 	const __u8 *hash_location = NULL;
985 	unsigned char newhash[16];
986 	int genhash;
987 	struct sock *sk1 = NULL;
988 #endif
989 	__be32 label = 0;
990 	u32 priority = 0;
991 	struct net *net;
992 	int oif = 0;
993 
994 	if (th->rst)
995 		return;
996 
997 	/* If sk not NULL, it means we did a successful lookup and incoming
998 	 * route had to be correct. prequeue might have dropped our dst.
999 	 */
1000 	if (!sk && !ipv6_unicast_destination(skb))
1001 		return;
1002 
1003 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1004 #ifdef CONFIG_TCP_MD5SIG
	/* The read-side section opened here is closed at the out label,
	 * so it spans the transmit below.
	 */
1005 	rcu_read_lock();
1006 	hash_location = tcp_parse_md5sig_option(th);
1007 	if (sk && sk_fullsock(sk)) {
1008 		int l3index;
1009 
1010 		/* sdif set, means packet ingressed via a device
1011 		 * in an L3 domain and inet_iif is set to it.
1012 		 */
1013 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1014 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1015 	} else if (hash_location) {
1016 		int dif = tcp_v6_iif_l3_slave(skb);
1017 		int sdif = tcp_v6_sdif(skb);
1018 		int l3index;
1019 
1020 		/*
1021 		 * active side is lost. Try to find listening socket through
1022 		 * source port, and then find md5 key through listening socket.
1023 		 * we are not loose security here:
1024 		 * Incoming packet is checked with md5 hash with finding key,
1025 		 * no RST generated if md5 hash doesn't match.
1026 		 */
		/* NOTE(review): th->source is passed both as the remote
		 * port and (via ntohs) as the local port number. This
		 * matches the "through source port" wording above, but
		 * confirm that th->dest is not what was intended.
		 */
1027 		sk1 = inet6_lookup_listener(net,
1028 					   &tcp_hashinfo, NULL, 0,
1029 					   &ipv6h->saddr,
1030 					   th->source, &ipv6h->daddr,
1031 					   ntohs(th->source), dif, sdif);
1032 		if (!sk1)
1033 			goto out;
1034 
1035 		/* sdif set, means packet ingressed via a device
1036 		 * in an L3 domain and dif is set to it.
1037 		 */
1038 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1039 
1040 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1041 		if (!key)
1042 			goto out;
1043 
1044 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1045 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1046 			goto out;
1047 	}
1048 #endif
1049 
	/* If the segment had an ACK, the RST takes its sequence number
	 * from that ACK. Otherwise the RST acknowledges everything the
	 * segment occupied: payload plus one each for SYN and FIN.
	 */
1050 	if (th->ack)
1051 		seq = ntohl(th->ack_seq);
1052 	else
1053 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1054 			  (th->doff << 2);
1055 
1056 	if (sk) {
1057 		oif = sk->sk_bound_dev_if;
1058 		if (sk_fullsock(sk)) {
1059 			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1060 
1061 			trace_tcp_send_reset(sk, skb);
1062 			if (np->repflow)
1063 				label = ip6_flowlabel(ipv6h);
1064 			priority = sk->sk_priority;
1065 		}
1066 		if (sk->sk_state == TCP_TIME_WAIT) {
1067 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1068 			priority = inet_twsk(sk)->tw_priority;
1069 		}
1070 	} else {
		/* No socket: reflect the label only if the sysctl asks. */
1071 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1072 			label = ip6_flowlabel(ipv6h);
1073 	}
1074 
1075 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1076 			     ipv6_get_dsfield(ipv6h), label, priority);
1077 
1078 #ifdef CONFIG_TCP_MD5SIG
1079 out:
1080 	rcu_read_unlock();
1081 #endif
1082 }
1083 
1084 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1085 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1086 			    struct tcp_md5sig_key *key, u8 tclass,
1087 			    __be32 label, u32 priority)
1088 {
1089 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1090 			     tclass, label, priority);
1091 }
1092 
1093 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1094 {
1095 	struct inet_timewait_sock *tw = inet_twsk(sk);
1096 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1097 
1098 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1099 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1100 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1101 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1102 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1103 
1104 	inet_twsk_put(tw);
1105 }
1106 
1107 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1108 				  struct request_sock *req)
1109 {
1110 	int l3index;
1111 
1112 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1113 
1114 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1115 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1116 	 */
1117 	/* RFC 7323 2.3
1118 	 * The window field (SEG.WND) of every outgoing segment, with the
1119 	 * exception of <SYN> segments, MUST be right-shifted by
1120 	 * Rcv.Wind.Shift bits:
1121 	 */
1122 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1123 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1124 			tcp_rsk(req)->rcv_nxt,
1125 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1126 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1127 			req->ts_recent, sk->sk_bound_dev_if,
1128 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1129 			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1130 }
1131 
1132 
/*
 * Run the SYN-cookie check for a segment received on @sk.
 *
 * With CONFIG_SYN_COOKIES, any segment without the SYN flag is passed to
 * cookie_v6_check() and its result is returned. SYN segments, and every
 * segment when the option is disabled, get @sk back unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (tcp_hdr(skb)->syn)
		return sk;

	return cookie_v6_check(sk, skb);
#else
	return sk;
#endif
}
1143 
1144 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1145 			 struct tcphdr *th, u32 *cookie)
1146 {
1147 	u16 mss = 0;
1148 #ifdef CONFIG_SYN_COOKIES
1149 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1150 				    &tcp_request_sock_ipv6_ops, sk, th);
1151 	if (mss) {
1152 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1153 		tcp_synq_overflow(sk);
1154 	}
1155 #endif
1156 	return mss;
1157 }
1158 
1159 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1160 {
1161 	if (skb->protocol == htons(ETH_P_IP))
1162 		return tcp_v4_conn_request(sk, skb);
1163 
1164 	if (!ipv6_unicast_destination(skb))
1165 		goto drop;
1166 
1167 	return tcp_conn_request(&tcp6_request_sock_ops,
1168 				&tcp_request_sock_ipv6_ops, sk, skb);
1169 
1170 drop:
1171 	tcp_listendrop(sk);
1172 	return 0; /* don't send reset */
1173 }
1174 
1175 static void tcp_v6_restore_cb(struct sk_buff *skb)
1176 {
1177 	/* We need to move header back to the beginning if xfrm6_policy_check()
1178 	 * and tcp_v6_fill_cb() are going to be called again.
1179 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1180 	 */
1181 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1182 		sizeof(struct inet6_skb_parm));
1183 }
1184 
/*
 * Create the child socket for connection request @req on listener @sk.
 *
 * Two cases, selected by skb->protocol:
 *  - IPv4 packet (v4-mapped): tcp_v4_syn_recv_sock() builds the child and
 *    this function only initialises its ipv6_pinfo and switches it to the
 *    ipv6_mapped operations.
 *  - IPv6 packet: the child is built here via tcp_create_openreq_child()
 *    and populated from @sk, @req and @skb.
 *
 * @dst may be NULL on the IPv6 path, in which case a route is looked up
 * with inet6_csk_route_req().
 * On the IPv6 path *@own_req receives the result of
 * inet_ehash_nolisten(); on the v4-mapped path it is passed through to
 * tcp_v4_syn_recv_sock().
 *
 * Returns the new socket, or NULL on failure. Every IPv6 failure path
 * ends in tcp_listendrop(@sk).
 */
1185 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1186 					 struct request_sock *req,
1187 					 struct dst_entry *dst,
1188 					 struct request_sock *req_unhash,
1189 					 bool *own_req)
1190 {
1191 	struct inet_request_sock *ireq;
1192 	struct ipv6_pinfo *newnp;
1193 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1194 	struct ipv6_txoptions *opt;
1195 	struct inet_sock *newinet;
1196 	struct tcp_sock *newtp;
1197 	struct sock *newsk;
1198 #ifdef CONFIG_TCP_MD5SIG
1199 	struct tcp_md5sig_key *key;
1200 	int l3index;
1201 #endif
1202 	struct flowi6 fl6;
1203 
1204 	if (skb->protocol == htons(ETH_P_IP)) {
1205 		/*
1206 		 *	v6 mapped
1207 		 */
1208 
1209 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1210 					     req_unhash, own_req);
1211 
1212 		if (!newsk)
1213 			return NULL;
1214 
1215 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1216 
1217 		newinet = inet_sk(newsk);
1218 		newnp = tcp_inet6_sk(newsk);
1219 		newtp = tcp_sk(newsk);
1220 
		/* Start from a copy of the listener's ipv6_pinfo, then reset
		 * the per-socket pointers and packet-derived fields below.
		 */
1221 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1222 
1223 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1224 
1225 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1226 		if (sk_is_mptcp(newsk))
1227 			mptcpv6_handle_mapped(newsk, true);
1228 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1229 #ifdef CONFIG_TCP_MD5SIG
1230 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1231 #endif
1232 
1233 		newnp->ipv6_mc_list = NULL;
1234 		newnp->ipv6_ac_list = NULL;
1235 		newnp->ipv6_fl_list = NULL;
1236 		newnp->pktoptions  = NULL;
1237 		newnp->opt	   = NULL;
1238 		newnp->mcast_oif   = inet_iif(skb);
1239 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1240 		newnp->rcv_flowinfo = 0;
1241 		if (np->repflow)
1242 			newnp->flow_label = 0;
1243 
1244 		/*
1245 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1246 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1247 		 * that function for the gory details. -acme
1248 		 */
1249 
1250 		/* It is tricky place. Until this moment IPv4 tcp
1251 		   worked with IPv6 icsk.icsk_af_ops.
1252 		   Sync it now.
1253 		 */
1254 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1255 
1256 		return newsk;
1257 	}
1258 
	/* Native IPv6 request from here on. */
1259 	ireq = inet_rsk(req);
1260 
1261 	if (sk_acceptq_is_full(sk))
1262 		goto out_overflow;
1263 
	/* No route supplied by the caller: look one up for this request. */
1264 	if (!dst) {
1265 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1266 		if (!dst)
1267 			goto out;
1268 	}
1269 
1270 	newsk = tcp_create_openreq_child(sk, req, skb);
1271 	if (!newsk)
1272 		goto out_nonewsk;
1273 
1274 	/*
1275 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1276 	 * count here, tcp_create_openreq_child now does this for us, see the
1277 	 * comment in that function for the gory details. -acme
1278 	 */
1279 
1280 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1281 	ip6_dst_store(newsk, dst, NULL, NULL);
1282 	inet6_sk_rx_dst_set(newsk, skb);
1283 
1284 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1285 
1286 	newtp = tcp_sk(newsk);
1287 	newinet = inet_sk(newsk);
1288 	newnp = tcp_inet6_sk(newsk);
1289 
	/* Copy the listener's ipv6_pinfo wholesale; fields that must not be
	 * shared with the listener are overwritten below.
	 */
1290 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1291 
	/* Addresses and bound device come from the request socket. */
1292 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1293 	newnp->saddr = ireq->ir_v6_loc_addr;
1294 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1295 	newsk->sk_bound_dev_if = ireq->ir_iif;
1296 
1297 	/* Now IPv6 options...
1298 
1299 	   First: no IPv4 options.
1300 	 */
1301 	newinet->inet_opt = NULL;
1302 	newnp->ipv6_mc_list = NULL;
1303 	newnp->ipv6_ac_list = NULL;
1304 	newnp->ipv6_fl_list = NULL;
1305 
1306 	/* Clone RX bits */
1307 	newnp->rxopt.all = np->rxopt.all;
1308 
	/* Values taken from the packet that completes the handshake. */
1309 	newnp->pktoptions = NULL;
1310 	newnp->opt	  = NULL;
1311 	newnp->mcast_oif  = tcp_v6_iif(skb);
1312 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1313 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1314 	if (np->repflow)
1315 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1316 
1317 	/* Set ToS of the new socket based upon the value of incoming SYN. */
1318 	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1319 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1320 
1321 	/* Clone native IPv6 options from listening socket (if any)
1322 
1323 	   Yes, keeping reference count would be much more clever,
1324 	   but we make one more one thing there: reattach optmem
1325 	   to newsk.
1326 	 */
	/* Options stored on the request take precedence over the listener's. */
1327 	opt = ireq->ipv6_opt;
1328 	if (!opt)
1329 		opt = rcu_dereference(np->opt);
1330 	if (opt) {
1331 		opt = ipv6_dup_options(newsk, opt);
1332 		RCU_INIT_POINTER(newnp->opt, opt);
1333 	}
	/* opt is re-tested because ipv6_dup_options() may have returned NULL. */
1334 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1335 	if (opt)
1336 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1337 						    opt->opt_flen;
1338 
1339 	tcp_ca_openreq_child(newsk, dst);
1340 
1341 	tcp_sync_mss(newsk, dst_mtu(dst));
1342 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1343 
1344 	tcp_initialize_rcv_mss(newsk);
1345 
	/* The IPv4 address fields of a native IPv6 child all get the
	 * LOOPBACK4_IPV6 value.
	 */
1346 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1347 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1348 
1349 #ifdef CONFIG_TCP_MD5SIG
1350 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1351 
1352 	/* Copy over the MD5 key from the original socket */
1353 	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1354 	if (key) {
1355 		/* We're using one, so create a matching key
1356 		 * on the newsk structure. If we fail to get
1357 		 * memory, then we end up not copying the key
1358 		 * across. Shucks.
1359 		 */
1360 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1361 			       AF_INET6, 128, l3index, key->key, key->keylen,
1362 			       sk_gfp_mask(sk, GFP_ATOMIC));
1363 	}
1364 #endif
1365 
	/* NOTE(review): this failure path jumps to 'out', skipping
	 * dst_release(); presumably dst already belongs to newsk after
	 * ip6_dst_store() above - confirm.
	 */
1366 	if (__inet_inherit_port(sk, newsk) < 0) {
1367 		inet_csk_prepare_forced_close(newsk);
1368 		tcp_done(newsk);
1369 		goto out;
1370 	}
1371 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1372 	if (*own_req) {
1373 		tcp_move_syn(newtp, req);
1374 
1375 		/* Clone pktoptions received with SYN, if we own the req */
1376 		if (ireq->pktopts) {
1377 			newnp->pktoptions = skb_clone(ireq->pktopts,
1378 						      sk_gfp_mask(sk, GFP_ATOMIC));
1379 			consume_skb(ireq->pktopts);
1380 			ireq->pktopts = NULL;
1381 			if (newnp->pktoptions) {
1382 				tcp_v6_restore_cb(newnp->pktoptions);
1383 				skb_set_owner_r(newnp->pktoptions, newsk);
1384 			}
1385 		}
1386 	}
1387 
1388 	return newsk;
1389 
	/* Error unwinding; each label falls through to the next one. */
1390 out_overflow:
1391 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1392 out_nonewsk:
1393 	dst_release(dst);
1394 out:
1395 	tcp_listendrop(sk);
1396 	return NULL;
1397 }
1398 
/*
 * Per-socket receive handler for TCP over IPv6; also installed as
 * tcpv6_prot.backlog_rcv.
 *
 * IPv4 packets are handed to tcp_v4_do_rcv() and its result is returned.
 * Every IPv6 path returns 0.
 *
 * When the socket has any rxopt bit set, a clone of @skb (opt_skb) is
 * taken up front and, after successful processing, may be latched into
 * np->pktoptions (see the ipv6_pktoptions label). Every exit path either
 * frees opt_skb or hands it to np->pktoptions.
 */
1399 /* The socket must have its spinlock held when we get
1400  * here, unless it is a TCP_LISTEN socket.
1401  *
1402  * We have a potential double-lock case here, so even when
1403  * doing backlog processing we use the BH locking scheme.
1404  * This is because we cannot sleep with the original spinlock
1405  * held.
1406  */
1407 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1408 {
1409 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1410 	struct sk_buff *opt_skb = NULL;
1411 	struct tcp_sock *tp;
1412 
1413 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1414 	   goes to IPv4 receive handler and backlogged.
1415 	   From backlog it always goes here. Kerboom...
1416 	   Fortunately, tcp_rcv_established and rcv_established
1417 	   handle them correctly, but it is not case with
1418 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1419 	 */
1420 
1421 	if (skb->protocol == htons(ETH_P_IP))
1422 		return tcp_v4_do_rcv(sk, skb);
1423 
1424 	/*
1425 	 *	socket locking is here for SMP purposes as backlog rcv
1426 	 *	is currently called with bh processing disabled.
1427 	 */
1428 
1429 	/* Do Stevens' IPV6_PKTOPTIONS.
1430 
1431 	   Yes, guys, it is the only place in our code, where we
1432 	   may make it not affecting IPv4.
1433 	   The rest of code is protocol independent,
1434 	   and I do not like idea to uglify IPv4.
1435 
1436 	   Actually, all the idea behind IPV6_PKTOPTIONS
1437 	   looks not very well thought. For now we latch
1438 	   options, received in the last packet, enqueued
1439 	   by tcp. Feel free to propose better solution.
1440 					       --ANK (980728)
1441 	 */
	/* opt_skb stays NULL if no rxopt bit is set or the clone fails. */
1442 	if (np->rxopt.all)
1443 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1444 
1445 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1446 		struct dst_entry *dst = sk->sk_rx_dst;
1447 
1448 		sock_rps_save_rxhash(sk, skb);
1449 		sk_mark_napi_id(sk, skb);
		/* Drop the cached rx dst if the packet arrived on another
		 * interface or the dst fails its check().
		 */
1450 		if (dst) {
1451 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1452 			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1453 				dst_release(dst);
1454 				sk->sk_rx_dst = NULL;
1455 			}
1456 		}
1457 
1458 		tcp_rcv_established(sk, skb);
1459 		if (opt_skb)
1460 			goto ipv6_pktoptions;
1461 		return 0;
1462 	}
1463 
1464 	if (tcp_checksum_complete(skb))
1465 		goto csum_err;
1466 
1467 	if (sk->sk_state == TCP_LISTEN) {
1468 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1469 
1470 		if (!nsk)
1471 			goto discard;
1472 
		/* The cookie check returned a different socket: hand the
		 * packet to it. opt_skb is not latched on this path.
		 */
1473 		if (nsk != sk) {
1474 			if (tcp_child_process(sk, nsk, skb))
1475 				goto reset;
1476 			if (opt_skb)
1477 				__kfree_skb(opt_skb);
1478 			return 0;
1479 		}
1480 	} else
1481 		sock_rps_save_rxhash(sk, skb);
1482 
1483 	if (tcp_rcv_state_process(sk, skb))
1484 		goto reset;
1485 	if (opt_skb)
1486 		goto ipv6_pktoptions;
1487 	return 0;
1488 
	/* 'reset' falls through into 'discard': send a reset, then free
	 * both the clone and the packet.
	 */
1489 reset:
1490 	tcp_v6_send_reset(sk, skb);
1491 discard:
1492 	if (opt_skb)
1493 		__kfree_skb(opt_skb);
1494 	kfree_skb(skb);
1495 	return 0;
1496 csum_err:
1497 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1498 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1499 	goto discard;
1500 
1501 
1502 ipv6_pktoptions:
1503 	/* Do you ask, what is it?
1504 
1505 	   1. skb was enqueued by tcp.
1506 	   2. skb is added to tail of read queue, rather than out of order.
1507 	   3. socket is not in passive state.
1508 	   4. Finally, it really contains options, which user wants to receive.
1509 	 */
1510 	tp = tcp_sk(sk);
1511 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1512 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1513 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1514 			np->mcast_oif = tcp_v6_iif(opt_skb);
1515 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1516 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1517 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1518 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1519 		if (np->repflow)
1520 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* Either install the clone as the new np->pktoptions or clear
		 * np->pktoptions; in both cases opt_skb ends up holding the
		 * previous np->pktoptions value, which is freed below.
		 */
1521 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1522 			skb_set_owner_r(opt_skb, sk);
1523 			tcp_v6_restore_cb(opt_skb);
1524 			opt_skb = xchg(&np->pktoptions, opt_skb);
1525 		} else {
1526 			__kfree_skb(opt_skb);
1527 			opt_skb = xchg(&np->pktoptions, NULL);
1528 		}
1529 	}
1530 
1531 	kfree_skb(opt_skb);
1532 	return 0;
1533 }
1534 
1535 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1536 			   const struct tcphdr *th)
1537 {
1538 	/* This is tricky: we move IP6CB at its correct location into
1539 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1540 	 * _decode_session6() uses IP6CB().
1541 	 * barrier() makes sure compiler won't play aliasing games.
1542 	 */
1543 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1544 		sizeof(struct inet6_skb_parm));
1545 	barrier();
1546 
1547 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1548 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1549 				    skb->len - th->doff*4);
1550 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1551 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1552 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1553 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1554 	TCP_SKB_CB(skb)->sacked = 0;
1555 	TCP_SKB_CB(skb)->has_rxtstamp =
1556 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1557 }
1558 
/*
 * Receive entry point for a TCP segment carried over IPv6.
 *
 * Validates the TCP header and checksum, looks up the owning socket and
 * then dispatches on its state:
 *  - TCP_TIME_WAIT:    handled under the do_time_wait label;
 *  - TCP_NEW_SYN_RECV: the request socket is resolved against its
 *                      listener with tcp_check_req();
 *  - TCP_LISTEN:       tcp_v6_do_rcv() is called directly;
 *  - anything else:    tcp_v6_do_rcv() under bh_lock_sock_nested(), or
 *                      tcp_add_backlog() if the socket is owned by user
 *                      context.
 * Without a matching socket a reset is sent, unless the policy check or
 * the checksum fails.
 *
 * Returns -1 when tcp_v6_do_rcv() returned non-zero, 0 in all other
 * cases. 'refcounted' tracks whether a reference on sk has to be dropped
 * on the way out.
 */
1559 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1560 {
1561 	struct sk_buff *skb_to_free;
1562 	int sdif = inet6_sdif(skb);
1563 	int dif = inet6_iif(skb);
1564 	const struct tcphdr *th;
1565 	const struct ipv6hdr *hdr;
1566 	bool refcounted;
1567 	struct sock *sk;
1568 	int ret;
1569 	struct net *net = dev_net(skb->dev);
1570 
1571 	if (skb->pkt_type != PACKET_HOST)
1572 		goto discard_it;
1573 
1574 	/*
1575 	 *	Count it even if it's bad.
1576 	 */
1577 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1578 
1579 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1580 		goto discard_it;
1581 
1582 	th = (const struct tcphdr *)skb->data;
1583 
	/* doff counts 32-bit words; it must cover at least the base header. */
1584 	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1585 		goto bad_packet;
1586 	if (!pskb_may_pull(skb, th->doff*4))
1587 		goto discard_it;
1588 
1589 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1590 		goto csum_error;
1591 
	/* Reload the header pointers after the pskb_may_pull() calls above. */
1592 	th = (const struct tcphdr *)skb->data;
1593 	hdr = ipv6_hdr(skb);
1594 
1595 lookup:
1596 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1597 				th->source, th->dest, inet6_iif(skb), sdif,
1598 				&refcounted);
1599 	if (!sk)
1600 		goto no_tcp_socket;
1601 
1602 process:
1603 	if (sk->sk_state == TCP_TIME_WAIT)
1604 		goto do_time_wait;
1605 
1606 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1607 		struct request_sock *req = inet_reqsk(sk);
1608 		bool req_stolen = false;
1609 		struct sock *nsk;
1610 
		/* From here on sk is the listener that owns the request. */
1611 		sk = req->rsk_listener;
1612 		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1613 			sk_drops_add(sk, skb);
1614 			reqsk_put(req);
1615 			goto discard_it;
1616 		}
1617 		if (tcp_checksum_complete(skb)) {
1618 			reqsk_put(req);
1619 			goto csum_error;
1620 		}
		/* The listener is no longer listening: drop the request and
		 * redo the lookup.
		 */
1621 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1622 			inet_csk_reqsk_queue_drop_and_put(sk, req);
1623 			goto lookup;
1624 		}
1625 		sock_hold(sk);
1626 		refcounted = true;
1627 		nsk = NULL;
1628 		if (!tcp_filter(sk, skb)) {
1629 			th = (const struct tcphdr *)skb->data;
1630 			hdr = ipv6_hdr(skb);
1631 			tcp_v6_fill_cb(skb, hdr, th);
1632 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1633 		}
1634 		if (!nsk) {
1635 			reqsk_put(req);
1636 			if (req_stolen) {
1637 				/* Another cpu got exclusive access to req
1638 				 * and created a full blown socket.
1639 				 * Try to feed this packet to this socket
1640 				 * instead of discarding it.
1641 				 */
1642 				tcp_v6_restore_cb(skb);
1643 				sock_put(sk);
1644 				goto lookup;
1645 			}
1646 			goto discard_and_relse;
1647 		}
		/* nsk == sk: continue below with the listener itself; the cb
		 * is restored because tcp_v6_fill_cb() runs again further
		 * down. Otherwise the packet is handed to the child nsk.
		 */
1648 		if (nsk == sk) {
1649 			reqsk_put(req);
1650 			tcp_v6_restore_cb(skb);
1651 		} else if (tcp_child_process(sk, nsk, skb)) {
1652 			tcp_v6_send_reset(nsk, skb);
1653 			goto discard_and_relse;
1654 		} else {
1655 			sock_put(sk);
1656 			return 0;
1657 		}
1658 	}
1659 	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1660 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1661 		goto discard_and_relse;
1662 	}
1663 
1664 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1665 		goto discard_and_relse;
1666 
1667 	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1668 		goto discard_and_relse;
1669 
1670 	if (tcp_filter(sk, skb))
1671 		goto discard_and_relse;
	/* Header pointers are re-read after tcp_filter(). */
1672 	th = (const struct tcphdr *)skb->data;
1673 	hdr = ipv6_hdr(skb);
1674 	tcp_v6_fill_cb(skb, hdr, th);
1675 
1676 	skb->dev = NULL;
1677 
	/* Listeners are processed without taking the socket lock. */
1678 	if (sk->sk_state == TCP_LISTEN) {
1679 		ret = tcp_v6_do_rcv(sk, skb);
1680 		goto put_and_return;
1681 	}
1682 
1683 	sk_incoming_cpu_update(sk);
1684 
1685 	bh_lock_sock_nested(sk);
1686 	tcp_segs_in(tcp_sk(sk), skb);
1687 	ret = 0;
1688 	if (!sock_owned_by_user(sk)) {
1689 		skb_to_free = sk->sk_rx_skb_cache;
1690 		sk->sk_rx_skb_cache = NULL;
1691 		ret = tcp_v6_do_rcv(sk, skb);
1692 	} else {
		/* NOTE(review): this failure path leaves without calling
		 * bh_unlock_sock() here; presumably tcp_add_backlog()
		 * unlocks on failure - confirm.
		 */
1693 		if (tcp_add_backlog(sk, skb))
1694 			goto discard_and_relse;
1695 		skb_to_free = NULL;
1696 	}
1697 	bh_unlock_sock(sk);
1698 	if (skb_to_free)
1699 		__kfree_skb(skb_to_free);
1700 put_and_return:
1701 	if (refcounted)
1702 		sock_put(sk);
1703 	return ret ? -1 : 0;
1704 
1705 no_tcp_socket:
1706 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1707 		goto discard_it;
1708 
1709 	tcp_v6_fill_cb(skb, hdr, th);
1710 
	/* csum_error and bad_packet are jump targets inside this branch:
	 * csum_error bumps both counters, bad_packet only INERRS.
	 */
1711 	if (tcp_checksum_complete(skb)) {
1712 csum_error:
1713 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1714 bad_packet:
1715 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1716 	} else {
1717 		tcp_v6_send_reset(NULL, skb);
1718 	}
1719 
1720 discard_it:
1721 	kfree_skb(skb);
1722 	return 0;
1723 
1724 discard_and_relse:
1725 	sk_drops_add(sk, skb);
1726 	if (refcounted)
1727 		sock_put(sk);
1728 	goto discard_it;
1729 
1730 do_time_wait:
1731 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1732 		inet_twsk_put(inet_twsk(sk));
1733 		goto discard_it;
1734 	}
1735 
1736 	tcp_v6_fill_cb(skb, hdr, th);
1737 
1738 	if (tcp_checksum_complete(skb)) {
1739 		inet_twsk_put(inet_twsk(sk));
1740 		goto csum_error;
1741 	}
1742 
1743 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1744 	case TCP_TW_SYN:
1745 	{
1746 		struct sock *sk2;
1747 
		/* Look for a listener that can take this SYN; if there is
		 * one, the timewait socket is descheduled and processing
		 * restarts at 'process' with the listener.
		 */
1748 		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1749 					    skb, __tcp_hdrlen(th),
1750 					    &ipv6_hdr(skb)->saddr, th->source,
1751 					    &ipv6_hdr(skb)->daddr,
1752 					    ntohs(th->dest),
1753 					    tcp_v6_iif_l3_slave(skb),
1754 					    sdif);
1755 		if (sk2) {
1756 			struct inet_timewait_sock *tw = inet_twsk(sk);
1757 			inet_twsk_deschedule_put(tw);
1758 			sk = sk2;
1759 			tcp_v6_restore_cb(skb);
1760 			refcounted = false;
1761 			goto process;
1762 		}
1763 	}
1764 		/* to ACK */
1765 		fallthrough;
1766 	case TCP_TW_ACK:
1767 		tcp_v6_timewait_ack(sk, skb);
1768 		break;
1769 	case TCP_TW_RST:
1770 		tcp_v6_send_reset(sk, skb);
1771 		inet_twsk_deschedule_put(inet_twsk(sk));
1772 		goto discard_it;
1773 	case TCP_TW_SUCCESS:
1774 		;
1775 	}
1776 	goto discard_it;
1777 }
1778 
1779 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1780 {
1781 	const struct ipv6hdr *hdr;
1782 	const struct tcphdr *th;
1783 	struct sock *sk;
1784 
1785 	if (skb->pkt_type != PACKET_HOST)
1786 		return;
1787 
1788 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1789 		return;
1790 
1791 	hdr = ipv6_hdr(skb);
1792 	th = tcp_hdr(skb);
1793 
1794 	if (th->doff < sizeof(struct tcphdr) / 4)
1795 		return;
1796 
1797 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1798 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1799 					&hdr->saddr, th->source,
1800 					&hdr->daddr, ntohs(th->dest),
1801 					inet6_iif(skb), inet6_sdif(skb));
1802 	if (sk) {
1803 		skb->sk = sk;
1804 		skb->destructor = sock_edemux;
1805 		if (sk_fullsock(sk)) {
1806 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1807 
1808 			if (dst)
1809 				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1810 			if (dst &&
1811 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1812 				skb_dst_set_noref(skb, dst);
1813 		}
1814 	}
1815 }
1816 
/*
 * Timewait-socket operations for TCPv6, referenced from
 * tcpv6_prot.twsk_prot. Timewait objects are sized as
 * struct tcp6_timewait_sock.
 */
1817 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1818 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1819 	.twsk_unique	= tcp_twsk_unique,
1820 	.twsk_destructor = tcp_twsk_destructor,
1821 };
1822 
1823 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1824 {
1825 	struct ipv6_pinfo *np = inet6_sk(sk);
1826 
1827 	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1828 }
1829 
/*
 * Address-family operations for native IPv6 TCP sockets; installed as
 * icsk_af_ops by tcp_v6_init_sock().
 */
1830 const struct inet_connection_sock_af_ops ipv6_specific = {
1831 	.queue_xmit	   = inet6_csk_xmit,
1832 	.send_check	   = tcp_v6_send_check,
1833 	.rebuild_header	   = inet6_sk_rebuild_header,
1834 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1835 	.conn_request	   = tcp_v6_conn_request,
1836 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1837 	.net_header_len	   = sizeof(struct ipv6hdr),
1838 	.net_frag_header_len = sizeof(struct frag_hdr),
1839 	.setsockopt	   = ipv6_setsockopt,
1840 	.getsockopt	   = ipv6_getsockopt,
1841 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1842 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1843 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1844 };
1845 
1846 #ifdef CONFIG_TCP_MD5SIG
/*
 * TCP-MD5 operations for native IPv6 sockets; installed as
 * tcp_sk(sk)->af_specific by tcp_v6_init_sock().
 */
1847 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1848 	.md5_lookup	=	tcp_v6_md5_lookup,
1849 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1850 	.md5_parse	=	tcp_v6_parse_md5_keys,
1851 };
1852 #endif
1853 
1854 /*
1855  *	TCP over IPv4 via INET6 API
1856  */
/*
 * Installed as icsk_af_ops on v4-mapped children by
 * tcp_v6_syn_recv_sock(). Transmit, checksum, header rebuild, rx dst and
 * MTU handling use the IPv4 helpers, while connection setup, socket
 * options and sockaddr conversion stay on the IPv6 ones. Unlike
 * ipv6_specific, no net_frag_header_len is set.
 */
1857 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1858 	.queue_xmit	   = ip_queue_xmit,
1859 	.send_check	   = tcp_v4_send_check,
1860 	.rebuild_header	   = inet_sk_rebuild_header,
1861 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1862 	.conn_request	   = tcp_v6_conn_request,
1863 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1864 	.net_header_len	   = sizeof(struct iphdr),
1865 	.setsockopt	   = ipv6_setsockopt,
1866 	.getsockopt	   = ipv6_getsockopt,
1867 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1868 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1869 	.mtu_reduced	   = tcp_v4_mtu_reduced,
1870 };
1871 
1872 #ifdef CONFIG_TCP_MD5SIG
/*
 * TCP-MD5 operations for v4-mapped children; installed by
 * tcp_v6_syn_recv_sock(). Key lookup and hashing use the IPv4 helpers,
 * key parsing uses the IPv6 one.
 */
1873 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1874 	.md5_lookup	=	tcp_v4_md5_lookup,
1875 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1876 	.md5_parse	=	tcp_v6_parse_md5_keys,
1877 };
1878 #endif
1879 
1880 /* NOTE: A lot of things set to zero explicitly by call to
1881  *       sk_alloc() so need not be done here.
1882  */
1883 static int tcp_v6_init_sock(struct sock *sk)
1884 {
1885 	struct inet_connection_sock *icsk = inet_csk(sk);
1886 
1887 	tcp_init_sock(sk);
1888 
1889 	icsk->icsk_af_ops = &ipv6_specific;
1890 
1891 #ifdef CONFIG_TCP_MD5SIG
1892 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1893 #endif
1894 
1895 	return 0;
1896 }
1897 
/*
 * Destroy hook of tcpv6_prot: runs tcp_v4_destroy_sock() on the socket
 * and then inet6_destroy_sock().
 */
1898 static void tcp_v6_destroy_sock(struct sock *sk)
1899 {
1900 	tcp_v4_destroy_sock(sk);
1901 	inet6_destroy_sock(sk);
1902 }
1903 
1904 #ifdef CONFIG_PROC_FS
1905 /* Proc filesystem TCPv6 sock list dumping. */
1906 static void get_openreq6(struct seq_file *seq,
1907 			 const struct request_sock *req, int i)
1908 {
1909 	long ttd = req->rsk_timer.expires - jiffies;
1910 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1911 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1912 
1913 	if (ttd < 0)
1914 		ttd = 0;
1915 
1916 	seq_printf(seq,
1917 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1918 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1919 		   i,
1920 		   src->s6_addr32[0], src->s6_addr32[1],
1921 		   src->s6_addr32[2], src->s6_addr32[3],
1922 		   inet_rsk(req)->ir_num,
1923 		   dest->s6_addr32[0], dest->s6_addr32[1],
1924 		   dest->s6_addr32[2], dest->s6_addr32[3],
1925 		   ntohs(inet_rsk(req)->ir_rmt_port),
1926 		   TCP_SYN_RECV,
1927 		   0, 0, /* could print option size, but that is af dependent. */
1928 		   1,   /* timers active (only the expire timer) */
1929 		   jiffies_to_clock_t(ttd),
1930 		   req->num_timeout,
1931 		   from_kuid_munged(seq_user_ns(seq),
1932 				    sock_i_uid(req->rsk_listener)),
1933 		   0,  /* non standard timer */
1934 		   0, /* open_requests have no inode */
1935 		   0, req);
1936 }
1937 
/*
 * Print the tcp6 proc line for full socket @sp as entry number @i.
 *
 * The socket is not locked while its fields are read (see the rx_queue
 * comment below), so the values printed are a best-effort snapshot.
 */
1938 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1939 {
1940 	const struct in6_addr *dest, *src;
1941 	__u16 destp, srcp;
1942 	int timer_active;
1943 	unsigned long timer_expires;
1944 	const struct inet_sock *inet = inet_sk(sp);
1945 	const struct tcp_sock *tp = tcp_sk(sp);
1946 	const struct inet_connection_sock *icsk = inet_csk(sp);
1947 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1948 	int rx_queue;
1949 	int state;
1950 
1951 	dest  = &sp->sk_v6_daddr;
1952 	src   = &sp->sk_v6_rcv_saddr;
1953 	destp = ntohs(inet->inet_dport);
1954 	srcp  = ntohs(inet->inet_sport);
1955 
	/* timer_active code printed in the "tr" column:
	 *   1 - retransmit, reordering-timeout or loss-probe timer pending
	 *   4 - zero-window probe timer (ICSK_TIME_PROBE0) pending
	 *   2 - sk_timer pending
	 *   0 - none of the above
	 */
1956 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1957 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1958 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1959 		timer_active	= 1;
1960 		timer_expires	= icsk->icsk_timeout;
1961 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1962 		timer_active	= 4;
1963 		timer_expires	= icsk->icsk_timeout;
1964 	} else if (timer_pending(&sp->sk_timer)) {
1965 		timer_active	= 2;
1966 		timer_expires	= sp->sk_timer.expires;
1967 	} else {
1968 		timer_active	= 0;
1969 		timer_expires = jiffies;
1970 	}
1971 
	/* For a listener the rx_queue column carries sk_ack_backlog; for any
	 * other state it is the number of received-but-not-yet-copied bytes.
	 */
1972 	state = inet_sk_state_load(sp);
1973 	if (state == TCP_LISTEN)
1974 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
1975 	else
1976 		/* Because we don't lock the socket,
1977 		 * we might find a transient negative value.
1978 		 */
1979 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1980 				      READ_ONCE(tp->copied_seq), 0);
1981 
	/* The final column is fastopenq->max_qlen for a listener; otherwise
	 * snd_ssthresh, or -1 while still in initial slow start.
	 */
1982 	seq_printf(seq,
1983 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1984 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1985 		   i,
1986 		   src->s6_addr32[0], src->s6_addr32[1],
1987 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
1988 		   dest->s6_addr32[0], dest->s6_addr32[1],
1989 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1990 		   state,
1991 		   READ_ONCE(tp->write_seq) - tp->snd_una,
1992 		   rx_queue,
1993 		   timer_active,
1994 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1995 		   icsk->icsk_retransmits,
1996 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1997 		   icsk->icsk_probes_out,
1998 		   sock_i_ino(sp),
1999 		   refcount_read(&sp->sk_refcnt), sp,
2000 		   jiffies_to_clock_t(icsk->icsk_rto),
2001 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2002 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2003 		   tp->snd_cwnd,
2004 		   state == TCP_LISTEN ?
2005 			fastopenq->max_qlen :
2006 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2007 		   );
2008 }
2009 
2010 static void get_timewait6_sock(struct seq_file *seq,
2011 			       struct inet_timewait_sock *tw, int i)
2012 {
2013 	long delta = tw->tw_timer.expires - jiffies;
2014 	const struct in6_addr *dest, *src;
2015 	__u16 destp, srcp;
2016 
2017 	dest = &tw->tw_v6_daddr;
2018 	src  = &tw->tw_v6_rcv_saddr;
2019 	destp = ntohs(tw->tw_dport);
2020 	srcp  = ntohs(tw->tw_sport);
2021 
2022 	seq_printf(seq,
2023 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2024 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2025 		   i,
2026 		   src->s6_addr32[0], src->s6_addr32[1],
2027 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2028 		   dest->s6_addr32[0], dest->s6_addr32[1],
2029 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2030 		   tw->tw_substate, 0, 0,
2031 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2032 		   refcount_read(&tw->tw_refcnt), tw);
2033 }
2034 
2035 static int tcp6_seq_show(struct seq_file *seq, void *v)
2036 {
2037 	struct tcp_iter_state *st;
2038 	struct sock *sk = v;
2039 
2040 	if (v == SEQ_START_TOKEN) {
2041 		seq_puts(seq,
2042 			 "  sl  "
2043 			 "local_address                         "
2044 			 "remote_address                        "
2045 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2046 			 "   uid  timeout inode\n");
2047 		goto out;
2048 	}
2049 	st = seq->private;
2050 
2051 	if (sk->sk_state == TCP_TIME_WAIT)
2052 		get_timewait6_sock(seq, v, st->num);
2053 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2054 		get_openreq6(seq, v, st->num);
2055 	else
2056 		get_tcp6_sock(seq, v, st->num);
2057 out:
2058 	return 0;
2059 }
2060 
/*
 * seq_file operations for the "tcp6" proc entry registered in
 * tcp6_proc_init(). Only ->show is IPv6-specific; start/next/stop are
 * the tcp_seq_* iterator helpers.
 */
2061 static const struct seq_operations tcp6_seq_ops = {
2062 	.show		= tcp6_seq_show,
2063 	.start		= tcp_seq_start,
2064 	.next		= tcp_seq_next,
2065 	.stop		= tcp_seq_stop,
2066 };
2067 
/*
 * Address-family descriptor (family = AF_INET6) passed as the data
 * argument to proc_create_net_data() in tcp6_proc_init().
 */
2068 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2069 	.family		= AF_INET6,
2070 };
2071 
2072 int __net_init tcp6_proc_init(struct net *net)
2073 {
2074 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2075 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2076 		return -ENOMEM;
2077 	return 0;
2078 }
2079 
2080 void tcp6_proc_exit(struct net *net)
2081 {
2082 	remove_proc_entry("tcp6", net->proc_net);
2083 }
2084 #endif
2085 
2086 struct proto tcpv6_prot = {
2087 	.name			= "TCPv6",
2088 	.owner			= THIS_MODULE,
2089 	.close			= tcp_close,
2090 	.pre_connect		= tcp_v6_pre_connect,
2091 	.connect		= tcp_v6_connect,
2092 	.disconnect		= tcp_disconnect,
2093 	.accept			= inet_csk_accept,
2094 	.ioctl			= tcp_ioctl,
2095 	.init			= tcp_v6_init_sock,
2096 	.destroy		= tcp_v6_destroy_sock,
2097 	.shutdown		= tcp_shutdown,
2098 	.setsockopt		= tcp_setsockopt,
2099 	.getsockopt		= tcp_getsockopt,
2100 	.keepalive		= tcp_set_keepalive,
2101 	.recvmsg		= tcp_recvmsg,
2102 	.sendmsg		= tcp_sendmsg,
2103 	.sendpage		= tcp_sendpage,
2104 	.backlog_rcv		= tcp_v6_do_rcv,
2105 	.release_cb		= tcp_release_cb,
2106 	.hash			= inet6_hash,
2107 	.unhash			= inet_unhash,
2108 	.get_port		= inet_csk_get_port,
2109 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2110 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2111 	.stream_memory_free	= tcp_stream_memory_free,
2112 	.sockets_allocated	= &tcp_sockets_allocated,
2113 	.memory_allocated	= &tcp_memory_allocated,
2114 	.memory_pressure	= &tcp_memory_pressure,
2115 	.orphan_count		= &tcp_orphan_count,
2116 	.sysctl_mem		= sysctl_tcp_mem,
2117 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2118 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2119 	.max_header		= MAX_TCP_HEADER,
2120 	.obj_size		= sizeof(struct tcp6_sock),
2121 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2122 	.twsk_prot		= &tcp6_timewait_sock_ops,
2123 	.rsk_prot		= &tcp6_request_sock_ops,
2124 	.h.hashinfo		= &tcp_hashinfo,
2125 	.no_autobind		= true,
2126 	.diag_destroy		= tcp_abort,
2127 };
2128 EXPORT_SYMBOL_GPL(tcpv6_prot);
2129 
2130 /* thinking of making this const? Don't.
2131  * early_demux can change based on sysctl.
2132  */
2133 static struct inet6_protocol tcpv6_protocol = {
2134 	.early_demux	=	tcp_v6_early_demux,
2135 	.early_demux_handler =  tcp_v6_early_demux,
2136 	.handler	=	tcp_v6_rcv,
2137 	.err_handler	=	tcp_v6_err,
2138 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2139 };
2140 
2141 static struct inet_protosw tcpv6_protosw = {
2142 	.type		=	SOCK_STREAM,
2143 	.protocol	=	IPPROTO_TCP,
2144 	.prot		=	&tcpv6_prot,
2145 	.ops		=	&inet6_stream_ops,
2146 	.flags		=	INET_PROTOSW_PERMANENT |
2147 				INET_PROTOSW_ICSK,
2148 };
2149 
2150 static int __net_init tcpv6_net_init(struct net *net)
2151 {
2152 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2153 				    SOCK_RAW, IPPROTO_TCP, net);
2154 }
2155 
2156 static void __net_exit tcpv6_net_exit(struct net *net)
2157 {
2158 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2159 }
2160 
2161 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2162 {
2163 	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2164 }
2165 
2166 static struct pernet_operations tcpv6_net_ops = {
2167 	.init	    = tcpv6_net_init,
2168 	.exit	    = tcpv6_net_exit,
2169 	.exit_batch = tcpv6_net_exit_batch,
2170 };
2171 
2172 int __init tcpv6_init(void)
2173 {
2174 	int ret;
2175 
2176 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2177 	if (ret)
2178 		goto out;
2179 
2180 	/* register inet6 protocol */
2181 	ret = inet6_register_protosw(&tcpv6_protosw);
2182 	if (ret)
2183 		goto out_tcpv6_protocol;
2184 
2185 	ret = register_pernet_subsys(&tcpv6_net_ops);
2186 	if (ret)
2187 		goto out_tcpv6_protosw;
2188 
2189 	ret = mptcpv6_init();
2190 	if (ret)
2191 		goto out_tcpv6_pernet_subsys;
2192 
2193 out:
2194 	return ret;
2195 
2196 out_tcpv6_pernet_subsys:
2197 	unregister_pernet_subsys(&tcpv6_net_ops);
2198 out_tcpv6_protosw:
2199 	inet6_unregister_protosw(&tcpv6_protosw);
2200 out_tcpv6_protocol:
2201 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2202 	goto out;
2203 }
2204 
2205 void tcpv6_exit(void)
2206 {
2207 	unregister_pernet_subsys(&tcpv6_net_ops);
2208 	inet6_unregister_protosw(&tcpv6_protosw);
2209 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2210 }
2211