xref: /linux/net/ipv6/tcp_ipv6.c (revision 5027ec19f1049a07df5b0a37b1f462514cf2724b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #endif
83 
84 /* Helper returning the inet6 address from a given tcp socket.
85  * It can be used in the TCP stack instead of inet6_sk(sk).
86  * This avoids a dereference and allows compiler optimizations.
87  * It is a specialized version of inet6_sk_generic().
88  */
89 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
90 					      struct tcp6_sock, tcp)->inet6)
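/* For reference, the macro above behaves like this open-coded sketch
 * (hypothetical helper name; the real definition stays a macro so it
 * preserves const-ness via container_of_const()):
 *
 *	static inline struct ipv6_pinfo *tcp_inet6_sk_sketch(struct sock *sk)
 *	{
 *		return &((struct tcp6_sock *)sk)->inet6;
 *	}
 *
 * ipv6_pinfo lives at a fixed offset inside struct tcp6_sock, so no
 * inet6_sk(sk) pointer load is needed.
 */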
91 
92 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
93 {
94 	struct dst_entry *dst = skb_dst(skb);
95 
96 	if (dst && dst_hold_safe(dst)) {
97 		const struct rt6_info *rt = (const struct rt6_info *)dst;
98 
99 		rcu_assign_pointer(sk->sk_rx_dst, dst);
100 		sk->sk_rx_dst_ifindex = skb->skb_iif;
101 		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
102 	}
103 }
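/* The dst cached here is revalidated in tcp_v6_do_rcv() before reuse:
 * it is dropped if skb->skb_iif no longer matches sk_rx_dst_ifindex or
 * if dst->ops->check() fails for the saved sk_rx_dst_cookie.
 */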
104 
105 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
106 {
107 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
108 				ipv6_hdr(skb)->saddr.s6_addr32,
109 				tcp_hdr(skb)->dest,
110 				tcp_hdr(skb)->source);
111 }
112 
113 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
114 {
115 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
116 				   ipv6_hdr(skb)->saddr.s6_addr32);
117 }
118 
119 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
120 			      int addr_len)
121 {
122 	/* This check is replicated from tcp_v6_connect() and intended to
123 	 * prevent the BPF program called below from accessing bytes that are
124 	 * out of the bounds specified by the user in addr_len.
125 	 */
126 	if (addr_len < SIN6_LEN_RFC2133)
127 		return -EINVAL;
128 
129 	sock_owned_by_me(sk);
130 
131 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
132 }
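/* This is the hook behind BPF cgroup/connect6 programs: a program
 * attached with attach type BPF_CGROUP_INET6_CONNECT may inspect or
 * rewrite the destination in uaddr before tcp_v6_connect() runs.
 */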
133 
134 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
135 			  int addr_len)
136 {
137 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
138 	struct inet_connection_sock *icsk = inet_csk(sk);
139 	struct in6_addr *saddr = NULL, *final_p, final;
140 	struct inet_timewait_death_row *tcp_death_row;
141 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
142 	struct inet_sock *inet = inet_sk(sk);
143 	struct tcp_sock *tp = tcp_sk(sk);
144 	struct net *net = sock_net(sk);
145 	struct ipv6_txoptions *opt;
146 	struct dst_entry *dst;
147 	struct flowi6 fl6;
148 	int addr_type;
149 	int err;
150 
151 	if (addr_len < SIN6_LEN_RFC2133)
152 		return -EINVAL;
153 
154 	if (usin->sin6_family != AF_INET6)
155 		return -EAFNOSUPPORT;
156 
157 	memset(&fl6, 0, sizeof(fl6));
158 
159 	if (inet6_test_bit(SNDFLOW, sk)) {
160 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
161 		IP6_ECN_flow_init(fl6.flowlabel);
162 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
163 			struct ip6_flowlabel *flowlabel;
164 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
165 			if (IS_ERR(flowlabel))
166 				return -EINVAL;
167 			fl6_sock_release(flowlabel);
168 		}
169 	}
170 
171 	/*
172 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
173 	 */
174 
175 	if (ipv6_addr_any(&usin->sin6_addr)) {
176 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
177 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
178 					       &usin->sin6_addr);
179 		else
180 			usin->sin6_addr = in6addr_loopback;
181 	}
182 
183 	addr_type = ipv6_addr_type(&usin->sin6_addr);
184 
185 	if (addr_type & IPV6_ADDR_MULTICAST)
186 		return -ENETUNREACH;
187 
188 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
189 		if (addr_len >= sizeof(struct sockaddr_in6) &&
190 		    usin->sin6_scope_id) {
191 			/* If interface is set while binding, indices
192 			 * must coincide.
193 			 */
194 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
195 				return -EINVAL;
196 
197 			sk->sk_bound_dev_if = usin->sin6_scope_id;
198 		}
199 
200 		/* Connecting to a link-local address requires an interface */
201 		if (!sk->sk_bound_dev_if)
202 			return -EINVAL;
203 	}
204 
205 	if (tp->rx_opt.ts_recent_stamp &&
206 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
207 		tp->rx_opt.ts_recent = 0;
208 		tp->rx_opt.ts_recent_stamp = 0;
209 		WRITE_ONCE(tp->write_seq, 0);
210 	}
211 
212 	sk->sk_v6_daddr = usin->sin6_addr;
213 	np->flow_label = fl6.flowlabel;
214 
215 	/*
216 	 *	TCP over IPv4
217 	 */
218 
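	/* A v4-mapped address has the form ::ffff:a.b.c.d, with the IPv4
	 * address stored in s6_addr32[3]; that word is what sin.sin_addr
	 * is built from below before handing off to tcp_v4_connect().
	 */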
219 	if (addr_type & IPV6_ADDR_MAPPED) {
220 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
221 		struct sockaddr_in sin;
222 
223 		if (ipv6_only_sock(sk))
224 			return -ENETUNREACH;
225 
226 		sin.sin_family = AF_INET;
227 		sin.sin_port = usin->sin6_port;
228 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
229 
230 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
231 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
232 		if (sk_is_mptcp(sk))
233 			mptcpv6_handle_mapped(sk, true);
234 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
235 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
236 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
237 #endif
238 
239 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
240 
241 		if (err) {
242 			icsk->icsk_ext_hdr_len = exthdrlen;
243 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
244 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
245 			if (sk_is_mptcp(sk))
246 				mptcpv6_handle_mapped(sk, false);
247 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
248 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
249 			tp->af_specific = &tcp_sock_ipv6_specific;
250 #endif
251 			goto failure;
252 		}
253 		np->saddr = sk->sk_v6_rcv_saddr;
254 
255 		return err;
256 	}
257 
258 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
259 		saddr = &sk->sk_v6_rcv_saddr;
260 
261 	fl6.flowi6_proto = IPPROTO_TCP;
262 	fl6.daddr = sk->sk_v6_daddr;
263 	fl6.saddr = saddr ? *saddr : np->saddr;
264 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
265 	fl6.flowi6_oif = sk->sk_bound_dev_if;
266 	fl6.flowi6_mark = sk->sk_mark;
267 	fl6.fl6_dport = usin->sin6_port;
268 	fl6.fl6_sport = inet->inet_sport;
269 	fl6.flowi6_uid = sk->sk_uid;
270 
271 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
272 	final_p = fl6_update_dst(&fl6, opt, &final);
273 
274 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
275 
276 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
277 	if (IS_ERR(dst)) {
278 		err = PTR_ERR(dst);
279 		goto failure;
280 	}
281 
282 	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
283 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
284 
285 	if (!saddr) {
286 		saddr = &fl6.saddr;
287 
288 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
289 		if (err)
290 			goto failure;
291 	}
292 
293 	/* set the source address */
294 	np->saddr = *saddr;
295 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
296 
297 	sk->sk_gso_type = SKB_GSO_TCPV6;
298 	ip6_dst_store(sk, dst, NULL, NULL);
299 
300 	icsk->icsk_ext_hdr_len = 0;
301 	if (opt)
302 		icsk->icsk_ext_hdr_len = opt->opt_flen +
303 					 opt->opt_nflen;
304 
305 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
306 
307 	inet->inet_dport = usin->sin6_port;
308 
309 	tcp_set_state(sk, TCP_SYN_SENT);
310 	err = inet6_hash_connect(tcp_death_row, sk);
311 	if (err)
312 		goto late_failure;
313 
314 	sk_set_txhash(sk);
315 
316 	if (likely(!tp->repair)) {
317 		if (!tp->write_seq)
318 			WRITE_ONCE(tp->write_seq,
319 				   secure_tcpv6_seq(np->saddr.s6_addr32,
320 						    sk->sk_v6_daddr.s6_addr32,
321 						    inet->inet_sport,
322 						    inet->inet_dport));
323 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
324 						   sk->sk_v6_daddr.s6_addr32);
325 	}
326 
327 	if (tcp_fastopen_defer_connect(sk, &err))
328 		return err;
329 	if (err)
330 		goto late_failure;
331 
332 	err = tcp_connect(sk);
333 	if (err)
334 		goto late_failure;
335 
336 	return 0;
337 
338 late_failure:
339 	tcp_set_state(sk, TCP_CLOSE);
340 	inet_bhash2_reset_saddr(sk);
341 failure:
342 	inet->inet_dport = 0;
343 	sk->sk_route_caps = 0;
344 	return err;
345 }
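/* For illustration, a userspace sketch (hypothetical descriptor and
 * peer) that exercises the v4-mapped branch above:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port = htons(80) };
 *
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * With IPV6_V6ONLY set on fd, the same connect() instead fails with
 * ENETUNREACH, per the ipv6_only_sock() check above.
 */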
346 
347 static void tcp_v6_mtu_reduced(struct sock *sk)
348 {
349 	struct dst_entry *dst;
350 	u32 mtu;
351 
352 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
353 		return;
354 
355 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
356 
357 	/* Drop requests trying to increase our current mss.
358 	 * The check done in __ip6_rt_update_pmtu() is too late.
359 	 */
360 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
361 		return;
362 
363 	dst = inet6_csk_update_pmtu(sk, mtu);
364 	if (!dst)
365 		return;
366 
367 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
368 		tcp_sync_mss(sk, dst_mtu(dst));
369 		tcp_simple_retransmit(sk);
370 	}
371 }
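/* tcp_mtu_to_mss() converts the advertised link MTU into an MSS by
 * subtracting IP and TCP header (plus option) overhead, so an ICMP
 * that would not actually shrink the current mss_cache is ignored.
 */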
372 
373 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
374 		u8 type, u8 code, int offset, __be32 info)
375 {
376 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
377 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
378 	struct net *net = dev_net(skb->dev);
379 	struct request_sock *fastopen;
380 	struct ipv6_pinfo *np;
381 	struct tcp_sock *tp;
382 	__u32 seq, snd_una;
383 	struct sock *sk;
384 	bool harderr;
385 	int err;
386 
387 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
388 					&hdr->daddr, th->dest,
389 					&hdr->saddr, ntohs(th->source),
390 					skb->dev->ifindex, inet6_sdif(skb));
391 
392 	if (!sk) {
393 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
394 				  ICMP6_MIB_INERRORS);
395 		return -ENOENT;
396 	}
397 
398 	if (sk->sk_state == TCP_TIME_WAIT) {
399 		/* To increase the counter of ignored icmps for TCP-AO */
400 		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
401 		inet_twsk_put(inet_twsk(sk));
402 		return 0;
403 	}
404 	seq = ntohl(th->seq);
405 	harderr = icmpv6_err_convert(type, code, &err);
406 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
407 		tcp_req_err(sk, seq, harderr);
408 		return 0;
409 	}
410 
411 	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
412 		sock_put(sk);
413 		return 0;
414 	}
415 
416 	bh_lock_sock(sk);
417 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
418 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
419 
420 	if (sk->sk_state == TCP_CLOSE)
421 		goto out;
422 
423 	if (static_branch_unlikely(&ip6_min_hopcount)) {
424 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
425 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
426 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
427 			goto out;
428 		}
429 	}
430 
431 	tp = tcp_sk(sk);
432 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
433 	fastopen = rcu_dereference(tp->fastopen_rsk);
434 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
435 	if (sk->sk_state != TCP_LISTEN &&
436 	    !between(seq, snd_una, tp->snd_nxt)) {
437 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
438 		goto out;
439 	}
440 
441 	np = tcp_inet6_sk(sk);
442 
443 	if (type == NDISC_REDIRECT) {
444 		if (!sock_owned_by_user(sk)) {
445 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
446 
447 			if (dst)
448 				dst->ops->redirect(dst, sk, skb);
449 		}
450 		goto out;
451 	}
452 
453 	if (type == ICMPV6_PKT_TOOBIG) {
454 		u32 mtu = ntohl(info);
455 
456 		/* We are not interested in TCP_LISTEN and open_requests
457 	 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
458 		 * they should go through unfragmented).
459 		 */
460 		if (sk->sk_state == TCP_LISTEN)
461 			goto out;
462 
463 		if (!ip6_sk_accept_pmtu(sk))
464 			goto out;
465 
466 		if (mtu < IPV6_MIN_MTU)
467 			goto out;
468 
469 		WRITE_ONCE(tp->mtu_info, mtu);
470 
471 		if (!sock_owned_by_user(sk))
472 			tcp_v6_mtu_reduced(sk);
473 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
474 					   &sk->sk_tsq_flags))
475 			sock_hold(sk);
476 		goto out;
477 	}
478 
479 
480 	/* Might be for a request_sock */
481 	switch (sk->sk_state) {
482 	case TCP_SYN_SENT:
483 	case TCP_SYN_RECV:
484 		/* Only in fast or simultaneous open. If a fast open socket is
485 		 * already accepted it is treated as a connected one below.
486 		 */
487 		if (fastopen && !fastopen->sk)
488 			break;
489 
490 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
491 
492 		if (!harderr)
493 			break;
494 
495 		if (!sock_owned_by_user(sk)) {
496 			WRITE_ONCE(sk->sk_err, err);
497 			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
498 
499 			tcp_done(sk);
500 		} else {
501 			WRITE_ONCE(sk->sk_err_soft, err);
502 		}
503 		goto out;
504 	case TCP_LISTEN:
505 		break;
506 	default:
507 		/* check if this ICMP message allows revert of backoff.
508 		 * (see RFC 6069)
509 		 */
510 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
511 		    code == ICMPV6_NOROUTE)
512 			tcp_ld_RTO_revert(sk, seq);
513 	}
514 
515 	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
516 		WRITE_ONCE(sk->sk_err, err);
517 		sk_error_report(sk);
518 	} else {
519 		WRITE_ONCE(sk->sk_err_soft, err);
520 	}
521 out:
522 	bh_unlock_sock(sk);
523 	sock_put(sk);
524 	return 0;
525 }
526 
527 
528 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
529 			      struct flowi *fl,
530 			      struct request_sock *req,
531 			      struct tcp_fastopen_cookie *foc,
532 			      enum tcp_synack_type synack_type,
533 			      struct sk_buff *syn_skb)
534 {
535 	struct inet_request_sock *ireq = inet_rsk(req);
536 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
537 	struct ipv6_txoptions *opt;
538 	struct flowi6 *fl6 = &fl->u.ip6;
539 	struct sk_buff *skb;
540 	int err = -ENOMEM;
541 	u8 tclass;
542 
543 	/* First, grab a route. */
544 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
545 					       IPPROTO_TCP)) == NULL)
546 		goto done;
547 
548 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
549 
550 	if (skb) {
551 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
552 				    &ireq->ir_v6_rmt_addr);
553 
554 		fl6->daddr = ireq->ir_v6_rmt_addr;
555 		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
556 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
557 
558 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
559 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
560 				(np->tclass & INET_ECN_MASK) :
561 				np->tclass;
562 
563 		if (!INET_ECN_is_capable(tclass) &&
564 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
565 			tclass |= INET_ECN_ECT_0;
566 
567 		rcu_read_lock();
568 		opt = ireq->ipv6_opt;
569 		if (!opt)
570 			opt = rcu_dereference(np->opt);
571 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
572 			       opt, tclass, READ_ONCE(sk->sk_priority));
573 		rcu_read_unlock();
574 		err = net_xmit_eval(err);
575 	}
576 
577 done:
578 	return err;
579 }
580 
581 
582 static void tcp_v6_reqsk_destructor(struct request_sock *req)
583 {
584 	kfree(inet_rsk(req)->ipv6_opt);
585 	consume_skb(inet_rsk(req)->pktopts);
586 }
587 
588 #ifdef CONFIG_TCP_MD5SIG
589 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
590 						   const struct in6_addr *addr,
591 						   int l3index)
592 {
593 	return tcp_md5_do_lookup(sk, l3index,
594 				 (union tcp_md5_addr *)addr, AF_INET6);
595 }
596 
597 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
598 						const struct sock *addr_sk)
599 {
600 	int l3index;
601 
602 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
603 						 addr_sk->sk_bound_dev_if);
604 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
605 				    l3index);
606 }
607 
608 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
609 				 sockptr_t optval, int optlen)
610 {
611 	struct tcp_md5sig cmd;
612 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
613 	union tcp_ao_addr *addr;
614 	int l3index = 0;
615 	u8 prefixlen;
616 	bool l3flag;
617 	u8 flags;
618 
619 	if (optlen < sizeof(cmd))
620 		return -EINVAL;
621 
622 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
623 		return -EFAULT;
624 
625 	if (sin6->sin6_family != AF_INET6)
626 		return -EINVAL;
627 
628 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
629 	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
630 
631 	if (optname == TCP_MD5SIG_EXT &&
632 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
633 		prefixlen = cmd.tcpm_prefixlen;
634 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
635 					prefixlen > 32))
636 			return -EINVAL;
637 	} else {
638 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
639 	}
640 
641 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
642 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
643 		struct net_device *dev;
644 
645 		rcu_read_lock();
646 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
647 		if (dev && netif_is_l3_master(dev))
648 			l3index = dev->ifindex;
649 		rcu_read_unlock();
650 
651 		/* ok to reference set/not set outside of rcu;
652 		 * right now device MUST be an L3 master
653 		 */
654 		if (!dev || !l3index)
655 			return -EINVAL;
656 	}
657 
658 	if (!cmd.tcpm_keylen) {
659 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
660 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
661 					      AF_INET, prefixlen,
662 					      l3index, flags);
663 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
664 				      AF_INET6, prefixlen, l3index, flags);
665 	}
666 
667 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
668 		return -EINVAL;
669 
670 	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
671 		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
672 
673 		/* Don't allow keys for peers that have a matching TCP-AO key.
674 		 * See the comment in tcp_ao_add_cmd()
675 		 */
676 		if (tcp_ao_required(sk, addr, AF_INET,
677 				    l3flag ? l3index : -1, false))
678 			return -EKEYREJECTED;
679 		return tcp_md5_do_add(sk, addr,
680 				      AF_INET, prefixlen, l3index, flags,
681 				      cmd.tcpm_key, cmd.tcpm_keylen);
682 	}
683 
684 	addr = (union tcp_md5_addr *)&sin6->sin6_addr;
685 
686 	/* Don't allow keys for peers that have a matching TCP-AO key.
687 	 * See the comment in tcp_ao_add_cmd()
688 	 */
689 	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
690 		return -EKEYREJECTED;
691 
692 	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
693 			      cmd.tcpm_key, cmd.tcpm_keylen);
694 }
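/* For illustration, a userspace sketch (hypothetical key and peer) of
 * the setsockopt() call that lands here:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *p = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	p->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &p->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen instead deletes the key, as handled above.
 */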
695 
696 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
697 				   const struct in6_addr *daddr,
698 				   const struct in6_addr *saddr,
699 				   const struct tcphdr *th, int nbytes)
700 {
701 	struct tcp6_pseudohdr *bp;
702 	struct scatterlist sg;
703 	struct tcphdr *_th;
704 
705 	bp = hp->scratch;
706 	/* 1. TCP pseudo-header (RFC2460) */
707 	bp->saddr = *saddr;
708 	bp->daddr = *daddr;
709 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
710 	bp->len = cpu_to_be32(nbytes);
711 
712 	_th = (struct tcphdr *)(bp + 1);
713 	memcpy(_th, th, sizeof(*th));
714 	_th->check = 0;
715 
716 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
717 	ahash_request_set_crypt(hp->req, &sg, NULL,
718 				sizeof(*bp) + sizeof(*th));
719 	return crypto_ahash_update(hp->req);
720 }
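/* For reference, the scratch buffer hashed above is laid out as the
 * RFC 2460-style pseudo-header followed by a copy of the TCP header
 * with its checksum field zeroed:
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;
 *		__be32		protocol;	// zero-padded next header
 *	};
 */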
721 
722 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
723 			       const struct in6_addr *daddr, struct in6_addr *saddr,
724 			       const struct tcphdr *th)
725 {
726 	struct tcp_sigpool hp;
727 
728 	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
729 		goto clear_hash_nostart;
730 
731 	if (crypto_ahash_init(hp.req))
732 		goto clear_hash;
733 	if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
734 		goto clear_hash;
735 	if (tcp_md5_hash_key(&hp, key))
736 		goto clear_hash;
737 	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
738 	if (crypto_ahash_final(hp.req))
739 		goto clear_hash;
740 
741 	tcp_sigpool_end(&hp);
742 	return 0;
743 
744 clear_hash:
745 	tcp_sigpool_end(&hp);
746 clear_hash_nostart:
747 	memset(md5_hash, 0, 16);
748 	return 1;
749 }
750 
751 static int tcp_v6_md5_hash_skb(char *md5_hash,
752 			       const struct tcp_md5sig_key *key,
753 			       const struct sock *sk,
754 			       const struct sk_buff *skb)
755 {
756 	const struct tcphdr *th = tcp_hdr(skb);
757 	const struct in6_addr *saddr, *daddr;
758 	struct tcp_sigpool hp;
759 
760 	if (sk) { /* valid for establish/request sockets */
761 		saddr = &sk->sk_v6_rcv_saddr;
762 		daddr = &sk->sk_v6_daddr;
763 	} else {
764 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
765 		saddr = &ip6h->saddr;
766 		daddr = &ip6h->daddr;
767 	}
768 
769 	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
770 		goto clear_hash_nostart;
771 
772 	if (crypto_ahash_init(hp.req))
773 		goto clear_hash;
774 
775 	if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
776 		goto clear_hash;
777 	if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
778 		goto clear_hash;
779 	if (tcp_md5_hash_key(&hp, key))
780 		goto clear_hash;
781 	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
782 	if (crypto_ahash_final(hp.req))
783 		goto clear_hash;
784 
785 	tcp_sigpool_end(&hp);
786 	return 0;
787 
788 clear_hash:
789 	tcp_sigpool_end(&hp);
790 clear_hash_nostart:
791 	memset(md5_hash, 0, 16);
792 	return 1;
793 }
794 #endif
795 
796 static void tcp_v6_init_req(struct request_sock *req,
797 			    const struct sock *sk_listener,
798 			    struct sk_buff *skb)
799 {
800 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
801 	struct inet_request_sock *ireq = inet_rsk(req);
802 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
803 
804 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
805 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
806 
807 	/* So that link locals have meaning */
808 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
809 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
810 		ireq->ir_iif = tcp_v6_iif(skb);
811 
812 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
813 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
814 	     np->rxopt.bits.rxinfo ||
815 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
816 	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
817 		refcount_inc(&skb->users);
818 		ireq->pktopts = skb;
819 	}
820 }
821 
822 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
823 					  struct sk_buff *skb,
824 					  struct flowi *fl,
825 					  struct request_sock *req)
826 {
827 	tcp_v6_init_req(req, sk, skb);
828 
829 	if (security_inet_conn_request(sk, skb, req))
830 		return NULL;
831 
832 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
833 }
834 
835 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
836 	.family		=	AF_INET6,
837 	.obj_size	=	sizeof(struct tcp6_request_sock),
838 	.rtx_syn_ack	=	tcp_rtx_synack,
839 	.send_ack	=	tcp_v6_reqsk_send_ack,
840 	.destructor	=	tcp_v6_reqsk_destructor,
841 	.send_reset	=	tcp_v6_send_reset,
842 	.syn_ack_timeout =	tcp_syn_ack_timeout,
843 };
844 
845 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
846 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
847 				sizeof(struct ipv6hdr),
848 #ifdef CONFIG_TCP_MD5SIG
849 	.req_md5_lookup	=	tcp_v6_md5_lookup,
850 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
851 #endif
852 #ifdef CONFIG_TCP_AO
853 	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
854 	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
855 	.ao_synack_hash =	tcp_v6_ao_synack_hash,
856 #endif
857 #ifdef CONFIG_SYN_COOKIES
858 	.cookie_init_seq =	cookie_v6_init_sequence,
859 #endif
860 	.route_req	=	tcp_v6_route_req,
861 	.init_seq	=	tcp_v6_init_seq,
862 	.init_ts_off	=	tcp_v6_init_ts_off,
863 	.send_synack	=	tcp_v6_send_synack,
864 };
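/* Note: IPV6_MIN_MTU is 1280, so the mss_clamp above works out to
 * 1280 - 20 - 40 = 1220 bytes, matching what tcp_v6_connect() sets for
 * active opens.
 */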
865 
866 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
867 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
868 				 int oif, int rst, u8 tclass, __be32 label,
869 				 u32 priority, u32 txhash, struct tcp_key *key)
870 {
871 	const struct tcphdr *th = tcp_hdr(skb);
872 	struct tcphdr *t1;
873 	struct sk_buff *buff;
874 	struct flowi6 fl6;
875 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
876 	struct sock *ctl_sk = net->ipv6.tcp_sk;
877 	unsigned int tot_len = sizeof(struct tcphdr);
878 	__be32 mrst = 0, *topt;
879 	struct dst_entry *dst;
880 	__u32 mark = 0;
881 
882 	if (tsecr)
883 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
884 	if (tcp_key_is_md5(key))
885 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
886 	if (tcp_key_is_ao(key))
887 		tot_len += tcp_ao_len(key->ao_key);
888 
889 #ifdef CONFIG_MPTCP
890 	if (rst && !tcp_key_is_md5(key)) {
891 		mrst = mptcp_reset_option(skb);
892 
893 		if (mrst)
894 			tot_len += sizeof(__be32);
895 	}
896 #endif
897 
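	/* tot_len now covers the base TCP header plus any aligned option
	 * words (timestamps, MD5, AO, MPTCP reset); t1->doff below is
	 * simply tot_len / 4, since doff counts 32-bit words.
	 */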
898 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
899 	if (!buff)
900 		return;
901 
902 	skb_reserve(buff, MAX_TCP_HEADER);
903 
904 	t1 = skb_push(buff, tot_len);
905 	skb_reset_transport_header(buff);
906 
907 	/* Swap the send and the receive. */
908 	memset(t1, 0, sizeof(*t1));
909 	t1->dest = th->source;
910 	t1->source = th->dest;
911 	t1->doff = tot_len / 4;
912 	t1->seq = htonl(seq);
913 	t1->ack_seq = htonl(ack);
914 	t1->ack = !rst || !th->ack;
915 	t1->rst = rst;
916 	t1->window = htons(win);
917 
918 	topt = (__be32 *)(t1 + 1);
919 
920 	if (tsecr) {
921 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
922 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
923 		*topt++ = htonl(tsval);
924 		*topt++ = htonl(tsecr);
925 	}
926 
927 	if (mrst)
928 		*topt++ = mrst;
929 
930 #ifdef CONFIG_TCP_MD5SIG
931 	if (tcp_key_is_md5(key)) {
932 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
933 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
934 		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
935 				    &ipv6_hdr(skb)->saddr,
936 				    &ipv6_hdr(skb)->daddr, t1);
937 	}
938 #endif
939 #ifdef CONFIG_TCP_AO
940 	if (tcp_key_is_ao(key)) {
941 		*topt++ = htonl((TCPOPT_AO << 24) |
942 				(tcp_ao_len(key->ao_key) << 16) |
943 				(key->ao_key->sndid << 8) |
944 				(key->rcv_next));
945 
946 		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
947 				key->traffic_key,
948 				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
949 				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
950 				t1, key->sne);
951 	}
952 #endif
953 
954 	memset(&fl6, 0, sizeof(fl6));
955 	fl6.daddr = ipv6_hdr(skb)->saddr;
956 	fl6.saddr = ipv6_hdr(skb)->daddr;
957 	fl6.flowlabel = label;
958 
959 	buff->ip_summed = CHECKSUM_PARTIAL;
960 
961 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
962 
963 	fl6.flowi6_proto = IPPROTO_TCP;
964 	if (rt6_need_strict(&fl6.daddr) && !oif)
965 		fl6.flowi6_oif = tcp_v6_iif(skb);
966 	else {
967 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
968 			oif = skb->skb_iif;
969 
970 		fl6.flowi6_oif = oif;
971 	}
972 
973 	if (sk) {
974 		if (sk->sk_state == TCP_TIME_WAIT)
975 			mark = inet_twsk(sk)->tw_mark;
976 		else
977 			mark = READ_ONCE(sk->sk_mark);
978 		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
979 	}
980 	if (txhash) {
981 		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
982 		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
983 	}
984 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
985 	fl6.fl6_dport = t1->dest;
986 	fl6.fl6_sport = t1->source;
987 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
988 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
989 
990 	/* Pass a socket to ip6_dst_lookup_flow() even when sending a RST;
991 	 * the underlying function will use it to retrieve the network
992 	 * namespace.
993 	 */
994 	if (sk && sk->sk_state != TCP_TIME_WAIT)
995 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
996 	else
997 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
998 	if (!IS_ERR(dst)) {
999 		skb_dst_set(buff, dst);
1000 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1001 			 tclass & ~INET_ECN_MASK, priority);
1002 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1003 		if (rst)
1004 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1005 		return;
1006 	}
1007 
1008 	kfree_skb(buff);
1009 }
1010 
1011 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1012 {
1013 	const struct tcphdr *th = tcp_hdr(skb);
1014 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1015 	const __u8 *md5_hash_location = NULL;
1016 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1017 	bool allocated_traffic_key = false;
1018 #endif
1019 	const struct tcp_ao_hdr *aoh;
1020 	struct tcp_key key = {};
1021 	u32 seq = 0, ack_seq = 0;
1022 	__be32 label = 0;
1023 	u32 priority = 0;
1024 	struct net *net;
1025 	u32 txhash = 0;
1026 	int oif = 0;
1027 #ifdef CONFIG_TCP_MD5SIG
1028 	unsigned char newhash[16];
1029 	int genhash;
1030 	struct sock *sk1 = NULL;
1031 #endif
1032 
1033 	if (th->rst)
1034 		return;
1035 
1036 	/* If sk is not NULL, we did a successful lookup and the incoming
1037 	 * route had to be correct. prequeue might have dropped our dst.
1038 	 */
1039 	if (!sk && !ipv6_unicast_destination(skb))
1040 		return;
1041 
1042 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1043 	/* Invalid TCP option size or twice included auth */
1044 	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
1045 		return;
1046 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1047 	rcu_read_lock();
1048 #endif
1049 #ifdef CONFIG_TCP_MD5SIG
1050 	if (sk && sk_fullsock(sk)) {
1051 		int l3index;
1052 
1053 		/* sdif set, means packet ingressed via a device
1054 		 * in an L3 domain and inet_iif is set to it.
1055 		 */
1056 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1057 		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1058 		if (key.md5_key)
1059 			key.type = TCP_KEY_MD5;
1060 	} else if (md5_hash_location) {
1061 		int dif = tcp_v6_iif_l3_slave(skb);
1062 		int sdif = tcp_v6_sdif(skb);
1063 		int l3index;
1064 
1065 		/*
1066 		 * The active side is lost. Try to find the listening socket
1067 		 * through the source port, and then find the md5 key through
1068 		 * the listening socket. We do not loosen security here:
1069 		 * the incoming packet is checked against the md5 hash of the
1070 		 * key we find; no RST is generated if the hash doesn't match.
1071 		 */
1072 		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1073 					    NULL, 0, &ipv6h->saddr, th->source,
1074 					    &ipv6h->daddr, ntohs(th->source),
1075 					    dif, sdif);
1076 		if (!sk1)
1077 			goto out;
1078 
1079 		/* sdif set, means packet ingressed via a device
1080 		 * in an L3 domain and dif is set to it.
1081 		 */
1082 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1083 
1084 		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1085 		if (!key.md5_key)
1086 			goto out;
1087 		key.type = TCP_KEY_MD5;
1088 
1089 		genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
1090 		if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
1091 			goto out;
1092 	}
1093 #endif
1094 
1095 	if (th->ack)
1096 		seq = ntohl(th->ack_seq);
1097 	else
1098 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1099 			  (th->doff << 2);
1100 
1101 #ifdef CONFIG_TCP_AO
1102 	if (aoh) {
1103 		int l3index;
1104 
1105 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1106 		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
1107 					 &key.ao_key, &key.traffic_key,
1108 					 &allocated_traffic_key,
1109 					 &key.rcv_next, &key.sne))
1110 			goto out;
1111 		key.type = TCP_KEY_AO;
1112 	}
1113 #endif
1114 
1115 	if (sk) {
1116 		oif = sk->sk_bound_dev_if;
1117 		if (sk_fullsock(sk)) {
1118 			trace_tcp_send_reset(sk, skb);
1119 			if (inet6_test_bit(REPFLOW, sk))
1120 				label = ip6_flowlabel(ipv6h);
1121 			priority = READ_ONCE(sk->sk_priority);
1122 			txhash = sk->sk_txhash;
1123 		}
1124 		if (sk->sk_state == TCP_TIME_WAIT) {
1125 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1126 			priority = inet_twsk(sk)->tw_priority;
1127 			txhash = inet_twsk(sk)->tw_txhash;
1128 		}
1129 	} else {
1130 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1131 			label = ip6_flowlabel(ipv6h);
1132 	}
1133 
1134 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
1135 			     ipv6_get_dsfield(ipv6h), label, priority, txhash,
1136 			     &key);
1137 
1138 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1139 out:
1140 	if (allocated_traffic_key)
1141 		kfree(key.traffic_key);
1142 	rcu_read_unlock();
1143 #endif
1144 }
1145 
1146 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1147 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1148 			    struct tcp_key *key, u8 tclass,
1149 			    __be32 label, u32 priority, u32 txhash)
1150 {
1151 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
1152 			     tclass, label, priority, txhash, key);
1153 }
1154 
1155 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1156 {
1157 	struct inet_timewait_sock *tw = inet_twsk(sk);
1158 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1159 	struct tcp_key key = {};
1160 #ifdef CONFIG_TCP_AO
1161 	struct tcp_ao_info *ao_info;
1162 
1163 	if (static_branch_unlikely(&tcp_ao_needed.key)) {
1164 
1165 		/* FIXME: the segment to-be-acked is not verified yet */
1166 		ao_info = rcu_dereference(tcptw->ao_info);
1167 		if (ao_info) {
1168 			const struct tcp_ao_hdr *aoh;
1169 
1170 			/* Invalid TCP option size or twice included auth */
1171 			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1172 				goto out;
1173 			if (aoh)
1174 				key.ao_key = tcp_ao_established_key(ao_info,
1175 						aoh->rnext_keyid, -1);
1176 		}
1177 	}
1178 	if (key.ao_key) {
1179 		struct tcp_ao_key *rnext_key;
1180 
1181 		key.traffic_key = snd_other_key(key.ao_key);
1182 		/* rcv_next switches to our rcv_next */
1183 		rnext_key = READ_ONCE(ao_info->rnext_key);
1184 		key.rcv_next = rnext_key->rcvid;
1185 		key.sne = READ_ONCE(ao_info->snd_sne);
1186 		key.type = TCP_KEY_AO;
1187 #else
1188 	if (0) {
1189 #endif
1190 #ifdef CONFIG_TCP_MD5SIG
1191 	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1192 		key.md5_key = tcp_twsk_md5_key(tcptw);
1193 		if (key.md5_key)
1194 			key.type = TCP_KEY_MD5;
1195 #endif
1196 	}
1197 
1198 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1199 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1200 			tcp_tw_tsval(tcptw),
1201 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
1202 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1203 			tw->tw_txhash);
1204 
1205 #ifdef CONFIG_TCP_AO
1206 out:
1207 #endif
1208 	inet_twsk_put(tw);
1209 }
1210 
1211 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1212 				  struct request_sock *req)
1213 {
1214 	struct tcp_key key = {};
1215 
1216 #ifdef CONFIG_TCP_AO
1217 	if (static_branch_unlikely(&tcp_ao_needed.key) &&
1218 	    tcp_rsk_used_ao(req)) {
1219 		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
1220 		const struct tcp_ao_hdr *aoh;
1221 		int l3index;
1222 
1223 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1224 		/* Invalid TCP option size or twice included auth */
1225 		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1226 			return;
1227 		if (!aoh)
1228 			return;
1229 		key.ao_key = tcp_ao_do_lookup(sk, l3index,
1230 					      (union tcp_ao_addr *)addr,
1231 					      AF_INET6, aoh->rnext_keyid, -1);
1232 		if (unlikely(!key.ao_key)) {
1233 			/* Send ACK with any matching MKT for the peer */
1234 			key.ao_key = tcp_ao_do_lookup(sk, l3index,
1235 						      (union tcp_ao_addr *)addr,
1236 						      AF_INET6, -1, -1);
1237 			/* Matching key disappeared (user removed the key?)
1238 			 * Let the handshake time out.
1239 			 */
1240 			if (!key.ao_key) {
1241 				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
1242 						     addr,
1243 						     ntohs(tcp_hdr(skb)->source),
1244 						     &ipv6_hdr(skb)->daddr,
1245 						     ntohs(tcp_hdr(skb)->dest));
1246 				return;
1247 			}
1248 		}
1249 		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
1250 		if (!key.traffic_key)
1251 			return;
1252 
1253 		key.type = TCP_KEY_AO;
1254 		key.rcv_next = aoh->keyid;
1255 		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
1256 #else
1257 	if (0) {
1258 #endif
1259 #ifdef CONFIG_TCP_MD5SIG
1260 	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1261 		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1262 
1263 		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
1264 						   l3index);
1265 		if (key.md5_key)
1266 			key.type = TCP_KEY_MD5;
1267 #endif
1268 	}
1269 
1270 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1271 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1272 	 */
1273 	/* RFC 7323 2.3
1274 	 * The window field (SEG.WND) of every outgoing segment, with the
1275 	 * exception of <SYN> segments, MUST be right-shifted by
1276 	 * Rcv.Wind.Shift bits:
1277 	 */
1278 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1279 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1280 			tcp_rsk(req)->rcv_nxt,
1281 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1282 			tcp_rsk_tsval(tcp_rsk(req)),
1283 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1284 			&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1285 			READ_ONCE(sk->sk_priority),
1286 			READ_ONCE(tcp_rsk(req)->txhash));
1287 	if (tcp_key_is_ao(&key))
1288 		kfree(key.traffic_key);
1289 }
1290 
1291 
1292 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1293 {
1294 #ifdef CONFIG_SYN_COOKIES
1295 	const struct tcphdr *th = tcp_hdr(skb);
1296 
1297 	if (!th->syn)
1298 		sk = cookie_v6_check(sk, skb);
1299 #endif
1300 	return sk;
1301 }
1302 
1303 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1304 			 struct tcphdr *th, u32 *cookie)
1305 {
1306 	u16 mss = 0;
1307 #ifdef CONFIG_SYN_COOKIES
1308 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1309 				    &tcp_request_sock_ipv6_ops, sk, th);
1310 	if (mss) {
1311 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1312 		tcp_synq_overflow(sk);
1313 	}
1314 #endif
1315 	return mss;
1316 }
1317 
1318 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1319 {
1320 	if (skb->protocol == htons(ETH_P_IP))
1321 		return tcp_v4_conn_request(sk, skb);
1322 
1323 	if (!ipv6_unicast_destination(skb))
1324 		goto drop;
1325 
1326 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1327 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1328 		return 0;
1329 	}
1330 
1331 	return tcp_conn_request(&tcp6_request_sock_ops,
1332 				&tcp_request_sock_ipv6_ops, sk, skb);
1333 
1334 drop:
1335 	tcp_listendrop(sk);
1336 	return 0; /* don't send reset */
1337 }
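/* The v4-mapped check above is a sanity filter: a genuine IPv6 packet
 * on the wire should never carry a ::ffff:a.b.c.d source address, so
 * such a SYN is counted as a header error and dropped without a reset.
 */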
1338 
1339 static void tcp_v6_restore_cb(struct sk_buff *skb)
1340 {
1341 	/* We need to move header back to the beginning if xfrm6_policy_check()
1342 	 * and tcp_v6_fill_cb() are going to be called again.
1343 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1344 	 */
1345 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1346 		sizeof(struct inet6_skb_parm));
1347 }
1348 
1349 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1350 					 struct request_sock *req,
1351 					 struct dst_entry *dst,
1352 					 struct request_sock *req_unhash,
1353 					 bool *own_req)
1354 {
1355 	struct inet_request_sock *ireq;
1356 	struct ipv6_pinfo *newnp;
1357 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1358 	struct ipv6_txoptions *opt;
1359 	struct inet_sock *newinet;
1360 	bool found_dup_sk = false;
1361 	struct tcp_sock *newtp;
1362 	struct sock *newsk;
1363 #ifdef CONFIG_TCP_MD5SIG
1364 	struct tcp_md5sig_key *key;
1365 	int l3index;
1366 #endif
1367 	struct flowi6 fl6;
1368 
1369 	if (skb->protocol == htons(ETH_P_IP)) {
1370 		/*
1371 		 *	v6 mapped
1372 		 */
1373 
1374 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1375 					     req_unhash, own_req);
1376 
1377 		if (!newsk)
1378 			return NULL;
1379 
1380 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1381 
1382 		newnp = tcp_inet6_sk(newsk);
1383 		newtp = tcp_sk(newsk);
1384 
1385 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1386 
1387 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1388 
1389 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1390 		if (sk_is_mptcp(newsk))
1391 			mptcpv6_handle_mapped(newsk, true);
1392 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1393 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1394 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1395 #endif
1396 
1397 		newnp->ipv6_mc_list = NULL;
1398 		newnp->ipv6_ac_list = NULL;
1399 		newnp->ipv6_fl_list = NULL;
1400 		newnp->pktoptions  = NULL;
1401 		newnp->opt	   = NULL;
1402 		newnp->mcast_oif   = inet_iif(skb);
1403 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1404 		newnp->rcv_flowinfo = 0;
1405 		if (inet6_test_bit(REPFLOW, sk))
1406 			newnp->flow_label = 0;
1407 
1408 		/*
1409 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1410 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1411 		 * that function for the gory details. -acme
1412 		 */
1413 
1414 		/* This is a tricky place. Until this moment the IPv4 tcp
1415 		   code worked with the IPv6 icsk.icsk_af_ops.
1416 		   Sync it now.
1417 		 */
1418 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1419 
1420 		return newsk;
1421 	}
1422 
1423 	ireq = inet_rsk(req);
1424 
1425 	if (sk_acceptq_is_full(sk))
1426 		goto out_overflow;
1427 
1428 	if (!dst) {
1429 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1430 		if (!dst)
1431 			goto out;
1432 	}
1433 
1434 	newsk = tcp_create_openreq_child(sk, req, skb);
1435 	if (!newsk)
1436 		goto out_nonewsk;
1437 
1438 	/*
1439 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1440 	 * count here, tcp_create_openreq_child now does this for us, see the
1441 	 * comment in that function for the gory details. -acme
1442 	 */
1443 
1444 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1445 	ip6_dst_store(newsk, dst, NULL, NULL);
1446 	inet6_sk_rx_dst_set(newsk, skb);
1447 
1448 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1449 
1450 	newtp = tcp_sk(newsk);
1451 	newinet = inet_sk(newsk);
1452 	newnp = tcp_inet6_sk(newsk);
1453 
1454 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1455 
1456 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1457 	newnp->saddr = ireq->ir_v6_loc_addr;
1458 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1459 	newsk->sk_bound_dev_if = ireq->ir_iif;
1460 
1461 	/* Now IPv6 options...
1462 
1463 	   First: no IPv4 options.
1464 	 */
1465 	newinet->inet_opt = NULL;
1466 	newnp->ipv6_mc_list = NULL;
1467 	newnp->ipv6_ac_list = NULL;
1468 	newnp->ipv6_fl_list = NULL;
1469 
1470 	/* Clone RX bits */
1471 	newnp->rxopt.all = np->rxopt.all;
1472 
1473 	newnp->pktoptions = NULL;
1474 	newnp->opt	  = NULL;
1475 	newnp->mcast_oif  = tcp_v6_iif(skb);
1476 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1477 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1478 	if (inet6_test_bit(REPFLOW, sk))
1479 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1480 
1481 	/* Set ToS of the new socket based upon the value of incoming SYN.
1482 	 * ECT bits are set later in tcp_init_transfer().
1483 	 */
1484 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1485 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1486 
1487 	/* Clone native IPv6 options from listening socket (if any)
1488 
1489 	   Yes, keeping a reference count would be much more clever,
1490 	   but we do one more thing here: reattach optmem
1491 	   to newsk.
1492 	 */
1493 	opt = ireq->ipv6_opt;
1494 	if (!opt)
1495 		opt = rcu_dereference(np->opt);
1496 	if (opt) {
1497 		opt = ipv6_dup_options(newsk, opt);
1498 		RCU_INIT_POINTER(newnp->opt, opt);
1499 	}
1500 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1501 	if (opt)
1502 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1503 						    opt->opt_flen;
1504 
1505 	tcp_ca_openreq_child(newsk, dst);
1506 
1507 	tcp_sync_mss(newsk, dst_mtu(dst));
1508 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1509 
1510 	tcp_initialize_rcv_mss(newsk);
1511 
1512 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1513 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1514 
1515 #ifdef CONFIG_TCP_MD5SIG
1516 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1517 
1518 	if (!tcp_rsk_used_ao(req)) {
1519 		/* Copy over the MD5 key from the original socket */
1520 		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1521 		if (key) {
1522 			const union tcp_md5_addr *addr;
1523 
1524 			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1525 			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1526 				inet_csk_prepare_forced_close(newsk);
1527 				tcp_done(newsk);
1528 				goto out;
1529 			}
1530 		}
1531 	}
1532 #endif
1533 #ifdef CONFIG_TCP_AO
1534 	/* Copy over tcp_ao_info if any */
1535 	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
1536 		goto out; /* OOM */
1537 #endif
1538 
1539 	if (__inet_inherit_port(sk, newsk) < 0) {
1540 		inet_csk_prepare_forced_close(newsk);
1541 		tcp_done(newsk);
1542 		goto out;
1543 	}
1544 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1545 				       &found_dup_sk);
1546 	if (*own_req) {
1547 		tcp_move_syn(newtp, req);
1548 
1549 		/* Clone pktoptions received with SYN, if we own the req */
1550 		if (ireq->pktopts) {
1551 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1552 			consume_skb(ireq->pktopts);
1553 			ireq->pktopts = NULL;
1554 			if (newnp->pktoptions)
1555 				tcp_v6_restore_cb(newnp->pktoptions);
1556 		}
1557 	} else {
1558 		if (!req_unhash && found_dup_sk) {
1559 			/* This code path should only be executed in the
1560 			 * syncookie case
1561 			 */
1562 			bh_unlock_sock(newsk);
1563 			sock_put(newsk);
1564 			newsk = NULL;
1565 		}
1566 	}
1567 
1568 	return newsk;
1569 
1570 out_overflow:
1571 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1572 out_nonewsk:
1573 	dst_release(dst);
1574 out:
1575 	tcp_listendrop(sk);
1576 	return NULL;
1577 }
1578 
1579 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1580 							   u32));
1581 /* The socket must have its spinlock held when we get
1582  * here, unless it is a TCP_LISTEN socket.
1583  *
1584  * We have a potential double-lock case here, so even when
1585  * doing backlog processing we use the BH locking scheme.
1586  * This is because we cannot sleep with the original spinlock
1587  * held.
1588  */
1589 INDIRECT_CALLABLE_SCOPE
1590 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1591 {
1592 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1593 	struct sk_buff *opt_skb = NULL;
1594 	enum skb_drop_reason reason;
1595 	struct tcp_sock *tp;
1596 
1597 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1598 	   goes to the IPv4 receive handler and is backlogged.
1599 	   From the backlog it always goes here. Kerboom...
1600 	   Fortunately, tcp_rcv_established and rcv_established
1601 	   handle them correctly, but that is not the case with
1602 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1603 	 */
1604 
1605 	if (skb->protocol == htons(ETH_P_IP))
1606 		return tcp_v4_do_rcv(sk, skb);
1607 
1608 	/*
1609 	 *	socket locking is here for SMP purposes as backlog rcv
1610 	 *	is currently called with bh processing disabled.
1611 	 */
1612 
1613 	/* Do Stevens' IPV6_PKTOPTIONS.
1614 
1615 	   Yes, guys, it is the only place in our code where we
1616 	   can make it not affect IPv4.
1617 	   The rest of the code is protocol independent,
1618 	   and I do not like the idea of uglifying IPv4.
1619 
1620 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1621 	   does not look very well thought out. For now we latch
1622 	   the options received in the last packet enqueued
1623 	   by tcp. Feel free to propose a better solution.
1624 					       --ANK (980728)
1625 	 */
1626 	if (np->rxopt.all)
1627 		opt_skb = skb_clone_and_charge_r(skb, sk);
1628 
1629 	reason = SKB_DROP_REASON_NOT_SPECIFIED;
1630 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1631 		struct dst_entry *dst;
1632 
1633 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1634 						lockdep_sock_is_held(sk));
1635 
1636 		sock_rps_save_rxhash(sk, skb);
1637 		sk_mark_napi_id(sk, skb);
1638 		if (dst) {
1639 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1640 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1641 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1642 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1643 				dst_release(dst);
1644 			}
1645 		}
1646 
1647 		tcp_rcv_established(sk, skb);
1648 		if (opt_skb)
1649 			goto ipv6_pktoptions;
1650 		return 0;
1651 	}
1652 
1653 	if (tcp_checksum_complete(skb))
1654 		goto csum_err;
1655 
1656 	if (sk->sk_state == TCP_LISTEN) {
1657 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1658 
1659 		if (!nsk)
1660 			goto discard;
1661 
1662 		if (nsk != sk) {
1663 			if (tcp_child_process(sk, nsk, skb))
1664 				goto reset;
1665 			if (opt_skb)
1666 				__kfree_skb(opt_skb);
1667 			return 0;
1668 		}
1669 	} else
1670 		sock_rps_save_rxhash(sk, skb);
1671 
1672 	if (tcp_rcv_state_process(sk, skb))
1673 		goto reset;
1674 	if (opt_skb)
1675 		goto ipv6_pktoptions;
1676 	return 0;
1677 
1678 reset:
1679 	tcp_v6_send_reset(sk, skb);
1680 discard:
1681 	if (opt_skb)
1682 		__kfree_skb(opt_skb);
1683 	kfree_skb_reason(skb, reason);
1684 	return 0;
1685 csum_err:
1686 	reason = SKB_DROP_REASON_TCP_CSUM;
1687 	trace_tcp_bad_csum(skb);
1688 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1689 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1690 	goto discard;
1691 
1692 
1693 ipv6_pktoptions:
1694 	/* You may ask, what is this?
1695 
1696 	   1. skb was enqueued by tcp.
1697 	   2. skb is added to the tail of the read queue, not out of order.
1698 	   3. The socket is not in a passive state.
1699 	   4. Finally, it really contains options that the user wants to receive.
1700 	 */
1701 	tp = tcp_sk(sk);
1702 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1703 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1704 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1705 			np->mcast_oif = tcp_v6_iif(opt_skb);
1706 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1707 			WRITE_ONCE(np->mcast_hops,
1708 				   ipv6_hdr(opt_skb)->hop_limit);
1709 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1710 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1711 		if (inet6_test_bit(REPFLOW, sk))
1712 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1713 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1714 			tcp_v6_restore_cb(opt_skb);
1715 			opt_skb = xchg(&np->pktoptions, opt_skb);
1716 		} else {
1717 			__kfree_skb(opt_skb);
1718 			opt_skb = xchg(&np->pktoptions, NULL);
1719 		}
1720 	}
1721 
1722 	consume_skb(opt_skb);
1723 	return 0;
1724 }
1725 
1726 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1727 			   const struct tcphdr *th)
1728 {
1729 	/* This is tricky: we move IP6CB to its correct location inside
1730 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1731 	 * _decode_session6() uses IP6CB().
1732 	 * barrier() makes sure the compiler won't play aliasing games.
1733 	 */
1734 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1735 		sizeof(struct inet6_skb_parm));
1736 	barrier();
1737 
1738 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1739 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1740 				    skb->len - th->doff*4);
1741 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1742 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1743 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1744 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1745 	TCP_SKB_CB(skb)->sacked = 0;
1746 	TCP_SKB_CB(skb)->has_rxtstamp =
1747 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1748 }
1749 
1750 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1751 {
1752 	enum skb_drop_reason drop_reason;
1753 	int sdif = inet6_sdif(skb);
1754 	int dif = inet6_iif(skb);
1755 	const struct tcphdr *th;
1756 	const struct ipv6hdr *hdr;
1757 	bool refcounted;
1758 	struct sock *sk;
1759 	int ret;
1760 	struct net *net = dev_net(skb->dev);
1761 
1762 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1763 	if (skb->pkt_type != PACKET_HOST)
1764 		goto discard_it;
1765 
1766 	/*
1767 	 *	Count it even if it's bad.
1768 	 */
1769 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1770 
1771 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1772 		goto discard_it;
1773 
1774 	th = (const struct tcphdr *)skb->data;
1775 
1776 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1777 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1778 		goto bad_packet;
1779 	}
1780 	if (!pskb_may_pull(skb, th->doff*4))
1781 		goto discard_it;
1782 
1783 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1784 		goto csum_error;
1785 
1786 	th = (const struct tcphdr *)skb->data;
1787 	hdr = ipv6_hdr(skb);
1788 
1789 lookup:
1790 	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1791 				th->source, th->dest, inet6_iif(skb), sdif,
1792 				&refcounted);
1793 	if (!sk)
1794 		goto no_tcp_socket;
1795 
1796 process:
1797 	if (sk->sk_state == TCP_TIME_WAIT)
1798 		goto do_time_wait;
1799 
1800 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1801 		struct request_sock *req = inet_reqsk(sk);
1802 		bool req_stolen = false;
1803 		struct sock *nsk;
1804 
1805 		sk = req->rsk_listener;
1806 		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1807 			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1808 		else
1809 			drop_reason = tcp_inbound_hash(sk, req, skb,
1810 						       &hdr->saddr, &hdr->daddr,
1811 						       AF_INET6, dif, sdif);
1812 		if (drop_reason) {
1813 			sk_drops_add(sk, skb);
1814 			reqsk_put(req);
1815 			goto discard_it;
1816 		}
1817 		if (tcp_checksum_complete(skb)) {
1818 			reqsk_put(req);
1819 			goto csum_error;
1820 		}
1821 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1822 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1823 			if (!nsk) {
1824 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1825 				goto lookup;
1826 			}
1827 			sk = nsk;
1828 		/* reuseport_migrate_sock() has already taken one sk_refcnt
1829 		 * before returning, so no extra sock_hold() is needed here.
1830 		 */
1831 		} else {
1832 			sock_hold(sk);
1833 		}
1834 		refcounted = true;
1835 		nsk = NULL;
1836 		if (!tcp_filter(sk, skb)) {
1837 			th = (const struct tcphdr *)skb->data;
1838 			hdr = ipv6_hdr(skb);
1839 			tcp_v6_fill_cb(skb, hdr, th);
1840 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1841 		} else {
1842 			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1843 		}
1844 		if (!nsk) {
1845 			reqsk_put(req);
1846 			if (req_stolen) {
1847 			/* Another CPU got exclusive access to req
1848 			 * and created a full-blown socket.
1849 			 * Feed this packet to that socket
1850 			 * instead of discarding it.
1851 			 */
1852 				tcp_v6_restore_cb(skb);
1853 				sock_put(sk);
1854 				goto lookup;
1855 			}
1856 			goto discard_and_relse;
1857 		}
1858 		nf_reset_ct(skb);
1859 		if (nsk == sk) {
1860 			reqsk_put(req);
1861 			tcp_v6_restore_cb(skb);
1862 		} else if (tcp_child_process(sk, nsk, skb)) {
1863 			tcp_v6_send_reset(nsk, skb);
1864 			goto discard_and_relse;
1865 		} else {
1866 			sock_put(sk);
1867 			return 0;
1868 		}
1869 	}
1870 
1871 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1872 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1873 		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1874 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1875 			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1876 			goto discard_and_relse;
1877 		}
1878 	}
1879 
1880 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1881 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1882 		goto discard_and_relse;
1883 	}
1884 
1885 	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
1886 				       AF_INET6, dif, sdif);
1887 	if (drop_reason)
1888 		goto discard_and_relse;
1889 
1890 	nf_reset_ct(skb);
1891 
1892 	if (tcp_filter(sk, skb)) {
1893 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1894 		goto discard_and_relse;
1895 	}
1896 	th = (const struct tcphdr *)skb->data;
1897 	hdr = ipv6_hdr(skb);
1898 	tcp_v6_fill_cb(skb, hdr, th);
1899 
1900 	skb->dev = NULL;
1901 
1902 	if (sk->sk_state == TCP_LISTEN) {
1903 		ret = tcp_v6_do_rcv(sk, skb);
1904 		goto put_and_return;
1905 	}
1906 
1907 	sk_incoming_cpu_update(sk);
1908 
1909 	bh_lock_sock_nested(sk);
1910 	tcp_segs_in(tcp_sk(sk), skb);
1911 	ret = 0;
1912 	if (!sock_owned_by_user(sk)) {
1913 		ret = tcp_v6_do_rcv(sk, skb);
1914 	} else {
1915 		if (tcp_add_backlog(sk, skb, &drop_reason))
1916 			goto discard_and_relse;
1917 	}
1918 	bh_unlock_sock(sk);
1919 put_and_return:
1920 	if (refcounted)
1921 		sock_put(sk);
1922 	return ret ? -1 : 0;
1923 
1924 no_tcp_socket:
1925 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1926 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1927 		goto discard_it;
1928 
1929 	tcp_v6_fill_cb(skb, hdr, th);
1930 
1931 	if (tcp_checksum_complete(skb)) {
1932 csum_error:
1933 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1934 		trace_tcp_bad_csum(skb);
1935 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1936 bad_packet:
1937 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1938 	} else {
1939 		tcp_v6_send_reset(NULL, skb);
1940 	}
1941 
1942 discard_it:
1943 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1944 	kfree_skb_reason(skb, drop_reason);
1945 	return 0;
1946 
1947 discard_and_relse:
1948 	sk_drops_add(sk, skb);
1949 	if (refcounted)
1950 		sock_put(sk);
1951 	goto discard_it;
1952 
1953 do_time_wait:
1954 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1955 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1956 		inet_twsk_put(inet_twsk(sk));
1957 		goto discard_it;
1958 	}
1959 
1960 	tcp_v6_fill_cb(skb, hdr, th);
1961 
1962 	if (tcp_checksum_complete(skb)) {
1963 		inet_twsk_put(inet_twsk(sk));
1964 		goto csum_error;
1965 	}
1966 
1967 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1968 	case TCP_TW_SYN:
1969 	{
1970 		struct sock *sk2;
1971 
1972 		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1973 					    skb, __tcp_hdrlen(th),
1974 					    &ipv6_hdr(skb)->saddr, th->source,
1975 					    &ipv6_hdr(skb)->daddr,
1976 					    ntohs(th->dest),
1977 					    tcp_v6_iif_l3_slave(skb),
1978 					    sdif);
1979 		if (sk2) {
1980 			struct inet_timewait_sock *tw = inet_twsk(sk);
1981 			inet_twsk_deschedule_put(tw);
1982 			sk = sk2;
1983 			tcp_v6_restore_cb(skb);
1984 			refcounted = false;
1985 			goto process;
1986 		}
1987 	}
1988 		/* to ACK */
1989 		fallthrough;
1990 	case TCP_TW_ACK:
1991 		tcp_v6_timewait_ack(sk, skb);
1992 		break;
1993 	case TCP_TW_RST:
1994 		tcp_v6_send_reset(sk, skb);
1995 		inet_twsk_deschedule_put(inet_twsk(sk));
1996 		goto discard_it;
1997 	case TCP_TW_SUCCESS:
1998 		;
1999 	}
2000 	goto discard_it;
2001 }
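/* tcp_v6_rcv() is not called directly: it is installed as the
 * IPPROTO_TCP handler through the tcpv6_protocol structure near the
 * end of this file, so every inbound IPv6 TCP segment funnels through
 * the lookup/process/do_time_wait paths above.
 */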
2002 
2003 void tcp_v6_early_demux(struct sk_buff *skb)
2004 {
2005 	struct net *net = dev_net(skb->dev);
2006 	const struct ipv6hdr *hdr;
2007 	const struct tcphdr *th;
2008 	struct sock *sk;
2009 
2010 	if (skb->pkt_type != PACKET_HOST)
2011 		return;
2012 
2013 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
2014 		return;
2015 
2016 	hdr = ipv6_hdr(skb);
2017 	th = tcp_hdr(skb);
2018 
2019 	if (th->doff < sizeof(struct tcphdr) / 4)
2020 		return;
2021 
2022 	/* Note: we use inet6_iif() here, not tcp_v6_iif() */
2023 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
2024 					&hdr->saddr, th->source,
2025 					&hdr->daddr, ntohs(th->dest),
2026 					inet6_iif(skb), inet6_sdif(skb));
2027 	if (sk) {
2028 		skb->sk = sk;
2029 		skb->destructor = sock_edemux;
2030 		if (sk_fullsock(sk)) {
2031 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
2032 
2033 			if (dst)
2034 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
2035 			if (dst &&
2036 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
2037 				skb_dst_set_noref(skb, dst);
2038 		}
2039 	}
2040 }
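/* Early demux runs before routing: when an established socket is
 * found, both the socket (skb->sk) and, for full sockets, the
 * previously validated receive route (skb_dst_set_noref()) are cached
 * on the skb, letting the later tcp_v6_rcv() path skip a second hash
 * lookup and a route lookup for back-to-back segments on one flow.
 */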
2041 
2042 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
2043 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
2044 	.twsk_unique	= tcp_twsk_unique,
2045 	.twsk_destructor = tcp_twsk_destructor,
2046 };
2047 
2048 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
2049 {
2050 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
2051 }
2052 
2053 const struct inet_connection_sock_af_ops ipv6_specific = {
2054 	.queue_xmit	   = inet6_csk_xmit,
2055 	.send_check	   = tcp_v6_send_check,
2056 	.rebuild_header	   = inet6_sk_rebuild_header,
2057 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
2058 	.conn_request	   = tcp_v6_conn_request,
2059 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
2060 	.net_header_len	   = sizeof(struct ipv6hdr),
2061 	.setsockopt	   = ipv6_setsockopt,
2062 	.getsockopt	   = ipv6_getsockopt,
2063 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
2064 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
2065 	.mtu_reduced	   = tcp_v6_mtu_reduced,
2066 };
2067 
2068 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2069 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
2070 #ifdef CONFIG_TCP_MD5SIG
2071 	.md5_lookup	=	tcp_v6_md5_lookup,
2072 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
2073 	.md5_parse	=	tcp_v6_parse_md5_keys,
2074 #endif
2075 #ifdef CONFIG_TCP_AO
2076 	.ao_lookup	=	tcp_v6_ao_lookup,
2077 	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
2078 	.ao_parse	=	tcp_v6_parse_ao,
2079 	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
2080 #endif
2081 };
2082 #endif
2083 
2084 /*
2085  *	TCP over IPv4 via INET6 API
2086  */
2087 static const struct inet_connection_sock_af_ops ipv6_mapped = {
2088 	.queue_xmit	   = ip_queue_xmit,
2089 	.send_check	   = tcp_v4_send_check,
2090 	.rebuild_header	   = inet_sk_rebuild_header,
2091 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
2092 	.conn_request	   = tcp_v6_conn_request,
2093 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
2094 	.net_header_len	   = sizeof(struct iphdr),
2095 	.setsockopt	   = ipv6_setsockopt,
2096 	.getsockopt	   = ipv6_getsockopt,
2097 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
2098 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
2099 	.mtu_reduced	   = tcp_v4_mtu_reduced,
2100 };
2101 
2102 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2103 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
2104 #ifdef CONFIG_TCP_MD5SIG
2105 	.md5_lookup	=	tcp_v4_md5_lookup,
2106 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
2107 	.md5_parse	=	tcp_v6_parse_md5_keys,
2108 #endif
2109 #ifdef CONFIG_TCP_AO
2110 	.ao_lookup	=	tcp_v6_ao_lookup,
2111 	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
2112 	.ao_parse	=	tcp_v6_parse_ao,
2113 	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
2114 #endif
2115 };
2116 #endif
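/* The "mapped" operations above serve AF_INET6 sockets connected to a
 * v4-mapped address such as ::ffff:192.0.2.1: transmission uses the
 * IPv4 routines (ip_queue_xmit(), tcp_v4_send_check()) while userspace
 * keeps seeing sockaddr_in6 addresses.  A minimal userspace sketch
 * that would exercise this path, assuming an IPv4 listener on
 * 192.0.2.1 port 80:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *				   .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 */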
2117 
2118 /* NOTE: Many fields are already zeroed explicitly by the call to
2119  *       sk_alloc(), so they need not be initialized here.
2120  */
2121 static int tcp_v6_init_sock(struct sock *sk)
2122 {
2123 	struct inet_connection_sock *icsk = inet_csk(sk);
2124 
2125 	tcp_init_sock(sk);
2126 
2127 	icsk->icsk_af_ops = &ipv6_specific;
2128 
2129 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2130 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
2131 #endif
2132 
2133 	return 0;
2134 }
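/* Every AF_INET6 TCP socket starts out with the native v6 ops set
 * here; if it later connects to a v4-mapped destination,
 * tcp_v6_connect() (earlier in this file) switches it over to
 * ipv6_mapped and tcp_sock_ipv6_mapped_specific.
 */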
2135 
2136 #ifdef CONFIG_PROC_FS
2137 /* Proc filesystem TCPv6 sock list dumping. */
2138 static void get_openreq6(struct seq_file *seq,
2139 			 const struct request_sock *req, int i)
2140 {
2141 	long ttd = req->rsk_timer.expires - jiffies;
2142 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
2143 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
2144 
2145 	if (ttd < 0)
2146 		ttd = 0;
2147 
2148 	seq_printf(seq,
2149 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2150 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
2151 		   i,
2152 		   src->s6_addr32[0], src->s6_addr32[1],
2153 		   src->s6_addr32[2], src->s6_addr32[3],
2154 		   inet_rsk(req)->ir_num,
2155 		   dest->s6_addr32[0], dest->s6_addr32[1],
2156 		   dest->s6_addr32[2], dest->s6_addr32[3],
2157 		   ntohs(inet_rsk(req)->ir_rmt_port),
2158 		   TCP_SYN_RECV,
2159 		   0, 0, /* could print the option size, but that is address-family dependent. */
2160 		   1,   /* timers active (only the expire timer) */
2161 		   jiffies_to_clock_t(ttd),
2162 		   req->num_timeout,
2163 		   from_kuid_munged(seq_user_ns(seq),
2164 				    sock_i_uid(req->rsk_listener)),
2165 		   0,  /* non-standard timer */
2166 		   0, /* open_requests have no inode */
2167 		   0, req);
2168 }
2169 
2170 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2171 {
2172 	const struct in6_addr *dest, *src;
2173 	__u16 destp, srcp;
2174 	int timer_active;
2175 	unsigned long timer_expires;
2176 	const struct inet_sock *inet = inet_sk(sp);
2177 	const struct tcp_sock *tp = tcp_sk(sp);
2178 	const struct inet_connection_sock *icsk = inet_csk(sp);
2179 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2180 	int rx_queue;
2181 	int state;
2182 
2183 	dest  = &sp->sk_v6_daddr;
2184 	src   = &sp->sk_v6_rcv_saddr;
2185 	destp = ntohs(inet->inet_dport);
2186 	srcp  = ntohs(inet->inet_sport);
2187 
2188 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2189 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2190 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2191 		timer_active	= 1;
2192 		timer_expires	= icsk->icsk_timeout;
2193 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2194 		timer_active	= 4;
2195 		timer_expires	= icsk->icsk_timeout;
2196 	} else if (timer_pending(&sp->sk_timer)) {
2197 		timer_active	= 2;
2198 		timer_expires	= sp->sk_timer.expires;
2199 	} else {
2200 		timer_active	= 0;
2201 		timer_expires = jiffies;
2202 	}
2203 
2204 	state = inet_sk_state_load(sp);
2205 	if (state == TCP_LISTEN)
2206 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2207 	else
2208 		/* Because we don't lock the socket,
2209 		 * we might find a transient negative value.
2210 		 */
2211 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2212 				      READ_ONCE(tp->copied_seq), 0);
2213 
2214 	seq_printf(seq,
2215 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2216 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2217 		   i,
2218 		   src->s6_addr32[0], src->s6_addr32[1],
2219 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2220 		   dest->s6_addr32[0], dest->s6_addr32[1],
2221 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2222 		   state,
2223 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2224 		   rx_queue,
2225 		   timer_active,
2226 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2227 		   icsk->icsk_retransmits,
2228 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2229 		   icsk->icsk_probes_out,
2230 		   sock_i_ino(sp),
2231 		   refcount_read(&sp->sk_refcnt), sp,
2232 		   jiffies_to_clock_t(icsk->icsk_rto),
2233 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2234 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2235 		   tcp_snd_cwnd(tp),
2236 		   state == TCP_LISTEN ?
2237 			fastopenq->max_qlen :
2238 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2239 		   );
2240 }
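/* Illustrative /proc/net/tcp6 row as emitted by the seq_printf()
 * above, with hypothetical values for a listener on [::]:22
 * (state 0A == TCP_LISTEN; the row is one line in the real file,
 * wrapped here for readability):
 *
 *    0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000
 *	 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1
 *	 0000000000000000 100 0 0 10 0
 */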
2241 
2242 static void get_timewait6_sock(struct seq_file *seq,
2243 			       struct inet_timewait_sock *tw, int i)
2244 {
2245 	long delta = tw->tw_timer.expires - jiffies;
2246 	const struct in6_addr *dest, *src;
2247 	__u16 destp, srcp;
2248 
2249 	dest = &tw->tw_v6_daddr;
2250 	src  = &tw->tw_v6_rcv_saddr;
2251 	destp = ntohs(tw->tw_dport);
2252 	srcp  = ntohs(tw->tw_sport);
2253 
2254 	seq_printf(seq,
2255 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2256 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2257 		   i,
2258 		   src->s6_addr32[0], src->s6_addr32[1],
2259 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2260 		   dest->s6_addr32[0], dest->s6_addr32[1],
2261 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2262 		   tw->tw_substate, 0, 0,
2263 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2264 		   refcount_read(&tw->tw_refcnt), tw);
2265 }
2266 
2267 static int tcp6_seq_show(struct seq_file *seq, void *v)
2268 {
2269 	struct tcp_iter_state *st;
2270 	struct sock *sk = v;
2271 
2272 	if (v == SEQ_START_TOKEN) {
2273 		seq_puts(seq,
2274 			 "  sl  "
2275 			 "local_address                         "
2276 			 "remote_address                        "
2277 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2278 			 "   uid  timeout inode\n");
2279 		goto out;
2280 	}
2281 	st = seq->private;
2282 
2283 	if (sk->sk_state == TCP_TIME_WAIT)
2284 		get_timewait6_sock(seq, v, st->num);
2285 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2286 		get_openreq6(seq, v, st->num);
2287 	else
2288 		get_tcp6_sock(seq, v, st->num);
2289 out:
2290 	return 0;
2291 }
2292 
2293 static const struct seq_operations tcp6_seq_ops = {
2294 	.show		= tcp6_seq_show,
2295 	.start		= tcp_seq_start,
2296 	.next		= tcp_seq_next,
2297 	.stop		= tcp_seq_stop,
2298 };
2299 
2300 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2301 	.family		= AF_INET6,
2302 };
2303 
2304 int __net_init tcp6_proc_init(struct net *net)
2305 {
2306 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2307 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2308 		return -ENOMEM;
2309 	return 0;
2310 }
2311 
2312 void tcp6_proc_exit(struct net *net)
2313 {
2314 	remove_proc_entry("tcp6", net->proc_net);
2315 }
2316 #endif
2317 
2318 struct proto tcpv6_prot = {
2319 	.name			= "TCPv6",
2320 	.owner			= THIS_MODULE,
2321 	.close			= tcp_close,
2322 	.pre_connect		= tcp_v6_pre_connect,
2323 	.connect		= tcp_v6_connect,
2324 	.disconnect		= tcp_disconnect,
2325 	.accept			= inet_csk_accept,
2326 	.ioctl			= tcp_ioctl,
2327 	.init			= tcp_v6_init_sock,
2328 	.destroy		= tcp_v4_destroy_sock,
2329 	.shutdown		= tcp_shutdown,
2330 	.setsockopt		= tcp_setsockopt,
2331 	.getsockopt		= tcp_getsockopt,
2332 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2333 	.keepalive		= tcp_set_keepalive,
2334 	.recvmsg		= tcp_recvmsg,
2335 	.sendmsg		= tcp_sendmsg,
2336 	.splice_eof		= tcp_splice_eof,
2337 	.backlog_rcv		= tcp_v6_do_rcv,
2338 	.release_cb		= tcp_release_cb,
2339 	.hash			= inet6_hash,
2340 	.unhash			= inet_unhash,
2341 	.get_port		= inet_csk_get_port,
2342 	.put_port		= inet_put_port,
2343 #ifdef CONFIG_BPF_SYSCALL
2344 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2345 #endif
2346 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2347 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2348 	.stream_memory_free	= tcp_stream_memory_free,
2349 	.sockets_allocated	= &tcp_sockets_allocated,
2350 
2351 	.memory_allocated	= &tcp_memory_allocated,
2352 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2353 
2354 	.memory_pressure	= &tcp_memory_pressure,
2355 	.orphan_count		= &tcp_orphan_count,
2356 	.sysctl_mem		= sysctl_tcp_mem,
2357 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2358 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2359 	.max_header		= MAX_TCP_HEADER,
2360 	.obj_size		= sizeof(struct tcp6_sock),
2361 	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
2362 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2363 	.twsk_prot		= &tcp6_timewait_sock_ops,
2364 	.rsk_prot		= &tcp6_request_sock_ops,
2365 	.h.hashinfo		= NULL,
2366 	.no_autobind		= true,
2367 	.diag_destroy		= tcp_abort,
2368 };
2369 EXPORT_SYMBOL_GPL(tcpv6_prot);
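/* Note that .h.hashinfo is left NULL: v6 TCP shares the TCP hash
 * tables with IPv4, which is why the lookups above go through
 * net->ipv4.tcp_death_row.hashinfo instead of a dedicated v6 table.
 */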
2370 
2371 static const struct inet6_protocol tcpv6_protocol = {
2372 	.handler	=	tcp_v6_rcv,
2373 	.err_handler	=	tcp_v6_err,
2374 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2375 };
2376 
2377 static struct inet_protosw tcpv6_protosw = {
2378 	.type		=	SOCK_STREAM,
2379 	.protocol	=	IPPROTO_TCP,
2380 	.prot		=	&tcpv6_prot,
2381 	.ops		=	&inet6_stream_ops,
2382 	.flags		=	INET_PROTOSW_PERMANENT |
2383 				INET_PROTOSW_ICSK,
2384 };
2385 
2386 static int __net_init tcpv6_net_init(struct net *net)
2387 {
2388 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2389 				    SOCK_RAW, IPPROTO_TCP, net);
2390 }
2391 
2392 static void __net_exit tcpv6_net_exit(struct net *net)
2393 {
2394 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2395 }
2396 
2397 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2398 {
2399 	tcp_twsk_purge(net_exit_list, AF_INET6);
2400 }
2401 
2402 static struct pernet_operations tcpv6_net_ops = {
2403 	.init	    = tcpv6_net_init,
2404 	.exit	    = tcpv6_net_exit,
2405 	.exit_batch = tcpv6_net_exit_batch,
2406 };
2407 
2408 int __init tcpv6_init(void)
2409 {
2410 	int ret;
2411 
2412 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2413 	if (ret)
2414 		goto out;
2415 
2416 	/* register the TCP protosw with the inet6 stack */
2417 	ret = inet6_register_protosw(&tcpv6_protosw);
2418 	if (ret)
2419 		goto out_tcpv6_protocol;
2420 
2421 	ret = register_pernet_subsys(&tcpv6_net_ops);
2422 	if (ret)
2423 		goto out_tcpv6_protosw;
2424 
2425 	ret = mptcpv6_init();
2426 	if (ret)
2427 		goto out_tcpv6_pernet_subsys;
2428 
2429 out:
2430 	return ret;
2431 
2432 out_tcpv6_pernet_subsys:
2433 	unregister_pernet_subsys(&tcpv6_net_ops);
2434 out_tcpv6_protosw:
2435 	inet6_unregister_protosw(&tcpv6_protosw);
2436 out_tcpv6_protocol:
2437 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2438 	goto out;
2439 }
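/* The unwind ladder above tears down, in reverse order, whatever was
 * registered before the failing step and returns the error through
 * the shared "out" label; tcpv6_exit() below performs the same
 * teardown for the unload case.
 */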
2440 
2441 void tcpv6_exit(void)
2442 {
2443 	unregister_pernet_subsys(&tcpv6_net_ops);
2444 	inet6_unregister_protosw(&tcpv6_protosw);
2445 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2446 }
2447