1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/aligned_data.h>
45 #include <net/tcp.h>
46 #include <net/ndisc.h>
47 #include <net/inet6_hashtables.h>
48 #include <net/inet6_connection_sock.h>
49 #include <net/ipv6.h>
50 #include <net/transp_v6.h>
51 #include <net/addrconf.h>
52 #include <net/ip6_route.h>
53 #include <net/ip6_checksum.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
56 #include <net/xfrm.h>
57 #include <net/snmp.h>
58 #include <net/dsfield.h>
59 #include <net/timewait_sock.h>
60 #include <net/inet_common.h>
61 #include <net/secure_seq.h>
62 #include <net/hotdata.h>
63 #include <net/busy_poll.h>
64 #include <net/rstreason.h>
65 #include <net/psp.h>
66 
67 #include <linux/proc_fs.h>
68 #include <linux/seq_file.h>
69 
70 #include <crypto/md5.h>
71 #include <crypto/utils.h>
72 
73 #include <trace/events/tcp.h>
74 
75 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
76 			      enum sk_rst_reason reason);
77 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
78 				      struct request_sock *req);
79 
80 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
81 
82 static const struct inet_connection_sock_af_ops ipv6_mapped;
83 const struct inet_connection_sock_af_ops ipv6_specific;
84 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
86 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
87 #endif
88 
89 /* Helper returning the inet6 address from a given tcp socket.
90  * It can be used in the TCP stack instead of inet6_sk(sk).
91  * This avoids a dereference and allows compiler optimizations.
92  * It is a specialized version of inet6_sk_generic().
93  */
94 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
95 					      struct tcp6_sock, tcp)->inet6)
96 
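/* Editor's sketch (annotation, not part of the original source): the
 * macro above relies on the layout of struct tcp6_sock, whose members
 * sit back to back, with tcp_sock required to be first:
 *
 *	struct tcp6_sock {
 *		struct tcp_sock   tcp;		(must be the first member)
 *		struct ipv6_pinfo inet6;
 *	};
 *
 * inet6_sk(sk) loads the pinet6 pointer stored in struct inet_sock,
 * while tcp_inet6_sk(sk) reaches the same ipv6_pinfo at a constant
 * offset from sk, which the compiler can fold into later accesses.
 */
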
97 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
98 {
99 	struct dst_entry *dst = skb_dst(skb);
100 
101 	if (dst && dst_hold_safe(dst)) {
102 		rcu_assign_pointer(sk->sk_rx_dst, dst);
103 		sk->sk_rx_dst_ifindex = skb->skb_iif;
104 		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
105 	}
106 }
107 
108 INDIRECT_CALLABLE_SCOPE union tcp_seq_and_ts_off
109 tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
110 {
111 	return secure_tcpv6_seq_and_ts_off(net,
112 					   ipv6_hdr(skb)->daddr.s6_addr32,
113 					   ipv6_hdr(skb)->saddr.s6_addr32,
114 					   tcp_hdr(skb)->dest,
115 					   tcp_hdr(skb)->source);
116 }
117 
118 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
119 			      int addr_len)
120 {
121 	/* This check is replicated from tcp_v6_connect() and intended to
122 	 * prevent the BPF program called below from accessing bytes that are
123 	 * out of the bounds specified by the user in addr_len.
124 	 */
125 	if (addr_len < SIN6_LEN_RFC2133)
126 		return -EINVAL;
127 
128 	sock_owned_by_me(sk);
129 
130 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
131 }
132 
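/* Editor's note (annotation, not part of the original source): the
 * BPF_CGROUP_RUN_PROG_INET6_CONNECT() hook above runs any
 * BPF_CGROUP_INET6_CONNECT programs attached to the socket's cgroup;
 * such a program may rewrite the destination address/port in uaddr
 * before tcp_v6_connect() sees it, which is why the addr_len bounds
 * check is duplicated here rather than relying on the one below.
 */
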
133 static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
134 			  int addr_len)
135 {
136 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
137 	struct inet_connection_sock *icsk = inet_csk(sk);
138 	struct inet_timewait_death_row *tcp_death_row;
139 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
140 	struct in6_addr *saddr = NULL, *final_p;
141 	struct inet_sock *inet = inet_sk(sk);
142 	struct tcp_sock *tp = tcp_sk(sk);
143 	struct net *net = sock_net(sk);
144 	struct ipv6_txoptions *opt;
145 	struct dst_entry *dst;
146 	struct flowi6 *fl6;
147 	int addr_type;
148 	int err;
149 
150 	if (addr_len < SIN6_LEN_RFC2133)
151 		return -EINVAL;
152 
153 	if (usin->sin6_family != AF_INET6)
154 		return -EAFNOSUPPORT;
155 
156 	fl6 = &inet_sk(sk)->cork.fl.u.ip6;
157 	memset(fl6, 0, sizeof(*fl6));
158 
159 	if (inet6_test_bit(SNDFLOW, sk)) {
160 		fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
161 		IP6_ECN_flow_init(fl6->flowlabel);
162 		if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
163 			struct ip6_flowlabel *flowlabel;
164 			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
165 			if (IS_ERR(flowlabel))
166 				return -EINVAL;
167 			fl6_sock_release(flowlabel);
168 		}
169 	}
170 
171 	/*
172 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
173 	 */
174 
175 	if (ipv6_addr_any(&usin->sin6_addr)) {
176 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
177 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
178 					       &usin->sin6_addr);
179 		else
180 			usin->sin6_addr = in6addr_loopback;
181 	}
182 
183 	addr_type = ipv6_addr_type(&usin->sin6_addr);
184 
185 	if (addr_type & IPV6_ADDR_MULTICAST)
186 		return -ENETUNREACH;
187 
188 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
189 		if (addr_len >= sizeof(struct sockaddr_in6) &&
190 		    usin->sin6_scope_id) {
191 			/* If interface is set while binding, indices
192 			 * must coincide.
193 			 */
194 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
195 				return -EINVAL;
196 
197 			sk->sk_bound_dev_if = usin->sin6_scope_id;
198 		}
199 
200 		/* Connect to link-local address requires an interface */
201 		if (!sk->sk_bound_dev_if)
202 			return -EINVAL;
203 	}
204 
205 	if (tp->rx_opt.ts_recent_stamp &&
206 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
207 		tp->rx_opt.ts_recent = 0;
208 		tp->rx_opt.ts_recent_stamp = 0;
209 		WRITE_ONCE(tp->write_seq, 0);
210 	}
211 
212 	sk->sk_v6_daddr = usin->sin6_addr;
213 	np->flow_label = fl6->flowlabel;
214 
215 	/*
216 	 *	TCP over IPv4
217 	 */
218 
219 	if (addr_type & IPV6_ADDR_MAPPED) {
220 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
221 		struct sockaddr_in sin;
222 
223 		if (ipv6_only_sock(sk))
224 			return -ENETUNREACH;
225 
226 		sin.sin_family = AF_INET;
227 		sin.sin_port = usin->sin6_port;
228 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
229 
230 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
231 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
232 		if (sk_is_mptcp(sk))
233 			mptcpv6_handle_mapped(sk, true);
234 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
235 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
236 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
237 #endif
238 
239 		err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));
240 
241 		if (err) {
242 			icsk->icsk_ext_hdr_len = exthdrlen;
243 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
244 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
245 			if (sk_is_mptcp(sk))
246 				mptcpv6_handle_mapped(sk, false);
247 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
248 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
249 			tp->af_specific = &tcp_sock_ipv6_specific;
250 #endif
251 			goto failure;
252 		}
253 		np->saddr = sk->sk_v6_rcv_saddr;
254 
255 		return err;
256 	}
257 
258 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
259 		saddr = &sk->sk_v6_rcv_saddr;
260 
261 	fl6->flowi6_proto = IPPROTO_TCP;
262 	fl6->daddr = sk->sk_v6_daddr;
263 	fl6->saddr = saddr ? *saddr : np->saddr;
264 	fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
265 	fl6->flowi6_oif = sk->sk_bound_dev_if;
266 	fl6->flowi6_mark = sk->sk_mark;
267 	fl6->fl6_dport = usin->sin6_port;
268 	fl6->fl6_sport = inet->inet_sport;
269 	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport)
270 		fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT;
271 	fl6->flowi6_uid = sk_uid(sk);
272 
273 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
274 	final_p = fl6_update_dst(fl6, opt, &np->final);
275 
276 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
277 
278 	dst = ip6_dst_lookup_flow(net, sk, fl6, final_p);
279 	if (IS_ERR(dst)) {
280 		err = PTR_ERR(dst);
281 		goto failure;
282 	}
283 
284 	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
285 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
286 
287 	if (!saddr) {
288 		saddr = &fl6->saddr;
289 
290 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
291 		if (err) {
292 			dst_release(dst);
293 			goto failure;
294 		}
295 	}
296 
297 	/* set the source address */
298 	np->saddr = *saddr;
299 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
300 
301 	sk->sk_gso_type = SKB_GSO_TCPV6;
302 	ip6_dst_store(sk, dst, false, false);
303 
304 	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
305 	if (opt)
306 		icsk->icsk_ext_hdr_len += opt->opt_flen +
307 					  opt->opt_nflen;
308 
309 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
310 
311 	inet->inet_dport = usin->sin6_port;
312 
313 	tcp_set_state(sk, TCP_SYN_SENT);
314 	err = inet6_hash_connect(tcp_death_row, sk);
315 	if (err)
316 		goto late_failure;
317 
318 	sk_set_txhash(sk);
319 
320 	if (likely(!tp->repair)) {
321 		union tcp_seq_and_ts_off st;
322 
323 		st = secure_tcpv6_seq_and_ts_off(net,
324 						 np->saddr.s6_addr32,
325 						 sk->sk_v6_daddr.s6_addr32,
326 						 inet->inet_sport,
327 						 inet->inet_dport);
328 		if (!tp->write_seq)
329 			WRITE_ONCE(tp->write_seq, st.seq);
330 		WRITE_ONCE(tp->tsoffset, st.ts_off);
331 	}
332 
333 	if (tcp_fastopen_defer_connect(sk, &err))
334 		return err;
335 	if (err)
336 		goto late_failure;
337 
338 	err = tcp_connect(sk);
339 	if (err)
340 		goto late_failure;
341 
342 	return 0;
343 
344 late_failure:
345 	tcp_set_state(sk, TCP_CLOSE);
346 	inet_bhash2_reset_saddr(sk);
347 failure:
348 	inet->inet_dport = 0;
349 	sk->sk_route_caps = 0;
350 	return err;
351 }
352 
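/* Editor's sketch (annotation, not part of the original source): a
 * hypothetical userspace caller that exercises the IPV6_ADDR_MAPPED
 * branch of tcp_v6_connect() with a v4-mapped destination:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = {
 *		.sin6_family = AF_INET6,
 *		.sin6_port   = htons(80),
 *	};
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * The connect is handed off to tcp_v4_connect() above; if IPV6_V6ONLY
 * was set on fd, the same call instead fails with ENETUNREACH per the
 * ipv6_only_sock() check.
 */
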
353 static struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
354 {
355 	struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6;
356 	struct dst_entry *dst;
357 
358 	dst = inet6_csk_route_socket(sk, fl6);
359 
360 	if (IS_ERR(dst))
361 		return NULL;
362 	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
363 
364 	dst = inet6_csk_route_socket(sk, fl6);
365 	return IS_ERR(dst) ? NULL : dst;
366 }
367 
368 static void tcp_v6_mtu_reduced(struct sock *sk)
369 {
370 	struct dst_entry *dst;
371 	u32 mtu, dmtu;
372 
373 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
374 		return;
375 
376 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
377 
378 	/* Drop requests trying to increase our current mss.
379 	 * Check done in __ip6_rt_update_pmtu() is too late.
380 	 */
381 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
382 		return;
383 
384 	dst = inet6_csk_update_pmtu(sk, mtu);
385 	if (!dst)
386 		return;
387 
388 	dmtu = dst6_mtu(dst);
389 	if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
390 		tcp_sync_mss(sk, dmtu);
391 		tcp_simple_retransmit(sk);
392 	}
393 }
394 
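/* Editor's worked example (annotation, not part of the original
 * source): for an ICMPV6_PKT_TOOBIG advertising mtu = 1400 on a
 * connection without extension headers or TCP options,
 * tcp_mtu_to_mss() comes out near 1400 - 40 (IPv6 header) - 20 (TCP
 * header) = 1340.  tcp_v6_mtu_reduced() only acts when that value is
 * below the current mss_cache, so PMTU messages can shrink the MSS
 * but never grow it, and mtu values below IPV6_MIN_MTU (1280) are
 * rejected by the caller before we ever get here.
 */
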
395 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
396 		u8 type, u8 code, int offset, __be32 info)
397 {
398 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
399 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
400 	struct net *net = dev_net_rcu(skb->dev);
401 	struct request_sock *fastopen;
402 	struct ipv6_pinfo *np;
403 	struct tcp_sock *tp;
404 	__u32 seq, snd_una;
405 	struct sock *sk;
406 	bool fatal;
407 	int err;
408 
409 	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
410 					&hdr->saddr, ntohs(th->source),
411 					skb->dev->ifindex, inet6_sdif(skb));
412 
413 	if (!sk) {
414 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
415 				  ICMP6_MIB_INERRORS);
416 		return -ENOENT;
417 	}
418 
419 	if (sk->sk_state == TCP_TIME_WAIT) {
420 		/* To increase the counter of ignored icmps for TCP-AO */
421 		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
422 		inet_twsk_put(inet_twsk(sk));
423 		return 0;
424 	}
425 	seq = ntohl(th->seq);
426 	fatal = icmpv6_err_convert(type, code, &err);
427 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
428 		tcp_req_err(sk, seq, fatal);
429 		return 0;
430 	}
431 
432 	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
433 		sock_put(sk);
434 		return 0;
435 	}
436 
437 	bh_lock_sock(sk);
438 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
439 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
440 
441 	if (sk->sk_state == TCP_CLOSE)
442 		goto out;
443 
444 	if (static_branch_unlikely(&ip6_min_hopcount)) {
445 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
446 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
447 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
448 			goto out;
449 		}
450 	}
451 
452 	tp = tcp_sk(sk);
453 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
454 	fastopen = rcu_dereference(tp->fastopen_rsk);
455 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
456 	if (sk->sk_state != TCP_LISTEN &&
457 	    !between(seq, snd_una, tp->snd_nxt)) {
458 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
459 		goto out;
460 	}
461 
462 	np = tcp_inet6_sk(sk);
463 
464 	if (type == NDISC_REDIRECT) {
465 		if (!sock_owned_by_user(sk)) {
466 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
467 
468 			if (dst)
469 				dst->ops->redirect(dst, sk, skb);
470 		}
471 		goto out;
472 	}
473 
474 	if (type == ICMPV6_PKT_TOOBIG) {
475 		u32 mtu = ntohl(info);
476 
477 		/* We are not interested in TCP_LISTEN and open_requests
478 		 * (SYN-ACKs sent out by Linux are always <576 bytes so
479 		 * they should go through unfragmented).
480 		 */
481 		if (sk->sk_state == TCP_LISTEN)
482 			goto out;
483 
484 		if (!ip6_sk_accept_pmtu(sk))
485 			goto out;
486 
487 		if (mtu < IPV6_MIN_MTU)
488 			goto out;
489 
490 		WRITE_ONCE(tp->mtu_info, mtu);
491 
492 		if (!sock_owned_by_user(sk))
493 			tcp_v6_mtu_reduced(sk);
494 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
495 					   &sk->sk_tsq_flags))
496 			sock_hold(sk);
497 		goto out;
498 	}
499 
500 
501 	/* Might be for a request_sock */
502 	switch (sk->sk_state) {
503 	case TCP_SYN_SENT:
504 	case TCP_SYN_RECV:
505 		/* Only in fast or simultaneous open. If a fast open socket is
506 		 * already accepted it is treated as a connected one below.
507 		 */
508 		if (fastopen && !fastopen->sk)
509 			break;
510 
511 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
512 
513 		if (!sock_owned_by_user(sk))
514 			tcp_done_with_error(sk, err);
515 		else
516 			WRITE_ONCE(sk->sk_err_soft, err);
517 		goto out;
518 	case TCP_LISTEN:
519 		break;
520 	default:
521 		/* check if this ICMP message allows revert of backoff.
522 		 * (see RFC 6069)
523 		 */
524 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
525 		    code == ICMPV6_NOROUTE)
526 			tcp_ld_RTO_revert(sk, seq);
527 	}
528 
529 	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
530 		WRITE_ONCE(sk->sk_err, err);
531 		sk_error_report(sk);
532 	} else {
533 		WRITE_ONCE(sk->sk_err_soft, err);
534 	}
535 out:
536 	bh_unlock_sock(sk);
537 	sock_put(sk);
538 	return 0;
539 }
540 
541 
542 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
543 			      struct flowi *fl,
544 			      struct request_sock *req,
545 			      struct tcp_fastopen_cookie *foc,
546 			      enum tcp_synack_type synack_type,
547 			      struct sk_buff *syn_skb)
548 {
549 	struct inet_request_sock *ireq = inet_rsk(req);
550 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
551 	struct ipv6_txoptions *opt;
552 	struct flowi6 *fl6 = &fl->u.ip6;
553 	struct sk_buff *skb;
554 	int err = -ENOMEM;
555 	u8 tclass;
556 
557 	/* First, grab a route. */
558 	if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req,
559 					       IPPROTO_TCP)) == NULL)
560 		goto done;
561 
562 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
563 
564 	if (skb) {
565 		tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
566 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
567 				    &ireq->ir_v6_rmt_addr);
568 
569 		fl6->daddr = ireq->ir_v6_rmt_addr;
570 		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
571 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
572 
573 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
574 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
575 				(np->tclass & INET_ECN_MASK) :
576 				np->tclass;
577 
578 		if (!INET_ECN_is_capable(tclass) &&
579 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
580 			tclass |= INET_ECN_ECT_0;
581 
582 		rcu_read_lock();
583 		opt = ireq->ipv6_opt;
584 		if (!opt)
585 			opt = rcu_dereference(np->opt);
586 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
587 			       opt, tclass, READ_ONCE(sk->sk_priority));
588 		rcu_read_unlock();
589 		err = net_xmit_eval(err);
590 	}
591 
592 done:
593 	return err;
594 }
595 
596 
597 static void tcp_v6_reqsk_destructor(struct request_sock *req)
598 {
599 	kfree(inet_rsk(req)->ipv6_opt);
600 	consume_skb(inet_rsk(req)->pktopts);
601 }
602 
603 #ifdef CONFIG_TCP_MD5SIG
604 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
605 						   const struct in6_addr *addr,
606 						   int l3index)
607 {
608 	return tcp_md5_do_lookup(sk, l3index,
609 				 (union tcp_md5_addr *)addr, AF_INET6);
610 }
611 
612 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
613 						const struct sock *addr_sk)
614 {
615 	int l3index;
616 
617 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
618 						 addr_sk->sk_bound_dev_if);
619 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
620 				    l3index);
621 }
622 
623 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
624 				 sockptr_t optval, int optlen)
625 {
626 	struct tcp_md5sig cmd;
627 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
628 	union tcp_ao_addr *addr;
629 	int l3index = 0;
630 	u8 prefixlen;
631 	bool l3flag;
632 	u8 flags;
633 
634 	if (optlen < sizeof(cmd))
635 		return -EINVAL;
636 
637 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
638 		return -EFAULT;
639 
640 	if (sin6->sin6_family != AF_INET6)
641 		return -EINVAL;
642 
643 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
644 	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
645 
646 	if (optname == TCP_MD5SIG_EXT &&
647 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
648 		prefixlen = cmd.tcpm_prefixlen;
649 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
650 					prefixlen > 32))
651 			return -EINVAL;
652 	} else {
653 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
654 	}
655 
656 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
657 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
658 		struct net_device *dev;
659 
660 		rcu_read_lock();
661 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
662 		if (dev && netif_is_l3_master(dev))
663 			l3index = dev->ifindex;
664 		rcu_read_unlock();
665 
666 		/* ok to reference set/not set outside of rcu;
667 		 * right now device MUST be an L3 master
668 		 */
669 		if (!dev || !l3index)
670 			return -EINVAL;
671 	}
672 
673 	if (!cmd.tcpm_keylen) {
674 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
675 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
676 					      AF_INET, prefixlen,
677 					      l3index, flags);
678 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
679 				      AF_INET6, prefixlen, l3index, flags);
680 	}
681 
682 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
683 		return -EINVAL;
684 
685 	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
686 		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
687 
688 		/* Don't allow keys for peers that have a matching TCP-AO key.
689 		 * See the comment in tcp_ao_add_cmd()
690 		 */
691 		if (tcp_ao_required(sk, addr, AF_INET,
692 				    l3flag ? l3index : -1, false))
693 			return -EKEYREJECTED;
694 		return tcp_md5_do_add(sk, addr,
695 				      AF_INET, prefixlen, l3index, flags,
696 				      cmd.tcpm_key, cmd.tcpm_keylen);
697 	}
698 
699 	addr = (union tcp_md5_addr *)&sin6->sin6_addr;
700 
701 	/* Don't allow keys for peers that have a matching TCP-AO key.
702 	 * See the comment in tcp_ao_add_cmd()
703 	 */
704 	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
705 		return -EKEYREJECTED;
706 
707 	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
708 			      cmd.tcpm_key, cmd.tcpm_keylen);
709 }
710 
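/* Editor's sketch (annotation, not part of the original source): a
 * hypothetical userspace call that lands in the parser above,
 * installing a static MD5 key for one IPv6 peer:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * TCP_MD5SIG_EXT additionally honours tcpm_flags, tcpm_prefixlen and
 * tcpm_ifindex for the prefix/ifindex variants handled above, and a
 * tcpm_keylen of 0 deletes the key.
 */
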
711 static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx,
712 				    const struct in6_addr *daddr,
713 				    const struct in6_addr *saddr,
714 				    const struct tcphdr *th, int nbytes)
715 {
716 	struct {
717 		struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */
718 		struct tcphdr tcp;
719 	} h;
720 
721 	h.ip.saddr = *saddr;
722 	h.ip.daddr = *daddr;
723 	h.ip.protocol = cpu_to_be32(IPPROTO_TCP);
724 	h.ip.len = cpu_to_be32(nbytes);
725 	h.tcp = *th;
726 	h.tcp.check = 0;
727 	md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp));
728 }
729 
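/* Editor's note (annotation, not part of the original source): the
 * block hashed above is the IPv6 pseudo-header (RFC 2460, as used by
 * RFC 2385 for TCP MD5) followed by the TCP header with its checksum
 * field zeroed:
 *
 *	saddr (16) | daddr (16) | len (4) | protocol (4) | tcphdr (20+)
 *
 * Both ends must therefore agree on the address pair and the segment
 * length, or their 16-byte digests will not match.
 */
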
730 static noinline_for_stack void
731 tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
732 		    const struct in6_addr *daddr, struct in6_addr *saddr,
733 		    const struct tcphdr *th)
734 {
735 	struct md5_ctx ctx;
736 
737 	md5_init(&ctx);
738 	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2);
739 	tcp_md5_hash_key(&ctx, key);
740 	md5_final(&ctx, md5_hash);
741 }
742 
743 static noinline_for_stack void
744 tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
745 		    const struct sock *sk, const struct sk_buff *skb)
746 {
747 	const struct tcphdr *th = tcp_hdr(skb);
748 	const struct in6_addr *saddr, *daddr;
749 	struct md5_ctx ctx;
750 
751 	if (sk) { /* valid for establish/request sockets */
752 		saddr = &sk->sk_v6_rcv_saddr;
753 		daddr = &sk->sk_v6_daddr;
754 	} else {
755 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
756 		saddr = &ip6h->saddr;
757 		daddr = &ip6h->daddr;
758 	}
759 
760 	md5_init(&ctx);
761 	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len);
762 	tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2);
763 	tcp_md5_hash_key(&ctx, key);
764 	md5_final(&ctx, md5_hash);
765 }
766 #endif
767 
768 static void tcp_v6_init_req(struct request_sock *req,
769 			    const struct sock *sk_listener,
770 			    struct sk_buff *skb,
771 			    u32 tw_isn)
772 {
773 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
774 	struct inet_request_sock *ireq = inet_rsk(req);
775 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
776 
777 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
778 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
779 	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
780 	ireq->ir_loc_addr = LOOPBACK4_IPV6;
781 
782 	/* So that link locals have meaning */
783 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
784 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
785 		ireq->ir_iif = tcp_v6_iif(skb);
786 
787 	if (!tw_isn &&
788 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
789 	     np->rxopt.bits.rxinfo ||
790 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
791 	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
792 		refcount_inc(&skb->users);
793 		ireq->pktopts = skb;
794 	}
795 }
796 
797 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
798 					  struct sk_buff *skb,
799 					  struct flowi *fl,
800 					  struct request_sock *req,
801 					  u32 tw_isn)
802 {
803 	tcp_v6_init_req(req, sk, skb, tw_isn);
804 
805 	if (security_inet_conn_request(sk, skb, req))
806 		return NULL;
807 
808 	return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP);
809 }
810 
811 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
812 	.family		=	AF_INET6,
813 	.obj_size	=	sizeof(struct tcp6_request_sock),
814 	.send_ack	=	tcp_v6_reqsk_send_ack,
815 	.destructor	=	tcp_v6_reqsk_destructor,
816 	.send_reset	=	tcp_v6_send_reset,
817 };
818 
819 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
820 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
821 				sizeof(struct ipv6hdr),
822 #ifdef CONFIG_TCP_MD5SIG
823 	.req_md5_lookup	=	tcp_v6_md5_lookup,
824 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
825 #endif
826 #ifdef CONFIG_TCP_AO
827 	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
828 	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
829 	.ao_synack_hash =	tcp_v6_ao_synack_hash,
830 #endif
831 #ifdef CONFIG_SYN_COOKIES
832 	.cookie_init_seq =	cookie_v6_init_sequence,
833 #endif
834 	.route_req	=	tcp_v6_route_req,
835 	.init_seq_and_ts_off	= tcp_v6_init_seq_and_ts_off,
836 	.send_synack	=	tcp_v6_send_synack,
837 };
838 
839 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
840 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
841 				 int oif, int rst, u8 tclass, __be32 label,
842 				 u32 priority, u32 txhash, struct tcp_key *key)
843 {
844 	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
845 	unsigned int tot_len = sizeof(struct tcphdr);
846 	struct sock *ctl_sk = net->ipv6.tcp_sk;
847 	const struct tcphdr *th = tcp_hdr(skb);
848 	__be32 mrst = 0, *topt;
849 	struct dst_entry *dst;
850 	struct sk_buff *buff;
851 	struct tcphdr *t1;
852 	struct flowi6 fl6;
853 	u32 mark = 0;
854 
855 	if (tsecr)
856 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
857 	if (tcp_key_is_md5(key))
858 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
859 	if (tcp_key_is_ao(key))
860 		tot_len += tcp_ao_len_aligned(key->ao_key);
861 
862 #ifdef CONFIG_MPTCP
863 	if (rst && !tcp_key_is_md5(key)) {
864 		mrst = mptcp_reset_option(skb);
865 
866 		if (mrst)
867 			tot_len += sizeof(__be32);
868 	}
869 #endif
870 
871 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
872 	if (!buff)
873 		return;
874 
875 	skb_reserve(buff, MAX_TCP_HEADER);
876 
877 	t1 = skb_push(buff, tot_len);
878 	skb_reset_transport_header(buff);
879 
880 	/* Swap the send and the receive. */
881 	memset(t1, 0, sizeof(*t1));
882 	t1->dest = th->source;
883 	t1->source = th->dest;
884 	t1->doff = tot_len / 4;
885 	t1->seq = htonl(seq);
886 	t1->ack_seq = htonl(ack);
887 	t1->ack = !rst || !th->ack;
888 	t1->rst = rst;
889 	t1->window = htons(win);
890 
891 	topt = (__be32 *)(t1 + 1);
892 
893 	if (tsecr) {
894 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
895 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
896 		*topt++ = htonl(tsval);
897 		*topt++ = htonl(tsecr);
898 	}
899 
900 	if (mrst)
901 		*topt++ = mrst;
902 
903 #ifdef CONFIG_TCP_MD5SIG
904 	if (tcp_key_is_md5(key)) {
905 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
906 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
907 		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
908 				    &ipv6_hdr(skb)->saddr,
909 				    &ipv6_hdr(skb)->daddr, t1);
910 	}
911 #endif
912 #ifdef CONFIG_TCP_AO
913 	if (tcp_key_is_ao(key)) {
914 		*topt++ = htonl((TCPOPT_AO << 24) |
915 				(tcp_ao_len(key->ao_key) << 16) |
916 				(key->ao_key->sndid << 8) |
917 				(key->rcv_next));
918 
919 		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
920 				key->traffic_key,
921 				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
922 				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
923 				t1, key->sne);
924 	}
925 #endif
926 
927 	memset(&fl6, 0, sizeof(fl6));
928 	fl6.daddr = ipv6_hdr(skb)->saddr;
929 	fl6.saddr = ipv6_hdr(skb)->daddr;
930 	fl6.flowlabel = label;
931 
932 	buff->ip_summed = CHECKSUM_PARTIAL;
933 
934 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
935 
936 	fl6.flowi6_proto = IPPROTO_TCP;
937 	if (rt6_need_strict(&fl6.daddr) && !oif)
938 		fl6.flowi6_oif = tcp_v6_iif(skb);
939 	else {
940 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
941 			oif = skb->skb_iif;
942 
943 		fl6.flowi6_oif = oif;
944 	}
945 
946 	if (sk) {
947 		/* unconstify the socket only to attach it to buff with care. */
948 		skb_set_owner_edemux(buff, (struct sock *)sk);
949 		psp_reply_set_decrypted(sk, buff);
950 
951 		if (sk->sk_state == TCP_TIME_WAIT)
952 			mark = inet_twsk(sk)->tw_mark;
953 		else
954 			mark = READ_ONCE(sk->sk_mark);
955 		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
956 	}
957 	if (txhash) {
958 		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
959 		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
960 	}
961 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
962 	fl6.fl6_dport = t1->dest;
963 	fl6.fl6_sport = t1->source;
964 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
965 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
966 
967 	/* Pass a socket to ip6_dst_lookup whether or not it is for RST.
968 	 * The underlying function will use it to retrieve the network
969 	 * namespace.
970 	 */
971 	if (sk && sk->sk_state != TCP_TIME_WAIT)
972 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
973 	else
974 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
975 	if (!IS_ERR(dst)) {
976 		skb_dst_set(buff, dst);
977 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
978 			 tclass, priority);
979 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
980 		if (rst)
981 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
982 		return;
983 	}
984 
985 	kfree_skb(buff);
986 }
987 
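/* Editor's worked example (annotation, not part of the original
 * source): the timestamp option word written by tcp_v6_send_response()
 * packs NOP (1), NOP (1), kind TCPOPT_TIMESTAMP (8) and length
 * TCPOLEN_TIMESTAMP (10) into one word, 0x0101080a on the wire,
 * followed by the 32-bit TSval and TSecr words -- 12 bytes in total,
 * matching the TCPOLEN_TSTAMP_ALIGNED added to tot_len.
 */
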
988 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
989 			      enum sk_rst_reason reason)
990 {
991 	const struct tcphdr *th = tcp_hdr(skb);
992 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
993 	const __u8 *md5_hash_location = NULL;
994 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
995 	bool allocated_traffic_key = false;
996 #endif
997 	const struct tcp_ao_hdr *aoh;
998 	struct tcp_key key = {};
999 	u32 seq = 0, ack_seq = 0;
1000 	__be32 label = 0;
1001 	u32 priority = 0;
1002 	struct net *net;
1003 	u32 txhash = 0;
1004 	int oif = 0;
1005 #ifdef CONFIG_TCP_MD5SIG
1006 	unsigned char newhash[16];
1007 	struct sock *sk1 = NULL;
1008 #endif
1009 
1010 	if (th->rst)
1011 		return;
1012 
1013 	/* If sk is not NULL, it means we did a successful lookup and the
1014 	 * incoming route had to be correct. prequeue might have dropped our dst.
1015 	 */
1016 	if (!sk && !ipv6_unicast_destination(skb))
1017 		return;
1018 
1019 	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
1020 	/* Invalid TCP option size or twice included auth */
1021 	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
1022 		return;
1023 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1024 	rcu_read_lock();
1025 #endif
1026 #ifdef CONFIG_TCP_MD5SIG
1027 	if (sk && sk_fullsock(sk)) {
1028 		int l3index;
1029 
1030 		/* sdif set, means packet ingressed via a device
1031 		 * in an L3 domain and inet_iif is set to it.
1032 		 */
1033 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1034 		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1035 		if (key.md5_key)
1036 			key.type = TCP_KEY_MD5;
1037 	} else if (md5_hash_location) {
1038 		int dif = tcp_v6_iif_l3_slave(skb);
1039 		int sdif = tcp_v6_sdif(skb);
1040 		int l3index;
1041 
1042 		/*
1043 		 * The active side is lost. Try to find the listening socket
1044 		 * through the source port, and then find the md5 key through
1045 		 * the listening socket. We do not lose security here:
1046 		 * the incoming packet is checked with the md5 hash of the key
1047 		 * we find; no RST is generated if the md5 hash doesn't match.
1048 		 */
1049 		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
1050 					    &ipv6h->daddr, ntohs(th->source),
1051 					    dif, sdif);
1052 		if (!sk1)
1053 			goto out;
1054 
1055 		/* sdif set, means packet ingressed via a device
1056 		 * in an L3 domain and dif is set to it.
1057 		 */
1058 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1059 
1060 		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1061 		if (!key.md5_key)
1062 			goto out;
1063 		key.type = TCP_KEY_MD5;
1064 
1065 		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
1066 		if (crypto_memneq(md5_hash_location, newhash, 16))
1067 			goto out;
1068 	}
1069 #endif
1070 
1071 	if (th->ack)
1072 		seq = ntohl(th->ack_seq);
1073 	else
1074 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1075 			  (th->doff << 2);
1076 
1077 #ifdef CONFIG_TCP_AO
1078 	if (aoh) {
1079 		int l3index;
1080 
1081 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1082 		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
1083 					 &key.ao_key, &key.traffic_key,
1084 					 &allocated_traffic_key,
1085 					 &key.rcv_next, &key.sne))
1086 			goto out;
1087 		key.type = TCP_KEY_AO;
1088 	}
1089 #endif
1090 
1091 	if (sk) {
1092 		oif = sk->sk_bound_dev_if;
1093 		if (sk_fullsock(sk)) {
1094 			if (inet6_test_bit(REPFLOW, sk))
1095 				label = ip6_flowlabel(ipv6h);
1096 			priority = READ_ONCE(sk->sk_priority);
1097 			txhash = sk->sk_txhash;
1098 		}
1099 		if (sk->sk_state == TCP_TIME_WAIT) {
1100 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1101 			priority = inet_twsk(sk)->tw_priority;
1102 			txhash = inet_twsk(sk)->tw_txhash;
1103 		}
1104 	} else {
1105 		if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
1106 		    FLOWLABEL_REFLECT_TCP_RESET)
1107 			label = ip6_flowlabel(ipv6h);
1108 	}
1109 
1110 	trace_tcp_send_reset(sk, skb, reason);
1111 
1112 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
1113 			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
1114 			     label, priority, txhash,
1115 			     &key);
1116 
1117 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1118 out:
1119 	if (allocated_traffic_key)
1120 		kfree(key.traffic_key);
1121 	rcu_read_unlock();
1122 #endif
1123 }
1124 
1125 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1126 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1127 			    struct tcp_key *key, u8 tclass,
1128 			    __be32 label, u32 priority, u32 txhash)
1129 {
1130 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
1131 			     tclass, label, priority, txhash, key);
1132 }
1133 
1134 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
1135 				enum tcp_tw_status tw_status)
1136 {
1137 	struct inet_timewait_sock *tw = inet_twsk(sk);
1138 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1139 	u8 tclass = tw->tw_tclass;
1140 	struct tcp_key key = {};
1141 
1142 	if (tw_status == TCP_TW_ACK_OOW)
1143 		tclass &= ~INET_ECN_MASK;
1144 #ifdef CONFIG_TCP_AO
1145 	struct tcp_ao_info *ao_info;
1146 
1147 	if (static_branch_unlikely(&tcp_ao_needed.key)) {
1148 
1149 		/* FIXME: the segment to-be-acked is not verified yet */
1150 		ao_info = rcu_dereference(tcptw->ao_info);
1151 		if (ao_info) {
1152 			const struct tcp_ao_hdr *aoh;
1153 
1154 			/* Invalid TCP option size or twice included auth */
1155 			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1156 				goto out;
1157 			if (aoh)
1158 				key.ao_key = tcp_ao_established_key(sk, ao_info,
1159 								    aoh->rnext_keyid, -1);
1160 		}
1161 	}
1162 	if (key.ao_key) {
1163 		struct tcp_ao_key *rnext_key;
1164 
1165 		key.traffic_key = snd_other_key(key.ao_key);
1166 		/* rcv_next switches to our rcv_next */
1167 		rnext_key = READ_ONCE(ao_info->rnext_key);
1168 		key.rcv_next = rnext_key->rcvid;
1169 		key.sne = READ_ONCE(ao_info->snd_sne);
1170 		key.type = TCP_KEY_AO;
1171 #else
1172 	if (0) {
1173 #endif
1174 #ifdef CONFIG_TCP_MD5SIG
1175 	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1176 		key.md5_key = tcp_twsk_md5_key(tcptw);
1177 		if (key.md5_key)
1178 			key.type = TCP_KEY_MD5;
1179 #endif
1180 	}
1181 
1182 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
1183 			READ_ONCE(tcptw->tw_rcv_nxt),
1184 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1185 			tcp_tw_tsval(tcptw),
1186 			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
1187 			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
1188 			tw->tw_priority, tw->tw_txhash);
1189 
1190 #ifdef CONFIG_TCP_AO
1191 out:
1192 #endif
1193 	inet_twsk_put(tw);
1194 }
1195 
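/* Editor's note (annotation, not part of the original source): the
 * "#else / if (0) {" construct in tcp_v6_timewait_ack() keeps the
 * brace structure balanced for every CONFIG_TCP_AO/CONFIG_TCP_MD5SIG
 * combination: with TCP-AO compiled out, the AO branch degenerates to
 * a dead "if (0) {" so that the MD5 "} else if" which follows still
 * parses.  The same pattern recurs in tcp_v6_reqsk_send_ack() below.
 */
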
1196 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1197 				  struct request_sock *req)
1198 {
1199 	struct tcp_key key = {};
1200 
1201 #ifdef CONFIG_TCP_AO
1202 	if (static_branch_unlikely(&tcp_ao_needed.key) &&
1203 	    tcp_rsk_used_ao(req)) {
1204 		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
1205 		const struct tcp_ao_hdr *aoh;
1206 		int l3index;
1207 
1208 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1209 		/* Invalid TCP option size or twice included auth */
1210 		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1211 			return;
1212 		if (!aoh)
1213 			return;
1214 		key.ao_key = tcp_ao_do_lookup(sk, l3index,
1215 					      (union tcp_ao_addr *)addr,
1216 					      AF_INET6, aoh->rnext_keyid, -1);
1217 		if (unlikely(!key.ao_key)) {
1218 			/* Send ACK with any matching MKT for the peer */
1219 			key.ao_key = tcp_ao_do_lookup(sk, l3index,
1220 						      (union tcp_ao_addr *)addr,
1221 						      AF_INET6, -1, -1);
1222 			/* Matching key disappeared (user removed the key?)
1223 			 * let the handshake time out.
1224 			 */
1225 			if (!key.ao_key) {
1226 				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
1227 						     addr,
1228 						     ntohs(tcp_hdr(skb)->source),
1229 						     &ipv6_hdr(skb)->daddr,
1230 						     ntohs(tcp_hdr(skb)->dest));
1231 				return;
1232 			}
1233 		}
1234 		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
1235 		if (!key.traffic_key)
1236 			return;
1237 
1238 		key.type = TCP_KEY_AO;
1239 		key.rcv_next = aoh->keyid;
1240 		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
1241 #else
1242 	if (0) {
1243 #endif
1244 #ifdef CONFIG_TCP_MD5SIG
1245 	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1246 		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1247 
1248 		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
1249 						   l3index);
1250 		if (key.md5_key)
1251 			key.type = TCP_KEY_MD5;
1252 #endif
1253 	}
1254 
1255 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1256 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1257 	 */
1258 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1259 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1260 			tcp_rsk(req)->rcv_nxt,
1261 			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
1262 			tcp_rsk_tsval(tcp_rsk(req)),
1263 			req->ts_recent, sk->sk_bound_dev_if,
1264 			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
1265 			0,
1266 			READ_ONCE(sk->sk_priority),
1267 			READ_ONCE(tcp_rsk(req)->txhash));
1268 	if (tcp_key_is_ao(&key))
1269 		kfree(key.traffic_key);
1270 }
1271 
1272 
1273 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1274 {
1275 #ifdef CONFIG_SYN_COOKIES
1276 	const struct tcphdr *th = tcp_hdr(skb);
1277 
1278 	if (!th->syn)
1279 		sk = cookie_v6_check(sk, skb);
1280 #endif
1281 	return sk;
1282 }
1283 
1284 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1285 			 struct tcphdr *th, u32 *cookie)
1286 {
1287 	u16 mss = 0;
1288 #ifdef CONFIG_SYN_COOKIES
1289 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1290 				    &tcp_request_sock_ipv6_ops, sk, th);
1291 	if (mss) {
1292 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1293 		tcp_synq_overflow(sk);
1294 	}
1295 #endif
1296 	return mss;
1297 }
1298 
1299 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1300 {
1301 	if (skb->protocol == htons(ETH_P_IP))
1302 		return tcp_v4_conn_request(sk, skb);
1303 
1304 	if (!ipv6_unicast_destination(skb))
1305 		goto drop;
1306 
1307 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1308 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1309 		return 0;
1310 	}
1311 
1312 	return tcp_conn_request(&tcp6_request_sock_ops,
1313 				&tcp_request_sock_ipv6_ops, sk, skb);
1314 
1315 drop:
1316 	tcp_listendrop(sk);
1317 	return 0; /* don't send reset */
1318 }
1319 
1320 static void tcp_v6_restore_cb(struct sk_buff *skb)
1321 {
1322 	/* We need to move header back to the beginning if xfrm6_policy_check()
1323 	 * and tcp_v6_fill_cb() are going to be called again.
1324 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1325 	 */
1326 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1327 		sizeof(struct inet6_skb_parm));
1328 }
1329 
1330 /* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */
1331 static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk)
1332 {
1333 	struct inet_sock *newinet = inet_sk(newsk);
1334 	struct ipv6_pinfo *newnp;
1335 
1336 	newinet->pinet6 = newnp = tcp_inet6_sk(newsk);
1337 	newinet->ipv6_fl_list = NULL;
1338 
1339 	memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo));
1340 
1341 	newnp->saddr = newsk->sk_v6_rcv_saddr;
1342 
1343 	inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1344 	if (sk_is_mptcp(newsk))
1345 		mptcpv6_handle_mapped(newsk, true);
1346 	newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1347 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1348 	tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific;
1349 #endif
1350 
1351 	newnp->ipv6_mc_list = NULL;
1352 	newnp->ipv6_ac_list = NULL;
1353 	newnp->pktoptions  = NULL;
1354 	newnp->opt	   = NULL;
1355 
1356 	/* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */
1357 	newnp->mcast_oif   = newinet->mc_index;
1358 	newnp->mcast_hops  = newinet->mc_ttl;
1359 
1360 	newnp->rcv_flowinfo = 0;
1361 	if (inet6_test_bit(REPFLOW, sk))
1362 		newnp->flow_label = 0;
1363 }
1364 
1365 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1366 					 struct request_sock *req,
1367 					 struct dst_entry *dst,
1368 					 struct request_sock *req_unhash,
1369 					 bool *own_req,
1370 					 void (*opt_child_init)(struct sock *newsk,
1371 								const struct sock *sk))
1372 {
1373 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1374 	struct inet_request_sock *ireq;
1375 	struct ipv6_txoptions *opt;
1376 	struct inet_sock *newinet;
1377 	bool found_dup_sk = false;
1378 	struct ipv6_pinfo *newnp;
1379 	struct tcp_sock *newtp;
1380 	struct sock *newsk;
1381 #ifdef CONFIG_TCP_MD5SIG
1382 	struct tcp_md5sig_key *key;
1383 	int l3index;
1384 #endif
1385 	struct flowi6 fl6;
1386 
1387 	if (skb->protocol == htons(ETH_P_IP))
1388 		return tcp_v4_syn_recv_sock(sk, skb, req, dst,
1389 					    req_unhash, own_req,
1390 					    tcp_v6_mapped_child_init);
1391 	ireq = inet_rsk(req);
1392 
1393 	if (sk_acceptq_is_full(sk))
1394 		goto exit_overflow;
1395 
1396 	dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP);
1397 	if (!dst)
1398 		goto exit;
1399 
1400 	newsk = tcp_create_openreq_child(sk, req, skb);
1401 	if (!newsk)
1402 		goto exit_nonewsk;
1403 
1404 	/*
1405 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1406 	 * count here, tcp_create_openreq_child now does this for us, see the
1407 	 * comment in that function for the gory details. -acme
1408 	 */
1409 
1410 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1411 	inet6_sk_rx_dst_set(newsk, skb);
1412 
1413 	newinet = inet_sk(newsk);
1414 	newinet->cork.fl.u.ip6 = fl6;
1415 	newinet->pinet6 = tcp_inet6_sk(newsk);
1416 	newinet->ipv6_fl_list = NULL;
1417 	newinet->inet_opt = NULL;
1418 
1419 	newtp = tcp_sk(newsk);
1420 	newnp = tcp_inet6_sk(newsk);
1421 
1422 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1423 
1424 	ip6_dst_store(newsk, dst, false, false);
1425 
1426 	newnp->saddr = ireq->ir_v6_loc_addr;
1427 
1428 	/* Now IPv6 options...
1429 
1430 	   First: no IPv4 options.
1431 	 */
1432 	newnp->ipv6_mc_list = NULL;
1433 	newnp->ipv6_ac_list = NULL;
1434 
1435 	/* Clone RX bits */
1436 	newnp->rxopt.all = np->rxopt.all;
1437 
1438 	newnp->pktoptions = NULL;
1439 	newnp->opt	  = NULL;
1440 	newnp->mcast_oif  = tcp_v6_iif(skb);
1441 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1442 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1443 	if (inet6_test_bit(REPFLOW, sk))
1444 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1445 
1446 	/* Set ToS of the new socket based upon the value of incoming SYN.
1447 	 * ECT bits are set later in tcp_init_transfer().
1448 	 */
1449 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1450 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1451 
1452 	/* Clone native IPv6 options from listening socket (if any)
1453 
1454 	   Yes, keeping a reference count would be much more clever,
1455 	   but we do one more thing here: reattach optmem
1456 	   to newsk.
1457 	 */
1458 	opt = ireq->ipv6_opt;
1459 	if (!opt)
1460 		opt = rcu_dereference(np->opt);
1461 	if (opt) {
1462 		opt = ipv6_dup_options(newsk, opt);
1463 		RCU_INIT_POINTER(newnp->opt, opt);
1464 	}
1465 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1466 	if (opt)
1467 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1468 						    opt->opt_flen;
1469 
1470 	tcp_ca_openreq_child(newsk, dst);
1471 
1472 	tcp_sync_mss(newsk, dst6_mtu(dst));
1473 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1474 
1475 	tcp_initialize_rcv_mss(newsk);
1476 
1477 #ifdef CONFIG_TCP_MD5SIG
1478 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1479 
1480 	if (!tcp_rsk_used_ao(req)) {
1481 		/* Copy over the MD5 key from the original socket */
1482 		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1483 		if (key) {
1484 			const union tcp_md5_addr *addr;
1485 
1486 			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1487 			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
1488 				goto put_and_exit;
1489 		}
1490 	}
1491 #endif
1492 #ifdef CONFIG_TCP_AO
1493 	/* Copy over tcp_ao_info if any */
1494 	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
1495 		goto put_and_exit; /* OOM */
1496 #endif
1497 
1498 	if (__inet_inherit_port(sk, newsk) < 0)
1499 		goto put_and_exit;
1500 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1501 				       &found_dup_sk);
1502 	if (*own_req) {
1503 		tcp_move_syn(newtp, req);
1504 
1505 		/* Clone pktoptions received with SYN, if we own the req */
1506 		if (ireq->pktopts) {
1507 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1508 			consume_skb(ireq->pktopts);
1509 			ireq->pktopts = NULL;
1510 			if (newnp->pktoptions)
1511 				tcp_v6_restore_cb(newnp->pktoptions);
1512 		}
1513 	} else {
1514 		if (!req_unhash && found_dup_sk) {
1515 			/* This code path should only be executed in the
1516 			 * syncookie case
1517 			 */
1518 			bh_unlock_sock(newsk);
1519 			sock_put(newsk);
1520 			newsk = NULL;
1521 		}
1522 	}
1523 
1524 	return newsk;
1525 
1526 exit_overflow:
1527 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1528 exit_nonewsk:
1529 	dst_release(dst);
1530 exit:
1531 	tcp_listendrop(sk);
1532 	return NULL;
1533 put_and_exit:
1534 	inet_csk_prepare_forced_close(newsk);
1535 	tcp_done(newsk);
1536 	goto exit;
1537 }
1538 
1539 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1540 							   u32));
1541 /* The socket must have its spinlock held when we get
1542  * here, unless it is a TCP_LISTEN socket.
1543  *
1544  * We have a potential double-lock case here, so even when
1545  * doing backlog processing we use the BH locking scheme.
1546  * This is because we cannot sleep with the original spinlock
1547  * held.
1548  */
1549 INDIRECT_CALLABLE_SCOPE
1550 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1551 {
1552 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1553 	struct sk_buff *opt_skb = NULL;
1554 	enum skb_drop_reason reason;
1555 	struct tcp_sock *tp;
1556 
1557 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1558 	   goes to the IPv4 receive handler and is backlogged.
1559 	   From backlog it always goes here. Kerboom...
1560 	   Fortunately, tcp_rcv_established and rcv_established
1561 	   handle them correctly, but it is not the case with
1562 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1563 	 */
1564 
1565 	if (skb->protocol == htons(ETH_P_IP))
1566 		return tcp_v4_do_rcv(sk, skb);
1567 
1568 	reason = psp_sk_rx_policy_check(sk, skb);
1569 	if (reason)
1570 		goto err_discard;
1571 
1572 	/*
1573 	 *	socket locking is here for SMP purposes as backlog rcv
1574 	 *	is currently called with bh processing disabled.
1575 	 */
1576 
1577 	/* Do Stevens' IPV6_PKTOPTIONS.
1578 
1579 	   Yes, guys, it is the only place in our code where we
1580 	   may handle it without affecting IPv4.
1581 	   The rest of the code is protocol independent,
1582 	   and I do not like the idea of uglifying IPv4.
1583 
1584 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1585 	   looks not very well thought out. For now we latch
1586 	   options received in the last packet enqueued
1587 	   by tcp. Feel free to propose a better solution.
1588 					       --ANK (980728)
1589 	 */
1590 	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
1591 		opt_skb = skb_clone_and_charge_r(skb, sk);
1592 
1593 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1594 		struct dst_entry *dst;
1595 
1596 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1597 						lockdep_sock_is_held(sk));
1598 
1599 		sock_rps_save_rxhash(sk, skb);
1600 		sk_mark_napi_id(sk, skb);
1601 		if (dst && unlikely(dst != skb_dst(skb))) {
1602 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1603 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1604 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1605 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1606 				dst_release(dst);
1607 			}
1608 		}
1609 
1610 		tcp_rcv_established(sk, skb);
1611 		if (opt_skb)
1612 			goto ipv6_pktoptions;
1613 		return 0;
1614 	}
1615 
1616 	if (tcp_checksum_complete(skb))
1617 		goto csum_err;
1618 
1619 	if (sk->sk_state == TCP_LISTEN) {
1620 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1621 
1622 		if (!nsk)
1623 			return 0;
1624 		if (nsk != sk) {
1625 			reason = tcp_child_process(sk, nsk, skb);
1626 			sock_put(nsk);
1627 			if (reason)
1628 				goto reset;
1629 			return 0;
1630 		}
1631 	} else
1632 		sock_rps_save_rxhash(sk, skb);
1633 
1634 	reason = tcp_rcv_state_process(sk, skb);
1635 	if (reason)
1636 		goto reset;
1637 	if (opt_skb)
1638 		goto ipv6_pktoptions;
1639 	return 0;
1640 
1641 reset:
1642 	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
1643 discard:
1644 	if (opt_skb)
1645 		__kfree_skb(opt_skb);
1646 	sk_skb_reason_drop(sk, skb, reason);
1647 	return 0;
1648 csum_err:
1649 	reason = SKB_DROP_REASON_TCP_CSUM;
1650 	trace_tcp_bad_csum(skb);
1651 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1652 err_discard:
1653 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1654 	goto discard;
1655 
1656 
1657 ipv6_pktoptions:
1658 	/* Do you ask, what is it?
1659 
1660 	   1. skb was enqueued by tcp.
1661 	   2. skb is added to the tail of the read queue, rather than out of order.
1662 	   3. the socket is not in a passive state.
1663 	   4. Finally, it really contains options, which the user wants to receive.
1664 	 */
1665 	tp = tcp_sk(sk);
1666 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1667 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1668 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1669 			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
1670 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1671 			WRITE_ONCE(np->mcast_hops,
1672 				   ipv6_hdr(opt_skb)->hop_limit);
1673 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1674 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1675 		if (inet6_test_bit(REPFLOW, sk))
1676 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1677 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1678 			tcp_v6_restore_cb(opt_skb);
1679 			opt_skb = xchg(&np->pktoptions, opt_skb);
1680 		} else {
1681 			__kfree_skb(opt_skb);
1682 			opt_skb = xchg(&np->pktoptions, NULL);
1683 		}
1684 	}
1685 
1686 	consume_skb(opt_skb);
1687 	return 0;
1688 }
1689 
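/* Editor's note (annotation, not part of the original source): the
 * xchg() in the ipv6_pktoptions block above both installs the new
 * pktoptions skb and returns the previous one in a single atomic
 * swap, so the consume_skb() that follows frees whichever skb was
 * latched before (or the clone itself when ipv6_opt_accepted() said
 * no), without racing against readers of np->pktoptions.
 */
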
1690 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1691 			   const struct tcphdr *th)
1692 {
1693 	/* This is tricky: we move IP6CB at its correct location into
1694 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1695 	 * _decode_session6() uses IP6CB().
1696 	 * barrier() makes sure compiler won't play aliasing games.
1697 	 */
1698 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1699 		sizeof(struct inet6_skb_parm));
1700 	barrier();
1701 
1702 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1703 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1704 				    skb->len - th->doff*4);
1705 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1706 	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
1707 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1708 	TCP_SKB_CB(skb)->sacked = 0;
1709 	TCP_SKB_CB(skb)->has_rxtstamp =
1710 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1711 }
1712 
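/* Editor's worked example (annotation, not part of the original
 * source): in tcp_v6_fill_cb() a pure SYN with seq = 1000 and no
 * payload (skb->len == th->doff * 4) yields
 * end_seq = 1000 + 1 + 0 + (len - hdrlen = 0) = 1001: SYN and FIN
 * each consume one unit of sequence space while the TCP header bytes
 * themselves consume none.
 */
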
1713 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1714 {
1715 	struct net *net = dev_net_rcu(skb->dev);
1716 	enum skb_drop_reason drop_reason;
1717 	enum tcp_tw_status tw_status;
1718 	int sdif = inet6_sdif(skb);
1719 	int dif = inet6_iif(skb);
1720 	const struct tcphdr *th;
1721 	const struct ipv6hdr *hdr;
1722 	struct sock *sk = NULL;
1723 	bool refcounted;
1724 	int ret;
1725 	u32 isn;
1726 
1727 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1728 	if (skb->pkt_type != PACKET_HOST)
1729 		goto discard_it;
1730 
1731 	/*
1732 	 *	Count it even if it's bad.
1733 	 */
1734 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1735 
1736 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1737 		goto discard_it;
1738 
1739 	th = (const struct tcphdr *)skb->data;
1740 
1741 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1742 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1743 		goto bad_packet;
1744 	}
1745 	if (!pskb_may_pull(skb, th->doff * 4))
1746 		goto discard_it;
1747 
1748 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1749 		goto csum_error;
1750 
1751 	th = (const struct tcphdr *)skb->data;
1752 	hdr = ipv6_hdr(skb);
1753 
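	/* Demultiplex: find the socket this segment belongs to.  The lookup
	 * may return an established, listening, TIME_WAIT or NEW_SYN_RECV
	 * (request) socket; the latter two get dedicated handling below.
	 */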
1754 lookup:
1755 	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
1756 				th->source, th->dest, inet6_iif(skb), sdif,
1757 				&refcounted);
1758 	if (!sk)
1759 		goto no_tcp_socket;
1760 
1761 	if (sk->sk_state == TCP_TIME_WAIT)
1762 		goto do_time_wait;
1763 
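	/* The segment matched a request socket, i.e. a handshake is in
	 * progress.  Validate it against the listener, then let
	 * tcp_check_req() either create the child socket or drop the skb.
	 */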
1764 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1765 		struct request_sock *req = inet_reqsk(sk);
1766 		bool req_stolen = false;
1767 		struct sock *nsk;
1768 
1769 		sk = req->rsk_listener;
1770 		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1771 			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1772 		else
1773 			drop_reason = tcp_inbound_hash(sk, req, skb,
1774 						       &hdr->saddr, &hdr->daddr,
1775 						       AF_INET6, dif, sdif);
1776 		if (drop_reason) {
1777 			sk_drops_skbadd(sk, skb);
1778 			reqsk_put(req);
1779 			goto discard_it;
1780 		}
1781 		if (tcp_checksum_complete(skb)) {
1782 			reqsk_put(req);
1783 			goto csum_error;
1784 		}
1785 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1786 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1787 			if (!nsk) {
1788 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1789 				goto lookup;
1790 			}
1791 			sk = nsk;
1792 			/* reuseport_migrate_sock() has already held one sk_refcnt
1793 			 * before returning.
1794 			 */
1795 		} else {
1796 			sock_hold(sk);
1797 		}
1798 		refcounted = true;
1799 		nsk = NULL;
1800 		drop_reason = tcp_filter(sk, skb);
1801 		if (!drop_reason) {
1802 			th = (const struct tcphdr *)skb->data;
1803 			hdr = ipv6_hdr(skb);
1804 			tcp_v6_fill_cb(skb, hdr, th);
1805 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
1806 					    &drop_reason);
1807 		}
1808 		if (!nsk) {
1809 			reqsk_put(req);
1810 			if (req_stolen) {
1811 			/* Another CPU got exclusive access to req
1812 			 * and created a full-blown socket.
1813 			 * Try to feed this packet to that socket
1814 			 * instead of discarding it.
1815 			 */
1816 				tcp_v6_restore_cb(skb);
1817 				sock_put(sk);
1818 				goto lookup;
1819 			}
1820 			goto discard_and_relse;
1821 		}
1822 		nf_reset_ct(skb);
1823 		if (nsk == sk) {
1824 			reqsk_put(req);
1825 			tcp_v6_restore_cb(skb);
1826 		} else {
1827 			drop_reason = tcp_child_process(sk, nsk, skb);
1828 			if (drop_reason) {
1829 				enum sk_rst_reason rst_reason;
1830 
1831 				rst_reason = sk_rst_convert_drop_reason(drop_reason);
1832 				tcp_v6_send_reset(nsk, skb, rst_reason);
1833 				sock_put(nsk);
1834 				goto discard_and_relse;
1835 			}
1836 			sock_put(nsk);
1837 			sock_put(sk);
1838 			return 0;
1839 		}
1840 	}
1841 
1842 process:
1843 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1844 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1845 		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1846 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1847 			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1848 			goto discard_and_relse;
1849 		}
1850 	}
1851 
1852 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1853 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1854 		goto discard_and_relse;
1855 	}
1856 
1857 	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
1858 				       AF_INET6, dif, sdif);
1859 	if (drop_reason)
1860 		goto discard_and_relse;
1861 
1862 	nf_reset_ct(skb);
1863 
1864 	drop_reason = tcp_filter(sk, skb);
1865 	if (drop_reason)
1866 		goto discard_and_relse;
1867 
1868 	th = (const struct tcphdr *)skb->data;
1869 	hdr = ipv6_hdr(skb);
1870 	tcp_v6_fill_cb(skb, hdr, th);
1871 
1872 	skb->dev = NULL;
1873 
1874 	if (sk->sk_state == TCP_LISTEN) {
1875 		ret = tcp_v6_do_rcv(sk, skb);
1876 		goto put_and_return;
1877 	}
1878 
1879 	sk_incoming_cpu_update(sk);
1880 
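	/* If a process owns the socket, its state must not be touched from
	 * softirq context: queue the skb to the backlog so release_sock()
	 * replays it through tcp_v6_do_rcv().  Otherwise process it now.
	 */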
1881 	bh_lock_sock_nested(sk);
1882 	tcp_segs_in(tcp_sk(sk), skb);
1883 	ret = 0;
1884 	if (!sock_owned_by_user(sk)) {
1885 		ret = tcp_v6_do_rcv(sk, skb);
1886 	} else {
1887 		drop_reason = tcp_add_backlog(sk, skb);
1888 		if (drop_reason)
1889 			goto discard_and_relse;
1890 	}
1891 	bh_unlock_sock(sk);
1892 put_and_return:
1893 	if (refcounted)
1894 		sock_put(sk);
1895 	return ret ? -1 : 0;
1896 
1897 no_tcp_socket:
1898 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1899 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1900 		goto discard_it;
1901 
1902 	tcp_v6_fill_cb(skb, hdr, th);
1903 
1904 	if (tcp_checksum_complete(skb)) {
1905 csum_error:
1906 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1907 		trace_tcp_bad_csum(skb);
1908 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1909 bad_packet:
1910 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1911 	} else {
1912 		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
1913 	}
1914 
1915 discard_it:
1916 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1917 	sk_skb_reason_drop(sk, skb, drop_reason);
1918 	return 0;
1919 
1920 discard_and_relse:
1921 	sk_drops_skbadd(sk, skb);
1922 	if (refcounted)
1923 		sock_put(sk);
1924 	goto discard_it;
1925 
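/* TIME_WAIT handling: after the policy and checksum checks,
 * tcp_timewait_state_process() classifies the segment and we answer with
 * an ACK, a RST, a fresh connection attempt (TCP_TW_SYN) or silence.
 */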
1926 do_time_wait:
1927 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1928 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1929 		inet_twsk_put(inet_twsk(sk));
1930 		goto discard_it;
1931 	}
1932 
1933 	tcp_v6_fill_cb(skb, hdr, th);
1934 
1935 	if (tcp_checksum_complete(skb)) {
1936 		inet_twsk_put(inet_twsk(sk));
1937 		goto csum_error;
1938 	}
1939 
1940 	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
1941 					       &drop_reason);
1942 	switch (tw_status) {
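	/* TCP_TW_SYN: the SYN is acceptable as the start of a new incarnation
	 * of the old connection; retarget it at a current listener, if any,
	 * and retire the timewait socket.
	 */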
1943 	case TCP_TW_SYN:
1944 	{
1945 		struct sock *sk2;
1946 
1947 		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
1948 					    &ipv6_hdr(skb)->saddr, th->source,
1949 					    &ipv6_hdr(skb)->daddr,
1950 					    ntohs(th->dest),
1951 					    tcp_v6_iif_l3_slave(skb),
1952 					    sdif);
1953 		if (sk2) {
1954 			struct inet_timewait_sock *tw = inet_twsk(sk);
1955 			inet_twsk_deschedule_put(tw);
1956 			sk = sk2;
1957 			tcp_v6_restore_cb(skb);
1958 			refcounted = false;
1959 			__this_cpu_write(tcp_tw_isn, isn);
1960 			goto process;
1961 		}
1962 
1963 		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
1964 		if (drop_reason)
1965 			break;
1966 	}
1967 		/* to ACK */
1968 		fallthrough;
1969 	case TCP_TW_ACK:
1970 	case TCP_TW_ACK_OOW:
1971 		tcp_v6_timewait_ack(sk, skb, tw_status);
1972 		break;
1973 	case TCP_TW_RST:
1974 		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
1975 		inet_twsk_deschedule_put(inet_twsk(sk));
1976 		goto discard_it;
1977 	case TCP_TW_SUCCESS:
1978 		break;
1979 	}
1980 	goto discard_it;
1981 }
1982 
1983 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1984 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1985 };
1986 
1987 const struct inet_connection_sock_af_ops ipv6_specific = {
1988 	.queue_xmit	   = inet6_csk_xmit,
1989 	.rebuild_header	   = inet6_sk_rebuild_header,
1990 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1991 	.conn_request	   = tcp_v6_conn_request,
1992 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1993 	.net_header_len	   = sizeof(struct ipv6hdr),
1994 	.setsockopt	   = ipv6_setsockopt,
1995 	.getsockopt	   = ipv6_getsockopt,
1996 	.mtu_reduced	   = tcp_v6_mtu_reduced,
1997 };
1998 
1999 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2000 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
2001 #ifdef CONFIG_TCP_MD5SIG
2002 	.md5_lookup	=	tcp_v6_md5_lookup,
2003 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
2004 	.md5_parse	=	tcp_v6_parse_md5_keys,
2005 #endif
2006 #ifdef CONFIG_TCP_AO
2007 	.ao_lookup	=	tcp_v6_ao_lookup,
2008 	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
2009 	.ao_parse	=	tcp_v6_parse_ao,
2010 	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
2011 #endif
2012 };
2013 #endif
2014 
2015 /*
2016  *	TCP over IPv4 via INET6 API (v4-mapped peers, ::ffff:a.b.c.d).
2017  */
2018 static const struct inet_connection_sock_af_ops ipv6_mapped = {
2019 	.queue_xmit	   = ip_queue_xmit,
2020 	.rebuild_header	   = inet_sk_rebuild_header,
2021 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
2022 	.conn_request	   = tcp_v6_conn_request,
2023 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
2024 	.net_header_len	   = sizeof(struct iphdr),
2025 	.setsockopt	   = ipv6_setsockopt,
2026 	.getsockopt	   = ipv6_getsockopt,
2027 	.mtu_reduced	   = tcp_v4_mtu_reduced,
2028 };
2029 
2030 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2031 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
2032 #ifdef CONFIG_TCP_MD5SIG
2033 	.md5_lookup	=	tcp_v4_md5_lookup,
2034 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
2035 	.md5_parse	=	tcp_v6_parse_md5_keys,
2036 #endif
2037 #ifdef CONFIG_TCP_AO
2038 	.ao_lookup	=	tcp_v6_ao_lookup,
2039 	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
2040 	.ao_parse	=	tcp_v6_parse_ao,
2041 	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
2042 #endif
2043 };
2044 
2045 static void tcp6_destruct_sock(struct sock *sk)
2046 {
2047 	tcp_md5_destruct_sock(sk);
2048 	tcp_ao_destroy_sock(sk, false);
2049 	inet6_sock_destruct(sk);
2050 }
2051 #endif
2052 
2053 /* NOTE: sk_alloc() already zeroes a lot of this state explicitly,
2054  *       so it need not be done here.
2055  */
2056 static int tcp_v6_init_sock(struct sock *sk)
2057 {
2058 	struct inet_connection_sock *icsk = inet_csk(sk);
2059 
2060 	tcp_init_sock(sk);
2061 
2062 	icsk->icsk_af_ops = &ipv6_specific;
2063 
2064 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2065 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
2066 	sk->sk_destruct = tcp6_destruct_sock;
2067 #endif
2068 
2069 	return 0;
2070 }
2071 
2072 #ifdef CONFIG_PROC_FS
2073 /* Proc filesystem TCPv6 sock list dumping. */
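/* Illustrative only: with the format used below, a socket listening on
 * [::]:22 would appear roughly as
 *
 *    0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A ...
 *
 * i.e. each address printed as four raw 32-bit words, ports and state in hex.
 */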
2074 static void get_openreq6(struct seq_file *seq,
2075 			 const struct request_sock *req, int i)
2076 {
2077 	long ttd = req->rsk_timer.expires - jiffies;
2078 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
2079 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
2080 
2081 	if (ttd < 0)
2082 		ttd = 0;
2083 
2084 	seq_printf(seq,
2085 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2086 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
2087 		   i,
2088 		   src->s6_addr32[0], src->s6_addr32[1],
2089 		   src->s6_addr32[2], src->s6_addr32[3],
2090 		   inet_rsk(req)->ir_num,
2091 		   dest->s6_addr32[0], dest->s6_addr32[1],
2092 		   dest->s6_addr32[2], dest->s6_addr32[3],
2093 		   ntohs(inet_rsk(req)->ir_rmt_port),
2094 		   TCP_SYN_RECV,
2095 		   0, 0, /* could print option size, but that is af dependent. */
2096 		   1,   /* timers active (only the expire timer) */
2097 		   jiffies_to_clock_t(ttd),
2098 		   req->num_timeout,
2099 		   from_kuid_munged(seq_user_ns(seq),
2100 				    sk_uid(req->rsk_listener)),
2101 		   0,  /* non-standard timer */
2102 		   0, /* open_requests have no inode */
2103 		   0, req);
2104 }
2105 
2106 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2107 {
2108 	const struct in6_addr *dest, *src;
2109 	__u16 destp, srcp;
2110 	int timer_active;
2111 	unsigned long timer_expires;
2112 	const struct inet_sock *inet = inet_sk(sp);
2113 	const struct tcp_sock *tp = tcp_sk(sp);
2114 	const struct inet_connection_sock *icsk = inet_csk(sp);
2115 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2116 	u8 icsk_pending;
2117 	int rx_queue;
2118 	int state;
2119 
2120 	dest  = &sp->sk_v6_daddr;
2121 	src   = &sp->sk_v6_rcv_saddr;
2122 	destp = ntohs(inet->inet_dport);
2123 	srcp  = ntohs(inet->inet_sport);
2124 
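	/* timer_active encodes which timer the tm->when countdown refers to:
	 * 1 retransmit/REO/loss probe, 4 zero-window probe, 2 keepalive,
	 * 0 none.  get_timewait6_sock() reports 3 for TIME_WAIT sockets.
	 */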
2125 	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
2126 	if (icsk_pending == ICSK_TIME_RETRANS ||
2127 	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2128 	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
2129 		timer_active	= 1;
2130 		timer_expires	= tcp_timeout_expires(sp);
2131 	} else if (icsk_pending == ICSK_TIME_PROBE0) {
2132 		timer_active	= 4;
2133 		timer_expires	= tcp_timeout_expires(sp);
2134 	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
2135 		timer_active	= 2;
2136 		timer_expires	= icsk->icsk_keepalive_timer.expires;
2137 	} else {
2138 		timer_active	= 0;
2139 		timer_expires = jiffies;
2140 	}
2141 
2142 	state = inet_sk_state_load(sp);
2143 	if (state == TCP_LISTEN)
2144 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2145 	else
2146 		/* Because we don't lock the socket,
2147 		 * we might find a transient negative value.
2148 		 */
2149 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2150 				      READ_ONCE(tp->copied_seq), 0);
2151 
2152 	seq_printf(seq,
2153 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2154 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %lu %lu %u %u %d\n",
2155 		   i,
2156 		   src->s6_addr32[0], src->s6_addr32[1],
2157 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2158 		   dest->s6_addr32[0], dest->s6_addr32[1],
2159 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2160 		   state,
2161 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2162 		   rx_queue,
2163 		   timer_active,
2164 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2165 		   READ_ONCE(icsk->icsk_retransmits),
2166 		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
2167 		   READ_ONCE(icsk->icsk_probes_out),
2168 		   sock_i_ino(sp),
2169 		   refcount_read(&sp->sk_refcnt), sp,
2170 		   jiffies_to_clock_t(icsk->icsk_rto),
2171 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2172 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2173 		   tcp_snd_cwnd(tp),
2174 		   state == TCP_LISTEN ?
2175 			fastopenq->max_qlen :
2176 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2177 		   );
2178 }
2179 
2180 static void get_timewait6_sock(struct seq_file *seq,
2181 			       struct inet_timewait_sock *tw, int i)
2182 {
2183 	long delta = tw->tw_timer.expires - jiffies;
2184 	const struct in6_addr *dest, *src;
2185 	__u16 destp, srcp;
2186 
2187 	dest = &tw->tw_v6_daddr;
2188 	src  = &tw->tw_v6_rcv_saddr;
2189 	destp = ntohs(tw->tw_dport);
2190 	srcp  = ntohs(tw->tw_sport);
2191 
2192 	seq_printf(seq,
2193 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2194 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2195 		   i,
2196 		   src->s6_addr32[0], src->s6_addr32[1],
2197 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2198 		   dest->s6_addr32[0], dest->s6_addr32[1],
2199 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2200 		   READ_ONCE(tw->tw_substate), 0, 0,
2201 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2202 		   refcount_read(&tw->tw_refcnt), tw);
2203 }
2204 
2205 static int tcp6_seq_show(struct seq_file *seq, void *v)
2206 {
2207 	struct tcp_iter_state *st;
2208 	struct sock *sk = v;
2209 
2210 	if (v == SEQ_START_TOKEN) {
2211 		seq_puts(seq,
2212 			 "  sl  "
2213 			 "local_address                         "
2214 			 "remote_address                        "
2215 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2216 			 "   uid  timeout inode\n");
2217 		goto out;
2218 	}
2219 	st = seq->private;
2220 
2221 	if (sk->sk_state == TCP_TIME_WAIT)
2222 		get_timewait6_sock(seq, v, st->num);
2223 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2224 		get_openreq6(seq, v, st->num);
2225 	else
2226 		get_tcp6_sock(seq, v, st->num);
2227 out:
2228 	return 0;
2229 }
2230 
2231 static const struct seq_operations tcp6_seq_ops = {
2232 	.show		= tcp6_seq_show,
2233 	.start		= tcp_seq_start,
2234 	.next		= tcp_seq_next,
2235 	.stop		= tcp_seq_stop,
2236 };
2237 
2238 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2239 	.family		= AF_INET6,
2240 };
2241 
2242 int __net_init tcp6_proc_init(struct net *net)
2243 {
2244 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2245 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2246 		return -ENOMEM;
2247 	return 0;
2248 }
2249 
2250 void tcp6_proc_exit(struct net *net)
2251 {
2252 	remove_proc_entry("tcp6", net->proc_net);
2253 }
2254 #endif
2255 
2256 struct proto tcpv6_prot = {
2257 	.name			= "TCPv6",
2258 	.owner			= THIS_MODULE,
2259 	.close			= tcp_close,
2260 	.pre_connect		= tcp_v6_pre_connect,
2261 	.connect		= tcp_v6_connect,
2262 	.disconnect		= tcp_disconnect,
2263 	.accept			= inet_csk_accept,
2264 	.ioctl			= tcp_ioctl,
2265 	.init			= tcp_v6_init_sock,
2266 	.destroy		= tcp_v4_destroy_sock,
2267 	.shutdown		= tcp_shutdown,
2268 	.setsockopt		= tcp_setsockopt,
2269 	.getsockopt		= tcp_getsockopt,
2270 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2271 	.keepalive		= tcp_set_keepalive,
2272 	.recvmsg		= tcp_recvmsg,
2273 	.sendmsg		= tcp_sendmsg,
2274 	.splice_eof		= tcp_splice_eof,
2275 	.backlog_rcv		= tcp_v6_do_rcv,
2276 	.release_cb		= tcp_release_cb,
2277 	.hash			= inet_hash,
2278 	.unhash			= inet_unhash,
2279 	.get_port		= inet_csk_get_port,
2280 	.put_port		= inet_put_port,
2281 #ifdef CONFIG_BPF_SYSCALL
2282 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2283 #endif
2284 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2285 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2286 	.stream_memory_free	= tcp_stream_memory_free,
2287 	.sockets_allocated	= &tcp_sockets_allocated,
2288 
2289 	.memory_allocated	= &net_aligned_data.tcp_memory_allocated,
2290 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2291 
2292 	.memory_pressure	= &tcp_memory_pressure,
2293 	.sysctl_mem		= sysctl_tcp_mem,
2294 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2295 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2296 	.max_header		= MAX_TCP_HEADER,
2297 	.obj_size		= sizeof(struct tcp6_sock),
2298 	.freeptr_offset		= offsetof(struct tcp6_sock,
2299 					   tcp.inet_conn.icsk_inet.sk.sk_freeptr),
2300 	.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2301 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2302 	.twsk_prot		= &tcp6_timewait_sock_ops,
2303 	.rsk_prot		= &tcp6_request_sock_ops,
2304 	.h.hashinfo		= NULL,
2305 	.no_autobind		= true,
2306 	.diag_destroy		= tcp_abort,
2307 };
2308 EXPORT_SYMBOL_GPL(tcpv6_prot);
2309 
2311 static struct inet_protosw tcpv6_protosw = {
2312 	.type		=	SOCK_STREAM,
2313 	.protocol	=	IPPROTO_TCP,
2314 	.prot		=	&tcpv6_prot,
2315 	.ops		=	&inet6_stream_ops,
2316 	.flags		=	INET_PROTOSW_PERMANENT |
2317 				INET_PROTOSW_ICSK,
2318 };
2319 
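/* Every network namespace gets its own control socket.  The stack uses it
 * to emit packets that have no full socket of their own, e.g. the RSTs
 * built by tcp_v6_send_reset().
 */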
2320 static int __net_init tcpv6_net_init(struct net *net)
2321 {
2322 	int res;
2323 
2324 	res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2325 				   SOCK_RAW, IPPROTO_TCP, net);
2326 	if (!res)
2327 		net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
2328 
2329 	return res;
2330 }
2331 
2332 static void __net_exit tcpv6_net_exit(struct net *net)
2333 {
2334 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2335 }
2336 
2337 static struct pernet_operations tcpv6_net_ops = {
2338 	.init	    = tcpv6_net_init,
2339 	.exit	    = tcpv6_net_exit,
2340 };
2341 
2342 int __init tcpv6_init(void)
2343 {
2344 	int ret;
2345 
2346 	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
2347 		.handler     = tcp_v6_rcv,
2348 		.err_handler = tcp_v6_err,
2349 		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
2350 	};
2351 	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2352 	if (ret)
2353 		goto out;
2354 
2355 	/* register inet6 protocol */
2356 	ret = inet6_register_protosw(&tcpv6_protosw);
2357 	if (ret)
2358 		goto out_tcpv6_protocol;
2359 
2360 	ret = register_pernet_subsys(&tcpv6_net_ops);
2361 	if (ret)
2362 		goto out_tcpv6_protosw;
2363 
2364 	ret = mptcpv6_init();
2365 	if (ret)
2366 		goto out_tcpv6_pernet_subsys;
2367 
2368 out:
2369 	return ret;
2370 
2371 out_tcpv6_pernet_subsys:
2372 	unregister_pernet_subsys(&tcpv6_net_ops);
2373 out_tcpv6_protosw:
2374 	inet6_unregister_protosw(&tcpv6_protosw);
2375 out_tcpv6_protocol:
2376 	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2377 	goto out;
2378 }
2379 
2380 void tcpv6_exit(void)
2381 {
2382 	unregister_pernet_subsys(&tcpv6_net_ops);
2383 	inet6_unregister_protosw(&tcpv6_protosw);
2384 	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2385 }
2386