xref: /linux/net/ipv6/tcp_ipv6.c (revision 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/hotdata.h>
62 #include <net/busy_poll.h>
63 #include <net/rstreason.h>
64 
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
67 
68 #include <crypto/hash.h>
69 #include <linux/scatterlist.h>
70 
71 #include <trace/events/tcp.h>
72 
73 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
74 			      enum sk_rst_reason reason);
75 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
76 				      struct request_sock *req);
77 
78 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
79 
80 static const struct inet_connection_sock_af_ops ipv6_mapped;
81 const struct inet_connection_sock_af_ops ipv6_specific;
82 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
83 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
85 #endif
86 
87 /* Helper returning the inet6 address from a given tcp socket.
88  * It can be used in TCP stack instead of inet6_sk(sk).
89  * This avoids a dereference and allows compiler optimizations.
90  * It is a specialized version of inet6_sk_generic().
91  */
92 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
93 					      struct tcp6_sock, tcp)->inet6)
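/* Illustrative sketch (the same pattern appears in tcp_v6_connect() below):
 *	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 * Because struct tcp6_sock places the inet6 part at a fixed offset from the
 * tcp_sock, the address is computed directly instead of loading
 * inet_sk(sk)->pinet6.
 */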
94 
95 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
96 {
97 	struct dst_entry *dst = skb_dst(skb);
98 
99 	if (dst && dst_hold_safe(dst)) {
100 		rcu_assign_pointer(sk->sk_rx_dst, dst);
101 		sk->sk_rx_dst_ifindex = skb->skb_iif;
102 		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
103 	}
104 }
105 
106 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
107 {
108 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
109 				ipv6_hdr(skb)->saddr.s6_addr32,
110 				tcp_hdr(skb)->dest,
111 				tcp_hdr(skb)->source);
112 }
113 
114 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
115 {
116 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
117 				   ipv6_hdr(skb)->saddr.s6_addr32);
118 }
119 
120 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
121 			      int addr_len)
122 {
123 	/* This check is replicated from tcp_v6_connect() and intended to
124 	 * prevent BPF program called below from accessing bytes that are out
125 	 * of the bound specified by user in addr_len.
126 	 */
127 	if (addr_len < SIN6_LEN_RFC2133)
128 		return -EINVAL;
129 
130 	sock_owned_by_me(sk);
131 
132 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
133 }
134 
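/* tcp_v6_connect(), below, validates the destination address, handles
 * v4-mapped destinations by switching to the ipv6_mapped ops and delegating
 * to tcp_v4_connect(), and otherwise routes the flow, binds a source address,
 * moves the socket to TCP_SYN_SENT and sends the SYN via tcp_connect().
 */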
135 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
136 			  int addr_len)
137 {
138 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
139 	struct inet_connection_sock *icsk = inet_csk(sk);
140 	struct in6_addr *saddr = NULL, *final_p, final;
141 	struct inet_timewait_death_row *tcp_death_row;
142 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
143 	struct inet_sock *inet = inet_sk(sk);
144 	struct tcp_sock *tp = tcp_sk(sk);
145 	struct net *net = sock_net(sk);
146 	struct ipv6_txoptions *opt;
147 	struct dst_entry *dst;
148 	struct flowi6 fl6;
149 	int addr_type;
150 	int err;
151 
152 	if (addr_len < SIN6_LEN_RFC2133)
153 		return -EINVAL;
154 
155 	if (usin->sin6_family != AF_INET6)
156 		return -EAFNOSUPPORT;
157 
158 	memset(&fl6, 0, sizeof(fl6));
159 
160 	if (inet6_test_bit(SNDFLOW, sk)) {
161 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
162 		IP6_ECN_flow_init(fl6.flowlabel);
163 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
164 			struct ip6_flowlabel *flowlabel;
165 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
166 			if (IS_ERR(flowlabel))
167 				return -EINVAL;
168 			fl6_sock_release(flowlabel);
169 		}
170 	}
171 
172 	/*
173 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
174 	 */
175 
176 	if (ipv6_addr_any(&usin->sin6_addr)) {
177 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
178 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
179 					       &usin->sin6_addr);
180 		else
181 			usin->sin6_addr = in6addr_loopback;
182 	}
183 
184 	addr_type = ipv6_addr_type(&usin->sin6_addr);
185 
186 	if (addr_type & IPV6_ADDR_MULTICAST)
187 		return -ENETUNREACH;
188 
189 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
190 		if (addr_len >= sizeof(struct sockaddr_in6) &&
191 		    usin->sin6_scope_id) {
192 			/* If interface is set while binding, indices
193 			 * must coincide.
194 			 */
195 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
196 				return -EINVAL;
197 
198 			sk->sk_bound_dev_if = usin->sin6_scope_id;
199 		}
200 
201 		/* Connect to link-local address requires an interface */
202 		if (!sk->sk_bound_dev_if)
203 			return -EINVAL;
204 	}
205 
206 	if (tp->rx_opt.ts_recent_stamp &&
207 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
208 		tp->rx_opt.ts_recent = 0;
209 		tp->rx_opt.ts_recent_stamp = 0;
210 		WRITE_ONCE(tp->write_seq, 0);
211 	}
212 
213 	sk->sk_v6_daddr = usin->sin6_addr;
214 	np->flow_label = fl6.flowlabel;
215 
216 	/*
217 	 *	TCP over IPv4
218 	 */
219 
220 	if (addr_type & IPV6_ADDR_MAPPED) {
221 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
222 		struct sockaddr_in sin;
223 
224 		if (ipv6_only_sock(sk))
225 			return -ENETUNREACH;
226 
227 		sin.sin_family = AF_INET;
228 		sin.sin_port = usin->sin6_port;
229 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
230 
231 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
232 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
233 		if (sk_is_mptcp(sk))
234 			mptcpv6_handle_mapped(sk, true);
235 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
236 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
237 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
238 #endif
239 
240 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
241 
242 		if (err) {
243 			icsk->icsk_ext_hdr_len = exthdrlen;
244 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
245 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
246 			if (sk_is_mptcp(sk))
247 				mptcpv6_handle_mapped(sk, false);
248 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
249 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
250 			tp->af_specific = &tcp_sock_ipv6_specific;
251 #endif
252 			goto failure;
253 		}
254 		np->saddr = sk->sk_v6_rcv_saddr;
255 
256 		return err;
257 	}
258 
259 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
260 		saddr = &sk->sk_v6_rcv_saddr;
261 
262 	fl6.flowi6_proto = IPPROTO_TCP;
263 	fl6.daddr = sk->sk_v6_daddr;
264 	fl6.saddr = saddr ? *saddr : np->saddr;
265 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
266 	fl6.flowi6_oif = sk->sk_bound_dev_if;
267 	fl6.flowi6_mark = sk->sk_mark;
268 	fl6.fl6_dport = usin->sin6_port;
269 	fl6.fl6_sport = inet->inet_sport;
270 	fl6.flowi6_uid = sk->sk_uid;
271 
272 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
273 	final_p = fl6_update_dst(&fl6, opt, &final);
274 
275 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
276 
277 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
278 	if (IS_ERR(dst)) {
279 		err = PTR_ERR(dst);
280 		goto failure;
281 	}
282 
283 	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
284 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
285 
286 	if (!saddr) {
287 		saddr = &fl6.saddr;
288 
289 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
290 		if (err)
291 			goto failure;
292 	}
293 
294 	/* set the source address */
295 	np->saddr = *saddr;
296 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 
298 	sk->sk_gso_type = SKB_GSO_TCPV6;
299 	ip6_dst_store(sk, dst, NULL, NULL);
300 
301 	icsk->icsk_ext_hdr_len = 0;
302 	if (opt)
303 		icsk->icsk_ext_hdr_len = opt->opt_flen +
304 					 opt->opt_nflen;
305 
306 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 
308 	inet->inet_dport = usin->sin6_port;
309 
310 	tcp_set_state(sk, TCP_SYN_SENT);
311 	err = inet6_hash_connect(tcp_death_row, sk);
312 	if (err)
313 		goto late_failure;
314 
315 	sk_set_txhash(sk);
316 
317 	if (likely(!tp->repair)) {
318 		if (!tp->write_seq)
319 			WRITE_ONCE(tp->write_seq,
320 				   secure_tcpv6_seq(np->saddr.s6_addr32,
321 						    sk->sk_v6_daddr.s6_addr32,
322 						    inet->inet_sport,
323 						    inet->inet_dport));
324 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
325 						   sk->sk_v6_daddr.s6_addr32);
326 	}
327 
328 	if (tcp_fastopen_defer_connect(sk, &err))
329 		return err;
330 	if (err)
331 		goto late_failure;
332 
333 	err = tcp_connect(sk);
334 	if (err)
335 		goto late_failure;
336 
337 	return 0;
338 
339 late_failure:
340 	tcp_set_state(sk, TCP_CLOSE);
341 	inet_bhash2_reset_saddr(sk);
342 failure:
343 	inet->inet_dport = 0;
344 	sk->sk_route_caps = 0;
345 	return err;
346 }
347 
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350 	struct dst_entry *dst;
351 	u32 mtu;
352 
353 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
354 		return;
355 
356 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
357 
358 	/* Drop requests trying to increase our current mss.
359 	 * Check done in __ip6_rt_update_pmtu() is too late.
360 	 */
361 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
362 		return;
363 
364 	dst = inet6_csk_update_pmtu(sk, mtu);
365 	if (!dst)
366 		return;
367 
368 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
369 		tcp_sync_mss(sk, dst_mtu(dst));
370 		tcp_simple_retransmit(sk);
371 	}
372 }
373 
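/* tcp_v6_err(), below, is the ICMPv6 error handler for TCP. It looks up the
 * socket that the offending segment belongs to, handles NDISC_REDIRECT and
 * ICMPV6_PKT_TOOBIG (PMTU reduction) specially, and otherwise reports the
 * converted error to the socket.
 */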
374 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
375 		u8 type, u8 code, int offset, __be32 info)
376 {
377 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
378 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
379 	struct net *net = dev_net_rcu(skb->dev);
380 	struct request_sock *fastopen;
381 	struct ipv6_pinfo *np;
382 	struct tcp_sock *tp;
383 	__u32 seq, snd_una;
384 	struct sock *sk;
385 	bool fatal;
386 	int err;
387 
388 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
389 					&hdr->daddr, th->dest,
390 					&hdr->saddr, ntohs(th->source),
391 					skb->dev->ifindex, inet6_sdif(skb));
392 
393 	if (!sk) {
394 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
395 				  ICMP6_MIB_INERRORS);
396 		return -ENOENT;
397 	}
398 
399 	if (sk->sk_state == TCP_TIME_WAIT) {
400 		/* To increase the counter of ignored icmps for TCP-AO */
401 		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
402 		inet_twsk_put(inet_twsk(sk));
403 		return 0;
404 	}
405 	seq = ntohl(th->seq);
406 	fatal = icmpv6_err_convert(type, code, &err);
407 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
408 		tcp_req_err(sk, seq, fatal);
409 		return 0;
410 	}
411 
412 	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
413 		sock_put(sk);
414 		return 0;
415 	}
416 
417 	bh_lock_sock(sk);
418 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
419 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
420 
421 	if (sk->sk_state == TCP_CLOSE)
422 		goto out;
423 
424 	if (static_branch_unlikely(&ip6_min_hopcount)) {
425 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
426 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
427 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
428 			goto out;
429 		}
430 	}
431 
432 	tp = tcp_sk(sk);
433 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
434 	fastopen = rcu_dereference(tp->fastopen_rsk);
435 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
436 	if (sk->sk_state != TCP_LISTEN &&
437 	    !between(seq, snd_una, tp->snd_nxt)) {
438 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
439 		goto out;
440 	}
441 
442 	np = tcp_inet6_sk(sk);
443 
444 	if (type == NDISC_REDIRECT) {
445 		if (!sock_owned_by_user(sk)) {
446 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
447 
448 			if (dst)
449 				dst->ops->redirect(dst, sk, skb);
450 		}
451 		goto out;
452 	}
453 
454 	if (type == ICMPV6_PKT_TOOBIG) {
455 		u32 mtu = ntohl(info);
456 
457 		/* We are not interested in TCP_LISTEN and open_requests
458 		 * (SYN-ACKs sent out by Linux are always < 576 bytes so
459 		 * they should go through unfragmented).
460 		 */
461 		if (sk->sk_state == TCP_LISTEN)
462 			goto out;
463 
464 		if (!ip6_sk_accept_pmtu(sk))
465 			goto out;
466 
467 		if (mtu < IPV6_MIN_MTU)
468 			goto out;
469 
470 		WRITE_ONCE(tp->mtu_info, mtu);
471 
472 		if (!sock_owned_by_user(sk))
473 			tcp_v6_mtu_reduced(sk);
474 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
475 					   &sk->sk_tsq_flags))
476 			sock_hold(sk);
477 		goto out;
478 	}
479 
480 
481 	/* Might be for a request_sock */
482 	switch (sk->sk_state) {
483 	case TCP_SYN_SENT:
484 	case TCP_SYN_RECV:
485 		/* Only in fast or simultaneous open. If a fast open socket is
486 		 * already accepted it is treated as a connected one below.
487 		 */
488 		if (fastopen && !fastopen->sk)
489 			break;
490 
491 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
492 
493 		if (!sock_owned_by_user(sk))
494 			tcp_done_with_error(sk, err);
495 		else
496 			WRITE_ONCE(sk->sk_err_soft, err);
497 		goto out;
498 	case TCP_LISTEN:
499 		break;
500 	default:
501 		/* check if this ICMP message allows revert of backoff.
502 		 * (see RFC 6069)
503 		 */
504 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
505 		    code == ICMPV6_NOROUTE)
506 			tcp_ld_RTO_revert(sk, seq);
507 	}
508 
509 	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
510 		WRITE_ONCE(sk->sk_err, err);
511 		sk_error_report(sk);
512 	} else {
513 		WRITE_ONCE(sk->sk_err_soft, err);
514 	}
515 out:
516 	bh_unlock_sock(sk);
517 	sock_put(sk);
518 	return 0;
519 }
520 
521 
522 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
523 			      struct flowi *fl,
524 			      struct request_sock *req,
525 			      struct tcp_fastopen_cookie *foc,
526 			      enum tcp_synack_type synack_type,
527 			      struct sk_buff *syn_skb)
528 {
529 	struct inet_request_sock *ireq = inet_rsk(req);
530 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
531 	struct ipv6_txoptions *opt;
532 	struct flowi6 *fl6 = &fl->u.ip6;
533 	struct sk_buff *skb;
534 	int err = -ENOMEM;
535 	u8 tclass;
536 
537 	/* First, grab a route. */
538 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
539 					       IPPROTO_TCP)) == NULL)
540 		goto done;
541 
542 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
543 
544 	if (skb) {
545 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
546 				    &ireq->ir_v6_rmt_addr);
547 
548 		fl6->daddr = ireq->ir_v6_rmt_addr;
549 		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
550 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
551 
552 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
553 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
554 				(np->tclass & INET_ECN_MASK) :
555 				np->tclass;
556 
557 		if (!INET_ECN_is_capable(tclass) &&
558 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
559 			tclass |= INET_ECN_ECT_0;
560 
561 		rcu_read_lock();
562 		opt = ireq->ipv6_opt;
563 		if (!opt)
564 			opt = rcu_dereference(np->opt);
565 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
566 			       opt, tclass, READ_ONCE(sk->sk_priority));
567 		rcu_read_unlock();
568 		err = net_xmit_eval(err);
569 	}
570 
571 done:
572 	return err;
573 }
574 
575 
576 static void tcp_v6_reqsk_destructor(struct request_sock *req)
577 {
578 	kfree(inet_rsk(req)->ipv6_opt);
579 	consume_skb(inet_rsk(req)->pktopts);
580 }
581 
582 #ifdef CONFIG_TCP_MD5SIG
583 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
584 						   const struct in6_addr *addr,
585 						   int l3index)
586 {
587 	return tcp_md5_do_lookup(sk, l3index,
588 				 (union tcp_md5_addr *)addr, AF_INET6);
589 }
590 
591 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
592 						const struct sock *addr_sk)
593 {
594 	int l3index;
595 
596 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
597 						 addr_sk->sk_bound_dev_if);
598 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
599 				    l3index);
600 }
601 
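/* tcp_v6_parse_md5_keys(), below, implements the TCP_MD5SIG/TCP_MD5SIG_EXT
 * socket options. It validates the request, optionally resolves an L3 master
 * device index, and adds or deletes an MD5 key, with v4-mapped peers stored
 * as AF_INET keys.
 */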
602 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
603 				 sockptr_t optval, int optlen)
604 {
605 	struct tcp_md5sig cmd;
606 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
607 	union tcp_ao_addr *addr;
608 	int l3index = 0;
609 	u8 prefixlen;
610 	bool l3flag;
611 	u8 flags;
612 
613 	if (optlen < sizeof(cmd))
614 		return -EINVAL;
615 
616 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
617 		return -EFAULT;
618 
619 	if (sin6->sin6_family != AF_INET6)
620 		return -EINVAL;
621 
622 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
623 	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
624 
625 	if (optname == TCP_MD5SIG_EXT &&
626 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
627 		prefixlen = cmd.tcpm_prefixlen;
628 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
629 					prefixlen > 32))
630 			return -EINVAL;
631 	} else {
632 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
633 	}
634 
635 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
636 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
637 		struct net_device *dev;
638 
639 		rcu_read_lock();
640 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
641 		if (dev && netif_is_l3_master(dev))
642 			l3index = dev->ifindex;
643 		rcu_read_unlock();
644 
645 		/* ok to reference set/not set outside of rcu;
646 		 * right now device MUST be an L3 master
647 		 */
648 		if (!dev || !l3index)
649 			return -EINVAL;
650 	}
651 
652 	if (!cmd.tcpm_keylen) {
653 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
654 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
655 					      AF_INET, prefixlen,
656 					      l3index, flags);
657 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
658 				      AF_INET6, prefixlen, l3index, flags);
659 	}
660 
661 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
662 		return -EINVAL;
663 
664 	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
665 		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
666 
667 		/* Don't allow keys for peers that have a matching TCP-AO key.
668 		 * See the comment in tcp_ao_add_cmd()
669 		 */
670 		if (tcp_ao_required(sk, addr, AF_INET,
671 				    l3flag ? l3index : -1, false))
672 			return -EKEYREJECTED;
673 		return tcp_md5_do_add(sk, addr,
674 				      AF_INET, prefixlen, l3index, flags,
675 				      cmd.tcpm_key, cmd.tcpm_keylen);
676 	}
677 
678 	addr = (union tcp_md5_addr *)&sin6->sin6_addr;
679 
680 	/* Don't allow keys for peers that have a matching TCP-AO key.
681 	 * See the comment in tcp_ao_add_cmd()
682 	 */
683 	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
684 		return -EKEYREJECTED;
685 
686 	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
687 			      cmd.tcpm_key, cmd.tcpm_keylen);
688 }
689 
690 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
691 				   const struct in6_addr *daddr,
692 				   const struct in6_addr *saddr,
693 				   const struct tcphdr *th, int nbytes)
694 {
695 	struct tcp6_pseudohdr *bp;
696 	struct scatterlist sg;
697 	struct tcphdr *_th;
698 
699 	bp = hp->scratch;
700 	/* 1. TCP pseudo-header (RFC2460) */
701 	bp->saddr = *saddr;
702 	bp->daddr = *daddr;
703 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
704 	bp->len = cpu_to_be32(nbytes);
705 
706 	_th = (struct tcphdr *)(bp + 1);
707 	memcpy(_th, th, sizeof(*th));
708 	_th->check = 0;
709 
710 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
711 	ahash_request_set_crypt(hp->req, &sg, NULL,
712 				sizeof(*bp) + sizeof(*th));
713 	return crypto_ahash_update(hp->req);
714 }
715 
716 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
717 			       const struct in6_addr *daddr, struct in6_addr *saddr,
718 			       const struct tcphdr *th)
719 {
720 	struct tcp_sigpool hp;
721 
722 	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
723 		goto clear_hash_nostart;
724 
725 	if (crypto_ahash_init(hp.req))
726 		goto clear_hash;
727 	if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
728 		goto clear_hash;
729 	if (tcp_md5_hash_key(&hp, key))
730 		goto clear_hash;
731 	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
732 	if (crypto_ahash_final(hp.req))
733 		goto clear_hash;
734 
735 	tcp_sigpool_end(&hp);
736 	return 0;
737 
738 clear_hash:
739 	tcp_sigpool_end(&hp);
740 clear_hash_nostart:
741 	memset(md5_hash, 0, 16);
742 	return 1;
743 }
744 
745 static int tcp_v6_md5_hash_skb(char *md5_hash,
746 			       const struct tcp_md5sig_key *key,
747 			       const struct sock *sk,
748 			       const struct sk_buff *skb)
749 {
750 	const struct tcphdr *th = tcp_hdr(skb);
751 	const struct in6_addr *saddr, *daddr;
752 	struct tcp_sigpool hp;
753 
754 	if (sk) { /* valid for establish/request sockets */
755 		saddr = &sk->sk_v6_rcv_saddr;
756 		daddr = &sk->sk_v6_daddr;
757 	} else {
758 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
759 		saddr = &ip6h->saddr;
760 		daddr = &ip6h->daddr;
761 	}
762 
763 	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
764 		goto clear_hash_nostart;
765 
766 	if (crypto_ahash_init(hp.req))
767 		goto clear_hash;
768 
769 	if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
770 		goto clear_hash;
771 	if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
772 		goto clear_hash;
773 	if (tcp_md5_hash_key(&hp, key))
774 		goto clear_hash;
775 	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
776 	if (crypto_ahash_final(hp.req))
777 		goto clear_hash;
778 
779 	tcp_sigpool_end(&hp);
780 	return 0;
781 
782 clear_hash:
783 	tcp_sigpool_end(&hp);
784 clear_hash_nostart:
785 	memset(md5_hash, 0, 16);
786 	return 1;
787 }
788 #endif
789 
790 static void tcp_v6_init_req(struct request_sock *req,
791 			    const struct sock *sk_listener,
792 			    struct sk_buff *skb,
793 			    u32 tw_isn)
794 {
795 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
796 	struct inet_request_sock *ireq = inet_rsk(req);
797 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
798 
799 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
800 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
801 	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
802 	ireq->ir_loc_addr = LOOPBACK4_IPV6;
803 
804 	/* So that link locals have meaning */
805 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
806 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
807 		ireq->ir_iif = tcp_v6_iif(skb);
808 
809 	if (!tw_isn &&
810 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
811 	     np->rxopt.bits.rxinfo ||
812 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
813 	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
814 		refcount_inc(&skb->users);
815 		ireq->pktopts = skb;
816 	}
817 }
818 
819 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
820 					  struct sk_buff *skb,
821 					  struct flowi *fl,
822 					  struct request_sock *req,
823 					  u32 tw_isn)
824 {
825 	tcp_v6_init_req(req, sk, skb, tw_isn);
826 
827 	if (security_inet_conn_request(sk, skb, req))
828 		return NULL;
829 
830 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
831 }
832 
833 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
834 	.family		=	AF_INET6,
835 	.obj_size	=	sizeof(struct tcp6_request_sock),
836 	.rtx_syn_ack	=	tcp_rtx_synack,
837 	.send_ack	=	tcp_v6_reqsk_send_ack,
838 	.destructor	=	tcp_v6_reqsk_destructor,
839 	.send_reset	=	tcp_v6_send_reset,
840 	.syn_ack_timeout =	tcp_syn_ack_timeout,
841 };
842 
843 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
844 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
845 				sizeof(struct ipv6hdr),
846 #ifdef CONFIG_TCP_MD5SIG
847 	.req_md5_lookup	=	tcp_v6_md5_lookup,
848 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
849 #endif
850 #ifdef CONFIG_TCP_AO
851 	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
852 	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
853 	.ao_synack_hash =	tcp_v6_ao_synack_hash,
854 #endif
855 #ifdef CONFIG_SYN_COOKIES
856 	.cookie_init_seq =	cookie_v6_init_sequence,
857 #endif
858 	.route_req	=	tcp_v6_route_req,
859 	.init_seq	=	tcp_v6_init_seq,
860 	.init_ts_off	=	tcp_v6_init_ts_off,
861 	.send_synack	=	tcp_v6_send_synack,
862 };
863 
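/* tcp_v6_send_response(), below, builds a bare TCP segment (RST or ACK,
 * optionally carrying timestamps, MD5 or TCP-AO options), swaps the addresses
 * and ports taken from the incoming skb, and transmits it through the
 * per-netns control socket. It is the common helper behind
 * tcp_v6_send_reset() and tcp_v6_send_ack().
 */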
864 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
865 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
866 				 int oif, int rst, u8 tclass, __be32 label,
867 				 u32 priority, u32 txhash, struct tcp_key *key)
868 {
869 	struct net *net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
870 	unsigned int tot_len = sizeof(struct tcphdr);
871 	struct sock *ctl_sk = net->ipv6.tcp_sk;
872 	const struct tcphdr *th = tcp_hdr(skb);
873 	__be32 mrst = 0, *topt;
874 	struct dst_entry *dst;
875 	struct sk_buff *buff;
876 	struct tcphdr *t1;
877 	struct flowi6 fl6;
878 	u32 mark = 0;
879 
880 	if (tsecr)
881 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
882 	if (tcp_key_is_md5(key))
883 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
884 	if (tcp_key_is_ao(key))
885 		tot_len += tcp_ao_len_aligned(key->ao_key);
886 
887 #ifdef CONFIG_MPTCP
888 	if (rst && !tcp_key_is_md5(key)) {
889 		mrst = mptcp_reset_option(skb);
890 
891 		if (mrst)
892 			tot_len += sizeof(__be32);
893 	}
894 #endif
895 
896 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
897 	if (!buff)
898 		return;
899 
900 	skb_reserve(buff, MAX_TCP_HEADER);
901 
902 	t1 = skb_push(buff, tot_len);
903 	skb_reset_transport_header(buff);
904 
905 	/* Swap the send and the receive. */
906 	memset(t1, 0, sizeof(*t1));
907 	t1->dest = th->source;
908 	t1->source = th->dest;
909 	t1->doff = tot_len / 4;
910 	t1->seq = htonl(seq);
911 	t1->ack_seq = htonl(ack);
912 	t1->ack = !rst || !th->ack;
913 	t1->rst = rst;
914 	t1->window = htons(win);
915 
916 	topt = (__be32 *)(t1 + 1);
917 
918 	if (tsecr) {
919 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
920 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
921 		*topt++ = htonl(tsval);
922 		*topt++ = htonl(tsecr);
923 	}
924 
925 	if (mrst)
926 		*topt++ = mrst;
927 
928 #ifdef CONFIG_TCP_MD5SIG
929 	if (tcp_key_is_md5(key)) {
930 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
931 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
932 		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
933 				    &ipv6_hdr(skb)->saddr,
934 				    &ipv6_hdr(skb)->daddr, t1);
935 	}
936 #endif
937 #ifdef CONFIG_TCP_AO
938 	if (tcp_key_is_ao(key)) {
939 		*topt++ = htonl((TCPOPT_AO << 24) |
940 				(tcp_ao_len(key->ao_key) << 16) |
941 				(key->ao_key->sndid << 8) |
942 				(key->rcv_next));
943 
944 		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
945 				key->traffic_key,
946 				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
947 				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
948 				t1, key->sne);
949 	}
950 #endif
951 
952 	memset(&fl6, 0, sizeof(fl6));
953 	fl6.daddr = ipv6_hdr(skb)->saddr;
954 	fl6.saddr = ipv6_hdr(skb)->daddr;
955 	fl6.flowlabel = label;
956 
957 	buff->ip_summed = CHECKSUM_PARTIAL;
958 
959 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
960 
961 	fl6.flowi6_proto = IPPROTO_TCP;
962 	if (rt6_need_strict(&fl6.daddr) && !oif)
963 		fl6.flowi6_oif = tcp_v6_iif(skb);
964 	else {
965 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
966 			oif = skb->skb_iif;
967 
968 		fl6.flowi6_oif = oif;
969 	}
970 
971 	if (sk) {
972 		/* unconstify the socket only to attach it to buff with care. */
973 		skb_set_owner_edemux(buff, (struct sock *)sk);
974 
975 		if (sk->sk_state == TCP_TIME_WAIT)
976 			mark = inet_twsk(sk)->tw_mark;
977 		else
978 			mark = READ_ONCE(sk->sk_mark);
979 		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
980 	}
981 	if (txhash) {
982 		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
983 		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
984 	}
985 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
986 	fl6.fl6_dport = t1->dest;
987 	fl6.fl6_sport = t1->source;
988 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
989 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
990 
991 	/* Pass a socket to ip6_dst_lookup whether or not it is for an RST;
992 	 * the underlying function will use it to retrieve the network
993 	 * namespace.
994 	 */
995 	if (sk && sk->sk_state != TCP_TIME_WAIT)
996 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
997 	else
998 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
999 	if (!IS_ERR(dst)) {
1000 		skb_dst_set(buff, dst);
1001 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1002 			 tclass, priority);
1003 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1004 		if (rst)
1005 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1006 		return;
1007 	}
1008 
1009 	kfree_skb(buff);
1010 }
1011 
1012 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
1013 			      enum sk_rst_reason reason)
1014 {
1015 	const struct tcphdr *th = tcp_hdr(skb);
1016 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1017 	const __u8 *md5_hash_location = NULL;
1018 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1019 	bool allocated_traffic_key = false;
1020 #endif
1021 	const struct tcp_ao_hdr *aoh;
1022 	struct tcp_key key = {};
1023 	u32 seq = 0, ack_seq = 0;
1024 	__be32 label = 0;
1025 	u32 priority = 0;
1026 	struct net *net;
1027 	u32 txhash = 0;
1028 	int oif = 0;
1029 #ifdef CONFIG_TCP_MD5SIG
1030 	unsigned char newhash[16];
1031 	int genhash;
1032 	struct sock *sk1 = NULL;
1033 #endif
1034 
1035 	if (th->rst)
1036 		return;
1037 
1038 	/* If sk is not NULL, it means we did a successful lookup and the incoming
1039 	 * route had to be correct. prequeue might have dropped our dst.
1040 	 */
1041 	if (!sk && !ipv6_unicast_destination(skb))
1042 		return;
1043 
1044 	net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
1045 	/* Invalid TCP option size or twice included auth */
1046 	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
1047 		return;
1048 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1049 	rcu_read_lock();
1050 #endif
1051 #ifdef CONFIG_TCP_MD5SIG
1052 	if (sk && sk_fullsock(sk)) {
1053 		int l3index;
1054 
1055 		/* sdif set, means packet ingressed via a device
1056 		 * in an L3 domain and inet_iif is set to it.
1057 		 */
1058 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1059 		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1060 		if (key.md5_key)
1061 			key.type = TCP_KEY_MD5;
1062 	} else if (md5_hash_location) {
1063 		int dif = tcp_v6_iif_l3_slave(skb);
1064 		int sdif = tcp_v6_sdif(skb);
1065 		int l3index;
1066 
1067 		/*
1068 		 * The active side is lost. Try to find the listening socket through
1069 		 * the source port, and then find the md5 key through that socket.
1070 		 * We do not lose security here:
1071 		 * the incoming packet is checked against the md5 hash of the found key;
1072 		 * no RST is generated if the md5 hash doesn't match.
1073 		 */
1074 		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1075 					    NULL, 0, &ipv6h->saddr, th->source,
1076 					    &ipv6h->daddr, ntohs(th->source),
1077 					    dif, sdif);
1078 		if (!sk1)
1079 			goto out;
1080 
1081 		/* sdif set, means packet ingressed via a device
1082 		 * in an L3 domain and dif is set to it.
1083 		 */
1084 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1085 
1086 		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1087 		if (!key.md5_key)
1088 			goto out;
1089 		key.type = TCP_KEY_MD5;
1090 
1091 		genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
1092 		if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
1093 			goto out;
1094 	}
1095 #endif
1096 
1097 	if (th->ack)
1098 		seq = ntohl(th->ack_seq);
1099 	else
1100 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1101 			  (th->doff << 2);
1102 
1103 #ifdef CONFIG_TCP_AO
1104 	if (aoh) {
1105 		int l3index;
1106 
1107 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1108 		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
1109 					 &key.ao_key, &key.traffic_key,
1110 					 &allocated_traffic_key,
1111 					 &key.rcv_next, &key.sne))
1112 			goto out;
1113 		key.type = TCP_KEY_AO;
1114 	}
1115 #endif
1116 
1117 	if (sk) {
1118 		oif = sk->sk_bound_dev_if;
1119 		if (sk_fullsock(sk)) {
1120 			if (inet6_test_bit(REPFLOW, sk))
1121 				label = ip6_flowlabel(ipv6h);
1122 			priority = READ_ONCE(sk->sk_priority);
1123 			txhash = sk->sk_txhash;
1124 		}
1125 		if (sk->sk_state == TCP_TIME_WAIT) {
1126 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1127 			priority = inet_twsk(sk)->tw_priority;
1128 			txhash = inet_twsk(sk)->tw_txhash;
1129 		}
1130 	} else {
1131 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1132 			label = ip6_flowlabel(ipv6h);
1133 	}
1134 
1135 	trace_tcp_send_reset(sk, skb, reason);
1136 
1137 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
1138 			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
1139 			     label, priority, txhash,
1140 			     &key);
1141 
1142 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1143 out:
1144 	if (allocated_traffic_key)
1145 		kfree(key.traffic_key);
1146 	rcu_read_unlock();
1147 #endif
1148 }
1149 
1150 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1151 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1152 			    struct tcp_key *key, u8 tclass,
1153 			    __be32 label, u32 priority, u32 txhash)
1154 {
1155 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
1156 			     tclass, label, priority, txhash, key);
1157 }
1158 
1159 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
1160 				enum tcp_tw_status tw_status)
1161 {
1162 	struct inet_timewait_sock *tw = inet_twsk(sk);
1163 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1164 	u8 tclass = tw->tw_tclass;
1165 	struct tcp_key key = {};
1166 
1167 	if (tw_status == TCP_TW_ACK_OOW)
1168 		tclass &= ~INET_ECN_MASK;
1169 #ifdef CONFIG_TCP_AO
1170 	struct tcp_ao_info *ao_info;
1171 
1172 	if (static_branch_unlikely(&tcp_ao_needed.key)) {
1173 
1174 		/* FIXME: the segment to-be-acked is not verified yet */
1175 		ao_info = rcu_dereference(tcptw->ao_info);
1176 		if (ao_info) {
1177 			const struct tcp_ao_hdr *aoh;
1178 
1179 			/* Invalid TCP option size or twice included auth */
1180 			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1181 				goto out;
1182 			if (aoh)
1183 				key.ao_key = tcp_ao_established_key(sk, ao_info,
1184 								    aoh->rnext_keyid, -1);
1185 		}
1186 	}
1187 	if (key.ao_key) {
1188 		struct tcp_ao_key *rnext_key;
1189 
1190 		key.traffic_key = snd_other_key(key.ao_key);
1191 		/* rcv_next switches to our rcv_next */
1192 		rnext_key = READ_ONCE(ao_info->rnext_key);
1193 		key.rcv_next = rnext_key->rcvid;
1194 		key.sne = READ_ONCE(ao_info->snd_sne);
1195 		key.type = TCP_KEY_AO;
1196 #else
1197 	if (0) {
1198 #endif
1199 #ifdef CONFIG_TCP_MD5SIG
1200 	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1201 		key.md5_key = tcp_twsk_md5_key(tcptw);
1202 		if (key.md5_key)
1203 			key.type = TCP_KEY_MD5;
1204 #endif
1205 	}
1206 
1207 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
1208 			READ_ONCE(tcptw->tw_rcv_nxt),
1209 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1210 			tcp_tw_tsval(tcptw),
1211 			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
1212 			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
1213 			tw->tw_priority, tw->tw_txhash);
1214 
1215 #ifdef CONFIG_TCP_AO
1216 out:
1217 #endif
1218 	inet_twsk_put(tw);
1219 }
1220 
1221 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1222 				  struct request_sock *req)
1223 {
1224 	struct tcp_key key = {};
1225 
1226 #ifdef CONFIG_TCP_AO
1227 	if (static_branch_unlikely(&tcp_ao_needed.key) &&
1228 	    tcp_rsk_used_ao(req)) {
1229 		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
1230 		const struct tcp_ao_hdr *aoh;
1231 		int l3index;
1232 
1233 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1234 		/* Invalid TCP option size or twice included auth */
1235 		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1236 			return;
1237 		if (!aoh)
1238 			return;
1239 		key.ao_key = tcp_ao_do_lookup(sk, l3index,
1240 					      (union tcp_ao_addr *)addr,
1241 					      AF_INET6, aoh->rnext_keyid, -1);
1242 		if (unlikely(!key.ao_key)) {
1243 			/* Send ACK with any matching MKT for the peer */
1244 			key.ao_key = tcp_ao_do_lookup(sk, l3index,
1245 						      (union tcp_ao_addr *)addr,
1246 						      AF_INET6, -1, -1);
1247 			/* Matching key disappeared (user removed the key?)
1248 			 * let the handshake timeout.
1249 			 * let the handshake time out.
1250 			if (!key.ao_key) {
1251 				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
1252 						     addr,
1253 						     ntohs(tcp_hdr(skb)->source),
1254 						     &ipv6_hdr(skb)->daddr,
1255 						     ntohs(tcp_hdr(skb)->dest));
1256 				return;
1257 			}
1258 		}
1259 		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
1260 		if (!key.traffic_key)
1261 			return;
1262 
1263 		key.type = TCP_KEY_AO;
1264 		key.rcv_next = aoh->keyid;
1265 		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
1266 #else
1267 	if (0) {
1268 #endif
1269 #ifdef CONFIG_TCP_MD5SIG
1270 	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1271 		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1272 
1273 		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
1274 						   l3index);
1275 		if (key.md5_key)
1276 			key.type = TCP_KEY_MD5;
1277 #endif
1278 	}
1279 
1280 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1281 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1282 	 */
1283 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1284 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1285 			tcp_rsk(req)->rcv_nxt,
1286 			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
1287 			tcp_rsk_tsval(tcp_rsk(req)),
1288 			req->ts_recent, sk->sk_bound_dev_if,
1289 			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
1290 			0,
1291 			READ_ONCE(sk->sk_priority),
1292 			READ_ONCE(tcp_rsk(req)->txhash));
1293 	if (tcp_key_is_ao(&key))
1294 		kfree(key.traffic_key);
1295 }
1296 
1297 
1298 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1299 {
1300 #ifdef CONFIG_SYN_COOKIES
1301 	const struct tcphdr *th = tcp_hdr(skb);
1302 
1303 	if (!th->syn)
1304 		sk = cookie_v6_check(sk, skb);
1305 #endif
1306 	return sk;
1307 }
1308 
1309 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1310 			 struct tcphdr *th, u32 *cookie)
1311 {
1312 	u16 mss = 0;
1313 #ifdef CONFIG_SYN_COOKIES
1314 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1315 				    &tcp_request_sock_ipv6_ops, sk, th);
1316 	if (mss) {
1317 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1318 		tcp_synq_overflow(sk);
1319 	}
1320 #endif
1321 	return mss;
1322 }
1323 
1324 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1325 {
1326 	if (skb->protocol == htons(ETH_P_IP))
1327 		return tcp_v4_conn_request(sk, skb);
1328 
1329 	if (!ipv6_unicast_destination(skb))
1330 		goto drop;
1331 
1332 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1333 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1334 		return 0;
1335 	}
1336 
1337 	return tcp_conn_request(&tcp6_request_sock_ops,
1338 				&tcp_request_sock_ipv6_ops, sk, skb);
1339 
1340 drop:
1341 	tcp_listendrop(sk);
1342 	return 0; /* don't send reset */
1343 }
1344 
1345 static void tcp_v6_restore_cb(struct sk_buff *skb)
1346 {
1347 	/* We need to move header back to the beginning if xfrm6_policy_check()
1348 	 * and tcp_v6_fill_cb() are going to be called again.
1349 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1350 	 */
1351 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1352 		sizeof(struct inet6_skb_parm));
1353 }
1354 
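/* tcp_v6_syn_recv_sock(), below, creates the child socket once the handshake
 * completes. For v4-mapped peers it delegates to tcp_v4_syn_recv_sock() and
 * installs the mapped ops; for native IPv6 it routes the request, clones IPv6
 * options from the listener, copies MD5/TCP-AO state, and hashes the new
 * socket.
 */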
1355 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1356 					 struct request_sock *req,
1357 					 struct dst_entry *dst,
1358 					 struct request_sock *req_unhash,
1359 					 bool *own_req)
1360 {
1361 	struct inet_request_sock *ireq;
1362 	struct ipv6_pinfo *newnp;
1363 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1364 	struct ipv6_txoptions *opt;
1365 	struct inet_sock *newinet;
1366 	bool found_dup_sk = false;
1367 	struct tcp_sock *newtp;
1368 	struct sock *newsk;
1369 #ifdef CONFIG_TCP_MD5SIG
1370 	struct tcp_md5sig_key *key;
1371 	int l3index;
1372 #endif
1373 	struct flowi6 fl6;
1374 
1375 	if (skb->protocol == htons(ETH_P_IP)) {
1376 		/*
1377 		 *	v6 mapped
1378 		 */
1379 
1380 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1381 					     req_unhash, own_req);
1382 
1383 		if (!newsk)
1384 			return NULL;
1385 
1386 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1387 
1388 		newnp = tcp_inet6_sk(newsk);
1389 		newtp = tcp_sk(newsk);
1390 
1391 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1392 
1393 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1394 
1395 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1396 		if (sk_is_mptcp(newsk))
1397 			mptcpv6_handle_mapped(newsk, true);
1398 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1399 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1400 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1401 #endif
1402 
1403 		newnp->ipv6_mc_list = NULL;
1404 		newnp->ipv6_ac_list = NULL;
1405 		newnp->ipv6_fl_list = NULL;
1406 		newnp->pktoptions  = NULL;
1407 		newnp->opt	   = NULL;
1408 		newnp->mcast_oif   = inet_iif(skb);
1409 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1410 		newnp->rcv_flowinfo = 0;
1411 		if (inet6_test_bit(REPFLOW, sk))
1412 			newnp->flow_label = 0;
1413 
1414 		/*
1415 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1416 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1417 		 * that function for the gory details. -acme
1418 		 */
1419 
1420 		/* It is a tricky place. Until this moment IPv4 tcp
1421 		   has been working with IPv6 icsk.icsk_af_ops.
1422 		   Sync it now.
1423 		 */
1424 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1425 
1426 		return newsk;
1427 	}
1428 
1429 	ireq = inet_rsk(req);
1430 
1431 	if (sk_acceptq_is_full(sk))
1432 		goto out_overflow;
1433 
1434 	if (!dst) {
1435 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1436 		if (!dst)
1437 			goto out;
1438 	}
1439 
1440 	newsk = tcp_create_openreq_child(sk, req, skb);
1441 	if (!newsk)
1442 		goto out_nonewsk;
1443 
1444 	/*
1445 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1446 	 * count here, tcp_create_openreq_child now does this for us, see the
1447 	 * comment in that function for the gory details. -acme
1448 	 */
1449 
1450 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1451 	inet6_sk_rx_dst_set(newsk, skb);
1452 
1453 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1454 
1455 	newtp = tcp_sk(newsk);
1456 	newinet = inet_sk(newsk);
1457 	newnp = tcp_inet6_sk(newsk);
1458 
1459 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1460 
1461 	ip6_dst_store(newsk, dst, NULL, NULL);
1462 
1463 	newnp->saddr = ireq->ir_v6_loc_addr;
1464 
1465 	/* Now IPv6 options...
1466 
1467 	   First: no IPv4 options.
1468 	 */
1469 	newinet->inet_opt = NULL;
1470 	newnp->ipv6_mc_list = NULL;
1471 	newnp->ipv6_ac_list = NULL;
1472 	newnp->ipv6_fl_list = NULL;
1473 
1474 	/* Clone RX bits */
1475 	newnp->rxopt.all = np->rxopt.all;
1476 
1477 	newnp->pktoptions = NULL;
1478 	newnp->opt	  = NULL;
1479 	newnp->mcast_oif  = tcp_v6_iif(skb);
1480 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1481 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1482 	if (inet6_test_bit(REPFLOW, sk))
1483 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1484 
1485 	/* Set ToS of the new socket based upon the value of incoming SYN.
1486 	 * ECT bits are set later in tcp_init_transfer().
1487 	 */
1488 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1489 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1490 
1491 	/* Clone native IPv6 options from listening socket (if any)
1492 
1493 	   Yes, keeping a reference count would be much more clever,
1494 	   but we do one more thing here: reattach optmem
1495 	   to newsk.
1496 	 */
1497 	opt = ireq->ipv6_opt;
1498 	if (!opt)
1499 		opt = rcu_dereference(np->opt);
1500 	if (opt) {
1501 		opt = ipv6_dup_options(newsk, opt);
1502 		RCU_INIT_POINTER(newnp->opt, opt);
1503 	}
1504 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1505 	if (opt)
1506 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1507 						    opt->opt_flen;
1508 
1509 	tcp_ca_openreq_child(newsk, dst);
1510 
1511 	tcp_sync_mss(newsk, dst_mtu(dst));
1512 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1513 
1514 	tcp_initialize_rcv_mss(newsk);
1515 
1516 #ifdef CONFIG_TCP_MD5SIG
1517 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1518 
1519 	if (!tcp_rsk_used_ao(req)) {
1520 		/* Copy over the MD5 key from the original socket */
1521 		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1522 		if (key) {
1523 			const union tcp_md5_addr *addr;
1524 
1525 			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1526 			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1527 				inet_csk_prepare_forced_close(newsk);
1528 				tcp_done(newsk);
1529 				goto out;
1530 			}
1531 		}
1532 	}
1533 #endif
1534 #ifdef CONFIG_TCP_AO
1535 	/* Copy over tcp_ao_info if any */
1536 	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
1537 		goto out; /* OOM */
1538 #endif
1539 
1540 	if (__inet_inherit_port(sk, newsk) < 0) {
1541 		inet_csk_prepare_forced_close(newsk);
1542 		tcp_done(newsk);
1543 		goto out;
1544 	}
1545 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1546 				       &found_dup_sk);
1547 	if (*own_req) {
1548 		tcp_move_syn(newtp, req);
1549 
1550 		/* Clone pktoptions received with SYN, if we own the req */
1551 		if (ireq->pktopts) {
1552 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1553 			consume_skb(ireq->pktopts);
1554 			ireq->pktopts = NULL;
1555 			if (newnp->pktoptions)
1556 				tcp_v6_restore_cb(newnp->pktoptions);
1557 		}
1558 	} else {
1559 		if (!req_unhash && found_dup_sk) {
1560 			/* This code path should only be executed in the
1561 			 * syncookie case
1562 			 */
1563 			bh_unlock_sock(newsk);
1564 			sock_put(newsk);
1565 			newsk = NULL;
1566 		}
1567 	}
1568 
1569 	return newsk;
1570 
1571 out_overflow:
1572 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1573 out_nonewsk:
1574 	dst_release(dst);
1575 out:
1576 	tcp_listendrop(sk);
1577 	return NULL;
1578 }
1579 
1580 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1581 							   u32));
1582 /* The socket must have its spinlock held when we get
1583  * here, unless it is a TCP_LISTEN socket.
1584  *
1585  * We have a potential double-lock case here, so even when
1586  * doing backlog processing we use the BH locking scheme.
1587  * This is because we cannot sleep with the original spinlock
1588  * held.
1589  */
1590 INDIRECT_CALLABLE_SCOPE
1591 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1592 {
1593 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1594 	struct sk_buff *opt_skb = NULL;
1595 	enum skb_drop_reason reason;
1596 	struct tcp_sock *tp;
1597 
1598 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1599 	   goes to the IPv4 receive handler and is backlogged.
1600 	   From backlog it always goes here. Kerboom...
1601 	   Fortunately, tcp_rcv_established and rcv_established
1602 	   handle them correctly, but it is not the case with
1603 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1604 	 */
1605 
1606 	if (skb->protocol == htons(ETH_P_IP))
1607 		return tcp_v4_do_rcv(sk, skb);
1608 
1609 	/*
1610 	 *	socket locking is here for SMP purposes as backlog rcv
1611 	 *	is currently called with bh processing disabled.
1612 	 */
1613 
1614 	/* Do Stevens' IPV6_PKTOPTIONS.
1615 
1616 	   Yes, guys, it is the only place in our code, where we
1617 	   may do this without affecting IPv4.
1618 	   The rest of the code is protocol independent,
1619 	   and I do not like the idea of uglifying IPv4.
1620 
1621 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1622 	   does not look very well thought out. For now we latch
1623 	   the options received in the last packet enqueued
1624 	   by tcp. Feel free to propose a better solution.
1625 					       --ANK (980728)
1626 	 */
1627 	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
1628 		opt_skb = skb_clone_and_charge_r(skb, sk);
1629 
1630 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1631 		struct dst_entry *dst;
1632 
1633 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1634 						lockdep_sock_is_held(sk));
1635 
1636 		sock_rps_save_rxhash(sk, skb);
1637 		sk_mark_napi_id(sk, skb);
1638 		if (dst) {
1639 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1640 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1641 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1642 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1643 				dst_release(dst);
1644 			}
1645 		}
1646 
1647 		tcp_rcv_established(sk, skb);
1648 		if (opt_skb)
1649 			goto ipv6_pktoptions;
1650 		return 0;
1651 	}
1652 
1653 	if (tcp_checksum_complete(skb))
1654 		goto csum_err;
1655 
1656 	if (sk->sk_state == TCP_LISTEN) {
1657 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1658 
1659 		if (nsk != sk) {
1660 			if (nsk) {
1661 				reason = tcp_child_process(sk, nsk, skb);
1662 				if (reason)
1663 					goto reset;
1664 			}
1665 			return 0;
1666 		}
1667 	} else
1668 		sock_rps_save_rxhash(sk, skb);
1669 
1670 	reason = tcp_rcv_state_process(sk, skb);
1671 	if (reason)
1672 		goto reset;
1673 	if (opt_skb)
1674 		goto ipv6_pktoptions;
1675 	return 0;
1676 
1677 reset:
1678 	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
1679 discard:
1680 	if (opt_skb)
1681 		__kfree_skb(opt_skb);
1682 	sk_skb_reason_drop(sk, skb, reason);
1683 	return 0;
1684 csum_err:
1685 	reason = SKB_DROP_REASON_TCP_CSUM;
1686 	trace_tcp_bad_csum(skb);
1687 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1688 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1689 	goto discard;
1690 
1691 
1692 ipv6_pktoptions:
1693 	/* Do you ask, what is it?
1694 
1695 	   1. skb was enqueued by tcp.
1696 	   2. skb is added to tail of read queue, rather than out of order.
1697 	   3. socket is not in passive state.
1698 	   4. Finally, it really contains options, which user wants to receive.
1699 	 */
1700 	tp = tcp_sk(sk);
1701 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1702 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1703 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1704 			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
1705 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1706 			WRITE_ONCE(np->mcast_hops,
1707 				   ipv6_hdr(opt_skb)->hop_limit);
1708 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1709 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1710 		if (inet6_test_bit(REPFLOW, sk))
1711 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1712 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1713 			tcp_v6_restore_cb(opt_skb);
1714 			opt_skb = xchg(&np->pktoptions, opt_skb);
1715 		} else {
1716 			__kfree_skb(opt_skb);
1717 			opt_skb = xchg(&np->pktoptions, NULL);
1718 		}
1719 	}
1720 
1721 	consume_skb(opt_skb);
1722 	return 0;
1723 }
1724 
1725 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1726 			   const struct tcphdr *th)
1727 {
1728 	/* This is tricky: we move IP6CB at its correct location into
1729 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1730 	 * _decode_session6() uses IP6CB().
1731 	 * barrier() makes sure compiler won't play aliasing games.
1732 	 */
1733 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1734 		sizeof(struct inet6_skb_parm));
1735 	barrier();
1736 
1737 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1738 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1739 				    skb->len - th->doff*4);
1740 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1741 	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
1742 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1743 	TCP_SKB_CB(skb)->sacked = 0;
1744 	TCP_SKB_CB(skb)->has_rxtstamp =
1745 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1746 }
1747 
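/* tcp_v6_rcv(), below, is the IPv6 TCP receive entry point. It validates the
 * header, verifies the checksum, looks up the owning socket, and dispatches
 * on the socket state, with TCP_NEW_SYN_RECV request sockets and
 * TCP_TIME_WAIT sockets handled specially.
 */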
1748 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1749 {
1750 	struct net *net = dev_net_rcu(skb->dev);
1751 	enum skb_drop_reason drop_reason;
1752 	enum tcp_tw_status tw_status;
1753 	int sdif = inet6_sdif(skb);
1754 	int dif = inet6_iif(skb);
1755 	const struct tcphdr *th;
1756 	const struct ipv6hdr *hdr;
1757 	struct sock *sk = NULL;
1758 	bool refcounted;
1759 	int ret;
1760 	u32 isn;
1761 
1762 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1763 	if (skb->pkt_type != PACKET_HOST)
1764 		goto discard_it;
1765 
1766 	/*
1767 	 *	Count it even if it's bad.
1768 	 */
1769 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1770 
1771 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1772 		goto discard_it;
1773 
1774 	th = (const struct tcphdr *)skb->data;
1775 
1776 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1777 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1778 		goto bad_packet;
1779 	}
1780 	if (!pskb_may_pull(skb, th->doff*4))
1781 		goto discard_it;
1782 
1783 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1784 		goto csum_error;
1785 
1786 	th = (const struct tcphdr *)skb->data;
1787 	hdr = ipv6_hdr(skb);
1788 
1789 lookup:
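	/* Look the segment up in the established/listener hash tables.  The
	 * request-socket handling below may jump back here to redo the lookup,
	 * e.g. when another CPU steals the request socket.
	 */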
1790 	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1791 				th->source, th->dest, inet6_iif(skb), sdif,
1792 				&refcounted);
1793 	if (!sk)
1794 		goto no_tcp_socket;
1795 
1796 	if (sk->sk_state == TCP_TIME_WAIT)
1797 		goto do_time_wait;
1798 
1799 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1800 		struct request_sock *req = inet_reqsk(sk);
1801 		bool req_stolen = false;
1802 		struct sock *nsk;
1803 
1804 		sk = req->rsk_listener;
1805 		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1806 			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1807 		else
1808 			drop_reason = tcp_inbound_hash(sk, req, skb,
1809 						       &hdr->saddr, &hdr->daddr,
1810 						       AF_INET6, dif, sdif);
1811 		if (drop_reason) {
1812 			sk_drops_add(sk, skb);
1813 			reqsk_put(req);
1814 			goto discard_it;
1815 		}
1816 		if (tcp_checksum_complete(skb)) {
1817 			reqsk_put(req);
1818 			goto csum_error;
1819 		}
1820 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1821 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1822 			if (!nsk) {
1823 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1824 				goto lookup;
1825 			}
1826 			sk = nsk;
1827 			/* reuseport_migrate_sock() has already held one sk_refcnt
1828 			 * before returning.
1829 			 */
1830 		} else {
1831 			sock_hold(sk);
1832 		}
1833 		refcounted = true;
1834 		nsk = NULL;
1835 		if (!tcp_filter(sk, skb)) {
1836 			th = (const struct tcphdr *)skb->data;
1837 			hdr = ipv6_hdr(skb);
1838 			tcp_v6_fill_cb(skb, hdr, th);
1839 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
1840 					    &drop_reason);
1841 		} else {
1842 			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1843 		}
1844 		if (!nsk) {
1845 			reqsk_put(req);
1846 			if (req_stolen) {
1847 				/* Another cpu got exclusive access to req
1848 				 * and created a full blown socket.
1849 				 * Try to feed this packet to this socket
1850 				 * instead of discarding it.
1851 				 */
1852 				tcp_v6_restore_cb(skb);
1853 				sock_put(sk);
1854 				goto lookup;
1855 			}
1856 			goto discard_and_relse;
1857 		}
1858 		nf_reset_ct(skb);
1859 		if (nsk == sk) {
1860 			reqsk_put(req);
1861 			tcp_v6_restore_cb(skb);
1862 		} else {
1863 			drop_reason = tcp_child_process(sk, nsk, skb);
1864 			if (drop_reason) {
1865 				enum sk_rst_reason rst_reason;
1866 
1867 				rst_reason = sk_rst_convert_drop_reason(drop_reason);
1868 				tcp_v6_send_reset(nsk, skb, rst_reason);
1869 				goto discard_and_relse;
1870 			}
1871 			sock_put(sk);
1872 			return 0;
1873 		}
1874 	}
1875 
1876 process:
1877 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1878 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1879 		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1880 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1881 			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1882 			goto discard_and_relse;
1883 		}
1884 	}
1885 
1886 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1887 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1888 		goto discard_and_relse;
1889 	}
1890 
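	/* Check any TCP-MD5/TCP-AO signature on the segment; a mismatch yields
	 * a drop reason and the segment is discarded.
	 */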
1891 	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
1892 				       AF_INET6, dif, sdif);
1893 	if (drop_reason)
1894 		goto discard_and_relse;
1895 
1896 	nf_reset_ct(skb);
1897 
1898 	if (tcp_filter(sk, skb)) {
1899 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1900 		goto discard_and_relse;
1901 	}
1902 	th = (const struct tcphdr *)skb->data;
1903 	hdr = ipv6_hdr(skb);
1904 	tcp_v6_fill_cb(skb, hdr, th);
1905 
1906 	skb->dev = NULL;
1907 
1908 	if (sk->sk_state == TCP_LISTEN) {
1909 		ret = tcp_v6_do_rcv(sk, skb);
1910 		goto put_and_return;
1911 	}
1912 
1913 	sk_incoming_cpu_update(sk);
1914 
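	/* If no user context owns the socket, handle the segment right away;
	 * otherwise queue it on the backlog, to be fed back through
	 * tcp_v6_do_rcv() (via .backlog_rcv) when the owner releases the lock.
	 */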
1915 	bh_lock_sock_nested(sk);
1916 	tcp_segs_in(tcp_sk(sk), skb);
1917 	ret = 0;
1918 	if (!sock_owned_by_user(sk)) {
1919 		ret = tcp_v6_do_rcv(sk, skb);
1920 	} else {
1921 		if (tcp_add_backlog(sk, skb, &drop_reason))
1922 			goto discard_and_relse;
1923 	}
1924 	bh_unlock_sock(sk);
1925 put_and_return:
1926 	if (refcounted)
1927 		sock_put(sk);
1928 	return ret ? -1 : 0;
1929 
1930 no_tcp_socket:
1931 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1932 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1933 		goto discard_it;
1934 
1935 	tcp_v6_fill_cb(skb, hdr, th);
1936 
1937 	if (tcp_checksum_complete(skb)) {
1938 csum_error:
1939 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1940 		trace_tcp_bad_csum(skb);
1941 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1942 bad_packet:
1943 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1944 	} else {
1945 		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
1946 	}
1947 
1948 discard_it:
1949 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1950 	sk_skb_reason_drop(sk, skb, drop_reason);
1951 	return 0;
1952 
1953 discard_and_relse:
1954 	sk_drops_add(sk, skb);
1955 	if (refcounted)
1956 		sock_put(sk);
1957 	goto discard_it;
1958 
1959 do_time_wait:
1960 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1961 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1962 		inet_twsk_put(inet_twsk(sk));
1963 		goto discard_it;
1964 	}
1965 
1966 	tcp_v6_fill_cb(skb, hdr, th);
1967 
1968 	if (tcp_checksum_complete(skb)) {
1969 		inet_twsk_put(inet_twsk(sk));
1970 		goto csum_error;
1971 	}
1972 
1973 	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
1974 	switch (tw_status) {
1975 	case TCP_TW_SYN:
1976 	{
1977 		struct sock *sk2;
1978 
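		/* tcp_timewait_state_process() accepted a new SYN reusing this
		 * TIME_WAIT tuple.  Find a matching listener, retire the
		 * timewait socket and process the SYN against the listener;
		 * the ISN to use is handed over via the per-cpu tcp_tw_isn.
		 */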
1979 		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1980 					    skb, __tcp_hdrlen(th),
1981 					    &ipv6_hdr(skb)->saddr, th->source,
1982 					    &ipv6_hdr(skb)->daddr,
1983 					    ntohs(th->dest),
1984 					    tcp_v6_iif_l3_slave(skb),
1985 					    sdif);
1986 		if (sk2) {
1987 			struct inet_timewait_sock *tw = inet_twsk(sk);
1988 			inet_twsk_deschedule_put(tw);
1989 			sk = sk2;
1990 			tcp_v6_restore_cb(skb);
1991 			refcounted = false;
1992 			__this_cpu_write(tcp_tw_isn, isn);
1993 			goto process;
1994 		}
1995 	}
1996 		/* to ACK */
1997 		fallthrough;
1998 	case TCP_TW_ACK:
1999 	case TCP_TW_ACK_OOW:
2000 		tcp_v6_timewait_ack(sk, skb, tw_status);
2001 		break;
2002 	case TCP_TW_RST:
2003 		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
2004 		inet_twsk_deschedule_put(inet_twsk(sk));
2005 		goto discard_it;
2006 	case TCP_TW_SUCCESS:
2007 		;
2008 	}
2009 	goto discard_it;
2010 }
2011 
2012 void tcp_v6_early_demux(struct sk_buff *skb)
2013 {
2014 	struct net *net = dev_net_rcu(skb->dev);
2015 	const struct ipv6hdr *hdr;
2016 	const struct tcphdr *th;
2017 	struct sock *sk;
2018 
2019 	if (skb->pkt_type != PACKET_HOST)
2020 		return;
2021 
2022 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
2023 		return;
2024 
2025 	hdr = ipv6_hdr(skb);
2026 	th = tcp_hdr(skb);
2027 
2028 	if (th->doff < sizeof(struct tcphdr) / 4)
2029 		return;
2030 
2031 	/* Note: we use inet6_iif() here, not tcp_v6_iif() */
2032 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
2033 					&hdr->saddr, th->source,
2034 					&hdr->daddr, ntohs(th->dest),
2035 					inet6_iif(skb), inet6_sdif(skb));
2036 	if (sk) {
2037 		skb->sk = sk;
2038 		skb->destructor = sock_edemux;
2039 		if (sk_fullsock(sk)) {
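			/* Reuse the input route cached by
			 * inet6_sk_rx_dst_set() when it is still valid and the
			 * packet arrived on the same interface, saving a route
			 * lookup later in ip6_rcv_finish().
			 */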
2040 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
2041 
2042 			if (dst)
2043 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
2044 			if (dst &&
2045 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
2046 				skb_dst_set_noref(skb, dst);
2047 		}
2048 	}
2049 }
2050 
2051 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
2052 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
2053 	.twsk_destructor = tcp_twsk_destructor,
2054 };
2055 
2056 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
2057 {
2058 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
2059 }
2060 
2061 const struct inet_connection_sock_af_ops ipv6_specific = {
2062 	.queue_xmit	   = inet6_csk_xmit,
2063 	.send_check	   = tcp_v6_send_check,
2064 	.rebuild_header	   = inet6_sk_rebuild_header,
2065 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
2066 	.conn_request	   = tcp_v6_conn_request,
2067 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
2068 	.net_header_len	   = sizeof(struct ipv6hdr),
2069 	.setsockopt	   = ipv6_setsockopt,
2070 	.getsockopt	   = ipv6_getsockopt,
2071 	.mtu_reduced	   = tcp_v6_mtu_reduced,
2072 };
2073 
2074 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2075 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
2076 #ifdef CONFIG_TCP_MD5SIG
2077 	.md5_lookup	=	tcp_v6_md5_lookup,
2078 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
2079 	.md5_parse	=	tcp_v6_parse_md5_keys,
2080 #endif
2081 #ifdef CONFIG_TCP_AO
2082 	.ao_lookup	=	tcp_v6_ao_lookup,
2083 	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
2084 	.ao_parse	=	tcp_v6_parse_ao,
2085 	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
2086 #endif
2087 };
2088 #endif
2089 
2090 /*
2091  *	TCP over IPv4 via INET6 API
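 *
 *	These ops replace ipv6_specific (see tcp_v6_connect() and
 *	tcp_v6_syn_recv_sock()) when the peer address is IPv4-mapped, so the
 *	socket exchanges plain IPv4 packets.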
2092  */
2093 static const struct inet_connection_sock_af_ops ipv6_mapped = {
2094 	.queue_xmit	   = ip_queue_xmit,
2095 	.send_check	   = tcp_v4_send_check,
2096 	.rebuild_header	   = inet_sk_rebuild_header,
2097 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
2098 	.conn_request	   = tcp_v6_conn_request,
2099 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
2100 	.net_header_len	   = sizeof(struct iphdr),
2101 	.setsockopt	   = ipv6_setsockopt,
2102 	.getsockopt	   = ipv6_getsockopt,
2103 	.mtu_reduced	   = tcp_v4_mtu_reduced,
2104 };
2105 
2106 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2107 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
2108 #ifdef CONFIG_TCP_MD5SIG
2109 	.md5_lookup	=	tcp_v4_md5_lookup,
2110 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
2111 	.md5_parse	=	tcp_v6_parse_md5_keys,
2112 #endif
2113 #ifdef CONFIG_TCP_AO
2114 	.ao_lookup	=	tcp_v6_ao_lookup,
2115 	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
2116 	.ao_parse	=	tcp_v6_parse_ao,
2117 	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
2118 #endif
2119 };
2120 #endif
2121 
2122 /* NOTE: Many fields are already zeroed explicitly by the call to
2123  *       sk_alloc(), so they need not be initialized here.
2124  */
2125 static int tcp_v6_init_sock(struct sock *sk)
2126 {
2127 	struct inet_connection_sock *icsk = inet_csk(sk);
2128 
2129 	tcp_init_sock(sk);
2130 
2131 	icsk->icsk_af_ops = &ipv6_specific;
2132 
2133 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2134 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
2135 #endif
2136 
2137 	return 0;
2138 }
2139 
2140 #ifdef CONFIG_PROC_FS
2141 /* Proc filesystem TCPv6 sock list dumping. */
2142 static void get_openreq6(struct seq_file *seq,
2143 			 const struct request_sock *req, int i)
2144 {
2145 	long ttd = req->rsk_timer.expires - jiffies;
2146 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
2147 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
2148 
2149 	if (ttd < 0)
2150 		ttd = 0;
2151 
2152 	seq_printf(seq,
2153 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2154 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
2155 		   i,
2156 		   src->s6_addr32[0], src->s6_addr32[1],
2157 		   src->s6_addr32[2], src->s6_addr32[3],
2158 		   inet_rsk(req)->ir_num,
2159 		   dest->s6_addr32[0], dest->s6_addr32[1],
2160 		   dest->s6_addr32[2], dest->s6_addr32[3],
2161 		   ntohs(inet_rsk(req)->ir_rmt_port),
2162 		   TCP_SYN_RECV,
2163 		   0, 0, /* could print option size, but that is af dependent. */
2164 		   1,   /* timers active (only the expire timer) */
2165 		   jiffies_to_clock_t(ttd),
2166 		   req->num_timeout,
2167 		   from_kuid_munged(seq_user_ns(seq),
2168 				    sock_i_uid(req->rsk_listener)),
2169 		   0,  /* non standard timer */
2170 		   0, /* open_requests have no inode */
2171 		   0, req);
2172 }
2173 
2174 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2175 {
2176 	const struct in6_addr *dest, *src;
2177 	__u16 destp, srcp;
2178 	int timer_active;
2179 	unsigned long timer_expires;
2180 	const struct inet_sock *inet = inet_sk(sp);
2181 	const struct tcp_sock *tp = tcp_sk(sp);
2182 	const struct inet_connection_sock *icsk = inet_csk(sp);
2183 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2184 	u8 icsk_pending;
2185 	int rx_queue;
2186 	int state;
2187 
2188 	dest  = &sp->sk_v6_daddr;
2189 	src   = &sp->sk_v6_rcv_saddr;
2190 	destp = ntohs(inet->inet_dport);
2191 	srcp  = ntohs(inet->inet_sport);
2192 
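	/* timer_active encodes which timer is pending, as in the IPv4 output:
	 * 1 retransmit/loss probe, 4 zero-window probe, 2 sk_timer (keepalive),
	 * 0 none; timer_expires is the corresponding expiry.
	 */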
2193 	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
2194 	if (icsk_pending == ICSK_TIME_RETRANS ||
2195 	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2196 	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
2197 		timer_active	= 1;
2198 		timer_expires	= icsk_timeout(icsk);
2199 	} else if (icsk_pending == ICSK_TIME_PROBE0) {
2200 		timer_active	= 4;
2201 		timer_expires	= icsk_timeout(icsk);
2202 	} else if (timer_pending(&sp->sk_timer)) {
2203 		timer_active	= 2;
2204 		timer_expires	= sp->sk_timer.expires;
2205 	} else {
2206 		timer_active	= 0;
2207 		timer_expires = jiffies;
2208 	}
2209 
2210 	state = inet_sk_state_load(sp);
2211 	if (state == TCP_LISTEN)
2212 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2213 	else
2214 		/* Because we don't lock the socket,
2215 		 * we might find a transient negative value.
2216 		 */
2217 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2218 				      READ_ONCE(tp->copied_seq), 0);
2219 
2220 	seq_printf(seq,
2221 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2222 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2223 		   i,
2224 		   src->s6_addr32[0], src->s6_addr32[1],
2225 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2226 		   dest->s6_addr32[0], dest->s6_addr32[1],
2227 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2228 		   state,
2229 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2230 		   rx_queue,
2231 		   timer_active,
2232 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2233 		   icsk->icsk_retransmits,
2234 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2235 		   icsk->icsk_probes_out,
2236 		   sock_i_ino(sp),
2237 		   refcount_read(&sp->sk_refcnt), sp,
2238 		   jiffies_to_clock_t(icsk->icsk_rto),
2239 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2240 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2241 		   tcp_snd_cwnd(tp),
2242 		   state == TCP_LISTEN ?
2243 			fastopenq->max_qlen :
2244 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2245 		   );
2246 }
2247 
2248 static void get_timewait6_sock(struct seq_file *seq,
2249 			       struct inet_timewait_sock *tw, int i)
2250 {
2251 	long delta = tw->tw_timer.expires - jiffies;
2252 	const struct in6_addr *dest, *src;
2253 	__u16 destp, srcp;
2254 
2255 	dest = &tw->tw_v6_daddr;
2256 	src  = &tw->tw_v6_rcv_saddr;
2257 	destp = ntohs(tw->tw_dport);
2258 	srcp  = ntohs(tw->tw_sport);
2259 
2260 	seq_printf(seq,
2261 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2262 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2263 		   i,
2264 		   src->s6_addr32[0], src->s6_addr32[1],
2265 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2266 		   dest->s6_addr32[0], dest->s6_addr32[1],
2267 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2268 		   READ_ONCE(tw->tw_substate), 0, 0,
2269 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2270 		   refcount_read(&tw->tw_refcnt), tw);
2271 }
2272 
2273 static int tcp6_seq_show(struct seq_file *seq, void *v)
2274 {
2275 	struct tcp_iter_state *st;
2276 	struct sock *sk = v;
2277 
2278 	if (v == SEQ_START_TOKEN) {
2279 		seq_puts(seq,
2280 			 "  sl  "
2281 			 "local_address                         "
2282 			 "remote_address                        "
2283 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2284 			 "   uid  timeout inode\n");
2285 		goto out;
2286 	}
2287 	st = seq->private;
2288 
2289 	if (sk->sk_state == TCP_TIME_WAIT)
2290 		get_timewait6_sock(seq, v, st->num);
2291 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2292 		get_openreq6(seq, v, st->num);
2293 	else
2294 		get_tcp6_sock(seq, v, st->num);
2295 out:
2296 	return 0;
2297 }
2298 
2299 static const struct seq_operations tcp6_seq_ops = {
2300 	.show		= tcp6_seq_show,
2301 	.start		= tcp_seq_start,
2302 	.next		= tcp_seq_next,
2303 	.stop		= tcp_seq_stop,
2304 };
2305 
2306 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2307 	.family		= AF_INET6,
2308 };
2309 
2310 int __net_init tcp6_proc_init(struct net *net)
2311 {
2312 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2313 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2314 		return -ENOMEM;
2315 	return 0;
2316 }
2317 
2318 void tcp6_proc_exit(struct net *net)
2319 {
2320 	remove_proc_entry("tcp6", net->proc_net);
2321 }
2322 #endif
2323 
2324 struct proto tcpv6_prot = {
2325 	.name			= "TCPv6",
2326 	.owner			= THIS_MODULE,
2327 	.close			= tcp_close,
2328 	.pre_connect		= tcp_v6_pre_connect,
2329 	.connect		= tcp_v6_connect,
2330 	.disconnect		= tcp_disconnect,
2331 	.accept			= inet_csk_accept,
2332 	.ioctl			= tcp_ioctl,
2333 	.init			= tcp_v6_init_sock,
2334 	.destroy		= tcp_v4_destroy_sock,
2335 	.shutdown		= tcp_shutdown,
2336 	.setsockopt		= tcp_setsockopt,
2337 	.getsockopt		= tcp_getsockopt,
2338 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2339 	.keepalive		= tcp_set_keepalive,
2340 	.recvmsg		= tcp_recvmsg,
2341 	.sendmsg		= tcp_sendmsg,
2342 	.splice_eof		= tcp_splice_eof,
2343 	.backlog_rcv		= tcp_v6_do_rcv,
2344 	.release_cb		= tcp_release_cb,
2345 	.hash			= inet6_hash,
2346 	.unhash			= inet_unhash,
2347 	.get_port		= inet_csk_get_port,
2348 	.put_port		= inet_put_port,
2349 #ifdef CONFIG_BPF_SYSCALL
2350 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2351 #endif
2352 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2353 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2354 	.stream_memory_free	= tcp_stream_memory_free,
2355 	.sockets_allocated	= &tcp_sockets_allocated,
2356 
2357 	.memory_allocated	= &tcp_memory_allocated,
2358 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2359 
2360 	.memory_pressure	= &tcp_memory_pressure,
2361 	.orphan_count		= &tcp_orphan_count,
2362 	.sysctl_mem		= sysctl_tcp_mem,
2363 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2364 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2365 	.max_header		= MAX_TCP_HEADER,
2366 	.obj_size		= sizeof(struct tcp6_sock),
2367 	.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2368 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2369 	.twsk_prot		= &tcp6_timewait_sock_ops,
2370 	.rsk_prot		= &tcp6_request_sock_ops,
2371 	.h.hashinfo		= NULL,
2372 	.no_autobind		= true,
2373 	.diag_destroy		= tcp_abort,
2374 };
2375 EXPORT_SYMBOL_GPL(tcpv6_prot);
2376 
2378 static struct inet_protosw tcpv6_protosw = {
2379 	.type		=	SOCK_STREAM,
2380 	.protocol	=	IPPROTO_TCP,
2381 	.prot		=	&tcpv6_prot,
2382 	.ops		=	&inet6_stream_ops,
2383 	.flags		=	INET_PROTOSW_PERMANENT |
2384 				INET_PROTOSW_ICSK,
2385 };
2386 
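/* Each netns owns a kernel control socket (net->ipv6.tcp_sk), used by
 * tcp_v6_send_response() to transmit RST and ACK packets (e.g. for reset and
 * timewait handling).
 */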
2387 static int __net_init tcpv6_net_init(struct net *net)
2388 {
2389 	int res;
2390 
2391 	res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2392 				   SOCK_RAW, IPPROTO_TCP, net);
2393 	if (!res)
2394 		net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
2395 
2396 	return res;
2397 }
2398 
2399 static void __net_exit tcpv6_net_exit(struct net *net)
2400 {
2401 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2402 }
2403 
2404 static struct pernet_operations tcpv6_net_ops = {
2405 	.init	    = tcpv6_net_init,
2406 	.exit	    = tcpv6_net_exit,
2407 };
2408 
2409 int __init tcpv6_init(void)
2410 {
2411 	int ret;
2412 
2413 	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
2414 		.handler     = tcp_v6_rcv,
2415 		.err_handler = tcp_v6_err,
2416 		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
2417 	};
2418 	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2419 	if (ret)
2420 		goto out;
2421 
2422 	/* register inet6 protocol */
2423 	ret = inet6_register_protosw(&tcpv6_protosw);
2424 	if (ret)
2425 		goto out_tcpv6_protocol;
2426 
2427 	ret = register_pernet_subsys(&tcpv6_net_ops);
2428 	if (ret)
2429 		goto out_tcpv6_protosw;
2430 
2431 	ret = mptcpv6_init();
2432 	if (ret)
2433 		goto out_tcpv6_pernet_subsys;
2434 
2435 out:
2436 	return ret;
2437 
2438 out_tcpv6_pernet_subsys:
2439 	unregister_pernet_subsys(&tcpv6_net_ops);
2440 out_tcpv6_protosw:
2441 	inet6_unregister_protosw(&tcpv6_protosw);
2442 out_tcpv6_protocol:
2443 	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2444 	goto out;
2445 }
2446 
2447 void tcpv6_exit(void)
2448 {
2449 	unregister_pernet_subsys(&tcpv6_net_ops);
2450 	inet6_unregister_protosw(&tcpv6_protosw);
2451 	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2452 }
2453