1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/aligned_data.h>
45 #include <net/tcp.h>
46 #include <net/ndisc.h>
47 #include <net/inet6_hashtables.h>
48 #include <net/inet6_connection_sock.h>
49 #include <net/ipv6.h>
50 #include <net/transp_v6.h>
51 #include <net/addrconf.h>
52 #include <net/ip6_route.h>
53 #include <net/ip6_checksum.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
56 #include <net/xfrm.h>
57 #include <net/snmp.h>
58 #include <net/dsfield.h>
59 #include <net/timewait_sock.h>
60 #include <net/inet_common.h>
61 #include <net/secure_seq.h>
62 #include <net/hotdata.h>
63 #include <net/busy_poll.h>
64 #include <net/rstreason.h>
65 #include <net/psp.h>
66 
67 #include <linux/proc_fs.h>
68 #include <linux/seq_file.h>
69 
70 #include <crypto/hash.h>
71 #include <linux/scatterlist.h>
72 
73 #include <trace/events/tcp.h>
74 
/* Forward declarations of handlers that are referenced (e.g. from
 * tcp6_request_sock_ops) before their definitions later in the file.
 */
75 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
76 			      enum sk_rst_reason reason);
77 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
78 				      struct request_sock *req);
79 
/* Restored as sk->sk_backlog_rcv by tcp_v6_connect() when a v4-mapped
 * connect attempt fails.
 */
80 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
81 
/* Operation tables; tcp_v6_connect() switches icsk_af_ops (and, when MD5 or
 * TCP-AO is configured, tp->af_specific) between the native IPv6 and the
 * v4-mapped variants.
 */
82 static const struct inet_connection_sock_af_ops ipv6_mapped;
83 const struct inet_connection_sock_af_ops ipv6_specific;
84 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
86 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
87 #endif
88 
89 /* Helper returning the address of the inet6 member of the struct tcp6_sock
90  * that embeds a given tcp socket. It can be used in the TCP stack instead of
91  * inet6_sk(sk). This avoids a dereference and allows compiler optimizations.
92  * It is a specialized version of inet6_sk_generic().
93  */
94 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
95 					      struct tcp6_sock, tcp)->inet6)
96 
inet6_sk_rx_dst_set(struct sock * sk,const struct sk_buff * skb)97 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
98 {
99 	struct dst_entry *dst = skb_dst(skb);
100 
101 	if (dst && dst_hold_safe(dst)) {
102 		rcu_assign_pointer(sk->sk_rx_dst, dst);
103 		sk->sk_rx_dst_ifindex = skb->skb_iif;
104 		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
105 	}
106 }
107 
tcp_v6_init_seq(const struct sk_buff * skb)108 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
109 {
110 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
111 				ipv6_hdr(skb)->saddr.s6_addr32,
112 				tcp_hdr(skb)->dest,
113 				tcp_hdr(skb)->source);
114 }
115 
tcp_v6_init_ts_off(const struct net * net,const struct sk_buff * skb)116 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
117 {
118 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
119 				   ipv6_hdr(skb)->saddr.s6_addr32);
120 }
121 
tcp_v6_pre_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)122 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
123 			      int addr_len)
124 {
125 	/* This check is replicated from tcp_v6_connect() and intended to
126 	 * prevent BPF program called below from accessing bytes that are out
127 	 * of the bound specified by user in addr_len.
128 	 */
129 	if (addr_len < SIN6_LEN_RFC2133)
130 		return -EINVAL;
131 
132 	sock_owned_by_me(sk);
133 
134 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
135 }
136 
/* Active open on an AF_INET6 TCP socket.
 *
 * Validates the destination in @uaddr, hands v4-mapped destinations over to
 * tcp_v4_connect(), and otherwise routes the flow, settles the source
 * address, hashes the socket and starts the handshake via tcp_connect().
 *
 * @sk:       socket being connected; np->opt is read with
 *            lockdep_sock_is_held(), i.e. the socket lock is expected to be
 *            held by the caller
 * @uaddr:    destination, interpreted as struct sockaddr_in6; a wildcard
 *            address is rewritten in place to loopback
 * @addr_len: length of @uaddr as supplied by the caller
 *
 * Returns 0 on success, or a negative errno. On the paths that go through
 * the failure labels, inet_dport and sk_route_caps are cleared.
 */
tcp_v6_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)137 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
138 			  int addr_len)
139 {
140 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
141 	struct inet_connection_sock *icsk = inet_csk(sk);
142 	struct in6_addr *saddr = NULL, *final_p, final;
143 	struct inet_timewait_death_row *tcp_death_row;
144 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
145 	struct inet_sock *inet = inet_sk(sk);
146 	struct tcp_sock *tp = tcp_sk(sk);
147 	struct net *net = sock_net(sk);
148 	struct ipv6_txoptions *opt;
149 	struct dst_entry *dst;
150 	struct flowi6 fl6;
151 	int addr_type;
152 	int err;
153 
154 	if (addr_len < SIN6_LEN_RFC2133)
155 		return -EINVAL;
156 
157 	if (usin->sin6_family != AF_INET6)
158 		return -EAFNOSUPPORT;
159 
160 	memset(&fl6, 0, sizeof(fl6));
161 
	/* With SNDFLOW set, the flow label comes from sin6_flowinfo; a
	 * non-zero label must be one that fl6_sock_lookup() finds for @sk.
	 */
162 	if (inet6_test_bit(SNDFLOW, sk)) {
163 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
164 		IP6_ECN_flow_init(fl6.flowlabel);
165 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
166 			struct ip6_flowlabel *flowlabel;
167 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
168 			if (IS_ERR(flowlabel))
169 				return -EINVAL;
170 			fl6_sock_release(flowlabel);
171 		}
172 	}
173 
174 	/*
175 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
176 	 */
177 
178 	if (ipv6_addr_any(&usin->sin6_addr)) {
179 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
180 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
181 					       &usin->sin6_addr);
182 		else
183 			usin->sin6_addr = in6addr_loopback;
184 	}
185 
186 	addr_type = ipv6_addr_type(&usin->sin6_addr);
187 
188 	if (addr_type & IPV6_ADDR_MULTICAST)
189 		return -ENETUNREACH;
190 
191 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
192 		if (addr_len >= sizeof(struct sockaddr_in6) &&
193 		    usin->sin6_scope_id) {
194 			/* If interface is set while binding, indices
195 			 * must coincide.
196 			 */
197 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
198 				return -EINVAL;
199 
200 			sk->sk_bound_dev_if = usin->sin6_scope_id;
201 		}
202 
203 		/* Connect to link-local address requires an interface */
204 		if (!sk->sk_bound_dev_if)
205 			return -EINVAL;
206 	}
207 
	/* The destination differs from the one stored in the socket: drop
	 * the cached timestamp state and the previous write_seq.
	 */
208 	if (tp->rx_opt.ts_recent_stamp &&
209 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
210 		tp->rx_opt.ts_recent = 0;
211 		tp->rx_opt.ts_recent_stamp = 0;
212 		WRITE_ONCE(tp->write_seq, 0);
213 	}
214 
215 	sk->sk_v6_daddr = usin->sin6_addr;
216 	np->flow_label = fl6.flowlabel;
217 
218 	/*
219 	 *	TCP over IPv4
220 	 */
221 
222 	if (addr_type & IPV6_ADDR_MAPPED) {
223 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
224 		struct sockaddr_in sin;
225 
226 		if (ipv6_only_sock(sk))
227 			return -ENETUNREACH;
228 
229 		sin.sin_family = AF_INET;
230 		sin.sin_port = usin->sin6_port;
231 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
232 
		/* Switch the socket to the v4-mapped operation tables before
		 * calling tcp_v4_connect(); all of it is undone if that fails.
		 */
233 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
234 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
235 		if (sk_is_mptcp(sk))
236 			mptcpv6_handle_mapped(sk, true);
237 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
238 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
239 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
240 #endif
241 
242 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
243 
244 		if (err) {
245 			icsk->icsk_ext_hdr_len = exthdrlen;
246 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
247 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
248 			if (sk_is_mptcp(sk))
249 				mptcpv6_handle_mapped(sk, false);
250 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
251 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
252 			tp->af_specific = &tcp_sock_ipv6_specific;
253 #endif
254 			goto failure;
255 		}
256 		np->saddr = sk->sk_v6_rcv_saddr;
257 
258 		return err;
259 	}
260 
	/* Use the local address stored in the socket, when it is not the
	 * wildcard, as the flow source; otherwise fall back to np->saddr.
	 */
261 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
262 		saddr = &sk->sk_v6_rcv_saddr;
263 
264 	fl6.flowi6_proto = IPPROTO_TCP;
265 	fl6.daddr = sk->sk_v6_daddr;
266 	fl6.saddr = saddr ? *saddr : np->saddr;
267 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
268 	fl6.flowi6_oif = sk->sk_bound_dev_if;
269 	fl6.flowi6_mark = sk->sk_mark;
270 	fl6.fl6_dport = usin->sin6_port;
271 	fl6.fl6_sport = inet->inet_sport;
272 	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport)
273 		fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT;
274 	fl6.flowi6_uid = sk_uid(sk);
275 
276 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
277 	final_p = fl6_update_dst(&fl6, opt, &final);
278 
279 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
280 
281 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
282 	if (IS_ERR(dst)) {
283 		err = PTR_ERR(dst);
284 		goto failure;
285 	}
286 
287 	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
288 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
289 
	/* No usable local address in the socket: adopt fl6.saddr (presumably
	 * filled in by the route lookup above) and update bhash2 accordingly.
	 */
290 	if (!saddr) {
291 		saddr = &fl6.saddr;
292 
293 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
294 		if (err)
295 			goto failure;
296 	}
297 
298 	/* set the source address */
299 	np->saddr = *saddr;
300 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
301 
302 	sk->sk_gso_type = SKB_GSO_TCPV6;
303 	ip6_dst_store(sk, dst, false, false);
304 
	/* Extension header length: PSP overhead plus any socket tx options. */
305 	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
306 	if (opt)
307 		icsk->icsk_ext_hdr_len += opt->opt_flen +
308 					  opt->opt_nflen;
309 
310 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
311 
312 	inet->inet_dport = usin->sin6_port;
313 
	/* From here on, failures must go through late_failure so the state
	 * change and the bhash2 source address are rolled back.
	 */
314 	tcp_set_state(sk, TCP_SYN_SENT);
315 	err = inet6_hash_connect(tcp_death_row, sk);
316 	if (err)
317 		goto late_failure;
318 
319 	sk_set_txhash(sk);
320 
	/* Outside of repair mode, choose the initial sequence number (unless
	 * write_seq is already non-zero) and the timestamp offset.
	 */
321 	if (likely(!tp->repair)) {
322 		if (!tp->write_seq)
323 			WRITE_ONCE(tp->write_seq,
324 				   secure_tcpv6_seq(np->saddr.s6_addr32,
325 						    sk->sk_v6_daddr.s6_addr32,
326 						    inet->inet_sport,
327 						    inet->inet_dport));
328 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
329 						   sk->sk_v6_daddr.s6_addr32);
330 	}
331 
	/* When tcp_fastopen_defer_connect() returns true, return right away
	 * with the err it stored; otherwise a non-zero err is a failure.
	 */
332 	if (tcp_fastopen_defer_connect(sk, &err))
333 		return err;
334 	if (err)
335 		goto late_failure;
336 
337 	err = tcp_connect(sk);
338 	if (err)
339 		goto late_failure;
340 
341 	return 0;
342 
343 late_failure:
344 	tcp_set_state(sk, TCP_CLOSE);
345 	inet_bhash2_reset_saddr(sk);
346 failure:
347 	inet->inet_dport = 0;
348 	sk->sk_route_caps = 0;
349 	return err;
350 }
351 
tcp_v6_mtu_reduced(struct sock * sk)352 static void tcp_v6_mtu_reduced(struct sock *sk)
353 {
354 	struct dst_entry *dst;
355 	u32 mtu;
356 
357 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
358 		return;
359 
360 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
361 
362 	/* Drop requests trying to increase our current mss.
363 	 * Check done in __ip6_rt_update_pmtu() is too late.
364 	 */
365 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
366 		return;
367 
368 	dst = inet6_csk_update_pmtu(sk, mtu);
369 	if (!dst)
370 		return;
371 
372 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
373 		tcp_sync_mss(sk, dst_mtu(dst));
374 		tcp_simple_retransmit(sk);
375 	}
376 }
377 
/* ICMPv6 error handler for TCP.
 *
 * Looks up the socket matching the offending packet quoted in @skb and
 * reacts to the error: redirects update the route, packet-too-big updates
 * the path MTU (possibly deferred), other errors are reported to the socket
 * as hard or soft errors depending on its state.
 *
 * @skb:    skb->data is read as the IPv6 header of the offending packet and
 *          skb->data + @offset as its TCP header
 * @opt:    not used by this function
 * @type:   ICMPv6 type
 * @code:   ICMPv6 code
 * @offset: offset of the TCP header from skb->data
 * @info:   ICMPv6 info word in network byte order (the MTU for
 *          ICMPV6_PKT_TOOBIG)
 *
 * Returns -ENOENT when no socket matches, 0 otherwise.
 */
tcp_v6_err(struct sk_buff * skb,struct inet6_skb_parm * opt,u8 type,u8 code,int offset,__be32 info)378 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
379 		u8 type, u8 code, int offset, __be32 info)
380 {
381 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
382 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
383 	struct net *net = dev_net_rcu(skb->dev);
384 	struct request_sock *fastopen;
385 	struct ipv6_pinfo *np;
386 	struct tcp_sock *tp;
387 	__u32 seq, snd_una;
388 	struct sock *sk;
389 	bool fatal;
390 	int err;
391 
	/* The quoted packet was sent by us, so its destination is the remote
	 * end and its source is the local end of the connection.
	 */
392 	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
393 					&hdr->saddr, ntohs(th->source),
394 					skb->dev->ifindex, inet6_sdif(skb));
395 
396 	if (!sk) {
397 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 				  ICMP6_MIB_INERRORS);
399 		return -ENOENT;
400 	}
401 
402 	if (sk->sk_state == TCP_TIME_WAIT) {
403 		/* To increase the counter of ignored icmps for TCP-AO */
404 		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
405 		inet_twsk_put(inet_twsk(sk));
406 		return 0;
407 	}
408 	seq = ntohl(th->seq);
409 	fatal = icmpv6_err_convert(type, code, &err);
	/* Request sockets are handed to tcp_req_err(); no sock_put() is done
	 * here on this path (presumably tcp_req_err() drops the reference).
	 */
410 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
411 		tcp_req_err(sk, seq, fatal);
412 		return 0;
413 	}
414 
415 	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
416 		sock_put(sk);
417 		return 0;
418 	}
419 
420 	bh_lock_sock(sk);
	/* Only counted here; processing continues and the individual
	 * branches below check sock_owned_by_user() themselves.
	 */
421 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
422 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
423 
424 	if (sk->sk_state == TCP_CLOSE)
425 		goto out;
426 
427 	if (static_branch_unlikely(&ip6_min_hopcount)) {
428 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
429 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
430 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
431 			goto out;
432 		}
433 	}
434 
435 	tp = tcp_sk(sk);
436 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
437 	fastopen = rcu_dereference(tp->fastopen_rsk);
438 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	/* Ignore errors quoting a sequence number outside [snd_una, snd_nxt]. */
439 	if (sk->sk_state != TCP_LISTEN &&
440 	    !between(seq, snd_una, tp->snd_nxt)) {
441 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
442 		goto out;
443 	}
444 
445 	np = tcp_inet6_sk(sk);
446 
447 	if (type == NDISC_REDIRECT) {
448 		if (!sock_owned_by_user(sk)) {
449 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
450 
451 			if (dst)
452 				dst->ops->redirect(dst, sk, skb);
453 		}
454 		goto out;
455 	}
456 
457 	if (type == ICMPV6_PKT_TOOBIG) {
458 		u32 mtu = ntohl(info);
459 
460 		/* We are not interested in TCP_LISTEN and open_requests
461 		 * (SYN-ACKs sent out by Linux are always <576bytes so
462 		 * they should go through unfragmented).
463 		 */
464 		if (sk->sk_state == TCP_LISTEN)
465 			goto out;
466 
467 		if (!ip6_sk_accept_pmtu(sk))
468 			goto out;
469 
470 		if (mtu < IPV6_MIN_MTU)
471 			goto out;
472 
473 		WRITE_ONCE(tp->mtu_info, mtu);
474 
		/* Apply the new MTU now when the socket is not owned by user
		 * context; otherwise set the deferred flag and, if it was not
		 * already set, take an extra reference on the socket.
		 */
475 		if (!sock_owned_by_user(sk))
476 			tcp_v6_mtu_reduced(sk);
477 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
478 					   &sk->sk_tsq_flags))
479 			sock_hold(sk);
480 		goto out;
481 	}
482 
483 
484 	/* Might be for a request_sock */
485 	switch (sk->sk_state) {
486 	case TCP_SYN_SENT:
487 	case TCP_SYN_RECV:
488 		/* Only in fast or simultaneous open. If a fast open socket is
489 		 * already accepted it is treated as a connected one below.
490 		 */
491 		if (fastopen && !fastopen->sk)
492 			break;
493 
494 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
495 
496 		if (!sock_owned_by_user(sk))
497 			tcp_done_with_error(sk, err);
498 		else
499 			WRITE_ONCE(sk->sk_err_soft, err);
500 		goto out;
501 	case TCP_LISTEN:
502 		break;
503 	default:
504 		/* check if this ICMP message allows revert of backoff.
505 		 * (see RFC 6069)
506 		 */
507 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
508 		    code == ICMPV6_NOROUTE)
509 			tcp_ld_RTO_revert(sk, seq);
510 	}
511 
	/* Report a hard error only when the socket is not owned by user
	 * context and has RECVERR6 set; otherwise record it as a soft error.
	 */
512 	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
513 		WRITE_ONCE(sk->sk_err, err);
514 		sk_error_report(sk);
515 	} else {
516 		WRITE_ONCE(sk->sk_err_soft, err);
517 	}
518 out:
519 	bh_unlock_sock(sk);
520 	sock_put(sk);
521 	return 0;
522 }
523 
524 
tcp_v6_send_synack(const struct sock * sk,struct dst_entry * dst,struct flowi * fl,struct request_sock * req,struct tcp_fastopen_cookie * foc,enum tcp_synack_type synack_type,struct sk_buff * syn_skb)525 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
526 			      struct flowi *fl,
527 			      struct request_sock *req,
528 			      struct tcp_fastopen_cookie *foc,
529 			      enum tcp_synack_type synack_type,
530 			      struct sk_buff *syn_skb)
531 {
532 	struct inet_request_sock *ireq = inet_rsk(req);
533 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
534 	struct ipv6_txoptions *opt;
535 	struct flowi6 *fl6 = &fl->u.ip6;
536 	struct sk_buff *skb;
537 	int err = -ENOMEM;
538 	u8 tclass;
539 
540 	/* First, grab a route. */
541 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
542 					       IPPROTO_TCP)) == NULL)
543 		goto done;
544 
545 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
546 
547 	if (skb) {
548 		tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
549 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
550 				    &ireq->ir_v6_rmt_addr);
551 
552 		fl6->daddr = ireq->ir_v6_rmt_addr;
553 		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
554 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
555 
556 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
557 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
558 				(np->tclass & INET_ECN_MASK) :
559 				np->tclass;
560 
561 		if (!INET_ECN_is_capable(tclass) &&
562 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
563 			tclass |= INET_ECN_ECT_0;
564 
565 		rcu_read_lock();
566 		opt = ireq->ipv6_opt;
567 		if (!opt)
568 			opt = rcu_dereference(np->opt);
569 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
570 			       opt, tclass, READ_ONCE(sk->sk_priority));
571 		rcu_read_unlock();
572 		err = net_xmit_eval(err);
573 	}
574 
575 done:
576 	return err;
577 }
578 
579 
tcp_v6_reqsk_destructor(struct request_sock * req)580 static void tcp_v6_reqsk_destructor(struct request_sock *req)
581 {
582 	kfree(inet_rsk(req)->ipv6_opt);
583 	consume_skb(inet_rsk(req)->pktopts);
584 }
585 
586 #ifdef CONFIG_TCP_MD5SIG
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)587 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
588 						   const struct in6_addr *addr,
589 						   int l3index)
590 {
591 	return tcp_md5_do_lookup(sk, l3index,
592 				 (union tcp_md5_addr *)addr, AF_INET6);
593 }
594 
tcp_v6_md5_lookup(const struct sock * sk,const struct sock * addr_sk)595 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
596 						const struct sock *addr_sk)
597 {
598 	int l3index;
599 
600 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
601 						 addr_sk->sk_bound_dev_if);
602 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
603 				    l3index);
604 }
605 
/* Handle the TCP_MD5SIG / TCP_MD5SIG_EXT socket option on an IPv6 socket:
 * copy in a struct tcp_md5sig and add or delete the described key.
 *
 * @sk:      socket the key belongs to
 * @optname: option name; prefix and ifindex flags are only honoured for
 *           TCP_MD5SIG_EXT
 * @optval:  user buffer holding a struct tcp_md5sig
 * @optlen:  length of @optval
 *
 * A zero tcpm_keylen deletes the key. v4-mapped peer addresses are stored
 * as AF_INET keys using the embedded IPv4 address.
 *
 * Returns -EINVAL on malformed input, -EFAULT when the copy from @optval
 * fails, -EKEYREJECTED when tcp_ao_required() reports a match for the peer,
 * otherwise the result of tcp_md5_do_add() / tcp_md5_do_del().
 */
tcp_v6_parse_md5_keys(struct sock * sk,int optname,sockptr_t optval,int optlen)606 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
607 				 sockptr_t optval, int optlen)
608 {
609 	struct tcp_md5sig cmd;
610 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
611 	union tcp_ao_addr *addr;
612 	int l3index = 0;
613 	u8 prefixlen;
614 	bool l3flag;
615 	u8 flags;
616 
617 	if (optlen < sizeof(cmd))
618 		return -EINVAL;
619 
620 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
621 		return -EFAULT;
622 
623 	if (sin6->sin6_family != AF_INET6)
624 		return -EINVAL;
625 
	/* Both are derived from the IFINDEX flag: flags is passed on to the
	 * add/del helpers, l3flag selects the l3index given to
	 * tcp_ao_required().
	 */
626 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
627 	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
628 
	/* An explicit prefix length is limited to 128 bits, or 32 for
	 * v4-mapped addresses; without one the full address length is used.
	 */
629 	if (optname == TCP_MD5SIG_EXT &&
630 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
631 		prefixlen = cmd.tcpm_prefixlen;
632 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
633 					prefixlen > 32))
634 			return -EINVAL;
635 	} else {
636 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
637 	}
638 
639 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
640 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
641 		struct net_device *dev;
642 
643 		rcu_read_lock();
644 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
645 		if (dev && netif_is_l3_master(dev))
646 			l3index = dev->ifindex;
647 		rcu_read_unlock();
648 
649 		/* ok to reference set/not set outside of rcu;
650 		 * right now device MUST be an L3 master
651 		 */
652 		if (!dev || !l3index)
653 			return -EINVAL;
654 	}
655 
	/* Zero key length means: delete the key for this peer. */
656 	if (!cmd.tcpm_keylen) {
657 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
658 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
659 					      AF_INET, prefixlen,
660 					      l3index, flags);
661 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
662 				      AF_INET6, prefixlen, l3index, flags);
663 	}
664 
665 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
666 		return -EINVAL;
667 
668 	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
669 		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
670 
671 		/* Don't allow keys for peers that have a matching TCP-AO key.
672 		 * See the comment in tcp_ao_add_cmd()
673 		 */
674 		if (tcp_ao_required(sk, addr, AF_INET,
675 				    l3flag ? l3index : -1, false))
676 			return -EKEYREJECTED;
677 		return tcp_md5_do_add(sk, addr,
678 				      AF_INET, prefixlen, l3index, flags,
679 				      cmd.tcpm_key, cmd.tcpm_keylen);
680 	}
681 
682 	addr = (union tcp_md5_addr *)&sin6->sin6_addr;
683 
684 	/* Don't allow keys for peers that have a matching TCP-AO key.
685 	 * See the comment in tcp_ao_add_cmd()
686 	 */
687 	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
688 		return -EKEYREJECTED;
689 
690 	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
691 			      cmd.tcpm_key, cmd.tcpm_keylen);
692 }
693 
tcp_v6_md5_hash_headers(struct tcp_sigpool * hp,const struct in6_addr * daddr,const struct in6_addr * saddr,const struct tcphdr * th,int nbytes)694 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
695 				   const struct in6_addr *daddr,
696 				   const struct in6_addr *saddr,
697 				   const struct tcphdr *th, int nbytes)
698 {
699 	struct tcp6_pseudohdr *bp;
700 	struct scatterlist sg;
701 	struct tcphdr *_th;
702 
703 	bp = hp->scratch;
704 	/* 1. TCP pseudo-header (RFC2460) */
705 	bp->saddr = *saddr;
706 	bp->daddr = *daddr;
707 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
708 	bp->len = cpu_to_be32(nbytes);
709 
710 	_th = (struct tcphdr *)(bp + 1);
711 	memcpy(_th, th, sizeof(*th));
712 	_th->check = 0;
713 
714 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
715 	ahash_request_set_crypt(hp->req, &sg, NULL,
716 				sizeof(*bp) + sizeof(*th));
717 	return crypto_ahash_update(hp->req);
718 }
719 
tcp_v6_md5_hash_hdr(char * md5_hash,const struct tcp_md5sig_key * key,const struct in6_addr * daddr,struct in6_addr * saddr,const struct tcphdr * th)720 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
721 			       const struct in6_addr *daddr, struct in6_addr *saddr,
722 			       const struct tcphdr *th)
723 {
724 	struct tcp_sigpool hp;
725 
726 	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
727 		goto clear_hash_nostart;
728 
729 	if (crypto_ahash_init(hp.req))
730 		goto clear_hash;
731 	if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
732 		goto clear_hash;
733 	if (tcp_md5_hash_key(&hp, key))
734 		goto clear_hash;
735 	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
736 	if (crypto_ahash_final(hp.req))
737 		goto clear_hash;
738 
739 	tcp_sigpool_end(&hp);
740 	return 0;
741 
742 clear_hash:
743 	tcp_sigpool_end(&hp);
744 clear_hash_nostart:
745 	memset(md5_hash, 0, 16);
746 	return 1;
747 }
748 
tcp_v6_md5_hash_skb(char * md5_hash,const struct tcp_md5sig_key * key,const struct sock * sk,const struct sk_buff * skb)749 static int tcp_v6_md5_hash_skb(char *md5_hash,
750 			       const struct tcp_md5sig_key *key,
751 			       const struct sock *sk,
752 			       const struct sk_buff *skb)
753 {
754 	const struct tcphdr *th = tcp_hdr(skb);
755 	const struct in6_addr *saddr, *daddr;
756 	struct tcp_sigpool hp;
757 
758 	if (sk) { /* valid for establish/request sockets */
759 		saddr = &sk->sk_v6_rcv_saddr;
760 		daddr = &sk->sk_v6_daddr;
761 	} else {
762 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
763 		saddr = &ip6h->saddr;
764 		daddr = &ip6h->daddr;
765 	}
766 
767 	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
768 		goto clear_hash_nostart;
769 
770 	if (crypto_ahash_init(hp.req))
771 		goto clear_hash;
772 
773 	if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
774 		goto clear_hash;
775 	if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
776 		goto clear_hash;
777 	if (tcp_md5_hash_key(&hp, key))
778 		goto clear_hash;
779 	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
780 	if (crypto_ahash_final(hp.req))
781 		goto clear_hash;
782 
783 	tcp_sigpool_end(&hp);
784 	return 0;
785 
786 clear_hash:
787 	tcp_sigpool_end(&hp);
788 clear_hash_nostart:
789 	memset(md5_hash, 0, 16);
790 	return 1;
791 }
792 #endif
793 
/* Initialise the IPv6 part of request socket @req from the incoming @skb:
 * record the addresses, the incoming interface for link-local peers and,
 * when @tw_isn is zero and the listener asked for packet options, keep a
 * reference to @skb in ireq->pktopts.
 */
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb,
			    u32 tw_isn)
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
	struct inet_request_sock *ireq = inet_rsk(req);
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);

	ireq->ir_v6_rmt_addr = ip6h->saddr;
	ireq->ir_v6_loc_addr = ip6h->daddr;
	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
	ireq->ir_loc_addr = LOOPBACK4_IPV6;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    (ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL))
		ireq->ir_iif = tcp_v6_iif(skb);

	if (tw_isn)
		return;

	/* Keep the skb when any received-option reporting is requested. */
	if (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim ||
	    np->rxopt.bits.rxohlim ||
	    inet6_test_bit(REPFLOW, sk_listener)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}
822 
/* Initialise @req from @skb and look up a route for it.
 *
 * Returns NULL when security_inet_conn_request() rejects the request,
 * otherwise the result of inet6_csk_route_req().
 */
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req,
					  u32 tw_isn)
{
	tcp_v6_init_req(req, sk, skb, tw_isn);

	return security_inet_conn_request(sk, skb, req) ?
		NULL : inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
836 
837 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
838 	.family		=	AF_INET6,
839 	.obj_size	=	sizeof(struct tcp6_request_sock),
840 	.send_ack	=	tcp_v6_reqsk_send_ack,
841 	.destructor	=	tcp_v6_reqsk_destructor,
842 	.send_reset	=	tcp_v6_send_reset,
843 	.syn_ack_timeout =	tcp_syn_ack_timeout,
844 };
845 
846 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
847 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
848 				sizeof(struct ipv6hdr),
849 #ifdef CONFIG_TCP_MD5SIG
850 	.req_md5_lookup	=	tcp_v6_md5_lookup,
851 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
852 #endif
853 #ifdef CONFIG_TCP_AO
854 	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
855 	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
856 	.ao_synack_hash =	tcp_v6_ao_synack_hash,
857 #endif
858 #ifdef CONFIG_SYN_COOKIES
859 	.cookie_init_seq =	cookie_v6_init_sequence,
860 #endif
861 	.route_req	=	tcp_v6_route_req,
862 	.init_seq	=	tcp_v6_init_seq,
863 	.init_ts_off	=	tcp_v6_init_ts_off,
864 	.send_synack	=	tcp_v6_send_synack,
865 };
866 
/* Build and send a data-less TCP reply (ACK or RST) to the segment in @skb.
 *
 * Addresses and ports are taken from @skb and swapped. The reply is sent
 * through the per-netns control socket (net->ipv6.tcp_sk).
 *
 * @sk:       socket the reply relates to, or NULL; may be a timewait socket
 * @skb:      received segment being answered
 * @seq:      sequence number of the reply
 * @ack:      acknowledgment number of the reply
 * @win:      window value of the reply
 * @tsval:    timestamp value; only emitted when @tsecr is non-zero
 * @tsecr:    timestamp echo reply; non-zero enables the timestamp option
 * @oif:      output interface index, 0 to derive it from @skb
 * @rst:      non-zero to send a RST
 * @tclass:   traffic class handed to ip6_xmit()
 * @label:    flow label for the reply
 * @priority: priority handed to ip6_xmit()
 * @txhash:   when non-zero, stored as the L4 hash of the reply skb
 * @key:      MD5 / TCP-AO key used to sign the reply, if any
 *
 * Returns nothing; the reply is silently dropped when the skb allocation
 * or the route lookup fails.
 */
tcp_v6_send_response(const struct sock * sk,struct sk_buff * skb,u32 seq,u32 ack,u32 win,u32 tsval,u32 tsecr,int oif,int rst,u8 tclass,__be32 label,u32 priority,u32 txhash,struct tcp_key * key)867 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
868 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
869 				 int oif, int rst, u8 tclass, __be32 label,
870 				 u32 priority, u32 txhash, struct tcp_key *key)
871 {
872 	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
873 	unsigned int tot_len = sizeof(struct tcphdr);
874 	struct sock *ctl_sk = net->ipv6.tcp_sk;
875 	const struct tcphdr *th = tcp_hdr(skb);
876 	__be32 mrst = 0, *topt;
877 	struct dst_entry *dst;
878 	struct sk_buff *buff;
879 	struct tcphdr *t1;
880 	struct flowi6 fl6;
881 	u32 mark = 0;
882 
	/* Work out the TCP header length including all options to be sent. */
883 	if (tsecr)
884 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
885 	if (tcp_key_is_md5(key))
886 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
887 	if (tcp_key_is_ao(key))
888 		tot_len += tcp_ao_len_aligned(key->ao_key);
889 
890 #ifdef CONFIG_MPTCP
	/* The MPTCP reset option is only added to RSTs not signed with MD5. */
891 	if (rst && !tcp_key_is_md5(key)) {
892 		mrst = mptcp_reset_option(skb);
893 
894 		if (mrst)
895 			tot_len += sizeof(__be32);
896 	}
897 #endif
898 
899 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
900 	if (!buff)
901 		return;
902 
903 	skb_reserve(buff, MAX_TCP_HEADER);
904 
905 	t1 = skb_push(buff, tot_len);
906 	skb_reset_transport_header(buff);
907 
908 	/* Swap the send and the receive. */
909 	memset(t1, 0, sizeof(*t1));
910 	t1->dest = th->source;
911 	t1->source = th->dest;
912 	t1->doff = tot_len / 4;
913 	t1->seq = htonl(seq);
914 	t1->ack_seq = htonl(ack);
	/* ACK is set on every non-RST reply, and on a RST only when the
	 * received segment did not carry ACK itself.
	 */
915 	t1->ack = !rst || !th->ack;
916 	t1->rst = rst;
917 	t1->window = htons(win);
918 
	/* Options are written right behind the fixed TCP header. */
919 	topt = (__be32 *)(t1 + 1);
920 
921 	if (tsecr) {
922 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
923 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
924 		*topt++ = htonl(tsval);
925 		*topt++ = htonl(tsecr);
926 	}
927 
928 	if (mrst)
929 		*topt++ = mrst;
930 
931 #ifdef CONFIG_TCP_MD5SIG
932 	if (tcp_key_is_md5(key)) {
933 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
934 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* The received packet's source is the reply's destination. */
935 		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
936 				    &ipv6_hdr(skb)->saddr,
937 				    &ipv6_hdr(skb)->daddr, t1);
938 	}
939 #endif
940 #ifdef CONFIG_TCP_AO
941 	if (tcp_key_is_ao(key)) {
942 		*topt++ = htonl((TCPOPT_AO << 24) |
943 				(tcp_ao_len(key->ao_key) << 16) |
944 				(key->ao_key->sndid << 8) |
945 				(key->rcv_next));
946 
947 		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
948 				key->traffic_key,
949 				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
950 				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
951 				t1, key->sne);
952 	}
953 #endif
954 
	/* Describe the reply flow: addresses swapped relative to @skb. */
955 	memset(&fl6, 0, sizeof(fl6));
956 	fl6.daddr = ipv6_hdr(skb)->saddr;
957 	fl6.saddr = ipv6_hdr(skb)->daddr;
958 	fl6.flowlabel = label;
959 
960 	buff->ip_summed = CHECKSUM_PARTIAL;
961 
962 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
963 
964 	fl6.flowi6_proto = IPPROTO_TCP;
	/* Pick the output interface: the incoming interface when the
	 * destination needs a strict route and no @oif was given; otherwise
	 * @oif, falling back to skb_iif when that is an L3 master device.
	 */
965 	if (rt6_need_strict(&fl6.daddr) && !oif)
966 		fl6.flowi6_oif = tcp_v6_iif(skb);
967 	else {
968 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
969 			oif = skb->skb_iif;
970 
971 		fl6.flowi6_oif = oif;
972 	}
973 
974 	if (sk) {
975 		/* unconstify the socket only to attach it to buff with care. */
976 		skb_set_owner_edemux(buff, (struct sock *)sk);
977 		psp_reply_set_decrypted(sk, buff);
978 
979 		if (sk->sk_state == TCP_TIME_WAIT)
980 			mark = inet_twsk(sk)->tw_mark;
981 		else
982 			mark = READ_ONCE(sk->sk_mark);
983 		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
984 	}
985 	if (txhash) {
986 		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
987 		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
988 	}
	/* A non-zero IP6_REPLY_MARK() value takes precedence over the
	 * socket's mark.
	 */
989 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
990 	fl6.fl6_dport = t1->dest;
991 	fl6.fl6_sport = t1->source;
992 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
993 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
994 
995 	/* Pass a socket to ip6_dst_lookup_flow() whether or not this is a
996 	 * RST. The underlying function will use it to retrieve the network
997 	 * namespace.
998 	 */
999 	if (sk && sk->sk_state != TCP_TIME_WAIT)
1000 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
1001 	else
1002 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
1003 	if (!IS_ERR(dst)) {
1004 		skb_dst_set(buff, dst);
1005 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1006 			 tclass, priority);
1007 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1008 		if (rst)
1009 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1010 		return;
1011 	}
1012 
	/* No route: the reply cannot be sent, free it. */
1013 	kfree_skb(buff);
1014 }
1015 
/* Reply to @skb with a RST segment.
 *
 * @sk:     socket the segment was matched to, or NULL when there is none.
 *          It is tested below for being a full socket or a TIME_WAIT one.
 * @skb:    the incoming segment being answered.
 * @reason: reset reason, handed to the trace_tcp_send_reset() tracepoint.
 *
 * No reset is emitted when the incoming segment is itself a RST, when there
 * is no socket and the destination is not unicast, when the authentication
 * options cannot be parsed, or when the MD5 / TCP-AO key handling below
 * bails out through the "out" label.
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	const __u8 *md5_hash_location = NULL;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	bool allocated_traffic_key = false;
#endif
	const struct tcp_ao_hdr *aoh;
	struct tcp_key key = {};
	u32 seq = 0, ack_seq = 0;
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;
#ifdef CONFIG_TCP_MD5SIG
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif

	/* Never answer a RST with another RST. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
		return;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	/* Held until the "out" label at the end of the function. */
	rcu_read_lock();
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
	} else if (md5_hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side is lost. Try to find a listening socket
		 * through the source port, and then find the MD5 key through
		 * that listening socket.
		 * We do not lose security here: the incoming packet is checked
		 * against the MD5 hash computed with the key that was found,
		 * and no RST is generated if the hash does not match.
		 */
		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key.md5_key)
			goto out;
		key.type = TCP_KEY_MD5;

		/* Recompute the signature and compare it with the 16 bytes
		 * carried in the segment; stay silent on any mismatch.
		 */
		genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
		if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	/* If the segment carried an ACK, the reset takes its sequence number
	 * from the acknowledged value. Otherwise the reset acknowledges
	 * everything the segment occupied: payload length plus one for each
	 * of SYN and FIN.
	 */
	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

#ifdef CONFIG_TCP_AO
	if (aoh) {
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* A non-zero return means no reset is sent. */
		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
					 &key.ao_key, &key.traffic_key,
					 &allocated_traffic_key,
					 &key.rcv_next, &key.sne))
			goto out;
		key.type = TCP_KEY_AO;
	}
#endif

	/* Pick the output interface, flow label, priority and tx hash from
	 * the socket when there is one. The TIME_WAIT values, when
	 * applicable, are assigned last and therefore win.
	 */
	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			if (inet6_test_bit(REPFLOW, sk))
				label = ip6_flowlabel(ipv6h);
			priority = READ_ONCE(sk->sk_priority);
			txhash = sk->sk_txhash;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		/* No socket: reflect the flow label only if the sysctl asks. */
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	trace_tcp_send_reset(sk, skb, reason);

	/* The three zeros sit where tcp_v6_send_ack() passes win, tsval and
	 * tsecr; the 1 after oif is presumably the reset flag (the ACK path
	 * passes 0 there). ECN bits are stripped from the traffic class.
	 */
	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
			     label, priority, txhash,
			     &key);

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
	/* allocated_traffic_key is only ever set through the pointer handed
	 * to tcp_ao_prepare_reset() above.
	 */
	if (allocated_traffic_key)
		kfree(key.traffic_key);
	rcu_read_unlock();
#endif
}
1152 
/* Send an ACK in reply to @skb.  Every argument is forwarded untouched to
 * tcp_v6_send_response(); the only thing fixed here is the argument that
 * tcp_v6_send_reset() sets to 1, which is 0 for an ACK.
 */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	const int rst = 0;

	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, rst,
			     tclass, label, priority, txhash, key);
}
1161 
/* Send an ACK on behalf of a TIME_WAIT socket and drop one reference on it.
 *
 * @sk:        the timewait socket (accessed via inet_twsk() / tcp_twsk()).
 * @skb:       the incoming segment being acknowledged.
 * @tw_status: when TCP_TW_ACK_OOW, the ECN bits are cleared from the
 *             traffic class used for the reply.
 *
 * The reply is signed with a TCP-AO key when one is found for the segment,
 * otherwise with the timewait socket's MD5 key if MD5 is in use.
 * inet_twsk_put() is called on every path, including the early "out" exit
 * taken when the authentication options cannot be parsed.
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
				enum tcp_tw_status tw_status)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	u8 tclass = tw->tw_tclass;
	struct tcp_key key = {};

	if (tw_status == TCP_TW_ACK_OOW)
		tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao_info;

	if (static_branch_unlikely(&tcp_ao_needed.key)) {

		/* FIXME: the segment to-be-acked is not verified yet */
		ao_info = rcu_dereference(tcptw->ao_info);
		if (ao_info) {
			const struct tcp_ao_hdr *aoh;

			/* Invalid TCP option size or twice included auth */
			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
				goto out;
			if (aoh)
				key.ao_key = tcp_ao_established_key(sk, ao_info,
								    aoh->rnext_keyid, -1);
		}
	}
	/* key.ao_key can only be non-NULL if ao_info was loaded above, so
	 * ao_info is initialized whenever this branch is entered.
	 */
	if (key.ao_key) {
		struct tcp_ao_key *rnext_key;

		key.traffic_key = snd_other_key(key.ao_key);
		/* rcv_next switches to our rcv_next */
		rnext_key = READ_ONCE(ao_info->rnext_key);
		key.rcv_next = rnext_key->rcvid;
		key.sne = READ_ONCE(ao_info->snd_sne);
		key.type = TCP_KEY_AO;
#else
	/* With TCP-AO compiled out, open a never-taken branch so that the
	 * "} else if" / "}" below still pair up with an "if".
	 */
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		key.md5_key = tcp_twsk_md5_key(tcptw);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* Sequence numbers, window, timestamps, bound device, flow label,
	 * priority and tx hash all come from the timewait socket.
	 */
	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
			READ_ONCE(tcptw->tw_rcv_nxt),
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_tw_tsval(tcptw),
			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
			tw->tw_priority, tw->tw_txhash);

#ifdef CONFIG_TCP_AO
out:
#endif
	inet_twsk_put(tw);
}
1223 
/* Send an ACK for a connection request that has not become a full socket.
 *
 * @sk:  the socket the request belongs to; its state selects the sequence
 *       number used (see the comment above the tcp_v6_send_ack() call).
 * @skb: the incoming segment being acknowledged.
 * @req: the request socket supplying rcv_nxt, window, timestamps and txhash.
 *
 * When the request used TCP-AO, a matching key is looked up and a traffic
 * key is allocated for the duration of the send; if the segment lacks a
 * valid AO option, no key is found, or the allocation fails, the function
 * returns without sending anything. Otherwise an MD5 key is used when one
 * is configured for the peer.
 */
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		/* First try the key the peer asked for via rnext_keyid. */
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		/* Temporary traffic key; freed after the ACK is sent below. */
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	/* With TCP-AO compiled out, open a never-taken branch so that the
	 * "} else if" / "}" below still pair up with an "if".
	 */
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0,
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	/* Only the AO path allocates key.traffic_key in this function. */
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}
1299 
1300 
/* Run the SYN-cookie check for a segment received on listener @sk.
 *
 * Segments with the SYN flag set, and every segment when SYN cookies are
 * compiled out, yield @sk unchanged.  Anything else yields whatever
 * cookie_v6_check() returns.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (tcp_hdr(skb)->syn)
		return sk;

	return cookie_v6_check(sk, skb);
#else
	return sk;
#endif
}
1311 
1312 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1313 			 struct tcphdr *th, u32 *cookie)
1314 {
1315 	u16 mss = 0;
1316 #ifdef CONFIG_SYN_COOKIES
1317 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1318 				    &tcp_request_sock_ipv6_ops, sk, th);
1319 	if (mss) {
1320 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1321 		tcp_synq_overflow(sk);
1322 	}
1323 #endif
1324 	return mss;
1325 }
1326 
1327 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1328 {
1329 	if (skb->protocol == htons(ETH_P_IP))
1330 		return tcp_v4_conn_request(sk, skb);
1331 
1332 	if (!ipv6_unicast_destination(skb))
1333 		goto drop;
1334 
1335 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1336 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1337 		return 0;
1338 	}
1339 
1340 	return tcp_conn_request(&tcp6_request_sock_ops,
1341 				&tcp_request_sock_ipv6_ops, sk, skb);
1342 
1343 drop:
1344 	tcp_listendrop(sk);
1345 	return 0; /* don't send reset */
1346 }
1347 
1348 static void tcp_v6_restore_cb(struct sk_buff *skb)
1349 {
1350 	/* We need to move header back to the beginning if xfrm6_policy_check()
1351 	 * and tcp_v6_fill_cb() are going to be called again.
1352 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1353 	 */
1354 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1355 		sizeof(struct inet6_skb_parm));
1356 }
1357 
/* Create the child socket for a connection request on listener @sk.
 *
 * @sk:         the listening socket.
 * @skb:        the segment that triggers the child creation.
 * @req:        the request socket being promoted.
 * @dst:        route to use, or NULL to have one looked up here.
 * @req_unhash: request socket passed (via req_to_sk()) to
 *              inet_ehash_nolisten(); may be NULL.
 * @own_req:    set from the result of inet_ehash_nolisten(), or by
 *              tcp_v4_syn_recv_sock() on the v4-mapped path.
 *
 * Returns the new socket, or NULL on failure.  On the native IPv6 path a
 * failure also counts a listen drop via tcp_listendrop().
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		/* Let the IPv4 code build the child, then patch up the
		 * IPv6-specific state below.
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		/* Start from a copy of the listener's IPv6 state; the
		 * pointers that must not be shared are reset further down.
		 */
		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (inet6_test_bit(REPFLOW, sk))
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment the IPv4 TCP code
		   worked with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	/* Native IPv6 from here on. */
	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto exit;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	/* Start from a copy of the listener's IPv6 state; the pointers that
	 * must not be shared are reset further down.
	 */
	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	/* After this call the failure paths below (put_and_exit) no longer
	 * call dst_release() on dst themselves.
	 */
	ip6_dst_store(newsk, dst, false, false);

	newnp->saddr = ireq->ir_v6_loc_addr;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	/* Options attached to the request take precedence over the
	 * listener's own.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	/* opt is re-tested because it is reassigned from ipv6_dup_options()
	 * above.
	 */
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	if (!tcp_rsk_used_ao(req)) {
		/* Copy over the MD5 key from the original socket */
		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		if (key) {
			const union tcp_md5_addr *addr;

			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
				goto put_and_exit;
		}
	}
#endif
#ifdef CONFIG_TCP_AO
	/* Copy over tcp_ao_info if any */
	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
		goto put_and_exit; /* OOM */
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

	/* Failure exits fall through from the most to the least cleanup. */
exit_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	/* The child already exists: tear it down, then take the common
	 * exit, which skips dst_release().
	 */
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
1580 
1581 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1582 							   u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Processes one segment for @sk.  IPv4 packets are passed on to
 * tcp_v4_do_rcv() and its result is returned; every other path visible
 * here returns 0.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	reason = psp_sk_rx_policy_check(sk, skb);
	if (reason)
		goto err_discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	/* Keep a clone so the options can be latched at ipv6_pktoptions
	 * after the original skb has been handed to the TCP state machine.
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			/* Drop the cached rx dst if the segment arrived on a
			 * different interface or the dst check fails.
			 */
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		/* nsk == sk: carry on with the listener below.
		 * nsk is another socket: feed the segment to that child.
		 * nsk == NULL: nothing more to do here.
		 */
		if (nsk != sk) {
			if (nsk) {
				reason = tcp_child_process(sk, nsk, skb);
				if (reason)
					goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	reason = tcp_rcv_state_process(sk, skb);
	if (reason)
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
err_discard:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* Either latch the clone as the new pktoptions or clear
		 * pktoptions; in both cases opt_skb ends up holding the
		 * previous value, which is released just below.
		 */
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}
1730 
1731 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1732 			   const struct tcphdr *th)
1733 {
1734 	/* This is tricky: we move IP6CB at its correct location into
1735 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1736 	 * _decode_session6() uses IP6CB().
1737 	 * barrier() makes sure compiler won't play aliasing games.
1738 	 */
1739 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1740 		sizeof(struct inet6_skb_parm));
1741 	barrier();
1742 
1743 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1744 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1745 				    skb->len - th->doff*4);
1746 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1747 	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
1748 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1749 	TCP_SKB_CB(skb)->sacked = 0;
1750 	TCP_SKB_CB(skb)->has_rxtstamp =
1751 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1752 }
1753 
/* Receive entry point for a TCP segment carried over IPv6.
 *
 * Validates the TCP header and checksum, looks up the owning socket and
 * dispatches on its state:
 *   - TCP_TIME_WAIT:     handled at do_time_wait;
 *   - TCP_NEW_SYN_RECV:  the request is checked against its listener and may
 *                        produce a child socket;
 *   - anything else:     policy / hash / filter checks at "process", then
 *                        tcp_v6_do_rcv() directly or via the socket backlog.
 * When no socket is found a reset is sent, provided the checksum is valid.
 *
 * Returns -1 when tcp_v6_do_rcv() reported a non-zero result on the
 * "process" path, and 0 on every other path visible here.
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	/* doff counts 32-bit words; it cannot be below the fixed header. */
	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* Re-read the header pointers after the pull above. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		/* From here on sk is the listener the request hangs off. */
		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_hash(sk, req, skb,
						       &hdr->saddr, &hdr->daddr,
						       AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_skbadd(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			/* The listener is no longer listening: try to move
			 * the request to another socket, otherwise drop the
			 * request and redo the lookup.
			 */
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb, &drop_reason)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
					    &drop_reason);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			/* Continue at "process" on the listener itself; the
			 * cb is restored because "process" fills it again.
			 */
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				enum sk_rst_reason rst_reason;

				rst_reason = sk_rst_convert_drop_reason(drop_reason);
				tcp_v6_send_reset(nsk, skb, rst_reason);
				goto discard_and_relse;
			}
			sock_put(sk);
			return 0;
		}
	}

process:
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
				       AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	if (tcp_filter(sk, skb, &drop_reason))
		goto discard_and_relse;

	/* Header pointers are re-read after tcp_filter(). */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	/* Listeners are processed without taking the socket lock. */
	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	/* Process now if user context does not own the socket, otherwise
	 * queue the segment on the backlog.
	 */
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	/* csum_error and bad_packet live inside this branch so that jumps
	 * from elsewhere in the function share the same statistics updates.
	 */
	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_skbadd(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
					       &drop_reason);
	switch (tw_status) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			/* A listener exists for this SYN: retire the
			 * timewait socket and process the segment on the
			 * listener.  refcounted is cleared, so no sock_put()
			 * is done on sk2 later.
			 */
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		/* tcp_v6_timewait_ack() also releases a timewait reference. */
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
2018 
2019 void tcp_v6_early_demux(struct sk_buff *skb)
2020 {
2021 	struct net *net = dev_net_rcu(skb->dev);
2022 	const struct ipv6hdr *hdr;
2023 	const struct tcphdr *th;
2024 	struct sock *sk;
2025 
2026 	if (skb->pkt_type != PACKET_HOST)
2027 		return;
2028 
2029 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
2030 		return;
2031 
2032 	hdr = ipv6_hdr(skb);
2033 	th = tcp_hdr(skb);
2034 
2035 	if (th->doff < sizeof(struct tcphdr) / 4)
2036 		return;
2037 
2038 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
2039 	sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
2040 					&hdr->daddr, ntohs(th->dest),
2041 					inet6_iif(skb), inet6_sdif(skb));
2042 	if (sk) {
2043 		skb->sk = sk;
2044 		skb->destructor = sock_edemux;
2045 		if (sk_fullsock(sk)) {
2046 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
2047 
2048 			if (dst)
2049 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
2050 			if (dst &&
2051 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
2052 				skb_dst_set_noref(skb, dst);
2053 		}
2054 	}
2055 }
2056 
/*
 * Time-wait socket description for TCPv6, referenced from
 * tcpv6_prot.twsk_prot.  Only the object size is specified here.
 */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
};
2060 
2061 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
2062 {
2063 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
2064 }
2065 
/*
 * Address-family operations for native TCP-over-IPv6 sockets.
 * tcp_v6_init_sock() installs this table as icsk->icsk_af_ops.
 * Every hook points at an IPv6 helper; compare with ipv6_mapped, which
 * keeps the same layout but substitutes IPv4 helpers for several hooks.
 */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};
2078 
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/*
 * TCP-MD5 / TCP-AO hooks for native IPv6 sockets.  The table only exists
 * when at least one of the two options is configured, and each group of
 * members is compiled in only for its own option.  tcp_v6_init_sock()
 * installs it as tcp_sk(sk)->af_specific.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
#endif
};
#endif
2094 
2095 /*
2096  *	TCP over IPv4 via INET6 API
2097  */
/*
 * Address-family operations for the IPv4-over-INET6-API case.
 * Transmit, checksum, header rebuild, rx dst caching, header length and
 * MTU reduction use the IPv4 helpers; connection-request handling and
 * the socket-option entry points remain the IPv6 ones.
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
2110 
2111 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/*
 * TCP-MD5 / TCP-AO hooks for the IPv4-over-INET6-API case.
 * Key lookup and hashing for MD5, and hashing and key calculation for AO,
 * use the IPv4 helpers; key parsing (md5_parse, ao_parse) and ao_lookup
 * stay on the IPv6 helpers, as in tcp_sock_ipv6_specific.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
#endif
};
2125 
/*
 * sk_destruct callback installed by tcp_v6_init_sock() when TCP-MD5 or
 * TCP-AO is configured.  Runs the MD5 and AO teardown helpers and then
 * the generic inet6 socket destructor, in that order.
 */
static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	/* NOTE(review): meaning of the 'false' flag is defined by
	 * tcp_ao_destroy_sock(), which is not visible here - confirm.
	 */
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
2132 #endif
2133 
2134 /* NOTE: A lot of things set to zero explicitly by call to
2135  *       sk_alloc() so need not be done here.
2136  */
2137 static int tcp_v6_init_sock(struct sock *sk)
2138 {
2139 	struct inet_connection_sock *icsk = inet_csk(sk);
2140 
2141 	tcp_init_sock(sk);
2142 
2143 	icsk->icsk_af_ops = &ipv6_specific;
2144 
2145 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2146 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
2147 	sk->sk_destruct = tcp6_destruct_sock;
2148 #endif
2149 
2150 	return 0;
2151 }
2152 
2153 #ifdef CONFIG_PROC_FS
2154 /* Proc filesystem TCPv6 sock list dumping. */
2155 static void get_openreq6(struct seq_file *seq,
2156 			 const struct request_sock *req, int i)
2157 {
2158 	long ttd = req->rsk_timer.expires - jiffies;
2159 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
2160 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
2161 
2162 	if (ttd < 0)
2163 		ttd = 0;
2164 
2165 	seq_printf(seq,
2166 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2167 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
2168 		   i,
2169 		   src->s6_addr32[0], src->s6_addr32[1],
2170 		   src->s6_addr32[2], src->s6_addr32[3],
2171 		   inet_rsk(req)->ir_num,
2172 		   dest->s6_addr32[0], dest->s6_addr32[1],
2173 		   dest->s6_addr32[2], dest->s6_addr32[3],
2174 		   ntohs(inet_rsk(req)->ir_rmt_port),
2175 		   TCP_SYN_RECV,
2176 		   0, 0, /* could print option size, but that is af dependent. */
2177 		   1,   /* timers active (only the expire timer) */
2178 		   jiffies_to_clock_t(ttd),
2179 		   req->num_timeout,
2180 		   from_kuid_munged(seq_user_ns(seq),
2181 				    sk_uid(req->rsk_listener)),
2182 		   0,  /* non standard timer */
2183 		   0, /* open_requests have no inode */
2184 		   0, req);
2185 }
2186 
2187 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2188 {
2189 	const struct in6_addr *dest, *src;
2190 	__u16 destp, srcp;
2191 	int timer_active;
2192 	unsigned long timer_expires;
2193 	const struct inet_sock *inet = inet_sk(sp);
2194 	const struct tcp_sock *tp = tcp_sk(sp);
2195 	const struct inet_connection_sock *icsk = inet_csk(sp);
2196 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2197 	u8 icsk_pending;
2198 	int rx_queue;
2199 	int state;
2200 
2201 	dest  = &sp->sk_v6_daddr;
2202 	src   = &sp->sk_v6_rcv_saddr;
2203 	destp = ntohs(inet->inet_dport);
2204 	srcp  = ntohs(inet->inet_sport);
2205 
2206 	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
2207 	if (icsk_pending == ICSK_TIME_RETRANS ||
2208 	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2209 	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
2210 		timer_active	= 1;
2211 		timer_expires	= icsk_timeout(icsk);
2212 	} else if (icsk_pending == ICSK_TIME_PROBE0) {
2213 		timer_active	= 4;
2214 		timer_expires	= icsk_timeout(icsk);
2215 	} else if (timer_pending(&sp->sk_timer)) {
2216 		timer_active	= 2;
2217 		timer_expires	= sp->sk_timer.expires;
2218 	} else {
2219 		timer_active	= 0;
2220 		timer_expires = jiffies;
2221 	}
2222 
2223 	state = inet_sk_state_load(sp);
2224 	if (state == TCP_LISTEN)
2225 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2226 	else
2227 		/* Because we don't lock the socket,
2228 		 * we might find a transient negative value.
2229 		 */
2230 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2231 				      READ_ONCE(tp->copied_seq), 0);
2232 
2233 	seq_printf(seq,
2234 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2235 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2236 		   i,
2237 		   src->s6_addr32[0], src->s6_addr32[1],
2238 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2239 		   dest->s6_addr32[0], dest->s6_addr32[1],
2240 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2241 		   state,
2242 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2243 		   rx_queue,
2244 		   timer_active,
2245 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2246 		   READ_ONCE(icsk->icsk_retransmits),
2247 		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
2248 		   READ_ONCE(icsk->icsk_probes_out),
2249 		   sock_i_ino(sp),
2250 		   refcount_read(&sp->sk_refcnt), sp,
2251 		   jiffies_to_clock_t(icsk->icsk_rto),
2252 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2253 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2254 		   tcp_snd_cwnd(tp),
2255 		   state == TCP_LISTEN ?
2256 			fastopenq->max_qlen :
2257 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2258 		   );
2259 }
2260 
2261 static void get_timewait6_sock(struct seq_file *seq,
2262 			       struct inet_timewait_sock *tw, int i)
2263 {
2264 	long delta = tw->tw_timer.expires - jiffies;
2265 	const struct in6_addr *dest, *src;
2266 	__u16 destp, srcp;
2267 
2268 	dest = &tw->tw_v6_daddr;
2269 	src  = &tw->tw_v6_rcv_saddr;
2270 	destp = ntohs(tw->tw_dport);
2271 	srcp  = ntohs(tw->tw_sport);
2272 
2273 	seq_printf(seq,
2274 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2275 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2276 		   i,
2277 		   src->s6_addr32[0], src->s6_addr32[1],
2278 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2279 		   dest->s6_addr32[0], dest->s6_addr32[1],
2280 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2281 		   READ_ONCE(tw->tw_substate), 0, 0,
2282 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2283 		   refcount_read(&tw->tw_refcnt), tw);
2284 }
2285 
2286 static int tcp6_seq_show(struct seq_file *seq, void *v)
2287 {
2288 	struct tcp_iter_state *st;
2289 	struct sock *sk = v;
2290 
2291 	if (v == SEQ_START_TOKEN) {
2292 		seq_puts(seq,
2293 			 "  sl  "
2294 			 "local_address                         "
2295 			 "remote_address                        "
2296 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2297 			 "   uid  timeout inode\n");
2298 		goto out;
2299 	}
2300 	st = seq->private;
2301 
2302 	if (sk->sk_state == TCP_TIME_WAIT)
2303 		get_timewait6_sock(seq, v, st->num);
2304 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2305 		get_openreq6(seq, v, st->num);
2306 	else
2307 		get_tcp6_sock(seq, v, st->num);
2308 out:
2309 	return 0;
2310 }
2311 
/*
 * seq_file operations for the tcp6 proc file registered in
 * tcp6_proc_init().  Only ->show is TCPv6 specific; iteration uses the
 * shared tcp_seq_start/next/stop helpers.
 */
static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};
2318 
/*
 * Per-file data handed to proc_create_net_data() in tcp6_proc_init().
 * NOTE(review): presumably read by the shared tcp_seq_* iterator to
 * restrict the listing to AF_INET6 sockets - confirm against its users.
 */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};
2322 
2323 int __net_init tcp6_proc_init(struct net *net)
2324 {
2325 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2326 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2327 		return -ENOMEM;
2328 	return 0;
2329 }
2330 
/* Remove the "tcp6" proc entry that tcp6_proc_init() created for @net. */
void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
2335 #endif
2336 
/*
 * Protocol descriptor for TCP over IPv6, bound to SOCK_STREAM/IPPROTO_TCP
 * through tcpv6_protosw.
 *
 * Most handlers are the address-family independent tcp_* and inet_*
 * helpers.  The IPv6-specific parts are the connect path (pre_connect,
 * connect), socket init, backlog receive, the object size and ipv6_pinfo
 * offset, and the time-wait / request socket descriptions.
 */
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	/* The IPv4 destroy helper is used as-is; with MD5/AO configured,
	 * tcp_v6_init_sock() additionally sets sk->sk_destruct to
	 * tcp6_destruct_sock.
	 */
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.splice_eof		= tcp_splice_eof,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	/* Memory accounting below points at the tcp_* / net_aligned_data
	 * objects and the ipv4.sysctl_tcp_{w,r}mem offsets in struct net;
	 * nothing in this group is IPv6 specific.
	 */
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &net_aligned_data.tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
2388 
2389 
/*
 * Socket-switch entry that ties SOCK_STREAM / IPPROTO_TCP to tcpv6_prot
 * and inet6_stream_ops.  Registered by tcpv6_init() and unregistered by
 * tcpv6_exit().
 */
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};
2398 
2399 static int __net_init tcpv6_net_init(struct net *net)
2400 {
2401 	int res;
2402 
2403 	res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2404 				   SOCK_RAW, IPPROTO_TCP, net);
2405 	if (!res)
2406 		net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
2407 
2408 	return res;
2409 }
2410 
/* Per-namespace exit: destroy the control socket set up by tcpv6_net_init(). */
static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
2415 
/*
 * Per-network-namespace hooks for TCPv6; registered in tcpv6_init() and
 * unregistered in tcpv6_exit().
 */
static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
};
2420 
2421 int __init tcpv6_init(void)
2422 {
2423 	int ret;
2424 
2425 	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
2426 		.handler     = tcp_v6_rcv,
2427 		.err_handler = tcp_v6_err,
2428 		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
2429 	};
2430 	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2431 	if (ret)
2432 		goto out;
2433 
2434 	/* register inet6 protocol */
2435 	ret = inet6_register_protosw(&tcpv6_protosw);
2436 	if (ret)
2437 		goto out_tcpv6_protocol;
2438 
2439 	ret = register_pernet_subsys(&tcpv6_net_ops);
2440 	if (ret)
2441 		goto out_tcpv6_protosw;
2442 
2443 	ret = mptcpv6_init();
2444 	if (ret)
2445 		goto out_tcpv6_pernet_subsys;
2446 
2447 out:
2448 	return ret;
2449 
2450 out_tcpv6_pernet_subsys:
2451 	unregister_pernet_subsys(&tcpv6_net_ops);
2452 out_tcpv6_protosw:
2453 	inet6_unregister_protosw(&tcpv6_protosw);
2454 out_tcpv6_protocol:
2455 	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2456 	goto out;
2457 }
2458 
/*
 * Undo the registrations made by tcpv6_init(), in reverse order:
 * per-namespace operations, socket-switch entry, then the IPPROTO_TCP
 * protocol handler.
 * NOTE(review): nothing here pairs with the mptcpv6_init() call made in
 * tcpv6_init() - confirm that no MPTCP teardown is required.
 */
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}
2465