1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/aligned_data.h>
45 #include <net/tcp.h>
46 #include <net/ndisc.h>
47 #include <net/inet6_hashtables.h>
48 #include <net/inet6_connection_sock.h>
49 #include <net/ipv6.h>
50 #include <net/transp_v6.h>
51 #include <net/addrconf.h>
52 #include <net/ip6_route.h>
53 #include <net/ip6_checksum.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
56 #include <net/xfrm.h>
57 #include <net/snmp.h>
58 #include <net/dsfield.h>
59 #include <net/timewait_sock.h>
60 #include <net/inet_common.h>
61 #include <net/secure_seq.h>
62 #include <net/hotdata.h>
63 #include <net/busy_poll.h>
64 #include <net/rstreason.h>
65 #include <net/psp.h>
66
67 #include <linux/proc_fs.h>
68 #include <linux/seq_file.h>
69
70 #include <crypto/md5.h>
71 #include <crypto/utils.h>
72
73 #include <trace/events/tcp.h>
74
75 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
76 enum sk_rst_reason reason);
77 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
78 struct request_sock *req);
79
80 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
81
82 static const struct inet_connection_sock_af_ops ipv6_mapped;
83 const struct inet_connection_sock_af_ops ipv6_specific;
84 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
86 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
87 #endif
88
89 /* Helper returning the inet6 address from a given tcp socket.
90 * It can be used in TCP stack instead of inet6_sk(sk).
91 * This avoids a dereference and allow compiler optimizations.
92 * It is a specialized version of inet6_sk_generic().
93 */
94 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
95 struct tcp6_sock, tcp)->inet6)
96
inet6_sk_rx_dst_set(struct sock * sk,const struct sk_buff * skb)97 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
98 {
99 struct dst_entry *dst = skb_dst(skb);
100
101 if (dst && dst_hold_safe(dst)) {
102 rcu_assign_pointer(sk->sk_rx_dst, dst);
103 sk->sk_rx_dst_ifindex = skb->skb_iif;
104 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
105 }
106 }
107
108 static union tcp_seq_and_ts_off
tcp_v6_init_seq_and_ts_off(const struct net * net,const struct sk_buff * skb)109 tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
110 {
111 return secure_tcpv6_seq_and_ts_off(net,
112 ipv6_hdr(skb)->daddr.s6_addr32,
113 ipv6_hdr(skb)->saddr.s6_addr32,
114 tcp_hdr(skb)->dest,
115 tcp_hdr(skb)->source);
116 }
117
tcp_v6_pre_connect(struct sock * sk,struct sockaddr_unsized * uaddr,int addr_len)118 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
119 int addr_len)
120 {
121 /* This check is replicated from tcp_v6_connect() and intended to
122 * prevent BPF program called below from accessing bytes that are out
123 * of the bound specified by user in addr_len.
124 */
125 if (addr_len < SIN6_LEN_RFC2133)
126 return -EINVAL;
127
128 sock_owned_by_me(sk);
129
130 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
131 }
132
/* Connect an IPv6 TCP socket to a remote peer.
 *
 * Validates the sockaddr, resolves flow label options, handles the BSD
 * "connect to :: means loopback" convention, falls back to tcp_v4_connect()
 * for v4-mapped destinations, performs the IPv6 route lookup, binds the
 * source address, picks the initial sequence number / timestamp offset,
 * and finally sends the SYN via tcp_connect().
 *
 * Returns 0 on success or a negative errno; on failure the socket state
 * and bound source address are rolled back (late_failure/failure labels).
 */
tcp_v6_connect(struct sock * sk,struct sockaddr_unsized * uaddr,int addr_len)133 static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
134 int addr_len)
135 {
136 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
137 struct inet_connection_sock *icsk = inet_csk(sk);
138 struct inet_timewait_death_row *tcp_death_row;
139 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
140 struct in6_addr *saddr = NULL, *final_p;
141 struct inet_sock *inet = inet_sk(sk);
142 struct tcp_sock *tp = tcp_sk(sk);
143 struct net *net = sock_net(sk);
144 struct ipv6_txoptions *opt;
145 struct dst_entry *dst;
146 struct flowi6 *fl6;
147 int addr_type;
148 int err;
149
150 if (addr_len < SIN6_LEN_RFC2133)
151 return -EINVAL;
152
153 if (usin->sin6_family != AF_INET6)
154 return -EAFNOSUPPORT;
155
156 fl6 = &inet_sk(sk)->cork.fl.u.ip6;
157 memset(fl6, 0, sizeof(*fl6));
158
/* With IPV6_FLOWINFO_SEND enabled, honour the flow label supplied by
 * userspace; validate that any non-zero label actually exists.
 */
159 if (inet6_test_bit(SNDFLOW, sk)) {
160 fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
161 IP6_ECN_flow_init(fl6->flowlabel);
162 if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
163 struct ip6_flowlabel *flowlabel;
164 flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
165 if (IS_ERR(flowlabel))
166 return -EINVAL;
167 fl6_sock_release(flowlabel);
168 }
169 }
170
171 /*
172 * connect() to INADDR_ANY means loopback (BSD'ism).
173 */
174
175 if (ipv6_addr_any(&usin->sin6_addr)) {
176 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
177 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
178 &usin->sin6_addr);
179 else
180 usin->sin6_addr = in6addr_loopback;
181 }
182
183 addr_type = ipv6_addr_type(&usin->sin6_addr);
184
185 if (addr_type & IPV6_ADDR_MULTICAST)
186 return -ENETUNREACH;
187
188 if (addr_type&IPV6_ADDR_LINKLOCAL) {
189 if (addr_len >= sizeof(struct sockaddr_in6) &&
190 usin->sin6_scope_id) {
191 /* If interface is set while binding, indices
192 * must coincide.
193 */
194 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
195 return -EINVAL;
196
197 sk->sk_bound_dev_if = usin->sin6_scope_id;
198 }
199
200 /* Connect to link-local address requires an interface */
201 if (!sk->sk_bound_dev_if)
202 return -EINVAL;
203 }
204
/* Reconnecting to a different peer: stale per-destination timestamp
 * state must not leak into the new connection.
 */
205 if (tp->rx_opt.ts_recent_stamp &&
206 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
207 tp->rx_opt.ts_recent = 0;
208 tp->rx_opt.ts_recent_stamp = 0;
209 WRITE_ONCE(tp->write_seq, 0);
210 }
211
212 sk->sk_v6_daddr = usin->sin6_addr;
213 np->flow_label = fl6->flowlabel;
214
215 /*
216 * TCP over IPv4
217 */
218
/* v4-mapped destination: delegate to tcp_v4_connect() after switching
 * the af_ops; undo the switch if the IPv4 connect fails.
 */
219 if (addr_type & IPV6_ADDR_MAPPED) {
220 u32 exthdrlen = icsk->icsk_ext_hdr_len;
221 struct sockaddr_in sin;
222
223 if (ipv6_only_sock(sk))
224 return -ENETUNREACH;
225
226 sin.sin_family = AF_INET;
227 sin.sin_port = usin->sin6_port;
228 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
229
230 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
231 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
232 if (sk_is_mptcp(sk))
233 mptcpv6_handle_mapped(sk, true);
234 sk->sk_backlog_rcv = tcp_v4_do_rcv;
235 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
236 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
237 #endif
238
239 err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));
240
241 if (err) {
242 icsk->icsk_ext_hdr_len = exthdrlen;
243 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
244 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
245 if (sk_is_mptcp(sk))
246 mptcpv6_handle_mapped(sk, false);
247 sk->sk_backlog_rcv = tcp_v6_do_rcv;
248 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
249 tp->af_specific = &tcp_sock_ipv6_specific;
250 #endif
251 goto failure;
252 }
253 np->saddr = sk->sk_v6_rcv_saddr;
254
255 return err;
256 }
257
/* Genuine IPv6 destination from here on: build the flow and route it. */
258 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
259 saddr = &sk->sk_v6_rcv_saddr;
260
261 fl6->flowi6_proto = IPPROTO_TCP;
262 fl6->daddr = sk->sk_v6_daddr;
263 fl6->saddr = saddr ? *saddr : np->saddr;
264 fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
265 fl6->flowi6_oif = sk->sk_bound_dev_if;
266 fl6->flowi6_mark = sk->sk_mark;
267 fl6->fl6_dport = usin->sin6_port;
268 fl6->fl6_sport = inet->inet_sport;
269 if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport)
270 fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT;
271 fl6->flowi6_uid = sk_uid(sk);
272
273 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
274 final_p = fl6_update_dst(fl6, opt, &np->final);
275
276 security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
277
278 dst = ip6_dst_lookup_flow(net, sk, fl6, final_p);
279 if (IS_ERR(dst)) {
280 err = PTR_ERR(dst);
281 goto failure;
282 }
283
284 tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
285 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
286
/* No source address bound yet: take the one the route lookup chose. */
287 if (!saddr) {
288 saddr = &fl6->saddr;
289
290 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
291 if (err)
292 goto failure;
293 }
294
295 /* set the source address */
296 np->saddr = *saddr;
297 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
298
299 sk->sk_gso_type = SKB_GSO_TCPV6;
300 ip6_dst_store(sk, dst, false, false);
301
302 icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
303 if (opt)
304 icsk->icsk_ext_hdr_len += opt->opt_flen +
305 opt->opt_nflen;
306
307 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
308
309 inet->inet_dport = usin->sin6_port;
310
310 inet->inet_dport = usin->sin6_port;
311 tcp_set_state(sk, TCP_SYN_SENT);
312 err = inet6_hash_connect(tcp_death_row, sk);
313 if (err)
314 goto late_failure;
315
316 sk_set_txhash(sk);
317
/* Non-repair path: pick the secure ISN and timestamp offset now that
 * both endpoints of the 4-tuple are known.
 */
318 if (likely(!tp->repair)) {
319 union tcp_seq_and_ts_off st;
320
321 st = secure_tcpv6_seq_and_ts_off(net,
322 np->saddr.s6_addr32,
323 sk->sk_v6_daddr.s6_addr32,
324 inet->inet_sport,
325 inet->inet_dport);
326 if (!tp->write_seq)
327 WRITE_ONCE(tp->write_seq, st.seq);
328 tp->tsoffset = st.ts_off;
329 }
330
331 if (tcp_fastopen_defer_connect(sk, &err))
332 return err;
333 if (err)
334 goto late_failure;
335
336 err = tcp_connect(sk);
337 if (err)
338 goto late_failure;
339
340 return 0;
341
342 late_failure:
343 tcp_set_state(sk, TCP_CLOSE);
344 inet_bhash2_reset_saddr(sk);
345 failure:
346 inet->inet_dport = 0;
347 sk->sk_route_caps = 0;
348 return err;
349 }
350
tcp_v6_mtu_reduced(struct sock * sk)351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353 struct dst_entry *dst;
354 u32 mtu, dmtu;
355
356 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357 return;
358
359 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360
361 /* Drop requests trying to increase our current mss.
362 * Check done in __ip6_rt_update_pmtu() is too late.
363 */
364 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365 return;
366
367 dst = inet6_csk_update_pmtu(sk, mtu);
368 if (!dst)
369 return;
370
371 dmtu = dst6_mtu(dst);
372 if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
373 tcp_sync_mss(sk, dmtu);
374 tcp_simple_retransmit(sk);
375 }
376 }
377
/* ICMPv6 error handler for TCP.
 *
 * Locates the socket matching the embedded TCP/IPv6 header, then dispatches
 * on the ICMP type: redirects update the route, PKT_TOOBIG triggers PMTU
 * handling (possibly deferred if the socket is user-locked), and other
 * errors are converted to errno values and reported to the socket.
 *
 * Returns 0 when a socket was found and handled, -ENOENT otherwise.
 */
tcp_v6_err(struct sk_buff * skb,struct inet6_skb_parm * opt,u8 type,u8 code,int offset,__be32 info)378 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
379 u8 type, u8 code, int offset, __be32 info)
380 {
381 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
382 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
383 struct net *net = dev_net_rcu(skb->dev);
384 struct request_sock *fastopen;
385 struct ipv6_pinfo *np;
386 struct tcp_sock *tp;
387 __u32 seq, snd_una;
388 struct sock *sk;
389 bool fatal;
390 int err;
391
392 sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
393 &hdr->saddr, ntohs(th->source),
394 skb->dev->ifindex, inet6_sdif(skb));
395
396 if (!sk) {
397 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 ICMP6_MIB_INERRORS);
399 return -ENOENT;
400 }
401
402 if (sk->sk_state == TCP_TIME_WAIT) {
403 /* To increase the counter of ignored icmps for TCP-AO */
404 tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
405 inet_twsk_put(inet_twsk(sk));
406 return 0;
407 }
408 seq = ntohl(th->seq);
409 fatal = icmpv6_err_convert(type, code, &err);
410 if (sk->sk_state == TCP_NEW_SYN_RECV) {
411 tcp_req_err(sk, seq, fatal);
412 return 0;
413 }
414
415 if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
416 sock_put(sk);
417 return 0;
418 }
419
420 bh_lock_sock(sk);
/* PKT_TOOBIG is still processed below even when user-locked (it is
 * deferred); everything else on a locked socket is only counted.
 */
421 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
422 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
423
424 if (sk->sk_state == TCP_CLOSE)
425 goto out;
426
427 if (static_branch_unlikely(&ip6_min_hopcount)) {
428 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
429 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
430 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
431 goto out;
432 }
433 }
434
435 tp = tcp_sk(sk);
436 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
437 fastopen = rcu_dereference(tp->fastopen_rsk);
438 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* Ignore ICMPs whose quoted sequence is outside our send window. */
439 if (sk->sk_state != TCP_LISTEN &&
440 !between(seq, snd_una, tp->snd_nxt)) {
441 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
442 goto out;
443 }
444
445 np = tcp_inet6_sk(sk);
446
447 if (type == NDISC_REDIRECT) {
448 if (!sock_owned_by_user(sk)) {
449 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
450
451 if (dst)
452 dst->ops->redirect(dst, sk, skb);
453 }
454 goto out;
455 }
456
457 if (type == ICMPV6_PKT_TOOBIG) {
458 u32 mtu = ntohl(info);
459
460 /* We are not interested in TCP_LISTEN and open_requests
461 * (SYN-ACKs send out by Linux are always <576bytes so
462 * they should go through unfragmented).
463 */
464 if (sk->sk_state == TCP_LISTEN)
465 goto out;
466
467 if (!ip6_sk_accept_pmtu(sk))
468 goto out;
469
470 if (mtu < IPV6_MIN_MTU)
471 goto out;
472
473 WRITE_ONCE(tp->mtu_info, mtu);
474
/* Handle the PMTU update now, or defer it to release_sock() via the
 * TSQ flag when the socket is owned by the user.
 */
475 if (!sock_owned_by_user(sk))
476 tcp_v6_mtu_reduced(sk);
477 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
478 &sk->sk_tsq_flags))
479 sock_hold(sk);
480 goto out;
481 }
482
483
484 /* Might be for an request_sock */
485 switch (sk->sk_state) {
486 case TCP_SYN_SENT:
487 case TCP_SYN_RECV:
488 /* Only in fast or simultaneous open. If a fast open socket is
489 * already accepted it is treated as a connected one below.
490 */
491 if (fastopen && !fastopen->sk)
492 break;
493
494 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
495
496 if (!sock_owned_by_user(sk))
497 tcp_done_with_error(sk, err);
498 else
499 WRITE_ONCE(sk->sk_err_soft, err);
500 goto out;
501 case TCP_LISTEN:
502 break;
503 default:
504 /* check if this ICMP message allows revert of backoff.
505 * (see RFC 6069)
506 */
507 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
508 code == ICMPV6_NOROUTE)
509 tcp_ld_RTO_revert(sk, seq);
510 }
511
512 if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
513 WRITE_ONCE(sk->sk_err, err);
514 sk_error_report(sk);
515 } else {
516 WRITE_ONCE(sk->sk_err_soft, err);
517 }
518 out:
519 bh_unlock_sock(sk);
520 sock_put(sk);
521 return 0;
522 }
523
524
/* Build and transmit a SYN-ACK for the given request sock.
 *
 * Grabs a route if the caller did not supply one, builds the SYN-ACK skb,
 * computes its checksum, selects the traffic class (optionally reflecting
 * the peer's TOS per sysctl_tcp_reflect_tos), and transmits via ip6_xmit()
 * under RCU protection for the tx options.
 *
 * Returns a net_xmit_* value, or -ENOMEM if no skb/route could be obtained.
 */
tcp_v6_send_synack(const struct sock * sk,struct dst_entry * dst,struct flowi * fl,struct request_sock * req,struct tcp_fastopen_cookie * foc,enum tcp_synack_type synack_type,struct sk_buff * syn_skb)525 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
526 struct flowi *fl,
527 struct request_sock *req,
528 struct tcp_fastopen_cookie *foc,
529 enum tcp_synack_type synack_type,
530 struct sk_buff *syn_skb)
531 {
532 struct inet_request_sock *ireq = inet_rsk(req);
533 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
534 struct ipv6_txoptions *opt;
535 struct flowi6 *fl6 = &fl->u.ip6;
536 struct sk_buff *skb;
537 int err = -ENOMEM;
538 u8 tclass;
539
540 /* First, grab a route. */
541 if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req,
542 IPPROTO_TCP)) == NULL)
543 goto done;
544
545 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
546
547 if (skb) {
548 tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
549 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
550 &ireq->ir_v6_rmt_addr);
551
552 fl6->daddr = ireq->ir_v6_rmt_addr;
553 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
554 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
555
/* Reflect the peer's DSCP bits when tcp_reflect_tos is enabled, but
 * always keep our own ECN bits.
 */
556 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
557 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
558 (np->tclass & INET_ECN_MASK) :
559 np->tclass;
560
561 if (!INET_ECN_is_capable(tclass) &&
562 tcp_bpf_ca_needs_ecn((struct sock *)req))
563 tclass |= INET_ECN_ECT_0;
564
565 rcu_read_lock();
566 opt = ireq->ipv6_opt;
567 if (!opt)
568 opt = rcu_dereference(np->opt);
569 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
570 opt, tclass, READ_ONCE(sk->sk_priority));
571 rcu_read_unlock();
572 err = net_xmit_eval(err);
573 }
574
575 done:
576 return err;
577 }
578
579
tcp_v6_reqsk_destructor(struct request_sock * req)580 static void tcp_v6_reqsk_destructor(struct request_sock *req)
581 {
582 kfree(inet_rsk(req)->ipv6_opt);
583 consume_skb(inet_rsk(req)->pktopts);
584 }
585
586 #ifdef CONFIG_TCP_MD5SIG
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)587 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
588 const struct in6_addr *addr,
589 int l3index)
590 {
591 return tcp_md5_do_lookup(sk, l3index,
592 (union tcp_md5_addr *)addr, AF_INET6);
593 }
594
tcp_v6_md5_lookup(const struct sock * sk,const struct sock * addr_sk)595 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
596 const struct sock *addr_sk)
597 {
598 int l3index;
599
600 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
601 addr_sk->sk_bound_dev_if);
602 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
603 l3index);
604 }
605
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler for IPv6 sockets.
 *
 * Copies the tcp_md5sig command from userspace, validates the address
 * family, optional prefix length and optional bound interface, and then
 * adds or (for a zero key length) deletes the key. A v4-mapped address is
 * stored as an AF_INET key so it matches IPv4 traffic on a dual-stack
 * socket. Keys that would collide with an existing TCP-AO key for the same
 * peer are rejected with -EKEYREJECTED.
 */
tcp_v6_parse_md5_keys(struct sock * sk,int optname,sockptr_t optval,int optlen)606 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
607 sockptr_t optval, int optlen)
608 {
609 struct tcp_md5sig cmd;
610 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
611 union tcp_ao_addr *addr;
612 int l3index = 0;
613 u8 prefixlen;
614 bool l3flag;
615 u8 flags;
616
617 if (optlen < sizeof(cmd))
618 return -EINVAL;
619
620 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
621 return -EFAULT;
622
623 if (sin6->sin6_family != AF_INET6)
624 return -EINVAL;
625
626 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
627 l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
628
/* An explicit prefix length is only valid with TCP_MD5SIG_EXT; otherwise
 * default to a host prefix (32 for v4-mapped, 128 for native IPv6).
 */
629 if (optname == TCP_MD5SIG_EXT &&
630 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
631 prefixlen = cmd.tcpm_prefixlen;
632 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
633 prefixlen > 32))
634 return -EINVAL;
635 } else {
636 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
637 }
638
639 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
640 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
641 struct net_device *dev;
642
643 rcu_read_lock();
644 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
645 if (dev && netif_is_l3_master(dev))
646 l3index = dev->ifindex;
647 rcu_read_unlock();
648
649 /* ok to reference set/not set outside of rcu;
650 * right now device MUST be an L3 master
651 */
652 if (!dev || !l3index)
653 return -EINVAL;
654 }
655
/* Zero key length means "delete the key for this address". */
656 if (!cmd.tcpm_keylen) {
657 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
658 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
659 AF_INET, prefixlen,
660 l3index, flags);
661 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
662 AF_INET6, prefixlen, l3index, flags);
663 }
664
665 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
666 return -EINVAL;
667
668 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
669 addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
670
671 /* Don't allow keys for peers that have a matching TCP-AO key.
672 * See the comment in tcp_ao_add_cmd()
673 */
674 if (tcp_ao_required(sk, addr, AF_INET,
675 l3flag ? l3index : -1, false))
676 return -EKEYREJECTED;
677 return tcp_md5_do_add(sk, addr,
678 AF_INET, prefixlen, l3index, flags,
679 cmd.tcpm_key, cmd.tcpm_keylen);
680 }
681
682 addr = (union tcp_md5_addr *)&sin6->sin6_addr;
683
684 /* Don't allow keys for peers that have a matching TCP-AO key.
685 * See the comment in tcp_ao_add_cmd()
686 */
687 if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
688 return -EKEYREJECTED;
689
690 return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
691 cmd.tcpm_key, cmd.tcpm_keylen);
692 }
693
tcp_v6_md5_hash_headers(struct md5_ctx * ctx,const struct in6_addr * daddr,const struct in6_addr * saddr,const struct tcphdr * th,int nbytes)694 static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx,
695 const struct in6_addr *daddr,
696 const struct in6_addr *saddr,
697 const struct tcphdr *th, int nbytes)
698 {
699 struct {
700 struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */
701 struct tcphdr tcp;
702 } h;
703
704 h.ip.saddr = *saddr;
705 h.ip.daddr = *daddr;
706 h.ip.protocol = cpu_to_be32(IPPROTO_TCP);
707 h.ip.len = cpu_to_be32(nbytes);
708 h.tcp = *th;
709 h.tcp.check = 0;
710 md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp));
711 }
712
713 static noinline_for_stack void
tcp_v6_md5_hash_hdr(char * md5_hash,const struct tcp_md5sig_key * key,const struct in6_addr * daddr,struct in6_addr * saddr,const struct tcphdr * th)714 tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
715 const struct in6_addr *daddr, struct in6_addr *saddr,
716 const struct tcphdr *th)
717 {
718 struct md5_ctx ctx;
719
720 md5_init(&ctx);
721 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2);
722 tcp_md5_hash_key(&ctx, key);
723 md5_final(&ctx, md5_hash);
724 }
725
726 static noinline_for_stack void
tcp_v6_md5_hash_skb(char * md5_hash,const struct tcp_md5sig_key * key,const struct sock * sk,const struct sk_buff * skb)727 tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
728 const struct sock *sk, const struct sk_buff *skb)
729 {
730 const struct tcphdr *th = tcp_hdr(skb);
731 const struct in6_addr *saddr, *daddr;
732 struct md5_ctx ctx;
733
734 if (sk) { /* valid for establish/request sockets */
735 saddr = &sk->sk_v6_rcv_saddr;
736 daddr = &sk->sk_v6_daddr;
737 } else {
738 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
739 saddr = &ip6h->saddr;
740 daddr = &ip6h->daddr;
741 }
742
743 md5_init(&ctx);
744 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len);
745 tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2);
746 tcp_md5_hash_key(&ctx, key);
747 md5_final(&ctx, md5_hash);
748 }
749 #endif
750
/* Fill in the IPv6-specific fields of a freshly allocated request sock
 * from the incoming SYN: addresses, incoming interface for link-local
 * peers, and (when the listener asked for packet options) a reference to
 * the SYN skb itself.
 */
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb,
			    u32 tw_isn)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);

	ireq->ir_v6_rmt_addr = ip6h->saddr;
	ireq->ir_v6_loc_addr = ip6h->daddr;
	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
	ireq->ir_loc_addr = LOOPBACK4_IPV6;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!tw_isn) {
		bool keep_skb;

		/* Pin the SYN skb when any rx option the listener enabled
		 * will need data from it at accept time.
		 */
		keep_skb = ipv6_opt_accepted(sk_listener, skb,
					     &TCP_SKB_CB(skb)->header.h6) ||
			   np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
			   np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim ||
			   inet6_test_bit(REPFLOW, sk_listener);
		if (keep_skb) {
			refcount_inc(&skb->users);
			ireq->pktopts = skb;
		}
	}
}
779
/* Initialize the request sock from the SYN, consult the LSM, and resolve
 * a route for the SYN-ACK. Returns NULL if the security hook rejects the
 * connection or no route is found.
 */
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req,
					  u32 tw_isn)
{
	tcp_v6_init_req(req, sk, skb, tw_isn);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP);
}
793
/* Generic request_sock operations for IPv6 TCP (allocation size and the
 * ack/reset/destructor callbacks used while a connection is embryonic).
 */
794 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
795 .family = AF_INET6,
796 .obj_size = sizeof(struct tcp6_request_sock),
797 .send_ack = tcp_v6_reqsk_send_ack,
798 .destructor = tcp_v6_reqsk_destructor,
799 .send_reset = tcp_v6_send_reset,
800 };
801
/* TCP-specific request_sock callbacks for IPv6: MSS clamp, optional
 * MD5/AO/syncookie hooks, routing, ISN selection and SYN-ACK transmit.
 */
802 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
803 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
804 sizeof(struct ipv6hdr),
805 #ifdef CONFIG_TCP_MD5SIG
806 .req_md5_lookup = tcp_v6_md5_lookup,
807 .calc_md5_hash = tcp_v6_md5_hash_skb,
808 #endif
809 #ifdef CONFIG_TCP_AO
810 .ao_lookup = tcp_v6_ao_lookup_rsk,
811 .ao_calc_key = tcp_v6_ao_calc_key_rsk,
812 .ao_synack_hash = tcp_v6_ao_synack_hash,
813 #endif
814 #ifdef CONFIG_SYN_COOKIES
815 .cookie_init_seq = cookie_v6_init_sequence,
816 #endif
817 .route_req = tcp_v6_route_req,
818 .init_seq_and_ts_off = tcp_v6_init_seq_and_ts_off,
819 .send_synack = tcp_v6_send_synack,
820 };
821
/* Build and transmit a bare TCP control segment (RST or ACK) in response
 * to @skb, swapping source/destination from the incoming packet.
 *
 * Optionally appends timestamp, MPTCP-reset, MD5 or AO options depending
 * on @tsecr, @rst and @key. The reply is routed through the per-netns
 * control socket unless @sk is a non-timewait socket whose xfrm policy
 * should apply. Used by tcp_v6_send_reset() and tcp_v6_send_ack().
 */
tcp_v6_send_response(const struct sock * sk,struct sk_buff * skb,u32 seq,u32 ack,u32 win,u32 tsval,u32 tsecr,int oif,int rst,u8 tclass,__be32 label,u32 priority,u32 txhash,struct tcp_key * key)822 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
823 u32 ack, u32 win, u32 tsval, u32 tsecr,
824 int oif, int rst, u8 tclass, __be32 label,
825 u32 priority, u32 txhash, struct tcp_key *key)
826 {
827 struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
828 unsigned int tot_len = sizeof(struct tcphdr);
829 struct sock *ctl_sk = net->ipv6.tcp_sk;
830 const struct tcphdr *th = tcp_hdr(skb);
831 __be32 mrst = 0, *topt;
832 struct dst_entry *dst;
833 struct sk_buff *buff;
834 struct tcphdr *t1;
835 struct flowi6 fl6;
836 u32 mark = 0;
837
/* Account the option space we will emit so doff is correct. */
838 if (tsecr)
839 tot_len += TCPOLEN_TSTAMP_ALIGNED;
840 if (tcp_key_is_md5(key))
841 tot_len += TCPOLEN_MD5SIG_ALIGNED;
842 if (tcp_key_is_ao(key))
843 tot_len += tcp_ao_len_aligned(key->ao_key);
844
845 #ifdef CONFIG_MPTCP
846 if (rst && !tcp_key_is_md5(key)) {
847 mrst = mptcp_reset_option(skb);
848
849 if (mrst)
850 tot_len += sizeof(__be32);
851 }
852 #endif
853
854 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
855 if (!buff)
856 return;
857
858 skb_reserve(buff, MAX_TCP_HEADER);
859
860 t1 = skb_push(buff, tot_len);
861 skb_reset_transport_header(buff);
862
863 /* Swap the send and the receive. */
864 memset(t1, 0, sizeof(*t1));
865 t1->dest = th->source;
866 t1->source = th->dest;
867 t1->doff = tot_len / 4;
868 t1->seq = htonl(seq);
869 t1->ack_seq = htonl(ack);
870 t1->ack = !rst || !th->ack;
871 t1->rst = rst;
872 t1->window = htons(win);
873
874 topt = (__be32 *)(t1 + 1);
875
876 if (tsecr) {
877 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
878 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
879 *topt++ = htonl(tsval);
880 *topt++ = htonl(tsecr);
881 }
882
883 if (mrst)
884 *topt++ = mrst;
885
886 #ifdef CONFIG_TCP_MD5SIG
887 if (tcp_key_is_md5(key)) {
888 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
889 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
890 tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
891 &ipv6_hdr(skb)->saddr,
892 &ipv6_hdr(skb)->daddr, t1);
893 }
894 #endif
895 #ifdef CONFIG_TCP_AO
896 if (tcp_key_is_ao(key)) {
897 *topt++ = htonl((TCPOPT_AO << 24) |
898 (tcp_ao_len(key->ao_key) << 16) |
899 (key->ao_key->sndid << 8) |
900 (key->rcv_next));
901
902 tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
903 key->traffic_key,
904 (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
905 (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
906 t1, key->sne);
907 }
908 #endif
909
/* Reply flow is the reverse of the incoming packet's addresses. */
910 memset(&fl6, 0, sizeof(fl6));
911 fl6.daddr = ipv6_hdr(skb)->saddr;
912 fl6.saddr = ipv6_hdr(skb)->daddr;
913 fl6.flowlabel = label;
914
915 buff->ip_summed = CHECKSUM_PARTIAL;
916
917 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
918
919 fl6.flowi6_proto = IPPROTO_TCP;
920 if (rt6_need_strict(&fl6.daddr) && !oif)
921 fl6.flowi6_oif = tcp_v6_iif(skb);
922 else {
923 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
924 oif = skb->skb_iif;
925
926 fl6.flowi6_oif = oif;
927 }
928
929 if (sk) {
930 /* unconstify the socket only to attach it to buff with care. */
931 skb_set_owner_edemux(buff, (struct sock *)sk);
932 psp_reply_set_decrypted(sk, buff);
933
934 if (sk->sk_state == TCP_TIME_WAIT)
935 mark = inet_twsk(sk)->tw_mark;
936 else
937 mark = READ_ONCE(sk->sk_mark);
938 skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
939 }
940 if (txhash) {
941 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
942 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
943 }
944 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
945 fl6.fl6_dport = t1->dest;
946 fl6.fl6_sport = t1->source;
947 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
948 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
949
950 /* Pass a socket to ip6_dst_lookup either it is for RST
951 * Underlying function will use this to retrieve the network
952 * namespace
953 */
954 if (sk && sk->sk_state != TCP_TIME_WAIT)
955 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
956 else
957 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
958 if (!IS_ERR(dst)) {
959 skb_dst_set(buff, dst);
960 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
961 tclass, priority);
962 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
963 if (rst)
964 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
965 return;
966 }
967
968 kfree_skb(buff);
969 }
970
/* Send a RST in response to @skb (which may or may not match a socket).
 *
 * Never RSTs a RST. When MD5 or TCP-AO is in play the reply must be
 * signed: for full sockets the key comes from the socket; for stray
 * segments carrying an MD5 option a listener is looked up by source port
 * and the incoming signature verified before any RST is emitted, so an
 * unauthenticated segment cannot elicit a reset. Sequence/ack numbers are
 * chosen per RFC 793 depending on whether the incoming segment had ACK set.
 */
tcp_v6_send_reset(const struct sock * sk,struct sk_buff * skb,enum sk_rst_reason reason)971 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
972 enum sk_rst_reason reason)
973 {
974 const struct tcphdr *th = tcp_hdr(skb);
975 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
976 const __u8 *md5_hash_location = NULL;
977 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
978 bool allocated_traffic_key = false;
979 #endif
980 const struct tcp_ao_hdr *aoh;
981 struct tcp_key key = {};
982 u32 seq = 0, ack_seq = 0;
983 __be32 label = 0;
984 u32 priority = 0;
985 struct net *net;
986 u32 txhash = 0;
987 int oif = 0;
988 #ifdef CONFIG_TCP_MD5SIG
989 unsigned char newhash[16];
990 struct sock *sk1 = NULL;
991 #endif
992
993 if (th->rst)
994 return;
995
996 /* If sk not NULL, it means we did a successful lookup and incoming
997 * route had to be correct. prequeue might have dropped our dst.
998 */
999 if (!sk && !ipv6_unicast_destination(skb))
1000 return;
1001
1002 net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
1003 /* Invalid TCP option size or twice included auth */
1004 if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
1005 return;
1006 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1007 rcu_read_lock();
1008 #endif
1009 #ifdef CONFIG_TCP_MD5SIG
1010 if (sk && sk_fullsock(sk)) {
1011 int l3index;
1012
1013 /* sdif set, means packet ingressed via a device
1014 * in an L3 domain and inet_iif is set to it.
1015 */
1016 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1017 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1018 if (key.md5_key)
1019 key.type = TCP_KEY_MD5;
1020 } else if (md5_hash_location) {
1021 int dif = tcp_v6_iif_l3_slave(skb);
1022 int sdif = tcp_v6_sdif(skb);
1023 int l3index;
1024
1025 /*
1026 * active side is lost. Try to find listening socket through
1027 * source port, and then find md5 key through listening socket.
1028 * we are not loose security here:
1029 * Incoming packet is checked with md5 hash with finding key,
1030 * no RST generated if md5 hash doesn't match.
1031 */
1032 sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
1033 &ipv6h->daddr, ntohs(th->source),
1034 dif, sdif);
1035 if (!sk1)
1036 goto out;
1037
1038 /* sdif set, means packet ingressed via a device
1039 * in an L3 domain and dif is set to it.
1040 */
1041 l3index = tcp_v6_sdif(skb) ? dif : 0;
1042
1043 key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1044 if (!key.md5_key)
1045 goto out;
1046 key.type = TCP_KEY_MD5;
1047
/* Verify the incoming segment's signature before replying. */
1048 tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
1049 if (crypto_memneq(md5_hash_location, newhash, 16))
1050 goto out;
1051 }
1052 #endif
1053
/* RFC 793: RST seq comes from the peer's ACK, or we ACK their data. */
1054 if (th->ack)
1055 seq = ntohl(th->ack_seq);
1056 else
1057 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1058 (th->doff << 2);
1059
1060 #ifdef CONFIG_TCP_AO
1061 if (aoh) {
1062 int l3index;
1063
1064 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1065 if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
1066 &key.ao_key, &key.traffic_key,
1067 &allocated_traffic_key,
1068 &key.rcv_next, &key.sne))
1069 goto out;
1070 key.type = TCP_KEY_AO;
1071 }
1072 #endif
1073
/* Pick oif, flow label, priority and txhash from the socket when we
 * have one (full or timewait), otherwise optionally reflect the label.
 */
1074 if (sk) {
1075 oif = sk->sk_bound_dev_if;
1076 if (sk_fullsock(sk)) {
1077 if (inet6_test_bit(REPFLOW, sk))
1078 label = ip6_flowlabel(ipv6h);
1079 priority = READ_ONCE(sk->sk_priority);
1080 txhash = sk->sk_txhash;
1081 }
1082 if (sk->sk_state == TCP_TIME_WAIT) {
1083 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1084 priority = inet_twsk(sk)->tw_priority;
1085 txhash = inet_twsk(sk)->tw_txhash;
1086 }
1087 } else {
1088 if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
1089 FLOWLABEL_REFLECT_TCP_RESET)
1090 label = ip6_flowlabel(ipv6h);
1091 }
1092
1093 trace_tcp_send_reset(sk, skb, reason);
1094
1095 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
1096 ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
1097 label, priority, txhash,
1098 &key);
1099
1100 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1101 out:
1102 if (allocated_traffic_key)
1103 kfree(key.traffic_key);
1104 rcu_read_unlock();
1105 #endif
1106 }
1107
/* Send a bare ACK (rst == 0) for @skb, e.g. on behalf of a TIME-WAIT or
 * request socket.  Thin wrapper around tcp_v6_send_response(); @key
 * optionally carries MD5/AO signing material for the reply.
 */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
			     tclass, label, priority, txhash, key);
}
1116
/* ACK a segment that arrived for a TIME-WAIT socket, signing the reply
 * with TCP-AO or MD5 when the timewait state carries a matching key.
 * Consumes the timewait socket reference (inet_twsk_put() on exit).
 *
 * Note the brace structure below is deliberately interleaved with the
 * CONFIG_TCP_AO / CONFIG_TCP_MD5SIG preprocessor arms so that exactly
 * one if/else-if chain survives any config combination.
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
				enum tcp_tw_status tw_status)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	u8 tclass = tw->tw_tclass;
	struct tcp_key key = {};

	/* Out-of-window ACKs must not echo ECN bits in the traffic class. */
	if (tw_status == TCP_TW_ACK_OOW)
		tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao_info;

	if (static_branch_unlikely(&tcp_ao_needed.key)) {

		/* FIXME: the segment to-be-acked is not verified yet */
		ao_info = rcu_dereference(tcptw->ao_info);
		if (ao_info) {
			const struct tcp_ao_hdr *aoh;

			/* Invalid TCP option size or twice included auth */
			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
				goto out;
			if (aoh)
				key.ao_key = tcp_ao_established_key(sk, ao_info,
								    aoh->rnext_keyid, -1);
		}
	}
	if (key.ao_key) {
		struct tcp_ao_key *rnext_key;

		key.traffic_key = snd_other_key(key.ao_key);
		/* rcv_next switches to our rcv_next */
		rnext_key = READ_ONCE(ao_info->rnext_key);
		key.rcv_next = rnext_key->rcvid;
		key.sne = READ_ONCE(ao_info->snd_sne);
		key.type = TCP_KEY_AO;
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		key.md5_key = tcp_twsk_md5_key(tcptw);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
			READ_ONCE(tcptw->tw_rcv_nxt),
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_tw_tsval(tcptw),
			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
			tw->tw_priority, tw->tw_txhash);

#ifdef CONFIG_TCP_AO
out:
#endif
	inet_twsk_put(tw);
}
1178
/* Send an ACK in response to a segment received for a request socket
 * (regular TCP_SYN_RECV, or TCP_SYN_RECV via Fast Open).  The reply is
 * signed with TCP-AO or MD5 when a matching key is found for the peer.
 * The brace/#ifdef interleaving mirrors tcp_v6_timewait_ack().
 */
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		/* Per-reply traffic key; freed below after sending. */
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0, /* no flow label */
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}
1254
1255
/* On a listening socket, a non-SYN segment may be the ACK that
 * completes a syncookie handshake: validate it and return the freshly
 * created child socket (or the listener unchanged).  With syncookies
 * compiled out this is the identity function.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}
1266
1267 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1268 struct tcphdr *th, u32 *cookie)
1269 {
1270 u16 mss = 0;
1271 #ifdef CONFIG_SYN_COOKIES
1272 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1273 &tcp_request_sock_ipv6_ops, sk, th);
1274 if (mss) {
1275 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1276 tcp_synq_overflow(sk);
1277 }
1278 #endif
1279 return mss;
1280 }
1281
1282 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1283 {
1284 if (skb->protocol == htons(ETH_P_IP))
1285 return tcp_v4_conn_request(sk, skb);
1286
1287 if (!ipv6_unicast_destination(skb))
1288 goto drop;
1289
1290 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1291 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1292 return 0;
1293 }
1294
1295 return tcp_conn_request(&tcp6_request_sock_ops,
1296 &tcp_request_sock_ipv6_ops, sk, skb);
1297
1298 drop:
1299 tcp_listendrop(sk);
1300 return 0; /* don't send reset */
1301 }
1302
/* Undo tcp_v6_fill_cb(): restore IP6CB() from the TCP control block. */
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}
1312
/* Called from tcp_v4_syn_recv_sock() for v6_mapped children.
 * Initializes the child's IPv6 state from the listener and switches it
 * to the v4-mapped operation vectors, since it talks IPv4 on the wire.
 */
static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk)
{
	struct inet_sock *newinet = inet_sk(newsk);
	struct ipv6_pinfo *newnp;

	newinet->pinet6 = newnp = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;

	/* Start from the listener's IPv6 state; per-child fields are
	 * overridden below.
	 */
	memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo));

	newnp->saddr = newsk->sk_v6_rcv_saddr;

	inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
	if (sk_is_mptcp(newsk))
		mptcpv6_handle_mapped(newsk, true);
	newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

	/* Pointers copied by the memcpy() above belong to the listener;
	 * the child must not share or free them.
	 */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->pktoptions = NULL;
	newnp->opt = NULL;

	/* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */
	newnp->mcast_oif = newinet->mc_index;
	newnp->mcast_hops = newinet->mc_ttl;

	newnp->rcv_flowinfo = 0;
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = 0;
}
1347
/* Create the full child socket for an accepted IPv6 connection.
 * v4-mapped traffic falls through to tcp_v4_syn_recv_sock().  On
 * success the child is hashed (*own_req reports whether we own @req);
 * on any failure NULL is returned and the listener drop stats bumped.
 *
 * NOTE(review): @opt_child_init is unused on the native IPv6 path here;
 * presumably only the symmetric IPv4 entry point consumes it — confirm
 * against the callback's declaration site.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req,
					 void (*opt_child_init)(struct sock *newsk,
								const struct sock *sk))
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_request_sock *ireq;
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct ipv6_pinfo *newnp;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_syn_recv_sock(sk, skb, req, dst,
					    req_unhash, own_req,
					    tcp_v6_mapped_child_init);
	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP);
	if (!dst)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	newinet = inet_sk(newsk);
	newinet->cork.fl.u.ip6 = fl6;
	newinet->pinet6 = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;
	newinet->inet_opt = NULL;

	newtp = tcp_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	/* Bulk-copy listener IPv6 state, then fix up per-child fields. */
	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(newsk, dst, false, false);

	newnp->saddr = ireq->ir_v6_loc_addr;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst6_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	if (!tcp_rsk_used_ao(req)) {
		/* Copy over the MD5 key from the original socket */
		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		if (key) {
			const union tcp_md5_addr *addr;

			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
				goto put_and_exit;
		}
	}
#endif
#ifdef CONFIG_TCP_AO
	/* Copy over tcp_ao_info if any */
	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
		goto put_and_exit; /* OOM */
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

exit_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
1521
1522 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1523 u32));
1524 /* The socket must have it's spinlock held when we get
1525 * here, unless it is a TCP_LISTEN socket.
1526 *
1527 * We have a potential double-lock case here, so even when
1528 * doing backlog processing we use the BH locking scheme.
1529 * This is because we cannot sleep with the original spinlock
1530 * held.
1531 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	reason = psp_sk_rx_policy_check(sk, skb);
	if (reason)
		goto err_discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			/* Invalidate the cached rx dst if the incoming
			 * device changed or the dst failed its check.
			 */
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (nsk != sk) {
			if (nsk) {
				reason = tcp_child_process(sk, nsk, skb);
				if (reason)
					goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	reason = tcp_rcv_state_process(sk, skb);
	if (reason)
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
err_discard:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	/* Free whichever skb was swapped out of np->pktoptions (or the
	 * clone itself when it was not latched).
	 */
	consume_skb(opt_skb);
	return 0;
}
1671
/* Overlay TCP's control block on the skb, preserving IP6CB() contents
 * first (tcp_v6_restore_cb() is the inverse).  Also decodes the TCP
 * header fields every later layer reads from TCP_SKB_CB().
 */
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq counts SYN/FIN as one sequence unit each. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
1694
/* Entry point for every received IPv6 TCP segment: validates the
 * header and checksum, looks up the owning socket, and dispatches to
 * the NEW_SYN_RECV / TIME_WAIT / established handling paths.
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* pskb_may_pull() may have reallocated; refetch header pointers. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_hash(sk, req, skb,
						       &hdr->saddr, &hdr->daddr,
						       AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_skbadd(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb, &drop_reason)) {
			/* tcp_filter() may have trimmed/moved the data. */
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
					    &drop_reason);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				enum sk_rst_reason rst_reason;

				rst_reason = sk_rst_convert_drop_reason(drop_reason);
				tcp_v6_send_reset(nsk, skb, rst_reason);
				goto discard_and_relse;
			}
			sock_put(sk);
			return 0;
		}
	}

process:
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
				       AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	if (tcp_filter(sk, skb, &drop_reason))
		goto discard_and_relse;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_skbadd(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
					       &drop_reason);
	switch (tw_status) {
	case TCP_TW_SYN:
	{
		/* A new connection is reusing the TIME-WAIT 4-tuple:
		 * hand the SYN to a matching listener, if any.
		 */
		struct sock *sk2;

		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
1959
1960 void tcp_v6_early_demux(struct sk_buff *skb)
1961 {
1962 struct net *net = dev_net_rcu(skb->dev);
1963 const struct ipv6hdr *hdr;
1964 const struct tcphdr *th;
1965 struct sock *sk;
1966
1967 if (skb->pkt_type != PACKET_HOST)
1968 return;
1969
1970 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1971 return;
1972
1973 hdr = ipv6_hdr(skb);
1974 th = tcp_hdr(skb);
1975
1976 if (th->doff < sizeof(struct tcphdr) / 4)
1977 return;
1978
1979 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1980 sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
1981 &hdr->daddr, ntohs(th->dest),
1982 inet6_iif(skb), inet6_sdif(skb));
1983 if (sk) {
1984 skb->sk = sk;
1985 skb->destructor = sock_edemux;
1986 if (sk_fullsock(sk)) {
1987 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1988
1989 if (dst)
1990 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1991 if (dst &&
1992 sk->sk_rx_dst_ifindex == skb->skb_iif)
1993 skb_dst_set_noref(skb, dst);
1994 }
1995 }
1996 }
1997
/* IPv6 timewait sockets differ from the generic ones only in size. */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
};
2001
/* Checksum hook for connected IPv6 sockets: delegates to
 * __tcp_v6_send_check() with the socket's bound source/destination
 * addresses.
 */
INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}
2006
/* Address-family operations for native IPv6 TCP sockets. */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};
2019
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* MD5/AO signing hooks for native IPv6 sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
#endif
};
#endif
2035
2036 /*
2037 * TCP over IPv4 via INET6 API
2038 */
/* v4-mapped sockets transmit IPv4 on the wire, so the xmit/checksum/
 * header hooks are the IPv4 ones while setsockopt stays IPv6.
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
2051
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* Signing hooks for v4-mapped sockets: hash with the IPv4 routines but
 * keep the IPv6 setsockopt parsers.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
#endif
};

/* sk_destruct: tear down MD5/AO key state before the generic IPv6
 * socket destructor runs.
 */
static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
#endif
2074
2075 /* NOTE: A lot of things set to zero explicitly by call to
2076 * sk_alloc() so need not be done here.
2077 */
/* Protocol init hook for IPv6 TCP sockets: generic TCP initialization
 * first, then install the IPv6 af_ops (and signing/destruct hooks when
 * MD5 or AO is compiled in).  Always returns 0.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
	sk->sk_destruct = tcp6_destruct_sock;
#endif

	return 0;
}
2093
2094 #ifdef CONFIG_PROC_FS
2095 /* Proc filesystem TCPv6 sock list dumping. */
/* Dump one open request (SYN_RECV) as a /proc/net/tcp6 line. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	/* Remaining lifetime of the SYN-ACK retransmit timer. */
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
2127
/* Dump one established/listening TCP socket in /proc/net/tcp6 format.
 * The socket is not locked: all fields are read with annotated loads and
 * may be transiently inconsistent with each other.
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	/* Classify the pending timer for the "tr" column:
	 * 1 = retransmit/RACK/loss-probe, 4 = zero-window probe,
	 * 2 = keepalive, 0 = none pending.
	 */
	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
		timer_active	= 2;
		timer_expires	= icsk->icsk_keepalive_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		/* Listeners report the accept-queue backlog instead. */
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   READ_ONCE(icsk->icsk_retransmits),
		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
		   READ_ONCE(icsk->icsk_probes_out),
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
2201
/* Dump one TIME_WAIT (or FIN_WAIT2) mini-socket in /proc/net/tcp6 format.
 * @tw: the timewait socket
 * @i:  slot index printed in the leading "sl" column
 */
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	/* Remaining timewait lifetime; may be negative if already expired. */
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	/* Timewait sockets have no queues, uid, or inode; the "tr" timer
	 * code is hard-wired to 3.
	 */
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   READ_ONCE(tw->tw_substate), 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}
2226
2227 static int tcp6_seq_show(struct seq_file *seq, void *v)
2228 {
2229 struct tcp_iter_state *st;
2230 struct sock *sk = v;
2231
2232 if (v == SEQ_START_TOKEN) {
2233 seq_puts(seq,
2234 " sl "
2235 "local_address "
2236 "remote_address "
2237 "st tx_queue rx_queue tr tm->when retrnsmt"
2238 " uid timeout inode\n");
2239 goto out;
2240 }
2241 st = seq->private;
2242
2243 if (sk->sk_state == TCP_TIME_WAIT)
2244 get_timewait6_sock(seq, v, st->num);
2245 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2246 get_openreq6(seq, v, st->num);
2247 else
2248 get_tcp6_sock(seq, v, st->num);
2249 out:
2250 return 0;
2251 }
2252
/* Iterator ops for /proc/net/tcp6; traversal is shared with IPv4. */
static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};
2259
/* Restricts the shared TCP iterator to AF_INET6 sockets only. */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};
2263
2264 int __net_init tcp6_proc_init(struct net *net)
2265 {
2266 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2267 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2268 return -ENOMEM;
2269 return 0;
2270 }
2271
/* Remove the per-netns /proc/net/tcp6 entry created by tcp6_proc_init(). */
void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
2276 #endif
2277
/* Transport-protocol descriptor for TCPv6. Most handlers are shared with
 * IPv4 TCP; only connect/rcv paths and sizing differ. Memory accounting
 * pointers alias the global TCP counters so v4 and v6 share one budget.
 */
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.splice_eof		= tcp_splice_eof,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &net_aligned_data.tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.freeptr_offset		= offsetof(struct tcp6_sock,
					   tcp.inet_conn.icsk_inet.sk.sk_freeptr),
	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
2331
2332
/* Registers SOCK_STREAM/IPPROTO_TCP under AF_INET6; PERMANENT means it
 * cannot be overridden or unregistered at runtime.
 */
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};
2341
2342 static int __net_init tcpv6_net_init(struct net *net)
2343 {
2344 int res;
2345
2346 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2347 SOCK_RAW, IPPROTO_TCP, net);
2348 if (!res)
2349 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
2350
2351 return res;
2352 }
2353
/* Per-netns teardown: destroy the control socket made in tcpv6_net_init(). */
static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
2358
/* Hooks run on every network-namespace creation/destruction. */
static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
};
2363
/* Module init: wire up the IPPROTO_TCP handler for IPv6, register the
 * socket-type entry, the per-netns ops, and MPTCP-over-v6 support.
 * On any failure, the goto chain unwinds every step taken so far.
 */
int __init tcpv6_init(void)
{
	int ret;

	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
		.handler     = tcp_v6_rcv,
		.err_handler = tcp_v6_err,
		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
	};
	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

	/* Unwind in reverse registration order on error. */
out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	goto out;
}
2401
/* Module exit: undo tcpv6_init() registrations in reverse order. */
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}
2408