1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/aligned_data.h>
45 #include <net/tcp.h>
46 #include <net/ndisc.h>
47 #include <net/inet6_hashtables.h>
48 #include <net/inet6_connection_sock.h>
49 #include <net/ipv6.h>
50 #include <net/transp_v6.h>
51 #include <net/addrconf.h>
52 #include <net/ip6_route.h>
53 #include <net/ip6_checksum.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
56 #include <net/xfrm.h>
57 #include <net/snmp.h>
58 #include <net/dsfield.h>
59 #include <net/timewait_sock.h>
60 #include <net/inet_common.h>
61 #include <net/secure_seq.h>
62 #include <net/hotdata.h>
63 #include <net/busy_poll.h>
64 #include <net/rstreason.h>
65 #include <net/psp.h>
66
67 #include <linux/proc_fs.h>
68 #include <linux/seq_file.h>
69
70 #include <crypto/md5.h>
71 #include <crypto/utils.h>
72
73 #include <trace/events/tcp.h>
74
75 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
76 enum sk_rst_reason reason);
77 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
78 struct request_sock *req);
79
80 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
81
82 static const struct inet_connection_sock_af_ops ipv6_mapped;
83 const struct inet_connection_sock_af_ops ipv6_specific;
84 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
86 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
87 #endif
88
89 /* Helper returning the inet6 address from a given tcp socket.
90 * It can be used in TCP stack instead of inet6_sk(sk).
91 * This avoids a dereference and allow compiler optimizations.
92 * It is a specialized version of inet6_sk_generic().
93 */
94 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
95 struct tcp6_sock, tcp)->inet6)
96
inet6_sk_rx_dst_set(struct sock * sk,const struct sk_buff * skb)97 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
98 {
99 struct dst_entry *dst = skb_dst(skb);
100
101 if (dst && dst_hold_safe(dst)) {
102 rcu_assign_pointer(sk->sk_rx_dst, dst);
103 sk->sk_rx_dst_ifindex = skb->skb_iif;
104 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
105 }
106 }
107
108 INDIRECT_CALLABLE_SCOPE union tcp_seq_and_ts_off
tcp_v6_init_seq_and_ts_off(const struct net * net,const struct sk_buff * skb)109 tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
110 {
111 return secure_tcpv6_seq_and_ts_off(net,
112 ipv6_hdr(skb)->daddr.s6_addr32,
113 ipv6_hdr(skb)->saddr.s6_addr32,
114 tcp_hdr(skb)->dest,
115 tcp_hdr(skb)->source);
116 }
117
/* Pre-connect hook: run the cgroup BPF INET6_CONNECT program (which may
 * rewrite the destination in @uaddr) before tcp_v6_connect() proper.
 * Returns 0, or a negative errno on a too-short address or BPF denial.
 */
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	/* Caller must hold the socket lock. */
	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}
132
/* Active open of an IPv6 TCP connection (connect(2) backend).
 *
 * Validates @uaddr, resolves the flow (flowlabel, scope-id, source
 * address), routes it, binds the source, and sends the SYN.  A
 * v4-mapped destination is handed off to tcp_v4_connect() after
 * switching the socket's af_ops to the mapped variants.
 * Returns 0 on success or a negative errno; on late failures the
 * socket is reset to TCP_CLOSE and the bound saddr is undone.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct in6_addr *saddr = NULL, *final_p;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 *fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	/* The cork flow is reused as scratch space for the connect flow. */
	fl6 = &inet_sk(sk)->cork.fl.u.ip6;
	memset(fl6, 0, sizeof(*fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6->flowlabel);
		if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			/* Flow label must have been registered via
			 * IPV6_FLOWLABEL_MGR; lookup only validates it.
			 */
			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Reconnecting to a different peer: stale timestamp state and the
	 * cached write_seq must not leak into the new connection.
	 */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6->flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));

		if (err) {
			/* Roll the socket back to the native IPv6 ops so a
			 * later connect() attempt starts from a clean state.
			 */
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6->flowi6_proto = IPPROTO_TCP;
	fl6->daddr = sk->sk_v6_daddr;
	fl6->saddr = saddr ? *saddr : np->saddr;
	fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6->flowi6_oif = sk->sk_bound_dev_if;
	fl6->flowi6_mark = sk->sk_mark;
	fl6->fl6_dport = usin->sin6_port;
	fl6->fl6_sport = inet->inet_sport;
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport)
		fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6->flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(fl6, opt, &np->final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));

	dst = ip6_dst_lookup_flow(net, sk, fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		/* No source bound yet: adopt the routed one. */
		saddr = &fl6->saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err) {
			dst_release(dst);
			goto failure;
		}
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, false, false);

	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
	if (opt)
		icsk->icsk_ext_hdr_len += opt->opt_flen +
					  opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		union tcp_seq_and_ts_off st;

		st = secure_tcpv6_seq_and_ts_off(net,
						 np->saddr.s6_addr32,
						 sk->sk_v6_daddr.s6_addr32,
						 inet->inet_sport,
						 inet->inet_dport);
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq, st.seq);
		WRITE_ONCE(tp->tsoffset, st.ts_off);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
352
/* Propagate a new path MTU to the socket's route.
 * Routes once to find the dst, pushes @mtu into it, then routes again
 * because the update may have replaced/invalidated the entry.
 * Returns the refreshed dst, or NULL if routing fails at either step.
 */
static struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
{
	struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6;
	struct dst_entry *dst;

	dst = inet6_csk_route_socket(sk, fl6);

	if (IS_ERR(dst))
		return NULL;
	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);

	dst = inet6_csk_route_socket(sk, fl6);
	return IS_ERR(dst) ? NULL : dst;
}
367
/* React to a stored Packet-Too-Big notification (tp->mtu_info):
 * shrink the cached MSS and retransmit outstanding data.
 * No-op for LISTEN/CLOSE sockets and for MTU "increases".
 */
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu, dmtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	/* mtu_info is written lockless from tcp_v6_err(). */
	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	dmtu = dst6_mtu(dst);
	if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
		tcp_sync_mss(sk, dmtu);
		tcp_simple_retransmit(sk);
	}
}
394
/* ICMPv6 error handler for TCP.
 *
 * @skb carries the ICMPv6 message; the offending packet's IPv6 header is
 * at skb->data and its TCP header at skb->data + @offset.  Looks up the
 * matching socket, validates the embedded sequence number against the
 * send window, and dispatches on the error: route redirect, PMTU
 * reduction, or connection error (possibly deferred to the owner when
 * the socket is locked by user context).
 * Returns 0, or -ENOENT when no socket matches.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		/* Error is for a pending request_sock; handled there,
		 * tcp_req_err() also drops the socket reference.
		 */
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	/* PKT_TOOBIG is still processed under user lock (deferred below);
	 * other errors are accounted as dropped when the user owns the sock.
	 */
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		/* Quoted sequence is outside our send window: bogus/stale. */
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		/* Handle now, or defer to tcp_release_cb() via tsq flag
		 * (extra sock_hold() is released by the deferred handler).
		 */
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for an request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
540
541
/* Build and transmit a SYN-ACK for @req.
 *
 * Routes the reply if @dst was not supplied, builds the SYN-ACK skb,
 * selects the traffic class (optionally reflecting the peer's TOS per
 * sysctl_tcp_reflect_tos while preserving our ECN bits), and emits it
 * via ip6_xmit() with the request's IPv6 options.
 * Returns a net_xmit_eval()-style code, or -ENOMEM on alloc/route failure.
 */
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		/* Reflect the peer's flow label if REPFLOW is enabled and
		 * the SYN was kept in ireq->pktopts.
		 */
		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		/* np->opt is RCU-managed; hold the read lock across xmit. */
		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
			       opt, tclass, READ_ONCE(sk->sk_priority));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}
595
596
tcp_v6_reqsk_destructor(struct request_sock * req)597 static void tcp_v6_reqsk_destructor(struct request_sock *req)
598 {
599 kfree(inet_rsk(req)->ipv6_opt);
600 consume_skb(inet_rsk(req)->pktopts);
601 }
602
603 #ifdef CONFIG_TCP_MD5SIG
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)604 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
605 const struct in6_addr *addr,
606 int l3index)
607 {
608 return tcp_md5_do_lookup(sk, l3index,
609 (union tcp_md5_addr *)addr, AF_INET6);
610 }
611
/* MD5 key lookup keyed by the peer socket's destination address,
 * resolving the L3 master domain from addr_sk's bound device.
 */
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}
622
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler for IPv6 sockets.
 *
 * Copies a struct tcp_md5sig from @optval, validates family, prefix
 * length and optional ifindex, and then adds or (for zero key length)
 * deletes the key.  v4-mapped peers are stored as AF_INET keys.
 * Returns 0 or a negative errno (-EKEYREJECTED when a matching TCP-AO
 * key already exists for the peer).
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_ao_addr *addr;
	int l3index = 0;
	u8 prefixlen;
	bool l3flag;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	/* Only the IFINDEX flag is propagated to the key store. */
	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		/* v4-mapped addresses only have 32 meaningful bits. */
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		/* No explicit prefix: match the full address. */
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	/* Zero key length means "delete the key". */
	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];

		/* Don't allow keys for peers that have a matching TCP-AO key.
		 * See the comment in tcp_ao_add_cmd()
		 */
		if (tcp_ao_required(sk, addr, AF_INET,
				    l3flag ? l3index : -1, false))
			return -EKEYREJECTED;
		return tcp_md5_do_add(sk, addr,
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen);
	}

	addr = (union tcp_md5_addr *)&sin6->sin6_addr;

	/* Don't allow keys for peers that have a matching TCP-AO key.
	 * See the comment in tcp_ao_add_cmd()
	 */
	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
		return -EKEYREJECTED;

	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen);
}
710
/* Feed the TCP-MD5 pseudo-header and the TCP header (with checksum
 * zeroed, as the signature is computed before checksumming) into @ctx.
 * @nbytes is the TCP segment length recorded in the pseudo-header.
 */
static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx,
				    const struct in6_addr *daddr,
				    const struct in6_addr *saddr,
				    const struct tcphdr *th, int nbytes)
{
	struct {
		struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */
		struct tcphdr tcp;
	} h;

	h.ip.saddr = *saddr;
	h.ip.daddr = *daddr;
	h.ip.protocol = cpu_to_be32(IPPROTO_TCP);
	h.ip.len = cpu_to_be32(nbytes);
	h.tcp = *th;
	h.tcp.check = 0;
	/* Hash exactly the two headers, excluding any trailing padding. */
	md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp));
}
729
/* Compute the TCP-MD5 signature over pseudo-header + TCP header only
 * (header length taken from th->doff), writing the 16-byte digest to
 * @md5_hash.  Used for reply segments built without an skb payload.
 */
static noinline_for_stack void
tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct in6_addr *daddr, struct in6_addr *saddr,
		    const struct tcphdr *th)
{
	struct md5_ctx ctx;

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}
742
/* Compute the TCP-MD5 signature for a full segment (headers + payload),
 * writing the 16-byte digest to @md5_hash.  Addresses come from @sk
 * when available (established/request socket), otherwise from the skb's
 * IPv6 header.
 */
static noinline_for_stack void
tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct sock *sk, const struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct in6_addr *saddr, *daddr;
	struct md5_ctx ctx;

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len);
	/* Payload starts after the TCP header (doff is in 32-bit words). */
	tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}
766 #endif
767
/* Initialize the IPv6 fields of a freshly minted request_sock from the
 * incoming SYN: remote/local addresses, the ingress interface for
 * link-local peers, and (when needed) a reference to the SYN skb so
 * its IPv6 ancillary data can be replayed to the accepting socket.
 */
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb,
			    u32 tw_isn)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
	/* Placeholder IPv4 addresses for an IPv6 request. */
	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
	ireq->ir_loc_addr = LOOPBACK4_IPV6;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	/* Keep the SYN skb if the listener asked for any per-packet IPv6
	 * info (rx options, hoplimit, flow label) — but not when this
	 * request came via TIME_WAIT recycling (tw_isn set).
	 */
	if (!tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}
796
/* route_req callback: populate the request_sock from the SYN, run the
 * LSM connection-request hook, then route the SYN-ACK.
 * Returns the dst for the reply, or NULL if the LSM rejects the request
 * (routing failure is reported by inet6_csk_route_req itself as NULL).
 */
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req,
					  u32 tw_isn)
{
	tcp_v6_init_req(req, sk, skb, tw_isn);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP);
}
810
/* Generic request_sock operations for IPv6 TCP (allocation size, ACK/RST
 * emission and teardown of pending connection requests).
 */
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
};
818
/* TCP-specific request_sock callbacks for IPv6: MSS clamping, optional
 * MD5/AO signing, syncookies, routing and SYN-ACK transmission.
 */
const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	/* Clamp MSS to what fits in the IPv6 minimum MTU. */
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
	.ao_synack_hash	=	tcp_v6_ao_synack_hash,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq_and_ts_off =	tcp_v6_init_seq_and_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};
838
/* Build and send a bare control segment (RST when @rst, otherwise ACK)
 * in reply to @skb, without an associated transmit socket state.
 *
 * Swaps the addresses/ports from the incoming segment, appends the
 * requested options (timestamps, MPTCP reset option, MD5 or AO
 * signature), routes the reply and transmits it via the per-netns
 * control socket.  @sk may be NULL, a full socket, a timewait socket or
 * a request socket; it only influences mark/priority/timestamps.
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, int rst, u8 tclass, __be32 label,
				 u32 priority, u32 txhash, struct tcp_key *key)
{
	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	unsigned int tot_len = sizeof(struct tcphdr);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct flowi6 fl6;
	u32 mark = 0;

	/* Compute the full header length up front: t1->doff depends on it. */
	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
	if (tcp_key_is_md5(key))
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
	if (tcp_key_is_ao(key))
		tot_len += tcp_ao_len_aligned(key->ao_key);

#ifdef CONFIG_MPTCP
	if (rst && !tcp_key_is_md5(key)) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	/* Option words follow the fixed header. */
	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_key_is_md5(key)) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* Addresses swapped: we sign the reply direction. */
		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif
#ifdef CONFIG_TCP_AO
	if (tcp_key_is_ao(key)) {
		*topt++ = htonl((TCPOPT_AO << 24) |
				(tcp_ao_len(key->ao_key) << 16) |
				(key->ao_key->sndid << 8) |
				(key->rcv_next));

		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
				key->traffic_key,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
				t1, key->sne);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		/* unconstify the socket only to attach it to buff with care. */
		skb_set_owner_edemux(buff, (struct sock *)sk);
		psp_reply_set_decrypted(sk, buff);

		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	/* Routing failed: drop the reply we built. */
	kfree_skb(buff);
}
987
/* Send a RST in reply to @skb.
 *
 * @sk may be NULL (no socket matched the segment), a full socket, a
 * request socket or a timewait socket; only state valid for the given
 * socket kind is read.  When the incoming segment carried a TCP-MD5 or
 * TCP-AO option and a matching key is found, the RST is signed with
 * it; a segment whose MD5 hash does not verify generates no RST.
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	const __u8 *md5_hash_location = NULL;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	bool allocated_traffic_key = false;
#endif
	const struct tcp_ao_hdr *aoh;
	struct tcp_key key = {};
	u32 seq = 0, ack_seq = 0;
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;
#ifdef CONFIG_TCP_MD5SIG
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
#endif

	/* Never answer a RST with a RST. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
		return;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	rcu_read_lock();
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
	} else if (md5_hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key.md5_key)
			goto out;
		key.type = TCP_KEY_MD5;

		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
		if (crypto_memneq(md5_hash_location, newhash, 16))
			goto out;
	}
#endif

	/* RFC-style RST numbering: echo the peer's ACK as our seq when
	 * present, otherwise ACK everything the segment occupied.
	 */
	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

#ifdef CONFIG_TCP_AO
	if (aoh) {
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
					 &key.ao_key, &key.traffic_key,
					 &allocated_traffic_key,
					 &key.rcv_next, &key.sne))
			goto out;
		key.type = TCP_KEY_AO;
	}
#endif

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			if (inet6_test_bit(REPFLOW, sk))
				label = ip6_flowlabel(ipv6h);
			priority = READ_ONCE(sk->sk_priority);
			txhash = sk->sk_txhash;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
		    FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	trace_tcp_send_reset(sk, skb, reason);

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
			     label, priority, txhash,
			     &key);

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
	/* Only the AO path may have kmalloc'ed a traffic key. */
	if (allocated_traffic_key)
		kfree(key.traffic_key);
	rcu_read_unlock();
#endif
}
1124
/* Send a bare ACK in reply to @skb (timewait / request-socket paths).
 * Thin wrapper around tcp_v6_send_response() with the rst flag clear.
 */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
			     tclass, label, priority, txhash, key);
}
1133
/* ACK a segment received for a TCP_TIME_WAIT socket.
 *
 * Selects a TCP-AO or TCP-MD5 signing key from the timewait socket
 * when the corresponding support is built in, emits the ACK and drops
 * the timewait reference taken by the caller.
 *
 * Note: the brace structure below is deliberately split across the
 * preprocessor branches so exactly one if/else-if chain survives any
 * CONFIG_TCP_AO / CONFIG_TCP_MD5SIG combination.
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
				enum tcp_tw_status tw_status)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	u8 tclass = tw->tw_tclass;
	struct tcp_key key = {};

	/* Strip ECN bits from tclass for out-of-window ACKs. */
	if (tw_status == TCP_TW_ACK_OOW)
		tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao_info;

	if (static_branch_unlikely(&tcp_ao_needed.key)) {

		/* FIXME: the segment to-be-acked is not verified yet */
		ao_info = rcu_dereference(tcptw->ao_info);
		if (ao_info) {
			const struct tcp_ao_hdr *aoh;

			/* Invalid TCP option size or twice included auth */
			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
				goto out;
			if (aoh)
				key.ao_key = tcp_ao_established_key(sk, ao_info,
								    aoh->rnext_keyid, -1);
		}
	}
	if (key.ao_key) {
		struct tcp_ao_key *rnext_key;

		key.traffic_key = snd_other_key(key.ao_key);
		/* rcv_next switches to our rcv_next */
		rnext_key = READ_ONCE(ao_info->rnext_key);
		key.rcv_next = rnext_key->rcvid;
		key.sne = READ_ONCE(ao_info->snd_sne);
		key.type = TCP_KEY_AO;
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		key.md5_key = tcp_twsk_md5_key(tcptw);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
			READ_ONCE(tcptw->tw_rcv_nxt),
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_tw_tsval(tcptw),
			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
			tw->tw_priority, tw->tw_txhash);

#ifdef CONFIG_TCP_AO
out:
#endif
	inet_twsk_put(tw);
}
1195
/* Send a (SYN-)ACK on behalf of a request socket, signed with TCP-AO
 * or TCP-MD5 when a matching key for the peer exists.  The unusual
 * brace layout across #ifdefs mirrors tcp_v6_timewait_ack().
 */
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0,
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	/* The AO traffic key was kmalloc'ed above; MD5 keys are not ours. */
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}
1271
1272
/* Run the SYN-cookie check for non-SYN segments hitting a listener.
 * With CONFIG_SYN_COOKIES disabled this is a pass-through.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		return cookie_v6_check(sk, skb);
#endif
	return sk;
}
1283
1284 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1285 struct tcphdr *th, u32 *cookie)
1286 {
1287 u16 mss = 0;
1288 #ifdef CONFIG_SYN_COOKIES
1289 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1290 &tcp_request_sock_ipv6_ops, sk, th);
1291 if (mss) {
1292 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1293 tcp_synq_overflow(sk);
1294 }
1295 #endif
1296 return mss;
1297 }
1298
1299 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1300 {
1301 if (skb->protocol == htons(ETH_P_IP))
1302 return tcp_v4_conn_request(sk, skb);
1303
1304 if (!ipv6_unicast_destination(skb))
1305 goto drop;
1306
1307 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1308 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1309 return 0;
1310 }
1311
1312 return tcp_conn_request(&tcp6_request_sock_ops,
1313 &tcp_request_sock_ipv6_ops, sk, skb);
1314
1315 drop:
1316 tcp_listendrop(sk);
1317 return 0; /* don't send reset */
1318 }
1319
/* Undo tcp_v6_fill_cb(): put the IPv6 control-block data back at the
 * front of skb->cb.
 *
 * We need to move header back to the beginning if xfrm6_policy_check()
 * and tcp_v6_fill_cb() are going to be called again.
 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
 */
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}
1329
/* Called from tcp_v4_syn_recv_sock() for v6_mapped children: finish
 * IPv6-side setup of a child that will carry IPv4 traffic.
 */
static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk)
{
	struct inet_sock *newinet = inet_sk(newsk);
	struct ipv6_pinfo *newnp;

	newinet->pinet6 = newnp = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;

	/* Inherit the listener's IPv6 state wholesale, then reset the
	 * per-connection fields below.
	 */
	memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo));

	newnp->saddr = newsk->sk_v6_rcv_saddr;

	/* The child speaks IPv4 on the wire: use the mapped ops and the
	 * IPv4 backlog receive path.
	 */
	inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
	if (sk_is_mptcp(newsk))
		mptcpv6_handle_mapped(newsk, true);
	newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

	/* Don't share the listener's lists/options with the child. */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->pktoptions = NULL;
	newnp->opt = NULL;

	/* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */
	newnp->mcast_oif = newinet->mc_index;
	newnp->mcast_hops = newinet->mc_ttl;

	newnp->rcv_flowinfo = 0;
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = 0;
}
1364
/* Create the child socket for a completed IPv6 handshake.
 *
 * v4-mapped requests are delegated to tcp_v4_syn_recv_sock().  On
 * success the hashed child is returned and *own_req says whether we
 * own @req; on any failure NULL is returned and the listener's drop
 * counter is bumped.
 *
 * @opt_child_init is not used on the native IPv6 path here; it is
 * presumably consumed by tcp_v4_syn_recv_sock() for mapped children
 * (we pass tcp_v6_mapped_child_init down) — confirm against the v4
 * implementation.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req,
					 void (*opt_child_init)(struct sock *newsk,
								const struct sock *sk))
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_request_sock *ireq;
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct ipv6_pinfo *newnp;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_syn_recv_sock(sk, skb, req, dst,
					    req_unhash, own_req,
					    tcp_v6_mapped_child_init);
	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP);
	if (!dst)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	newinet = inet_sk(newsk);
	newinet->cork.fl.u.ip6 = fl6;
	newinet->pinet6 = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;
	newinet->inet_opt = NULL;

	newtp = tcp_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	/* Start from the listener's IPv6 state, then override the
	 * per-connection fields below.
	 */
	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(newsk, dst, false, false);

	newnp->saddr = ireq->ir_v6_loc_addr;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst6_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	if (!tcp_rsk_used_ao(req)) {
		/* Copy over the MD5 key from the original socket */
		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		if (key) {
			const union tcp_md5_addr *addr;

			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
				goto put_and_exit;
		}
	}
#endif
#ifdef CONFIG_TCP_AO
	/* Copy over tcp_ao_info if any */
	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
		goto put_and_exit; /* OOM */
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

exit_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
1538
1539 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1540 u32));
/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Per-socket IPv6 receive handler: runs the established fast path,
 * listener/child dispatch, the generic state machine, and latches
 * IPV6_PKTOPTIONS data for the last in-order segment.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	reason = psp_sk_rx_policy_check(sk, skb);
	if (reason)
		goto err_discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		/* Invalidate the cached RX dst if the incoming device or
		 * route no longer match it.
		 */
		if (dst && unlikely(dst != skb_dst(skb))) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			return 0;
		if (nsk != sk) {
			reason = tcp_child_process(sk, nsk, skb);
			sock_put(nsk);
			if (reason)
				goto reset;
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	reason = tcp_rcv_state_process(sk, skb);
	if (reason)
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
err_discard:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			/* Swap in the new options skb; the old one (if any)
			 * falls through to consume_skb() below.
			 */
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}
1689
/* Populate TCP_SKB_CB(skb) from the TCP/IPv6 headers, relocating the
 * IPv6 control block first so both fit in skb->cb.
 */
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* SYN and FIN each occupy one unit of sequence space. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
1712
/* Protocol entry point for incoming IPv6 TCP segments.
 *
 * Validates the header and checksum, looks up the owning socket and
 * handles the NEW_SYN_RECV and TIME_WAIT special cases before handing
 * the segment to tcp_v6_do_rcv() (directly, or via the socket backlog
 * when the owner holds the lock).
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* pskb_may_pull() may have reallocated the header. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_hash(sk, req, skb,
						       &hdr->saddr, &hdr->daddr,
						       AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_skbadd(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		drop_reason = tcp_filter(sk, skb);
		if (!drop_reason) {
			/* tcp_filter() may have trimmed/reallocated skb. */
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
					    &drop_reason);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				enum sk_rst_reason rst_reason;

				rst_reason = sk_rst_convert_drop_reason(drop_reason);
				tcp_v6_send_reset(nsk, skb, rst_reason);
				sock_put(nsk);
				goto discard_and_relse;
			}
			sock_put(nsk);
			sock_put(sk);
			return 0;
		}
	}

process:
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
				       AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	drop_reason = tcp_filter(sk, skb);
	if (drop_reason)
		goto discard_and_relse;

	/* tcp_filter() may have trimmed/reallocated skb. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		drop_reason = tcp_add_backlog(sk, skb);
		if (drop_reason)
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_skbadd(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
					       &drop_reason);
	switch (tw_status) {
	case TCP_TW_SYN:
	{
		/* A new SYN may legitimately reuse a timewait 4-tuple:
		 * retarget it at a current listener, if any.
		 */
		struct sock *sk2;

		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
1982
/* Timewait-socket sizing for IPv6 TCP. */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
};
1986
/* AF-specific connection ops for native IPv6 TCP sockets. */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};
1998
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* MD5/AO signing ops for native IPv6 TCP sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
#endif
};
#endif
2014
/*
 *	TCP over IPv4 via INET6 API: IPv4 wire handling behind an
 *	AF_INET6 socket (v4-mapped addresses).
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
2029
2030 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* MD5/AO signing ops for v4-mapped sockets: IPv4 hash computation,
 * IPv6 option parsing/lookup (the socket API is still AF_INET6).
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
#endif
};
2044
/* sk_destruct for IPv6 TCP sockets when MD5/AO support is built in:
 * release signing state, then run the generic IPv6 destructor.
 */
static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
2051 #endif
2052
2053 /* NOTE: A lot of things set to zero explicitly by call to
2054 * sk_alloc() so need not be done here.
2055 */
2056 static int tcp_v6_init_sock(struct sock *sk)
2057 {
2058 struct inet_connection_sock *icsk = inet_csk(sk);
2059
2060 tcp_init_sock(sk);
2061
2062 icsk->icsk_af_ops = &ipv6_specific;
2063
2064 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2065 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
2066 sk->sk_destruct = tcp6_destruct_sock;
2067 #endif
2068
2069 return 0;
2070 }
2071
2072 #ifdef CONFIG_PROC_FS
2073 /* Proc filesystem TCPv6 sock list dumping. */
/* Dump one SYN_RECV request socket in /proc/net/tcp6 row format. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	/* Time until the SYN-ACK retransmit timer fires; clamp at 0. */
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
2105
/* Format one full TCP socket (listener or established) as a
 * /proc/net/tcp6 line.  The socket is not locked, so all reads of
 * shared fields use READ_ONCE()/acquire semantics and may observe
 * transient values.
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	/* Acquire pairs with the store of icsk_pending so the timer fields
	 * read below are at least as fresh as the pending state.
	 */
	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;	/* retransmit-class timer */
		timer_expires	= tcp_timeout_expires(sp);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;	/* zero-window probe timer */
		timer_expires	= tcp_timeout_expires(sp);
	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
		timer_active	= 2;	/* keepalive timer */
		timer_expires	= icsk->icsk_keepalive_timer.expires;
	} else {
		timer_active	= 0;	/* no timer pending */
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   READ_ONCE(icsk->icsk_retransmits),
		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
		   READ_ONCE(icsk->icsk_probes_out),
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   /* last column: fastopen queue limit for listeners,
		    * else -1 while in initial slow start, else ssthresh.
		    */
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
2179
2180 static void get_timewait6_sock(struct seq_file *seq,
2181 struct inet_timewait_sock *tw, int i)
2182 {
2183 long delta = tw->tw_timer.expires - jiffies;
2184 const struct in6_addr *dest, *src;
2185 __u16 destp, srcp;
2186
2187 dest = &tw->tw_v6_daddr;
2188 src = &tw->tw_v6_rcv_saddr;
2189 destp = ntohs(tw->tw_dport);
2190 srcp = ntohs(tw->tw_sport);
2191
2192 seq_printf(seq,
2193 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2194 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2195 i,
2196 src->s6_addr32[0], src->s6_addr32[1],
2197 src->s6_addr32[2], src->s6_addr32[3], srcp,
2198 dest->s6_addr32[0], dest->s6_addr32[1],
2199 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2200 READ_ONCE(tw->tw_substate), 0, 0,
2201 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2202 refcount_read(&tw->tw_refcnt), tw);
2203 }
2204
2205 static int tcp6_seq_show(struct seq_file *seq, void *v)
2206 {
2207 struct tcp_iter_state *st;
2208 struct sock *sk = v;
2209
2210 if (v == SEQ_START_TOKEN) {
2211 seq_puts(seq,
2212 " sl "
2213 "local_address "
2214 "remote_address "
2215 "st tx_queue rx_queue tr tm->when retrnsmt"
2216 " uid timeout inode\n");
2217 goto out;
2218 }
2219 st = seq->private;
2220
2221 if (sk->sk_state == TCP_TIME_WAIT)
2222 get_timewait6_sock(seq, v, st->num);
2223 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2224 get_openreq6(seq, v, st->num);
2225 else
2226 get_tcp6_sock(seq, v, st->num);
2227 out:
2228 return 0;
2229 }
2230
2231 static const struct seq_operations tcp6_seq_ops = {
2232 .show = tcp6_seq_show,
2233 .start = tcp_seq_start,
2234 .next = tcp_seq_next,
2235 .stop = tcp_seq_stop,
2236 };
2237
/* Restrict the shared TCP seq_file iterator to AF_INET6 sockets. */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};
2241
2242 int __net_init tcp6_proc_init(struct net *net)
2243 {
2244 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2245 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2246 return -ENOMEM;
2247 return 0;
2248 }
2249
/* Remove the per-netns /proc/net/tcp6 entry created by tcp6_proc_init(). */
void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
2254 #endif
2255
/* Protocol operations for IPv6 TCP sockets.  Most handlers are shared
 * with IPv4 TCP; only address-family-specific entry points (connect,
 * backlog_rcv, init) differ.
 */
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	/* destroy is af-independent; the v4 variant is reused here. */
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.splice_eof		= tcp_splice_eof,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	/* Memory accounting is shared with IPv4 TCP. */
	.memory_allocated	= &net_aligned_data.tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.freeptr_offset		= offsetof(struct tcp6_sock,
					   tcp.inet_conn.icsk_inet.sk.sk_freeptr),
	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
2309
2310
/* inet6 protocol switch entry binding SOCK_STREAM/IPPROTO_TCP to
 * tcpv6_prot; permanent (cannot be overridden) and connection-oriented.
 */
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};
2319
2320 static int __net_init tcpv6_net_init(struct net *net)
2321 {
2322 int res;
2323
2324 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2325 SOCK_RAW, IPPROTO_TCP, net);
2326 if (!res)
2327 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
2328
2329 return res;
2330 }
2331
/* Per-netns teardown: destroy the control socket from tcpv6_net_init(). */
static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
2336
/* Per-network-namespace lifecycle hooks for IPv6 TCP. */
static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
};
2341
2342 int __init tcpv6_init(void)
2343 {
2344 int ret;
2345
2346 net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
2347 .handler = tcp_v6_rcv,
2348 .err_handler = tcp_v6_err,
2349 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
2350 };
2351 ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2352 if (ret)
2353 goto out;
2354
2355 /* register inet6 protocol */
2356 ret = inet6_register_protosw(&tcpv6_protosw);
2357 if (ret)
2358 goto out_tcpv6_protocol;
2359
2360 ret = register_pernet_subsys(&tcpv6_net_ops);
2361 if (ret)
2362 goto out_tcpv6_protosw;
2363
2364 ret = mptcpv6_init();
2365 if (ret)
2366 goto out_tcpv6_pernet_subsys;
2367
2368 out:
2369 return ret;
2370
2371 out_tcpv6_pernet_subsys:
2372 unregister_pernet_subsys(&tcpv6_net_ops);
2373 out_tcpv6_protosw:
2374 inet6_unregister_protosw(&tcpv6_protosw);
2375 out_tcpv6_protocol:
2376 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2377 goto out;
2378 }
2379
/* Unregister IPv6 TCP in the reverse order of tcpv6_init(). */
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}
2386