1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/aligned_data.h> 45 #include <net/tcp.h> 46 #include <net/ndisc.h> 47 #include <net/inet6_hashtables.h> 48 #include <net/inet6_connection_sock.h> 49 #include <net/ipv6.h> 50 #include <net/transp_v6.h> 51 #include <net/addrconf.h> 52 #include <net/ip6_route.h> 53 #include <net/ip6_checksum.h> 54 #include <net/inet_ecn.h> 55 #include <net/protocol.h> 56 #include <net/xfrm.h> 57 #include <net/snmp.h> 58 #include <net/dsfield.h> 59 #include <net/timewait_sock.h> 60 #include <net/inet_common.h> 61 #include <net/secure_seq.h> 62 #include <net/hotdata.h> 63 #include <net/busy_poll.h> 64 #include <net/rstreason.h> 65 #include <net/psp.h> 66 67 #include <linux/proc_fs.h> 68 #include <linux/seq_file.h> 69 70 #include <crypto/md5.h> 71 #include <crypto/utils.h> 72 73 #include <trace/events/tcp.h> 74 75 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, 76 enum sk_rst_reason reason); 77 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 78 struct request_sock *req); 79 80 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 81 82 static const struct inet_connection_sock_af_ops ipv6_mapped; 83 const struct inet_connection_sock_af_ops ipv6_specific; 84 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 85 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; 86 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; 87 #endif 88 89 /* Helper returning the inet6 address from a given tcp socket. 90 * It can be used in TCP stack instead of inet6_sk(sk). 91 * This avoids a dereference and allow compiler optimizations. 92 * It is a specialized version of inet6_sk_generic(). 93 */ 94 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ 95 struct tcp6_sock, tcp)->inet6) 96 97 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 98 { 99 struct dst_entry *dst = skb_dst(skb); 100 101 if (dst && dst_hold_safe(dst)) { 102 rcu_assign_pointer(sk->sk_rx_dst, dst); 103 sk->sk_rx_dst_ifindex = skb->skb_iif; 104 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); 105 } 106 } 107 108 static union tcp_seq_and_ts_off 109 tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) 110 { 111 return secure_tcpv6_seq_and_ts_off(net, 112 ipv6_hdr(skb)->daddr.s6_addr32, 113 ipv6_hdr(skb)->saddr.s6_addr32, 114 tcp_hdr(skb)->dest, 115 tcp_hdr(skb)->source); 116 } 117 118 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, 119 int addr_len) 120 { 121 /* This check is replicated from tcp_v6_connect() and intended to 122 * prevent BPF program called below from accessing bytes that are out 123 * of the bound specified by user in addr_len. 124 */ 125 if (addr_len < SIN6_LEN_RFC2133) 126 return -EINVAL; 127 128 sock_owned_by_me(sk); 129 130 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); 131 } 132 133 static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, 134 int addr_len) 135 { 136 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 137 struct inet_connection_sock *icsk = inet_csk(sk); 138 struct inet_timewait_death_row *tcp_death_row; 139 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 140 struct in6_addr *saddr = NULL, *final_p; 141 struct inet_sock *inet = inet_sk(sk); 142 struct tcp_sock *tp = tcp_sk(sk); 143 struct net *net = sock_net(sk); 144 struct ipv6_txoptions *opt; 145 struct dst_entry *dst; 146 struct flowi6 *fl6; 147 int addr_type; 148 int err; 149 150 if (addr_len < SIN6_LEN_RFC2133) 151 return -EINVAL; 152 153 if (usin->sin6_family != AF_INET6) 154 return -EAFNOSUPPORT; 155 156 fl6 = &inet_sk(sk)->cork.fl.u.ip6; 157 memset(fl6, 0, sizeof(*fl6)); 158 159 if (inet6_test_bit(SNDFLOW, sk)) { 160 fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; 161 IP6_ECN_flow_init(fl6->flowlabel); 162 if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { 163 struct ip6_flowlabel *flowlabel; 164 flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); 165 if (IS_ERR(flowlabel)) 166 return -EINVAL; 167 fl6_sock_release(flowlabel); 168 } 169 } 170 171 /* 172 * connect() to INADDR_ANY means loopback (BSD'ism). 173 */ 174 175 if (ipv6_addr_any(&usin->sin6_addr)) { 176 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 177 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 178 &usin->sin6_addr); 179 else 180 usin->sin6_addr = in6addr_loopback; 181 } 182 183 addr_type = ipv6_addr_type(&usin->sin6_addr); 184 185 if (addr_type & IPV6_ADDR_MULTICAST) 186 return -ENETUNREACH; 187 188 if (addr_type&IPV6_ADDR_LINKLOCAL) { 189 if (addr_len >= sizeof(struct sockaddr_in6) && 190 usin->sin6_scope_id) { 191 /* If interface is set while binding, indices 192 * must coincide. 193 */ 194 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 195 return -EINVAL; 196 197 sk->sk_bound_dev_if = usin->sin6_scope_id; 198 } 199 200 /* Connect to link-local address requires an interface */ 201 if (!sk->sk_bound_dev_if) 202 return -EINVAL; 203 } 204 205 if (tp->rx_opt.ts_recent_stamp && 206 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 207 tp->rx_opt.ts_recent = 0; 208 tp->rx_opt.ts_recent_stamp = 0; 209 WRITE_ONCE(tp->write_seq, 0); 210 } 211 212 sk->sk_v6_daddr = usin->sin6_addr; 213 np->flow_label = fl6->flowlabel; 214 215 /* 216 * TCP over IPv4 217 */ 218 219 if (addr_type & IPV6_ADDR_MAPPED) { 220 u32 exthdrlen = icsk->icsk_ext_hdr_len; 221 struct sockaddr_in sin; 222 223 if (ipv6_only_sock(sk)) 224 return -ENETUNREACH; 225 226 sin.sin_family = AF_INET; 227 sin.sin_port = usin->sin6_port; 228 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 229 230 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 231 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); 232 if (sk_is_mptcp(sk)) 233 mptcpv6_handle_mapped(sk, true); 234 sk->sk_backlog_rcv = tcp_v4_do_rcv; 235 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 236 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 237 #endif 238 239 err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin)); 240 241 if (err) { 242 icsk->icsk_ext_hdr_len = exthdrlen; 243 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ 244 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); 245 if (sk_is_mptcp(sk)) 246 mptcpv6_handle_mapped(sk, false); 247 sk->sk_backlog_rcv = tcp_v6_do_rcv; 248 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 249 tp->af_specific = &tcp_sock_ipv6_specific; 250 #endif 251 goto failure; 252 } 253 np->saddr = sk->sk_v6_rcv_saddr; 254 255 return err; 256 } 257 258 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 259 saddr = &sk->sk_v6_rcv_saddr; 260 261 fl6->flowi6_proto = IPPROTO_TCP; 262 fl6->daddr = sk->sk_v6_daddr; 263 fl6->saddr = saddr ? *saddr : np->saddr; 264 fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); 265 fl6->flowi6_oif = sk->sk_bound_dev_if; 266 fl6->flowi6_mark = sk->sk_mark; 267 fl6->fl6_dport = usin->sin6_port; 268 fl6->fl6_sport = inet->inet_sport; 269 if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport) 270 fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT; 271 fl6->flowi6_uid = sk_uid(sk); 272 273 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 274 final_p = fl6_update_dst(fl6, opt, &np->final); 275 276 security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); 277 278 dst = ip6_dst_lookup_flow(net, sk, fl6, final_p); 279 if (IS_ERR(dst)) { 280 err = PTR_ERR(dst); 281 goto failure; 282 } 283 284 tp->tcp_usec_ts = dst_tcp_usec_ts(dst); 285 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 286 287 if (!saddr) { 288 saddr = &fl6->saddr; 289 290 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); 291 if (err) 292 goto failure; 293 } 294 295 /* set the source address */ 296 np->saddr = *saddr; 297 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 298 299 sk->sk_gso_type = SKB_GSO_TCPV6; 300 ip6_dst_store(sk, dst, false, false); 301 302 icsk->icsk_ext_hdr_len = psp_sk_overhead(sk); 303 if (opt) 304 icsk->icsk_ext_hdr_len += opt->opt_flen + 305 opt->opt_nflen; 306 307 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 308 309 inet->inet_dport = usin->sin6_port; 310 311 tcp_set_state(sk, TCP_SYN_SENT); 312 err = inet6_hash_connect(tcp_death_row, sk); 313 if (err) 314 goto late_failure; 315 316 sk_set_txhash(sk); 317 318 if (likely(!tp->repair)) { 319 union tcp_seq_and_ts_off st; 320 321 st = secure_tcpv6_seq_and_ts_off(net, 322 np->saddr.s6_addr32, 323 sk->sk_v6_daddr.s6_addr32, 324 inet->inet_sport, 325 inet->inet_dport); 326 if (!tp->write_seq) 327 WRITE_ONCE(tp->write_seq, st.seq); 328 WRITE_ONCE(tp->tsoffset, st.ts_off); 329 } 330 331 if (tcp_fastopen_defer_connect(sk, &err)) 332 return err; 333 if (err) 334 goto late_failure; 335 336 err = tcp_connect(sk); 337 if (err) 338 goto late_failure; 339 340 return 0; 341 342 late_failure: 343 tcp_set_state(sk, TCP_CLOSE); 344 inet_bhash2_reset_saddr(sk); 345 failure: 346 inet->inet_dport = 0; 347 sk->sk_route_caps = 0; 348 return err; 349 } 350 351 static struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) 352 { 353 struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6; 354 struct dst_entry *dst; 355 356 dst = inet6_csk_route_socket(sk, fl6); 357 358 if (IS_ERR(dst)) 359 return NULL; 360 dst->ops->update_pmtu(dst, sk, NULL, mtu, true); 361 362 dst = inet6_csk_route_socket(sk, fl6); 363 return IS_ERR(dst) ? NULL : dst; 364 } 365 366 static void tcp_v6_mtu_reduced(struct sock *sk) 367 { 368 struct dst_entry *dst; 369 u32 mtu, dmtu; 370 371 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 372 return; 373 374 mtu = READ_ONCE(tcp_sk(sk)->mtu_info); 375 376 /* Drop requests trying to increase our current mss. 377 * Check done in __ip6_rt_update_pmtu() is too late. 378 */ 379 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) 380 return; 381 382 dst = inet6_csk_update_pmtu(sk, mtu); 383 if (!dst) 384 return; 385 386 dmtu = dst6_mtu(dst); 387 if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) { 388 tcp_sync_mss(sk, dmtu); 389 tcp_simple_retransmit(sk); 390 } 391 } 392 393 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 394 u8 type, u8 code, int offset, __be32 info) 395 { 396 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; 397 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 398 struct net *net = dev_net_rcu(skb->dev); 399 struct request_sock *fastopen; 400 struct ipv6_pinfo *np; 401 struct tcp_sock *tp; 402 __u32 seq, snd_una; 403 struct sock *sk; 404 bool fatal; 405 int err; 406 407 sk = __inet6_lookup_established(net, &hdr->daddr, th->dest, 408 &hdr->saddr, ntohs(th->source), 409 skb->dev->ifindex, inet6_sdif(skb)); 410 411 if (!sk) { 412 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), 413 ICMP6_MIB_INERRORS); 414 return -ENOENT; 415 } 416 417 if (sk->sk_state == TCP_TIME_WAIT) { 418 /* To increase the counter of ignored icmps for TCP-AO */ 419 tcp_ao_ignore_icmp(sk, AF_INET6, type, code); 420 inet_twsk_put(inet_twsk(sk)); 421 return 0; 422 } 423 seq = ntohl(th->seq); 424 fatal = icmpv6_err_convert(type, code, &err); 425 if (sk->sk_state == TCP_NEW_SYN_RECV) { 426 tcp_req_err(sk, seq, fatal); 427 return 0; 428 } 429 430 if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { 431 sock_put(sk); 432 return 0; 433 } 434 435 bh_lock_sock(sk); 436 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 437 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); 438 439 if (sk->sk_state == TCP_CLOSE) 440 goto out; 441 442 if (static_branch_unlikely(&ip6_min_hopcount)) { 443 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 444 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 445 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 446 goto out; 447 } 448 } 449 450 tp = tcp_sk(sk); 451 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 452 fastopen = rcu_dereference(tp->fastopen_rsk); 453 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 454 if (sk->sk_state != TCP_LISTEN && 455 !between(seq, snd_una, tp->snd_nxt)) { 456 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 457 goto out; 458 } 459 460 np = tcp_inet6_sk(sk); 461 462 if (type == NDISC_REDIRECT) { 463 if (!sock_owned_by_user(sk)) { 464 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); 465 466 if (dst) 467 dst->ops->redirect(dst, sk, skb); 468 } 469 goto out; 470 } 471 472 if (type == ICMPV6_PKT_TOOBIG) { 473 u32 mtu = ntohl(info); 474 475 /* We are not interested in TCP_LISTEN and open_requests 476 * (SYN-ACKs send out by Linux are always <576bytes so 477 * they should go through unfragmented). 478 */ 479 if (sk->sk_state == TCP_LISTEN) 480 goto out; 481 482 if (!ip6_sk_accept_pmtu(sk)) 483 goto out; 484 485 if (mtu < IPV6_MIN_MTU) 486 goto out; 487 488 WRITE_ONCE(tp->mtu_info, mtu); 489 490 if (!sock_owned_by_user(sk)) 491 tcp_v6_mtu_reduced(sk); 492 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, 493 &sk->sk_tsq_flags)) 494 sock_hold(sk); 495 goto out; 496 } 497 498 499 /* Might be for an request_sock */ 500 switch (sk->sk_state) { 501 case TCP_SYN_SENT: 502 case TCP_SYN_RECV: 503 /* Only in fast or simultaneous open. If a fast open socket is 504 * already accepted it is treated as a connected one below. 505 */ 506 if (fastopen && !fastopen->sk) 507 break; 508 509 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 510 511 if (!sock_owned_by_user(sk)) 512 tcp_done_with_error(sk, err); 513 else 514 WRITE_ONCE(sk->sk_err_soft, err); 515 goto out; 516 case TCP_LISTEN: 517 break; 518 default: 519 /* check if this ICMP message allows revert of backoff. 520 * (see RFC 6069) 521 */ 522 if (!fastopen && type == ICMPV6_DEST_UNREACH && 523 code == ICMPV6_NOROUTE) 524 tcp_ld_RTO_revert(sk, seq); 525 } 526 527 if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { 528 WRITE_ONCE(sk->sk_err, err); 529 sk_error_report(sk); 530 } else { 531 WRITE_ONCE(sk->sk_err_soft, err); 532 } 533 out: 534 bh_unlock_sock(sk); 535 sock_put(sk); 536 return 0; 537 } 538 539 540 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 541 struct flowi *fl, 542 struct request_sock *req, 543 struct tcp_fastopen_cookie *foc, 544 enum tcp_synack_type synack_type, 545 struct sk_buff *syn_skb) 546 { 547 struct inet_request_sock *ireq = inet_rsk(req); 548 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 549 struct ipv6_txoptions *opt; 550 struct flowi6 *fl6 = &fl->u.ip6; 551 struct sk_buff *skb; 552 int err = -ENOMEM; 553 u8 tclass; 554 555 /* First, grab a route. */ 556 if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req, 557 IPPROTO_TCP)) == NULL) 558 goto done; 559 560 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 561 562 if (skb) { 563 tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK; 564 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 565 &ireq->ir_v6_rmt_addr); 566 567 fl6->daddr = ireq->ir_v6_rmt_addr; 568 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) 569 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 570 571 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 572 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 573 (np->tclass & INET_ECN_MASK) : 574 np->tclass; 575 576 if (!INET_ECN_is_capable(tclass) && 577 tcp_bpf_ca_needs_ecn((struct sock *)req)) 578 tclass |= INET_ECN_ECT_0; 579 580 rcu_read_lock(); 581 opt = ireq->ipv6_opt; 582 if (!opt) 583 opt = rcu_dereference(np->opt); 584 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), 585 opt, tclass, READ_ONCE(sk->sk_priority)); 586 rcu_read_unlock(); 587 err = net_xmit_eval(err); 588 } 589 590 done: 591 return err; 592 } 593 594 595 static void tcp_v6_reqsk_destructor(struct request_sock *req) 596 { 597 kfree(inet_rsk(req)->ipv6_opt); 598 consume_skb(inet_rsk(req)->pktopts); 599 } 600 601 #ifdef CONFIG_TCP_MD5SIG 602 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 603 const struct in6_addr *addr, 604 int l3index) 605 { 606 return tcp_md5_do_lookup(sk, l3index, 607 (union tcp_md5_addr *)addr, AF_INET6); 608 } 609 610 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 611 const struct sock *addr_sk) 612 { 613 int l3index; 614 615 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 616 addr_sk->sk_bound_dev_if); 617 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 618 l3index); 619 } 620 621 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 622 sockptr_t optval, int optlen) 623 { 624 struct tcp_md5sig cmd; 625 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 626 union tcp_ao_addr *addr; 627 int l3index = 0; 628 u8 prefixlen; 629 bool l3flag; 630 u8 flags; 631 632 if (optlen < sizeof(cmd)) 633 return -EINVAL; 634 635 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 636 return -EFAULT; 637 638 if (sin6->sin6_family != AF_INET6) 639 return -EINVAL; 640 641 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 642 l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 643 644 if (optname == TCP_MD5SIG_EXT && 645 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 646 prefixlen = cmd.tcpm_prefixlen; 647 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 648 prefixlen > 32)) 649 return -EINVAL; 650 } else { 651 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 652 } 653 654 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 655 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 656 struct net_device *dev; 657 658 rcu_read_lock(); 659 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 660 if (dev && netif_is_l3_master(dev)) 661 l3index = dev->ifindex; 662 rcu_read_unlock(); 663 664 /* ok to reference set/not set outside of rcu; 665 * right now device MUST be an L3 master 666 */ 667 if (!dev || !l3index) 668 return -EINVAL; 669 } 670 671 if (!cmd.tcpm_keylen) { 672 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 673 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 674 AF_INET, prefixlen, 675 l3index, flags); 676 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 677 AF_INET6, prefixlen, l3index, flags); 678 } 679 680 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 681 return -EINVAL; 682 683 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { 684 addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; 685 686 /* Don't allow keys for peers that have a matching TCP-AO key. 687 * See the comment in tcp_ao_add_cmd() 688 */ 689 if (tcp_ao_required(sk, addr, AF_INET, 690 l3flag ? l3index : -1, false)) 691 return -EKEYREJECTED; 692 return tcp_md5_do_add(sk, addr, 693 AF_INET, prefixlen, l3index, flags, 694 cmd.tcpm_key, cmd.tcpm_keylen); 695 } 696 697 addr = (union tcp_md5_addr *)&sin6->sin6_addr; 698 699 /* Don't allow keys for peers that have a matching TCP-AO key. 700 * See the comment in tcp_ao_add_cmd() 701 */ 702 if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false)) 703 return -EKEYREJECTED; 704 705 return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, 706 cmd.tcpm_key, cmd.tcpm_keylen); 707 } 708 709 static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx, 710 const struct in6_addr *daddr, 711 const struct in6_addr *saddr, 712 const struct tcphdr *th, int nbytes) 713 { 714 struct { 715 struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */ 716 struct tcphdr tcp; 717 } h; 718 719 h.ip.saddr = *saddr; 720 h.ip.daddr = *daddr; 721 h.ip.protocol = cpu_to_be32(IPPROTO_TCP); 722 h.ip.len = cpu_to_be32(nbytes); 723 h.tcp = *th; 724 h.tcp.check = 0; 725 md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp)); 726 } 727 728 static noinline_for_stack void 729 tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 730 const struct in6_addr *daddr, struct in6_addr *saddr, 731 const struct tcphdr *th) 732 { 733 struct md5_ctx ctx; 734 735 md5_init(&ctx); 736 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2); 737 tcp_md5_hash_key(&ctx, key); 738 md5_final(&ctx, md5_hash); 739 } 740 741 static noinline_for_stack void 742 tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 743 const struct sock *sk, const struct sk_buff *skb) 744 { 745 const struct tcphdr *th = tcp_hdr(skb); 746 const struct in6_addr *saddr, *daddr; 747 struct md5_ctx ctx; 748 749 if (sk) { /* valid for establish/request sockets */ 750 saddr = &sk->sk_v6_rcv_saddr; 751 daddr = &sk->sk_v6_daddr; 752 } else { 753 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 754 saddr = &ip6h->saddr; 755 daddr = &ip6h->daddr; 756 } 757 758 md5_init(&ctx); 759 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len); 760 tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2); 761 tcp_md5_hash_key(&ctx, key); 762 md5_final(&ctx, md5_hash); 763 } 764 #endif 765 766 static void tcp_v6_init_req(struct request_sock *req, 767 const struct sock *sk_listener, 768 struct sk_buff *skb, 769 u32 tw_isn) 770 { 771 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 772 struct inet_request_sock *ireq = inet_rsk(req); 773 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 774 775 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 776 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 777 ireq->ir_rmt_addr = LOOPBACK4_IPV6; 778 ireq->ir_loc_addr = LOOPBACK4_IPV6; 779 780 /* So that link locals have meaning */ 781 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 782 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 783 ireq->ir_iif = tcp_v6_iif(skb); 784 785 if (!tw_isn && 786 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 787 np->rxopt.bits.rxinfo || 788 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 789 np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { 790 refcount_inc(&skb->users); 791 ireq->pktopts = skb; 792 } 793 } 794 795 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 796 struct sk_buff *skb, 797 struct flowi *fl, 798 struct request_sock *req, 799 u32 tw_isn) 800 { 801 tcp_v6_init_req(req, sk, skb, tw_isn); 802 803 if (security_inet_conn_request(sk, skb, req)) 804 return NULL; 805 806 return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP); 807 } 808 809 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 810 .family = AF_INET6, 811 .obj_size = sizeof(struct tcp6_request_sock), 812 .send_ack = tcp_v6_reqsk_send_ack, 813 .destructor = tcp_v6_reqsk_destructor, 814 .send_reset = tcp_v6_send_reset, 815 }; 816 817 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 818 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 819 sizeof(struct ipv6hdr), 820 #ifdef CONFIG_TCP_MD5SIG 821 .req_md5_lookup = tcp_v6_md5_lookup, 822 .calc_md5_hash = tcp_v6_md5_hash_skb, 823 #endif 824 #ifdef CONFIG_TCP_AO 825 .ao_lookup = tcp_v6_ao_lookup_rsk, 826 .ao_calc_key = tcp_v6_ao_calc_key_rsk, 827 .ao_synack_hash = tcp_v6_ao_synack_hash, 828 #endif 829 #ifdef CONFIG_SYN_COOKIES 830 .cookie_init_seq = cookie_v6_init_sequence, 831 #endif 832 .route_req = tcp_v6_route_req, 833 .init_seq_and_ts_off = tcp_v6_init_seq_and_ts_off, 834 .send_synack = tcp_v6_send_synack, 835 }; 836 837 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 838 u32 ack, u32 win, u32 tsval, u32 tsecr, 839 int oif, int rst, u8 tclass, __be32 label, 840 u32 priority, u32 txhash, struct tcp_key *key) 841 { 842 struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 843 unsigned int tot_len = sizeof(struct tcphdr); 844 struct sock *ctl_sk = net->ipv6.tcp_sk; 845 const struct tcphdr *th = tcp_hdr(skb); 846 __be32 mrst = 0, *topt; 847 struct dst_entry *dst; 848 struct sk_buff *buff; 849 struct tcphdr *t1; 850 struct flowi6 fl6; 851 u32 mark = 0; 852 853 if (tsecr) 854 tot_len += TCPOLEN_TSTAMP_ALIGNED; 855 if (tcp_key_is_md5(key)) 856 tot_len += TCPOLEN_MD5SIG_ALIGNED; 857 if (tcp_key_is_ao(key)) 858 tot_len += tcp_ao_len_aligned(key->ao_key); 859 860 #ifdef CONFIG_MPTCP 861 if (rst && !tcp_key_is_md5(key)) { 862 mrst = mptcp_reset_option(skb); 863 864 if (mrst) 865 tot_len += sizeof(__be32); 866 } 867 #endif 868 869 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 870 if (!buff) 871 return; 872 873 skb_reserve(buff, MAX_TCP_HEADER); 874 875 t1 = skb_push(buff, tot_len); 876 skb_reset_transport_header(buff); 877 878 /* Swap the send and the receive. */ 879 memset(t1, 0, sizeof(*t1)); 880 t1->dest = th->source; 881 t1->source = th->dest; 882 t1->doff = tot_len / 4; 883 t1->seq = htonl(seq); 884 t1->ack_seq = htonl(ack); 885 t1->ack = !rst || !th->ack; 886 t1->rst = rst; 887 t1->window = htons(win); 888 889 topt = (__be32 *)(t1 + 1); 890 891 if (tsecr) { 892 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 893 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 894 *topt++ = htonl(tsval); 895 *topt++ = htonl(tsecr); 896 } 897 898 if (mrst) 899 *topt++ = mrst; 900 901 #ifdef CONFIG_TCP_MD5SIG 902 if (tcp_key_is_md5(key)) { 903 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 904 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 905 tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, 906 &ipv6_hdr(skb)->saddr, 907 &ipv6_hdr(skb)->daddr, t1); 908 } 909 #endif 910 #ifdef CONFIG_TCP_AO 911 if (tcp_key_is_ao(key)) { 912 *topt++ = htonl((TCPOPT_AO << 24) | 913 (tcp_ao_len(key->ao_key) << 16) | 914 (key->ao_key->sndid << 8) | 915 (key->rcv_next)); 916 917 tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, 918 key->traffic_key, 919 (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, 920 (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, 921 t1, key->sne); 922 } 923 #endif 924 925 memset(&fl6, 0, sizeof(fl6)); 926 fl6.daddr = ipv6_hdr(skb)->saddr; 927 fl6.saddr = ipv6_hdr(skb)->daddr; 928 fl6.flowlabel = label; 929 930 buff->ip_summed = CHECKSUM_PARTIAL; 931 932 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 933 934 fl6.flowi6_proto = IPPROTO_TCP; 935 if (rt6_need_strict(&fl6.daddr) && !oif) 936 fl6.flowi6_oif = tcp_v6_iif(skb); 937 else { 938 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 939 oif = skb->skb_iif; 940 941 fl6.flowi6_oif = oif; 942 } 943 944 if (sk) { 945 /* unconstify the socket only to attach it to buff with care. */ 946 skb_set_owner_edemux(buff, (struct sock *)sk); 947 psp_reply_set_decrypted(sk, buff); 948 949 if (sk->sk_state == TCP_TIME_WAIT) 950 mark = inet_twsk(sk)->tw_mark; 951 else 952 mark = READ_ONCE(sk->sk_mark); 953 skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); 954 } 955 if (txhash) { 956 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ 957 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); 958 } 959 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 960 fl6.fl6_dport = t1->dest; 961 fl6.fl6_sport = t1->source; 962 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 963 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 964 965 /* Pass a socket to ip6_dst_lookup either it is for RST 966 * Underlying function will use this to retrieve the network 967 * namespace 968 */ 969 if (sk && sk->sk_state != TCP_TIME_WAIT) 970 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 971 else 972 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 973 if (!IS_ERR(dst)) { 974 skb_dst_set(buff, dst); 975 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 976 tclass, priority); 977 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 978 if (rst) 979 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 980 return; 981 } 982 983 kfree_skb(buff); 984 } 985 986 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, 987 enum sk_rst_reason reason) 988 { 989 const struct tcphdr *th = tcp_hdr(skb); 990 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 991 const __u8 *md5_hash_location = NULL; 992 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 993 bool allocated_traffic_key = false; 994 #endif 995 const struct tcp_ao_hdr *aoh; 996 struct tcp_key key = {}; 997 u32 seq = 0, ack_seq = 0; 998 __be32 label = 0; 999 u32 priority = 0; 1000 struct net *net; 1001 u32 txhash = 0; 1002 int oif = 0; 1003 #ifdef CONFIG_TCP_MD5SIG 1004 unsigned char newhash[16]; 1005 struct sock *sk1 = NULL; 1006 #endif 1007 1008 if (th->rst) 1009 return; 1010 1011 /* If sk not NULL, it means we did a successful lookup and incoming 1012 * route had to be correct. prequeue might have dropped our dst. 1013 */ 1014 if (!sk && !ipv6_unicast_destination(skb)) 1015 return; 1016 1017 net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); 1018 /* Invalid TCP option size or twice included auth */ 1019 if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) 1020 return; 1021 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1022 rcu_read_lock(); 1023 #endif 1024 #ifdef CONFIG_TCP_MD5SIG 1025 if (sk && sk_fullsock(sk)) { 1026 int l3index; 1027 1028 /* sdif set, means packet ingressed via a device 1029 * in an L3 domain and inet_iif is set to it. 1030 */ 1031 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1032 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1033 if (key.md5_key) 1034 key.type = TCP_KEY_MD5; 1035 } else if (md5_hash_location) { 1036 int dif = tcp_v6_iif_l3_slave(skb); 1037 int sdif = tcp_v6_sdif(skb); 1038 int l3index; 1039 1040 /* 1041 * active side is lost. Try to find listening socket through 1042 * source port, and then find md5 key through listening socket. 1043 * we are not loose security here: 1044 * Incoming packet is checked with md5 hash with finding key, 1045 * no RST generated if md5 hash doesn't match. 1046 */ 1047 sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source, 1048 &ipv6h->daddr, ntohs(th->source), 1049 dif, sdif); 1050 if (!sk1) 1051 goto out; 1052 1053 /* sdif set, means packet ingressed via a device 1054 * in an L3 domain and dif is set to it. 1055 */ 1056 l3index = tcp_v6_sdif(skb) ? dif : 0; 1057 1058 key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1059 if (!key.md5_key) 1060 goto out; 1061 key.type = TCP_KEY_MD5; 1062 1063 tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); 1064 if (crypto_memneq(md5_hash_location, newhash, 16)) 1065 goto out; 1066 } 1067 #endif 1068 1069 if (th->ack) 1070 seq = ntohl(th->ack_seq); 1071 else 1072 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1073 (th->doff << 2); 1074 1075 #ifdef CONFIG_TCP_AO 1076 if (aoh) { 1077 int l3index; 1078 1079 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1080 if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, 1081 &key.ao_key, &key.traffic_key, 1082 &allocated_traffic_key, 1083 &key.rcv_next, &key.sne)) 1084 goto out; 1085 key.type = TCP_KEY_AO; 1086 } 1087 #endif 1088 1089 if (sk) { 1090 oif = sk->sk_bound_dev_if; 1091 if (sk_fullsock(sk)) { 1092 if (inet6_test_bit(REPFLOW, sk)) 1093 label = ip6_flowlabel(ipv6h); 1094 priority = READ_ONCE(sk->sk_priority); 1095 txhash = sk->sk_txhash; 1096 } 1097 if (sk->sk_state == TCP_TIME_WAIT) { 1098 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1099 priority = inet_twsk(sk)->tw_priority; 1100 txhash = inet_twsk(sk)->tw_txhash; 1101 } 1102 } else { 1103 if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) & 1104 FLOWLABEL_REFLECT_TCP_RESET) 1105 label = ip6_flowlabel(ipv6h); 1106 } 1107 1108 trace_tcp_send_reset(sk, skb, reason); 1109 1110 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, 1111 ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, 1112 label, priority, txhash, 1113 &key); 1114 1115 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1116 out: 1117 if (allocated_traffic_key) 1118 kfree(key.traffic_key); 1119 rcu_read_unlock(); 1120 #endif 1121 } 1122 1123 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1124 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1125 struct tcp_key *key, u8 tclass, 1126 __be32 label, u32 priority, u32 txhash) 1127 { 1128 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, 1129 tclass, label, priority, txhash, key); 1130 } 1131 1132 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, 1133 enum tcp_tw_status tw_status) 1134 { 1135 struct inet_timewait_sock *tw = inet_twsk(sk); 1136 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1137 u8 tclass = tw->tw_tclass; 1138 struct tcp_key key = {}; 1139 1140 if (tw_status == TCP_TW_ACK_OOW) 1141 tclass &= ~INET_ECN_MASK; 1142 #ifdef CONFIG_TCP_AO 1143 struct tcp_ao_info *ao_info; 1144 1145 if (static_branch_unlikely(&tcp_ao_needed.key)) { 1146 1147 /* FIXME: the segment to-be-acked is not verified yet */ 1148 ao_info = rcu_dereference(tcptw->ao_info); 1149 if (ao_info) { 1150 const struct tcp_ao_hdr *aoh; 1151 1152 /* Invalid TCP option size or twice included auth */ 1153 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1154 goto out; 1155 if (aoh) 1156 key.ao_key = tcp_ao_established_key(sk, ao_info, 1157 aoh->rnext_keyid, -1); 1158 } 1159 } 1160 if (key.ao_key) { 1161 struct tcp_ao_key *rnext_key; 1162 1163 key.traffic_key = snd_other_key(key.ao_key); 1164 /* rcv_next switches to our rcv_next */ 1165 rnext_key = READ_ONCE(ao_info->rnext_key); 1166 key.rcv_next = rnext_key->rcvid; 1167 key.sne = READ_ONCE(ao_info->snd_sne); 1168 key.type = TCP_KEY_AO; 1169 #else 1170 if (0) { 1171 #endif 1172 #ifdef CONFIG_TCP_MD5SIG 1173 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1174 key.md5_key = tcp_twsk_md5_key(tcptw); 1175 if (key.md5_key) 1176 key.type = TCP_KEY_MD5; 1177 #endif 1178 } 1179 1180 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, 1181 READ_ONCE(tcptw->tw_rcv_nxt), 1182 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1183 tcp_tw_tsval(tcptw), 1184 READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, 1185 &key, tclass, cpu_to_be32(tw->tw_flowlabel), 1186 tw->tw_priority, tw->tw_txhash); 1187 1188 #ifdef CONFIG_TCP_AO 1189 out: 1190 #endif 1191 inet_twsk_put(tw); 1192 } 1193 1194 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1195 struct request_sock *req) 1196 { 1197 struct tcp_key key = {}; 1198 1199 #ifdef CONFIG_TCP_AO 1200 if (static_branch_unlikely(&tcp_ao_needed.key) && 1201 tcp_rsk_used_ao(req)) { 1202 const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; 1203 const struct tcp_ao_hdr *aoh; 1204 int l3index; 1205 1206 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1207 /* Invalid TCP option size or twice included auth */ 1208 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1209 return; 1210 if (!aoh) 1211 return; 1212 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1213 (union tcp_ao_addr *)addr, 1214 AF_INET6, aoh->rnext_keyid, -1); 1215 if (unlikely(!key.ao_key)) { 1216 /* Send ACK with any matching MKT for the peer */ 1217 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1218 (union tcp_ao_addr *)addr, 1219 AF_INET6, -1, -1); 1220 /* Matching key disappeared (user removed the key?) 1221 * let the handshake timeout. 1222 */ 1223 if (!key.ao_key) { 1224 net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", 1225 addr, 1226 ntohs(tcp_hdr(skb)->source), 1227 &ipv6_hdr(skb)->daddr, 1228 ntohs(tcp_hdr(skb)->dest)); 1229 return; 1230 } 1231 } 1232 key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); 1233 if (!key.traffic_key) 1234 return; 1235 1236 key.type = TCP_KEY_AO; 1237 key.rcv_next = aoh->keyid; 1238 tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); 1239 #else 1240 if (0) { 1241 #endif 1242 #ifdef CONFIG_TCP_MD5SIG 1243 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1244 int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1245 1246 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, 1247 l3index); 1248 if (key.md5_key) 1249 key.type = TCP_KEY_MD5; 1250 #endif 1251 } 1252 1253 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1254 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1255 */ 1256 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 1257 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1258 tcp_rsk(req)->rcv_nxt, 1259 tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, 1260 tcp_rsk_tsval(tcp_rsk(req)), 1261 req->ts_recent, sk->sk_bound_dev_if, 1262 &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK, 1263 0, 1264 READ_ONCE(sk->sk_priority), 1265 READ_ONCE(tcp_rsk(req)->txhash)); 1266 if (tcp_key_is_ao(&key)) 1267 kfree(key.traffic_key); 1268 } 1269 1270 1271 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1272 { 1273 #ifdef CONFIG_SYN_COOKIES 1274 const struct tcphdr *th = tcp_hdr(skb); 1275 1276 if (!th->syn) 1277 sk = cookie_v6_check(sk, skb); 1278 #endif 1279 return sk; 1280 } 1281 1282 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1283 struct tcphdr *th, u32 *cookie) 1284 { 1285 u16 mss = 0; 1286 #ifdef CONFIG_SYN_COOKIES 1287 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1288 &tcp_request_sock_ipv6_ops, sk, th); 1289 if (mss) { 1290 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1291 tcp_synq_overflow(sk); 1292 } 1293 #endif 1294 return mss; 1295 } 1296 1297 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1298 { 1299 if (skb->protocol == htons(ETH_P_IP)) 1300 return tcp_v4_conn_request(sk, skb); 1301 1302 if (!ipv6_unicast_destination(skb)) 1303 goto drop; 1304 1305 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1306 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1307 return 0; 1308 } 1309 1310 return tcp_conn_request(&tcp6_request_sock_ops, 1311 &tcp_request_sock_ipv6_ops, sk, skb); 1312 1313 drop: 1314 tcp_listendrop(sk); 1315 return 0; /* don't send reset */ 1316 } 1317 1318 static void tcp_v6_restore_cb(struct sk_buff *skb) 1319 { 1320 /* We need to move header back to the beginning if xfrm6_policy_check() 1321 * and tcp_v6_fill_cb() are going to be called again. 1322 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 1323 */ 1324 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1325 sizeof(struct inet6_skb_parm)); 1326 } 1327 1328 /* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */ 1329 static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk) 1330 { 1331 struct inet_sock *newinet = inet_sk(newsk); 1332 struct ipv6_pinfo *newnp; 1333 1334 newinet->pinet6 = newnp = tcp_inet6_sk(newsk); 1335 newinet->ipv6_fl_list = NULL; 1336 1337 memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo)); 1338 1339 newnp->saddr = newsk->sk_v6_rcv_saddr; 1340 1341 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1342 if (sk_is_mptcp(newsk)) 1343 mptcpv6_handle_mapped(newsk, true); 1344 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1345 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1346 tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific; 1347 #endif 1348 1349 newnp->ipv6_mc_list = NULL; 1350 newnp->ipv6_ac_list = NULL; 1351 newnp->pktoptions = NULL; 1352 newnp->opt = NULL; 1353 1354 /* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */ 1355 newnp->mcast_oif = newinet->mc_index; 1356 newnp->mcast_hops = newinet->mc_ttl; 1357 1358 newnp->rcv_flowinfo = 0; 1359 if (inet6_test_bit(REPFLOW, sk)) 1360 newnp->flow_label = 0; 1361 } 1362 1363 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1364 struct request_sock *req, 1365 struct dst_entry *dst, 1366 struct request_sock *req_unhash, 1367 bool *own_req, 1368 void (*opt_child_init)(struct sock *newsk, 1369 const struct sock *sk)) 1370 { 1371 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1372 struct inet_request_sock *ireq; 1373 struct ipv6_txoptions *opt; 1374 struct inet_sock *newinet; 1375 bool found_dup_sk = false; 1376 struct ipv6_pinfo *newnp; 1377 struct tcp_sock *newtp; 1378 struct sock *newsk; 1379 #ifdef CONFIG_TCP_MD5SIG 1380 struct tcp_md5sig_key *key; 1381 int l3index; 1382 #endif 1383 struct flowi6 fl6; 1384 1385 if (skb->protocol == htons(ETH_P_IP)) 1386 return tcp_v4_syn_recv_sock(sk, skb, req, dst, 1387 req_unhash, own_req, 1388 tcp_v6_mapped_child_init); 1389 ireq = inet_rsk(req); 1390 1391 if (sk_acceptq_is_full(sk)) 1392 goto exit_overflow; 1393 1394 dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP); 1395 if (!dst) 1396 goto exit; 1397 1398 newsk = tcp_create_openreq_child(sk, req, skb); 1399 if (!newsk) 1400 goto exit_nonewsk; 1401 1402 /* 1403 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1404 * count here, tcp_create_openreq_child now does this for us, see the 1405 * comment in that function for the gory details. -acme 1406 */ 1407 1408 newsk->sk_gso_type = SKB_GSO_TCPV6; 1409 inet6_sk_rx_dst_set(newsk, skb); 1410 1411 newinet = inet_sk(newsk); 1412 newinet->cork.fl.u.ip6 = fl6; 1413 newinet->pinet6 = tcp_inet6_sk(newsk); 1414 newinet->ipv6_fl_list = NULL; 1415 newinet->inet_opt = NULL; 1416 1417 newtp = tcp_sk(newsk); 1418 newnp = tcp_inet6_sk(newsk); 1419 1420 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1421 1422 ip6_dst_store(newsk, dst, false, false); 1423 1424 newnp->saddr = ireq->ir_v6_loc_addr; 1425 1426 /* Now IPv6 options... 1427 1428 First: no IPv4 options. 1429 */ 1430 newnp->ipv6_mc_list = NULL; 1431 newnp->ipv6_ac_list = NULL; 1432 1433 /* Clone RX bits */ 1434 newnp->rxopt.all = np->rxopt.all; 1435 1436 newnp->pktoptions = NULL; 1437 newnp->opt = NULL; 1438 newnp->mcast_oif = tcp_v6_iif(skb); 1439 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1440 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1441 if (inet6_test_bit(REPFLOW, sk)) 1442 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1443 1444 /* Set ToS of the new socket based upon the value of incoming SYN. 1445 * ECT bits are set later in tcp_init_transfer(). 1446 */ 1447 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1448 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1449 1450 /* Clone native IPv6 options from listening socket (if any) 1451 1452 Yes, keeping reference count would be much more clever, 1453 but we make one more one thing there: reattach optmem 1454 to newsk. 1455 */ 1456 opt = ireq->ipv6_opt; 1457 if (!opt) 1458 opt = rcu_dereference(np->opt); 1459 if (opt) { 1460 opt = ipv6_dup_options(newsk, opt); 1461 RCU_INIT_POINTER(newnp->opt, opt); 1462 } 1463 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1464 if (opt) 1465 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1466 opt->opt_flen; 1467 1468 tcp_ca_openreq_child(newsk, dst); 1469 1470 tcp_sync_mss(newsk, dst6_mtu(dst)); 1471 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1472 1473 tcp_initialize_rcv_mss(newsk); 1474 1475 #ifdef CONFIG_TCP_MD5SIG 1476 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1477 1478 if (!tcp_rsk_used_ao(req)) { 1479 /* Copy over the MD5 key from the original socket */ 1480 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1481 if (key) { 1482 const union tcp_md5_addr *addr; 1483 1484 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; 1485 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) 1486 goto put_and_exit; 1487 } 1488 } 1489 #endif 1490 #ifdef CONFIG_TCP_AO 1491 /* Copy over tcp_ao_info if any */ 1492 if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) 1493 goto put_and_exit; /* OOM */ 1494 #endif 1495 1496 if (__inet_inherit_port(sk, newsk) < 0) 1497 goto put_and_exit; 1498 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1499 &found_dup_sk); 1500 if (*own_req) { 1501 tcp_move_syn(newtp, req); 1502 1503 /* Clone pktoptions received with SYN, if we own the req */ 1504 if (ireq->pktopts) { 1505 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 1506 consume_skb(ireq->pktopts); 1507 ireq->pktopts = NULL; 1508 if (newnp->pktoptions) 1509 tcp_v6_restore_cb(newnp->pktoptions); 1510 } 1511 } else { 1512 if (!req_unhash && found_dup_sk) { 1513 /* This code path should only be executed in the 1514 * syncookie case only 1515 */ 1516 bh_unlock_sock(newsk); 1517 sock_put(newsk); 1518 newsk = NULL; 1519 } 1520 } 1521 1522 return newsk; 1523 1524 exit_overflow: 1525 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1526 exit_nonewsk: 1527 dst_release(dst); 1528 exit: 1529 tcp_listendrop(sk); 1530 return NULL; 1531 put_and_exit: 1532 inet_csk_prepare_forced_close(newsk); 1533 tcp_done(newsk); 1534 goto exit; 1535 } 1536 1537 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1538 u32)); 1539 /* The socket must have it's spinlock held when we get 1540 * here, unless it is a TCP_LISTEN socket. 1541 * 1542 * We have a potential double-lock case here, so even when 1543 * doing backlog processing we use the BH locking scheme. 1544 * This is because we cannot sleep with the original spinlock 1545 * held. 1546 */ 1547 INDIRECT_CALLABLE_SCOPE 1548 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1549 { 1550 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1551 struct sk_buff *opt_skb = NULL; 1552 enum skb_drop_reason reason; 1553 struct tcp_sock *tp; 1554 1555 /* Imagine: socket is IPv6. IPv4 packet arrives, 1556 goes to IPv4 receive handler and backlogged. 1557 From backlog it always goes here. Kerboom... 1558 Fortunately, tcp_rcv_established and rcv_established 1559 handle them correctly, but it is not case with 1560 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1561 */ 1562 1563 if (skb->protocol == htons(ETH_P_IP)) 1564 return tcp_v4_do_rcv(sk, skb); 1565 1566 reason = psp_sk_rx_policy_check(sk, skb); 1567 if (reason) 1568 goto err_discard; 1569 1570 /* 1571 * socket locking is here for SMP purposes as backlog rcv 1572 * is currently called with bh processing disabled. 1573 */ 1574 1575 /* Do Stevens' IPV6_PKTOPTIONS. 1576 1577 Yes, guys, it is the only place in our code, where we 1578 may make it not affecting IPv4. 1579 The rest of code is protocol independent, 1580 and I do not like idea to uglify IPv4. 1581 1582 Actually, all the idea behind IPV6_PKTOPTIONS 1583 looks not very well thought. For now we latch 1584 options, received in the last packet, enqueued 1585 by tcp. Feel free to propose better solution. 1586 --ANK (980728) 1587 */ 1588 if (np->rxopt.all && sk->sk_state != TCP_LISTEN) 1589 opt_skb = skb_clone_and_charge_r(skb, sk); 1590 1591 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1592 struct dst_entry *dst; 1593 1594 dst = rcu_dereference_protected(sk->sk_rx_dst, 1595 lockdep_sock_is_held(sk)); 1596 1597 sock_rps_save_rxhash(sk, skb); 1598 sk_mark_napi_id(sk, skb); 1599 if (dst && unlikely(dst != skb_dst(skb))) { 1600 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1601 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1602 dst, sk->sk_rx_dst_cookie) == NULL) { 1603 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1604 dst_release(dst); 1605 } 1606 } 1607 1608 tcp_rcv_established(sk, skb); 1609 if (opt_skb) 1610 goto ipv6_pktoptions; 1611 return 0; 1612 } 1613 1614 if (tcp_checksum_complete(skb)) 1615 goto csum_err; 1616 1617 if (sk->sk_state == TCP_LISTEN) { 1618 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1619 1620 if (nsk != sk) { 1621 if (nsk) { 1622 reason = tcp_child_process(sk, nsk, skb); 1623 if (reason) 1624 goto reset; 1625 } 1626 return 0; 1627 } 1628 } else 1629 sock_rps_save_rxhash(sk, skb); 1630 1631 reason = tcp_rcv_state_process(sk, skb); 1632 if (reason) 1633 goto reset; 1634 if (opt_skb) 1635 goto ipv6_pktoptions; 1636 return 0; 1637 1638 reset: 1639 tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); 1640 discard: 1641 if (opt_skb) 1642 __kfree_skb(opt_skb); 1643 sk_skb_reason_drop(sk, skb, reason); 1644 return 0; 1645 csum_err: 1646 reason = SKB_DROP_REASON_TCP_CSUM; 1647 trace_tcp_bad_csum(skb); 1648 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1649 err_discard: 1650 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1651 goto discard; 1652 1653 1654 ipv6_pktoptions: 1655 /* Do you ask, what is it? 1656 1657 1. skb was enqueued by tcp. 1658 2. skb is added to tail of read queue, rather than out of order. 1659 3. socket is not in passive state. 1660 4. Finally, it really contains options, which user wants to receive. 1661 */ 1662 tp = tcp_sk(sk); 1663 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1664 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1665 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1666 WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); 1667 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1668 WRITE_ONCE(np->mcast_hops, 1669 ipv6_hdr(opt_skb)->hop_limit); 1670 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1671 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1672 if (inet6_test_bit(REPFLOW, sk)) 1673 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1674 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1675 tcp_v6_restore_cb(opt_skb); 1676 opt_skb = xchg(&np->pktoptions, opt_skb); 1677 } else { 1678 __kfree_skb(opt_skb); 1679 opt_skb = xchg(&np->pktoptions, NULL); 1680 } 1681 } 1682 1683 consume_skb(opt_skb); 1684 return 0; 1685 } 1686 1687 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1688 const struct tcphdr *th) 1689 { 1690 /* This is tricky: we move IP6CB at its correct location into 1691 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1692 * _decode_session6() uses IP6CB(). 1693 * barrier() makes sure compiler won't play aliasing games. 1694 */ 1695 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1696 sizeof(struct inet6_skb_parm)); 1697 barrier(); 1698 1699 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1700 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1701 skb->len - th->doff*4); 1702 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1703 TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); 1704 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1705 TCP_SKB_CB(skb)->sacked = 0; 1706 TCP_SKB_CB(skb)->has_rxtstamp = 1707 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1708 } 1709 1710 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1711 { 1712 struct net *net = dev_net_rcu(skb->dev); 1713 enum skb_drop_reason drop_reason; 1714 enum tcp_tw_status tw_status; 1715 int sdif = inet6_sdif(skb); 1716 int dif = inet6_iif(skb); 1717 const struct tcphdr *th; 1718 const struct ipv6hdr *hdr; 1719 struct sock *sk = NULL; 1720 bool refcounted; 1721 int ret; 1722 u32 isn; 1723 1724 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1725 if (skb->pkt_type != PACKET_HOST) 1726 goto discard_it; 1727 1728 /* 1729 * Count it even if it's bad. 1730 */ 1731 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1732 1733 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1734 goto discard_it; 1735 1736 th = (const struct tcphdr *)skb->data; 1737 1738 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1739 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1740 goto bad_packet; 1741 } 1742 if (!pskb_may_pull(skb, th->doff*4)) 1743 goto discard_it; 1744 1745 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1746 goto csum_error; 1747 1748 th = (const struct tcphdr *)skb->data; 1749 hdr = ipv6_hdr(skb); 1750 1751 lookup: 1752 sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th), 1753 th->source, th->dest, inet6_iif(skb), sdif, 1754 &refcounted); 1755 if (!sk) 1756 goto no_tcp_socket; 1757 1758 if (sk->sk_state == TCP_TIME_WAIT) 1759 goto do_time_wait; 1760 1761 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1762 struct request_sock *req = inet_reqsk(sk); 1763 bool req_stolen = false; 1764 struct sock *nsk; 1765 1766 sk = req->rsk_listener; 1767 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1768 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1769 else 1770 drop_reason = tcp_inbound_hash(sk, req, skb, 1771 &hdr->saddr, &hdr->daddr, 1772 AF_INET6, dif, sdif); 1773 if (drop_reason) { 1774 sk_drops_skbadd(sk, skb); 1775 reqsk_put(req); 1776 goto discard_it; 1777 } 1778 if (tcp_checksum_complete(skb)) { 1779 reqsk_put(req); 1780 goto csum_error; 1781 } 1782 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1783 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1784 if (!nsk) { 1785 inet_csk_reqsk_queue_drop_and_put(sk, req); 1786 goto lookup; 1787 } 1788 sk = nsk; 1789 /* reuseport_migrate_sock() has already held one sk_refcnt 1790 * before returning. 1791 */ 1792 } else { 1793 sock_hold(sk); 1794 } 1795 refcounted = true; 1796 nsk = NULL; 1797 if (!tcp_filter(sk, skb, &drop_reason)) { 1798 th = (const struct tcphdr *)skb->data; 1799 hdr = ipv6_hdr(skb); 1800 tcp_v6_fill_cb(skb, hdr, th); 1801 nsk = tcp_check_req(sk, skb, req, false, &req_stolen, 1802 &drop_reason); 1803 } 1804 if (!nsk) { 1805 reqsk_put(req); 1806 if (req_stolen) { 1807 /* Another cpu got exclusive access to req 1808 * and created a full blown socket. 1809 * Try to feed this packet to this socket 1810 * instead of discarding it. 1811 */ 1812 tcp_v6_restore_cb(skb); 1813 sock_put(sk); 1814 goto lookup; 1815 } 1816 goto discard_and_relse; 1817 } 1818 nf_reset_ct(skb); 1819 if (nsk == sk) { 1820 reqsk_put(req); 1821 tcp_v6_restore_cb(skb); 1822 } else { 1823 drop_reason = tcp_child_process(sk, nsk, skb); 1824 if (drop_reason) { 1825 enum sk_rst_reason rst_reason; 1826 1827 rst_reason = sk_rst_convert_drop_reason(drop_reason); 1828 tcp_v6_send_reset(nsk, skb, rst_reason); 1829 goto discard_and_relse; 1830 } 1831 sock_put(sk); 1832 return 0; 1833 } 1834 } 1835 1836 process: 1837 if (static_branch_unlikely(&ip6_min_hopcount)) { 1838 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1839 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { 1840 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1841 drop_reason = SKB_DROP_REASON_TCP_MINTTL; 1842 goto discard_and_relse; 1843 } 1844 } 1845 1846 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1847 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1848 goto discard_and_relse; 1849 } 1850 1851 drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, 1852 AF_INET6, dif, sdif); 1853 if (drop_reason) 1854 goto discard_and_relse; 1855 1856 nf_reset_ct(skb); 1857 1858 if (tcp_filter(sk, skb, &drop_reason)) 1859 goto discard_and_relse; 1860 1861 th = (const struct tcphdr *)skb->data; 1862 hdr = ipv6_hdr(skb); 1863 tcp_v6_fill_cb(skb, hdr, th); 1864 1865 skb->dev = NULL; 1866 1867 if (sk->sk_state == TCP_LISTEN) { 1868 ret = tcp_v6_do_rcv(sk, skb); 1869 goto put_and_return; 1870 } 1871 1872 sk_incoming_cpu_update(sk); 1873 1874 bh_lock_sock_nested(sk); 1875 tcp_segs_in(tcp_sk(sk), skb); 1876 ret = 0; 1877 if (!sock_owned_by_user(sk)) { 1878 ret = tcp_v6_do_rcv(sk, skb); 1879 } else { 1880 if (tcp_add_backlog(sk, skb, &drop_reason)) 1881 goto discard_and_relse; 1882 } 1883 bh_unlock_sock(sk); 1884 put_and_return: 1885 if (refcounted) 1886 sock_put(sk); 1887 return ret ? -1 : 0; 1888 1889 no_tcp_socket: 1890 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1891 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1892 goto discard_it; 1893 1894 tcp_v6_fill_cb(skb, hdr, th); 1895 1896 if (tcp_checksum_complete(skb)) { 1897 csum_error: 1898 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1899 trace_tcp_bad_csum(skb); 1900 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1901 bad_packet: 1902 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1903 } else { 1904 tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); 1905 } 1906 1907 discard_it: 1908 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1909 sk_skb_reason_drop(sk, skb, drop_reason); 1910 return 0; 1911 1912 discard_and_relse: 1913 sk_drops_skbadd(sk, skb); 1914 if (refcounted) 1915 sock_put(sk); 1916 goto discard_it; 1917 1918 do_time_wait: 1919 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1920 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1921 inet_twsk_put(inet_twsk(sk)); 1922 goto discard_it; 1923 } 1924 1925 tcp_v6_fill_cb(skb, hdr, th); 1926 1927 if (tcp_checksum_complete(skb)) { 1928 inet_twsk_put(inet_twsk(sk)); 1929 goto csum_error; 1930 } 1931 1932 tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, 1933 &drop_reason); 1934 switch (tw_status) { 1935 case TCP_TW_SYN: 1936 { 1937 struct sock *sk2; 1938 1939 sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th), 1940 &ipv6_hdr(skb)->saddr, th->source, 1941 &ipv6_hdr(skb)->daddr, 1942 ntohs(th->dest), 1943 tcp_v6_iif_l3_slave(skb), 1944 sdif); 1945 if (sk2) { 1946 struct inet_timewait_sock *tw = inet_twsk(sk); 1947 inet_twsk_deschedule_put(tw); 1948 sk = sk2; 1949 tcp_v6_restore_cb(skb); 1950 refcounted = false; 1951 __this_cpu_write(tcp_tw_isn, isn); 1952 goto process; 1953 } 1954 1955 drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb); 1956 if (drop_reason) 1957 break; 1958 } 1959 /* to ACK */ 1960 fallthrough; 1961 case TCP_TW_ACK: 1962 case TCP_TW_ACK_OOW: 1963 tcp_v6_timewait_ack(sk, skb, tw_status); 1964 break; 1965 case TCP_TW_RST: 1966 tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); 1967 inet_twsk_deschedule_put(inet_twsk(sk)); 1968 goto discard_it; 1969 case TCP_TW_SUCCESS: 1970 ; 1971 } 1972 goto discard_it; 1973 } 1974 1975 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1976 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1977 }; 1978 1979 const struct inet_connection_sock_af_ops ipv6_specific = { 1980 .queue_xmit = inet6_csk_xmit, 1981 .rebuild_header = inet6_sk_rebuild_header, 1982 .sk_rx_dst_set = inet6_sk_rx_dst_set, 1983 .conn_request = tcp_v6_conn_request, 1984 .syn_recv_sock = tcp_v6_syn_recv_sock, 1985 .net_header_len = sizeof(struct ipv6hdr), 1986 .setsockopt = ipv6_setsockopt, 1987 .getsockopt = ipv6_getsockopt, 1988 .mtu_reduced = tcp_v6_mtu_reduced, 1989 }; 1990 1991 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1992 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1993 #ifdef CONFIG_TCP_MD5SIG 1994 .md5_lookup = tcp_v6_md5_lookup, 1995 .calc_md5_hash = tcp_v6_md5_hash_skb, 1996 .md5_parse = tcp_v6_parse_md5_keys, 1997 #endif 1998 #ifdef CONFIG_TCP_AO 1999 .ao_lookup = tcp_v6_ao_lookup, 2000 .calc_ao_hash = tcp_v6_ao_hash_skb, 2001 .ao_parse = tcp_v6_parse_ao, 2002 .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, 2003 #endif 2004 }; 2005 #endif 2006 2007 /* 2008 * TCP over IPv4 via INET6 API 2009 */ 2010 static const struct inet_connection_sock_af_ops ipv6_mapped = { 2011 .queue_xmit = ip_queue_xmit, 2012 .rebuild_header = inet_sk_rebuild_header, 2013 .sk_rx_dst_set = inet_sk_rx_dst_set, 2014 .conn_request = tcp_v6_conn_request, 2015 .syn_recv_sock = tcp_v6_syn_recv_sock, 2016 .net_header_len = sizeof(struct iphdr), 2017 .setsockopt = ipv6_setsockopt, 2018 .getsockopt = ipv6_getsockopt, 2019 .mtu_reduced = tcp_v4_mtu_reduced, 2020 }; 2021 2022 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2023 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 2024 #ifdef CONFIG_TCP_MD5SIG 2025 .md5_lookup = tcp_v4_md5_lookup, 2026 .calc_md5_hash = tcp_v4_md5_hash_skb, 2027 .md5_parse = tcp_v6_parse_md5_keys, 2028 #endif 2029 #ifdef CONFIG_TCP_AO 2030 .ao_lookup = tcp_v6_ao_lookup, 2031 .calc_ao_hash = tcp_v4_ao_hash_skb, 2032 .ao_parse = tcp_v6_parse_ao, 2033 .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, 2034 #endif 2035 }; 2036 2037 static void tcp6_destruct_sock(struct sock *sk) 2038 { 2039 tcp_md5_destruct_sock(sk); 2040 tcp_ao_destroy_sock(sk, false); 2041 inet6_sock_destruct(sk); 2042 } 2043 #endif 2044 2045 /* NOTE: A lot of things set to zero explicitly by call to 2046 * sk_alloc() so need not be done here. 2047 */ 2048 static int tcp_v6_init_sock(struct sock *sk) 2049 { 2050 struct inet_connection_sock *icsk = inet_csk(sk); 2051 2052 tcp_init_sock(sk); 2053 2054 icsk->icsk_af_ops = &ipv6_specific; 2055 2056 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2057 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 2058 sk->sk_destruct = tcp6_destruct_sock; 2059 #endif 2060 2061 return 0; 2062 } 2063 2064 #ifdef CONFIG_PROC_FS 2065 /* Proc filesystem TCPv6 sock list dumping. */ 2066 static void get_openreq6(struct seq_file *seq, 2067 const struct request_sock *req, int i) 2068 { 2069 long ttd = req->rsk_timer.expires - jiffies; 2070 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 2071 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 2072 2073 if (ttd < 0) 2074 ttd = 0; 2075 2076 seq_printf(seq, 2077 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2078 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 2079 i, 2080 src->s6_addr32[0], src->s6_addr32[1], 2081 src->s6_addr32[2], src->s6_addr32[3], 2082 inet_rsk(req)->ir_num, 2083 dest->s6_addr32[0], dest->s6_addr32[1], 2084 dest->s6_addr32[2], dest->s6_addr32[3], 2085 ntohs(inet_rsk(req)->ir_rmt_port), 2086 TCP_SYN_RECV, 2087 0, 0, /* could print option size, but that is af dependent. */ 2088 1, /* timers active (only the expire timer) */ 2089 jiffies_to_clock_t(ttd), 2090 req->num_timeout, 2091 from_kuid_munged(seq_user_ns(seq), 2092 sk_uid(req->rsk_listener)), 2093 0, /* non standard timer */ 2094 0, /* open_requests have no inode */ 2095 0, req); 2096 } 2097 2098 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 2099 { 2100 const struct in6_addr *dest, *src; 2101 __u16 destp, srcp; 2102 int timer_active; 2103 unsigned long timer_expires; 2104 const struct inet_sock *inet = inet_sk(sp); 2105 const struct tcp_sock *tp = tcp_sk(sp); 2106 const struct inet_connection_sock *icsk = inet_csk(sp); 2107 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 2108 u8 icsk_pending; 2109 int rx_queue; 2110 int state; 2111 2112 dest = &sp->sk_v6_daddr; 2113 src = &sp->sk_v6_rcv_saddr; 2114 destp = ntohs(inet->inet_dport); 2115 srcp = ntohs(inet->inet_sport); 2116 2117 icsk_pending = smp_load_acquire(&icsk->icsk_pending); 2118 if (icsk_pending == ICSK_TIME_RETRANS || 2119 icsk_pending == ICSK_TIME_REO_TIMEOUT || 2120 icsk_pending == ICSK_TIME_LOSS_PROBE) { 2121 timer_active = 1; 2122 timer_expires = tcp_timeout_expires(sp); 2123 } else if (icsk_pending == ICSK_TIME_PROBE0) { 2124 timer_active = 4; 2125 timer_expires = tcp_timeout_expires(sp); 2126 } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 2127 timer_active = 2; 2128 timer_expires = icsk->icsk_keepalive_timer.expires; 2129 } else { 2130 timer_active = 0; 2131 timer_expires = jiffies; 2132 } 2133 2134 state = inet_sk_state_load(sp); 2135 if (state == TCP_LISTEN) 2136 rx_queue = READ_ONCE(sp->sk_ack_backlog); 2137 else 2138 /* Because we don't lock the socket, 2139 * we might find a transient negative value. 2140 */ 2141 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 2142 READ_ONCE(tp->copied_seq), 0); 2143 2144 seq_printf(seq, 2145 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2146 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 2147 i, 2148 src->s6_addr32[0], src->s6_addr32[1], 2149 src->s6_addr32[2], src->s6_addr32[3], srcp, 2150 dest->s6_addr32[0], dest->s6_addr32[1], 2151 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2152 state, 2153 READ_ONCE(tp->write_seq) - tp->snd_una, 2154 rx_queue, 2155 timer_active, 2156 jiffies_delta_to_clock_t(timer_expires - jiffies), 2157 READ_ONCE(icsk->icsk_retransmits), 2158 from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 2159 READ_ONCE(icsk->icsk_probes_out), 2160 sock_i_ino(sp), 2161 refcount_read(&sp->sk_refcnt), sp, 2162 jiffies_to_clock_t(icsk->icsk_rto), 2163 jiffies_to_clock_t(icsk->icsk_ack.ato), 2164 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), 2165 tcp_snd_cwnd(tp), 2166 state == TCP_LISTEN ? 2167 fastopenq->max_qlen : 2168 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) 2169 ); 2170 } 2171 2172 static void get_timewait6_sock(struct seq_file *seq, 2173 struct inet_timewait_sock *tw, int i) 2174 { 2175 long delta = tw->tw_timer.expires - jiffies; 2176 const struct in6_addr *dest, *src; 2177 __u16 destp, srcp; 2178 2179 dest = &tw->tw_v6_daddr; 2180 src = &tw->tw_v6_rcv_saddr; 2181 destp = ntohs(tw->tw_dport); 2182 srcp = ntohs(tw->tw_sport); 2183 2184 seq_printf(seq, 2185 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2186 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2187 i, 2188 src->s6_addr32[0], src->s6_addr32[1], 2189 src->s6_addr32[2], src->s6_addr32[3], srcp, 2190 dest->s6_addr32[0], dest->s6_addr32[1], 2191 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2192 READ_ONCE(tw->tw_substate), 0, 0, 2193 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2194 refcount_read(&tw->tw_refcnt), tw); 2195 } 2196 2197 static int tcp6_seq_show(struct seq_file *seq, void *v) 2198 { 2199 struct tcp_iter_state *st; 2200 struct sock *sk = v; 2201 2202 if (v == SEQ_START_TOKEN) { 2203 seq_puts(seq, 2204 " sl " 2205 "local_address " 2206 "remote_address " 2207 "st tx_queue rx_queue tr tm->when retrnsmt" 2208 " uid timeout inode\n"); 2209 goto out; 2210 } 2211 st = seq->private; 2212 2213 if (sk->sk_state == TCP_TIME_WAIT) 2214 get_timewait6_sock(seq, v, st->num); 2215 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2216 get_openreq6(seq, v, st->num); 2217 else 2218 get_tcp6_sock(seq, v, st->num); 2219 out: 2220 return 0; 2221 } 2222 2223 static const struct seq_operations tcp6_seq_ops = { 2224 .show = tcp6_seq_show, 2225 .start = tcp_seq_start, 2226 .next = tcp_seq_next, 2227 .stop = tcp_seq_stop, 2228 }; 2229 2230 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2231 .family = AF_INET6, 2232 }; 2233 2234 int __net_init tcp6_proc_init(struct net *net) 2235 { 2236 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2237 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2238 return -ENOMEM; 2239 return 0; 2240 } 2241 2242 void tcp6_proc_exit(struct net *net) 2243 { 2244 remove_proc_entry("tcp6", net->proc_net); 2245 } 2246 #endif 2247 2248 struct proto tcpv6_prot = { 2249 .name = "TCPv6", 2250 .owner = THIS_MODULE, 2251 .close = tcp_close, 2252 .pre_connect = tcp_v6_pre_connect, 2253 .connect = tcp_v6_connect, 2254 .disconnect = tcp_disconnect, 2255 .accept = inet_csk_accept, 2256 .ioctl = tcp_ioctl, 2257 .init = tcp_v6_init_sock, 2258 .destroy = tcp_v4_destroy_sock, 2259 .shutdown = tcp_shutdown, 2260 .setsockopt = tcp_setsockopt, 2261 .getsockopt = tcp_getsockopt, 2262 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, 2263 .keepalive = tcp_set_keepalive, 2264 .recvmsg = tcp_recvmsg, 2265 .sendmsg = tcp_sendmsg, 2266 .splice_eof = tcp_splice_eof, 2267 .backlog_rcv = tcp_v6_do_rcv, 2268 .release_cb = tcp_release_cb, 2269 .hash = inet_hash, 2270 .unhash = inet_unhash, 2271 .get_port = inet_csk_get_port, 2272 .put_port = inet_put_port, 2273 #ifdef CONFIG_BPF_SYSCALL 2274 .psock_update_sk_prot = tcp_bpf_update_proto, 2275 #endif 2276 .enter_memory_pressure = tcp_enter_memory_pressure, 2277 .leave_memory_pressure = tcp_leave_memory_pressure, 2278 .stream_memory_free = tcp_stream_memory_free, 2279 .sockets_allocated = &tcp_sockets_allocated, 2280 2281 .memory_allocated = &net_aligned_data.tcp_memory_allocated, 2282 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, 2283 2284 .memory_pressure = &tcp_memory_pressure, 2285 .sysctl_mem = sysctl_tcp_mem, 2286 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 2287 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2288 .max_header = MAX_TCP_HEADER, 2289 .obj_size = sizeof(struct tcp6_sock), 2290 .freeptr_offset = offsetof(struct tcp6_sock, 2291 tcp.inet_conn.icsk_inet.sk.sk_freeptr), 2292 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), 2293 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2294 .twsk_prot = &tcp6_timewait_sock_ops, 2295 .rsk_prot = &tcp6_request_sock_ops, 2296 .h.hashinfo = NULL, 2297 .no_autobind = true, 2298 .diag_destroy = tcp_abort, 2299 }; 2300 EXPORT_SYMBOL_GPL(tcpv6_prot); 2301 2302 2303 static struct inet_protosw tcpv6_protosw = { 2304 .type = SOCK_STREAM, 2305 .protocol = IPPROTO_TCP, 2306 .prot = &tcpv6_prot, 2307 .ops = &inet6_stream_ops, 2308 .flags = INET_PROTOSW_PERMANENT | 2309 INET_PROTOSW_ICSK, 2310 }; 2311 2312 static int __net_init tcpv6_net_init(struct net *net) 2313 { 2314 int res; 2315 2316 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, 2317 SOCK_RAW, IPPROTO_TCP, net); 2318 if (!res) 2319 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; 2320 2321 return res; 2322 } 2323 2324 static void __net_exit tcpv6_net_exit(struct net *net) 2325 { 2326 inet_ctl_sock_destroy(net->ipv6.tcp_sk); 2327 } 2328 2329 static struct pernet_operations tcpv6_net_ops = { 2330 .init = tcpv6_net_init, 2331 .exit = tcpv6_net_exit, 2332 }; 2333 2334 int __init tcpv6_init(void) 2335 { 2336 int ret; 2337 2338 net_hotdata.tcpv6_protocol = (struct inet6_protocol) { 2339 .handler = tcp_v6_rcv, 2340 .err_handler = tcp_v6_err, 2341 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, 2342 }; 2343 ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2344 if (ret) 2345 goto out; 2346 2347 /* register inet6 protocol */ 2348 ret = inet6_register_protosw(&tcpv6_protosw); 2349 if (ret) 2350 goto out_tcpv6_protocol; 2351 2352 ret = register_pernet_subsys(&tcpv6_net_ops); 2353 if (ret) 2354 goto out_tcpv6_protosw; 2355 2356 ret = mptcpv6_init(); 2357 if (ret) 2358 goto out_tcpv6_pernet_subsys; 2359 2360 out: 2361 return ret; 2362 2363 out_tcpv6_pernet_subsys: 2364 unregister_pernet_subsys(&tcpv6_net_ops); 2365 out_tcpv6_protosw: 2366 inet6_unregister_protosw(&tcpv6_protosw); 2367 out_tcpv6_protocol: 2368 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2369 goto out; 2370 } 2371 2372 void tcpv6_exit(void) 2373 { 2374 unregister_pernet_subsys(&tcpv6_net_ops); 2375 inet6_unregister_protosw(&tcpv6_protosw); 2376 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2377 } 2378