1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/aligned_data.h> 45 #include <net/tcp.h> 46 #include <net/ndisc.h> 47 #include <net/inet6_hashtables.h> 48 #include <net/inet6_connection_sock.h> 49 #include <net/ipv6.h> 50 #include <net/transp_v6.h> 51 #include <net/addrconf.h> 52 #include <net/ip6_route.h> 53 #include <net/ip6_checksum.h> 54 #include <net/inet_ecn.h> 55 #include <net/protocol.h> 56 #include <net/xfrm.h> 57 #include <net/snmp.h> 58 #include <net/dsfield.h> 59 #include <net/timewait_sock.h> 60 #include <net/inet_common.h> 61 #include <net/secure_seq.h> 62 #include <net/hotdata.h> 63 #include <net/busy_poll.h> 64 #include <net/rstreason.h> 65 #include <net/psp.h> 66 67 #include <linux/proc_fs.h> 68 #include <linux/seq_file.h> 69 70 #include <crypto/md5.h> 71 #include <crypto/utils.h> 72 73 #include 
<trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allow compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
					      struct tcp6_sock, tcp)->inet6)

/* Cache the skb's dst on the socket for subsequent early demux,
 * taking a reference only if one can be obtained safely.
 */
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
	}
}

/* Derive the initial sequence number and timestamp offset for an
 * incoming segment from its address/port 4-tuple.
 */
static union tcp_seq_and_ts_off
tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_seq_and_ts_off(net,
					   ipv6_hdr(skb)->daddr.s6_addr32,
					   ipv6_hdr(skb)->saddr.s6_addr32,
					   tcp_hdr(skb)->dest,
					   tcp_hdr(skb)->source);
}

/* Run the BPF cgroup INET6_CONNECT hook before the real connect(). */
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}

/* Active open: validate the destination, resolve a route, pick/bind a
 * source address and port, and kick off the SYN via tcp_connect().
 * A v4-mapped destination flips the socket to the mapped (IPv4) ops and
 * delegates to tcp_v4_connect().
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct in6_addr *saddr = NULL, *final_p;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 *fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	fl6 = &inet_sk(sk)->cork.fl.u.ip6;
	memset(fl6, 0, sizeof(*fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6->flowlabel);
		if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Destination changed: stale per-peer timestamp state must go. */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6->flowlabel;

	/*
	 * TCP over IPv4
	 */
	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));

		if (err) {
			/* Roll the socket back to IPv6 ops on failure. */
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6->flowi6_proto = IPPROTO_TCP;
	fl6->daddr = sk->sk_v6_daddr;
	fl6->saddr = saddr ? *saddr : np->saddr;
	fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6->flowi6_oif = sk->sk_bound_dev_if;
	fl6->flowi6_mark = sk->sk_mark;
	fl6->fl6_dport = usin->sin6_port;
	fl6->fl6_sport = inet->inet_sport;
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport)
		fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6->flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(fl6, opt, &np->final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));

	dst = ip6_dst_lookup_flow(net, sk, fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		/* Source address chosen by routing; update bhash2 binding. */
		saddr = &fl6->saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err)
			goto failure;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, false, false);

	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
	if (opt)
		icsk->icsk_ext_hdr_len += opt->opt_flen +
					  opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		union tcp_seq_and_ts_off st;

		st = secure_tcpv6_seq_and_ts_off(net,
						 np->saddr.s6_addr32,
						 sk->sk_v6_daddr.s6_addr32,
						 inet->inet_sport,
						 inet->inet_dport);
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq, st.seq);
		tp->tsoffset = st.ts_off;
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

/* Apply a previously recorded PMTU reduction (tp->mtu_info) to the
 * socket; may run deferred from release_sock() via TCP_MTU_REDUCED_DEFERRED.
 */
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu, dmtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	dmtu = dst6_mtu(dst);
	if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
		tcp_sync_mss(sk, dmtu);
		tcp_simple_retransmit(sk);
	}
}

/* ICMPv6 error handler for TCP: locate the socket for the offending
 * segment and react to redirects, PMTU messages and hard/soft errors.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq,
fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		/* Socket owned by user: defer the MSS update to release_sock(). */
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for an request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

/* Build and transmit a SYN-ACK for @req, routing it first if no dst was
 * supplied by the caller.  Returns a net_xmit_* style error code.
 */
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route.
 */
	if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		/* Optionally reflect the peer's DSCP, preserving our own ECN bits. */
		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
			       opt, tclass, READ_ONCE(sk->sk_priority));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

/* Release per-request resources (txoptions copy and the pinned SYN skb). */
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT): validate the user command and
 * add or delete an MD5 key, treating v4-mapped peers as AF_INET keys.
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_ao_addr *addr;
	int l3index = 0;
	u8 prefixlen;
	bool l3flag;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		/* Zero key length means delete the key. */
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];

		/* Don't allow keys for peers that have a matching TCP-AO key.
		 * See the comment in tcp_ao_add_cmd()
		 */
		if (tcp_ao_required(sk, addr, AF_INET, l3flag ? l3index : -1, false))
			return -EKEYREJECTED;
		return tcp_md5_do_add(sk, addr,
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen);
	}

	addr = (union tcp_md5_addr *)&sin6->sin6_addr;

	/* Don't allow keys for peers that have a matching TCP-AO key.
	 * See the comment in tcp_ao_add_cmd()
	 */
	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
		return -EKEYREJECTED;

	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen);
}

/* Feed the IPv6 pseudo-header and the TCP header (checksum zeroed)
 * into the MD5 context, per RFC 2385.
 */
static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx,
				    const struct in6_addr *daddr,
				    const struct in6_addr *saddr,
				    const struct tcphdr *th, int nbytes)
{
	struct {
		struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */
		struct tcphdr tcp;
	} h;

	h.ip.saddr = *saddr;
	h.ip.daddr = *daddr;
	h.ip.protocol = cpu_to_be32(IPPROTO_TCP);
	h.ip.len = cpu_to_be32(nbytes);
	h.tcp = *th;
	h.tcp.check = 0;
	md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp));
}

/* MD5 signature over headers only (no payload) — used for replies
 * built from an incoming header.
 */
static noinline_for_stack void
tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct in6_addr *daddr, struct in6_addr *saddr,
		    const struct tcphdr *th)
{
	struct md5_ctx ctx;

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}

/* MD5 signature over headers plus skb payload; addresses come from the
 * socket when available, else from the packet itself.
 */
static noinline_for_stack void
tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct sock *sk, const struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct in6_addr *saddr, *daddr;
	struct md5_ctx ctx;

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);

		saddr = &ip6h->saddr;
		daddr =
&ip6h->daddr;
	}

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len);
	tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}
#endif

/* Initialize the IPv6 fields of a freshly minted request sock from the
 * incoming SYN, pinning the skb when packet options must be replayed.
 */
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb,
			    u32 tw_isn)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
	ireq->ir_loc_addr = LOOPBACK4_IPV6;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

/* LSM check plus route resolution for an incoming connection request. */
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req,
					  u32 tw_isn)
{
	tcp_v6_init_req(req, sk, skb, tw_isn);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
	.ao_synack_hash	=	tcp_v6_ao_synack_hash,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq_and_ts_off =	tcp_v6_init_seq_and_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

/* Build and send a bare control segment (RST when @rst, otherwise an ACK)
 * in reply to @skb, signing it with MD5 or TCP-AO when @key says so.
 * Used by both the reset and the timewait/request ACK paths.
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, int rst, u8 tclass, __be32 label,
				 u32 priority, u32 txhash, struct tcp_key *key)
{
	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	unsigned int tot_len = sizeof(struct tcphdr);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct flowi6 fl6;
	u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
	if (tcp_key_is_md5(key))
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
	if (tcp_key_is_ao(key))
		tot_len += tcp_ao_len_aligned(key->ao_key);

#ifdef CONFIG_MPTCP
	if (rst && !tcp_key_is_md5(key)) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive.
 */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_key_is_md5(key)) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif
#ifdef CONFIG_TCP_AO
	if (tcp_key_is_ao(key)) {
		*topt++ = htonl((TCPOPT_AO << 24) |
				(tcp_ao_len(key->ao_key) << 16) |
				(key->ao_key->sndid << 8) |
				(key->rcv_next));

		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
				key->traffic_key,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
				t1, key->sne);
	}
#endif

	/* Reply goes back to the sender: flip source and destination. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		/* unconstify the socket only to attach it to buff with care.
		 */
		skb_set_owner_edemux(buff, (struct sock *)sk);
		psp_reply_set_decrypted(sk, buff);

		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

/* Send a RST in reply to @skb.  Never answers a RST with a RST; signs
 * the reply with MD5/TCP-AO when a matching key for the peer exists.
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	const __u8 *md5_hash_location = NULL;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	bool allocated_traffic_key = false;
#endif
	const struct tcp_ao_hdr *aoh;
	struct tcp_key key = {};
	u32 seq = 0, ack_seq = 0;
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;
#ifdef CONFIG_TCP_MD5SIG
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
#endif

	if
 (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
		return;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	rcu_read_lock();
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
	} else if (md5_hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key.md5_key)
			goto out;
		key.type = TCP_KEY_MD5;

		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
		if (crypto_memneq(md5_hash_location, newhash, 16))
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

#ifdef CONFIG_TCP_AO
	if (aoh) {
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
					 &key.ao_key, &key.traffic_key,
					 &allocated_traffic_key,
					 &key.rcv_next, &key.sne))
			goto out;
		key.type = TCP_KEY_AO;
	}
#endif

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			if (inet6_test_bit(REPFLOW, sk))
				label = ip6_flowlabel(ipv6h);
			priority = READ_ONCE(sk->sk_priority);
			txhash = sk->sk_txhash;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
		    FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	trace_tcp_send_reset(sk, skb, reason);

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
			     label, priority, txhash,
			     &key);

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
	if (allocated_traffic_key)
		kfree(key.traffic_key);
	rcu_read_unlock();
#endif
}

/* Thin wrapper around tcp_v6_send_response() with rst == 0. */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	/* Last argument presumably distinguishes ACK (0) from RST (1) in
	 * tcp_v6_send_response() — the reset path passes 1 here; confirm
	 * against tcp_v6_send_response()'s definition.
	 */
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
			     tclass, label, priority, txhash, key);
}

/* Emit the ACK owed by a TIME_WAIT socket (challenge/keepalive/OOW ACK),
 * picking a TCP-AO or MD5 signing key from the timewait state if either
 * is configured, then drop the caller's timewait reference.
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
				enum tcp_tw_status tw_status)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	u8 tclass = tw->tw_tclass;
	struct tcp_key key = {};

	/* Out-of-window ACKs go out with ECN bits cleared. */
	if (tw_status == TCP_TW_ACK_OOW)
		tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao_info;

	if (static_branch_unlikely(&tcp_ao_needed.key)) {

		/* FIXME: the segment to-be-acked is not verified yet */
		ao_info = rcu_dereference(tcptw->ao_info);
		if (ao_info) {
			const struct tcp_ao_hdr *aoh;

			/* Invalid TCP option size or twice included auth */
			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
				goto out;
			if (aoh)
				key.ao_key = tcp_ao_established_key(sk, ao_info,
								    aoh->rnext_keyid, -1);
		}
	}
	if (key.ao_key) {
		struct tcp_ao_key *rnext_key;

		key.traffic_key = snd_other_key(key.ao_key);
		/* rcv_next switches to our rcv_next */
		rnext_key = READ_ONCE(ao_info->rnext_key);
		key.rcv_next = rnext_key->rcvid;
		key.sne = READ_ONCE(ao_info->snd_sne);
		key.type = TCP_KEY_AO;
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		key.md5_key = tcp_twsk_md5_key(tcptw);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
			READ_ONCE(tcptw->tw_rcv_nxt),
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_tw_tsval(tcptw),
			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
			tw->tw_priority, tw->tw_txhash);

#ifdef CONFIG_TCP_AO
out:
#endif
	inet_twsk_put(tw);
}

/* ACK a segment on behalf of a request socket (SYN_RECV / Fast Open child),
 * signing with TCP-AO or MD5 when the request used one. On AO key-lookup
 * failure the ACK is suppressed so the handshake times out.
 */
static void
tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
		      struct request_sock *req)
{
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		/* Traffic key is heap-allocated here; freed below after the
		 * ACK is built (see tcp_key_is_ao() check at the end).
		 */
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0,
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}


/* For a non-SYN segment arriving at a listener, attempt SYN-cookie
 * validation; may return a freshly minted child socket, the unchanged
 * listener, or NULL (depending on cookie_v6_check()).
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

/* Compute a SYN-cookie sequence number for @th; returns the clamped MSS
 * (0 when cookies are unavailable or the MSS lookup fails).
 */
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

/* Listener entry point for an incoming SYN: hand v4 frames to the IPv4
 * path, reject non-unicast and v4-mapped sources, otherwise run the
 * generic request machinery with the IPv6 ops tables.
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/* v4-mapped source on the v6 path is a header error, not a drop. */
	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

/* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */
static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk)
{
	struct inet_sock *newinet = inet_sk(newsk);
	struct ipv6_pinfo *newnp;

	newinet->pinet6 = newnp = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;

	/* Start from the listener's ipv6_pinfo, then fix up per-child state. */
	memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo));

	newnp->saddr = newsk->sk_v6_rcv_saddr;

	/* Route all further AF-specific work through the v4-over-v6 ops. */
	inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
	if (sk_is_mptcp(newsk))
		mptcpv6_handle_mapped(newsk, true);
	newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

	/* Pointers memcpy'd from the parent must not be shared: clear them. */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->pktoptions = NULL;
	newnp->opt = NULL;

	/* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */
	newnp->mcast_oif = newinet->mc_index;
	newnp->mcast_hops = newinet->mc_ttl;

	newnp->rcv_flowinfo = 0;
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = 0;
}

/* Build the child socket for a completed IPv6 handshake: route the
 * reply, clone the listener's IPv6 state and options, copy MD5/AO keys,
 * inherit the port and insert the child into the established hash.
 * Returns the new socket or NULL on failure (queue overflow, routing
 * failure, OOM, or a duplicate found during hashing).
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req,
					 void (*opt_child_init)(struct sock *newsk,
								const struct sock *sk))
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_request_sock *ireq;
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct ipv6_pinfo *newnp;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	/* A v4 SYN on a v6 socket: delegate to the IPv4 path, which will
	 * call tcp_v6_mapped_child_init() to finish the v6-mapped setup.
	 */
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_syn_recv_sock(sk, skb, req, dst,
					    req_unhash, own_req,
					    tcp_v6_mapped_child_init);
	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP);
	if (!dst)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	newinet = inet_sk(newsk);
	newinet->cork.fl.u.ip6 = fl6;
	newinet->pinet6 = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;
	newinet->inet_opt = NULL;

	newtp = tcp_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(newsk, dst, false, false);

	newnp->saddr = ireq->ir_v6_loc_addr;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst6_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	if (!tcp_rsk_used_ao(req)) {
		/* Copy over the MD5 key from the original socket */
		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		if (key) {
			const union tcp_md5_addr *addr;

			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
				goto put_and_exit;
		}
	}
#endif
#ifdef CONFIG_TCP_AO
	/* Copy over tcp_ao_info if any */
	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
		goto put_and_exit; /* OOM */
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

exit_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	reason = psp_sk_rx_policy_check(sk, skb);
	if (reason)
		goto err_discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			/* Drop the cached rx dst if the packet came in on a
			 * different interface or the dst went stale.
			 */
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		/* nsk != sk means a syncookie produced a child socket. */
		if (nsk != sk) {
			if (nsk) {
				reason = tcp_child_process(sk, nsk, skb);
				if (reason)
					goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	reason = tcp_rcv_state_process(sk, skb);
	if (reason)
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
err_discard:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			/* Latch the new options skb; free the previous one. */
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq accounts for SYN/FIN each consuming one sequence number. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

/* Main IPv6 TCP receive path: validate the header and checksum, look the
 * segment up in the socket tables, and dispatch to the request-socket,
 * time-wait, listener or established handling as appropriate.
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* pskb_may_pull() may have moved skb->data: re-read the headers. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_hash(sk, req, skb,
						       &hdr->saddr, &hdr->daddr,
						       AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_skbadd(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb, &drop_reason)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
					    &drop_reason);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				enum sk_rst_reason rst_reason;

				rst_reason = sk_rst_convert_drop_reason(drop_reason);
				tcp_v6_send_reset(nsk, skb, rst_reason);
				goto discard_and_relse;
			}
			sock_put(sk);
			return 0;
		}
	}

process:
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
				       AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	if (tcp_filter(sk, skb, &drop_reason))
		goto discard_and_relse;

	/* tcp_filter() may have trimmed/moved the skb: re-read headers. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_skbadd(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
					       &drop_reason);
	switch (tw_status) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		/* A new SYN may legitimately reuse the 4-tuple: look for a
		 * listener that can take over the connection.
		 */
		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

/* Early demux: opportunistically attach an established socket (and its
 * cached rx dst) to the skb before the full receive path runs.
 */
void tcp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff
									*skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

/* AF-specific connection ops for native IPv6 TCP sockets. */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* MD5/AO signing ops for native IPv6 sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v6_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v6_ao_calc_key_sk,
#endif
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
/* v4-mapped sockets transmit/receive via the IPv4 helpers but still use
 * the IPv6 [gs]etsockopt entry points.
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* MD5/AO signing ops for v6-mapped (IPv4-over-INET6) sockets: hashing
 * uses the v4 routines, option parsing stays on the v6 entry points.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v4_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v4_ao_calc_key_sk,
#endif
};

/* Destructor that tears down MD5/AO state before the generic inet6 one. */
static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
	sk->sk_destruct = tcp6_destruct_sock;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent.
			 */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

/* Format one full TCP socket for /proc/net/tcp6; lockless, so queue
 * sizes are best-effort snapshots.
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest = &sp->sk_v6_daddr;
	src = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp = ntohs(inet->inet_sport);

	/* timer_active encodes which timer is pending:
	 * 1 retransmit/RTO-ish, 4 zero-window probe, 2 keepalive, 0 none.
	 */
	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
		timer_active	= 2;
		timer_expires	= icsk->icsk_keepalive_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   READ_ONCE(icsk->icsk_retransmits),
		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
		   READ_ONCE(icsk->icsk_probes_out),
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

/* Format one TIME_WAIT socket for /proc/net/tcp6. */
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   READ_ONCE(tw->tw_substate), 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

/* seq_file .show: dispatch a table entry to the right formatter based on
 * socket state (time-wait, request sock, or full socket).
 */
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 " sl "
			 "local_address "
			 "remote_address "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return
		0;
}

/* Remove the per-netns /proc/net/tcp6 entry (reverse of tcp6_proc_init). */
void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

/* Protocol operations table for IPv6 TCP sockets.  Most handlers are the
 * address-family-independent tcp_* functions shared with IPv4; only
 * connect/init/backlog_rcv are v6-specific.  Note ->destroy deliberately
 * reuses tcp_v4_destroy_sock - teardown is AF-independent.
 */
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.splice_eof		= tcp_splice_eof,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	/* Memory accounting state is shared with IPv4 TCP. */
	.memory_allocated	= &net_aligned_data.tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.freeptr_offset		= offsetof(struct tcp6_sock,
					   tcp.inet_conn.icsk_inet.sk.sk_freeptr),
	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);


/* Hooks SOCK_STREAM/IPPROTO_TCP creation on PF_INET6 up to tcpv6_prot. */
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

/* Per-netns setup: create the control socket used to send v6 TCP resets
 * and ACKs on behalf of the stack (not tied to any user socket).
 */
static int __net_init tcpv6_net_init(struct net *net)
{
	int res;

	res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				   SOCK_RAW, IPPROTO_TCP, net);
	if (!res)
		net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;

	return res;
}

/* Per-netns teardown: destroy the control socket. */
static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
};

/* Module init: register the v6 TCP receive/error handlers, the protosw
 * entry, the pernet ops, and MPTCP-over-v6.  On failure, the goto chain
 * unwinds the registrations in reverse order.
 */
int __init tcpv6_init(void)
{
	int ret;

	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
		.handler     = tcp_v6_rcv,
		.err_handler = tcp_v6_err,
		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
	};
	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

/* Module exit: undo tcpv6_init() registrations in reverse order. */
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2407 } 2408