1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/aligned_data.h> 45 #include <net/tcp.h> 46 #include <net/ndisc.h> 47 #include <net/inet6_hashtables.h> 48 #include <net/inet6_connection_sock.h> 49 #include <net/ipv6.h> 50 #include <net/transp_v6.h> 51 #include <net/addrconf.h> 52 #include <net/ip6_route.h> 53 #include <net/ip6_checksum.h> 54 #include <net/inet_ecn.h> 55 #include <net/protocol.h> 56 #include <net/xfrm.h> 57 #include <net/snmp.h> 58 #include <net/dsfield.h> 59 #include <net/timewait_sock.h> 60 #include <net/inet_common.h> 61 #include <net/secure_seq.h> 62 #include <net/hotdata.h> 63 #include <net/busy_poll.h> 64 #include <net/rstreason.h> 65 #include <net/psp.h> 66 67 #include <linux/proc_fs.h> 68 #include <linux/seq_file.h> 69 70 #include <crypto/md5.h> 71 72 #include <trace/events/tcp.h> 73 74 static void 
tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
		  enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

/* af_ops for v4-mapped-on-v6 sockets vs. native IPv6 sockets. */
static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allow compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
					      struct tcp6_sock, tcp)->inet6)

/* Cache the validated input route on the socket for early demux;
 * only takes the dst if a reference can be obtained safely.
 */
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
	}
}

/* Initial sequence number for a passive open, keyed by the incoming
 * segment's address/port 4-tuple.
 */
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

/* Per-connection timestamp offset, keyed by the address pair. */
static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

/* Hook run before connect(): validates addr_len, then lets the
 * attached cgroup BPF program inspect/rewrite the address.
 */
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}

/* Active open (connect()) for an IPv6 TCP socket. Also handles
 * v4-mapped destinations by switching the socket to the mapped af_ops
 * and delegating to tcp_v4_connect().
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Reset stale timestamp state when connecting to a new peer. */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));

		if (err) {
			/* Failure: undo the switch to the mapped ops. */
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport)
		fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6.flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		saddr = &fl6.saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err)
			goto failure;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, false, false);

	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
	if (opt)
		icsk->icsk_ext_hdr_len += opt->opt_flen +
					  opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err =
tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

/* Deferred PMTU handler: shrink the cached mss after an
 * ICMPV6_PKT_TOOBIG (possibly deferred while the socket was owned
 * by user context).
 */
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu, dmtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	dmtu = dst6_mtu(dst);
	if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
		tcp_sync_mss(sk, dmtu);
		tcp_simple_retransmit(sk);
	}
}

/* ICMPv6 error handler for TCP: look up the socket matching the
 * offending segment and deliver/defer the error accordingly.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for an request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open.
If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


/* Build and transmit a SYN-ACK for @req; routes the reply first when
 * no @dst was supplied by the caller.
 */
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		/* Optionally reflect the SYN's DSCP, keeping our own ECN bits. */
		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
			       opt, tclass, READ_ONCE(sk->sk_priority));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


/* Release per-request resources held by an IPv6 request sock. */
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

/* TCP_MD5SIG / TCP_MD5SIG_EXT setsockopt(): validate the request and
 * add or delete an MD5 key for the given peer address.
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_ao_addr *addr;
	int l3index = 0;
	u8 prefixlen;
	bool l3flag;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	/* Zero key length means "delete the key". */
	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];

		/* Don't allow keys for peers that have a matching TCP-AO key.
		 * See the comment in tcp_ao_add_cmd()
		 */
		if (tcp_ao_required(sk, addr, AF_INET,
				    l3flag ? l3index : -1, false))
			return -EKEYREJECTED;
		return tcp_md5_do_add(sk, addr,
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen);
	}

	addr = (union tcp_md5_addr *)&sin6->sin6_addr;

	/* Don't allow keys for peers that have a matching TCP-AO key.
	 * See the comment in tcp_ao_add_cmd()
	 */
	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
		return -EKEYREJECTED;

	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen);
}

/* Feed the IPv6 pseudo-header plus the TCP header (with checksum
 * zeroed) into the MD5 context.
 */
static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx,
				    const struct in6_addr *daddr,
				    const struct in6_addr *saddr,
				    const struct tcphdr *th, int nbytes)
{
	struct {
		struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */
		struct tcphdr tcp;
	} h;

	h.ip.saddr = *saddr;
	h.ip.daddr = *daddr;
	h.ip.protocol = cpu_to_be32(IPPROTO_TCP);
	h.ip.len = cpu_to_be32(nbytes);
	h.tcp = *th;
	h.tcp.check = 0;
	md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp));
}

/* MD5 signature over pseudo-header + TCP header only (no payload). */
static noinline_for_stack void
tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct in6_addr *daddr, struct in6_addr *saddr,
		    const struct tcphdr *th)
{
	struct md5_ctx ctx;

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}

/* MD5 signature over pseudo-header + TCP header + payload of @skb.
 * Addresses come from @sk when present, else from the packet itself.
 */
static noinline_for_stack void
tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct sock *sk, const struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct in6_addr *saddr, *daddr;
	struct md5_ctx ctx;

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len);
	tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}
#endif

/* Fill in the IPv6-specific fields of a freshly minted request sock. */
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb,
			    u32 tw_isn)
{
	bool
l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
	ireq->ir_loc_addr = LOOPBACK4_IPV6;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	/* Keep the SYN around if any requested ancillary data depends on it. */
	if (!tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

/* Initialize @req from the SYN, run LSM checks, then route the reply. */
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req,
					  u32 tw_isn)
{
	tcp_v6_init_req(req, sk, skb, tw_isn);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
	.ao_synack_hash	=	tcp_v6_ao_synack_hash,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

/* Build and send a bare reply segment (RST when @rst, else ACK) to
 * @skb via the per-netns control socket, without full socket state.
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, int rst, u8 tclass, __be32 label,
				 u32 priority, u32 txhash, struct tcp_key *key)
{
	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	unsigned int tot_len = sizeof(struct tcphdr);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct flowi6 fl6;
	u32 mark = 0;

	/* Account for the TCP options that will be appended. */
	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
	if (tcp_key_is_md5(key))
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
	if (tcp_key_is_ao(key))
		tot_len += tcp_ao_len_aligned(key->ao_key);

#ifdef CONFIG_MPTCP
	if (rst && !tcp_key_is_md5(key)) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive.
 */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_key_is_md5(key)) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif
#ifdef CONFIG_TCP_AO
	if (tcp_key_is_ao(key)) {
		*topt++ = htonl((TCPOPT_AO << 24) |
				(tcp_ao_len(key->ao_key) << 16) |
				(key->ao_key->sndid << 8) |
				(key->rcv_next));

		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
				key->traffic_key,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
				t1, key->sne);
	}
#endif

	/* Reply flow: swap the packet's source and destination addresses. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		/* unconstify the socket only to attach it to buff with care. */
		skb_set_owner_edemux(buff, (struct sock *)sk);
		psp_reply_set_decrypted(sk, buff);

		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

/* Send a RST in reply to @skb, signed with MD5/TCP-AO when the peer
 * has a matching key. @sk may be NULL (no socket found for the packet).
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	const __u8 *md5_hash_location = NULL;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	bool allocated_traffic_key = false;
#endif
	const struct tcp_ao_hdr *aoh;
	struct tcp_key key = {};
	u32 seq = 0, ack_seq = 0;
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;
#ifdef CONFIG_TCP_MD5SIG
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
#endif

	/* Never send a reset in response to a reset. */
	if
(th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
		return;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	rcu_read_lock();
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
	} else if (md5_hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key.md5_key)
			goto out;
		key.type = TCP_KEY_MD5;

		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
		if (memcmp(md5_hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

#ifdef CONFIG_TCP_AO
	if (aoh) {
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
					 &key.ao_key, &key.traffic_key,
					 &allocated_traffic_key,
					 &key.rcv_next, &key.sne))
			goto out;
		key.type = TCP_KEY_AO;
	}
#endif

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			if (inet6_test_bit(REPFLOW, sk))
				label = ip6_flowlabel(ipv6h);
			priority = READ_ONCE(sk->sk_priority);
			txhash = sk->sk_txhash;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
		    FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	trace_tcp_send_reset(sk, skb, reason);

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
			     label, priority, txhash,
			     &key);

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
	if (allocated_traffic_key)
		kfree(key.traffic_key);
	rcu_read_unlock();
#endif
}

/* Thin wrapper around tcp_v6_send_response() with rst == 0. */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, 1115 tclass, label, priority, txhash, key); 1116 } 1117 1118 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, 1119 enum tcp_tw_status tw_status) 1120 { 1121 struct inet_timewait_sock *tw = inet_twsk(sk); 1122 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1123 u8 tclass = tw->tw_tclass; 1124 struct tcp_key key = {}; 1125 1126 if (tw_status == TCP_TW_ACK_OOW) 1127 tclass &= ~INET_ECN_MASK; 1128 #ifdef CONFIG_TCP_AO 1129 struct tcp_ao_info *ao_info; 1130 1131 if (static_branch_unlikely(&tcp_ao_needed.key)) { 1132 1133 /* FIXME: the segment to-be-acked is not verified yet */ 1134 ao_info = rcu_dereference(tcptw->ao_info); 1135 if (ao_info) { 1136 const struct tcp_ao_hdr *aoh; 1137 1138 /* Invalid TCP option size or twice included auth */ 1139 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1140 goto out; 1141 if (aoh) 1142 key.ao_key = tcp_ao_established_key(sk, ao_info, 1143 aoh->rnext_keyid, -1); 1144 } 1145 } 1146 if (key.ao_key) { 1147 struct tcp_ao_key *rnext_key; 1148 1149 key.traffic_key = snd_other_key(key.ao_key); 1150 /* rcv_next switches to our rcv_next */ 1151 rnext_key = READ_ONCE(ao_info->rnext_key); 1152 key.rcv_next = rnext_key->rcvid; 1153 key.sne = READ_ONCE(ao_info->snd_sne); 1154 key.type = TCP_KEY_AO; 1155 #else 1156 if (0) { 1157 #endif 1158 #ifdef CONFIG_TCP_MD5SIG 1159 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1160 key.md5_key = tcp_twsk_md5_key(tcptw); 1161 if (key.md5_key) 1162 key.type = TCP_KEY_MD5; 1163 #endif 1164 } 1165 1166 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, 1167 READ_ONCE(tcptw->tw_rcv_nxt), 1168 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1169 tcp_tw_tsval(tcptw), 1170 READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, 1171 &key, tclass, cpu_to_be32(tw->tw_flowlabel), 1172 tw->tw_priority, tw->tw_txhash); 1173 1174 #ifdef CONFIG_TCP_AO 1175 out: 1176 #endif 1177 inet_twsk_put(tw); 1178 } 1179 1180 static void 
tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
		      struct request_sock *req)
{
	/* ACK (SYN-ACK retransmit trigger) for a pending request sock,
	 * signed with AO or MD5 if the handshake used them.
	 */
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		/* traffic_key is freed after tcp_v6_send_ack() below */
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0,
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}


/* When syncookies are compiled in, validate a non-SYN segment against
 * the cookie and return the (possibly new) socket; otherwise pass the
 * listener through unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

/* Generate a syncookie ISN and MSS for a SYN handled outside the
 * regular path (e.g. BPF); returns 0 when syncookies are disabled or
 * no MSS could be derived.
 */
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

/* conn_request hook: dispatch v4-mapped traffic to the IPv4 handler,
 * reject non-unicast destinations and v4-mapped *source* addresses,
 * then hand the SYN to the generic request path.
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/* A v4-mapped saddr on the IPv6 path is a header error */
	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
1308 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 1309 */ 1310 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1311 sizeof(struct inet6_skb_parm)); 1312 } 1313 1314 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1315 struct request_sock *req, 1316 struct dst_entry *dst, 1317 struct request_sock *req_unhash, 1318 bool *own_req) 1319 { 1320 struct inet_request_sock *ireq; 1321 struct ipv6_pinfo *newnp; 1322 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1323 struct ipv6_txoptions *opt; 1324 struct inet_sock *newinet; 1325 bool found_dup_sk = false; 1326 struct tcp_sock *newtp; 1327 struct sock *newsk; 1328 #ifdef CONFIG_TCP_MD5SIG 1329 struct tcp_md5sig_key *key; 1330 int l3index; 1331 #endif 1332 struct flowi6 fl6; 1333 1334 if (skb->protocol == htons(ETH_P_IP)) { 1335 /* 1336 * v6 mapped 1337 */ 1338 1339 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1340 req_unhash, own_req); 1341 1342 if (!newsk) 1343 return NULL; 1344 1345 newinet = inet_sk(newsk); 1346 newinet->pinet6 = tcp_inet6_sk(newsk); 1347 newinet->ipv6_fl_list = NULL; 1348 1349 newnp = tcp_inet6_sk(newsk); 1350 newtp = tcp_sk(newsk); 1351 1352 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1353 1354 newnp->saddr = newsk->sk_v6_rcv_saddr; 1355 1356 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1357 if (sk_is_mptcp(newsk)) 1358 mptcpv6_handle_mapped(newsk, true); 1359 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1360 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1361 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1362 #endif 1363 1364 newnp->ipv6_mc_list = NULL; 1365 newnp->ipv6_ac_list = NULL; 1366 newnp->pktoptions = NULL; 1367 newnp->opt = NULL; 1368 newnp->mcast_oif = inet_iif(skb); 1369 newnp->mcast_hops = ip_hdr(skb)->ttl; 1370 newnp->rcv_flowinfo = 0; 1371 if (inet6_test_bit(REPFLOW, sk)) 1372 newnp->flow_label = 0; 1373 1374 /* 1375 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1376 * here, 
tcp_create_openreq_child now does this for us, see the comment in 1377 * that function for the gory details. -acme 1378 */ 1379 1380 /* It is tricky place. Until this moment IPv4 tcp 1381 worked with IPv6 icsk.icsk_af_ops. 1382 Sync it now. 1383 */ 1384 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1385 1386 return newsk; 1387 } 1388 1389 ireq = inet_rsk(req); 1390 1391 if (sk_acceptq_is_full(sk)) 1392 goto exit_overflow; 1393 1394 if (!dst) { 1395 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1396 if (!dst) 1397 goto exit; 1398 } 1399 1400 newsk = tcp_create_openreq_child(sk, req, skb); 1401 if (!newsk) 1402 goto exit_nonewsk; 1403 1404 /* 1405 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1406 * count here, tcp_create_openreq_child now does this for us, see the 1407 * comment in that function for the gory details. -acme 1408 */ 1409 1410 newsk->sk_gso_type = SKB_GSO_TCPV6; 1411 inet6_sk_rx_dst_set(newsk, skb); 1412 1413 newinet = inet_sk(newsk); 1414 newinet->pinet6 = tcp_inet6_sk(newsk); 1415 newinet->ipv6_fl_list = NULL; 1416 newinet->inet_opt = NULL; 1417 1418 newtp = tcp_sk(newsk); 1419 newnp = tcp_inet6_sk(newsk); 1420 1421 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1422 1423 ip6_dst_store(newsk, dst, false, false); 1424 1425 newnp->saddr = ireq->ir_v6_loc_addr; 1426 1427 /* Now IPv6 options... 1428 1429 First: no IPv4 options. 1430 */ 1431 newnp->ipv6_mc_list = NULL; 1432 newnp->ipv6_ac_list = NULL; 1433 1434 /* Clone RX bits */ 1435 newnp->rxopt.all = np->rxopt.all; 1436 1437 newnp->pktoptions = NULL; 1438 newnp->opt = NULL; 1439 newnp->mcast_oif = tcp_v6_iif(skb); 1440 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1441 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1442 if (inet6_test_bit(REPFLOW, sk)) 1443 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1444 1445 /* Set ToS of the new socket based upon the value of incoming SYN. 1446 * ECT bits are set later in tcp_init_transfer(). 
1447 */ 1448 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1449 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1450 1451 /* Clone native IPv6 options from listening socket (if any) 1452 1453 Yes, keeping reference count would be much more clever, 1454 but we make one more one thing there: reattach optmem 1455 to newsk. 1456 */ 1457 opt = ireq->ipv6_opt; 1458 if (!opt) 1459 opt = rcu_dereference(np->opt); 1460 if (opt) { 1461 opt = ipv6_dup_options(newsk, opt); 1462 RCU_INIT_POINTER(newnp->opt, opt); 1463 } 1464 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1465 if (opt) 1466 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1467 opt->opt_flen; 1468 1469 tcp_ca_openreq_child(newsk, dst); 1470 1471 tcp_sync_mss(newsk, dst6_mtu(dst)); 1472 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1473 1474 tcp_initialize_rcv_mss(newsk); 1475 1476 #ifdef CONFIG_TCP_MD5SIG 1477 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1478 1479 if (!tcp_rsk_used_ao(req)) { 1480 /* Copy over the MD5 key from the original socket */ 1481 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1482 if (key) { 1483 const union tcp_md5_addr *addr; 1484 1485 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; 1486 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) 1487 goto put_and_exit; 1488 } 1489 } 1490 #endif 1491 #ifdef CONFIG_TCP_AO 1492 /* Copy over tcp_ao_info if any */ 1493 if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) 1494 goto put_and_exit; /* OOM */ 1495 #endif 1496 1497 if (__inet_inherit_port(sk, newsk) < 0) 1498 goto put_and_exit; 1499 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1500 &found_dup_sk); 1501 if (*own_req) { 1502 tcp_move_syn(newtp, req); 1503 1504 /* Clone pktoptions received with SYN, if we own the req */ 1505 if (ireq->pktopts) { 1506 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 1507 consume_skb(ireq->pktopts); 1508 ireq->pktopts = NULL; 
1509 if (newnp->pktoptions) 1510 tcp_v6_restore_cb(newnp->pktoptions); 1511 } 1512 } else { 1513 if (!req_unhash && found_dup_sk) { 1514 /* This code path should only be executed in the 1515 * syncookie case only 1516 */ 1517 bh_unlock_sock(newsk); 1518 sock_put(newsk); 1519 newsk = NULL; 1520 } 1521 } 1522 1523 return newsk; 1524 1525 exit_overflow: 1526 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1527 exit_nonewsk: 1528 dst_release(dst); 1529 exit: 1530 tcp_listendrop(sk); 1531 return NULL; 1532 put_and_exit: 1533 inet_csk_prepare_forced_close(newsk); 1534 tcp_done(newsk); 1535 goto exit; 1536 } 1537 1538 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1539 u32)); 1540 /* The socket must have it's spinlock held when we get 1541 * here, unless it is a TCP_LISTEN socket. 1542 * 1543 * We have a potential double-lock case here, so even when 1544 * doing backlog processing we use the BH locking scheme. 1545 * This is because we cannot sleep with the original spinlock 1546 * held. 1547 */ 1548 INDIRECT_CALLABLE_SCOPE 1549 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1550 { 1551 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1552 struct sk_buff *opt_skb = NULL; 1553 enum skb_drop_reason reason; 1554 struct tcp_sock *tp; 1555 1556 /* Imagine: socket is IPv6. IPv4 packet arrives, 1557 goes to IPv4 receive handler and backlogged. 1558 From backlog it always goes here. Kerboom... 1559 Fortunately, tcp_rcv_established and rcv_established 1560 handle them correctly, but it is not case with 1561 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1562 */ 1563 1564 if (skb->protocol == htons(ETH_P_IP)) 1565 return tcp_v4_do_rcv(sk, skb); 1566 1567 reason = psp_sk_rx_policy_check(sk, skb); 1568 if (reason) 1569 goto err_discard; 1570 1571 /* 1572 * socket locking is here for SMP purposes as backlog rcv 1573 * is currently called with bh processing disabled. 1574 */ 1575 1576 /* Do Stevens' IPV6_PKTOPTIONS. 
1577 1578 Yes, guys, it is the only place in our code, where we 1579 may make it not affecting IPv4. 1580 The rest of code is protocol independent, 1581 and I do not like idea to uglify IPv4. 1582 1583 Actually, all the idea behind IPV6_PKTOPTIONS 1584 looks not very well thought. For now we latch 1585 options, received in the last packet, enqueued 1586 by tcp. Feel free to propose better solution. 1587 --ANK (980728) 1588 */ 1589 if (np->rxopt.all && sk->sk_state != TCP_LISTEN) 1590 opt_skb = skb_clone_and_charge_r(skb, sk); 1591 1592 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1593 struct dst_entry *dst; 1594 1595 dst = rcu_dereference_protected(sk->sk_rx_dst, 1596 lockdep_sock_is_held(sk)); 1597 1598 sock_rps_save_rxhash(sk, skb); 1599 sk_mark_napi_id(sk, skb); 1600 if (dst) { 1601 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1602 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1603 dst, sk->sk_rx_dst_cookie) == NULL) { 1604 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1605 dst_release(dst); 1606 } 1607 } 1608 1609 tcp_rcv_established(sk, skb); 1610 if (opt_skb) 1611 goto ipv6_pktoptions; 1612 return 0; 1613 } 1614 1615 if (tcp_checksum_complete(skb)) 1616 goto csum_err; 1617 1618 if (sk->sk_state == TCP_LISTEN) { 1619 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1620 1621 if (nsk != sk) { 1622 if (nsk) { 1623 reason = tcp_child_process(sk, nsk, skb); 1624 if (reason) 1625 goto reset; 1626 } 1627 return 0; 1628 } 1629 } else 1630 sock_rps_save_rxhash(sk, skb); 1631 1632 reason = tcp_rcv_state_process(sk, skb); 1633 if (reason) 1634 goto reset; 1635 if (opt_skb) 1636 goto ipv6_pktoptions; 1637 return 0; 1638 1639 reset: 1640 tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); 1641 discard: 1642 if (opt_skb) 1643 __kfree_skb(opt_skb); 1644 sk_skb_reason_drop(sk, skb, reason); 1645 return 0; 1646 csum_err: 1647 reason = SKB_DROP_REASON_TCP_CSUM; 1648 trace_tcp_bad_csum(skb); 1649 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1650 
err_discard:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		/* Latch per-packet ancillary data the user subscribed to
		 * via IPV6_PKTOPTIONS-style rxopt bits.
		 */
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* Swap the latched pktoptions skb in; whichever skb comes
		 * back out of the xchg() is released below.
		 */
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

/* Populate TCP_SKB_CB() from the IPv6/TCP headers for the rest of the
 * TCP receive path.
 */
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq covers payload plus one unit each for SYN and FIN */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

/* Main IPv6 TCP receive entry point (protocol handler). */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
1731 */ 1732 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1733 1734 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1735 goto discard_it; 1736 1737 th = (const struct tcphdr *)skb->data; 1738 1739 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1740 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1741 goto bad_packet; 1742 } 1743 if (!pskb_may_pull(skb, th->doff*4)) 1744 goto discard_it; 1745 1746 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1747 goto csum_error; 1748 1749 th = (const struct tcphdr *)skb->data; 1750 hdr = ipv6_hdr(skb); 1751 1752 lookup: 1753 sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th), 1754 th->source, th->dest, inet6_iif(skb), sdif, 1755 &refcounted); 1756 if (!sk) 1757 goto no_tcp_socket; 1758 1759 if (sk->sk_state == TCP_TIME_WAIT) 1760 goto do_time_wait; 1761 1762 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1763 struct request_sock *req = inet_reqsk(sk); 1764 bool req_stolen = false; 1765 struct sock *nsk; 1766 1767 sk = req->rsk_listener; 1768 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1769 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1770 else 1771 drop_reason = tcp_inbound_hash(sk, req, skb, 1772 &hdr->saddr, &hdr->daddr, 1773 AF_INET6, dif, sdif); 1774 if (drop_reason) { 1775 sk_drops_skbadd(sk, skb); 1776 reqsk_put(req); 1777 goto discard_it; 1778 } 1779 if (tcp_checksum_complete(skb)) { 1780 reqsk_put(req); 1781 goto csum_error; 1782 } 1783 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1784 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1785 if (!nsk) { 1786 inet_csk_reqsk_queue_drop_and_put(sk, req); 1787 goto lookup; 1788 } 1789 sk = nsk; 1790 /* reuseport_migrate_sock() has already held one sk_refcnt 1791 * before returning. 
1792 */ 1793 } else { 1794 sock_hold(sk); 1795 } 1796 refcounted = true; 1797 nsk = NULL; 1798 if (!tcp_filter(sk, skb, &drop_reason)) { 1799 th = (const struct tcphdr *)skb->data; 1800 hdr = ipv6_hdr(skb); 1801 tcp_v6_fill_cb(skb, hdr, th); 1802 nsk = tcp_check_req(sk, skb, req, false, &req_stolen, 1803 &drop_reason); 1804 } 1805 if (!nsk) { 1806 reqsk_put(req); 1807 if (req_stolen) { 1808 /* Another cpu got exclusive access to req 1809 * and created a full blown socket. 1810 * Try to feed this packet to this socket 1811 * instead of discarding it. 1812 */ 1813 tcp_v6_restore_cb(skb); 1814 sock_put(sk); 1815 goto lookup; 1816 } 1817 goto discard_and_relse; 1818 } 1819 nf_reset_ct(skb); 1820 if (nsk == sk) { 1821 reqsk_put(req); 1822 tcp_v6_restore_cb(skb); 1823 } else { 1824 drop_reason = tcp_child_process(sk, nsk, skb); 1825 if (drop_reason) { 1826 enum sk_rst_reason rst_reason; 1827 1828 rst_reason = sk_rst_convert_drop_reason(drop_reason); 1829 tcp_v6_send_reset(nsk, skb, rst_reason); 1830 goto discard_and_relse; 1831 } 1832 sock_put(sk); 1833 return 0; 1834 } 1835 } 1836 1837 process: 1838 if (static_branch_unlikely(&ip6_min_hopcount)) { 1839 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1840 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { 1841 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1842 drop_reason = SKB_DROP_REASON_TCP_MINTTL; 1843 goto discard_and_relse; 1844 } 1845 } 1846 1847 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1848 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1849 goto discard_and_relse; 1850 } 1851 1852 drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, 1853 AF_INET6, dif, sdif); 1854 if (drop_reason) 1855 goto discard_and_relse; 1856 1857 nf_reset_ct(skb); 1858 1859 if (tcp_filter(sk, skb, &drop_reason)) 1860 goto discard_and_relse; 1861 1862 th = (const struct tcphdr *)skb->data; 1863 hdr = ipv6_hdr(skb); 1864 tcp_v6_fill_cb(skb, hdr, th); 1865 1866 
skb->dev = NULL; 1867 1868 if (sk->sk_state == TCP_LISTEN) { 1869 ret = tcp_v6_do_rcv(sk, skb); 1870 goto put_and_return; 1871 } 1872 1873 sk_incoming_cpu_update(sk); 1874 1875 bh_lock_sock_nested(sk); 1876 tcp_segs_in(tcp_sk(sk), skb); 1877 ret = 0; 1878 if (!sock_owned_by_user(sk)) { 1879 ret = tcp_v6_do_rcv(sk, skb); 1880 } else { 1881 if (tcp_add_backlog(sk, skb, &drop_reason)) 1882 goto discard_and_relse; 1883 } 1884 bh_unlock_sock(sk); 1885 put_and_return: 1886 if (refcounted) 1887 sock_put(sk); 1888 return ret ? -1 : 0; 1889 1890 no_tcp_socket: 1891 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1892 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1893 goto discard_it; 1894 1895 tcp_v6_fill_cb(skb, hdr, th); 1896 1897 if (tcp_checksum_complete(skb)) { 1898 csum_error: 1899 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1900 trace_tcp_bad_csum(skb); 1901 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1902 bad_packet: 1903 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1904 } else { 1905 tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); 1906 } 1907 1908 discard_it: 1909 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1910 sk_skb_reason_drop(sk, skb, drop_reason); 1911 return 0; 1912 1913 discard_and_relse: 1914 sk_drops_skbadd(sk, skb); 1915 if (refcounted) 1916 sock_put(sk); 1917 goto discard_it; 1918 1919 do_time_wait: 1920 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1921 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1922 inet_twsk_put(inet_twsk(sk)); 1923 goto discard_it; 1924 } 1925 1926 tcp_v6_fill_cb(skb, hdr, th); 1927 1928 if (tcp_checksum_complete(skb)) { 1929 inet_twsk_put(inet_twsk(sk)); 1930 goto csum_error; 1931 } 1932 1933 tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, 1934 &drop_reason); 1935 switch (tw_status) { 1936 case TCP_TW_SYN: 1937 { 1938 struct sock *sk2; 1939 1940 sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th), 1941 &ipv6_hdr(skb)->saddr, th->source, 1942 &ipv6_hdr(skb)->daddr, 1943 ntohs(th->dest), 1944 
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			/* A listener accepts this SYN: retire the
			 * time-wait sock and restart processing on the
			 * listener.
			 */
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

/* Early demux: look up an established socket before routing so the
 * cached rx dst can be attached to the skb without a refcount.
 */
void tcp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	/* Malformed: data offset smaller than the base header */
	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			/* Only reuse the cached dst if still valid and
			 * received on the same interface.
			 */
			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
};

/* Compute/offload the TCP checksum over the IPv6 pseudo header. */
INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

/* AF operations for native IPv6 TCP sockets. */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	= inet6_csk_xmit,
	.send_check	= tcp_v6_send_check,
	.rebuild_header	= inet6_sk_rebuild_header,
	.sk_rx_dst_set	= inet6_sk_rx_dst_set,
	.conn_request	= tcp_v6_conn_request,
	.syn_recv_sock	= tcp_v6_syn_recv_sock,
	.net_header_len	= sizeof(struct ipv6hdr),
	.setsockopt	= ipv6_setsockopt,
	.getsockopt	= ipv6_getsockopt,
	.mtu_reduced	= tcp_v6_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* MD5/AO signing operations for native IPv6 sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v6_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v6_ao_calc_key_sk,
#endif
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	= ip_queue_xmit,
	.send_check	= tcp_v4_send_check,
	.rebuild_header	= inet_sk_rebuild_header,
	.sk_rx_dst_set	= inet_sk_rx_dst_set,
	.conn_request	= tcp_v6_conn_request,
	.syn_recv_sock	= tcp_v6_syn_recv_sock,
	.net_header_len	= sizeof(struct iphdr),
	.setsockopt	= ipv6_setsockopt,
	.getsockopt	= ipv6_getsockopt,
	.mtu_reduced	= tcp_v4_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* MD5/AO signing operations for v4-mapped sockets (v4 hashing). */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	= tcp_v6_ao_lookup,
	.calc_ao_hash	= tcp_v4_ao_hash_skb,
	.ao_parse	= tcp_v6_parse_ao,
	.ao_calc_key_sk	= tcp_v4_ao_calc_key_sk,
#endif
};

/* sk_destruct: release MD5/AO state before the generic inet6 destruct. */
static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
	sk->sk_destruct = tcp6_destruct_sock;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	/* ttd: ticks until the request's SYN-ACK retransmit timer fires */
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

/* Emit one /proc/net/tcp6 row for a full (non-tw, non-req) socket. */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest = &sp->sk_v6_daddr;
	src = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp = ntohs(inet->inet_sport);

	/* Classify the pending timer for the "tr" column:
	 * 1 = retransmit/loss-probe, 4 = zero-window probe,
	 * 2 = keepalive, 0 = none.
	 */
	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
		timer_active	= 2;
		timer_expires	= icsk->icsk_keepalive_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
2186 */ 2187 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 2188 READ_ONCE(tp->copied_seq), 0); 2189 2190 seq_printf(seq, 2191 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2192 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", 2193 i, 2194 src->s6_addr32[0], src->s6_addr32[1], 2195 src->s6_addr32[2], src->s6_addr32[3], srcp, 2196 dest->s6_addr32[0], dest->s6_addr32[1], 2197 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2198 state, 2199 READ_ONCE(tp->write_seq) - tp->snd_una, 2200 rx_queue, 2201 timer_active, 2202 jiffies_delta_to_clock_t(timer_expires - jiffies), 2203 READ_ONCE(icsk->icsk_retransmits), 2204 from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 2205 READ_ONCE(icsk->icsk_probes_out), 2206 sock_i_ino(sp), 2207 refcount_read(&sp->sk_refcnt), sp, 2208 jiffies_to_clock_t(icsk->icsk_rto), 2209 jiffies_to_clock_t(icsk->icsk_ack.ato), 2210 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), 2211 tcp_snd_cwnd(tp), 2212 state == TCP_LISTEN ? 2213 fastopenq->max_qlen : 2214 (tcp_in_initial_slowstart(tp) ? 
-1 : tp->snd_ssthresh) 2215 ); 2216 } 2217 2218 static void get_timewait6_sock(struct seq_file *seq, 2219 struct inet_timewait_sock *tw, int i) 2220 { 2221 long delta = tw->tw_timer.expires - jiffies; 2222 const struct in6_addr *dest, *src; 2223 __u16 destp, srcp; 2224 2225 dest = &tw->tw_v6_daddr; 2226 src = &tw->tw_v6_rcv_saddr; 2227 destp = ntohs(tw->tw_dport); 2228 srcp = ntohs(tw->tw_sport); 2229 2230 seq_printf(seq, 2231 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2232 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2233 i, 2234 src->s6_addr32[0], src->s6_addr32[1], 2235 src->s6_addr32[2], src->s6_addr32[3], srcp, 2236 dest->s6_addr32[0], dest->s6_addr32[1], 2237 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2238 READ_ONCE(tw->tw_substate), 0, 0, 2239 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2240 refcount_read(&tw->tw_refcnt), tw); 2241 } 2242 2243 static int tcp6_seq_show(struct seq_file *seq, void *v) 2244 { 2245 struct tcp_iter_state *st; 2246 struct sock *sk = v; 2247 2248 if (v == SEQ_START_TOKEN) { 2249 seq_puts(seq, 2250 " sl " 2251 "local_address " 2252 "remote_address " 2253 "st tx_queue rx_queue tr tm->when retrnsmt" 2254 " uid timeout inode\n"); 2255 goto out; 2256 } 2257 st = seq->private; 2258 2259 if (sk->sk_state == TCP_TIME_WAIT) 2260 get_timewait6_sock(seq, v, st->num); 2261 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2262 get_openreq6(seq, v, st->num); 2263 else 2264 get_tcp6_sock(seq, v, st->num); 2265 out: 2266 return 0; 2267 } 2268 2269 static const struct seq_operations tcp6_seq_ops = { 2270 .show = tcp6_seq_show, 2271 .start = tcp_seq_start, 2272 .next = tcp_seq_next, 2273 .stop = tcp_seq_stop, 2274 }; 2275 2276 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2277 .family = AF_INET6, 2278 }; 2279 2280 int __net_init tcp6_proc_init(struct net *net) 2281 { 2282 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2283 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2284 return -ENOMEM; 2285 return 
0; 2286 } 2287 2288 void tcp6_proc_exit(struct net *net) 2289 { 2290 remove_proc_entry("tcp6", net->proc_net); 2291 } 2292 #endif 2293 2294 struct proto tcpv6_prot = { 2295 .name = "TCPv6", 2296 .owner = THIS_MODULE, 2297 .close = tcp_close, 2298 .pre_connect = tcp_v6_pre_connect, 2299 .connect = tcp_v6_connect, 2300 .disconnect = tcp_disconnect, 2301 .accept = inet_csk_accept, 2302 .ioctl = tcp_ioctl, 2303 .init = tcp_v6_init_sock, 2304 .destroy = tcp_v4_destroy_sock, 2305 .shutdown = tcp_shutdown, 2306 .setsockopt = tcp_setsockopt, 2307 .getsockopt = tcp_getsockopt, 2308 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, 2309 .keepalive = tcp_set_keepalive, 2310 .recvmsg = tcp_recvmsg, 2311 .sendmsg = tcp_sendmsg, 2312 .splice_eof = tcp_splice_eof, 2313 .backlog_rcv = tcp_v6_do_rcv, 2314 .release_cb = tcp_release_cb, 2315 .hash = inet_hash, 2316 .unhash = inet_unhash, 2317 .get_port = inet_csk_get_port, 2318 .put_port = inet_put_port, 2319 #ifdef CONFIG_BPF_SYSCALL 2320 .psock_update_sk_prot = tcp_bpf_update_proto, 2321 #endif 2322 .enter_memory_pressure = tcp_enter_memory_pressure, 2323 .leave_memory_pressure = tcp_leave_memory_pressure, 2324 .stream_memory_free = tcp_stream_memory_free, 2325 .sockets_allocated = &tcp_sockets_allocated, 2326 2327 .memory_allocated = &net_aligned_data.tcp_memory_allocated, 2328 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, 2329 2330 .memory_pressure = &tcp_memory_pressure, 2331 .sysctl_mem = sysctl_tcp_mem, 2332 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 2333 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2334 .max_header = MAX_TCP_HEADER, 2335 .obj_size = sizeof(struct tcp6_sock), 2336 .freeptr_offset = offsetof(struct tcp6_sock, 2337 tcp.inet_conn.icsk_inet.sk.sk_freeptr), 2338 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), 2339 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2340 .twsk_prot = &tcp6_timewait_sock_ops, 2341 .rsk_prot = &tcp6_request_sock_ops, 2342 .h.hashinfo = NULL, 
2343 .no_autobind = true, 2344 .diag_destroy = tcp_abort, 2345 }; 2346 EXPORT_SYMBOL_GPL(tcpv6_prot); 2347 2348 2349 static struct inet_protosw tcpv6_protosw = { 2350 .type = SOCK_STREAM, 2351 .protocol = IPPROTO_TCP, 2352 .prot = &tcpv6_prot, 2353 .ops = &inet6_stream_ops, 2354 .flags = INET_PROTOSW_PERMANENT | 2355 INET_PROTOSW_ICSK, 2356 }; 2357 2358 static int __net_init tcpv6_net_init(struct net *net) 2359 { 2360 int res; 2361 2362 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, 2363 SOCK_RAW, IPPROTO_TCP, net); 2364 if (!res) 2365 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; 2366 2367 return res; 2368 } 2369 2370 static void __net_exit tcpv6_net_exit(struct net *net) 2371 { 2372 inet_ctl_sock_destroy(net->ipv6.tcp_sk); 2373 } 2374 2375 static struct pernet_operations tcpv6_net_ops = { 2376 .init = tcpv6_net_init, 2377 .exit = tcpv6_net_exit, 2378 }; 2379 2380 int __init tcpv6_init(void) 2381 { 2382 int ret; 2383 2384 net_hotdata.tcpv6_protocol = (struct inet6_protocol) { 2385 .handler = tcp_v6_rcv, 2386 .err_handler = tcp_v6_err, 2387 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, 2388 }; 2389 ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2390 if (ret) 2391 goto out; 2392 2393 /* register inet6 protocol */ 2394 ret = inet6_register_protosw(&tcpv6_protosw); 2395 if (ret) 2396 goto out_tcpv6_protocol; 2397 2398 ret = register_pernet_subsys(&tcpv6_net_ops); 2399 if (ret) 2400 goto out_tcpv6_protosw; 2401 2402 ret = mptcpv6_init(); 2403 if (ret) 2404 goto out_tcpv6_pernet_subsys; 2405 2406 out: 2407 return ret; 2408 2409 out_tcpv6_pernet_subsys: 2410 unregister_pernet_subsys(&tcpv6_net_ops); 2411 out_tcpv6_protosw: 2412 inet6_unregister_protosw(&tcpv6_protosw); 2413 out_tcpv6_protocol: 2414 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2415 goto out; 2416 } 2417 2418 void tcpv6_exit(void) 2419 { 2420 unregister_pernet_subsys(&tcpv6_net_ops); 2421 inet6_unregister_protosw(&tcpv6_protosw); 2422 
inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2423 } 2424